Add HTML entity decoding and class name pattern parsing

This commit is contained in:
2023-08-11 17:39:34 -05:00
parent d796cbbe17
commit 4e1e3836c2
3 changed files with 30 additions and 2 deletions

View File

@@ -2,17 +2,22 @@ import {Cheerio, Element, load} from "cheerio";
import {readFileSync} from "fs";
import {parse} from "date-fns";
import {inspect} from "util";
import {decode} from "html-entities";
import fetch from "node-fetch";
import {z} from "zod";
// Parse the local file `list.html` with cheerio; `$` is the resulting query function.
const $ = load(readFileSync('list.html'));
// Direct children of `#scheduleListView` that carry the `.listViewWrapper` class.
// NOTE(review): presumably one wrapper per scheduled class — confirm against the page markup.
const classes = $('#scheduleListView').children('.listViewWrapper');
// Matches text of the form "<subject name> <4 digits> Section <digits>".
// Capture groups, in order: subject name, four-digit code, section number
// (this is how extractDetails destructures the match).
const name_pattern = /(.+) (\d{4}) Section (\d+)/;
// Zod schema for a single subject record: a `code` and a `description`, both strings.
// NOTE(review): `description` is listed twice below. This looks like the removed and
// added lines of a diff whose +/- markers were stripped; only the second entry (the
// one that decodes HTML entities) is expected to remain in the real file — confirm.
const subject_schema = z.object({
code: z.string(),
description: z.string(),
// Run the parsed string through html-entities `decode` (level 'all') so the stored
// description has its HTML entities decoded.
description: z.string().transform((v) => decode(v, {level: 'all'})),
});
// Static type of a parsed subject, inferred from the schema above.
type Subject = z.infer<typeof subject_schema>;
const subjects = await z.array(subject_schema).parseAsync(
await fetch(
@@ -27,6 +32,8 @@ const subjects = await z.array(subject_schema).parseAsync(
})
);
// Lookup table from a subject's description to its full record. Entries are inserted
// in array order, so a repeated description keeps the last record seen.
const subject_by_name = new Map<string, (typeof subjects)[number]>();
for (const entry of subjects) {
    subject_by_name.set(entry.description, entry);
}
function getOffset(period: string) {
switch (period) {
@@ -61,9 +68,24 @@ function extractDetails(source: Cheerio<Element>) {
const [start_date, end_date] = raw_date.split("--").map((date) => parse(date.trim(), "MM/dd/yyyy", new Date()));
const identifier = source.find('span.list-view-subj-course-section').text();
const name_match = name_pattern.exec(identifier);
if (name_match === null) {
throw new Error(`Could not parse identifier: ${identifier}`);
}
const [subject_name, code, section] = name_match.slice(1);
const subject = subject_by_name.get(subject_name);
if (subject === undefined) {
throw new Error(`Unknown subject: ${subject_name}`);
}
return {
identifier,
identifier: {
subject,
code: code,
section: section,
crn: null,
},
date: {
start: start_date,
end: end_date,

View File

@@ -17,6 +17,7 @@
"@types/node": "^20.4.9",
"cheerio": "^1.0.0-rc.12",
"date-fns": "^2.30.0",
"html-entities": "^2.4.0",
"ics": "^3.2.0",
"node-fetch": "^3.3.2",
"ts-node": "^10.9.1",

View File

@@ -192,6 +192,11 @@ formdata-polyfill@^4.0.10:
dependencies:
fetch-blob "^3.1.2"
html-entities@^2.4.0:
version "2.4.0"
resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.4.0.tgz#edd0cee70402584c8c76cc2c0556db09d1f45061"
integrity sha512-igBTJcNNNhvZFRtm8uA6xMY6xYleeDwn3PeBCkDz7tHttv4F2hsDI2aPgNERWzvRcNYHNT3ymRaQzllmXj4YsQ==
htmlparser2@^8.0.1:
version "8.0.2"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.2.tgz#f002151705b383e62433b5cf466f5b716edaec21"