mirror of
https://github.com/Xevion/schedule-extract.git
synced 2025-12-06 11:16:19 -06:00
Add html entity decoding, class name pattern parsing
This commit is contained in:
26
index.ts
26
index.ts
@@ -2,17 +2,22 @@ import {Cheerio, Element, load} from "cheerio";
|
||||
import {readFileSync} from "fs";
|
||||
import {parse} from "date-fns";
|
||||
import {inspect} from "util";
|
||||
import {decode} from "html-entities";
|
||||
import fetch from "node-fetch";
|
||||
import {z} from "zod";
|
||||
|
||||
const $ = load(readFileSync('list.html'));
|
||||
const classes = $('#scheduleListView').children('.listViewWrapper');
|
||||
|
||||
// Pattern for a course identifier string. Capture groups, in order:
//   1. the text preceding the number (used below as the subject name),
//   2. a four-digit number (used below as the course code),
//   3. the digits following the literal word "Section".
// The pattern is not anchored, so it may match anywhere inside the input.
const name_pattern = /(.+) (\d{4}) Section (\d+)/;
|
||||
|
||||
// Zod schema for a single subject record: a string `code` and a string `description`.
const subject_schema = z.object({
|
||||
code: z.string(),
|
||||
// NOTE(review): `description` appears twice here. This looks like the removed and added
// sides of the diff hunk shown together; presumably only the transforming version below
// exists in the committed file — confirm against the repository.
description: z.string(),
|
||||
// Runs the description through html-entities' `decode` (level 'all') after validating it as a string.
description: z.string().transform((v) => decode(v, {level: 'all'})),
|
||||
});
|
||||
|
||||
// Static type of a subject record, inferred from `subject_schema`.
type Subject = z.infer<typeof subject_schema>;
|
||||
|
||||
|
||||
const subjects = await z.array(subject_schema).parseAsync(
|
||||
await fetch(
|
||||
@@ -27,6 +32,8 @@ const subjects = await z.array(subject_schema).parseAsync(
|
||||
})
|
||||
);
|
||||
|
||||
// Lookup table from a subject's `description` string to its subject record.
// If two subjects share the same description, the later entry in `subjects` replaces the earlier one.
const subject_by_name = new Map(subjects.map((subject) => [subject.description, subject]));
|
||||
|
||||
|
||||
function getOffset(period: string) {
|
||||
switch (period) {
|
||||
@@ -61,9 +68,24 @@ function extractDetails(source: Cheerio<Element>) {
|
||||
const [start_date, end_date] = raw_date.split("--").map((date) => parse(date.trim(), "MM/dd/yyyy", new Date()));
|
||||
|
||||
const identifier = source.find('span.list-view-subj-course-section').text();
|
||||
const name_match = name_pattern.exec(identifier);
|
||||
if (name_match === null) {
|
||||
throw new Error(`Could not parse identifier: ${identifier}`);
|
||||
}
|
||||
|
||||
const [subject_name, code, section] = name_match.slice(1);
|
||||
const subject = subject_by_name.get(subject_name);
|
||||
if (subject === undefined) {
|
||||
throw new Error(`Unknown subject: ${subject_name}`);
|
||||
}
|
||||
|
||||
return {
|
||||
identifier,
|
||||
identifier: {
|
||||
subject,
|
||||
code: code,
|
||||
section: section,
|
||||
crn: null,
|
||||
},
|
||||
date: {
|
||||
start: start_date,
|
||||
end: end_date,
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
"@types/node": "^20.4.9",
|
||||
"cheerio": "^1.0.0-rc.12",
|
||||
"date-fns": "^2.30.0",
|
||||
"html-entities": "^2.4.0",
|
||||
"ics": "^3.2.0",
|
||||
"node-fetch": "^3.3.2",
|
||||
"ts-node": "^10.9.1",
|
||||
|
||||
@@ -192,6 +192,11 @@ formdata-polyfill@^4.0.10:
|
||||
dependencies:
|
||||
fetch-blob "^3.1.2"
|
||||
|
||||
html-entities@^2.4.0:
|
||||
version "2.4.0"
|
||||
resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.4.0.tgz#edd0cee70402584c8c76cc2c0556db09d1f45061"
|
||||
integrity sha512-igBTJcNNNhvZFRtm8uA6xMY6xYleeDwn3PeBCkDz7tHttv4F2hsDI2aPgNERWzvRcNYHNT3ymRaQzllmXj4YsQ==
|
||||
|
||||
htmlparser2@^8.0.1:
|
||||
version "8.0.2"
|
||||
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.2.tgz#f002151705b383e62433b5cf466f5b716edaec21"
|
||||
|
||||
Reference in New Issue
Block a user