From dfcd0a0ef232ccc8eb0c8c80cdc5427de4e3d979 Mon Sep 17 00:00:00 2001 From: Xevion Date: Fri, 11 Aug 2023 03:23:53 -0500 Subject: [PATCH] Work on parsing/traversing HTML tree --- index.ts | 51 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/index.ts b/index.ts index f4e9cf3..bd8122f 100644 --- a/index.ts +++ b/index.ts @@ -1,13 +1,50 @@ -import * as cheerio from "cheerio"; +import {Cheerio, Element, load} from "cheerio"; import * as fs from "fs"; -const $ = cheerio.load(fs.readFileSync('list.html')); -const classes = $('#scheduleListView').children('.listViewWrapper').toArray(); +const $ = load(fs.readFileSync('list.html')); +const classes = $('#scheduleListView').children('.listViewWrapper'); -const getClassName = (html) => { - console.log(html); - return html.find('list-view-course-title').text(); + +function extractDetails(source: Cheerio) { + source.find('div.list-view-crn-info-div > span.bold').toArray().forEach((descriptor) => { + const descriptor_element = $(descriptor); + + // console.log([descriptor_element.text(), descriptor_element.next().text()]) + }) + + // console.log( + // source.find('div.listViewMeetingInformation').children('span').last().text() + // ) + + // For some reason this div divides the information into two parts. + const div_information_divier = source.find('div.list-view-pillbox.ui-pillbox'); + const timing_information = div_information_divier.next(); + + const raw_time = timing_information.text(); + const match = raw_time.match(/(\d{2})\s*:\s*(\d{2})\s*(AM|PM)\D*(\d{2})\s*:\s*(\d{2})\s*(AM|PM)/) + if (match === null) { + throw new Error(`Could not parse time: ${raw_time}`); + } + + const [start_hour, start_minute, end_hour, end_minute] = [1, 2, 4, 5].map((index) => parseInt(match[index])); + const [start_period, end_period] = [match[3], match[6]]; + + return { + start: {minute: start_minute, hour: start_hour, period: start_period}, + end: {minute: end_minute, hour: end_hour, period: end_period}, + days: source.find('div.ui-pillbox-summary.screen-reader').text().split(",").map((day) => day.trim()), + name: source.find('span.list-view-course-title > a.section-details-link').text().trim() + }; } console.log(`${classes.length} classes identified.`); -console.log(`First class: ${getClassName(classes[0])}`); \ No newline at end of file + +classes.toArray().forEach((element) => { + console.log(extractDetails( + $(element) + )) +}); + +// console.log(classes.toArray().map((element) => element.attribs.class)) + +// console.log(`First class: ${extractDetails(classes)}`); \ No newline at end of file