175 Commits

Author SHA1 Message Date
47132e71d7 chore(master): release 0.6.1 (#1) 2026-01-31 00:37:13 -06:00
87db1a4ccb refactor: extract Justfile inline scripts into scripts/ directory
Move all [script("bun")] blocks into standalone TypeScript files under
scripts/ with shared utilities in scripts/lib/. The Justfile is now ~40
lines of thin `bun scripts/*.ts` wrappers.

Shared code consolidated into two lib files:
- lib/proc.ts: process spawning (run, spawnCollect, raceInOrder, ProcessGroup)
- lib/fmt.ts: color output, elapsed timers, reusable flag parser
2026-01-31 00:34:27 -06:00
e203e8e182 feat(build): auto-regenerate TypeScript bindings on source changes 2026-01-31 00:27:27 -06:00
cbb0a51bca refactor(terms): move term formatting from frontend to backend 2026-01-31 00:26:41 -06:00
c533768362 feat(scraper): improve results visibility and loading states 2026-01-30 23:36:23 -06:00
16039e02a9 fix(metrics): always emit baseline metrics on initial course insertion 2026-01-30 23:32:04 -06:00
7d2255a988 fix(data): decode HTML entities in course titles and instructor names 2026-01-30 23:31:05 -06:00
8bfc14e55c feat(course): distinguish async from synchronous online courses
Add logic to detect and label asynchronous online sections (INT building
with TBA times) separately from synchronous online courses. Update table
rendering to show "Async" instead of "TBA" for these sections.
2026-01-30 23:27:54 -06:00
2689587dd5 fix: avoid status flickering on subjects table 2026-01-30 22:04:48 -06:00
1ad614dad0 feat(scraper): improve dashboard clarity with stat tooltips 2026-01-30 22:00:59 -06:00
ebb7a97c11 fix(ci): add postgres container service for rust tests
Also updated deprecated codeql action to v4.
2026-01-30 21:36:32 -06:00
2df0ba0ec5 chore: add ts-rs generated bindings 2026-01-30 21:29:32 -06:00
dd148e08a0 fix(ci): fix rust/frontend/security job failures and expand local checks 2026-01-30 21:22:01 -06:00
3494341e3f ci: split quality checks into parallel jobs with security scanning
Reorganize CI pipeline into separate jobs for Rust quality, frontend
quality, tests, Docker build, and security audits. Add cargo-audit,
bun audit, and Trivy filesystem scanning. Allow formatting checks to
pass with warnings on push events while failing on PRs.
2026-01-30 21:08:16 -06:00
acccaa54d4 chore: update frontend packages 2026-01-30 21:07:08 -06:00
6863ee58d0 ci: add Release Please automation for changelog and version management 2026-01-30 21:05:15 -06:00
550401b85c refactor: use friendly term codes in URL query parameters 2026-01-30 20:49:08 -06:00
b02a0738e2 chore: bump to v0.6.0, update roadmap & changelog 2026-01-30 20:37:31 -06:00
5d7d60cd96 fix: prevent session pool deadlock on acquire cancellation
Replace is_creating mutex with atomic flag and RAII guard to ensure
proper cleanup when acquire() futures are cancelled mid-creation,
preventing permanent deadlock for subsequent callers.
2026-01-30 20:19:10 -06:00
1954166db6 feat: add name parsing and normalization for instructor-RMP matching 2026-01-30 20:02:59 -06:00
a2a9116b7a fix: avoid clipping page content 2026-01-30 19:32:05 -06:00
a103f0643a feat: refactor admin instructor UI with component extraction and optimistic updates 2026-01-30 19:31:31 -06:00
474d519b9d feat: add auto-format recovery when formatting is sole check failure
Enhances check recipe to detect when only formatting checks fail while
peers pass, automatically applies formatters, then re-verifies. Supports
both Rust (rustfmt + cargo-check) and web (biome + svelte-check)
domains. Displays results eagerly as checks complete instead of in
original order.
2026-01-30 16:01:56 -06:00
fb27bdc119 feat: implement session expiry extension and 401 recovery 2026-01-30 16:01:17 -06:00
669dec0235 feat: add timeline API with schedule-aware enrollment aggregation
Implements POST /api/timeline endpoint that aggregates enrollment by
subject over 15-minute slots, filtering courses by their actual meeting
times. Includes ISR-style schedule cache with hourly background refresh
using stale-while-revalidate pattern, database indexes for efficient
queries, and frontend refactor to dynamically discover subjects from API.
2026-01-30 10:56:11 -06:00
67ba63339a fix: instructor/course mismatching, build order-independent map for association 2026-01-30 09:53:03 -06:00
7b8c11ac13 feat: add calendar export endpoints for ICS and Google Calendar 2026-01-30 04:08:16 -06:00
a767a3f8be feat: add root error page handling 2026-01-30 04:07:53 -06:00
8ce398c0e0 feat: add scraper analytics dashboard with timeseries and subject monitoring 2026-01-30 03:46:48 -06:00
9fed651641 feat: add adaptive scheduling and scraper admin endpoints
Subjects now have individually calculated scrape intervals based on their
historical change ratio, consecutive zero-change runs, failure counts, and
the current time of day. This reduces unnecessary scrapes during inactive
periods while maintaining responsiveness during peak hours. Includes four
new admin endpoints for monitoring scraper health and scheduling decisions.
2026-01-30 02:14:37 -06:00
75a99c10ea feat: add scrape job result persistence for effectiveness tracking 2026-01-30 01:37:41 -06:00
857ceabcca fix: prevent ts-rs serde warnings 2026-01-30 01:36:57 -06:00
203c337cf0 feat: add confidence-based RMP matching with manual review workflow
Replace simple auto-matching with scored candidate generation that
considers department overlap, name uniqueness, and rating volume.
Candidates above 0.85 auto-accept; others require admin approval.
2026-01-30 01:31:11 -06:00
39ba131322 feat: add mobile touch controls with gesture detection 2026-01-29 23:56:45 -06:00
2fad9c969d fix: avoid title on icon, use simpler href-based login redirect 2026-01-29 23:44:05 -06:00
47b4f3315f feat: enhance login page with FAQ section and improved styling 2026-01-29 23:40:48 -06:00
fa28f13a45 feat: add interactive timeline visualization for class times
Implements a canvas-based timeline view with D3 scales showing class
counts across subjects. Features drag-to-pan, mouse wheel zoom, subject
filtering, hover tooltips, and smooth animations. Timeline auto-follows
current time and supports keyboard navigation.
2026-01-29 23:19:39 -06:00
5a6ea1e53a fix: handle backend startup delays with retry logic in auth 2026-01-29 20:04:50 -06:00
ba2b2fc50a fix: increase Banner API timeouts to handle slow responses 2026-01-29 19:49:57 -06:00
cfe098d193 feat: add websocket support for real-time scrape job monitoring 2026-01-29 19:31:04 -06:00
d861888e5e fix: proper centering for login page content, avoid unnecessary scrollbar 2026-01-29 18:05:50 -06:00
f0645d82d9 refactor: persist audit log state in module scope for cross-navigation caching 2026-01-29 17:54:27 -06:00
7a1cd2a39b refactor: centralize number formatting with locale-aware utility 2026-01-29 17:53:38 -06:00
d2985f98ce feat: enhance audit log with smart diffing, conditional request caching, auto refreshing 2026-01-29 17:35:11 -06:00
b58eb840f3 refactor: consolidate navigation with top nav bar and route groups 2026-01-29 17:01:47 -06:00
2bc6fbdf30 feat: implement relative time feedback and improve tooltip customization 2026-01-29 16:44:06 -06:00
e41b970d6e fix: implement i64 serialization for JavaScript compatibility, fixing avatar URL display 2026-01-29 15:51:19 -06:00
e880126281 feat: implement worker timeout protection and crash recovery for job queue
Add JOB_TIMEOUT constant to fail stuck jobs after 5 minutes, and
LOCK_EXPIRY to reclaim abandoned locks after 10 minutes. Introduce
force_unlock_all to recover orphaned jobs at startup. Fix retry limit
off-by-one error and update deduplication to include locked jobs.
2026-01-29 15:50:09 -06:00
db0ec1e69d feat: add rmp profile links and confidence-aware rating display 2026-01-29 15:43:39 -06:00
2947face06 fix: run frontend build first with -e embed flag in Justfile 2026-01-29 15:00:13 -06:00
36bcc27d7f feat: setup smart page transitions, fix laggy theme-aware element transitions 2026-01-29 14:59:47 -06:00
9e403e5043 refactor: modernize Justfile commands and simplify service management 2026-01-29 14:33:16 -06:00
98a6d978c6 feat: implement course change auditing with time-series metrics endpoint 2026-01-29 14:19:36 -06:00
4deeef2f00 feat: optimize asset delivery with build-time compression and encoding negotiation 2026-01-29 13:56:10 -06:00
e008ee5a12 feat: show search duration and result count feedback 2026-01-29 13:15:25 -06:00
a007ccb6a2 fix: remove out:fade transition from CourseTable 2026-01-29 13:08:45 -06:00
527cbebc6a feat: implement user authentication system with admin dashboard 2026-01-29 12:56:51 -06:00
4207783cdd docs: add changelog entries and roadmap completion tracking 2026-01-29 12:27:46 -06:00
c90bd740de refactor: consolidate query logic and eliminate N+1 instructor loads 2026-01-29 12:03:06 -06:00
61f8bd9de7 refactor: consolidate menu snippets and strengthen type safety
Replaces duplicated dropdown/context menu code with parameterized snippet,
eliminates unsafe type casts, adds error handling for clipboard and API
calls, and improves accessibility annotations.
2026-01-29 11:40:55 -06:00
b5eaedc9bc feat: add delivery mode indicators and tooltips to location column 2026-01-29 11:32:35 -06:00
58475c8673 feat: add page selector dropdown with animated pagination controls
Replace Previous/Next buttons with 5-slot page navigation centered in
pagination bar. Current page becomes a dropdown trigger allowing direct
page jumps. Side slots animate on page transitions.
2026-01-29 11:31:55 -06:00
78159707e2 feat: table FLIP animations, improved time tooltip details & day abbreviations 2026-01-29 03:40:40 -06:00
779144a4d5 feat: implement smart name abbreviation for instructor display 2026-01-29 03:14:55 -06:00
0da2e810fe feat: add multi-select subject filtering with searchable comboboxes 2026-01-29 03:03:21 -06:00
ed72ac6bff refactor: extract reusable SimpleTooltip component and enhance UI hints 2026-01-29 01:37:04 -06:00
57b5cafb27 feat: enhance table scrolling and eliminate initial theme flash 2026-01-29 01:18:02 -06:00
841191c44d feat: integrate OverlayScrollbars with theme-aware styling 2026-01-29 01:05:19 -06:00
67d7c81ef4 feat: implement interactive data table with sorting and column control
Replaces static course table with TanStack Table featuring sortable
columns, column visibility management, and server-side sort handling.
Adds reusable data-table primitives adapted for Svelte 5 runes.
2026-01-29 01:04:18 -06:00
d108a41f91 feat: sync RMP professor ratings and display in course search interface 2026-01-29 00:26:40 -06:00
5fab8c216a feat: add course search UI with ts-rs type bindings
Integrate ts-rs for Rust-to-TypeScript type generation, build course
search page with filters, pagination, and expandable detail rows,
and refactor theme toggle into a reactive store with view transition
animation.
2026-01-28 22:11:17 -06:00
15256ff91c docs: replace feature wishlist with organized roadmap and changelog 2026-01-28 21:07:10 -06:00
6df4303bd6 feat: implement comprehensive course data model with reference cache and search 2026-01-28 21:06:53 -06:00
e3b855b956 refactor: migrate frontend from React to SvelteKit 2026-01-28 21:06:53 -06:00
0ce0257fdc chore: reduce tracing verbosity and normalize log levels 2026-01-28 20:13:53 -06:00
fa2fc45aa9 refactor: extract theme toggle styles to CSS and improve timeout handling 2026-01-28 19:47:24 -06:00
7cc8267c2e feat: implement real-time service status tracking and health reporting 2026-01-28 18:37:42 -06:00
1733ee5f86 feat: extract database operations module and add extensive test suite 2026-01-28 17:32:27 -06:00
992263205c refactor: consolidate types, remove dead code, and fix minor bugs
Replace DayOfWeek with chrono::Weekday via extension traits, unify
RateLimitConfig into the config module, and remove the unused time
command, BannerState, and ClassDetails stub. Fix open_only query
parameter to respect false values and correct 12-hour time display.
2026-01-28 16:31:11 -06:00
37942378ae test: add comprehensive unit tests for query builder, CLI args, and config parsing 2026-01-28 14:29:03 -06:00
c445190838 chore: update frontend dependencies to latest versions 2026-01-28 14:24:18 -06:00
57a6a9871f feat: add conditional asset embedding with dev/prod build separation
- Add embed-assets feature flag to make rust-embed/mime_guess optional
- Update Justfile with backend-dev command for development (no embedded assets)
- Add CORS middleware when assets not embedded (for Vite proxy)
- Improve db recipe with Bun script supporting start/reset/rm commands
2026-01-28 13:31:00 -06:00
Ryan Walters
966732a6d2 feat: modernize build tooling and add CI/CD workflow
Switch to Bun for 2-5x faster frontend builds, implement cargo-chef for
reliable Rust dependency caching, and add Biome for fast code
formatting.

Build system improvements:
- Replace pnpm with Bun for frontend package management
- Add cargo-chef to Dockerfile for better Rust build layer caching
- Update all commands to use bun instead of pnpm

Developer experience:
- Add comprehensive Justfile commands (format, format-check, db)
- Implement automated PostgreSQL Docker setup with random port
allocation
- Add stricter checks (--deny warnings on clippy, --all-features flag)

Code quality:
- Add Biome formatter for 10-100x faster TypeScript/JavaScript
formatting
- Add GitHub Actions CI/CD workflow for automated checks
- Update .dockerignore with comprehensive exclusions
- Format all code with cargo fmt (Rust) and Biome (TypeScript)

All changes maintain backward compatibility and can be tested
incrementally.
2025-11-18 18:59:03 -06:00
Ryan Walters
3292d35521 build(docker): copy migrations directory to build context
Ensures database migration files are available during the Docker build process.
2025-11-03 12:07:27 -06:00
Ryan Walters
71ac0782d0 feat(json): enhance error context with debug mode detailed reporting
Improve JSON parsing error messages with build-specific behavior:
- Debug builds: Show full parent object context and type mismatch details
- Release builds: Keep minimal snippets to avoid log spam

Add comprehensive test coverage for error handling and path parsing.
2025-11-03 12:04:20 -06:00
Ryan Walters
1c6d2d4b6e perf: implement batch operations and optimize database indexes
Add batch upsert functionality to reduce database round-trips from N to 1 when inserting courses. Create comprehensive database indexes for common query patterns including term/subject lookups, time-series metrics, and job scheduling. Remove redundant indexes and add monitoring guidance for BRIN index effectiveness.
2025-11-03 11:18:42 -06:00
Ryan Walters
51f8256e61 feat: implement comprehensive retry mechanism and improve observability
Add retry tracking to scrape jobs with configurable max retries (default 5), implement
automatic database migrations on startup, and significantly reduce logging noise from
infrastructure layers. Enhanced tracing with structured spans for better debugging while
keeping output readable by suppressing verbose trace logs from rate limiters and session
management. Improved error handling with detailed retry context and proper session cookie
validation.
2025-11-03 10:18:07 -06:00
Ryan Walters
b1ed2434f8 feat: add ESLint configuration and testing infrastructure
Add comprehensive ESLint setup with React and TypeScript support, create basic integration tests for the shutdown utilities, and enhance the Justfile with a new check command that runs all validation steps (cargo check, clippy, tests, and linting).
2025-11-03 02:21:35 -06:00
Ryan Walters
47c23459f1 refactor: implement comprehensive graceful shutdown across all services
Implements graceful shutdown with broadcast channels and proper timeout handling
for scraper workers, scheduler, bot service, and status update tasks. Introduces
centralized shutdown utilities and improves service manager to handle parallel
shutdown with per-service timeouts instead of shared timeout budgets.

Key changes:
- Add utils module with shutdown helper functions
- Update ScraperService to return errors on shutdown failures
- Refactor scheduler with cancellable work tasks and 5s grace period
- Extract worker shutdown logic into helper methods for clarity
- Add broadcast channel shutdown support to BotService and status task
- Improve ServiceManager to shutdown services in parallel with individual timeouts
2025-11-03 02:10:01 -06:00
Ryan Walters
8af9b0a1a2 refactor(scraper): implement graceful shutdown with broadcast channels
Replace task abortion with broadcast-based graceful shutdown for scheduler and workers. Implement cancellation tokens for in-progress work with 5s timeout. Add tokio-util dependency for CancellationToken support. Update ServiceManager to use completion channels and abort handles for better service lifecycle control.
2025-11-03 01:22:12 -06:00
020a00254f chore: improve database pool connection options, tighter thresholds & limits 2025-09-14 12:18:39 -05:00
45de5be60d refactor: redistribute main.rs into new modules for app & service initialization 2025-09-14 12:18:15 -05:00
8384f418c8 refactor: remove unused/dead code, apply allowances to the rest 2025-09-14 01:57:30 -05:00
3dca896a35 feat(web): add 10 second timeout layer 2025-09-14 01:47:52 -05:00
1b7d2d2824 fix: make version retrieval search current dir, add basic logs, existence check 2025-09-13 22:08:48 -05:00
e370008d75 fix: pass RAILWAY_GIT_COMMIT_SHA through Docker, provide Cargo.toml for frontend (version retrieval) 2025-09-13 22:04:44 -05:00
176574343f fix: provide proper theme-based colors to all elements necessary 2025-09-13 21:57:56 -05:00
91899bb109 fix: limit devtools panel to dev mode 2025-09-13 21:52:14 -05:00
08ae54c093 fix: use wildcard COPY for .git directory, use RAILWAY_GIT_COMMIT_SHA as fallback 2025-09-13 21:20:16 -05:00
33b8681b19 chore: use locale-based number formatting 2025-09-13 21:12:13 -05:00
398a1b9474 feat: dark mode with theme toggle button 2025-09-13 21:11:16 -05:00
a732ff9a15 feat: better frontend state implementation, acquire version in frontend build time 2025-09-13 20:29:18 -05:00
bfcd868337 refactor: proper implementation of services status, better styling/appearance/logic 2025-09-13 19:34:34 -05:00
99f0d0bc49 fix: add build.rs and .git dir to Dockerfile COPY build step, add git dependency 2025-09-13 19:09:27 -05:00
8b7729788d chore: replace template properties 2025-09-13 19:02:01 -05:00
27b0cb877e feat: display project version on frontend 2025-09-13 18:58:35 -05:00
8ec2f7d36f chore: bump version to 0.3.2 2025-09-13 18:52:23 -05:00
28a8a15b6b feat: embed git commit into binary, provide link on frontend 2025-09-13 18:51:48 -05:00
19b3a98f66 feat: setup span recording for CustomJsonFormatter, use 'yansi' for better ANSI terminal colors in CustomPrettyFormatter 2025-09-13 18:40:55 -05:00
b64aa41b14 feat: better profile-based router assembly, tracing layer for responses with span-based request paths 2025-09-13 18:03:20 -05:00
64449e8976 feat: setup pretty frontend for system status 2025-09-13 17:49:35 -05:00
2e0fefa5ee feat: implement interval backoff for presence indicator 2025-09-13 16:15:33 -05:00
97488494fb chore: bump version to 0.3.0 2025-09-13 15:52:40 -05:00
b3322636a9 feat: setup frontend build code, tune .dockerignore patterns
also removed diesel.toml
2025-09-13 15:48:25 -05:00
878cc5f773 docs: setup proper documentation, organize & clean README 2025-09-13 15:27:32 -05:00
94fb6b4190 chore: set banner URL default in config, remove old mentions of redis 2025-09-13 14:48:49 -05:00
e3b638a7d8 feat: add ETag & Cache-Control headers, cached hexadecimal hashes via rapidhash 2025-09-13 13:24:54 -05:00
404a52e64c feat: cache mime types for valid assets, use octet-stream content type 2025-09-13 12:37:36 -05:00
a917315967 fix: simplify asset serving, use fallback primarily 2025-09-13 12:23:27 -05:00
9d51fde893 feat: add arguments for enabling/disabling services 2025-09-13 12:06:10 -05:00
79fc931077 refactor: remove 'auto' mode, just specify value via constant for better clap visibility 2025-09-13 11:38:43 -05:00
f3861a60c4 chore: add dev-release helper profile into Cargo.toml 2025-09-13 11:34:25 -05:00
26b1a88860 chore: use clippy by default for check command, fix lint 2025-09-13 11:31:09 -05:00
27ac9a7302 feat: add formatter CLI argument, setup asset embedding in release mode 2025-09-13 11:30:57 -05:00
1d345ed247 chore: customize bacon, add 'dev' job 2025-09-13 11:30:23 -05:00
6f831f5fa6 feat: setup web/ for tanstack router frontend 2025-09-13 11:30:11 -05:00
ac2638dd9a feat: implement proper SIGTERM handling for container shutdown 2025-09-13 09:43:47 -05:00
cfb847f2e5 feat: holiday exclusion logic for ICS command 2025-09-13 02:20:27 -05:00
e7d47f1f96 feat: implement ICS command 2025-09-13 01:50:18 -05:00
9a48587479 chore: drop redis 2025-09-13 01:49:47 -05:00
624247ee14 feat: basic activity status 2025-09-13 01:04:46 -05:00
430e2a255b fix: avoid crashing due to odd url parse 2025-09-13 01:01:49 -05:00
bbc78131ec feat: setup recoverable/unrecoverable job error distinction, delete unrecoverable jobs 2025-09-13 00:48:11 -05:00
77ab71d4d5 feat: map RAILWAY_DEPLOYMENT_DRAINING_SECONDS to SHUTDOWN_TIMEOUT 2025-09-13 00:36:11 -05:00
9d720bb0a7 feat: implement common job trait & better interface for scheduler & workers 2025-09-13 00:17:53 -05:00
dcc564dee6 fix: credit_hour_session is optional 2025-09-12 23:50:36 -05:00
4ca55a1fd4 feat: schedule & query jobs efficiently in batches 2025-09-12 23:41:27 -05:00
a6e7adcaef fix: improve json error handling, make email_address optional 2025-09-12 23:36:07 -05:00
752c855dec chore: drop env prefixed config vars 2025-09-12 22:39:32 -05:00
14b02df8f4 feat: much better JSON logging, project-wide logging improvements, better use of debug/trace levels, field attributes 2025-09-12 22:01:14 -05:00
00cb209052 fix: disable poor error snippet 2025-09-12 21:40:07 -05:00
dfc05a2789 feat: setup rate limiter middleware & config 2025-09-12 21:12:06 -05:00
fe798e1867 fix: avoid COPY of non existent dir, add .dockerignore 2025-09-12 20:57:33 -05:00
39688f800f chore: update Dockerfile rust to 1.89.0 2025-09-12 20:53:24 -05:00
b2b4bb67f0 chore: rustfmt 2025-09-12 20:52:07 -05:00
e5d8cec2d6 refactor: reorganize banner api files, fix clippy lints, reformat 2025-09-12 20:50:47 -05:00
e9a0558535 feat: asynchronous, rate limited term session acquisition 2025-09-12 20:35:12 -05:00
353c36bcf2 feat: 'search' example binary 2025-09-12 20:12:41 -05:00
2f853a7de9 feat: middleware headers, fix concurrent session cookies issue, invalid session details 2025-09-12 20:12:12 -05:00
dd212c3239 chore: update dependencies, add sqlx 'macros', add futures, add 'http' (explicit) 2025-09-12 20:11:13 -05:00
8ff3a18c3e feat: Dockerfile 2025-09-01 00:47:26 -05:00
43647096e9 feat: scraper system 2025-09-01 00:46:38 -05:00
1bdbd1d6d6 chore: remove unused dependencies 2025-09-01 00:26:20 -05:00
23be6035ed feat: much better, smarter session acquisition 2025-08-31 15:34:49 -05:00
139e4aa635 feat: translate over to sqlx, remove diesel 2025-08-31 15:34:49 -05:00
677bb05b87 chore: update & sort dependencies, add sqlx, remove 'migrations' 2025-08-29 12:52:46 -05:00
f2bd02c970 chore: add bacon config 2025-08-29 12:10:57 -05:00
8cdf969a53 feat: command logging, explicit builtin command error handler 2025-08-29 12:10:57 -05:00
4764d48ac9 feat: move scraper into separate module, begin building data models 2025-08-29 11:07:46 -05:00
e734e40347 feat: setup diesel & schema, course with metrics/audit tables 2025-08-27 18:57:43 -05:00
c7117f14a3 feat: smart day string, terse refactor and use types properly, work on unimplemented commands lightly, util modules 2025-08-27 13:46:41 -05:00
cb8a595326 chore: solve lints, improve formatting 2025-08-27 12:43:43 -05:00
ac70306c04 feat: improve logging, solve lints, improve implementations, remove unused code, standardize things 2025-08-27 12:43:43 -05:00
9972357cf6 feat: implement simple web service, improve ServiceManager encapsulation 2025-08-27 11:58:57 -05:00
2ec899cf25 feat: by CRN querying, redis caching, fixed deserialization, gcal integration 2025-08-27 11:12:08 -05:00
ede064be87 feat: add current term identification, term point state machine 2025-08-27 02:36:59 -05:00
a17bcf0247 fix: broken recurrence, enhanced handling, simpler/terse form 2025-08-27 02:36:59 -05:00
c529bf9727 feat: sort meeting times in gcal command 2025-08-27 00:23:38 -05:00
5ace08327d refactor: clean up MeetingScheduleInfo methods and enhance Term season handling 2025-08-27 00:12:15 -05:00
a01a30d047 feat: continue work on gcal, better meetings schedule types 2025-08-26 23:57:06 -05:00
31ab29c2f1 feat!: first pass re-implementation of banner, gcal command 2025-08-26 21:40:18 -05:00
5018ad0d31 chore: remove dummy service 2025-08-26 19:16:26 -05:00
87100a57d5 feat: service manager for coordination, configurable smart graceful shutdown timeout 2025-08-26 19:16:26 -05:00
cff672b30a feat: use anyhow, refactor services & coordinator out of main.rs 2025-08-26 19:16:26 -05:00
d4c55a3fd8 feat!: begin rust rewrite
service scheduling, configs, all dependencies, tracing, graceful
shutdown, concurrency
2025-08-26 19:16:26 -05:00
251 changed files with 36591 additions and 3792 deletions
+11
View File
@@ -0,0 +1,11 @@
# cargo-audit configuration
# https://github.com/rustsec/rustsec/tree/main/cargo-audit
[advisories]
# Transitive dependencies we can't control
ignore = [
# rsa: Marvin Attack timing sidechannel (via sqlx-mysql, no fix available)
"RUSTSEC-2023-0071",
# derivative: unmaintained (via poise)
"RUSTSEC-2024-0388",
]
+2
View File
@@ -0,0 +1,2 @@
[env]
TS_RS_EXPORT_DIR = { value = "web/src/lib/bindings/", relative = true }
+51
View File
@@ -0,0 +1,51 @@
# Build artifacts
target/
**/target/
# Documentation
README.md
docs/
*.md
# Old Go codebase
go/
# Development configuration
bacon.toml
.env
.env.*
!.env.example
# CI/CD
.github/
.git/
# Development tools
Justfile
rust-toolchain.toml
# Frontend build artifacts and cache
web/node_modules/
web/dist/
web/.vite/
web/.tanstack/
web/.vscode/
# IDE and editor files
.vscode/
.idea/
*.swp
*.swo
*~
# OS files
.DS_Store
Thumbs.db
# Test coverage
coverage/
*.profdata
*.profraw
# SQLx offline mode (include this in builds)
!.sqlx/
+34
View File
@@ -0,0 +1,34 @@
{
"$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json",
"changelog-sections": [
{ "type": "feat", "section": "Features" },
{ "type": "fix", "section": "Bug Fixes" },
{ "type": "perf", "section": "Performance Improvements" },
{ "type": "refactor", "section": "Code Refactoring" },
{ "type": "docs", "section": "Documentation" },
{ "type": "ci", "section": "Continuous Integration" },
{ "type": "build", "section": "Build System" },
{ "type": "chore", "section": "Miscellaneous" },
{ "type": "style", "section": "Styles", "hidden": true },
{ "type": "test", "section": "Tests", "hidden": true }
],
"bump-minor-pre-major": true,
"always-update": true,
"bump-patch-for-minor-pre-major": true,
"include-v-in-tag": true,
"include-component-in-tag": false,
"plugins": ["sentence-case"],
"packages": {
".": {
"release-type": "rust",
"exclude-paths": [".vscode", "docs"],
"extra-files": [
{
"type": "toml",
"path": "Cargo.lock",
"jsonpath": "$.package[?(@.name=='banner')].version"
}
]
}
}
}
+3
View File
@@ -0,0 +1,3 @@
{
".": "0.6.1"
}
+185
View File
@@ -0,0 +1,185 @@
name: CI
on:
push:
branches: [master]
pull_request:
branches: [master]
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
jobs:
rust-quality:
name: Rust Quality
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt, clippy
- name: Cache Rust dependencies
uses: Swatinem/rust-cache@v2
with:
cache-on-failure: true
- name: Check formatting
run: |
if [ "${{ github.event_name }}" = "pull_request" ]; then
cargo fmt --all -- --check
else
cargo fmt --all -- --check || echo "::warning::Rust formatting issues found (not failing on push)"
fi
- name: Clippy
run: cargo clippy --no-default-features -- -D warnings
frontend-quality:
name: Frontend Quality
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
- name: Install dependencies
working-directory: web
run: bun install --frozen-lockfile
- name: Check formatting
working-directory: web
run: |
if [ "${{ github.event_name }}" = "pull_request" ]; then
bun run format:check
else
bun run format:check || echo "::warning::Frontend formatting issues found (not failing on push)"
fi
- name: Lint
working-directory: web
run: bun run lint
- name: Type check
working-directory: web
run: bun run typecheck
rust-tests:
name: Rust Tests
runs-on: ubuntu-latest
services:
postgres:
image: postgres:17-alpine
env:
POSTGRES_USER: banner
POSTGRES_PASSWORD: banner
POSTGRES_DB: banner
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
env:
DATABASE_URL: postgresql://banner:banner@localhost:5432/banner
steps:
- uses: actions/checkout@v4
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
- name: Cache Rust dependencies
uses: Swatinem/rust-cache@v2
with:
cache-on-failure: true
- name: Run tests
run: cargo test --no-default-features
frontend-tests:
name: Frontend Tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
- name: Install dependencies
working-directory: web
run: bun install --frozen-lockfile
- name: Run tests
working-directory: web
run: bun run test
docker-build:
name: Docker Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build image
uses: docker/build-push-action@v6
with:
context: .
push: false
cache-from: type=gha
cache-to: type=gha,mode=max
security:
name: Security Scan
runs-on: ubuntu-latest
permissions:
contents: read
security-events: write
steps:
- uses: actions/checkout@v4
- name: Install cargo-audit
uses: taiki-e/install-action@cargo-audit
- name: Rust security audit
run: cargo audit
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
- name: Install frontend dependencies
working-directory: web
run: bun install --frozen-lockfile
- name: Frontend security audit
working-directory: web
run: bun audit --audit-level=moderate
continue-on-error: true
- name: Trivy filesystem scan
uses: aquasecurity/trivy-action@master
with:
scan-type: fs
scan-ref: .
format: sarif
output: trivy-results.sarif
severity: CRITICAL,HIGH
exit-code: 0
- name: Upload Trivy results
uses: github/codeql-action/upload-sarif@v4
if: always() && hashFiles('trivy-results.sarif') != ''
with:
sarif_file: trivy-results.sarif
+27
View File
@@ -0,0 +1,27 @@
name: Release Please
on:
workflow_dispatch:
workflow_run:
workflows: ["CI"]
types:
- completed
branches:
- master
permissions:
contents: write
pull-requests: write
jobs:
release-please:
name: Create Release PR
runs-on: ubuntu-latest
if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
steps:
- uses: googleapis/release-please-action@v4
with:
token: ${{ secrets.RELEASE_PLEASE_TOKEN }}
config-file: .github/release-please-config.json
manifest-file: .github/release-please-manifest.json
Vendored
+4 -9
View File
@@ -1,10 +1,5 @@
.env
cover.cov
/banner
.*.go
dumps/
js/
.vscode/
*.prof
.task/
bin/
/target
/scripts/node_modules
+3
View File
@@ -0,0 +1,3 @@
{
"rust-analyzer.check.command": "clippy"
}
+141
View File
@@ -0,0 +1,141 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [0.6.1](https://github.com/Xevion/Banner/compare/v0.6.0...v0.6.1) (2026-01-31)
### Features
* **build:** Auto-regenerate TypeScript bindings on source changes ([e203e8e](https://github.com/Xevion/Banner/commit/e203e8e182f7a0b0224a8f9e6bf79d15259215a2))
* **course:** Distinguish async from synchronous online courses ([8bfc14e](https://github.com/Xevion/Banner/commit/8bfc14e55c1bdf5acc2006096476e0b1eb1b7cc6))
* **scraper:** Improve dashboard clarity with stat tooltips ([1ad614d](https://github.com/Xevion/Banner/commit/1ad614dad03d3631a8d119203786718c814e72c7))
* **scraper:** Improve results visibility and loading states ([c533768](https://github.com/Xevion/Banner/commit/c53376836238f3aca92ac82cd5fd59a077bcceff))
### Bug Fixes
* Avoid status flickering on subjects table ([2689587](https://github.com/Xevion/Banner/commit/2689587dd53c572a65eeb91f74c737662e1f148b))
* **ci:** Add postgres container service for rust tests ([ebb7a97](https://github.com/Xevion/Banner/commit/ebb7a97c113fa1d4b61b8637dfe97cae5260075c))
* **ci:** Fix rust/frontend/security job failures and expand local checks ([dd148e0](https://github.com/Xevion/Banner/commit/dd148e08a0b6d5b7afe4ff614d7d6e4e4d0dfce6))
* **data:** Decode HTML entities in course titles and instructor names ([7d2255a](https://github.com/Xevion/Banner/commit/7d2255a988a23f6e1b1c8e7cb5a8ead833ad34da))
* **metrics:** Always emit baseline metrics on initial course insertion ([16039e0](https://github.com/Xevion/Banner/commit/16039e02a999c668d4969a43eb9ed1d4e8d370e1))
### Code Refactoring
* **terms:** Move term formatting from frontend to backend ([cbb0a51](https://github.com/Xevion/Banner/commit/cbb0a51bca9e4e0d6a8fcee90465c93943f2a30e))
* Use friendly term codes in URL query parameters ([550401b](https://github.com/Xevion/Banner/commit/550401b85ceb8a447e316209b479c69062c5b658))
### Continuous Integration
* Add Release Please automation for changelog and version management ([6863ee5](https://github.com/Xevion/Banner/commit/6863ee58d0a5778303af1b7626b2a9eda3043ca0))
* Split quality checks into parallel jobs with security scanning ([3494341](https://github.com/Xevion/Banner/commit/3494341e3fbe9ffd96b6fcd8abbe7f95ecec6f45))
### Miscellaneous
* Add ts-rs generated bindings ([2df0ba0](https://github.com/Xevion/Banner/commit/2df0ba0ec58155d73830a66132cb635dc819e8a9))
* Update frontend packages ([acccaa5](https://github.com/Xevion/Banner/commit/acccaa54d4455500db60d1b6437cad1c592445f1))
## [Unreleased]
## [0.6.0] - 2026-01-30
### Added
- User authentication system with Discord OAuth, sessions, admin roles, and login page with FAQ.
- Interactive timeline visualization with D3 canvas, pan/zoom, touch gestures, and enrollment aggregation API.
- Scraper analytics dashboard with timeseries charts, subject monitoring, and per-subject detail views.
- Adaptive scraper scheduling with admin endpoints for monitoring and configuration.
- Scrape job result persistence for effectiveness tracking.
- WebSocket support for real-time scrape job monitoring with connection status indicators.
- Course change auditing with field-level tracking and time-series metrics endpoint.
- Audit log UI with smart JSON diffing, conditional request caching, and auto-refresh.
- Calendar export web endpoints for ICS download and Google Calendar redirect.
- Confidence-based RMP matching with manual review workflow and admin instructor UI.
- RMP profile links and confidence-aware rating display.
- Name parsing and normalization for improved instructor-RMP matching.
- Mobile touch controls with gesture detection for timeline.
- Worker timeout protection and crash recovery for job queue.
- Build-time asset compression with encoding negotiation (gzip, brotli, zstd).
- Smart page transitions with theme-aware element transitions.
- Search duration and result count feedback.
- Root error page handling.
- Login page with FAQ section and improved styling.
### Changed
- Consolidated navigation with top nav bar and route groups.
- Centralized number formatting with locale-aware utility.
- Modernized Justfile commands and simplified service management.
- Persisted audit log state in module scope for cross-navigation caching.
- Relative time feedback and improved tooltip customization.
### Fixed
- Instructor/course mismatching via build-order-independent map for association.
- Page content clipping.
- Backend startup delays with retry logic in auth.
- Banner API timeouts increased to handle slow responses.
- i64 serialization for JavaScript compatibility, fixing avatar URL display.
- Frontend build ordering with `-e` embed flag in Justfile.
- Login page centering and unnecessary scrollbar.
- ts-rs serde warnings.
## [0.5.0] - 2026-01-29
### Added
- Multi-select subject filtering with searchable comboboxes.
- Smart instructor name abbreviation for compact table display.
- Delivery mode indicators and tooltips in location column.
- Page selector dropdown with animated pagination controls.
- FLIP animations for smooth table row transitions during pagination.
- Time tooltip with detailed meeting schedule and day abbreviations.
- Reusable SimpleTooltip component for consistent UI hints.
### Changed
- Consolidated query logic and eliminated N+1 instructor loads via batch fetching.
- Consolidated menu snippets and strengthened component type safety.
- Enhanced table scrolling with OverlayScrollbars and theme-aware styling.
- Eliminated initial theme flash on page load.
## [0.4.0] - 2026-01-28
### Added
- Web-based course search UI with interactive data table, multi-column sorting, and column visibility controls.
- TypeScript type bindings generated from Rust types via ts-rs.
- RateMyProfessors integration: bulk professor sync via GraphQL and inline rating display in search results.
- Course detail expansion panel with enrollment, meeting times, and instructor info.
- OverlayScrollbars integration for styled, theme-aware scrollable areas.
- Pagination component for navigating large search result sets.
- Footer component with version display.
- API endpoints: `/api/courses/search`, `/api/courses/:term/:crn`, `/api/terms`, `/api/subjects`, `/api/reference/:category`.
- Frontend API client with typed request/response handling and test coverage.
- Course formatting utilities with comprehensive unit tests.
## [0.3.4] - 2026-01
### Added
- Live service status tracking on web dashboard with auto-refresh and health indicators.
- DB operation extraction for improved testability.
- Unit test suite foundation covering core functionality.
- Docker support for PostgreSQL development environment.
- ICS calendar export with comprehensive holiday exclusion coverage.
- Google Calendar link generation with recurrence rules and meeting details.
- Job queue with priority-based scheduling for background scraping.
- Rate limiting with burst allowance for Banner API requests.
- Session management and caching for Banner API interactions.
- Discord bot commands: search, terms, ics, gcal.
- Intelligent scraping system with priority queues and retry tracking.
### Changed
- Type consolidation and dead code removal across the codebase.
Generated
+4640
View File
File diff suppressed because it is too large Load Diff
+71
View File
@@ -0,0 +1,71 @@
[package]
name = "banner"
version = "0.6.1"
edition = "2024"
default-run = "banner"
[features]
default = ["embed-assets"]
embed-assets = ["dep:rust-embed", "dep:mime_guess"]
[dependencies]
anyhow = "1.0.99"
async-trait = "0.1"
axum = { version = "0.8.4", features = ["ws"] }
bitflags = { version = "2.9.4", features = ["serde"] }
chrono = { version = "0.4.42", features = ["serde"] }
compile-time = "0.2.0"
cookie = "0.18.1"
dashmap = "6.1.0"
dotenvy = "0.15.7"
figment = { version = "0.10.19", features = ["toml", "env"] }
fundu = "2.0.1"
futures = "0.3"
http = "1.3.1"
poise = "0.6.1"
rand = "0.9.2"
regex = "1.10"
reqwest = { version = "0.12.23", features = ["json", "cookies"] }
reqwest-middleware = { version = "0.4.2", features = ["json"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.143"
serenity = { version = "0.12.4", features = ["rustls_backend"] }
sqlx = { version = "0.8.6", features = [
"runtime-tokio-rustls",
"postgres",
"chrono",
"json",
"macros",
"migrate",
] }
thiserror = "2.0.16"
time = "0.3.43"
tokio = { version = "1.47.1", features = ["full"] }
tokio-util = "0.7"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.20", features = ["env-filter", "json"] }
url = "2.5"
governor = "0.10.1"
serde_path_to_error = "0.1.17"
num-format = "0.4.4"
tower-http = { version = "0.6.0", features = ["cors", "trace", "timeout", "compression-full"] }
rust-embed = { version = "8.0", features = ["include-exclude"], optional = true }
mime_guess = { version = "2.0", optional = true }
clap = { version = "4.5", features = ["derive"] }
rapidhash = "4.1.0"
yansi = "1.0.1"
extension-traits = "2"
ts-rs = { version = "11.1.0", features = ["chrono-impl", "serde-compat", "serde-json-impl", "no-serde-warnings"] }
html-escape = "0.2.13"
axum-extra = { version = "0.12.5", features = ["query"] }
urlencoding = "2.1.3"
chrono-tz = "0.10.4"
htmlize = { version = "1.0.6", features = ["unescape"] }
unicode-normalization = "0.1.25"
[dev-dependencies]
# A 'release mode' profile that compiles quickly, but still 'appears' like a release build, useful for debugging
[profile.dev-release]
inherits = "dev"
debug-assertions = false
+119
View File
@@ -0,0 +1,119 @@
# Build arguments
ARG RUST_VERSION=1.89.0
ARG RAILWAY_GIT_COMMIT_SHA
# --- Frontend Build Stage ---
FROM oven/bun:1 AS frontend-builder
WORKDIR /app
# Install zstd for pre-compression
RUN apt-get update && apt-get install -y --no-install-recommends zstd && rm -rf /var/lib/apt/lists/*
# Copy backend Cargo.toml for build-time version retrieval
COPY ./Cargo.toml ./
# Copy frontend package files
COPY ./web/package.json ./web/bun.lock* ./
# Install dependencies
RUN bun install --frozen-lockfile
# Copy frontend source code
COPY ./web ./
# Build frontend, then pre-compress static assets (gzip, brotli, zstd)
RUN bun run build && bun run scripts/compress-assets.ts
# --- Chef Base Stage ---
FROM lukemathwalker/cargo-chef:latest-rust-${RUST_VERSION} AS chef
WORKDIR /app
# --- Planner Stage ---
FROM chef AS planner
COPY Cargo.toml Cargo.lock ./
COPY build.rs ./
COPY src ./src
# Migrations & .sqlx specifically left out to avoid invalidating cache
RUN cargo chef prepare --recipe-path recipe.json --bin banner
# --- Rust Build Stage ---
FROM chef AS builder
# Set build-time environment variable for Railway Git commit SHA
ARG RAILWAY_GIT_COMMIT_SHA
ENV RAILWAY_GIT_COMMIT_SHA=${RAILWAY_GIT_COMMIT_SHA}
# Copy recipe from planner and build dependencies only
COPY --from=planner /app/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json --bin banner
# Install build dependencies for final compilation
RUN apt-get update && apt-get install -y \
pkg-config \
libssl-dev \
git \
&& rm -rf /var/lib/apt/lists/*
# Copy source code and built frontend assets
COPY Cargo.toml Cargo.lock ./
COPY build.rs ./
COPY .git* ./
COPY src ./src
COPY migrations ./migrations
COPY --from=frontend-builder /app/dist ./web/dist
# Build web app with embedded assets
RUN cargo build --release --bin banner
# Strip the binary to reduce size
RUN strip target/release/banner
# --- Runtime Stage ---
FROM debian:12-slim
ARG APP=/usr/src/app
ARG APP_USER=appuser
ARG UID=1000
ARG GID=1000
# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
tzdata \
wget \
&& rm -rf /var/lib/apt/lists/*
ARG TZ=Etc/UTC
ENV TZ=${TZ}
# Create user with specific UID/GID
RUN addgroup --gid $GID $APP_USER \
&& adduser --uid $UID --disabled-password --gecos "" --ingroup $APP_USER $APP_USER \
&& mkdir -p ${APP}
# Copy application binary
COPY --from=builder --chown=$APP_USER:$APP_USER /app/target/release/banner ${APP}/banner
# Set proper permissions
RUN chmod +x ${APP}/banner
USER $APP_USER
WORKDIR ${APP}
# Build-time arg for PORT, default to 8000
ARG PORT=8000
# Runtime environment var for PORT, default to build-time arg
ENV PORT=${PORT}
EXPOSE ${PORT}
# Add health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD wget --no-verbose --tries=1 --spider http://localhost:${PORT}/health || exit 1
# Can be explicitly overridden with different hosts & ports
ENV HOSTS=0.0.0.0,[::]
# Implicitly uses PORT environment variable
# Runs all services: web, bot, and scraper
CMD ["sh", "-c", "exec ./banner"]
+46
View File
@@ -0,0 +1,46 @@
set dotenv-load
default:
just --list
# Run all checks in parallel. Pass -f/--fix to auto-format and fix first.
check *flags:
bun scripts/check.ts {{flags}}
# Format all Rust and TypeScript code
format:
cargo fmt --all
bun run --cwd web format
# Run tests. Usage: just test [rust|web|<nextest filter args>]
test *args:
bun scripts/test.ts {{args}}
# Generate TypeScript bindings from Rust types (ts-rs)
bindings:
bun scripts/bindings.ts
# Run the Banner API search demo (hits live UTSA API, ~20s)
search *ARGS:
cargo run -q --bin search -- {{ARGS}}
# Dev server. Flags: -f(rontend) -b(ackend) -W(no-watch) -n(o-build) -r(elease) -e(mbed) --tracing <fmt>
# Pass args to binary after --: just dev -n -- --some-flag
dev *flags:
bun scripts/dev.ts {{flags}}
# Production build. Flags: -d(ebug) -f(rontend-only) -b(ackend-only)
build *flags:
bun scripts/build.ts {{flags}}
# Start PostgreSQL in Docker and update .env with connection string
# Commands: start (default), reset, rm
db cmd="start":
bun scripts/db.ts {{cmd}}
alias b := bun
bun *ARGS:
cd web && bun {{ ARGS }}
sql *ARGS:
lazysql ${DATABASE_URL}
+33 -108
View File
@@ -1,125 +1,50 @@
# banner
A discord bot for executing queries & searches on the Ellucian Banner instance hosting all of UTSA's class data.
A complex multi-service system providing a Discord bot and browser-based interface to UTSA's course data.
## Feature Wishlist
## Services
- Commands
- ICS Download (get an ICS download of your classes with location & timing perfectly set for every class you're in)
- Classes Now (find classes happening)
- Autocomplete
- Class Title
- Course Number
- Term/Part of Term
- Professor
- Attribute
- Component Pagination
- RateMyProfessor Integration (Linked/Embedded)
- Smart term selection (i.e. Summer 2024 will be selected automatically when opened)
- Rate Limiting (bursting with global/user limits)
- DMs Integration (allow usage of the bot in DMs)
- Class Change Notifications (get notified when details about a class change)
- Multi-term Querying (currently the backend for searching is kinda weird)
- Full Autocomplete for Every Search Option
- Metrics, Log Query, Privileged Error Feedback
- Search for Classes
- Major, Professor, Location, Name, Time of Day
- Subscribe to Classes
- Availability (seat, pre-seat)
- Waitlist Movement
- Detail Changes (meta, time, location, seats, professor)
- `time` Start, End, Days of Week
- `seats` Any change in seat/waitlist data
- `meta`
- Lookup via Course Reference Number (CRN)
- Smart Time of Day Handling
- "2 PM" -> Start within 2:00 PM to 2:59 PM
- "2-3 PM" -> Start within 2:00 PM to 3:59 PM
- "ends by 2 PM" -> Ends within 12:00 AM to 2:00 PM
- "after 2 PM" -> Start within 2:01 PM to 11:59 PM
- "before 2 PM" -> Ends within 12:00 AM to 1:59 PM
- Get By Section Command
- CS 4393 001 =>
- Will require SQL to be able to search for a class by its section number
The application consists of three modular services that can be run independently or together:
## Analysis Required
- Discord Bot ([`bot`][src-bot])
Some of the features and architecture of Ellucian's Banner system are not clear.
The following features, JSON, and more require validation & analysis:
- Primary interface for course monitoring and data queries
- Built with [Serenity][serenity] and [Poise][poise] frameworks for robust command handling
- Uses slash commands with comprehensive error handling and logging
- Struct Nullability
- Much of the responses provided by Ellucian contain nulls, and most of them are uncertain as to when and why they're null.
- Analysis must be conducted to be sure of when to use a string and when it should be nillable (pointer).
- Multiple Professors / Primary Indicator
- Multiple Meeting Times
- Meeting Schedule Types
- AFF vs AIN vs AHB etc.
- Do CRNs repeat between years?
- Check whether partOfTerm is always filled in, and its meaning for various class results.
- Check which API calls are affected by change in term/sessionID term select
- SessionIDs
- How long does a session ID work?
- Do I really require a separate one per term?
- How many can I activate, are there any restrictions?
- How should session IDs be checked as 'invalid'?
- What action(s) keep a session ID 'active', if any?
- Are there any courses with multiple meeting times?
- Google Calendar link generation, as an alternative to ICS file generation
- Web Server ([`web`][src-web])
## Change Identification
- [Axum][axum]-based server with a Vite/SvelteKit-based frontend
- [Embeds static assets][rust-embed] at compile time with E-Tags & Cache-Control headers
- Important attributes of a class will be parsed on both the old and new data.
- These attributes will be compared and given identifiers that can be subscribed to.
- When a user subscribes to one of these identifiers, any changes identified will be sent to the user.
- Scraper ([`scraper`][src-scraper])
## Real-time Suggestions
- Intelligent data collection system with priority-based queuing inside PostgreSQL via [`sqlx`][sqlx]
- Rate-limited scraping with burst handling to respect UTSA's systems
- Handles course data updates, availability changes, and metadata synchronization
Various command arguments have the ability to have suggestions appear.
## Quick Start
- They must be fast. As ephemeral suggestions that are only relevant for seconds or less, they need to be delivered in less than a second.
- They need to be easy to acquire. With as many commands & arguments to search as I do, it is paramount that the API be easy to understand & use.
- It cannot be complicated. I only have so much time to develop this.
- It does not need to be persistent. Since the data is scraped and rolled periodically from the Banner system, the data used will be deleted and re-requested occasionally.
```bash
bun install --cwd web # Install frontend dependencies
cargo build # Build the backend
For these reasons, I believe SQLite to be the ideal place for this data to be stored.
It is exceptionally fast, works well in-memory, and is less complicated compared to most other solutions.
just dev # Runs auto-reloading dev build with all services
just dev-build # Development build with release characteristics (frontend is embedded, non-auto-reloading)
- Only required data about the class will be stored, along with the JSON-encoded string.
- For now, this would only be the CRN (and possibly the Term).
- Potentially, a binary encoding could be used for performance, but it is unlikely to be better.
- Database dumping into R2 would be good to ensure that over-scraping of the Banner system does not occur.
- Upon a safe close requested
- Must be done quickly (<8 seconds)
- Every 30 minutes, if any scraping occurred.
- May cause locking of commands.
just build # Production build that embeds assets
```
## Scraping
## Documentation
In order to keep the in-memory database of the bot up-to-date with the Banner system, the API must be scraped.
Scraping will be separated by major to allow for priority majors (namely, Computer Science) to be scraped more often compared to others.
This will lower the overall load on the Banner system while ensuring that data presented by the app is still relevant.
Comprehensive documentation is available in the [`docs/`][documentation] folder.
For now, all majors will be scraped fully every 4 hours with at least 5 minutes between each one.
- On startup, priority majors will be scraped first (if required).
- Other majors will be scraped in arbitrary order (if required).
- Scrape timing will be stored in Redis.
- CRNs will be the Primary Key within SQLite
- If CRNs are duplicated between terms, then the primary key will be (CRN, Term)
Considerations
- Change in metadata should decrease the interval
- The number of courses scraped should change the interval (2 hours per 500 courses involved)
## Rate Limiting, Costs & Bursting
Ideally, this application would implement dynamic rate limiting to ensure overload on the server does not occur.
Better, it would also ensure that priority requests (commands) are dispatched faster than background processes (scraping), while making sure different requests are weighted differently.
For example, a recent scrape of 350 classes should be weighted 5x more than a search for 8 classes by a user.
Still, even if the cap does not normally allow for this request to be processed immediately, the small user search should proceed with a small bursting cap.
The requirements to this hypothetical system would be:
- Conditional Bursting: background processes or other requests deemed "low priority" are not allowed to use bursting.
- Arbitrary Costs: rate limiting is considered in the form of the request size/speed more or less, such that small simple requests can be made more frequently, unlike large requests.
[documentation]: docs/README.md
[src-bot]: src/bot
[src-web]: src/web
[src-scraper]: src/scraper
[serenity]: https://github.com/serenity-rs/serenity
[poise]: https://github.com/serenity-rs/poise
[axum]: https://github.com/tokio-rs/axum
[rust-embed]: https://lib.rs/crates/rust-embed
[sqlx]: https://github.com/launchbadge/sqlx
-46
View File
@@ -1,46 +0,0 @@
version: "3"
tasks:
build:
desc: Build the application
cmds:
- go build -o bin/banner ./cmd/banner
sources:
- ./cmd/banner/**/*.go
- ./internal/**/*.go
generates:
- bin/banner
run:
desc: Run the application
cmds:
- go run ./cmd/banner
deps: [build]
test:
desc: Run tests
cmds:
- go test ./tests/...
env:
ENVIRONMENT: test
test-coverage:
desc: Run tests with coverage
cmds:
- go test -coverpkg=./internal/... -cover ./tests/...
env:
ENVIRONMENT: test
clean:
desc: Clean build artifacts
cmds:
- rm -rf bin/
- go clean -cache
- go clean -modcache
dev:
desc: Run in development mode
cmds:
- go run ./cmd/banner
env:
ENVIRONMENT: development
+52
View File
@@ -0,0 +1,52 @@
# This is a configuration file for the bacon tool
#
# Complete help on configuration: https://dystroy.org/bacon/config/
#
# You may check the current default at
# https://github.com/Canop/bacon/blob/main/defaults/default-bacon.toml
default_job = "check"
env.CARGO_TERM_COLOR = "always"
[jobs.check]
command = ["cargo", "check", "--all-targets"]
need_stdout = false
[jobs.clippy]
command = ["cargo", "clippy", "--all-targets"]
need_stdout = false
[jobs.test]
command = [
"cargo", "nextest", "run",
]
need_stdout = true
analyzer = "nextest"
[jobs.run]
command = [
"cargo", "run",
]
need_stdout = true
allow_warnings = true
background = false
on_change_strategy = "kill_then_restart"
# kill = ["pkill", "-TERM", "-P"]
[jobs.dev]
command = [
"just", "dev"
]
need_stdout = true
allow_warnings = true
background = false
on_change_strategy = "kill_then_restart"
# You may define here keybindings that would be specific to
# a project, for example a shortcut to launch a specific job.
# Shortcuts to internal functions (scrolling, toggling, etc.)
# should go in your personal global prefs.toml file instead.
[keybindings]
c = "job:clippy" # comment this to have 'c' run clippy on only the default target
shift-c = "job:check"
d = "job:dev"
+36
View File
@@ -0,0 +1,36 @@
use std::process::Command;
/// Build script entry point.
///
/// Exposes the current Git commit to the crate at compile time through two
/// environment variables:
///
/// - `GIT_COMMIT_HASH`: the full commit hash, or `"unknown"` if none could be determined
/// - `GIT_COMMIT_SHORT`: the first 7 characters of that hash
fn main() {
    // The hash may come from the environment, so the script must re-run when it changes.
    println!("cargo:rerun-if-env-changed=RAILWAY_GIT_COMMIT_SHA");

    // Prefer the Railway-provided commit SHA, falling back to the local git checkout
    let git_hash = resolve_git_hash(std::env::var("RAILWAY_GIT_COMMIT_SHA").ok());
    let short = short_hash(&git_hash);

    // Set the environment variables that will be available at compile time
    println!("cargo:rustc-env=GIT_COMMIT_HASH={}", git_hash);
    println!("cargo:rustc-env=GIT_COMMIT_SHORT={}", short);

    // Rebuild if the Git commit changes (only works when .git directory is available)
    if std::path::Path::new(".git/HEAD").exists() {
        println!("cargo:rerun-if-changed=.git/HEAD");
        println!("cargo:rerun-if-changed=.git/refs/heads");
    }
}

/// Picks the commit hash to embed.
///
/// `env_value` is the raw value of `RAILWAY_GIT_COMMIT_SHA`, if set. A blank value is
/// treated the same as an unset one: a Docker `ENV VAR=${ARG}` line with no build arg
/// supplied exports the variable as an empty string, which must not suppress the
/// `git` fallback.
fn resolve_git_hash(env_value: Option<String>) -> String {
    env_value
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty())
        .unwrap_or_else(git_head_hash)
}

/// Asks `git` for the hash of `HEAD`.
///
/// Returns `"unknown"` when `git` cannot be spawned, exits unsuccessfully, or prints
/// nothing.
fn git_head_hash() -> String {
    Command::new("git")
        .args(["rev-parse", "HEAD"])
        .output()
        .ok()
        .filter(|output| output.status.success())
        .map(|output| String::from_utf8_lossy(&output.stdout).trim().to_string())
        .filter(|hash| !hash.is_empty())
        .unwrap_or_else(|| "unknown".to_string())
}

/// Returns the first 7 bytes of `hash`, or all of it when it is shorter than that.
///
/// Uses checked slicing so that an unexpected (non-ASCII) value coming from the
/// environment yields the whole string instead of panicking on a char boundary.
fn short_hash(hash: &str) -> &str {
    hash.get(..7).unwrap_or(hash)
}
-299
View File
@@ -1,299 +0,0 @@
// Package main is the entry point for the banner application.
package main
import (
"context"
"flag"
"net/http"
"net/http/cookiejar"
_ "net/http/pprof"
"os"
"os/signal"
"strings"
"syscall"
"time"
_ "time/tzdata"
"github.com/bwmarrin/discordgo"
"github.com/joho/godotenv"
"github.com/redis/go-redis/v9"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/rs/zerolog/pkgerrors"
"github.com/samber/lo"
"resty.dev/v3"
"banner/internal"
"banner/internal/api"
"banner/internal/bot"
"banner/internal/config"
)
var (
Session *discordgo.Session
)
const (
ICalTimestampFormatUtc = "20060102T150405Z"
ICalTimestampFormatLocal = "20060102T150405"
CentralTimezoneName = "America/Chicago"
)
// init runs before main. It loads variables from a .env file (if one exists),
// configures zerolog's timestamps and stack marshaling, selects the log writer
// based on the ENVIRONMENT / RAILWAY_ENVIRONMENT variables, and points
// discordgo's logger at internal.DiscordGoLogger.
func init() {
	// Load environment variables; failure to load a .env file is only logged.
	if err := godotenv.Load(); err != nil {
		log.Debug().Err(err).Msg("Error loading .env file")
	}

	// Resolve the central timezone once up front rather than on every log
	// event: the timestamp function is invoked for each message, and the
	// location never changes for the lifetime of the process.
	// TODO: Move this to config
	centralLocation, err := time.LoadLocation(CentralTimezoneName)
	if err != nil {
		panic(err)
	}

	// Set zerolog's timestamp function to use the central timezone
	zerolog.TimestampFunc = func() time.Time {
		return time.Now().In(centralLocation)
	}

	zerolog.ErrorStackMarshaler = pkgerrors.MarshalStack

	// Use the custom console writer if we're in development.
	// An unset environment is treated as "development".
	environment := internal.GetFirstEnv("ENVIRONMENT", "RAILWAY_ENVIRONMENT")
	if environment == "" {
		environment = "development"
	}

	if environment == "development" {
		log.Logger = zerolog.New(config.NewConsoleWriter()).With().Timestamp().Logger()
	} else {
		log.Logger = zerolog.New(config.LogSplitter{Std: os.Stdout, Err: os.Stderr}).With().Timestamp().Logger()
	}

	log.Debug().Str("environment", environment).Msg("Loggers Setup")

	// Set discordgo's logger to use zerolog
	discordgo.Logger = internal.DiscordGoLogger
}
// initRedis builds a Redis client from REDIS_URL (or REDIS_PRIVATE_URL),
// stores it on cfg, and then verifies connectivity by pinging the server.
//
// Up to five pings are attempted, with a two second pause after each failure.
// A missing or unparsable URL, or exhausting every ping attempt, is reported
// through log.Fatal.
func initRedis(cfg *config.Config) {
	const maxAttempts = 5 // Total pings to attempt

	// Locate the connection string
	redisURL := internal.GetFirstEnv("REDIS_URL", "REDIS_PRIVATE_URL")
	if redisURL == "" {
		log.Fatal().Stack().Msg("REDIS_URL/REDIS_PRIVATE_URL not set")
	}

	// Turn the connection string into client options and register the client
	opts, err := redis.ParseURL(redisURL)
	if err != nil {
		log.Fatal().Stack().Err(err).Msg("Cannot parse redis url")
	}
	cfg.SetRedis(redis.NewClient(opts))

	// Wait for private networking to kick in (production only)
	if !cfg.IsDevelopment {
		time.Sleep(250 * time.Millisecond)
	}

	// Probe the server; the first successful ping ends the function.
	var lastErr error
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		reply, pingErr := cfg.KV.Ping(cfg.Ctx).Result()
		if pingErr == nil {
			log.Debug().Str("ping", reply).Msg("Redis connection successful")
			return
		}

		// Remember the failure, report it, and back off before the next try
		lastErr = pingErr
		log.Warn().Err(pingErr).Int("pings", attempt).Int("remaining", maxAttempts-attempt).Msg("Cannot ping redis")
		time.Sleep(2 * time.Second)
	}

	// Every attempt failed
	log.Fatal().Stack().Err(lastErr).Msg("Reached ping limit while trying to connect")
}
// main wires the application together and then blocks until a shutdown signal arrives.
//
// In order, it: builds the config and records the environment, connects to Redis,
// optionally starts a pprof HTTP server, creates the Resty HTTP client and the Banner
// API wrapper, opens the Discord session, bulk-overwrites the bot's application
// commands, loads terms, starts a periodic scrape goroutine, and finally waits for
// SIGINT/SIGTERM before cancelling the shared context.
func main() {
flag.Parse()
cfg, err := config.New()
if err != nil {
log.Fatal().Stack().Err(err).Msg("Cannot create config")
}
// Try to grab the environment variable, or default to development
environment := internal.GetFirstEnv("ENVIRONMENT", "RAILWAY_ENVIRONMENT")
if environment == "" {
environment = "development"
}
cfg.SetEnvironment(environment)
initRedis(cfg)
// The pprof server is opt-in: it only starts when PPROF_ENABLE equals "true" (case-insensitive)
if strings.EqualFold(os.Getenv("PPROF_ENABLE"), "true") {
// Start pprof server with graceful shutdown
go func() {
// NOTE(review): PORT is used as-is; if it is unset, Addr becomes ":" — confirm that is intended
port := os.Getenv("PORT")
log.Info().Str("port", port).Msg("Starting pprof server")
// No Handler is set, so the server falls back to http.DefaultServeMux,
// which is where the blank net/http/pprof import registers its handlers.
server := &http.Server{
Addr: ":" + port,
}
// Start server in a separate goroutine
go func() {
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
log.Fatal().Stack().Err(err).Msg("Cannot start pprof server")
}
}()
// Wait for context cancellation and then shutdown
<-cfg.Ctx.Done()
log.Info().Msg("Shutting down pprof server")
// Give in-flight requests at most 5 seconds to finish
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second)
defer shutdownCancel()
if err := server.Shutdown(shutdownCtx); err != nil {
log.Error().Err(err).Msg("Pprof server forced to shutdown")
}
}()
}
// Create cookie jar
cookies, err := cookiejar.New(nil)
if err != nil {
// NOTE(review): the error is only logged; execution continues with whatever `cookies` holds
log.Err(err).Msg("Cannot create cookie jar")
}
// Create Resty client with timeout and cookie jar
baseURL := os.Getenv("BANNER_BASE_URL")
client := resty.New().
SetBaseURL(baseURL).
SetTimeout(30*time.Second).
SetCookieJar(cookies).
SetHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36").
AddResponseMiddleware(api.SessionMiddleware)
cfg.SetClient(client)
cfg.SetBaseURL(baseURL)
apiInstance := api.New(cfg)
apiInstance.Setup()
// Create discord session
session, err := discordgo.New("Bot " + os.Getenv("BOT_TOKEN"))
if err != nil {
// NOTE(review): the error is only logged; `session` is still used below
log.Err(err).Msg("Invalid bot parameters")
}
botInstance := bot.New(session, apiInstance, cfg)
botInstance.RegisterHandlers()
// Open discord session
// The Ready handler only logs the identity the bot logged in with
session.AddHandler(func(s *discordgo.Session, r *discordgo.Ready) {
log.Info().Str("username", r.User.Username).Str("discriminator", r.User.Discriminator).Str("id", r.User.ID).Str("session", s.State.SessionID).Msg("Bot is logged in")
})
err = session.Open()
if err != nil {
log.Fatal().Stack().Err(err).Msg("Cannot open the session")
}
// Setup command handlers
// Register commands with discord
// Collect the command names into a zerolog array purely for the log line below
arr := zerolog.Arr()
lo.ForEach(bot.CommandDefinitions, func(cmd *discordgo.ApplicationCommand, _ int) {
arr.Str(cmd.Name)
})
log.Info().Array("commands", arr).Msg("Registering commands")
// In development, use test server, otherwise empty (global) for command registration
guildTarget := ""
if cfg.IsDevelopment {
guildTarget = os.Getenv("BOT_TARGET_GUILD")
}
// Register commands
// The existing commands are fetched before the overwrite so the two sets can be compared afterwards
existingCommands, err := session.ApplicationCommands(session.State.User.ID, guildTarget)
if err != nil {
log.Fatal().Stack().Err(err).Msg("Cannot get existing commands")
}
newCommands, err := session.ApplicationCommandBulkOverwrite(session.State.User.ID, guildTarget, bot.CommandDefinitions)
if err != nil {
log.Fatal().Stack().Err(err).Msg("Cannot register commands")
}
// Compare existing commands with new commands
// Commands are matched by name; this loop only logs, it does not change anything
for _, newCommand := range newCommands {
existingCommand, found := lo.Find(existingCommands, func(cmd *discordgo.ApplicationCommand) bool {
return cmd.Name == newCommand.Name
})
// New command
if !found {
log.Info().Str("commandName", newCommand.Name).Msg("Registered new command")
continue
}
// Compare versions
if newCommand.Version != existingCommand.Version {
log.Info().Str("commandName", newCommand.Name).
Str("oldVersion", existingCommand.Version).Str("newVersion", newCommand.Version).
Msg("Command Updated")
}
}
// Fetch terms on startup
err = apiInstance.TryReloadTerms()
if err != nil {
log.Fatal().Stack().Err(err).Msg("Cannot fetch terms on startup")
}
// Launch a goroutine to scrape the banner system periodically
// A scrape is attempted on every 3-minute tick; failures are logged and the loop keeps running
go func() {
ticker := time.NewTicker(3 * time.Minute)
defer ticker.Stop()
for {
select {
case <-cfg.Ctx.Done():
log.Info().Msg("Periodic scraper stopped due to context cancellation")
return
case <-ticker.C:
err := apiInstance.Scrape()
if err != nil {
log.Err(err).Stack().Msg("Periodic Scrape Failed")
}
}
}
}()
// Close session, ensure Resty client closes
// These defers are registered only at this point, so none of the earlier log.Fatal paths run them
defer session.Close()
defer client.Close()
// Setup signal handler channel
stop := make(chan os.Signal, 1)
signal.Notify(stop, os.Interrupt) // Ctrl+C signal
signal.Notify(stop, syscall.SIGTERM) // Container stop signal
// Wait for signal (indefinite)
closingSignal := <-stop
botInstance.SetClosing() // TODO: Switch to atomic lock with forced close after 10 seconds
// Cancel the context to signal all operations to stop
// (the pprof server goroutine and the periodic scraper above both wait on cfg.Ctx.Done())
cfg.CancelFunc()
// Defers are called after this
log.Warn().Str("signal", closingSignal.String()).Msg("Gracefully shutting down")
}
+117
View File
@@ -0,0 +1,117 @@
# Architecture
## System Overview
The Banner project is built as a multi-service application with the following components:
- **Discord Bot Service**: Handles Discord interactions and commands (Serenity/Poise)
- **Web Service**: Axum HTTP server serving the SvelteKit frontend and REST API endpoints
- **Scraper Service**: Background data collection and synchronization with job queue
- **Database Layer**: PostgreSQL 17 for persistent storage (SQLx with compile-time verification)
- **RateMyProfessors Client**: GraphQL-based bulk sync of professor ratings
### Frontend Stack
- **SvelteKit** with Svelte 5 runes (`$state`, `$derived`, `$effect`)
- **Tailwind CSS v4** via `@tailwindcss/vite`
- **bits-ui** for headless UI primitives (comboboxes, tooltips, dropdowns)
- **TanStack Table** for interactive data tables with sorting and column control
- **OverlayScrollbars** for styled, theme-aware scrollable areas
- **ts-rs** generates TypeScript type bindings from Rust structs
### API Endpoints
| Endpoint | Description |
|---|---|
| `GET /api/health` | Health check |
| `GET /api/status` | Service status, version, and commit hash |
| `GET /api/metrics` | Basic metrics |
| `GET /api/courses/search` | Paginated course search with filters (term, subject, query, open-only, sort) |
| `GET /api/courses/:term/:crn` | Single course detail with instructors and RMP ratings |
| `GET /api/terms` | Available terms from reference cache |
| `GET /api/subjects?term=` | Subjects for a term, ordered by enrollment |
| `GET /api/reference/:category` | Reference data lookups (campuses, instructional methods, etc.) |
## Technical Analysis
### Banner System Integration
Some of the features and architecture of Ellucian's Banner system are not clear.
The following features, JSON, and more require validation & analysis:
- Struct Nullability
- Many of the responses provided by Ellucian contain nulls, and for most of them it is unclear when and why they are null.
- Analysis must be conducted to be sure of when to use a string and when it should be nillable (a pointer).
- Multiple Professors / Primary Indicator
- Multiple Meeting Times
- Meeting Schedule Types
- AFF vs AIN vs AHB etc.
- Do CRNs repeat between years?
- Check whether partOfTerm is always filled in, and its meaning for various class results.
- Check which API calls are affected by change in term/sessionID term select
- SessionIDs
- How long does a session ID work?
- Do I really require a separate one per term?
- How many can I activate, are there any restrictions?
- How should session IDs be checked as 'invalid'?
- What action(s) keep a session ID 'active', if any?
- Are there any courses with multiple meeting times?
- Google Calendar link generation, as an alternative to ICS file generation
## Change Identification
- Important attributes of a class will be parsed on both the old and new data.
- These attributes will be compared and given identifiers that can be subscribed to.
- When a user subscribes to one of these identifiers, any changes identified will be sent to the user.
## Real-time Suggestions
Various command arguments can offer suggestions while the user is typing. These suggestions have the following requirements:
- They must be fast. As ephemeral suggestions that are only relevant for seconds or less, they need to be delivered in less than a second.
- They need to be easy to acquire. With as many commands & arguments to search as I do, it is paramount that the API be easy to understand & use.
- It cannot be complicated. I only have so much time to develop this.
- It does not need to be persistent. Since the data is scraped and rolled periodically from the Banner system, the data used will be deleted and re-requested occasionally.
For these reasons, I believe PostgreSQL to be the ideal place for this data to be stored.
It is exceptionally fast, works well in-memory, and is less complicated compared to most other solutions.
- Only required data about the class will be stored, along with the JSON-encoded string.
- For now, this would only be the CRN (and possibly the Term).
- Potentially, a binary encoding could be used for performance, but it is unlikely to be better.
- Database dumping into R2 would be good to ensure that over-scraping of the Banner system does not occur.
- Upon a safe close requested
- Must be done quickly (<8 seconds)
- Every 30 minutes, if any scraping occurred.
- May cause locking of commands.
## Scraping System
In order to keep the in-memory database of the bot up-to-date with the Banner system, the API must be scraped.
Scraping will be separated by major to allow for priority majors (namely, Computer Science) to be scraped more often compared to others.
This will lower the overall load on the Banner system while ensuring that data presented by the app is still relevant.
For now, all majors will be scraped fully every 4 hours with at least 5 minutes between each one.
- On startup, priority majors will be scraped first (if required).
- Other majors will be scraped in arbitrary order (if required).
- Scrape timing will be stored in database.
- CRNs will be the Primary Key within database
- If CRNs are duplicated between terms, then the primary key will be (CRN, Term)
Considerations
- Change in metadata should decrease the interval
- The number of courses scraped should change the interval (2 hours per 500 courses involved)
## Rate Limiting, Costs & Bursting
Ideally, this application would implement dynamic rate limiting to ensure overload on the server does not occur.
Better, it would also ensure that priority requests (commands) are dispatched faster than background processes (scraping), while making sure different requests are weighted differently.
For example, a recent scrape of 350 classes should be weighted 5x more than a search for 8 classes by a user.
Still, even if the cap does not normally allow for this request to be processed immediately, the small user search should proceed with a small bursting cap.
The requirements to this hypothetical system would be:
- Conditional Bursting: background processes or other requests deemed "low priority" are not allowed to use bursting.
- Arbitrary Costs: rate limiting is considered in the form of the request size/speed more or less, such that small simple requests can be made more frequently, unlike large requests.
+8 -2
View File
@@ -1,11 +1,17 @@
# Sessions
# Banner
All notes on the internal workings of the Banner system by Ellucian.
## Sessions
All notes on the internal workings of Sessions in the Banner system.
- Sessions are generated on demand with a random string of characters.
- The format `{5 random characters}{milliseconds since epoch}`
- Example: ``
- Sessions are invalidated after 30 minutes of inactivity, but this duration may change.
- This delay can be found in the original HTML returned, find `meta[name="maxInactiveInterval"]` and read the `content` attribute.
- This is read at runtime by the javascript on initialization.
- This is read at runtime (in the browser, by javascript) on initialization.
- Multiple timers exist, one is for the Inactivity Timer.
- A dialog will appear asking the user to continue their session.
- If they click the button, the session will be extended via the keepAliveURL (see `meta[name="keepAliveURL"]`).
+43
View File
@@ -0,0 +1,43 @@
# Documentation
This folder contains detailed documentation for the Banner project. This file acts as the index.
## Files
- [`CHANGELOG.md`](CHANGELOG.md) - Notable changes by version
- [`ROADMAP.md`](ROADMAP.md) - Planned features and priorities
- [`BANNER.md`](BANNER.md) - General API documentation on the Banner system
- [`ARCHITECTURE.md`](ARCHITECTURE.md) - Technical implementation details, system design, and analysis
## Samples
The `samples/` folder contains real Banner API response examples:
- `search/` - Course search API responses with various filters
- [`searchResults.json`](samples/search/searchResults.json)
- [`searchResults_500.json`](samples/search/searchResults_500.json)
- [`searchResults_CS500.json`](samples/search/searchResults_CS500.json)
- [`searchResults_malware.json`](samples/search/searchResults_malware.json)
- `meta/` - Metadata API responses (terms, subjects, instructors, etc.)
- [`get_attribute.json`](samples/meta/get_attribute.json)
- [`get_campus.json`](samples/meta/get_campus.json)
- [`get_instructionalMethod.json`](samples/meta/get_instructionalMethod.json)
- [`get_instructor.json`](samples/meta/get_instructor.json)
- [`get_partOfTerm.json`](samples/meta/get_partOfTerm.json)
- [`get_subject.json`](samples/meta/get_subject.json)
- [`getTerms.json`](samples/meta/getTerms.json)
- `course/` - Course detail API responses (HTML and JSON)
- [`getFacultyMeetingTimes.json`](samples/course/getFacultyMeetingTimes.json)
- [`getClassDetails.html`](samples/course/getClassDetails.html)
- [`getCorequisites.html`](samples/course/getCorequisites.html)
- [`getCourseDescription.html`](samples/course/getCourseDescription.html)
- [`getEnrollmentInfo.html`](samples/course/getEnrollmentInfo.html)
- [`getFees.html`](samples/course/getFees.html)
- [`getLinkedSections.html`](samples/course/getLinkedSections.html)
- [`getRestrictions.html`](samples/course/getRestrictions.html)
- [`getSectionAttributes.html`](samples/course/getSectionAttributes.html)
- [`getSectionBookstoreDetails.html`](samples/course/getSectionBookstoreDetails.html)
- [`getSectionPrerequisites.html`](samples/course/getSectionPrerequisites.html)
- [`getXlistSections.html`](samples/course/getXlistSections.html)
These samples are used for development, testing, and understanding the Banner API structure.
+44
View File
@@ -0,0 +1,44 @@
# Roadmap
## Now
- **Discord bot revival** - Audit and fix all existing commands (search, terms, ics, gcal) against the current data model. Add test coverage. Bot has been untouched since ~0.3.4 and commands may be broken.
- **Notification and subscription system** - Subscribe to courses and get alerts on seat availability, waitlist movement, and detail changes (time, location, professor, seats). Deliver via Discord bot and web dashboard.
- **Mobile/responsive redesign** - Hamburger nav for sidebar, responsive table column hiding, mobile-friendly admin pages. Timeline is the only area with solid mobile support; most pages need work.
- **Professor name search filter** - Filter search results by instructor. Backend code exists but is commented out.
- **Search field autocomplete** - Typeahead for course titles, course numbers, professors, and terms.
- **Large component extraction** - Break down CourseTable, Instructors page, and TimelineCanvas into smaller, testable subcomponents.
## Soon
- **Bot slash command parity** - Keep Discord bot commands in sync with web features: timeline summaries, RMP lookups, audit log highlights, notification management via bot.
- **E2E test suite** - Playwright tests for critical user flows: search, login, admin pages, timeline interaction.
- **Settings page** - Replace placeholder with theme preferences, notification settings, default term/subject selection.
- **Profile enhancements** - Expand from read-only stub to subscription management, saved searches, and course watchlists.
- **Smart time-of-day search parsing** - Support natural queries like "2 PM", "ends by 2 PM", "after 2 PM" mapped to time ranges.
- **Multi-term querying** - Query across multiple terms in a single search instead of one at a time.
- **Historical analytics visualization** - Build trend UI on top of existing course metrics and timeline API. Fill-rate charts per course or professor.
- **Schedule builder** - Visual weekly schedule tool for assembling a conflict-free course lineup. Timeline visualization serves as a foundation.
## Eventually
- **API rate limiting** - Rate limiter on public API endpoints. Needed before any public or external exposure.
- **Bulk admin operations** - Batch RMP match/reject, bulk user management, data export from admin pages.
- **Degree audit helper** - Map available courses to degree requirements and suggest what to take next.
- **DM support** - Allow the Discord bot to respond in direct messages, not just guild channels.
- **"Classes Now" command** - Find classes currently in session based on the current day and time.
- **Privileged error feedback** - Detailed error information surfaced to bot admins when commands fail.
## Done
- **Interactive timeline visualization** - D3 canvas with pan/zoom, touch gestures, and enrollment aggregation API. *(0.6.0)*
- **Scraper analytics dashboard** - Timeseries charts, subject monitoring, adaptive scheduling, and admin endpoints. *(0.6.0)*
- **WebSocket job monitoring** - Real-time scrape job queue with live connection status indicators. *(0.6.0)*
- **Course change audit log** - Field-level change tracking with smart diffing, conditional caching, and auto-refresh. *(0.6.0)*
- **User authentication system** - Discord OAuth, sessions, admin roles, and login page. *(0.6.0)*
- **Dynamic scraper scheduling** - Adaptive scrape intervals based on change frequency and course volume. *(0.6.0)*
- **Metrics dashboard** - Scraper and service metrics surfaced on the web dashboard. *(0.6.0)*
- **Subject/major search filter** - Multi-select subject filtering with searchable comboboxes. *(0.5.0)*
- **Web course search UI** - Browser-based course search with interactive data table, sorting, pagination, and column controls. *(0.4.0)*
- **RateMyProfessor integration** - Bulk professor sync via GraphQL with inline ratings in search results. *(0.4.0)*
- **Test coverage expansion** - Unit tests for course formatting, API client, query builder, CLI args, and config parsing. *(0.3.4--0.4.0)*
-27
View File
@@ -1,27 +0,0 @@
module banner
go 1.24.0
toolchain go1.24.2
require (
github.com/bwmarrin/discordgo v0.29.0
github.com/joho/godotenv v1.5.1
github.com/pkg/errors v0.9.1
github.com/redis/go-redis/v9 v9.12.1
github.com/rs/zerolog v1.34.0
github.com/samber/lo v1.51.0
resty.dev/v3 v3.0.0-beta.3
)
require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/gorilla/websocket v1.5.3 // indirect
github.com/mattn/go-colorable v0.1.14 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
golang.org/x/crypto v0.41.0 // indirect
golang.org/x/net v0.43.0 // indirect
golang.org/x/sys v0.35.0 // indirect
golang.org/x/text v0.28.0 // indirect
)
-52
View File
@@ -1,52 +0,0 @@
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/bwmarrin/discordgo v0.29.0 h1:FmWeXFaKUwrcL3Cx65c20bTRW+vOb6k8AnaP+EgjDno=
github.com/bwmarrin/discordgo v0.29.0/go.mod h1:NJZpH+1AfhIcyQsPeuBKsUtYrRnjkyu0kIVMCHkZtRY=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/redis/go-redis/v9 v9.12.1 h1:k5iquqv27aBtnTm2tIkROUDp8JBXhXZIVu1InSgvovg=
github.com/redis/go-redis/v9 v9.12.1/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw=
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ=
github.com/samber/lo v1.51.0 h1:kysRYLbHy/MB7kQZf5DSN50JHmMsNEdeY24VzJFu7wI=
github.com/samber/lo v1.51.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0=
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
resty.dev/v3 v3.0.0-beta.3 h1:3kEwzEgCnnS6Ob4Emlk94t+I/gClyoah7SnNi67lt+E=
resty.dev/v3 v3.0.0-beta.3/go.mod h1:OgkqiPvTDtOuV4MGZuUDhwOpkY8enjOsjjMzeOHefy4=
-491
View File
@@ -1,491 +0,0 @@
package api
import (
"banner/internal"
"banner/internal/config"
"banner/internal/models"
"context"
"encoding/json"
"errors"
"fmt"
"net/url"
"strconv"
"strings"
"time"
"github.com/redis/go-redis/v9"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
"resty.dev/v3"
)
// API provides a client for interacting with the Banner API.
// The struct itself only holds the shared configuration; the session and term
// caches used by its methods live in package-level variables in this file.
type API struct {
// config is the application configuration handed to New. Methods in this
// file reach the HTTP client, key-value store, and context through it.
config *config.Config
}
// New builds an API client bound to the supplied configuration.
// The configuration pointer is stored as-is; it is not copied or validated here.
func New(config *config.Config) *API {
	client := &API{config: config}
	return client
}
// Package-level session state, shared by every API value in the process.
// NOTE(review): nothing in this file guards these with a lock — confirm callers are serialized.
var (
// latestSession is the most recently generated session ID; it stays empty until EnsureSession first runs.
latestSession string
// sessionTime is when the session was generated or last refreshed (written by EnsureSession and SessionMiddleware).
sessionTime time.Time
// expiryTime is how long after sessionTime a session is still treated as usable.
expiryTime = 25 * time.Minute
)
// SessionMiddleware is a Resty response middleware that refreshes the session timer after each
// successful Banner class-search call.
//
// The timer is only pushed forward while the current session is still within expiryTime; a
// session that has already lapsed is left untouched so that EnsureSession replaces it.
//
// It always returns nil, so it never fails the response chain.
func SessionMiddleware(_ *resty.Client, r *resty.Response) error {
	// Without the underlying HTTP request there is no path to inspect.
	if r.Request == nil || r.Request.RawRequest == nil || r.Request.RawRequest.URL == nil {
		return nil
	}
	if !r.IsSuccess() {
		return nil
	}

	// URL.Path of an absolute URL begins with "/", so the prefix comparison previously never
	// matched. Strip one leading slash so both "/Student..." and "Student..." are accepted.
	path := strings.TrimPrefix(r.Request.RawRequest.URL.Path, "/")
	if !strings.HasPrefix(path, "StudentRegistrationSsb/ssb/classSearch/") {
		return nil
	}

	// Only extend a session that is still valid.
	if time.Since(sessionTime) <= expiryTime {
		sessionTime = time.Now()
	}
	return nil
}
// GenerateSession produces a fresh session ID for use with the Banner API: a 5-character
// random prefix followed by a nonce.
// NOTE(review): the nonce is presumably a timestamp, per the Banner notes — confirm in internal.Nonce.
//
// Callers should normally go through EnsureSession rather than calling this directly.
func GenerateSession() string {
	prefix := internal.RandomString(5)
	return prefix + internal.Nonce()
}
// DefaultTerm picks the term to use when none is specified: the current term for time t when
// one exists, otherwise the next term.
// NOTE(review): the next term is dereferenced without a nil check — confirm GetCurrentTerm
// always returns at least one non-nil term.
func (a *API) DefaultTerm(t time.Time) config.Term {
	current, next := config.GetCurrentTerm(*a.config.SeasonRanges, t)
	if current != nil {
		return *current
	}
	return *next
}
// terms is the package-level cache of Banner terms, filled by TryReloadTerms.
var terms []BannerTerm
// lastTermUpdate is when terms was last successfully loaded; zero until the first load.
var lastTermUpdate time.Time
// TryReloadTerms refreshes the cached term list when it is empty or older than 24 hours.
//
// When the cache is populated and fresh, this is a no-op. Otherwise the first 100 terms are
// requested from the Banner API. On failure the previously cached terms are kept untouched
// (a failed refresh must not wipe a usable cache) and the wrapped error is returned; the
// timestamp is not advanced, so the next call retries.
func (a *API) TryReloadTerms() error {
	if len(terms) > 0 && time.Since(lastTermUpdate) < 24*time.Hour {
		return nil
	}

	// Load into a local first so an error cannot overwrite the package-level cache with nil.
	loaded, err := a.GetTerms("", 1, 100)
	if err != nil {
		return fmt.Errorf("failed to load terms: %w", err)
	}

	terms = loaded
	lastTermUpdate = time.Now()
	return nil
}
// IsTermArchived reports whether the given term code is archived (view only).
//
// The term cache is refreshed first if needed. The function errs on the side of "archived":
// it returns true when the refresh fails and when the code is not present in the cache,
// logging the reason in both cases.
//
// TODO: Add error handling for when a term does not exist.
func (a *API) IsTermArchived(term string) bool {
	if err := a.TryReloadTerms(); err != nil {
		log.Err(err).Stack().Msg("Failed to reload terms")
		return true
	}

	// Look the code up in the cached term list.
	hasCode := func(candidate BannerTerm) bool {
		return candidate.Code == term
	}
	if match, ok := lo.Find(terms, hasCode); ok {
		return match.Archived()
	}

	log.Warn().Str("term", term).Msg("Term does not exist")
	return true
}
// EnsureSession returns a usable session ID, generating a new one when none exists yet or
// when the current one has reached expiryTime.
func (a *API) EnsureSession() string {
	stillValid := latestSession != "" && time.Since(sessionTime) < expiryTime
	if stillValid {
		return latestSession
	}

	latestSession = GenerateSession()
	sessionTime = time.Now()
	return latestSession
}
// Pair represents a key-value pair from the Banner API.
// It is the element type returned by the subject, campus, and instructional-method lookups
// in this file, and the underlying type of BannerTerm and Instructor.
type Pair struct {
// Code is decoded from the "code" JSON key.
Code string `json:"code"`
// Description is decoded from the "description" JSON key.
Description string `json:"description"`
}
// BannerTerm represents a term in the Banner system.
// It is a distinct type with the same fields as Pair (Code, Description).
type BannerTerm Pair
// Instructor represents an instructor in the Banner system.
// It is a distinct type with the same fields as Pair (Code, Description).
type Instructor Pair
// Archived reports whether the term is in an archival (view-only) state, which is detected
// by the term's description containing the text "View Only".
func (term BannerTerm) Archived() bool {
	const viewOnlyMarker = "View Only"
	return strings.Contains(term.Description, viewOnlyMarker)
}
// GetTerms fetches one page of terms from the Banner API.
//
// search is sent as the searchTerm filter, page is sent as the "offset" query parameter and
// must be at least 1, and maxResults is sent as "max". A page below 1 is rejected before any
// request is made.
func (a *API) GetTerms(search string, page int, maxResults int) ([]BannerTerm, error) {
	if page < 1 {
		return nil, errors.New("offset must be greater than 0")
	}

	pageParam := strconv.Itoa(page)
	limitParam := strconv.Itoa(maxResults)

	res, err := a.config.Client.NewRequest().
		SetQueryParam("searchTerm", search).
		SetQueryParam("offset", pageParam).
		SetQueryParam("max", limitParam).
		SetQueryParam("_", internal.Nonce()).
		SetExpectResponseContentType("application/json").
		SetResult(&[]BannerTerm{}).
		Get("/classSearch/getTerms")
	if err != nil {
		return nil, fmt.Errorf("failed to get terms: %w", err)
	}

	// The decoded result comes back as the pointer type registered with SetResult.
	if parsed, ok := res.Result().(*[]BannerTerm); ok {
		return *parsed, nil
	}
	return nil, fmt.Errorf("terms parsing failed to cast: %v", res.Result())
}
// SelectTerm selects a term in the Banner system for the given session.
// This is required before other API calls can be made.
//
// It posts the term-selection form to /term/search, then follows the "fwdUrl" returned in the
// JSON response with a GET request. An error is returned when either request fails, when the
// first response cannot be decoded into the expected shape, or when the follow-up request does
// not answer with HTTP 200.
func (a *API) SelectTerm(term string, sessionID string) error {
	form := url.Values{
		"term":            {term},
		"studyPath":       {""},
		"studyPathText":   {""},
		"startDatepicker": {""},
		"endDatepicker":   {""},
		"uniqueSessionId": {sessionID},
	}

	type RedirectResponse struct {
		FwdURL string `json:"fwdUrl"`
	}

	req := a.config.Client.NewRequest().
		SetResult(&RedirectResponse{}).
		SetQueryParam("mode", "search").
		SetBody(form.Encode()).
		SetExpectResponseContentType("application/json").
		SetHeader("Content-Type", "application/x-www-form-urlencoded")

	res, err := req.Post("/term/search")
	if err != nil {
		return fmt.Errorf("failed to select term: %w", err)
	}

	// Guard the cast: an unexpected result type must surface as an error, not a panic.
	redirectResponse, ok := res.Result().(*RedirectResponse)
	if !ok || redirectResponse == nil {
		return fmt.Errorf("term selection response failed to cast: %v", res.Result())
	}

	// TODO: Mild validation to ensure the redirect is appropriate

	// Make a GET request to the fwdUrl. The error must be checked before touching the
	// response, which may be nil when the request itself failed.
	res, err = a.config.Client.NewRequest().Get(redirectResponse.FwdURL)
	if err != nil {
		return fmt.Errorf("failed to follow term selection redirect: %w", err)
	}

	// Assert that the response is OK (200)
	if res.StatusCode() != 200 {
		return fmt.Errorf("redirect response was not OK: %d", res.StatusCode())
	}

	return nil
}
// GetPartOfTerms fetches one page of part-of-term entries for a term from the Banner API.
//
// offset must be at least 1; a lower value is rejected before any request is made. The
// request carries the current session ID obtained from EnsureSession.
func (a *API) GetPartOfTerms(search string, term int, offset int, maxResults int) ([]BannerTerm, error) {
	if offset < 1 {
		return nil, errors.New("offset must be greater than 0")
	}

	termParam := strconv.Itoa(term)
	offsetParam := strconv.Itoa(offset)
	limitParam := strconv.Itoa(maxResults)

	res, err := a.config.Client.NewRequest().
		SetQueryParam("searchTerm", search).
		SetQueryParam("term", termParam).
		SetQueryParam("offset", offsetParam).
		SetQueryParam("max", limitParam).
		SetQueryParam("uniqueSessionId", a.EnsureSession()).
		SetQueryParam("_", internal.Nonce()).
		SetExpectResponseContentType("application/json").
		SetResult(&[]BannerTerm{}).
		Get("/classSearch/get_partOfTerm")
	if err != nil {
		return nil, fmt.Errorf("failed to get part of terms: %w", err)
	}

	if parts, ok := res.Result().(*[]BannerTerm); ok {
		return *parts, nil
	}
	return nil, fmt.Errorf("term parsing failed to cast: %v", res.Result())
}
// GetInstructors fetches one page of instructors for a term from the Banner API.
//
// offset must be at least 1; a lower value is rejected before any request is made. The
// request carries the current session ID obtained from EnsureSession.
func (a *API) GetInstructors(search string, term string, offset int, maxResults int) ([]Instructor, error) {
	if offset < 1 {
		return nil, errors.New("offset must be greater than 0")
	}

	offsetParam := strconv.Itoa(offset)
	limitParam := strconv.Itoa(maxResults)

	res, err := a.config.Client.NewRequest().
		SetQueryParam("searchTerm", search).
		SetQueryParam("term", term).
		SetQueryParam("offset", offsetParam).
		SetQueryParam("max", limitParam).
		SetQueryParam("uniqueSessionId", a.EnsureSession()).
		SetQueryParam("_", internal.Nonce()).
		SetExpectResponseContentType("application/json").
		SetResult(&[]Instructor{}).
		Get("/classSearch/get_instructor")
	if err != nil {
		return nil, fmt.Errorf("failed to get instructors: %w", err)
	}

	if found, ok := res.Result().(*[]Instructor); ok {
		return *found, nil
	}
	return nil, fmt.Errorf("instructor parsing failed to cast: %v", res.Result())
}
// ClassDetails represents the detailed information for a class.
// It is currently an empty placeholder: it declares no fields, so decoding a response into
// it retains nothing.
//
// TODO: Implement this struct and the associated GetCourseDetails function.
type ClassDetails struct {
}
// GetCourseDetails retrieves the details for a specific course, identified by term and CRN.
//
// The term and CRN are sent as a JSON body on a GET to /searchResults/getClassDetails.
// NOTE(review): a body on a GET request is unusual — confirm Banner actually reads it.
//
// Every failure, including a failure to encode the request body, is returned as an error
// rather than terminating the process. ClassDetails is still an empty placeholder, so the
// returned value carries no data yet.
func (a *API) GetCourseDetails(term int, crn int) (*ClassDetails, error) {
	body, err := json.Marshal(map[string]string{
		"term":                  strconv.Itoa(term),
		"courseReferenceNumber": strconv.Itoa(crn),
		"first":                 "first", // TODO: What is this?
	})
	if err != nil {
		return nil, fmt.Errorf("failed to marshal course details request: %w", err)
	}

	req := a.config.Client.NewRequest().
		SetBody(body).
		SetExpectResponseContentType("application/json").
		SetResult(&ClassDetails{})

	res, err := req.Get("/searchResults/getClassDetails")
	if err != nil {
		return nil, fmt.Errorf("failed to get course details: %w", err)
	}

	details, ok := res.Result().(*ClassDetails)
	if !ok {
		return nil, fmt.Errorf("course details parsing failed to cast: %v", res.Result())
	}

	return details, nil
}
// Search performs a course search for the given term and query and returns the results.
//
// The Banner search form is reset first (see ResetDataForm). sort is sent as the sortColumn
// parameter; sortDescending selects "desc" as the sortDirection, otherwise "asc" is used.
// The request carries the current session ID obtained from EnsureSession.
func (a *API) Search(term string, query *Query, sort string, sortDescending bool) (*models.SearchResult, error) {
	a.ResetDataForm()

	// Honor the caller's requested direction instead of always sorting ascending.
	sortDirection := "asc"
	if sortDescending {
		sortDirection = "desc"
	}

	params := query.Paramify()
	params["txt_term"] = term
	params["uniqueSessionId"] = a.EnsureSession()
	params["sortColumn"] = sort
	params["sortDirection"] = sortDirection

	// These dates are not available for usage anywhere in the UI, but are included in every query
	params["startDatepicker"] = ""
	params["endDatepicker"] = ""

	req := a.config.Client.NewRequest().
		SetQueryParams(params).
		SetExpectResponseContentType("application/json").
		SetResult(&models.SearchResult{})

	res, err := req.Get("/searchResults/searchResults")
	if err != nil {
		return nil, fmt.Errorf("failed to search: %w", err)
	}

	searchResult, ok := res.Result().(*models.SearchResult)
	if !ok {
		return nil, fmt.Errorf("search result parsing failed to cast: %v", res.Result())
	}

	return searchResult, nil
}
// GetSubjects fetches one page of subjects for a term from the Banner API.
//
// offset must be at least 1; a lower value is rejected before any request is made. The
// request carries the current session ID obtained from EnsureSession.
func (a *API) GetSubjects(search string, term string, offset int, maxResults int) ([]Pair, error) {
	if offset < 1 {
		return nil, errors.New("offset must be greater than 0")
	}

	offsetParam := strconv.Itoa(offset)
	limitParam := strconv.Itoa(maxResults)

	res, err := a.config.Client.NewRequest().
		SetQueryParam("searchTerm", search).
		SetQueryParam("term", term).
		SetQueryParam("offset", offsetParam).
		SetQueryParam("max", limitParam).
		SetQueryParam("uniqueSessionId", a.EnsureSession()).
		SetQueryParam("_", internal.Nonce()).
		SetExpectResponseContentType("application/json").
		SetResult(&[]Pair{}).
		Get("/classSearch/get_subject")
	if err != nil {
		return nil, fmt.Errorf("failed to get subjects: %w", err)
	}

	if found, ok := res.Result().(*[]Pair); ok {
		return *found, nil
	}
	return nil, fmt.Errorf("subjects parsing failed to cast: %v", res.Result())
}
// GetCampuses fetches one page of campuses for a term from the Banner API.
//
// offset must be at least 1; a lower value is rejected before any request is made. The
// request carries the current session ID obtained from EnsureSession.
func (a *API) GetCampuses(search string, term int, offset int, maxResults int) ([]Pair, error) {
	if offset < 1 {
		return nil, errors.New("offset must be greater than 0")
	}

	termParam := strconv.Itoa(term)
	offsetParam := strconv.Itoa(offset)
	limitParam := strconv.Itoa(maxResults)

	res, err := a.config.Client.NewRequest().
		SetQueryParam("searchTerm", search).
		SetQueryParam("term", termParam).
		SetQueryParam("offset", offsetParam).
		SetQueryParam("max", limitParam).
		SetQueryParam("uniqueSessionId", a.EnsureSession()).
		SetQueryParam("_", internal.Nonce()).
		SetExpectResponseContentType("application/json").
		SetResult(&[]Pair{}).
		Get("/classSearch/get_campus")
	if err != nil {
		return nil, fmt.Errorf("failed to get campuses: %w", err)
	}

	if found, ok := res.Result().(*[]Pair); ok {
		return *found, nil
	}
	return nil, fmt.Errorf("campuses parsing failed to cast: %v", res.Result())
}
// GetInstructionalMethods fetches one page of instructional methods for a term from the
// Banner API.
//
// offset must be at least 1; a lower value is rejected before any request is made. The
// request carries the current session ID obtained from EnsureSession.
func (a *API) GetInstructionalMethods(search string, term string, offset int, maxResults int) ([]Pair, error) {
	if offset < 1 {
		return nil, errors.New("offset must be greater than 0")
	}

	offsetParam := strconv.Itoa(offset)
	limitParam := strconv.Itoa(maxResults)

	res, err := a.config.Client.NewRequest().
		SetQueryParam("searchTerm", search).
		SetQueryParam("term", term).
		SetQueryParam("offset", offsetParam).
		SetQueryParam("max", limitParam).
		SetQueryParam("uniqueSessionId", a.EnsureSession()).
		SetQueryParam("_", internal.Nonce()).
		SetExpectResponseContentType("application/json").
		SetResult(&[]Pair{}).
		Get("/classSearch/get_instructionalMethod")
	if err != nil {
		return nil, fmt.Errorf("failed to get instructional methods: %w", err)
	}

	if found, ok := res.Result().(*[]Pair); ok {
		return *found, nil
	}
	return nil, fmt.Errorf("instructional methods parsing failed to cast: %v", res.Result())
}
// GetCourseMeetingTime fetches the meeting-time entries for a course, identified by term
// and CRN. The response is a JSON object whose "fmt" key holds the list that is returned.
func (a *API) GetCourseMeetingTime(term int, crn int) ([]models.MeetingTimeResponse, error) {
	// Envelope matching the shape of the endpoint's JSON response.
	type responseWrapper struct {
		Fmt []models.MeetingTimeResponse `json:"fmt"`
	}

	termParam := strconv.Itoa(term)
	crnParam := strconv.Itoa(crn)

	res, err := a.config.Client.NewRequest().
		SetQueryParam("term", termParam).
		SetQueryParam("courseReferenceNumber", crnParam).
		SetExpectResponseContentType("application/json").
		SetResult(&responseWrapper{}).
		Get("/searchResults/getFacultyMeetingTimes")
	if err != nil {
		return nil, fmt.Errorf("failed to get meeting time: %w", err)
	}

	if envelope, ok := res.Result().(*responseWrapper); ok {
		return envelope.Fmt, nil
	}
	return nil, fmt.Errorf("meeting times parsing failed to cast: %v", res.Result())
}
// ResetDataForm posts to the Banner endpoint that clears the current search form.
// This must be called before a new search can be performed.
// A failed request is logged at fatal level.
func (a *API) ResetDataForm() {
	if _, err := a.config.Client.NewRequest().Post("/classSearch/resetDataForm"); err != nil {
		log.Fatal().Stack().Err(err).Msg("Failed to reset data form")
	}
}
// GetCourse retrieves course information from the Redis cache.
//
// The course is looked up under the key "class:<crn>" and decoded from JSON.
// When the key does not exist, the returned error wraps redis.Nil and reads
// "course not found"; any other lookup or decoding failure is wrapped as well.
func (a *API) GetCourse(crn string) (*models.Course, error) {
	// Create a timeout context for Redis operations
	ctx, cancel := context.WithTimeout(a.config.Ctx, 5*time.Second)
	defer cancel()

	// Retrieve raw data
	raw, err := a.config.KV.Get(ctx, fmt.Sprintf("class:%s", crn)).Result()
	if err != nil {
		// errors.Is also matches when the sentinel has been wrapped along the way.
		if errors.Is(err, redis.Nil) {
			return nil, fmt.Errorf("course not found: %w", err)
		}
		return nil, fmt.Errorf("failed to get course: %w", err)
	}

	// Unmarshal the raw data
	var course models.Course
	if err := json.Unmarshal([]byte(raw), &course); err != nil {
		return nil, fmt.Errorf("failed to unmarshal course: %w", err)
	}
	return &course, nil
}
-240
View File
@@ -1,240 +0,0 @@
// Package api provides the core functionality for interacting with the Banner API.
package api
import (
"banner/internal"
"banner/internal/models"
"context"
"fmt"
"math/rand"
"time"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
)
const (
// MaxPageSize is the maximum number of courses one can scrape per page.
// ScrapeMajor uses it as the threshold for deciding whether another page must be requested.
MaxPageSize = 500
)
var (
// PriorityMajors is a list of majors that are considered to be high priority for scraping.
// This list is used to determine which majors to scrape first/most often.
PriorityMajors = []string{"CS", "CPE", "MAT", "EE", "IS"}
// AncillaryMajors is a list of majors that are considered to be low priority for scraping.
// This list will not contain any majors that are in PriorityMajors.
// It starts out empty and is populated by Scrape from the subjects returned by the API.
AncillaryMajors []string
// AllMajors is a list of all majors that are available in the Banner system.
// It is assigned by Scrape as PriorityMajors followed by AncillaryMajors.
AllMajors []string
)
// Scrape retrieves all courses from the Banner API and stores them in Redis.
// This is a long-running process that should be run in a goroutine.
//
// It refreshes the package-level AncillaryMajors and AllMajors lists from the
// subjects reported by the API, asks GetExpiredSubjects which majors are due,
// and scrapes each of those with ScrapeMajor. The first failing major aborts
// the run and its error is returned.
//
// TODO: Switch from hardcoded term to dynamic term
func (a *API) Scrape() error {
	subjects, err := a.GetSubjects("", "202510", 1, 100)
	if err != nil {
		return fmt.Errorf("failed to get subjects: %w", err)
	}

	// Ensure subjects were found
	if len(subjects) == 0 {
		return fmt.Errorf("no subjects found")
	}

	// Rebuild the ancillary list from scratch on every run. Appending to the
	// previous contents would add every subject again each time Scrape is
	// called, leaving duplicates in AncillaryMajors and AllMajors.
	ancillary := make([]string, 0, len(subjects))
	for _, subject := range subjects {
		// Anything that is not a priority major is ancillary
		if !lo.Contains(PriorityMajors, subject.Code) {
			ancillary = append(ancillary, subject.Code)
		}
	}
	AncillaryMajors = ancillary
	AllMajors = lo.Flatten([][]string{PriorityMajors, AncillaryMajors})

	expiredSubjects, err := a.GetExpiredSubjects()
	if err != nil {
		return fmt.Errorf("failed to get scrapable majors: %w", err)
	}

	log.Info().Strs("majors", expiredSubjects).Msg("Scraping majors")
	for _, subject := range expiredSubjects {
		if err := a.ScrapeMajor(subject); err != nil {
			return fmt.Errorf("failed to scrape major %s: %w", subject, err)
		}
	}
	return nil
}
// GetExpiredSubjects returns a list of subjects that have expired and should be scraped again.
// It checks Redis for the "scraped:<major>:<term>" key of every entry in AllMajors for the
// current default term; a major counts as expired when its key is missing or holds "0".
//
// When AllMajors is empty, an empty list is returned without querying Redis.
func (a *API) GetExpiredSubjects() ([]string, error) {
	subjects := make([]string, 0)

	// Work on a snapshot so the keys sent and the results read back always line up.
	majors := AllMajors
	if len(majors) == 0 {
		// MGET needs at least one key; a request without keys would come back as an error.
		log.Debug().Strs("majors", subjects).Msg("Expired Subjects")
		return subjects, nil
	}

	term := a.DefaultTerm(time.Now()).ToString()

	// Create a timeout context for Redis operations
	ctx, cancel := context.WithTimeout(a.config.Ctx, 10*time.Second)
	defer cancel()

	// Fetch the scrape marker of every major in one round trip
	keys := lo.Map(majors, func(major string, _ int) string {
		return fmt.Sprintf("scraped:%s:%s", major, term)
	})
	values, err := a.config.KV.MGet(ctx, keys...).Result()
	if err != nil {
		return nil, fmt.Errorf("failed to get all subjects: %w", err)
	}

	// Extract expired subjects
	for i, value := range values {
		// If the value is nil or "0", then the subject is expired
		if value == nil || value == "0" {
			subjects = append(subjects, majors[i])
		}
	}

	log.Debug().Strs("majors", subjects).Msg("Expired Subjects")
	return subjects, nil
}
// ScrapeMajor scrapes all courses for a specific major.
// This function does not check whether scraping is required at this time; it is assumed that the caller has already done so.
//
// Pages are requested until one comes back with fewer than MaxPageSize results. Every course is
// handed to IntakeCourse; individual storage failures are logged and do not abort the scrape.
// Afterwards the "scraped:<subject>:<term>" key is written with the number of classes found
// (-1 when none were found) and an expiry that decides when the major is due again.
func (a *API) ScrapeMajor(subject string) error {
	// Resolve the term once so every page and the final marker key refer to the same term.
	term := a.DefaultTerm(time.Now()).ToString()

	offset := 0
	totalClassCount := 0
	for {
		// Build & execute the query
		query := NewQuery().Offset(offset).MaxResults(MaxPageSize * 2).Subject(subject)
		result, err := a.Search(term, query, "subjectDescription", false)
		if err != nil {
			return fmt.Errorf("search failed: %w (%s)", err, query.String())
		}

		// The response carries no reason for a failure, only the success flag.
		if !result.Success {
			return fmt.Errorf("result marked unsuccessful when searching for classes (%s)", query.String())
		}

		classCount := len(result.Data)
		totalClassCount += classCount
		log.Debug().Str("subject", subject).Int("count", classCount).Int("offset", offset).Msg("Placing classes in Redis")

		// Process each class and store it in Redis
		for _, course := range result.Data {
			if err := a.IntakeCourse(course); err != nil {
				log.Error().Err(err).Msg("failed to store class in Redis")
			}
		}

		// A short page means there is nothing left to fetch
		if classCount < MaxPageSize {
			log.Info().Str("subject", subject).Int("total", totalClassCount).Msgf("Subject %s Scraped", subject)
			break
		}

		// This is unlikely to happen, but log it just in case
		if classCount > MaxPageSize {
			log.Warn().Int("page", offset).Int("count", classCount).Msg("Results exceed MaxPageSize")
		}

		// Advance by what was actually received. Stepping by MaxPageSize after an oversized
		// page would request rows that were already processed and count them twice.
		offset += classCount

		// TODO: Replace sleep with smarter rate limiting
		log.Debug().Str("subject", subject).Int("nextOffset", offset).Msg("Sleeping before next page")
		time.Sleep(time.Second * 3)
	}

	// Subjects without classes are re-checked after 12 hours and marked with -1;
	// everything else gets its expiry from CalculateExpiry.
	scrapedMarker := totalClassCount
	var scrapeExpiry time.Duration
	if totalClassCount == 0 {
		scrapeExpiry = time.Hour * 12
		scrapedMarker = -1
	} else {
		scrapeExpiry = a.CalculateExpiry(term, totalClassCount, lo.Contains(PriorityMajors, subject))
	}

	// Create a timeout context for Redis operations
	ctx, cancel := context.WithTimeout(a.config.Ctx, 5*time.Second)
	defer cancel()

	// Mark the major as scraped; a failure here is logged but not returned
	err := a.config.KV.Set(ctx, fmt.Sprintf("scraped:%s:%s", subject, term), scrapedMarker, scrapeExpiry).Err()
	if err != nil {
		log.Error().Err(err).Msg("failed to mark major as scraped")
	}
	return nil
}
// CalculateExpiry calculates the expiry time until the next scrape for a major.
// The duration is based on the number of courses, whether the major is a priority, and if the term is archived.
//
// Priority majors get a third of the base expiry, with no variance and no minimum.
// All other majors get the base expiry, multiplied by 5 for archived terms, shifted by
// a random amount of up to 15% of the base expiry, and raised to at least one hour.
func (a *API) CalculateExpiry(term string, count int, priority bool) time.Duration {
	// An hour for every 100 classes (integer division, so 50-99 classes yield zero here)
	baseExpiry := time.Hour * time.Duration(count/100)

	// Subjects with less than 50 classes have a reversed expiry (less classes, longer interval)
	// 1 class => 12 hours, 49 classes => 1 hour
	if count < 50 {
		hours := internal.Slope(internal.Point{X: 1, Y: 12}, internal.Point{X: 49, Y: 1}, float64(count)).Y
		baseExpiry = time.Duration(hours * float64(time.Hour))
	}

	// If the subject is a priority, then the expiry is cut to a third without variance.
	// NOTE(review): an earlier comment said "halved" while the code divided by 3; the divisor is kept.
	// NOTE(review): for 50-99 classes this returns 0, which the caller passes to Redis as the
	// key's expiration - confirm that a zero expiration is acceptable there.
	if priority {
		return baseExpiry / 3
	}

	// If the term is considered "view only" or "archived", then the expiry is multiplied by 5
	expiry := baseExpiry
	if a.IsTermArchived(term) {
		expiry *= 5
	}

	// Add minor variance to the expiry: between 0 and 15% of the base expiry, in either direction
	variance := time.Duration(baseExpiry.Seconds()*(rand.Float64()*0.15)) * time.Second
	if rand.Intn(2) == 0 {
		expiry -= variance
	} else {
		expiry += variance
	}

	// Ensure the expiry is at least 1 hour with up to 15 extra minutes
	if expiry < time.Hour {
		expiry = time.Hour + time.Duration(rand.Intn(60*15))*time.Second
	}

	// Return the adjusted value; returning baseExpiry here would silently discard
	// the archive multiplier and the variance computed above.
	return expiry
}
// IntakeCourse writes a course to Redis under "class:<CRN>" without an expiration.
// This function will be used to handle change identification, notifications, and SQLite upserts in the future.
func (a *API) IntakeCourse(course models.Course) error {
	key := fmt.Sprintf("class:%s", course.CourseReferenceNumber)

	// Bound the Redis call so a stalled connection cannot block the scraper
	ctx, cancel := context.WithTimeout(a.config.Ctx, 5*time.Second)
	defer cancel()

	if err := a.config.KV.Set(ctx, key, course, 0).Err(); err != nil {
		return fmt.Errorf("failed to store class in Redis: %w", err)
	}
	return nil
}
-350
View File
@@ -1,350 +0,0 @@
package api
import (
"fmt"
"strconv"
"strings"
"time"
"github.com/samber/lo"
)
// Names of the request parameters that Query.Paramify emits for each search filter,
// plus the two pagination parameters that are always sent.
const (
paramSubject = "txt_subject"
paramTitle = "txt_courseTitle"
paramKeywords = "txt_keywordlike"
paramOpenOnly = "chk_open_only"
paramTermPart = "txt_partOfTerm"
paramCampus = "txt_campus"
paramAttributes = "txt_attribute"
paramInstructor = "txt_instructor"
paramStartTimeHour = "select_start_hour"
paramStartTimeMinute = "select_start_min"
paramStartTimeMeridiem = "select_start_ampm"
paramEndTimeHour = "select_end_hour"
paramEndTimeMinute = "select_end_min"
paramEndTimeMeridiem = "select_end_ampm"
paramMinCredits = "txt_credithourlow"
paramMaxCredits = "txt_credithourhigh"
paramCourseNumberLow = "txt_course_number_range"
paramCourseNumberHigh = "txt_course_number_range_to"
paramOffset = "pageOffset"
paramMaxResults = "pageMaxSize"
)
// Query represents a search query for courses.
// It is a builder that allows for chaining methods to construct a query.
type Query struct {
subject *string
title *string
keywords *[]string
openOnly *bool
termPart *[]string // e.g. [1, B6, 8, J]
campus *[]string // e.g. [9, 1DT, 1LR]
instructionalMethod *[]string // e.g. [HB]
attributes *[]string // e.g. [060, 010]
instructor *[]uint64 // e.g. [27957, 27961]
startTime *time.Duration
endTime *time.Duration
minCredits *int
maxCredits *int
offset int
maxResults int
courseNumberRange *Range
}
// NewQuery creates a new Query with default values.
func NewQuery() *Query {
return &Query{maxResults: 8, offset: 0}
}
// Subject sets the subject for the query.
func (q *Query) Subject(subject string) *Query {
q.subject = &subject
return q
}
// Title sets the title for the query.
func (q *Query) Title(title string) *Query {
q.title = &title
return q
}
// Keywords sets the keywords for the query.
func (q *Query) Keywords(keywords []string) *Query {
q.keywords = &keywords
return q
}
// Keyword adds a keyword to the query.
func (q *Query) Keyword(keyword string) *Query {
if q.keywords == nil {
q.keywords = &[]string{keyword}
} else {
*q.keywords = append(*q.keywords, keyword)
}
return q
}
// OpenOnly sets whether to search for open courses only.
func (q *Query) OpenOnly(openOnly bool) *Query {
q.openOnly = &openOnly
return q
}
// TermPart sets the term part for the query.
func (q *Query) TermPart(termPart []string) *Query {
q.termPart = &termPart
return q
}
// Campus sets the campuses for the query.
func (q *Query) Campus(campus []string) *Query {
q.campus = &campus
return q
}
// InstructionalMethod sets the instructional methods for the query.
func (q *Query) InstructionalMethod(instructionalMethod []string) *Query {
q.instructionalMethod = &instructionalMethod
return q
}
// Attributes sets the attributes for the query.
func (q *Query) Attributes(attributes []string) *Query {
q.attributes = &attributes
return q
}
// Instructor sets the instructors for the query.
func (q *Query) Instructor(instructor []uint64) *Query {
q.instructor = &instructor
return q
}
// StartTime sets the start time for the query.
func (q *Query) StartTime(startTime time.Duration) *Query {
q.startTime = &startTime
return q
}
// EndTime sets the end time for the query.
func (q *Query) EndTime(endTime time.Duration) *Query {
q.endTime = &endTime
return q
}
// Credits sets the credit range for the query.
func (q *Query) Credits(low int, high int) *Query {
q.minCredits = &low
q.maxCredits = &high
return q
}
// MinCredits sets the minimum credits for the query.
func (q *Query) MinCredits(value int) *Query {
q.minCredits = &value
return q
}
// MaxCredits sets the maximum credits for the query.
func (q *Query) MaxCredits(value int) *Query {
q.maxCredits = &value
return q
}
// CourseNumbers sets the course number range for the query.
func (q *Query) CourseNumbers(low int, high int) *Query {
q.courseNumberRange = &Range{low, high}
return q
}
// Offset sets the offset for pagination.
func (q *Query) Offset(offset int) *Query {
q.offset = offset
return q
}
// MaxResults sets the maximum number of results to return.
func (q *Query) MaxResults(maxResults int) *Query {
q.maxResults = maxResults
return q
}
// Range represents a range of two integers.
type Range struct {
Low int
High int
}
// FormatTimeParameter formats a time.Duration into a tuple of strings for use in a POST request.
// It returns the hour, minute, and meridiem (AM/PM) as separate strings.
func FormatTimeParameter(d time.Duration) (string, string, string) {
hourParameter, minuteParameter, meridiemParameter := "", "", ""
hours := int64(d.Hours())
minutes := int64(d.Minutes()) % 60
minuteParameter = strconv.FormatInt(minutes, 10)
if hours >= 12 {
hourParameter = "PM"
// Exceptional case: 12PM = 12, 1PM = 1, 2PM = 2
if hours >= 13 {
hourParameter = strconv.FormatInt(hours-12, 10) // 13 - 12 = 1, 14 - 12 = 2
} else {
hourParameter = strconv.FormatInt(hours, 10)
}
} else {
meridiemParameter = "AM"
hourParameter = strconv.FormatInt(hours, 10)
}
return hourParameter, minuteParameter, meridiemParameter
}
// Paramify converts a Query into a map of parameters for a POST request.
// This function assumes each query key only appears once.
//
// Only filters that were set are emitted; the offset and the maximum result count are
// always included. List filters are comma-joined, keywords are space-joined.
func (q *Query) Paramify() map[string]string {
	// Parameter name for the instructional method filter.
	// NOTE(review): taken from the Banner class search form - confirm against a live request.
	const paramInstructionalMethod = "txt_instructionalMethod"

	params := map[string]string{}

	if q.subject != nil {
		params[paramSubject] = *q.subject
	}
	if q.title != nil {
		// Whitespace can prevent valid queries from succeeding
		params[paramTitle] = strings.TrimSpace(*q.title)
	}
	if q.keywords != nil {
		params[paramKeywords] = strings.Join(*q.keywords, " ")
	}
	// Only send the flag when it was explicitly enabled; OpenOnly(false) must not
	// turn the filter on.
	if q.openOnly != nil && *q.openOnly {
		params[paramOpenOnly] = "true"
	}
	if q.termPart != nil {
		params[paramTermPart] = strings.Join(*q.termPart, ",")
	}
	if q.campus != nil {
		params[paramCampus] = strings.Join(*q.campus, ",")
	}
	// Previously the instructional method filter could be set but was never sent.
	if q.instructionalMethod != nil {
		params[paramInstructionalMethod] = strings.Join(*q.instructionalMethod, ",")
	}
	if q.attributes != nil {
		params[paramAttributes] = strings.Join(*q.attributes, ",")
	}
	if q.instructor != nil {
		params[paramInstructor] = strings.Join(lo.Map(*q.instructor, func(i uint64, _ int) string {
			return strconv.FormatUint(i, 10)
		}), ",")
	}
	if q.startTime != nil {
		hour, minute, meridiem := FormatTimeParameter(*q.startTime)
		params[paramStartTimeHour] = hour
		params[paramStartTimeMinute] = minute
		params[paramStartTimeMeridiem] = meridiem
	}
	if q.endTime != nil {
		hour, minute, meridiem := FormatTimeParameter(*q.endTime)
		params[paramEndTimeHour] = hour
		params[paramEndTimeMinute] = minute
		params[paramEndTimeMeridiem] = meridiem
	}
	if q.minCredits != nil {
		params[paramMinCredits] = strconv.Itoa(*q.minCredits)
	}
	if q.maxCredits != nil {
		params[paramMaxCredits] = strconv.Itoa(*q.maxCredits)
	}
	if q.courseNumberRange != nil {
		params[paramCourseNumberLow] = strconv.Itoa(q.courseNumberRange.Low)
		params[paramCourseNumberHigh] = strconv.Itoa(q.courseNumberRange.High)
	}

	params[paramOffset] = strconv.Itoa(q.offset)
	params[paramMaxResults] = strconv.Itoa(q.maxResults)
	return params
}
// String returns a string representation of the query, ideal for debugging & logging.
// Filters that were never set are left out; offset and maxResults are always printed last.
func (q *Query) String() string {
	var sb strings.Builder

	if q.subject != nil {
		fmt.Fprintf(&sb, "subject=%s, ", *q.subject)
	}
	if q.title != nil {
		// Whitespace can prevent valid queries from succeeding
		fmt.Fprintf(&sb, "title=%s, ", strings.TrimSpace(*q.title))
	}
	if q.keywords != nil {
		fmt.Fprintf(&sb, "keywords=%s, ", strings.Join(*q.keywords, " "))
	}
	if q.openOnly != nil {
		fmt.Fprintf(&sb, "openOnly=%t, ", *q.openOnly)
	}
	if q.termPart != nil {
		fmt.Fprintf(&sb, "termPart=%s, ", strings.Join(*q.termPart, ","))
	}
	if q.campus != nil {
		fmt.Fprintf(&sb, "campus=%s, ", strings.Join(*q.campus, ","))
	}
	// Include the instructional method so the log output covers every settable filter
	if q.instructionalMethod != nil {
		fmt.Fprintf(&sb, "instructionalMethod=%s, ", strings.Join(*q.instructionalMethod, ","))
	}
	if q.attributes != nil {
		fmt.Fprintf(&sb, "attributes=%s, ", strings.Join(*q.attributes, ","))
	}
	if q.instructor != nil {
		fmt.Fprintf(&sb, "instructor=%s, ", strings.Join(lo.Map(*q.instructor, func(i uint64, _ int) string {
			return strconv.FormatUint(i, 10)
		}), ","))
	}
	if q.startTime != nil {
		hour, minute, meridiem := FormatTimeParameter(*q.startTime)
		fmt.Fprintf(&sb, "startTime=%s:%s%s, ", hour, minute, meridiem)
	}
	if q.endTime != nil {
		hour, minute, meridiem := FormatTimeParameter(*q.endTime)
		fmt.Fprintf(&sb, "endTime=%s:%s%s, ", hour, minute, meridiem)
	}
	if q.minCredits != nil {
		fmt.Fprintf(&sb, "minCredits=%d, ", *q.minCredits)
	}
	if q.maxCredits != nil {
		fmt.Fprintf(&sb, "maxCredits=%d, ", *q.maxCredits)
	}
	if q.courseNumberRange != nil {
		fmt.Fprintf(&sb, "courseNumberRange=%d-%d, ", q.courseNumberRange.Low, q.courseNumberRange.High)
	}

	fmt.Fprintf(&sb, "offset=%d, ", q.offset)
	fmt.Fprintf(&sb, "maxResults=%d", q.maxResults)
	return sb.String()
}
// Dict returns a map representation of the query, ideal for debugging & logging.
// This dict is represented with zerolog's Event type.
// func (q *Query) Dict() *zerolog.Event {
// }
-64
View File
@@ -1,64 +0,0 @@
package api
import (
"banner/internal"
"net/url"
log "github.com/rs/zerolog/log"
)
// Setup makes the initial requests to set up the session cookies for the application.
//
// Each bootstrap path is requested in order; a request error or a non-200 status is logged
// at fatal level. Afterwards the cookie jar is checked for the JSESSIONID and SSB_COOKIE
// cookies: every missing one is logged as a warning, and success is only reported when
// both are present.
func (a *API) Setup() {
	log.Info().Msg("Setting up session...")

	requestQueue := []string{
		"/registration/registration",
		"/selfServiceMenu/data",
	}
	for _, path := range requestQueue {
		req := a.config.Client.NewRequest().
			SetQueryParam("_", internal.Nonce()).
			SetExpectResponseContentType("application/json")

		res, err := req.Get(path)
		if err != nil {
			log.Fatal().Stack().Str("path", path).Err(err).Msg("Failed to make request")
		}
		if res.StatusCode() != 200 {
			log.Fatal().Stack().Str("path", path).Int("status", res.StatusCode()).Msg("Failed to make request")
		}
	}

	// Validate that cookies were set
	baseURLParsed, err := url.Parse(a.config.BaseURL)
	if err != nil {
		log.Fatal().Stack().Str("baseURL", a.config.BaseURL).Err(err).Msg("Failed to parse baseURL")
	}

	// Collect the names of all cookies currently stored for the base URL
	presentCookies := make(map[string]bool)
	for _, cookie := range a.config.Client.CookieJar().Cookies(baseURLParsed) {
		presentCookies[cookie.Name] = true
	}

	// Check if all required cookies were set
	allCookiesSet := true
	for _, cookieName := range []string{"JSESSIONID", "SSB_COOKIE"} {
		if !presentCookies[cookieName] {
			allCookiesSet = false
			log.Warn().Str("cookieName", cookieName).Msg("Required cookie not set")
		}
	}

	// Only claim success when nothing is missing; the success message used to be
	// logged even right after the warnings above.
	if allCookiesSet {
		log.Debug().Msg("All required cookies set, session setup complete")
	} else {
		log.Warn().Msg("Session setup finished, but required cookies are missing")
	}

	// TODO: Validate that the session allows access to termSelection
}
-649
View File
@@ -1,649 +0,0 @@
package bot
import (
"banner/internal"
"banner/internal/api"
"banner/internal/models"
"fmt"
"net/url"
"regexp"
"strconv"
"strings"
"time"
"github.com/bwmarrin/discordgo"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
)
// Layout strings for time.Format, written in terms of Go's reference time.
const (
// ICalTimestampLayoutUtc is the formatting layout for timestamps in the UTC timezone.
// It ends in a literal "Z".
ICalTimestampLayoutUtc = "20060102T150405Z"
// ICalTimestampLayoutLocal is the formatting layout for timestamps in the local timezone.
// It is the same layout without the trailing "Z".
ICalTimestampLayoutLocal = "20060102T150405"
)
// CommandHandler is a function that handles a slash command interaction.
// It receives the bot, the Discord session and the interaction, and returns an error on failure.
type CommandHandler func(b *Bot, s *discordgo.Session, i *discordgo.InteractionCreate) error
var (
// CommandDefinitions is a list of all the bot's command definitions.
CommandDefinitions = []*discordgo.ApplicationCommand{TermCommandDefinition, TimeCommandDefinition, SearchCommandDefinition, IcsCommandDefinition, GCalCommandDefinition}
// CommandHandlers is a map of command names to their handlers.
// Keys are taken from each definition's Name so that definitions and handlers cannot drift apart.
CommandHandlers = map[string]CommandHandler{
TimeCommandDefinition.Name: TimeCommandHandler,
TermCommandDefinition.Name: TermCommandHandler,
SearchCommandDefinition.Name: SearchCommandHandler,
IcsCommandDefinition.Name: IcsCommandHandler,
GCalCommandDefinition.Name: GCalCommandHandler,
}
)
// SearchCommandDefinition declares the /search slash command.
// All of its options are optional: title, code, max, keywords, instructor and subject.
// The title, instructor and subject options have autocomplete enabled.
var SearchCommandDefinition = &discordgo.ApplicationCommand{
Name: "search",
Description: "Search for a course",
Options: []*discordgo.ApplicationCommandOption{
{
Type: discordgo.ApplicationCommandOptionString,
MinLength: internal.GetIntPointer(0),
MaxLength: 48,
Name: "title",
Description: "Course Title (exact, use autocomplete)",
Required: false,
Autocomplete: true,
},
{
Type: discordgo.ApplicationCommandOptionString,
Name: "code",
MinLength: internal.GetIntPointer(4),
Description: "Course Code (e.g. 3743, 3000-3999, 3xxx, 3000-)",
Required: false,
},
{
Type: discordgo.ApplicationCommandOptionInteger,
Name: "max",
Description: "Maximum number of results",
Required: false,
},
{
Type: discordgo.ApplicationCommandOptionString,
Name: "keywords",
Description: "Keywords in Title or Description (space separated)",
},
{
Type: discordgo.ApplicationCommandOptionString,
Name: "instructor",
Description: "Instructor Name",
Required: false,
Autocomplete: true,
},
{
Type: discordgo.ApplicationCommandOptionString,
Name: "subject",
Description: "Subject (e.g. Computer Science/CS, Mathematics/MAT)",
Required: false,
Autocomplete: true,
},
},
}
var (
	// courseCodeRangePattern matches a closed range ("3000-3999") or an open-ended one ("3000-").
	courseCodeRangePattern = regexp.MustCompile(`^(\d{1,4})-(\d{1,4})?$`)
	// courseCodeWildcardPattern matches digits followed by x placeholders ("3xxx", "34xx", "343x").
	courseCodeWildcardPattern = regexp.MustCompile(`^\d{1,3}x{1,3}$`)
)

// parseCourseCodeRange turns the "code" option of /search into a course number range.
//
// Accepted forms (surrounding whitespace is ignored, X may be upper or lower case):
//   - "3743"      => 3743-3743
//   - "3000-3999" => 3000-3999
//   - "3000-"     => 3000-9999
//   - "34xx"      => 3400-3499
//
// An error is returned for any other form, when low is greater than high, or when
// either bound is outside 1000-9999.
func parseCourseCodeRange(raw string) (int, int, error) {
	value := strings.ToLower(strings.TrimSpace(raw))

	var (
		low, high int
		err       error
	)

	// The three forms are mutually exclusive. Checking them independently made
	// 4-character ranges such as "300-" fall through into the plain 4-digit branch.
	switch {
	case strings.Contains(value, "-"):
		// Partially/fully specified range
		match := courseCodeRangePattern.FindStringSubmatch(value)
		if match == nil {
			return 0, 0, fmt.Errorf("invalid range format: %s", value)
		}

		low, err = strconv.Atoi(match[1])
		if err != nil {
			return 0, 0, errors.Wrap(err, "error parsing course code (low)")
		}

		// If there's not a high value, set it to max (open ended)
		high = 9999
		if match[2] != "" {
			high, err = strconv.Atoi(match[2])
			if err != nil {
				return 0, 0, errors.Wrap(err, "error parsing course code (high)")
			}
		}

	case strings.Contains(value, "x"):
		// #xxx, ##xx, ###x format (34xx -> 3400-3499)
		if len(value) != 4 || !courseCodeWildcardPattern.MatchString(value) {
			return 0, 0, fmt.Errorf("code range format invalid: must be 1 or more digits followed by x's (%s)", value)
		}

		// Replace x's with 0's
		low, err = strconv.Atoi(strings.ReplaceAll(value, "x", "0"))
		if err != nil {
			return 0, 0, errors.Wrap(err, "error parsing implied course code (low)")
		}

		// Replace x's with 9's
		high, err = strconv.Atoi(strings.ReplaceAll(value, "x", "9"))
		if err != nil {
			return 0, 0, errors.Wrap(err, "error parsing implied course code (high)")
		}

	case len(value) == 4:
		// 4 digit code
		low, err = strconv.Atoi(value)
		if err != nil {
			return 0, 0, errors.Wrap(err, "error parsing course code")
		}
		high = low

	default:
		return 0, 0, fmt.Errorf("course code range invalid (%s)", value)
	}

	if low > high {
		return 0, 0, fmt.Errorf("course code range is invalid: low is greater than high (%d > %d)", low, high)
	}
	if low < 1000 || high < 1000 || low > 9999 || high > 9999 {
		return 0, 0, fmt.Errorf("course code range is invalid: must be 1000-9999 (%d-%d)", low, high)
	}
	return low, high, nil
}

// SearchCommandHandler handles the /search command, which allows users to search for courses.
//
// The title, code, keywords and max options are translated into an api.Query; max is clamped
// to 1-8. Each course found is rendered as three inline embed fields (identifier, name,
// meeting time), and at most 25 fields are sent. An invalid code option or a failed search
// is returned as an error.
//
// NOTE(review): the "instructor" and "subject" options are declared on the command but are
// not read here, and every query is restricted to 3-6 credits - confirm both are intended.
func SearchCommandHandler(b *Bot, s *discordgo.Session, i *discordgo.InteractionCreate) error {
	data := i.ApplicationCommandData()
	query := api.NewQuery().Credits(3, 6)

	for _, option := range data.Options {
		switch option.Name {
		case "title":
			query.Title(option.StringValue())
		case "code":
			low, high, err := parseCourseCodeRange(option.StringValue())
			if err != nil {
				return err
			}
			query.CourseNumbers(low, high)
		case "keywords":
			query.Keywords(
				strings.Split(option.StringValue(), " "),
			)
		case "max":
			// Keep the page size between 1 and 8; zero or negative values make no sense here
			query.MaxResults(
				max(1, min(8, int(option.IntValue()))),
			)
		}
	}

	term, err := b.GetSession()
	if err != nil {
		return err
	}

	courses, err := b.API.Search(term, query, "", false)
	if err != nil {
		respondErr := s.InteractionRespond(i.Interaction, &discordgo.InteractionResponse{
			Type: discordgo.InteractionResponseChannelMessageWithSource,
			Data: &discordgo.InteractionResponseData{
				Content: "Error searching for courses",
			},
		})
		if respondErr != nil {
			log.Warn().Err(respondErr).Msg("Failed to send search error response")
		}
		return err
	}

	fetchTime := time.Now()
	fields := []*discordgo.MessageEmbedField{}

	for _, course := range courses.Data {
		// Safe instructor name handling
		displayName := "TBA"
		if len(course.Faculty) > 0 {
			displayName = course.Faculty[0].DisplayName
		}

		categoryLink := fmt.Sprintf("[%s](https://catalog.utsa.edu/undergraduate/coursedescriptions/%s/)", course.Subject, strings.ToLower(course.Subject))
		classLink := fmt.Sprintf("[%s-%s](https://catalog.utsa.edu/search/?P=%s%%20%s)", course.CourseNumber, course.SequenceNumber, course.Subject, course.CourseNumber)
		professorLink := fmt.Sprintf("[%s](https://www.ratemyprofessors.com/search/professors/1516?q=%s)", displayName, url.QueryEscape(displayName))
		identifierText := fmt.Sprintf("%s %s (CRN %s)\n%s", categoryLink, classLink, course.CourseReferenceNumber, professorLink)

		// Safe meeting time handling
		meetingTime := "No scheduled meetings"
		if len(course.MeetingsFaculty) > 0 {
			meetingTime = course.MeetingsFaculty[0].String()
		}

		fields = append(fields, &discordgo.MessageEmbedField{
			Name:   "Identifier",
			Value:  identifierText,
			Inline: true,
		}, &discordgo.MessageEmbedField{
			Name:   "Name",
			Value:  course.CourseTitle,
			Inline: true,
		}, &discordgo.MessageEmbedField{
			Name:   "Meeting Time",
			Value:  meetingTime,
			Inline: true,
		},
		)
	}

	// Blue if there are results, orange if there are none
	color := 0x0073FF
	if courses.TotalCount == 0 {
		color = 0xFF6500
	}

	err = s.InteractionRespond(i.Interaction, &discordgo.InteractionResponse{
		Type: discordgo.InteractionResponseChannelMessageWithSource,
		Data: &discordgo.InteractionResponseData{
			Embeds: []*discordgo.MessageEmbed{
				{
					Footer:      internal.GetFetchedFooter(b.Config, fetchTime),
					Description: fmt.Sprintf("%d Class%s", courses.TotalCount, internal.Plural(courses.TotalCount)),
					Fields:      fields[:min(25, len(fields))],
					Color:       color,
				},
			},
			AllowedMentions: &discordgo.MessageAllowedMentions{},
		},
	})
	return err
}
// TermCommandDefinition declares the /terms slash command.
// It takes an optional search string of at most 8 characters and an optional page number
// with a minimum value of 1.
var TermCommandDefinition = &discordgo.ApplicationCommand{
Name: "terms",
Description: "Guess the current term, or search for a specific term",
Options: []*discordgo.ApplicationCommandOption{
{
Type: discordgo.ApplicationCommandOptionString,
MinLength: internal.GetIntPointer(0),
MaxLength: 8,
Name: "search",
Description: "Term to search for",
Required: false,
},
{
Type: discordgo.ApplicationCommandOptionInteger,
Name: "page",
Description: "Page Number",
Required: false,
MinValue: internal.GetFloatPointer(1),
},
},
}
// TermCommandHandler handles the /terms command, which allows users to search for terms.
// It requests one page of up to 25 terms (page 1 and an empty search by default) and replies
// with an embed holding one inline field per term: description as name, code as value.
func TermCommandHandler(b *Bot, s *discordgo.Session, i *discordgo.InteractionCreate) error {
	var (
		searchTerm string
		pageNumber = 1
	)

	for _, opt := range i.ApplicationCommandData().Options {
		if opt.Name == "search" {
			searchTerm = opt.StringValue()
		} else if opt.Name == "page" {
			pageNumber = int(opt.IntValue())
		} else {
			log.Warn().Str("option", opt.Name).Msg("Unexpected option in term command")
		}
	}

	termResult, err := b.API.GetTerms(searchTerm, pageNumber, 25)
	if err != nil {
		internal.RespondError(s, i.Interaction, "Error while fetching terms", err)
		return err
	}

	// One inline field per term
	fields := make([]*discordgo.MessageEmbedField, 0, len(termResult))
	for idx := range termResult {
		fields = append(fields, &discordgo.MessageEmbedField{
			Name:   termResult[idx].Description,
			Value:  termResult[idx].Code,
			Inline: true,
		})
	}

	fetchTime := time.Now()

	// Only the first 25 fields are sent; note when something gets cut off
	fieldCount := len(fields)
	if fieldCount > 25 {
		log.Warn().Int("count", fieldCount).Msg("Too many fields in term command (trimmed)")
	}

	embed := &discordgo.MessageEmbed{
		Footer:      internal.GetFetchedFooter(b.Config, fetchTime),
		Description: fmt.Sprintf("%d term%s (page %d)", len(termResult), internal.Plural(len(termResult)), pageNumber),
		Fields:      fields[:min(25, fieldCount)],
	}

	return s.InteractionRespond(i.Interaction, &discordgo.InteractionResponse{
		Type: discordgo.InteractionResponseChannelMessageWithSource,
		Data: &discordgo.InteractionResponseData{
			Embeds:          []*discordgo.MessageEmbed{embed},
			AllowedMentions: &discordgo.MessageAllowedMentions{},
		},
	})
}
// TimeCommandDefinition declares the /time slash command.
// Its only option is the required integer "crn" (Course Reference Number).
var TimeCommandDefinition = &discordgo.ApplicationCommand{
Name: "time",
Description: "Get Class Meeting Time",
Options: []*discordgo.ApplicationCommandOption{
{
Type: discordgo.ApplicationCommandOptionInteger,
Name: "crn",
Description: "Course Reference Number",
Required: true,
},
},
}
// TimeCommandHandler handles the /time command, which allows users to get the meeting times for a course.
//
// It looks up the meeting times for the given CRN and replies with the start date, end date,
// start/end time and days of week of the first entry. A lookup failure or an empty result is
// reported to the user and returned as an error, as is a failure to send the final response.
func TimeCommandHandler(b *Bot, s *discordgo.Session, i *discordgo.InteractionCreate) error {
	fetchTime := time.Now()
	crn := i.ApplicationCommandData().Options[0].IntValue()

	// TODO: Fix static term
	meetingTimes, err := b.API.GetCourseMeetingTime(202510, int(crn))
	if err != nil {
		s.InteractionRespond(i.Interaction, &discordgo.InteractionResponse{
			Type: discordgo.InteractionResponseChannelMessageWithSource,
			Data: &discordgo.InteractionResponseData{
				Content: "Error getting meeting time",
			},
		})
		return err
	}

	if len(meetingTimes) == 0 {
		s.InteractionRespond(i.Interaction, &discordgo.InteractionResponse{
			Type: discordgo.InteractionResponseChannelMessageWithSource,
			Data: &discordgo.InteractionResponseData{
				Content: "No meeting times found for this course",
			},
		})
		return fmt.Errorf("no meeting times found for CRN %d", crn)
	}

	// Only the first meeting time is shown
	meetingTime := meetingTimes[0]
	duration := meetingTime.EndTime().Sub(meetingTime.StartTime())

	// Return the outcome of the response so a failed delivery is not silently dropped,
	// matching the other command handlers.
	return s.InteractionRespond(i.Interaction, &discordgo.InteractionResponse{
		Type: discordgo.InteractionResponseChannelMessageWithSource,
		Data: &discordgo.InteractionResponseData{
			Embeds: []*discordgo.MessageEmbed{
				{
					Footer:      internal.GetFetchedFooter(b.Config, fetchTime),
					Description: "",
					Fields: []*discordgo.MessageEmbedField{
						{
							Name:  "Start Date",
							Value: meetingTime.StartDay().Format("Monday, January 2, 2006"),
						},
						{
							Name:  "End Date",
							Value: meetingTime.EndDay().Format("Monday, January 2, 2006"),
						},
						{
							Name:  "Start/End Time",
							Value: fmt.Sprintf("%s - %s (%d min)", meetingTime.StartTime().String(), meetingTime.EndTime().String(), int64(duration.Minutes())),
						},
						{
							Name:  "Days of Week",
							Value: internal.WeekdaysToString(meetingTime.Days()),
						},
					},
				},
			},
			AllowedMentions: &discordgo.MessageAllowedMentions{},
		},
	})
}
// IcsCommandDefinition declares the /ics slash command.
// Its only option is the required integer "crn" (Course Reference Number).
var IcsCommandDefinition = &discordgo.ApplicationCommand{
Name: "ics",
Description: "Generate an ICS file for a course",
Options: []*discordgo.ApplicationCommandOption{
{
Type: discordgo.ApplicationCommandOptionInteger,
Name: "crn",
Description: "Course Reference Number",
Required: true,
},
},
}
// GCalCommandDefinition declares the /gcal slash command.
// Its only option is the required integer "crn" (Course Reference Number).
var GCalCommandDefinition = &discordgo.ApplicationCommand{
Name: "gcal",
Description: "Generate a link to create a Google Calendar event for a course",
Options: []*discordgo.ApplicationCommandOption{
{
Type: discordgo.ApplicationCommandOptionInteger,
Name: "crn",
Description: "Course Reference Number",
Required: true,
},
},
}
// GCalCommandHandler handles the /gcal command, which allows users to generate a link to create a Google Calendar event for a course.
//
// The course and its meeting times are looked up by the "crn" option. The first
// meeting whose type is neither "ID" nor "OA" is turned into a pre-filled
// Google Calendar URL with a weekly recurrence rule. If no such meeting exists
// the user receives an error message and nil is returned. Lookup failures and a
// failed Discord response are returned as errors.
func GCalCommandHandler(b *Bot, s *discordgo.Session, i *discordgo.InteractionCreate) error {
	// Parse all options
	options := internal.ParseOptions(i.ApplicationCommandData().Options)
	crn := options.GetInt("crn")

	course, err := b.API.GetCourse(strconv.Itoa(int(crn)))
	if err != nil {
		return fmt.Errorf("Error retrieving course data: %w", err)
	}

	// NOTE(review): the term code is hard-coded here.
	meetingTimes, err := b.API.GetCourseMeetingTime(202510, int(crn))
	if err != nil {
		return fmt.Errorf("Error requesting meeting time: %w", err)
	}
	if len(meetingTimes) == 0 {
		return fmt.Errorf("unexpected - no meeting time data found for course")
	}

	// Pick the first meeting that takes place at a defined time
	meetingTime, exists := lo.Find(meetingTimes, func(mt models.MeetingTimeResponse) bool {
		switch mt.MeetingTime.MeetingType {
		case "ID", "OA":
			return false
		default:
			return true
		}
	})
	if !exists {
		internal.RespondError(s, i.Interaction, "The course requested does not meet at a defined moment in time.", nil)
		return nil
	}

	startDay := meetingTime.StartDay()
	startTime := meetingTime.StartTime()
	endTime := meetingTime.EndTime()

	// Build the first occurrence in Central time; it is converted to UTC below
	dtStart := time.Date(startDay.Year(), startDay.Month(), startDay.Day(), int(startTime.Hours), int(startTime.Minutes), 0, 0, b.Config.CentralTimeLocation)
	dtEnd := time.Date(startDay.Year(), startDay.Month(), startDay.Day(), int(endTime.Hours), int(endTime.Minutes), 0, 0, b.Config.CentralTimeLocation)

	// Format times in UTC for Google Calendar
	startStr := dtStart.UTC().Format(ICalTimestampLayoutUtc)
	endStr := dtEnd.UTC().Format(ICalTimestampLayoutUtc)

	// Generate RRULE for recurrence
	rrule := meetingTime.RRule()
	recurRule := fmt.Sprintf("FREQ=WEEKLY;BYDAY=%s;UNTIL=%s", rrule.ByDay, rrule.Until)

	// Safe instructor name handling: a meeting may have no faculty assigned,
	// in which case indexing the slice would panic.
	instructorName := "TBA"
	if len(meetingTime.Faculty) > 0 {
		instructorName = meetingTime.Faculty[0].DisplayName
	}

	// Build calendar URL
	params := url.Values{}
	params.Add("action", "TEMPLATE")
	params.Add("text", fmt.Sprintf("%s %s - %s", course.Subject, course.CourseNumber, course.CourseTitle))
	params.Add("dates", fmt.Sprintf("%s/%s", startStr, endStr))
	params.Add("details", fmt.Sprintf("CRN: %s\nInstructor: %s\nDays: %s", course.CourseReferenceNumber, instructorName, internal.WeekdaysToString(meetingTime.Days())))
	params.Add("location", meetingTime.PlaceString())
	params.Add("trp", "true")
	params.Add("ctz", b.Config.CentralTimeLocation.String())
	params.Add("recur", "RRULE:"+recurRule)
	calendarURL := "https://calendar.google.com/calendar/render?" + params.Encode()

	return s.InteractionRespond(i.Interaction, &discordgo.InteractionResponse{
		Type: discordgo.InteractionResponseChannelMessageWithSource,
		Data: &discordgo.InteractionResponseData{
			Content:         fmt.Sprintf("[Add to Google Calendar](<%s>)", calendarURL),
			AllowedMentions: &discordgo.MessageAllowedMentions{},
		},
	})
}
// IcsCommandHandler handles the /ics command, which allows users to generate an ICS file for a course.
//
// The course and its meeting times are looked up by the "crn" option. If no
// meeting has a type other than "ID" or "OA", the user receives an error
// message and nil is returned. Otherwise one VEVENT is emitted per meeting time
// and the calendar is attached to the response as a text/calendar file. Lookup
// failures and a failed Discord response are returned as errors.
func IcsCommandHandler(b *Bot, s *discordgo.Session, i *discordgo.InteractionCreate) error {
	// Parse all options
	options := internal.ParseOptions(i.ApplicationCommandData().Options)
	crn := options.GetInt("crn")

	course, err := b.API.GetCourse(strconv.Itoa(int(crn)))
	if err != nil {
		return fmt.Errorf("Error retrieving course data: %w", err)
	}

	// Fix static term
	meetingTimes, err := b.API.GetCourseMeetingTime(202510, int(crn))
	if err != nil {
		return fmt.Errorf("Error requesting meeting time: %w", err)
	}
	if len(meetingTimes) == 0 {
		return fmt.Errorf("unexpected - no meeting time data found for course")
	}

	// Check if the course has any meeting times
	_, exists := lo.Find(meetingTimes, func(mt models.MeetingTimeResponse) bool {
		switch mt.MeetingTime.MeetingType {
		case "ID", "OA":
			return false
		default:
			return true
		}
	})
	if !exists {
		log.Warn().Str("crn", course.CourseReferenceNumber).Msg("Non-meeting course requested for ICS file")
		internal.RespondError(s, i.Interaction, "The course requested does not meet at a defined moment in time.", nil)
		return nil
	}

	// Safe instructor name handling
	instructorName := "TBA"
	if len(course.Faculty) > 0 {
		instructorName = course.Faculty[0].DisplayName
	}
	summary := fmt.Sprintf("%s %s %s", course.Subject, course.CourseNumber, course.CourseTitle)

	// A single timestamp is shared by every event of this calendar
	now := time.Now().In(b.Config.CentralTimeLocation)

	// NOTE(review): meetings of type "ID"/"OA" are not filtered out of this
	// loop; confirm the time accessors are safe for them.
	events := make([]string, 0, len(meetingTimes))
	for idx, meeting := range meetingTimes {
		// The index keeps the UID distinct when a course has several meeting
		// times; timestamp and CRN alone are identical for all of them.
		uid := fmt.Sprintf("%d-%s-%d@ical.banner.xevion.dev", now.Unix(), meeting.CourseReferenceNumber, idx)

		startDay := meeting.StartDay()
		startTime := meeting.StartTime()
		endTime := meeting.EndTime()
		dtStart := time.Date(startDay.Year(), startDay.Month(), startDay.Day(), int(startTime.Hours), int(startTime.Minutes), 0, 0, b.Config.CentralTimeLocation)
		dtEnd := time.Date(startDay.Year(), startDay.Month(), startDay.Day(), int(endTime.Hours), int(endTime.Minutes), 0, 0, b.Config.CentralTimeLocation)

		description := fmt.Sprintf("Instructor: %s\nSection: %s\nCRN: %s", instructorName, course.SequenceNumber, meeting.CourseReferenceNumber)
		location := meeting.PlaceString()
		rrule := meeting.RRule()

		event := fmt.Sprintf(`BEGIN:VEVENT
DTSTAMP:%s
UID:%s
DTSTART;TZID=America/Chicago:%s
RRULE:FREQ=WEEKLY;BYDAY=%s;UNTIL=%s
DTEND;TZID=America/Chicago:%s
SUMMARY:%s
DESCRIPTION:%s
LOCATION:%s
END:VEVENT`, now.Format(ICalTimestampLayoutLocal), uid, dtStart.Format(ICalTimestampLayoutLocal), rrule.ByDay, rrule.Until, dtEnd.Format(ICalTimestampLayoutLocal), summary, strings.Replace(description, "\n", `\n`, -1), location)
		events = append(events, event)
	}

	// TODO: Make this dynamically requested, parsed & cached from tzurl.org
	vTimezone := `BEGIN:VTIMEZONE
TZID:America/Chicago
LAST-MODIFIED:20231222T233358Z
TZURL:https://www.tzurl.org/zoneinfo-outlook/America/Chicago
X-LIC-LOCATION:America/Chicago
BEGIN:DAYLIGHT
TZNAME:CDT
TZOFFSETFROM:-0600
TZOFFSETTO:-0500
DTSTART:19700308T020000
RRULE:FREQ=YEARLY;BYMONTH=3;BYDAY=2SU
END:DAYLIGHT
BEGIN:STANDARD
TZNAME:CST
TZOFFSETFROM:-0500
TZOFFSETTO:-0600
DTSTART:19701101T020000
RRULE:FREQ=YEARLY;BYMONTH=11;BYDAY=1SU
END:STANDARD
END:VTIMEZONE`

	ics := fmt.Sprintf(`BEGIN:VCALENDAR
VERSION:2.0
PRODID:-//xevion//Banner Discord Bot//EN
CALSCALE:GREGORIAN
%s
%s
END:VCALENDAR`, vTimezone, strings.Join(events, "\n"))

	// Surface a failed response to the caller instead of dropping it
	return s.InteractionRespond(i.Interaction, &discordgo.InteractionResponse{
		Type: discordgo.InteractionResponseChannelMessageWithSource,
		Data: &discordgo.InteractionResponseData{
			Files: []*discordgo.File{
				{
					Name:        fmt.Sprintf("%s-%s-%s_%s.ics", course.Subject, course.CourseNumber, course.SequenceNumber, course.CourseReferenceNumber),
					ContentType: "text/calendar",
					Reader:      strings.NewReader(ics),
				},
			},
			AllowedMentions: &discordgo.MessageAllowedMentions{},
		},
	})
}
-91
View File
@@ -1,91 +0,0 @@
package bot
import (
"banner/internal"
"fmt"
"github.com/bwmarrin/discordgo"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
// RegisterHandlers registers the bot's command handlers.
//
// A single interaction handler is attached to the session. It rejects commands
// while the bot is closing, logs each invocation with its options and origin
// (guild and channel, or DM user), dispatches to the entry in CommandHandlers
// matching the command name, and reports handler errors and panics back to the
// user through internal.RespondError.
func (b *Bot) RegisterHandlers() {
	b.Session.AddHandler(func(internalSession *discordgo.Session, interaction *discordgo.InteractionCreate) {
		// Handle commands during restart (highly unlikely, but just in case)
		if b.isClosing {
			err := internal.RespondError(internalSession, interaction.Interaction, "Bot is currently restarting, try again later.", nil)
			if err != nil {
				log.Error().Err(err).Msg("Failed to respond with restart error feedback")
			}
			return
		}

		// ApplicationCommandData panics for other interaction types, and that
		// panic would happen before the recover below is installed.
		if interaction.Type != discordgo.InteractionApplicationCommand && interaction.Type != discordgo.InteractionApplicationCommandAutocomplete {
			return
		}

		name := interaction.ApplicationCommandData().Name
		handler, ok := CommandHandlers[name]
		if !ok {
			log.Error().Stack().Str("commandName", name).Msg("Command Interaction Has No Handler")
			// Respond with error
			if err := internal.RespondError(internalSession, interaction.Interaction, "Unexpected Error: interaction has no handler", nil); err != nil {
				log.Error().Stack().Str("commandName", name).Err(err).Msg("Failed to respond with error feedback")
			}
			return
		}

		// Build dict of options for the log
		options := zerolog.Dict()
		for _, option := range interaction.ApplicationCommandData().Options {
			options.Str(option.Name, fmt.Sprintf("%v", option.Value))
		}
		event := log.Info().Str("name", name).Str("user", internal.GetUser(interaction).Username).Dict("options", options)

		if interaction.Member != nil {
			// If the command was invoked in a guild, add guild & channel info to the log.
			// Each dict is filled completely before it is attached to the event.
			guild := zerolog.Dict().
				Str("id", interaction.GuildID).
				Str("name", internal.GetGuildName(b.Config, internalSession, interaction.GuildID))
			event.Dict("guild", guild)

			channel := zerolog.Dict().
				Str("id", interaction.ChannelID).
				Str("name", internal.GetChannelName(b.Config, internalSession, interaction.ChannelID))
			event.Dict("channel", channel)
		} else {
			// If the command was invoked in a DM, add the user info to the log
			// NOTE(review): this repeats the "user" key already set above.
			user := zerolog.Dict().
				Str("id", interaction.User.ID).
				Str("name", interaction.User.Username)
			event.Dict("user", user)
		}

		// Log command invocation
		event.Msg("Command Invoked")

		// Prepare to recover
		defer func() {
			if r := recover(); r != nil {
				log.Error().Stack().Str("commandName", name).Interface("detail", r).Msg("Command Handler Panic")
				// Respond with error
				err := internal.RespondError(internalSession, interaction.Interaction, "Unexpected Error: command handler panic", nil)
				if err != nil {
					log.Error().Stack().Str("commandName", name).Err(err).Msg("Failed to respond with panic error feedback")
				}
			}
		}()

		// Call handler, then log & respond on error
		if err := handler(b, internalSession, interaction); err != nil {
			// TODO: Find a way to merge the response with the handler's error
			log.Error().Str("commandName", name).Err(err).Msg("Command Handler Error")
			// Respond with error
			respondErr := internal.RespondError(internalSession, interaction.Interaction, fmt.Sprintf("Unexpected Error: %s", err.Error()), nil)
			if respondErr != nil {
				log.Error().Stack().Str("commandName", name).Err(respondErr).Msg("Failed to respond with error feedback")
			}
		}
	})
}
-44
View File
@@ -1,44 +0,0 @@
// Package bot provides the core functionality for the Discord bot.
package bot
import (
"banner/internal/api"
"banner/internal/config"
"fmt"
"time"
"github.com/bwmarrin/discordgo"
"github.com/rs/zerolog/log"
)
// Bot represents the state of the Discord bot.
type Bot struct {
// Session is the Discord session on which handlers are registered.
Session *discordgo.Session
// API is the Banner API client used by the command handlers.
API *api.API
// Config is the shared application configuration.
Config *config.Config
// isClosing is set by SetClosing; while true, incoming commands are rejected.
isClosing bool
}
// New builds a Bot from the given Discord session, Banner API client and
// configuration. The bot starts out not closing.
func New(s *discordgo.Session, a *api.API, c *config.Config) *Bot {
	bot := new(Bot)
	bot.Session = s
	bot.API = a
	bot.Config = c
	return bot
}
// SetClosing marks the bot as closing, preventing new commands from being processed.
// NOTE(review): the flag is written without synchronization — confirm this is
// never called concurrently with interaction handling.
func (b *Bot) SetClosing() {
b.isClosing = true
}
// GetSession obtains a session ID from the API, selects the default term for
// the current time on that session, and returns the session ID. An error is
// returned if the term selection fails.
func (b *Bot) GetSession() (string, error) {
	id := b.API.EnsureSession()
	defaultTerm := b.API.DefaultTerm(time.Now()).ToString()

	log.Info().Str("term", defaultTerm).Str("sessionID", id).Msg("Setting selected term")

	if err := b.API.SelectTerm(defaultTerm, id); err != nil {
		return "", fmt.Errorf("failed to select term while generating session ID: %w", err)
	}
	return id, nil
}
-72
View File
@@ -1,72 +0,0 @@
package config
import (
"context"
"time"
"github.com/redis/go-redis/v9"
"resty.dev/v3"
)
// Config holds the application's configuration.
type Config struct {
// Ctx is the application's root context.
Ctx context.Context
// CancelFunc cancels the application's root context.
CancelFunc context.CancelFunc
// KV provides access to the Redis cache.
KV *redis.Client
// Client is the HTTP client for making API requests.
Client *resty.Client
// IsDevelopment is true if the application is running in a development environment.
IsDevelopment bool
// BaseURL is the base URL for the Banner API.
BaseURL string
// Environment is the application's running environment (e.g. "development").
Environment string
// CentralTimeLocation is the time.Location for US Central Time.
CentralTimeLocation *time.Location
// SeasonRanges holds the start and end year-days of each term, as computed by
// New for the year in which the Config was created.
SeasonRanges *SeasonRanges
}
// New returns a Config carrying a cancellable root context, the
// "America/Chicago" time location and the season ranges for the current year.
// If the location cannot be loaded, the context is cancelled and the error is
// returned.
func New() (*Config, error) {
	rootCtx, cancelRoot := context.WithCancel(context.Background())

	central, err := time.LoadLocation("America/Chicago")
	if err != nil {
		cancelRoot()
		return nil, err
	}

	ranges := GetYearDayRange(central, uint16(time.Now().Year()))

	cfg := &Config{
		Ctx:                 rootCtx,
		CancelFunc:          cancelRoot,
		CentralTimeLocation: central,
		SeasonRanges:        &ranges,
	}
	return cfg, nil
}
// SetBaseURL sets the base URL for the Banner API.
// The value is stored as given; no validation or normalization is performed.
func (c *Config) SetBaseURL(url string) {
c.BaseURL = url
}
// SetEnvironment records the running environment and derives IsDevelopment,
// which is true only when env is exactly "development".
func (c *Config) SetEnvironment(env string) {
	c.Environment, c.IsDevelopment = env, env == "development"
}
// SetClient sets the Resty client for making HTTP requests.
// The client is stored as given, replacing any previously set client.
func (c *Config) SetClient(client *resty.Client) {
c.Client = client
}
// SetRedis sets the Redis client for caching.
// The client is stored in the KV field, replacing any previously set client.
func (c *Config) SetRedis(r *redis.Client) {
c.KV = r
}
-71
View File
@@ -1,71 +0,0 @@
// Package config provides the configuration and logging setup for the application.
package config
import (
"io"
"os"
"github.com/rs/zerolog"
)
// timeFormat is the timestamp layout used by both console writers.
const timeFormat = "2006-01-02 15:04:05"
// NewConsoleWriter returns a ConsoleLogSplitter whose two console writers share
// the same formatting and differ only in destination: one writes to os.Stdout,
// the other to os.Stderr.
func NewConsoleWriter() zerolog.LevelWriter {
	// build produces a console writer for the given destination.
	build := func(out io.Writer) zerolog.ConsoleWriter {
		return zerolog.ConsoleWriter{
			Out:           out,
			TimeFormat:    timeFormat,
			NoColor:       false,
			PartsOrder:    []string{zerolog.TimestampFieldName, zerolog.LevelFieldName, zerolog.MessageFieldName},
			PartsExclude:  []string{},
			FieldsExclude: []string{},
		}
	}

	splitter := &ConsoleLogSplitter{}
	splitter.stdConsole = build(os.Stdout)
	splitter.errConsole = build(os.Stderr)
	return splitter
}
// ConsoleLogSplitter is a zerolog.LevelWriter that routes log lines to one of
// two console-formatted writers depending on level.
// NOTE(review): WriteLevel sends levels up to and including Warn to stdConsole
// and only higher levels to errConsole; an earlier comment said warn goes to
// stderr — confirm which is intended.
type ConsoleLogSplitter struct {
// stdConsole is the console writer that NewConsoleWriter targets at os.Stdout.
stdConsole zerolog.ConsoleWriter
// errConsole is the console writer that NewConsoleWriter targets at os.Stderr.
errConsole zerolog.ConsoleWriter
}
// Write is a passthrough to the standard console writer and should not be called directly.
// It does not look at the log level; every call goes to stdConsole.
func (c *ConsoleLogSplitter) Write(p []byte) (n int, err error) {
return c.stdConsole.Write(p)
}
// WriteLevel formats p for the console and writes it to the error console when
// level is above zerolog.WarnLevel, and to the standard console otherwise
// (so warnings themselves go to the standard console).
func (c *ConsoleLogSplitter) WriteLevel(level zerolog.Level, p []byte) (n int, err error) {
	if level > zerolog.WarnLevel {
		return c.errConsole.Write(p)
	}
	return c.stdConsole.Write(p)
}
// LogSplitter is a zerolog.LevelWriter that routes raw log lines to one of two
// writers depending on level.
// NOTE(review): WriteLevel sends levels up to and including Warn to Std and
// only higher levels to Err; an earlier comment said warn goes to stderr —
// confirm which is intended.
type LogSplitter struct {
// Std receives lines at zerolog.WarnLevel and below, and everything passed to Write.
Std io.Writer
// Err receives lines above zerolog.WarnLevel.
Err io.Writer
}
// Write is a passthrough to the standard writer and should not be called directly.
// It does not look at the log level; every call goes to Std.
func (l LogSplitter) Write(p []byte) (n int, err error) {
return l.Std.Write(p)
}
// WriteLevel writes p to Err when level is above zerolog.WarnLevel, and to Std
// otherwise (so warnings themselves go to Std).
func (l LogSplitter) WriteLevel(level zerolog.Level, p []byte) (n int, err error) {
	dest := l.Err
	if level <= zerolog.WarnLevel {
		dest = l.Std
	}
	return dest.Write(p)
}
-140
View File
@@ -1,140 +0,0 @@
package config
import (
"fmt"
"strconv"
"time"
)
// Term selection should yield smart results based on the current time, as well as the input provided.
// Fall 2024, "spring" => Spring 2025
// Fall 2024, "fall" => Fall 2025
// Summer 2024, "fall" => Fall 2024
// Season identifiers stored in Term.Season. Fall is 0, Spring is 1 and
// Summer is 2.
const (
// Fall is the first term of the school year.
Fall = iota
// Spring is the second term of the school year.
Spring
// Summer is the third term of the school year.
Summer
)
// Term represents a school term, consisting of a year and a season.
type Term struct {
// Year is the four-digit year component of the term code.
Year uint16
// Season is one of the Fall, Spring or Summer constants.
Season uint8
}
// SeasonRanges represents the start and end day of each term within a year.
type SeasonRanges struct {
// Spring is the year-day range of the spring term.
Spring YearDayRange
// Summer is the year-day range of the summer term.
Summer YearDayRange
// Fall is the year-day range of the fall term.
Fall YearDayRange
}
// YearDayRange represents the start and end day of a term within a year.
// Both values are days of the year as returned by time.Time.YearDay.
type YearDayRange struct {
// Start is the first day of the term (inclusive).
Start uint16
// End is the day the term ends (exclusive).
End uint16
}
// GetYearDayRange computes, for the given year and location, the day-of-year
// on which each term starts and ends. Start days are inclusive and end days
// are exclusive.
//
// The calendar dates used are: Spring Jan 14 – May 1, Summer May 25 – Aug 15,
// Fall Aug 18 – Dec 10.
func GetYearDayRange(loc *time.Location, year uint16) SeasonRanges {
	// yearDay converts a month/day in the requested year to its day-of-year.
	yearDay := func(month time.Month, day int) uint16 {
		return uint16(time.Date(int(year), month, day, 0, 0, 0, 0, loc).YearDay())
	}

	var ranges SeasonRanges
	ranges.Spring = YearDayRange{Start: yearDay(time.January, 14), End: yearDay(time.May, 1)}
	ranges.Summer = YearDayRange{Start: yearDay(time.May, 25), End: yearDay(time.August, 15)}
	ranges.Fall = YearDayRange{Start: yearDay(time.August, 18), End: yearDay(time.December, 10)}
	return ranges
}
// GetCurrentTerm returns the current and next terms based on the provided time.
// The current term can be nil if the time falls between terms.
// The next term is nil while the Fall term is in progress.
// The 'year' in the term corresponds to the academic year, which may differ from the calendar year.
func GetCurrentTerm(ranges SeasonRanges, now time.Time) (*Term, *Term) {
	literalYear := uint16(now.Year())
	dayOfYear := uint16(now.YearDay())

	// If we're past the end of the summer term, we're 'in' the next school year.
	// Range ends are exclusive, so the end day itself already counts as past;
	// using '>' here would give the upcoming Fall term the previous academic
	// year on that one day.
	termYear := literalYear
	if dayOfYear >= ranges.Summer.End {
		termYear = literalYear + 1
	}

	if (dayOfYear < ranges.Spring.Start) || (dayOfYear >= ranges.Fall.End) {
		// Fall over, Spring not yet begun
		return nil, &Term{Year: termYear, Season: Spring}
	} else if (dayOfYear >= ranges.Spring.Start) && (dayOfYear < ranges.Spring.End) {
		// Spring
		return &Term{Year: termYear, Season: Spring}, &Term{Year: termYear, Season: Summer}
	} else if dayOfYear < ranges.Summer.Start {
		// Spring over, Summer not yet begun
		return nil, &Term{Year: termYear, Season: Summer}
	} else if (dayOfYear >= ranges.Summer.Start) && (dayOfYear < ranges.Summer.End) {
		// Summer
		return &Term{Year: termYear, Season: Summer}, &Term{Year: termYear, Season: Fall}
	} else if dayOfYear < ranges.Fall.Start {
		// Summer over, Fall not yet begun
		return nil, &Term{Year: termYear, Season: Fall}
	} else if (dayOfYear >= ranges.Fall.Start) && (dayOfYear < ranges.Fall.End) {
		// Fall
		return &Term{Year: termYear, Season: Fall}, nil
	}

	// Only reachable if the ranges are not in chronological order
	panic(fmt.Sprintf("Impossible Code Reached (dayOfYear: %d)", dayOfYear))
}
// ParseTerm converts a Banner term code string to a Term struct.
//
// The first four characters are parsed as the year and the next two select the
// season: "10" is Fall, "20" is Spring, "30" is Summer. A year that fails to
// parse is not reported, and an unrecognised season code yields Fall (the zero
// value). The code must be at least six characters long or the slicing panics.
func ParseTerm(code string) Term {
	yearPart, seasonPart := code[0:4], code[4:6]

	parsedYear, _ := strconv.ParseUint(yearPart, 10, 16)
	result := Term{Year: uint16(parsedYear)}

	if seasonPart == "20" {
		result.Season = Spring
	} else if seasonPart == "30" {
		result.Season = Summer
	} else {
		// "10" and anything unrecognised
		result.Season = Fall
	}
	return result
}
// ToString renders the term as a Banner term code: the year followed by "10"
// for Fall, "20" for Spring or "30" for Summer. Any other season value adds no
// suffix.
func (term Term) ToString() string {
	suffix := ""
	if term.Season == Fall {
		suffix = "10"
	} else if term.Season == Spring {
		suffix = "20"
	} else if term.Season == Summer {
		suffix = "30"
	}
	return fmt.Sprintf("%d%s", term.Year, suffix)
}
-13
View File
@@ -1,13 +0,0 @@
package internal
import "fmt"
// UnexpectedContentTypeError reports that a response carried a Content-Type
// other than the one the caller required.
type UnexpectedContentTypeError struct {
	// Expected is the content type the caller required.
	Expected string
	// Actual is the content type that was received.
	Actual string
}

// Error implements the error interface, naming both the expected and the
// received content type.
func (e *UnexpectedContentTypeError) Error() string {
	return fmt.Sprint("Expected content type '", e.Expected, "', received '", e.Actual, "'")
}
-376
View File
@@ -1,376 +0,0 @@
package internal
import (
"fmt"
"io"
"math/rand"
"net/http"
"net/url"
"os"
"runtime"
"sort"
"strconv"
"strings"
"time"
"github.com/bwmarrin/discordgo"
"github.com/rs/zerolog"
log "github.com/rs/zerolog/log"
"resty.dev/v3"
"banner/internal/config"
)
// Options maps an option name to the corresponding option of a Discord command
// invocation.
type Options map[string]*discordgo.ApplicationCommandInteractionDataOption

// GetInt looks up key and returns its integer value. A key that is not present
// yields 0.
func (o Options) GetInt(key string) int64 {
	opt, found := o[key]
	if !found {
		return 0
	}
	return opt.IntValue()
}
// ParseOptions indexes slash command options by name for easier access. If two
// options share a name, the later one wins.
func ParseOptions(options []*discordgo.ApplicationCommandInteractionDataOption) Options {
	byName := make(Options)
	for idx := range options {
		current := options[idx]
		byName[current.Name] = current
	}
	return byName
}
// AddUserAgent adds a consistent user agent to the request to mimic a real browser.
func AddUserAgent(req *http.Request) {
req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")
}
// ContentTypeMatch checks if a Resty response has the given content type.
//
// The response's Content-Type header only has to start with
// expectedContentType, so parameters such as a charset are ignored. A response
// without a Content-Type header is treated as "application/octet-stream".
func ContentTypeMatch(res *resty.Response, expectedContentType string) bool {
	contentType := res.Header().Get("Content-Type")
	if contentType == "" {
		// The misspelled "octect" form was the only value accepted before; it
		// is still accepted so existing callers keep working.
		return expectedContentType == "application/octet-stream" || expectedContentType == "application/octect-stream"
	}
	return strings.HasPrefix(contentType, expectedContentType)
}
// letterBytes is the alphabet RandomString draws from: ASCII letters only.
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

// RandomString returns a random string of length n.
// The character set is chosen to mimic Ellucian's Banner session ID generation.
// Characters come from math/rand, so the result is not suitable for secrets.
func RandomString(n int) string {
	out := make([]byte, n)
	for pos := 0; pos < n; pos++ {
		pick := rand.Intn(len(letterBytes))
		out[pos] = letterBytes[pick]
	}
	return string(out)
}
// DiscordGoLogger adapts discordgo's logging callback to zerolog.
//
// msgL selects the level (0 debug, 1 info, 2 warn, 3 error; anything else is
// logged as info). caller is the number of stack frames to skip when
// determining the file, line and function attached to the log entry.
func DiscordGoLogger(msgL, caller int, format string, a ...interface{}) {
	pc, path, line, _ := runtime.Caller(caller)

	// Keep only the last path element and the last dot-separated name element.
	file := path[strings.LastIndex(path, "/")+1:]
	qualified := runtime.FuncForPC(pc).Name()
	function := qualified[strings.LastIndex(qualified, ".")+1:]

	msg := fmt.Sprintf(format, a...)

	var event *zerolog.Event
	switch msgL {
	case 0:
		event = log.Debug()
	case 2:
		event = log.Warn()
	case 3:
		event = log.Error()
	default:
		// Level 1 and any unknown level
		event = log.Info()
	}
	event.Str("file", file).Int("line", line).Str("function", function).Msg(msg)
}
// Nonce returns the current time in milliseconds since the Unix epoch as a string.
// This is typically used as a query parameter to prevent request caching.
func Nonce() string {
return strconv.Itoa(int(time.Now().UnixMilli()))
}
// Plural returns the suffix "s" for every n except 1, for which it returns the
// empty string.
func Plural(n int) string {
	if n != 1 {
		return "s"
	}
	return ""
}
// Plurale returns the suffix "es" for every n except 1, for which it returns
// the empty string.
func Plurale(n int) string {
	if n != 1 {
		return "es"
	}
	return ""
}
// WeekdaysToString converts a map of weekdays to a compact string representation (e.g., "MWF").
func WeekdaysToString(days map[time.Weekday]bool) string {
// If no days are present
numDays := len(days)
if numDays == 0 {
return "None"
}
// If all days are present
if numDays == 7 {
return "Everyday"
}
str := ""
if days[time.Monday] {
str += "M"
}
if days[time.Tuesday] {
str += "Tu"
}
if days[time.Wednesday] {
str += "W"
}
if days[time.Thursday] {
str += "Th"
}
if days[time.Friday] {
str += "F"
}
if days[time.Saturday] {
str += "Sa"
}
if days[time.Sunday] {
str += "Su"
}
return str
}
// NaiveTime represents a time of day without a date or timezone.
type NaiveTime struct {
Hours uint
Minutes uint
}
// Sub returns the duration between two NaiveTime instances.
func (nt *NaiveTime) Sub(other *NaiveTime) time.Duration {
return time.Hour*time.Duration(nt.Hours-other.Hours) + time.Minute*time.Duration(nt.Minutes-other.Minutes)
}
// ParseNaiveTime converts an integer representation of time (e.g., 1430) to a NaiveTime struct.
func ParseNaiveTime(integer uint64) *NaiveTime {
minutes := uint(integer % 100)
hours := uint(integer / 100)
return &NaiveTime{Hours: hours, Minutes: minutes}
}
// String returns a string representation of the NaiveTime in 12-hour format (e.g., "2:30PM").
func (nt NaiveTime) String() string {
meridiem := "AM"
hour := nt.Hours
if nt.Hours >= 12 {
meridiem = "PM"
if nt.Hours > 12 {
hour -= 12
}
}
return fmt.Sprintf("%d:%02d%s", hour, nt.Minutes, meridiem)
}
// GetFirstEnv returns the value of the first environment variable that is set.
func GetFirstEnv(key ...string) string {
for _, k := range key {
if v := os.Getenv(k); v != "" {
return v
}
}
return ""
}
// GetIntPointer returns a pointer to a fresh copy of the given integer. Each
// call yields a distinct pointer.
func GetIntPointer(value int) *int {
	ptr := new(int)
	*ptr = value
	return ptr
}
// GetFloatPointer returns a pointer to a fresh copy of the given float. Each
// call yields a distinct pointer.
func GetFloatPointer(value float64) *float64 {
	ptr := new(float64)
	*ptr = value
	return ptr
}
// extensionMap maps a lower-case media type (without parameters) to the file
// extension used for it, without a leading dot.
var extensionMap = map[string]string{
	"text/plain":               "txt",
	"application/json":         "json",
	"text/html":                "html",
	"text/css":                 "css",
	"text/csv":                 "csv",
	"text/calendar":            "ics",
	"text/markdown":            "md",
	"text/xml":                 "xml",
	"text/yaml":                "yaml",
	"text/javascript":          "js",
	"text/vtt":                 "vtt",
	"image/jpeg":               "jpg",
	"image/png":                "png",
	"image/gif":                "gif",
	"image/webp":               "webp",
	"image/tiff":               "tiff",
	"image/svg+xml":            "svg",
	"image/bmp":                "bmp",
	"image/vnd.microsoft.icon": "ico",
	"image/x-icon":             "ico",
	"image/x-xbitmap":          "xbm",
	"image/x-xpixmap":          "xpm",
	"image/x-xwindowdump":      "xwd",
	"image/avif":               "avif",
	"image/apng":               "apng",
	"image/jxl":                "jxl",
}

// GuessExtension guesses the file extension for a given content type.
//
// The lookup is case-insensitive and ignores surrounding whitespace and any
// parameters after the first ';', so a full header value such as
// "text/html; charset=utf-8" is accepted. An unknown content type yields the
// empty string.
func GuessExtension(contentType string) string {
	mediaType := contentType
	// Content-Type headers frequently carry parameters, which are not part of
	// the map's keys.
	if idx := strings.IndexByte(mediaType, ';'); idx >= 0 {
		mediaType = mediaType[:idx]
	}
	mediaType = strings.ToLower(strings.TrimSpace(mediaType))

	// A missing key yields the empty string
	return extensionMap[mediaType]
}
// DumpResponse writes the body of a Resty response to a file under dumps/ for
// debugging. The file is named after the current Unix timestamp, with an
// extension guessed from the response's Content-Type header. Failures are
// logged and otherwise ignored.
func DumpResponse(res *resty.Response) {
	contentType := res.Header().Get("Content-Type")

	// Use current time as filename + /dumps/ prefix
	filename := fmt.Sprintf("dumps/%d.%s", time.Now().Unix(), GuessExtension(contentType))

	// The file is created before the body is read, so it exists (empty) even
	// if reading fails.
	out, createErr := os.Create(filename)
	if createErr != nil {
		log.Err(createErr).Stack().Msg("Error creating file")
		return
	}
	defer out.Close()

	payload, readErr := io.ReadAll(res.Body)
	if readErr != nil {
		log.Err(readErr).Stack().Msg("Error reading response body")
		return
	}

	if _, writeErr := out.Write(payload); writeErr != nil {
		log.Err(writeErr).Stack().Msg("Error writing response body")
		return
	}

	log.Info().Str("filename", filename).Str("content-type", contentType).Msg("Dumped response body")
}
// RespondError replies to an interaction with a red embed containing message
// and a footer stating when the error occurred. When err is non-nil it is
// logged first, with message as the log text. The result of sending the
// response is returned.
func RespondError(session *discordgo.Session, interaction *discordgo.Interaction, message string, err error) error {
	// Optional: log the error
	if err != nil {
		log.Err(err).Stack().Msg(message)
	}

	occurredAt := time.Now().Format("Monday, January 2, 2006 at 3:04:05PM")
	embed := &discordgo.MessageEmbed{
		Footer: &discordgo.MessageEmbedFooter{
			Text: fmt.Sprintf("Occurred at %s", occurredAt),
		},
		Description: message,
		Color:       0xff0000,
	}

	response := &discordgo.InteractionResponse{
		Type: discordgo.InteractionResponseChannelMessageWithSource,
		Data: &discordgo.InteractionResponseData{
			Embeds:          []*discordgo.MessageEmbed{embed},
			AllowedMentions: &discordgo.MessageAllowedMentions{},
		},
	}
	return session.InteractionRespond(interaction, response)
}
// GetFetchedFooter builds the standard embed footer stating when the data was
// fetched. The given time is converted to the configured Central time location
// before it is formatted.
func GetFetchedFooter(cfg *config.Config, time time.Time) *discordgo.MessageEmbedFooter {
	fetchedAt := time.In(cfg.CentralTimeLocation).Format("Monday, January 2, 2006 at 3:04:05PM")

	footer := new(discordgo.MessageEmbedFooter)
	footer.Text = fmt.Sprintf("Fetched at %s", fetchedAt)
	return footer
}
// GetUser returns the user who triggered an interaction. When the interaction
// has a Member, that member's user is returned; otherwise the interaction's
// own User field is returned.
func GetUser(interaction *discordgo.InteractionCreate) *discordgo.User {
	if member := interaction.Member; member != nil {
		// Guild interactions carry the user inside Member
		return member.User
	}
	// DM interactions carry the user directly
	return interaction.User
}
// EncodeParams encodes a map of parameters into a URL-encoded string, sorted by key.
//
// Every value of a key is emitted as its own key=value pair, in slice order.
// A nil map yields the empty string, and keys whose value pointer is nil or
// whose slice is empty contribute nothing.
func EncodeParams(params map[string]*[]string) string {
	// Escape hatch for nil
	if params == nil {
		return ""
	}

	// Sort the keys
	keys := make([]string, 0, len(params))
	for k := range params {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var buf strings.Builder
	for _, k := range keys {
		// Multiple values are allowed, so extract the slice & prepare the key
		values := params[k]
		if values == nil {
			// A nil pointer carries no values; dereferencing it would panic
			continue
		}
		keyEscaped := url.QueryEscape(k)
		for _, v := range *values {
			// If any parameters have been written, add the ampersand
			if buf.Len() > 0 {
				buf.WriteByte('&')
			}
			// Write the key and value
			buf.WriteString(keyEscaped)
			buf.WriteByte('=')
			buf.WriteString(url.QueryEscape(v))
		}
	}
	return buf.String()
}
// Point is a location in 2D space.
type Point struct {
	X, Y float64
}

// Slope evaluates, at the given x, the straight line passing through p1 and
// p2 and returns the resulting point. If p1 and p2 share the same X the
// division is by zero and the result follows floating-point rules (Inf or NaN).
func Slope(p1 Point, p2 Point, x float64) Point {
	gradient := (p2.Y - p1.Y) / (p2.X - p1.X)
	return Point{
		X: x,
		Y: gradient*(x-p1.X) + p1.Y,
	}
}
-96
View File
@@ -1,96 +0,0 @@
// Package internal provides shared functionality for the banner application.
package internal
import (
"banner/internal/config"
"context"
"time"
"github.com/bwmarrin/discordgo"
"github.com/redis/go-redis/v9"
log "github.com/rs/zerolog/log"
)
// GetGuildName returns the name of a guild by its ID, using Redis for caching.
//
// A cached name is returned directly. The single-character placeholder stored
// after a failed lookup yields "unknown". On a cache miss the guild is fetched
// from Discord: on success its name is cached for three hours, on failure the
// placeholder is cached for five minutes and "unknown" is returned. A Redis
// read error other than a miss yields "err".
func GetGuildName(cfg *config.Config, session *discordgo.Session, guildID string) string {
	key := "guild:" + guildID + ":name"

	// Create a timeout context for Redis operations
	ctx, cancel := context.WithTimeout(cfg.Ctx, 5*time.Second)
	defer cancel()

	// Check Redis for the guild name
	guildName, err := cfg.KV.Get(ctx, key).Result()
	if err != nil && err != redis.Nil {
		log.Error().Stack().Err(err).Msg("Error getting guild name from Redis")
		return "err"
	}

	// If the guild name is invalid (1 character long), then return "unknown"
	if len(guildName) == 1 {
		return "unknown"
	}

	// Cache hit: use the stored name instead of asking Discord again
	if err == nil && guildName != "" {
		return guildName
	}

	// If the guild name isn't in Redis, get it from Discord and cache it
	guild, err := session.Guild(guildID)
	if err != nil {
		log.Error().Stack().Err(err).Msg("Error getting guild name")

		// Store an invalid value in Redis so we don't keep trying to get the guild name
		ctx2, cancel2 := context.WithTimeout(cfg.Ctx, 5*time.Second)
		defer cancel2()
		_, err := cfg.KV.Set(ctx2, key, "x", time.Minute*5).Result()
		if err != nil {
			log.Error().Stack().Err(err).Msg("Error setting false guild name in Redis")
		}
		return "unknown"
	}

	// Cache the guild name in Redis; a failure here is logged but not fatal
	ctx3, cancel3 := context.WithTimeout(cfg.Ctx, 5*time.Second)
	defer cancel3()
	if err := cfg.KV.Set(ctx3, key, guild.Name, time.Hour*3).Err(); err != nil {
		log.Error().Stack().Err(err).Msg("Error caching guild name in Redis")
	}
	return guild.Name
}
// GetChannelName returns the name of a channel by its ID, using Redis for caching.
//
// A cached name is returned directly. The single-character placeholder stored
// after a failed lookup yields "unknown". On a cache miss the channel is
// fetched from Discord: on success its name is cached for three hours, on
// failure the placeholder is cached for five minutes and "unknown" is
// returned. A Redis read error other than a miss yields "err".
//
// NOTE(review): any one-character cached value is treated as the failure
// placeholder — confirm real names can never be a single character.
func GetChannelName(cfg *config.Config, session *discordgo.Session, channelID string) string {
	key := "channel:" + channelID + ":name"

	// Create a timeout context for Redis operations
	ctx, cancel := context.WithTimeout(cfg.Ctx, 5*time.Second)
	defer cancel()

	// Check Redis for the channel name
	channelName, err := cfg.KV.Get(ctx, key).Result()
	if err != nil && err != redis.Nil {
		log.Error().Stack().Err(err).Msg("Error getting channel name from Redis")
		return "err"
	}

	// If the channel name is invalid (1 character long), then return "unknown"
	if len(channelName) == 1 {
		return "unknown"
	}

	// Cache hit: use the stored name instead of asking Discord again
	if err == nil && channelName != "" {
		return channelName
	}

	// If the channel name isn't in Redis, get it from Discord and cache it
	channel, err := session.Channel(channelID)
	if err != nil {
		log.Error().Stack().Err(err).Msg("Error getting channel name")

		// Store an invalid value in Redis so we don't keep trying to get the channel name
		ctx2, cancel2 := context.WithTimeout(cfg.Ctx, 5*time.Second)
		defer cancel2()
		_, err := cfg.KV.Set(ctx2, key, "x", time.Minute*5).Result()
		if err != nil {
			log.Error().Stack().Err(err).Msg("Error setting false channel name in Redis")
		}
		return "unknown"
	}

	// Cache the channel name in Redis; a failure here is logged but not fatal
	ctx3, cancel3 := context.WithTimeout(cfg.Ctx, 5*time.Second)
	defer cancel3()
	if err := cfg.KV.Set(ctx3, key, channel.Name, time.Hour*3).Err(); err != nil {
		log.Error().Stack().Err(err).Msg("Error caching channel name in Redis")
	}
	return channel.Name
}
-323
View File
@@ -1,323 +0,0 @@
// Package models provides the data structures for the Banner API.
package models
import (
"banner/internal"
"encoding/json"
"fmt"
"strconv"
"strings"
"time"
log "github.com/rs/zerolog/log"
)
// FacultyItem represents a faculty member associated with a course.
// Fields are decoded from the JSON keys named in the struct tags.
type FacultyItem struct {
// BannerID is decoded from "bannerId".
BannerID string `json:"bannerId"`
// Category is decoded from "category"; it is a pointer, so it can be absent or null.
Category *string `json:"category"`
// Class is decoded from "class".
Class string `json:"class"`
// CourseReferenceNumber is the CRN of the course this entry belongs to.
CourseReferenceNumber string `json:"courseReferenceNumber"`
// DisplayName is the name shown to users as the instructor.
DisplayName string `json:"displayName"`
// Email is decoded from "emailAddress".
Email string `json:"emailAddress"`
// Primary is decoded from "primaryIndicator".
Primary bool `json:"primaryIndicator"`
// Term is decoded from "term" — presumably a Banner term code; verify against the API.
Term string `json:"term"`
}
// MeetingTimeResponse represents the meeting time information for a course.
// The nested MeetingTime struct holds the schedule itself (dates, times,
// location and per-weekday flags); the outer fields identify the section.
type MeetingTimeResponse struct {
// Category is a pointer so that a JSON null decodes to nil.
Category *string `json:"category"`
Class string `json:"class"`
CourseReferenceNumber string `json:"courseReferenceNumber"`
// NOTE(review): Faculty has no json tag. encoding/json matches the key
// case-insensitively when decoding, but encodes it as "Faculty" — confirm
// that is intended.
Faculty []FacultyItem
MeetingTime struct {
Category string `json:"category"`
// Some sort of metadata used internally by Banner (net.hedtech.banner.student.schedule.SectionSessionDecorator)
Class string `json:"class"`
// The start date of the meeting time in MM/DD/YYYY format (e.g. 01/16/2024)
StartDate string `json:"startDate"`
// The end date of the meeting time in MM/DD/YYYY format (e.g. 05/10/2024)
EndDate string `json:"endDate"`
// The start time of the meeting time in 24-hour format, hours & minutes, digits only (e.g. 1630)
BeginTime string `json:"beginTime"`
// The end time of the meeting time in 24-hour format, hours & minutes, digits only (e.g. 1745)
EndTime string `json:"endTime"`
// The room number within the building this course takes place at (e.g. 3.01.08, 200A)
Room string `json:"room"`
// The internal identifier for the term this course takes place in (e.g. 202420)
Term string `json:"term"`
// The internal identifier for the building this course takes place at (e.g. SP1)
Building string `json:"building"`
// The long name of the building this course takes place at (e.g. San Pedro I - Data Science)
BuildingDescription string `json:"buildingDescription"`
// The internal identifier for the campus this course takes place at (e.g. 1DT)
Campus string `json:"campus"`
// The long name of the campus this course takes place at (e.g. Main Campus, Downtown Campus)
CampusDescription string `json:"campusDescription"`
CourseReferenceNumber string `json:"courseReferenceNumber"`
// The number of credit hours this class is worth (presumably)
CreditHourSession float64 `json:"creditHourSession"`
// The number of hours per week this class meets (e.g. 2.5)
HoursWeek float64 `json:"hoursWeek"`
// Unknown meaning - e.g. AFF, AIN, AHB, FFF, AFF, EFF, DFF, IFF, EHB, JFF, KFF, BFF, BIN
MeetingScheduleType string `json:"meetingScheduleType"`
// The short identifier for the meeting type (e.g. FF, HB, OS, OA)
MeetingType string `json:"meetingType"`
// The long name of the meeting type (e.g. Traditional in-person)
MeetingTypeDescription string `json:"meetingTypeDescription"`
// A boolean indicating if the class will meet on each Monday of the term
Monday bool `json:"monday"`
// A boolean indicating if the class will meet on each Tuesday of the term
Tuesday bool `json:"tuesday"`
// A boolean indicating if the class will meet on each Wednesday of the term
Wednesday bool `json:"wednesday"`
// A boolean indicating if the class will meet on each Thursday of the term
Thursday bool `json:"thursday"`
// A boolean indicating if the class will meet on each Friday of the term
Friday bool `json:"friday"`
// A boolean indicating if the class will meet on each Saturday of the term
Saturday bool `json:"saturday"`
// A boolean indicating if the class will meet on each Sunday of the term
Sunday bool `json:"sunday"`
} `json:"meetingTime"`
Term string `json:"term"`
}
// String renders the meeting as a short, human-readable summary. The layout
// depends on the Banner meeting type code; unrecognised codes yield "Unknown".
func (m *MeetingTimeResponse) String() string {
	switch m.MeetingTime.MeetingType {
	case "HB", "H2", "H1":
		// Every hybrid variant is rendered the same way.
		return fmt.Sprintf("%s\nHybrid %s", m.TimeString(), m.PlaceString())
	case "FF":
		return fmt.Sprintf("%s\n%s", m.TimeString(), m.PlaceString())
	case "OS":
		return fmt.Sprintf("%s\nOnline Only", m.TimeString())
	case "OH":
		return fmt.Sprintf("%s\nOnline Partial", m.TimeString())
	case "OA":
		return "No Time\nOnline Asynchronous"
	case "ID":
		return "To Be Arranged"
	default:
		// TODO: Add error log
		return "Unknown"
	}
}
// TimeString returns a formatted string of the meeting times (e.g., "MWF 1:00PM-2:15PM").
// It returns "???" when either the start or the end time is unavailable (nil).
// Note that StartTime itself panics on an empty or unparsable begin time, so
// that case never reaches the "???" branch.
func (m *MeetingTimeResponse) TimeString() string {
	startTime := m.StartTime()
	endTime := m.EndTime()
	if startTime == nil || endTime == nil {
		return "???"
	}

	// Reuse the values that were just nil-checked instead of parsing the raw
	// strings a second time.
	return fmt.Sprintf("%s %s-%s", internal.WeekdaysToString(m.Days()), startTime.String(), endTime.String())
}
// PlaceString describes where the meeting takes place as
// "<campus> | <building name> | <building code> <room>".
// A meeting without a room is reported as "Online".
func (m *MeetingTimeResponse) PlaceString() string {
	// TODO: Add format case for partial online classes
	if m.MeetingTime.Room == "" {
		return "Online"
	}

	campus := m.MeetingTime.CampusDescription
	buildingName := m.MeetingTime.BuildingDescription
	buildingCode := m.MeetingTime.Building
	room := m.MeetingTime.Room
	return campus + " | " + buildingName + " | " + buildingCode + " " + room
}
// Days reports, for Monday through Saturday, whether the course meets on that
// weekday. Every one of those six keys is present; the value is the flag.
//
// NOTE(review): time.Sunday is not included even though MeetingTime has a
// Sunday flag and ByDay honours it — confirm whether this omission is
// intentional before relying on the map for Sunday meetings.
func (m *MeetingTimeResponse) Days() map[time.Weekday]bool {
	mt := &m.MeetingTime
	return map[time.Weekday]bool{
		time.Monday:    mt.Monday,
		time.Tuesday:   mt.Tuesday,
		time.Wednesday: mt.Wednesday,
		time.Thursday:  mt.Thursday,
		time.Friday:    mt.Friday,
		time.Saturday:  mt.Saturday,
	}
}
// ByDay builds the comma-separated list of two-letter day codes (SU, MO, TU,
// WE, TH, FR, SA) used for the iCalendar RRule. Codes are emitted in
// Sunday-first order; a meeting with no days set yields an empty string.
func (m *MeetingTimeResponse) ByDay() string {
	mt := &m.MeetingTime
	schedule := []struct {
		meets bool
		code  string
	}{
		{mt.Sunday, "SU"},
		{mt.Monday, "MO"},
		{mt.Tuesday, "TU"},
		{mt.Wednesday, "WE"},
		{mt.Thursday, "TH"},
		{mt.Friday, "FR"},
		{mt.Saturday, "SA"},
	}

	codes := make([]string, 0, len(schedule))
	for _, day := range schedule {
		if day.meets {
			codes = append(codes, day.code)
		}
	}
	return strings.Join(codes, ",")
}
// layout is the time.Parse reference layout for the MM/DD/YYYY date strings
// carried in MeetingTime.StartDate and MeetingTime.EndDate.
const layout = "01/02/2006"
// StartDay parses MeetingTime.StartDate into a time.Time.
// The result is not cached; an unparsable date is logged at panic level.
func (m *MeetingTimeResponse) StartDay() time.Time {
	raw := m.MeetingTime.StartDate
	parsed, parseErr := time.Parse(layout, raw)
	if parseErr != nil {
		log.Panic().Stack().Err(parseErr).Str("raw", raw).Msg("Cannot parse start date")
	}
	return parsed
}
// EndDay parses MeetingTime.EndDate into a time.Time.
// The result is not cached; an unparsable date is logged at panic level.
func (m *MeetingTimeResponse) EndDay() time.Time {
	raw := m.MeetingTime.EndDate
	parsed, parseErr := time.Parse(layout, raw)
	if parseErr != nil {
		log.Panic().Stack().Err(parseErr).Str("raw", raw).Msg("Cannot parse end date")
	}
	return parsed
}
// StartTime converts MeetingTime.BeginTime (digits only, e.g. "1630") into a
// NaiveTime. The result is not cached. An empty or non-numeric value is
// logged at panic level.
func (m *MeetingTimeResponse) StartTime() *internal.NaiveTime {
	beginTime := m.MeetingTime.BeginTime
	if len(beginTime) == 0 {
		log.Panic().Stack().Msg("Start time is empty")
	}

	digits, parseErr := strconv.ParseUint(beginTime, 10, 32)
	if parseErr != nil {
		log.Panic().Stack().Err(parseErr).Str("raw", beginTime).Msg("Cannot parse start time integer")
	}

	return internal.ParseNaiveTime(digits)
}
// EndTime converts MeetingTime.EndTime (digits only, e.g. "1745") into a
// NaiveTime. The result is not cached. Unlike StartTime, an empty value
// returns nil rather than panicking; a non-numeric value is logged at panic
// level.
func (m *MeetingTimeResponse) EndTime() *internal.NaiveTime {
	endTime := m.MeetingTime.EndTime
	if len(endTime) == 0 {
		return nil
	}

	digits, parseErr := strconv.ParseUint(endTime, 10, 32)
	if parseErr != nil {
		log.Panic().Stack().Err(parseErr).Str("raw", endTime).Msg("Cannot parse end time integer")
	}

	return internal.ParseNaiveTime(digits)
}
// RRule represents a recurrence rule for an iCalendar event.
type RRule struct {
// Until is the last occurrence, formatted as "20060102T150405Z" (UTC).
Until string
// ByDay is a comma-separated list of two-letter day codes (e.g. "MO,WE,FR").
ByDay string
}
// RRule derives the iCalendar recurrence rule for this meeting: it recurs on
// the days reported by ByDay until the meeting's end date (expressed in UTC).
func (m *MeetingTimeResponse) RRule() RRule {
	until := m.EndDay().UTC().Format("20060102T150405Z")
	byDay := m.ByDay()
	return RRule{Until: until, ByDay: byDay}
}
// SearchResult represents the result of a course search.
// It is the envelope around one page of results: paging metadata plus the
// courses themselves in Data.
type SearchResult struct {
Success bool `json:"success"`
// TotalCount, PageOffset and PageMaxSize are decoded from the paging keys
// of the response.
// NOTE(review): presumably TotalCount spans all pages, not only Data — confirm.
TotalCount int `json:"totalCount"`
PageOffset int `json:"pageOffset"`
PageMaxSize int `json:"pageMaxSize"`
PathMode string `json:"pathMode"`
// SearchResultsConfig is a list of config/display string pairs.
SearchResultsConfig []struct {
Config string `json:"config"`
Display string `json:"display"`
} `json:"searchResultsConfig"`
// Data holds the courses returned for this page.
Data []Course `json:"data"`
}
// Course represents a single course returned from a search.
// Pointer-typed fields decode a JSON null as nil, which distinguishes "absent"
// from a zero value.
type Course struct {
// ID is an internal identifier not used outside of the Banner system.
ID int `json:"id"`
// Term is the internal identifier for the term this class is in (e.g. 202420).
Term string `json:"term"`
// TermDesc is the human-readable name of the term this class is in (e.g. Fall 2021).
TermDesc string `json:"termDesc"`
// CourseReferenceNumber is the unique identifier for a course within a term.
CourseReferenceNumber string `json:"courseReferenceNumber"`
// PartOfTerm specifies which part of the term the course is in (e.g. B6, B5).
PartOfTerm string `json:"partOfTerm"`
// CourseNumber is the 4-digit code for the course (e.g. 3743).
CourseNumber string `json:"courseNumber"`
// Subject is the subject acronym (e.g. CS, AEPI).
Subject string `json:"subject"`
// SubjectDescription is the full name of the course subject.
SubjectDescription string `json:"subjectDescription"`
// SequenceNumber is the course section (e.g. 001, 002).
SequenceNumber string `json:"sequenceNumber"`
CampusDescription string `json:"campusDescription"`
// ScheduleTypeDescription is the type of schedule for the course (e.g. Lecture, Seminar).
ScheduleTypeDescription string `json:"scheduleTypeDescription"`
CourseTitle string `json:"courseTitle"`
CreditHours int `json:"creditHours"`
// MaximumEnrollment is the maximum number of students that can enroll.
MaximumEnrollment int `json:"maximumEnrollment"`
// Enrollment, SeatsAvailable, WaitCapacity and WaitCount are the seat and
// waitlist counters for the section.
Enrollment int `json:"enrollment"`
SeatsAvailable int `json:"seatsAvailable"`
WaitCapacity int `json:"waitCapacity"`
WaitCount int `json:"waitCount"`
// The CrossList* fields are nil when the response carries null for them.
CrossList *string `json:"crossList"`
CrossListCapacity *int `json:"crossListCapacity"`
CrossListCount *int `json:"crossListCount"`
CrossListAvailable *int `json:"crossListAvailable"`
// CreditHourHigh/CreditHourLow/CreditHourIndicator are nil when null.
// NOTE(review): presumably these describe a variable-credit range — confirm.
CreditHourHigh *int `json:"creditHourHigh"`
CreditHourLow *int `json:"creditHourLow"`
CreditHourIndicator *string `json:"creditHourIndicator"`
OpenSection bool `json:"openSection"`
LinkIdentifier *string `json:"linkIdentifier"`
IsSectionLinked bool `json:"isSectionLinked"`
// SubjectCourse is the combination of the subject and course number (e.g. CS3443).
SubjectCourse string `json:"subjectCourse"`
ReservedSeatSummary *string `json:"reservedSeatSummary"`
InstructionalMethod string `json:"instructionalMethod"`
InstructionalMethodDescription string `json:"instructionalMethodDescription"`
SectionAttributes []struct {
// Class is an internal API class identifier used by Banner.
Class string `json:"class"`
CourseReferenceNumber string `json:"courseReferenceNumber"`
// Code for the attribute (e.g., UPPR, ZIEP, AIS).
Code string `json:"code"`
Description string `json:"description"`
TermCode string `json:"termCode"`
// IsZtcAttribute is decoded from the "isZTCAttribute" key (note the casing).
IsZtcAttribute bool `json:"isZTCAttribute"`
} `json:"sectionAttributes"`
// Faculty lists the faculty members attached to the section.
Faculty []FacultyItem `json:"faculty"`
// MeetingsFaculty lists the section's meeting times.
MeetingsFaculty []MeetingTimeResponse `json:"meetingsFaculty"`
}
// MarshalBinary implements encoding.BinaryMarshaler by serialising the course
// as JSON, so the binary form is exactly the JSON document.
func (course Course) MarshalBinary() ([]byte, error) {
	encoded, err := json.Marshal(course)
	return encoded, err
}
@@ -0,0 +1,56 @@
-- Initial schema: course snapshot table, enrollment time series, audit trail,
-- and the scraper's job queue.
-- Drop all old tables
-- (the three dependent tables are dropped before courses)
DROP TABLE IF EXISTS scrape_jobs;
DROP TABLE IF EXISTS course_metrics;
DROP TABLE IF EXISTS course_audits;
DROP TABLE IF EXISTS courses;
-- Enums for scrape_jobs
-- NOTE(review): the types are not dropped above and CREATE TYPE has no
-- IF NOT EXISTS form, so this fails if they already exist — confirm this
-- migration only ever runs against a database without them.
CREATE TYPE scrape_priority AS ENUM ('Low', 'Medium', 'High', 'Critical');
CREATE TYPE target_type AS ENUM ('Subject', 'CourseRange', 'CrnList', 'SingleCrn');
-- Main course data table
-- One row per (crn, term_code); last_scraped_at records the latest scrape.
CREATE TABLE courses (
id SERIAL PRIMARY KEY,
crn VARCHAR NOT NULL,
subject VARCHAR NOT NULL,
course_number VARCHAR NOT NULL,
title VARCHAR NOT NULL,
term_code VARCHAR NOT NULL,
enrollment INTEGER NOT NULL,
max_enrollment INTEGER NOT NULL,
wait_count INTEGER NOT NULL,
wait_capacity INTEGER NOT NULL,
last_scraped_at TIMESTAMPTZ NOT NULL,
UNIQUE(crn, term_code)
);
-- Time-series data for course enrollment
-- Rows are removed automatically when the parent course is deleted.
CREATE TABLE course_metrics (
id SERIAL PRIMARY KEY,
course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
timestamp TIMESTAMPTZ NOT NULL,
enrollment INTEGER NOT NULL,
wait_count INTEGER NOT NULL,
seats_available INTEGER NOT NULL
);
-- Audit trail for changes to course data
-- One row per changed field; old and new values are stored as text.
CREATE TABLE course_audits (
id SERIAL PRIMARY KEY,
course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
timestamp TIMESTAMPTZ NOT NULL,
field_changed VARCHAR NOT NULL,
old_value TEXT NOT NULL,
new_value TEXT NOT NULL
);
-- Job queue for the scraper
-- locked_at is NULL while a job is unclaimed.
CREATE TABLE scrape_jobs (
id SERIAL PRIMARY KEY,
target_type target_type NOT NULL,
target_payload JSONB NOT NULL,
priority scrape_priority NOT NULL,
execute_at TIMESTAMPTZ NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
locked_at TIMESTAMPTZ
);
@@ -0,0 +1,3 @@
-- Add retry tracking columns to scrape_jobs table
-- Existing rows receive the defaults (0 retries so far, 5 allowed); both
-- columns are constrained to be non-negative.
ALTER TABLE scrape_jobs ADD COLUMN retry_count INTEGER NOT NULL DEFAULT 0 CHECK (retry_count >= 0);
ALTER TABLE scrape_jobs ADD COLUMN max_retries INTEGER NOT NULL DEFAULT 5 CHECK (max_retries >= 0);
@@ -0,0 +1,45 @@
-- Performance optimization indexes
-- Every statement uses IF NOT EXISTS, so an index whose name already exists is
-- left untouched (even if its definition differs).
-- Index for term-based queries (most common access pattern)
CREATE INDEX IF NOT EXISTS idx_courses_term_code ON courses(term_code);
-- Index for subject-based filtering
CREATE INDEX IF NOT EXISTS idx_courses_subject ON courses(subject);
-- Composite index for subject + term queries
CREATE INDEX IF NOT EXISTS idx_courses_subject_term ON courses(subject, term_code);
-- Index for course number lookups
CREATE INDEX IF NOT EXISTS idx_courses_course_number ON courses(course_number);
-- Index for last scraped timestamp (useful for finding stale data)
CREATE INDEX IF NOT EXISTS idx_courses_last_scraped ON courses(last_scraped_at);
-- Index for course metrics time-series queries
-- BRIN index is optimal for time-series data
CREATE INDEX IF NOT EXISTS idx_course_metrics_timestamp ON course_metrics USING BRIN(timestamp);
-- B-tree index for specific course metric lookups
CREATE INDEX IF NOT EXISTS idx_course_metrics_course_timestamp
ON course_metrics(course_id, timestamp DESC);
-- Partial index for pending scrape jobs (only unlocked jobs)
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_pending
ON scrape_jobs(execute_at ASC)
WHERE locked_at IS NULL;
-- Index for high-priority job processing
-- (priority is an enum; DESC follows the enum's declaration order reversed)
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_priority_pending
ON scrape_jobs(priority DESC, execute_at ASC)
WHERE locked_at IS NULL;
-- Index for retry tracking
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_retry_count
ON scrape_jobs(retry_count)
WHERE retry_count > 0 AND locked_at IS NULL;
-- Analyze tables to update statistics
ANALYZE courses;
ANALYZE course_metrics;
ANALYZE course_audits;
ANALYZE scrape_jobs;
@@ -0,0 +1,53 @@
-- Index Optimization Follow-up Migration
-- Remove: idx_courses_subject
-- Reason: Redundant with composite index idx_courses_subject_term
DROP INDEX IF EXISTS idx_courses_subject;
-- Remove: idx_scrape_jobs_retry_count
-- (no reason is recorded in this migration)
DROP INDEX IF EXISTS idx_scrape_jobs_retry_count;
-- Purpose: Optimize the scheduler's frequent query (runs every 60 seconds)
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_scheduler_lookup
ON scrape_jobs(target_type, target_payload)
WHERE locked_at IS NULL;
-- Note: We use (target_type, target_payload) instead of including locked_at
-- in the index columns because:
-- 1. The WHERE clause filters locked_at IS NULL (partial index optimization)
-- 2. target_payload is JSONB and already large; keeping it as an indexed column
-- allows PostgreSQL to use index-only scans for the SELECT target_payload query
-- 3. This design minimizes index size while maximizing query performance
-- Purpose: Enable efficient audit trail queries by course
CREATE INDEX IF NOT EXISTS idx_course_audits_course_timestamp
ON course_audits(course_id, timestamp DESC);
-- Purpose: Enable queries like "Show all changes in the last 24 hours"
CREATE INDEX IF NOT EXISTS idx_course_audits_timestamp
ON course_audits(timestamp DESC);
-- The BRIN index on course_metrics(timestamp) assumes data is inserted in
-- chronological order. BRIN indexes are only effective when data is physically
-- ordered on disk. If you perform:
-- - Backfills of historical data
-- - Out-of-order inserts
-- - Frequent UPDATEs that move rows
--
-- Then the BRIN index effectiveness will degrade. Monitor with:
-- SELECT * FROM brin_page_items(get_raw_page('idx_course_metrics_timestamp', 1));
--
-- If you see poor selectivity, consider:
-- 1. REINDEX to rebuild after bulk loads
-- 2. Switch to B-tree if inserts are not time-ordered
-- 3. Use CLUSTER to physically reorder the table (requires downtime)
COMMENT ON INDEX idx_course_metrics_timestamp IS
'BRIN index - requires chronologically ordered inserts for efficiency. Monitor selectivity.';
-- Update statistics for query planner
ANALYZE courses;
ANALYZE course_metrics;
ANALYZE course_audits;
ANALYZE scrape_jobs;
@@ -0,0 +1,83 @@
-- ============================================================
-- Expand courses table with rich Banner API fields
-- ============================================================
-- All scalar columns added below are nullable, so existing rows stay valid
-- without a backfill.
-- Section identifiers
ALTER TABLE courses ADD COLUMN sequence_number VARCHAR;
ALTER TABLE courses ADD COLUMN part_of_term VARCHAR;
-- Schedule & delivery (store codes, descriptions come from reference_data)
ALTER TABLE courses ADD COLUMN instructional_method VARCHAR;
ALTER TABLE courses ADD COLUMN campus VARCHAR;
-- Credit hours
ALTER TABLE courses ADD COLUMN credit_hours INTEGER;
ALTER TABLE courses ADD COLUMN credit_hour_low INTEGER;
ALTER TABLE courses ADD COLUMN credit_hour_high INTEGER;
-- Cross-listing
ALTER TABLE courses ADD COLUMN cross_list VARCHAR;
ALTER TABLE courses ADD COLUMN cross_list_capacity INTEGER;
ALTER TABLE courses ADD COLUMN cross_list_count INTEGER;
-- Section linking
ALTER TABLE courses ADD COLUMN link_identifier VARCHAR;
ALTER TABLE courses ADD COLUMN is_section_linked BOOLEAN;
-- JSONB columns for 1-to-many data
-- (NOT NULL with an empty-array default, so existing rows get '[]')
ALTER TABLE courses ADD COLUMN meeting_times JSONB NOT NULL DEFAULT '[]'::jsonb;
ALTER TABLE courses ADD COLUMN attributes JSONB NOT NULL DEFAULT '[]'::jsonb;
-- ============================================================
-- Full-text search support
-- ============================================================
-- Generated tsvector for word-based search on title
-- ('simple' configuration)
ALTER TABLE courses ADD COLUMN title_search tsvector
GENERATED ALWAYS AS (to_tsvector('simple', coalesce(title, ''))) STORED;
CREATE INDEX idx_courses_title_search ON courses USING GIN (title_search);
-- Trigram index for substring/ILIKE search on title
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX idx_courses_title_trgm ON courses USING GIN (title gin_trgm_ops);
-- ============================================================
-- New filter indexes
-- ============================================================
CREATE INDEX idx_courses_instructional_method ON courses(instructional_method);
CREATE INDEX idx_courses_campus ON courses(campus);
-- Composite for "open CS courses in Fall 2024" pattern
CREATE INDEX idx_courses_term_subject_avail ON courses(term_code, subject, max_enrollment, enrollment);
-- ============================================================
-- Instructors table (normalized, deduplicated)
-- ============================================================
-- Keyed by Banner ID; email is optional.
CREATE TABLE instructors (
banner_id VARCHAR PRIMARY KEY,
display_name VARCHAR NOT NULL,
email VARCHAR
);
-- Many-to-many link between courses and instructors; a link disappears when
-- either side is deleted.
CREATE TABLE course_instructors (
course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
instructor_id VARCHAR NOT NULL REFERENCES instructors(banner_id) ON DELETE CASCADE,
is_primary BOOLEAN NOT NULL DEFAULT false,
PRIMARY KEY (course_id, instructor_id)
);
-- Supports lookups by instructor (the primary key already leads with course_id).
CREATE INDEX idx_course_instructors_instructor ON course_instructors(instructor_id);
-- ============================================================
-- Reference data table (all code→description lookups)
-- ============================================================
CREATE TABLE reference_data (
category VARCHAR NOT NULL,
code VARCHAR NOT NULL,
description VARCHAR NOT NULL,
PRIMARY KEY (category, code)
);
@@ -0,0 +1,17 @@
-- RMP professor data (bulk synced from RateMyProfessors)
-- Rating columns are nullable; num_ratings defaults to 0 and last_synced_at
-- to the insertion time.
CREATE TABLE rmp_professors (
legacy_id INTEGER PRIMARY KEY,
graphql_id VARCHAR NOT NULL,
first_name VARCHAR NOT NULL,
last_name VARCHAR NOT NULL,
department VARCHAR,
avg_rating REAL,
avg_difficulty REAL,
num_ratings INTEGER NOT NULL DEFAULT 0,
would_take_again_pct REAL,
last_synced_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Link Banner instructors to RMP professors
-- rmp_legacy_id is nullable (no link yet); every existing instructor starts
-- with rmp_match_status = 'pending'.
ALTER TABLE instructors ADD COLUMN rmp_legacy_id INTEGER REFERENCES rmp_professors(legacy_id);
ALTER TABLE instructors ADD COLUMN rmp_match_status VARCHAR NOT NULL DEFAULT 'pending';
@@ -0,0 +1,7 @@
-- Add queued_at column to track when a job last entered the "ready to pick up" state.
-- For fresh jobs this equals execute_at; for retried jobs it is updated to NOW().
-- NOTE(review): the column default is NOW(), not execute_at, so the
-- "equals execute_at" rule for fresh jobs has to be applied by whoever inserts
-- the row — confirm the insert path sets queued_at explicitly.
ALTER TABLE scrape_jobs
ADD COLUMN queued_at TIMESTAMPTZ NOT NULL DEFAULT NOW();
-- Backfill existing rows: set queued_at = execute_at (best approximation)
UPDATE scrape_jobs SET queued_at = execute_at;
@@ -0,0 +1,19 @@
-- Application users, keyed by their Discord ID. The avatar hash is optional
-- and is_admin defaults to false.
CREATE TABLE users (
discord_id BIGINT PRIMARY KEY,
discord_username TEXT NOT NULL,
discord_avatar_hash TEXT,
is_admin BOOLEAN NOT NULL DEFAULT false,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Login sessions. A user's sessions are deleted together with the user.
-- expires_at has no default and must be supplied on insert.
CREATE TABLE user_sessions (
id TEXT PRIMARY KEY,
user_id BIGINT NOT NULL REFERENCES users(discord_id) ON DELETE CASCADE,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
expires_at TIMESTAMPTZ NOT NULL,
last_active_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Lookup of sessions by user, and by expiry time.
CREATE INDEX idx_user_sessions_user_id ON user_sessions(user_id);
CREATE INDEX idx_user_sessions_expires_at ON user_sessions(expires_at);
@@ -0,0 +1,80 @@
-- Collapse instructors from per-banner-id rows to per-person rows (deduped by lowercased email).
-- All existing RMP matches are deliberately dropped; the new auto-matcher will re-score from scratch.

-- 1. Create the new instructors table (1 row per person, keyed by email)
CREATE TABLE instructors_new (
    id SERIAL PRIMARY KEY,
    display_name VARCHAR NOT NULL,
    email VARCHAR NOT NULL,
    rmp_professor_id INTEGER UNIQUE REFERENCES rmp_professors(legacy_id),
    rmp_match_status VARCHAR NOT NULL DEFAULT 'unmatched',
    CONSTRAINT instructors_email_unique UNIQUE (email)
);

-- 2. Populate from existing data, deduplicating by lowercased email.
--    For each email, pick the display_name from the row with the highest banner_id
--    (deterministic tiebreaker). All rmp fields start fresh (NULL / 'unmatched').
--    The old instructors.email column is nullable while the new one is NOT NULL,
--    so rows without an email are skipped: they cannot be keyed by email, and a
--    single NULL would otherwise abort the whole migration.
INSERT INTO instructors_new (display_name, email)
SELECT DISTINCT ON (LOWER(email))
    display_name,
    LOWER(email)
FROM instructors
WHERE email IS NOT NULL
ORDER BY LOWER(email), banner_id DESC;
-- 3. Create the new course_instructors table with integer FK and banner_id column
CREATE TABLE course_instructors_new (
course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
instructor_id INTEGER NOT NULL REFERENCES instructors_new(id) ON DELETE CASCADE,
banner_id VARCHAR NOT NULL,
is_primary BOOLEAN NOT NULL DEFAULT false,
PRIMARY KEY (course_id, instructor_id)
);
-- 4. Populate from old data, mapping old banner_id → new instructor id via lowercased email.
-- Use DISTINCT ON to handle cases where multiple old banner_ids for the same person
-- taught the same course (would cause duplicate (course_id, instructor_id) pairs).
-- When duplicates collapse, the ORDER BY keeps a row with is_primary = true if
-- one exists. Both joins are inner joins, so a link whose old instructor has
-- no matching email in instructors_new is not carried over.
INSERT INTO course_instructors_new (course_id, instructor_id, banner_id, is_primary)
SELECT DISTINCT ON (ci.course_id, inew.id)
ci.course_id,
inew.id,
ci.instructor_id, -- old banner_id
ci.is_primary
FROM course_instructors ci
JOIN instructors iold ON iold.banner_id = ci.instructor_id
JOIN instructors_new inew ON inew.email = LOWER(iold.email)
ORDER BY ci.course_id, inew.id, ci.is_primary DESC;
-- 5. Drop old tables (course_instructors first due to FK dependency)
DROP TABLE course_instructors;
DROP TABLE instructors;
-- 6. Rename new tables into place
ALTER TABLE instructors_new RENAME TO instructors;
ALTER TABLE course_instructors_new RENAME TO course_instructors;
-- 7. Rename constraints to match the final table names
-- (renaming a table does not rename its constraints; instructors_email_unique
-- was named explicitly and needs no rename)
ALTER TABLE instructors RENAME CONSTRAINT instructors_new_pkey TO instructors_pkey;
ALTER TABLE instructors RENAME CONSTRAINT instructors_new_rmp_professor_id_key TO instructors_rmp_professor_id_key;
ALTER TABLE course_instructors RENAME CONSTRAINT course_instructors_new_pkey TO course_instructors_pkey;
-- 8. Recreate indexes
-- NOTE(review): instructors(email) already has a unique constraint
-- (instructors_email_unique), which is backed by an index, so
-- idx_instructors_email looks redundant — confirm before keeping it.
CREATE INDEX idx_course_instructors_instructor ON course_instructors (instructor_id);
CREATE INDEX idx_instructors_rmp_status ON instructors (rmp_match_status);
CREATE INDEX idx_instructors_email ON instructors (email);
-- 9. Create rmp_match_candidates table
-- One row per (instructor, RMP professor) pair, with a score and a review
-- status; resolved_at / resolved_by stay NULL until a candidate is resolved.
CREATE TABLE rmp_match_candidates (
id SERIAL PRIMARY KEY,
instructor_id INTEGER NOT NULL REFERENCES instructors(id) ON DELETE CASCADE,
rmp_legacy_id INTEGER NOT NULL REFERENCES rmp_professors(legacy_id),
score REAL NOT NULL,
score_breakdown JSONB NOT NULL DEFAULT '{}',
status VARCHAR NOT NULL DEFAULT 'pending',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
resolved_at TIMESTAMPTZ,
resolved_by BIGINT REFERENCES users(discord_id),
CONSTRAINT uq_candidate_pair UNIQUE (instructor_id, rmp_legacy_id)
);
CREATE INDEX idx_match_candidates_instructor ON rmp_match_candidates (instructor_id);
CREATE INDEX idx_match_candidates_status ON rmp_match_candidates (status);
@@ -0,0 +1,24 @@
-- Multi-RMP profile support: allow many RMP profiles per instructor.
-- Each RMP profile still links to at most one instructor (rmp_legacy_id UNIQUE).
-- 1. Create junction table
-- created_by is NULL when no user is recorded for the link.
CREATE TABLE instructor_rmp_links (
id SERIAL PRIMARY KEY,
instructor_id INTEGER NOT NULL REFERENCES instructors(id) ON DELETE CASCADE,
rmp_legacy_id INTEGER NOT NULL UNIQUE REFERENCES rmp_professors(legacy_id),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by BIGINT REFERENCES users(discord_id),
source VARCHAR NOT NULL DEFAULT 'manual' -- 'auto' | 'manual'
);
CREATE INDEX idx_instructor_rmp_links_instructor ON instructor_rmp_links (instructor_id);
-- 2. Migrate existing matches
-- Only a match status of 'auto' maps to source 'auto'; every other status
-- with a linked professor is recorded as 'manual'.
INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, source)
SELECT id, rmp_professor_id,
CASE rmp_match_status WHEN 'auto' THEN 'auto' ELSE 'manual' END
FROM instructors
WHERE rmp_professor_id IS NOT NULL;
-- 3. Drop old column (and its unique constraint)
ALTER TABLE instructors DROP COLUMN rmp_professor_id;
@@ -0,0 +1,31 @@
-- Scrape job results log: one row per completed (or failed) job for effectiveness tracking.
-- target_type, payload and priority reuse the enum and JSONB types of scrape_jobs.
CREATE TABLE scrape_job_results (
id BIGSERIAL PRIMARY KEY,
target_type target_type NOT NULL,
payload JSONB NOT NULL,
priority scrape_priority NOT NULL,
-- Timing
-- (completed_at defaults to the insertion time; duration_ms is stored, not derived)
queued_at TIMESTAMPTZ NOT NULL,
started_at TIMESTAMPTZ NOT NULL,
completed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
duration_ms INT NOT NULL,
-- Outcome
-- (error_message is nullable)
success BOOLEAN NOT NULL,
error_message TEXT,
retry_count INT NOT NULL DEFAULT 0,
-- Effectiveness (NULL when success = false)
courses_fetched INT,
courses_changed INT,
courses_unchanged INT,
audits_generated INT,
metrics_generated INT
);
-- Per-target-type history ordered by completion time.
CREATE INDEX idx_scrape_job_results_target_time
ON scrape_job_results (target_type, completed_at);
-- Global history ordered by completion time.
CREATE INDEX idx_scrape_job_results_completed
ON scrape_job_results (completed_at);
@@ -0,0 +1,13 @@
-- Indexes for the timeline aggregation endpoint.
-- The query buckets course_metrics by 15-minute intervals, joins to courses
-- for subject, and aggregates enrollment. These indexes support efficient
-- time-range scans and the join.
--
-- NOTE(review): both index names below were already created by the earlier
-- "Performance optimization indexes" migration (idx_course_metrics_timestamp
-- as a BRIN index, idx_course_metrics_course_timestamp as the same B-tree).
-- With IF NOT EXISTS, these statements are therefore skipped on any database
-- that ran that migration, and no B-tree on (timestamp) is created. Confirm
-- whether a differently named index was intended.
-- Primary access pattern: scan course_metrics by timestamp range
CREATE INDEX IF NOT EXISTS idx_course_metrics_timestamp
ON course_metrics (timestamp);
-- Composite index for the DISTINCT ON (bucket, course_id) ordered by timestamp DESC
-- to efficiently pick the latest metric per course per bucket.
CREATE INDEX IF NOT EXISTS idx_course_metrics_course_timestamp
ON course_metrics (course_id, timestamp DESC);
@@ -0,0 +1,5 @@
-- Add structured first/last name columns to instructors.
-- Populated by Rust-side backfill (parse_banner_name) since we need
-- HTML entity decoding and suffix extraction that SQL can't handle well.
-- Both columns are nullable, so rows stay valid until the backfill fills them.
ALTER TABLE instructors ADD COLUMN first_name VARCHAR;
ALTER TABLE instructors ADD COLUMN last_name VARCHAR;
+32
View File
@@ -0,0 +1,32 @@
/**
* Generate TypeScript bindings from Rust types (ts-rs).
*
* Usage: bun scripts/bindings.ts
*/
import { readdirSync, writeFileSync, rmSync } from "fs";
import { run } from "./lib/proc";
const BINDINGS_DIR = "web/src/lib/bindings";
const INDEX_FILE = "index.ts";
const TS_SUFFIX = ".ts";

// Compile the test binary before touching the output directory: this is the
// slow step, and if it fails the existing bindings are left in place.
run(["cargo", "test", "--no-run"]);

// Start from an empty directory so bindings for removed types don't linger.
rmSync(BINDINGS_DIR, { recursive: true, force: true });

// The export itself is quick because the binary is already built.
run(["cargo", "test", "export_bindings"]);

// Collect the emitted type names (file name minus ".ts"), skipping the index
// file this script writes.
const typeNames: string[] = [];
for (const entry of readdirSync(BINDINGS_DIR)) {
  if (!entry.endsWith(TS_SUFFIX) || entry === INDEX_FILE) continue;
  typeNames.push(entry.slice(0, -TS_SUFFIX.length));
}
typeNames.sort();

// One type-only re-export per generated file, newline-terminated.
const indexSource =
  typeNames.map((name) => `export type { ${name} } from "./${name}";`).join("\n") + "\n";
writeFileSync(`${BINDINGS_DIR}/${INDEX_FILE}`, indexSource);

console.log(`Generated ${BINDINGS_DIR}/index.ts (${typeNames.length} types)`);
+45
View File
@@ -0,0 +1,45 @@
/**
* Production build.
*
* Usage: bun scripts/build.ts [flags]
*
* Flags:
* -d, --debug Debug build instead of release
* -f, --frontend-only Frontend only
* -b, --backend-only Backend only
*/
import { parseFlags, c } from "./lib/fmt";
import { run } from "./lib/proc";
// Parse the CLI flags: each long flag is a boolean, has a one-letter alias,
// and defaults to false.
const { flags } = parseFlags(
  process.argv.slice(2),
  {
    debug: "bool",
    "frontend-only": "bool",
    "backend-only": "bool",
  } as const,
  { d: "debug", f: "frontend-only", b: "backend-only" },
  { debug: false, "frontend-only": false, "backend-only": false },
);

const frontendOnly = flags["frontend-only"];
const backendOnly = flags["backend-only"];

// The two "only" flags are mutually exclusive.
if (frontendOnly && backendOnly) {
  console.error("Cannot use -f and -b together");
  process.exit(1);
}

// Frontend is built unless the caller asked for the backend alone.
if (!backendOnly) {
  console.log(c("1;36", "→ Building frontend..."));
  run(["bun", "run", "--cwd", "web", "build"]);
}

// Backend is built unless the caller asked for the frontend alone.
if (!frontendOnly) {
  const profile = flags.debug ? "debug" : "release";
  console.log(c("1;36", `→ Building backend (${profile})...`));

  // Release is opt-out: --release is added unless --debug was given.
  const cargoArgs = ["cargo", "build", "--bin", "banner"];
  const cmd = flags.debug ? cargoArgs : [...cargoArgs, "--release"];
  run(cmd);
}
+21
View File
@@ -0,0 +1,21 @@
{
"lockfileVersion": 1,
"configVersion": 1,
"workspaces": {
"": {
"name": "banner-scripts",
"devDependencies": {
"@types/bun": "^1.3.8",
},
},
},
"packages": {
"@types/bun": ["@types/bun@1.3.8", "", { "dependencies": { "bun-types": "1.3.8" } }, "sha512-3LvWJ2q5GerAXYxO2mffLTqOzEu5qnhEAlh48Vnu8WQfnmSwbgagjGZV6BoHKJztENYEDn6QmVd949W4uESRJA=="],
"@types/node": ["@types/node@25.1.0", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-t7frlewr6+cbx+9Ohpl0NOTKXZNV9xHRmNOvql47BFJKcEG1CxtxlPEEe+gR9uhVWM4DwhnvTF110mIL4yP9RA=="],
"bun-types": ["bun-types@1.3.8", "", { "dependencies": { "@types/node": "*" } }, "sha512-fL99nxdOWvV4LqjmC+8Q9kW3M4QTtTR1eePs94v5ctGqU8OeceWrSUaRw3JYb7tU3FkMIAjkueehrHPPPGKi5Q=="],
"undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
}
}
+241
View File
@@ -0,0 +1,241 @@
/**
* Run all project checks in parallel. Auto-fixes formatting when safe.
*
* Usage: bun scripts/check.ts [--fix|-f]
*/
import { c, elapsed, isStderrTTY } from "./lib/fmt";
import { run, runPiped, spawnCollect, raceInOrder, type CollectResult } from "./lib/proc";
import { existsSync, statSync, readdirSync, writeFileSync, rmSync } from "fs";
// True when the script was invoked with --fix or its short form -f.
const fix = process.argv.some((arg) => arg === "--fix" || arg === "-f");

// ---------------------------------------------------------------------------
// Fix path: apply formatters and clippy's automatic fixes first, then continue
// into the normal verification below
// ---------------------------------------------------------------------------

if (fix) {
  console.log(c("1;36", "→ Fixing..."));

  // Formatters for the Rust workspace and the web frontend.
  run(["cargo", "fmt", "--all"]);
  run(["bun", "run", "--cwd", "web", "format"]);

  // Let clippy rewrite what it can, even with uncommitted or staged changes.
  const clippyFix = [
    "cargo",
    "clippy",
    "--all-features",
    "--fix",
    "--allow-dirty",
    "--allow-staged",
    "--",
    "--deny",
    "warnings",
  ];
  run(clippyFix);

  console.log(c("1;36", "→ Verifying..."));
}
// ---------------------------------------------------------------------------
// Ensure TypeScript bindings are up-to-date before frontend checks
// ---------------------------------------------------------------------------
{
  const BINDINGS_DIR = "web/src/lib/bindings";

  /** Largest `mtimeMs` among `paths`, or 0 when `paths` is empty. */
  const latestMtime = (paths: Iterable<string>): number => {
    let latest = 0;
    for (const path of paths) {
      const modified = statSync(path).mtimeMs;
      if (modified > latest) latest = modified;
    }
    return latest;
  };

  // Inputs: every Rust source file plus the Cargo manifests that exist.
  const newestSrcMtime = Math.max(
    latestMtime(new Bun.Glob("src/**/*.rs").scanSync(".")),
    latestMtime(["Cargo.toml", "Cargo.lock"].filter((manifest) => existsSync(manifest))),
  );

  // Outputs: everything currently in the bindings directory (0 if it is missing).
  const newestBindingMtime = existsSync(BINDINGS_DIR)
    ? latestMtime(
        Array.from(
          new Bun.Glob("**/*").scanSync(BINDINGS_DIR),
          (file) => `${BINDINGS_DIR}/${file}`,
        ),
      )
    : 0;

  const stale = newestBindingMtime === 0 || newestSrcMtime > newestBindingMtime;

  if (!stale) {
    process.stdout.write(c("2", "· bindings up-to-date, skipped") + "\n");
  } else {
    const startedAt = Date.now();
    process.stdout.write(
      c("1;36", "→ Regenerating TypeScript bindings (Rust sources changed)...") + "\n",
    );

    // Compile the test binaries first, then wipe the old output and run only
    // the export test so the directory contains nothing but fresh files.
    run(["cargo", "test", "--no-run"]);
    rmSync(BINDINGS_DIR, { recursive: true, force: true });
    run(["cargo", "test", "export_bindings"]);

    // Rebuild the barrel file: one type re-export per generated module.
    const types = readdirSync(BINDINGS_DIR)
      .filter((entry) => entry.endsWith(".ts") && entry !== "index.ts")
      .map((entry) => entry.replace(/\.ts$/, ""))
      .sort();
    const barrel = types.map((name) => `export type { ${name} } from "./${name}";`).join("\n");
    writeFileSync(`${BINDINGS_DIR}/index.ts`, barrel + "\n");

    process.stdout.write(
      c("32", "✓ bindings") + ` (${elapsed(startedAt)}s, ${types.length} types)\n`,
    );
  }
}
// ---------------------------------------------------------------------------
// Check definitions
// ---------------------------------------------------------------------------
// One external command that is run as a project check.
interface Check {
// Display name; also the key under which the check's result is stored.
name: string;
// Full argv (program followed by its arguments) passed to the spawner.
cmd: string[];
// Optional one-line remediation tip. When set, it is printed instead of the
// command's captured output if the check fails.
hint?: string;
}
// Every check started in phase 1. All entries are spawned at once, so the
// order here only determines the order of names in the progress line.
const checks: Check[] = [
{
name: "rustfmt",
cmd: ["cargo", "fmt", "--all", "--", "--check"],
hint: "Run 'cargo fmt --all' to see and fix formatting issues.",
},
{ name: "clippy", cmd: ["cargo", "clippy", "--all-features", "--", "--deny", "warnings"] },
{ name: "cargo-check", cmd: ["cargo", "check", "--all-features"] },
// The export_bindings test is excluded here; it is run separately when the
// TypeScript bindings are regenerated.
{ name: "rust-test", cmd: ["cargo", "nextest", "run", "-E", "not test(export_bindings)"] },
{ name: "svelte-check", cmd: ["bun", "run", "--cwd", "web", "check"] },
{ name: "biome", cmd: ["bun", "run", "--cwd", "web", "format:check"] },
{ name: "biome-lint", cmd: ["bun", "run", "--cwd", "web", "lint"] },
{ name: "web-test", cmd: ["bun", "run", "--cwd", "web", "test"] },
{ name: "actionlint", cmd: ["actionlint"] },
];
// ---------------------------------------------------------------------------
// Domain groups: formatter → { peers, format command, sanity rechecks }
// ---------------------------------------------------------------------------
// Keyed by the name of a formatter check in `checks`. For each formatter:
//   peers   - names of the other checks in the same domain; auto-formatting is
//             attempted only when all of them passed.
//   format  - runs the formatter in write mode with captured output.
//   recheck - checks re-run after formatting to confirm the result is sane.
const domains: Record<
string,
{
peers: string[];
format: () => ReturnType<typeof runPiped>;
recheck: Check[];
}
> = {
rustfmt: {
peers: ["clippy", "cargo-check", "rust-test"],
format: () => runPiped(["cargo", "fmt", "--all"]),
recheck: [
{ name: "rustfmt", cmd: ["cargo", "fmt", "--all", "--", "--check"] },
{ name: "cargo-check", cmd: ["cargo", "check", "--all-features"] },
],
},
biome: {
peers: ["svelte-check", "biome-lint", "web-test"],
format: () => runPiped(["bun", "run", "--cwd", "web", "format"]),
recheck: [
{ name: "biome", cmd: ["bun", "run", "--cwd", "web", "format:check"] },
{ name: "svelte-check", cmd: ["bun", "run", "--cwd", "web", "check"] },
],
},
};
// ---------------------------------------------------------------------------
// Phase 1: run all checks in parallel, display in completion order
// ---------------------------------------------------------------------------
const start = Date.now();

// Names of checks that have not reported yet; shown in the live progress line.
const remaining = new Set(checks.map((ch) => ch.name));

// Spawn everything up front; each promise resolves to the check definition
// merged with its captured output, exit code and elapsed time.
const promises = checks.map(async (check) => ({
  ...check,
  ...(await spawnCollect(check.cmd, start)),
}));

// Progress line on stderr, only when stderr is a terminal. "\r\x1b[K" returns
// to column 0 and clears the line so the status is redrawn in place.
const interval = isStderrTTY
  ? setInterval(() => {
      process.stderr.write(`\r\x1b[K${elapsed(start)}s [${Array.from(remaining).join(", ")}]`);
    }, 100)
  : null;

const results: Record<string, Check & CollectResult> = {};

await raceInOrder(promises, checks, (r) => {
  results[r.name] = r;
  remaining.delete(r.name);
  if (isStderrTTY) process.stderr.write("\r\x1b[K");

  if (r.exitCode !== 0) {
    // The explicit ✗/✓ markers keep pass and fail lines distinguishable when
    // colour is disabled (piped output, CI logs), where `c` is a no-op.
    process.stdout.write(c("31", `✗ ${r.name}`) + ` (${r.elapsed}s)\n`);
    if (r.hint) {
      // A hint replaces the raw output for checks whose output is not useful.
      process.stdout.write(c("2", `  ${r.hint}`) + "\n");
    } else {
      if (r.stdout) process.stdout.write(r.stdout);
      if (r.stderr) process.stderr.write(r.stderr);
    }
  } else {
    process.stdout.write(c("32", `✓ ${r.name}`) + ` (${r.elapsed}s)\n`);
  }
});

if (interval) clearInterval(interval);
if (isStderrTTY) process.stderr.write("\r\x1b[K");
// ---------------------------------------------------------------------------
// Phase 2: auto-fix formatting if it's the only failure in its domain
// ---------------------------------------------------------------------------
// Formatter checks whose failure was repaired automatically; the final verdict
// treats these as passed.
const autoFixedDomains = new Set<string>();

for (const [fmtName, domain] of Object.entries(domains)) {
  // Only act when the formatter check itself failed...
  const fmtResult = results[fmtName];
  if (!fmtResult || fmtResult.exitCode === 0) continue;
  // ...and every peer in the domain passed, so formatting is the sole problem.
  if (!domain.peers.every((p) => results[p]?.exitCode === 0)) continue;

  process.stdout.write(
    "\n" +
      c("1;36", `→ Auto-formatting ${fmtName} (peers passed, only formatting failed)...`) +
      "\n",
  );

  const fmtOut = domain.format();
  if (fmtOut.exitCode !== 0) {
    // ✗/✓ markers keep the outcome readable when colour is disabled.
    process.stdout.write(c("31", `✗ ${fmtName} formatter failed`) + "\n");
    if (fmtOut.stdout) process.stdout.write(fmtOut.stdout);
    if (fmtOut.stderr) process.stderr.write(fmtOut.stderr);
    continue;
  }

  // Re-run the domain's sanity checks in parallel against the formatted tree.
  const recheckStart = Date.now();
  const recheckPromises = domain.recheck.map(async (ch) => ({
    ...ch,
    ...(await spawnCollect(ch.cmd, recheckStart)),
  }));

  let recheckFailed = false;
  await raceInOrder(recheckPromises, domain.recheck, (r) => {
    if (r.exitCode !== 0) {
      recheckFailed = true;
      process.stdout.write(c("31", `✗ ${r.name}`) + ` (${r.elapsed}s)\n`);
      if (r.stdout) process.stdout.write(r.stdout);
      if (r.stderr) process.stderr.write(r.stderr);
    } else {
      process.stdout.write(c("32", `✓ ${r.name}`) + ` (${r.elapsed}s)\n`);
    }
  });

  if (!recheckFailed) {
    process.stdout.write(c("32", `✓ ${fmtName} auto-fix succeeded`) + "\n");
    autoFixedDomains.add(fmtName);
  } else {
    process.stdout.write(c("31", `✗ ${fmtName} auto-fix failed sanity check`) + "\n");
  }
}
// ---------------------------------------------------------------------------
// Final verdict
// ---------------------------------------------------------------------------
// A check counts against the run when it exited non-zero and was not repaired
// by the auto-format phase.
let finalFailed = false;
for (const [name, outcome] of Object.entries(results)) {
  if (outcome.exitCode !== 0 && !autoFixedDomains.has(name)) {
    finalFailed = true;
    break;
  }
}

const repairedSomething = autoFixedDomains.size > 0;
if (repairedSomething && !finalFailed) {
  process.stdout.write(
    "\n" + c("1;32", "✓ All checks passed (formatting was auto-fixed)") + "\n",
  );
}

process.exit(finalFailed ? 1 : 0);
+79
View File
@@ -0,0 +1,79 @@
/**
* PostgreSQL Docker container management.
*
* Usage: bun scripts/db.ts [start|reset|rm]
*/
import { readFile, writeFile } from "fs/promises";
import { spawnSync } from "child_process";
// Docker container name used for every docker command in this script.
const NAME = "banner-postgres";
// Credentials and database name passed to the container and used in DATABASE_URL.
const USER = "banner";
const PASS = "banner";
const DB = "banner";
// Host port published to the container's port 5432.
const PORT = "59489";
// File that receives the DATABASE_URL entry.
const ENV_FILE = ".env";
// Sub-command from the first CLI argument; defaults to "start" when omitted.
const cmd = process.argv[2] || "start";
/** Run `docker <args>` synchronously and return the result with UTF-8 decoded output. */
function docker(...args: string[]) {
  const options = { encoding: "utf8" } as const;
  return spawnSync("docker", args, options);
}
/**
 * Look up the managed container (running or stopped) by exact name.
 *
 * Returns the parsed `docker ps --format json` record, or `null` when no
 * container with that name exists. Exits with status 1 when docker itself
 * cannot be launched.
 */
function getContainer() {
  const res = docker("ps", "-a", "--filter", `name=^${NAME}$`, "--format", "json");

  // When the docker binary cannot be spawned, `error` is set and `stdout` is
  // null; report it instead of crashing on `null.trim()`.
  if (res.error) {
    console.error(`Failed to run docker: ${res.error.message}`);
    process.exit(1);
  }

  // `--format json` prints one JSON object per line, so parse only the first
  // non-empty line rather than the whole buffer.
  const firstLine = (res.stdout ?? "").split("\n").find((line) => line.trim() !== "");
  return firstLine ? JSON.parse(firstLine) : null;
}
/**
 * Ensure the env file contains a `DATABASE_URL=` line pointing at the managed
 * container. An existing line is replaced in place; otherwise one is appended.
 * If the file cannot be read it is created with just that line.
 */
async function updateEnv() {
  const url = `postgresql://${USER}:${PASS}@localhost:${PORT}/${DB}`;
  const entry = `DATABASE_URL=${url}`;

  // Only a failed read falls back to a fresh file; a failed write must surface
  // instead of being retried blindly.
  let content: string | null;
  try {
    content = await readFile(ENV_FILE, "utf8");
  } catch {
    content = null;
  }
  if (content === null) {
    await writeFile(ENV_FILE, `${entry}\n`);
    return;
  }

  // Anchored at line start so keys that merely end in DATABASE_URL (for example
  // TEST_DATABASE_URL) and commented-out entries are left untouched.
  const existing = /^DATABASE_URL=.*$/m;
  let next: string;
  if (existing.test(content)) {
    // Replacer function: the URL is inserted literally, with no `$` expansion.
    next = content.replace(existing, () => entry);
  } else {
    const kept = content.trim();
    next = kept === "" ? `${entry}\n` : `${kept}\n${entry}\n`;
  }
  await writeFile(ENV_FILE, next);
}
/**
 * Create and start the PostgreSQL container in detached mode, publishing it on
 * the configured host port. Exits with status 1 if `docker run` fails.
 */
function create() {
  const res = docker(
    "run", "-d", "--name", NAME,
    "-e", `POSTGRES_USER=${USER}`,
    "-e", `POSTGRES_PASSWORD=${PASS}`,
    "-e", `POSTGRES_DB=${DB}`,
    "-p", `${PORT}:5432`,
    "postgres:17-alpine",
  );

  // Do not claim success when docker could not be launched or returned an
  // error (port already in use, image pull failure, ...).
  if (res.error || res.status !== 0) {
    console.error(res.stderr?.trim() || res.error?.message || "docker run failed");
    process.exit(1);
  }
  console.log("created");
}
// Current state of the managed container, or null when it does not exist.
const container = getContainer();

switch (cmd) {
  case "rm": {
    // Nothing to remove: exit successfully without printing anything.
    if (!container) process.exit(0);
    docker("stop", NAME);
    docker("rm", NAME);
    console.log("removed");
    break;
  }

  case "reset": {
    if (container) {
      // Connect to the maintenance database so the target database can be dropped.
      docker("exec", NAME, "psql", "-U", USER, "-d", "postgres", "-c", `DROP DATABASE IF EXISTS ${DB}`);
      docker("exec", NAME, "psql", "-U", USER, "-d", "postgres", "-c", `CREATE DATABASE ${DB}`);
      console.log("reset");
    } else {
      create();
    }
    await updateEnv();
    break;
  }

  default: {
    // "start" and any unrecognised sub-command: make sure the container is up.
    if (!container) {
      create();
    } else if (container.State === "running") {
      console.log("running");
    } else {
      docker("start", NAME);
      console.log("started");
    }
    await updateEnv();
  }
}
+112
View File
@@ -0,0 +1,112 @@
/**
* Dev server orchestrator.
*
* Usage: bun scripts/dev.ts [flags] [-- passthrough-args]
*
* Flags:
* -f, --frontend-only Frontend only (Vite dev server)
* -b, --backend-only Backend only (bacon watch)
* -W, --no-watch Build once + run (no watch)
* -n, --no-build Run last compiled binary (no rebuild)
* -r, --release Use release profile
* -e, --embed Embed assets (implies -b)
* --tracing <fmt> Tracing format (default: pretty)
*/
import { existsSync } from "fs";
import { parseFlags, c } from "./lib/fmt";
import { run, ProcessGroup } from "./lib/proc";
// Parse the CLI. The four arguments are: raw argv (without the runtime and
// script path), the flag spec, the short-flag aliases, and the default values.
// Arguments after `--` are returned untouched in `passthrough`.
const { flags, passthrough } = parseFlags(
process.argv.slice(2),
{
"frontend-only": "bool",
"backend-only": "bool",
"no-watch": "bool",
"no-build": "bool",
release: "bool",
embed: "bool",
tracing: "string",
} as const,
{ f: "frontend-only", b: "backend-only", W: "no-watch", n: "no-build", r: "release", e: "embed" },
{
"frontend-only": false,
"backend-only": false,
"no-watch": false,
"no-build": false,
release: false,
embed: false,
tracing: "pretty",
},
);
// Local copies of the parsed flags; some are adjusted below by implication rules.
let frontendOnly = flags["frontend-only"];
let backendOnly = flags["backend-only"];
let noWatch = flags["no-watch"];
const noBuild = flags["no-build"];
const release = flags.release;
const embed = flags.embed;
const tracing = flags.tracing as string;
// -e implies -b
if (embed) backendOnly = true;
// -n implies -W
if (noBuild) noWatch = true;
// Checked after the implications above, so `-f -e` is rejected as well.
if (frontendOnly && backendOnly) {
console.error("Cannot use -f and -b together (or -e implies -b)");
process.exit(1);
}
const runFrontend = !backendOnly;
const runBackend = !frontendOnly;
// `profile` is only used in the build log message; `profileDir` is the
// directory under target/ where the binary is looked up.
const profile = release ? "release" : "dev";
const profileDir = release ? "release" : "debug";
// Owns every long-running child so they can be stopped together.
const group = new ProcessGroup();
// Build frontend first when embedding assets
if (embed && !noBuild) {
console.log(c("1;36", "→ Building frontend (for embedding)..."));
run(["bun", "run", "--cwd", "web", "build"]);
}
// Frontend: Vite dev server
if (runFrontend) {
group.spawn(["bun", "run", "--cwd", "web", "dev"]);
}
// Backend: either bacon in watch mode, or a one-shot build followed by the binary.
if (runBackend) {
  const backendArgs = ["--tracing", tracing, ...passthrough];
  const bin = `target/${profileDir}/banner`;

  if (!noWatch) {
    // Bacon watch mode. The argument list contains two `--` separators: build
    // options sit between them, and the backend's own arguments follow the second.
    const baconArgs = [
      "bacon", "--headless", "run", "--",
      ...(embed ? [] : ["--no-default-features"]),
      ...(release ? ["--profile", "release"] : []),
      "--",
      ...backendArgs,
    ];
    group.spawn(baconArgs);
  } else {
    if (!noBuild) {
      console.log(c("1;36", `→ Building backend (${profile})...`));
      const cargoArgs = [
        "cargo", "build", "--bin", "banner",
        ...(embed ? [] : ["--no-default-features"]),
        ...(release ? ["--release"] : []),
      ];
      run(cargoArgs);
    }

    // With -n there may be no binary from an earlier build; stop anything that
    // was already started before bailing out.
    const binaryMissing = !existsSync(bin);
    if (binaryMissing) {
      console.error(`Binary not found: ${bin}`);
      console.error(`Run 'just build${release ? "" : " -d"}' first, or remove -n to use bacon.`);
      await group.killAll();
      process.exit(1);
    }

    console.log(c("1;36", `→ Running ${bin} (no watch)`));
    group.spawn([bin, ...backendArgs]);
  }
}
// Block until the first managed process exits, then exit with the code that
// waitForFirst reports for it.
const code = await group.waitForFirst();
process.exit(code);
+96
View File
@@ -0,0 +1,96 @@
/**
* Shared formatting, color, and CLI argument parsing utilities.
*/
// Terminal detection, evaluated once at module load. Each value falls back to
// `false` when the stream does not define `isTTY`.
const isTTY = process.stdout.isTTY ?? false;
const isStderrTTY = process.stderr.isTTY ?? false;

/**
 * Wrap `text` in the ANSI SGR sequence for `code` (e.g. "31", "1;36") followed
 * by a reset. Returns `text` unchanged when stdout is not a TTY.
 */
export function c(code: string, text: string): string {
  if (!isTTY) return text;
  return `\x1b[${code}m${text}\x1b[0m`;
}
/**
 * Seconds elapsed since `start` (a `Date.now()` timestamp in milliseconds),
 * formatted with exactly one decimal place.
 */
export function elapsed(start: number): string {
  const millis = Date.now() - start;
  const seconds = millis / 1000;
  return seconds.toFixed(1);
}
/** Whether stderr is a TTY (for progress spinners). */
export { isStderrTTY };
/**
 * Parse short and long CLI flags from a flat argument array.
 *
 * `spec` maps flag names to their type:
 * - `"bool"` — presence sets the value to `true`
 * - `"string"` — consumes the next argument as the value (`""` if there is none)
 *
 * Short flags can be combined: `-fbW` expands to `-f -b -W`.
 * Long flags: `--frontend-only`, `--tracing pretty`.
 * `--` terminates flag parsing; remaining args go to `passthrough`.
 *
 * Unknown flags and bare positional arguments print an error and exit with
 * status 1. Only keys defined directly on `spec` / `shortMap` are recognised,
 * so inherited object properties such as `--constructor` are rejected.
 *
 * @param argv     Arguments to parse (without the runtime and script path).
 * @param spec     Flag name → `"bool"` | `"string"`.
 * @param shortMap Single-character alias → flag name in `spec`.
 * @param defaults Initial value for every flag; not mutated.
 * @returns `{ flags, passthrough }`.
 */
export function parseFlags<T extends Record<string, "bool" | "string">>(
  argv: string[],
  spec: T,
  shortMap: Record<string, keyof T>,
  defaults: { [K in keyof T]: T[K] extends "bool" ? boolean : string },
): { flags: typeof defaults; passthrough: string[] } {
  const flags = { ...defaults };
  const passthrough: string[] = [];
  const values = flags as Record<string, unknown>;

  let i = 0;

  // Store the value for a known flag; string flags consume the next argument.
  const assign = (name: string): void => {
    if (spec[name] === "string") {
      values[name] = argv[++i] || "";
    } else {
      values[name] = true;
    }
  };

  while (i < argv.length) {
    const arg = argv[i];

    if (arg === "--") {
      passthrough.push(...argv.slice(i + 1));
      break;
    }

    if (arg.startsWith("--")) {
      const name = arg.slice(2);
      // Own-property check: `in` would also accept keys inherited from
      // Object.prototype ("constructor", "toString", ...).
      if (!Object.hasOwn(spec, name)) {
        console.error(`Unknown flag: ${arg}`);
        process.exit(1);
      }
      assign(name);
    } else if (arg.startsWith("-") && arg.length > 1) {
      for (const ch of arg.slice(1)) {
        const mapped = Object.hasOwn(shortMap, ch) ? shortMap[ch] : undefined;
        if (!mapped) {
          console.error(`Unknown flag: -${ch}`);
          process.exit(1);
        }
        assign(mapped as string);
      }
    } else {
      console.error(`Unknown argument: ${arg}`);
      process.exit(1);
    }

    i++;
  }

  return { flags, passthrough };
}
/**
 * Split a raw argument string on runs of whitespace.
 * Returns the non-empty tokens in order; an empty or all-whitespace string
 * yields an empty array.
 */
export function parseArgs(raw: string): string[] {
  const tokens: string[] = [];
  for (const piece of raw.split(/\s+/)) {
    if (piece.length > 0) tokens.push(piece);
  }
  return tokens;
}
+113
View File
@@ -0,0 +1,113 @@
/**
* Shared process spawning utilities for project scripts.
*/
import { elapsed } from "./fmt";
/** Outcome of a spawned command whose output was captured rather than inherited. */
export interface CollectResult {
// Captured standard output; empty when the command could not be spawned.
stdout: string;
// Captured standard error, or the stringified error when spawning failed.
stderr: string;
// Exit code of the process; 1 is used for spawn failures.
exitCode: number;
// Pre-formatted elapsed seconds (one decimal place), or "?" when unknown.
elapsed: string;
}
/**
 * Run `cmd` synchronously with stdin/stdout/stderr inherited from this process.
 * Returns normally only when the command exits with code 0; otherwise the
 * current process exits with the command's exit code.
 */
export function run(cmd: string[]): void {
  const { exitCode } = Bun.spawnSync(cmd, { stdio: ["inherit", "inherit", "inherit"] });
  if (exitCode === 0) return;
  process.exit(exitCode);
}
/**
 * Run `cmd` synchronously with stdout and stderr captured.
 * Never exits the process; the caller inspects `exitCode`. Streams that are
 * absent are reported as empty strings.
 */
export function runPiped(cmd: string[]): { exitCode: number; stdout: string; stderr: string } {
  const child = Bun.spawnSync(cmd, { stdout: "pipe", stderr: "pipe" });
  const stdout = child.stdout?.toString() ?? "";
  const stderr = child.stderr?.toString() ?? "";
  return { exitCode: child.exitCode, stdout, stderr };
}
/**
 * Spawn `cmd` asynchronously and capture everything it writes.
 *
 * The child runs with `FORCE_COLOR=1` added to the current environment.
 * `startTime` is the `Date.now()` reference used for the `elapsed` field.
 * Any error thrown while spawning or reading (e.g. a missing binary) is
 * returned as a result with exit code 1 and the error text in `stderr`.
 */
export async function spawnCollect(cmd: string[], startTime: number): Promise<CollectResult> {
  try {
    const child = Bun.spawn(cmd, {
      env: { ...process.env, FORCE_COLOR: "1" },
      stdout: "pipe",
      stderr: "pipe",
    });

    // Read both pipes concurrently before waiting for the process to exit.
    const outText = new Response(child.stdout).text();
    const errText = new Response(child.stderr).text();
    const [stdout, stderr] = await Promise.all([outText, errText]);
    await child.exited;

    return { stdout, stderr, exitCode: child.exitCode, elapsed: elapsed(startTime) };
  } catch (failure) {
    return { stdout: "", stderr: String(failure), exitCode: 1, elapsed: elapsed(startTime) };
  }
}
/**
 * Await every promise and hand each result to `onResult` in completion order.
 *
 * `fallbacks[i]` supplies the identifying fields for `promises[i]` when that
 * promise rejects: the rejection becomes a result with exit code 1, the error
 * text in `stderr` and `"?"` as elapsed time, so `onResult` is called exactly
 * once per promise. Resolves after the last callback has run.
 */
export async function raceInOrder<T extends { name: string }>(
  promises: Promise<T & CollectResult>[],
  fallbacks: T[],
  onResult: (r: T & CollectResult) => void,
): Promise<void> {
  type Tagged = { i: number; r: T & CollectResult };

  // Still-running entries keyed by original index; an entry is removed as soon
  // as its result has been taken, so it cannot win a later race.
  const pending = new Map<number, Promise<Tagged>>();
  promises.forEach((promise, i) => {
    pending.set(
      i,
      promise.then(
        (r): Tagged => ({ i, r }),
        (err): Tagged => ({
          i,
          r: {
            ...fallbacks[i],
            exitCode: 1,
            stdout: "",
            stderr: String(err),
            elapsed: "?",
          } as T & CollectResult,
        }),
      ),
    );
  });

  while (pending.size > 0) {
    const { i, r } = await Promise.race(pending.values());
    pending.delete(i);
    onResult(r);
  }
}
/**
 * A set of child processes that are started individually and stopped together.
 *
 * Constructing a group installs SIGINT and SIGTERM handlers that kill every
 * child, wait for them to exit, and then exit this process with status 0.
 */
export class ProcessGroup {
  private procs: ReturnType<typeof Bun.spawn>[] = [];

  constructor() {
    const cleanup = async () => {
      await this.killAll();
      process.exit(0);
    };
    process.on("SIGINT", cleanup);
    process.on("SIGTERM", cleanup);
  }

  /** Start `cmd` with inherited stdio and track it as a member of the group. */
  spawn(cmd: string[]): ReturnType<typeof Bun.spawn> {
    const proc = Bun.spawn(cmd, { stdio: ["inherit", "inherit", "inherit"] });
    this.procs.push(proc);
    return proc;
  }

  /** Signal every tracked process to stop and wait until all have exited. */
  async killAll(): Promise<void> {
    for (const p of this.procs) p.kill();
    await Promise.all(this.procs.map((p) => p.exited));
  }

  /**
   * Wait for any process to exit, kill the rest, return exit code.
   * Returns 0 immediately when the group is empty: racing an empty list would
   * produce a promise that never settles.
   */
  async waitForFirst(): Promise<number> {
    if (this.procs.length === 0) return 0;
    const code = await Promise.race(this.procs.map((p) => p.exited));
    await this.killAll();
    return code;
  }
}
+8
View File
@@ -0,0 +1,8 @@
{
"name": "banner-scripts",
"private": true,
"type": "module",
"devDependencies": {
"@types/bun": "^1.3.8"
}
}
+20
View File
@@ -0,0 +1,20 @@
/**
* Run project tests.
*
* Usage: bun scripts/test.ts [rust|web|<nextest filter args>]
*/
import { run } from "./lib/proc";
// All CLI arguments joined into one string, so "rust"/"web"/"" can be matched
// exactly and anything else is re-split into nextest arguments.
const input = process.argv.slice(2).join(" ").trim();

const RUST_TESTS = ["cargo", "nextest", "run", "-E", "not test(export_bindings)"];
const WEB_TESTS = ["bun", "run", "--cwd", "web", "test"];

switch (input) {
  case "web":
    run(WEB_TESTS);
    break;
  case "rust":
    run(RUST_TESTS);
    break;
  case "":
    // No selector: Rust first, then web. `run` stops the script on failure.
    run(RUST_TESTS);
    run(WEB_TESTS);
    break;
  default:
    // Anything else is forwarded to nextest as filter arguments.
    run(["cargo", "nextest", "run", ...input.split(/\s+/)]);
}
+15
View File
@@ -0,0 +1,15 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"strict": true,
"noEmit": true,
"skipLibCheck": true,
"types": ["bun-types"],
"paths": {
"#lib/*": ["./lib/*"]
}
},
"include": ["**/*.ts"]
}
+199
View File
@@ -0,0 +1,199 @@
use crate::banner::BannerApi;
use crate::cli::ServiceName;
use crate::config::Config;
use crate::scraper::ScraperService;
use crate::services::bot::BotService;
use crate::services::manager::ServiceManager;
use crate::services::web::WebService;
use crate::state::AppState;
use crate::web::auth::AuthConfig;
use anyhow::Context;
use figment::value::UncasedStr;
use figment::{Figment, providers::Env};
use sqlx::postgres::PgPoolOptions;
use std::process::ExitCode;
use std::sync::Arc;
use std::time::Duration;
use tracing::{error, info, warn};
/// Main application struct containing all necessary components
///
/// Built by [`App::new`]; services are registered afterwards through the
/// `setup_*` methods and started with `start_services`.
pub struct App {
/// Configuration extracted from environment variables.
config: Config,
/// PostgreSQL pool; clones are handed to the app state and scraper service.
db_pool: sqlx::PgPool,
/// Banner API client shared between the app state and the scraper service.
banner_api: Arc<BannerApi>,
/// State object cloned into the web and bot services.
app_state: AppState,
/// Holds the registered services and is consumed by `run`.
service_manager: ServiceManager,
}
impl App {
/// Create a new App instance with all necessary components initialized
///
/// Steps, in order: load configuration from the environment, connect the
/// database pool, run migrations, backfill instructor names, build the Banner
/// API client and application state, warm the reference and schedule caches,
/// and seed the admin user when one is configured.
///
/// # Errors
///
/// Fails when configuration cannot be extracted, the pool cannot connect,
/// migrations fail, the Banner client cannot be built, or seeding the admin
/// user fails. Backfill and cache-loading failures are logged and ignored.
pub async fn new() -> Result<Self, anyhow::Error> {
// Load configuration
// The Railway draining-seconds variable is read as SHUTDOWN_TIMEOUT; every
// other environment variable keeps its own name.
let config: Config = Figment::new()
.merge(Env::raw().map(|k| {
if k == UncasedStr::new("RAILWAY_DEPLOYMENT_DRAINING_SECONDS") {
"SHUTDOWN_TIMEOUT".into()
} else {
k.into()
}
}))
.extract()
.context("Failed to load config")?;
// Check if the database URL is via private networking
// A tighter slow-acquire threshold (200ms vs 500ms) is used in that case.
let is_private = config.database_url.contains("railway.internal");
let slow_threshold = Duration::from_millis(if is_private { 200 } else { 500 });
// Create database connection pool
let db_pool = PgPoolOptions::new()
.min_connections(0)
.max_connections(4)
.acquire_slow_threshold(slow_threshold)
.acquire_timeout(Duration::from_secs(4))
.idle_timeout(Duration::from_secs(60 * 2))
.max_lifetime(Duration::from_secs(60 * 30))
.connect(&config.database_url)
.await
.context("Failed to create database pool")?;
info!(
is_private = is_private,
slow_threshold = format!("{:.2?}", slow_threshold),
"database pool established"
);
// Run database migrations
info!("Running database migrations...");
sqlx::migrate!("./migrations")
.run(&db_pool)
.await
.context("Failed to run database migrations")?;
info!("Database migrations completed successfully");
// Backfill structured name columns for existing instructors
if let Err(e) = crate::data::names::backfill_instructor_names(&db_pool).await {
warn!(error = ?e, "Failed to backfill instructor names (non-fatal)");
}
// Create BannerApi and AppState
let banner_api = BannerApi::new_with_config(
config.banner_base_url.clone(),
config.rate_limiting.clone(),
)
.context("Failed to create BannerApi")?;
let banner_api_arc = Arc::new(banner_api);
let app_state = AppState::new(banner_api_arc.clone(), db_pool.clone());
// Load reference data cache from DB (may be empty on first run)
if let Err(e) = app_state.load_reference_cache().await {
info!(error = ?e, "Could not load reference cache on startup (may be empty)");
}
// Load schedule cache for timeline enrollment queries
if let Err(e) = app_state.schedule_cache.load().await {
info!(error = ?e, "Could not load schedule cache on startup (may be empty)");
}
// Seed the initial admin user if configured
// NOTE(review): `as i64` converts without a range check; confirm the type of
// `admin_discord_id` cannot hold values above i64::MAX.
if let Some(admin_id) = config.admin_discord_id {
let user = crate::data::users::ensure_seed_admin(&db_pool, admin_id as i64)
.await
.context("Failed to seed admin user")?;
info!(discord_id = admin_id, username = %user.discord_username, "Seed admin ensured");
}
Ok(App {
config,
db_pool,
banner_api: banner_api_arc,
app_state,
service_manager: ServiceManager::new(),
})
}
/// Register the web and scraper services that appear in `services`.
///
/// The bot service is not registered here (see `setup_bot_service`), but its
/// presence in `services` counts as "something is enabled".
///
/// # Errors
///
/// Returns an error when nothing was registered and the bot was not requested.
pub fn setup_services(&mut self, services: &[ServiceName]) -> Result<(), anyhow::Error> {
    let requested = |name: ServiceName| services.contains(&name);

    if requested(ServiceName::Web) {
        // OAuth settings are copied out of the loaded configuration.
        let config = &self.config;
        let auth_config = AuthConfig {
            client_id: config.discord_client_id.clone(),
            client_secret: config.discord_client_secret.clone(),
            redirect_base: config.discord_redirect_uri.clone(),
        };
        let web_service = Box::new(WebService::new(
            config.port,
            self.app_state.clone(),
            auth_config,
        ));
        self.service_manager
            .register_service(ServiceName::Web.as_str(), web_service);
    }

    if requested(ServiceName::Scraper) {
        let state = &self.app_state;
        let scraper_service = Box::new(ScraperService::new(
            self.db_pool.clone(),
            self.banner_api.clone(),
            state.reference_cache.clone(),
            state.service_statuses.clone(),
            state.scrape_job_tx.clone(),
        ));
        self.service_manager
            .register_service(ServiceName::Scraper.as_str(), scraper_service);
    }

    // Something must be runnable: either a service registered above, or the bot.
    if self.service_manager.has_services() || requested(ServiceName::Bot) {
        return Ok(());
    }

    error!("No services enabled. Cannot start application.");
    Err(anyhow::anyhow!("No services enabled"))
}
/// Setup bot service if enabled
pub async fn setup_bot_service(&mut self) -> Result<(), anyhow::Error> {
use std::sync::Arc;
use tokio::sync::{Mutex, broadcast};
// Create shutdown channel for status update task
let (status_shutdown_tx, status_shutdown_rx) = broadcast::channel(1);
let status_task_handle = Arc::new(Mutex::new(None));
let client = BotService::create_client(
&self.config,
self.app_state.clone(),
status_task_handle.clone(),
status_shutdown_rx,
)
.await
.context("Failed to create Discord client")?;
let bot_service = Box::new(BotService::new(
client,
status_task_handle,
status_shutdown_tx,
self.app_state.service_statuses.clone(),
));
self.service_manager
.register_service(ServiceName::Bot.as_str(), bot_service);
Ok(())
}
/// Start all registered services
///
/// Delegates to the service manager's `spawn_all`; call this after the
/// `setup_*` methods have registered everything that should run.
pub fn start_services(&mut self) {
self.service_manager.spawn_all();
}
/// Run the application and handle shutdown signals
pub async fn run(self) -> ExitCode {
use crate::signals::handle_shutdown_signals;
handle_shutdown_signals(self.service_manager, self.config.shutdown_timeout).await
}
/// Get a reference to the configuration
///
/// Borrows the configuration loaded in `new`; nothing is cloned.
pub fn config(&self) -> &Config {
&self.config
}
}
+346
View File
@@ -0,0 +1,346 @@
//! Main Banner API client implementation.
use std::collections::HashMap;
use crate::banner::{
SessionPool, create_shared_rate_limiter, errors::BannerApiError, json::parse_json_with_context,
middleware::TransparentMiddleware, models::*, nonce, query::SearchQuery,
rate_limit_middleware::RateLimitMiddleware, util::user_agent,
};
use crate::config::RateLimitingConfig;
use anyhow::{Context, Result, anyhow};
use http::HeaderValue;
use reqwest::Client;
use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use tracing::debug;
/// Main Banner API client.
///
/// Combines an HTTP client (wrapped in the rate-limiting middleware) with a
/// pool of Banner sessions that are acquired per term.
pub struct BannerApi {
/// Session pool; built from a clone of the same HTTP client and base URL.
pub sessions: SessionPool,
/// HTTP client used for every request issued by this type.
http: ClientWithMiddleware,
/// Prefix prepended to every endpoint path.
base_url: String,
}
impl BannerApi {
/// Creates a new Banner API client.
///
/// Uses the default rate limiting configuration; see `new_with_config` to
/// supply a custom one.
// NOTE(review): presumably kept for tests or future callers, hence the
// dead_code allowance; confirm it is still needed.
#[allow(dead_code)]
pub fn new(base_url: String) -> Result<Self> {
Self::new_with_config(base_url, RateLimitingConfig::default())
}
/// Creates a new Banner API client with custom rate limiting configuration.
///
/// The HTTP client has its cookie store disabled, so callers attach cookies
/// explicitly. Timeouts: 15s connect, 20s read, 40s overall; TCP keepalive is
/// set to 5 minutes.
///
/// # Errors
///
/// Returns an error when the underlying HTTP client cannot be built.
pub fn new_with_config(
    base_url: String,
    rate_limit_config: RateLimitingConfig,
) -> Result<Self> {
    use std::time::Duration;

    let rate_limiter = create_shared_rate_limiter(Some(rate_limit_config));

    let raw_client = Client::builder()
        .cookie_store(false)
        .user_agent(user_agent())
        .tcp_keepalive(Some(Duration::from_secs(60 * 5)))
        .read_timeout(Duration::from_secs(20))
        .connect_timeout(Duration::from_secs(15))
        .timeout(Duration::from_secs(40))
        .build()
        .context("Failed to create HTTP client")?;

    // Middleware is attached in this order: transparent first, rate limiting second.
    let http = ClientBuilder::new(raw_client)
        .with(TransparentMiddleware)
        .with(RateLimitMiddleware::new(rate_limiter.clone()))
        .build();

    // The session pool shares the same client (and therefore the same middleware).
    let sessions = SessionPool::new(http.clone(), base_url.clone());

    Ok(Self {
        sessions,
        http,
        base_url,
    })
}
/// Validates offset parameter for search methods.
fn validate_offset(offset: i32) -> Result<()> {
if offset <= 0 {
Err(anyhow::anyhow!("Offset must be greater than 0"))
} else {
Ok(())
}
}
/// Assembles the query-string pairs shared by the list endpoints.
///
/// The final `_` parameter carries a fresh nonce generated on every call.
fn build_list_params(
    &self,
    search: &str,
    term: &str,
    offset: i32,
    max_results: i32,
    session_id: &str,
) -> Vec<(&str, String)> {
    let pairs = [
        ("searchTerm", search.to_string()),
        ("term", term.to_string()),
        ("offset", offset.to_string()),
        ("max", max_results.to_string()),
        ("uniqueSessionId", session_id.to_string()),
        ("_", nonce()),
    ];
    Vec::from(pairs)
}
/// Fetches `{base_url}/classSearch/{endpoint}` and decodes the JSON array body.
///
/// A session for `term` is acquired first and its id is sent as the
/// `uniqueSessionId` query parameter.
///
/// # Errors
///
/// Fails when `offset` is not positive, `term` cannot be parsed, no session
/// can be acquired, the request fails, or the body is not a JSON array of `T`.
// NOTE(review): unlike `perform_search`, this request does not attach the
// session's Cookie header and the client's cookie store is disabled. Confirm
// the list endpoints really work without it.
async fn get_list_endpoint<T>(
    &self,
    endpoint: &str,
    search: &str,
    term: &str,
    offset: i32,
    max_results: i32,
) -> Result<Vec<T>>
where
    T: for<'de> serde::Deserialize<'de>,
{
    Self::validate_offset(offset)?;

    let session = self.sessions.acquire(term.parse()?).await?;
    let endpoint_url = format!("{}/classSearch/{}", self.base_url, endpoint);
    let query = self.build_list_params(search, term, offset, max_results, session.id());

    let response = self
        .http
        .get(&endpoint_url)
        .query(&query)
        .send()
        .await
        .with_context(|| format!("Failed to get {}", endpoint))?;

    response
        .json()
        .await
        .with_context(|| format!("Failed to parse {} response", endpoint))
}
/// Produces the full parameter map for a course search.
///
/// Starts from the query's own parameters and then sets the term, session id,
/// sort column and direction, and two empty date-picker fields. These fixed
/// keys overwrite any entry of the same name coming from the query.
fn build_search_params(
    &self,
    query: &SearchQuery,
    term: &str,
    session_id: &str,
    sort: &str,
    sort_descending: bool,
) -> HashMap<String, String> {
    let direction = if sort_descending { "desc" } else { "asc" };

    let fixed = [
        ("txt_term", term),
        ("uniqueSessionId", session_id),
        ("sortColumn", sort),
        ("sortDirection", direction),
        ("startDatepicker", ""),
        ("endDatepicker", ""),
    ];

    let mut params = query.to_params();
    params.extend(
        fixed
            .into_iter()
            .map(|(key, value)| (key.to_string(), value.to_string())),
    );
    params
}
/// Performs a course search and handles common response processing.
///
/// Acquires a session for `term`, resets the server-side search form when the
/// session has been used before, runs the search, and validates the response.
///
/// # Errors
///
/// * `BannerApiError::InvalidSession` when the response has no path mode or no
///   data.
/// * `BannerApiError::RequestFailed` for transport failures, unparseable
///   bodies, and responses flagged as unsuccessful.
#[tracing::instrument(
skip(self, query, sort, sort_descending),
fields(term = %term)
)]
async fn perform_search(
&self,
term: &str,
query: &SearchQuery,
sort: &str,
sort_descending: bool,
) -> Result<SearchResult, BannerApiError> {
let mut session = self.sessions.acquire(term.parse()?).await?;
// A previously used session gets its search form reset before it is reused.
if session.been_used() {
self.http
.post(format!("{}/classSearch/resetDataForm", self.base_url))
.header("Cookie", session.cookie())
.send()
.await
.map_err(|e| BannerApiError::RequestFailed(e.into()))?;
}
session.touch();
let params = self.build_search_params(query, term, session.id(), sort, sort_descending);
debug!(
term = term,
subject = query.get_subject().map(|s| s.as_str()).unwrap_or("all"),
max_results = query.get_max_results(),
"Searching for courses"
);
// The cookie store is disabled on the client, so the session cookie is
// attached by hand.
let response = self
.http
.get(format!("{}/searchResults/searchResults", self.base_url))
.header("Cookie", session.cookie())
.query(&params)
.send()
.await
.context("Failed to search courses")?;
// Captured before the body is consumed so they can appear in error messages.
let status = response.status();
let url = response.url().clone();
let body = response
.text()
.await
.with_context(|| format!("Failed to read body (status={status})"))?;
let search_result: SearchResult = parse_json_with_context(&body).map_err(|e| {
BannerApiError::RequestFailed(anyhow!(
"Failed to parse search response (status={status}, url={url}): {e}"
))
})?;
// Check for signs of an invalid session
if search_result.path_mode.is_none() {
return Err(BannerApiError::InvalidSession(
"Search result path mode is none".to_string(),
));
} else if search_result.data.is_none() {
return Err(BannerApiError::InvalidSession(
"Search result data is none".to_string(),
));
}
if !search_result.success {
return Err(BannerApiError::RequestFailed(anyhow!(
"Search marked as unsuccessful by Banner API"
)));
}
Ok(search_result)
}
/// Retrieves a list of subjects from the Banner API.
///
/// Queries the `get_subject` list endpoint. `offset` must be greater than 0;
/// `max_results` is sent as the `max` query parameter.
pub async fn get_subjects(
&self,
search: &str,
term: &str,
offset: i32,
max_results: i32,
) -> Result<Vec<Pair>> {
self.get_list_endpoint("get_subject", search, term, offset, max_results)
.await
}
/// Retrieves campus codes and descriptions.
///
/// Queries `get_campus` with an empty search string, offset 1 and a limit of 500.
pub async fn get_campuses(&self, term: &str) -> Result<Vec<Pair>> {
self.get_list_endpoint("get_campus", "", term, 1, 500).await
}
/// Retrieves instructional method codes and descriptions.
///
/// Queries `get_instructionalMethod` with an empty search string, offset 1 and
/// a limit of 500.
pub async fn get_instructional_methods(&self, term: &str) -> Result<Vec<Pair>> {
self.get_list_endpoint("get_instructionalMethod", "", term, 1, 500)
.await
}
/// Retrieves part-of-term codes and descriptions.
///
/// Queries `get_partOfTerm` with an empty search string, offset 1 and a limit
/// of 500.
pub async fn get_parts_of_term(&self, term: &str) -> Result<Vec<Pair>> {
self.get_list_endpoint("get_partOfTerm", "", term, 1, 500)
.await
}
/// Retrieves section attribute codes and descriptions.
///
/// Queries `get_attribute` with an empty search string, offset 1 and a limit
/// of 500.
pub async fn get_attributes(&self, term: &str) -> Result<Vec<Pair>> {
self.get_list_endpoint("get_attribute", "", term, 1, 500)
.await
}
/// Retrieves meeting time information for a course.
pub async fn get_course_meeting_time(
&self,
term: &str,
crn: &str,
) -> Result<Vec<MeetingScheduleInfo>> {
let url = format!("{}/searchResults/getFacultyMeetingTimes", self.base_url);
let params = [("term", term), ("courseReferenceNumber", crn)];
let response = self
.http
.get(&url)
.query(&params)
.send()
.await
.context("Failed to get meeting times")?;
if !response.status().is_success() {
return Err(anyhow::anyhow!(
"Failed to get meeting times: {}",
response.status()
));
} else if !response
.headers()
.get("Content-Type")
.unwrap_or(&HeaderValue::from_static(""))
.to_str()
.unwrap_or("")
.starts_with("application/json")
{
return Err(anyhow::anyhow!(
"Unexpected content type: {:?}",
response
.headers()
.get("Content-Type")
.unwrap_or(&HeaderValue::from_static("(empty)"))
.to_str()
.unwrap_or("(non-ascii)")
));
}
let response: MeetingTimesApiResponse =
response.json().await.context("Failed to parse response")?;
Ok(response
.fmt
.into_iter()
.map(|m| m.schedule_info())
.collect())
}
/// Performs a search for courses.
///
/// Public entry point that forwards all arguments to `perform_search`.
/// `sort` is sent as the sort column; `sort_descending` selects descending
/// instead of ascending order.
pub async fn search(
&self,
term: &str,
query: &SearchQuery,
sort: &str,
sort_descending: bool,
) -> Result<SearchResult, BannerApiError> {
self.perform_search(term, query, sort, sort_descending)
.await
}
/// Retrieves a single course by CRN by issuing a minimal search
///
/// Runs a search limited to one result, sorted ascending by
/// `subjectDescription`, and returns the first course if there is one.
///
/// # Errors
///
/// Propagates every error from `perform_search`, and returns
/// `BannerApiError::InvalidSession` when the result is in "registration" path
/// mode without any data.
pub async fn get_course_by_crn(
    &self,
    term: &str,
    crn: &str,
) -> Result<Option<Course>, BannerApiError> {
    debug!(term = term, crn = crn, "Looking up course by CRN");

    let query = SearchQuery::new()
        .course_reference_number(crn)
        .max_results(1);

    let search_result = self
        .perform_search(term, &query, "subjectDescription", false)
        .await?;

    // Additional validation for CRN search. `perform_search` currently rejects
    // results without data already, so this is a defensive second check.
    // `as_deref` compares against the literal without allocating a String.
    if search_result.path_mode.as_deref() == Some("registration")
        && search_result.data.is_none()
    {
        return Err(BannerApiError::InvalidSession(
            "Search result path mode is registration and data is none".to_string(),
        ));
    }

    Ok(search_result
        .data
        .and_then(|courses| courses.into_iter().next()))
}
}
+9
View File
@@ -0,0 +1,9 @@
//! Error types for the Banner API client.
/// Errors surfaced by the Banner API client.
#[derive(Debug, thiserror::Error)]
pub enum BannerApiError {
/// The Banner session was rejected; the payload is a human-readable reason.
#[error("Banner session is invalid or expired: {0}")]
InvalidSession(String),
/// Any other failure, wrapped transparently so the inner error's message and
/// source are shown as-is. `#[from]` lets `?` convert an `anyhow::Error` into this variant.
#[error(transparent)]
RequestFailed(#[from] anyhow::Error),
}
+419
View File
@@ -0,0 +1,419 @@
//! JSON parsing utilities for the Banner API client.
use anyhow::Result;
use serde_json::{self, Value};
/// Deserialize `body` as JSON into `T`, producing a context-rich error on failure.
///
/// The error names the serde path of the failing field (when it is not the root)
/// together with the line and column reported by `serde_json`.
///
/// In debug builds the message additionally contains the type-mismatch summary and
/// the pretty-printed JSON object containing the error. In release builds only a
/// short snippet around the error position is included, to avoid dumping huge JSON
/// bodies into production logs.
pub fn parse_json_with_context<T: serde::de::DeserializeOwned>(body: &str) -> Result<T> {
    let mut deserializer = serde_json::Deserializer::from_str(body);
    let err = match serde_path_to_error::deserialize(&mut deserializer) {
        Ok(value) => return Ok(value),
        Err(err) => err,
    };

    let inner = err.inner();
    let line = inner.line();
    let column = inner.column();
    let path = err.path().to_string();
    let has_path = !path.is_empty() && path != ".";

    // serde_json appends the location to its message; strip it so it is not repeated.
    let raw_msg = inner.to_string();
    let location_suffix = format!(" at line {line} column {column}");
    let reason = raw_msg
        .strip_suffix(location_suffix.as_str())
        .unwrap_or(&raw_msg);

    let mut report = String::new();
    if cfg!(debug_assertions) {
        // Debug mode: provide detailed context
        if has_path {
            report.push_str(&format!("for path '{}'\n", path));
        }
        report.push_str(&format!(
            "({}) at line {} column {}\n\n",
            parse_type_mismatch(reason),
            line,
            column
        ));
        report.push_str(&extract_json_object_at_path(body, err.path(), line, column));
    } else {
        // Release mode: minimal snippet to keep logs concise
        if has_path {
            report.push_str(&format!("for path '{}' ", path));
        }
        report.push_str(&format!(
            "({}) at line {} column {}",
            reason, line, column
        ));
        report.push_str(&format!(
            "\n{}",
            build_error_snippet(body, line, column, 20)
        ));
    }
    Err(anyhow::anyhow!(report))
}
/// Summarize a serde error message as an expected/actual type description.
///
/// Recognizes two shapes:
/// - `"invalid type: X, expected Y[ at line ...]"` becomes `"expected Y, got X"`
/// - `"expected X[ at line ...]"` becomes `"expected X"`
///
/// Any other message is returned unchanged. The result carries no surrounding
/// parentheses; callers add their own decoration.
fn parse_type_mismatch(error_msg: &str) -> String {
    // "invalid type: X, expected Y" format
    let mismatch = error_msg
        .split_once("invalid type: ")
        .and_then(|(_, rest)| rest.split_once(", expected "));
    if let Some((actual_type, expected_raw)) = mismatch {
        // Drop a trailing " at line X column Y" if present
        let expected_type = expected_raw
            .split(" at line ")
            .next()
            .unwrap_or(expected_raw)
            .trim();
        return format!("expected {}, got {}", expected_type, actual_type);
    }

    // "expected X at line Y" format
    if error_msg.starts_with("expected ") {
        if let Some(head) = error_msg.split(" at line ").next() {
            return head.to_string();
        }
    }

    // Fallback: the message as given
    error_msg.to_string()
}
/// Pretty-print the JSON value that contains the parse error.
///
/// Re-parses `body` as untyped JSON, walks to the parent of the failing location
/// given by `path`, and renders that value. Output longer than 50 lines is cut to
/// the first 47 lines plus a truncation note.
///
/// # Arguments
/// * `body` - The raw JSON string
/// * `path` - The serde path to the error (e.g., "data[0].faculty[0].displayName")
/// * `line` - Line number of the error (for fallback)
/// * `column` - Column number of the error (for fallback)
///
/// # Returns
/// `"<description> at '<path>':\n<json>"`, or the line snippet from
/// `build_error_snippet` when the body is not valid JSON or cannot be rendered.
fn extract_json_object_at_path(
    body: &str,
    path: &serde_path_to_error::Path,
    line: usize,
    column: usize,
) -> String {
    const MAX_LINES: usize = 50;
    const KEPT_LINES: usize = 47;

    // Without a parseable document there is nothing to navigate.
    let Ok(root) = serde_json::from_str::<Value>(body) else {
        return build_error_snippet(body, line, column, 20);
    };

    let path_str = path.to_string();
    let segments = parse_path_segments(&path_str);
    let (context_value, context_name) = navigate_to_context(&root, &segments);

    let Ok(pretty) = serde_json::to_string_pretty(context_value) else {
        return build_error_snippet(body, line, column, 20);
    };

    // Limit output to prevent log spam
    let total_lines = pretty.lines().count();
    let shown = if total_lines > MAX_LINES {
        let head: Vec<&str> = pretty.lines().take(KEPT_LINES).collect();
        format!(
            "{}\n ... (truncated, {} more lines)",
            head.join("\n"),
            total_lines - KEPT_LINES
        )
    } else {
        pretty
    };
    format!("{} at '{}':\n{}", context_name, path_str, shown)
}
/// Split a serde path string into navigation segments.
///
/// `"data[0].faculty[1].displayName"` yields
/// `Key("data"), Index(0), Key("faculty"), Index(1), Key("displayName")`.
/// Dots separate keys, `[n]` denotes an array index. Bracket contents that do not
/// parse as `usize` are dropped without producing a segment.
fn parse_path_segments(path: &str) -> Vec<PathSegment> {
    let mut parsed = Vec::new();
    let mut buffer = String::new();
    let mut inside_index = false;

    for c in path.chars() {
        if c == '.' && !inside_index {
            // A dot ends the current key (if any).
            if !buffer.is_empty() {
                parsed.push(PathSegment::Key(std::mem::take(&mut buffer)));
            }
        } else if c == '[' {
            // An opening bracket ends the current key and starts an index.
            if !buffer.is_empty() {
                parsed.push(PathSegment::Key(std::mem::take(&mut buffer)));
            }
            inside_index = true;
        } else if c == ']' {
            if inside_index && !buffer.is_empty() {
                if let Ok(index) = buffer.parse::<usize>() {
                    parsed.push(PathSegment::Index(index));
                }
                buffer.clear();
            }
            inside_index = false;
        } else {
            buffer.push(c);
        }
    }

    // Whatever remains after the last separator is a trailing key.
    if !buffer.is_empty() {
        parsed.push(PathSegment::Key(buffer));
    }
    parsed
}

/// One step of a JSON path: an object key or an array index.
#[derive(Debug)]
enum PathSegment {
    Key(String),
    Index(usize),
}
/// Walk `current` along `segments` and return the value that contains the error.
///
/// The last segment names the failing field itself, so navigation stops one level
/// above it: the parent object/array is what gets shown. If a segment cannot be
/// resolved, the deepest value reached so far is returned instead.
///
/// # Returns
/// A tuple of (context_value, description) where context_value is the JSON to display
/// and description is a human-readable name for what is being shown.
fn navigate_to_context<'a>(
    mut current: &'a Value,
    segments: &[PathSegment],
) -> (&'a Value, &'static str) {
    // An empty path means the error is at the root.
    let Some((_failing_field, parents)) = segments.split_last() else {
        return (current, "Root object");
    };

    for segment in parents {
        let step = match segment {
            PathSegment::Key(key) => current
                .get(key)
                .ok_or("Partial context (navigation stopped)"),
            PathSegment::Index(idx) => current
                .get(idx)
                .ok_or("Partial context (index out of bounds)"),
        };
        match step {
            Ok(next) => current = next,
            // Can't navigate further, return what we have
            Err(label) => return (current, label),
        }
    }
    (current, "Object containing error")
}
/// Build a short snippet of the line containing a parse error, with a caret
/// marking the error position on the following line.
///
/// # Arguments
/// * `body` - The full text that was being parsed
/// * `line` - 1-based line number of the error
/// * `column` - 1-based column of the error, counted in bytes within the line
/// * `context_len` - Total width of the window; half is shown on each side
///
/// # Returns
/// `"...<window>...\n <caret line>"`, or `"(empty line)"` when the requested line
/// is empty or does not exist.
///
/// Never panics: the column is clamped to the line length and window edges are
/// moved onto UTF-8 character boundaries before slicing.
fn build_error_snippet(body: &str, line: usize, column: usize, context_len: usize) -> String {
    /// Largest char boundary `<= idx` (requires `idx <= s.len()`).
    fn floor_boundary(s: &str, mut idx: usize) -> usize {
        while !s.is_char_boundary(idx) {
            idx -= 1;
        }
        idx
    }
    /// Smallest char boundary `>= idx` (requires `idx <= s.len()`).
    fn ceil_boundary(s: &str, mut idx: usize) -> usize {
        while !s.is_char_boundary(idx) {
            idx += 1;
        }
        idx
    }

    let target_line = body.lines().nth(line.saturating_sub(1)).unwrap_or("");
    if target_line.is_empty() {
        return "(empty line)".to_string();
    }

    // column is 1-based, convert to 0-based for slicing; clamp so an out-of-range
    // column (e.g. pointing past a stripped line ending) cannot invert the window.
    let error_idx = floor_boundary(
        target_line,
        column.saturating_sub(1).min(target_line.len()),
    );
    let half_len = context_len / 2;
    let start = floor_boundary(target_line, error_idx.saturating_sub(half_len));
    let end = ceil_boundary(
        target_line,
        error_idx.saturating_add(half_len).min(target_line.len()),
    );
    let slice = &target_line[start..end];

    // Count characters rather than bytes so the caret lines up under multi-byte text.
    let indicator_pos = target_line[start..error_idx].chars().count();
    let indicator = " ".repeat(indicator_pos) + "^";
    format!("...{slice}...\n {indicator}")
}
// Unit tests for the JSON error-context helpers in this module.
#[cfg(test)]
mod tests {
use super::*;
use serde::Deserialize;
// "invalid type: X, expected Y" is rewritten to "expected Y, got X".
#[test]
fn test_parse_type_mismatch_invalid_type() {
let msg = "invalid type: null, expected a string at line 45 column 29";
let result = parse_type_mismatch(msg);
assert_eq!(result, "expected a string, got null");
}
// "expected X at line ..." loses its location suffix.
#[test]
fn test_parse_type_mismatch_expected() {
let msg = "expected value at line 1 column 1";
let result = parse_type_mismatch(msg);
assert_eq!(result, "expected value");
}
// A dotted path splits into one Key segment per component.
#[test]
fn test_parse_path_segments_simple() {
let segments = parse_path_segments("data.name");
assert_eq!(segments.len(), 2);
match &segments[0] {
PathSegment::Key(k) => assert_eq!(k, "data"),
_ => panic!("Expected Key segment"),
}
}
// Bracketed numbers become Index segments between the surrounding keys.
#[test]
fn test_parse_path_segments_with_array() {
let segments = parse_path_segments("data[0].faculty[1].displayName");
assert_eq!(segments.len(), 5);
match &segments[0] {
PathSegment::Key(k) => assert_eq!(k, "data"),
_ => panic!("Expected Key segment"),
}
match &segments[1] {
PathSegment::Index(i) => assert_eq!(*i, 0),
_ => panic!("Expected Index segment"),
}
}
// A null where a String is required must fail and mention the field name.
#[test]
fn test_parse_json_with_context_null_value() {
#[derive(Debug, Deserialize)]
struct TestStruct {
#[allow(dead_code)]
name: String,
}
let json = r#"{"name": null}"#;
let result: Result<TestStruct> = parse_json_with_context(json);
assert!(result.is_err());
let err_msg = result.unwrap_err().to_string();
// Should contain path info
assert!(err_msg.contains("name"));
// In debug mode, should contain detailed context
if cfg!(debug_assertions) {
assert!(err_msg.contains("expected"));
}
}
// Navigation stops at the parent of the last path segment.
#[test]
fn test_navigate_to_context() {
let json = r#"{"data": [{"faculty": [{"name": "John"}]}]}"#;
let value: Value = serde_json::from_str(json).unwrap();
let segments = parse_path_segments("data[0].faculty[0].name");
let (context, _) = navigate_to_context(&value, &segments);
// Should return the faculty[0] object (parent of 'name')
assert!(context.is_object());
assert!(context.get("name").is_some());
}
// End-to-end check against a nested payload shaped like a Banner search response.
#[test]
fn test_realistic_banner_error() {
#[derive(Debug, Deserialize)]
struct Course {
#[allow(dead_code)]
#[serde(rename = "courseTitle")]
course_title: String,
#[allow(dead_code)]
faculty: Vec<Faculty>,
}
#[derive(Debug, Deserialize)]
struct Faculty {
#[serde(rename = "displayName")]
#[allow(dead_code)]
display_name: String,
#[allow(dead_code)]
email: String,
}
#[derive(Debug, Deserialize)]
struct SearchResult {
#[allow(dead_code)]
data: Vec<Course>,
}
// Simulate Banner API response with null faculty displayName
// This mimics the actual error from SPN subject scrape
let json = r#"{
"data": [
{
"courseTitle": "Spanish Conversation",
"faculty": [
{
"displayName": null,
"email": "instructor@utsa.edu"
}
]
}
]
}"#;
let result: Result<SearchResult> = parse_json_with_context(json);
assert!(result.is_err());
let err_msg = result.unwrap_err().to_string();
println!("\n=== Error output in debug mode ===\n{}\n", err_msg);
// Verify error contains key information
assert!(err_msg.contains("data[0].faculty[0].displayName"));
// In debug mode, should show detailed context
if cfg!(debug_assertions) {
// Should show type mismatch info
assert!(err_msg.contains("expected") && err_msg.contains("got"));
// Should show surrounding JSON context with the faculty object
assert!(err_msg.contains("email"));
}
}
}
+72
View File
@@ -0,0 +1,72 @@
//! HTTP middleware for the Banner API client.
use http::Extensions;
use reqwest::{Request, Response};
use reqwest_middleware::{Middleware, Next};
use tracing::{debug, trace, warn};
/// Logging-only middleware: it forwards every request to the next handler and
/// returns the result untouched, emitting a tracing event with method, path,
/// status and duration.
pub struct TransparentMiddleware;
/// Threshold for logging slow requests at DEBUG level (in milliseconds).
/// Successful requests faster than this are logged at TRACE instead.
const SLOW_REQUEST_THRESHOLD_MS: u128 = 1000;
#[async_trait::async_trait]
impl Middleware for TransparentMiddleware {
    /// Runs the rest of the middleware chain and logs the outcome.
    ///
    /// - success status: TRACE, or DEBUG when the request took at least
    ///   `SLOW_REQUEST_THRESHOLD_MS`
    /// - non-success status or transport error: WARN
    ///
    /// The response or error is passed back to the caller unchanged.
    async fn handle(
        &self,
        req: Request,
        extensions: &mut Extensions,
        next: Next<'_>,
    ) -> std::result::Result<Response, reqwest_middleware::Error> {
        // Capture request details up front; `req` is consumed by `next.run`.
        let method = req.method().to_string();
        let path = req.url().path().to_string();

        let started = std::time::Instant::now();
        let outcome = next.run(req, extensions).await;
        let duration_ms = started.elapsed().as_millis();

        match &outcome {
            Ok(response) if response.status().is_success() => {
                let status = response.status().as_u16();
                if duration_ms >= SLOW_REQUEST_THRESHOLD_MS {
                    debug!(
                        method = method,
                        path = path,
                        status = status,
                        duration_ms = duration_ms,
                        "Request completed (slow)"
                    );
                } else {
                    trace!(
                        method = method,
                        path = path,
                        status = status,
                        duration_ms = duration_ms,
                        "Request completed"
                    );
                }
            }
            Ok(response) => {
                warn!(
                    method = method,
                    path = path,
                    status = response.status().as_u16(),
                    duration_ms = duration_ms,
                    "Request failed"
                );
            }
            Err(_) => {
                warn!(
                    method = method,
                    path = path,
                    duration_ms = duration_ms,
                    "Request failed"
                );
            }
        }
        outcome
    }
}
+24
View File
@@ -0,0 +1,24 @@
//! Banner API module for interacting with Ellucian Banner systems.
//!
//! This module provides functionality to:
//! - Search for courses and retrieve course information
//! - Manage Banner API sessions and authentication
//! - Generate ICS files and calendar links
pub mod api;
pub mod errors;
pub mod json;
pub mod middleware;
pub mod models;
pub mod query;
pub mod rate_limit_middleware;
pub mod rate_limiter;
pub mod session;
pub mod util;
pub use api::*;
pub use errors::*;
pub use models::*;
pub use query::*;
pub use rate_limiter::*;
pub use session::*;
+22
View File
@@ -0,0 +1,22 @@
use serde::{Deserialize, Serialize};
/// Represents a key-value pair from the Banner API: a `code` and its
/// human-readable `description`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Pair {
pub code: String,
pub description: String,
}
/// Represents a term in the Banner system.
///
/// This is a plain alias of [`Pair`], not a distinct type: methods defined on
/// `BannerTerm` are available on every `Pair`.
pub type BannerTerm = Pair;
/// Represents an instructor in the Banner system (alias of [`Pair`]).
#[allow(dead_code)]
pub type Instructor = Pair;
// `BannerTerm` is an alias of `Pair`, so this block adds the method to `Pair` itself.
impl BannerTerm {
/// Returns true if the term is in an archival (view-only) state.
///
/// Detected by the description containing the text "View Only".
pub fn is_archived(&self) -> bool {
self.description.contains("View Only")
}
}
+78
View File
@@ -0,0 +1,78 @@
use serde::{Deserialize, Serialize};
use super::meetings::FacultyItem;
use super::meetings::MeetingTimeResponse;
/// Course section attribute.
///
/// Field names map to camelCase JSON keys, except `is_ztc_attribute`, which is
/// renamed explicitly to `isZTCAttribute`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SectionAttribute {
pub class: String,
pub course_reference_number: String,
pub code: String,
pub description: String,
pub term_code: String,
// Explicit rename: the camelCase rule would produce `isZtcAttribute`.
#[serde(rename = "isZTCAttribute")]
pub is_ztc_attribute: bool,
}
/// Represents a single course returned from a search.
///
/// Field names map to camelCase JSON keys. `Option` fields may be null or absent
/// in the payload; `faculty` and `meetings_faculty` default to empty lists when
/// the key is missing.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Course {
pub id: i32,
pub term: String,
pub term_desc: String,
pub course_reference_number: String,
pub part_of_term: String,
pub course_number: String,
pub subject: String,
pub subject_description: String,
pub sequence_number: String,
pub campus_description: String,
pub schedule_type_description: String,
pub course_title: String,
pub credit_hours: Option<i32>,
pub maximum_enrollment: i32,
pub enrollment: i32,
pub seats_available: i32,
pub wait_capacity: i32,
pub wait_count: i32,
pub cross_list: Option<String>,
pub cross_list_capacity: Option<i32>,
pub cross_list_count: Option<i32>,
pub cross_list_available: Option<i32>,
pub credit_hour_high: Option<i32>,
pub credit_hour_low: Option<i32>,
pub credit_hour_indicator: Option<String>,
pub open_section: bool,
pub link_identifier: Option<String>,
pub is_section_linked: bool,
pub subject_course: String,
pub reserved_seat_summary: Option<String>,
pub instructional_method: String,
pub instructional_method_description: String,
pub section_attributes: Vec<SectionAttribute>,
// Missing key deserializes to an empty list.
#[serde(default)]
pub faculty: Vec<FacultyItem>,
// Missing key deserializes to an empty list.
#[serde(default)]
pub meetings_faculty: Vec<MeetingTimeResponse>,
}
impl Course {
    /// Returns the course title in the format "SUBJ #### - Course Title"
    pub fn display_title(&self) -> String {
        format!(
            "{} {} - {}",
            self.subject, self.course_number, self.course_title
        )
    }

    /// Returns the name of the primary instructor, or "Unknown" if not available.
    ///
    /// Prefers the faculty entry flagged with `primary_indicator`. When no entry is
    /// flagged, falls back to the first listed faculty member.
    pub fn primary_instructor_name(&self) -> &str {
        self.faculty
            .iter()
            .find(|member| member.primary_indicator)
            .or_else(|| self.faculty.first())
            .map_or("Unknown", |member| member.display_name.as_str())
    }
}
+577
View File
@@ -0,0 +1,577 @@
use bitflags::{Flags, bitflags};
use chrono::{DateTime, NaiveDate, NaiveTime, Timelike, Utc, Weekday};
use extension_traits::extension;
use serde::{Deserialize, Deserializer, Serialize};
use std::{cmp::Ordering, str::FromStr};
use super::terms::Term;
// Extension methods on chrono's `Weekday`, exposed through the `WeekdayExt` trait
// named in the `#[extension]` attribute.
#[extension(pub trait WeekdayExt)]
impl Weekday {
/// Short two-letter representation (used for ICS generation), e.g. "Mo", "Tu"
fn to_short_string(self) -> &'static str {
match self {
Weekday::Mon => "Mo",
Weekday::Tue => "Tu",
Weekday::Wed => "We",
Weekday::Thu => "Th",
Weekday::Fri => "Fr",
Weekday::Sat => "Sa",
Weekday::Sun => "Su",
}
}
/// Full English day name, e.g. "Monday"
fn to_full_string(self) -> &'static str {
match self {
Weekday::Mon => "Monday",
Weekday::Tue => "Tuesday",
Weekday::Wed => "Wednesday",
Weekday::Thu => "Thursday",
Weekday::Fri => "Friday",
Weekday::Sat => "Saturday",
Weekday::Sun => "Sunday",
}
}
}
/// Deserialize a string field into a u32
fn deserialize_string_to_u32<'de, D>(deserializer: D) -> Result<u32, D::Error>
where
D: Deserializer<'de>,
{
let s: String = Deserialize::deserialize(deserializer)?;
s.parse::<u32>().map_err(serde::de::Error::custom)
}
/// Deserialize a string field into a Term
fn deserialize_string_to_term<'de, D>(deserializer: D) -> Result<Term, D::Error>
where
D: Deserializer<'de>,
{
let s: String = Deserialize::deserialize(deserializer)?;
Term::from_str(&s).map_err(serde::de::Error::custom)
}
/// Represents a faculty member associated with a course.
///
/// Field names map to camelCase JSON keys. The CRN arrives as a string and is
/// parsed into a `u32` during deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FacultyItem {
pub banner_id: String, // e.g "@01647907" (can contain @ symbol)
pub category: Option<String>, // zero-padded digits
pub class: String, // internal class name
#[serde(deserialize_with = "deserialize_string_to_u32")]
pub course_reference_number: u32, // CRN, e.g 27294
pub display_name: String, // "LastName, FirstName"
pub email_address: Option<String>, // e.g. FirstName.LastName@utsa.edu
pub primary_indicator: bool,
pub term: String, // e.g "202420"
}
/// Raw meeting time information for a course, as delivered by the API.
///
/// Field names map to camelCase JSON keys. Dates and times stay as strings here;
/// `MeetingScheduleInfo::from_meeting_time` turns them into typed values.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MeetingTime {
pub start_date: String, // MM/DD/YYYY, e.g 08/26/2025
pub end_date: String, // MM/DD/YYYY, e.g 08/26/2025
pub begin_time: Option<String>, // HHMM, e.g 1000
pub end_time: Option<String>, // HHMM, e.g 1100
pub category: String, // unknown meaning, e.g. 01, 02, etc
pub class: String, // internal class name, e.g. net.hedtech.banner.general.overallMeetingTimeDecorator
pub monday: bool, // true if the meeting time occurs on Monday
pub tuesday: bool, // true if the meeting time occurs on Tuesday
pub wednesday: bool, // true if the meeting time occurs on Wednesday
pub thursday: bool, // true if the meeting time occurs on Thursday
pub friday: bool, // true if the meeting time occurs on Friday
pub saturday: bool, // true if the meeting time occurs on Saturday
pub sunday: bool, // true if the meeting time occurs on Sunday
pub room: Option<String>, // e.g. 1.238
#[serde(deserialize_with = "deserialize_string_to_term")]
pub term: Term, // e.g 202510
pub building: Option<String>, // e.g NPB
pub building_description: Option<String>, // e.g North Paseo Building
pub campus: Option<String>, // campus code, e.g 11
pub campus_description: Option<String>, // name of campus, e.g Main Campus
pub course_reference_number: String, // CRN, e.g 27294
pub credit_hour_session: Option<f64>, // e.g. 30
pub hours_week: f64, // e.g. 30
pub meeting_schedule_type: String, // e.g AFF
pub meeting_type: String, // e.g HB, H2, H1, OS, OA, OH, ID, FF
pub meeting_type_description: String,
}
bitflags! {
// Set of weekdays on which a meeting occurs, one bit per day
// (Monday = bit 0 through Sunday = bit 6). Bit 7 is not assigned to any day.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct MeetingDays: u8 {
const Monday = 1 << 0;
const Tuesday = 1 << 1;
const Wednesday = 1 << 2;
const Thursday = 1 << 3;
const Friday = 1 << 4;
const Saturday = 1 << 5;
const Sunday = 1 << 6;
}
}
impl MeetingDays {
    /// Build the day set from the per-day boolean fields of a raw [`MeetingTime`].
    ///
    /// Each field that is `true` contributes the matching flag. The result is
    /// empty when no day is set.
    pub fn from_meeting_time(meeting_time: &MeetingTime) -> MeetingDays {
        [
            (meeting_time.monday, MeetingDays::Monday),
            (meeting_time.tuesday, MeetingDays::Tuesday),
            (meeting_time.wednesday, MeetingDays::Wednesday),
            (meeting_time.thursday, MeetingDays::Thursday),
            (meeting_time.friday, MeetingDays::Friday),
            (meeting_time.saturday, MeetingDays::Saturday),
            (meeting_time.sunday, MeetingDays::Sunday),
        ]
        .into_iter()
        .filter(|(meets, _)| *meets)
        .fold(MeetingDays::empty(), |acc, (_, day)| acc | day)
    }
}
// Total order over day sets: compares the raw bit patterns numerically.
impl Ord for MeetingDays {
fn cmp(&self, other: &Self) -> Ordering {
self.bits().cmp(&other.bits())
}
}
// Delegates to `Ord::cmp`, so the partial order is always defined and agrees with it.
impl PartialOrd for MeetingDays {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
// Converts a single chrono `Weekday` into the day set containing only that day.
impl From<Weekday> for MeetingDays {
fn from(day: Weekday) -> Self {
match day {
Weekday::Mon => MeetingDays::Monday,
Weekday::Tue => MeetingDays::Tuesday,
Weekday::Wed => MeetingDays::Wednesday,
Weekday::Thu => MeetingDays::Thursday,
Weekday::Fri => MeetingDays::Friday,
Weekday::Sat => MeetingDays::Saturday,
Weekday::Sun => MeetingDays::Sunday,
}
}
}
/// Converts a day set holding exactly one day into the matching `Weekday`.
///
/// Fails when the set contains bits that belong to no defined day, or when it
/// holds anything other than exactly one day.
impl TryFrom<MeetingDays> for Weekday {
    type Error = anyhow::Error;

    fn try_from(days: MeetingDays) -> Result<Self, Self::Error> {
        if days.contains_unknown_bits() {
            return Err(anyhow::anyhow!("Unknown days: {:?}", days));
        }
        // Only the seven single-day constants convert; every other known-bit
        // combination ends up in the catch-all arm.
        let weekday = match days {
            MeetingDays::Monday => Weekday::Mon,
            MeetingDays::Tuesday => Weekday::Tue,
            MeetingDays::Wednesday => Weekday::Wed,
            MeetingDays::Thursday => Weekday::Thu,
            MeetingDays::Friday => Weekday::Fri,
            MeetingDays::Saturday => Weekday::Sat,
            MeetingDays::Sunday => Weekday::Sun,
            _ => {
                return Err(anyhow::anyhow!(
                    "Cannot convert multiple days to a single day: {days:?}"
                ));
            }
        };
        Ok(weekday)
    }
}
/// Time range for meetings: a start and an end time of day, without a date.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TimeRange {
pub start: NaiveTime,
pub end: NaiveTime,
}
impl TimeRange {
    /// Parse time range from HHMM format strings (e.g. "1000", "1115").
    ///
    /// Returns `None` when either string is not a valid HHMM time.
    pub fn from_hhmm(start: &str, end: &str) -> Option<Self> {
        let start_time = Self::parse_hhmm(start)?;
        let end_time = Self::parse_hhmm(end)?;
        Some(TimeRange {
            start: start_time,
            end: end_time,
        })
    }

    /// Parse HHMM format string to NaiveTime.
    ///
    /// Accepts exactly four ASCII digits with hours 00-23 and minutes 00-59.
    fn parse_hhmm(time_str: &str) -> Option<NaiveTime> {
        // Requiring ASCII digits keeps the byte split below on character
        // boundaries (no panic on multi-byte input) and rejects signed forms such
        // as "+930", which integer parsing alone would accept.
        if time_str.len() != 4 || !time_str.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        let (hours, minutes) = time_str.split_at(2);
        let hours = hours.parse::<u32>().ok()?;
        let minutes = minutes.parse::<u32>().ok()?;
        if hours > 23 || minutes > 59 {
            return None;
        }
        NaiveTime::from_hms_opt(hours, minutes, 0)
    }

    /// Format time in 12-hour format, e.g. "10:00AM-11:15AM"
    pub fn format_12hr(&self) -> String {
        format!(
            "{}-{}",
            Self::format_time_12hr(self.start),
            Self::format_time_12hr(self.end)
        )
    }

    /// Format a single time in 12-hour format (no leading zero on the hour)
    fn format_time_12hr(time: NaiveTime) -> String {
        let hour = time.hour();
        let minute = time.minute();
        let meridiem = if hour < 12 { "AM" } else { "PM" };
        let display_hour = match hour {
            0 => 12,
            13..=23 => hour - 12,
            _ => hour,
        };
        format!("{display_hour}:{minute:02}{meridiem}")
    }

    /// Get duration in minutes.
    ///
    /// Seconds are ignored. The result is negative when `end` is earlier than `start`.
    #[allow(dead_code)]
    pub fn duration_minutes(&self) -> i64 {
        let start_minutes = self.start.hour() as i64 * 60 + self.start.minute() as i64;
        let end_minutes = self.end.hour() as i64 * 60 + self.end.minute() as i64;
        end_minutes - start_minutes
    }
}
/// Orders time ranges by start time, then by end time.
///
/// The end time acts as a tie-breaker so that `partial_cmp` returns `Equal` exactly
/// when the derived `PartialEq` reports equality, as the `PartialOrd` contract
/// requires.
impl PartialOrd for TimeRange {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(
            self.start
                .cmp(&other.start)
                .then_with(|| self.end.cmp(&other.end)),
        )
    }
}
/// Date range for meetings: first and last calendar day, both inclusive
/// (see `contains_date`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DateRange {
pub start: NaiveDate,
pub end: NaiveDate,
}
impl DateRange {
    /// Build a range from two `MM/DD/YYYY` strings.
    ///
    /// Returns `None` when either date fails to parse.
    pub fn from_mm_dd_yyyy(start: &str, end: &str) -> Option<Self> {
        let start = Self::parse_mm_dd_yyyy(start)?;
        let end = Self::parse_mm_dd_yyyy(end)?;
        Some(DateRange { start, end })
    }

    /// Parse a single `MM/DD/YYYY` string, discarding the parse error.
    fn parse_mm_dd_yyyy(date_str: &str) -> Option<NaiveDate> {
        match NaiveDate::parse_from_str(date_str, "%m/%d/%Y") {
            Ok(date) => Some(date),
            Err(_) => None,
        }
    }

    /// Number of whole weeks from `start` to `end`; zero when `end` precedes `start`.
    pub fn weeks_duration(&self) -> u32 {
        let weeks = self.end.signed_duration_since(self.start).num_weeks();
        if weeks > 0 { weeks as u32 } else { 0 }
    }

    /// Whether `date` lies within the range, both endpoints included.
    #[allow(dead_code)]
    pub fn contains_date(&self, date: NaiveDate) -> bool {
        (self.start..=self.end).contains(&date)
    }
}
/// Meeting type, decoded from the two-letter code in `MeetingTime::meeting_type`.
///
/// Codes without a dedicated variant are preserved in `Unknown`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum MeetingType {
HybridBlended, // HB, H2, H1
OnlineSynchronous, // OS
OnlineAsynchronous, // OA
OnlineHybrid, // OH
IndependentStudy, // ID
FaceToFace, // FF
Unknown(String),
}
// Parsing never fails (`Err = Infallible`): unrecognized codes become
// `MeetingType::Unknown` carrying the original text. Matching is case-sensitive.
impl std::str::FromStr for MeetingType {
type Err = std::convert::Infallible;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
Ok(match s {
"HB" | "H2" | "H1" => MeetingType::HybridBlended,
"OS" => MeetingType::OnlineSynchronous,
"OA" => MeetingType::OnlineAsynchronous,
"OH" => MeetingType::OnlineHybrid,
"ID" => MeetingType::IndependentStudy,
"FF" => MeetingType::FaceToFace,
other => MeetingType::Unknown(other.to_string()),
})
}
}
impl MeetingType {
/// Get the display description for the meeting type.
///
/// `Unknown` always yields "Unknown"; the original code is not included.
pub fn description(&self) -> &'static str {
match self {
MeetingType::HybridBlended => "Hybrid",
MeetingType::OnlineSynchronous => "Online Only",
MeetingType::OnlineAsynchronous => "Online Asynchronous",
MeetingType::OnlineHybrid => "Online Partial",
MeetingType::IndependentStudy => "To Be Arranged",
MeetingType::FaceToFace => "Face to Face",
MeetingType::Unknown(_) => "Unknown",
}
}
}
/// Meeting location information: either online, or a physical room identified by
/// campus, building and room.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MeetingLocation {
Online,
InPerson {
campus: String,
campus_description: String,
building: String,
building_description: String,
room: String,
},
}
impl MeetingLocation {
    /// Derive the location from raw [`MeetingTime`] data.
    ///
    /// The meeting is `InPerson` only when campus, campus description, building,
    /// building description and room are all present and the campus description is
    /// not "Internet". Every other case is `Online`.
    pub fn from_meeting_time(meeting_time: &MeetingTime) -> Self {
        let (
            Some(campus),
            Some(campus_description),
            Some(building),
            Some(building_description),
            Some(room),
        ) = (
            &meeting_time.campus,
            &meeting_time.campus_description,
            &meeting_time.building,
            &meeting_time.building_description,
            &meeting_time.room,
        )
        else {
            // Any missing location field means there is no physical room to report.
            return MeetingLocation::Online;
        };

        if campus_description == "Internet" {
            return MeetingLocation::Online;
        }

        MeetingLocation::InPerson {
            campus: campus.clone(),
            campus_description: campus_description.clone(),
            building: building.clone(),
            building_description: building_description.clone(),
            room: room.clone(),
        }
    }
}
/// Clean, parsed meeting schedule information, built from a raw `MeetingTime`
/// by `MeetingScheduleInfo::from_meeting_time`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MeetingScheduleInfo {
pub days: MeetingDays,
// `None` when the raw data has no begin/end time or they fail to parse.
pub time_range: Option<TimeRange>,
pub date_range: DateRange,
pub meeting_type: MeetingType,
pub location: MeetingLocation,
// Whole weeks covered by `date_range`.
pub duration_weeks: u32,
}
impl MeetingScheduleInfo {
/// Create from raw MeetingTime data
pub fn from_meeting_time(meeting_time: &MeetingTime) -> Self {
let days = MeetingDays::from_meeting_time(meeting_time);
let time_range = match (&meeting_time.begin_time, &meeting_time.end_time) {
(Some(begin), Some(end)) => TimeRange::from_hhmm(begin, end),
_ => None,
};
let date_range =
DateRange::from_mm_dd_yyyy(&meeting_time.start_date, &meeting_time.end_date)
.unwrap_or_else(|| {
// Fallback to current date if parsing fails
let now = chrono::Utc::now().naive_utc().date();
DateRange {
start: now,
end: now,
}
});
let meeting_type: MeetingType = meeting_time.meeting_type.parse().unwrap();
let location = MeetingLocation::from_meeting_time(meeting_time);
let duration_weeks = date_range.weeks_duration();
MeetingScheduleInfo {
days,
time_range,
date_range,
meeting_type,
location,
duration_weeks,
}
}
/// Convert the meeting days bitset to a weekday vector
pub fn days_of_week(&self) -> Vec<Weekday> {
self.days
.iter()
.map(|day| <MeetingDays as TryInto<Weekday>>::try_into(day).unwrap())
.collect()
}
/// Get formatted days string
pub fn days_string(&self) -> Option<String> {
if self.days.is_empty() {
return None;
}
if self.days.is_all() {
return Some("Everyday".to_string());
}
let days_of_week = self.days_of_week();
if days_of_week.len() == 1 {
return Some(days_of_week[0].to_full_string().to_string());
}
// Mapper function to get the short string representation of the day of week
let mapper = {
let ambiguous = self.days.intersects(
MeetingDays::Tuesday
| MeetingDays::Thursday
| MeetingDays::Saturday
| MeetingDays::Sunday,
);
if ambiguous {
|day: &Weekday| day.to_short_string().to_string()
} else {
|day: &Weekday| day.to_short_string().chars().next().unwrap().to_string()
}
};
Some(days_of_week.iter().map(mapper).collect::<String>())
}
/// Returns a formatted string representing the location of the meeting
pub fn place_string(&self) -> String {
match &self.location {
MeetingLocation::Online => "Online".to_string(),
MeetingLocation::InPerson {
campus,
building,
building_description,
room,
..
} => format!(
"{} | {} | {} {}",
campus, building_description, building, room
),
}
}
/// Sort a slice of meeting schedule infos by start time, with stable fallback to day bits.
///
/// Meetings with a time range sort before those without one.
/// Among meetings without a time range, ties break by day-of-week bits.
pub fn sort_by_start_time(meetings: &mut [MeetingScheduleInfo]) {
meetings.sort_unstable_by(|a, b| match (&a.time_range, &b.time_range) {
(Some(a_time), Some(b_time)) => a_time.start.cmp(&b_time.start),
(Some(_), None) => std::cmp::Ordering::Less,
(None, Some(_)) => std::cmp::Ordering::Greater,
(None, None) => a.days.bits().cmp(&b.days.bits()),
});
}
/// Get the start and end date times for the meeting
///
/// Uses the start and end times of the meeting if available, otherwise defaults to midnight (00:00:00.000).
///
/// The returned times are in UTC.
pub fn datetime_range(&self) -> (DateTime<Utc>, DateTime<Utc>) {
let (start, end) = if let Some(time_range) = &self.time_range {
let start = self.date_range.start.and_time(time_range.start);
let end = self.date_range.end.and_time(time_range.end);
(start, end)
} else {
(
self.date_range.start.and_hms_opt(0, 0, 0).unwrap(),
self.date_range.end.and_hms_opt(0, 0, 0).unwrap(),
)
};
(start.and_utc(), end.and_utc())
}
}
// Equality considers only the days and the time range; date range, meeting type,
// location and duration are ignored.
impl PartialEq for MeetingScheduleInfo {
fn eq(&self, other: &Self) -> bool {
self.days == other.days && self.time_range == other.time_range
}
}
/// Orders meetings by time range; meetings with a time range come before those
/// without one, and two meetings without a time range are ordered by their days.
impl PartialOrd for MeetingScheduleInfo {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match (self.time_range.as_ref(), other.time_range.as_ref()) {
            (Some(lhs), Some(rhs)) => lhs.partial_cmp(rhs),
            (Some(_), None) => Some(Ordering::Less),
            (None, Some(_)) => Some(Ordering::Greater),
            (None, None) => Some(self.days.cmp(&other.days)),
        }
    }
}
/// API response wrapper for meeting times.
///
/// The entries live under the JSON key `fmt`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MeetingTimesApiResponse {
pub fmt: Vec<MeetingTimeResponse>,
}
/// Meeting time response wrapper.
///
/// Field names are (de)serialized in camelCase, e.g. `course_reference_number`
/// maps to `courseReferenceNumber` and `meeting_time` to `meetingTime`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MeetingTimeResponse {
/// Optional category value; may be absent or null in the payload.
pub category: Option<String>,
pub class: String,
/// Course reference number (CRN), kept as a string.
pub course_reference_number: String,
// Falls back to an empty list when the `faculty` key is missing from the payload.
#[serde(default)]
pub faculty: Vec<FacultyItem>,
/// Raw meeting time data; see [`MeetingTimeResponse::schedule_info`] for the parsed form.
pub meeting_time: MeetingTime,
pub term: String,
}
impl MeetingTimeResponse {
    /// Parses this response's raw `meeting_time` into a [`MeetingScheduleInfo`].
    pub fn schedule_info(&self) -> MeetingScheduleInfo {
        let raw_meeting_time = &self.meeting_time;
        MeetingScheduleInfo::from_meeting_time(raw_meeting_time)
    }
}
+14
View File
@@ -0,0 +1,14 @@
//! Data models for the Banner API.
pub mod common;
pub mod courses;
pub mod meetings;
pub mod search;
pub mod terms;
// Re-export commonly used types
pub use common::*;
pub use courses::*;
pub use meetings::*;
pub use search::*;
pub use terms::*;
+23
View File
@@ -0,0 +1,23 @@
use serde::{Deserialize, Serialize};
use super::courses::Course;
/// Search result wrapper.
///
/// Field names are (de)serialized in camelCase (`total_count` <-> `totalCount`,
/// `page_max_size` <-> `pageMaxSize`, ...).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
pub success: bool,
pub total_count: i32,
pub page_offset: i32,
pub page_max_size: i32,
pub path_mode: Option<String>,
pub search_results_config: Option<Vec<SearchResultConfig>>,
/// The courses carried by this response; `None` when the key is absent or null.
pub data: Option<Vec<Course>>,
}
/// Search result configuration entry.
///
/// Unlike [`SearchResult`], no rename rule is applied: the keys are exactly
/// `config` and `display`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResultConfig {
pub config: String,
pub display: String,
}
+573
View File
@@ -0,0 +1,573 @@
use std::{ops::RangeInclusive, str::FromStr};
use anyhow::Context;
use chrono::{Datelike, Local, NaiveDate};
use serde::{Deserialize, Serialize};
/// The current year at the time of compilation.
///
/// This is fixed when the binary is built; it does not advance while the program runs.
const CURRENT_YEAR: u32 = compile_time::date!().year() as u32;
/// The valid years for terms (inclusive on both ends).
///
/// The upper limit is derived from the build year plus ten, so it moves forward with
/// every rebuild instead of needing a manual update, while still keeping a tight bound.
/// TODO: Recheck the lower bound, it's just a guess right now.
const VALID_YEARS: RangeInclusive<u32> = 2007..=(CURRENT_YEAR + 10);
/// Represents a term in the Banner system.
///
/// Formats (via `Display`) as the six-character code `YYYYXX`, where `YYYY` is
/// [`Term::year`] and `XX` is the season code.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct Term {
// Year component of the term code. NOTE(review): `get_status_for_date` assigns dates after
// the summer term the *next* calendar year, so this is not always the calendar year.
pub year: u32, // 2024, 2025, etc
pub season: Season,
}
/// Represents the term status at a specific point in time.
///
/// Either variant carries exactly one [`Term`]; use [`TermPoint::inner`] to get it
/// without matching on the variant.
#[derive(Debug, Clone)]
pub enum TermPoint {
/// Currently in a term
InTerm { current: Term },
/// Between terms, with the next term specified
BetweenTerms { next: Term },
}
/// Represents a season within a term.
///
/// Each season has a two-digit code used in term codes (Fall = "10", Spring = "20",
/// Summer = "30") and a lowercase slug ("fall", "spring", "summer").
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum Season {
Fall,
Spring,
Summer,
}
impl Term {
/// Returns the current term status - either currently in a term or between terms
pub fn get_current() -> TermPoint {
let now = Local::now().naive_local();
Self::get_status_for_date(now.date())
}
/// Returns the current term status for a specific date
pub fn get_status_for_date(date: NaiveDate) -> TermPoint {
let literal_year = date.year() as u32;
let day_of_year = date.ordinal();
let ranges = Self::get_season_ranges(literal_year);
// If we're past the end of the summer term, we're 'in' the next school year.
let term_year = if day_of_year > ranges.summer.end {
literal_year + 1
} else {
literal_year
};
if (day_of_year < ranges.spring.start) || (day_of_year >= ranges.fall.end) {
// Fall over, Spring not yet begun
TermPoint::BetweenTerms {
next: Term {
year: term_year,
season: Season::Spring,
},
}
} else if (day_of_year >= ranges.spring.start) && (day_of_year < ranges.spring.end) {
// Spring
TermPoint::InTerm {
current: Term {
year: term_year,
season: Season::Spring,
},
}
} else if day_of_year < ranges.summer.start {
// Spring over, Summer not yet begun
TermPoint::BetweenTerms {
next: Term {
year: term_year,
season: Season::Summer,
},
}
} else if (day_of_year >= ranges.summer.start) && (day_of_year < ranges.summer.end) {
// Summer
TermPoint::InTerm {
current: Term {
year: term_year,
season: Season::Summer,
},
}
} else if day_of_year < ranges.fall.start {
// Summer over, Fall not yet begun
TermPoint::BetweenTerms {
next: Term {
year: term_year,
season: Season::Fall,
},
}
} else if (day_of_year >= ranges.fall.start) && (day_of_year < ranges.fall.end) {
// Fall
TermPoint::InTerm {
current: Term {
year: term_year,
season: Season::Fall,
},
}
} else {
// This should never happen, but Rust requires exhaustive matching
panic!("Impossible code reached (dayOfYear: {})", day_of_year);
}
}
/// Returns the start and end day of each term for the given year.
/// The ranges are inclusive of the start day and exclusive of the end day.
fn get_season_ranges(year: u32) -> SeasonRanges {
let spring_start = NaiveDate::from_ymd_opt(year as i32, 1, 14)
.unwrap()
.ordinal();
let spring_end = NaiveDate::from_ymd_opt(year as i32, 5, 1)
.unwrap()
.ordinal();
let summer_start = NaiveDate::from_ymd_opt(year as i32, 5, 25)
.unwrap()
.ordinal();
let summer_end = NaiveDate::from_ymd_opt(year as i32, 8, 15)
.unwrap()
.ordinal();
let fall_start = NaiveDate::from_ymd_opt(year as i32, 8, 18)
.unwrap()
.ordinal();
let fall_end = NaiveDate::from_ymd_opt(year as i32, 12, 10)
.unwrap()
.ordinal();
SeasonRanges {
spring: YearDayRange {
start: spring_start,
end: spring_end,
},
summer: YearDayRange {
start: summer_start,
end: summer_end,
},
fall: YearDayRange {
start: fall_start,
end: fall_end,
},
}
}
/// URL-friendly slug, e.g. "spring-2026"
pub fn slug(&self) -> String {
format!("{}-{}", self.season.slug(), self.year)
}
/// Parse a slug like "spring-2026" into a Term
pub fn from_slug(s: &str) -> Option<Self> {
let (season_str, year_str) = s.rsplit_once('-')?;
let season = Season::from_slug(season_str)?;
let year = year_str.parse::<u32>().ok()?;
if !VALID_YEARS.contains(&year) {
return None;
}
Some(Term { year, season })
}
/// Human-readable description, e.g. "Spring 2026"
pub fn description(&self) -> String {
format!("{} {}", self.season, self.year)
}
/// Resolve a string that is either a term code ("202620") or a slug ("spring-2026") to a term code.
pub fn resolve_to_code(s: &str) -> Option<String> {
// Try parsing as a 6-digit code first
if let Ok(term) = s.parse::<Term>() {
return Some(term.to_string());
}
// Try parsing as a slug
Term::from_slug(s).map(|t| t.to_string())
}
}
impl TermPoint {
    /// Borrows the term carried by this status: the current term when in a term,
    /// the upcoming term when between terms.
    pub fn inner(&self) -> &Term {
        match self {
            TermPoint::InTerm { current: term } | TermPoint::BetweenTerms { next: term } => term,
        }
    }
}
/// Represents the start and end day of each term within a year.
///
/// One [`YearDayRange`] per season, all measured in day-of-year ordinals of the
/// same calendar year.
#[derive(Debug, Clone)]
struct SeasonRanges {
spring: YearDayRange,
summer: YearDayRange,
fall: YearDayRange,
}
/// Represents the start and end day of a term within a year.
///
/// Both values are day-of-year ordinals; `get_season_ranges` documents the range as
/// inclusive of `start` and exclusive of `end`.
#[derive(Debug, Clone)]
struct YearDayRange {
start: u32,
end: u32,
}
impl std::fmt::Display for Term {
/// Returns the term in the format YYYYXX, where YYYY is the year and XX is the season code
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{year}{season}",
year = self.year,
season = self.season.to_str()
)
}
}
impl Season {
/// Returns the season code as a string
fn to_str(self) -> &'static str {
match self {
Season::Fall => "10",
Season::Spring => "20",
Season::Summer => "30",
}
}
/// Returns the lowercase slug for URL-friendly representation
pub fn slug(self) -> &'static str {
match self {
Season::Fall => "fall",
Season::Spring => "spring",
Season::Summer => "summer",
}
}
/// Parse a slug like "spring", "summer", "fall" into a Season
pub fn from_slug(s: &str) -> Option<Self> {
match s {
"fall" => Some(Season::Fall),
"spring" => Some(Season::Spring),
"summer" => Some(Season::Summer),
_ => None,
}
}
}
impl std::fmt::Display for Season {
    /// Writes the capitalized season name: "Fall", "Spring" or "Summer".
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Season::Fall => "Fall",
            Season::Spring => "Spring",
            Season::Summer => "Summer",
        };
        f.write_str(label)
    }
}
impl FromStr for Season {
    type Err = anyhow::Error;

    /// Parses a two-digit season code: "10" (Fall), "20" (Spring) or "30" (Summer).
    ///
    /// # Errors
    ///
    /// Any other input is rejected with an "Invalid season" error naming the input.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "10" => Ok(Season::Fall),
            "20" => Ok(Season::Spring),
            "30" => Ok(Season::Summer),
            _ => Err(anyhow::anyhow!("Invalid season: {s}")),
        }
    }
}
impl FromStr for Term {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.len() != 6 {
return Err(anyhow::anyhow!("Term string must be 6 characters"));
}
let year = s[0..4].parse::<u32>().context("Failed to parse year")?;
if !VALID_YEARS.contains(&year) {
return Err(anyhow::anyhow!("Year out of range"));
}
let season =
Season::from_str(&s[4..6]).map_err(|e| anyhow::anyhow!("Invalid season: {}", e))?;
Ok(Term { year, season })
}
}
// Unit tests for term parsing, formatting and date-to-term resolution.
#[cfg(test)]
mod tests {
use super::*;
// --- Season::from_str ---
#[test]
fn test_season_from_str_fall() {
assert_eq!(Season::from_str("10").unwrap(), Season::Fall);
}
#[test]
fn test_season_from_str_spring() {
assert_eq!(Season::from_str("20").unwrap(), Season::Spring);
}
#[test]
fn test_season_from_str_summer() {
assert_eq!(Season::from_str("30").unwrap(), Season::Summer);
}
#[test]
fn test_season_from_str_invalid() {
// Unknown codes, a truncated code and the empty string must all be rejected.
for input in ["00", "40", "1", ""] {
assert!(
Season::from_str(input).is_err(),
"expected Err for {input:?}"
);
}
}
// --- Season Display ---
#[test]
fn test_season_display() {
assert_eq!(Season::Fall.to_string(), "Fall");
assert_eq!(Season::Spring.to_string(), "Spring");
assert_eq!(Season::Summer.to_string(), "Summer");
}
#[test]
fn test_season_to_str_roundtrip() {
for season in [Season::Fall, Season::Spring, Season::Summer] {
assert_eq!(Season::from_str(season.to_str()).unwrap(), season);
}
}
// --- Term::from_str ---
#[test]
fn test_term_from_str_valid_fall() {
let term = Term::from_str("202510").unwrap();
assert_eq!(term.year, 2025);
assert_eq!(term.season, Season::Fall);
}
#[test]
fn test_term_from_str_valid_spring() {
let term = Term::from_str("202520").unwrap();
assert_eq!(term.year, 2025);
assert_eq!(term.season, Season::Spring);
}
#[test]
fn test_term_from_str_valid_summer() {
let term = Term::from_str("202530").unwrap();
assert_eq!(term.year, 2025);
assert_eq!(term.season, Season::Summer);
}
#[test]
fn test_term_from_str_too_short() {
assert!(Term::from_str("20251").is_err());
}
#[test]
fn test_term_from_str_too_long() {
assert!(Term::from_str("2025100").is_err());
}
#[test]
fn test_term_from_str_empty() {
assert!(Term::from_str("").is_err());
}
#[test]
fn test_term_from_str_invalid_year_chars() {
assert!(Term::from_str("abcd10").is_err());
}
#[test]
fn test_term_from_str_invalid_season() {
assert!(Term::from_str("202540").is_err());
}
#[test]
fn test_term_from_str_year_below_range() {
// 2000 is below the lower bound of VALID_YEARS (2007).
assert!(Term::from_str("200010").is_err());
}
#[test]
fn test_term_display_roundtrip() {
for code in ["202510", "202520", "202530"] {
let term = Term::from_str(code).unwrap();
assert_eq!(term.to_string(), code);
}
}
// --- Term::get_status_for_date ---
// The dates below are chosen well inside (or well between) the season windows
// defined in `get_season_ranges`; the window boundaries themselves are not exercised.
#[test]
fn test_status_mid_spring() {
let date = NaiveDate::from_ymd_opt(2025, 2, 15).unwrap();
let status = Term::get_status_for_date(date);
assert!(
matches!(status, TermPoint::InTerm { current } if current.season == Season::Spring)
);
}
#[test]
fn test_status_mid_summer() {
let date = NaiveDate::from_ymd_opt(2025, 7, 1).unwrap();
let status = Term::get_status_for_date(date);
assert!(
matches!(status, TermPoint::InTerm { current } if current.season == Season::Summer)
);
}
#[test]
fn test_status_mid_fall() {
let date = NaiveDate::from_ymd_opt(2025, 10, 15).unwrap();
let status = Term::get_status_for_date(date);
assert!(matches!(status, TermPoint::InTerm { current } if current.season == Season::Fall));
}
#[test]
fn test_status_between_fall_and_spring() {
let date = NaiveDate::from_ymd_opt(2025, 1, 1).unwrap();
let status = Term::get_status_for_date(date);
assert!(
matches!(status, TermPoint::BetweenTerms { next } if next.season == Season::Spring)
);
}
#[test]
fn test_status_between_spring_and_summer() {
let date = NaiveDate::from_ymd_opt(2025, 5, 15).unwrap();
let status = Term::get_status_for_date(date);
assert!(
matches!(status, TermPoint::BetweenTerms { next } if next.season == Season::Summer)
);
}
#[test]
fn test_status_between_summer_and_fall() {
let date = NaiveDate::from_ymd_opt(2025, 8, 16).unwrap();
let status = Term::get_status_for_date(date);
assert!(matches!(status, TermPoint::BetweenTerms { next } if next.season == Season::Fall));
}
#[test]
fn test_status_after_fall_end() {
let date = NaiveDate::from_ymd_opt(2025, 12, 15).unwrap();
let status = Term::get_status_for_date(date);
assert!(
matches!(status, TermPoint::BetweenTerms { next } if next.season == Season::Spring)
);
// Year should roll over: fall 2025 ends → next spring is 2026
let next_term = status.inner();
assert_eq!(next_term.year, 2026);
}
// --- TermPoint::inner ---
#[test]
fn test_term_point_inner() {
let in_term = TermPoint::InTerm {
current: Term {
year: 2025,
season: Season::Fall,
},
};
assert_eq!(
in_term.inner(),
&Term {
year: 2025,
season: Season::Fall
}
);
let between = TermPoint::BetweenTerms {
next: Term {
year: 2026,
season: Season::Spring,
},
};
assert_eq!(
between.inner(),
&Term {
year: 2026,
season: Season::Spring
}
);
}
// --- Season::slug / from_slug ---
#[test]
fn test_season_slug_roundtrip() {
for season in [Season::Fall, Season::Spring, Season::Summer] {
assert_eq!(Season::from_slug(season.slug()), Some(season));
}
}
#[test]
fn test_season_from_slug_invalid() {
assert_eq!(Season::from_slug("winter"), None);
assert_eq!(Season::from_slug(""), None);
assert_eq!(Season::from_slug("Spring"), None); // case-sensitive
}
// --- Term::slug / from_slug ---
#[test]
fn test_term_slug() {
let term = Term {
year: 2026,
season: Season::Spring,
};
assert_eq!(term.slug(), "spring-2026");
}
#[test]
fn test_term_from_slug_roundtrip() {
for code in ["202510", "202520", "202530"] {
let term = Term::from_str(code).unwrap();
let slug = term.slug();
let parsed = Term::from_slug(&slug).unwrap();
assert_eq!(parsed, term);
}
}
#[test]
fn test_term_from_slug_invalid() {
// Unknown season, missing "-year" part, and empty input.
assert_eq!(Term::from_slug("winter-2026"), None);
assert_eq!(Term::from_slug("spring"), None);
assert_eq!(Term::from_slug(""), None);
}
// --- Term::description ---
#[test]
fn test_term_description() {
let term = Term {
year: 2026,
season: Season::Spring,
};
assert_eq!(term.description(), "Spring 2026");
}
// --- Term::resolve_to_code ---
#[test]
fn test_resolve_to_code_from_code() {
assert_eq!(Term::resolve_to_code("202620"), Some("202620".to_string()));
}
#[test]
fn test_resolve_to_code_from_slug() {
assert_eq!(
Term::resolve_to_code("spring-2026"),
Some("202620".to_string())
);
}
#[test]
fn test_resolve_to_code_invalid() {
assert_eq!(Term::resolve_to_code("garbage"), None);
}
}
+519
View File
@@ -0,0 +1,519 @@
//! Query builder for Banner API course searches.
use std::collections::HashMap;
use std::time::Duration;
/// Range of two integers.
///
/// Used by [`SearchQuery`] for the course number range; nothing here enforces
/// `low <= high`.
#[derive(Debug, Clone)]
pub struct Range {
pub low: i32,
pub high: i32,
}
/// Builder for constructing Banner API search queries.
///
/// Every filter is optional; unset filters are left out of the generated parameters.
/// Note that the derived `Default` leaves `max_results` at 0, whereas
/// `SearchQuery::new` starts from 8.
#[derive(Debug, Clone, Default)]
#[allow(dead_code)]
pub struct SearchQuery {
// Emitted as `txt_subject`.
subject: Option<String>,
// Emitted as `txt_courseTitle` (trimmed).
title: Option<String>,
// Emitted as `txt_keywordlike`, joined with single spaces.
keywords: Option<Vec<String>>,
// Emitted as `txt_courseReferenceNumber`.
course_reference_number: Option<String>,
// Only `Some(true)` produces a parameter (`chk_open_only=true`).
open_only: Option<bool>,
// Emitted as `txt_partOfTerm`, comma-joined.
term_part: Option<Vec<String>>,
// Emitted as `txt_campus`, comma-joined.
campus: Option<Vec<String>>,
// NOTE(review): stored by the builder but not read by `to_params` or `Display`.
instructional_method: Option<Vec<String>>,
// Emitted as `txt_attribute`, comma-joined.
attributes: Option<Vec<String>>,
// Emitted as `txt_instructor`, comma-joined.
instructor: Option<Vec<u64>>,
// Time of day expressed as a duration; emitted as `select_start_hour/min/ampm`.
start_time: Option<Duration>,
// Time of day expressed as a duration; emitted as `select_end_hour/min/ampm`.
end_time: Option<Duration>,
// Emitted as `txt_credithourlow`.
min_credits: Option<i32>,
// Emitted as `txt_credithourhigh`.
max_credits: Option<i32>,
// Emitted as `pageOffset`.
offset: i32,
// Emitted as `pageMaxSize`; the `max_results` setter clamps it to 1..=500.
max_results: i32,
// Emitted as `txt_course_number_range` / `txt_course_number_range_to`.
course_number_range: Option<Range>,
}
#[allow(dead_code)]
impl SearchQuery {
/// Creates a new SearchQuery with default values
pub fn new() -> Self {
Self {
max_results: 8,
offset: 0,
..Default::default()
}
}
/// Sets the subject for the query
pub fn subject<S: Into<String>>(mut self, subject: S) -> Self {
self.subject = Some(subject.into());
self
}
/// Sets the title for the query
pub fn title<S: Into<String>>(mut self, title: S) -> Self {
self.title = Some(title.into());
self
}
/// Sets the course reference number (CRN) for the query
pub fn course_reference_number<S: Into<String>>(mut self, crn: S) -> Self {
self.course_reference_number = Some(crn.into());
self
}
/// Sets the keywords for the query
pub fn keywords(mut self, keywords: Vec<String>) -> Self {
self.keywords = Some(keywords);
self
}
/// Adds a keyword to the query
pub fn keyword<S: Into<String>>(mut self, keyword: S) -> Self {
match &mut self.keywords {
Some(keywords) => keywords.push(keyword.into()),
None => self.keywords = Some(vec![keyword.into()]),
}
self
}
/// Sets whether to search for open courses only
pub fn open_only(mut self, open_only: bool) -> Self {
self.open_only = Some(open_only);
self
}
/// Sets the term part for the query
pub fn term_part(mut self, term_part: Vec<String>) -> Self {
self.term_part = Some(term_part);
self
}
/// Sets the campuses for the query
pub fn campus(mut self, campus: Vec<String>) -> Self {
self.campus = Some(campus);
self
}
/// Sets the instructional methods for the query
pub fn instructional_method(mut self, instructional_method: Vec<String>) -> Self {
self.instructional_method = Some(instructional_method);
self
}
/// Sets the attributes for the query
pub fn attributes(mut self, attributes: Vec<String>) -> Self {
self.attributes = Some(attributes);
self
}
/// Sets the instructors for the query
pub fn instructor(mut self, instructor: Vec<u64>) -> Self {
self.instructor = Some(instructor);
self
}
/// Sets the start time for the query
pub fn start_time(mut self, start_time: Duration) -> Self {
self.start_time = Some(start_time);
self
}
/// Sets the end time for the query
pub fn end_time(mut self, end_time: Duration) -> Self {
self.end_time = Some(end_time);
self
}
/// Sets the credit range for the query
pub fn credits(mut self, low: i32, high: i32) -> Self {
self.min_credits = Some(low);
self.max_credits = Some(high);
self
}
/// Sets the minimum credits for the query
pub fn min_credits(mut self, value: i32) -> Self {
self.min_credits = Some(value);
self
}
/// Sets the maximum credits for the query
pub fn max_credits(mut self, value: i32) -> Self {
self.max_credits = Some(value);
self
}
/// Sets the course number range for the query
pub fn course_numbers(mut self, low: i32, high: i32) -> Self {
self.course_number_range = Some(Range { low, high });
self
}
/// Sets the offset for pagination
pub fn offset(mut self, offset: i32) -> Self {
self.offset = offset;
self
}
/// Sets the maximum number of results to return
/// Clamped to a maximum of 500 to prevent excessive API load
pub fn max_results(mut self, max_results: i32) -> Self {
self.max_results = max_results.clamp(1, 500);
self
}
/// Gets the subject field
pub fn get_subject(&self) -> Option<&String> {
self.subject.as_ref()
}
/// Gets the max_results field
pub fn get_max_results(&self) -> i32 {
self.max_results
}
/// Converts the query into URL parameters for the Banner API
pub fn to_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
if let Some(ref subject) = self.subject {
params.insert("txt_subject".to_string(), subject.clone());
}
if let Some(ref title) = self.title {
params.insert("txt_courseTitle".to_string(), title.trim().to_string());
}
if let Some(ref crn) = self.course_reference_number {
params.insert("txt_courseReferenceNumber".to_string(), crn.clone());
}
if let Some(ref keywords) = self.keywords {
params.insert("txt_keywordlike".to_string(), keywords.join(" "));
}
if self.open_only == Some(true) {
params.insert("chk_open_only".to_string(), "true".to_string());
}
if let Some(ref term_part) = self.term_part {
params.insert("txt_partOfTerm".to_string(), term_part.join(","));
}
if let Some(ref campus) = self.campus {
params.insert("txt_campus".to_string(), campus.join(","));
}
if let Some(ref attributes) = self.attributes {
params.insert("txt_attribute".to_string(), attributes.join(","));
}
if let Some(ref instructor) = self.instructor {
let instructor_str = instructor
.iter()
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join(",");
params.insert("txt_instructor".to_string(), instructor_str);
}
if let Some(start_time) = self.start_time {
let (hour, minute, meridiem) = format_time_parameter(start_time);
params.insert("select_start_hour".to_string(), hour);
params.insert("select_start_min".to_string(), minute);
params.insert("select_start_ampm".to_string(), meridiem);
}
if let Some(end_time) = self.end_time {
let (hour, minute, meridiem) = format_time_parameter(end_time);
params.insert("select_end_hour".to_string(), hour);
params.insert("select_end_min".to_string(), minute);
params.insert("select_end_ampm".to_string(), meridiem);
}
if let Some(min_credits) = self.min_credits {
params.insert("txt_credithourlow".to_string(), min_credits.to_string());
}
if let Some(max_credits) = self.max_credits {
params.insert("txt_credithourhigh".to_string(), max_credits.to_string());
}
if let Some(ref range) = self.course_number_range {
params.insert("txt_course_number_range".to_string(), range.low.to_string());
params.insert(
"txt_course_number_range_to".to_string(),
range.high.to_string(),
);
}
params.insert("pageOffset".to_string(), self.offset.to_string());
params.insert("pageMaxSize".to_string(), self.max_results.to_string());
params
}
}
/// Formats a Duration into hour, minute, and meridiem strings for Banner API.
///
/// The duration is read as the time elapsed since midnight and truncated to whole
/// minutes. Values of 24 hours or more wrap around to the time of day (25h is
/// treated as 1:00 AM) instead of producing a bogus "PM" reading.
///
/// Uses 12-hour format: midnight = 12:00 AM, noon = 12:00 PM. Neither the hour nor
/// the minute is zero-padded.
fn format_time_parameter(duration: Duration) -> (String, String, String) {
    const MINUTES_PER_DAY: u64 = 24 * 60;

    let minute_of_day = (duration.as_secs() / 60) % MINUTES_PER_DAY;
    let hours = minute_of_day / 60;
    let minutes = minute_of_day % 60;

    let meridiem = if hours >= 12 { "PM" } else { "AM" };
    // 0 and 12 o'clock are both shown as "12" on a 12-hour clock.
    let hour_12 = match hours % 12 {
        0 => 12,
        h => h,
    };

    (
        hour_12.to_string(),
        minutes.to_string(),
        meridiem.to_string(),
    )
}
impl std::fmt::Display for SearchQuery {
    /// Renders the query as a comma-separated list of `key=value` pairs.
    ///
    /// Only filters that are set are listed; `offset` and `maxResults` always come
    /// last. `openOnly` is shown only when it is `Some(true)`, and the
    /// instructional-method filter is not shown. Times are written as
    /// `hour:minute:meridiem` without zero padding.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut parts = Vec::new();

        if let Some(ref subject) = self.subject {
            parts.push(format!("subject={subject}"));
        }
        if let Some(ref title) = self.title {
            parts.push(format!("title={}", title.trim()));
        }
        if let Some(ref keywords) = self.keywords {
            parts.push(format!("keywords={}", keywords.join(" ")));
        }
        if self.open_only == Some(true) {
            parts.push("openOnly=true".to_string());
        }
        if let Some(ref term_part) = self.term_part {
            parts.push(format!("termPart={}", term_part.join(",")));
        }
        if let Some(ref campus) = self.campus {
            parts.push(format!("campus={}", campus.join(",")));
        }
        if let Some(ref attributes) = self.attributes {
            parts.push(format!("attributes={}", attributes.join(",")));
        }
        if let Some(ref instructor) = self.instructor {
            let instructor_str = instructor
                .iter()
                .map(|i| i.to_string())
                .collect::<Vec<_>>()
                .join(",");
            parts.push(format!("instructor={instructor_str}"));
        }
        if let Some(start_time) = self.start_time {
            let (hour, minute, meridiem) = format_time_parameter(start_time);
            parts.push(format!("startTime={hour}:{minute}:{meridiem}"));
        }
        if let Some(end_time) = self.end_time {
            let (hour, minute, meridiem) = format_time_parameter(end_time);
            parts.push(format!("endTime={hour}:{minute}:{meridiem}"));
        }
        if let Some(min_credits) = self.min_credits {
            parts.push(format!("minCredits={min_credits}"));
        }
        if let Some(max_credits) = self.max_credits {
            parts.push(format!("maxCredits={max_credits}"));
        }
        if let Some(ref range) = self.course_number_range {
            parts.push(format!("courseNumberRange={}-{}", range.low, range.high));
        }

        parts.push(format!("offset={}", self.offset));
        parts.push(format!("maxResults={}", self.max_results));
        write!(f, "{}", parts.join(", "))
    }
}

// Kept as the last item of the file so that no production code hides below the tests.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_new_defaults() {
        let q = SearchQuery::new();
        assert_eq!(q.get_max_results(), 8);
        assert!(q.get_subject().is_none());
        let params = q.to_params();
        assert_eq!(params.get("pageMaxSize").unwrap(), "8");
        assert_eq!(params.get("pageOffset").unwrap(), "0");
        assert_eq!(params.len(), 2);
    }

    #[test]
    fn test_subject_param() {
        let params = SearchQuery::new().subject("CS").to_params();
        assert_eq!(params.get("txt_subject").unwrap(), "CS");
    }

    #[test]
    fn test_title_trims_whitespace() {
        let params = SearchQuery::new().title(" Intro to CS ").to_params();
        assert_eq!(params.get("txt_courseTitle").unwrap(), "Intro to CS");
    }

    #[test]
    fn test_crn_param() {
        let params = SearchQuery::new()
            .course_reference_number("12345")
            .to_params();
        assert_eq!(params.get("txt_courseReferenceNumber").unwrap(), "12345");
    }

    #[test]
    fn test_keywords_joined_with_spaces() {
        let params = SearchQuery::new()
            .keyword("data")
            .keyword("science")
            .to_params();
        assert_eq!(params.get("txt_keywordlike").unwrap(), "data science");
    }

    #[test]
    fn test_keywords_vec() {
        let params = SearchQuery::new()
            .keywords(vec!["machine".into(), "learning".into()])
            .to_params();
        assert_eq!(params.get("txt_keywordlike").unwrap(), "machine learning");
    }

    #[test]
    fn test_open_only() {
        let params = SearchQuery::new().open_only(true).to_params();
        assert_eq!(params.get("chk_open_only").unwrap(), "true");
        // open_only(false) should NOT set the param
        let params2 = SearchQuery::new().open_only(false).to_params();
        assert!(params2.get("chk_open_only").is_none());
    }

    #[test]
    fn test_credits_range() {
        let params = SearchQuery::new().credits(3, 6).to_params();
        assert_eq!(params.get("txt_credithourlow").unwrap(), "3");
        assert_eq!(params.get("txt_credithourhigh").unwrap(), "6");
    }

    #[test]
    fn test_course_number_range() {
        let params = SearchQuery::new().course_numbers(3000, 3999).to_params();
        assert_eq!(params.get("txt_course_number_range").unwrap(), "3000");
        assert_eq!(params.get("txt_course_number_range_to").unwrap(), "3999");
    }

    #[test]
    fn test_pagination() {
        let params = SearchQuery::new().offset(20).max_results(10).to_params();
        assert_eq!(params.get("pageOffset").unwrap(), "20");
        assert_eq!(params.get("pageMaxSize").unwrap(), "10");
    }

    #[test]
    fn test_format_time_9am() {
        let (h, m, mer) = format_time_parameter(Duration::from_secs(9 * 3600));
        assert_eq!(h, "9");
        assert_eq!(m, "0");
        assert_eq!(mer, "AM");
    }

    #[test]
    fn test_format_time_noon() {
        let (h, m, mer) = format_time_parameter(Duration::from_secs(12 * 3600));
        assert_eq!(h, "12");
        assert_eq!(m, "0");
        assert_eq!(mer, "PM");
    }

    #[test]
    fn test_format_time_1pm() {
        let (h, m, mer) = format_time_parameter(Duration::from_secs(13 * 3600));
        assert_eq!(h, "1");
        assert_eq!(m, "0");
        assert_eq!(mer, "PM");
    }

    #[test]
    fn test_format_time_930am() {
        let (h, m, mer) = format_time_parameter(Duration::from_secs(9 * 3600 + 30 * 60));
        assert_eq!(h, "9");
        assert_eq!(m, "30");
        assert_eq!(mer, "AM");
    }

    #[test]
    fn test_format_time_midnight() {
        let (h, m, mer) = format_time_parameter(Duration::from_secs(0));
        assert_eq!(h, "12");
        assert_eq!(m, "0");
        assert_eq!(mer, "AM");
    }

    #[test]
    fn test_time_params_in_query() {
        let params = SearchQuery::new()
            .start_time(Duration::from_secs(9 * 3600))
            .end_time(Duration::from_secs(17 * 3600))
            .to_params();
        assert_eq!(params.get("select_start_hour").unwrap(), "9");
        assert_eq!(params.get("select_start_ampm").unwrap(), "AM");
        assert_eq!(params.get("select_end_hour").unwrap(), "5");
        assert_eq!(params.get("select_end_ampm").unwrap(), "PM");
    }

    #[test]
    fn test_multi_value_params() {
        let params = SearchQuery::new()
            .campus(vec!["MAIN".into(), "DT".into()])
            .attributes(vec!["HONORS".into()])
            .instructor(vec![1001, 1002])
            .to_params();
        assert_eq!(params.get("txt_campus").unwrap(), "MAIN,DT");
        assert_eq!(params.get("txt_attribute").unwrap(), "HONORS");
        assert_eq!(params.get("txt_instructor").unwrap(), "1001,1002");
    }

    #[test]
    fn test_display_minimal() {
        let display = SearchQuery::new().to_string();
        assert_eq!(display, "offset=0, maxResults=8");
    }

    #[test]
    fn test_display_with_fields() {
        let display = SearchQuery::new()
            .subject("CS")
            .open_only(true)
            .max_results(10)
            .to_string();
        assert!(display.contains("subject=CS"));
        assert!(display.contains("openOnly=true"));
        assert!(display.contains("maxResults=10"));
    }

    #[test]
    fn test_full_query_param_count() {
        let params = SearchQuery::new()
            .subject("CS")
            .title("Intro")
            .course_reference_number("12345")
            .keyword("programming")
            .open_only(true)
            .credits(3, 4)
            .course_numbers(1000, 1999)
            .offset(0)
            .max_results(25)
            .to_params();
        // subject, title, crn, keyword, open_only, min_credits, max_credits,
        // course_number_range, course_number_range_to, pageOffset, pageMaxSize = 11
        assert_eq!(params.len(), 11);
    }
}
+84
View File
@@ -0,0 +1,84 @@
//! HTTP middleware that enforces rate limiting for Banner API requests.
use crate::banner::rate_limiter::{RequestType, SharedRateLimiter};
use http::Extensions;
use reqwest::{Request, Response};
use reqwest_middleware::{Middleware, Next};
use tracing::debug;
use url::Url;
/// Middleware that enforces rate limiting based on request URL patterns.
///
/// Each outgoing request is classified by its URL path into a [`RequestType`] and
/// then waits on the shared limiter for that type before being sent.
pub struct RateLimitMiddleware {
/// Shared limiter handle that every request waits on.
rate_limiter: SharedRateLimiter,
}
impl RateLimitMiddleware {
    /// Creates a new rate limiting middleware around the given shared limiter.
    pub fn new(rate_limiter: SharedRateLimiter) -> Self {
        Self { rate_limiter }
    }

    /// Returns a human-readable description of the rate limit for a request type.
    ///
    /// NOTE(review): these strings are fixed text used for logging only; they are
    /// not derived from the limiter's actual configuration — confirm they match it.
    fn get_rate_limit_description(request_type: RequestType) -> &'static str {
        match request_type {
            RequestType::Session => "6 rpm (~10s interval)",
            RequestType::Search => "30 rpm (~2s interval)",
            RequestType::Metadata => "20 rpm (~3s interval)",
            RequestType::Reset => "10 rpm (~6s interval)",
        }
    }

    /// Determines the request type based on the URL path.
    ///
    /// The path is tested for marker substrings in this order:
    ///
    /// 1. session markers (`/registration`, `/selfServiceMenu`, `/term/termSelection`);
    /// 2. metadata markers (`/getTerms`, `/getSubjects`, `/getCampuses`);
    /// 3. the reset marker (`/resetDataForm`);
    /// 4. everything else, including `/searchResults` and `/classSearch`, is a search.
    ///
    /// The specific endpoint markers are tested before the generic `/classSearch`
    /// prefix so that a metadata or reset endpoint nested under `/classSearch/` is
    /// not swallowed by the search category.
    fn get_request_type(url: &Url) -> RequestType {
        const SESSION_MARKERS: [&str; 3] =
            ["/registration", "/selfServiceMenu", "/term/termSelection"];
        const METADATA_MARKERS: [&str; 3] = ["/getTerms", "/getSubjects", "/getCampuses"];
        const RESET_MARKER: &str = "/resetDataForm";

        let path = url.path();
        let contains_any = |markers: &[&str]| markers.iter().any(|marker| path.contains(*marker));

        if contains_any(&SESSION_MARKERS) {
            RequestType::Session
        } else if contains_any(&METADATA_MARKERS) {
            RequestType::Metadata
        } else if path.contains(RESET_MARKER) {
            RequestType::Reset
        } else {
            // Explicit search endpoints and unknown endpoints both use the search limit
            RequestType::Search
        }
    }
}
#[async_trait::async_trait]
impl Middleware for RateLimitMiddleware {
    /// Waits for the limiter's permission for this request's category, then forwards
    /// the request to the next handler in the chain.
    ///
    /// A debug event is emitted only when the wait lasted at least 500 ms.
    async fn handle(
        &self,
        req: Request,
        extensions: &mut Extensions,
        next: Next<'_>,
    ) -> std::result::Result<Response, reqwest_middleware::Error> {
        let request_type = Self::get_request_type(req.url());

        let wait_started = std::time::Instant::now();
        self.rate_limiter.wait_for_permission(request_type).await;
        let waited = wait_started.elapsed();

        // Short waits are routine; only a significant delay is worth a log line.
        if waited >= std::time::Duration::from_millis(500) {
            let description = Self::get_rate_limit_description(request_type);
            debug!(
                request_type = ?request_type,
                wait_ms = waited.as_millis(),
                rate_limit = description,
                "Rate limit caused delay"
            );
        }

        next.run(req, extensions).await
    }
}
+210
View File
@@ -0,0 +1,210 @@
//! Rate limiting for Banner API requests to prevent overwhelming the server.
use crate::config::RateLimitingConfig;
use governor::{
Quota, RateLimiter,
clock::DefaultClock,
state::{InMemoryState, NotKeyed},
};
use std::num::NonZeroU32;
use std::sync::Arc;
use std::time::Duration;
/// Different types of Banner API requests with different rate limits.
///
/// `BannerRateLimiter` keeps one independent limiter per variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RequestType {
/// Session creation and management (very conservative)
Session,
/// Course search requests (moderate)
Search,
/// Term and metadata requests (moderate)
Metadata,
/// Data form resets (low priority)
Reset,
}
/// A rate limiter that manages different request types with different limits.
///
/// Holds one direct (un-keyed), in-memory limiter per [`RequestType`]; waiting on
/// one request type does not consume quota of another.
pub struct BannerRateLimiter {
// Used for `RequestType::Session`.
session_limiter: RateLimiter<NotKeyed, InMemoryState, DefaultClock>,
// Used for `RequestType::Search`.
search_limiter: RateLimiter<NotKeyed, InMemoryState, DefaultClock>,
// Used for `RequestType::Metadata`.
metadata_limiter: RateLimiter<NotKeyed, InMemoryState, DefaultClock>,
// Used for `RequestType::Reset`.
reset_limiter: RateLimiter<NotKeyed, InMemoryState, DefaultClock>,
}
impl BannerRateLimiter {
    /// Creates a new rate limiter with the given configuration.
    ///
    /// Each request type gets its own quota: one request per `60s / rpm`, with the
    /// configured burst allowance shared by all four quotas.
    ///
    /// # Panics
    ///
    /// Panics when any `*_rpm` value or `burst_allowance` is zero.
    pub fn new(config: RateLimitingConfig) -> Self {
        // Translates a requests-per-minute figure into a quota.
        let quota_for = |requests_per_minute: u32| {
            let period = Duration::from_secs(60) / requests_per_minute;
            Quota::with_period(period)
                .unwrap()
                .allow_burst(NonZeroU32::new(config.burst_allowance).unwrap())
        };

        let session_quota = quota_for(config.session_rpm);
        let search_quota = quota_for(config.search_rpm);
        let metadata_quota = quota_for(config.metadata_rpm);
        let reset_quota = quota_for(config.reset_rpm);

        Self {
            session_limiter: RateLimiter::direct(session_quota),
            search_limiter: RateLimiter::direct(search_quota),
            metadata_limiter: RateLimiter::direct(metadata_quota),
            reset_limiter: RateLimiter::direct(reset_quota),
        }
    }

    /// Waits until the limiter for the given request type allows one more request.
    ///
    /// Nothing is logged here; the middleware reports long waits.
    pub async fn wait_for_permission(&self, request_type: RequestType) {
        let selected = match request_type {
            RequestType::Session => &self.session_limiter,
            RequestType::Search => &self.search_limiter,
            RequestType::Metadata => &self.metadata_limiter,
            RequestType::Reset => &self.reset_limiter,
        };
        selected.until_ready().await;
    }
}
impl Default for BannerRateLimiter {
    /// Builds a limiter from `RateLimitingConfig::default()`.
    fn default() -> Self {
        let default_config = RateLimitingConfig::default();
        Self::new(default_config)
    }
}
/// A shared rate limiter instance
///
/// Reference-counted handle, so the same limiter state can be cloned cheaply
/// into every component that needs it.
pub type SharedRateLimiter = Arc<BannerRateLimiter>;
/// Creates a new shared rate limiter with custom configuration
pub fn create_shared_rate_limiter(config: Option<RateLimitingConfig>) -> SharedRateLimiter {
Arc::new(BannerRateLimiter::new(config.unwrap_or_default()))
}
// Unit tests for `BannerRateLimiter` construction and waiting.
#[cfg(test)]
mod tests {
use super::*;
// Construction with the default configuration must not panic.
#[test]
fn test_new_with_default_config() {
let _limiter = BannerRateLimiter::new(RateLimitingConfig::default());
}
// Construction with an arbitrary, fully specified configuration must not panic.
#[test]
fn test_new_with_custom_config() {
let config = RateLimitingConfig {
session_rpm: 10,
search_rpm: 30,
metadata_rpm: 20,
reset_rpm: 15,
burst_allowance: 5,
};
let _limiter = BannerRateLimiter::new(config);
}
// The smallest non-zero value for every field is accepted.
#[test]
fn test_new_with_minimum_valid_values() {
let config = RateLimitingConfig {
session_rpm: 1,
search_rpm: 1,
metadata_rpm: 1,
reset_rpm: 1,
burst_allowance: 1,
};
let _limiter = BannerRateLimiter::new(config);
}
// Large rpm values still yield a non-zero replenish period and are accepted.
#[test]
fn test_new_with_high_rpm_values() {
let config = RateLimitingConfig {
session_rpm: 10000,
search_rpm: 10000,
metadata_rpm: 10000,
reset_rpm: 10000,
burst_allowance: 1,
};
let _limiter = BannerRateLimiter::new(config);
}
// `Default::default()` must construct without panicking.
#[test]
fn test_default_impl() {
let _limiter = BannerRateLimiter::default();
}
// The following tests pin the documented failure mode: a zero in any rpm field
// or in the burst allowance makes `new` panic.
#[test]
#[should_panic]
fn test_new_panics_on_zero_session_rpm() {
let config = RateLimitingConfig {
session_rpm: 0,
..RateLimitingConfig::default()
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
#[should_panic]
fn test_new_panics_on_zero_search_rpm() {
let config = RateLimitingConfig {
search_rpm: 0,
..RateLimitingConfig::default()
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
#[should_panic]
fn test_new_panics_on_zero_metadata_rpm() {
let config = RateLimitingConfig {
metadata_rpm: 0,
..RateLimitingConfig::default()
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
#[should_panic]
fn test_new_panics_on_zero_reset_rpm() {
let config = RateLimitingConfig {
reset_rpm: 0,
..RateLimitingConfig::default()
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
#[should_panic]
fn test_new_panics_on_zero_burst_allowance() {
let config = RateLimitingConfig {
burst_allowance: 0,
..RateLimitingConfig::default()
};
let _limiter = BannerRateLimiter::new(config);
}
// With a fresh limiter, the first request of every type must be admitted
// within the one-second timeout.
#[tokio::test]
async fn test_wait_for_permission_completes() {
let limiter = BannerRateLimiter::default();
let timeout_duration = std::time::Duration::from_secs(1);
for request_type in [
RequestType::Session,
RequestType::Search,
RequestType::Metadata,
RequestType::Reset,
] {
let result =
tokio::time::timeout(timeout_duration, limiter.wait_for_permission(request_type))
.await;
assert!(
result.is_ok(),
"wait_for_permission timed out for {:?}",
request_type
);
}
}
}
+623
View File
@@ -0,0 +1,623 @@
//! Session management for Banner API.
use crate::banner::BannerTerm;
use crate::banner::models::Term;
use anyhow::{Context, Result};
use cookie::Cookie;
use dashmap::DashMap;
use governor::state::InMemoryState;
use governor::{Quota, RateLimiter};
use rand::distr::{Alphanumeric, SampleString};
use reqwest_middleware::ClientWithMiddleware;
use std::collections::{HashMap, VecDeque};
use std::mem::ManuallyDrop;
use std::ops::{Deref, DerefMut};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, LazyLock};
use std::time::{Duration, Instant};
use tokio::sync::{Mutex, Notify};
use tracing::{debug, info, trace};
use url::Url;
/// How long a session may sit idle (or unused since creation) before
/// `BannerSession::is_expired` reports it as expired.
const SESSION_EXPIRY: Duration = Duration::from_secs(25 * 60); // 25 minutes
/// A global rate limiter to ensure we only try to create one new session every 10 seconds,
/// preventing us from overwhelming the server with session creation requests.
///
/// Initialised lazily on first use; the `unwrap` cannot fail because the
/// 10-second period is non-zero.
static SESSION_CREATION_RATE_LIMITER: LazyLock<
RateLimiter<governor::state::direct::NotKeyed, InMemoryState, governor::clock::DefaultClock>,
> = LazyLock::new(|| RateLimiter::direct(Quota::with_period(Duration::from_secs(10)).unwrap()));
/// Represents an active anonymous session within the Banner API.
/// Identified by multiple persistent cookies, as well as a client-generated "unique session ID".
#[derive(Debug, Clone)]
pub struct BannerSession {
/// Randomly generated client-side identifier (see `generate_session_id`).
pub unique_session_id: String,
/// Timestamp of creation; used as the expiry reference until the first `touch`.
created_at: Instant,
/// Timestamp of last activity; `None` until the session is touched for the first time.
last_activity: Option<Instant>,
/// `JSESSIONID` cookie value from the initial registration page.
jsessionid: String,
/// `SSB_COOKIE` cookie value from the initial registration page.
ssb_cookie: String,
}
/// Generates a new session ID mimicking Banner's format:
/// five random alphanumeric characters followed by the current Unix time in
/// milliseconds.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
fn generate_session_id() -> String {
    let mut session_id = Alphanumeric.sample_string(&mut rand::rng(), 5);
    let millis_since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_millis();
    session_id.push_str(&millis_since_epoch.to_string());
    session_id
}
/// Generates a timestamp-based nonce: the current Unix time in milliseconds,
/// rendered as a decimal string.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
pub fn nonce() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    let millis = since_epoch.as_millis();
    millis.to_string()
}
impl BannerSession {
    /// Creates a never-used session stamped with the current instant.
    pub fn new(unique_session_id: &str, jsessionid: &str, ssb_cookie: &str) -> Self {
        let created_at = Instant::now();
        Self {
            unique_session_id: unique_session_id.to_owned(),
            created_at,
            last_activity: None,
            jsessionid: jsessionid.to_owned(),
            ssb_cookie: ssb_cookie.to_owned(),
        }
    }

    /// Returns the client-generated unique session ID.
    pub fn id(&self) -> &str {
        self.unique_session_id.as_str()
    }

    /// Records "now" as the moment of last activity.
    pub fn touch(&mut self) {
        let now = Instant::now();
        self.last_activity = Some(now);
    }

    /// Returns `true` once more than `SESSION_EXPIRY` has elapsed since the last
    /// activity, or since creation when the session has never been touched.
    pub fn is_expired(&self) -> bool {
        let reference = match self.last_activity {
            Some(last_seen) => last_seen,
            None => self.created_at,
        };
        reference.elapsed() > SESSION_EXPIRY
    }

    /// Returns the value to send in the "Cookie" header for this session.
    pub fn cookie(&self) -> String {
        let Self {
            jsessionid,
            ssb_cookie,
            ..
        } = self;
        format!("JSESSIONID={}; SSB_COOKIE={}", jsessionid, ssb_cookie)
    }

    /// Returns `true` if the session has been touched at least once.
    pub fn been_used(&self) -> bool {
        self.last_activity.is_some()
    }

    /// Test-only constructor that lets the caller back-date `created_at`.
    #[cfg(test)]
    pub(crate) fn new_with_created_at(
        unique_session_id: &str,
        jsessionid: &str,
        ssb_cookie: &str,
        created_at: Instant,
    ) -> Self {
        Self {
            created_at,
            last_activity: None,
            unique_session_id: unique_session_id.to_owned(),
            jsessionid: jsessionid.to_owned(),
            ssb_cookie: ssb_cookie.to_owned(),
        }
    }
}
// Unit tests for `BannerSession` bookkeeping and for cancellation safety of
// `SessionPool::acquire`.
#[cfg(test)]
mod tests {
use super::*;
use std::time::Duration;
/// Verifies that cancelling `acquire()` mid-session-creation resets `is_creating`,
/// allowing subsequent callers to proceed rather than deadlocking.
#[tokio::test]
async fn test_acquire_not_deadlocked_after_cancellation() {
use tokio::sync::mpsc;
let (tx, mut rx) = mpsc::channel::<()>(10);
// Local server: /registration signals arrival via `tx`, then hangs forever.
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap();
let app = axum::Router::new().route(
"/StudentRegistrationSsb/registration",
axum::routing::get(move || {
let tx = tx.clone();
async move {
let _ = tx.send(()).await;
std::future::pending::<&str>().await
}
}),
);
tokio::spawn(async move {
axum::serve(listener, app).await.unwrap();
});
let base_url = format!("http://{}/StudentRegistrationSsb", addr);
// Generous client timeout so the request itself never fails during the test;
// only cancellation ends it.
let client = reqwest_middleware::ClientBuilder::new(
reqwest::Client::builder()
.timeout(Duration::from_secs(300))
.build()
.unwrap(),
)
.build();
let pool = SessionPool::new(client, base_url);
let term: Term = "202620".parse().unwrap();
// First acquire: cancel once the request reaches the server.
tokio::select! {
_ = pool.acquire(term) => panic!("server hangs — acquire should never complete"),
_ = rx.recv() => {} // Request arrived; dropping the future simulates timeout cancellation.
}
// Second acquire: verify it reaches the server (i.e., is_creating was reset).
// The global rate limiter has a 10s period, so allow 15s for the second attempt.
tokio::select! {
_ = pool.acquire(term) => {}
result = tokio::time::timeout(Duration::from_secs(15), rx.recv()) => {
assert!(
result.is_ok(),
"acquire() deadlocked — is_creating was not reset after cancellation"
);
}
}
}
// `new` stores the supplied unique session ID.
#[test]
fn test_new_session_creates_session() {
let session = BannerSession::new("sess-1", "JSID123", "SSB456");
assert_eq!(session.id(), "sess-1");
}
// A session created just now is well inside the expiry window.
#[test]
fn test_fresh_session_not_expired() {
let session = BannerSession::new("sess-1", "JSID123", "SSB456");
assert!(!session.is_expired());
}
// `been_used` is false until the first `touch`.
#[test]
fn test_fresh_session_not_been_used() {
let session = BannerSession::new("sess-1", "JSID123", "SSB456");
assert!(!session.been_used());
}
// `touch` flips `been_used` to true.
#[test]
fn test_touch_marks_used() {
let mut session = BannerSession::new("sess-1", "JSID123", "SSB456");
session.touch();
assert!(session.been_used());
}
// Touching refreshes the activity timestamp, so the session stays unexpired.
#[test]
fn test_touched_session_not_expired() {
let mut session = BannerSession::new("sess-1", "JSID123", "SSB456");
session.touch();
assert!(!session.is_expired());
}
// The Cookie header value has the exact `JSESSIONID=…; SSB_COOKIE=…` shape.
#[test]
fn test_cookie_format() {
let session = BannerSession::new("sess-1", "JSID123", "SSB456");
assert_eq!(session.cookie(), "JSESSIONID=JSID123; SSB_COOKIE=SSB456");
}
// `id` returns the unique session ID verbatim.
#[test]
fn test_id_returns_unique_session_id() {
let session = BannerSession::new("my-unique-id", "JSID123", "SSB456");
assert_eq!(session.id(), "my-unique-id");
}
// 26 minutes old with no activity: past the 25-minute expiry.
#[test]
fn test_expired_session() {
let session = BannerSession::new_with_created_at(
"sess-old",
"JSID123",
"SSB456",
Instant::now() - Duration::from_secs(26 * 60),
);
assert!(session.is_expired());
}
// 24 minutes old with no activity: still inside the 25-minute expiry.
#[test]
fn test_not_quite_expired_session() {
let session = BannerSession::new_with_created_at(
"sess-recent",
"JSID123",
"SSB456",
Instant::now() - Duration::from_secs(24 * 60),
);
assert!(!session.is_expired());
}
// One second past the 25-minute boundary counts as expired.
#[test]
fn test_session_at_expiry_boundary() {
let session = BannerSession::new_with_created_at(
"sess-boundary",
"JSID123",
"SSB456",
Instant::now() - Duration::from_secs(25 * 60 + 1),
);
assert!(session.is_expired());
}
}
/// A smart pointer that returns a `BannerSession` to the pool when dropped.
pub struct PooledSession {
// Wrapped in `ManuallyDrop` so the `Drop` impl can move the session out by value.
session: ManuallyDrop<BannerSession>,
// The per-term pool this session is released back into.
pool: Arc<TermPool>,
}
impl Deref for PooledSession {
    type Target = BannerSession;

    /// Gives shared access to the wrapped session.
    fn deref(&self) -> &BannerSession {
        &*self.session
    }
}
impl DerefMut for PooledSession {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.session
}
}
impl Drop for PooledSession {
fn drop(&mut self) {
// SAFETY: `drop` is called exactly once by Rust's drop semantics,
// so `ManuallyDrop::take` is guaranteed to see a valid value.
let session = unsafe { ManuallyDrop::take(&mut self.session) };
let pool = self.pool.clone();
tokio::spawn(async move {
pool.release(session).await;
});
}
}
/// Pool of reusable sessions for a single term.
pub struct TermPool {
// Idle sessions; pushed at the back on release and popped from the front on acquire.
sessions: Mutex<VecDeque<BannerSession>>,
// Signalled when a session is released or when a creation attempt finishes.
notifier: Notify,
// True while some task holds the "creator" role for this pool.
is_creating: AtomicBool,
}
/// RAII guard ensuring `is_creating` is reset on drop for cancellation safety.
/// Without this, a cancelled `acquire()` future would leave the flag set permanently,
/// deadlocking all subsequent callers.
struct CreatingGuard(Arc<TermPool>);
impl Drop for CreatingGuard {
fn drop(&mut self) {
// Clear the flag first so that woken waiters observe `is_creating == false`.
self.0.is_creating.store(false, Ordering::Release);
// Wake every task currently parked on the notifier so one of them can
// take a session or become the next creator.
self.0.notifier.notify_waiters();
}
}
impl TermPool {
    /// Creates an empty pool with no creation in progress.
    fn new() -> Self {
        let sessions = Mutex::new(VecDeque::new());
        Self {
            sessions,
            notifier: Notify::new(),
            is_creating: AtomicBool::new(false),
        }
    }

    /// Returns `session` to the pool, or discards it if it has expired.
    ///
    /// One waiter is woken in either case: after a successful return it can take
    /// the session, and after a discard it may need to create a new one.
    async fn release(&self, session: BannerSession) {
        if session.is_expired() {
            debug!(id = session.unique_session_id, "Session expired, dropping");
        } else {
            // The guard is a temporary, so the lock is released at the end of
            // this statement, before the notification below.
            self.sessions.lock().await.push_back(session);
        }
        self.notifier.notify_one();
    }
}
/// Collection of per-term session pools together with the HTTP client used to
/// create new sessions.
pub struct SessionPool {
// One lazily created `TermPool` per term.
sessions: DashMap<Term, Arc<TermPool>>,
// Client used for every request issued while establishing a session.
http: ClientWithMiddleware,
// Prefix that all Banner endpoint paths are appended to.
base_url: String,
}
impl SessionPool {
/// Creates a pool with no sessions; per-term pools are added on first `acquire`.
///
/// `base_url` is the prefix that endpoint paths such as `/registration` are
/// appended to.
pub fn new(http: ClientWithMiddleware, base_url: String) -> Self {
Self {
sessions: DashMap::new(),
http,
base_url,
}
}
/// Acquires a session from the pool.
/// If no sessions are available, a new one is created on demand,
/// respecting the global rate limit.
///
/// At most one task per term creates a session at a time; other callers wait on
/// the pool's notifier and retry.
///
/// # Errors
///
/// Returns an error if this caller became the creator and session creation failed.
pub async fn acquire(&self, term: Term) -> Result<PooledSession> {
// Get or lazily create the pool for this term; cloning the `Arc` lets the
// map entry guard go out of scope at the end of the statement.
let term_pool = self
.sessions
.entry(term)
.or_insert_with(|| Arc::new(TermPool::new()))
.clone();
let start = Instant::now();
// Only used to log the "waiting" trace once per call.
let mut waited_for_creation = false;
loop {
// Fast path: Try to get an existing, non-expired session.
// Only one queued session is examined per iteration; an expired one is
// discarded and control falls through to the slow path.
{
let mut queue = term_pool.sessions.lock().await;
if let Some(session) = queue.pop_front() {
if !session.is_expired() {
return Ok(PooledSession {
session: ManuallyDrop::new(session),
pool: Arc::clone(&term_pool),
});
} else {
debug!(id = session.unique_session_id, "Discarded expired session");
}
}
} // MutexGuard is dropped, lock is released.
// Slow path: wait for an in-progress creation, or become the creator.
if term_pool.is_creating.load(Ordering::Acquire) {
if !waited_for_creation {
trace!("Waiting for another task to create session");
waited_for_creation = true;
}
// NOTE(review): a `notify_waiters()` issued between the load above and the
// creation of this `notified()` future would not be seen by this task —
// confirm whether that window matters when creation fails.
term_pool.notifier.notified().await;
continue;
}
// CAS to become the designated creator.
if term_pool
.is_creating
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
.is_err()
{
continue; // Lost the race — loop back and wait.
}
// Guard resets is_creating on drop (including cancellation).
let creating_guard = CreatingGuard(Arc::clone(&term_pool));
trace!("Pool empty, creating new session");
// Race the global creation rate limit against a notification from the pool.
tokio::select! {
_ = term_pool.notifier.notified() => {
// A session was returned — release creator role and race for it.
drop(creating_guard);
continue;
}
_ = SESSION_CREATION_RATE_LIMITER.until_ready() => {
let new_session_result = self.create_session(&term).await;
// Creation finished (either way): give up the creator role before returning.
drop(creating_guard);
match new_session_result {
Ok(new_session) => {
let elapsed = start.elapsed();
debug!(
id = new_session.unique_session_id,
elapsed_ms = elapsed.as_millis(),
"Created new session"
);
return Ok(PooledSession {
session: ManuallyDrop::new(new_session),
pool: term_pool,
});
}
Err(e) => {
return Err(e.context("Failed to create new session in pool"));
}
}
}
}
}
}
/// Sets up initial session cookies by making required Banner API requests.
///
/// Steps, in order: load the registration page and capture the `JSESSIONID` and
/// `SSB_COOKIE` cookies, visit the self-service menu and term-selection pages
/// with those cookies, confirm that `term` appears both in the unfiltered term
/// list and in a search for its own code, then select the term under a freshly
/// generated unique session ID.
///
/// # Errors
///
/// Fails if any request fails, a required cookie is missing, the term is not
/// listed by the API, or term selection is rejected.
async fn create_session(&self, term: &Term) -> Result<BannerSession> {
    info!(term = %term, "setting up banner session");

    // Render the term code once; it is compared against every returned term and
    // reused for the lookups below.
    let term_code = term.to_string();

    // The 'register' or 'search' registration page
    let initial_registration = self
        .http
        .get(format!("{}/registration", self.base_url))
        .send()
        .await?;
    // TODO: Validate success

    // Collect every parseable Set-Cookie header as name -> value.
    let cookies: HashMap<String, String> = initial_registration
        .headers()
        .get_all("Set-Cookie")
        .iter()
        .filter_map(|v| {
            let c = Cookie::parse(v.to_str().ok()?).ok()?;
            Some((c.name().to_string(), c.value().to_string()))
        })
        .collect();

    let jsessionid = cookies
        .get("JSESSIONID")
        .ok_or_else(|| anyhow::anyhow!("JSESSIONID cookie missing"))?;
    let ssb_cookie = cookies
        .get("SSB_COOKIE")
        .ok_or_else(|| anyhow::anyhow!("SSB_COOKIE cookie missing"))?;
    let cookie_header = format!("JSESSIONID={}; SSB_COOKIE={}", jsessionid, ssb_cookie);

    self.http
        .get(format!("{}/selfServiceMenu/data", self.base_url))
        .header("Cookie", &cookie_header)
        .send()
        .await?
        .error_for_status()
        .context("Failed to get data page")?;

    self.http
        .get(format!("{}/term/termSelection", self.base_url))
        .header("Cookie", &cookie_header)
        .query(&[("mode", "search")])
        .send()
        .await?
        .error_for_status()
        .context("Failed to get term selection page")?;
    // TODO: Validate success

    // The term must show up in the unfiltered list...
    let terms = self.get_terms("", 1, 10).await?;
    if !terms.iter().any(|t| t.code == term_code) {
        return Err(anyhow::anyhow!("Failed to get term search response"));
    }

    // ...and in a search for its own code.
    let specific_term_search_response = self.get_terms(&term_code, 1, 10).await?;
    if !specific_term_search_response
        .iter()
        .any(|t| t.code == term_code)
    {
        return Err(anyhow::anyhow!("Failed to get term search response"));
    }

    let unique_session_id = generate_session_id();
    self.select_term(&term_code, &unique_session_id, &cookie_header)
        .await?;

    Ok(BannerSession::new(
        &unique_session_id,
        jsessionid,
        ssb_cookie,
    ))
}
/// Retrieves a list of terms from the Banner API.
///
/// `search` filters by term text (empty for no filter), `page` is sent as the
/// `offset` query parameter and must be positive, and `max_results` is sent as
/// `max`.
///
/// # Errors
///
/// Fails if `page` is not greater than zero, the request cannot be sent, or the
/// response body is not a JSON list of terms.
pub async fn get_terms(
    &self,
    search: &str,
    page: i32,
    max_results: i32,
) -> Result<Vec<BannerTerm>> {
    anyhow::ensure!(page > 0, "Page must be greater than 0");

    let endpoint = format!("{}/classSearch/getTerms", self.base_url);
    let offset = page.to_string();
    let max = max_results.to_string();
    // Timestamp value sent as the `_` parameter on every call.
    let cache_buster = nonce();
    let query: [(&str, &str); 4] = [
        ("searchTerm", search),
        ("offset", &offset),
        ("max", &max),
        ("_", &cache_buster),
    ];

    self.http
        .get(&endpoint)
        .query(&query)
        .send()
        .await
        .context("Failed to get terms")?
        .json::<Vec<BannerTerm>>()
        .await
        .context("Failed to parse terms response")
}
/// Selects a term for the current session.
async fn select_term(
&self,
term: &str,
unique_session_id: &str,
cookie_header: &str,
) -> Result<()> {
let form_data = [
("term", term),
("studyPath", ""),
("studyPathText", ""),
("startDatepicker", ""),
("endDatepicker", ""),
("uniqueSessionId", unique_session_id),
];
let url = format!("{}/term/search", self.base_url);
let response = self
.http
.post(&url)
.header("Cookie", cookie_header)
.query(&[("mode", "search")])
.form(&form_data)
.send()
.await?;
if !response.status().is_success() {
return Err(anyhow::anyhow!(
"Failed to select term {}: {}",
term,
response.status()
));
}
#[derive(serde::Deserialize)]
struct RedirectResponse {
#[serde(rename = "fwdURL")]
fwd_url: String,
}
let redirect: RedirectResponse = response.json().await?;
let base_url_path = self
.base_url
.parse::<Url>()
.context("Failed to parse base URL")?
.path()
.to_string();
let non_overlap_redirect =
redirect
.fwd_url
.strip_prefix(&base_url_path)
.ok_or_else(|| {
anyhow::anyhow!(
"Redirect URL '{}' does not start with expected prefix '{}'",
redirect.fwd_url,
base_url_path
)
})?;
// Follow the redirect
let redirect_url = format!("{}{}", self.base_url, non_overlap_redirect);
let redirect_response = self
.http
.get(&redirect_url)
.header("Cookie", cookie_header)
.send()
.await?;
if !redirect_response.status().is_success() {
return Err(anyhow::anyhow!(
"Failed to follow redirect: {}",
redirect_response.status()
));
}
Ok(())
}
}
+6
View File
@@ -0,0 +1,6 @@
//! Utility functions for the Banner module.
/// Returns a browser-like user agent string (desktop Chrome on Linux).
pub fn user_agent() -> &'static str {
    const CHROME_ON_LINUX: &str = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
    CHROME_ON_LINUX
}
+131
View File
@@ -0,0 +1,131 @@
use banner::banner::{BannerApi, SearchQuery, Term};
use banner::config::Config;
use banner::error::Result;
use figment::{Figment, providers::Env};
use futures::future;
use tracing::{error, info};
use tracing_subscriber::{EnvFilter, FmtSubscriber};
#[tokio::main]
async fn main() -> Result<()> {
    // Logging: honour the environment filter if present, otherwise use a verbose default.
    let filter = match EnvFilter::try_from_default_env() {
        Ok(from_env) => from_env,
        Err(_) => EnvFilter::new("info,banner=trace,reqwest=debug,hyper=info"),
    };
    let subscriber = FmtSubscriber::builder()
        .with_env_filter(filter)
        .with_target(true)
        .finish();
    tracing::subscriber::set_global_default(subscriber).expect("setting default subscriber failed");

    info!("Starting Banner search test");

    // A missing .env file is not an error.
    dotenvy::dotenv().ok();

    // Configuration comes from environment variables.
    let config: Config = Figment::new()
        .merge(Env::raw())
        .extract()
        .expect("Failed to load config");
    info!(
        banner_base_url = config.banner_base_url,
        "Configuration loaded"
    );

    let banner_api = BannerApi::new_with_config(config.banner_base_url, config.rate_limiting)
        .expect("Failed to create BannerApi");

    let term = Term::get_current().inner().to_string();
    info!(term = term, "Using current term");

    // Labelled sample queries exercised by this test binary.
    let queries = vec![
        (
            "CS Courses",
            SearchQuery::new().subject("CS").max_results(10),
        ),
        (
            "Math Courses",
            SearchQuery::new().subject("MAT").max_results(10),
        ),
        (
            "3000-level CS",
            SearchQuery::new()
                .subject("CS")
                .course_numbers(3000, 3999)
                .max_results(8),
        ),
        (
            "High Credit Courses",
            SearchQuery::new().credits(4, 6).max_results(8),
        ),
        (
            "Programming Courses",
            SearchQuery::new().keyword("programming").max_results(6),
        ),
    ];
    info!(query_count = queries.len(), "Executing concurrent searches");

    // One future per query; all of them borrow the same client and term.
    let search_futures = queries.into_iter().map(|(label, query)| {
        info!(label = %label, "Starting search");
        let banner_api = &banner_api;
        let term = &term;
        async move {
            let result = banner_api
                .search(term, &query, "subjectDescription", false)
                .await;
            (label, result)
        }
    });

    // Run everything concurrently, then keep only the successful searches.
    let outcomes = future::join_all(search_futures).await;
    let mut search_results = Vec::with_capacity(outcomes.len());
    for (label, outcome) in outcomes {
        match outcome {
            Ok(search_result) => {
                info!(
                    label = label,
                    success = search_result.success,
                    total_count = search_result.total_count,
                    "Search completed successfully"
                );
                search_results.push((label, search_result));
            }
            Err(e) => {
                error!(label = label, error = ?e, "Search failed");
            }
        }
    }

    // Print a short listing per query.
    for (label, search_result) in search_results {
        println!("\n=== {} ===", label);
        match &search_result.data {
            Some(courses) if !courses.is_empty() => {
                println!(" Found {} courses:", courses.len());
                for course in courses {
                    println!(
                        " {} {} - {} (CRN: {})",
                        course.subject,
                        course.course_number,
                        course.course_title,
                        course.course_reference_number
                    );
                }
            }
            // Both "no data" and "empty data" are reported the same way.
            _ => println!(" No courses found"),
        }
    }

    info!("Search test completed");
    Ok(())
}
+146
View File
@@ -0,0 +1,146 @@
//! Google Calendar command implementation.
use crate::banner::{Course, MeetingScheduleInfo};
use crate::bot::{Context, Error, utils};
use chrono::{NaiveDate, Weekday};
use std::collections::HashMap;
use tracing::info;
use url::Url;
/// Generate a link to create a Google Calendar event for a course
#[poise::command(slash_command)]
pub async fn gcal(
ctx: Context<'_>,
#[description = "Course Reference Number (CRN)"] crn: i32,
) -> Result<(), Error> {
let user = ctx.author();
info!(source = user.name, target = crn, "gcal command invoked");
ctx.defer().await?;
let course = utils::get_course_by_crn(&ctx, crn).await?;
let term = course.term.clone();
// Get meeting times
let meeting_times = ctx
.data()
.app_state
.banner_api
.get_course_meeting_time(&term, &crn.to_string())
.await?;
struct LinkDetail {
link: String,
detail: String,
}
let response: Vec<LinkDetail> = match meeting_times.len() {
0 => Err(anyhow::anyhow!("No meeting times found for this course.")),
1.. => {
// Sort meeting times by start time of their TimeRange
let mut sorted_meeting_times = meeting_times.to_vec();
MeetingScheduleInfo::sort_by_start_time(&mut sorted_meeting_times);
let links = sorted_meeting_times
.iter()
.map(|m| {
let link = generate_gcal_url(&course, m)?;
let days = m.days_string().unwrap_or_else(|| "TBA".to_string());
let detail = match &m.time_range {
Some(range) => {
format!("{days} {}", range.format_12hr())
}
None => days,
};
Ok(LinkDetail { link, detail })
})
.collect::<Result<Vec<LinkDetail>, anyhow::Error>>()?;
Ok(links)
}
}?;
ctx.say(
response
.iter()
.map(|LinkDetail { link, detail }| {
format!("[Add to Google Calendar](<{link}>) ({detail})")
})
.collect::<Vec<String>>()
.join("\n"),
)
.await?;
info!(crn = %crn, "gcal command completed");
Ok(())
}
/// Generate Google Calendar URL for a course
///
/// Builds a "create event" template link whose title, dates, description,
/// location and weekly recurrence are taken from `course` and `meeting_time`.
///
/// # Errors
///
/// Fails only if the final URL cannot be assembled.
fn generate_gcal_url(
    course: &Course,
    meeting_time: &MeetingScheduleInfo,
) -> Result<String, anyhow::Error> {
    const DATE_TIME_FORMAT: &str = "%Y%m%dT%H%M%S";

    // Event title.
    let course_text = course.display_title();

    // First occurrence as "start/end".
    let (start, end) = meeting_time.datetime_range();
    let dates_text = format!(
        "{}/{}",
        start.format(DATE_TIME_FORMAT),
        end.format(DATE_TIME_FORMAT)
    );

    // Event description.
    let instructor_name = course.primary_instructor_name();
    let days_text = meeting_time
        .days_string()
        .unwrap_or_else(|| "TBA".to_string());
    let details_text = format!(
        "CRN: {}\nInstructor: {}\nDays: {}",
        course.course_reference_number, instructor_name, days_text
    );

    // Event location and weekly recurrence rule.
    let location_text = meeting_time.place_string();
    let recur_text = generate_rrule(meeting_time, meeting_time.date_range.end);

    let mut params: HashMap<&str, &str> = HashMap::new();
    params.insert("action", "TEMPLATE");
    params.insert("text", &course_text);
    params.insert("dates", &dates_text);
    params.insert("details", &details_text);
    params.insert("location", &location_text);
    params.insert("trp", "true");
    params.insert("ctz", "America/Chicago");
    params.insert("recur", &recur_text);

    let url = Url::parse_with_params("https://calendar.google.com/calendar/render", &params)?;
    Ok(url.to_string())
}
/// Generate RRULE for recurrence
///
/// Produces a weekly rule on the meeting's weekdays that runs through
/// `end_date` inclusive.
///
/// `UNTIL` is an inclusive bound expressed in UTC. The calendar link is created
/// with `ctz=America/Chicago` (UTC-6 / UTC-5), so midnight UTC *on* `end_date`
/// falls on the previous local evening and would drop a class held on the end
/// date itself. The bound is therefore placed at 05:59:59 UTC on the following
/// day, i.e. the very end of `end_date` in Central Standard Time (00:59:59 the
/// next day during daylight time).
fn generate_rrule(meeting_time: &MeetingScheduleInfo, end_date: NaiveDate) -> String {
    let days_of_week = meeting_time.days_of_week();
    let by_day = days_of_week
        .iter()
        .map(|day| match day {
            Weekday::Mon => "MO",
            Weekday::Tue => "TU",
            Weekday::Wed => "WE",
            Weekday::Thu => "TH",
            Weekday::Fri => "FR",
            Weekday::Sat => "SA",
            Weekday::Sun => "SU",
        })
        .collect::<Vec<&str>>()
        .join(",");

    // Falls back to `end_date` itself only at the very end of the supported date range.
    let until_day = end_date.succ_opt().unwrap_or(end_date);
    let until = until_day.format("%Y%m%dT055959Z").to_string();

    format!("RRULE:FREQ=WEEKLY;BYDAY={by_day};UNTIL={until}")
}
+379
View File
@@ -0,0 +1,379 @@
//! ICS command implementation for generating calendar files.
use crate::banner::{Course, MeetingDays, MeetingScheduleInfo, WeekdayExt};
use crate::bot::{Context, Error, utils};
use chrono::{Datelike, Duration, NaiveDate, Utc, Weekday};
use serenity::all::CreateAttachment;
use tracing::info;
/// Find the nth occurrence of a weekday in a given month/year (1-based).
///
/// Returns `None` when `n` is zero, when the month or year is invalid, or when
/// the month has no such occurrence (e.g. a fifth Monday that does not exist).
fn nth_weekday_of_month(year: i32, month: u32, weekday: Weekday, n: u32) -> Option<NaiveDate> {
    let first = NaiveDate::from_ymd_opt(year, month, 1)?;

    // Days from the 1st to the first `weekday` of the month (0..=6). Both
    // operands are in 0..=6, so adding 7 keeps the subtraction non-negative.
    let days_ahead =
        (7 + weekday.num_days_from_monday() - first.weekday().num_days_from_monday()) % 7;

    // Checked arithmetic: `n == 0` (or an absurdly large `n`) yields `None`
    // instead of underflowing/overflowing.
    let day = n
        .checked_sub(1)?
        .checked_mul(7)?
        .checked_add(1 + days_ahead)?;

    NaiveDate::from_ymd_opt(year, month, day)
}
/// Compute a consecutive range of dates starting from `start` for `count` days.
///
/// Dates that cannot be represented are skipped; a non-positive `count` yields
/// an empty list.
fn date_range(start: NaiveDate, count: i64) -> Vec<NaiveDate> {
    let mut dates = Vec::new();
    for offset in 0..count {
        if let Some(date) = start.checked_add_signed(Duration::days(offset)) {
            dates.push(date);
        }
    }
    dates
}
/// Compute university holidays for a given year.
///
/// Federal holidays use weekday-of-month rules so they're correct for any year.
/// University-specific breaks (Fall Break, Spring Break, Winter Holiday) are derived
/// from anchoring federal holidays or using UTSA's typical scheduling patterns.
///
/// Each entry pairs a holiday name with every date it covers. Entries whose
/// dates cannot be computed are omitted.
fn compute_holidays_for_year(year: i32) -> Vec<(&'static str, Vec<NaiveDate>)> {
    // 4th Thursday of November; anchors both Thanksgiving entries.
    let thanksgiving = nth_weekday_of_month(year, 11, Weekday::Thu, 4);

    let candidates: [Option<(&'static str, Vec<NaiveDate>)>; 8] = [
        // Labor Day: 1st Monday of September
        nth_weekday_of_month(year, 9, Weekday::Mon, 1).map(|d| ("Labor Day", vec![d])),
        // Fall Break: Mon-Tue of Columbus Day week (2nd Monday of October + Tuesday)
        nth_weekday_of_month(year, 10, Weekday::Mon, 2)
            .map(|mon| ("Fall Break", date_range(mon, 2))),
        // Day before Thanksgiving: the Wednesday preceding it
        thanksgiving
            .and_then(|thu| thu.checked_sub_signed(Duration::days(1)))
            .map(|wed| ("Day Before Thanksgiving", vec![wed])),
        // Thanksgiving: Thursday + Friday
        thanksgiving.map(|thu| ("Thanksgiving", date_range(thu, 2))),
        // Winter Holiday: Dec 23-31
        NaiveDate::from_ymd_opt(year, 12, 23)
            .map(|start| ("Winter Holiday", date_range(start, 9))),
        // New Year's Day: January 1
        NaiveDate::from_ymd_opt(year, 1, 1).map(|d| ("New Year's Day", vec![d])),
        // MLK Day: 3rd Monday of January
        nth_weekday_of_month(year, 1, Weekday::Mon, 3).map(|d| ("MLK Day", vec![d])),
        // Spring Break: Mon-Sat starting the 2nd Monday of March
        nth_weekday_of_month(year, 3, Weekday::Mon, 2)
            .map(|mon| ("Spring Break", date_range(mon, 6))),
    ];

    candidates.into_iter().flatten().collect()
}
/// Generate an ICS file for a course
#[poise::command(slash_command, prefix_command)]
pub async fn ics(
    ctx: Context<'_>,
    #[description = "Course Reference Number (CRN)"] crn: i32,
) -> Result<(), Error> {
    // The doc comment above and the `description` attribute are consumed by the
    // command macro, so they are part of the command's metadata.

    // Acknowledge the interaction before doing network work.
    ctx.defer().await?;

    let course = utils::get_course_by_crn(&ctx, crn).await?;
    let term = course.term.clone();

    // Get meeting times
    let meeting_times = ctx
        .data()
        .app_state
        .banner_api
        .get_course_meeting_time(&term, &crn.to_string())
        .await?;

    if meeting_times.is_empty() {
        ctx.say("No meeting times found for this course.").await?;
        return Ok(());
    }

    // Sort meeting times by start time
    let mut sorted_meeting_times = meeting_times.to_vec();
    MeetingScheduleInfo::sort_by_start_time(&mut sorted_meeting_times);

    // Generate ICS content
    let (ics_content, excluded_holidays) =
        generate_ics_content(&course, &term, &sorted_meeting_times)?;

    // Create file attachment; the filename is moved in, no copy is needed.
    let filename = format!(
        "{subject}_{number}_{section}.ics",
        subject = course.subject.replace(" ", "_"),
        number = course.course_number,
        section = course.sequence_number,
    );
    let file = CreateAttachment::bytes(ics_content.into_bytes(), filename);

    // Numbered "days time" summary, one line per meeting.
    let meeting_lines = sorted_meeting_times
        .iter()
        .enumerate()
        .map(|(i, m)| {
            // The fallback string is only allocated when no days are known.
            let days = m.days_string().unwrap_or_else(|| "TBA".to_string());
            let time_info = match &m.time_range {
                Some(range) => format!("{} {}", days, range.format_12hr()),
                None => days,
            };
            format!("{}. {}", i + 1, time_info)
        })
        .collect::<Vec<_>>()
        .join("\n");

    // Build response content
    let mut response_content = format!(
        "📅 Generated ICS calendar for **{}**\n\n**Meeting Times:**\n{}",
        course.display_title(),
        meeting_lines
    );

    // Add holiday exclusion information
    if !excluded_holidays.is_empty() {
        let count = excluded_holidays.len();
        let count_text = if count == 1 {
            "1 date was".to_string()
        } else {
            format!("{} dates were", count)
        };
        response_content.push_str(&format!("\n\n{} excluded from the ICS file:\n", count_text));
        response_content.push_str(
            &excluded_holidays
                .iter()
                .map(|s| format!("- {}", s))
                .collect::<Vec<_>>()
                .join("\n"),
        );
    }

    ctx.send(
        poise::CreateReply::default()
            .content(response_content)
            .attachment(file),
    )
    .await?;

    info!(crn = %crn, "ics command completed");
    Ok(())
}
/// Generate ICS content for a course and its meeting times
///
/// Returns the complete calendar text together with the holiday descriptions
/// collected from every generated event.
///
/// # Errors
///
/// Propagates any error from generating an individual event.
fn generate_ics_content(
    course: &Course,
    term: &str,
    meeting_times: &[MeetingScheduleInfo],
) -> Result<(String, Vec<String>), anyhow::Error> {
    let mut calendar = String::new();

    // Fixed calendar preamble.
    for header_line in [
        "BEGIN:VCALENDAR\r\n",
        "VERSION:2.0\r\n",
        "PRODID:-//Banner Bot//Course Calendar//EN\r\n",
        "CALSCALE:GREGORIAN\r\n",
        "METHOD:PUBLISH\r\n",
    ] {
        calendar.push_str(header_line);
    }

    // Calendar display name: "<course title> - <term>".
    calendar.push_str(&format!(
        "X-WR-CALNAME:{} - {}\r\n",
        course.display_title(),
        term
    ));

    // One event per meeting time, accumulating the holidays each one excluded.
    let mut excluded_holidays = Vec::new();
    for (index, meeting_time) in meeting_times.iter().enumerate() {
        let (event, holidays) = generate_event_content(course, meeting_time, index)?;
        calendar.push_str(&event);
        excluded_holidays.extend(holidays);
    }

    calendar.push_str("END:VCALENDAR\r\n");
    Ok((calendar, excluded_holidays))
}
/// Generate ICS event content for a single meeting time.
///
/// Produces one `BEGIN:VEVENT` … `END:VEVENT` block (CRLF line endings) and the list of
/// holiday descriptions (`"<name> (<%a, %b %d>)"`, sorted and de-duplicated) whose dates
/// were excluded from the recurrence through `EXDATE`.
///
/// * `index` – zero-based position of the meeting; meetings after the first get a
///   `" (Meeting N)"` suffix in the summary, and the index is part of the UID.
///
/// A weekly `RRULE` (plus `EXDATE` for holidays) is only emitted when the meeting has both
/// meeting days and a time range; otherwise the event is a single occurrence and the
/// returned holiday list is empty.
fn generate_event_content(
    course: &Course,
    meeting_time: &MeetingScheduleInfo,
    index: usize,
) -> Result<(String, Vec<String>), anyhow::Error> {
    let course_title = course.display_title();
    let instructor_name = course.primary_instructor_name();
    let location = meeting_time.place_string();

    // Create event title with meeting index if multiple meetings
    let event_title = if index > 0 {
        format!("{} (Meeting {})", course_title, index + 1)
    } else {
        course_title
    };

    // Create event description. The `\\n` sequences are literal backslash-n pairs at this
    // point; `escape_ics_text` is applied to the whole description further down.
    let description = format!(
        "CRN: {}\\nInstructor: {}\\nDays: {}\\nMeeting Type: {}",
        course.course_reference_number,
        instructor_name,
        meeting_time
            .days_string()
            .unwrap_or_else(|| "TBA".to_string()),
        meeting_time.meeting_type.description()
    );

    // First occurrence, converted to UTC for the `...Z` date-time form.
    let (start_dt, end_dt) = meeting_time.datetime_range();
    let start_utc = start_dt.with_timezone(&Utc);
    let end_utc = end_dt.with_timezone(&Utc);
    let start_str = start_utc.format("%Y%m%dT%H%M%SZ").to_string();
    let end_str = end_utc.format("%Y%m%dT%H%M%SZ").to_string();

    // Generate unique ID for the event
    let uid = format!(
        "{}-{}-{}@banner-bot.local",
        course.course_reference_number,
        index,
        start_utc.timestamp()
    );

    let mut event_content = String::new();
    event_content.push_str("BEGIN:VEVENT\r\n");
    event_content.push_str(&format!("UID:{}\r\n", uid));
    event_content.push_str(&format!("DTSTART:{}\r\n", start_str));
    event_content.push_str(&format!("DTEND:{}\r\n", end_str));
    event_content.push_str(&format!("SUMMARY:{}\r\n", escape_ics_text(&event_title)));
    event_content.push_str(&format!(
        "DESCRIPTION:{}\r\n",
        escape_ics_text(&description)
    ));
    event_content.push_str(&format!("LOCATION:{}\r\n", escape_ics_text(&location)));

    // Holiday descriptions stay empty unless a recurrence rule is written below.
    let mut holiday_names = Vec::new();

    // Add recurrence rule if there are specific days and times
    if !meeting_time.days.is_empty() && meeting_time.time_range.is_some() {
        let by_day: Vec<String> = meeting_time
            .days_of_week()
            .iter()
            .map(|day| day.to_short_string().to_uppercase())
            .collect();

        if !by_day.is_empty() {
            // UNTIL is an inclusive bound. Using midnight (T000000Z) of the end date would
            // cut off every meeting held on the end date itself, because those start later
            // in the day; bound at the last second of that day instead.
            let until_date = meeting_time
                .date_range
                .end
                .format("%Y%m%dT235959Z")
                .to_string();
            event_content.push_str(&format!(
                "RRULE:FREQ=WEEKLY;BYDAY={};UNTIL={}\r\n",
                by_day.join(","),
                until_date
            ));

            // Add holiday exceptions (EXDATE) if the class would meet on holiday dates
            let holiday_exceptions = get_holiday_exceptions(meeting_time);
            if let Some(exdate_property) = generate_exdate_property(&holiday_exceptions, start_utc)
            {
                event_content.push_str(&format!("{}\r\n", exdate_property));
            }

            // Collect holiday names for reporting: every holiday of every year the meeting
            // spans is checked against the excluded dates.
            let start_year = meeting_time.date_range.start.year();
            let end_year = meeting_time.date_range.end.year();
            let all_holidays: Vec<_> = (start_year..=end_year)
                .flat_map(compute_holidays_for_year)
                .collect();
            for (holiday_name, holiday_dates) in &all_holidays {
                for &exception_date in &holiday_exceptions {
                    if holiday_dates.contains(&exception_date) {
                        holiday_names.push(format!(
                            "{} ({})",
                            holiday_name,
                            exception_date.format("%a, %b %d")
                        ));
                    }
                }
            }
            holiday_names.sort();
            holiday_names.dedup();
        }
    }

    // Event footer
    event_content.push_str("END:VEVENT\r\n");
    Ok((event_content, holiday_names))
}
/// Report whether `date` falls on one of the meeting's scheduled days.
///
/// The date's weekday is converted into a `MeetingDays` value and tested for membership in
/// `meeting_time.days`; the meeting's date range is not consulted here.
fn class_meets_on_date(meeting_time: &MeetingScheduleInfo, date: NaiveDate) -> bool {
    meeting_time
        .days
        .contains(Into::<MeetingDays>::into(date.weekday()))
}
/// Get holiday dates that fall within the course date range and would conflict with class meetings
fn get_holiday_exceptions(meeting_time: &MeetingScheduleInfo) -> Vec<NaiveDate> {
let start_year = meeting_time.date_range.start.year();
let end_year = meeting_time.date_range.end.year();
(start_year..=end_year)
.flat_map(compute_holidays_for_year)
.flat_map(|(_, dates)| dates)
.filter(|&date| {
date >= meeting_time.date_range.start
&& date <= meeting_time.date_range.end
&& class_meets_on_date(meeting_time, date)
})
.collect()
}
/// Build the `EXDATE:` property line (without a trailing CRLF) for the given dates.
///
/// Each exception date is combined with the time of day of `start_time` and written in
/// the `YYYYMMDDTHHMMSSZ` form; entries are comma separated. Returns `None` when there is
/// nothing to exclude.
fn generate_exdate_property(
    exceptions: &[NaiveDate],
    start_time: chrono::DateTime<Utc>,
) -> Option<String> {
    if exceptions.is_empty() {
        return None;
    }

    // Excluded occurrences use the same time of day as the first occurrence.
    let time_of_day = start_time.time();
    let stamps: Vec<String> = exceptions
        .iter()
        .map(|day| {
            day.and_time(time_of_day)
                .and_utc()
                .format("%Y%m%dT%H%M%SZ")
                .to_string()
        })
        .collect();

    Some(format!("EXDATE:{}", stamps.join(",")))
}
/// Escape text for use as an ICS property value.
///
/// Backslash, semicolon and comma are prefixed with a backslash, a line feed becomes the
/// two characters `\n`, and carriage returns are dropped.
fn escape_ics_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            ';' => escaped.push_str("\\;"),
            ',' => escaped.push_str("\\,"),
            '\n' => escaped.push_str("\\n"),
            // Carriage returns are removed entirely.
            '\r' => {}
            other => escaped.push(other),
        }
    }
    escaped
}
+11
View File
@@ -0,0 +1,11 @@
//! Bot commands module.
pub mod gcal;
pub mod ics;
pub mod search;
pub mod terms;
pub use gcal::gcal;
pub use ics::ics;
pub use search::search;
pub use terms::terms;
+249
View File
@@ -0,0 +1,249 @@
//! Course search command implementation.
use crate::banner::{SearchQuery, Term};
use crate::bot::{Context, Error};
use anyhow::anyhow;
use regex::Regex;
use std::sync::LazyLock;
use tracing::info;
/// Matches a complete course-number range: `LOW-HIGH`, or the open-ended `LOW-`.
///
/// The pattern is anchored on both sides. Without anchors the regex matched any
/// substring, so inputs such as `12345-` (read as `2345-`) or `3000-3999abc` were
/// silently accepted.
static RANGE_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^(\d{1,4})-(\d{1,4})?$").expect("range pattern is a valid regex")
});

/// Matches a complete wildcard course number: leading digits followed by one or more `x`
/// placeholders (e.g. `3xxx`, `34xx`). Anchored for the same reason as [`RANGE_RE`].
static WILDCARD_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\d+)(x+)$").expect("wildcard pattern is a valid regex"));
/// Search for courses with various filters
#[poise::command(slash_command, prefix_command)]
pub async fn search(
    ctx: Context<'_>,
    #[description = "Course title (exact, use autocomplete)"] title: Option<String>,
    #[description = "Course code (e.g. 3743, 3000-3999, 3xxx, 3000-)"] code: Option<String>,
    #[description = "Maximum number of results"] max: Option<i32>,
    #[description = "Keywords in title or description (space separated)"] keywords: Option<String>,
    // #[description = "Instructor name"] instructor: Option<String>,
    // #[description = "Subject (e.g Computer Science/CS, Mathematics/MAT)"] subject: Option<String>,
) -> Result<(), Error> {
    // NOTE: the `///` line above is left untouched on purpose; poise derives the command
    // description from it, so it is effectively runtime text.

    // Largest number of results a single invocation may request.
    const MAX_RESULTS_CAP: i32 = 25;

    // Defer the response since this might take a while
    ctx.defer().await?;

    // Build the search query — no default credit filter so all courses are visible
    let mut query = SearchQuery::new();
    if let Some(title) = title {
        query = query.title(title);
    }
    if let Some(code) = code {
        // Invalid course-code input is reported back to the caller as an error.
        let (low, high) = parse_course_code(&code)?;
        query = query.course_numbers(low, high);
    }
    if let Some(keywords) = keywords {
        let keyword_list: Vec<String> =
            keywords.split_whitespace().map(|s| s.to_string()).collect();
        query = query.keywords(keyword_list);
    }
    if let Some(max_results) = max {
        // Clamp on both sides: the previous `min(25)` let zero and negative values through
        // to the query unchanged.
        query = query.max_results(max_results.clamp(1, MAX_RESULTS_CAP));
    }

    let term = Term::get_current().inner().to_string();
    let search_result = ctx
        .data()
        .app_state
        .banner_api
        .search(&term, &query, "subjectDescription", false)
        .await?;

    // A missing result list and an empty one are reported the same way.
    // NOTE(review): with up to 25 lines the reply could get long; confirm it stays within
    // Discord's message size limit for long course titles.
    let response = match search_result.data {
        Some(courses) if !courses.is_empty() => courses
            .iter()
            .map(|course| {
                format!(
                    "**{}**: {} ({})",
                    course.display_title(),
                    course.primary_instructor_name(),
                    course.course_reference_number
                )
            })
            .collect::<Vec<_>>()
            .join("\n"),
        _ => "No courses found with the specified criteria.".to_string(),
    };

    ctx.say(response).await?;
    info!("search command completed");
    Ok(())
}
/// Parse a course-code filter into an inclusive `(low, high)` pair.
///
/// Accepted forms, after trimming surrounding whitespace:
/// - a single four-character code (`"3743"`) → `(3743, 3743)`
/// - a range (`"3000-3999"`) or an open-ended range (`"3000-"`, upper bound 9999)
/// - a four-character wildcard (`"3xxx"`, `"34xx"`), where each `x` stands for any digit
///
/// # Errors
///
/// Returns an error when the input matches none of these forms, when a number fails to
/// parse, when a range is inverted, or when a bound lies outside 1000..=9999.
fn parse_course_code(input: &str) -> Result<(i32, i32), Error> {
    let code = input.trim();

    // Range form: anything containing a dash is handled here and nowhere else.
    if code.contains('-') {
        let Some(parts) = RANGE_RE.captures(code) else {
            return Err(anyhow!("Invalid range format"));
        };
        let lower: i32 = parts[1].parse()?;
        let upper: i32 = match parts.get(2) {
            Some(digits) => digits.as_str().parse()?,
            // Open-ended range
            None => 9999,
        };
        if lower > upper {
            return Err(anyhow!("Invalid range: low value greater than high value"));
        }
        if lower < 1000 || upper > 9999 {
            return Err(anyhow!("Course codes must be between 1000 and 9999"));
        }
        return Ok((lower, upper));
    }

    // Wildcard form: digits followed by `x` placeholders, four characters in total.
    if code.contains('x') {
        if code.len() != 4 {
            return Err(anyhow!("Wildcard format must be exactly 4 characters"));
        }
        let Some(parts) = WILDCARD_RE.captures(code) else {
            return Err(anyhow!("Invalid wildcard format"));
        };
        let prefix: i32 = parts[1].parse()?;
        // Each placeholder widens the covered span by a factor of ten.
        let span = 10_i32.pow(parts[2].len() as u32);
        let lower = prefix * span;
        let upper = lower + span - 1;
        if lower < 1000 || upper > 9999 {
            return Err(anyhow!("Course codes must be between 1000 and 9999"));
        }
        return Ok((lower, upper));
    }

    // Single code: exactly four characters that parse as a number in range.
    if code.len() != 4 {
        return Err(anyhow!("Invalid course code format"));
    }
    let number: i32 = code.parse()?;
    if !(1000..=9999).contains(&number) {
        return Err(anyhow!("Course codes must be between 1000 and 9999"));
    }
    Ok((number, number))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input`, panicking when it is rejected.
    fn accepted(input: &str) -> (i32, i32) {
        parse_course_code(input).unwrap()
    }

    /// `true` when `input` is rejected by the parser.
    fn rejected(input: &str) -> bool {
        parse_course_code(input).is_err()
    }

    // --- Single codes ---

    #[test]
    fn test_parse_single_code() {
        assert_eq!(accepted("3743"), (3743, 3743));
    }

    #[test]
    fn test_parse_single_code_boundaries() {
        assert_eq!(accepted("1000"), (1000, 1000));
        assert_eq!(accepted("9999"), (9999, 9999));
    }

    #[test]
    fn test_parse_single_code_below_range() {
        assert!(rejected("0999"));
    }

    #[test]
    fn test_parse_single_code_wrong_length() {
        assert!(rejected("123"));
    }

    #[test]
    fn test_parse_single_code_non_numeric() {
        assert!(rejected("abcd"));
    }

    #[test]
    fn test_parse_single_code_trimmed() {
        assert_eq!(accepted(" 3743 "), (3743, 3743));
    }

    // --- Ranges ---

    #[test]
    fn test_parse_range_full() {
        assert_eq!(accepted("3000-3999"), (3000, 3999));
    }

    #[test]
    fn test_parse_range_same() {
        assert_eq!(accepted("3000-3000"), (3000, 3000));
    }

    #[test]
    fn test_parse_range_open() {
        assert_eq!(accepted("3000-"), (3000, 9999));
    }

    #[test]
    fn test_parse_range_inverted() {
        assert!(rejected("5000-3000"));
    }

    #[test]
    fn test_parse_range_below_1000() {
        assert!(rejected("500-999"));
    }

    #[test]
    fn test_parse_range_above_9999() {
        assert!(rejected("9000-10000"));
    }

    #[test]
    fn test_parse_range_full_valid() {
        assert_eq!(accepted("1000-9999"), (1000, 9999));
    }

    // --- Wildcards ---

    #[test]
    fn test_parse_wildcard_one_x() {
        assert_eq!(accepted("300x"), (3000, 3009));
    }

    #[test]
    fn test_parse_wildcard_two_x() {
        assert_eq!(accepted("30xx"), (3000, 3099));
    }

    #[test]
    fn test_parse_wildcard_three_x() {
        assert_eq!(accepted("3xxx"), (3000, 3999));
    }

    #[test]
    fn test_parse_wildcard_9xxx() {
        assert_eq!(accepted("9xxx"), (9000, 9999));
    }

    #[test]
    fn test_parse_wildcard_wrong_length() {
        assert!(rejected("3xxxx"));
    }

    #[test]
    fn test_parse_wildcard_0xxx() {
        assert!(rejected("0xxx"));
    }
}
+59
View File
@@ -0,0 +1,59 @@
//! Terms command implementation.
use crate::banner::{BannerTerm, Term};
use crate::bot::{Context, Error};
use tracing::info;
/// List available terms or search for a specific term
#[poise::command(slash_command, prefix_command)]
pub async fn terms(
    ctx: Context<'_>,
    #[description = "Term to search for"] search: Option<String>,
    #[description = "Page number"] page: Option<i32>,
) -> Result<(), Error> {
    // The reply is deferred first, before the term lookup is awaited.
    ctx.defer().await?;

    // No search text means an empty filter; pages below 1 are raised to 1.
    let query = search.unwrap_or_default();
    let page_index = page.map_or(1, |requested| requested.max(1));
    let page_size = 10;

    let found = ctx
        .data()
        .app_state
        .banner_api
        .sessions
        .get_terms(&query, page_index, page_size)
        .await?;

    let response = if found.is_empty() {
        "No terms found.".to_string()
    } else {
        // The current term code is resolved once and used to tag the matching entry.
        let current_code = Term::get_current().inner().to_string();
        let mut lines = Vec::new();
        for term in found.iter() {
            lines.push(format_term(term, &current_code));
        }
        lines.join("\n")
    };

    ctx.say(response).await?;
    info!("terms command completed");
    Ok(())
}
/// Render one term as a Markdown list item: ``- `<code>`: <description>``.
///
/// `" (current)"` is appended when the term's code equals `current_term_code`, followed
/// by `" (archived)"` when `term.is_archived()` reports true.
fn format_term(term: &BannerTerm, current_term_code: &str) -> String {
    let mut line = format!("- `{}`: {}", term.code, term.description);
    if term.code == current_term_code {
        line.push_str(" (current)");
    }
    if term.is_archived() {
        line.push_str(" (archived)");
    }
    line
}
+20
View File
@@ -0,0 +1,20 @@
use crate::error::Error;
use crate::state::AppState;
pub mod commands;
pub mod utils;
/// Shared state handed to poise; commands reach it through `ctx.data()`.
pub struct Data {
/// Application state; commands in this crate use it to reach `banner_api` and course lookups.
pub app_state: AppState,
} // User data, which is stored and accessible in all command invocations
/// Command context alias: poise's `Context` specialised to this bot's `Data` and `Error` types.
pub type Context<'a> = poise::Context<'a, Data, Error>;
/// Build the list of bot commands: `search`, `terms`, `ics` and `gcal`, in that order.
pub fn get_commands() -> Vec<poise::Command<Data, Error>> {
    Vec::from([
        commands::search(),
        commands::terms(),
        commands::ics(),
        commands::gcal(),
    ])
}
+24
View File
@@ -0,0 +1,24 @@
//! Bot command utilities.
use crate::banner::{Course, Term};
use crate::bot::Context;
use crate::error::Result;
use tracing::error;
/// Look up a course by CRN in the current term.
///
/// The term comes from `Term::get_current()`; the lookup itself is delegated to
/// `AppState::get_course_or_fetch`. A failed lookup is logged at error level and the
/// error is returned unchanged.
pub async fn get_course_by_crn(ctx: &Context<'_>, crn: i32) -> Result<Course> {
    // Resolve the current term at call time rather than caching it.
    let term_code = Term::get_current().inner().to_string();
    let crn_text = crn.to_string();

    let lookup = ctx
        .data()
        .app_state
        .get_course_or_fetch(&term_code, &crn_text)
        .await;

    if let Err(e) = &lookup {
        error!(error = %e, crn = %crn, "failed to fetch course data");
    }
    lookup
}
+462
View File
@@ -0,0 +1,462 @@
//! Shared calendar generation logic for ICS files and Google Calendar URLs.
//!
//! Used by both the Discord bot commands and the web API endpoints.
use crate::data::models::DbMeetingTime;
use chrono::{Datelike, Duration, NaiveDate, NaiveTime, Weekday};
/// Course metadata needed for calendar generation (shared interface between bot and web).
pub struct CalendarCourse {
    /// Course reference number; written into event descriptions and UIDs.
    pub crn: String,
    /// Subject code, e.g. "CS".
    pub subject: String,
    /// Course number, e.g. "1083".
    pub course_number: String,
    /// Human-readable course title.
    pub title: String,
    /// Section/sequence number, e.g. "001"; appended to the file name when present.
    pub sequence_number: Option<String>,
    /// Primary instructor name; callers substitute "Staff" when absent.
    pub primary_instructor: Option<String>,
}

impl CalendarCourse {
    /// Display title like "CS 1083 - Introduction to Computer Science"
    pub fn display_title(&self) -> String {
        [
            self.subject.as_str(),
            " ",
            self.course_number.as_str(),
            " - ",
            self.title.as_str(),
        ]
        .concat()
    }

    /// File-name stem such as "CS_1083_001".
    ///
    /// Spaces in the subject become underscores; the sequence number is appended with a
    /// leading underscore only when it is present.
    pub fn filename_stem(&self) -> String {
        let mut stem = self.subject.replace(' ', "_");
        stem.push('_');
        stem.push_str(&self.course_number);
        if let Some(sequence) = self.sequence_number.as_deref() {
            stem.push('_');
            stem.push_str(sequence);
        }
        stem
    }
}
// ---------------------------------------------------------------------------
// Date parsing helpers
// ---------------------------------------------------------------------------
/// Parse a date written as `MM/DD/YYYY` or, failing that, as `YYYY-MM-DD`.
///
/// Returns `None` when the text matches neither layout.
fn parse_date(s: &str) -> Option<NaiveDate> {
    // Tried in order; the first layout that parses wins.
    const LAYOUTS: [&str; 2] = ["%m/%d/%Y", "%Y-%m-%d"];
    LAYOUTS
        .iter()
        .find_map(|layout| NaiveDate::parse_from_str(s, layout).ok())
}
/// Parse an HHMM time string into `NaiveTime`.
///
/// The input must consist of exactly four ASCII digits (`"0930"`, `"1745"`). Anything
/// else — wrong length, signs, non-ASCII characters, or an hour/minute out of range —
/// yields `None`.
fn parse_hhmm(s: &str) -> Option<NaiveTime> {
    // Checking for ASCII digits up front does two things: it rejects inputs such as
    // "+1+5" that the integer parser would accept, and it guarantees that byte index 2 is
    // a character boundary, so the split below cannot panic on multi-byte text.
    if s.len() != 4 || !s.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    let (hh, mm) = s.split_at(2);
    let hours = hh.parse::<u32>().ok()?;
    let minutes = mm.parse::<u32>().ok()?;
    NaiveTime::from_hms_opt(hours, minutes, 0)
}
/// List the weekdays on which the meeting takes place, Monday first through Sunday.
fn active_weekdays(mt: &DbMeetingTime) -> Vec<Weekday> {
    [
        (mt.monday, Weekday::Mon),
        (mt.tuesday, Weekday::Tue),
        (mt.wednesday, Weekday::Wed),
        (mt.thursday, Weekday::Thu),
        (mt.friday, Weekday::Fri),
        (mt.saturday, Weekday::Sat),
        (mt.sunday, Weekday::Sun),
    ]
    .into_iter()
    .filter_map(|(meets, day)| meets.then_some(day))
    .collect()
}
/// ICS two-letter day code for RRULE BYDAY.
fn ics_day_code(day: Weekday) -> &'static str {
match day {
Weekday::Mon => "MO",
Weekday::Tue => "TU",
Weekday::Wed => "WE",
Weekday::Thu => "TH",
Weekday::Fri => "FR",
Weekday::Sat => "SA",
Weekday::Sun => "SU",
}
}
/// Human-readable location for a meeting.
///
/// Uses the building description when available, otherwise the building code, followed
/// by the room. When nothing but whitespace remains the location is reported as
/// `"Online"`.
fn location_string(mt: &DbMeetingTime) -> String {
    let building = match (&mt.building_description, &mt.building) {
        (Some(description), _) => description.as_str(),
        (None, Some(code)) => code.as_str(),
        (None, None) => "",
    };
    let room = mt.room.as_deref().unwrap_or_default();

    // Trimming removes the separator when either part is missing.
    match format!("{building} {room}").trim() {
        "" => "Online".to_string(),
        place => place.to_string(),
    }
}
/// Days display string (e.g. "MWF", "TTh").
fn days_display(mt: &DbMeetingTime) -> String {
let weekdays = active_weekdays(mt);
if weekdays.is_empty() {
return "TBA".to_string();
}
weekdays
.iter()
.map(|d| ics_day_code(*d))
.collect::<Vec<_>>()
.join("")
}
/// Escape text for ICS property values.
///
/// Backslash, semicolon and comma get a leading backslash, a line feed is written as the
/// two characters `\n`, and carriage returns are removed.
fn escape_ics(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut out, c| {
            match c {
                // Carriage returns are dropped outright.
                '\r' => {}
                '\\' | ';' | ',' => {
                    out.push('\\');
                    out.push(c);
                }
                '\n' => out.push_str("\\n"),
                _ => out.push(c),
            }
            out
        })
}
// ---------------------------------------------------------------------------
// University holidays (ported from bot/commands/ics.rs)
// ---------------------------------------------------------------------------
/// Find the nth occurrence of a weekday in a given month/year (1-based).
///
/// Returns `None` when `month` is invalid, when `n` is 0, or when the month has no such
/// occurrence (for example a fifth Monday in a month that only has four).
fn nth_weekday_of_month(year: i32, month: u32, weekday: Weekday, n: u32) -> Option<NaiveDate> {
    let first = NaiveDate::from_ymd_opt(year, month, 1)?;

    // `n` is 1-based. `checked_sub` turns n == 0 into `None` instead of an arithmetic
    // underflow (a panic in debug builds, a wrap-around in release builds).
    let full_weeks = n.checked_sub(1)?;

    // Days from the 1st of the month to the first matching weekday, in 0..=6.
    let days_ahead =
        (weekday.num_days_from_monday() + 7 - first.weekday().num_days_from_monday()) % 7;

    let day = full_weeks.checked_mul(7)?.checked_add(1 + days_ahead)?;
    NaiveDate::from_ymd_opt(year, month, day)
}
/// Return `count` consecutive dates beginning at `start` (inclusive).
///
/// Offsets whose date cannot be represented are skipped; a non-positive `count` yields
/// an empty list.
fn date_range(start: NaiveDate, count: i64) -> Vec<NaiveDate> {
    let mut days = Vec::new();
    for offset in 0..count {
        if let Some(day) = start.checked_add_signed(Duration::days(offset)) {
            days.push(day);
        }
    }
    days
}
/// Compute the university holidays of one calendar year.
///
/// Each entry pairs a holiday name with the dates it covers. Entries whose date cannot
/// be computed are left out. The order is: Labor Day, Fall Break, Day Before
/// Thanksgiving, Thanksgiving, Winter Holiday, New Year's Day, MLK Day, Spring Break.
fn compute_holidays_for_year(year: i32) -> Vec<(&'static str, Vec<NaiveDate>)> {
    // 4th Thursday of November; shared by the two Thanksgiving-related entries.
    let thanksgiving = nth_weekday_of_month(year, 11, Weekday::Thu, 4);

    let candidates: [Option<(&'static str, Vec<NaiveDate>)>; 8] = [
        // Labor Day: 1st Monday of September
        nth_weekday_of_month(year, 9, Weekday::Mon, 1).map(|day| ("Labor Day", vec![day])),
        // Fall Break: two days starting on the 2nd Monday of October
        nth_weekday_of_month(year, 10, Weekday::Mon, 2)
            .map(|monday| ("Fall Break", date_range(monday, 2))),
        // The Wednesday before Thanksgiving
        thanksgiving
            .and_then(|thursday| thursday.checked_sub_signed(Duration::days(1)))
            .map(|wednesday| ("Day Before Thanksgiving", vec![wednesday])),
        // Thanksgiving: Thursday plus the following Friday
        thanksgiving.map(|thursday| ("Thanksgiving", date_range(thursday, 2))),
        // Winter Holiday: Dec 23-31
        NaiveDate::from_ymd_opt(year, 12, 23)
            .map(|first_day| ("Winter Holiday", date_range(first_day, 9))),
        // New Year's Day
        NaiveDate::from_ymd_opt(year, 1, 1).map(|day| ("New Year's Day", vec![day])),
        // MLK Day: 3rd Monday of January
        nth_weekday_of_month(year, 1, Weekday::Mon, 3).map(|day| ("MLK Day", vec![day])),
        // Spring Break: six days starting on the 2nd Monday of March
        nth_weekday_of_month(year, 3, Weekday::Mon, 2)
            .map(|monday| ("Spring Break", date_range(monday, 6))),
    ];

    candidates.into_iter().flatten().collect()
}
/// Get holiday dates within a date range that fall on specific weekdays.
fn holiday_exceptions(start: NaiveDate, end: NaiveDate, weekdays: &[Weekday]) -> Vec<NaiveDate> {
let start_year = start.year();
let end_year = end.year();
(start_year..=end_year)
.flat_map(compute_holidays_for_year)
.flat_map(|(_, dates)| dates)
.filter(|&date| date >= start && date <= end && weekdays.contains(&date.weekday()))
.collect()
}
/// Describe the excluded dates for user-facing messages.
///
/// Every date in `exceptions` that belongs to a holiday of the years spanned by `start`
/// and `end` becomes `"<holiday name> (<%a, %b %d>)"`. The result is sorted and free of
/// duplicates.
fn excluded_holiday_names(
    start: NaiveDate,
    end: NaiveDate,
    exceptions: &[NaiveDate],
) -> Vec<String> {
    let mut names: Vec<String> = (start.year()..=end.year())
        .flat_map(compute_holidays_for_year)
        .flat_map(|(holiday_name, holiday_dates)| {
            exceptions
                .iter()
                .filter(move |&&day| holiday_dates.contains(&day))
                .map(move |day| format!("{} ({})", holiday_name, day.format("%a, %b %d")))
        })
        .collect();

    names.sort();
    names.dedup();
    names
}
// ---------------------------------------------------------------------------
// ICS generation
// ---------------------------------------------------------------------------
/// Result from ICS generation, including the file content and excluded holiday names.
pub struct IcsResult {
/// Complete ICS document text (lines are terminated with CRLF).
pub content: String,
/// Suggested file name: the course's `filename_stem()` followed by `.ics`.
pub filename: String,
/// Holiday dates excluded via EXDATE rules, for user-facing messages.
// Entries look like "<holiday name> (<%a, %b %d date>)". The dead_code allowance is
// presumably there because not every consumer reads this field yet — TODO confirm.
#[allow(dead_code)]
pub excluded_holidays: Vec<String>,
}
/// Generate an ICS calendar file for a course.
///
/// The calendar contains one VEVENT per entry of `meeting_times`, in slice order. The
/// returned [`IcsResult`] carries the document text, a file name derived from the course
/// and the holiday descriptions collected from all events (concatenated per meeting).
///
/// # Errors
///
/// Propagates the first error from `generate_ics_event`, e.g. an unparsable start or
/// end date.
pub fn generate_ics(
    course: &CalendarCourse,
    meeting_times: &[DbMeetingTime],
) -> Result<IcsResult, anyhow::Error> {
    // Header
    let mut content = String::from(concat!(
        "BEGIN:VCALENDAR\r\n",
        "VERSION:2.0\r\n",
        "PRODID:-//Banner Bot//Course Calendar//EN\r\n",
        "CALSCALE:GREGORIAN\r\n",
        "METHOD:PUBLISH\r\n",
    ));
    content += &format!(
        "X-WR-CALNAME:{}\r\n",
        escape_ics(&course.display_title())
    );

    // Events
    let mut excluded_holidays = Vec::new();
    for (position, meeting) in meeting_times.iter().enumerate() {
        let (vevent, skipped) = generate_ics_event(course, meeting, position)?;
        content += &vevent;
        excluded_holidays.extend(skipped);
    }

    // Footer
    content += "END:VCALENDAR\r\n";

    let filename = format!("{}.ics", course.filename_stem());
    Ok(IcsResult {
        content,
        filename,
        excluded_holidays,
    })
}
/// Generate a single VEVENT for one meeting time.
fn generate_ics_event(
course: &CalendarCourse,
mt: &DbMeetingTime,
index: usize,
) -> Result<(String, Vec<String>), anyhow::Error> {
let start_date = parse_date(&mt.start_date)
.ok_or_else(|| anyhow::anyhow!("Invalid start_date: {}", mt.start_date))?;
let end_date = parse_date(&mt.end_date)
.ok_or_else(|| anyhow::anyhow!("Invalid end_date: {}", mt.end_date))?;
let start_time = mt.begin_time.as_deref().and_then(parse_hhmm);
let end_time = mt.end_time.as_deref().and_then(parse_hhmm);
// DTSTART/DTEND: first occurrence with time, or all-day on start_date
let (dtstart, dtend) = match (start_time, end_time) {
(Some(st), Some(et)) => {
let s = start_date.and_time(st).and_utc();
let e = start_date.and_time(et).and_utc();
(
s.format("%Y%m%dT%H%M%SZ").to_string(),
e.format("%Y%m%dT%H%M%SZ").to_string(),
)
}
_ => {
let s = start_date.and_hms_opt(0, 0, 0).unwrap().and_utc();
let e = start_date.and_hms_opt(0, 0, 0).unwrap().and_utc();
(
s.format("%Y%m%dT%H%M%SZ").to_string(),
e.format("%Y%m%dT%H%M%SZ").to_string(),
)
}
};
let event_title = if index > 0 {
format!("{} (Meeting {})", course.display_title(), index + 1)
} else {
course.display_title()
};
let instructor = course.primary_instructor.as_deref().unwrap_or("Staff");
let description = format!(
"CRN: {}\\nInstructor: {}\\nDays: {}\\nMeeting Type: {}",
course.crn,
instructor,
days_display(mt),
mt.meeting_type,
);
let location = location_string(mt);
let uid = format!(
"{}-{}-{}@banner-bot.local",
course.crn,
index,
start_date
.and_hms_opt(0, 0, 0)
.unwrap()
.and_utc()
.timestamp()
);
let mut event = String::new();
event.push_str("BEGIN:VEVENT\r\n");
event.push_str(&format!("UID:{uid}\r\n"));
event.push_str(&format!("DTSTART:{dtstart}\r\n"));
event.push_str(&format!("DTEND:{dtend}\r\n"));
event.push_str(&format!("SUMMARY:{}\r\n", escape_ics(&event_title)));
event.push_str(&format!("DESCRIPTION:{}\r\n", escape_ics(&description)));
event.push_str(&format!("LOCATION:{}\r\n", escape_ics(&location)));
let weekdays = active_weekdays(mt);
let mut holiday_names = Vec::new();
if let (false, Some(st)) = (weekdays.is_empty(), start_time) {
let by_day: Vec<&str> = weekdays.iter().map(|d| ics_day_code(*d)).collect();
let until = end_date.format("%Y%m%dT000000Z").to_string();
event.push_str(&format!(
"RRULE:FREQ=WEEKLY;BYDAY={};UNTIL={}\r\n",
by_day.join(","),
until,
));
// Holiday exceptions
let exceptions = holiday_exceptions(start_date, end_date, &weekdays);
if !exceptions.is_empty() {
let start_utc = start_date.and_time(st).and_utc();
let exdates: Vec<String> = exceptions
.iter()
.map(|&d| {
d.and_time(start_utc.time())
.and_utc()
.format("%Y%m%dT%H%M%SZ")
.to_string()
})
.collect();
event.push_str(&format!("EXDATE:{}\r\n", exdates.join(",")));
}
holiday_names = excluded_holiday_names(start_date, end_date, &exceptions);
}
event.push_str("END:VEVENT\r\n");
Ok((event, holiday_names))
}
// ---------------------------------------------------------------------------
// Google Calendar URL generation
// ---------------------------------------------------------------------------
/// Generate a Google Calendar "add event" URL for a single meeting time.
pub fn generate_gcal_url(
course: &CalendarCourse,
mt: &DbMeetingTime,
) -> Result<String, anyhow::Error> {
let start_date = parse_date(&mt.start_date)
.ok_or_else(|| anyhow::anyhow!("Invalid start_date: {}", mt.start_date))?;
let end_date = parse_date(&mt.end_date)
.ok_or_else(|| anyhow::anyhow!("Invalid end_date: {}", mt.end_date))?;
let start_time = mt.begin_time.as_deref().and_then(parse_hhmm);
let end_time = mt.end_time.as_deref().and_then(parse_hhmm);
let dates_text = match (start_time, end_time) {
(Some(st), Some(et)) => {
let s = start_date.and_time(st);
let e = start_date.and_time(et);
format!(
"{}/{}",
s.format("%Y%m%dT%H%M%S"),
e.format("%Y%m%dT%H%M%S")
)
}
_ => {
let s = start_date.format("%Y%m%d").to_string();
format!("{s}/{s}")
}
};
let instructor = course.primary_instructor.as_deref().unwrap_or("Staff");
let details = format!(
"CRN: {}\nInstructor: {}\nDays: {}",
course.crn,
instructor,
days_display(mt),
);
let location = location_string(mt);
let weekdays = active_weekdays(mt);
let recur = if !weekdays.is_empty() && start_time.is_some() {
let by_day: Vec<&str> = weekdays.iter().map(|d| ics_day_code(*d)).collect();
let until = end_date.format("%Y%m%dT000000Z").to_string();
format!(
"RRULE:FREQ=WEEKLY;BYDAY={};UNTIL={}",
by_day.join(","),
until
)
} else {
String::new()
};
let course_text = course.display_title();
let params: Vec<(&str, &str)> = vec![
("action", "TEMPLATE"),
("text", &course_text),
("dates", &dates_text),
("details", &details),
("location", &location),
("trp", "true"),
("ctz", "America/Chicago"),
("recur", &recur),
];
let url = url::Url::parse_with_params("https://calendar.google.com/calendar/render", &params)?;
Ok(url.to_string())
}
+76
View File
@@ -0,0 +1,76 @@
use clap::Parser;
// NOTE(review): the `///` comments on this type are presumably picked up by clap's derive
// as help text, so they are left exactly as written — confirm before rewording them.
/// Banner Discord Bot - Course availability monitoring
///
/// This application runs all services:
/// - bot: Discord bot for course monitoring commands
/// - web: HTTP server for web interface and API
/// - scraper: Background service for scraping course data
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
pub struct Args {
/// Log formatter to use
// The default comes from `default_tracing_format()`, i.e. it depends on whether the
// binary was built with debug assertions.
#[arg(long, value_enum, default_value_t = default_tracing_format())]
pub tracing: TracingFormat,
}
// Log output formats selectable through the `--tracing` argument. Which variant is the
// default is decided at compile time by `DEFAULT_TRACING_FORMAT`.
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum TracingFormat {
/// Use pretty formatter (default in debug mode)
Pretty,
/// Use JSON formatter (default in release mode)
Json,
}
// The services this application can run. `ServiceName::as_str` gives the lowercase
// identifier of each variant and `ServiceName::all` lists them in declaration order.
#[derive(clap::ValueEnum, Clone, Debug, PartialEq)]
pub enum ServiceName {
/// Discord bot for course monitoring commands
Bot,
/// HTTP server for web interface and API
Web,
/// Background service for scraping course data
Scraper,
}
impl ServiceName {
/// Get all available services
pub fn all() -> Vec<ServiceName> {
vec![ServiceName::Bot, ServiceName::Web, ServiceName::Scraper]
}
/// Convert to string for service registration
pub fn as_str(&self) -> &'static str {
match self {
ServiceName::Bot => "bot",
ServiceName::Web => "web",
ServiceName::Scraper => "scraper",
}
}
}
/// Log format used when `--tracing` is not given: pretty output for builds with debug
/// assertions, JSON otherwise.
const DEFAULT_TRACING_FORMAT: TracingFormat = if cfg!(debug_assertions) {
    TracingFormat::Pretty
} else {
    TracingFormat::Json
};

/// Default for `Args::tracing`; see [`DEFAULT_TRACING_FORMAT`].
fn default_tracing_format() -> TracingFormat {
    DEFAULT_TRACING_FORMAT
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Each service maps to its lowercase identifier.
    #[test]
    fn test_service_name_as_str() {
        let expected = [
            (ServiceName::Bot, "bot"),
            (ServiceName::Web, "web"),
            (ServiceName::Scraper, "scraper"),
        ];
        for (service, name) in expected {
            assert_eq!(service.as_str(), name);
        }
    }

    /// `all()` lists exactly three services.
    #[test]
    fn test_service_name_all() {
        assert_eq!(ServiceName::all().len(), 3);
    }
}
+365
View File
@@ -0,0 +1,365 @@
//! Configuration module for the banner application.
//!
//! This module handles loading and parsing configuration from environment variables
//! using the figment crate. It supports flexible duration parsing that accepts both
//! numeric values (interpreted as seconds) and duration strings with units.
use fundu::{DurationParser, TimeUnit};
use serde::{Deserialize, Deserializer};
use std::time::Duration;
/// Main application configuration containing all sub-configurations
///
/// Deserialized with serde. Fields carrying a `#[serde(default…)]` attribute are optional;
/// all others (`database_url`, `bot_token`, `bot_target_guild`, `discord_client_id`,
/// `discord_client_secret`) have no default.
#[derive(Deserialize)]
pub struct Config {
/// Log level for the application
///
/// This value is used to set the log level for this application's target specifically.
/// e.g. "debug" would be similar to "warn,banner=debug,..."
///
/// Valid values are: "trace", "debug", "info", "warn", "error"
/// Defaults to "info" if not specified
#[serde(default = "default_log_level")]
pub log_level: String,
/// Port for the web server (default: 8080)
#[serde(default = "default_port")]
pub port: u16,
/// Database connection URL
pub database_url: String,
/// Graceful shutdown timeout duration
///
/// Accepts both numeric values (seconds) and duration strings
/// Defaults to 8 seconds if not specified
#[serde(
default = "default_shutdown_timeout",
deserialize_with = "deserialize_duration"
)]
pub shutdown_timeout: Duration,
/// Discord bot token for authentication
pub bot_token: String,
/// Target Discord guild ID where the bot operates
pub bot_target_guild: u64,
/// Base URL for banner generation service
///
/// Defaults to "https://ssbprod.utsa.edu/StudentRegistrationSsb/ssb" if not specified
// NOTE(review): "banner generation service" presumably means the upstream Banner
// registration system rather than something that generates banners — confirm and reword.
#[serde(default = "default_banner_base_url")]
pub banner_base_url: String,
/// Rate limiting configuration for Banner API requests
///
/// Falls back to `RateLimitingConfig::default()` when the whole section is absent.
#[serde(default = "default_rate_limiting")]
pub rate_limiting: RateLimitingConfig,
/// Discord OAuth2 client ID for web authentication
///
/// Stored as a string; read through `deserialize_string_or_uint`.
#[serde(deserialize_with = "deserialize_string_or_uint")]
pub discord_client_id: String,
/// Discord OAuth2 client secret for web authentication
pub discord_client_secret: String,
/// Optional base URL override for OAuth2 redirect (e.g. "https://banner.xevion.dev").
/// When unset, the redirect URI is derived from the incoming request's Origin/Host.
#[serde(default)]
pub discord_redirect_uri: Option<String>,
/// Discord user ID to seed as initial admin on startup (optional)
#[serde(default)]
pub admin_discord_id: Option<u64>,
}
/// Fallback for `Config::log_level` when it is not configured: `"info"`.
fn default_log_level() -> String {
    String::from("info")
}
/// Default port of 8080
///
/// Referenced by the serde `default` attribute on `Config::port`.
fn default_port() -> u16 {
8080
}
/// Default shutdown timeout of 8 seconds
///
/// Referenced by the serde `default` attribute on `Config::shutdown_timeout`.
fn default_shutdown_timeout() -> Duration {
Duration::from_secs(8)
}
/// Fallback for `Config::banner_base_url` when it is not configured.
fn default_banner_base_url() -> String {
    String::from("https://ssbprod.utsa.edu/StudentRegistrationSsb/ssb")
}
/// Rate limiting configuration for Banner API requests
///
/// Every field has its own serde default, so a partially specified section still
/// deserializes; the `Default` impl uses the same default functions.
#[derive(Deserialize, Clone, Debug, PartialEq, Eq)]
pub struct RateLimitingConfig {
/// Requests per minute for session operations (very conservative)
#[serde(default = "default_session_rpm")]
pub session_rpm: u32,
/// Requests per minute for search operations (moderate)
#[serde(default = "default_search_rpm")]
pub search_rpm: u32,
/// Requests per minute for metadata operations (moderate)
#[serde(default = "default_metadata_rpm")]
pub metadata_rpm: u32,
/// Requests per minute for reset operations (low priority)
#[serde(default = "default_reset_rpm")]
pub reset_rpm: u32,
/// Burst allowance (extra requests allowed in short bursts)
#[serde(default = "default_burst_allowance")]
pub burst_allowance: u32,
}
/// Default rate limiting configuration
///
/// Referenced by the serde `default` attribute on `Config::rate_limiting`; simply
/// forwards to `RateLimitingConfig::default()`.
fn default_rate_limiting() -> RateLimitingConfig {
RateLimitingConfig::default()
}
// Built from the same per-field default functions that serde uses, so a missing section
// and an empty section produce identical values.
impl Default for RateLimitingConfig {
fn default() -> Self {
Self {
session_rpm: default_session_rpm(),
search_rpm: default_search_rpm(),
metadata_rpm: default_metadata_rpm(),
reset_rpm: default_reset_rpm(),
burst_allowance: default_burst_allowance(),
}
}
}
/// Default session requests per minute (6 = 1 every 10 seconds)
///
/// Used for `RateLimitingConfig::session_rpm`, both by serde and by the `Default` impl.
fn default_session_rpm() -> u32 {
6
}
/// Default search requests per minute (30 = 1 every 2 seconds)
///
/// Used for `RateLimitingConfig::search_rpm`, both by serde and by the `Default` impl.
fn default_search_rpm() -> u32 {
30
}
/// Default metadata requests per minute (20 = 1 every 3 seconds)
///
/// Used for `RateLimitingConfig::metadata_rpm`, both by serde and by the `Default` impl.
fn default_metadata_rpm() -> u32 {
20
}
/// Default reset requests per minute (10 = 1 every 6 seconds)
///
/// Used for `RateLimitingConfig::reset_rpm`, both by serde and by the `Default` impl.
fn default_reset_rpm() -> u32 {
10
}
/// Default burst allowance: 3 extra requests on top of the steady rate.
fn default_burst_allowance() -> u32 {
    const EXTRA_REQUESTS: u32 = 3;
    EXTRA_REQUESTS
}
/// Duration parser for configuration values, with seconds as the default unit.
///
/// Supports:
/// - Seconds (s) - default unit, used when no unit is given
/// - Milliseconds (ms)
/// - Minutes (m)
/// - Hours (h)
///
/// Does not support fractions, exponents, or infinity values.
/// Allows whitespace between the number and the time unit.
/// Allows multiple time units to be specified; they are summed together
/// (e.g. "10s 2m" = 10 + 120 = 130 seconds).
const DURATION_PARSER: DurationParser<'static> = DurationParser::builder()
    .time_units(&[
        TimeUnit::Second,
        TimeUnit::MilliSecond,
        TimeUnit::Minute,
        // Hours were documented as supported but missing from this list,
        // so inputs such as "1h" were rejected.
        TimeUnit::Hour,
    ])
    .parse_multiple(None)
    .allow_time_unit_delimiter()
    .disable_infinity()
    .disable_fraction()
    .disable_exponent()
    .default_unit(TimeUnit::Second)
    .build();
/// Custom deserializer for duration fields that accepts both numeric and string values
///
/// This deserializer handles the flexible duration parsing by accepting:
/// - Unsigned integers (interpreted as seconds)
/// - Signed integers (interpreted as seconds, must be non-negative)
/// - Strings (parsed using the fundu duration parser)
///
/// # Examples
///
/// - `1` -> 1 second
/// - `"30s"` -> 30 seconds
/// - `"2 m"` -> 2 minutes
/// - `"1500ms"` -> 15 seconds
fn deserialize_duration<'de, D>(deserializer: D) -> Result<Duration, D::Error>
where
D: Deserializer<'de>,
{
use serde::de::Visitor;
struct DurationVisitor;
impl<'de> Visitor<'de> for DurationVisitor {
type Value = Duration;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a duration string or number")
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
DURATION_PARSER.parse(value)
.map_err(|e| {
serde::de::Error::custom(format!(
"Invalid duration format '{}': {}. Examples: '5' (5 seconds), '3500ms', '30s', '2m', '1.5h'",
value, e
))
})?
.try_into()
.map_err(|e| serde::de::Error::custom(format!("Duration conversion error: {}", e)))
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(Duration::from_secs(value))
}
fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
if value < 0 {
return Err(serde::de::Error::custom("Duration cannot be negative"));
}
Ok(Duration::from_secs(value as u64))
}
}
deserializer.deserialize_any(DurationVisitor)
}
/// Deserializes a value that may arrive as either a string or unsigned integer.
///
/// Figment's env provider infers types from raw values, so numeric-looking strings
/// like Discord client IDs get parsed as integers. This accepts both forms.
fn deserialize_string_or_uint<'de, D>(deserializer: D) -> Result<String, D::Error>
where
D: Deserializer<'de>,
{
use serde::de::Visitor;
struct StringOrUintVisitor;
impl<'de> Visitor<'de> for StringOrUintVisitor {
type Value = String;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a string or unsigned integer")
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(value.to_owned())
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(value.to_string())
}
}
deserializer.deserialize_any(StringOrUintVisitor)
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Deserialize;

    /// Minimal struct that routes its single field through `deserialize_duration`.
    #[derive(Deserialize)]
    struct DurationWrapper {
        #[serde(deserialize_with = "deserialize_duration")]
        value: Duration,
    }

    /// Parses a JSON document of the form `{"value": ...}` into a `Duration`,
    /// returning the error rendered as text on failure.
    fn parse(json: &str) -> Result<Duration, String> {
        match serde_json::from_str::<DurationWrapper>(json) {
            Ok(wrapper) => Ok(wrapper.value),
            Err(e) => Err(e.to_string()),
        }
    }

    #[test]
    fn test_duration_from_integer_seconds() {
        assert_eq!(parse(r#"{"value": 30}"#).unwrap(), Duration::from_secs(30));
    }

    #[test]
    fn test_duration_from_string_seconds() {
        assert_eq!(
            parse(r#"{"value": "30s"}"#).unwrap(),
            Duration::from_secs(30)
        );
    }

    #[test]
    fn test_duration_from_string_minutes() {
        assert_eq!(
            parse(r#"{"value": "2m"}"#).unwrap(),
            Duration::from_secs(120)
        );
    }

    #[test]
    fn test_duration_from_string_milliseconds() {
        assert_eq!(
            parse(r#"{"value": "1500ms"}"#).unwrap(),
            Duration::from_millis(1500)
        );
    }

    #[test]
    fn test_duration_from_string_with_space() {
        assert_eq!(
            parse(r#"{"value": "2 m"}"#).unwrap(),
            Duration::from_secs(120)
        );
    }

    #[test]
    fn test_duration_from_string_multiple_units() {
        assert_eq!(
            parse(r#"{"value": "1m 30s"}"#).unwrap(),
            Duration::from_secs(90)
        );
    }

    #[test]
    fn test_duration_from_bare_number_string() {
        assert_eq!(
            parse(r#"{"value": "45"}"#).unwrap(),
            Duration::from_secs(45)
        );
    }

    #[test]
    fn test_duration_zero() {
        assert_eq!(parse(r#"{"value": 0}"#).unwrap(), Duration::from_secs(0));
    }

    #[test]
    fn test_duration_negative_rejected() {
        let message = parse(r#"{"value": -5}"#).unwrap_err();
        assert!(
            message.contains("negative"),
            "expected negative error: {message}"
        );
    }

    #[test]
    fn test_duration_invalid_string_rejected() {
        let message = parse(r#"{"value": "notaduration"}"#).unwrap_err();
        assert!(
            message.contains("Invalid duration"),
            "expected invalid format error: {message}"
        );
    }

    #[test]
    fn test_default_config_values() {
        assert_eq!(default_port(), 8080);
        assert_eq!(default_shutdown_timeout(), Duration::from_secs(8));
        assert_eq!(default_log_level(), "info");
    }

    #[test]
    fn test_default_rate_limiting() {
        let limits = default_rate_limiting();
        assert_eq!(limits.session_rpm, 6);
        assert_eq!(limits.search_rpm, 30);
        assert_eq!(limits.metadata_rpm, 20);
        assert_eq!(limits.reset_rpm, 10);
        assert_eq!(limits.burst_allowance, 3);
    }
}
+764
View File
@@ -0,0 +1,764 @@
//! Batch database operations for improved performance.
use crate::banner::Course;
use crate::data::models::{DbMeetingTime, UpsertCounts};
use crate::data::names::{decode_html_entities, parse_banner_name};
use crate::error::Result;
use sqlx::PgConnection;
use sqlx::PgPool;
use std::collections::{HashMap, HashSet};
use std::time::Instant;
use tracing::info;
/// Build the JSONB value stored for a course's meeting times.
///
/// Each entry of `course.meetings_faculty` contributes one `DbMeetingTime` copied
/// field-by-field from its `meeting_time`. If serialization fails, the default
/// JSON value (`null`) is returned instead of an error.
fn to_db_meeting_times(course: &Course) -> serde_json::Value {
    let mut rows: Vec<DbMeetingTime> = Vec::with_capacity(course.meetings_faculty.len());
    for entry in course.meetings_faculty.iter() {
        let src = &entry.meeting_time;
        rows.push(DbMeetingTime {
            begin_time: src.begin_time.clone(),
            end_time: src.end_time.clone(),
            start_date: src.start_date.clone(),
            end_date: src.end_date.clone(),
            monday: src.monday,
            tuesday: src.tuesday,
            wednesday: src.wednesday,
            thursday: src.thursday,
            friday: src.friday,
            saturday: src.saturday,
            sunday: src.sunday,
            building: src.building.clone(),
            building_description: src.building_description.clone(),
            room: src.room.clone(),
            campus: src.campus.clone(),
            meeting_type: src.meeting_type.clone(),
            meeting_schedule_type: src.meeting_schedule_type.clone(),
        });
    }
    match serde_json::to_value(rows) {
        Ok(json) => json,
        Err(_) => serde_json::Value::default(),
    }
}
/// Build the JSONB value stored for a course's section attributes: an array
/// holding the `code` of every attribute, in order.
///
/// If serialization fails, the default JSON value (`null`) is returned.
fn to_db_attributes(course: &Course) -> serde_json::Value {
    let mut codes: Vec<&str> = Vec::new();
    for attribute in course.section_attributes.iter() {
        codes.push(attribute.code.as_str());
    }
    match serde_json::to_value(codes) {
        Ok(json) => json,
        Err(_) => serde_json::Value::default(),
    }
}
/// Return the campus code of the course's first meeting time, if any
/// (Banner doesn't put it on the course directly).
///
/// Yields `None` when the course has no meetings or the first meeting has no campus.
fn extract_campus_code(course: &Course) -> Option<String> {
    let first_meeting = course.meetings_faculty.first()?;
    first_meeting.meeting_time.campus.clone()
}
// ---------------------------------------------------------------------------
// Task 1: UpsertDiffRow — captures pre- and post-upsert state for diffing
// ---------------------------------------------------------------------------
/// Row returned by the CTE-based upsert query, carrying both old and new values
/// for every auditable field. `old_id` is `None` for fresh inserts.
///
/// Field names must match the column aliases produced by the final SELECT of the
/// upsert query (`old_*` / `new_*`), since the struct is populated via `sqlx::FromRow`.
/// Every `old_*` field is an `Option` because the pre-upsert row comes from a LEFT JOIN
/// and is absent for fresh inserts.
#[derive(sqlx::FromRow, Debug)]
struct UpsertDiffRow {
// id of the row after the upsert
id: i32,
// id of the pre-existing row; `None` means the row was freshly inserted
old_id: Option<i32>,
// (crn, term_code) is the conflict key of the upsert
crn: String,
term_code: String,
// enrollment fields
old_enrollment: Option<i32>,
new_enrollment: i32,
old_max_enrollment: Option<i32>,
new_max_enrollment: i32,
old_wait_count: Option<i32>,
new_wait_count: i32,
old_wait_capacity: Option<i32>,
new_wait_capacity: i32,
// text fields (non-nullable in DB)
old_subject: Option<String>,
new_subject: String,
old_course_number: Option<String>,
new_course_number: String,
old_title: Option<String>,
new_title: String,
// nullable text fields
old_sequence_number: Option<String>,
new_sequence_number: Option<String>,
old_part_of_term: Option<String>,
new_part_of_term: Option<String>,
old_instructional_method: Option<String>,
new_instructional_method: Option<String>,
old_campus: Option<String>,
new_campus: Option<String>,
// nullable int fields
old_credit_hours: Option<i32>,
new_credit_hours: Option<i32>,
old_credit_hour_low: Option<i32>,
new_credit_hour_low: Option<i32>,
old_credit_hour_high: Option<i32>,
new_credit_hour_high: Option<i32>,
// cross-list fields
old_cross_list: Option<String>,
new_cross_list: Option<String>,
old_cross_list_capacity: Option<i32>,
new_cross_list_capacity: Option<i32>,
old_cross_list_count: Option<i32>,
new_cross_list_count: Option<i32>,
// link fields
old_link_identifier: Option<String>,
new_link_identifier: Option<String>,
old_is_section_linked: Option<bool>,
new_is_section_linked: Option<bool>,
// JSONB fields
old_meeting_times: Option<serde_json::Value>,
new_meeting_times: serde_json::Value,
old_attributes: Option<serde_json::Value>,
new_attributes: serde_json::Value,
}
// ---------------------------------------------------------------------------
// Task 3: Entry types and diff logic
// ---------------------------------------------------------------------------
/// One field-level change on an existing course, destined for `course_audits`.
struct AuditEntry {
// id of the course whose field changed
course_id: i32,
// name of the changed field (a string literal supplied at the diff call site)
field_changed: &'static str,
// textual rendering of the value before the upsert
old_value: String,
// textual rendering of the value after the upsert
new_value: String,
}
/// One enrollment snapshot for a course, destined for `course_metrics`.
struct MetricEntry {
// id of the course the snapshot belongs to
course_id: i32,
// enrollment after the upsert
enrollment: i32,
// wait-list count after the upsert
wait_count: i32,
// max enrollment minus enrollment, as computed by `compute_diffs`
seats_available: i32,
}
/// Compare the pre- and post-upsert value of one field and push an `AuditEntry`
/// when they differ.
///
/// Public forms:
/// - `diff_field!(audits, row, field_name, old_field, new_field)` — `Option<T>` old vs `T` new
/// - `diff_field!(opt audits, row, field_name, old_field, new_field)` — `Option<T>` old vs `Option<T>` new
/// - `diff_field!(json audits, row, field_name, old_field, new_field)` — `Option<Value>` old vs `Value` new
///
/// Every form is a no-op when `old_id` is `None` (fresh insert). A missing value is
/// rendered as the empty string (or JSON `null` in the `json` form) before comparing.
///
/// The `@record` form is an implementation detail shared by the three public forms.
macro_rules! diff_field {
    // Non-nullable column: `Option<T>` before, plain `T` after.
    ($audits:ident, $row:ident, $field:expr, $old:ident, $new:ident) => {
        if $row.old_id.is_some() {
            let before = $row
                .$old
                .as_ref()
                .map(ToString::to_string)
                .unwrap_or_default();
            let after = $row.$new.to_string();
            diff_field!(@record $audits, $row, $field, before != after, before, after);
        }
    };
    // Nullable column: `Option<T>` on both sides.
    (opt $audits:ident, $row:ident, $field:expr, $old:ident, $new:ident) => {
        if $row.old_id.is_some() {
            let before = $row
                .$old
                .as_ref()
                .map(ToString::to_string)
                .unwrap_or_default();
            let after = $row
                .$new
                .as_ref()
                .map(ToString::to_string)
                .unwrap_or_default();
            diff_field!(@record $audits, $row, $field, before != after, before, after);
        }
    };
    // JSONB column: values are compared as JSON, then rendered as text for the audit.
    (json $audits:ident, $row:ident, $field:expr, $old:ident, $new:ident) => {
        if $row.old_id.is_some() {
            let before = $row.$old.clone().unwrap_or(serde_json::Value::Null);
            let after = &$row.$new;
            diff_field!(
                @record $audits, $row, $field,
                before != *after,
                before.to_string(),
                after.to_string()
            );
        }
    };
    // Internal: append the audit entry when `$differs` evaluates to true.
    (@record $audits:ident, $row:ident, $field:expr, $differs:expr, $before:expr, $after:expr) => {
        if $differs {
            $audits.push(AuditEntry {
                course_id: $row.id,
                field_changed: $field,
                old_value: $before,
                new_value: $after,
            });
        }
    };
}
/// Derive audit entries (one per changed field) and metric entries from the rows
/// returned by the upsert.
///
/// Audits are only produced for rows that existed before the upsert. A metric
/// entry is produced for every fresh insert (as a baseline) and for existing rows
/// whose enrollment, wait count, or max enrollment changed.
fn compute_diffs(rows: &[UpsertDiffRow]) -> (Vec<AuditEntry>, Vec<MetricEntry>) {
    let mut audit_log = Vec::new();
    let mut metric_log = Vec::new();

    for diff in rows.iter() {
        // Non-nullable fields
        diff_field!(audit_log, diff, "enrollment", old_enrollment, new_enrollment);
        diff_field!(audit_log, diff, "max_enrollment", old_max_enrollment, new_max_enrollment);
        diff_field!(audit_log, diff, "wait_count", old_wait_count, new_wait_count);
        diff_field!(audit_log, diff, "wait_capacity", old_wait_capacity, new_wait_capacity);
        diff_field!(audit_log, diff, "subject", old_subject, new_subject);
        diff_field!(audit_log, diff, "course_number", old_course_number, new_course_number);
        diff_field!(audit_log, diff, "title", old_title, new_title);

        // Nullable text fields
        diff_field!(opt audit_log, diff, "sequence_number", old_sequence_number, new_sequence_number);
        diff_field!(opt audit_log, diff, "part_of_term", old_part_of_term, new_part_of_term);
        diff_field!(opt audit_log, diff, "instructional_method", old_instructional_method, new_instructional_method);
        diff_field!(opt audit_log, diff, "campus", old_campus, new_campus);

        // Nullable int fields
        diff_field!(opt audit_log, diff, "credit_hours", old_credit_hours, new_credit_hours);
        diff_field!(opt audit_log, diff, "credit_hour_low", old_credit_hour_low, new_credit_hour_low);
        diff_field!(opt audit_log, diff, "credit_hour_high", old_credit_hour_high, new_credit_hour_high);

        // Cross-list fields
        diff_field!(opt audit_log, diff, "cross_list", old_cross_list, new_cross_list);
        diff_field!(opt audit_log, diff, "cross_list_capacity", old_cross_list_capacity, new_cross_list_capacity);
        diff_field!(opt audit_log, diff, "cross_list_count", old_cross_list_count, new_cross_list_count);

        // Link fields
        diff_field!(opt audit_log, diff, "link_identifier", old_link_identifier, new_link_identifier);
        diff_field!(opt audit_log, diff, "is_section_linked", old_is_section_linked, new_is_section_linked);

        // JSONB fields
        diff_field!(json audit_log, diff, "meeting_times", old_meeting_times, new_meeting_times);
        diff_field!(json audit_log, diff, "attributes", old_attributes, new_attributes);

        // Fresh inserts always get a baseline metric; existing rows only when
        // one of the enrollment numbers moved.
        let needs_metric = match diff.old_id {
            None => true,
            Some(_) => {
                diff.old_enrollment != Some(diff.new_enrollment)
                    || diff.old_wait_count != Some(diff.new_wait_count)
                    || diff.old_max_enrollment != Some(diff.new_max_enrollment)
            }
        };
        if needs_metric {
            metric_log.push(MetricEntry {
                course_id: diff.id,
                enrollment: diff.new_enrollment,
                wait_count: diff.new_wait_count,
                seats_available: diff.new_max_enrollment - diff.new_enrollment,
            });
        }
    }

    (audit_log, metric_log)
}
// ---------------------------------------------------------------------------
// Task 4: Batch insert functions for audits and metrics
// ---------------------------------------------------------------------------
/// Insert all audit entries into `course_audits` with a single statement.
///
/// The entries are split into four parallel arrays that the query recombines
/// with `UNNEST`; every row is stamped with `NOW()`. Does nothing when `audits`
/// is empty.
async fn insert_audits(audits: &[AuditEntry], conn: &mut PgConnection) -> Result<()> {
    if audits.is_empty() {
        return Ok(());
    }

    let total = audits.len();
    let mut course_ids: Vec<i32> = Vec::with_capacity(total);
    let mut fields: Vec<&str> = Vec::with_capacity(total);
    let mut old_values: Vec<&str> = Vec::with_capacity(total);
    let mut new_values: Vec<&str> = Vec::with_capacity(total);
    for entry in audits {
        course_ids.push(entry.course_id);
        fields.push(entry.field_changed);
        old_values.push(entry.old_value.as_str());
        new_values.push(entry.new_value.as_str());
    }

    let statement = sqlx::query(
        r#"
INSERT INTO course_audits (course_id, timestamp, field_changed, old_value, new_value)
SELECT v.course_id, NOW(), v.field_changed, v.old_value, v.new_value
FROM UNNEST($1::int4[], $2::text[], $3::text[], $4::text[])
AS v(course_id, field_changed, old_value, new_value)
"#,
    )
    .bind(&course_ids)
    .bind(&fields)
    .bind(&old_values)
    .bind(&new_values);

    statement
        .execute(&mut *conn)
        .await
        .map_err(|e| anyhow::anyhow!("Failed to batch insert course_audits: {}", e))?;
    Ok(())
}
/// Insert all metric entries into `course_metrics` with a single statement.
///
/// The entries are split into four parallel arrays that the query recombines
/// with `UNNEST`; every row is stamped with `NOW()`. Does nothing when `metrics`
/// is empty.
async fn insert_metrics(metrics: &[MetricEntry], conn: &mut PgConnection) -> Result<()> {
    if metrics.is_empty() {
        return Ok(());
    }

    let total = metrics.len();
    let mut course_ids: Vec<i32> = Vec::with_capacity(total);
    let mut enrollments: Vec<i32> = Vec::with_capacity(total);
    let mut wait_counts: Vec<i32> = Vec::with_capacity(total);
    let mut seats_available: Vec<i32> = Vec::with_capacity(total);
    for entry in metrics {
        course_ids.push(entry.course_id);
        enrollments.push(entry.enrollment);
        wait_counts.push(entry.wait_count);
        seats_available.push(entry.seats_available);
    }

    let statement = sqlx::query(
        r#"
INSERT INTO course_metrics (course_id, timestamp, enrollment, wait_count, seats_available)
SELECT v.course_id, NOW(), v.enrollment, v.wait_count, v.seats_available
FROM UNNEST($1::int4[], $2::int4[], $3::int4[], $4::int4[])
AS v(course_id, enrollment, wait_count, seats_available)
"#,
    )
    .bind(&course_ids)
    .bind(&enrollments)
    .bind(&wait_counts)
    .bind(&seats_available);

    statement
        .execute(&mut *conn)
        .await
        .map_err(|e| anyhow::anyhow!("Failed to batch insert course_metrics: {}", e))?;
    Ok(())
}
// ---------------------------------------------------------------------------
// Core upsert functions (updated to use &mut PgConnection)
// ---------------------------------------------------------------------------
/// Upsert a batch of courses together with their audits, metrics, and instructors.
///
/// Everything runs inside one transaction: the course upsert (which also captures
/// the pre-update state), the audit and metric inserts, the instructor upsert, and
/// the course/instructor junction refresh. The transaction is committed only after
/// all steps succeed.
///
/// Returns counts describing the batch. An empty `courses` slice short-circuits
/// with default counts and touches no database state.
///
/// # Performance
/// - Replaces per-course round-trips with a fixed number of bulk statements
/// - Typical usage: 50-200 courses per batch
pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<UpsertCounts> {
    let course_count = courses.len();
    if course_count == 0 {
        info!("No courses to upsert, skipping batch operation");
        return Ok(UpsertCounts::default());
    }

    let started_at = Instant::now();
    let mut tx = db_pool.begin().await?;

    // Upsert the courses; each returned row carries old and new values.
    let diff_rows = upsert_courses(courses, &mut tx).await?;

    // Instructor linking looks courses up by (crn, term_code): RETURNING order from
    // INSERT ... ON CONFLICT is not guaranteed to match the input array order, so
    // positional correspondence cannot be relied upon.
    let mut crn_term_to_id: HashMap<(&str, &str), i32> = HashMap::with_capacity(diff_rows.len());
    for diff in &diff_rows {
        crn_term_to_id.insert((diff.crn.as_str(), diff.term_code.as_str()), diff.id);
    }

    let (audits, metrics) = compute_diffs(&diff_rows);

    // A course counts as changed when at least one audit entry refers to it;
    // only pre-existing rows can be changed or unchanged.
    let mut changed_ids: HashSet<i32> = HashSet::new();
    for audit in &audits {
        changed_ids.insert(audit.course_id);
    }
    let courses_changed = changed_ids.len() as i32;
    let mut existing_count = 0i32;
    for diff in &diff_rows {
        if diff.old_id.is_some() {
            existing_count += 1;
        }
    }

    let counts = UpsertCounts {
        courses_fetched: course_count as i32,
        courses_changed,
        courses_unchanged: existing_count - courses_changed,
        audits_generated: audits.len() as i32,
        metrics_generated: metrics.len() as i32,
    };

    insert_audits(&audits, &mut tx).await?;
    insert_metrics(&metrics, &mut tx).await?;

    // Instructors first (yields email -> id), then the junction rows that need those ids.
    let email_to_id = upsert_instructors(courses, &mut tx).await?;
    upsert_course_instructors(courses, &crn_term_to_id, &email_to_id, &mut tx).await?;

    tx.commit().await?;

    let duration = started_at.elapsed();
    info!(
        courses_count = course_count,
        courses_changed = counts.courses_changed,
        courses_unchanged = counts.courses_unchanged,
        audit_entries = counts.audits_generated,
        metric_entries = counts.metrics_generated,
        duration_ms = duration.as_millis(),
        "Batch upserted courses with instructors, audits, and metrics"
    );

    Ok(counts)
}
// ---------------------------------------------------------------------------
// Task 2: CTE-based upsert returning old+new values
// ---------------------------------------------------------------------------
/// Upsert all courses and return diff rows with old and new values for auditing.
///
/// Each course attribute is collected into its own array; the arrays are bound
/// positionally as `$1`..`$23` and recombined server-side with `UNNEST`. The
/// statement has three parts:
/// - `old_data` selects the currently stored rows whose `(crn, term_code)` appears in the batch,
/// - `upserted` performs `INSERT ... ON CONFLICT (crn, term_code) DO UPDATE` and returns the rows,
/// - the final SELECT left-joins `upserted` to `old_data`, so `old_*` columns are NULL
///   for rows that did not exist before.
///
/// NOTE(review): `old_data` is expected to reflect the pre-update state even though it
/// shares a statement with the upsert — confirm against the PostgreSQL WITH documentation.
///
/// The order of the `.bind(...)` calls must stay in sync with the placeholder numbers.
async fn upsert_courses(courses: &[Course], conn: &mut PgConnection) -> Result<Vec<UpsertDiffRow>> {
// $1: CRNs
let crns: Vec<&str> = courses
.iter()
.map(|c| c.course_reference_number.as_str())
.collect();
// $2, $3
let subjects: Vec<&str> = courses.iter().map(|c| c.subject.as_str()).collect();
let course_numbers: Vec<&str> = courses.iter().map(|c| c.course_number.as_str()).collect();
// $4: titles are owned because HTML entities are decoded before storing
let titles: Vec<String> = courses
.iter()
.map(|c| decode_html_entities(&c.course_title))
.collect();
// $5: term codes (second half of the conflict key)
let term_codes: Vec<&str> = courses.iter().map(|c| c.term.as_str()).collect();
// $6-$9: enrollment numbers
let enrollments: Vec<i32> = courses.iter().map(|c| c.enrollment).collect();
let max_enrollments: Vec<i32> = courses.iter().map(|c| c.maximum_enrollment).collect();
let wait_counts: Vec<i32> = courses.iter().map(|c| c.wait_count).collect();
let wait_capacities: Vec<i32> = courses.iter().map(|c| c.wait_capacity).collect();
// New scalar fields
// $10-$12: always `Some(..)` here; the Option wrapper is kept for the array element type
let sequence_numbers: Vec<Option<&str>> = courses
.iter()
.map(|c| Some(c.sequence_number.as_str()))
.collect();
let parts_of_term: Vec<Option<&str>> = courses
.iter()
.map(|c| Some(c.part_of_term.as_str()))
.collect();
let instructional_methods: Vec<Option<&str>> = courses
.iter()
.map(|c| Some(c.instructional_method.as_str()))
.collect();
// $13: campus comes from the first meeting time, see `extract_campus_code`
let campuses: Vec<Option<String>> = courses.iter().map(extract_campus_code).collect();
// $14-$16
let credit_hours: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hours).collect();
let credit_hour_lows: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hour_low).collect();
let credit_hour_highs: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hour_high).collect();
// $17-$19
let cross_lists: Vec<Option<&str>> = courses.iter().map(|c| c.cross_list.as_deref()).collect();
let cross_list_capacities: Vec<Option<i32>> =
courses.iter().map(|c| c.cross_list_capacity).collect();
let cross_list_counts: Vec<Option<i32>> = courses.iter().map(|c| c.cross_list_count).collect();
// $20, $21
let link_identifiers: Vec<Option<&str>> = courses
.iter()
.map(|c| c.link_identifier.as_deref())
.collect();
let is_section_linkeds: Vec<Option<bool>> =
courses.iter().map(|c| Some(c.is_section_linked)).collect();
// JSONB fields
// $22, $23
let meeting_times_json: Vec<serde_json::Value> =
courses.iter().map(to_db_meeting_times).collect();
let attributes_json: Vec<serde_json::Value> = courses.iter().map(to_db_attributes).collect();
// The column aliases in the final SELECT must match the field names of `UpsertDiffRow`.
let rows = sqlx::query_as::<_, UpsertDiffRow>(
r#"
WITH old_data AS (
SELECT id, enrollment, max_enrollment, wait_count, wait_capacity,
subject, course_number, title,
sequence_number, part_of_term, instructional_method, campus,
credit_hours, credit_hour_low, credit_hour_high,
cross_list, cross_list_capacity, cross_list_count,
link_identifier, is_section_linked,
meeting_times, attributes,
crn, term_code
FROM courses
WHERE (crn, term_code) IN (SELECT * FROM UNNEST($1::text[], $5::text[]))
),
upserted AS (
INSERT INTO courses (
crn, subject, course_number, title, term_code,
enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at,
sequence_number, part_of_term, instructional_method, campus,
credit_hours, credit_hour_low, credit_hour_high,
cross_list, cross_list_capacity, cross_list_count,
link_identifier, is_section_linked,
meeting_times, attributes
)
SELECT
v.crn, v.subject, v.course_number, v.title, v.term_code,
v.enrollment, v.max_enrollment, v.wait_count, v.wait_capacity, NOW(),
v.sequence_number, v.part_of_term, v.instructional_method, v.campus,
v.credit_hours, v.credit_hour_low, v.credit_hour_high,
v.cross_list, v.cross_list_capacity, v.cross_list_count,
v.link_identifier, v.is_section_linked,
v.meeting_times, v.attributes
FROM UNNEST(
$1::text[], $2::text[], $3::text[], $4::text[], $5::text[],
$6::int4[], $7::int4[], $8::int4[], $9::int4[],
$10::text[], $11::text[], $12::text[], $13::text[],
$14::int4[], $15::int4[], $16::int4[],
$17::text[], $18::int4[], $19::int4[],
$20::text[], $21::bool[],
$22::jsonb[], $23::jsonb[]
) AS v(
crn, subject, course_number, title, term_code,
enrollment, max_enrollment, wait_count, wait_capacity,
sequence_number, part_of_term, instructional_method, campus,
credit_hours, credit_hour_low, credit_hour_high,
cross_list, cross_list_capacity, cross_list_count,
link_identifier, is_section_linked,
meeting_times, attributes
)
ON CONFLICT (crn, term_code)
DO UPDATE SET
subject = EXCLUDED.subject,
course_number = EXCLUDED.course_number,
title = EXCLUDED.title,
enrollment = EXCLUDED.enrollment,
max_enrollment = EXCLUDED.max_enrollment,
wait_count = EXCLUDED.wait_count,
wait_capacity = EXCLUDED.wait_capacity,
last_scraped_at = EXCLUDED.last_scraped_at,
sequence_number = EXCLUDED.sequence_number,
part_of_term = EXCLUDED.part_of_term,
instructional_method = EXCLUDED.instructional_method,
campus = EXCLUDED.campus,
credit_hours = EXCLUDED.credit_hours,
credit_hour_low = EXCLUDED.credit_hour_low,
credit_hour_high = EXCLUDED.credit_hour_high,
cross_list = EXCLUDED.cross_list,
cross_list_capacity = EXCLUDED.cross_list_capacity,
cross_list_count = EXCLUDED.cross_list_count,
link_identifier = EXCLUDED.link_identifier,
is_section_linked = EXCLUDED.is_section_linked,
meeting_times = EXCLUDED.meeting_times,
attributes = EXCLUDED.attributes
RETURNING *
)
SELECT u.id,
o.id AS old_id,
u.crn, u.term_code,
o.enrollment AS old_enrollment, u.enrollment AS new_enrollment,
o.max_enrollment AS old_max_enrollment, u.max_enrollment AS new_max_enrollment,
o.wait_count AS old_wait_count, u.wait_count AS new_wait_count,
o.wait_capacity AS old_wait_capacity, u.wait_capacity AS new_wait_capacity,
o.subject AS old_subject, u.subject AS new_subject,
o.course_number AS old_course_number, u.course_number AS new_course_number,
o.title AS old_title, u.title AS new_title,
o.sequence_number AS old_sequence_number, u.sequence_number AS new_sequence_number,
o.part_of_term AS old_part_of_term, u.part_of_term AS new_part_of_term,
o.instructional_method AS old_instructional_method, u.instructional_method AS new_instructional_method,
o.campus AS old_campus, u.campus AS new_campus,
o.credit_hours AS old_credit_hours, u.credit_hours AS new_credit_hours,
o.credit_hour_low AS old_credit_hour_low, u.credit_hour_low AS new_credit_hour_low,
o.credit_hour_high AS old_credit_hour_high, u.credit_hour_high AS new_credit_hour_high,
o.cross_list AS old_cross_list, u.cross_list AS new_cross_list,
o.cross_list_capacity AS old_cross_list_capacity, u.cross_list_capacity AS new_cross_list_capacity,
o.cross_list_count AS old_cross_list_count, u.cross_list_count AS new_cross_list_count,
o.link_identifier AS old_link_identifier, u.link_identifier AS new_link_identifier,
o.is_section_linked AS old_is_section_linked, u.is_section_linked AS new_is_section_linked,
o.meeting_times AS old_meeting_times, u.meeting_times AS new_meeting_times,
o.attributes AS old_attributes, u.attributes AS new_attributes
FROM upserted u
LEFT JOIN old_data o ON u.crn = o.crn AND u.term_code = o.term_code
"#,
)
// Bind order defines $1..$23 — keep it aligned with the UNNEST list above.
.bind(&crns)
.bind(&subjects)
.bind(&course_numbers)
.bind(&titles)
.bind(&term_codes)
.bind(&enrollments)
.bind(&max_enrollments)
.bind(&wait_counts)
.bind(&wait_capacities)
.bind(&sequence_numbers)
.bind(&parts_of_term)
.bind(&instructional_methods)
.bind(&campuses)
.bind(&credit_hours)
.bind(&credit_hour_lows)
.bind(&credit_hour_highs)
.bind(&cross_lists)
.bind(&cross_list_capacities)
.bind(&cross_list_counts)
.bind(&link_identifiers)
.bind(&is_section_linkeds)
.bind(&meeting_times_json)
.bind(&attributes_json)
.fetch_all(&mut *conn)
.await
.map_err(|e| anyhow::anyhow!("Failed to batch upsert courses: {}", e))?;
Ok(rows)
}
/// Upsert every distinct instructor found in the batch, keyed by lowercased email.
///
/// Faculty entries without an email address are skipped (and counted in a single
/// warning). When the same email occurs more than once, the first occurrence wins.
/// Returns a map of lowercased_email -> instructor id for junction linking; the map
/// is empty when no instructor has an email.
async fn upsert_instructors(
    courses: &[Course],
    conn: &mut PgConnection,
) -> Result<HashMap<String, i32>> {
    let mut known_emails = HashSet::new();
    let mut display_names: Vec<String> = Vec::new();
    let mut first_names: Vec<Option<String>> = Vec::new();
    let mut last_names: Vec<Option<String>> = Vec::new();
    let mut emails_lower: Vec<String> = Vec::new();
    let mut skipped_no_email = 0u32;

    for faculty in courses.iter().flat_map(|course| &course.faculty) {
        let Some(email) = &faculty.email_address else {
            skipped_no_email += 1;
            continue;
        };
        let email_lower = email.to_lowercase();
        if !known_emails.insert(email_lower.clone()) {
            // Already queued under this email.
            continue;
        }
        let parts = parse_banner_name(&faculty.display_name);
        display_names.push(decode_html_entities(&faculty.display_name));
        first_names.push(parts.as_ref().map(|p| p.first.clone()));
        last_names.push(parts.as_ref().map(|p| p.last.clone()));
        emails_lower.push(email_lower);
    }

    if skipped_no_email > 0 {
        tracing::warn!(
            count = skipped_no_email,
            "Skipped instructors with no email address"
        );
    }
    if display_names.is_empty() {
        return Ok(HashMap::new());
    }

    let email_refs: Vec<&str> = emails_lower.iter().map(String::as_str).collect();
    let first_name_refs: Vec<Option<&str>> = first_names.iter().map(Option::as_deref).collect();
    let last_name_refs: Vec<Option<&str>> = last_names.iter().map(Option::as_deref).collect();

    let rows: Vec<(i32, String)> = sqlx::query_as(
        r#"
INSERT INTO instructors (display_name, email, first_name, last_name)
SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[], $4::text[])
ON CONFLICT (email)
DO UPDATE SET
display_name = EXCLUDED.display_name,
first_name = EXCLUDED.first_name,
last_name = EXCLUDED.last_name
RETURNING id, email
"#,
    )
    .bind(&display_names)
    .bind(&email_refs)
    .bind(&first_name_refs)
    .bind(&last_name_refs)
    .fetch_all(&mut *conn)
    .await
    .map_err(|e| anyhow::anyhow!("Failed to batch upsert instructors: {}", e))?;

    let mut email_to_id = HashMap::with_capacity(rows.len());
    for (id, email) in rows {
        email_to_id.insert(email, id);
    }
    Ok(email_to_id)
}
/// Link courses to their instructors via the junction table.
async fn upsert_course_instructors(
courses: &[Course],
crn_term_to_id: &HashMap<(&str, &str), i32>,
email_to_id: &HashMap<String, i32>,
conn: &mut PgConnection,
) -> Result<()> {
let mut cids = Vec::new();
let mut instructor_ids: Vec<i32> = Vec::new();
let mut banner_ids: Vec<&str> = Vec::new();
let mut primaries = Vec::new();
for course in courses {
let key = (
course.course_reference_number.as_str(),
course.term.as_str(),
);
let Some(&course_id) = crn_term_to_id.get(&key) else {
tracing::warn!(
crn = %course.course_reference_number,
term = %course.term,
"No course_id found for CRN/term pair during instructor linking"
);
continue;
};
for faculty in &course.faculty {
if let Some(email) = &faculty.email_address {
let email_lower = email.to_lowercase();
if let Some(&instructor_id) = email_to_id.get(&email_lower) {
cids.push(course_id);
instructor_ids.push(instructor_id);
banner_ids.push(faculty.banner_id.as_str());
primaries.push(faculty.primary_indicator);
}
}
}
}
if cids.is_empty() {
return Ok(());
}
// Delete existing links for these courses then re-insert.
// This handles instructor changes cleanly.
sqlx::query("DELETE FROM course_instructors WHERE course_id = ANY($1)")
.bind(&cids)
.execute(&mut *conn)
.await?;
sqlx::query(
r#"
INSERT INTO course_instructors (course_id, instructor_id, banner_id, is_primary)
SELECT * FROM UNNEST($1::int4[], $2::int4[], $3::text[], $4::bool[])
ON CONFLICT (course_id, instructor_id)
DO UPDATE SET
banner_id = EXCLUDED.banner_id,
is_primary = EXCLUDED.is_primary
"#,
)
.bind(&cids)
.bind(&instructor_ids)
.bind(&banner_ids)
.bind(&primaries)
.execute(&mut *conn)
.await
.map_err(|e| anyhow::anyhow!("Failed to batch upsert course_instructors: {}", e))?;
Ok(())
}
+249
View File
@@ -0,0 +1,249 @@
//! Database query functions for courses, used by the web API.
use crate::data::models::{Course, CourseInstructorDetail};
use crate::error::Result;
use sqlx::PgPool;
use std::collections::HashMap;
/// Column to sort search results by.
///
/// Deserialized from snake_case names (e.g. `course_code`). Each variant is mapped
/// to a hardcoded ORDER BY fragment by `sort_clause`.
#[derive(Debug, Clone, Copy, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SortColumn {
// subject, then course number, then sequence number
CourseCode,
// course title
Title,
// display name of the course's primary instructor
Instructor,
// begin time of the first meeting time entry
Time,
// max enrollment minus enrollment
Seats,
}
/// Sort direction.
///
/// Deserialized from snake_case names (`asc` / `desc`); `sort_clause` treats a
/// missing direction as ascending.
#[derive(Debug, Clone, Copy, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SortDirection {
Asc,
Desc,
}
/// Shared WHERE clause for course search filters.
///
/// Parameters $1-$8 match the bind order in `search_courses`:
/// - `$1` term code (always applied)
/// - `$2` list of subjects; NULL disables the filter
/// - `$3` title query, matched via full-text search on `title_search` or as a
///   case-insensitive substring of `title`; NULL disables the filter
/// - `$4` / `$5` inclusive lower / upper bound on `course_number` cast to int; NULL disables each
/// - `$6` when true, only rows with `max_enrollment > enrollment` are kept
/// - `$7` instructional method; NULL disables the filter
/// - `$8` campus; NULL disables the filter
const SEARCH_WHERE: &str = r#"
WHERE term_code = $1
AND ($2::text[] IS NULL OR subject = ANY($2))
AND ($3::text IS NULL OR title_search @@ plainto_tsquery('simple', $3) OR title ILIKE '%' || $3 || '%')
AND ($4::int IS NULL OR course_number::int >= $4)
AND ($5::int IS NULL OR course_number::int <= $5)
AND ($6::bool = false OR max_enrollment > enrollment)
AND ($7::text IS NULL OR instructional_method = $7)
AND ($8::text IS NULL OR campus = $8)
"#;
/// Build a safe ORDER BY clause from typed sort parameters.
///
/// All column names are hardcoded string literals — no caller input is interpolated.
///
/// A missing `direction` means ascending. A missing `column` sorts by subject,
/// course number, and sequence number, all ascending.
///
/// The clause always ends with `courses.id ASC` as a tie-breaker. The result is
/// used together with LIMIT/OFFSET pagination, and without a unique final sort key
/// rows that compare equal (same title, same seat count, ...) may be returned in a
/// different order on each query, causing duplicates or gaps across pages.
fn sort_clause(column: Option<SortColumn>, direction: Option<SortDirection>) -> String {
    let dir = match direction.unwrap_or(SortDirection::Asc) {
        SortDirection::Asc => "ASC",
        SortDirection::Desc => "DESC",
    };

    let primary = match column {
        Some(SortColumn::CourseCode) => {
            format!("subject {dir}, course_number {dir}, sequence_number {dir}")
        }
        Some(SortColumn::Title) => format!("title {dir}"),
        Some(SortColumn::Instructor) => {
            format!(
                "(SELECT i.display_name FROM course_instructors ci \
                 JOIN instructors i ON i.id = ci.instructor_id \
                 WHERE ci.course_id = courses.id AND ci.is_primary = true \
                 LIMIT 1) {dir} NULLS LAST"
            )
        }
        Some(SortColumn::Time) => {
            format!("(meeting_times->0->>'begin_time') {dir} NULLS LAST")
        }
        Some(SortColumn::Seats) => {
            format!("(max_enrollment - enrollment) {dir}")
        }
        None => "subject ASC, course_number ASC, sequence_number ASC".to_string(),
    };

    // Unique tie-breaker so paginated results have a stable order.
    format!("{primary}, courses.id ASC")
}
/// Search the courses of one term, applying whichever optional filters are set.
///
/// Returns `(courses, total_count)`: one page of rows (`limit`/`offset`) plus the
/// number of rows matching the filters overall, for pagination. Title search
/// matches on the full-text index or on a case-insensitive substring.
///
/// # Errors
///
/// Propagates any database error from either the page query or the count query.
#[allow(clippy::too_many_arguments)]
pub async fn search_courses(
    db_pool: &PgPool,
    term_code: &str,
    subject: Option<&[String]>,
    title_query: Option<&str>,
    course_number_low: Option<i32>,
    course_number_high: Option<i32>,
    open_only: bool,
    instructional_method: Option<&str>,
    campus: Option<&str>,
    limit: i32,
    offset: i32,
    sort_by: Option<SortColumn>,
    sort_dir: Option<SortDirection>,
) -> Result<(Vec<Course>, i64)> {
    // Attach the eight filter parameters in the order SEARCH_WHERE numbers them ($1-$8).
    // Both queries share this list, so it is written once.
    macro_rules! with_filters {
        ($query:expr) => {
            $query
                .bind(term_code)
                .bind(subject)
                .bind(title_query)
                .bind(course_number_low)
                .bind(course_number_high)
                .bind(open_only)
                .bind(instructional_method)
                .bind(campus)
        };
    }

    let ordering = sort_clause(sort_by, sort_dir);
    let page_sql =
        format!("SELECT * FROM courses {SEARCH_WHERE} ORDER BY {ordering} LIMIT $9 OFFSET $10");
    let count_sql = format!("SELECT COUNT(*) FROM courses {SEARCH_WHERE}");

    // Page of results: filters, then $9 = limit and $10 = offset.
    let courses = with_filters!(sqlx::query_as::<_, Course>(&page_sql))
        .bind(limit)
        .bind(offset)
        .fetch_all(db_pool)
        .await?;

    // Total number of matches, ignoring pagination.
    let (total,): (i64,) = with_filters!(sqlx::query_as(&count_sql))
        .fetch_one(db_pool)
        .await?;

    Ok((courses, total))
}
/// Look up one course by its CRN within a term.
///
/// Returns `Ok(None)` when no row matches both `crn` and `term_code`.
pub async fn get_course_by_crn(
    db_pool: &PgPool,
    crn: &str,
    term_code: &str,
) -> Result<Option<Course>> {
    const LOOKUP_SQL: &str = "SELECT * FROM courses WHERE crn = $1 AND term_code = $2";

    let found = sqlx::query_as::<_, Course>(LOOKUP_SQL)
        .bind(crn)
        .bind(term_code)
        .fetch_optional(db_pool)
        .await?;
    Ok(found)
}
/// Fetch the instructors attached to one course.
///
/// Each row joins the course/instructor link with the instructor record and, when
/// one exists, the linked RMP professor with the most ratings (ties broken by the
/// lowest legacy id). Rows are ordered primary instructor first, then by display name.
pub async fn get_course_instructors(
    db_pool: &PgPool,
    course_id: i32,
) -> Result<Vec<CourseInstructorDetail>> {
    const INSTRUCTORS_SQL: &str = r#"
SELECT i.id as instructor_id, ci.banner_id, i.display_name, i.email, ci.is_primary,
rmp.avg_rating, rmp.num_ratings, rmp.rmp_legacy_id,
ci.course_id
FROM course_instructors ci
JOIN instructors i ON i.id = ci.instructor_id
LEFT JOIN LATERAL (
SELECT rp.avg_rating, rp.num_ratings, rp.legacy_id as rmp_legacy_id
FROM instructor_rmp_links irl
JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
WHERE irl.instructor_id = i.id
ORDER BY rp.num_ratings DESC NULLS LAST, rp.legacy_id ASC
LIMIT 1
) rmp ON true
WHERE ci.course_id = $1
ORDER BY ci.is_primary DESC, i.display_name
"#;

    let details = sqlx::query_as::<_, CourseInstructorDetail>(INSTRUCTORS_SQL)
        .bind(course_id)
        .fetch_all(db_pool)
        .await?;
    Ok(details)
}
/// Fetch instructors for many courses with one query.
///
/// Returns a map of `course_id → Vec<CourseInstructorDetail>`. Courses with no
/// instructors have no entry. Within each course the rows keep the query order:
/// primary instructor first, then by display name. An empty `course_ids` slice
/// returns an empty map without touching the database.
pub async fn get_instructors_for_courses(
    db_pool: &PgPool,
    course_ids: &[i32],
) -> Result<HashMap<i32, Vec<CourseInstructorDetail>>> {
    const BATCH_SQL: &str = r#"
SELECT i.id as instructor_id, ci.banner_id, i.display_name, i.email, ci.is_primary,
rmp.avg_rating, rmp.num_ratings, rmp.rmp_legacy_id,
ci.course_id
FROM course_instructors ci
JOIN instructors i ON i.id = ci.instructor_id
LEFT JOIN LATERAL (
SELECT rp.avg_rating, rp.num_ratings, rp.legacy_id as rmp_legacy_id
FROM instructor_rmp_links irl
JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
WHERE irl.instructor_id = i.id
ORDER BY rp.num_ratings DESC NULLS LAST, rp.legacy_id ASC
LIMIT 1
) rmp ON true
WHERE ci.course_id = ANY($1)
ORDER BY ci.course_id, ci.is_primary DESC, i.display_name
"#;

    if course_ids.is_empty() {
        return Ok(HashMap::new());
    }

    let details = sqlx::query_as::<_, CourseInstructorDetail>(BATCH_SQL)
        .bind(course_ids)
        .fetch_all(db_pool)
        .await?;

    // Group rows by owning course, preserving the query's per-course ordering.
    let by_course = details.into_iter().fold(
        HashMap::<i32, Vec<CourseInstructorDetail>>::new(),
        |mut grouped, detail| {
            // The query always selects ci.course_id; the default only guards the Option.
            let owner = detail.course_id.unwrap_or_default();
            grouped.entry(owner).or_default().push(detail);
            grouped
        },
    );
    Ok(by_course)
}
/// List the subjects offered in a term, busiest first.
///
/// Each tuple is `(subject_code, description, total_enrollment)`. The description
/// comes from `reference_data` (category `subject`) and falls back to the subject
/// code itself when no reference row exists. Only subjects with at least one
/// course in `term_code` appear.
pub async fn get_subjects_by_enrollment(
    db_pool: &PgPool,
    term_code: &str,
) -> Result<Vec<(String, String, i64)>> {
    const SUBJECTS_SQL: &str = r#"
SELECT c.subject,
COALESCE(rd.description, c.subject),
COALESCE(SUM(c.enrollment), 0) as total_enrollment
FROM courses c
LEFT JOIN reference_data rd ON rd.category = 'subject' AND rd.code = c.subject
WHERE c.term_code = $1
GROUP BY c.subject, rd.description
ORDER BY total_enrollment DESC
"#;

    let ranked = sqlx::query_as::<_, (String, String, i64)>(SUBJECTS_SQL)
        .bind(term_code)
        .fetch_all(db_pool)
        .await?;
    Ok(ranked)
}
/// List every distinct term code present in the `courses` table, in descending order.
pub async fn get_available_terms(db_pool: &PgPool) -> Result<Vec<String>> {
    const TERMS_SQL: &str = "SELECT DISTINCT term_code FROM courses ORDER BY term_code DESC";

    let term_rows: Vec<(String,)> = sqlx::query_as(TERMS_SQL).fetch_all(db_pool).await?;

    // Unwrap the single-column tuples into plain strings.
    let mut terms = Vec::with_capacity(term_rows.len());
    for (code,) in term_rows {
        terms.push(code);
    }
    Ok(terms)
}
+12
View File
@@ -0,0 +1,12 @@
//! Database models and schema.
pub mod batch;
pub mod courses;
pub mod models;
pub mod names;
pub mod reference;
pub mod rmp;
pub mod rmp_matching;
pub mod scrape_jobs;
pub mod sessions;
pub mod users;
+273
View File
@@ -0,0 +1,273 @@
//! `sqlx` models for the database schema.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde_json::Value;
use ts_rs::TS;
/// Emit an `i64` as its decimal string form.
///
/// JavaScript numbers lose precision above 2^53, so large ids are sent as strings.
fn serialize_i64_as_string<S: Serializer>(value: &i64, serializer: S) -> Result<S::Ok, S::Error> {
    let rendered = value.to_string();
    serializer.serialize_str(rendered.as_str())
}
/// Read an `i64` that may arrive either as a number or as a string of digits.
///
/// Counterpart to `serialize_i64_as_string`. Unsigned inputs larger than
/// `i64::MAX` and strings that do not parse as an `i64` produce a
/// deserialization error.
fn deserialize_i64_from_string<'de, D: Deserializer<'de>>(
    deserializer: D,
) -> Result<i64, D::Error> {
    use serde::de;

    /// Visitor accepting signed integers, unsigned integers, and string slices.
    struct IntOrText;

    impl<'de> de::Visitor<'de> for IntOrText {
        type Value = i64;

        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
            formatter.write_str("an integer or a string containing an integer")
        }

        fn visit_i64<E: de::Error>(self, value: i64) -> Result<i64, E> {
            Ok(value)
        }

        fn visit_u64<E: de::Error>(self, value: u64) -> Result<i64, E> {
            match i64::try_from(value) {
                Ok(narrowed) => Ok(narrowed),
                Err(_) => Err(E::custom(format!("u64 {value} out of i64 range"))),
            }
        }

        fn visit_str<E: de::Error>(self, value: &str) -> Result<i64, E> {
            match value.parse::<i64>() {
                Ok(parsed) => Ok(parsed),
                Err(problem) => Err(E::custom(problem)),
            }
        }
    }

    // Let the input format decide which visit_* method to call.
    deserializer.deserialize_any(IntOrText)
}
/// Represents a meeting time stored as JSONB in the courses table.
// Also exported to TypeScript through ts-rs (`#[ts(export)]`).
#[derive(Debug, Clone, Serialize, Deserialize, TS)]
#[ts(export)]
pub struct DbMeetingTime {
// Start/end time of day; optional. The time sort in the course search reads
// `begin_time` of the first meeting entry. NOTE(review): string format is not
// visible here — confirm against the scraper.
pub begin_time: Option<String>,
pub end_time: Option<String>,
// Date range over which the meeting applies (string format not visible here).
pub start_date: String,
pub end_date: String,
// One flag per weekday.
pub monday: bool,
pub tuesday: bool,
pub wednesday: bool,
pub thursday: bool,
pub friday: bool,
pub saturday: bool,
pub sunday: bool,
// Location details; each is optional.
pub building: Option<String>,
pub building_description: Option<String>,
pub room: Option<String>,
pub campus: Option<String>,
pub meeting_type: String,
pub meeting_schedule_type: String,
}
/// A row of the `courses` table, as loaded by the `SELECT * FROM courses` queries.
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct Course {
/// Primary key; `course_instructors.course_id` refers to it.
pub id: i32,
/// Course reference number; looked up together with `term_code`.
pub crn: String,
pub subject: String,
/// Stored as text; the search filter casts it to an integer for range checks.
pub course_number: String,
pub title: String,
pub term_code: String,
/// Current enrollment; a course counts as open when `max_enrollment > enrollment`.
pub enrollment: i32,
pub max_enrollment: i32,
pub wait_count: i32,
pub wait_capacity: i32,
pub last_scraped_at: DateTime<Utc>,
// New scalar fields
pub sequence_number: Option<String>,
pub part_of_term: Option<String>,
pub instructional_method: Option<String>,
pub campus: Option<String>,
pub credit_hours: Option<i32>,
pub credit_hour_low: Option<i32>,
pub credit_hour_high: Option<i32>,
pub cross_list: Option<String>,
pub cross_list_capacity: Option<i32>,
pub cross_list_count: Option<i32>,
pub link_identifier: Option<String>,
pub is_section_linked: Option<bool>,
// JSONB fields
/// JSON array; the time sort reads `begin_time` of element 0.
/// NOTE(review): elements presumably follow `DbMeetingTime` — confirm.
pub meeting_times: Value,
pub attributes: Value,
}
/// A row of the `instructors` table.
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct Instructor {
pub id: i32,
/// Name as provided by Banner, in "Last, First Middle" form.
pub display_name: String,
pub email: String,
/// NOTE(review): the set of valid status values is not visible in this file.
pub rmp_match_status: String,
/// Structured name parts; NULL until filled in (the name backfill parses them
/// out of `display_name`).
pub first_name: Option<String>,
pub last_name: Option<String>,
}
/// A row of the `course_instructors` link table, unique per
/// `(course_id, instructor_id)`.
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseInstructor {
pub course_id: i32,
pub instructor_id: i32,
pub banner_id: String,
/// Whether this instructor is the course's primary instructor.
pub is_primary: bool,
}
/// Joined instructor data for a course (from course_instructors + instructors + rmp_professors).
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseInstructorDetail {
pub instructor_id: i32,
pub banner_id: String,
pub display_name: String,
pub email: String,
pub is_primary: bool,
/// RMP fields come from a LEFT JOIN, so all three are `None` when the
/// instructor has no linked RMP professor. When several are linked, the one
/// with the most ratings is used.
pub avg_rating: Option<f32>,
pub num_ratings: Option<i32>,
pub rmp_legacy_id: Option<i32>,
/// Id of the course this row belongs to. Both the single-course and the batch
/// instructor queries select `ci.course_id`; the batch query groups rows by it.
pub course_id: Option<i32>,
}
/// A row of the `reference_data` table: a code → description lookup entry,
/// unique per `(category, code)`.
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct ReferenceData {
/// Lookup namespace, e.g. `subject`.
pub category: String,
pub code: String,
pub description: String,
}
/// A point-in-time enrollment snapshot for one course.
// NOTE(review): the backing table name and write path are not visible here.
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseMetric {
pub id: i32,
/// Course the snapshot belongs to.
pub course_id: i32,
pub timestamp: DateTime<Utc>,
pub enrollment: i32,
pub wait_count: i32,
pub seats_available: i32,
}
/// A record of one field change on a course, with the old and new values as text.
// NOTE(review): the backing table name and write path are not visible here.
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseAudit {
pub id: i32,
/// Course whose field changed.
pub course_id: i32,
pub timestamp: DateTime<Utc>,
pub field_changed: String,
pub old_value: String,
pub new_value: String,
}
/// Aggregate counts returned by batch upsert, used for scrape job result logging.
///
/// `Default` yields all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct UpsertCounts {
pub courses_fetched: i32,
pub courses_changed: i32,
pub courses_unchanged: i32,
/// Number of audit records produced by the upsert.
pub audits_generated: i32,
/// Number of metric records produced by the upsert.
pub metrics_generated: i32,
}
/// The priority level of a scrape job.
///
/// Maps to the Postgres enum type `scrape_priority`, whose labels are the
/// PascalCase variant names.
#[derive(sqlx::Type, Copy, Debug, Clone)]
#[sqlx(type_name = "scrape_priority", rename_all = "PascalCase")]
pub enum ScrapePriority {
Low,
Medium,
High,
Critical,
}
/// The type of target for a scrape job, determining how the payload is interpreted.
///
/// Maps to the Postgres enum type `target_type`, whose labels are the PascalCase
/// variant names. The payload itself is the job's `target_payload` JSON value.
#[derive(sqlx::Type, Copy, Debug, Clone)]
#[sqlx(type_name = "target_type", rename_all = "PascalCase")]
pub enum TargetType {
Subject,
CourseRange,
CrnList,
SingleCrn,
}
/// Computed status for a scrape job, derived from existing fields.
///
/// Produced by `ScrapeJob::status`; serialized with camelCase variant names
/// (e.g. `staleLock`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum ScrapeJobStatus {
/// The job is locked and the lock is younger than the expiry window.
Processing,
/// The job is locked but the lock has been held for the expiry window or longer.
StaleLock,
/// Unlocked, with `retry_count >= max_retries` and `max_retries > 0`.
Exhausted,
/// Unlocked, not exhausted, and `execute_at` is still in the future.
Scheduled,
/// Unlocked, not exhausted, and due to run.
Pending,
}
/// How long a lock can be held before it is considered stale (mirrors `scrape_jobs::LOCK_EXPIRY`).
///
/// Ten minutes, expressed in seconds. `ScrapeJob::status` compares the age of
/// `locked_at` against it to tell `Processing` from `StaleLock`.
const LOCK_EXPIRY_SECS: i64 = 10 * 60;
/// Represents a queryable job from the database.
///
/// The job's lifecycle state is not stored; `status()` derives it from
/// `locked_at`, `retry_count`, `max_retries`, and `execute_at`.
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct ScrapeJob {
pub id: i32,
/// What kind of target `target_payload` describes.
pub target_type: TargetType,
/// JSON payload, interpreted according to `target_type`.
pub target_payload: Value,
pub priority: ScrapePriority,
/// Earliest time the job should run; a future value reads as `Scheduled`.
pub execute_at: DateTime<Utc>,
pub created_at: DateTime<Utc>,
/// When the job was locked, if it currently is. `None` means unlocked.
pub locked_at: Option<DateTime<Utc>>,
/// Number of retry attempts for this job (non-negative, enforced by CHECK constraint)
pub retry_count: i32,
/// Maximum number of retry attempts allowed (non-negative, enforced by CHECK constraint)
pub max_retries: i32,
/// When the job last entered the "ready to pick up" state.
/// Set to NOW() on creation; updated to NOW() on retry.
pub queued_at: DateTime<Utc>,
}
impl ScrapeJob {
    /// Derive the job's current status from its stored fields and the current time.
    ///
    /// Checks run in priority order:
    /// 1. A locked job is `Processing` while the lock is younger than
    ///    `LOCK_EXPIRY_SECS`, otherwise `StaleLock`.
    /// 2. An unlocked job that has used up a non-zero retry budget is `Exhausted`.
    /// 3. An unlocked job whose `execute_at` lies in the future is `Scheduled`.
    /// 4. Anything else is `Pending`.
    pub fn status(&self) -> ScrapeJobStatus {
        let now = Utc::now();

        // A lock takes precedence over every other consideration.
        if let Some(locked_since) = self.locked_at {
            let held_secs = (now - locked_since).num_seconds();
            return if held_secs < LOCK_EXPIRY_SECS {
                ScrapeJobStatus::Processing
            } else {
                ScrapeJobStatus::StaleLock
            };
        }

        // max_retries == 0 never counts as exhausted.
        let retries_spent = self.max_retries > 0 && self.retry_count >= self.max_retries;
        if retries_spent {
            ScrapeJobStatus::Exhausted
        } else if self.execute_at > now {
            ScrapeJobStatus::Scheduled
        } else {
            ScrapeJobStatus::Pending
        }
    }
}
/// A user authenticated via Discord OAuth.
// Serialized with camelCase keys and exported to TypeScript through ts-rs.
#[derive(sqlx::FromRow, Debug, Clone, Serialize, Deserialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct User {
// Sent as a string (and typed `string` in TypeScript) because 64-bit ids can
// exceed the integer range JavaScript numbers represent exactly; accepted back
// as either a number or a string.
#[serde(
serialize_with = "serialize_i64_as_string",
deserialize_with = "deserialize_i64_from_string"
)]
#[ts(type = "string")]
pub discord_id: i64,
pub discord_username: String,
// Optional avatar hash; `None` when the user has none recorded.
pub discord_avatar_hash: Option<String>,
pub is_admin: bool,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
/// A server-side session for an authenticated user.
#[allow(dead_code)] // Fields read via sqlx::FromRow; some only used in DB queries
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct UserSession {
/// Session identifier.
pub id: String,
/// Owning user. NOTE(review): presumably `User::discord_id` — confirm against the schema.
pub user_id: i64,
pub created_at: DateTime<Utc>,
pub expires_at: DateTime<Utc>,
pub last_active_at: DateTime<Utc>,
}
+728
View File
@@ -0,0 +1,728 @@
//! Name parsing, normalization, and matching utilities.
//!
//! Handles the mismatch between Banner's single `display_name` ("Last, First Middle")
//! and RMP's separate `first_name`/`last_name` fields, plus data quality issues
//! from both sources (HTML entities, accents, nicknames, suffixes, junk).
use sqlx::PgPool;
use tracing::{info, warn};
use unicode_normalization::UnicodeNormalization;
/// Known name suffixes to extract from the last-name portion.
///
/// Entries are lowercase and carry no trailing period; `extract_suffix` lowercases
/// the candidate token and strips trailing periods before comparing, so "Jr.",
/// "JR" and "jr" all match `"jr"`.
const SUFFIXES: &[&str] = &["iv", "iii", "ii", "jr", "sr"];
/// Parsed, cleaned name components.
///
/// Produced by `parse_banner_name` and `parse_rmp_name`; consumed by
/// `matching_keys` to build normalized lookup keys.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NameParts {
/// Cleaned display-quality first name(s): "H. Paul", "María"
pub first: String,
/// Cleaned display-quality last name: "O'Brien", "LeBlanc"
pub last: String,
/// Middle name/initial if detected: "Manuel", "L."
///
/// Both parsers in this module currently always set this to `None`; middle
/// names stay inside `first`.
pub middle: Option<String>,
/// Suffix if detected: "III", "Jr"
///
/// Kept as written in the input, including any trailing period.
pub suffix: Option<String>,
/// Nicknames extracted from parentheses: ["Ken"], ["Qian"]
///
/// Also filled from quoted nicknames. Always empty for Banner names.
pub nicknames: Vec<String>,
}
/// Decode HTML entities found in Banner data into the characters they stand for.
///
/// Covers named entities (`&amp;`, `&uuml;`) as well as numeric references
/// (`&#39;`, `&#x27;`). Input without any `&` is returned as-is without running
/// the decoder.
pub(crate) fn decode_html_entities(s: &str) -> String {
    if s.contains('&') {
        htmlize::unescape(s).to_string()
    } else {
        // Fast path: nothing that could be an entity.
        s.to_string()
    }
}
/// Pull nicknames written in parentheses or double quotes out of a name string.
///
/// Returns the name with the nickname spans removed (whitespace collapsed) and the
/// nicknames in order of appearance. Straight (`"`) and curly (`“` `”`) double
/// quotes are all accepted as both opening and closing marks. A span that is never
/// closed runs to the end of the input. Blank nicknames are dropped.
///
/// `"William (Ken)"` → `("William", vec!["Ken"])`
/// `"Thomas \"Butch\""` → `("Thomas", vec!["Butch"])`
/// `"John (jack) C."` → `("John C.", vec!["jack"])`
fn extract_nicknames(s: &str) -> (String, Vec<String>) {
    const QUOTE_MARKS: [char; 3] = ['"', '\u{201C}', '\u{201D}'];

    let mut found = Vec::new();
    let mut remainder = String::with_capacity(s.len());
    let mut stream = s.chars();

    while let Some(current) = stream.next() {
        let opens_paren = current == '(';
        if !opens_paren && !QUOTE_MARKS.contains(&current) {
            remainder.push(current);
            continue;
        }

        // Consume through the closing delimiter (which take_while also swallows),
        // or to the end of input if the span is never closed.
        let raw: String = if opens_paren {
            stream.by_ref().take_while(|&c| c != ')').collect()
        } else {
            stream
                .by_ref()
                .take_while(|c| !QUOTE_MARKS.contains(c))
                .collect()
        };

        let nick = raw.trim();
        if !nick.is_empty() {
            found.push(nick.to_string());
        }
    }

    // Removing a span can leave doubled or trailing spaces behind.
    (collapse_whitespace(&remainder), found)
}
/// Split a generational suffix (Jr, Sr, II, III, IV) off the end of a last name.
///
/// Only the final whitespace-separated token is considered, and only when at least
/// one other token precedes it. Matching ignores case and trailing periods; the
/// suffix is returned exactly as written. When nothing matches, the input comes
/// back unchanged with `None`.
///
/// `"LeBlanc III"` → `("LeBlanc", Some("III"))`
/// `"Smith Jr."` → `("Smith", Some("Jr."))`
fn extract_suffix(last: &str) -> (String, Option<String>) {
    let unchanged = || (last.to_string(), None);

    let mut words: Vec<&str> = last.split_whitespace().collect();
    if words.len() < 2 {
        return unchanged();
    }

    let tail = words[words.len() - 1];
    let lowered = tail.to_lowercase();
    let lookup = lowered.trim_end_matches('.');
    if !SUFFIXES.contains(&lookup) {
        return unchanged();
    }

    // Drop the suffix token and rejoin what is left of the surname.
    words.pop();
    (words.join(" "), Some(tail.to_string()))
}
/// Clean up junk commonly found in RMP name fields.
///
/// - A value that looks like an email address (contains `@` and `.`, with no
///   spaces) is discarded: `"Neel.Baumgardner@utsa.edu"` → `""`
/// - Trailing commas are removed: `"Cronenberger,"` → `"Cronenberger"`
/// - Surrounding whitespace is trimmed.
fn strip_junk(s: &str) -> String {
    let trimmed = s.trim();

    let looks_like_email =
        trimmed.contains('@') && trimmed.contains('.') && !trimmed.contains(' ');
    if looks_like_email {
        String::new()
    } else {
        trimmed.trim_end_matches(',').trim().to_owned()
    }
}
/// Replace every run of whitespace with a single space and drop leading and
/// trailing whitespace.
fn collapse_whitespace(s: &str) -> String {
    let mut compact = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        if !compact.is_empty() {
            compact.push(' ');
        }
        compact.push_str(word);
    }
    compact
}
/// Parse a Banner `display_name` ("Last, First Middle") into structured parts.
///
/// HTML entities are decoded first. The text before the first comma is the last
/// name (with any trailing suffix split off); everything after it becomes `first`,
/// whitespace-collapsed. Returns `None` when there is no comma or either side of
/// it is blank.
///
/// # Examples
///
/// ```
/// use banner::data::names::parse_banner_name;
///
/// let parts = parse_banner_name("O&#39;Brien, Erin").unwrap();
/// assert_eq!(parts.first, "Erin");
/// assert_eq!(parts.last, "O'Brien");
/// ```
pub fn parse_banner_name(display_name: &str) -> Option<NameParts> {
    let decoded = decode_html_entities(display_name);

    let (family_raw, given_raw) = decoded.split_once(',')?;
    let (family, given) = (family_raw.trim(), given_raw.trim());
    if family.is_empty() || given.is_empty() {
        return None;
    }

    let (last, suffix) = extract_suffix(family);

    Some(NameParts {
        // Banner does not mark where the first name ends and the middle name
        // begins, so all given-name tokens stay together ("H. Paul").
        first: collapse_whitespace(given),
        last,
        middle: None,
        suffix,
        // Banner names carry no nicknames.
        nicknames: Vec::new(),
    })
}
/// Parse RMP professor name fields into structured parts.
///
/// Junk (email addresses, trailing commas) is stripped from both fields,
/// nicknames in parentheses or quotes are lifted out of the first name, and a
/// suffix is split off the last name. Returns `None` when either name is empty
/// after cleaning.
///
/// # Examples
///
/// ```
/// use banner::data::names::parse_rmp_name;
///
/// let parts = parse_rmp_name("William (Ken)", "Burchenal").unwrap();
/// assert_eq!(parts.first, "William");
/// assert_eq!(parts.nicknames, vec!["Ken"]);
/// ```
pub fn parse_rmp_name(first_name: &str, last_name: &str) -> Option<NameParts> {
    let given = strip_junk(first_name);
    let family = strip_junk(last_name);
    if given.is_empty() || family.is_empty() {
        return None;
    }

    let (given_without_nicks, nicknames) = extract_nicknames(&given);
    let first = collapse_whitespace(&given_without_nicks);
    let (last, suffix) = extract_suffix(&family);

    // A first name that was nothing but a nickname is empty by now.
    let usable = !first.is_empty() && !last.is_empty();
    usable.then(|| NameParts {
        first,
        last,
        middle: None,
        suffix,
        nicknames,
    })
}
/// Normalize a name string for matching index keys.
///
/// Pipeline: lowercase → NFD decompose → strip combining marks →
/// strip ALL punctuation, hyphens, and whitespace.
///
/// This produces a compact, space-free string so that "Aguirre Mesa" (Banner)
/// and "Aguirre-Mesa" (RMP) both become "aguirremesa".
///
/// # Examples
///
/// ```
/// use banner::data::names::normalize_for_matching;
///
/// assert_eq!(normalize_for_matching("García"), "garcia");
/// assert_eq!(normalize_for_matching("O'Brien"), "obrien");
/// assert_eq!(normalize_for_matching("Aguirre-Mesa"), "aguirremesa");
/// assert_eq!(normalize_for_matching("Aguirre Mesa"), "aguirremesa");
/// ```
pub fn normalize_for_matching(s: &str) -> String {
    let lowered = s.to_lowercase();
    lowered
        .nfd()
        // NFD splits an accented letter into base letter + combining mark. Keeping
        // only alphabetic, non-combining characters drops the marks along with
        // punctuation, digits, and whitespace.
        .filter(|&ch| ch.is_alphabetic() && !unicode_normalization::char::is_combining_mark(ch))
        .collect()
}
/// Generate all matching index keys for a parsed name.
///
/// Every key is `(normalized_last, normalized_first_variant)`. Variants are
/// produced in this order, skipping empty and duplicate ones:
///
/// 1. the full first name (normalization removes spaces and punctuation),
/// 2. each whitespace-separated token of the first name, if there are several,
/// 3. each nickname.
///
/// For a name like "H. Paul" / "LeBlanc" with no nicknames, generates:
/// - `("leblanc", "hpaul")` — full normalized first
/// - `("leblanc", "h")` — individual token
/// - `("leblanc", "paul")` — individual token
///
/// For a name like "William" / "Burchenal" with nickname "Ken":
/// - `("burchenal", "william")` — primary
/// - `("burchenal", "ken")` — nickname variant
///
/// Returns an empty list when the last name normalizes to nothing.
pub fn matching_keys(parts: &NameParts) -> Vec<(String, String)> {
    let last_key = normalize_for_matching(&parts.last);
    if last_key.is_empty() {
        return Vec::new();
    }

    // Per-token variants only add information when there is more than one token.
    let mut tokens: Vec<&str> = parts.first.split_whitespace().collect();
    if tokens.len() <= 1 {
        tokens.clear();
    }

    let candidates = std::iter::once(parts.first.as_str())
        .chain(tokens)
        .chain(parts.nicknames.iter().map(String::as_str));

    let mut emitted = std::collections::HashSet::new();
    let mut keys = Vec::new();
    for candidate in candidates {
        let first_key = normalize_for_matching(candidate);
        // `insert` returns false for a variant already emitted.
        if !first_key.is_empty() && emitted.insert(first_key.clone()) {
            keys.push((last_key.clone(), first_key));
        }
    }
    keys
}
/// Backfill `first_name`/`last_name` columns for all instructors that have
/// a `display_name` but NULL structured name fields.
///
/// Parses each `display_name` using [`parse_banner_name`] and updates the row.
/// Logs warnings for any names that fail to parse.
pub async fn backfill_instructor_names(db_pool: &PgPool) -> crate::error::Result<()> {
let rows: Vec<(i32, String)> = sqlx::query_as(
"SELECT id, display_name FROM instructors WHERE first_name IS NULL OR last_name IS NULL",
)
.fetch_all(db_pool)
.await?;
if rows.is_empty() {
return Ok(());
}
let total = rows.len();
let mut ids: Vec<i32> = Vec::with_capacity(total);
let mut firsts: Vec<String> = Vec::with_capacity(total);
let mut lasts: Vec<String> = Vec::with_capacity(total);
let mut unparseable = 0usize;
for (id, display_name) in &rows {
match parse_banner_name(display_name) {
Some(parts) => {
ids.push(*id);
firsts.push(parts.first);
lasts.push(parts.last);
}
None => {
warn!(
id,
display_name, "Failed to parse instructor display_name during backfill"
);
unparseable += 1;
}
}
}
if !ids.is_empty() {
let first_refs: Vec<&str> = firsts.iter().map(|s| s.as_str()).collect();
let last_refs: Vec<&str> = lasts.iter().map(|s| s.as_str()).collect();
sqlx::query(
r#"
UPDATE instructors i
SET first_name = v.first_name, last_name = v.last_name
FROM UNNEST($1::int4[], $2::text[], $3::text[])
AS v(id, first_name, last_name)
WHERE i.id = v.id
"#,
)
.bind(&ids)
.bind(&first_refs)
.bind(&last_refs)
.execute(db_pool)
.await?;
}
info!(
total,
updated = ids.len(),
unparseable,
"Instructor name backfill complete"
);
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
// -----------------------------------------------------------------------
// HTML entity decoding
// -----------------------------------------------------------------------
#[test]
fn decode_apostrophe_entity() {
assert_eq!(decode_html_entities("O&#39;Brien"), "O'Brien");
}
#[test]
fn decode_umlaut_entity() {
assert_eq!(decode_html_entities("B&uuml;lent"), "Bülent");
}
#[test]
fn decode_no_entities() {
assert_eq!(decode_html_entities("Smith"), "Smith");
}
// -----------------------------------------------------------------------
// Nickname extraction
// -----------------------------------------------------------------------
#[test]
fn extract_paren_nickname() {
let (cleaned, nicks) = extract_nicknames("William (Ken)");
assert_eq!(cleaned, "William");
assert_eq!(nicks, vec!["Ken"]);
}
#[test]
fn extract_quoted_nickname() {
let (cleaned, nicks) = extract_nicknames("Thomas \"Butch\"");
assert_eq!(cleaned, "Thomas");
assert_eq!(nicks, vec!["Butch"]);
}
#[test]
fn extract_paren_with_extra_text() {
let (cleaned, nicks) = extract_nicknames("John (jack) C.");
assert_eq!(cleaned, "John C.");
assert_eq!(nicks, vec!["jack"]);
}
#[test]
fn extract_no_nicknames() {
let (cleaned, nicks) = extract_nicknames("Maria Elena");
assert_eq!(cleaned, "Maria Elena");
assert!(nicks.is_empty());
}
// -----------------------------------------------------------------------
// Suffix extraction
// -----------------------------------------------------------------------
#[test]
fn extract_suffix_iii() {
let (name, suffix) = extract_suffix("LeBlanc III");
assert_eq!(name, "LeBlanc");
assert_eq!(suffix, Some("III".to_string()));
}
#[test]
fn extract_suffix_jr_period() {
let (name, suffix) = extract_suffix("Smith Jr.");
assert_eq!(name, "Smith");
assert_eq!(suffix, Some("Jr.".to_string()));
}
#[test]
fn extract_no_suffix() {
let (name, suffix) = extract_suffix("García");
assert_eq!(name, "García");
assert_eq!(suffix, None);
}
// -----------------------------------------------------------------------
// Junk stripping
// -----------------------------------------------------------------------
#[test]
fn strip_trailing_comma() {
assert_eq!(strip_junk("Cronenberger,"), "Cronenberger");
}
#[test]
fn strip_email_address() {
assert_eq!(strip_junk("Neel.Baumgardner@utsa.edu"), "");
}
#[test]
fn strip_clean_name() {
assert_eq!(strip_junk(" Maria "), "Maria");
}
// -----------------------------------------------------------------------
// normalize_for_matching
// -----------------------------------------------------------------------
#[test]
fn normalize_strips_accents() {
assert_eq!(normalize_for_matching("García"), "garcia");
}
#[test]
fn normalize_strips_apostrophe() {
assert_eq!(normalize_for_matching("O'Brien"), "obrien");
}
#[test]
fn normalize_strips_hyphen() {
assert_eq!(normalize_for_matching("Aguirre-Mesa"), "aguirremesa");
}
#[test]
fn normalize_tilde_n() {
assert_eq!(normalize_for_matching("Muñoz"), "munoz");
}
#[test]
fn normalize_umlaut() {
assert_eq!(normalize_for_matching("Müller"), "muller");
}
#[test]
fn normalize_period() {
assert_eq!(normalize_for_matching("H. Paul"), "hpaul");
}
#[test]
fn normalize_strips_spaces() {
assert_eq!(normalize_for_matching("Mary Lou"), "marylou");
}
// -----------------------------------------------------------------------
// parse_banner_name
// -----------------------------------------------------------------------
#[test]
fn banner_standard_name() {
let p = parse_banner_name("Smith, John").unwrap();
assert_eq!(p.first, "John");
assert_eq!(p.last, "Smith");
assert_eq!(p.suffix, None);
}
#[test]
fn banner_html_entity_apostrophe() {
let p = parse_banner_name("O&#39;Brien, Erin").unwrap();
assert_eq!(p.first, "Erin");
assert_eq!(p.last, "O'Brien");
}
#[test]
fn banner_html_entity_umlaut() {
let p = parse_banner_name("Temel, B&uuml;lent").unwrap();
assert_eq!(p.first, "Bülent");
assert_eq!(p.last, "Temel");
}
#[test]
fn banner_suffix_iii() {
let p = parse_banner_name("LeBlanc III, H. Paul").unwrap();
assert_eq!(p.first, "H. Paul");
assert_eq!(p.last, "LeBlanc");
assert_eq!(p.suffix, Some("III".to_string()));
}
#[test]
fn banner_suffix_ii() {
let p = parse_banner_name("Ellis II, Ronald").unwrap();
assert_eq!(p.first, "Ronald");
assert_eq!(p.last, "Ellis");
assert_eq!(p.suffix, Some("II".to_string()));
}
#[test]
fn banner_multi_word_last() {
let p = parse_banner_name("Aguirre Mesa, Andres").unwrap();
assert_eq!(p.first, "Andres");
assert_eq!(p.last, "Aguirre Mesa");
}
#[test]
fn banner_hyphenated_last() {
let p = parse_banner_name("Abu-Lail, Nehal").unwrap();
assert_eq!(p.first, "Nehal");
assert_eq!(p.last, "Abu-Lail");
}
#[test]
fn banner_with_middle_name() {
let p = parse_banner_name("Smith, John David").unwrap();
assert_eq!(p.first, "John David");
assert_eq!(p.last, "Smith");
}
#[test]
fn banner_no_comma() {
assert!(parse_banner_name("SingleName").is_none());
}
#[test]
fn banner_empty_first() {
assert!(parse_banner_name("Smith,").is_none());
}
#[test]
fn banner_empty_last() {
assert!(parse_banner_name(", John").is_none());
}
// -----------------------------------------------------------------------
// parse_rmp_name
// -----------------------------------------------------------------------
#[test]
fn rmp_standard_name() {
let p = parse_rmp_name("John", "Smith").unwrap();
assert_eq!(p.first, "John");
assert_eq!(p.last, "Smith");
}
#[test]
fn rmp_with_nickname() {
let p = parse_rmp_name("William (Ken)", "Burchenal").unwrap();
assert_eq!(p.first, "William");
assert_eq!(p.nicknames, vec!["Ken"]);
}
#[test]
fn rmp_trailing_comma_last() {
let p = parse_rmp_name("J.", "Cronenberger,").unwrap();
assert_eq!(p.last, "Cronenberger");
}
#[test]
fn rmp_email_in_first() {
assert!(parse_rmp_name("Neel.Baumgardner@utsa.edu", "Baumgardner").is_none());
}
#[test]
fn rmp_suffix_in_last() {
let p = parse_rmp_name("H. Paul", "LeBlanc III").unwrap();
assert_eq!(p.first, "H. Paul");
assert_eq!(p.last, "LeBlanc");
assert_eq!(p.suffix, Some("III".to_string()));
}
#[test]
fn rmp_quoted_nickname() {
let p = parse_rmp_name("Thomas \"Butch\"", "Matjeka").unwrap();
assert_eq!(p.first, "Thomas");
assert_eq!(p.nicknames, vec!["Butch"]);
}
#[test]
fn rmp_accented_last() {
let p = parse_rmp_name("Liliana", "Saldaña").unwrap();
assert_eq!(p.last, "Saldaña");
}
// -----------------------------------------------------------------------
// matching_keys
// -----------------------------------------------------------------------
#[test]
fn keys_simple_name() {
let parts = NameParts {
first: "John".into(),
last: "Smith".into(),
middle: None,
suffix: None,
nicknames: vec![],
};
let keys = matching_keys(&parts);
assert_eq!(keys, vec![("smith".into(), "john".into())]);
}
#[test]
fn keys_multi_token_first() {
let parts = NameParts {
first: "H. Paul".into(),
last: "LeBlanc".into(),
middle: None,
suffix: Some("III".into()),
nicknames: vec![],
};
let keys = matching_keys(&parts);
assert!(keys.contains(&("leblanc".into(), "hpaul".into())));
assert!(keys.contains(&("leblanc".into(), "paul".into())));
assert!(keys.contains(&("leblanc".into(), "h".into())));
assert_eq!(keys.len(), 3);
}
#[test]
fn keys_with_nickname() {
let parts = NameParts {
first: "William".into(),
last: "Burchenal".into(),
middle: None,
suffix: None,
nicknames: vec!["Ken".into()],
};
let keys = matching_keys(&parts);
assert!(keys.contains(&("burchenal".into(), "william".into())));
assert!(keys.contains(&("burchenal".into(), "ken".into())));
assert_eq!(keys.len(), 2);
}
#[test]
fn keys_hyphenated_last() {
let parts = parse_banner_name("Aguirre-Mesa, Andres").unwrap();
let keys = matching_keys(&parts);
// Hyphen removed: "aguirremesa"
assert!(keys.contains(&("aguirremesa".into(), "andres".into())));
}
#[test]
fn keys_accented_name() {
let parts = parse_rmp_name("Liliana", "Saldaña").unwrap();
let keys = matching_keys(&parts);
assert!(keys.contains(&("saldana".into(), "liliana".into())));
}
/// A space-separated Banner surname and a hyphenated RMP surname share a key.
#[test]
fn keys_cross_source_match() {
    // Banner side: "Aguirre Mesa, Andres" -> last = "Aguirre Mesa".
    let banner_keys = matching_keys(&parse_banner_name("Aguirre Mesa, Andres").unwrap());
    // RMP side: "Andres" / "Aguirre-Mesa" -> last = "Aguirre-Mesa".
    let rmp_keys = matching_keys(&parse_rmp_name("Andres", "Aguirre-Mesa").unwrap());

    // Both are expected to normalize to ("aguirremesa", "andres").
    let shares_a_key = rmp_keys.iter().any(|key| banner_keys.contains(key));
    assert!(shares_a_key);
}
/// An unaccented Banner name and its accented RMP spelling share a key.
#[test]
fn keys_accent_cross_match() {
    // Banner side is given in plain ASCII: "Garcia, Jose".
    let banner_keys = matching_keys(&parse_banner_name("Garcia, Jose").unwrap());
    // RMP side carries the accents: "José" / "García".
    let rmp_keys = matching_keys(&parse_rmp_name("José", "García").unwrap());

    // Both are expected to normalize to ("garcia", "jose").
    let shares_a_key = rmp_keys.iter().any(|key| banner_keys.contains(key));
    assert!(shares_a_key);
}
}
+57
View File
@@ -0,0 +1,57 @@
//! Database operations for the `reference_data` table (code→description lookups).
use crate::data::models::ReferenceData;
use crate::error::Result;
use html_escape::decode_html_entities;
use sqlx::PgPool;
/// Batch upsert reference data entries.
///
/// Inserts every `(category, code, description)` triple in one statement using
/// the UNNEST pattern; rows that already exist have their description replaced.
/// HTML entities in descriptions are decoded before storage.
///
/// Entries are deduplicated by `(category, code)` first, keeping the last
/// occurrence in `entries`. PostgreSQL aborts an `INSERT ... ON CONFLICT DO
/// UPDATE` that proposes the same conflict key twice, so a duplicate in the
/// input would otherwise fail the whole batch.
///
/// # Errors
/// Returns the underlying database error if the statement fails.
pub async fn batch_upsert(entries: &[ReferenceData], db_pool: &PgPool) -> Result<()> {
    if entries.is_empty() {
        return Ok(());
    }

    // Walk in reverse so the first time a key is seen is its LAST occurrence.
    let mut seen = std::collections::HashSet::with_capacity(entries.len());
    let deduped: Vec<&ReferenceData> = entries
        .iter()
        .rev()
        .filter(|e| seen.insert((e.category.as_str(), e.code.as_str())))
        .collect();

    let categories: Vec<&str> = deduped.iter().map(|e| e.category.as_str()).collect();
    let codes: Vec<&str> = deduped.iter().map(|e| e.code.as_str()).collect();
    let descriptions: Vec<String> = deduped
        .iter()
        .map(|e| decode_html_entities(&e.description).into_owned())
        .collect();

    sqlx::query(
        r#"
INSERT INTO reference_data (category, code, description)
SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[])
ON CONFLICT (category, code)
DO UPDATE SET description = EXCLUDED.description
"#,
    )
    .bind(&categories)
    .bind(&codes)
    .bind(&descriptions)
    .execute(db_pool)
    .await?;

    Ok(())
}
/// Fetch every reference data entry belonging to `category`.
///
/// Rows come back ordered by `description`.
///
/// # Errors
/// Returns the underlying database error if the query fails.
pub async fn get_by_category(category: &str, db_pool: &PgPool) -> Result<Vec<ReferenceData>> {
    const SQL: &str =
        "SELECT category, code, description FROM reference_data WHERE category = $1 ORDER BY description";

    let entries = sqlx::query_as::<_, ReferenceData>(SQL)
        .bind(category)
        .fetch_all(db_pool)
        .await?;
    Ok(entries)
}
/// Fetch the entire `reference_data` table (used for cache initialization).
///
/// Rows come back ordered by `category`, then `description`.
///
/// # Errors
/// Returns the underlying database error if the query fails.
pub async fn get_all(db_pool: &PgPool) -> Result<Vec<ReferenceData>> {
    const SQL: &str =
        "SELECT category, code, description FROM reference_data ORDER BY category, description";

    let entries = sqlx::query_as::<_, ReferenceData>(SQL)
        .fetch_all(db_pool)
        .await?;
    Ok(entries)
}
+192
View File
@@ -0,0 +1,192 @@
//! Database operations for RateMyProfessors data.
use crate::error::Result;
use crate::rmp::RmpProfessor;
use sqlx::PgPool;
use std::collections::HashSet;
/// Bulk upsert RMP professors using the UNNEST pattern.
///
/// Deduplicates by `legacy_id` before inserting — the RMP API can return
/// the same professor on multiple pages. The last occurrence in `professors`
/// wins. First and last names are stored with surrounding whitespace trimmed,
/// and `last_synced_at` is set to `NOW()` for every inserted or updated row.
///
/// # Errors
/// Returns an error carrying the database failure message if the statement
/// cannot be executed.
pub async fn batch_upsert_rmp_professors(
    professors: &[RmpProfessor],
    db_pool: &PgPool,
) -> Result<()> {
    if professors.is_empty() {
        return Ok(());
    }

    // Deduplicate: keep last occurrence per legacy_id (latest page wins).
    // Iterating in reverse makes "first seen" equal "last in the input".
    let mut seen = HashSet::new();
    let deduped: Vec<&RmpProfessor> = professors
        .iter()
        .rev()
        .filter(|p| seen.insert(p.legacy_id))
        .collect();

    // One column-oriented vector per bound array parameter. Names are bound as
    // trimmed slices borrowed straight from the input; no owned copies needed.
    let legacy_ids: Vec<i32> = deduped.iter().map(|p| p.legacy_id).collect();
    let graphql_ids: Vec<&str> = deduped.iter().map(|p| p.graphql_id.as_str()).collect();
    let first_names: Vec<&str> = deduped.iter().map(|p| p.first_name.trim()).collect();
    let last_names: Vec<&str> = deduped.iter().map(|p| p.last_name.trim()).collect();
    let departments: Vec<Option<&str>> = deduped.iter().map(|p| p.department.as_deref()).collect();
    let avg_ratings: Vec<Option<f32>> = deduped.iter().map(|p| p.avg_rating).collect();
    let avg_difficulties: Vec<Option<f32>> = deduped.iter().map(|p| p.avg_difficulty).collect();
    let num_ratings: Vec<i32> = deduped.iter().map(|p| p.num_ratings).collect();
    let would_take_again_pcts: Vec<Option<f32>> =
        deduped.iter().map(|p| p.would_take_again_pct).collect();

    sqlx::query(
        r#"
INSERT INTO rmp_professors (
legacy_id, graphql_id, first_name, last_name, department,
avg_rating, avg_difficulty, num_ratings, would_take_again_pct,
last_synced_at
)
SELECT
v.legacy_id, v.graphql_id, v.first_name, v.last_name, v.department,
v.avg_rating, v.avg_difficulty, v.num_ratings, v.would_take_again_pct,
NOW()
FROM UNNEST(
$1::int4[], $2::text[], $3::text[], $4::text[], $5::text[],
$6::real[], $7::real[], $8::int4[], $9::real[]
) AS v(
legacy_id, graphql_id, first_name, last_name, department,
avg_rating, avg_difficulty, num_ratings, would_take_again_pct
)
ON CONFLICT (legacy_id)
DO UPDATE SET
graphql_id = EXCLUDED.graphql_id,
first_name = EXCLUDED.first_name,
last_name = EXCLUDED.last_name,
department = EXCLUDED.department,
avg_rating = EXCLUDED.avg_rating,
avg_difficulty = EXCLUDED.avg_difficulty,
num_ratings = EXCLUDED.num_ratings,
would_take_again_pct = EXCLUDED.would_take_again_pct,
last_synced_at = EXCLUDED.last_synced_at
"#,
    )
    .bind(&legacy_ids)
    .bind(&graphql_ids)
    .bind(&first_names)
    .bind(&last_names)
    .bind(&departments)
    .bind(&avg_ratings)
    .bind(&avg_difficulties)
    .bind(&num_ratings)
    .bind(&would_take_again_pcts)
    .execute(db_pool)
    .await
    .map_err(|e| anyhow::anyhow!("Failed to batch upsert RMP professors: {}", e))?;

    Ok(())
}
/// Look up RMP rating data for an instructor by instructor id.
///
/// Among the RMP profiles linked to the instructor that have a non-NULL
/// average rating, the one with the most ratings is chosen.
///
/// # Returns
/// * `Ok(Some((avg_rating, num_ratings)))` for the chosen profile
/// * `Ok(None)` if the instructor has no linked profile with a rating
///
/// # Errors
/// Returns the underlying database error if the query fails.
#[allow(dead_code)]
pub async fn get_instructor_rmp_data(
    db_pool: &PgPool,
    instructor_id: i32,
) -> Result<Option<(f32, i32)>> {
    let best_profile = sqlx::query_as::<_, (f32, i32)>(
        r#"
SELECT rp.avg_rating, rp.num_ratings
FROM instructor_rmp_links irl
JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
WHERE irl.instructor_id = $1
AND rp.avg_rating IS NOT NULL
ORDER BY rp.num_ratings DESC NULLS LAST
LIMIT 1
"#,
    )
    .bind(instructor_id)
    .fetch_optional(db_pool)
    .await?;

    Ok(best_profile)
}
/// Detach an instructor from one or all of their RMP profiles.
///
/// All steps run inside a single transaction:
/// 1. Delete the link row(s) from `instructor_rmp_links`.
/// 2. If the instructor has no links left, set `rmp_match_status` to
///    `'unmatched'`.
/// 3. Reset the affected `'accepted'` rows in `rmp_match_candidates` back to
///    `'pending'` (clearing `resolved_at` / `resolved_by`) so they can be
///    matched again later.
///
/// If `rmp_legacy_id` is `Some`, only that specific link and candidate are
/// touched. If `None`, every link and accepted candidate of the instructor is.
///
/// # Errors
/// Returns the underlying database error if any statement or the commit fails.
pub async fn unmatch_instructor(
    db_pool: &PgPool,
    instructor_id: i32,
    rmp_legacy_id: Option<i32>,
) -> Result<()> {
    let mut txn = db_pool.begin().await?;

    // Step 1: drop either the single requested link or every link.
    match rmp_legacy_id {
        Some(legacy_id) => {
            sqlx::query(
                "DELETE FROM instructor_rmp_links WHERE instructor_id = $1 AND rmp_legacy_id = $2",
            )
            .bind(instructor_id)
            .bind(legacy_id)
            .execute(&mut *txn)
            .await?;
        }
        None => {
            sqlx::query("DELETE FROM instructor_rmp_links WHERE instructor_id = $1")
                .bind(instructor_id)
                .execute(&mut *txn)
                .await?;
        }
    }

    // Step 2: only an instructor with zero remaining links becomes unmatched.
    let links_left: i64 = sqlx::query_as::<_, (i64,)>(
        "SELECT COUNT(*) FROM instructor_rmp_links WHERE instructor_id = $1",
    )
    .bind(instructor_id)
    .fetch_one(&mut *txn)
    .await?
    .0;

    if links_left == 0 {
        sqlx::query("UPDATE instructors SET rmp_match_status = 'unmatched' WHERE id = $1")
            .bind(instructor_id)
            .execute(&mut *txn)
            .await?;
    }

    // Step 3: reopen the accepted candidate(s) so they can be re-matched.
    match rmp_legacy_id {
        Some(legacy_id) => {
            sqlx::query(
                "UPDATE rmp_match_candidates
SET status = 'pending', resolved_at = NULL, resolved_by = NULL
WHERE instructor_id = $1 AND rmp_legacy_id = $2 AND status = 'accepted'",
            )
            .bind(instructor_id)
            .bind(legacy_id)
            .execute(&mut *txn)
            .await?;
        }
        None => {
            sqlx::query(
                "UPDATE rmp_match_candidates
SET status = 'pending', resolved_at = NULL, resolved_by = NULL
WHERE instructor_id = $1 AND status = 'accepted'",
            )
            .bind(instructor_id)
            .execute(&mut *txn)
            .await?;
        }
    }

    txn.commit().await?;
    Ok(())
}
+695
View File
@@ -0,0 +1,695 @@
//! Confidence scoring and candidate generation for RMP instructor matching.
use crate::data::names::{matching_keys, parse_banner_name, parse_rmp_name};
use crate::error::Result;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use std::collections::{HashMap, HashSet};
use tracing::{debug, info};
// ---------------------------------------------------------------------------
// Scoring types
// ---------------------------------------------------------------------------
/// Breakdown of individual scoring signals.
///
/// Each field holds the raw (unweighted) signal value produced by
/// `compute_match_score`. The struct is serialized to JSON and stored next to
/// each candidate's composite score.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScoreBreakdown {
/// Name signal. `compute_match_score` always sets this to `1.0`.
pub name: f32,
/// Department signal: `1.0` on a match, `0.2` on a mismatch, `0.5` when the
/// RMP department is unknown.
pub department: f32,
/// Uniqueness signal: `1.0` for at most one candidate, `0.5` for two,
/// `0.2` for three or more.
pub uniqueness: f32,
/// Volume signal in `[0.0, 1.0]`, log-scaled from the RMP rating count.
pub volume: f32,
}
/// Result of scoring a single instructor–RMP candidate pair.
#[derive(Debug, Clone)]
pub struct MatchScore {
/// Weighted composite of the signals in `breakdown`.
pub score: f32,
/// The individual unweighted signals that produced `score`.
pub breakdown: ScoreBreakdown,
}
// ---------------------------------------------------------------------------
// Thresholds
// ---------------------------------------------------------------------------
/// Minimum composite score to store a candidate row.
///
/// NOTE(review): with the current weights and signal floors
/// (0.50 + 0.2 * 0.25 + 0.2 * 0.15 + 0.0 = 0.58) the composite cannot fall
/// below this value, so the threshold does not filter anything today.
const MIN_CANDIDATE_THRESHOLD: f32 = 0.40;
/// Score at or above which a candidate is auto-accepted.
const AUTO_ACCEPT_THRESHOLD: f32 = 0.85;
// ---------------------------------------------------------------------------
// Weights (must sum to 1.0)
// ---------------------------------------------------------------------------
/// Weight of the name signal in the composite score.
const WEIGHT_NAME: f32 = 0.50;
/// Weight of the department-overlap signal in the composite score.
const WEIGHT_DEPARTMENT: f32 = 0.25;
/// Weight of the candidate-uniqueness signal in the composite score.
const WEIGHT_UNIQUENESS: f32 = 0.15;
/// Weight of the rating-volume signal in the composite score.
const WEIGHT_VOLUME: f32 = 0.10;
// ---------------------------------------------------------------------------
// Pure scoring functions
// ---------------------------------------------------------------------------
/// Check if an instructor's subjects overlap with an RMP department.
///
/// Comparison is case-insensitive. A subject matches when any of these holds:
/// - the subject code appears in the department string at the start of a word
///   (so `MAT` matches "Mathematics"),
/// - the department string appears in the subject at the start of a word, or
/// - the subject is a known abbreviation of the department.
///
/// Matches are anchored to word starts on purpose. Plain substring containment
/// also fires in the middle of unrelated words — `CE` inside "Computer
/// S-ci-en-ce", `CS` inside "Physi-cs", `IS` inside "H-is-tory" — which would
/// hand a full department score to a professor from a different department.
///
/// Returns `1.0` for a match, `0.2` for a mismatch, `0.5` when the RMP
/// department is unknown (`None` or blank).
fn department_similarity(subjects: &[String], rmp_department: Option<&str>) -> f32 {
    // A blank department carries no more information than a missing one.
    let dept_lower = match rmp_department.map(str::trim) {
        Some(dept) if !dept.is_empty() => dept.to_lowercase(),
        _ => return 0.5,
    };

    let overlaps = subjects.iter().any(|subj| {
        let subj_lower = subj.to_lowercase();
        occurs_at_word_start(&dept_lower, &subj_lower)
            || occurs_at_word_start(&subj_lower, &dept_lower)
            // Handle common UTSA abbreviation mappings.
            || matches_known_abbreviation(&subj_lower, &dept_lower)
    });

    if overlaps { 1.0 } else { 0.2 }
}

/// Report whether `needle` occurs in `haystack` beginning at a word start,
/// i.e. at the start of the string or right after a non-alphanumeric character.
/// An empty `needle` never matches.
fn occurs_at_word_start(haystack: &str, needle: &str) -> bool {
    if needle.is_empty() {
        return false;
    }
    haystack.char_indices().any(|(idx, _)| {
        haystack[idx..].starts_with(needle) && !haystack[..idx].ends_with(char::is_alphanumeric)
    })
}
/// Expand common subject abbreviations used at UTSA and check for overlap.
///
/// `subject` is compared verbatim against the abbreviation table, so callers
/// are expected to pass it lowercased. When the abbreviation is known, the
/// result is `true` if `department` contains any of its expansions as a
/// substring. Unknown abbreviations yield `false`.
fn matches_known_abbreviation(subject: &str, department: &str) -> bool {
    const MAPPINGS: &[(&str, &[&str])] = &[
        // Core subjects (original mappings, corrected)
        ("cs", &["computer science"]),
        ("ece", &["early childhood education", "early childhood"]),
        ("ee", &["electrical engineering", "electrical"]),
        ("me", &["mechanical engineering", "mechanical"]),
        ("ce", &["civil engineering", "civil"]),
        ("bio", &["biology", "biological"]),
        ("chem", &["chemistry"]),
        ("phys", &["physics"]),
        ("math", &["mathematics"]),
        ("sta", &["statistics"]),
        ("eng", &["english"]),
        ("his", &["history"]),
        ("pol", &["political science"]),
        ("psy", &["psychology"]),
        ("soc", &["sociology"]),
        ("mus", &["music"]),
        ("art", &["art"]),
        ("phi", &["philosophy"]),
        ("eco", &["economics"]),
        ("acc", &["accounting"]),
        ("fin", &["finance"]),
        ("mgt", &["management"]),
        ("mkt", &["marketing"]),
        ("is", &["information systems"]),
        ("ms", &["management science"]),
        ("kin", &["kinesiology"]),
        ("com", &["communication"]),
        // Architecture & Design
        ("arc", &["architecture"]),
        ("ide", &["interior design", "design"]),
        // Anthropology & Ethnic Studies
        ("ant", &["anthropology"]),
        ("aas", &["african american studies", "ethnic studies"]),
        ("mas", &["mexican american studies", "ethnic studies"]),
        ("regs", &["ethnic studies", "gender"]),
        // Languages
        ("lng", &["linguistics", "applied linguistics"]),
        ("spn", &["spanish"]),
        ("frn", &["french"]),
        ("ger", &["german"]),
        ("chn", &["chinese"]),
        ("jpn", &["japanese"]),
        ("kor", &["korean"]),
        ("itl", &["italian"]),
        ("rus", &["russian"]),
        ("lat", &["latin"]),
        ("grk", &["greek"]),
        ("asl", &["american sign language", "sign language"]),
        ("fl", &["foreign languages", "languages", "modern languages"]),
        // Education
        ("edu", &["education"]),
        ("ci", &["curriculum", "education"]),
        ("edl", &["educational leadership", "education"]),
        ("edp", &["educational psychology", "education"]),
        ("bbl", &["bilingual education"]),
        ("spe", &["special education", "education"]),
        // Business
        ("ent", &["entrepreneurship"]),
        ("gba", &["general business", "business"]),
        ("blw", &["business law", "law"]),
        ("rfd", &["real estate"]),
        ("mot", &["management of technology", "management"]),
        // Engineering
        ("egr", &["engineering"]),
        ("bme", &["biomedical engineering", "engineering"]),
        ("cme", &["chemical engineering", "engineering"]),
        ("cpe", &["computer engineering", "engineering"]),
        ("ise", &["industrial", "systems engineering", "engineering"]),
        ("mate", &["materials engineering", "engineering"]),
        // Sciences
        ("che", &["chemistry"]),
        ("bch", &["biochemistry", "chemistry"]),
        ("geo", &["geology"]),
        ("phy", &["physics"]),
        ("ast", &["astronomy"]),
        ("es", &["environmental science"]),
        // Social Sciences
        ("crj", &["criminal justice"]),
        ("swk", &["social work"]),
        ("pad", &["public administration"]),
        ("grg", &["geography"]),
        ("ges", &["geography"]),
        // Humanities
        ("cla", &["classics"]),
        ("hum", &["humanities"]),
        ("wgss", &["women's studies"]),
        // Health
        ("hth", &["health"]),
        ("hcp", &["health science", "health"]),
        ("ntr", &["nutrition"]),
        // Military
        ("msc", &["military science"]),
        ("asc", &["aerospace"]),
        // Arts
        ("dan", &["dance"]),
        ("thr", &["theater"]),
        ("ahc", &["art history"]),
        // Other
        ("cou", &["counseling"]),
        ("hon", &["honors"]),
        ("csm", &["construction"]),
        ("wrc", &["writing"]),
        ("set", &["tourism management", "tourism"]),
    ];

    // Only the first table row for the abbreviation is consulted.
    let Some((_, expansions)) = MAPPINGS.iter().find(|(abbr, _)| *abbr == subject) else {
        return false;
    };
    expansions.iter().any(|phrase| department.contains(phrase))
}
/// Compute match confidence score (0.0–1.0) for an instructor–RMP pair.
///
/// The composite is the weighted sum of four signals:
/// - **name**: always `1.0`, since candidates are only generated for exact
///   normalized name matches;
/// - **department**: overlap between `instructor_subjects` and
///   `rmp_department`;
/// - **uniqueness**: `1.0` for zero or one candidate, `0.5` for two, `0.2`
///   for three or more;
/// - **volume**: `ln(1 + ratings) / ln(1 + 5)` clamped to `[0.0, 1.0]`, so it
///   saturates at five ratings.
///
/// Because the name signal is constant, the effective score range is
/// 0.50–1.0.
///
/// A negative `rmp_num_ratings` is treated as zero. Without that floor, values
/// below `-1` make `ln_1p` return NaN, which `clamp` passes through and which
/// would turn the whole composite into NaN.
pub fn compute_match_score(
    instructor_subjects: &[String],
    rmp_department: Option<&str>,
    candidate_count: usize,
    rmp_num_ratings: i32,
) -> MatchScore {
    // --- Name (0.50) — always 1.0, candidates only exist for exact matches ---
    let name_score = 1.0;

    // --- Department (0.25) ---
    let dept_score = department_similarity(instructor_subjects, rmp_department);

    // --- Uniqueness (0.15) ---
    let uniqueness_score = match candidate_count {
        0 | 1 => 1.0,
        2 => 0.5,
        _ => 0.2,
    };

    // --- Volume (0.10) ---
    let rating_count = rmp_num_ratings.max(0) as f32;
    let volume_score = (rating_count.ln_1p() / 5.0_f32.ln_1p()).clamp(0.0, 1.0);

    let composite = name_score * WEIGHT_NAME
        + dept_score * WEIGHT_DEPARTMENT
        + uniqueness_score * WEIGHT_UNIQUENESS
        + volume_score * WEIGHT_VOLUME;

    MatchScore {
        score: composite,
        breakdown: ScoreBreakdown {
            name: name_score,
            department: dept_score,
            uniqueness: uniqueness_score,
            volume: volume_score,
        },
    }
}
// ---------------------------------------------------------------------------
// Candidate generation (DB)
// ---------------------------------------------------------------------------
/// Statistics returned from candidate generation.
#[derive(Debug)]
pub struct MatchingStats {
/// Number of instructors loaded with `rmp_match_status = 'unmatched'`.
pub total_unmatched: usize,
/// Number of candidate pairs collected for insertion (pairs not yet present
/// in `rmp_match_candidates`).
pub candidates_created: usize,
/// Number of existing pending candidate pairs whose score was recomputed.
pub candidates_rescored: usize,
/// Number of instructors whose top candidate was selected for auto-accept.
pub auto_matched: usize,
/// Instructors skipped because their display name could not be parsed.
pub skipped_unparseable: usize,
/// Instructors skipped because no RMP professor shared a name key.
pub skipped_no_candidates: usize,
}
/// Lightweight row for building the in-memory RMP name index.
///
/// Holds only the columns the scorer needs; one copy is stored per name key
/// the professor is indexed under.
struct RmpProfForMatching {
/// RMP `legacy_id` identifying the professor.
legacy_id: i32,
/// RMP department name, if one is recorded.
department: Option<String>,
/// Rating count, used for the volume signal.
num_ratings: i32,
}
/// Generate match candidates for all unmatched instructors.
///
/// For each unmatched instructor:
/// 1. Parse `display_name` into [`NameParts`] and generate matching keys.
/// 2. Find RMP professors with matching normalized name keys.
/// 3. Score each candidate.
/// 4. Store candidates scoring above [`MIN_CANDIDATE_THRESHOLD`].
/// 5. Auto-accept if the top candidate scores ≥ [`AUTO_ACCEPT_THRESHOLD`]
/// and no existing rejected candidate exists for that pair.
///
/// Instructor–RMP pairs that are already resolved (accepted or rejected) are
/// skipped. Pairs with any other status are rescored and their stored score is
/// updated. All writes happen inside a single transaction.
///
/// # Returns
/// A [`MatchingStats`] summary of the run, which is also logged at info level.
///
/// # Errors
/// Returns the underlying database error if any query or the commit fails.
pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
// 1. Load unmatched instructors
let instructors: Vec<(i32, String)> = sqlx::query_as(
"SELECT id, display_name FROM instructors WHERE rmp_match_status = 'unmatched'",
)
.fetch_all(db_pool)
.await?;
// Nothing to do: return all-zero stats without touching the other tables.
if instructors.is_empty() {
info!("No unmatched instructors to generate candidates for");
return Ok(MatchingStats {
total_unmatched: 0,
candidates_created: 0,
candidates_rescored: 0,
auto_matched: 0,
skipped_unparseable: 0,
skipped_no_candidates: 0,
});
}
let instructor_ids: Vec<i32> = instructors.iter().map(|(id, _)| *id).collect();
let total_unmatched = instructors.len();
// 2. Load instructor subjects
let subject_rows: Vec<(i32, String)> = sqlx::query_as(
r#"
SELECT DISTINCT ci.instructor_id, c.subject
FROM course_instructors ci
JOIN courses c ON c.id = ci.course_id
WHERE ci.instructor_id = ANY($1)
"#,
)
.bind(&instructor_ids)
.fetch_all(db_pool)
.await?;
// instructor_id -> distinct subjects taught by that instructor.
let mut subject_map: HashMap<i32, Vec<String>> = HashMap::new();
for (iid, subject) in subject_rows {
subject_map.entry(iid).or_default().push(subject);
}
// 3. Load all RMP professors and build multi-key name index
let prof_rows: Vec<(i32, String, String, Option<String>, i32)> = sqlx::query_as(
"SELECT legacy_id, first_name, last_name, department, num_ratings FROM rmp_professors",
)
.fetch_all(db_pool)
.await?;
// Build name index: (normalized_last, normalized_first) -> Vec<RmpProfForMatching>
// Each professor may appear under multiple keys (nicknames, token variants).
let mut name_index: HashMap<(String, String), Vec<RmpProfForMatching>> = HashMap::new();
let mut rmp_parse_failures = 0usize;
for (legacy_id, first_name, last_name, department, num_ratings) in &prof_rows {
match parse_rmp_name(first_name, last_name) {
Some(parts) => {
let keys = matching_keys(&parts);
for key in keys {
name_index.entry(key).or_default().push(RmpProfForMatching {
legacy_id: *legacy_id,
department: department.clone(),
num_ratings: *num_ratings,
});
}
}
None => {
// Professors whose name cannot be parsed are left out of the index.
rmp_parse_failures += 1;
debug!(
legacy_id,
first_name, last_name, "Unparseable RMP professor name, skipping"
);
}
}
}
if rmp_parse_failures > 0 {
debug!(
count = rmp_parse_failures,
"RMP professors with unparseable names"
);
}
// 4. Load existing candidate pairs — only skip resolved (accepted/rejected) pairs.
// Pending candidates are rescored so updated mappings take effect.
let candidate_rows: Vec<(i32, i32, String)> =
sqlx::query_as("SELECT instructor_id, rmp_legacy_id, status FROM rmp_match_candidates")
.fetch_all(db_pool)
.await?;
let mut resolved_pairs: HashSet<(i32, i32)> = HashSet::new();
let mut pending_pairs: HashSet<(i32, i32)> = HashSet::new();
let mut rejected_pairs: HashSet<(i32, i32)> = HashSet::new();
for (iid, lid, status) in candidate_rows {
match status.as_str() {
"accepted" | "rejected" => {
resolved_pairs.insert((iid, lid));
if status == "rejected" {
rejected_pairs.insert((iid, lid));
}
}
// Any status other than accepted/rejected is treated as pending.
_ => {
pending_pairs.insert((iid, lid));
}
}
}
// 5. Score and collect candidates (new + rescored pending)
let empty_subjects: Vec<String> = Vec::new();
let mut new_candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
let mut rescored_candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
let mut auto_accept: Vec<(i32, i32)> = Vec::new(); // (instructor_id, legacy_id)
let mut skipped_unparseable = 0usize;
let mut skipped_no_candidates = 0usize;
for (instructor_id, display_name) in &instructors {
let Some(instructor_parts) = parse_banner_name(display_name) else {
skipped_unparseable += 1;
debug!(
instructor_id,
display_name, "Unparseable display name, skipping"
);
continue;
};
// Instructors with no course rows are scored against an empty subject list.
let subjects = subject_map.get(instructor_id).unwrap_or(&empty_subjects);
// Generate all matching keys for this instructor and collect candidate
// RMP professors across all key variants (deduplicated by legacy_id).
let instructor_keys = matching_keys(&instructor_parts);
let mut seen_profs: HashSet<i32> = HashSet::new();
let mut matched_profs: Vec<&RmpProfForMatching> = Vec::new();
for key in &instructor_keys {
if let Some(profs) = name_index.get(key) {
for prof in profs {
if seen_profs.insert(prof.legacy_id) {
matched_profs.push(prof);
}
}
}
}
if matched_profs.is_empty() {
skipped_no_candidates += 1;
continue;
}
// The uniqueness signal counts every name match, including pairs that are
// already resolved and therefore skipped below.
let candidate_count = matched_profs.len();
// Highest-scoring unresolved candidate so far, as (score, legacy_id).
let mut best: Option<(f32, i32)> = None;
for prof in &matched_profs {
let pair = (*instructor_id, prof.legacy_id);
if resolved_pairs.contains(&pair) {
continue;
}
let ms = compute_match_score(
subjects,
prof.department.as_deref(),
candidate_count,
prof.num_ratings,
);
if ms.score < MIN_CANDIDATE_THRESHOLD {
continue;
}
// Falls back to an empty JSON object if the breakdown cannot be serialized.
let breakdown_json =
serde_json::to_value(&ms.breakdown).unwrap_or_else(|_| serde_json::json!({}));
if pending_pairs.contains(&pair) {
rescored_candidates.push((
*instructor_id,
prof.legacy_id,
ms.score,
breakdown_json,
));
} else {
new_candidates.push((*instructor_id, prof.legacy_id, ms.score, breakdown_json));
}
// Strictly-greater comparison: on a tie the earlier candidate stays best.
match best {
Some((s, _)) if ms.score > s => best = Some((ms.score, prof.legacy_id)),
None => best = Some((ms.score, prof.legacy_id)),
_ => {}
}
}
// Auto-accept the top candidate if it meets the threshold and is not
// previously rejected.
// NOTE(review): rejected pairs are also in `resolved_pairs` and were skipped
// above, so `best` can never be a rejected pair; the check here is redundant.
if let Some((score, legacy_id)) = best
&& score >= AUTO_ACCEPT_THRESHOLD
&& !rejected_pairs.contains(&(*instructor_id, legacy_id))
{
auto_accept.push((*instructor_id, legacy_id));
}
}
// 6–7. Write candidates, rescore, and auto-accept within a single transaction
// Counts are captured up front because the vectors are consumed below.
let candidates_created = new_candidates.len();
let candidates_rescored = rescored_candidates.len();
let auto_matched = auto_accept.len();
let mut tx = db_pool.begin().await?;
// 6a. Batch-insert new candidates
if !new_candidates.is_empty() {
let c_instructor_ids: Vec<i32> = new_candidates.iter().map(|(iid, _, _, _)| *iid).collect();
let c_legacy_ids: Vec<i32> = new_candidates.iter().map(|(_, lid, _, _)| *lid).collect();
let c_scores: Vec<f32> = new_candidates.iter().map(|(_, _, s, _)| *s).collect();
let c_breakdowns: Vec<serde_json::Value> =
new_candidates.into_iter().map(|(_, _, _, b)| b).collect();
sqlx::query(
r#"
INSERT INTO rmp_match_candidates (instructor_id, rmp_legacy_id, score, score_breakdown)
SELECT v.instructor_id, v.rmp_legacy_id, v.score, v.score_breakdown
FROM UNNEST($1::int4[], $2::int4[], $3::real[], $4::jsonb[])
AS v(instructor_id, rmp_legacy_id, score, score_breakdown)
ON CONFLICT (instructor_id, rmp_legacy_id) DO NOTHING
"#,
)
.bind(&c_instructor_ids)
.bind(&c_legacy_ids)
.bind(&c_scores)
.bind(&c_breakdowns)
.execute(&mut *tx)
.await?;
}
// 6b. Batch-update rescored pending candidates
if !rescored_candidates.is_empty() {
let r_instructor_ids: Vec<i32> = rescored_candidates
.iter()
.map(|(iid, _, _, _)| *iid)
.collect();
let r_legacy_ids: Vec<i32> = rescored_candidates
.iter()
.map(|(_, lid, _, _)| *lid)
.collect();
let r_scores: Vec<f32> = rescored_candidates.iter().map(|(_, _, s, _)| *s).collect();
let r_breakdowns: Vec<serde_json::Value> = rescored_candidates
.into_iter()
.map(|(_, _, _, b)| b)
.collect();
sqlx::query(
r#"
UPDATE rmp_match_candidates mc
SET score = v.score, score_breakdown = v.score_breakdown
FROM UNNEST($1::int4[], $2::int4[], $3::real[], $4::jsonb[])
AS v(instructor_id, rmp_legacy_id, score, score_breakdown)
WHERE mc.instructor_id = v.instructor_id
AND mc.rmp_legacy_id = v.rmp_legacy_id
"#,
)
.bind(&r_instructor_ids)
.bind(&r_legacy_ids)
.bind(&r_scores)
.bind(&r_breakdowns)
.execute(&mut *tx)
.await?;
}
// 7. Auto-accept top candidates
if !auto_accept.is_empty() {
let aa_instructor_ids: Vec<i32> = auto_accept.iter().map(|(iid, _)| *iid).collect();
let aa_legacy_ids: Vec<i32> = auto_accept.iter().map(|(_, lid)| *lid).collect();
// Mark the candidate row as accepted
sqlx::query(
r#"
UPDATE rmp_match_candidates mc
SET status = 'accepted', resolved_at = NOW()
FROM UNNEST($1::int4[], $2::int4[]) AS v(instructor_id, rmp_legacy_id)
WHERE mc.instructor_id = v.instructor_id
AND mc.rmp_legacy_id = v.rmp_legacy_id
"#,
)
.bind(&aa_instructor_ids)
.bind(&aa_legacy_ids)
.execute(&mut *tx)
.await?;
// Insert links into instructor_rmp_links
// NOTE(review): a link is silently skipped when `rmp_legacy_id` is already
// linked, yet the candidate above and the instructor status below are updated
// for every auto-accepted pair regardless. Confirm this is intended.
sqlx::query(
r#"
INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, source)
SELECT v.instructor_id, v.rmp_legacy_id, 'auto'
FROM UNNEST($1::int4[], $2::int4[]) AS v(instructor_id, rmp_legacy_id)
ON CONFLICT (rmp_legacy_id) DO NOTHING
"#,
)
.bind(&aa_instructor_ids)
.bind(&aa_legacy_ids)
.execute(&mut *tx)
.await?;
// Update instructor match status
sqlx::query(
r#"
UPDATE instructors i
SET rmp_match_status = 'auto'
FROM UNNEST($1::int4[]) AS v(instructor_id)
WHERE i.id = v.instructor_id
"#,
)
.bind(&aa_instructor_ids)
.execute(&mut *tx)
.await?;
}
tx.commit().await?;
// The stats report what was collected in memory, not the affected row counts.
let stats = MatchingStats {
total_unmatched,
candidates_created,
candidates_rescored,
auto_matched,
skipped_unparseable,
skipped_no_candidates,
};
info!(
total_unmatched = stats.total_unmatched,
candidates_created = stats.candidates_created,
candidates_rescored = stats.candidates_rescored,
auto_matched = stats.auto_matched,
skipped_unparseable = stats.skipped_unparseable,
skipped_no_candidates = stats.skipped_no_candidates,
"Candidate generation complete"
);
Ok(stats)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    /// Subject list of an instructor who only teaches `CS` courses.
    fn cs_only() -> Vec<String> {
        vec!["CS".to_string()]
    }

    /// Unique candidate, matching department, plenty of ratings.
    #[test]
    fn test_ideal_candidate_high_score() {
        let subjects = cs_only();
        // 1 = unique candidate, 50 = decent ratings
        let ms = compute_match_score(&subjects, Some("Computer Science"), 1, 50);

        // Every signal is at its maximum (volume saturates at five ratings),
        // so the composite is 0.50 + 0.25 + 0.15 + 0.10.
        assert!(ms.score >= 0.85, "Expected score >= 0.85, got {}", ms.score);
        let ScoreBreakdown {
            name,
            department,
            uniqueness,
            ..
        } = ms.breakdown;
        assert_eq!(name, 1.0);
        assert_eq!(uniqueness, 1.0);
        assert_eq!(department, 1.0);
    }

    /// More same-name candidates must lower the score.
    #[test]
    fn test_ambiguous_candidates_lower_score() {
        let score_for = |candidates: usize| compute_match_score(&[], None, candidates, 10);
        let unique = score_for(1);
        let ambiguous = score_for(3);

        assert!(
            unique.score > ambiguous.score,
            "Unique ({}) should outscore ambiguous ({})",
            unique.score,
            ambiguous.score
        );
        assert_eq!(unique.breakdown.uniqueness, 1.0);
        assert_eq!(ambiguous.breakdown.uniqueness, 0.2);
    }

    /// An unknown RMP department is scored as neutral.
    #[test]
    fn test_no_department_neutral() {
        let result = compute_match_score(&cs_only(), None, 1, 10);
        assert_eq!(result.breakdown.department, 0.5);
    }

    /// A subject that abbreviates the department counts as a full match.
    #[test]
    fn test_department_match() {
        let result = compute_match_score(&cs_only(), Some("Computer Science"), 1, 10);
        assert_eq!(result.breakdown.department, 1.0);
    }

    /// An unrelated department gets the mismatch score.
    #[test]
    fn test_department_mismatch() {
        let result = compute_match_score(&cs_only(), Some("History"), 1, 10);
        assert_eq!(result.breakdown.department, 0.2);
    }

    /// With all else equal, a department match beats a mismatch.
    #[test]
    fn test_department_match_outscores_mismatch() {
        let subjects = cs_only();
        let score_against = |dept: &str| compute_match_score(&subjects, Some(dept), 1, 10);
        let matched = score_against("Computer Science");
        let mismatched = score_against("History");

        assert!(
            matched.score > mismatched.score,
            "Department match ({}) should outscore mismatch ({})",
            matched.score,
            mismatched.score
        );
    }

    /// The volume signal grows from zero with the number of ratings.
    #[test]
    fn test_volume_scaling() {
        let volume_for = |ratings: i32| compute_match_score(&[], None, 1, ratings).breakdown.volume;
        let zero = volume_for(0);
        let many = volume_for(100);

        assert!(
            many > zero,
            "100 ratings ({}) should outscore 0 ratings ({})",
            many,
            zero
        );
        assert_eq!(zero, 0.0);
        assert!(many > 0.9, "100 ratings should be near max");
    }
}
+321
View File
@@ -0,0 +1,321 @@
//! Database operations for scrape job queue management.
use crate::data::models::{ScrapeJob, ScrapePriority, TargetType, UpsertCounts};
use crate::error::Result;
use chrono::{DateTime, Utc};
use sqlx::PgPool;
use std::collections::HashSet;
/// Release every job whose `locked_at` is set and reset its `queued_at` to now.
///
/// Meant to run once at startup, to recover jobs a previous process left
/// locked after an unclean shutdown (crash, OOM kill, etc.).
///
/// # Returns
/// The number of jobs that were unlocked.
///
/// # Errors
/// Returns the underlying database error if the update fails.
pub async fn force_unlock_all(db_pool: &PgPool) -> Result<u64> {
    let unlocked = sqlx::query(
        "UPDATE scrape_jobs SET locked_at = NULL, queued_at = NOW() WHERE locked_at IS NOT NULL",
    )
    .execute(db_pool)
    .await?
    .rows_affected();

    Ok(unlocked)
}
/// How long a lock can be held before it is considered expired and reclaimable.
///
/// This acts as a safety net for cases where a worker dies without unlocking
/// (OOM kill, crash, network partition). Under normal operation, the worker's
/// own job timeout fires well before this threshold.
///
/// Currently 10 minutes (`10 * 60` seconds).
const LOCK_EXPIRY: std::time::Duration = std::time::Duration::from_secs(10 * 60);
/// Claim the next runnable scrape job, if there is one.
///
/// The selection and the lock update run in one transaction. The row is picked
/// with `FOR UPDATE SKIP LOCKED`, so several workers can poll the queue
/// concurrently without conflicts. A job is eligible when its `execute_at` has
/// passed and it is either:
/// - unlocked, or
/// - locked for longer than [`LOCK_EXPIRY`] (abandoned by a dead worker).
///
/// Eligible jobs are taken highest priority first, then earliest `execute_at`.
///
/// # Arguments
/// * `db_pool` - PostgreSQL connection pool
///
/// # Returns
/// * `Ok(Some(job))` if a job was fetched and locked. The returned value is the
///   row as it was selected, i.e. before `locked_at` was set to `NOW()`.
/// * `Ok(None)` if no jobs are available
///
/// # Errors
/// Returns the underlying database error if a statement or the commit fails.
pub async fn fetch_and_lock_job(db_pool: &PgPool) -> Result<Option<ScrapeJob>> {
    let expiry_secs = LOCK_EXPIRY.as_secs() as i32;
    let mut txn = db_pool.begin().await?;

    let claimed = sqlx::query_as::<_, ScrapeJob>(
        "SELECT * FROM scrape_jobs \
         WHERE (locked_at IS NULL OR locked_at < NOW() - make_interval(secs => $1::double precision)) \
         AND execute_at <= NOW() \
         ORDER BY priority DESC, execute_at ASC \
         LIMIT 1 \
         FOR UPDATE SKIP LOCKED",
    )
    .bind(expiry_secs)
    .fetch_optional(&mut *txn)
    .await?;

    // Stamp the lock while the selected row is still held by this transaction.
    if let Some(selected) = claimed.as_ref() {
        sqlx::query("UPDATE scrape_jobs SET locked_at = NOW() WHERE id = $1")
            .bind(selected.id)
            .execute(&mut *txn)
            .await?;
    }

    txn.commit().await?;
    Ok(claimed)
}
/// Remove a scrape job from the queue by ID.
///
/// Typically called once a job has been processed successfully or has failed
/// permanently. Deleting an ID that does not exist is not an error.
///
/// # Arguments
/// * `job_id` - The database ID of the job to delete
/// * `db_pool` - PostgreSQL connection pool
///
/// # Errors
/// Returns the underlying database error if the delete fails.
pub async fn delete_job(job_id: i32, db_pool: &PgPool) -> Result<()> {
    let removal = sqlx::query("DELETE FROM scrape_jobs WHERE id = $1").bind(job_id);
    removal.execute(db_pool).await?;
    Ok(())
}
/// Release a scrape job by clearing its `locked_at` timestamp.
///
/// Puts the job back in the queue without touching its retry count, e.g.
/// during graceful shutdown.
///
/// # Arguments
/// * `job_id` - The database ID of the job to unlock
/// * `db_pool` - PostgreSQL connection pool
///
/// # Errors
/// Returns the underlying database error if the update fails.
pub async fn unlock_job(job_id: i32, db_pool: &PgPool) -> Result<()> {
    let release = sqlx::query("UPDATE scrape_jobs SET locked_at = NULL WHERE id = $1").bind(job_id);
    release.execute(db_pool).await?;
    Ok(())
}
/// Unlock a job, bump its retry count and reset `queued_at` in one statement.
///
/// Whether retries remain is decided by the database in the same `UPDATE`,
/// which avoids race conditions between workers. The comparison uses the
/// already-incremented `retry_count`. The row is updated whether or not
/// retries remain.
///
/// # Arguments
/// * `job_id` - The database ID of the job
/// * `max_retries` - Maximum number of retries allowed for this job
/// * `db_pool` - PostgreSQL connection pool
///
/// # Returns
/// * `Ok(Some(queued_at))` if the job was unlocked and retries remain
/// * `Ok(None)` if the job has exhausted its retries
///
/// # Errors
/// Returns the underlying database error if the update fails, including when
/// no row with `job_id` exists.
pub async fn unlock_and_increment_retry(
    job_id: i32,
    max_retries: i32,
    db_pool: &PgPool,
) -> Result<Option<chrono::DateTime<chrono::Utc>>> {
    const SQL: &str = "UPDATE scrape_jobs
SET locked_at = NULL, retry_count = retry_count + 1, queued_at = NOW()
WHERE id = $1
RETURNING CASE WHEN retry_count <= $2 THEN queued_at ELSE NULL END";

    let requeued_at = sqlx::query_scalar::<_, Option<chrono::DateTime<chrono::Utc>>>(SQL)
        .bind(job_id)
        .bind(max_retries)
        .fetch_one(db_pool)
        .await?;

    Ok(requeued_at)
}
/// Find existing job payloads matching the given target type and candidates.
///
/// Returns a set of stringified JSON payloads that already exist in the queue
/// (both locked and unlocked), used for deduplication when scheduling new jobs.
/// The query does not filter on `locked_at`, so every row of `scrape_jobs` with
/// a matching type and payload counts.
///
/// An empty `candidate_payloads` slice yields an empty set without querying the
/// database.
///
/// # Arguments
/// * `target_type` - The target type to filter by
/// * `candidate_payloads` - Candidate payloads to check against existing jobs
/// * `db_pool` - PostgreSQL connection pool
///
/// # Returns
/// A `HashSet` containing the `to_string()` rendering of every stored
/// `target_payload` that equals one of the candidates.
///
/// # Errors
/// Propagates any error reported while running the query.
pub async fn find_existing_job_payloads(
    target_type: TargetType,
    candidate_payloads: &[serde_json::Value],
    db_pool: &PgPool,
) -> Result<HashSet<String>> {
    // `= ANY(<empty array>)` can never be true, so there is nothing to look up;
    // skip the round-trip (mirrors the empty-input guard in `batch_insert_jobs`).
    if candidate_payloads.is_empty() {
        return Ok(HashSet::new());
    }

    let existing_jobs: Vec<(serde_json::Value,)> = sqlx::query_as(
        "SELECT target_payload FROM scrape_jobs
WHERE target_type = $1 AND target_payload = ANY($2)",
    )
    .bind(target_type)
    .bind(candidate_payloads)
    .fetch_all(db_pool)
    .await?;

    // Stringify so callers can compare against their own serialized candidates.
    let existing_payloads = existing_jobs
        .into_iter()
        .map(|(payload,)| payload.to_string())
        .collect();
    Ok(existing_payloads)
}
/// Append one row to `scrape_job_results` describing a finished scrape job.
///
/// # Arguments
/// * `target_type` - Target type of the job that ran
/// * `payload` - JSON payload of the job
/// * `priority` - Priority the job was queued with
/// * `queued_at` - When the job was queued
/// * `started_at` - When processing started
/// * `duration_ms` - Processing duration in milliseconds
/// * `success` - Whether the job succeeded
/// * `error_message` - Optional error text; stored as `NULL` when absent
/// * `retry_count` - Retry count recorded for the job
/// * `counts` - Optional upsert counters; when `None`, all five counter columns
///   are stored as `NULL`
/// * `db_pool` - PostgreSQL connection pool
///
/// NOTE(review): `completed_at` is not part of the column list; presumably the
/// table supplies a default for it — confirm against the schema.
///
/// # Errors
/// Propagates any error reported while executing the insert.
#[allow(clippy::too_many_arguments)]
pub async fn insert_job_result(
    target_type: TargetType,
    payload: serde_json::Value,
    priority: ScrapePriority,
    queued_at: DateTime<Utc>,
    started_at: DateTime<Utc>,
    duration_ms: i32,
    success: bool,
    error_message: Option<&str>,
    retry_count: i32,
    counts: Option<&UpsertCounts>,
    db_pool: &PgPool,
) -> Result<()> {
    // Spread the optional counters into one nullable value per column.
    let (fetched, changed, unchanged, audits, metrics) = match counts {
        Some(c) => (
            Some(c.courses_fetched),
            Some(c.courses_changed),
            Some(c.courses_unchanged),
            Some(c.audits_generated),
            Some(c.metrics_generated),
        ),
        None => (None, None, None, None, None),
    };

    let insert = sqlx::query(
        r#"
INSERT INTO scrape_job_results (
target_type, payload, priority,
queued_at, started_at, duration_ms,
success, error_message, retry_count,
courses_fetched, courses_changed, courses_unchanged,
audits_generated, metrics_generated
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
"#,
    )
    .bind(target_type)
    .bind(&payload)
    .bind(priority)
    .bind(queued_at)
    .bind(started_at)
    .bind(duration_ms)
    .bind(success)
    .bind(error_message)
    .bind(retry_count)
    .bind(fetched)
    .bind(changed)
    .bind(unchanged)
    .bind(audits)
    .bind(metrics);

    insert.execute(db_pool).await?;
    Ok(())
}
/// Per-subject aggregated stats from recent scrape results.
///
/// Populated by [`fetch_subject_stats`] and converted into
/// [`crate::scraper::adaptive::SubjectStats`] for interval computation.
///
/// Every count below is taken over the window that query selects: at most the
/// 20 most recent `Subject` results per subject, completed within the last 24 hours.
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct SubjectResultStats {
/// Subject identifier, read from the `subject` key of the result payload.
pub subject: String,
/// Number of results in the window, successful or not.
pub recent_runs: i64,
/// Mean of `courses_changed / courses_fetched` over successful runs that
/// fetched at least one course; `0.0` when the window has no such run.
pub avg_change_ratio: f64,
/// Number of results newer than the most recent successful run that changed
/// at least one course. When the window contains no such run, the number of
/// successful runs with zero changed courses.
pub consecutive_zero_changes: i64,
/// Number of results newer than the most recent successful run that fetched
/// at least one course. When the window contains no such run, the number of
/// successful runs with zero fetched courses.
pub consecutive_empty_fetches: i64,
/// Number of failed results in the window.
pub recent_failure_count: i64,
/// Number of successful results in the window.
pub recent_success_count: i64,
/// Latest `completed_at` among the results in the window.
pub last_completed: DateTime<Utc>,
}
/// Fetch aggregated per-subject statistics from the last 24 hours of results.
///
/// Only rows of `scrape_job_results` with `target_type = 'Subject'` and a
/// `completed_at` within the last 24 hours are considered. Rows are grouped by
/// the `subject` key of their payload, and `NULL` course counters are treated as 0.
///
/// For each subject, examines the 20 most recent results and computes:
/// - Average change ratio (courses_changed / courses_fetched) over successful
///   runs that fetched at least one course, `0.0` if there are none
/// - Consecutive zero-change runs from the most recent result: the number of
///   results newer than the latest successful run with changes, or, if no such
///   run is in the window, the count of successful runs with zero changes
/// - Consecutive empty-fetch runs from the most recent result, computed the same
///   way against the latest successful run that fetched at least one course
/// - Failure and success counts
/// - Last completion timestamp
///
/// # Returns
/// One [`SubjectResultStats`] per subject that has at least one result in the
/// window. The query has no `ORDER BY`, so the row order is unspecified.
///
/// # Errors
/// Propagates any error reported while running the query or decoding its rows.
pub async fn fetch_subject_stats(db_pool: &PgPool) -> Result<Vec<SubjectResultStats>> {
let rows = sqlx::query_as::<_, SubjectResultStats>(
r#"
WITH recent AS (
SELECT payload->>'subject' AS subject, success,
COALESCE(courses_fetched, 0) AS courses_fetched,
COALESCE(courses_changed, 0) AS courses_changed,
completed_at,
ROW_NUMBER() OVER (PARTITION BY payload->>'subject' ORDER BY completed_at DESC) AS rn
FROM scrape_job_results
WHERE target_type = 'Subject' AND completed_at > NOW() - INTERVAL '24 hours'
),
filtered AS (SELECT * FROM recent WHERE rn <= 20),
zero_break AS (
SELECT subject,
MIN(rn) FILTER (WHERE courses_changed > 0 AND success) AS first_nonzero_rn,
MIN(rn) FILTER (WHERE courses_fetched > 0 AND success) AS first_nonempty_rn
FROM filtered GROUP BY subject
)
SELECT
f.subject::TEXT AS subject,
COUNT(*)::BIGINT AS recent_runs,
COALESCE(AVG(CASE WHEN f.success AND f.courses_fetched > 0
THEN f.courses_changed::FLOAT / f.courses_fetched ELSE NULL END), 0.0)::FLOAT8 AS avg_change_ratio,
COALESCE(zb.first_nonzero_rn - 1, COUNT(*) FILTER (WHERE f.success AND f.courses_changed = 0))::BIGINT AS consecutive_zero_changes,
COALESCE(zb.first_nonempty_rn - 1, COUNT(*) FILTER (WHERE f.success AND f.courses_fetched = 0))::BIGINT AS consecutive_empty_fetches,
COUNT(*) FILTER (WHERE NOT f.success)::BIGINT AS recent_failure_count,
COUNT(*) FILTER (WHERE f.success)::BIGINT AS recent_success_count,
MAX(f.completed_at) AS last_completed
FROM filtered f
LEFT JOIN zero_break zb ON f.subject = zb.subject
GROUP BY f.subject, zb.first_nonzero_rn, zb.first_nonempty_rn
"#,
)
.fetch_all(db_pool)
.await?;
Ok(rows)
}
/// Insert many scrape jobs in a single round-trip via `UNNEST`.
///
/// Each job is written with both `execute_at` and `queued_at` set to `NOW()`.
/// Target types and priorities travel as text arrays holding each value's
/// `Debug` rendering and are cast to the `target_type` / `scrape_priority`
/// database types inside the statement.
///
/// # Arguments
/// * `jobs` - Slice of `(payload, target_type, priority)` tuples to insert
/// * `db_pool` - PostgreSQL connection pool
///
/// # Returns
/// The inserted rows as returned by `RETURNING *`. An empty `jobs` slice yields
/// an empty vector without touching the database.
///
/// # Errors
/// Propagates any error reported while running the insert or decoding its rows.
pub async fn batch_insert_jobs(
    jobs: &[(serde_json::Value, TargetType, ScrapePriority)],
    db_pool: &PgPool,
) -> Result<Vec<ScrapeJob>> {
    if jobs.is_empty() {
        return Ok(Vec::new());
    }

    // Build one column-wise array per UNNEST argument, all in input order.
    let target_types: Vec<String> = jobs
        .iter()
        .map(|(_, target_type, _)| format!("{target_type:?}"))
        .collect();
    let payloads: Vec<serde_json::Value> =
        jobs.iter().map(|(payload, _, _)| payload.clone()).collect();
    let priorities: Vec<String> = jobs
        .iter()
        .map(|(_, _, priority)| format!("{priority:?}"))
        .collect();

    let insert = sqlx::query_as::<_, ScrapeJob>(
        r#"
INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at, queued_at)
SELECT v.target_type::target_type, v.payload, v.priority::scrape_priority, NOW(), NOW()
FROM UNNEST($1::text[], $2::jsonb[], $3::text[])
AS v(target_type, payload, priority)
RETURNING *
"#,
    )
    .bind(&target_types)
    .bind(&payloads)
    .bind(&priorities);

    let inserted = insert.fetch_all(db_pool).await?;
    Ok(inserted)
}

Some files were not shown because too many files have changed in this diff Show More