164 Commits

Author SHA1 Message Date
47132e71d7 chore(master): release 0.6.1 (#1) 2026-01-31 00:37:13 -06:00
87db1a4ccb refactor: extract Justfile inline scripts into scripts/ directory
Move all [script("bun")] blocks into standalone TypeScript files under
scripts/ with shared utilities in scripts/lib/. The Justfile is now ~40
lines of thin `bun scripts/*.ts` wrappers.

Shared code consolidated into two lib files:
- lib/proc.ts: process spawning (run, spawnCollect, raceInOrder, ProcessGroup)
- lib/fmt.ts: color output, elapsed timers, reusable flag parser
2026-01-31 00:34:27 -06:00
e203e8e182 feat(build): auto-regenerate TypeScript bindings on source changes 2026-01-31 00:27:27 -06:00
cbb0a51bca refactor(terms): move term formatting from frontend to backend 2026-01-31 00:26:41 -06:00
c533768362 feat(scraper): improve results visibility and loading states 2026-01-30 23:36:23 -06:00
16039e02a9 fix(metrics): always emit baseline metrics on initial course insertion 2026-01-30 23:32:04 -06:00
7d2255a988 fix(data): decode HTML entities in course titles and instructor names 2026-01-30 23:31:05 -06:00
8bfc14e55c feat(course): distinguish async from synchronous online courses
Add logic to detect and label asynchronous online sections (INT building
with TBA times) separately from synchronous online courses. Update table
rendering to show "Async" instead of "TBA" for these sections.
2026-01-30 23:27:54 -06:00
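A minimal sketch of the detection rule this commit describes, assuming illustrative field names and that "INT" is the building code Banner uses for online sections:

```rust
/// Hypothetical meeting-time shape; field names are illustrative, not the project's real types.
struct MeetingTime {
    building: Option<String>,   // e.g. Some("INT") for internet/online sections
    begin_time: Option<String>, // None when the schedule is TBA
    end_time: Option<String>,
}

/// Treat a section as asynchronous online when it meets in the "INT"
/// building but has no scheduled meeting times (TBA).
fn is_async_online(meetings: &[MeetingTime]) -> bool {
    meetings.iter().any(|m| {
        m.building.as_deref() == Some("INT") && m.begin_time.is_none() && m.end_time.is_none()
    })
}
```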
2689587dd5 fix: avoid status flickering on subjects table 2026-01-30 22:04:48 -06:00
1ad614dad0 feat(scraper): improve dashboard clarity with stat tooltips 2026-01-30 22:00:59 -06:00
ebb7a97c11 fix(ci): add postgres container service for rust tests
Also updated deprecated codeql action to v4.
2026-01-30 21:36:32 -06:00
2df0ba0ec5 chore: add ts-rs generated bindings 2026-01-30 21:29:32 -06:00
dd148e08a0 fix(ci): fix rust/frontend/security job failures and expand local checks 2026-01-30 21:22:01 -06:00
3494341e3f ci: split quality checks into parallel jobs with security scanning
Reorganize CI pipeline into separate jobs for Rust quality, frontend
quality, tests, Docker build, and security audits. Add cargo-audit,
bun audit, and Trivy filesystem scanning. Allow formatting checks to
pass with warnings on push events while failing on PRs.
2026-01-30 21:08:16 -06:00
acccaa54d4 chore: update frontend packages 2026-01-30 21:07:08 -06:00
6863ee58d0 ci: add Release Please automation for changelog and version management 2026-01-30 21:05:15 -06:00
550401b85c refactor: use friendly term codes in URL query parameters 2026-01-30 20:49:08 -06:00
b02a0738e2 chore: bump to v0.6.0, update roadmap & changelog 2026-01-30 20:37:31 -06:00
5d7d60cd96 fix: prevent session pool deadlock on acquire cancellation
Replace is_creating mutex with atomic flag and RAII guard to ensure
proper cleanup when acquire() futures are cancelled mid-creation,
preventing permanent deadlock for subsequent callers.
2026-01-30 20:19:10 -06:00
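A sketch of the cancellation-safe pattern this message describes; the names are assumptions, not the pool's actual API. Because the flag is cleared in `Drop`, a cancelled `acquire()` future can no longer leave the creation slot permanently claimed:

```rust
use std::sync::atomic::{AtomicBool, Ordering};

/// RAII guard over an "a session is being created" flag (names assumed).
struct CreationGuard<'a> {
    flag: &'a AtomicBool,
}

impl<'a> CreationGuard<'a> {
    /// Atomically claim the creation slot; None if another caller holds it.
    fn try_claim(flag: &'a AtomicBool) -> Option<Self> {
        flag.compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
            .ok()
            .map(|_| Self { flag })
    }
}

impl Drop for CreationGuard<'_> {
    /// Runs even when the owning future is dropped mid-creation,
    /// releasing the slot for subsequent callers.
    fn drop(&mut self) {
        self.flag.store(false, Ordering::Release);
    }
}
```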
1954166db6 feat: add name parsing and normalization for instructor-RMP matching 2026-01-30 20:02:59 -06:00
a2a9116b7a fix: avoid clipping page content 2026-01-30 19:32:05 -06:00
a103f0643a feat: refactor admin instructor UI with component extraction and optimistic updates 2026-01-30 19:31:31 -06:00
474d519b9d feat: add auto-format recovery when formatting is sole check failure
Enhances check recipe to detect when only formatting checks fail while
peers pass, automatically applies formatters, then re-verifies. Supports
both Rust (rustfmt + cargo-check) and web (biome + svelte-check)
domains. Displays results eagerly as checks complete instead of in
original order.
2026-01-30 16:01:56 -06:00
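The real logic lives in the Bun check script; a Rust sketch of the recovery rule, with the command invocations shown for the Rust domain only and the check naming assumed:

```rust
use std::process::Command;

/// If formatting is the sole failing check, apply the formatter and
/// re-verify; returns whether the suite is now green (sketch only).
fn recover_if_format_only(failed_checks: &[&str]) -> bool {
    if failed_checks == ["fmt"] {
        let _ = Command::new("cargo").args(["fmt", "--all"]).status();
        Command::new("cargo")
            .args(["fmt", "--all", "--", "--check"])
            .status()
            .map(|s| s.success())
            .unwrap_or(false)
    } else {
        failed_checks.is_empty()
    }
}
```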
fb27bdc119 feat: implement session expiry extension and 401 recovery 2026-01-30 16:01:17 -06:00
669dec0235 feat: add timeline API with schedule-aware enrollment aggregation
Implements POST /api/timeline endpoint that aggregates enrollment by
subject over 15-minute slots, filtering courses by their actual meeting
times. Includes ISR-style schedule cache with hourly background refresh
using stale-while-revalidate pattern, database indexes for efficient
queries, and frontend refactor to dynamically discover subjects from API.
2026-01-30 10:56:11 -06:00
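A sketch of the 15-minute bucketing the endpoint implies, using `chrono` (already a dependency); the 96-slot per-day layout is an assumption:

```rust
use chrono::{NaiveTime, Timelike};

/// Index of the 15-minute slot a time falls into (0..=95 across a day).
fn slot_index(t: NaiveTime) -> u32 {
    t.hour() * 4 + t.minute() / 15
}

/// Add one course's enrollment to every slot its meeting covers.
fn accumulate(counts: &mut [u64; 96], begin: NaiveTime, end: NaiveTime, enrollment: u64) {
    for slot in slot_index(begin)..=slot_index(end) {
        counts[slot as usize] += enrollment;
    }
}
```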
67ba63339a fix: instructor/course mismatching, build order-independent map for association 2026-01-30 09:53:03 -06:00
7b8c11ac13 feat: add calendar export endpoints for ICS and Google Calendar 2026-01-30 04:08:16 -06:00
a767a3f8be feat: add root error page handling 2026-01-30 04:07:53 -06:00
8ce398c0e0 feat: add scraper analytics dashboard with timeseries and subject monitoring 2026-01-30 03:46:48 -06:00
9fed651641 feat: add adaptive scheduling and scraper admin endpoints
Subjects now have individually calculated scrape intervals based on their
historical change ratio, consecutive zero-change runs, failure counts, and
the current time of day. This reduces unnecessary scrapes during inactive
periods while maintaining responsiveness during peak hours. Includes four
new admin endpoints for monitoring scraper health and scheduling decisions.
2026-01-30 02:14:37 -06:00
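An illustrative interval function combining the four signals named above; the weights, bounds, and quiet-hours window are invented for the sketch:

```rust
use std::time::Duration;

fn scrape_interval(change_ratio: f64, zero_change_runs: u32, failures: u32, hour: u32) -> Duration {
    let base = Duration::from_secs(15 * 60);
    // Frequent changes shrink the interval; repeated no-change runs grow it.
    let activity = (1.0 - change_ratio).max(0.1);
    let backoff = 1.0 + zero_change_runs as f64 * 0.5 + failures as f64;
    // Scrape less aggressively outside peak hours (assumed window).
    let off_peak = if (6..22).contains(&hour) { 1.0 } else { 3.0 };
    base.mul_f64(activity * backoff * off_peak)
        .clamp(Duration::from_secs(5 * 60), Duration::from_secs(6 * 3600))
}
```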
75a99c10ea feat: add scrape job result persistence for effectiveness tracking 2026-01-30 01:37:41 -06:00
857ceabcca fix: prevent ts-rs serde warnings 2026-01-30 01:36:57 -06:00
203c337cf0 feat: add confidence-based RMP matching with manual review workflow
Replace simple auto-matching with scored candidate generation that
considers department overlap, name uniqueness, and rating volume.
Candidates above 0.85 auto-accept; others require admin approval.
2026-01-30 01:31:11 -06:00
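A sketch of the scoring flow; only the 0.85 threshold comes from the commit message, the weights are placeholders:

```rust
enum MatchDecision {
    AutoAccept,
    NeedsReview, // queued for the manual admin workflow
}

/// Combine the three signals named above into a confidence score.
fn decide(dept_overlap: f64, name_uniqueness: f64, rating_volume: f64) -> (f64, MatchDecision) {
    let score = 0.5 * dept_overlap + 0.3 * name_uniqueness + 0.2 * rating_volume;
    let decision = if score > 0.85 {
        MatchDecision::AutoAccept
    } else {
        MatchDecision::NeedsReview
    };
    (score, decision)
}
```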
39ba131322 feat: add mobile touch controls with gesture detection 2026-01-29 23:56:45 -06:00
2fad9c969d fix: avoid title on icon, use simpler href-based login redirect 2026-01-29 23:44:05 -06:00
47b4f3315f feat: enhance login page with FAQ section and improved styling 2026-01-29 23:40:48 -06:00
fa28f13a45 feat: add interactive timeline visualization for class times
Implements a canvas-based timeline view with D3 scales showing class
counts across subjects. Features drag-to-pan, mouse wheel zoom, subject
filtering, hover tooltips, and smooth animations. Timeline auto-follows
current time and supports keyboard navigation.
2026-01-29 23:19:39 -06:00
5a6ea1e53a fix: handle backend startup delays with retry logic in auth 2026-01-29 20:04:50 -06:00
ba2b2fc50a fix: increase Banner API timeouts to handle slow responses 2026-01-29 19:49:57 -06:00
cfe098d193 feat: add websocket support for real-time scrape job monitoring 2026-01-29 19:31:04 -06:00
d861888e5e fix: proper centering for login page content, avoid unnecessary scrollbar 2026-01-29 18:05:50 -06:00

f0645d82d9 refactor: persist audit log state in module scope for cross-navigation caching 2026-01-29 17:54:27 -06:00
7a1cd2a39b refactor: centralize number formatting with locale-aware utility 2026-01-29 17:53:38 -06:00
d2985f98ce feat: enhance audit log with smart diffing, conditional request caching, auto refreshing 2026-01-29 17:35:11 -06:00
b58eb840f3 refactor: consolidate navigation with top nav bar and route groups 2026-01-29 17:01:47 -06:00
2bc6fbdf30 feat: implement relative time feedback and improve tooltip customization 2026-01-29 16:44:06 -06:00
e41b970d6e fix: implement i64 serialization for JavaScript compatibility, fixing avatar URL display 2026-01-29 15:51:19 -06:00
e880126281 feat: implement worker timeout protection and crash recovery for job queue
Add JOB_TIMEOUT constant to fail stuck jobs after 5 minutes, and
LOCK_EXPIRY to reclaim abandoned locks after 10 minutes. Introduce
force_unlock_all to recover orphaned jobs at startup. Fix retry limit
off-by-one error and update deduplication to include locked jobs.
2026-01-29 15:50:09 -06:00
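The constants and reclaim predicates, sketched with assumed field names (the real jobs live in PostgreSQL rows, not in-process structs):

```rust
use std::time::{Duration, Instant};

const JOB_TIMEOUT: Duration = Duration::from_secs(5 * 60); // fail stuck jobs
const LOCK_EXPIRY: Duration = Duration::from_secs(10 * 60); // reclaim abandoned locks

struct Job {
    started_at: Option<Instant>, // illustrative fields, not the actual schema
    locked_at: Option<Instant>,
}

fn should_fail(job: &Job, now: Instant) -> bool {
    job.started_at.is_some_and(|t| now - t > JOB_TIMEOUT)
}

fn lock_expired(job: &Job, now: Instant) -> bool {
    job.locked_at.is_some_and(|t| now - t > LOCK_EXPIRY)
}
```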
db0ec1e69d feat: add rmp profile links and confidence-aware rating display 2026-01-29 15:43:39 -06:00
2947face06 fix: run frontend build first with -e embed flag in Justfile 2026-01-29 15:00:13 -06:00
36bcc27d7f feat: setup smart page transitions, fix laggy theme-aware element transitions 2026-01-29 14:59:47 -06:00
9e403e5043 refactor: modernize Justfile commands and simplify service management 2026-01-29 14:33:16 -06:00
98a6d978c6 feat: implement course change auditing with time-series metrics endpoint 2026-01-29 14:19:36 -06:00
4deeef2f00 feat: optimize asset delivery with build-time compression and encoding negotiation 2026-01-29 13:56:10 -06:00
e008ee5a12 feat: show search duration and result count feedback 2026-01-29 13:15:25 -06:00
a007ccb6a2 fix: remove out:fade transition from CourseTable 2026-01-29 13:08:45 -06:00
527cbebc6a feat: implement user authentication system with admin dashboard 2026-01-29 12:56:51 -06:00
4207783cdd docs: add changelog entries and roadmap completion tracking 2026-01-29 12:27:46 -06:00
c90bd740de refactor: consolidate query logic and eliminate N+1 instructor loads 2026-01-29 12:03:06 -06:00
61f8bd9de7 refactor: consolidate menu snippets and strengthen type safety
Replaces duplicated dropdown/context menu code with parameterized snippet,
eliminates unsafe type casts, adds error handling for clipboard and API
calls, and improves accessibility annotations.
2026-01-29 11:40:55 -06:00
b5eaedc9bc feat: add delivery mode indicators and tooltips to location column 2026-01-29 11:32:35 -06:00
58475c8673 feat: add page selector dropdown with animated pagination controls
Replace Previous/Next buttons with 5-slot page navigation centered in
pagination bar. Current page becomes a dropdown trigger allowing direct
page jumps. Side slots animate on page transitions.
2026-01-29 11:31:55 -06:00
78159707e2 feat: table FLIP animations, improved time tooltip details & day abbreviations 2026-01-29 03:40:40 -06:00
779144a4d5 feat: implement smart name abbreviation for instructor display 2026-01-29 03:14:55 -06:00
0da2e810fe feat: add multi-select subject filtering with searchable comboboxes 2026-01-29 03:03:21 -06:00
ed72ac6bff refactor: extract reusable SimpleTooltip component and enhance UI hints 2026-01-29 01:37:04 -06:00
57b5cafb27 feat: enhance table scrolling and eliminate initial theme flash 2026-01-29 01:18:02 -06:00
841191c44d feat: integrate OverlayScrollbars with theme-aware styling 2026-01-29 01:05:19 -06:00
67d7c81ef4 feat: implement interactive data table with sorting and column control
Replaces static course table with TanStack Table featuring sortable
columns, column visibility management, and server-side sort handling.
Adds reusable data-table primitives adapted for Svelte 5 runes.
2026-01-29 01:04:18 -06:00
d108a41f91 feat: sync RMP professor ratings and display in course search interface 2026-01-29 00:26:40 -06:00
5fab8c216a feat: add course search UI with ts-rs type bindings
Integrate ts-rs for Rust-to-TypeScript type generation, build course
search page with filters, pagination, and expandable detail rows,
and refactor theme toggle into a reactive store with view transition
animation.
2026-01-28 22:11:17 -06:00
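What the ts-rs integration looks like on the Rust side, with a hypothetical type; bindings land in `web/src/lib/bindings/` per the repo's `TS_RS_EXPORT_DIR` and are written when the generated export tests run (e.g. via `cargo test`):

```rust
use serde::Serialize;
use ts_rs::TS;

/// Hypothetical API type; deriving TS plus #[ts(export)] emits a
/// matching .ts declaration for the SvelteKit frontend.
#[derive(Serialize, TS)]
#[ts(export)]
struct CourseSummary {
    crn: String,
    title: String,
    enrollment: i32,
}
```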
15256ff91c docs: replace feature wishlist with organized roadmap and changelog 2026-01-28 21:07:10 -06:00
6df4303bd6 feat: implement comprehensive course data model with reference cache and search 2026-01-28 21:06:53 -06:00
e3b855b956 refactor: migrate frontend from React to SvelteKit 2026-01-28 21:06:53 -06:00
0ce0257fdc chore: reduce tracing verbosity and normalize log levels 2026-01-28 20:13:53 -06:00
fa2fc45aa9 refactor: extract theme toggle styles to CSS and improve timeout handling 2026-01-28 19:47:24 -06:00
7cc8267c2e feat: implement real-time service status tracking and health reporting 2026-01-28 18:37:42 -06:00
1733ee5f86 feat: extract database operations module and add extensive test suite 2026-01-28 17:32:27 -06:00
992263205c refactor: consolidate types, remove dead code, and fix minor bugs
Replace DayOfWeek with chrono::Weekday via extension traits, unify
RateLimitConfig into the config module, and remove the unused time
command, BannerState, and ClassDetails stub. Fix open_only query
parameter to respect false values and correct 12-hour time display.
2026-01-28 16:31:11 -06:00
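The extension-trait replacement for `DayOfWeek` might look like this; the method name is an assumption:

```rust
use chrono::Weekday;

/// Extension trait over chrono::Weekday, replacing the bespoke DayOfWeek type.
trait WeekdayExt {
    fn short_label(&self) -> &'static str;
}

impl WeekdayExt for Weekday {
    fn short_label(&self) -> &'static str {
        match self {
            Weekday::Mon => "M",
            Weekday::Tue => "Tu",
            Weekday::Wed => "W",
            Weekday::Thu => "Th",
            Weekday::Fri => "F",
            Weekday::Sat => "Sa",
            Weekday::Sun => "Su",
        }
    }
}
```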
37942378ae test: add comprehensive unit tests for query builder, CLI args, and config parsing 2026-01-28 14:29:03 -06:00
c445190838 chore: update frontend dependencies to latest versions 2026-01-28 14:24:18 -06:00
57a6a9871f feat: add conditional asset embedding with dev/prod build separation
- Add embed-assets feature flag to make rust-embed/mime_guess optional
- Update Justfile with backend-dev command for development (no embedded assets)
- Add CORS middleware when assets not embedded (for Vite proxy)
- Improve db recipe with Bun script supporting start/reset/rm commands
2026-01-28 13:31:00 -06:00
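A sketch of the feature gate; the folder path is assumed from the Dockerfile's `web/dist` copy:

```rust
// Compiled only with the `embed-assets` feature (the default); dev builds
// skip embedding and rely on the Vite proxy plus the CORS middleware.
#[cfg(feature = "embed-assets")]
mod embedded {
    use rust_embed::RustEmbed;

    #[derive(RustEmbed)]
    #[folder = "web/dist/"] // assumed asset output directory
    pub struct Assets;
}
```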
Ryan Walters
966732a6d2 feat: modernize build tooling and add CI/CD workflow
Switch to Bun for 2-5x faster frontend builds, implement cargo-chef for
reliable Rust dependency caching, and add Biome for fast code
formatting.

Build system improvements:
- Replace pnpm with Bun for frontend package management
- Add cargo-chef to Dockerfile for better Rust build layer caching
- Update all commands to use bun instead of pnpm

Developer experience:
- Add comprehensive Justfile commands (format, format-check, db)
- Implement automated PostgreSQL Docker setup with random port
allocation
- Add stricter checks (--deny warnings on clippy, --all-features flag)

Code quality:
- Add Biome formatter for 10-100x faster TypeScript/JavaScript
formatting
- Add GitHub Actions CI/CD workflow for automated checks
- Update .dockerignore with comprehensive exclusions
- Format all code with cargo fmt (Rust) and Biome (TypeScript)

All changes maintain backward compatibility and can be tested
incrementally.
2025-11-18 18:59:03 -06:00
Ryan Walters
3292d35521 build(docker): copy migrations directory to build context
Ensures database migration files are available during the Docker build process.
2025-11-03 12:07:27 -06:00
Ryan Walters
71ac0782d0 feat(json): enhance error context with debug mode detailed reporting
Improve JSON parsing error messages with build-specific behavior:
- Debug builds: Show full parent object context and type mismatch details
- Release builds: Keep minimal snippets to avoid log spam

Add comprehensive test coverage for error handling and path parsing.
2025-11-03 12:04:20 -06:00
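A sketch of the build-specific reporting using `serde_path_to_error` (already in Cargo.toml); the snippet length and message shapes are assumptions:

```rust
use serde::de::DeserializeOwned;

/// Parse JSON with path context; verbose detail only in debug builds.
fn parse_json<T: DeserializeOwned>(raw: &str) -> Result<T, String> {
    let mut de = serde_json::Deserializer::from_str(raw);
    serde_path_to_error::deserialize(&mut de).map_err(|e| {
        if cfg!(debug_assertions) {
            // Debug: include the failing path plus a bounded input snippet.
            format!("JSON error at `{}`: {} (input: {:.200})", e.path(), e, raw)
        } else {
            // Release: keep it terse to avoid log spam.
            format!("JSON error at `{}`", e.path())
        }
    })
}
```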
Ryan Walters
1c6d2d4b6e perf: implement batch operations and optimize database indexes
Add batch upsert functionality to reduce database round-trips from N to 1 when inserting courses. Create comprehensive database indexes for common query patterns including term/subject lookups, time-series metrics, and job scheduling. Remove redundant indexes and add monitoring guidance for BRIN index effectiveness.
2025-11-03 11:18:42 -06:00
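The batch-upsert pattern, sketched with an invented two-column schema; the real statement covers the full course row:

```rust
use sqlx::PgPool;

/// One round-trip for N rows via UNNEST over parallel arrays.
async fn upsert_courses(pool: &PgPool, crns: &[i32], titles: &[String]) -> sqlx::Result<()> {
    sqlx::query(
        "INSERT INTO courses (crn, title)
         SELECT * FROM UNNEST($1::int4[], $2::text[])
         ON CONFLICT (crn) DO UPDATE SET title = EXCLUDED.title",
    )
    .bind(crns)
    .bind(titles)
    .execute(pool)
    .await?;
    Ok(())
}
```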
Ryan Walters
51f8256e61 feat: implement comprehensive retry mechanism and improve observability
Add retry tracking to scrape jobs with configurable max retries (default 5), implement
automatic database migrations on startup, and significantly reduce logging noise from
infrastructure layers. Enhanced tracing with structured spans for better debugging while
keeping output readable by suppressing verbose trace logs from rate limiters and session
management. Improved error handling with detailed retry context and proper session cookie
validation.
2025-11-03 10:18:07 -06:00
Ryan Walters
b1ed2434f8 feat: add ESLint configuration and testing infrastructure
Add comprehensive ESLint setup with React and TypeScript support, create basic integration tests for the shutdown utilities, and enhance the Justfile with a new check command that runs all validation steps (cargo check, clippy, tests, and linting).
2025-11-03 02:21:35 -06:00
Ryan Walters
47c23459f1 refactor: implement comprehensive graceful shutdown across all services
Implements graceful shutdown with broadcast channels and proper timeout handling
for scraper workers, scheduler, bot service, and status update tasks. Introduces
centralized shutdown utilities and improves service manager to handle parallel
shutdown with per-service timeouts instead of shared timeout budgets.

Key changes:
- Add utils module with shutdown helper functions
- Update ScraperService to return errors on shutdown failures
- Refactor scheduler with cancellable work tasks and 5s grace period
- Extract worker shutdown logic into helper methods for clarity
- Add broadcast channel shutdown support to BotService and status task
- Improve ServiceManager to shutdown services in parallel with individual timeouts
2025-11-03 02:10:01 -06:00
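The broadcast-based shutdown shape, sketched with placeholder work functions:

```rust
use std::time::Duration;
use tokio::sync::broadcast;

/// Worker loop that exits promptly on the shutdown broadcast, giving
/// in-flight work the 5s grace period mentioned above.
async fn worker(mut shutdown: broadcast::Receiver<()>) {
    loop {
        tokio::select! {
            _ = shutdown.recv() => {
                let _ = tokio::time::timeout(Duration::from_secs(5), finish_current_work()).await;
                break;
            }
            _ = do_next_job() => {}
        }
    }
}

async fn do_next_job() { /* placeholder for real queue work */ }
async fn finish_current_work() { /* placeholder for cleanup */ }
```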
Ryan Walters
8af9b0a1a2 refactor(scraper): implement graceful shutdown with broadcast channels
Replace task abortion with broadcast-based graceful shutdown for scheduler and workers. Implement cancellation tokens for in-progress work with 5s timeout. Add tokio-util dependency for CancellationToken support. Update ServiceManager to use completion channels and abort handles for better service lifecycle control.
2025-11-03 01:22:12 -06:00
020a00254f chore: improve database pool connection options, tighter thresholds & limits 2025-09-14 12:18:39 -05:00
45de5be60d refactor: redistribute main.rs into new modules for app & service initialization 2025-09-14 12:18:15 -05:00
8384f418c8 refactor: remove unused/dead code, apply allowances to the rest 2025-09-14 01:57:30 -05:00
3dca896a35 feat(web): add 10 second timeout layer 2025-09-14 01:47:52 -05:00
1b7d2d2824 fix: make version retrieval search current dir, add basic logs, existence check 2025-09-13 22:08:48 -05:00
e370008d75 fix: pass RAILWAY_GIT_COMMIT_SHA through Docker, provide Cargo.toml for frontend (version retrieval) 2025-09-13 22:04:44 -05:00
176574343f fix: provide proper theme-based colors to all elements necessary 2025-09-13 21:57:56 -05:00
91899bb109 fix: limit devtools panel to dev mode 2025-09-13 21:52:14 -05:00
08ae54c093 fix: use wildcard COPY for .git directory, use RAILWAY_GIT_COMMIT_SHA as fallback 2025-09-13 21:20:16 -05:00
33b8681b19 chore: use locale-based number formatting 2025-09-13 21:12:13 -05:00
398a1b9474 feat: dark mode with theme toggle button 2025-09-13 21:11:16 -05:00
a732ff9a15 feat: better frontend state implementation, acquire version in frontend build time 2025-09-13 20:29:18 -05:00
bfcd868337 refactor: proper implementation of services status, better styling/appearance/logic 2025-09-13 19:34:34 -05:00
99f0d0bc49 fix: add build.rs and .git dir to Dockerfile COPY build step, add git dependency 2025-09-13 19:09:27 -05:00
8b7729788d chore: replace template properties 2025-09-13 19:02:01 -05:00
27b0cb877e feat: display project version on frontend 2025-09-13 18:58:35 -05:00
8ec2f7d36f chore: bump version to 0.3.2 2025-09-13 18:52:23 -05:00
28a8a15b6b feat: embed git commit into binary, provide link on frontend 2025-09-13 18:51:48 -05:00
19b3a98f66 feat: setup span recording for CustomJsonFormatter, use 'yansi' for better ANSI terminal colors in CustomPrettyFormatter 2025-09-13 18:40:55 -05:00
b64aa41b14 feat: better profile-based router assembly, tracing layer for responses with span-based request paths 2025-09-13 18:03:20 -05:00
64449e8976 feat: setup pretty frontend for system status 2025-09-13 17:49:35 -05:00
2e0fefa5ee feat: implement interval backoff for presence indicator 2025-09-13 16:15:33 -05:00
97488494fb chore: bump version to 0.3.0 2025-09-13 15:52:40 -05:00
b3322636a9 feat: setup frontend build code, tune .dockerignore patterns
also removed diesel.toml
2025-09-13 15:48:25 -05:00
878cc5f773 docs: setup proper documentation, organize & clean README 2025-09-13 15:27:32 -05:00
94fb6b4190 chore: set banner URL default in config, remove old mentions of redis 2025-09-13 14:48:49 -05:00
e3b638a7d8 feat: add ETag & Cache-Control headers, cached hexadecimal hashes via rapidhash 2025-09-13 13:24:54 -05:00
404a52e64c feat: cache mime types for valid assets, use octet-stream content type 2025-09-13 12:37:36 -05:00
a917315967 fix: simplify asset serving, use fallback primarily 2025-09-13 12:23:27 -05:00
9d51fde893 feat: add arguments for enabling/disabling services 2025-09-13 12:06:10 -05:00
79fc931077 refactor: remove 'auto' mode, just specify value via constant for better clap visibility 2025-09-13 11:38:43 -05:00
f3861a60c4 chore: add dev-release helper profile into Cargo.toml 2025-09-13 11:34:25 -05:00
26b1a88860 chore: use clippy by default for check command, fix lint 2025-09-13 11:31:09 -05:00
27ac9a7302 feat: add formatter CLI argument, setup asset embedding in release mode 2025-09-13 11:30:57 -05:00
1d345ed247 chore: customize bacon, add 'dev' job 2025-09-13 11:30:23 -05:00
6f831f5fa6 feat: setup web/ for tanstack router frontend 2025-09-13 11:30:11 -05:00
ac2638dd9a feat: implement proper SIGTERM handling for container shutdown 2025-09-13 09:43:47 -05:00
cfb847f2e5 feat: holiday exclusion logic for ICS command 2025-09-13 02:20:27 -05:00
e7d47f1f96 feat: implement ICS command 2025-09-13 01:50:18 -05:00
9a48587479 chore: drop redis 2025-09-13 01:49:47 -05:00
624247ee14 feat: basic activity status 2025-09-13 01:04:46 -05:00
430e2a255b fix: avoid crashing due to odd url parse 2025-09-13 01:01:49 -05:00
bbc78131ec feat: setup recoverable/unrecoverable job error distinction, delete unrecoverable jobs 2025-09-13 00:48:11 -05:00
77ab71d4d5 feat: map RAILWAY_DEPLOYMENT_DRAINING_SECONDS to SHUTDOWN_TIMEOUT 2025-09-13 00:36:11 -05:00
9d720bb0a7 feat: implement common job trait & better interface for scheduler & workers 2025-09-13 00:17:53 -05:00
dcc564dee6 fix: credit_hour_session is optional 2025-09-12 23:50:36 -05:00
4ca55a1fd4 feat: schedule & query jobs efficiently in batches 2025-09-12 23:41:27 -05:00
a6e7adcaef fix: improve json error handling, make email_address optional 2025-09-12 23:36:07 -05:00
752c855dec chore: drop env prefixed config vars 2025-09-12 22:39:32 -05:00
14b02df8f4 feat: much better JSON logging, project-wide logging improvements, better use of debug/trace levels, field attributes 2025-09-12 22:01:14 -05:00
00cb209052 fix: disable poor error snippet 2025-09-12 21:40:07 -05:00
dfc05a2789 feat: setup rate limiter middleware & config 2025-09-12 21:12:06 -05:00
fe798e1867 fix: avoid COPY of non-existent dir, add .dockerignore 2025-09-12 20:57:33 -05:00
39688f800f chore: update Dockerfile rust to 1.89.0 2025-09-12 20:53:24 -05:00
b2b4bb67f0 chore: rustfmt 2025-09-12 20:52:07 -05:00
e5d8cec2d6 refactor: reorganize banner api files, fix clippy lints, reformat 2025-09-12 20:50:47 -05:00
e9a0558535 feat: asynchronous, rate limited term session acquisition 2025-09-12 20:35:12 -05:00
353c36bcf2 feat: 'search' example binary 2025-09-12 20:12:41 -05:00
2f853a7de9 feat: middleware headers, fix concurrent session cookies issue, invalid session details 2025-09-12 20:12:12 -05:00
dd212c3239 chore: update dependencies, add sqlx 'macros', add futures, add 'http' (explicit) 2025-09-12 20:11:13 -05:00
8ff3a18c3e feat: Dockerfile 2025-09-01 00:47:26 -05:00
43647096e9 feat: scraper system 2025-09-01 00:46:38 -05:00
1bdbd1d6d6 chore: remove unused dependencies 2025-09-01 00:26:20 -05:00
23be6035ed feat: much better, smarter session acquisition 2025-08-31 15:34:49 -05:00
139e4aa635 feat: translate over to sqlx, remove diesel 2025-08-31 15:34:49 -05:00
677bb05b87 chore: update & sort dependencies, add sqlx, remove 'migrations' 2025-08-29 12:52:46 -05:00
f2bd02c970 chore: add bacon config 2025-08-29 12:10:57 -05:00
8cdf969a53 feat: command logging, explicit builtin command error handler 2025-08-29 12:10:57 -05:00
4764d48ac9 feat: move scraper into separate module, begin building data models 2025-08-29 11:07:46 -05:00
e734e40347 feat: setup diesel & schema, course with metrics/audit tables 2025-08-27 18:57:43 -05:00
c7117f14a3 feat: smart day string, terse refactor and use types properly, work on unimplemented commands lightly, util modules 2025-08-27 13:46:41 -05:00
cb8a595326 chore: solve lints, improve formatting 2025-08-27 12:43:43 -05:00
ac70306c04 feat: improve logging, solve lints, improve implementations, remove unused code, standardize things 2025-08-27 12:43:43 -05:00
9972357cf6 feat: implement simple web service, improve ServiceManager encapsulation 2025-08-27 11:58:57 -05:00
234 changed files with 31473 additions and 2012 deletions
+11
@@ -0,0 +1,11 @@
# cargo-audit configuration
# https://github.com/rustsec/rustsec/tree/main/cargo-audit
[advisories]
# Transitive dependencies we can't control
ignore = [
# rsa: Marvin Attack timing sidechannel (via sqlx-mysql, no fix available)
"RUSTSEC-2023-0071",
# derivative: unmaintained (via poise)
"RUSTSEC-2024-0388",
]
+2
@@ -0,0 +1,2 @@
[env]
TS_RS_EXPORT_DIR = { value = "web/src/lib/bindings/", relative = true }
+51
@@ -0,0 +1,51 @@
# Build artifacts
target/
**/target/
# Documentation
README.md
docs/
*.md
# Old Go codebase
go/
# Development configuration
bacon.toml
.env
.env.*
!.env.example
# CI/CD
.github/
.git/
# Development tools
Justfile
rust-toolchain.toml
# Frontend build artifacts and cache
web/node_modules/
web/dist/
web/.vite/
web/.tanstack/
web/.vscode/
# IDE and editor files
.vscode/
.idea/
*.swp
*.swo
*~
# OS files
.DS_Store
Thumbs.db
# Test coverage
coverage/
*.profdata
*.profraw
# SQLx offline mode (include this in builds)
!.sqlx/
+34
@@ -0,0 +1,34 @@
{
"$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json",
"changelog-sections": [
{ "type": "feat", "section": "Features" },
{ "type": "fix", "section": "Bug Fixes" },
{ "type": "perf", "section": "Performance Improvements" },
{ "type": "refactor", "section": "Code Refactoring" },
{ "type": "docs", "section": "Documentation" },
{ "type": "ci", "section": "Continuous Integration" },
{ "type": "build", "section": "Build System" },
{ "type": "chore", "section": "Miscellaneous" },
{ "type": "style", "section": "Styles", "hidden": true },
{ "type": "test", "section": "Tests", "hidden": true }
],
"bump-minor-pre-major": true,
"always-update": true,
"bump-patch-for-minor-pre-major": true,
"include-v-in-tag": true,
"include-component-in-tag": false,
"plugins": ["sentence-case"],
"packages": {
".": {
"release-type": "rust",
"exclude-paths": [".vscode", "docs"],
"extra-files": [
{
"type": "toml",
"path": "Cargo.lock",
"jsonpath": "$.package[?(@.name=='banner')].version"
}
]
}
}
}
+3
@@ -0,0 +1,3 @@
{
".": "0.6.1"
}
+185
@@ -0,0 +1,185 @@
name: CI
on:
push:
branches: [master]
pull_request:
branches: [master]
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
jobs:
rust-quality:
name: Rust Quality
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt, clippy
- name: Cache Rust dependencies
uses: Swatinem/rust-cache@v2
with:
cache-on-failure: true
- name: Check formatting
run: |
if [ "${{ github.event_name }}" = "pull_request" ]; then
cargo fmt --all -- --check
else
cargo fmt --all -- --check || echo "::warning::Rust formatting issues found (not failing on push)"
fi
- name: Clippy
run: cargo clippy --no-default-features -- -D warnings
frontend-quality:
name: Frontend Quality
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
- name: Install dependencies
working-directory: web
run: bun install --frozen-lockfile
- name: Check formatting
working-directory: web
run: |
if [ "${{ github.event_name }}" = "pull_request" ]; then
bun run format:check
else
bun run format:check || echo "::warning::Frontend formatting issues found (not failing on push)"
fi
- name: Lint
working-directory: web
run: bun run lint
- name: Type check
working-directory: web
run: bun run typecheck
rust-tests:
name: Rust Tests
runs-on: ubuntu-latest
services:
postgres:
image: postgres:17-alpine
env:
POSTGRES_USER: banner
POSTGRES_PASSWORD: banner
POSTGRES_DB: banner
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
env:
DATABASE_URL: postgresql://banner:banner@localhost:5432/banner
steps:
- uses: actions/checkout@v4
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
- name: Cache Rust dependencies
uses: Swatinem/rust-cache@v2
with:
cache-on-failure: true
- name: Run tests
run: cargo test --no-default-features
frontend-tests:
name: Frontend Tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
- name: Install dependencies
working-directory: web
run: bun install --frozen-lockfile
- name: Run tests
working-directory: web
run: bun run test
docker-build:
name: Docker Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build image
uses: docker/build-push-action@v6
with:
context: .
push: false
cache-from: type=gha
cache-to: type=gha,mode=max
security:
name: Security Scan
runs-on: ubuntu-latest
permissions:
contents: read
security-events: write
steps:
- uses: actions/checkout@v4
- name: Install cargo-audit
uses: taiki-e/install-action@cargo-audit
- name: Rust security audit
run: cargo audit
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
- name: Install frontend dependencies
working-directory: web
run: bun install --frozen-lockfile
- name: Frontend security audit
working-directory: web
run: bun audit --audit-level=moderate
continue-on-error: true
- name: Trivy filesystem scan
uses: aquasecurity/trivy-action@master
with:
scan-type: fs
scan-ref: .
format: sarif
output: trivy-results.sarif
severity: CRITICAL,HIGH
exit-code: 0
- name: Upload Trivy results
uses: github/codeql-action/upload-sarif@v4
if: always() && hashFiles('trivy-results.sarif') != ''
with:
sarif_file: trivy-results.sarif
+27
@@ -0,0 +1,27 @@
name: Release Please
on:
workflow_dispatch:
workflow_run:
workflows: ["CI"]
types:
- completed
branches:
- master
permissions:
contents: write
pull-requests: write
jobs:
release-please:
name: Create Release PR
runs-on: ubuntu-latest
if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
steps:
- uses: googleapis/release-please-action@v4
with:
token: ${{ secrets.RELEASE_PLEASE_TOKEN }}
config-file: .github/release-please-config.json
manifest-file: .github/release-please-manifest.json
+3 -1
@@ -1,3 +1,5 @@
.env
/target
/go/
/scripts/node_modules
+3
@@ -0,0 +1,3 @@
{
"rust-analyzer.check.command": "clippy"
}
+141
@@ -0,0 +1,141 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [0.6.1](https://github.com/Xevion/Banner/compare/v0.6.0...v0.6.1) (2026-01-31)
### Features
* **build:** Auto-regenerate TypeScript bindings on source changes ([e203e8e](https://github.com/Xevion/Banner/commit/e203e8e182f7a0b0224a8f9e6bf79d15259215a2))
* **course:** Distinguish async from synchronous online courses ([8bfc14e](https://github.com/Xevion/Banner/commit/8bfc14e55c1bdf5acc2006096476e0b1eb1b7cc6))
* **scraper:** Improve dashboard clarity with stat tooltips ([1ad614d](https://github.com/Xevion/Banner/commit/1ad614dad03d3631a8d119203786718c814e72c7))
* **scraper:** Improve results visibility and loading states ([c533768](https://github.com/Xevion/Banner/commit/c53376836238f3aca92ac82cd5fd59a077bcceff))
### Bug Fixes
* Avoid status flickering on subjects table ([2689587](https://github.com/Xevion/Banner/commit/2689587dd53c572a65eeb91f74c737662e1f148b))
* **ci:** Add postgres container service for rust tests ([ebb7a97](https://github.com/Xevion/Banner/commit/ebb7a97c113fa1d4b61b8637dfe97cae5260075c))
* **ci:** Fix rust/frontend/security job failures and expand local checks ([dd148e0](https://github.com/Xevion/Banner/commit/dd148e08a0b6d5b7afe4ff614d7d6e4e4d0dfce6))
* **data:** Decode HTML entities in course titles and instructor names ([7d2255a](https://github.com/Xevion/Banner/commit/7d2255a988a23f6e1b1c8e7cb5a8ead833ad34da))
* **metrics:** Always emit baseline metrics on initial course insertion ([16039e0](https://github.com/Xevion/Banner/commit/16039e02a999c668d4969a43eb9ed1d4e8d370e1))
### Code Refactoring
* **terms:** Move term formatting from frontend to backend ([cbb0a51](https://github.com/Xevion/Banner/commit/cbb0a51bca9e4e0d6a8fcee90465c93943f2a30e))
* Use friendly term codes in URL query parameters ([550401b](https://github.com/Xevion/Banner/commit/550401b85ceb8a447e316209b479c69062c5b658))
### Continuous Integration
* Add Release Please automation for changelog and version management ([6863ee5](https://github.com/Xevion/Banner/commit/6863ee58d0a5778303af1b7626b2a9eda3043ca0))
* Split quality checks into parallel jobs with security scanning ([3494341](https://github.com/Xevion/Banner/commit/3494341e3fbe9ffd96b6fcd8abbe7f95ecec6f45))
### Miscellaneous
* Add ts-rs generated bindings ([2df0ba0](https://github.com/Xevion/Banner/commit/2df0ba0ec58155d73830a66132cb635dc819e8a9))
* Update frontend packages ([acccaa5](https://github.com/Xevion/Banner/commit/acccaa54d4455500db60d1b6437cad1c592445f1))
## [Unreleased]
## [0.6.0] - 2026-01-30
### Added
- User authentication system with Discord OAuth, sessions, admin roles, and login page with FAQ.
- Interactive timeline visualization with D3 canvas, pan/zoom, touch gestures, and enrollment aggregation API.
- Scraper analytics dashboard with timeseries charts, subject monitoring, and per-subject detail views.
- Adaptive scraper scheduling with admin endpoints for monitoring and configuration.
- Scrape job result persistence for effectiveness tracking.
- WebSocket support for real-time scrape job monitoring with connection status indicators.
- Course change auditing with field-level tracking and time-series metrics endpoint.
- Audit log UI with smart JSON diffing, conditional request caching, and auto-refresh.
- Calendar export web endpoints for ICS download and Google Calendar redirect.
- Confidence-based RMP matching with manual review workflow and admin instructor UI.
- RMP profile links and confidence-aware rating display.
- Name parsing and normalization for improved instructor-RMP matching.
- Mobile touch controls with gesture detection for timeline.
- Worker timeout protection and crash recovery for job queue.
- Build-time asset compression with encoding negotiation (gzip, brotli, zstd).
- Smart page transitions with theme-aware element transitions.
- Search duration and result count feedback.
- Root error page handling.
- Login page with FAQ section and improved styling.
### Changed
- Consolidated navigation with top nav bar and route groups.
- Centralized number formatting with locale-aware utility.
- Modernized Justfile commands and simplified service management.
- Persisted audit log state in module scope for cross-navigation caching.
- Relative time feedback and improved tooltip customization.
### Fixed
- Instructor/course mismatching via build-order-independent map for association.
- Page content clipping.
- Backend startup delays with retry logic in auth.
- Banner API timeouts increased to handle slow responses.
- i64 serialization for JavaScript compatibility, fixing avatar URL display.
- Frontend build ordering with `-e` embed flag in Justfile.
- Login page centering and unnecessary scrollbar.
- ts-rs serde warnings.
## [0.5.0] - 2026-01-29
### Added
- Multi-select subject filtering with searchable comboboxes.
- Smart instructor name abbreviation for compact table display.
- Delivery mode indicators and tooltips in location column.
- Page selector dropdown with animated pagination controls.
- FLIP animations for smooth table row transitions during pagination.
- Time tooltip with detailed meeting schedule and day abbreviations.
- Reusable SimpleTooltip component for consistent UI hints.
### Changed
- Consolidated query logic and eliminated N+1 instructor loads via batch fetching.
- Consolidated menu snippets and strengthened component type safety.
- Enhanced table scrolling with OverlayScrollbars and theme-aware styling.
- Eliminated initial theme flash on page load.
## [0.4.0] - 2026-01-28
### Added
- Web-based course search UI with interactive data table, multi-column sorting, and column visibility controls.
- TypeScript type bindings generated from Rust types via ts-rs.
- RateMyProfessors integration: bulk professor sync via GraphQL and inline rating display in search results.
- Course detail expansion panel with enrollment, meeting times, and instructor info.
- OverlayScrollbars integration for styled, theme-aware scrollable areas.
- Pagination component for navigating large search result sets.
- Footer component with version display.
- API endpoints: `/api/courses/search`, `/api/courses/:term/:crn`, `/api/terms`, `/api/subjects`, `/api/reference/:category`.
- Frontend API client with typed request/response handling and test coverage.
- Course formatting utilities with comprehensive unit tests.
## [0.3.4] - 2026-01
### Added
- Live service status tracking on web dashboard with auto-refresh and health indicators.
- DB operation extraction for improved testability.
- Unit test suite foundation covering core functionality.
- Docker support for PostgreSQL development environment.
- ICS calendar export with comprehensive holiday exclusion coverage.
- Google Calendar link generation with recurrence rules and meeting details.
- Job queue with priority-based scheduling for background scraping.
- Rate limiting with burst allowance for Banner API requests.
- Session management and caching for Banner API interactions.
- Discord bot commands: search, terms, ics, gcal.
- Intelligent scraping system with priority queues and retry tracking.
### Changed
- Type consolidation and dead code removal across the codebase.
+1332 -510
File diff suppressed because it is too large
+64 -25
@@ -1,32 +1,71 @@
[package]
name = "banner"
version = "0.1.0"
version = "0.6.1"
edition = "2024"
default-run = "banner"
[features]
default = ["embed-assets"]
embed-assets = ["dep:rust-embed", "dep:mime_guess"]
[dependencies]
tokio = { version = "1.47.1", features = ["full"] }
axum = "0.8.4"
serenity = { version = "0.12.4", features = ["rustls_backend"] }
reqwest = { version = "0.12.23", features = ["json", "cookies"] }
diesel = { version = "2.2.12", features = ["chrono", "postgres", "uuid"] }
redis = { version = "0.32.5", features = ["tokio-comp"] }
figment = { version = "0.10.19", features = ["toml", "env"] }
serde_json = "1.0.143"
serde = { version = "1.0.219", features = ["derive"] }
governor = "0.10.1"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
dotenvy = "0.15.7"
poise = "0.6.1"
async-trait = "0.1"
fundu = "2.0.1"
anyhow = "1.0.99"
thiserror = "2.0.16"
chrono = { version = "0.4", features = ["serde"] }
chrono-tz = "0.8"
rand = "0.8"
regex = "1.10"
url = "2.5"
async-trait = "0.1"
axum = { version = "0.8.4", features = ["ws"] }
bitflags = { version = "2.9.4", features = ["serde"] }
chrono = { version = "0.4.42", features = ["serde"] }
compile-time = "0.2.0"
time = "0.3.41"
bitflags = { version = "2.9.3", features = ["serde"] }
cookie = "0.18.1"
dashmap = "6.1.0"
dotenvy = "0.15.7"
figment = { version = "0.10.19", features = ["toml", "env"] }
fundu = "2.0.1"
futures = "0.3"
http = "1.3.1"
poise = "0.6.1"
rand = "0.9.2"
regex = "1.10"
reqwest = { version = "0.12.23", features = ["json", "cookies"] }
reqwest-middleware = { version = "0.4.2", features = ["json"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.143"
serenity = { version = "0.12.4", features = ["rustls_backend"] }
sqlx = { version = "0.8.6", features = [
"runtime-tokio-rustls",
"postgres",
"chrono",
"json",
"macros",
"migrate",
] }
thiserror = "2.0.16"
time = "0.3.43"
tokio = { version = "1.47.1", features = ["full"] }
tokio-util = "0.7"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.20", features = ["env-filter", "json"] }
url = "2.5"
governor = "0.10.1"
serde_path_to_error = "0.1.17"
num-format = "0.4.4"
tower-http = { version = "0.6.0", features = ["cors", "trace", "timeout", "compression-full"] }
rust-embed = { version = "8.0", features = ["include-exclude"], optional = true }
mime_guess = { version = "2.0", optional = true }
clap = { version = "4.5", features = ["derive"] }
rapidhash = "4.1.0"
yansi = "1.0.1"
extension-traits = "2"
ts-rs = { version = "11.1.0", features = ["chrono-impl", "serde-compat", "serde-json-impl", "no-serde-warnings"] }
html-escape = "0.2.13"
axum-extra = { version = "0.12.5", features = ["query"] }
urlencoding = "2.1.3"
chrono-tz = "0.10.4"
htmlize = { version = "1.0.6", features = ["unescape"] }
unicode-normalization = "0.1.25"
[dev-dependencies]
# A 'release mode' profile that compiles quickly, but still 'appears' like a release build, useful for debugging
[profile.dev-release]
inherits = "dev"
debug-assertions = false
+119
View File
@@ -0,0 +1,119 @@
# Build arguments
ARG RUST_VERSION=1.89.0
ARG RAILWAY_GIT_COMMIT_SHA
# --- Frontend Build Stage ---
FROM oven/bun:1 AS frontend-builder
WORKDIR /app
# Install zstd for pre-compression
RUN apt-get update && apt-get install -y --no-install-recommends zstd && rm -rf /var/lib/apt/lists/*
# Copy backend Cargo.toml for build-time version retrieval
COPY ./Cargo.toml ./
# Copy frontend package files
COPY ./web/package.json ./web/bun.lock* ./
# Install dependencies
RUN bun install --frozen-lockfile
# Copy frontend source code
COPY ./web ./
# Build frontend, then pre-compress static assets (gzip, brotli, zstd)
RUN bun run build && bun run scripts/compress-assets.ts
# --- Chef Base Stage ---
FROM lukemathwalker/cargo-chef:latest-rust-${RUST_VERSION} AS chef
WORKDIR /app
# --- Planner Stage ---
FROM chef AS planner
COPY Cargo.toml Cargo.lock ./
COPY build.rs ./
COPY src ./src
# Migrations & .sqlx specifically left out to avoid invalidating cache
RUN cargo chef prepare --recipe-path recipe.json --bin banner
# --- Rust Build Stage ---
FROM chef AS builder
# Set build-time environment variable for Railway Git commit SHA
ARG RAILWAY_GIT_COMMIT_SHA
ENV RAILWAY_GIT_COMMIT_SHA=${RAILWAY_GIT_COMMIT_SHA}
# Copy recipe from planner and build dependencies only
COPY --from=planner /app/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json --bin banner
# Install build dependencies for final compilation
RUN apt-get update && apt-get install -y \
pkg-config \
libssl-dev \
git \
&& rm -rf /var/lib/apt/lists/*
# Copy source code and built frontend assets
COPY Cargo.toml Cargo.lock ./
COPY build.rs ./
COPY .git* ./
COPY src ./src
COPY migrations ./migrations
COPY --from=frontend-builder /app/dist ./web/dist
# Build web app with embedded assets
RUN cargo build --release --bin banner
# Strip the binary to reduce size
RUN strip target/release/banner
# --- Runtime Stage ---
FROM debian:12-slim
ARG APP=/usr/src/app
ARG APP_USER=appuser
ARG UID=1000
ARG GID=1000
# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
tzdata \
wget \
&& rm -rf /var/lib/apt/lists/*
ARG TZ=Etc/UTC
ENV TZ=${TZ}
# Create user with specific UID/GID
RUN addgroup --gid $GID $APP_USER \
&& adduser --uid $UID --disabled-password --gecos "" --ingroup $APP_USER $APP_USER \
&& mkdir -p ${APP}
# Copy application binary
COPY --from=builder --chown=$APP_USER:$APP_USER /app/target/release/banner ${APP}/banner
# Set proper permissions
RUN chmod +x ${APP}/banner
USER $APP_USER
WORKDIR ${APP}
# Build-time arg for PORT, default to 8000
ARG PORT=8000
# Runtime environment var for PORT, default to build-time arg
ENV PORT=${PORT}
EXPOSE ${PORT}
# Add health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD wget --no-verbose --tries=1 --spider http://localhost:${PORT}/health || exit 1
# Can be explicitly overridden with different hosts & ports
ENV HOSTS=0.0.0.0,[::]
# Implicitly uses PORT environment variable
# Runs all services: web, bot, and scraper
CMD ["sh", "-c", "exec ./banner"]
+46
View File
@@ -0,0 +1,46 @@
set dotenv-load
default:
just --list
# Run all checks in parallel. Pass -f/--fix to auto-format and fix first.
check *flags:
bun scripts/check.ts {{flags}}
# Format all Rust and TypeScript code
format:
cargo fmt --all
bun run --cwd web format
# Run tests. Usage: just test [rust|web|<nextest filter args>]
test *args:
bun scripts/test.ts {{args}}
# Generate TypeScript bindings from Rust types (ts-rs)
bindings:
bun scripts/bindings.ts
# Run the Banner API search demo (hits live UTSA API, ~20s)
search *ARGS:
cargo run -q --bin search -- {{ARGS}}
# Dev server. Flags: -f(rontend) -b(ackend) -W(no-watch) -n(o-build) -r(elease) -e(mbed) --tracing <fmt>
# Pass args to binary after --: just dev -n -- --some-flag
dev *flags:
bun scripts/dev.ts {{flags}}
# Production build. Flags: -d(ebug) -f(rontend-only) -b(ackend-only)
build *flags:
bun scripts/build.ts {{flags}}
# Start PostgreSQL in Docker and update .env with connection string
# Commands: start (default), reset, rm
db cmd="start":
bun scripts/db.ts {{cmd}}
alias b := bun
bun *ARGS:
cd web && bun {{ ARGS }}
sql *ARGS:
lazysql ${DATABASE_URL}
+33 -108
@@ -1,125 +1,50 @@
# banner
A discord bot for executing queries & searches on the Ellucian Banner instance hosting all of UTSA's class data.
A complex multi-service system providing a Discord bot and browser-based interface to UTSA's course data.
## Feature Wishlist
## Services
- Commands
- ICS Download (get an ICS download of your classes with location & timing perfectly set for every class you're in)
- Classes Now (find classes happening)
- Autocomplete
- Class Title
- Course Number
- Term/Part of Term
- Professor
- Attribute
- Component Pagination
- RateMyProfessor Integration (Linked/Embedded)
- Smart term selection (i.e. Summer 2024 will be selected automatically when opened)
- Rate Limiting (bursting with global/user limits)
- DMs Integration (allow usage of the bot in DMs)
- Class Change Notifications (get notified when details about a class change)
- Multi-term Querying (currently the backend for searching is kinda weird)
- Full Autocomplete for Every Search Option
- Metrics, Log Query, Privileged Error Feedback
- Search for Classes
- Major, Professor, Location, Name, Time of Day
- Subscribe to Classes
- Availability (seat, pre-seat)
- Waitlist Movement
- Detail Changes (meta, time, location, seats, professor)
- `time` Start, End, Days of Week
- `seats` Any change in seat/waitlist data
- `meta`
- Lookup via Course Reference Number (CRN)
- Smart Time of Day Handling
- "2 PM" -> Start within 2:00 PM to 2:59 PM
- "2-3 PM" -> Start within 2:00 PM to 3:59 PM
- "ends by 2 PM" -> Ends within 12:00 AM to 2:00 PM
- "after 2 PM" -> Start within 2:01 PM to 11:59 PM
- "before 2 PM" -> Ends within 12:00 AM to 1:59 PM
- Get By Section Command
- CS 4393 001 =>
- Will require SQL to be able to search for a class by its section number
The application consists of three modular services that can be run independently or together:
## Analysis Required
- Discord Bot ([`bot`][src-bot])
Some of the features and architecture of Ellucian's Banner system are not clear.
The following features, JSON, and more require validation & analysis:
- Primary interface for course monitoring and data queries
- Built with [Serenity][serenity] and [Poise][poise] frameworks for robust command handling
- Uses slash commands with comprehensive error handling and logging
- Struct Nullability
- Many of the responses provided by Ellucian contain nulls, and it is often unclear when and why they're null.
- Analysis must be conducted to determine when to use a string and when it should be nullable (a pointer).
- Multiple Professors / Primary Indicator
- Multiple Meeting Times
- Meeting Schedule Types
- AFF vs AIN vs AHB etc.
- Do CRNs repeat between years?
- Check whether partOfTerm is always filled in, and its meaning for various class results.
- Check which API calls are affected by change in term/sessionID term select
- SessionIDs
- How long does a session ID work?
- Do I really require a separate one per term?
- How many can I activate, are there any restrictions?
- How should session IDs be checked as 'invalid'?
- What action(s) keep a session ID 'active', if any?
- Are there any courses with multiple meeting times?
- Google Calendar link generation, as an alternative to ICS file generation
- Web Server ([`web`][src-web])
## Change Identification
- [Axum][axum]-based server with Vite/React-based frontend
- [Embeds static assets][rust-embed] at compile time with ETag & Cache-Control headers
- Important attributes of a class will be parsed on both the old and new data.
- These attributes will be compared and given identifiers that can be subscribed to.
- When a user subscribes to one of these identifiers, any changes identified will be sent to the user.
- Scraper ([`scraper`][src-scraper])
## Real-time Suggestions
- Intelligent data collection system with priority-based queuing inside PostgreSQL via [`sqlx`][sqlx]
- Rate-limited scraping with burst handling to respect UTSA's systems
- Handles course data updates, availability changes, and metadata synchronization
Various command arguments can have suggestions appear.
## Quick Start
- They must be fast. As ephemeral suggestions that are only relevant for seconds or less, they need to be delivered in less than a second.
- They need to be easy to acquire. With as many commands & arguments to search as I do, it is paramount that the API be easy to understand & use.
- It cannot be complicated. I only have so much time to develop this.
- It does not need to be persistent. Since the data is scraped and rolled periodically from the Banner system, the data used will be deleted and re-requested occasionally.
```bash
bun install --cwd web # Install frontend dependencies
cargo build # Build the backend
For these reasons, I believe SQLite to be the ideal place for this data to be stored.
It is exceptionally fast, works well in-memory, and is less complicated compared to most other solutions.
just dev # Runs auto-reloading dev build with all services
just dev-build # Development build with release characteristics (frontend is embedded, non-auto-reloading)
- Only required data about the class will be stored, along with the JSON-encoded string.
- For now, this would only be the CRN (and possibly the Term).
- Potentially, a binary encoding could be used for performance, but it is unlikely to be better.
- Database dumping into R2 would be good to ensure that over-scraping of the Banner system does not occur.
- Upon a safe close being requested
- Must be done quickly (<8 seconds)
- Every 30 minutes, if any scraping occurred.
- May cause locking of commands.
just build # Production build that embeds assets
```
## Scraping
## Documentation
In order to keep the in-memory database of the bot up-to-date with the Banner system, the API must be scraped.
Scraping will be separated by major to allow for priority majors (namely, Computer Science) to be scraped more often compared to others.
This will lower the overall load on the Banner system while ensuring that data presented by the app is still relevant.
Comprehensive documentation is available in the [`docs/`][documentation] folder.
For now, all majors will be scraped fully every 4 hours with at least 5 minutes between each one.
- On startup, priority majors will be scraped first (if required).
- Other majors will be scraped in arbitrary order (if required).
- Scrape timing will be stored in Redis.
- CRNs will be the Primary Key within SQLite
- If CRNs are duplicated between terms, then the primary key will be (CRN, Term)
Considerations
- Change in metadata should decrease the interval
- The number of courses scraped should change the interval (2 hours per 500 courses involved)
## Rate Limiting, Costs & Bursting
Ideally, this application would implement dynamic rate limiting to ensure overload on the server does not occur.
Better, it would also ensure that priority requests (commands) are dispatched faster than background processes (scraping), while making sure different requests are weighted differently.
For example, a recent scrape of 350 classes should be weighted 5x more than a search for 8 classes by a user.
Still, even if the cap does not normally allow for this request to be processed immediately, the small user search should proceed with a small bursting cap.
The requirements to this hypothetical system would be:
- Conditional Bursting: background processes or other requests deemed "low priority" are not allowed to use bursting.
- Arbitrary Costs: rate limiting is considered in the form of the request size/speed more or less, such that small simple requests can be made more frequently, unlike large requests.
[documentation]: docs/README.md
[src-bot]: src/bot
[src-web]: src/web
[src-scraper]: src/scraper
[serenity]: https://github.com/serenity-rs/serenity
[poise]: https://github.com/serenity-rs/poise
[axum]: https://github.com/tokio-rs/axum
[rust-embed]: https://lib.rs/crates/rust-embed
[sqlx]: https://github.com/launchbadge/sqlx
+52
@@ -0,0 +1,52 @@
# This is a configuration file for the bacon tool
#
# Complete help on configuration: https://dystroy.org/bacon/config/
#
# You may check the current default at
# https://github.com/Canop/bacon/blob/main/defaults/default-bacon.toml
default_job = "check"
env.CARGO_TERM_COLOR = "always"
[jobs.check]
command = ["cargo", "check", "--all-targets"]
need_stdout = false
[jobs.clippy]
command = ["cargo", "clippy", "--all-targets"]
need_stdout = false
[jobs.test]
command = [
"cargo", "nextest", "run",
]
need_stdout = true
analyzer = "nextest"
[jobs.run]
command = [
"cargo", "run",
]
need_stdout = true
allow_warnings = true
background = false
on_change_strategy = "kill_then_restart"
# kill = ["pkill", "-TERM", "-P"]
[jobs.dev]
command = [
"just", "dev"
]
need_stdout = true
allow_warnings = true
background = false
on_change_strategy = "kill_then_restart"
# You may define here keybindings that would be specific to
# a project, for example a shortcut to launch a specific job.
# Shortcuts to internal functions (scrolling, toggling, etc.)
# should go in your personal global prefs.toml file instead.
[keybindings]
c = "job:clippy" # comment this to have 'c' run clippy on only the default target
shift-c = "job:check"
d = "job:dev"
+36
@@ -0,0 +1,36 @@
use std::process::Command;
fn main() {
// Try to get Git commit hash from Railway environment variable first
let git_hash = std::env::var("RAILWAY_GIT_COMMIT_SHA").unwrap_or_else(|_| {
// Fallback to git command if not on Railway
let output = Command::new("git").args(["rev-parse", "HEAD"]).output();
match output {
Ok(output) => {
if output.status.success() {
String::from_utf8_lossy(&output.stdout).trim().to_string()
} else {
"unknown".to_string()
}
}
Err(_) => "unknown".to_string(),
}
});
// Get the short hash (first 7 characters)
let short_hash = if git_hash != "unknown" && git_hash.len() >= 7 {
git_hash[..7].to_string()
} else {
git_hash.clone()
};
// Set the environment variables that will be available at compile time
println!("cargo:rustc-env=GIT_COMMIT_HASH={}", git_hash);
println!("cargo:rustc-env=GIT_COMMIT_SHORT={}", short_hash);
// Rebuild if the Git commit changes (only works when .git directory is available)
if std::path::Path::new(".git/HEAD").exists() {
println!("cargo:rerun-if-changed=.git/HEAD");
println!("cargo:rerun-if-changed=.git/refs/heads");
}
}
+117
@@ -0,0 +1,117 @@
# Architecture
## System Overview
The Banner project is built as a multi-service application with the following components:
- **Discord Bot Service**: Handles Discord interactions and commands (Serenity/Poise)
- **Web Service**: Axum HTTP server serving the SvelteKit frontend and REST API endpoints
- **Scraper Service**: Background data collection and synchronization with job queue
- **Database Layer**: PostgreSQL 17 for persistent storage (SQLx with compile-time verification)
- **RateMyProfessors Client**: GraphQL-based bulk sync of professor ratings
### Frontend Stack
- **SvelteKit** with Svelte 5 runes (`$state`, `$derived`, `$effect`)
- **Tailwind CSS v4** via `@tailwindcss/vite`
- **bits-ui** for headless UI primitives (comboboxes, tooltips, dropdowns)
- **TanStack Table** for interactive data tables with sorting and column control
- **OverlayScrollbars** for styled, theme-aware scrollable areas
- **ts-rs** generates TypeScript type bindings from Rust structs
### API Endpoints
| Endpoint | Description |
|---|---|
| `GET /api/health` | Health check |
| `GET /api/status` | Service status, version, and commit hash |
| `GET /api/metrics` | Basic metrics |
| `GET /api/courses/search` | Paginated course search with filters (term, subject, query, open-only, sort) |
| `GET /api/courses/:term/:crn` | Single course detail with instructors and RMP ratings |
| `GET /api/terms` | Available terms from reference cache |
| `GET /api/subjects?term=` | Subjects for a term, ordered by enrollment |
| `GET /api/reference/:category` | Reference data lookups (campuses, instructional methods, etc.) |
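As a rough illustration of the search contract, a client call might look like the following. This is a sketch only: the query parameter names follow the table above, and the base URL and term code are placeholders.

```rust
// Hedged sketch of a call to the course search endpoint. The term code,
// base URL, and exact parameter spelling are assumptions, not project code.
async fn search_courses(base: &str) -> reqwest::Result<serde_json::Value> {
    reqwest::Client::new()
        .get(format!("{base}/api/courses/search"))
        .query(&[("term", "202510"), ("subject", "CS"), ("openOnly", "true")])
        .send()
        .await?
        .json()
        .await
}
```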
## Technical Analysis
### Banner System Integration
Parts of the features and architecture of Ellucian's Banner system are unclear.
The following features, JSON shapes, and more require validation & analysis:
- Struct Nullability
  - Many of the responses returned by Ellucian contain nulls, and it is often uncertain when and why a field is null.
  - Analysis must be conducted to determine when to use a plain string and when a field should be nullable (a pointer/`Option`).
- Multiple Professors / Primary Indicator
- Multiple Meeting Times
- Meeting Schedule Types
  - AFF vs AIN vs AHB, etc.
- Do CRNs repeat between years?
- Check whether partOfTerm is always filled in, and its meaning for various class results.
- Check which API calls are affected by a change of the term / session-ID term selection.
- SessionIDs
  - How long does a session ID work?
  - Do I really require a separate one per term?
  - How many can I activate, and are there any restrictions?
  - How should session IDs be checked as 'invalid'?
  - What action(s) keep a session ID 'active', if any?
- Are there any courses with multiple meeting times?
- Google Calendar link generation, as an alternative to ICS file generation
## Change Identification
- Important attributes of a class will be parsed from both the old and new data.
- These attributes will be compared and given identifiers that can be subscribed to.
- When a user subscribes to one of these identifiers, any changes identified will be sent to them (see the sketch after this list).
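A minimal sketch of that diffing step, assuming a simplified snapshot shape. The field and type names here are hypothetical, not the project's actual model:

```rust
// Hypothetical snapshot of the attributes worth watching.
struct Snapshot {
    enrollment: i32,
    seats_max: i32,
    title: String,
}

// Identifiers users can subscribe to; one is emitted per changed attribute.
#[derive(Debug, PartialEq)]
enum ChangeId {
    Enrollment,
    Capacity,
    Title,
}

fn diff(old: &Snapshot, new: &Snapshot) -> Vec<ChangeId> {
    let mut out = Vec::new();
    if old.enrollment != new.enrollment {
        out.push(ChangeId::Enrollment);
    }
    if old.seats_max != new.seats_max {
        out.push(ChangeId::Capacity);
    }
    if old.title != new.title {
        out.push(ChangeId::Title);
    }
    out
}
```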
## Real-time Suggestions
Various command arguments support real-time suggestions as the user types.
- They must be fast. As ephemeral suggestions that are only relevant for seconds or less, they need to be delivered in less than a second.
- They need to be easy to acquire. With as many commands & arguments to search as I have, it is paramount that the API be easy to understand & use.
- It cannot be complicated. I only have so much time to develop this.
- It does not need to be persistent. Since the data is scraped and rolled periodically from the Banner system, the data used will be deleted and re-requested occasionally.
For these reasons, I believe PostgreSQL is the ideal place to store this data.
It is exceptionally fast, works well in-memory, and is less complicated than most other solutions.
- Only required data about the class will be stored, along with the JSON-encoded string.
  - For now, this would only be the CRN (and possibly the Term).
  - Potentially, a binary encoding could be used for performance, but it is unlikely to be better.
- Database dumps into R2 would be good to ensure that over-scraping of the Banner system does not occur. Dumps should happen:
  - Upon a safe close being requested.
    - Must be done quickly (<8 seconds).
  - Every 30 minutes, if any scraping occurred.
  - May cause locking of commands.
## Scraping System
In order to keep the in-memory database of the bot up-to-date with the Banner system, the API must be scraped.
Scraping will be separated by major to allow priority majors (namely, Computer Science) to be scraped more often than others.
This will lower the overall load on the Banner system while ensuring that data presented by the app is still relevant.
For now, all majors will be scraped fully every 4 hours with at least 5 minutes between each one.
- On startup, priority majors will be scraped first (if required).
- Other majors will be scraped in arbitrary order (if required).
- Scrape timing will be stored in database.
- CRNs will be the primary key within the database.
  - If CRNs are duplicated between terms, then the primary key will be (CRN, Term).
Considerations:
- A change in metadata should decrease the interval.
- The number of courses scraped should change the interval (2 hours per 500 courses involved); one possible reading is sketched below.
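The notes above don't pin down an exact formula; as a hedged sketch, one reading might be:

```rust
use std::time::Duration;

// Illustrative only: 2 hours of interval per 500 courses involved, halved
// when metadata changed recently. The clamp bounds are assumptions, not
// taken from these notes.
fn scrape_interval(courses: u32, metadata_changed: bool) -> Duration {
    let mut hours = (courses as f64 / 500.0) * 2.0;
    if metadata_changed {
        hours /= 2.0;
    }
    hours = hours.clamp(0.5, 4.0);
    Duration::from_secs_f64(hours * 3600.0)
}
```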
## Rate Limiting, Costs & Bursting
Ideally, this application would implement dynamic rate limiting to ensure the server is never overloaded.
Better still, it would ensure that priority requests (commands) are dispatched faster than background processes (scraping), while weighting different requests differently.
For example, a recent scrape of 350 classes should be weighted 5x more than a search for 8 classes by a user.
Still, even if the cap would not normally allow the request to be processed immediately, the small user search should proceed under a small bursting cap.
The requirements for this hypothetical system would be (a sketch follows the list):
- Conditional Bursting: background processes or other requests deemed "low priority" are not allowed to use bursting.
- Arbitrary Costs: rate limits are accounted in terms of request size/speed, so that small, simple requests can be made more frequently than large ones.
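A minimal sketch of such a limiter, assuming a single token bucket with weighted costs and a burst reserve that only priority requests may spend. This is illustrative, not the project's implementation:

```rust
// Illustrative weighted token bucket. Background work spends only the steady
// budget; priority requests may dip into the burst reserve (below zero).
struct WeightedLimiter {
    tokens: f64, // available steady-state budget
    cap: f64,    // maximum steady-state tokens
    burst: f64,  // extra reserve, priority-only
}

impl WeightedLimiter {
    /// `cost` reflects request size/speed; returns whether the caller may proceed.
    fn try_acquire(&mut self, cost: f64, priority: bool) -> bool {
        let floor = if priority { -self.burst } else { 0.0 };
        if self.tokens - cost >= floor {
            self.tokens -= cost;
            true
        } else {
            false
        }
    }

    /// Refill on a timer; never exceeds the steady-state cap.
    fn refill(&mut self, amount: f64) {
        self.tokens = (self.tokens + amount).min(self.cap);
    }
}
```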
@@ -1,11 +1,17 @@
# Banner
All notes on the internal workings of the Banner system by Ellucian.
## Sessions
All notes on the internal workings of Sessions in the Banner system.
- Sessions are generated on demand with a random string of characters.
  - The format is `{5 random characters}{milliseconds since epoch}`.
  - Example: ``
- Sessions are invalidated after 30 minutes, though this may change.
  - This delay can be found in the original HTML returned: find `meta[name="maxInactiveInterval"]` and read the `content` attribute.
  - This is read at runtime (in the browser, by JavaScript) on initialization.
- Multiple timers exist; one is the Inactivity Timer.
  - A dialog will appear asking the user to continue their session.
  - If they click the button, the session will be extended via the keepAliveURL (see `meta[name="keepAliveURL"]`).
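A std-only sketch that reproduces the observed format. The alphabet and the randomness source below are assumptions; Banner's actual generator is unknown:

```rust
use std::time::{SystemTime, UNIX_EPOCH};

// Reproduces `{5 random characters}{milliseconds since epoch}`. The alphabet
// and the tiny LCG are illustrative stand-ins, not Banner's actual scheme.
fn make_session_id() -> String {
    const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
    let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    let mut seed = now.as_nanos() as u64 | 1;
    let random: String = (0..5)
        .map(|_| {
            // LCG step keeps the sketch dependency-free.
            seed = seed
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            ALPHABET[(seed >> 33) as usize % ALPHABET.len()] as char
        })
        .collect();
    format!("{random}{}", now.as_millis())
}
```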
@@ -0,0 +1,43 @@
# Documentation
This folder contains detailed documentation for the Banner project. This file acts as the index.
## Files
- [`CHANGELOG.md`](CHANGELOG.md) - Notable changes by version
- [`ROADMAP.md`](ROADMAP.md) - Planned features and priorities
- [`BANNER.md`](BANNER.md) - General API documentation on the Banner system
- [`ARCHITECTURE.md`](ARCHITECTURE.md) - Technical implementation details, system design, and analysis
## Samples
The `samples/` folder contains real Banner API response examples:
- `search/` - Course search API responses with various filters
- [`searchResults.json`](samples/search/searchResults.json)
- [`searchResults_500.json`](samples/search/searchResults_500.json)
- [`searchResults_CS500.json`](samples/search/searchResults_CS500.json)
- [`searchResults_malware.json`](samples/search/searchResults_malware.json)
- `meta/` - Metadata API responses (terms, subjects, instructors, etc.)
- [`get_attribute.json`](samples/meta/get_attribute.json)
- [`get_campus.json`](samples/meta/get_campus.json)
- [`get_instructionalMethod.json`](samples/meta/get_instructionalMethod.json)
- [`get_instructor.json`](samples/meta/get_instructor.json)
- [`get_partOfTerm.json`](samples/meta/get_partOfTerm.json)
- [`get_subject.json`](samples/meta/get_subject.json)
- [`getTerms.json`](samples/meta/getTerms.json)
- `course/` - Course detail API responses (HTML and JSON)
- [`getFacultyMeetingTimes.json`](samples/course/getFacultyMeetingTimes.json)
- [`getClassDetails.html`](samples/course/getClassDetails.html)
- [`getCorequisites.html`](samples/course/getCorequisites.html)
- [`getCourseDescription.html`](samples/course/getCourseDescription.html)
- [`getEnrollmentInfo.html`](samples/course/getEnrollmentInfo.html)
- [`getFees.html`](samples/course/getFees.html)
- [`getLinkedSections.html`](samples/course/getLinkedSections.html)
- [`getRestrictions.html`](samples/course/getRestrictions.html)
- [`getSectionAttributes.html`](samples/course/getSectionAttributes.html)
- [`getSectionBookstoreDetails.html`](samples/course/getSectionBookstoreDetails.html)
- [`getSectionPrerequisites.html`](samples/course/getSectionPrerequisites.html)
- [`getXlistSections.html`](samples/course/getXlistSections.html)
These samples are used for development, testing, and understanding the Banner API structure.
@@ -0,0 +1,44 @@
# Roadmap
## Now
- **Discord bot revival** - Audit and fix all existing commands (search, terms, ics, gcal) against the current data model. Add test coverage. Bot has been untouched since ~0.3.4 and commands may be broken.
- **Notification and subscription system** - Subscribe to courses and get alerts on seat availability, waitlist movement, and detail changes (time, location, professor, seats). Deliver via Discord bot and web dashboard.
- **Mobile/responsive redesign** - Hamburger nav for sidebar, responsive table column hiding, mobile-friendly admin pages. Timeline is the only area with solid mobile support; most pages need work.
- **Professor name search filter** - Filter search results by instructor. Backend code exists but is commented out.
- **Search field autocomplete** - Typeahead for course titles, course numbers, professors, and terms.
- **Large component extraction** - Break down CourseTable, Instructors page, and TimelineCanvas into smaller, testable subcomponents.
## Soon
- **Bot slash command parity** - Keep Discord bot commands in sync with web features: timeline summaries, RMP lookups, audit log highlights, notification management via bot.
- **E2E test suite** - Playwright tests for critical user flows: search, login, admin pages, timeline interaction.
- **Settings page** - Replace placeholder with theme preferences, notification settings, default term/subject selection.
- **Profile enhancements** - Expand from read-only stub to subscription management, saved searches, and course watchlists.
- **Smart time-of-day search parsing** - Support natural queries like "2 PM", "ends by 2 PM", "after 2 PM" mapped to time ranges.
- **Multi-term querying** - Query across multiple terms in a single search instead of one at a time.
- **Historical analytics visualization** - Build trend UI on top of existing course metrics and timeline API. Fill-rate charts per course or professor.
- **Schedule builder** - Visual weekly schedule tool for assembling a conflict-free course lineup. Timeline visualization serves as a foundation.
## Eventually
- **API rate limiting** - Rate limiter on public API endpoints. Needed before any public or external exposure.
- **Bulk admin operations** - Batch RMP match/reject, bulk user management, data export from admin pages.
- **Degree audit helper** - Map available courses to degree requirements and suggest what to take next.
- **DM support** - Allow the Discord bot to respond in direct messages, not just guild channels.
- **"Classes Now" command** - Find classes currently in session based on the current day and time.
- **Privileged error feedback** - Detailed error information surfaced to bot admins when commands fail.
## Done
- **Interactive timeline visualization** - D3 canvas with pan/zoom, touch gestures, and enrollment aggregation API. *(0.6.0)*
- **Scraper analytics dashboard** - Timeseries charts, subject monitoring, adaptive scheduling, and admin endpoints. *(0.6.0)*
- **WebSocket job monitoring** - Real-time scrape job queue with live connection status indicators. *(0.6.0)*
- **Course change audit log** - Field-level change tracking with smart diffing, conditional caching, and auto-refresh. *(0.6.0)*
- **User authentication system** - Discord OAuth, sessions, admin roles, and login page. *(0.6.0)*
- **Dynamic scraper scheduling** - Adaptive scrape intervals based on change frequency and course volume. *(0.6.0)*
- **Metrics dashboard** - Scraper and service metrics surfaced on the web dashboard. *(0.6.0)*
- **Subject/major search filter** - Multi-select subject filtering with searchable comboboxes. *(0.5.0)*
- **Web course search UI** - Browser-based course search with interactive data table, sorting, pagination, and column controls. *(0.4.0)*
- **RateMyProfessor integration** - Bulk professor sync via GraphQL with inline ratings in search results. *(0.4.0)*
- **Test coverage expansion** - Unit tests for course formatting, API client, query builder, CLI args, and config parsing. *(0.3.4–0.4.0)*
@@ -0,0 +1,56 @@
-- Drop all old tables
DROP TABLE IF EXISTS scrape_jobs;
DROP TABLE IF EXISTS course_metrics;
DROP TABLE IF EXISTS course_audits;
DROP TABLE IF EXISTS courses;
-- Enums for scrape_jobs
CREATE TYPE scrape_priority AS ENUM ('Low', 'Medium', 'High', 'Critical');
CREATE TYPE target_type AS ENUM ('Subject', 'CourseRange', 'CrnList', 'SingleCrn');
-- Main course data table
CREATE TABLE courses (
id SERIAL PRIMARY KEY,
crn VARCHAR NOT NULL,
subject VARCHAR NOT NULL,
course_number VARCHAR NOT NULL,
title VARCHAR NOT NULL,
term_code VARCHAR NOT NULL,
enrollment INTEGER NOT NULL,
max_enrollment INTEGER NOT NULL,
wait_count INTEGER NOT NULL,
wait_capacity INTEGER NOT NULL,
last_scraped_at TIMESTAMPTZ NOT NULL,
UNIQUE(crn, term_code)
);
-- Time-series data for course enrollment
CREATE TABLE course_metrics (
id SERIAL PRIMARY KEY,
course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
timestamp TIMESTAMPTZ NOT NULL,
enrollment INTEGER NOT NULL,
wait_count INTEGER NOT NULL,
seats_available INTEGER NOT NULL
);
-- Audit trail for changes to course data
CREATE TABLE course_audits (
id SERIAL PRIMARY KEY,
course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
timestamp TIMESTAMPTZ NOT NULL,
field_changed VARCHAR NOT NULL,
old_value TEXT NOT NULL,
new_value TEXT NOT NULL
);
-- Job queue for the scraper
CREATE TABLE scrape_jobs (
id SERIAL PRIMARY KEY,
target_type target_type NOT NULL,
target_payload JSONB NOT NULL,
priority scrape_priority NOT NULL,
execute_at TIMESTAMPTZ NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
locked_at TIMESTAMPTZ
);
@@ -0,0 +1,3 @@
-- Add retry tracking columns to scrape_jobs table
ALTER TABLE scrape_jobs ADD COLUMN retry_count INTEGER NOT NULL DEFAULT 0 CHECK (retry_count >= 0);
ALTER TABLE scrape_jobs ADD COLUMN max_retries INTEGER NOT NULL DEFAULT 5 CHECK (max_retries >= 0);
@@ -0,0 +1,45 @@
-- Performance optimization indexes
-- Index for term-based queries (most common access pattern)
CREATE INDEX IF NOT EXISTS idx_courses_term_code ON courses(term_code);
-- Index for subject-based filtering
CREATE INDEX IF NOT EXISTS idx_courses_subject ON courses(subject);
-- Composite index for subject + term queries
CREATE INDEX IF NOT EXISTS idx_courses_subject_term ON courses(subject, term_code);
-- Index for course number lookups
CREATE INDEX IF NOT EXISTS idx_courses_course_number ON courses(course_number);
-- Index for last scraped timestamp (useful for finding stale data)
CREATE INDEX IF NOT EXISTS idx_courses_last_scraped ON courses(last_scraped_at);
-- Index for course metrics time-series queries
-- BRIN index is optimal for time-series data
CREATE INDEX IF NOT EXISTS idx_course_metrics_timestamp ON course_metrics USING BRIN(timestamp);
-- B-tree index for specific course metric lookups
CREATE INDEX IF NOT EXISTS idx_course_metrics_course_timestamp
ON course_metrics(course_id, timestamp DESC);
-- Partial index for pending scrape jobs (only unlocked jobs)
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_pending
ON scrape_jobs(execute_at ASC)
WHERE locked_at IS NULL;
-- Index for high-priority job processing
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_priority_pending
ON scrape_jobs(priority DESC, execute_at ASC)
WHERE locked_at IS NULL;
-- Index for retry tracking
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_retry_count
ON scrape_jobs(retry_count)
WHERE retry_count > 0 AND locked_at IS NULL;
-- Analyze tables to update statistics
ANALYZE courses;
ANALYZE course_metrics;
ANALYZE course_audits;
ANALYZE scrape_jobs;
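The partial indexes above are shaped for a job-pickup query along the following lines. A hedged sqlx sketch, not the project's actual scheduler; the `FOR UPDATE SKIP LOCKED` claim pattern is an assumption:

```rust
// Illustrative job claim matching idx_scrape_jobs_priority_pending. Meant to
// run inside a transaction; table and column names follow the migration above.
async fn claim_next_job(tx: &mut sqlx::PgConnection) -> sqlx::Result<Option<i32>> {
    sqlx::query_scalar::<_, i32>(
        "UPDATE scrape_jobs SET locked_at = NOW()
         WHERE id = (
             SELECT id FROM scrape_jobs
             WHERE locked_at IS NULL AND execute_at <= NOW()
             ORDER BY priority DESC, execute_at ASC
             LIMIT 1
             FOR UPDATE SKIP LOCKED
         )
         RETURNING id",
    )
    .fetch_optional(tx)
    .await
}
```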
@@ -0,0 +1,53 @@
-- Index Optimization Follow-up Migration
-- Reason: Redundant with composite index idx_courses_subject_term
DROP INDEX IF EXISTS idx_courses_subject;
-- Remove: idx_scrape_jobs_retry_count
DROP INDEX IF EXISTS idx_scrape_jobs_retry_count;
-- Purpose: Optimize the scheduler's frequent query (runs every 60 seconds)
CREATE INDEX IF NOT EXISTS idx_scrape_jobs_scheduler_lookup
ON scrape_jobs(target_type, target_payload)
WHERE locked_at IS NULL;
-- Note: We use (target_type, target_payload) instead of including locked_at
-- in the index columns because:
-- 1. The WHERE clause filters locked_at IS NULL (partial index optimization)
-- 2. target_payload is JSONB and already large; keeping it as an indexed column
-- allows PostgreSQL to use index-only scans for the SELECT target_payload query
-- 3. This design minimizes index size while maximizing query performance
-- Purpose: Enable efficient audit trail queries by course
CREATE INDEX IF NOT EXISTS idx_course_audits_course_timestamp
ON course_audits(course_id, timestamp DESC);
-- Purpose: Enable queries like "Show all changes in the last 24 hours"
CREATE INDEX IF NOT EXISTS idx_course_audits_timestamp
ON course_audits(timestamp DESC);
-- The BRIN index on course_metrics(timestamp) assumes data is inserted in
-- chronological order. BRIN indexes are only effective when data is physically
-- ordered on disk. If you perform:
-- - Backfills of historical data
-- - Out-of-order inserts
-- - Frequent UPDATEs that move rows
--
-- Then the BRIN index effectiveness will degrade. Monitor with:
-- SELECT * FROM brin_page_items(get_raw_page('idx_course_metrics_timestamp', 1));
--
-- If you see poor selectivity, consider:
-- 1. REINDEX to rebuild after bulk loads
-- 2. Switch to B-tree if inserts are not time-ordered
-- 3. Use CLUSTER to physically reorder the table (requires downtime)
COMMENT ON INDEX idx_course_metrics_timestamp IS
'BRIN index - requires chronologically ordered inserts for efficiency. Monitor selectivity.';
-- Update statistics for query planner
ANALYZE courses;
ANALYZE course_metrics;
ANALYZE course_audits;
ANALYZE scrape_jobs;
@@ -0,0 +1,83 @@
-- ============================================================
-- Expand courses table with rich Banner API fields
-- ============================================================
-- Section identifiers
ALTER TABLE courses ADD COLUMN sequence_number VARCHAR;
ALTER TABLE courses ADD COLUMN part_of_term VARCHAR;
-- Schedule & delivery (store codes, descriptions come from reference_data)
ALTER TABLE courses ADD COLUMN instructional_method VARCHAR;
ALTER TABLE courses ADD COLUMN campus VARCHAR;
-- Credit hours
ALTER TABLE courses ADD COLUMN credit_hours INTEGER;
ALTER TABLE courses ADD COLUMN credit_hour_low INTEGER;
ALTER TABLE courses ADD COLUMN credit_hour_high INTEGER;
-- Cross-listing
ALTER TABLE courses ADD COLUMN cross_list VARCHAR;
ALTER TABLE courses ADD COLUMN cross_list_capacity INTEGER;
ALTER TABLE courses ADD COLUMN cross_list_count INTEGER;
-- Section linking
ALTER TABLE courses ADD COLUMN link_identifier VARCHAR;
ALTER TABLE courses ADD COLUMN is_section_linked BOOLEAN;
-- JSONB columns for 1-to-many data
ALTER TABLE courses ADD COLUMN meeting_times JSONB NOT NULL DEFAULT '[]'::jsonb;
ALTER TABLE courses ADD COLUMN attributes JSONB NOT NULL DEFAULT '[]'::jsonb;
-- ============================================================
-- Full-text search support
-- ============================================================
-- Generated tsvector for word-based search on title
ALTER TABLE courses ADD COLUMN title_search tsvector
GENERATED ALWAYS AS (to_tsvector('simple', coalesce(title, ''))) STORED;
CREATE INDEX idx_courses_title_search ON courses USING GIN (title_search);
-- Trigram index for substring/ILIKE search on title
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX idx_courses_title_trgm ON courses USING GIN (title gin_trgm_ops);
-- ============================================================
-- New filter indexes
-- ============================================================
CREATE INDEX idx_courses_instructional_method ON courses(instructional_method);
CREATE INDEX idx_courses_campus ON courses(campus);
-- Composite for "open CS courses in Fall 2024" pattern
CREATE INDEX idx_courses_term_subject_avail ON courses(term_code, subject, max_enrollment, enrollment);
-- ============================================================
-- Instructors table (normalized, deduplicated)
-- ============================================================
CREATE TABLE instructors (
banner_id VARCHAR PRIMARY KEY,
display_name VARCHAR NOT NULL,
email VARCHAR
);
CREATE TABLE course_instructors (
course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
instructor_id VARCHAR NOT NULL REFERENCES instructors(banner_id) ON DELETE CASCADE,
is_primary BOOLEAN NOT NULL DEFAULT false,
PRIMARY KEY (course_id, instructor_id)
);
CREATE INDEX idx_course_instructors_instructor ON course_instructors(instructor_id);
-- ============================================================
-- Reference data table (all code→description lookups)
-- ============================================================
CREATE TABLE reference_data (
category VARCHAR NOT NULL,
code VARCHAR NOT NULL,
description VARCHAR NOT NULL,
PRIMARY KEY (category, code)
);
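Taken together, the two title indexes support a search along these lines: word matches through the generated tsvector, with the trigram index backing an `ILIKE` substring fallback. A sketch only; the project's real search endpoint may differ:

```rust
// Illustrative title lookup combining full-text and substring search.
// Not the project's search code; column names follow the migration above.
async fn search_titles(
    pool: &sqlx::PgPool,
    term_code: &str,
    q: &str,
) -> sqlx::Result<Vec<(String, String)>> {
    sqlx::query_as::<_, (String, String)>(
        "SELECT crn, title FROM courses
         WHERE term_code = $1
           AND (title_search @@ websearch_to_tsquery('simple', $2)
                OR title ILIKE '%' || $2 || '%')
         LIMIT 50",
    )
    .bind(term_code)
    .bind(q)
    .fetch_all(pool)
    .await
}
```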
@@ -0,0 +1,17 @@
-- RMP professor data (bulk synced from RateMyProfessors)
CREATE TABLE rmp_professors (
legacy_id INTEGER PRIMARY KEY,
graphql_id VARCHAR NOT NULL,
first_name VARCHAR NOT NULL,
last_name VARCHAR NOT NULL,
department VARCHAR,
avg_rating REAL,
avg_difficulty REAL,
num_ratings INTEGER NOT NULL DEFAULT 0,
would_take_again_pct REAL,
last_synced_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Link Banner instructors to RMP professors
ALTER TABLE instructors ADD COLUMN rmp_legacy_id INTEGER REFERENCES rmp_professors(legacy_id);
ALTER TABLE instructors ADD COLUMN rmp_match_status VARCHAR NOT NULL DEFAULT 'pending';
@@ -0,0 +1,7 @@
-- Add queued_at column to track when a job last entered the "ready to pick up" state.
-- For fresh jobs this equals execute_at; for retried jobs it is updated to NOW().
ALTER TABLE scrape_jobs
ADD COLUMN queued_at TIMESTAMPTZ NOT NULL DEFAULT NOW();
-- Backfill existing rows: set queued_at = execute_at (best approximation)
UPDATE scrape_jobs SET queued_at = execute_at;
@@ -0,0 +1,19 @@
CREATE TABLE users (
discord_id BIGINT PRIMARY KEY,
discord_username TEXT NOT NULL,
discord_avatar_hash TEXT,
is_admin BOOLEAN NOT NULL DEFAULT false,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE TABLE user_sessions (
id TEXT PRIMARY KEY,
user_id BIGINT NOT NULL REFERENCES users(discord_id) ON DELETE CASCADE,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
expires_at TIMESTAMPTZ NOT NULL,
last_active_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX idx_user_sessions_user_id ON user_sessions(user_id);
CREATE INDEX idx_user_sessions_expires_at ON user_sessions(expires_at);
@@ -0,0 +1,80 @@
-- Collapse instructors from per-banner-id rows to per-person rows (deduped by lowercased email).
-- All existing RMP matches are deliberately dropped; the new auto-matcher will re-score from scratch.
-- 1. Create the new instructors table (1 row per person, keyed by email)
CREATE TABLE instructors_new (
id SERIAL PRIMARY KEY,
display_name VARCHAR NOT NULL,
email VARCHAR NOT NULL,
rmp_professor_id INTEGER UNIQUE REFERENCES rmp_professors(legacy_id),
rmp_match_status VARCHAR NOT NULL DEFAULT 'unmatched',
CONSTRAINT instructors_email_unique UNIQUE (email)
);
-- 2. Populate from existing data, deduplicating by lowercased email.
-- For each email, pick the display_name from the row with the highest banner_id
-- (deterministic tiebreaker). All rmp fields start fresh (NULL / 'unmatched').
INSERT INTO instructors_new (display_name, email)
SELECT DISTINCT ON (LOWER(email))
display_name,
LOWER(email)
FROM instructors
ORDER BY LOWER(email), banner_id DESC;
-- 3. Create the new course_instructors table with integer FK and banner_id column
CREATE TABLE course_instructors_new (
course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
instructor_id INTEGER NOT NULL REFERENCES instructors_new(id) ON DELETE CASCADE,
banner_id VARCHAR NOT NULL,
is_primary BOOLEAN NOT NULL DEFAULT false,
PRIMARY KEY (course_id, instructor_id)
);
-- 4. Populate from old data, mapping old banner_id → new instructor id via lowercased email.
-- Use DISTINCT ON to handle cases where multiple old banner_ids for the same person
-- taught the same course (would cause duplicate (course_id, instructor_id) pairs).
INSERT INTO course_instructors_new (course_id, instructor_id, banner_id, is_primary)
SELECT DISTINCT ON (ci.course_id, inew.id)
ci.course_id,
inew.id,
ci.instructor_id, -- old banner_id
ci.is_primary
FROM course_instructors ci
JOIN instructors iold ON iold.banner_id = ci.instructor_id
JOIN instructors_new inew ON inew.email = LOWER(iold.email)
ORDER BY ci.course_id, inew.id, ci.is_primary DESC;
-- 5. Drop old tables (course_instructors first due to FK dependency)
DROP TABLE course_instructors;
DROP TABLE instructors;
-- 6. Rename new tables into place
ALTER TABLE instructors_new RENAME TO instructors;
ALTER TABLE course_instructors_new RENAME TO course_instructors;
-- 7. Rename constraints to match the final table names
ALTER TABLE instructors RENAME CONSTRAINT instructors_new_pkey TO instructors_pkey;
ALTER TABLE instructors RENAME CONSTRAINT instructors_new_rmp_professor_id_key TO instructors_rmp_professor_id_key;
ALTER TABLE course_instructors RENAME CONSTRAINT course_instructors_new_pkey TO course_instructors_pkey;
-- 8. Recreate indexes
CREATE INDEX idx_course_instructors_instructor ON course_instructors (instructor_id);
CREATE INDEX idx_instructors_rmp_status ON instructors (rmp_match_status);
CREATE INDEX idx_instructors_email ON instructors (email);
-- 9. Create rmp_match_candidates table
CREATE TABLE rmp_match_candidates (
id SERIAL PRIMARY KEY,
instructor_id INTEGER NOT NULL REFERENCES instructors(id) ON DELETE CASCADE,
rmp_legacy_id INTEGER NOT NULL REFERENCES rmp_professors(legacy_id),
score REAL NOT NULL,
score_breakdown JSONB NOT NULL DEFAULT '{}',
status VARCHAR NOT NULL DEFAULT 'pending',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
resolved_at TIMESTAMPTZ,
resolved_by BIGINT REFERENCES users(discord_id),
CONSTRAINT uq_candidate_pair UNIQUE (instructor_id, rmp_legacy_id)
);
CREATE INDEX idx_match_candidates_instructor ON rmp_match_candidates (instructor_id);
CREATE INDEX idx_match_candidates_status ON rmp_match_candidates (status);
@@ -0,0 +1,24 @@
-- Multi-RMP profile support: allow many RMP profiles per instructor.
-- Each RMP profile still links to at most one instructor (rmp_legacy_id UNIQUE).
-- 1. Create junction table
CREATE TABLE instructor_rmp_links (
id SERIAL PRIMARY KEY,
instructor_id INTEGER NOT NULL REFERENCES instructors(id) ON DELETE CASCADE,
rmp_legacy_id INTEGER NOT NULL UNIQUE REFERENCES rmp_professors(legacy_id),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_by BIGINT REFERENCES users(discord_id),
source VARCHAR NOT NULL DEFAULT 'manual' -- 'auto' | 'manual'
);
CREATE INDEX idx_instructor_rmp_links_instructor ON instructor_rmp_links (instructor_id);
-- 2. Migrate existing matches
INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, source)
SELECT id, rmp_professor_id,
CASE rmp_match_status WHEN 'auto' THEN 'auto' ELSE 'manual' END
FROM instructors
WHERE rmp_professor_id IS NOT NULL;
-- 3. Drop old column (and its unique constraint)
ALTER TABLE instructors DROP COLUMN rmp_professor_id;
@@ -0,0 +1,31 @@
-- Scrape job results log: one row per completed (or failed) job for effectiveness tracking.
CREATE TABLE scrape_job_results (
id BIGSERIAL PRIMARY KEY,
target_type target_type NOT NULL,
payload JSONB NOT NULL,
priority scrape_priority NOT NULL,
-- Timing
queued_at TIMESTAMPTZ NOT NULL,
started_at TIMESTAMPTZ NOT NULL,
completed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
duration_ms INT NOT NULL,
-- Outcome
success BOOLEAN NOT NULL,
error_message TEXT,
retry_count INT NOT NULL DEFAULT 0,
-- Effectiveness (NULL when success = false)
courses_fetched INT,
courses_changed INT,
courses_unchanged INT,
audits_generated INT,
metrics_generated INT
);
CREATE INDEX idx_scrape_job_results_target_time
ON scrape_job_results (target_type, completed_at);
CREATE INDEX idx_scrape_job_results_completed
ON scrape_job_results (completed_at);
@@ -0,0 +1,13 @@
-- Indexes for the timeline aggregation endpoint.
-- The query buckets course_metrics by 15-minute intervals, joins to courses
-- for subject, and aggregates enrollment. These indexes support efficient
-- time-range scans and the join.
-- Primary access pattern: scan course_metrics by timestamp range
CREATE INDEX IF NOT EXISTS idx_course_metrics_timestamp
ON course_metrics (timestamp);
-- Composite index for the DISTINCT ON (bucket, course_id) ordered by timestamp DESC
-- to efficiently pick the latest metric per course per bucket.
CREATE INDEX IF NOT EXISTS idx_course_metrics_course_timestamp
ON course_metrics (course_id, timestamp DESC);
@@ -0,0 +1,5 @@
-- Add structured first/last name columns to instructors.
-- Populated by Rust-side backfill (parse_banner_name) since we need
-- HTML entity decoding and suffix extraction that SQL can't handle well.
ALTER TABLE instructors ADD COLUMN first_name VARCHAR;
ALTER TABLE instructors ADD COLUMN last_name VARCHAR;
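For orientation, a greatly simplified sketch of what such a parser might do. The real `parse_banner_name` also decodes HTML entities and extracts suffixes, and the "Last, First" input shape is my assumption:

```rust
// Greatly simplified: split a "Last, First" display name into parts.
// Illustrative only; the real backfill handles entities and suffixes.
fn split_display_name(display: &str) -> (Option<String>, Option<String>) {
    match display.split_once(',') {
        Some((last, first)) => (
            Some(first.trim().to_string()),
            Some(last.trim().to_string()),
        ),
        None => (None, None),
    }
}
```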
@@ -0,0 +1,32 @@
/**
* Generate TypeScript bindings from Rust types (ts-rs).
*
* Usage: bun scripts/bindings.ts
*/
import { readdirSync, writeFileSync, rmSync } from "fs";
import { run } from "./lib/proc";
const BINDINGS_DIR = "web/src/lib/bindings";
// Build test binary first (slow part) — fail before deleting anything
run(["cargo", "test", "--no-run"]);
// Clean slate
rmSync(BINDINGS_DIR, { recursive: true, force: true });
// Run the export (fast, already compiled)
run(["cargo", "test", "export_bindings"]);
// Auto-generate index.ts from emitted .ts files
const types = readdirSync(BINDINGS_DIR)
.filter((f) => f.endsWith(".ts") && f !== "index.ts")
.map((f) => f.replace(/\.ts$/, ""))
.sort();
writeFileSync(
`${BINDINGS_DIR}/index.ts`,
types.map((t) => `export type { ${t} } from "./${t}";`).join("\n") + "\n",
);
console.log(`Generated ${BINDINGS_DIR}/index.ts (${types.length} types)`);
@@ -0,0 +1,45 @@
/**
* Production build.
*
* Usage: bun scripts/build.ts [flags]
*
* Flags:
* -d, --debug Debug build instead of release
* -f, --frontend-only Frontend only
* -b, --backend-only Backend only
*/
import { parseFlags, c } from "./lib/fmt";
import { run } from "./lib/proc";
const { flags } = parseFlags(
process.argv.slice(2),
{
debug: "bool",
"frontend-only": "bool",
"backend-only": "bool",
} as const,
{ d: "debug", f: "frontend-only", b: "backend-only" },
{ debug: false, "frontend-only": false, "backend-only": false },
);
if (flags["frontend-only"] && flags["backend-only"]) {
console.error("Cannot use -f and -b together");
process.exit(1);
}
const buildFrontend = !flags["backend-only"];
const buildBackend = !flags["frontend-only"];
const profile = flags.debug ? "debug" : "release";
if (buildFrontend) {
console.log(c("1;36", "→ Building frontend..."));
run(["bun", "run", "--cwd", "web", "build"]);
}
if (buildBackend) {
console.log(c("1;36", `→ Building backend (${profile})...`));
const cmd = ["cargo", "build", "--bin", "banner"];
if (!flags.debug) cmd.push("--release");
run(cmd);
}
@@ -0,0 +1,21 @@
{
"lockfileVersion": 1,
"configVersion": 1,
"workspaces": {
"": {
"name": "banner-scripts",
"devDependencies": {
"@types/bun": "^1.3.8",
},
},
},
"packages": {
"@types/bun": ["@types/bun@1.3.8", "", { "dependencies": { "bun-types": "1.3.8" } }, "sha512-3LvWJ2q5GerAXYxO2mffLTqOzEu5qnhEAlh48Vnu8WQfnmSwbgagjGZV6BoHKJztENYEDn6QmVd949W4uESRJA=="],
"@types/node": ["@types/node@25.1.0", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-t7frlewr6+cbx+9Ohpl0NOTKXZNV9xHRmNOvql47BFJKcEG1CxtxlPEEe+gR9uhVWM4DwhnvTF110mIL4yP9RA=="],
"bun-types": ["bun-types@1.3.8", "", { "dependencies": { "@types/node": "*" } }, "sha512-fL99nxdOWvV4LqjmC+8Q9kW3M4QTtTR1eePs94v5ctGqU8OeceWrSUaRw3JYb7tU3FkMIAjkueehrHPPPGKi5Q=="],
"undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
}
}
@@ -0,0 +1,241 @@
/**
* Run all project checks in parallel. Auto-fixes formatting when safe.
*
* Usage: bun scripts/check.ts [--fix|-f]
*/
import { c, elapsed, isStderrTTY } from "./lib/fmt";
import { run, runPiped, spawnCollect, raceInOrder, type CollectResult } from "./lib/proc";
import { existsSync, statSync, readdirSync, writeFileSync, rmSync } from "fs";
const fix = process.argv.includes("--fix") || process.argv.includes("-f");
// ---------------------------------------------------------------------------
// Fix path: format + clippy fix, then fall through to verification
// ---------------------------------------------------------------------------
if (fix) {
console.log(c("1;36", "→ Fixing..."));
run(["cargo", "fmt", "--all"]);
run(["bun", "run", "--cwd", "web", "format"]);
run([
"cargo", "clippy", "--all-features", "--fix", "--allow-dirty", "--allow-staged",
"--", "--deny", "warnings",
]);
console.log(c("1;36", "→ Verifying..."));
}
// ---------------------------------------------------------------------------
// Ensure TypeScript bindings are up-to-date before frontend checks
// ---------------------------------------------------------------------------
{
const BINDINGS_DIR = "web/src/lib/bindings";
let newestSrcMtime = 0;
for (const file of new Bun.Glob("src/**/*.rs").scanSync(".")) {
const mt = statSync(file).mtimeMs;
if (mt > newestSrcMtime) newestSrcMtime = mt;
}
for (const f of ["Cargo.toml", "Cargo.lock"]) {
if (existsSync(f)) {
const mt = statSync(f).mtimeMs;
if (mt > newestSrcMtime) newestSrcMtime = mt;
}
}
let newestBindingMtime = 0;
if (existsSync(BINDINGS_DIR)) {
for (const file of new Bun.Glob("**/*").scanSync(BINDINGS_DIR)) {
const mt = statSync(`${BINDINGS_DIR}/${file}`).mtimeMs;
if (mt > newestBindingMtime) newestBindingMtime = mt;
}
}
const stale = newestBindingMtime === 0 || newestSrcMtime > newestBindingMtime;
if (stale) {
const t = Date.now();
process.stdout.write(
c("1;36", "→ Regenerating TypeScript bindings (Rust sources changed)...") + "\n",
);
run(["cargo", "test", "--no-run"]);
rmSync(BINDINGS_DIR, { recursive: true, force: true });
run(["cargo", "test", "export_bindings"]);
const types = readdirSync(BINDINGS_DIR)
.filter((f) => f.endsWith(".ts") && f !== "index.ts")
.map((f) => f.replace(/\.ts$/, ""))
.sort();
writeFileSync(
`${BINDINGS_DIR}/index.ts`,
types.map((t) => `export type { ${t} } from "./${t}";`).join("\n") + "\n",
);
process.stdout.write(c("32", "✓ bindings") + ` (${elapsed(t)}s, ${types.length} types)\n`);
} else {
process.stdout.write(c("2", "· bindings up-to-date, skipped") + "\n");
}
}
// ---------------------------------------------------------------------------
// Check definitions
// ---------------------------------------------------------------------------
interface Check {
name: string;
cmd: string[];
hint?: string;
}
const checks: Check[] = [
{
name: "rustfmt",
cmd: ["cargo", "fmt", "--all", "--", "--check"],
hint: "Run 'cargo fmt --all' to see and fix formatting issues.",
},
{ name: "clippy", cmd: ["cargo", "clippy", "--all-features", "--", "--deny", "warnings"] },
{ name: "cargo-check", cmd: ["cargo", "check", "--all-features"] },
{ name: "rust-test", cmd: ["cargo", "nextest", "run", "-E", "not test(export_bindings)"] },
{ name: "svelte-check", cmd: ["bun", "run", "--cwd", "web", "check"] },
{ name: "biome", cmd: ["bun", "run", "--cwd", "web", "format:check"] },
{ name: "biome-lint", cmd: ["bun", "run", "--cwd", "web", "lint"] },
{ name: "web-test", cmd: ["bun", "run", "--cwd", "web", "test"] },
{ name: "actionlint", cmd: ["actionlint"] },
];
// ---------------------------------------------------------------------------
// Domain groups: formatter → { peers, format command, sanity rechecks }
// ---------------------------------------------------------------------------
const domains: Record<
string,
{
peers: string[];
format: () => ReturnType<typeof runPiped>;
recheck: Check[];
}
> = {
rustfmt: {
peers: ["clippy", "cargo-check", "rust-test"],
format: () => runPiped(["cargo", "fmt", "--all"]),
recheck: [
{ name: "rustfmt", cmd: ["cargo", "fmt", "--all", "--", "--check"] },
{ name: "cargo-check", cmd: ["cargo", "check", "--all-features"] },
],
},
biome: {
peers: ["svelte-check", "biome-lint", "web-test"],
format: () => runPiped(["bun", "run", "--cwd", "web", "format"]),
recheck: [
{ name: "biome", cmd: ["bun", "run", "--cwd", "web", "format:check"] },
{ name: "svelte-check", cmd: ["bun", "run", "--cwd", "web", "check"] },
],
},
};
// ---------------------------------------------------------------------------
// Phase 1: run all checks in parallel, display in completion order
// ---------------------------------------------------------------------------
const start = Date.now();
const remaining = new Set(checks.map((ch) => ch.name));
const promises = checks.map(async (check) => ({
...check,
...(await spawnCollect(check.cmd, start)),
}));
const interval = isStderrTTY
? setInterval(() => {
process.stderr.write(`\r\x1b[K${elapsed(start)}s [${Array.from(remaining).join(", ")}]`);
}, 100)
: null;
const results: Record<string, Check & CollectResult> = {};
await raceInOrder(promises, checks, (r) => {
results[r.name] = r;
remaining.delete(r.name);
if (isStderrTTY) process.stderr.write("\r\x1b[K");
if (r.exitCode !== 0) {
process.stdout.write(c("31", `${r.name}`) + ` (${r.elapsed}s)\n`);
if (r.hint) {
process.stdout.write(c("2", ` ${r.hint}`) + "\n");
} else {
if (r.stdout) process.stdout.write(r.stdout);
if (r.stderr) process.stderr.write(r.stderr);
}
} else {
process.stdout.write(c("32", `${r.name}`) + ` (${r.elapsed}s)\n`);
}
});
if (interval) clearInterval(interval);
if (isStderrTTY) process.stderr.write("\r\x1b[K");
// ---------------------------------------------------------------------------
// Phase 2: auto-fix formatting if it's the only failure in its domain
// ---------------------------------------------------------------------------
const autoFixedDomains = new Set<string>();
for (const [fmtName, domain] of Object.entries(domains)) {
const fmtResult = results[fmtName];
if (!fmtResult || fmtResult.exitCode === 0) continue;
if (!domain.peers.every((p) => results[p]?.exitCode === 0)) continue;
process.stdout.write(
"\n" +
c("1;36", `→ Auto-formatting ${fmtName} (peers passed, only formatting failed)...`) +
"\n",
);
const fmtOut = domain.format();
if (fmtOut.exitCode !== 0) {
process.stdout.write(c("31", `${fmtName} formatter failed`) + "\n");
if (fmtOut.stdout) process.stdout.write(fmtOut.stdout);
if (fmtOut.stderr) process.stderr.write(fmtOut.stderr);
continue;
}
const recheckStart = Date.now();
const recheckPromises = domain.recheck.map(async (ch) => ({
...ch,
...(await spawnCollect(ch.cmd, recheckStart)),
}));
let recheckFailed = false;
await raceInOrder(recheckPromises, domain.recheck, (r) => {
if (r.exitCode !== 0) {
recheckFailed = true;
process.stdout.write(c("31", `${r.name}`) + ` (${r.elapsed}s)\n`);
if (r.stdout) process.stdout.write(r.stdout);
if (r.stderr) process.stderr.write(r.stderr);
} else {
process.stdout.write(c("32", `${r.name}`) + ` (${r.elapsed}s)\n`);
}
});
if (!recheckFailed) {
process.stdout.write(c("32", `${fmtName} auto-fix succeeded`) + "\n");
autoFixedDomains.add(fmtName);
} else {
process.stdout.write(c("31", `${fmtName} auto-fix failed sanity check`) + "\n");
}
}
// ---------------------------------------------------------------------------
// Final verdict
// ---------------------------------------------------------------------------
const finalFailed = Object.entries(results).some(
([name, r]) => r.exitCode !== 0 && !autoFixedDomains.has(name),
);
if (autoFixedDomains.size > 0 && !finalFailed) {
process.stdout.write(
"\n" + c("1;32", "✓ All checks passed (formatting was auto-fixed)") + "\n",
);
}
process.exit(finalFailed ? 1 : 0);
@@ -0,0 +1,79 @@
/**
* PostgreSQL Docker container management.
*
* Usage: bun scripts/db.ts [start|reset|rm]
*/
import { readFile, writeFile } from "fs/promises";
import { spawnSync } from "child_process";
const NAME = "banner-postgres";
const USER = "banner";
const PASS = "banner";
const DB = "banner";
const PORT = "59489";
const ENV_FILE = ".env";
const cmd = process.argv[2] || "start";
function docker(...args: string[]) {
return spawnSync("docker", args, { encoding: "utf8" });
}
function getContainer() {
const res = docker("ps", "-a", "--filter", `name=^${NAME}$`, "--format", "json");
return res.stdout.trim() ? JSON.parse(res.stdout) : null;
}
async function updateEnv() {
const url = `postgresql://${USER}:${PASS}@localhost:${PORT}/${DB}`;
try {
let content = await readFile(ENV_FILE, "utf8");
content = content.includes("DATABASE_URL=")
? content.replace(/DATABASE_URL=.*$/m, `DATABASE_URL=${url}`)
: content.trim() + `\nDATABASE_URL=${url}\n`;
await writeFile(ENV_FILE, content);
} catch {
await writeFile(ENV_FILE, `DATABASE_URL=${url}\n`);
}
}
function create() {
docker(
"run", "-d", "--name", NAME,
"-e", `POSTGRES_USER=${USER}`,
"-e", `POSTGRES_PASSWORD=${PASS}`,
"-e", `POSTGRES_DB=${DB}`,
"-p", `${PORT}:5432`,
"postgres:17-alpine",
);
console.log("created");
}
const container = getContainer();
if (cmd === "rm") {
if (!container) process.exit(0);
docker("stop", NAME);
docker("rm", NAME);
console.log("removed");
} else if (cmd === "reset") {
if (!container) {
create();
} else {
docker("exec", NAME, "psql", "-U", USER, "-d", "postgres", "-c", `DROP DATABASE IF EXISTS ${DB}`);
docker("exec", NAME, "psql", "-U", USER, "-d", "postgres", "-c", `CREATE DATABASE ${DB}`);
console.log("reset");
}
await updateEnv();
} else {
if (!container) {
create();
} else if (container.State !== "running") {
docker("start", NAME);
console.log("started");
} else {
console.log("running");
}
await updateEnv();
}
@@ -0,0 +1,112 @@
/**
* Dev server orchestrator.
*
* Usage: bun scripts/dev.ts [flags] [-- passthrough-args]
*
* Flags:
* -f, --frontend-only Frontend only (Vite dev server)
* -b, --backend-only Backend only (bacon watch)
* -W, --no-watch Build once + run (no watch)
* -n, --no-build Run last compiled binary (no rebuild)
* -r, --release Use release profile
* -e, --embed Embed assets (implies -b)
* --tracing <fmt> Tracing format (default: pretty)
*/
import { existsSync } from "fs";
import { parseFlags, c } from "./lib/fmt";
import { run, ProcessGroup } from "./lib/proc";
const { flags, passthrough } = parseFlags(
process.argv.slice(2),
{
"frontend-only": "bool",
"backend-only": "bool",
"no-watch": "bool",
"no-build": "bool",
release: "bool",
embed: "bool",
tracing: "string",
} as const,
{ f: "frontend-only", b: "backend-only", W: "no-watch", n: "no-build", r: "release", e: "embed" },
{
"frontend-only": false,
"backend-only": false,
"no-watch": false,
"no-build": false,
release: false,
embed: false,
tracing: "pretty",
},
);
let frontendOnly = flags["frontend-only"];
let backendOnly = flags["backend-only"];
let noWatch = flags["no-watch"];
const noBuild = flags["no-build"];
const release = flags.release;
const embed = flags.embed;
const tracing = flags.tracing as string;
// -e implies -b
if (embed) backendOnly = true;
// -n implies -W
if (noBuild) noWatch = true;
if (frontendOnly && backendOnly) {
console.error("Cannot use -f and -b together (or -e implies -b)");
process.exit(1);
}
const runFrontend = !backendOnly;
const runBackend = !frontendOnly;
const profile = release ? "release" : "dev";
const profileDir = release ? "release" : "debug";
const group = new ProcessGroup();
// Build frontend first when embedding assets
if (embed && !noBuild) {
console.log(c("1;36", "→ Building frontend (for embedding)..."));
run(["bun", "run", "--cwd", "web", "build"]);
}
// Frontend: Vite dev server
if (runFrontend) {
group.spawn(["bun", "run", "--cwd", "web", "dev"]);
}
// Backend
if (runBackend) {
const backendArgs = ["--tracing", tracing, ...passthrough];
const bin = `target/${profileDir}/banner`;
if (noWatch) {
if (!noBuild) {
console.log(c("1;36", `→ Building backend (${profile})...`));
const cargoArgs = ["cargo", "build", "--bin", "banner"];
if (!embed) cargoArgs.push("--no-default-features");
if (release) cargoArgs.push("--release");
run(cargoArgs);
}
if (!existsSync(bin)) {
console.error(`Binary not found: ${bin}`);
console.error(`Run 'just build${release ? "" : " -d"}' first, or remove -n to use bacon.`);
await group.killAll();
process.exit(1);
}
console.log(c("1;36", `→ Running ${bin} (no watch)`));
group.spawn([bin, ...backendArgs]);
} else {
// Bacon watch mode
const baconArgs = ["bacon", "--headless", "run", "--"];
if (!embed) baconArgs.push("--no-default-features");
if (release) baconArgs.push("--profile", "release");
baconArgs.push("--", ...backendArgs);
group.spawn(baconArgs);
}
}
const code = await group.waitForFirst();
process.exit(code);
@@ -0,0 +1,96 @@
/**
* Shared formatting, color, and CLI argument parsing utilities.
*/
const isTTY = process.stdout.isTTY ?? false;
const isStderrTTY = process.stderr.isTTY ?? false;
/** ANSI color wrapper — no-op when stdout is not a TTY. */
export function c(code: string, text: string): string {
return isTTY ? `\x1b[${code}m${text}\x1b[0m` : text;
}
/** Elapsed seconds since `start` as a formatted string. */
export function elapsed(start: number): string {
return ((Date.now() - start) / 1000).toFixed(1);
}
/** Whether stderr is a TTY (for progress spinners). */
export { isStderrTTY };
/**
* Parse short and long CLI flags from a flat argument array.
*
* `spec` maps flag names to their type:
* - `"bool"` — presence sets the value to `true`
* - `"string"` — consumes the next argument as the value
*
* Short flags can be combined: `-fbW` expands to `-f -b -W`.
* Long flags: `--frontend-only`, `--tracing pretty`.
* `--` terminates flag parsing; remaining args go to `passthrough`.
*
* Returns `{ flags, passthrough }`.
*/
export function parseFlags<T extends Record<string, "bool" | "string">>(
argv: string[],
spec: T,
shortMap: Record<string, keyof T>,
defaults: { [K in keyof T]: T[K] extends "bool" ? boolean : string },
): { flags: typeof defaults; passthrough: string[] } {
const flags = { ...defaults };
const passthrough: string[] = [];
let i = 0;
while (i < argv.length) {
const arg = argv[i];
if (arg === "--") {
passthrough.push(...argv.slice(i + 1));
break;
}
if (arg.startsWith("--")) {
const name = arg.slice(2);
if (!(name in spec)) {
console.error(`Unknown flag: ${arg}`);
process.exit(1);
}
if (spec[name] === "string") {
(flags as Record<string, unknown>)[name] = argv[++i] || "";
} else {
(flags as Record<string, unknown>)[name] = true;
}
} else if (arg.startsWith("-") && arg.length > 1) {
for (const ch of arg.slice(1)) {
const mapped = shortMap[ch];
if (!mapped) {
console.error(`Unknown flag: -${ch}`);
process.exit(1);
}
if (spec[mapped as string] === "string") {
(flags as Record<string, unknown>)[mapped as string] = argv[++i] || "";
} else {
(flags as Record<string, unknown>)[mapped as string] = true;
}
}
} else {
console.error(`Unknown argument: ${arg}`);
process.exit(1);
}
i++;
}
return { flags, passthrough };
}
/**
 * Split a raw whitespace-separated argument string into its tokens.
*/
export function parseArgs(raw: string): string[] {
return raw
.trim()
.split(/\s+/)
.filter(Boolean);
}
@@ -0,0 +1,113 @@
/**
* Shared process spawning utilities for project scripts.
*/
import { elapsed } from "./fmt";
export interface CollectResult {
stdout: string;
stderr: string;
exitCode: number;
elapsed: string;
}
/** Sync spawn with inherited stdio. Exits process on failure. */
export function run(cmd: string[]): void {
const proc = Bun.spawnSync(cmd, { stdio: ["inherit", "inherit", "inherit"] });
if (proc.exitCode !== 0) process.exit(proc.exitCode);
}
/** Sync spawn with piped stdio. Returns captured output. */
export function runPiped(cmd: string[]): { exitCode: number; stdout: string; stderr: string } {
const proc = Bun.spawnSync(cmd, { stdout: "pipe", stderr: "pipe" });
return {
exitCode: proc.exitCode,
stdout: proc.stdout?.toString() ?? "",
stderr: proc.stderr?.toString() ?? "",
};
}
/**
* Async spawn that collects stdout/stderr. Returns a result object.
* Catches spawn failures (e.g. missing binary) instead of throwing.
*/
export async function spawnCollect(cmd: string[], startTime: number): Promise<CollectResult> {
try {
const proc = Bun.spawn(cmd, {
env: { ...process.env, FORCE_COLOR: "1" },
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr] = await Promise.all([
new Response(proc.stdout).text(),
new Response(proc.stderr).text(),
]);
await proc.exited;
return { stdout, stderr, exitCode: proc.exitCode, elapsed: elapsed(startTime) };
} catch (err) {
return { stdout: "", stderr: String(err), exitCode: 1, elapsed: elapsed(startTime) };
}
}
/**
* Race all promises, yielding results in completion order via callback.
* Spawn failures become results, not unhandled rejections.
*/
export async function raceInOrder<T extends { name: string }>(
promises: Promise<T & CollectResult>[],
fallbacks: T[],
onResult: (r: T & CollectResult) => void,
): Promise<void> {
const tagged = promises.map((p, i) =>
p
.then((r) => ({ i, r }))
.catch((err) => ({
i,
r: {
...fallbacks[i],
exitCode: 1,
stdout: "",
stderr: String(err),
elapsed: "?",
} as T & CollectResult,
})),
);
for (let n = 0; n < promises.length; n++) {
const { i, r } = await Promise.race(tagged);
tagged[i] = new Promise(() => {}); // sentinel: never resolves
onResult(r);
}
}
/** Spawn managed processes with coordinated cleanup on exit. */
export class ProcessGroup {
private procs: ReturnType<typeof Bun.spawn>[] = [];
constructor() {
const cleanup = async () => {
await this.killAll();
process.exit(0);
};
process.on("SIGINT", cleanup);
process.on("SIGTERM", cleanup);
}
spawn(cmd: string[]): ReturnType<typeof Bun.spawn> {
const proc = Bun.spawn(cmd, { stdio: ["inherit", "inherit", "inherit"] });
this.procs.push(proc);
return proc;
}
async killAll(): Promise<void> {
for (const p of this.procs) p.kill();
await Promise.all(this.procs.map((p) => p.exited));
}
/** Wait for any process to exit, kill the rest, return exit code. */
async waitForFirst(): Promise<number> {
const results = this.procs.map((p, i) => p.exited.then((code) => ({ i, code })));
const first = await Promise.race(results);
await this.killAll();
return first.code;
}
}
@@ -0,0 +1,8 @@
{
"name": "banner-scripts",
"private": true,
"type": "module",
"devDependencies": {
"@types/bun": "^1.3.8"
}
}
@@ -0,0 +1,20 @@
/**
* Run project tests.
*
* Usage: bun scripts/test.ts [rust|web|<nextest filter args>]
*/
import { run } from "./lib/proc";
const input = process.argv.slice(2).join(" ").trim();
if (input === "web") {
run(["bun", "run", "--cwd", "web", "test"]);
} else if (input === "rust") {
run(["cargo", "nextest", "run", "-E", "not test(export_bindings)"]);
} else if (input === "") {
run(["cargo", "nextest", "run", "-E", "not test(export_bindings)"]);
run(["bun", "run", "--cwd", "web", "test"]);
} else {
run(["cargo", "nextest", "run", ...input.split(/\s+/)]);
}
@@ -0,0 +1,15 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"strict": true,
"noEmit": true,
"skipLibCheck": true,
"types": ["bun-types"],
"paths": {
"#lib/*": ["./lib/*"]
}
},
"include": ["**/*.ts"]
}
@@ -0,0 +1,199 @@
use crate::banner::BannerApi;
use crate::cli::ServiceName;
use crate::config::Config;
use crate::scraper::ScraperService;
use crate::services::bot::BotService;
use crate::services::manager::ServiceManager;
use crate::services::web::WebService;
use crate::state::AppState;
use crate::web::auth::AuthConfig;
use anyhow::Context;
use figment::value::UncasedStr;
use figment::{Figment, providers::Env};
use sqlx::postgres::PgPoolOptions;
use std::process::ExitCode;
use std::sync::Arc;
use std::time::Duration;
use tracing::{error, info, warn};
/// Main application struct containing all necessary components
pub struct App {
config: Config,
db_pool: sqlx::PgPool,
banner_api: Arc<BannerApi>,
app_state: AppState,
service_manager: ServiceManager,
}
impl App {
/// Create a new App instance with all necessary components initialized
pub async fn new() -> Result<Self, anyhow::Error> {
// Load configuration
let config: Config = Figment::new()
.merge(Env::raw().map(|k| {
if k == UncasedStr::new("RAILWAY_DEPLOYMENT_DRAINING_SECONDS") {
"SHUTDOWN_TIMEOUT".into()
} else {
k.into()
}
}))
.extract()
.context("Failed to load config")?;
// Check if the database URL is via private networking
let is_private = config.database_url.contains("railway.internal");
let slow_threshold = Duration::from_millis(if is_private { 200 } else { 500 });
// Create database connection pool
let db_pool = PgPoolOptions::new()
.min_connections(0)
.max_connections(4)
.acquire_slow_threshold(slow_threshold)
.acquire_timeout(Duration::from_secs(4))
.idle_timeout(Duration::from_secs(60 * 2))
.max_lifetime(Duration::from_secs(60 * 30))
.connect(&config.database_url)
.await
.context("Failed to create database pool")?;
info!(
is_private = is_private,
slow_threshold = format!("{:.2?}", slow_threshold),
"database pool established"
);
// Run database migrations
info!("Running database migrations...");
sqlx::migrate!("./migrations")
.run(&db_pool)
.await
.context("Failed to run database migrations")?;
info!("Database migrations completed successfully");
// Backfill structured name columns for existing instructors
if let Err(e) = crate::data::names::backfill_instructor_names(&db_pool).await {
warn!(error = ?e, "Failed to backfill instructor names (non-fatal)");
}
// Create BannerApi and AppState
let banner_api = BannerApi::new_with_config(
config.banner_base_url.clone(),
config.rate_limiting.clone(),
)
.context("Failed to create BannerApi")?;
let banner_api_arc = Arc::new(banner_api);
let app_state = AppState::new(banner_api_arc.clone(), db_pool.clone());
// Load reference data cache from DB (may be empty on first run)
if let Err(e) = app_state.load_reference_cache().await {
info!(error = ?e, "Could not load reference cache on startup (may be empty)");
}
// Load schedule cache for timeline enrollment queries
if let Err(e) = app_state.schedule_cache.load().await {
info!(error = ?e, "Could not load schedule cache on startup (may be empty)");
}
// Seed the initial admin user if configured
if let Some(admin_id) = config.admin_discord_id {
let user = crate::data::users::ensure_seed_admin(&db_pool, admin_id as i64)
.await
.context("Failed to seed admin user")?;
info!(discord_id = admin_id, username = %user.discord_username, "Seed admin ensured");
}
Ok(App {
config,
db_pool,
banner_api: banner_api_arc,
app_state,
service_manager: ServiceManager::new(),
})
}
/// Setup and register services based on enabled service list
pub fn setup_services(&mut self, services: &[ServiceName]) -> Result<(), anyhow::Error> {
// Register enabled services with the manager
if services.contains(&ServiceName::Web) {
let auth_config = AuthConfig {
client_id: self.config.discord_client_id.clone(),
client_secret: self.config.discord_client_secret.clone(),
redirect_base: self.config.discord_redirect_uri.clone(),
};
let web_service = Box::new(WebService::new(
self.config.port,
self.app_state.clone(),
auth_config,
));
self.service_manager
.register_service(ServiceName::Web.as_str(), web_service);
}
if services.contains(&ServiceName::Scraper) {
let scraper_service = Box::new(ScraperService::new(
self.db_pool.clone(),
self.banner_api.clone(),
self.app_state.reference_cache.clone(),
self.app_state.service_statuses.clone(),
self.app_state.scrape_job_tx.clone(),
));
self.service_manager
.register_service(ServiceName::Scraper.as_str(), scraper_service);
}
// Check if any services are enabled
if !self.service_manager.has_services() && !services.contains(&ServiceName::Bot) {
error!("No services enabled. Cannot start application.");
return Err(anyhow::anyhow!("No services enabled"));
}
Ok(())
}
/// Setup bot service if enabled
pub async fn setup_bot_service(&mut self) -> Result<(), anyhow::Error> {
use std::sync::Arc;
use tokio::sync::{Mutex, broadcast};
// Create shutdown channel for status update task
let (status_shutdown_tx, status_shutdown_rx) = broadcast::channel(1);
let status_task_handle = Arc::new(Mutex::new(None));
let client = BotService::create_client(
&self.config,
self.app_state.clone(),
status_task_handle.clone(),
status_shutdown_rx,
)
.await
.context("Failed to create Discord client")?;
let bot_service = Box::new(BotService::new(
client,
status_task_handle,
status_shutdown_tx,
self.app_state.service_statuses.clone(),
));
self.service_manager
.register_service(ServiceName::Bot.as_str(), bot_service);
Ok(())
}
/// Start all registered services
pub fn start_services(&mut self) {
self.service_manager.spawn_all();
}
/// Run the application and handle shutdown signals
pub async fn run(self) -> ExitCode {
use crate::signals::handle_shutdown_signals;
handle_shutdown_signals(self.service_manager, self.config.shutdown_timeout).await
}
/// Get a reference to the configuration
pub fn config(&self) -> &Config {
&self.config
}
}
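A minimal end-to-end sketch of the lifecycle above (the constructor name `App::new` and the exact service list are assumptions; only setup_services, setup_bot_service, start_services, and run appear in this diff):
use std::process::ExitCode;
async fn run_app(config: Config) -> anyhow::Result<ExitCode> {
    // Hypothetical constructor; the diff shows only its tail (`Ok(App { .. })`).
    let mut app = App::new(config).await?;
    // Register whichever services are enabled, then the optional Discord bot.
    app.setup_services(&[ServiceName::Web, ServiceName::Scraper])?;
    app.setup_bot_service().await?;
    // Spawn everything and block until a shutdown signal drains the manager.
    app.start_services();
    Ok(app.run().await)
}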
-48
@@ -1,48 +0,0 @@
//! Application state shared across components (bot, web, scheduler).
use crate::banner::BannerApi;
use crate::banner::Course;
use anyhow::Result;
use redis::AsyncCommands;
use redis::Client;
use serde_json;
#[derive(Clone, Debug)]
pub struct AppState {
pub banner_api: std::sync::Arc<BannerApi>,
pub redis: std::sync::Arc<Client>,
}
impl AppState {
pub fn new(
banner_api: BannerApi,
redis_url: &str,
) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
let redis_client = Client::open(redis_url)?;
Ok(Self {
banner_api: std::sync::Arc::new(banner_api),
redis: std::sync::Arc::new(redis_client),
})
}
/// Get a course by CRN with Redis cache fallback to Banner API
pub async fn get_course_or_fetch(&self, term: &str, crn: &str) -> Result<Course> {
let mut conn = self.redis.get_multiplexed_async_connection().await?;
let key = format!("class:{}", crn);
if let Some(serialized) = conn.get::<_, Option<String>>(&key).await? {
let course: Course = serde_json::from_str(&serialized)?;
return Ok(course);
}
// Fallback: fetch from Banner API
if let Some(course) = self.banner_api.get_course_by_crn(term, crn).await? {
let serialized = serde_json::to_string(&course)?;
let _: () = conn.set(&key, serialized).await?;
return Ok(course);
}
Err(anyhow::anyhow!("Course not found for CRN {}", crn))
}
}
+225 -307
@@ -1,81 +1,219 @@
//! Main Banner API client implementation.
use crate::banner::{SessionManager, models::*, query::SearchQuery};
use anyhow::{Context, Result};
use axum::http::HeaderValue;
use reqwest::Client;
use serde_json;
use std::collections::HashMap;
// use tracing::debug;
use crate::banner::{
SessionPool, create_shared_rate_limiter, errors::BannerApiError, json::parse_json_with_context,
middleware::TransparentMiddleware, models::*, nonce, query::SearchQuery,
rate_limit_middleware::RateLimitMiddleware, util::user_agent,
};
use crate::config::RateLimitingConfig;
use anyhow::{Context, Result, anyhow};
use http::HeaderValue;
use reqwest::Client;
use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use tracing::debug;
/// Main Banner API client.
#[derive(Debug)]
pub struct BannerApi {
session_manager: SessionManager,
client: Client,
pub sessions: SessionPool,
http: ClientWithMiddleware,
base_url: String,
}
impl BannerApi {
/// Creates a new Banner API client.
#[allow(dead_code)]
pub fn new(base_url: String) -> Result<Self> {
let client = Client::builder()
.cookie_store(true)
.user_agent(user_agent())
.tcp_keepalive(Some(std::time::Duration::from_secs(60 * 5)))
.read_timeout(std::time::Duration::from_secs(10))
.connect_timeout(std::time::Duration::from_secs(10))
.timeout(std::time::Duration::from_secs(30))
.build()
.context("Failed to create HTTP client")?;
Self::new_with_config(base_url, RateLimitingConfig::default())
}
let session_manager = SessionManager::new(base_url.clone(), client.clone());
/// Creates a new Banner API client with custom rate limiting configuration.
pub fn new_with_config(
base_url: String,
rate_limit_config: RateLimitingConfig,
) -> Result<Self> {
let rate_limiter = create_shared_rate_limiter(Some(rate_limit_config));
let http = ClientBuilder::new(
Client::builder()
.cookie_store(false)
.user_agent(user_agent())
.tcp_keepalive(Some(std::time::Duration::from_secs(60 * 5)))
.read_timeout(std::time::Duration::from_secs(20))
.connect_timeout(std::time::Duration::from_secs(15))
.timeout(std::time::Duration::from_secs(40))
.build()
.context("Failed to create HTTP client")?,
)
.with(TransparentMiddleware)
.with(RateLimitMiddleware::new(rate_limiter.clone()))
.build();
Ok(Self {
session_manager,
client,
sessions: SessionPool::new(http.clone(), base_url.clone()),
http,
base_url,
})
}
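// A minimal construction sketch (the base URL is a placeholder; assumes a
// tokio runtime and the RateLimitingConfig defaults from the config module):
//
//     let api = BannerApi::new_with_config(
//         "https://registration.example.edu/StudentRegistrationSsb/ssb".to_string(),
//         RateLimitingConfig::default(),
//     )?;
//     let subjects = api.get_subjects("", "202620", 1, 500).await?;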
/// Sets up the API client by initializing session cookies.
pub async fn setup(&self) -> Result<()> {
self.session_manager.setup().await
/// Validates offset parameter for search methods.
fn validate_offset(offset: i32) -> Result<()> {
if offset <= 0 {
Err(anyhow::anyhow!("Offset must be greater than 0"))
} else {
Ok(())
}
}
/// Retrieves a list of terms from the Banner API.
pub async fn get_terms(
/// Builds common search parameters for list endpoints.
fn build_list_params(
&self,
search: &str,
page: i32,
term: &str,
offset: i32,
max_results: i32,
) -> Result<Vec<BannerTerm>> {
if page <= 0 {
return Err(anyhow::anyhow!("Page must be greater than 0"));
}
session_id: &str,
) -> Vec<(&str, String)> {
vec![
("searchTerm", search.to_string()),
("term", term.to_string()),
("offset", offset.to_string()),
("max", max_results.to_string()),
("uniqueSessionId", session_id.to_string()),
("_", nonce()),
]
}
let url = format!("{}/classSearch/getTerms", self.base_url);
let params = [
("searchTerm", search),
("offset", &page.to_string()),
("max", &max_results.to_string()),
("_", &timestamp_nonce()),
];
/// Makes a GET request to a list endpoint and parses JSON response.
async fn get_list_endpoint<T>(
&self,
endpoint: &str,
search: &str,
term: &str,
offset: i32,
max_results: i32,
) -> Result<Vec<T>>
where
T: for<'de> serde::Deserialize<'de>,
{
Self::validate_offset(offset)?;
let session = self.sessions.acquire(term.parse()?).await?;
let url = format!("{}/classSearch/{}", self.base_url, endpoint);
let params = self.build_list_params(search, term, offset, max_results, session.id());
let response = self
.client
.http
.get(&url)
.query(&params)
.send()
.await
.context("Failed to get terms")?;
.with_context(|| format!("Failed to get {}", endpoint))?;
let terms: Vec<BannerTerm> = response
let data: Vec<T> = response
.json()
.await
.context("Failed to parse terms response")?;
.with_context(|| format!("Failed to parse {} response", endpoint))?;
Ok(terms)
Ok(data)
}
/// Builds search parameters for course search methods.
fn build_search_params(
&self,
query: &SearchQuery,
term: &str,
session_id: &str,
sort: &str,
sort_descending: bool,
) -> HashMap<String, String> {
let mut params = query.to_params();
params.insert("txt_term".to_string(), term.to_string());
params.insert("uniqueSessionId".to_string(), session_id.to_string());
params.insert("sortColumn".to_string(), sort.to_string());
params.insert(
"sortDirection".to_string(),
if sort_descending { "desc" } else { "asc" }.to_string(),
);
params.insert("startDatepicker".to_string(), String::new());
params.insert("endDatepicker".to_string(), String::new());
params
}
/// Performs a course search and handles common response processing.
#[tracing::instrument(
skip(self, query, sort, sort_descending),
fields(term = %term)
)]
async fn perform_search(
&self,
term: &str,
query: &SearchQuery,
sort: &str,
sort_descending: bool,
) -> Result<SearchResult, BannerApiError> {
let mut session = self.sessions.acquire(term.parse()?).await?;
if session.been_used() {
self.http
.post(format!("{}/classSearch/resetDataForm", self.base_url))
.header("Cookie", session.cookie())
.send()
.await
.map_err(|e| BannerApiError::RequestFailed(e.into()))?;
}
session.touch();
let params = self.build_search_params(query, term, session.id(), sort, sort_descending);
debug!(
term = term,
subject = query.get_subject().map(|s| s.as_str()).unwrap_or("all"),
max_results = query.get_max_results(),
"Searching for courses"
);
let response = self
.http
.get(format!("{}/searchResults/searchResults", self.base_url))
.header("Cookie", session.cookie())
.query(&params)
.send()
.await
.context("Failed to search courses")?;
let status = response.status();
let url = response.url().clone();
let body = response
.text()
.await
.with_context(|| format!("Failed to read body (status={status})"))?;
let search_result: SearchResult = parse_json_with_context(&body).map_err(|e| {
BannerApiError::RequestFailed(anyhow!(
"Failed to parse search response (status={status}, url={url}): {e}"
))
})?;
// Check for signs of an invalid session
if search_result.path_mode.is_none() {
return Err(BannerApiError::InvalidSession(
"Search result path mode is none".to_string(),
));
} else if search_result.data.is_none() {
return Err(BannerApiError::InvalidSession(
"Search result data is none".to_string(),
));
}
if !search_result.success {
return Err(BannerApiError::RequestFailed(anyhow!(
"Search marked as unsuccessful by Banner API"
)));
}
Ok(search_result)
}
/// Retrieves a list of subjects from the Banner API.
@@ -86,126 +224,44 @@ impl BannerApi {
offset: i32,
max_results: i32,
) -> Result<Vec<Pair>> {
if offset <= 0 {
return Err(anyhow::anyhow!("Offset must be greater than 0"));
}
let session_id = self.session_manager.ensure_session()?;
let url = format!("{}/classSearch/get_subject", self.base_url);
let params = [
("searchTerm", search),
("term", term),
("offset", &offset.to_string()),
("max", &max_results.to_string()),
("uniqueSessionId", &session_id),
("_", &timestamp_nonce()),
];
let response = self
.client
.get(&url)
.query(&params)
.send()
self.get_list_endpoint("get_subject", search, term, offset, max_results)
.await
.context("Failed to get subjects")?;
let subjects: Vec<Pair> = response
.json()
.await
.context("Failed to parse subjects response")?;
Ok(subjects)
}
/// Retrieves a list of instructors from the Banner API.
pub async fn get_instructors(
&self,
search: &str,
term: &str,
offset: i32,
max_results: i32,
) -> Result<Vec<Instructor>> {
if offset <= 0 {
return Err(anyhow::anyhow!("Offset must be greater than 0"));
}
let session_id = self.session_manager.ensure_session()?;
let url = format!("{}/classSearch/get_instructor", self.base_url);
let params = [
("searchTerm", search),
("term", term),
("offset", &offset.to_string()),
("max", &max_results.to_string()),
("uniqueSessionId", &session_id),
("_", &timestamp_nonce()),
];
let response = self
.client
.get(&url)
.query(&params)
.send()
.await
.context("Failed to get instructors")?;
let instructors: Vec<Instructor> = response
.json()
.await
.context("Failed to parse instructors response")?;
Ok(instructors)
/// Retrieves campus codes and descriptions.
pub async fn get_campuses(&self, term: &str) -> Result<Vec<Pair>> {
self.get_list_endpoint("get_campus", "", term, 1, 500).await
}
/// Retrieves a list of campuses from the Banner API.
pub async fn get_campuses(
&self,
search: &str,
term: i32,
offset: i32,
max_results: i32,
) -> Result<Vec<Pair>> {
if offset <= 0 {
return Err(anyhow::anyhow!("Offset must be greater than 0"));
}
let session_id = self.session_manager.ensure_session()?;
let url = format!("{}/classSearch/get_campus", self.base_url);
let params = [
("searchTerm", search),
("term", &term.to_string()),
("offset", &offset.to_string()),
("max", &max_results.to_string()),
("uniqueSessionId", &session_id),
("_", &timestamp_nonce()),
];
let response = self
.client
.get(&url)
.query(&params)
.send()
/// Retrieves instructional method codes and descriptions.
pub async fn get_instructional_methods(&self, term: &str) -> Result<Vec<Pair>> {
self.get_list_endpoint("get_instructionalMethod", "", term, 1, 500)
.await
.context("Failed to get campuses")?;
}
let campuses: Vec<Pair> = response
.json()
/// Retrieves part-of-term codes and descriptions.
pub async fn get_parts_of_term(&self, term: &str) -> Result<Vec<Pair>> {
self.get_list_endpoint("get_partOfTerm", "", term, 1, 500)
.await
.context("Failed to parse campuses response")?;
}
Ok(campuses)
/// Retrieves section attribute codes and descriptions.
pub async fn get_attributes(&self, term: &str) -> Result<Vec<Pair>> {
self.get_list_endpoint("get_attribute", "", term, 1, 500)
.await
}
/// Retrieves meeting time information for a course.
pub async fn get_course_meeting_time(
&self,
term: &str,
crn: i32,
crn: &str,
) -> Result<Vec<MeetingScheduleInfo>> {
let url = format!("{}/searchResults/getFacultyMeetingTimes", self.base_url);
let params = [("term", term), ("courseReferenceNumber", &crn.to_string())];
let params = [("term", term), ("courseReferenceNumber", crn)];
let response = self
.client
.http
.get(&url)
.query(&params)
.send()
@@ -236,14 +292,14 @@ impl BannerApi {
));
}
#[derive(serde::Deserialize)]
struct ResponseWrapper {
fmt: Vec<MeetingTimeResponse>,
}
let response: MeetingTimesApiResponse =
response.json().await.context("Failed to parse response")?;
let wrapper: ResponseWrapper = response.json().await.context("Failed to parse response")?;
Ok(wrapper.fmt.into_iter().map(|m| m.schedule_info()).collect())
Ok(response
.fmt
.into_iter()
.map(|m| m.schedule_info())
.collect())
}
/// Performs a search for courses.
@@ -253,95 +309,33 @@ impl BannerApi {
query: &SearchQuery,
sort: &str,
sort_descending: bool,
) -> Result<SearchResult> {
self.session_manager.reset_data_form().await?;
let session_id = self.session_manager.ensure_session()?;
let mut params = query.to_params();
// Add additional parameters
params.insert("txt_term".to_string(), term.to_string());
params.insert("uniqueSessionId".to_string(), session_id);
params.insert("sortColumn".to_string(), sort.to_string());
params.insert(
"sortDirection".to_string(),
if sort_descending { "desc" } else { "asc" }.to_string(),
);
params.insert("startDatepicker".to_string(), String::new());
params.insert("endDatepicker".to_string(), String::new());
let url = format!("{}/searchResults/searchResults", self.base_url);
let response = self
.client
.get(&url)
.query(&params)
.send()
) -> Result<SearchResult, BannerApiError> {
self.perform_search(term, query, sort, sort_descending)
.await
.context("Failed to search courses")?;
let search_result: SearchResult = response
.json()
.await
.context("Failed to parse search response")?;
if !search_result.success {
return Err(anyhow::anyhow!(
"Search marked as unsuccessful by Banner API"
));
}
Ok(search_result)
}
/// Selects a term for the current session.
pub async fn select_term(&self, term: &str) -> Result<()> {
self.session_manager.select_term(term).await
}
/// Retrieves a single course by CRN by issuing a minimal search
pub async fn get_course_by_crn(&self, term: &str, crn: &str) -> Result<Option<Course>> {
self.session_manager.reset_data_form().await?;
// Ensure session is configured for this term
self.select_term(term).await?;
let session_id = self.session_manager.ensure_session()?;
pub async fn get_course_by_crn(
&self,
term: &str,
crn: &str,
) -> Result<Option<Course>, BannerApiError> {
debug!(term = term, crn = crn, "Looking up course by CRN");
let query = SearchQuery::new()
.course_reference_number(crn)
.max_results(1);
let mut params = query.to_params();
params.insert("txt_term".to_string(), term.to_string());
params.insert("uniqueSessionId".to_string(), session_id);
params.insert("sortColumn".to_string(), "subjectDescription".to_string());
params.insert("sortDirection".to_string(), "asc".to_string());
params.insert("startDatepicker".to_string(), String::new());
params.insert("endDatepicker".to_string(), String::new());
let search_result = self
.perform_search(term, &query, "subjectDescription", false)
.await?;
let url = format!("{}/searchResults/searchResults", self.base_url);
let response = self
.client
.get(&url)
.query(&params)
.send()
.await
.context("Failed to search course by CRN")?;
let status = response.status();
let body = response
.text()
.await
.with_context(|| format!("Failed to read body (status={status})"))?;
let search_result: SearchResult = parse_json_with_context(&body).map_err(|e| {
anyhow::anyhow!(
"Failed to parse search response for CRN (status={status}, url={url}): {e}",
)
})?;
if !search_result.success {
return Err(anyhow::anyhow!(
"Search marked as unsuccessful by Banner API"
// Additional validation for CRN search
if search_result.path_mode == Some("registration".to_string())
&& search_result.data.is_none()
{
return Err(BannerApiError::InvalidSession(
"Search result path mode is registration and data is none".to_string(),
));
}
@@ -349,80 +343,4 @@ impl BannerApi {
.data
.and_then(|courses| courses.into_iter().next()))
}
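// Usage sketch (term code and CRN are placeholders):
//
//     match api.get_course_by_crn("202620", "27294").await {
//         Ok(Some(course)) => println!("{}", course.display_title()),
//         Ok(None) => println!("no section with that CRN"),
//         // InvalidSession signals a stale pooled session; callers may retry.
//         Err(BannerApiError::InvalidSession(_)) => { /* re-acquire and retry */ }
//         Err(e) => return Err(e.into()),
//     }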
/// Gets course details (placeholder - needs implementation).
pub async fn get_course_details(&self, term: i32, crn: i32) -> Result<ClassDetails> {
let body = serde_json::json!({
"term": term.to_string(),
"courseReferenceNumber": crn.to_string(),
"first": "first"
});
let url = format!("{}/searchResults/getClassDetails", self.base_url);
let response = self
.client
.post(&url)
.json(&body)
.send()
.await
.context("Failed to get course details")?;
let details: ClassDetails = response
.json()
.await
.context("Failed to parse course details response")?;
Ok(details)
}
}
/// Generates a timestamp-based nonce.
fn timestamp_nonce() -> String {
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis()
.to_string()
}
/// Returns a browser-like user agent string.
fn user_agent() -> &'static str {
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
}
/// Attempt to parse JSON and, on failure, include a contextual snippet around the error location
fn parse_json_with_context<T: serde::de::DeserializeOwned>(body: &str) -> Result<T> {
match serde_json::from_str::<T>(body) {
Ok(value) => Ok(value),
Err(err) => {
let (line, column) = (err.line(), err.column());
let snippet = build_error_snippet(body, line as usize, column as usize, 120);
Err(anyhow::anyhow!(
"{} at line {}, column {}\nSnippet:\n{}",
err,
line,
column,
snippet
))
}
}
}
fn build_error_snippet(body: &str, line: usize, column: usize, max_len: usize) -> String {
let target_line = body.lines().nth(line.saturating_sub(1)).unwrap_or("");
if target_line.is_empty() {
return String::new();
}
let start = column.saturating_sub(max_len.min(column));
let end = (column + max_len).min(target_line.len());
let slice = &target_line[start..end];
let mut indicator = String::new();
if column > start {
indicator.push_str(&" ".repeat(column - start - 1));
indicator.push('^');
}
format!("{}\n{}", slice, indicator)
}
+9
@@ -0,0 +1,9 @@
//! Error types for the Banner API client.
#[derive(Debug, thiserror::Error)]
pub enum BannerApiError {
#[error("Banner session is invalid or expired: {0}")]
InvalidSession(String),
#[error(transparent)]
RequestFailed(#[from] anyhow::Error),
}
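The two variants split failures into retryable session problems and terminal request errors; a hedged helper sketch (the retry policy itself is an assumption, not part of this diff):
fn is_retryable(err: &BannerApiError) -> bool {
    // InvalidSession means the pooled Banner session went stale, so acquiring
    // a fresh session and retrying is reasonable; RequestFailed is terminal.
    matches!(err, BannerApiError::InvalidSession(_))
}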
+419
@@ -0,0 +1,419 @@
//! JSON parsing utilities for the Banner API client.
use anyhow::Result;
use serde_json::{self, Value};
/// Attempt to parse JSON and, on failure, include a contextual snippet of the
/// line where the error occurred.
///
/// In debug builds, this provides detailed context including the full JSON object
/// containing the error and type mismatch information. In release builds, it shows
/// a minimal snippet to prevent dumping huge JSON bodies to production logs.
pub fn parse_json_with_context<T: serde::de::DeserializeOwned>(body: &str) -> Result<T> {
let jd = &mut serde_json::Deserializer::from_str(body);
match serde_path_to_error::deserialize(jd) {
Ok(value) => Ok(value),
Err(err) => {
let inner_err = err.inner();
let (line, column) = (inner_err.line(), inner_err.column());
let path = err.path().to_string();
let msg = inner_err.to_string();
let loc = format!(" at line {line} column {column}");
let msg_without_loc = msg.strip_suffix(&loc).unwrap_or(&msg).to_string();
// Build error message differently for debug vs release builds
let final_err = if cfg!(debug_assertions) {
// Debug mode: provide detailed context
let type_info = parse_type_mismatch(&msg_without_loc);
let context = extract_json_object_at_path(body, err.path(), line, column);
let mut err_msg = String::new();
if !path.is_empty() && path != "." {
err_msg.push_str(&format!("for path '{}'\n", path));
}
err_msg.push_str(&format!(
"({}) at line {} column {}\n\n",
type_info, line, column
));
err_msg.push_str(&context);
err_msg
} else {
// Release mode: minimal snippet to keep logs concise
let snippet = build_error_snippet(body, line, column, 20);
let mut err_msg = String::new();
if !path.is_empty() && path != "." {
err_msg.push_str(&format!("for path '{}' ", path));
}
err_msg.push_str(&format!(
"({}) at line {} column {}",
msg_without_loc, line, column
));
err_msg.push_str(&format!("\n{}", snippet));
err_msg
};
Err(anyhow::anyhow!(final_err))
}
}
}
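// Example: a null where a string is expected yields a path-aware error
// (illustrative types only; the tests at the bottom exercise the same path):
//
//     #[derive(serde::Deserialize)]
//     struct Faculty {
//         #[serde(rename = "displayName")]
//         display_name: String,
//     }
//     let err = parse_json_with_context::<Faculty>(r#"{"displayName": null}"#).unwrap_err();
//     // The message names the path "displayName" with line/column, and in
//     // debug builds includes the surrounding JSON object.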
/// Extract type mismatch information from a serde error message.
///
/// Parses error messages like "invalid type: null, expected a string" to extract
/// the expected and actual types for clearer error reporting.
///
/// Returns a formatted string like "expected a string, got null", or the original
/// message if parsing fails.
fn parse_type_mismatch(error_msg: &str) -> String {
// Try to parse "invalid type: X, expected Y" format
if let Some(invalid_start) = error_msg.find("invalid type: ") {
let after_prefix = &error_msg[invalid_start + "invalid type: ".len()..];
if let Some(comma_pos) = after_prefix.find(", expected ") {
let actual_type = &after_prefix[..comma_pos];
let expected_part = &after_prefix[comma_pos + ", expected ".len()..];
// Clean up expected part (remove " at line X column Y" if present)
let expected_type = expected_part
.split(" at line ")
.next()
.unwrap_or(expected_part)
.trim();
return format!("expected {}, got {}", expected_type, actual_type);
}
}
// Try to parse "expected X at line Y" format
if error_msg.starts_with("expected ")
&& let Some(expected_part) = error_msg.split(" at line ").next()
{
return expected_part.to_string();
}
// Fallback: return original message without location info
error_msg.to_string()
}
/// Extract and pretty-print the JSON object/array containing the parse error.
///
/// This function navigates to the error location using the serde path and extracts
/// the parent object or array to provide better context for debugging.
///
/// # Arguments
/// * `body` - The raw JSON string
/// * `path` - The serde path to the error (e.g., "data[0].faculty[0].displayName")
/// * `line` - Line number of the error (for fallback)
/// * `column` - Column number of the error (for fallback)
///
/// # Returns
/// A formatted string containing the JSON object with the error, or a fallback snippet
fn extract_json_object_at_path(
body: &str,
path: &serde_path_to_error::Path,
line: usize,
column: usize,
) -> String {
// Try to parse the entire JSON structure
let root_value: Value = match serde_json::from_str(body) {
Ok(v) => v,
Err(_) => {
// If we can't parse the JSON at all, fall back to line snippet
return build_error_snippet(body, line, column, 20);
}
};
// Navigate to the error location using the path
let path_str = path.to_string();
let segments = parse_path_segments(&path_str);
let (context_value, context_name) = navigate_to_context(&root_value, &segments);
// Pretty-print the context value with limited depth to avoid huge output
match serde_json::to_string_pretty(&context_value) {
Ok(pretty) => {
// Limit output to ~50 lines to prevent log spam
let lines: Vec<&str> = pretty.lines().collect();
let truncated = if lines.len() > 50 {
let mut result = lines[..47].join("\n");
result.push_str("\n ... (truncated, ");
result.push_str(&(lines.len() - 47).to_string());
result.push_str(" more lines)");
result
} else {
pretty
};
format!("{} at '{}':\n{}", context_name, path_str, truncated)
}
Err(_) => {
// Fallback to simple snippet if pretty-print fails
build_error_snippet(body, line, column, 20)
}
}
}
/// Parse a JSON path string into segments for navigation.
///
/// Converts paths like "data[0].faculty[1].displayName" into a sequence of
/// object keys and array indices.
fn parse_path_segments(path: &str) -> Vec<PathSegment> {
let mut segments = Vec::new();
let mut current = String::new();
let mut in_bracket = false;
for ch in path.chars() {
match ch {
'.' if !in_bracket => {
if !current.is_empty() {
segments.push(PathSegment::Key(current.clone()));
current.clear();
}
}
'[' => {
if !current.is_empty() {
segments.push(PathSegment::Key(current.clone()));
current.clear();
}
in_bracket = true;
}
']' => {
if in_bracket && !current.is_empty() {
if let Ok(index) = current.parse::<usize>() {
segments.push(PathSegment::Index(index));
}
current.clear();
}
in_bracket = false;
}
_ => current.push(ch),
}
}
if !current.is_empty() {
segments.push(PathSegment::Key(current));
}
segments
}
/// Represents a segment in a JSON path (either an object key or array index).
#[derive(Debug)]
enum PathSegment {
Key(String),
Index(usize),
}
/// Navigate through a JSON value using path segments and return the appropriate context.
///
/// This function walks the JSON structure and returns the parent object/array that
/// contains the error, providing meaningful context for debugging.
///
/// # Returns
/// A tuple of (context_value, description) where context_value is the JSON to display
/// and description is a human-readable name for what we're showing.
fn navigate_to_context<'a>(
mut current: &'a Value,
segments: &[PathSegment],
) -> (&'a Value, &'static str) {
// If path is empty or just root, return the whole value
if segments.is_empty() {
return (current, "Root object");
}
// Try to navigate to the parent of the error location
// We want to show the containing object/array, not just the failing field
let parent_depth = segments.len().saturating_sub(1);
for (i, segment) in segments.iter().enumerate() {
// Stop one level before the end to show the parent context
if i >= parent_depth {
break;
}
match segment {
PathSegment::Key(key) => {
if let Some(next) = current.get(key) {
current = next;
} else {
// Can't navigate further, return what we have
return (current, "Partial context (navigation stopped)");
}
}
PathSegment::Index(idx) => {
if let Some(next) = current.get(idx) {
current = next;
} else {
return (current, "Partial context (index out of bounds)");
}
}
}
}
(current, "Object containing error")
}
fn build_error_snippet(body: &str, line: usize, column: usize, context_len: usize) -> String {
let target_line = body.lines().nth(line.saturating_sub(1)).unwrap_or("");
if target_line.is_empty() {
return "(empty line)".to_string();
}
// column is 1-based, convert to 0-based for slicing
let error_idx = column.saturating_sub(1);
let half_len = context_len / 2;
let start = error_idx.saturating_sub(half_len);
let end = (error_idx + half_len).min(target_line.len());
let slice = &target_line[start..end];
let indicator_pos = error_idx - start;
let indicator = " ".repeat(indicator_pos) + "^";
format!("...{slice}...\n {indicator}")
}
#[cfg(test)]
mod tests {
use super::*;
use serde::Deserialize;
#[test]
fn test_parse_type_mismatch_invalid_type() {
let msg = "invalid type: null, expected a string at line 45 column 29";
let result = parse_type_mismatch(msg);
assert_eq!(result, "expected a string, got null");
}
#[test]
fn test_parse_type_mismatch_expected() {
let msg = "expected value at line 1 column 1";
let result = parse_type_mismatch(msg);
assert_eq!(result, "expected value");
}
#[test]
fn test_parse_path_segments_simple() {
let segments = parse_path_segments("data.name");
assert_eq!(segments.len(), 2);
match &segments[0] {
PathSegment::Key(k) => assert_eq!(k, "data"),
_ => panic!("Expected Key segment"),
}
}
#[test]
fn test_parse_path_segments_with_array() {
let segments = parse_path_segments("data[0].faculty[1].displayName");
assert_eq!(segments.len(), 5);
match &segments[0] {
PathSegment::Key(k) => assert_eq!(k, "data"),
_ => panic!("Expected Key segment"),
}
match &segments[1] {
PathSegment::Index(i) => assert_eq!(*i, 0),
_ => panic!("Expected Index segment"),
}
}
#[test]
fn test_parse_json_with_context_null_value() {
#[derive(Debug, Deserialize)]
struct TestStruct {
#[allow(dead_code)]
name: String,
}
let json = r#"{"name": null}"#;
let result: Result<TestStruct> = parse_json_with_context(json);
assert!(result.is_err());
let err_msg = result.unwrap_err().to_string();
// Should contain path info
assert!(err_msg.contains("name"));
// In debug mode, should contain detailed context
if cfg!(debug_assertions) {
assert!(err_msg.contains("expected"));
}
}
#[test]
fn test_navigate_to_context() {
let json = r#"{"data": [{"faculty": [{"name": "John"}]}]}"#;
let value: Value = serde_json::from_str(json).unwrap();
let segments = parse_path_segments("data[0].faculty[0].name");
let (context, _) = navigate_to_context(&value, &segments);
// Should return the faculty[0] object (parent of 'name')
assert!(context.is_object());
assert!(context.get("name").is_some());
}
#[test]
fn test_realistic_banner_error() {
#[derive(Debug, Deserialize)]
struct Course {
#[allow(dead_code)]
#[serde(rename = "courseTitle")]
course_title: String,
#[allow(dead_code)]
faculty: Vec<Faculty>,
}
#[derive(Debug, Deserialize)]
struct Faculty {
#[serde(rename = "displayName")]
#[allow(dead_code)]
display_name: String,
#[allow(dead_code)]
email: String,
}
#[derive(Debug, Deserialize)]
struct SearchResult {
#[allow(dead_code)]
data: Vec<Course>,
}
// Simulate Banner API response with null faculty displayName
// This mimics the actual error from SPN subject scrape
let json = r#"{
"data": [
{
"courseTitle": "Spanish Conversation",
"faculty": [
{
"displayName": null,
"email": "instructor@utsa.edu"
}
]
}
]
}"#;
let result: Result<SearchResult> = parse_json_with_context(json);
assert!(result.is_err());
let err_msg = result.unwrap_err().to_string();
println!("\n=== Error output in debug mode ===\n{}\n", err_msg);
// Verify error contains key information
assert!(err_msg.contains("data[0].faculty[0].displayName"));
// In debug mode, should show detailed context
if cfg!(debug_assertions) {
// Should show type mismatch info
assert!(err_msg.contains("expected") && err_msg.contains("got"));
// Should show surrounding JSON context with the faculty object
assert!(err_msg.contains("email"));
}
}
}
+72
@@ -0,0 +1,72 @@
//! HTTP middleware for the Banner API client.
use http::Extensions;
use reqwest::{Request, Response};
use reqwest_middleware::{Middleware, Next};
use tracing::{debug, trace, warn};
pub struct TransparentMiddleware;
/// Threshold for logging slow requests at DEBUG level (in milliseconds)
const SLOW_REQUEST_THRESHOLD_MS: u128 = 1000;
#[async_trait::async_trait]
impl Middleware for TransparentMiddleware {
async fn handle(
&self,
req: Request,
extensions: &mut Extensions,
next: Next<'_>,
) -> std::result::Result<Response, reqwest_middleware::Error> {
let method = req.method().to_string();
let path = req.url().path().to_string();
let start = std::time::Instant::now();
let response_result = next.run(req, extensions).await;
let duration = start.elapsed();
match response_result {
Ok(response) => {
if response.status().is_success() {
let duration_ms = duration.as_millis();
if duration_ms >= SLOW_REQUEST_THRESHOLD_MS {
debug!(
method = method,
path = path,
status = response.status().as_u16(),
duration_ms = duration_ms,
"Request completed (slow)"
);
} else {
trace!(
method = method,
path = path,
status = response.status().as_u16(),
duration_ms = duration_ms,
"Request completed"
);
}
Ok(response)
} else {
warn!(
method = method,
path = path,
status = response.status().as_u16(),
duration_ms = duration.as_millis(),
"Request failed"
);
Ok(response)
}
}
Err(error) => {
warn!(
method = method,
path = path,
duration_ms = duration.as_millis(),
"Request failed"
);
Err(error)
}
}
}
}
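This mirrors the wiring in api.rs; a minimal sketch of the same stack in isolation:
fn logged_client() -> reqwest_middleware::ClientWithMiddleware {
    // Every request through this client is timed by TransparentMiddleware:
    // TRACE on success, DEBUG when slower than 1s, WARN on failure or error.
    reqwest_middleware::ClientBuilder::new(reqwest::Client::new())
        .with(TransparentMiddleware)
        .build()
}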
+8 -2
@@ -3,16 +3,22 @@
//! This module provides functionality to:
//! - Search for courses and retrieve course information
//! - Manage Banner API sessions and authentication
//! - Scrape course data and cache it in Redis
//! - Generate ICS files and calendar links
pub mod api;
pub mod errors;
pub mod json;
pub mod middleware;
pub mod models;
pub mod query;
pub mod scraper;
pub mod rate_limit_middleware;
pub mod rate_limiter;
pub mod session;
pub mod util;
pub use api::*;
pub use errors::*;
pub use models::*;
pub use query::*;
pub use rate_limiter::*;
pub use session::*;
+1
@@ -11,6 +11,7 @@ pub struct Pair {
pub type BannerTerm = Pair;
/// Represents an instructor in the Banner system
#[allow(dead_code)]
pub type Instructor = Pair;
impl BannerTerm {
+16 -4
@@ -59,8 +59,20 @@ pub struct Course {
pub meetings_faculty: Vec<MeetingTimeResponse>,
}
/// Class details (to be implemented)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClassDetails {
// TODO: Implement based on Banner API response
impl Course {
/// Returns the course title in the format "SUBJ #### - Course Title"
pub fn display_title(&self) -> String {
format!(
"{} {} - {}",
self.subject, self.course_number, self.course_title
)
}
/// Returns the name of the primary instructor, or "Unknown" if not available
pub fn primary_instructor_name(&self) -> &str {
self.faculty
.first()
.map(|f| f.display_name.as_str())
.unwrap_or("Unknown")
}
}
+182 -138
@@ -1,10 +1,40 @@
use bitflags::{Flags, bitflags};
use chrono::{DateTime, NaiveDate, NaiveTime, Timelike, Utc};
use chrono::{DateTime, NaiveDate, NaiveTime, Timelike, Utc, Weekday};
use extension_traits::extension;
use serde::{Deserialize, Deserializer, Serialize};
use std::{cmp::Ordering, str::FromStr};
use super::terms::Term;
#[extension(pub trait WeekdayExt)]
impl Weekday {
/// Short two-letter representation (used for ICS generation)
fn to_short_string(self) -> &'static str {
match self {
Weekday::Mon => "Mo",
Weekday::Tue => "Tu",
Weekday::Wed => "We",
Weekday::Thu => "Th",
Weekday::Fri => "Fr",
Weekday::Sat => "Sa",
Weekday::Sun => "Su",
}
}
/// Full day name
fn to_full_string(self) -> &'static str {
match self {
Weekday::Mon => "Monday",
Weekday::Tue => "Tuesday",
Weekday::Wed => "Wednesday",
Weekday::Thu => "Thursday",
Weekday::Fri => "Friday",
Weekday::Sat => "Saturday",
Weekday::Sun => "Sunday",
}
}
}
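// e.g. Weekday::Tue.to_short_string() == "Tu"
//      Weekday::Tue.to_full_string() == "Tuesday"
// (the extension trait must be in scope, e.g. `use crate::banner::models::WeekdayExt;`
//  -- the exact import path is an assumption)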
/// Deserialize a string field into a u32
fn deserialize_string_to_u32<'de, D>(deserializer: D) -> Result<u32, D::Error>
where
@@ -33,7 +63,7 @@ pub struct FacultyItem {
#[serde(deserialize_with = "deserialize_string_to_u32")]
pub course_reference_number: u32, // CRN, e.g 27294
pub display_name: String, // "LastName, FirstName"
pub email_address: String, // e.g. FirstName.LastName@utsa.edu
pub email_address: Option<String>, // e.g. FirstName.LastName@utsa.edu
pub primary_indicator: bool,
pub term: String, // e.g "202420"
}
@@ -42,11 +72,11 @@ pub struct FacultyItem {
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MeetingTime {
pub start_date: String, // MM/DD/YYYY, e.g 08/26/2025
pub end_date: String, // MM/DD/YYYY, e.g 08/26/2025
pub begin_time: String, // HHMM, e.g 1000
pub end_time: String, // HHMM, e.g 1100
pub category: String, // unknown meaning, e.g. 01, 02, etc
pub start_date: String, // MM/DD/YYYY, e.g 08/26/2025
pub end_date: String, // MM/DD/YYYY, e.g 08/26/2025
pub begin_time: Option<String>, // HHMM, e.g 1000
pub end_time: Option<String>, // HHMM, e.g 1100
pub category: String, // unknown meaning, e.g. 01, 02, etc
pub class: String, // internal class name, e.g. net.hedtech.banner.general.overallMeetingTimeDecorator
pub monday: bool, // true if the meeting time occurs on Monday
pub tuesday: bool, // true if the meeting time occurs on Tuesday
@@ -55,15 +85,15 @@ pub struct MeetingTime {
pub friday: bool, // true if the meeting time occurs on Friday
pub saturday: bool, // true if the meeting time occurs on Saturday
pub sunday: bool, // true if the meeting time occurs on Sunday
pub room: String, // e.g. 1238
pub room: Option<String>, // e.g. 1.238
#[serde(deserialize_with = "deserialize_string_to_term")]
pub term: Term, // e.g 202510
pub building: String, // e.g NPB
pub building_description: String, // e.g North Paseo Building
pub campus: String, // campus code, e.g 11
pub campus_description: String, // name of campus, e.g Main Campus
pub building: Option<String>, // e.g NPB
pub building_description: Option<String>, // e.g North Paseo Building
pub campus: Option<String>, // campus code, e.g 11
pub campus_description: Option<String>, // name of campus, e.g Main Campus
pub course_reference_number: String, // CRN, e.g 27294
pub credit_hour_session: f64, // e.g. 30
pub credit_hour_session: Option<f64>, // e.g. 30
pub hours_week: f64, // e.g. 30
pub meeting_schedule_type: String, // e.g AFF
pub meeting_type: String, // e.g HB, H2, H1, OS, OA, OH, ID, FF
@@ -114,67 +144,33 @@ impl MeetingDays {
}
}
impl Ord for MeetingDays {
fn cmp(&self, other: &Self) -> Ordering {
self.bits().cmp(&other.bits())
}
}
impl PartialOrd for MeetingDays {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.bits().cmp(&other.bits()))
Some(self.cmp(other))
}
}
impl From<DayOfWeek> for MeetingDays {
fn from(day: DayOfWeek) -> Self {
impl From<Weekday> for MeetingDays {
fn from(day: Weekday) -> Self {
match day {
DayOfWeek::Monday => MeetingDays::Monday,
DayOfWeek::Tuesday => MeetingDays::Tuesday,
DayOfWeek::Wednesday => MeetingDays::Wednesday,
DayOfWeek::Thursday => MeetingDays::Thursday,
DayOfWeek::Friday => MeetingDays::Friday,
DayOfWeek::Saturday => MeetingDays::Saturday,
DayOfWeek::Sunday => MeetingDays::Sunday,
Weekday::Mon => MeetingDays::Monday,
Weekday::Tue => MeetingDays::Tuesday,
Weekday::Wed => MeetingDays::Wednesday,
Weekday::Thu => MeetingDays::Thursday,
Weekday::Fri => MeetingDays::Friday,
Weekday::Sat => MeetingDays::Saturday,
Weekday::Sun => MeetingDays::Sunday,
}
}
}
/// Days of the week for meeting schedules
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DayOfWeek {
Monday,
Tuesday,
Wednesday,
Thursday,
Friday,
Saturday,
Sunday,
}
impl DayOfWeek {
/// Convert to short string representation
pub fn to_short_string(&self) -> &'static str {
match self {
DayOfWeek::Monday => "M",
DayOfWeek::Tuesday => "Tu",
DayOfWeek::Wednesday => "W",
DayOfWeek::Thursday => "Th",
DayOfWeek::Friday => "F",
DayOfWeek::Saturday => "Sa",
DayOfWeek::Sunday => "Su",
}
}
/// Convert to full string representation
pub fn to_string(&self) -> &'static str {
match self {
DayOfWeek::Monday => "Monday",
DayOfWeek::Tuesday => "Tuesday",
DayOfWeek::Wednesday => "Wednesday",
DayOfWeek::Thursday => "Thursday",
DayOfWeek::Friday => "Friday",
DayOfWeek::Saturday => "Saturday",
DayOfWeek::Sunday => "Sunday",
}
}
}
impl TryFrom<MeetingDays> for DayOfWeek {
impl TryFrom<MeetingDays> for Weekday {
type Error = anyhow::Error;
fn try_from(days: MeetingDays) -> Result<Self, Self::Error> {
@@ -185,21 +181,20 @@ impl TryFrom<MeetingDays> for DayOfWeek {
let count = days.into_iter().count();
if count == 1 {
return Ok(match days {
MeetingDays::Monday => DayOfWeek::Monday,
MeetingDays::Tuesday => DayOfWeek::Tuesday,
MeetingDays::Wednesday => DayOfWeek::Wednesday,
MeetingDays::Thursday => DayOfWeek::Thursday,
MeetingDays::Friday => DayOfWeek::Friday,
MeetingDays::Saturday => DayOfWeek::Saturday,
MeetingDays::Sunday => DayOfWeek::Sunday,
MeetingDays::Monday => Weekday::Mon,
MeetingDays::Tuesday => Weekday::Tue,
MeetingDays::Wednesday => Weekday::Wed,
MeetingDays::Thursday => Weekday::Thu,
MeetingDays::Friday => Weekday::Fri,
MeetingDays::Saturday => Weekday::Sat,
MeetingDays::Sunday => Weekday::Sun,
_ => unreachable!(),
});
}
return Err(anyhow::anyhow!(
"Cannot convert multiple days to a single day: {:?}",
days
));
Err(anyhow::anyhow!(
"Cannot convert multiple days to a single day: {days:?}"
))
}
}
@@ -252,18 +247,17 @@ impl TimeRange {
let hour = time.hour();
let minute = time.minute();
if hour == 0 {
format!("12:{:02}AM", minute)
} else if hour < 12 {
format!("{}:{:02}AM", hour, minute)
} else if hour == 12 {
format!("12:{:02}PM", minute)
} else {
format!("{}:{:02}PM", hour - 12, minute)
}
let meridiem = if hour < 12 { "AM" } else { "PM" };
let display_hour = match hour {
0 => 12,
13..=23 => hour - 12,
_ => hour,
};
format!("{display_hour}:{minute:02}{meridiem}")
}
/// Get duration in minutes
#[allow(dead_code)]
pub fn duration_minutes(&self) -> i64 {
let start_minutes = self.start.hour() as i64 * 60 + self.start.minute() as i64;
let end_minutes = self.end.hour() as i64 * 60 + self.end.minute() as i64;
@@ -304,10 +298,11 @@ impl DateRange {
/// Get the number of weeks between start and end dates
pub fn weeks_duration(&self) -> u32 {
let duration = self.end.signed_duration_since(self.start);
duration.num_weeks() as u32
duration.num_weeks().max(0) as u32
}
/// Check if a specific date falls within this range
#[allow(dead_code)]
pub fn contains_date(&self, date: NaiveDate) -> bool {
date >= self.start && date <= self.end
}
@@ -325,10 +320,11 @@ pub enum MeetingType {
Unknown(String),
}
impl MeetingType {
/// Parse from the meeting type string
pub fn from_string(s: &str) -> Self {
match s {
impl std::str::FromStr for MeetingType {
type Err = std::convert::Infallible;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
Ok(match s {
"HB" | "H2" | "H1" => MeetingType::HybridBlended,
"OS" => MeetingType::OnlineSynchronous,
"OA" => MeetingType::OnlineAsynchronous,
@@ -336,9 +332,11 @@ impl MeetingType {
"ID" => MeetingType::IndependentStudy,
"FF" => MeetingType::FaceToFace,
other => MeetingType::Unknown(other.to_string()),
}
})
}
}
impl MeetingType {
/// Get description for the meeting type
pub fn description(&self) -> &'static str {
match self {
@@ -355,37 +353,46 @@ impl MeetingType {
/// Meeting location information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MeetingLocation {
pub campus: String,
pub building: String,
pub building_description: String,
pub room: String,
pub is_online: bool,
pub enum MeetingLocation {
Online,
InPerson {
campus: String,
campus_description: String,
building: String,
building_description: String,
room: String,
},
}
impl MeetingLocation {
/// Create from raw MeetingTime data
pub fn from_meeting_time(meeting_time: &MeetingTime) -> Self {
let is_online = meeting_time.room.is_empty();
if let (
Some(campus),
Some(campus_description),
Some(building),
Some(building_description),
Some(room),
) = (
&meeting_time.campus,
&meeting_time.campus_description,
&meeting_time.building,
&meeting_time.building_description,
&meeting_time.room,
) {
if campus_description == "Internet" {
return MeetingLocation::Online;
}
MeetingLocation {
campus: meeting_time.campus_description.clone(),
building: meeting_time.building.clone(),
building_description: meeting_time.building_description.clone(),
room: meeting_time.room.clone(),
is_online,
}
}
/// Convert to formatted string
pub fn to_string(&self) -> String {
if self.is_online {
"Online".to_string()
MeetingLocation::InPerson {
campus: campus.clone(),
campus_description: campus_description.clone(),
building: building.clone(),
building_description: building_description.clone(),
room: room.clone(),
}
} else {
format!(
"{} | {} | {} {}",
self.campus, self.building_description, self.building, self.room
)
MeetingLocation::Online
}
}
}
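// Consuming the enum forces callers to handle the online case explicitly, e.g.:
//
//     let label = match location {
//         MeetingLocation::Online => "Online".to_string(),
//         MeetingLocation::InPerson { building, room, .. } => format!("{building} {room}"),
//     };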
@@ -405,7 +412,11 @@ impl MeetingScheduleInfo {
/// Create from raw MeetingTime data
pub fn from_meeting_time(meeting_time: &MeetingTime) -> Self {
let days = MeetingDays::from_meeting_time(meeting_time);
let time_range = TimeRange::from_hhmm(&meeting_time.begin_time, &meeting_time.end_time);
let time_range = match (&meeting_time.begin_time, &meeting_time.end_time) {
(Some(begin), Some(end)) => TimeRange::from_hhmm(begin, end),
_ => None,
};
let date_range =
DateRange::from_mm_dd_yyyy(&meeting_time.start_date, &meeting_time.end_date)
.unwrap_or_else(|| {
@@ -416,7 +427,7 @@ impl MeetingScheduleInfo {
end: now,
}
});
let meeting_type = MeetingType::from_string(&meeting_time.meeting_type);
let meeting_type: MeetingType = meeting_time.meeting_type.parse().unwrap();
let location = MeetingLocation::from_meeting_time(meeting_time);
let duration_weeks = date_range.weeks_duration();
@@ -430,44 +441,77 @@ impl MeetingScheduleInfo {
}
}
/// Convert the meeting days bitset to an enum vector
pub fn days_of_week(&self) -> Vec<DayOfWeek> {
/// Convert the meeting days bitset to a weekday vector
pub fn days_of_week(&self) -> Vec<Weekday> {
self.days
.iter()
.map(|day| <MeetingDays as TryInto<DayOfWeek>>::try_into(day).unwrap())
.map(|day| <MeetingDays as TryInto<Weekday>>::try_into(day).unwrap())
.collect()
}
/// Get formatted days string
pub fn days_string(&self) -> String {
pub fn days_string(&self) -> Option<String> {
if self.days.is_empty() {
"None".to_string()
} else if self.days.is_all() {
"Everyday".to_string()
} else {
self.days_of_week()
.iter()
.map(|day| day.to_short_string())
.collect::<Vec<_>>()
.join("")
return None;
}
if self.days.is_all() {
return Some("Everyday".to_string());
}
let days_of_week = self.days_of_week();
if days_of_week.len() == 1 {
return Some(days_of_week[0].to_full_string().to_string());
}
// Mapper function to get the short string representation of the day of week
let mapper = {
let ambiguous = self.days.intersects(
MeetingDays::Tuesday
| MeetingDays::Thursday
| MeetingDays::Saturday
| MeetingDays::Sunday,
);
if ambiguous {
|day: &Weekday| day.to_short_string().to_string()
} else {
|day: &Weekday| day.to_short_string().chars().next().unwrap().to_string()
}
};
Some(days_of_week.iter().map(mapper).collect::<String>())
}
/// Returns a formatted string representing the location of the meeting
pub fn place_string(&self) -> String {
if self.location.room.is_empty() {
"Online".to_string()
} else {
format!(
match &self.location {
MeetingLocation::Online => "Online".to_string(),
MeetingLocation::InPerson {
campus,
building,
building_description,
room,
..
} => format!(
"{} | {} | {} {}",
self.location.campus,
self.location.building_description,
self.location.building,
self.location.room
)
campus, building_description, building, room
),
}
}
/// Sort a slice of meeting schedule infos by start time, with stable fallback to day bits.
///
/// Meetings with a time range sort before those without one.
/// Among meetings without a time range, ties break by day-of-week bits.
pub fn sort_by_start_time(meetings: &mut [MeetingScheduleInfo]) {
meetings.sort_unstable_by(|a, b| match (&a.time_range, &b.time_range) {
(Some(a_time), Some(b_time)) => a_time.start.cmp(&b_time.start),
(Some(_), None) => std::cmp::Ordering::Less,
(None, Some(_)) => std::cmp::Ordering::Greater,
(None, None) => a.days.bits().cmp(&b.days.bits()),
});
}
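// e.g. MeetingScheduleInfo::sort_by_start_time(&mut meetings);
//      // timed meetings now lead, ordered by start time; untimed meetings
//      // trail in a deterministic day-bit order.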
/// Get the start and end date times for the meeting
///
/// Uses the start and end times of the meeting if available, otherwise defaults to midnight (00:00:00.000).
+2 -2
@@ -10,8 +10,8 @@ pub struct SearchResult {
pub total_count: i32,
pub page_offset: i32,
pub page_max_size: i32,
pub path_mode: String,
pub search_results_config: Vec<SearchResultConfig>,
pub path_mode: Option<String>,
pub search_results_config: Option<Vec<SearchResultConfig>>,
pub data: Option<Vec<Course>>,
}
+347 -16
@@ -13,7 +13,7 @@ const CURRENT_YEAR: u32 = compile_time::date!().year() as u32;
const VALID_YEARS: RangeInclusive<u32> = 2007..=(CURRENT_YEAR + 10);
/// Represents a term in the Banner system
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct Term {
pub year: u32, // 2024, 2025, etc
pub season: Season,
@@ -29,7 +29,7 @@ pub enum TermPoint {
}
/// Represents a season within a term
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum Season {
Fall,
Spring,
@@ -46,7 +46,7 @@ impl Term {
/// Returns the current term status for a specific date
pub fn get_status_for_date(date: NaiveDate) -> TermPoint {
let literal_year = date.year() as u32;
let day_of_year = date.ordinal() as u32;
let day_of_year = date.ordinal();
let ranges = Self::get_season_ranges(literal_year);
// If we're past the end of the summer term, we're 'in' the next school year.
@@ -115,22 +115,22 @@ impl Term {
fn get_season_ranges(year: u32) -> SeasonRanges {
let spring_start = NaiveDate::from_ymd_opt(year as i32, 1, 14)
.unwrap()
.ordinal() as u32;
.ordinal();
let spring_end = NaiveDate::from_ymd_opt(year as i32, 5, 1)
.unwrap()
.ordinal() as u32;
.ordinal();
let summer_start = NaiveDate::from_ymd_opt(year as i32, 5, 25)
.unwrap()
.ordinal() as u32;
.ordinal();
let summer_end = NaiveDate::from_ymd_opt(year as i32, 8, 15)
.unwrap()
.ordinal() as u32;
.ordinal();
let fall_start = NaiveDate::from_ymd_opt(year as i32, 8, 18)
.unwrap()
.ordinal() as u32;
.ordinal();
let fall_end = NaiveDate::from_ymd_opt(year as i32, 12, 10)
.unwrap()
.ordinal() as u32;
.ordinal();
SeasonRanges {
spring: YearDayRange {
@@ -148,10 +148,36 @@ impl Term {
}
}
/// Returns a long string representation of the term (e.g., "Fall 2025")
pub fn to_long_string(&self) -> String {
/// URL-friendly slug, e.g. "spring-2026"
pub fn slug(&self) -> String {
format!("{}-{}", self.season.slug(), self.year)
}
/// Parse a slug like "spring-2026" into a Term
pub fn from_slug(s: &str) -> Option<Self> {
let (season_str, year_str) = s.rsplit_once('-')?;
let season = Season::from_slug(season_str)?;
let year = year_str.parse::<u32>().ok()?;
if !VALID_YEARS.contains(&year) {
return None;
}
Some(Term { year, season })
}
/// Human-readable description, e.g. "Spring 2026"
pub fn description(&self) -> String {
format!("{} {}", self.season, self.year)
}
/// Resolve a string that is either a term code ("202620") or a slug ("spring-2026") to a term code.
pub fn resolve_to_code(s: &str) -> Option<String> {
// Try parsing as a 6-digit code first
if let Ok(term) = s.parse::<Term>() {
return Some(term.to_string());
}
// Try parsing as a slug
Term::from_slug(s).map(|t| t.to_string())
}
}
impl TermPoint {
@@ -179,22 +205,46 @@ struct YearDayRange {
end: u32,
}
impl ToString for Term {
impl std::fmt::Display for Term {
/// Returns the term in the format YYYYXX, where YYYY is the year and XX is the season code
fn to_string(&self) -> String {
format!("{}{}", self.year, self.season.to_str())
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{year}{season}",
year = self.year,
season = self.season.to_str()
)
}
}
impl Season {
/// Returns the season code as a string
fn to_str(&self) -> &'static str {
fn to_str(self) -> &'static str {
match self {
Season::Fall => "10",
Season::Spring => "20",
Season::Summer => "30",
}
}
/// Returns the lowercase slug for URL-friendly representation
pub fn slug(self) -> &'static str {
match self {
Season::Fall => "fall",
Season::Spring => "spring",
Season::Summer => "summer",
}
}
/// Parse a slug like "spring", "summer", "fall" into a Season
pub fn from_slug(s: &str) -> Option<Self> {
match s {
"fall" => Some(Season::Fall),
"spring" => Some(Season::Spring),
"summer" => Some(Season::Summer),
_ => None,
}
}
}
impl std::fmt::Display for Season {
@@ -215,7 +265,7 @@ impl FromStr for Season {
"10" => Season::Fall,
"20" => Season::Spring,
"30" => Season::Summer,
_ => return Err(anyhow::anyhow!("Invalid season: {}", s)),
_ => return Err(anyhow::anyhow!("Invalid season: {s}")),
};
Ok(season)
}
@@ -240,3 +290,284 @@ impl FromStr for Term {
Ok(Term { year, season })
}
}
#[cfg(test)]
mod tests {
use super::*;
// --- Season::from_str ---
#[test]
fn test_season_from_str_fall() {
assert_eq!(Season::from_str("10").unwrap(), Season::Fall);
}
#[test]
fn test_season_from_str_spring() {
assert_eq!(Season::from_str("20").unwrap(), Season::Spring);
}
#[test]
fn test_season_from_str_summer() {
assert_eq!(Season::from_str("30").unwrap(), Season::Summer);
}
#[test]
fn test_season_from_str_invalid() {
for input in ["00", "40", "1", ""] {
assert!(
Season::from_str(input).is_err(),
"expected Err for {input:?}"
);
}
}
// --- Season Display ---
#[test]
fn test_season_display() {
assert_eq!(Season::Fall.to_string(), "Fall");
assert_eq!(Season::Spring.to_string(), "Spring");
assert_eq!(Season::Summer.to_string(), "Summer");
}
#[test]
fn test_season_to_str_roundtrip() {
for season in [Season::Fall, Season::Spring, Season::Summer] {
assert_eq!(Season::from_str(season.to_str()).unwrap(), season);
}
}
// --- Term::from_str ---
#[test]
fn test_term_from_str_valid_fall() {
let term = Term::from_str("202510").unwrap();
assert_eq!(term.year, 2025);
assert_eq!(term.season, Season::Fall);
}
#[test]
fn test_term_from_str_valid_spring() {
let term = Term::from_str("202520").unwrap();
assert_eq!(term.year, 2025);
assert_eq!(term.season, Season::Spring);
}
#[test]
fn test_term_from_str_valid_summer() {
let term = Term::from_str("202530").unwrap();
assert_eq!(term.year, 2025);
assert_eq!(term.season, Season::Summer);
}
#[test]
fn test_term_from_str_too_short() {
assert!(Term::from_str("20251").is_err());
}
#[test]
fn test_term_from_str_too_long() {
assert!(Term::from_str("2025100").is_err());
}
#[test]
fn test_term_from_str_empty() {
assert!(Term::from_str("").is_err());
}
#[test]
fn test_term_from_str_invalid_year_chars() {
assert!(Term::from_str("abcd10").is_err());
}
#[test]
fn test_term_from_str_invalid_season() {
assert!(Term::from_str("202540").is_err());
}
#[test]
fn test_term_from_str_year_below_range() {
assert!(Term::from_str("200010").is_err());
}
#[test]
fn test_term_display_roundtrip() {
for code in ["202510", "202520", "202530"] {
let term = Term::from_str(code).unwrap();
assert_eq!(term.to_string(), code);
}
}
// --- Term::get_status_for_date ---
#[test]
fn test_status_mid_spring() {
let date = NaiveDate::from_ymd_opt(2025, 2, 15).unwrap();
let status = Term::get_status_for_date(date);
assert!(
matches!(status, TermPoint::InTerm { current } if current.season == Season::Spring)
);
}
#[test]
fn test_status_mid_summer() {
let date = NaiveDate::from_ymd_opt(2025, 7, 1).unwrap();
let status = Term::get_status_for_date(date);
assert!(
matches!(status, TermPoint::InTerm { current } if current.season == Season::Summer)
);
}
#[test]
fn test_status_mid_fall() {
let date = NaiveDate::from_ymd_opt(2025, 10, 15).unwrap();
let status = Term::get_status_for_date(date);
assert!(matches!(status, TermPoint::InTerm { current } if current.season == Season::Fall));
}
#[test]
fn test_status_between_fall_and_spring() {
let date = NaiveDate::from_ymd_opt(2025, 1, 1).unwrap();
let status = Term::get_status_for_date(date);
assert!(
matches!(status, TermPoint::BetweenTerms { next } if next.season == Season::Spring)
);
}
#[test]
fn test_status_between_spring_and_summer() {
let date = NaiveDate::from_ymd_opt(2025, 5, 15).unwrap();
let status = Term::get_status_for_date(date);
assert!(
matches!(status, TermPoint::BetweenTerms { next } if next.season == Season::Summer)
);
}
#[test]
fn test_status_between_summer_and_fall() {
let date = NaiveDate::from_ymd_opt(2025, 8, 16).unwrap();
let status = Term::get_status_for_date(date);
assert!(matches!(status, TermPoint::BetweenTerms { next } if next.season == Season::Fall));
}
#[test]
fn test_status_after_fall_end() {
let date = NaiveDate::from_ymd_opt(2025, 12, 15).unwrap();
let status = Term::get_status_for_date(date);
assert!(
matches!(status, TermPoint::BetweenTerms { next } if next.season == Season::Spring)
);
// Year should roll over: fall 2025 ends → next spring is 2026
let next_term = status.inner();
assert_eq!(next_term.year, 2026);
}
// --- TermPoint::inner ---
#[test]
fn test_term_point_inner() {
let in_term = TermPoint::InTerm {
current: Term {
year: 2025,
season: Season::Fall,
},
};
assert_eq!(
in_term.inner(),
&Term {
year: 2025,
season: Season::Fall
}
);
let between = TermPoint::BetweenTerms {
next: Term {
year: 2026,
season: Season::Spring,
},
};
assert_eq!(
between.inner(),
&Term {
year: 2026,
season: Season::Spring
}
);
}
// --- Season::slug / from_slug ---
#[test]
fn test_season_slug_roundtrip() {
for season in [Season::Fall, Season::Spring, Season::Summer] {
assert_eq!(Season::from_slug(season.slug()), Some(season));
}
}
#[test]
fn test_season_from_slug_invalid() {
assert_eq!(Season::from_slug("winter"), None);
assert_eq!(Season::from_slug(""), None);
assert_eq!(Season::from_slug("Spring"), None); // case-sensitive
}
// --- Term::slug / from_slug ---
#[test]
fn test_term_slug() {
let term = Term {
year: 2026,
season: Season::Spring,
};
assert_eq!(term.slug(), "spring-2026");
}
#[test]
fn test_term_from_slug_roundtrip() {
for code in ["202510", "202520", "202530"] {
let term = Term::from_str(code).unwrap();
let slug = term.slug();
let parsed = Term::from_slug(&slug).unwrap();
assert_eq!(parsed, term);
}
}
#[test]
fn test_term_from_slug_invalid() {
assert_eq!(Term::from_slug("winter-2026"), None);
assert_eq!(Term::from_slug("spring"), None);
assert_eq!(Term::from_slug(""), None);
}
// --- Term::description ---
#[test]
fn test_term_description() {
let term = Term {
year: 2026,
season: Season::Spring,
};
assert_eq!(term.description(), "Spring 2026");
}
// --- Term::resolve_to_code ---
#[test]
fn test_resolve_to_code_from_code() {
assert_eq!(Term::resolve_to_code("202620"), Some("202620".to_string()));
}
#[test]
fn test_resolve_to_code_from_slug() {
assert_eq!(
Term::resolve_to_code("spring-2026"),
Some("202620".to_string())
);
}
#[test]
fn test_resolve_to_code_invalid() {
assert_eq!(Term::resolve_to_code("garbage"), None);
}
}
+219 -24
@@ -10,8 +10,9 @@ pub struct Range {
pub high: i32,
}
/// Builder for constructing Banner API search queries
/// Builder for constructing Banner API search queries.
#[derive(Debug, Clone, Default)]
#[allow(dead_code)]
pub struct SearchQuery {
subject: Option<String>,
title: Option<String>,
@@ -32,6 +33,7 @@ pub struct SearchQuery {
course_number_range: Option<Range>,
}
#[allow(dead_code)]
impl SearchQuery {
/// Creates a new SearchQuery with default values
pub fn new() -> Self {
@@ -155,11 +157,22 @@ impl SearchQuery {
}
/// Sets the maximum number of results to return
/// Clamped to a maximum of 500 to prevent excessive API load
pub fn max_results(mut self, max_results: i32) -> Self {
self.max_results = max_results;
self.max_results = max_results.clamp(1, 500);
self
}
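// Clamp behavior, worked through (sketch based on the clamp above):
// SearchQuery::new().max_results(9_999).get_max_results() == 500
// SearchQuery::new().max_results(0).get_max_results() == 1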
/// Gets the subject field
pub fn get_subject(&self) -> Option<&String> {
self.subject.as_ref()
}
/// Gets the max_results field
pub fn get_max_results(&self) -> i32 {
self.max_results
}
/// Converts the query into URL parameters for the Banner API
pub fn to_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
@@ -180,7 +193,7 @@ impl SearchQuery {
params.insert("txt_keywordlike".to_string(), keywords.join(" "));
}
if self.open_only.is_some() {
if self.open_only == Some(true) {
params.insert("chk_open_only".to_string(), "true".to_string());
}
@@ -242,26 +255,208 @@ impl SearchQuery {
}
}
/// Formats a Duration into hour, minute, and meridiem strings for Banner API
/// Formats a Duration into hour, minute, and meridiem strings for Banner API.
///
/// Uses 12-hour format: midnight = 12:00 AM, noon = 12:00 PM.
fn format_time_parameter(duration: Duration) -> (String, String, String) {
let total_minutes = duration.as_secs() / 60;
let hours = total_minutes / 60;
let minutes = total_minutes % 60;
let minute_str = minutes.to_string();
let meridiem = if hours >= 12 { "PM" } else { "AM" };
let hour_12 = match hours % 12 {
0 => 12,
h => h,
};
if hours >= 12 {
let meridiem = "PM".to_string();
let hour_str = if hours >= 13 {
(hours - 12).to_string()
} else {
hours.to_string()
};
(hour_str, minute_str, meridiem)
} else {
let meridiem = "AM".to_string();
let hour_str = hours.to_string();
(hour_str, minute_str, meridiem)
(
hour_12.to_string(),
minutes.to_string(),
meridiem.to_string(),
)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_new_defaults() {
let q = SearchQuery::new();
assert_eq!(q.get_max_results(), 8);
assert!(q.get_subject().is_none());
let params = q.to_params();
assert_eq!(params.get("pageMaxSize").unwrap(), "8");
assert_eq!(params.get("pageOffset").unwrap(), "0");
assert_eq!(params.len(), 2);
}
#[test]
fn test_subject_param() {
let params = SearchQuery::new().subject("CS").to_params();
assert_eq!(params.get("txt_subject").unwrap(), "CS");
}
#[test]
fn test_title_trims_whitespace() {
let params = SearchQuery::new().title(" Intro to CS ").to_params();
assert_eq!(params.get("txt_courseTitle").unwrap(), "Intro to CS");
}
#[test]
fn test_crn_param() {
let params = SearchQuery::new()
.course_reference_number("12345")
.to_params();
assert_eq!(params.get("txt_courseReferenceNumber").unwrap(), "12345");
}
#[test]
fn test_keywords_joined_with_spaces() {
let params = SearchQuery::new()
.keyword("data")
.keyword("science")
.to_params();
assert_eq!(params.get("txt_keywordlike").unwrap(), "data science");
}
#[test]
fn test_keywords_vec() {
let params = SearchQuery::new()
.keywords(vec!["machine".into(), "learning".into()])
.to_params();
assert_eq!(params.get("txt_keywordlike").unwrap(), "machine learning");
}
#[test]
fn test_open_only() {
let params = SearchQuery::new().open_only(true).to_params();
assert_eq!(params.get("chk_open_only").unwrap(), "true");
// open_only(false) should NOT set the param
let params2 = SearchQuery::new().open_only(false).to_params();
assert!(params2.get("chk_open_only").is_none());
}
#[test]
fn test_credits_range() {
let params = SearchQuery::new().credits(3, 6).to_params();
assert_eq!(params.get("txt_credithourlow").unwrap(), "3");
assert_eq!(params.get("txt_credithourhigh").unwrap(), "6");
}
#[test]
fn test_course_number_range() {
let params = SearchQuery::new().course_numbers(3000, 3999).to_params();
assert_eq!(params.get("txt_course_number_range").unwrap(), "3000");
assert_eq!(params.get("txt_course_number_range_to").unwrap(), "3999");
}
#[test]
fn test_pagination() {
let params = SearchQuery::new().offset(20).max_results(10).to_params();
assert_eq!(params.get("pageOffset").unwrap(), "20");
assert_eq!(params.get("pageMaxSize").unwrap(), "10");
}
#[test]
fn test_format_time_9am() {
let (h, m, mer) = format_time_parameter(Duration::from_secs(9 * 3600));
assert_eq!(h, "9");
assert_eq!(m, "0");
assert_eq!(mer, "AM");
}
#[test]
fn test_format_time_noon() {
let (h, m, mer) = format_time_parameter(Duration::from_secs(12 * 3600));
assert_eq!(h, "12");
assert_eq!(m, "0");
assert_eq!(mer, "PM");
}
#[test]
fn test_format_time_1pm() {
let (h, m, mer) = format_time_parameter(Duration::from_secs(13 * 3600));
assert_eq!(h, "1");
assert_eq!(m, "0");
assert_eq!(mer, "PM");
}
#[test]
fn test_format_time_930am() {
let (h, m, mer) = format_time_parameter(Duration::from_secs(9 * 3600 + 30 * 60));
assert_eq!(h, "9");
assert_eq!(m, "30");
assert_eq!(mer, "AM");
}
#[test]
fn test_format_time_midnight() {
let (h, m, mer) = format_time_parameter(Duration::from_secs(0));
assert_eq!(h, "12");
assert_eq!(m, "0");
assert_eq!(mer, "AM");
}
#[test]
fn test_time_params_in_query() {
let params = SearchQuery::new()
.start_time(Duration::from_secs(9 * 3600))
.end_time(Duration::from_secs(17 * 3600))
.to_params();
assert_eq!(params.get("select_start_hour").unwrap(), "9");
assert_eq!(params.get("select_start_ampm").unwrap(), "AM");
assert_eq!(params.get("select_end_hour").unwrap(), "5");
assert_eq!(params.get("select_end_ampm").unwrap(), "PM");
}
#[test]
fn test_multi_value_params() {
let params = SearchQuery::new()
.campus(vec!["MAIN".into(), "DT".into()])
.attributes(vec!["HONORS".into()])
.instructor(vec![1001, 1002])
.to_params();
assert_eq!(params.get("txt_campus").unwrap(), "MAIN,DT");
assert_eq!(params.get("txt_attribute").unwrap(), "HONORS");
assert_eq!(params.get("txt_instructor").unwrap(), "1001,1002");
}
#[test]
fn test_display_minimal() {
let display = SearchQuery::new().to_string();
assert_eq!(display, "offset=0, maxResults=8");
}
#[test]
fn test_display_with_fields() {
let display = SearchQuery::new()
.subject("CS")
.open_only(true)
.max_results(10)
.to_string();
assert!(display.contains("subject=CS"));
assert!(display.contains("openOnly=true"));
assert!(display.contains("maxResults=10"));
}
#[test]
fn test_full_query_param_count() {
let params = SearchQuery::new()
.subject("CS")
.title("Intro")
.course_reference_number("12345")
.keyword("programming")
.open_only(true)
.credits(3, 4)
.course_numbers(1000, 1999)
.offset(0)
.max_results(25)
.to_params();
// subject, title, crn, keyword, open_only, min_credits, max_credits,
// course_number_range, course_number_range_to, pageOffset, pageMaxSize = 11
assert_eq!(params.len(), 11);
}
}
@@ -270,7 +465,7 @@ impl std::fmt::Display for SearchQuery {
let mut parts = Vec::new();
if let Some(ref subject) = self.subject {
parts.push(format!("subject={}", subject));
parts.push(format!("subject={subject}"));
}
if let Some(ref title) = self.title {
parts.push(format!("title={}", title.trim()));
@@ -278,7 +473,7 @@ impl std::fmt::Display for SearchQuery {
if let Some(ref keywords) = self.keywords {
parts.push(format!("keywords={}", keywords.join(" ")));
}
if self.open_only.is_some() {
if self.open_only == Some(true) {
parts.push("openOnly=true".to_string());
}
if let Some(ref term_part) = self.term_part {
@@ -296,21 +491,21 @@ impl std::fmt::Display for SearchQuery {
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join(",");
parts.push(format!("instructor={}", instructor_str));
parts.push(format!("instructor={instructor_str}"));
}
if let Some(start_time) = self.start_time {
let (hour, minute, meridiem) = format_time_parameter(start_time);
parts.push(format!("startTime={}:{}:{}", hour, minute, meridiem));
parts.push(format!("startTime={hour}:{minute}:{meridiem}"));
}
if let Some(end_time) = self.end_time {
let (hour, minute, meridiem) = format_time_parameter(end_time);
parts.push(format!("endTime={}:{}:{}", hour, minute, meridiem));
parts.push(format!("endTime={hour}:{minute}:{meridiem}"));
}
if let Some(min_credits) = self.min_credits {
parts.push(format!("minCredits={}", min_credits));
parts.push(format!("minCredits={min_credits}"));
}
if let Some(max_credits) = self.max_credits {
parts.push(format!("maxCredits={}", max_credits));
parts.push(format!("maxCredits={max_credits}"));
}
if let Some(ref range) = self.course_number_range {
parts.push(format!("courseNumberRange={}-{}", range.low, range.high));
+84
@@ -0,0 +1,84 @@
//! HTTP middleware that enforces rate limiting for Banner API requests.
use crate::banner::rate_limiter::{RequestType, SharedRateLimiter};
use http::Extensions;
use reqwest::{Request, Response};
use reqwest_middleware::{Middleware, Next};
use tracing::debug;
use url::Url;
/// Middleware that enforces rate limiting based on request URL patterns
pub struct RateLimitMiddleware {
rate_limiter: SharedRateLimiter,
}
impl RateLimitMiddleware {
/// Creates a new rate limiting middleware
pub fn new(rate_limiter: SharedRateLimiter) -> Self {
Self { rate_limiter }
}
/// Returns a human-readable description of the rate limit for a request type
fn get_rate_limit_description(request_type: RequestType) -> &'static str {
match request_type {
RequestType::Session => "6 rpm (~10s interval)",
RequestType::Search => "30 rpm (~2s interval)",
RequestType::Metadata => "20 rpm (~3s interval)",
RequestType::Reset => "10 rpm (~6s interval)",
}
}
/// Determines the request type based on the URL path
fn get_request_type(url: &Url) -> RequestType {
let path = url.path();
if path.contains("/registration")
|| path.contains("/selfServiceMenu")
|| path.contains("/term/termSelection")
{
RequestType::Session
} else if path.contains("/searchResults") || path.contains("/classSearch") {
RequestType::Search
} else if path.contains("/getTerms")
|| path.contains("/getSubjects")
|| path.contains("/getCampuses")
{
RequestType::Metadata
} else if path.contains("/resetDataForm") {
RequestType::Reset
} else {
// Default to search for unknown endpoints
RequestType::Search
}
}
}
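// Wiring sketch (illustrative, not prescriptive): the middleware slots into a
// reqwest_middleware client stack. `create_shared_rate_limiter` is the helper
// from rate_limiter.rs; the rest is standard reqwest_middleware API.
//
// let limiter = create_shared_rate_limiter(None);
// let client = reqwest_middleware::ClientBuilder::new(reqwest::Client::new())
//     .with(RateLimitMiddleware::new(limiter))
//     .build();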
#[async_trait::async_trait]
impl Middleware for RateLimitMiddleware {
async fn handle(
&self,
req: Request,
extensions: &mut Extensions,
next: Next<'_>,
) -> std::result::Result<Response, reqwest_middleware::Error> {
let request_type = Self::get_request_type(req.url());
let start = std::time::Instant::now();
self.rate_limiter.wait_for_permission(request_type).await;
let wait_duration = start.elapsed();
// Only log if rate limiting caused significant delay (>= 500ms)
if wait_duration.as_millis() >= 500 {
let limit_desc = Self::get_rate_limit_description(request_type);
debug!(
request_type = ?request_type,
wait_ms = wait_duration.as_millis(),
rate_limit = limit_desc,
"Rate limit caused delay"
);
}
// Make the actual request
next.run(req, extensions).await
}
}
+210
@@ -0,0 +1,210 @@
//! Rate limiting for Banner API requests to prevent overwhelming the server.
use crate::config::RateLimitingConfig;
use governor::{
Quota, RateLimiter,
clock::DefaultClock,
state::{InMemoryState, NotKeyed},
};
use std::num::NonZeroU32;
use std::sync::Arc;
use std::time::Duration;
/// Different types of Banner API requests with different rate limits
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RequestType {
/// Session creation and management (very conservative)
Session,
/// Course search requests (moderate)
Search,
/// Term and metadata requests (moderate)
Metadata,
/// Data form resets (low priority)
Reset,
}
/// A rate limiter that manages different request types with different limits
pub struct BannerRateLimiter {
session_limiter: RateLimiter<NotKeyed, InMemoryState, DefaultClock>,
search_limiter: RateLimiter<NotKeyed, InMemoryState, DefaultClock>,
metadata_limiter: RateLimiter<NotKeyed, InMemoryState, DefaultClock>,
reset_limiter: RateLimiter<NotKeyed, InMemoryState, DefaultClock>,
}
impl BannerRateLimiter {
/// Creates a new rate limiter with the given configuration
pub fn new(config: RateLimitingConfig) -> Self {
let session_quota = Quota::with_period(Duration::from_secs(60) / config.session_rpm)
.unwrap()
.allow_burst(NonZeroU32::new(config.burst_allowance).unwrap());
let search_quota = Quota::with_period(Duration::from_secs(60) / config.search_rpm)
.unwrap()
.allow_burst(NonZeroU32::new(config.burst_allowance).unwrap());
let metadata_quota = Quota::with_period(Duration::from_secs(60) / config.metadata_rpm)
.unwrap()
.allow_burst(NonZeroU32::new(config.burst_allowance).unwrap());
let reset_quota = Quota::with_period(Duration::from_secs(60) / config.reset_rpm)
.unwrap()
.allow_burst(NonZeroU32::new(config.burst_allowance).unwrap());
Self {
session_limiter: RateLimiter::direct(session_quota),
search_limiter: RateLimiter::direct(search_quota),
metadata_limiter: RateLimiter::direct(metadata_quota),
reset_limiter: RateLimiter::direct(reset_quota),
}
}
/// Waits for permission to make a request of the given type
pub async fn wait_for_permission(&self, request_type: RequestType) {
let limiter = match request_type {
RequestType::Session => &self.session_limiter,
RequestType::Search => &self.search_limiter,
RequestType::Metadata => &self.metadata_limiter,
RequestType::Reset => &self.reset_limiter,
};
// Wait until we can make the request (logging handled by middleware)
limiter.until_ready().await;
}
}
impl Default for BannerRateLimiter {
fn default() -> Self {
Self::new(RateLimitingConfig::default())
}
}
/// A shared rate limiter instance
pub type SharedRateLimiter = Arc<BannerRateLimiter>;
/// Creates a new shared rate limiter with custom configuration
pub fn create_shared_rate_limiter(config: Option<RateLimitingConfig>) -> SharedRateLimiter {
Arc::new(BannerRateLimiter::new(config.unwrap_or_default()))
}
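// Quota math, worked through: `Duration::from_secs(60) / rpm` is the minimum
// interval between permits, so session_rpm = 6 yields one permit every ~10s
// (up to burst_allowance permits may be consumed back-to-back first). Usage sketch:
//
// let limiter = create_shared_rate_limiter(None);
// limiter.wait_for_permission(RequestType::Search).await; // may sleep under load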
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_new_with_default_config() {
let _limiter = BannerRateLimiter::new(RateLimitingConfig::default());
}
#[test]
fn test_new_with_custom_config() {
let config = RateLimitingConfig {
session_rpm: 10,
search_rpm: 30,
metadata_rpm: 20,
reset_rpm: 15,
burst_allowance: 5,
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
fn test_new_with_minimum_valid_values() {
let config = RateLimitingConfig {
session_rpm: 1,
search_rpm: 1,
metadata_rpm: 1,
reset_rpm: 1,
burst_allowance: 1,
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
fn test_new_with_high_rpm_values() {
let config = RateLimitingConfig {
session_rpm: 10000,
search_rpm: 10000,
metadata_rpm: 10000,
reset_rpm: 10000,
burst_allowance: 1,
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
fn test_default_impl() {
let _limiter = BannerRateLimiter::default();
}
#[test]
#[should_panic]
fn test_new_panics_on_zero_session_rpm() {
let config = RateLimitingConfig {
session_rpm: 0,
..RateLimitingConfig::default()
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
#[should_panic]
fn test_new_panics_on_zero_search_rpm() {
let config = RateLimitingConfig {
search_rpm: 0,
..RateLimitingConfig::default()
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
#[should_panic]
fn test_new_panics_on_zero_metadata_rpm() {
let config = RateLimitingConfig {
metadata_rpm: 0,
..RateLimitingConfig::default()
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
#[should_panic]
fn test_new_panics_on_zero_reset_rpm() {
let config = RateLimitingConfig {
reset_rpm: 0,
..RateLimitingConfig::default()
};
let _limiter = BannerRateLimiter::new(config);
}
#[test]
#[should_panic]
fn test_new_panics_on_zero_burst_allowance() {
let config = RateLimitingConfig {
burst_allowance: 0,
..RateLimitingConfig::default()
};
let _limiter = BannerRateLimiter::new(config);
}
#[tokio::test]
async fn test_wait_for_permission_completes() {
let limiter = BannerRateLimiter::default();
let timeout_duration = std::time::Duration::from_secs(1);
for request_type in [
RequestType::Session,
RequestType::Search,
RequestType::Metadata,
RequestType::Reset,
] {
let result =
tokio::time::timeout(timeout_duration, limiter.wait_for_permission(request_type))
.await;
assert!(
result.is_ok(),
"wait_for_permission timed out for {:?}",
request_type
);
}
}
}
-293
@@ -1,293 +0,0 @@
//! Course scraping functionality for the Banner API.
use crate::banner::{api::BannerApi, models::*, query::SearchQuery};
use anyhow::{Context, Result};
use redis::AsyncCommands;
use std::time::Duration;
use tokio::time;
use tracing::{debug, error, info, warn};
/// Priority majors that should be scraped more frequently
const PRIORITY_MAJORS: &[&str] = &["CS", "CPE", "MAT", "EE", "IS"];
/// Maximum number of courses to fetch per page
const MAX_PAGE_SIZE: i32 = 500;
/// Course scraper for Banner API
pub struct CourseScraper {
api: BannerApi,
redis_client: redis::Client,
}
impl CourseScraper {
/// Creates a new course scraper
pub fn new(api: BannerApi, redis_url: &str) -> Result<Self> {
let redis_client =
redis::Client::open(redis_url).context("Failed to create Redis client")?;
Ok(Self { api, redis_client })
}
/// Scrapes all courses and stores them in Redis
pub async fn scrape_all(&self, term: &str) -> Result<()> {
// Get all subjects
let subjects = self
.api
.get_subjects("", term, 1, 100)
.await
.context("Failed to get subjects for scraping")?;
if subjects.is_empty() {
return Err(anyhow::anyhow!("No subjects found for term {}", term));
}
// Categorize subjects
let (priority_subjects, other_subjects): (Vec<_>, Vec<_>) = subjects
.into_iter()
.partition(|subject| PRIORITY_MAJORS.contains(&subject.code.as_str()));
// Get expired subjects that need scraping
let mut expired_subjects = Vec::new();
expired_subjects.extend(self.get_expired_subjects(&priority_subjects, term).await?);
expired_subjects.extend(self.get_expired_subjects(&other_subjects, term).await?);
if expired_subjects.is_empty() {
info!("No expired subjects found, skipping scrape");
return Ok(());
}
info!(
"Scraping {} subjects for term {}",
expired_subjects.len(),
term
);
// Scrape each expired subject
for subject in expired_subjects {
if let Err(e) = self.scrape_subject(&subject.code, term).await {
error!("Failed to scrape subject {}: {}", subject.code, e);
}
// Rate limiting between subjects
time::sleep(Duration::from_secs(2)).await;
}
Ok(())
}
/// Gets subjects that have expired and need to be scraped
async fn get_expired_subjects(&self, subjects: &[Pair], term: &str) -> Result<Vec<Pair>> {
let mut conn = self
.redis_client
.get_multiplexed_async_connection()
.await
.context("Failed to get Redis connection")?;
let mut expired = Vec::new();
for subject in subjects {
let key = format!("scraped:{}:{}", subject.code, term);
let scraped: Option<String> = conn
.get(&key)
.await
.context("Failed to check scrape status in Redis")?;
// If not scraped or marked as expired (empty/0), add to list
if scraped.is_none() || scraped.as_deref() == Some("0") {
expired.push(subject.clone());
}
}
Ok(expired)
}
/// Scrapes all courses for a specific subject
pub async fn scrape_subject(&self, subject: &str, term: &str) -> Result<()> {
let mut offset = 0;
let mut total_courses = 0;
loop {
let query = SearchQuery::new()
.subject(subject)
.offset(offset)
.max_results(MAX_PAGE_SIZE * 2);
// Ensure session term is selected before searching
self.api.select_term(term).await?;
let result = self
.api
.search(term, &query, "subjectDescription", false)
.await
.with_context(|| {
format!(
"Failed to search for subject {} at offset {}",
subject, offset
)
})?;
if !result.success {
return Err(anyhow::anyhow!(
"Search marked unsuccessful for subject {}",
subject
));
}
let course_count = result.data.as_ref().map(|v| v.len() as i32).unwrap_or(0);
total_courses += course_count;
debug!(
"Retrieved {} courses for subject {} at offset {}",
course_count, subject, offset
);
// Store each course in Redis
for course in result.data.unwrap_or_default() {
if let Err(e) = self.store_course(&course).await {
error!(
"Failed to store course {}: {}",
course.course_reference_number, e
);
}
}
// Check if we got a full page and should continue
if course_count >= MAX_PAGE_SIZE {
if course_count > MAX_PAGE_SIZE {
warn!(
"Course count {} exceeds max page size {}",
course_count, MAX_PAGE_SIZE
);
}
offset += MAX_PAGE_SIZE;
debug!(
"Continuing to next page for subject {} at offset {}",
subject, offset
);
// Rate limiting between pages
time::sleep(Duration::from_secs(3)).await;
continue;
}
break;
}
info!(
"Scraped {} total courses for subject {}",
total_courses, subject
);
// Mark subject as scraped with expiry
self.mark_subject_scraped(subject, term, total_courses)
.await?;
Ok(())
}
/// Stores a course in Redis
async fn store_course(&self, course: &Course) -> Result<()> {
let mut conn = self
.redis_client
.get_multiplexed_async_connection()
.await
.context("Failed to get Redis connection")?;
let key = format!("class:{}", course.course_reference_number);
let serialized = serde_json::to_string(course).context("Failed to serialize course")?;
let _: () = conn
.set(&key, serialized)
.await
.context("Failed to store course in Redis")?;
Ok(())
}
/// Marks a subject as scraped with appropriate expiry time
async fn mark_subject_scraped(
&self,
subject: &str,
term: &str,
course_count: i32,
) -> Result<()> {
let mut conn = self
.redis_client
.get_multiplexed_async_connection()
.await
.context("Failed to get Redis connection")?;
let key = format!("scraped:{}:{}", subject, term);
let expiry = self.calculate_expiry(subject, course_count);
let value = if course_count == 0 { -1 } else { course_count };
let _: () = conn
.set_ex(&key, value, expiry.as_secs() as u64)
.await
.context("Failed to mark subject as scraped")?;
debug!(
"Marked subject {} as scraped with {} courses, expiry: {:?}",
subject, course_count, expiry
);
Ok(())
}
/// Calculates expiry time for a scraped subject based on various factors
fn calculate_expiry(&self, subject: &str, course_count: i32) -> Duration {
// Base calculation: 1 hour per 100 courses
let mut base_expiry = Duration::from_secs(3600 * (course_count as u64 / 100).max(1));
// Special handling for subjects with few courses
if course_count < 50 {
// Linear interpolation: 1 course = 12 hours, 49 courses = 1 hour
let hours = 12.0 - ((course_count as f64 - 1.0) / 48.0) * 11.0;
base_expiry = Duration::from_secs((hours * 3600.0) as u64);
}
// Priority subjects get shorter expiry (more frequent updates)
if PRIORITY_MAJORS.contains(&subject) {
base_expiry = base_expiry / 3;
}
// Add random variance (±15%)
let variance = (base_expiry.as_secs() as f64 * 0.15) as u64;
let random_offset = (rand::random::<f64>() - 0.5) * 2.0 * variance as f64;
let final_expiry = if random_offset > 0.0 {
base_expiry + Duration::from_secs(random_offset as u64)
} else {
base_expiry.saturating_sub(Duration::from_secs((-random_offset) as u64))
};
// Ensure minimum of 1 hour
final_expiry.max(Duration::from_secs(3600))
}
/// Gets a course from Redis cache
pub async fn get_course(&self, crn: &str) -> Result<Option<Course>> {
let mut conn = self
.redis_client
.get_multiplexed_async_connection()
.await
.context("Failed to get Redis connection")?;
let key = format!("class:{}", crn);
let serialized: Option<String> = conn
.get(&key)
.await
.context("Failed to get course from Redis")?;
match serialized {
Some(data) => {
let course: Course = serde_json::from_str(&data)
.context("Failed to deserialize course from Redis")?;
Ok(Some(course))
}
None => Ok(None),
}
}
}
+550 -117
@@ -1,120 +1,570 @@
//! Session management for Banner API.
use anyhow::Result;
use rand::distributions::{Alphanumeric, DistString};
use reqwest::Client;
use std::sync::Mutex;
use crate::banner::BannerTerm;
use crate::banner::models::Term;
use anyhow::{Context, Result};
use cookie::Cookie;
use dashmap::DashMap;
use governor::state::InMemoryState;
use governor::{Quota, RateLimiter};
use rand::distr::{Alphanumeric, SampleString};
use reqwest_middleware::ClientWithMiddleware;
use std::collections::{HashMap, VecDeque};
use std::mem::ManuallyDrop;
use std::ops::{Deref, DerefMut};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, LazyLock};
use std::time::{Duration, Instant};
use tracing::{debug, info};
use tokio::sync::{Mutex, Notify};
use tracing::{debug, info, trace};
use url::Url;
/// Session manager for Banner API interactions
#[derive(Debug)]
pub struct SessionManager {
current_session: Mutex<Option<SessionData>>,
base_url: String,
client: Client,
}
const SESSION_EXPIRY: Duration = Duration::from_secs(25 * 60); // 25 minutes
// A global rate limiter to ensure we only try to create one new session every 10 seconds,
// preventing us from overwhelming the server with session creation requests.
static SESSION_CREATION_RATE_LIMITER: LazyLock<
RateLimiter<governor::state::direct::NotKeyed, InMemoryState, governor::clock::DefaultClock>,
> = LazyLock::new(|| RateLimiter::direct(Quota::with_period(Duration::from_secs(10)).unwrap()));
/// Represents an active anonymous session within the Banner API.
/// Identified by multiple persistent cookies, as well as a client-generated "unique session ID".
#[derive(Debug, Clone)]
struct SessionData {
session_id: String,
pub struct BannerSession {
// Randomly generated
pub unique_session_id: String,
// Timestamp of creation
created_at: Instant,
// Timestamp of last activity
last_activity: Option<Instant>,
// Cookie values from initial registration page
jsessionid: String,
ssb_cookie: String,
}
impl SessionManager {
const SESSION_EXPIRY: Duration = Duration::from_secs(25 * 60); // 25 minutes
/// Generates a new session ID mimicking Banner's format
fn generate_session_id() -> String {
let random_part = Alphanumeric.sample_string(&mut rand::rng(), 5);
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis();
format!("{}{}", random_part, timestamp)
}
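// Format sketch: five random alphanumerics followed by epoch milliseconds,
// e.g. "aB3xZ" + "1738291033123" => "aB3xZ1738291033123" (values hypothetical).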
/// Generates a timestamp-based nonce
pub fn nonce() -> String {
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis()
.to_string()
}
impl BannerSession {
/// Creates a new session
pub fn new(unique_session_id: &str, jsessionid: &str, ssb_cookie: &str) -> Self {
let now = Instant::now();
/// Creates a new session manager
pub fn new(base_url: String, client: Client) -> Self {
Self {
current_session: Mutex::new(None),
base_url,
client,
created_at: now,
last_activity: None,
unique_session_id: unique_session_id.to_string(),
jsessionid: jsessionid.to_string(),
ssb_cookie: ssb_cookie.to_string(),
}
}
/// Ensures a valid session is available, creating one if necessary
pub fn ensure_session(&self) -> Result<String> {
let mut session_guard = self.current_session.lock().unwrap();
/// Returns the unique session ID
pub fn id(&self) -> &str {
&self.unique_session_id
}
if let Some(ref session) = *session_guard {
if session.created_at.elapsed() < Self::SESSION_EXPIRY {
return Ok(session.session_id.clone());
}
/// Updates the last activity timestamp
pub fn touch(&mut self) {
self.last_activity = Some(Instant::now());
}
/// Returns true if the session is expired
pub fn is_expired(&self) -> bool {
self.last_activity.unwrap_or(self.created_at).elapsed() > SESSION_EXPIRY
}
/// Returns the string used for the "Cookie" header
pub fn cookie(&self) -> String {
format!(
"JSESSIONID={}; SSB_COOKIE={}",
self.jsessionid, self.ssb_cookie
)
}
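/// Returns true if the session has been used at least once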
pub fn been_used(&self) -> bool {
self.last_activity.is_some()
}
#[cfg(test)]
pub(crate) fn new_with_created_at(
unique_session_id: &str,
jsessionid: &str,
ssb_cookie: &str,
created_at: Instant,
) -> Self {
Self {
unique_session_id: unique_session_id.to_string(),
created_at,
last_activity: None,
jsessionid: jsessionid.to_string(),
ssb_cookie: ssb_cookie.to_string(),
}
}
}
// Generate new session
let session_id = self.generate_session_id();
*session_guard = Some(SessionData {
session_id: session_id.clone(),
created_at: Instant::now(),
#[cfg(test)]
mod tests {
use super::*;
use std::time::Duration;
/// Verifies that cancelling `acquire()` mid-session-creation resets `is_creating`,
/// allowing subsequent callers to proceed rather than deadlocking.
#[tokio::test]
async fn test_acquire_not_deadlocked_after_cancellation() {
use tokio::sync::mpsc;
let (tx, mut rx) = mpsc::channel::<()>(10);
// Local server: /registration signals arrival via `tx`, then hangs forever.
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap();
let app = axum::Router::new().route(
"/StudentRegistrationSsb/registration",
axum::routing::get(move || {
let tx = tx.clone();
async move {
let _ = tx.send(()).await;
std::future::pending::<&str>().await
}
}),
);
tokio::spawn(async move {
axum::serve(listener, app).await.unwrap();
});
debug!("Generated new Banner session: {}", session_id);
Ok(session_id)
}
let base_url = format!("http://{}/StudentRegistrationSsb", addr);
let client = reqwest_middleware::ClientBuilder::new(
reqwest::Client::builder()
.timeout(Duration::from_secs(300))
.build()
.unwrap(),
)
.build();
/// Generates a new session ID mimicking Banner's format
fn generate_session_id(&self) -> String {
let random_part = Alphanumeric.sample_string(&mut rand::thread_rng(), 5);
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis();
format!("{}{}", random_part, timestamp)
}
let pool = SessionPool::new(client, base_url);
let term: Term = "202620".parse().unwrap();
/// Sets up initial session cookies by making required Banner API requests
pub async fn setup(&self) -> Result<()> {
info!("Setting up Banner session...");
let request_paths = ["/registration/registration", "/selfServiceMenu/data"];
for path in &request_paths {
let url = format!("{}{}", self.base_url, path);
let response = self
.client
.get(&url)
.query(&[("_", timestamp_nonce())])
.header("User-Agent", user_agent())
.send()
.await?;
if !response.status().is_success() {
return Err(anyhow::anyhow!(
"Failed to setup session, request to {} returned {}",
path,
response.status()
));
}
// First acquire: cancel once the request reaches the server.
tokio::select! {
_ = pool.acquire(term) => panic!("server hangs — acquire should never complete"),
_ = rx.recv() => {} // Request arrived; dropping the future simulates timeout cancellation.
}
// Note: Cookie validation would require additional setup in a real implementation
debug!("Session setup complete");
Ok(())
// Second acquire: verify it reaches the server (i.e., is_creating was reset).
// The global rate limiter has a 10s period, so allow 15s for the second attempt.
tokio::select! {
_ = pool.acquire(term) => {}
result = tokio::time::timeout(Duration::from_secs(15), rx.recv()) => {
assert!(
result.is_ok(),
"acquire() deadlocked — is_creating was not reset after cancellation"
);
}
}
}
/// Selects a term for the current session
pub async fn select_term(&self, term: &str) -> Result<()> {
let session_id = self.ensure_session()?;
#[test]
fn test_new_session_creates_session() {
let session = BannerSession::new("sess-1", "JSID123", "SSB456");
assert_eq!(session.id(), "sess-1");
}
#[test]
fn test_fresh_session_not_expired() {
let session = BannerSession::new("sess-1", "JSID123", "SSB456");
assert!(!session.is_expired());
}
#[test]
fn test_fresh_session_not_been_used() {
let session = BannerSession::new("sess-1", "JSID123", "SSB456");
assert!(!session.been_used());
}
#[test]
fn test_touch_marks_used() {
let mut session = BannerSession::new("sess-1", "JSID123", "SSB456");
session.touch();
assert!(session.been_used());
}
#[test]
fn test_touched_session_not_expired() {
let mut session = BannerSession::new("sess-1", "JSID123", "SSB456");
session.touch();
assert!(!session.is_expired());
}
#[test]
fn test_cookie_format() {
let session = BannerSession::new("sess-1", "JSID123", "SSB456");
assert_eq!(session.cookie(), "JSESSIONID=JSID123; SSB_COOKIE=SSB456");
}
#[test]
fn test_id_returns_unique_session_id() {
let session = BannerSession::new("my-unique-id", "JSID123", "SSB456");
assert_eq!(session.id(), "my-unique-id");
}
#[test]
fn test_expired_session() {
let session = BannerSession::new_with_created_at(
"sess-old",
"JSID123",
"SSB456",
Instant::now() - Duration::from_secs(26 * 60),
);
assert!(session.is_expired());
}
#[test]
fn test_not_quite_expired_session() {
let session = BannerSession::new_with_created_at(
"sess-recent",
"JSID123",
"SSB456",
Instant::now() - Duration::from_secs(24 * 60),
);
assert!(!session.is_expired());
}
#[test]
fn test_session_at_expiry_boundary() {
let session = BannerSession::new_with_created_at(
"sess-boundary",
"JSID123",
"SSB456",
Instant::now() - Duration::from_secs(25 * 60 + 1),
);
assert!(session.is_expired());
}
}
/// A smart pointer that returns a `BannerSession` to the pool when dropped.
pub struct PooledSession {
session: ManuallyDrop<BannerSession>,
pool: Arc<TermPool>,
}
impl Deref for PooledSession {
type Target = BannerSession;
fn deref(&self) -> &Self::Target {
&self.session
}
}
impl DerefMut for PooledSession {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.session
}
}
impl Drop for PooledSession {
fn drop(&mut self) {
// SAFETY: `drop` is called exactly once by Rust's drop semantics,
// so `ManuallyDrop::take` is guaranteed to see a valid value.
let session = unsafe { ManuallyDrop::take(&mut self.session) };
let pool = self.pool.clone();
tokio::spawn(async move {
pool.release(session).await;
});
}
}
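// Usage sketch (assuming a `pool: SessionPool` and a parsed `term` in scope):
//
// let session = pool.acquire(term).await?; // derefs to BannerSession
// let cookie = session.cookie();           // use for an authenticated request
// drop(session);                           // returned to the pool via tokio::spawn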
pub struct TermPool {
sessions: Mutex<VecDeque<BannerSession>>,
notifier: Notify,
is_creating: AtomicBool,
}
/// RAII guard ensuring `is_creating` is reset on drop for cancellation safety.
/// Without this, a cancelled `acquire()` future would leave the flag set permanently,
/// deadlocking all subsequent callers.
struct CreatingGuard(Arc<TermPool>);
impl Drop for CreatingGuard {
fn drop(&mut self) {
self.0.is_creating.store(false, Ordering::Release);
self.0.notifier.notify_waiters();
}
}
impl TermPool {
fn new() -> Self {
Self {
sessions: Mutex::new(VecDeque::new()),
notifier: Notify::new(),
is_creating: AtomicBool::new(false),
}
}
async fn release(&self, session: BannerSession) {
let id = session.unique_session_id.clone();
if session.is_expired() {
debug!(id = id, "Session expired, dropping");
// Wake up a waiter, as it might need to create a new session
// if this was the last one.
self.notifier.notify_one();
return;
}
let mut queue = self.sessions.lock().await;
queue.push_back(session);
drop(queue); // Release lock before notifying
self.notifier.notify_one();
}
}
pub struct SessionPool {
sessions: DashMap<Term, Arc<TermPool>>,
http: ClientWithMiddleware,
base_url: String,
}
impl SessionPool {
pub fn new(http: ClientWithMiddleware, base_url: String) -> Self {
Self {
sessions: DashMap::new(),
http,
base_url,
}
}
/// Acquires a session from the pool.
/// If no sessions are available, a new one is created on demand,
/// respecting the global rate limit.
pub async fn acquire(&self, term: Term) -> Result<PooledSession> {
let term_pool = self
.sessions
.entry(term)
.or_insert_with(|| Arc::new(TermPool::new()))
.clone();
let start = Instant::now();
let mut waited_for_creation = false;
loop {
// Fast path: Try to get an existing, non-expired session.
{
let mut queue = term_pool.sessions.lock().await;
if let Some(session) = queue.pop_front() {
if !session.is_expired() {
return Ok(PooledSession {
session: ManuallyDrop::new(session),
pool: Arc::clone(&term_pool),
});
} else {
debug!(id = session.unique_session_id, "Discarded expired session");
}
}
} // MutexGuard is dropped, lock is released.
// Slow path: wait for an in-progress creation, or become the creator.
if term_pool.is_creating.load(Ordering::Acquire) {
if !waited_for_creation {
trace!("Waiting for another task to create session");
waited_for_creation = true;
}
term_pool.notifier.notified().await;
continue;
}
// CAS to become the designated creator.
if term_pool
.is_creating
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
.is_err()
{
continue; // Lost the race — loop back and wait.
}
// Guard resets is_creating on drop (including cancellation).
let creating_guard = CreatingGuard(Arc::clone(&term_pool));
trace!("Pool empty, creating new session");
tokio::select! {
_ = term_pool.notifier.notified() => {
// A session was returned — release creator role and race for it.
drop(creating_guard);
continue;
}
_ = SESSION_CREATION_RATE_LIMITER.until_ready() => {
let new_session_result = self.create_session(&term).await;
drop(creating_guard);
match new_session_result {
Ok(new_session) => {
let elapsed = start.elapsed();
debug!(
id = new_session.unique_session_id,
elapsed_ms = elapsed.as_millis(),
"Created new session"
);
return Ok(PooledSession {
session: ManuallyDrop::new(new_session),
pool: term_pool,
});
}
Err(e) => {
return Err(e.context("Failed to create new session in pool"));
}
}
}
}
}
}
/// Sets up initial session cookies by making required Banner API requests.
async fn create_session(&self, term: &Term) -> Result<BannerSession> {
info!(term = %term, "setting up banner session");
// Load the initial registration page (the 'register'/'search' entry point)
let initial_registration = self
.http
.get(format!("{}/registration", self.base_url))
.send()
.await?;
// TODO: Validate success
let cookies: HashMap<String, String> = initial_registration
.headers()
.get_all("Set-Cookie")
.iter()
.filter_map(|v| {
let c = Cookie::parse(v.to_str().ok()?).ok()?;
Some((c.name().to_string(), c.value().to_string()))
})
.collect();
let jsessionid = cookies
.get("JSESSIONID")
.ok_or_else(|| anyhow::anyhow!("JSESSIONID cookie missing"))?;
let ssb_cookie = cookies
.get("SSB_COOKIE")
.ok_or_else(|| anyhow::anyhow!("SSB_COOKIE cookie missing"))?;
let cookie_header = format!("JSESSIONID={}; SSB_COOKIE={}", jsessionid, ssb_cookie);
self.http
.get(format!("{}/selfServiceMenu/data", self.base_url))
.header("Cookie", &cookie_header)
.send()
.await?
.error_for_status()
.context("Failed to get data page")?;
self.http
.get(format!("{}/term/termSelection", self.base_url))
.header("Cookie", &cookie_header)
.query(&[("mode", "search")])
.send()
.await?
.error_for_status()
.context("Failed to get term selection page")?;
// TODO: Validate success
let terms = self.get_terms("", 1, 10).await?;
if !terms.iter().any(|t| t.code == term.to_string()) {
return Err(anyhow::anyhow!("Failed to get term search response"));
}
let specific_term_search_response = self.get_terms(&term.to_string(), 1, 10).await?;
if !specific_term_search_response
.iter()
.any(|t| t.code == term.to_string())
{
return Err(anyhow::anyhow!("Failed to get term search response"));
}
let unique_session_id = generate_session_id();
self.select_term(&term.to_string(), &unique_session_id, &cookie_header)
.await?;
Ok(BannerSession::new(
&unique_session_id,
jsessionid,
ssb_cookie,
))
}
/// Retrieves a list of terms from the Banner API.
pub async fn get_terms(
&self,
search: &str,
page: i32,
max_results: i32,
) -> Result<Vec<BannerTerm>> {
if page <= 0 {
return Err(anyhow::anyhow!("Page must be greater than 0"));
}
let url = format!("{}/classSearch/getTerms", self.base_url);
let params = [
("searchTerm", search),
("offset", &page.to_string()),
("max", &max_results.to_string()),
("_", &nonce()),
];
let response = self
.http
.get(&url)
.query(&params)
.send()
.await
.with_context(|| "Failed to get terms".to_string())?;
let terms: Vec<BannerTerm> = response
.json()
.await
.context("Failed to parse terms response")?;
Ok(terms)
}
/// Selects a term for the current session.
async fn select_term(
&self,
term: &str,
unique_session_id: &str,
cookie_header: &str,
) -> Result<()> {
let form_data = [
("term", term),
("studyPath", ""),
("studyPathText", ""),
("startDatepicker", ""),
("endDatepicker", ""),
("uniqueSessionId", &session_id),
("uniqueSessionId", unique_session_id),
];
let url = format!("{}/term/search", self.base_url);
let response = self
.client
.http
.post(&url)
.header("Cookie", cookie_header)
.query(&[("mode", "search")])
.form(&form_data)
.header("User-Agent", user_agent())
.header("Content-Type", "application/x-www-form-urlencoded")
.send()
.await?;
@@ -128,18 +578,36 @@ impl SessionManager {
#[derive(serde::Deserialize)]
struct RedirectResponse {
#[serde(rename = "fwdUrl")]
#[serde(rename = "fwdURL")]
fwd_url: String,
}
let redirect: RedirectResponse = response.json().await?;
let base_url_path = self
.base_url
.parse::<Url>()
.context("Failed to parse base URL")?
.path()
.to_string();
let non_overlap_redirect =
redirect
.fwd_url
.strip_prefix(&base_url_path)
.ok_or_else(|| {
anyhow::anyhow!(
"Redirect URL '{}' does not start with expected prefix '{}'",
redirect.fwd_url,
base_url_path
)
})?;
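// e.g. a base path of "/StudentRegistrationSsb" and a fwdURL of
// "/StudentRegistrationSsb/classSearch/classSearch" leave the suffix
// "/classSearch/classSearch" (paths illustrative).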
// Follow the redirect
let redirect_url = format!("{}{}", self.base_url, redirect.fwd_url);
let redirect_url = format!("{}{}", self.base_url, non_overlap_redirect);
let redirect_response = self
.client
.http
.get(&redirect_url)
.header("User-Agent", user_agent())
.header("Cookie", cookie_header)
.send()
.await?;
@@ -150,41 +618,6 @@ impl SessionManager {
));
}
debug!("Successfully selected term: {}", term);
Ok(())
}
/// Resets the data form (required before new searches)
pub async fn reset_data_form(&self) -> Result<()> {
let url = format!("{}/classSearch/resetDataForm", self.base_url);
let response = self
.client
.post(&url)
.header("User-Agent", user_agent())
.send()
.await?;
if !response.status().is_success() {
return Err(anyhow::anyhow!(
"Failed to reset data form: {}",
response.status()
));
}
Ok(())
}
}
/// Generates a timestamp-based nonce
fn timestamp_nonce() -> String {
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis()
.to_string()
}
/// Returns a browser-like user agent string
fn user_agent() -> &'static str {
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
}
+6
@@ -0,0 +1,6 @@
//! Utility functions for the Banner module.
/// Returns a browser-like user agent string.
pub fn user_agent() -> &'static str {
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
}
+131
@@ -0,0 +1,131 @@
use banner::banner::{BannerApi, SearchQuery, Term};
use banner::config::Config;
use banner::error::Result;
use figment::{Figment, providers::Env};
use futures::future;
use tracing::{error, info};
use tracing_subscriber::{EnvFilter, FmtSubscriber};
#[tokio::main]
async fn main() -> Result<()> {
// Configure logging
let filter = EnvFilter::try_from_default_env()
.unwrap_or_else(|_| EnvFilter::new("info,banner=trace,reqwest=debug,hyper=info"));
let subscriber = FmtSubscriber::builder()
.with_env_filter(filter)
.with_target(true)
.finish();
tracing::subscriber::set_global_default(subscriber).expect("setting default subscriber failed");
info!("Starting Banner search test");
dotenvy::dotenv().ok();
// Load configuration
let config: Config = Figment::new()
.merge(Env::raw())
.extract()
.expect("Failed to load config");
info!(
banner_base_url = config.banner_base_url,
"Configuration loaded"
);
// Create Banner API client
let banner_api = BannerApi::new_with_config(config.banner_base_url, config.rate_limiting)
.expect("Failed to create BannerApi");
// Get current term
let term = Term::get_current().inner().to_string();
info!(term = term, "Using current term");
// Define multiple search queries
let queries = vec![
(
"CS Courses",
SearchQuery::new().subject("CS").max_results(10),
),
(
"Math Courses",
SearchQuery::new().subject("MAT").max_results(10),
),
(
"3000-level CS",
SearchQuery::new()
.subject("CS")
.course_numbers(3000, 3999)
.max_results(8),
),
(
"High Credit Courses",
SearchQuery::new().credits(4, 6).max_results(8),
),
(
"Programming Courses",
SearchQuery::new().keyword("programming").max_results(6),
),
];
info!(query_count = queries.len(), "Executing concurrent searches");
// Execute all searches concurrently
let search_futures = queries.into_iter().map(|(label, query)| {
info!(label = %label, "Starting search");
let banner_api = &banner_api;
let term = &term;
async move {
let result = banner_api
.search(term, &query, "subjectDescription", false)
.await;
(label, result)
}
});
// Wait for all searches to complete
let search_results = future::join_all(search_futures)
.await
.into_iter()
.filter_map(|(label, result)| match result {
Ok(search_result) => {
info!(
label = label,
success = search_result.success,
total_count = search_result.total_count,
"Search completed successfully"
);
Some((label, search_result))
}
Err(e) => {
error!(label = label, error = ?e, "Search failed");
None
}
})
.collect::<Vec<_>>();
// Process and display results
for (label, search_result) in search_results {
println!("\n=== {} ===", label);
if let Some(courses) = &search_result.data {
if courses.is_empty() {
println!(" No courses found");
} else {
println!(" Found {} courses:", courses.len());
for course in courses {
println!(
" {} {} - {} (CRN: {})",
course.subject,
course.course_number,
course.course_title,
course.course_reference_number
);
}
}
} else {
println!(" No courses found");
}
}
info!("Search test completed");
Ok(())
}
+31 -61
@@ -1,10 +1,10 @@
//! Google Calendar command implementation.
use crate::banner::{Course, DayOfWeek, MeetingScheduleInfo, Term};
use crate::bot::{Context, Error};
use chrono::NaiveDate;
use crate::banner::{Course, MeetingScheduleInfo};
use crate::bot::{Context, Error, utils};
use chrono::{NaiveDate, Weekday};
use std::collections::HashMap;
use tracing::{error, info};
use tracing::info;
use url::Url;
/// Generate a link to create a Google Calendar event for a course
@@ -18,36 +18,16 @@ pub async fn gcal(
ctx.defer().await?;
let app_state = &ctx.data().app_state;
let banner_api = &app_state.banner_api;
// Get current term dynamically
let current_term_status = Term::get_current();
let term = current_term_status.inner();
// Fetch live course data from Redis cache via AppState
let course = match app_state
.get_course_or_fetch(&term.to_string(), &crn.to_string())
.await
{
Ok(course) => course,
Err(e) => {
error!(%e, crn, "Failed to fetch course data");
return Err(Error::from(e));
}
};
let course = utils::get_course_by_crn(&ctx, crn).await?;
let term = course.term.clone();
// Get meeting times
let meeting_times = match banner_api
.get_course_meeting_time(&term.to_string(), crn)
.await
{
Ok(meeting_time) => meeting_time,
Err(e) => {
error!("Failed to get meeting times: {}", e);
return Err(Error::from(e));
}
};
let meeting_times = ctx
.data()
.app_state
.banner_api
.get_course_meeting_time(&term, &crn.to_string())
.await?;
struct LinkDetail {
link: String,
@@ -59,23 +39,18 @@ pub async fn gcal(
1.. => {
// Sort meeting times by start time of their TimeRange
let mut sorted_meeting_times = meeting_times.to_vec();
sorted_meeting_times.sort_unstable_by(|a, b| {
// Primary sort: by start time
match (&a.time_range, &b.time_range) {
(Some(a_time), Some(b_time)) => a_time.start.cmp(&b_time.start),
(Some(_), None) => std::cmp::Ordering::Less,
(None, Some(_)) => std::cmp::Ordering::Greater,
(None, None) => a.days.bits().cmp(&b.days.bits()),
}
});
MeetingScheduleInfo::sort_by_start_time(&mut sorted_meeting_times);
let links = sorted_meeting_times
.iter()
.map(|m| {
let link = generate_gcal_url(&course, m)?;
let days = m.days_string().unwrap_or_else(|| "TBA".to_string());
let detail = match &m.time_range {
Some(range) => format!("{} {}", m.days_string(), range.format_12hr()),
None => m.days_string(),
Some(range) => {
format!("{days} {}", range.format_12hr())
}
None => days,
};
Ok(LinkDetail { link, detail })
})
@@ -95,7 +70,7 @@ pub async fn gcal(
)
.await?;
info!("gcal command completed for CRN: {}", crn);
info!(crn = %crn, "gcal command completed");
Ok(())
}
@@ -104,10 +79,7 @@ fn generate_gcal_url(
course: &Course,
meeting_time: &MeetingScheduleInfo,
) -> Result<String, anyhow::Error> {
let course_text = format!(
"{} {} - {}",
course.subject, course.course_number, course.course_title
);
let course_text = course.display_title();
let dates_text = {
let (start, end) = meeting_time.datetime_range();
@@ -119,18 +91,16 @@ fn generate_gcal_url(
};
// Get instructor name
let instructor_name = if !course.faculty.is_empty() {
&course.faculty[0].display_name
} else {
"Unknown"
};
let instructor_name = course.primary_instructor_name();
// The event description
let details_text = format!(
"CRN: {}\nInstructor: {}\nDays: {}",
course.course_reference_number,
instructor_name,
meeting_time.days_string()
meeting_time
.days_string()
.unwrap_or_else(|| "TBA".to_string())
);
// The event location
@@ -158,13 +128,13 @@ fn generate_rrule(meeting_time: &MeetingScheduleInfo, end_date: NaiveDate) -> St
let by_day = days_of_week
.iter()
.map(|day| match day {
DayOfWeek::Monday => "MO",
DayOfWeek::Tuesday => "TU",
DayOfWeek::Wednesday => "WE",
DayOfWeek::Thursday => "TH",
DayOfWeek::Friday => "FR",
DayOfWeek::Saturday => "SA",
DayOfWeek::Sunday => "SU",
Weekday::Mon => "MO",
Weekday::Tue => "TU",
Weekday::Wed => "WE",
Weekday::Thu => "TH",
Weekday::Fri => "FR",
Weekday::Sat => "SA",
Weekday::Sun => "SU",
})
.collect::<Vec<&str>>()
.join(",");
+363 -9
@@ -1,6 +1,81 @@
//! ICS command implementation for generating calendar files.
use crate::bot::{Context, Error};
use crate::banner::{Course, MeetingDays, MeetingScheduleInfo, WeekdayExt};
use crate::bot::{Context, Error, utils};
use chrono::{Datelike, Duration, NaiveDate, Utc, Weekday};
use serenity::all::CreateAttachment;
use tracing::info;
/// Find the nth occurrence of a weekday in a given month/year (1-based).
fn nth_weekday_of_month(year: i32, month: u32, weekday: Weekday, n: u32) -> Option<NaiveDate> {
let first = NaiveDate::from_ymd_opt(year, month, 1)?;
let days_ahead = (weekday.num_days_from_monday() as i64
- first.weekday().num_days_from_monday() as i64)
.rem_euclid(7) as u32;
let day = 1 + days_ahead + 7 * (n - 1);
NaiveDate::from_ymd_opt(year, month, day)
}
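// Worked example: November 2025 starts on a Saturday, so the first Thursday
// falls on Nov 6 and nth_weekday_of_month(2025, 11, Weekday::Thu, 4) returns
// Nov 27, i.e. Thanksgiving 2025.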
/// Compute a consecutive range of dates starting from `start` for `count` days.
fn date_range(start: NaiveDate, count: i64) -> Vec<NaiveDate> {
(0..count)
.filter_map(|i| start.checked_add_signed(Duration::days(i)))
.collect()
}
/// Compute university holidays for a given year.
///
/// Federal holidays use weekday-of-month rules so they're correct for any year.
/// University-specific breaks (Fall Break, Spring Break, Winter Holiday) are anchored
/// to nearby federal holidays or follow UTSA's typical scheduling patterns.
fn compute_holidays_for_year(year: i32) -> Vec<(&'static str, Vec<NaiveDate>)> {
let mut holidays = Vec::new();
// Labor Day: 1st Monday of September
if let Some(d) = nth_weekday_of_month(year, 9, Weekday::Mon, 1) {
holidays.push(("Labor Day", vec![d]));
}
// Fall Break: Mon-Tue of Columbus Day week (2nd Monday of October + Tuesday)
if let Some(mon) = nth_weekday_of_month(year, 10, Weekday::Mon, 2) {
holidays.push(("Fall Break", date_range(mon, 2)));
}
// Day before Thanksgiving: Wednesday before 4th Thursday of November
if let Some(thu) = nth_weekday_of_month(year, 11, Weekday::Thu, 4)
&& let Some(wed) = thu.checked_sub_signed(Duration::days(1))
{
holidays.push(("Day Before Thanksgiving", vec![wed]));
}
// Thanksgiving: 4th Thursday of November + Friday
if let Some(thu) = nth_weekday_of_month(year, 11, Weekday::Thu, 4) {
holidays.push(("Thanksgiving", date_range(thu, 2)));
}
// Winter Holiday: Dec 23-31
if let Some(start) = NaiveDate::from_ymd_opt(year, 12, 23) {
holidays.push(("Winter Holiday", date_range(start, 9)));
}
// New Year's Day: January 1
if let Some(d) = NaiveDate::from_ymd_opt(year, 1, 1) {
holidays.push(("New Year's Day", vec![d]));
}
// MLK Day: 3rd Monday of January
if let Some(d) = nth_weekday_of_month(year, 1, Weekday::Mon, 3) {
holidays.push(("MLK Day", vec![d]));
}
// Spring Break: Mon-Sat of the week starting the 2nd Monday of March,
// matching UTSA's typical scheduling of the 2nd full week of March
if let Some(mon) = nth_weekday_of_month(year, 3, Weekday::Mon, 2) {
holidays.push(("Spring Break", date_range(mon, 6)));
}
holidays
}
/// Generate an ICS file for a course
#[poise::command(slash_command, prefix_command)]
@@ -10,16 +85,295 @@ pub async fn ics(
) -> Result<(), Error> {
ctx.defer().await?;
// TODO: Get BannerApi from context or global state
// TODO: Get current term dynamically
let term = 202510; // Hardcoded for now
let course = utils::get_course_by_crn(&ctx, crn).await?;
let term = course.term.clone();
// TODO: Implement actual ICS file generation
ctx.say(format!(
"ICS command not yet implemented - BannerApi integration needed\nCRN: {}, Term: {}",
crn, term
))
// Get meeting times
let meeting_times = ctx
.data()
.app_state
.banner_api
.get_course_meeting_time(&term, &crn.to_string())
.await?;
if meeting_times.is_empty() {
ctx.say("No meeting times found for this course.").await?;
return Ok(());
}
// Sort meeting times by start time
let mut sorted_meeting_times = meeting_times.to_vec();
MeetingScheduleInfo::sort_by_start_time(&mut sorted_meeting_times);
// Generate ICS content
let (ics_content, excluded_holidays) =
generate_ics_content(&course, &term, &sorted_meeting_times)?;
// Create file attachment
let filename = format!(
"{subject}_{number}_{section}.ics",
subject = course.subject.replace(" ", "_"),
number = course.course_number,
section = course.sequence_number,
);
let file = CreateAttachment::bytes(ics_content.into_bytes(), filename.clone());
// Build response content
let mut response_content = format!(
"📅 Generated ICS calendar for **{}**\n\n**Meeting Times:**\n{}",
course.display_title(),
sorted_meeting_times
.iter()
.enumerate()
.map(|(i, m)| {
let time_info = match &m.time_range {
Some(range) => format!(
"{} {}",
m.days_string().unwrap_or("TBA".to_string()),
range.format_12hr()
),
None => m.days_string().unwrap_or("TBA".to_string()),
};
format!("{}. {}", i + 1, time_info)
})
.collect::<Vec<_>>()
.join("\n")
);
// Add holiday exclusion information
if !excluded_holidays.is_empty() {
let count = excluded_holidays.len();
let count_text = if count == 1 {
"1 date was".to_string()
} else {
format!("{} dates were", count)
};
response_content.push_str(&format!("\n\n{} excluded from the ICS file:\n", count_text));
response_content.push_str(
&excluded_holidays
.iter()
.map(|s| format!("- {}", s))
.collect::<Vec<_>>()
.join("\n"),
);
}
ctx.send(
poise::CreateReply::default()
.content(response_content)
.attachment(file),
)
.await?;
info!(crn = %crn, "ics command completed");
Ok(())
}
/// Generate ICS content for a course and its meeting times
fn generate_ics_content(
course: &Course,
term: &str,
meeting_times: &[MeetingScheduleInfo],
) -> Result<(String, Vec<String>), anyhow::Error> {
let mut ics_content = String::new();
let mut excluded_holidays = Vec::new();
// ICS header
ics_content.push_str("BEGIN:VCALENDAR\r\n");
ics_content.push_str("VERSION:2.0\r\n");
ics_content.push_str("PRODID:-//Banner Bot//Course Calendar//EN\r\n");
ics_content.push_str("CALSCALE:GREGORIAN\r\n");
ics_content.push_str("METHOD:PUBLISH\r\n");
// Calendar name
ics_content.push_str(&format!(
"X-WR-CALNAME:{} - {}\r\n",
course.display_title(),
term
));
// Generate events for each meeting time
for (index, meeting_time) in meeting_times.iter().enumerate() {
let (event_content, holidays) = generate_event_content(course, meeting_time, index)?;
ics_content.push_str(&event_content);
excluded_holidays.extend(holidays);
}
// ICS footer
ics_content.push_str("END:VCALENDAR\r\n");
Ok((ics_content, excluded_holidays))
}
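// Output skeleton (sketch of what the pushes above produce; calendar name illustrative):
//
// BEGIN:VCALENDAR
// VERSION:2.0
// PRODID:-//Banner Bot//Course Calendar//EN
// CALSCALE:GREGORIAN
// METHOD:PUBLISH
// X-WR-CALNAME:CS 3443 - Application Programming - 202610
// ...one BEGIN:VEVENT / END:VEVENT block per meeting time...
// END:VCALENDAR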
/// Generate ICS event content for a single meeting time
fn generate_event_content(
course: &Course,
meeting_time: &MeetingScheduleInfo,
index: usize,
) -> Result<(String, Vec<String>), anyhow::Error> {
let course_title = course.display_title();
let instructor_name = course.primary_instructor_name();
let location = meeting_time.place_string();
// Create event title with meeting index if multiple meetings
let event_title = if index > 0 {
format!("{} (Meeting {})", course_title, index + 1)
} else {
course_title
};
// Create event description
let description = format!(
"CRN: {}\\nInstructor: {}\\nDays: {}\\nMeeting Type: {}",
course.course_reference_number,
instructor_name,
meeting_time.days_string().unwrap_or("TBA".to_string()),
meeting_time.meeting_type.description()
);
// Get start and end times
let (start_dt, end_dt) = meeting_time.datetime_range();
// Format datetimes for ICS (UTC format)
let start_utc = start_dt.with_timezone(&Utc);
let end_utc = end_dt.with_timezone(&Utc);
let start_str = start_utc.format("%Y%m%dT%H%M%SZ").to_string();
let end_str = end_utc.format("%Y%m%dT%H%M%SZ").to_string();
// Generate unique ID for the event
let uid = format!(
"{}-{}-{}@banner-bot.local",
course.course_reference_number,
index,
start_utc.timestamp()
);
let mut event_content = String::new();
// Event header
event_content.push_str("BEGIN:VEVENT\r\n");
event_content.push_str(&format!("UID:{}\r\n", uid));
event_content.push_str(&format!("DTSTART:{}\r\n", start_str));
event_content.push_str(&format!("DTEND:{}\r\n", end_str));
event_content.push_str(&format!("SUMMARY:{}\r\n", escape_ics_text(&event_title)));
event_content.push_str(&format!(
"DESCRIPTION:{}\r\n",
escape_ics_text(&description)
));
event_content.push_str(&format!("LOCATION:{}\r\n", escape_ics_text(&location)));
// Add recurrence rule if there are specific days and times
if !meeting_time.days.is_empty() && meeting_time.time_range.is_some() {
let days_of_week = meeting_time.days_of_week();
let by_day: Vec<String> = days_of_week
.iter()
.map(|day| day.to_short_string().to_uppercase())
.collect();
if !by_day.is_empty() {
let until_date = meeting_time
.date_range
.end
.format("%Y%m%dT000000Z")
.to_string();
event_content.push_str(&format!(
"RRULE:FREQ=WEEKLY;BYDAY={};UNTIL={}\r\n",
by_day.join(","),
until_date
));
// Add holiday exceptions (EXDATE) if the class would meet on holiday dates
let holiday_exceptions = get_holiday_exceptions(meeting_time);
if let Some(exdate_property) = generate_exdate_property(&holiday_exceptions, start_utc)
{
event_content.push_str(&format!("{}\r\n", exdate_property));
}
// Collect holiday names for reporting
let start_year = meeting_time.date_range.start.year();
let end_year = meeting_time.date_range.end.year();
let all_holidays: Vec<_> = (start_year..=end_year)
.flat_map(compute_holidays_for_year)
.collect();
let mut holiday_names = Vec::new();
for (holiday_name, holiday_dates) in &all_holidays {
for &exception_date in &holiday_exceptions {
if holiday_dates.contains(&exception_date) {
holiday_names.push(format!(
"{} ({})",
holiday_name,
exception_date.format("%a, %b %d")
));
}
}
}
holiday_names.sort();
holiday_names.dedup();
event_content.push_str("END:VEVENT\r\n");
return Ok((event_content, holiday_names));
}
}
// Event footer
event_content.push_str("END:VEVENT\r\n");
Ok((event_content, Vec::new()))
}
/// Check if a class meets on a specific date based on its meeting days
fn class_meets_on_date(meeting_time: &MeetingScheduleInfo, date: NaiveDate) -> bool {
let day: MeetingDays = date.weekday().into();
meeting_time.days.contains(day)
}
/// Get holiday dates that fall within the course date range and would conflict with class meetings
fn get_holiday_exceptions(meeting_time: &MeetingScheduleInfo) -> Vec<NaiveDate> {
let start_year = meeting_time.date_range.start.year();
let end_year = meeting_time.date_range.end.year();
(start_year..=end_year)
.flat_map(compute_holidays_for_year)
.flat_map(|(_, dates)| dates)
.filter(|&date| {
date >= meeting_time.date_range.start
&& date <= meeting_time.date_range.end
&& class_meets_on_date(meeting_time, date)
})
.collect()
}
/// Generate EXDATE property for holiday exceptions
fn generate_exdate_property(
exceptions: &[NaiveDate],
start_time: chrono::DateTime<Utc>,
) -> Option<String> {
if exceptions.is_empty() {
return None;
}
let mut exdate_values = Vec::new();
for &exception_date in exceptions {
// Create a datetime for the exception using the same time as the start time
let exception_datetime = exception_date.and_time(start_time.time()).and_utc();
let exdate_str = exception_datetime.format("%Y%m%dT%H%M%SZ").to_string();
exdate_values.push(exdate_str);
}
Some(format!("EXDATE:{}", exdate_values.join(",")))
}
/// Escape text for ICS format
fn escape_ics_text(text: &str) -> String {
text.replace("\\", "\\\\")
.replace(";", "\\;")
.replace(",", "\\,")
.replace("\n", "\\n")
.replace("\r", "")
}
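A quick sanity check for the escaping above, which also guards the newline handling in generate_event_content (a sketch, assuming it lives in the same module as escape_ics_text):
#[cfg(test)]
mod escape_tests {
    use super::*;

    // escape_ics_text must turn real newlines into the ICS "\n" escape and
    // escape backslashes, semicolons, and commas; carriage returns are dropped.
    #[test]
    fn escapes_ics_special_characters() {
        assert_eq!(escape_ics_text("a\nb"), "a\\nb");
        assert_eq!(escape_ics_text("a;b,c"), "a\\;b\\,c");
        assert_eq!(escape_ics_text("back\\slash"), "back\\\\slash");
        assert_eq!(escape_ics_text("a\r\nb"), "a\\nb");
    }
}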
+4 -6
@@ -1,13 +1,11 @@
//! Bot commands module.
pub mod gcal;
pub mod ics;
pub mod search;
pub mod terms;
pub use gcal::gcal;
pub use ics::ics;
pub use search::search;
pub use terms::terms;
+156 -20
@@ -1,8 +1,14 @@
//! Course search command implementation.
use crate::banner::{SearchQuery, Term};
use crate::bot::{Context, Error};
use anyhow::anyhow;
use regex::Regex;
use std::sync::LazyLock;
use tracing::info;
static RANGE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\d{1,4})-(\d{1,4})?").unwrap());
static WILDCARD_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\d+)(x+)").unwrap());
/// Search for courses with various filters
#[poise::command(slash_command, prefix_command)]
@@ -18,8 +24,8 @@ pub async fn search(
// Defer the response since this might take a while
ctx.defer().await?;
// Build the search query
// Build the search query — no default credit filter so all courses are visible
let mut query = SearchQuery::new();
if let Some(title) = title {
query = query.title(title);
@@ -40,12 +46,37 @@ pub async fn search(
query = query.max_results(max_results.min(25)); // Cap at 25
}
let term = Term::get_current().inner().to_string();
let search_result = ctx
.data()
.app_state
.banner_api
.search(&term, &query, "subjectDescription", false)
.await?;
let response = if let Some(courses) = search_result.data {
if courses.is_empty() {
"No courses found with the specified criteria.".to_string()
} else {
courses
.iter()
.map(|course| {
format!(
"**{}**: {} ({})",
course.display_title(),
course.primary_instructor_name(),
course.course_reference_number
)
})
.collect::<Vec<_>>()
.join("\n")
}
} else {
"No courses found with the specified criteria.".to_string()
};
ctx.say(response).await?;
info!("search command completed");
Ok(())
}
@@ -55,8 +86,7 @@ fn parse_course_code(input: &str) -> Result<(i32, i32), Error> {
// Handle range format (e.g., "3000-3999")
if input.contains('-') {
let re = Regex::new(r"(\d{1,4})-(\d{1,4})?").unwrap();
if let Some(captures) = re.captures(input) {
if let Some(captures) = RANGE_RE.captures(input) {
let low: i32 = captures[1].parse()?;
let high = if captures.get(2).is_some() {
captures[2].parse()?
@@ -65,26 +95,25 @@ fn parse_course_code(input: &str) -> Result<(i32, i32), Error> {
};
if low > high {
return Err("Invalid range: low value greater than high value".into());
return Err(anyhow!("Invalid range: low value greater than high value"));
}
if low < 1000 || high > 9999 {
return Err("Course codes must be between 1000 and 9999".into());
return Err(anyhow!("Course codes must be between 1000 and 9999"));
}
return Ok((low, high));
}
return Err("Invalid range format".into());
return Err(anyhow!("Invalid range format"));
}
// Handle wildcard format (e.g., "34xx")
if input.contains('x') {
if input.len() != 4 {
return Err("Wildcard format must be exactly 4 characters".into());
return Err(anyhow!("Wildcard format must be exactly 4 characters"));
}
let re = Regex::new(r"(\d+)(x+)").unwrap();
if let Some(captures) = re.captures(input) {
if let Some(captures) = WILDCARD_RE.captures(input) {
let prefix: i32 = captures[1].parse()?;
let x_count = captures[2].len();
@@ -92,22 +121,129 @@ fn parse_course_code(input: &str) -> Result<(i32, i32), Error> {
let high = low + 10_i32.pow(x_count as u32) - 1;
if low < 1000 || high > 9999 {
return Err("Course codes must be between 1000 and 9999".into());
return Err(anyhow!("Course codes must be between 1000 and 9999"));
}
return Ok((low, high));
}
return Err("Invalid wildcard format".into());
return Err(anyhow!("Invalid wildcard format"));
}
// Handle single course code
if input.len() == 4 {
let code: i32 = input.parse()?;
if !(1000..=9999).contains(&code) {
return Err(anyhow!("Course codes must be between 1000 and 9999"));
}
return Ok((code, code));
}
Err("Invalid course code format".into())
Err(anyhow!("Invalid course code format"))
}
#[cfg(test)]
mod tests {
use super::*;
// --- Single codes ---
#[test]
fn test_parse_single_code() {
assert_eq!(parse_course_code("3743").unwrap(), (3743, 3743));
}
#[test]
fn test_parse_single_code_boundaries() {
assert_eq!(parse_course_code("1000").unwrap(), (1000, 1000));
assert_eq!(parse_course_code("9999").unwrap(), (9999, 9999));
}
#[test]
fn test_parse_single_code_below_range() {
assert!(parse_course_code("0999").is_err());
}
#[test]
fn test_parse_single_code_wrong_length() {
assert!(parse_course_code("123").is_err());
}
#[test]
fn test_parse_single_code_non_numeric() {
assert!(parse_course_code("abcd").is_err());
}
#[test]
fn test_parse_single_code_trimmed() {
assert_eq!(parse_course_code(" 3743 ").unwrap(), (3743, 3743));
}
// --- Ranges ---
#[test]
fn test_parse_range_full() {
assert_eq!(parse_course_code("3000-3999").unwrap(), (3000, 3999));
}
#[test]
fn test_parse_range_same() {
assert_eq!(parse_course_code("3000-3000").unwrap(), (3000, 3000));
}
#[test]
fn test_parse_range_open() {
assert_eq!(parse_course_code("3000-").unwrap(), (3000, 9999));
}
#[test]
fn test_parse_range_inverted() {
assert!(parse_course_code("5000-3000").is_err());
}
#[test]
fn test_parse_range_below_1000() {
assert!(parse_course_code("500-999").is_err());
}
#[test]
fn test_parse_range_above_9999() {
assert!(parse_course_code("9000-10000").is_err());
}
#[test]
fn test_parse_range_full_valid() {
assert_eq!(parse_course_code("1000-9999").unwrap(), (1000, 9999));
}
// --- Wildcards ---
#[test]
fn test_parse_wildcard_one_x() {
assert_eq!(parse_course_code("300x").unwrap(), (3000, 3009));
}
#[test]
fn test_parse_wildcard_two_x() {
assert_eq!(parse_course_code("30xx").unwrap(), (3000, 3099));
}
#[test]
fn test_parse_wildcard_three_x() {
assert_eq!(parse_course_code("3xxx").unwrap(), (3000, 3999));
}
#[test]
fn test_parse_wildcard_9xxx() {
assert_eq!(parse_course_code("9xxx").unwrap(), (9000, 9999));
}
#[test]
fn test_parse_wildcard_wrong_length() {
assert!(parse_course_code("3xxxx").is_err());
}
#[test]
fn test_parse_wildcard_0xxx() {
assert!(parse_course_code("0xxx").is_err());
}
}
+40 -7
@@ -1,6 +1,8 @@
//! Terms command implementation.
use crate::banner::{BannerTerm, Term};
use crate::bot::{Context, Error};
use tracing::info;
/// List available terms or search for a specific term
#[poise::command(slash_command, prefix_command)]
@@ -13,14 +15,45 @@ pub async fn terms(
let search_term = search.unwrap_or_default();
let page_number = page.unwrap_or(1).max(1);
let max_results = 10;
let terms = ctx
.data()
.app_state
.banner_api
.sessions
.get_terms(&search_term, page_number, max_results)
.await?;
let response = if terms.is_empty() {
"No terms found.".to_string()
} else {
let current_term_code = Term::get_current().inner().to_string();
terms
.iter()
.map(|term| format_term(term, &current_term_code))
.collect::<Vec<_>>()
.join("\n")
};
ctx.say(response).await?;
info!("terms command completed");
Ok(())
}
fn format_term(term: &BannerTerm, current_term_code: &str) -> String {
let is_current = if term.code == current_term_code {
" (current)"
} else {
""
};
let is_archived = if term.is_archived() {
" (archived)"
} else {
""
};
format!(
"- `{}`: {}{}{}",
term.code, term.description, is_current, is_archived
)
}
-25
@@ -1,25 +0,0 @@
//! Time command implementation for course meeting times.
use crate::bot::{Context, Error};
/// Get meeting times for a specific course
#[poise::command(slash_command, prefix_command)]
pub async fn time(
ctx: Context<'_>,
#[description = "Course Reference Number (CRN)"] crn: i32,
) -> Result<(), Error> {
ctx.defer().await?;
// TODO: Get BannerApi from context or global state
// TODO: Get current term dynamically
let term = 202510; // Hardcoded for now
// TODO: Implement actual meeting time retrieval
ctx.say(format!(
"Time command not yet implemented - BannerApi integration needed\nCRN: {}, Term: {}",
crn, term
))
.await?;
Ok(())
}
+3 -4
@@ -1,12 +1,12 @@
use crate::error::Error;
use crate::state::AppState;
pub mod commands;
pub mod utils;
#[derive(Debug)]
pub struct Data {
pub app_state: AppState,
} // User data, which is stored and accessible in all command invocations
pub type Context<'a> = poise::Context<'a, Data, Error>;
/// Get all available commands
@@ -14,7 +14,6 @@ pub fn get_commands() -> Vec<poise::Command<Data, Error>> {
vec![
commands::search(),
commands::terms(),
commands::ics(),
commands::gcal(),
]
+24
@@ -0,0 +1,24 @@
//! Bot command utilities.
use crate::banner::{Course, Term};
use crate::bot::Context;
use crate::error::Result;
use tracing::error;
/// Gets a course by its CRN for the current term.
pub async fn get_course_by_crn(ctx: &Context<'_>, crn: i32) -> Result<Course> {
let app_state = &ctx.data().app_state;
// Get current term dynamically
let current_term_status = Term::get_current();
let term = current_term_status.inner();
// Fetch live course data from database via AppState
app_state
.get_course_or_fetch(&term.to_string(), &crn.to_string())
.await
.map_err(|e| {
error!(error = %e, crn = %crn, "failed to fetch course data");
e
})
}
+462
@@ -0,0 +1,462 @@
//! Shared calendar generation logic for ICS files and Google Calendar URLs.
//!
//! Used by both the Discord bot commands and the web API endpoints.
use crate::data::models::DbMeetingTime;
use chrono::{Datelike, Duration, NaiveDate, NaiveTime, Weekday};
/// Course metadata needed for calendar generation (shared interface between bot and web).
pub struct CalendarCourse {
pub crn: String,
pub subject: String,
pub course_number: String,
pub title: String,
pub sequence_number: Option<String>,
pub primary_instructor: Option<String>,
}
impl CalendarCourse {
/// Display title like "CS 1083 - Introduction to Computer Science"
pub fn display_title(&self) -> String {
format!("{} {} - {}", self.subject, self.course_number, self.title)
}
/// Filename-safe identifier: "CS_1083_001"
pub fn filename_stem(&self) -> String {
format!(
"{}_{}{}",
self.subject.replace(' ', "_"),
self.course_number,
self.sequence_number
.as_deref()
.map(|s| format!("_{s}"))
.unwrap_or_default()
)
}
}
// ---------------------------------------------------------------------------
// Date parsing helpers
// ---------------------------------------------------------------------------
/// Parse a date string in either MM/DD/YYYY or YYYY-MM-DD format.
fn parse_date(s: &str) -> Option<NaiveDate> {
NaiveDate::parse_from_str(s, "%m/%d/%Y")
.or_else(|_| NaiveDate::parse_from_str(s, "%Y-%m-%d"))
.ok()
}
/// Parse an HHMM time string into `NaiveTime`.
fn parse_hhmm(s: &str) -> Option<NaiveTime> {
if s.len() != 4 {
return None;
}
let hours = s[..2].parse::<u32>().ok()?;
let minutes = s[2..].parse::<u32>().ok()?;
NaiveTime::from_hms_opt(hours, minutes, 0)
}
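Two spot checks for the parsers above (a sketch; both helpers are private to this module):
#[cfg(test)]
mod parse_tests {
    use super::*;

    #[test]
    fn parses_both_date_formats() {
        let expected = NaiveDate::from_ymd_opt(2025, 1, 15).unwrap();
        assert_eq!(parse_date("01/15/2025"), Some(expected));
        assert_eq!(parse_date("2025-01-15"), Some(expected));
        assert_eq!(parse_date("Jan 15"), None);
    }

    #[test]
    fn parses_hhmm_times() {
        assert_eq!(parse_hhmm("0930"), NaiveTime::from_hms_opt(9, 30, 0));
        assert_eq!(parse_hhmm("930"), None); // must be exactly 4 characters
        assert_eq!(parse_hhmm("2575"), None); // out-of-range hours/minutes rejected
    }
}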
/// Active weekdays for a meeting time.
fn active_weekdays(mt: &DbMeetingTime) -> Vec<Weekday> {
let mapping: [(bool, Weekday); 7] = [
(mt.monday, Weekday::Mon),
(mt.tuesday, Weekday::Tue),
(mt.wednesday, Weekday::Wed),
(mt.thursday, Weekday::Thu),
(mt.friday, Weekday::Fri),
(mt.saturday, Weekday::Sat),
(mt.sunday, Weekday::Sun),
];
mapping
.iter()
.filter(|(active, _)| *active)
.map(|(_, day)| *day)
.collect()
}
/// ICS two-letter day code for RRULE BYDAY.
fn ics_day_code(day: Weekday) -> &'static str {
match day {
Weekday::Mon => "MO",
Weekday::Tue => "TU",
Weekday::Wed => "WE",
Weekday::Thu => "TH",
Weekday::Fri => "FR",
Weekday::Sat => "SA",
Weekday::Sun => "SU",
}
}
/// Location string from a `DbMeetingTime`.
fn location_string(mt: &DbMeetingTime) -> String {
let building = mt
.building_description
.as_deref()
.or(mt.building.as_deref())
.unwrap_or("");
let room = mt.room.as_deref().unwrap_or("");
let combined = format!("{building} {room}").trim().to_string();
if combined.is_empty() {
"Online".to_string()
} else {
combined
}
}
/// Days display string built from ICS two-letter codes (e.g. "MOWEFR", "TUTH").
fn days_display(mt: &DbMeetingTime) -> String {
let weekdays = active_weekdays(mt);
if weekdays.is_empty() {
return "TBA".to_string();
}
weekdays
.iter()
.map(|d| ics_day_code(*d))
.collect::<Vec<_>>()
.join("")
}
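Given the two-letter codes, the joined output reads "MOWEFR" rather than "MWF". A quick check (a sketch, assuming DbMeetingTime has the plain bool/Option<String>/String fields used elsewhere in this changeset):
#[cfg(test)]
mod days_display_tests {
    use super::*;
    use crate::data::models::DbMeetingTime;

    #[test]
    fn joins_two_letter_day_codes() {
        // Monday/Wednesday/Friday section; location fields left empty.
        let mwf = DbMeetingTime {
            begin_time: Some("1000".into()),
            end_time: Some("1050".into()),
            start_date: "2025-08-25".into(),
            end_date: "2025-12-05".into(),
            monday: true,
            tuesday: false,
            wednesday: true,
            thursday: false,
            friday: true,
            saturday: false,
            sunday: false,
            building: None,
            building_description: None,
            room: None,
            campus: None,
            meeting_type: "CLAS".into(),
            meeting_schedule_type: "LEC".into(),
        };
        assert_eq!(days_display(&mwf), "MOWEFR");
        // With no building/room, the location falls back to "Online".
        assert_eq!(location_string(&mwf), "Online");
    }
}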
/// Escape text for ICS property values.
fn escape_ics(text: &str) -> String {
text.replace('\\', "\\\\")
.replace(';', "\\;")
.replace(',', "\\,")
.replace('\n', "\\n")
.replace('\r', "")
}
// ---------------------------------------------------------------------------
// University holidays (ported from bot/commands/ics.rs)
// ---------------------------------------------------------------------------
/// Find the nth occurrence of a weekday in a given month/year (1-based).
fn nth_weekday_of_month(year: i32, month: u32, weekday: Weekday, n: u32) -> Option<NaiveDate> {
let first = NaiveDate::from_ymd_opt(year, month, 1)?;
let days_ahead = (weekday.num_days_from_monday() as i64
- first.weekday().num_days_from_monday() as i64)
.rem_euclid(7) as u32;
let day = 1 + days_ahead + 7 * (n - 1);
NaiveDate::from_ymd_opt(year, month, day)
}
/// Compute a consecutive range of dates starting from `start` for `count` days.
fn date_range(start: NaiveDate, count: i64) -> Vec<NaiveDate> {
(0..count)
.filter_map(|i| start.checked_add_signed(Duration::days(i)))
.collect()
}
/// Compute university holidays for a given year.
fn compute_holidays_for_year(year: i32) -> Vec<(&'static str, Vec<NaiveDate>)> {
let mut holidays = Vec::new();
// Labor Day: 1st Monday of September
if let Some(d) = nth_weekday_of_month(year, 9, Weekday::Mon, 1) {
holidays.push(("Labor Day", vec![d]));
}
// Fall Break: Mon-Tue of Columbus Day week
if let Some(mon) = nth_weekday_of_month(year, 10, Weekday::Mon, 2) {
holidays.push(("Fall Break", date_range(mon, 2)));
}
// Day before Thanksgiving
if let Some(thu) = nth_weekday_of_month(year, 11, Weekday::Thu, 4)
&& let Some(wed) = thu.checked_sub_signed(Duration::days(1))
{
holidays.push(("Day Before Thanksgiving", vec![wed]));
}
// Thanksgiving: 4th Thursday + Friday
if let Some(thu) = nth_weekday_of_month(year, 11, Weekday::Thu, 4) {
holidays.push(("Thanksgiving", date_range(thu, 2)));
}
// Winter Holiday: Dec 23-31
if let Some(start) = NaiveDate::from_ymd_opt(year, 12, 23) {
holidays.push(("Winter Holiday", date_range(start, 9)));
}
// New Year's Day
if let Some(d) = NaiveDate::from_ymd_opt(year, 1, 1) {
holidays.push(("New Year's Day", vec![d]));
}
// MLK Day: 3rd Monday of January
if let Some(d) = nth_weekday_of_month(year, 1, Weekday::Mon, 3) {
holidays.push(("MLK Day", vec![d]));
}
// Spring Break: Mon-Sat, starting the 2nd Monday of March
if let Some(mon) = nth_weekday_of_month(year, 3, Weekday::Mon, 2) {
holidays.push(("Spring Break", date_range(mon, 6)));
}
holidays
}
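The rem_euclid arithmetic in nth_weekday_of_month is easy to get off by one, so fixed 2025 dates make a useful check (a sketch; the expected dates are plain calendar facts):
#[cfg(test)]
mod holiday_tests {
    use super::*;

    #[test]
    fn nth_weekday_lands_on_known_2025_dates() {
        // Labor Day 2025: Sep 1 is itself a Monday, so days_ahead is 0.
        assert_eq!(
            nth_weekday_of_month(2025, 9, Weekday::Mon, 1),
            NaiveDate::from_ymd_opt(2025, 9, 1)
        );
        // MLK Day 2025: Jan 1 is a Wednesday, so the 3rd Monday is Jan 20.
        assert_eq!(
            nth_weekday_of_month(2025, 1, Weekday::Mon, 3),
            NaiveDate::from_ymd_opt(2025, 1, 20)
        );
        // Thanksgiving 2025: the 4th Thursday of November is Nov 27.
        assert_eq!(
            nth_weekday_of_month(2025, 11, Weekday::Thu, 4),
            NaiveDate::from_ymd_opt(2025, 11, 27)
        );
        // Winter Holiday covers Dec 23-31: nine consecutive days.
        assert_eq!(
            date_range(NaiveDate::from_ymd_opt(2025, 12, 23).unwrap(), 9).len(),
            9
        );
    }
}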
/// Get holiday dates within a date range that fall on specific weekdays.
fn holiday_exceptions(start: NaiveDate, end: NaiveDate, weekdays: &[Weekday]) -> Vec<NaiveDate> {
let start_year = start.year();
let end_year = end.year();
(start_year..=end_year)
.flat_map(compute_holidays_for_year)
.flat_map(|(_, dates)| dates)
.filter(|&date| date >= start && date <= end && weekdays.contains(&date.weekday()))
.collect()
}
/// Names of excluded holidays (for user-facing messages).
fn excluded_holiday_names(
start: NaiveDate,
end: NaiveDate,
exceptions: &[NaiveDate],
) -> Vec<String> {
let start_year = start.year();
let end_year = end.year();
let all_holidays: Vec<_> = (start_year..=end_year)
.flat_map(compute_holidays_for_year)
.collect();
let mut names = Vec::new();
for (holiday_name, holiday_dates) in &all_holidays {
for &exc in exceptions {
if holiday_dates.contains(&exc) {
names.push(format!("{} ({})", holiday_name, exc.format("%a, %b %d")));
}
}
}
names.sort();
names.dedup();
names
}
// ---------------------------------------------------------------------------
// ICS generation
// ---------------------------------------------------------------------------
/// Result from ICS generation, including the file content and excluded holiday names.
pub struct IcsResult {
pub content: String,
pub filename: String,
/// Holiday dates excluded via EXDATE rules, for user-facing messages.
#[allow(dead_code)]
pub excluded_holidays: Vec<String>,
}
/// Generate an ICS calendar file for a course.
pub fn generate_ics(
course: &CalendarCourse,
meeting_times: &[DbMeetingTime],
) -> Result<IcsResult, anyhow::Error> {
let mut ics = String::new();
let mut all_excluded = Vec::new();
// Header
ics.push_str("BEGIN:VCALENDAR\r\n");
ics.push_str("VERSION:2.0\r\n");
ics.push_str("PRODID:-//Banner Bot//Course Calendar//EN\r\n");
ics.push_str("CALSCALE:GREGORIAN\r\n");
ics.push_str("METHOD:PUBLISH\r\n");
ics.push_str(&format!(
"X-WR-CALNAME:{}\r\n",
escape_ics(&course.display_title())
));
for (index, mt) in meeting_times.iter().enumerate() {
let (event, holidays) = generate_ics_event(course, mt, index)?;
ics.push_str(&event);
all_excluded.extend(holidays);
}
ics.push_str("END:VCALENDAR\r\n");
Ok(IcsResult {
content: ics,
filename: format!("{}.ics", course.filename_stem()),
excluded_holidays: all_excluded,
})
}
/// Generate a single VEVENT for one meeting time.
fn generate_ics_event(
course: &CalendarCourse,
mt: &DbMeetingTime,
index: usize,
) -> Result<(String, Vec<String>), anyhow::Error> {
let start_date = parse_date(&mt.start_date)
.ok_or_else(|| anyhow::anyhow!("Invalid start_date: {}", mt.start_date))?;
let end_date = parse_date(&mt.end_date)
.ok_or_else(|| anyhow::anyhow!("Invalid end_date: {}", mt.end_date))?;
let start_time = mt.begin_time.as_deref().and_then(parse_hhmm);
let end_time = mt.end_time.as_deref().and_then(parse_hhmm);
// DTSTART/DTEND: first occurrence with time, or a zero-length midnight placeholder on start_date when times are TBA
let (dtstart, dtend) = match (start_time, end_time) {
(Some(st), Some(et)) => {
let s = start_date.and_time(st).and_utc();
let e = start_date.and_time(et).and_utc();
(
s.format("%Y%m%dT%H%M%SZ").to_string(),
e.format("%Y%m%dT%H%M%SZ").to_string(),
)
}
_ => {
let s = start_date.and_hms_opt(0, 0, 0).unwrap().and_utc();
let e = start_date.and_hms_opt(0, 0, 0).unwrap().and_utc();
(
s.format("%Y%m%dT%H%M%SZ").to_string(),
e.format("%Y%m%dT%H%M%SZ").to_string(),
)
}
};
let event_title = if index > 0 {
format!("{} (Meeting {})", course.display_title(), index + 1)
} else {
course.display_title()
};
let instructor = course.primary_instructor.as_deref().unwrap_or("Staff");
// Real newlines here; escape_ics converts them to the ICS "\n" escape
let description = format!(
"CRN: {}\nInstructor: {}\nDays: {}\nMeeting Type: {}",
course.crn,
instructor,
days_display(mt),
mt.meeting_type,
);
let location = location_string(mt);
let uid = format!(
"{}-{}-{}@banner-bot.local",
course.crn,
index,
start_date
.and_hms_opt(0, 0, 0)
.unwrap()
.and_utc()
.timestamp()
);
let mut event = String::new();
event.push_str("BEGIN:VEVENT\r\n");
event.push_str(&format!("UID:{uid}\r\n"));
event.push_str(&format!("DTSTART:{dtstart}\r\n"));
event.push_str(&format!("DTEND:{dtend}\r\n"));
event.push_str(&format!("SUMMARY:{}\r\n", escape_ics(&event_title)));
event.push_str(&format!("DESCRIPTION:{}\r\n", escape_ics(&description)));
event.push_str(&format!("LOCATION:{}\r\n", escape_ics(&location)));
let weekdays = active_weekdays(mt);
let mut holiday_names = Vec::new();
if let (false, Some(st)) = (weekdays.is_empty(), start_time) {
let by_day: Vec<&str> = weekdays.iter().map(|d| ics_day_code(*d)).collect();
let until = end_date.format("%Y%m%dT000000Z").to_string();
event.push_str(&format!(
"RRULE:FREQ=WEEKLY;BYDAY={};UNTIL={}\r\n",
by_day.join(","),
until,
));
// Holiday exceptions
let exceptions = holiday_exceptions(start_date, end_date, &weekdays);
if !exceptions.is_empty() {
let start_utc = start_date.and_time(st).and_utc();
let exdates: Vec<String> = exceptions
.iter()
.map(|&d| {
d.and_time(start_utc.time())
.and_utc()
.format("%Y%m%dT%H%M%SZ")
.to_string()
})
.collect();
event.push_str(&format!("EXDATE:{}\r\n", exdates.join(",")));
}
holiday_names = excluded_holiday_names(start_date, end_date, &exceptions);
}
event.push_str("END:VEVENT\r\n");
Ok((event, holiday_names))
}
// ---------------------------------------------------------------------------
// Google Calendar URL generation
// ---------------------------------------------------------------------------
/// Generate a Google Calendar "add event" URL for a single meeting time.
pub fn generate_gcal_url(
course: &CalendarCourse,
mt: &DbMeetingTime,
) -> Result<String, anyhow::Error> {
let start_date = parse_date(&mt.start_date)
.ok_or_else(|| anyhow::anyhow!("Invalid start_date: {}", mt.start_date))?;
let end_date = parse_date(&mt.end_date)
.ok_or_else(|| anyhow::anyhow!("Invalid end_date: {}", mt.end_date))?;
let start_time = mt.begin_time.as_deref().and_then(parse_hhmm);
let end_time = mt.end_time.as_deref().and_then(parse_hhmm);
let dates_text = match (start_time, end_time) {
(Some(st), Some(et)) => {
let s = start_date.and_time(st);
let e = start_date.and_time(et);
format!(
"{}/{}",
s.format("%Y%m%dT%H%M%S"),
e.format("%Y%m%dT%H%M%S")
)
}
_ => {
let s = start_date.format("%Y%m%d").to_string();
format!("{s}/{s}")
}
};
let instructor = course.primary_instructor.as_deref().unwrap_or("Staff");
let details = format!(
"CRN: {}\nInstructor: {}\nDays: {}",
course.crn,
instructor,
days_display(mt),
);
let location = location_string(mt);
let weekdays = active_weekdays(mt);
let recur = if !weekdays.is_empty() && start_time.is_some() {
let by_day: Vec<&str> = weekdays.iter().map(|d| ics_day_code(*d)).collect();
let until = end_date.format("%Y%m%dT000000Z").to_string();
format!(
"RRULE:FREQ=WEEKLY;BYDAY={};UNTIL={}",
by_day.join(","),
until
)
} else {
String::new()
};
let course_text = course.display_title();
let params: Vec<(&str, &str)> = vec![
("action", "TEMPLATE"),
("text", &course_text),
("dates", &dates_text),
("details", &details),
("location", &location),
("trp", "true"),
("ctz", "America/Chicago"),
("recur", &recur),
];
let url = url::Url::parse_with_params("https://calendar.google.com/calendar/render", &params)?;
Ok(url.to_string())
}
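For orientation, the produced link has roughly this shape (illustrative values with percent-encoding elided; the parameter names come straight from the params vec above):
// https://calendar.google.com/calendar/render?action=TEMPLATE
//     &text=CS 1083 - Introduction to Computer Science
//     &dates=20250825T100000/20250825T105000
//     &details=CRN: 12345 ...
//     &location=Main Building 2.01.04
//     &trp=true
//     &ctz=America/Chicago
//     &recur=RRULE:FREQ=WEEKLY;BYDAY=MO,WE,FR;UNTIL=20251205T000000Z
Note that when a section has no fixed weekly pattern, recur is still included as an empty parameter.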
+76
@@ -0,0 +1,76 @@
use clap::Parser;
/// Banner Discord Bot - Course availability monitoring
///
/// This application runs all services:
/// - bot: Discord bot for course monitoring commands
/// - web: HTTP server for web interface and API
/// - scraper: Background service for scraping course data
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
pub struct Args {
/// Log formatter to use
#[arg(long, value_enum, default_value_t = default_tracing_format())]
pub tracing: TracingFormat,
}
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum TracingFormat {
/// Use pretty formatter (default in debug mode)
Pretty,
/// Use JSON formatter (default in release mode)
Json,
}
#[derive(clap::ValueEnum, Clone, Debug, PartialEq)]
pub enum ServiceName {
/// Discord bot for course monitoring commands
Bot,
/// HTTP server for web interface and API
Web,
/// Background service for scraping course data
Scraper,
}
impl ServiceName {
/// Get all available services
pub fn all() -> Vec<ServiceName> {
vec![ServiceName::Bot, ServiceName::Web, ServiceName::Scraper]
}
/// Convert to string for service registration
pub fn as_str(&self) -> &'static str {
match self {
ServiceName::Bot => "bot",
ServiceName::Web => "web",
ServiceName::Scraper => "scraper",
}
}
}
#[cfg(debug_assertions)]
const DEFAULT_TRACING_FORMAT: TracingFormat = TracingFormat::Pretty;
#[cfg(not(debug_assertions))]
const DEFAULT_TRACING_FORMAT: TracingFormat = TracingFormat::Json;
fn default_tracing_format() -> TracingFormat {
DEFAULT_TRACING_FORMAT
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_service_name_as_str() {
assert_eq!(ServiceName::Bot.as_str(), "bot");
assert_eq!(ServiceName::Web.as_str(), "web");
assert_eq!(ServiceName::Scraper.as_str(), "scraper");
}
#[test]
fn test_service_name_all() {
let all = ServiceName::all();
assert_eq!(all.len(), 3);
}
}
+251 -13
@@ -3,28 +3,28 @@
//! This module handles loading and parsing configuration from environment variables
//! using the figment crate. It supports flexible duration parsing that accepts both
//! numeric values (interpreted as seconds) and duration strings with units.
//!
//! All configuration is loaded from environment variables with the `APP_` prefix:
use fundu::{DurationParser, TimeUnit};
use serde::{Deserialize, Deserializer};
use std::time::Duration;
/// Main application configuration containing all sub-configurations
#[derive(Deserialize)]
pub struct Config {
/// Log level for the application
///
/// This value is used to set the log level for this application's target specifically.
/// e.g. "debug" would be similar to "warn,banner=debug,..."
///
/// Valid values are: "trace", "debug", "info", "warn", "error"
/// Defaults to "info" if not specified
#[serde(default = "default_log_level")]
pub log_level: String,
/// Port for the web server (default: 8080)
#[serde(default = "default_port")]
pub port: u16,
/// Database connection URL
pub database_url: String,
/// Redis connection URL
pub redis_url: String,
/// Graceful shutdown timeout duration
///
/// Accepts both numeric values (seconds) and duration strings
@@ -34,6 +34,42 @@ pub struct Config {
deserialize_with = "deserialize_duration"
)]
pub shutdown_timeout: Duration,
/// Discord bot token for authentication
pub bot_token: String,
/// Target Discord guild ID where the bot operates
pub bot_target_guild: u64,
/// Base URL for banner generation service
///
/// Defaults to "https://ssbprod.utsa.edu/StudentRegistrationSsb/ssb" if not specified
#[serde(default = "default_banner_base_url")]
pub banner_base_url: String,
/// Rate limiting configuration for Banner API requests
#[serde(default = "default_rate_limiting")]
pub rate_limiting: RateLimitingConfig,
/// Discord OAuth2 client ID for web authentication
#[serde(deserialize_with = "deserialize_string_or_uint")]
pub discord_client_id: String,
/// Discord OAuth2 client secret for web authentication
pub discord_client_secret: String,
/// Optional base URL override for OAuth2 redirect (e.g. "https://banner.xevion.dev").
/// When unset, the redirect URI is derived from the incoming request's Origin/Host.
#[serde(default)]
pub discord_redirect_uri: Option<String>,
/// Discord user ID to seed as initial admin on startup (optional)
#[serde(default)]
pub admin_discord_id: Option<u64>,
}
/// Default log level of "info"
fn default_log_level() -> String {
"info".to_string()
}
/// Default port of 8080
fn default_port() -> u16 {
8080
}
/// Default shutdown timeout of 8 seconds
@@ -41,6 +77,73 @@ fn default_shutdown_timeout() -> Duration {
Duration::from_secs(8)
}
/// Default banner base URL
fn default_banner_base_url() -> String {
"https://ssbprod.utsa.edu/StudentRegistrationSsb/ssb".to_string()
}
/// Rate limiting configuration for Banner API requests
#[derive(Deserialize, Clone, Debug, PartialEq, Eq)]
pub struct RateLimitingConfig {
/// Requests per minute for session operations (very conservative)
#[serde(default = "default_session_rpm")]
pub session_rpm: u32,
/// Requests per minute for search operations (moderate)
#[serde(default = "default_search_rpm")]
pub search_rpm: u32,
/// Requests per minute for metadata operations (moderate)
#[serde(default = "default_metadata_rpm")]
pub metadata_rpm: u32,
/// Requests per minute for reset operations (low priority)
#[serde(default = "default_reset_rpm")]
pub reset_rpm: u32,
/// Burst allowance (extra requests allowed in short bursts)
#[serde(default = "default_burst_allowance")]
pub burst_allowance: u32,
}
/// Default rate limiting configuration
fn default_rate_limiting() -> RateLimitingConfig {
RateLimitingConfig::default()
}
impl Default for RateLimitingConfig {
fn default() -> Self {
Self {
session_rpm: default_session_rpm(),
search_rpm: default_search_rpm(),
metadata_rpm: default_metadata_rpm(),
reset_rpm: default_reset_rpm(),
burst_allowance: default_burst_allowance(),
}
}
}
/// Default session requests per minute (6 = 1 every 10 seconds)
fn default_session_rpm() -> u32 {
6
}
/// Default search requests per minute (30 = 1 every 2 seconds)
fn default_search_rpm() -> u32 {
30
}
/// Default metadata requests per minute (20 = 1 every 3 seconds)
fn default_metadata_rpm() -> u32 {
20
}
/// Default reset requests per minute (10 = 1 every 6 seconds)
fn default_reset_rpm() -> u32 {
10
}
/// Default burst allowance (3 extra requests)
fn default_burst_allowance() -> u32 {
3
}
/// Duration parser configured to handle various time units with seconds as default
///
/// Supports:
@@ -125,3 +228,138 @@ where
deserializer.deserialize_any(DurationVisitor)
}
/// Deserializes a value that may arrive as either a string or unsigned integer.
///
/// Figment's env provider infers types from raw values, so numeric-looking strings
/// like Discord client IDs get parsed as integers. This accepts both forms.
fn deserialize_string_or_uint<'de, D>(deserializer: D) -> Result<String, D::Error>
where
D: Deserializer<'de>,
{
use serde::de::Visitor;
struct StringOrUintVisitor;
impl<'de> Visitor<'de> for StringOrUintVisitor {
type Value = String;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a string or unsigned integer")
}
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(value.to_owned())
}
fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(value.to_string())
}
}
deserializer.deserialize_any(StringOrUintVisitor)
}
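A round-trip check clarifies the contract (a sketch using serde_json, mirroring the duration tests below; IdWrapper is a test-only type):
#[cfg(test)]
mod string_or_uint_tests {
    use super::*;

    #[derive(Deserialize)]
    struct IdWrapper {
        #[serde(deserialize_with = "deserialize_string_or_uint")]
        id: String,
    }

    // A JSON number and a JSON string normalize to the same String value.
    #[test]
    fn accepts_number_or_string() {
        let n: IdWrapper = serde_json::from_str(r#"{"id": 1234567890}"#).unwrap();
        let s: IdWrapper = serde_json::from_str(r#"{"id": "1234567890"}"#).unwrap();
        assert_eq!(n.id, "1234567890");
        assert_eq!(n.id, s.id);
    }
}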
#[cfg(test)]
mod tests {
use super::*;
use serde::Deserialize;
#[derive(Deserialize)]
struct DurationWrapper {
#[serde(deserialize_with = "deserialize_duration")]
value: Duration,
}
fn parse(json: &str) -> Result<Duration, String> {
serde_json::from_str::<DurationWrapper>(json)
.map(|w| w.value)
.map_err(|e| e.to_string())
}
#[test]
fn test_duration_from_integer_seconds() {
let d = parse(r#"{"value": 30}"#).unwrap();
assert_eq!(d, Duration::from_secs(30));
}
#[test]
fn test_duration_from_string_seconds() {
let d = parse(r#"{"value": "30s"}"#).unwrap();
assert_eq!(d, Duration::from_secs(30));
}
#[test]
fn test_duration_from_string_minutes() {
let d = parse(r#"{"value": "2m"}"#).unwrap();
assert_eq!(d, Duration::from_secs(120));
}
#[test]
fn test_duration_from_string_milliseconds() {
let d = parse(r#"{"value": "1500ms"}"#).unwrap();
assert_eq!(d, Duration::from_millis(1500));
}
#[test]
fn test_duration_from_string_with_space() {
let d = parse(r#"{"value": "2 m"}"#).unwrap();
assert_eq!(d, Duration::from_secs(120));
}
#[test]
fn test_duration_from_string_multiple_units() {
let d = parse(r#"{"value": "1m 30s"}"#).unwrap();
assert_eq!(d, Duration::from_secs(90));
}
#[test]
fn test_duration_from_bare_number_string() {
let d = parse(r#"{"value": "45"}"#).unwrap();
assert_eq!(d, Duration::from_secs(45));
}
#[test]
fn test_duration_zero() {
let d = parse(r#"{"value": 0}"#).unwrap();
assert_eq!(d, Duration::from_secs(0));
}
#[test]
fn test_duration_negative_rejected() {
let err = parse(r#"{"value": -5}"#).unwrap_err();
assert!(err.contains("negative"), "expected negative error: {err}");
}
#[test]
fn test_duration_invalid_string_rejected() {
let err = parse(r#"{"value": "notaduration"}"#).unwrap_err();
assert!(
err.contains("Invalid duration"),
"expected invalid format error: {err}"
);
}
#[test]
fn test_default_config_values() {
assert_eq!(default_port(), 8080);
assert_eq!(default_shutdown_timeout(), Duration::from_secs(8));
assert_eq!(default_log_level(), "info");
}
#[test]
fn test_default_rate_limiting() {
let rl = default_rate_limiting();
assert_eq!(rl.session_rpm, 6);
assert_eq!(rl.search_rpm, 30);
assert_eq!(rl.metadata_rpm, 20);
assert_eq!(rl.reset_rpm, 10);
assert_eq!(rl.burst_allowance, 3);
}
}
+764
@@ -0,0 +1,764 @@
//! Batch database operations for improved performance.
use crate::banner::Course;
use crate::data::models::{DbMeetingTime, UpsertCounts};
use crate::data::names::{decode_html_entities, parse_banner_name};
use crate::error::Result;
use sqlx::PgConnection;
use sqlx::PgPool;
use std::collections::{HashMap, HashSet};
use std::time::Instant;
use tracing::info;
/// Convert a Banner API course's meeting times to the DB JSONB shape.
fn to_db_meeting_times(course: &Course) -> serde_json::Value {
let meetings: Vec<DbMeetingTime> = course
.meetings_faculty
.iter()
.map(|mf| {
let mt = &mf.meeting_time;
DbMeetingTime {
begin_time: mt.begin_time.clone(),
end_time: mt.end_time.clone(),
start_date: mt.start_date.clone(),
end_date: mt.end_date.clone(),
monday: mt.monday,
tuesday: mt.tuesday,
wednesday: mt.wednesday,
thursday: mt.thursday,
friday: mt.friday,
saturday: mt.saturday,
sunday: mt.sunday,
building: mt.building.clone(),
building_description: mt.building_description.clone(),
room: mt.room.clone(),
campus: mt.campus.clone(),
meeting_type: mt.meeting_type.clone(),
meeting_schedule_type: mt.meeting_schedule_type.clone(),
}
})
.collect();
serde_json::to_value(meetings).unwrap_or_default()
}
/// Convert a Banner API course's section attributes to a JSONB array of code strings.
fn to_db_attributes(course: &Course) -> serde_json::Value {
let codes: Vec<&str> = course
.section_attributes
.iter()
.map(|a| a.code.as_str())
.collect();
serde_json::to_value(codes).unwrap_or_default()
}
/// Extract the campus code from the first meeting time (Banner doesn't put it on the course directly).
fn extract_campus_code(course: &Course) -> Option<String> {
course
.meetings_faculty
.first()
.and_then(|mf| mf.meeting_time.campus.clone())
}
// ---------------------------------------------------------------------------
// Task 1: UpsertDiffRow — captures pre- and post-upsert state for diffing
// ---------------------------------------------------------------------------
/// Row returned by the CTE-based upsert query, carrying both old and new values
/// for every auditable field. `old_id` is `None` for fresh inserts.
#[derive(sqlx::FromRow, Debug)]
struct UpsertDiffRow {
id: i32,
old_id: Option<i32>,
crn: String,
term_code: String,
// enrollment fields
old_enrollment: Option<i32>,
new_enrollment: i32,
old_max_enrollment: Option<i32>,
new_max_enrollment: i32,
old_wait_count: Option<i32>,
new_wait_count: i32,
old_wait_capacity: Option<i32>,
new_wait_capacity: i32,
// text fields (non-nullable in DB)
old_subject: Option<String>,
new_subject: String,
old_course_number: Option<String>,
new_course_number: String,
old_title: Option<String>,
new_title: String,
// nullable text fields
old_sequence_number: Option<String>,
new_sequence_number: Option<String>,
old_part_of_term: Option<String>,
new_part_of_term: Option<String>,
old_instructional_method: Option<String>,
new_instructional_method: Option<String>,
old_campus: Option<String>,
new_campus: Option<String>,
// nullable int fields
old_credit_hours: Option<i32>,
new_credit_hours: Option<i32>,
old_credit_hour_low: Option<i32>,
new_credit_hour_low: Option<i32>,
old_credit_hour_high: Option<i32>,
new_credit_hour_high: Option<i32>,
// cross-list fields
old_cross_list: Option<String>,
new_cross_list: Option<String>,
old_cross_list_capacity: Option<i32>,
new_cross_list_capacity: Option<i32>,
old_cross_list_count: Option<i32>,
new_cross_list_count: Option<i32>,
// link fields
old_link_identifier: Option<String>,
new_link_identifier: Option<String>,
old_is_section_linked: Option<bool>,
new_is_section_linked: Option<bool>,
// JSONB fields
old_meeting_times: Option<serde_json::Value>,
new_meeting_times: serde_json::Value,
old_attributes: Option<serde_json::Value>,
new_attributes: serde_json::Value,
}
// ---------------------------------------------------------------------------
// Task 3: Entry types and diff logic
// ---------------------------------------------------------------------------
struct AuditEntry {
course_id: i32,
field_changed: &'static str,
old_value: String,
new_value: String,
}
struct MetricEntry {
course_id: i32,
enrollment: i32,
wait_count: i32,
seats_available: i32,
}
/// Compare old vs new for a single field, pushing an `AuditEntry` when they differ.
///
/// Three variants:
/// - `diff_field!(audits, row, field_name, old_field, new_field)` — `Option<T>` old vs `T` new
/// - `diff_field!(opt audits, row, field_name, old_field, new_field)` — `Option<T>` old vs `Option<T>` new
/// - `diff_field!(json audits, row, field_name, old_field, new_field)` — `Option<Value>` old vs `Value` new
///
/// All variants skip when `old_id` is None (fresh insert).
macro_rules! diff_field {
// Standard: Option<T> old vs T new (non-nullable columns)
($audits:ident, $row:ident, $field:expr, $old:ident, $new:ident) => {
if $row.old_id.is_some() {
let old_str = $row
.$old
.as_ref()
.map(|v| v.to_string())
.unwrap_or_default();
let new_str = $row.$new.to_string();
if old_str != new_str {
$audits.push(AuditEntry {
course_id: $row.id,
field_changed: $field,
old_value: old_str,
new_value: new_str,
});
}
}
};
// Nullable: Option<T> old vs Option<T> new
(opt $audits:ident, $row:ident, $field:expr, $old:ident, $new:ident) => {
if $row.old_id.is_some() {
let old_str = $row
.$old
.as_ref()
.map(|v| v.to_string())
.unwrap_or_default();
let new_str = $row
.$new
.as_ref()
.map(|v| v.to_string())
.unwrap_or_default();
if old_str != new_str {
$audits.push(AuditEntry {
course_id: $row.id,
field_changed: $field,
old_value: old_str,
new_value: new_str,
});
}
}
};
// JSONB: Option<Value> old vs Value new
(json $audits:ident, $row:ident, $field:expr, $old:ident, $new:ident) => {
if $row.old_id.is_some() {
let old_val = $row
.$old
.as_ref()
.cloned()
.unwrap_or(serde_json::Value::Null);
let new_val = &$row.$new;
if old_val != *new_val {
$audits.push(AuditEntry {
course_id: $row.id,
field_changed: $field,
old_value: old_val.to_string(),
new_value: new_val.to_string(),
});
}
}
};
}
/// Compute audit entries (field-level diffs) and metric entries from upsert diff rows.
fn compute_diffs(rows: &[UpsertDiffRow]) -> (Vec<AuditEntry>, Vec<MetricEntry>) {
let mut audits = Vec::new();
let mut metrics = Vec::new();
for row in rows {
// Non-nullable fields
diff_field!(audits, row, "enrollment", old_enrollment, new_enrollment);
diff_field!(
audits,
row,
"max_enrollment",
old_max_enrollment,
new_max_enrollment
);
diff_field!(audits, row, "wait_count", old_wait_count, new_wait_count);
diff_field!(
audits,
row,
"wait_capacity",
old_wait_capacity,
new_wait_capacity
);
diff_field!(audits, row, "subject", old_subject, new_subject);
diff_field!(
audits,
row,
"course_number",
old_course_number,
new_course_number
);
diff_field!(audits, row, "title", old_title, new_title);
// Nullable text fields
diff_field!(opt audits, row, "sequence_number", old_sequence_number, new_sequence_number);
diff_field!(opt audits, row, "part_of_term", old_part_of_term, new_part_of_term);
diff_field!(opt audits, row, "instructional_method", old_instructional_method, new_instructional_method);
diff_field!(opt audits, row, "campus", old_campus, new_campus);
// Nullable int fields
diff_field!(opt audits, row, "credit_hours", old_credit_hours, new_credit_hours);
diff_field!(opt audits, row, "credit_hour_low", old_credit_hour_low, new_credit_hour_low);
diff_field!(opt audits, row, "credit_hour_high", old_credit_hour_high, new_credit_hour_high);
// Cross-list fields
diff_field!(opt audits, row, "cross_list", old_cross_list, new_cross_list);
diff_field!(opt audits, row, "cross_list_capacity", old_cross_list_capacity, new_cross_list_capacity);
diff_field!(opt audits, row, "cross_list_count", old_cross_list_count, new_cross_list_count);
// Link fields
diff_field!(opt audits, row, "link_identifier", old_link_identifier, new_link_identifier);
diff_field!(opt audits, row, "is_section_linked", old_is_section_linked, new_is_section_linked);
// JSONB fields
diff_field!(json audits, row, "meeting_times", old_meeting_times, new_meeting_times);
diff_field!(json audits, row, "attributes", old_attributes, new_attributes);
// Emit a metric entry on fresh insert (baseline) or when enrollment data changed
let is_new = row.old_id.is_none();
let enrollment_changed = row.old_id.is_some()
&& (row.old_enrollment != Some(row.new_enrollment)
|| row.old_wait_count != Some(row.new_wait_count)
|| row.old_max_enrollment != Some(row.new_max_enrollment));
if is_new || enrollment_changed {
metrics.push(MetricEntry {
course_id: row.id,
enrollment: row.new_enrollment,
wait_count: row.new_wait_count,
seats_available: row.new_max_enrollment - row.new_enrollment,
});
}
}
(audits, metrics)
}
// ---------------------------------------------------------------------------
// Task 4: Batch insert functions for audits and metrics
// ---------------------------------------------------------------------------
async fn insert_audits(audits: &[AuditEntry], conn: &mut PgConnection) -> Result<()> {
if audits.is_empty() {
return Ok(());
}
let course_ids: Vec<i32> = audits.iter().map(|a| a.course_id).collect();
let fields: Vec<&str> = audits.iter().map(|a| a.field_changed).collect();
let old_values: Vec<&str> = audits.iter().map(|a| a.old_value.as_str()).collect();
let new_values: Vec<&str> = audits.iter().map(|a| a.new_value.as_str()).collect();
sqlx::query(
r#"
INSERT INTO course_audits (course_id, timestamp, field_changed, old_value, new_value)
SELECT v.course_id, NOW(), v.field_changed, v.old_value, v.new_value
FROM UNNEST($1::int4[], $2::text[], $3::text[], $4::text[])
AS v(course_id, field_changed, old_value, new_value)
"#,
)
.bind(&course_ids)
.bind(&fields)
.bind(&old_values)
.bind(&new_values)
.execute(&mut *conn)
.await
.map_err(|e| anyhow::anyhow!("Failed to batch insert course_audits: {}", e))?;
Ok(())
}
async fn insert_metrics(metrics: &[MetricEntry], conn: &mut PgConnection) -> Result<()> {
if metrics.is_empty() {
return Ok(());
}
let course_ids: Vec<i32> = metrics.iter().map(|m| m.course_id).collect();
let enrollments: Vec<i32> = metrics.iter().map(|m| m.enrollment).collect();
let wait_counts: Vec<i32> = metrics.iter().map(|m| m.wait_count).collect();
let seats_available: Vec<i32> = metrics.iter().map(|m| m.seats_available).collect();
sqlx::query(
r#"
INSERT INTO course_metrics (course_id, timestamp, enrollment, wait_count, seats_available)
SELECT v.course_id, NOW(), v.enrollment, v.wait_count, v.seats_available
FROM UNNEST($1::int4[], $2::int4[], $3::int4[], $4::int4[])
AS v(course_id, enrollment, wait_count, seats_available)
"#,
)
.bind(&course_ids)
.bind(&enrollments)
.bind(&wait_counts)
.bind(&seats_available)
.execute(&mut *conn)
.await
.map_err(|e| anyhow::anyhow!("Failed to batch insert course_metrics: {}", e))?;
Ok(())
}
// ---------------------------------------------------------------------------
// Core upsert functions (updated to use &mut PgConnection)
// ---------------------------------------------------------------------------
/// Batch upsert courses in a single database query.
///
/// Performs a bulk INSERT...ON CONFLICT DO UPDATE for all courses, including
/// new fields (meeting times, attributes, instructor data). Captures pre-update
/// state for audit/metric tracking, all within a single transaction.
///
/// # Performance
/// - Reduces N database round-trips to 6 (old-data CTE + upsert, audits, metrics, instructors, junction delete + re-insert)
/// - Typical usage: 50-200 courses per batch
pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<UpsertCounts> {
if courses.is_empty() {
info!("No courses to upsert, skipping batch operation");
return Ok(UpsertCounts::default());
}
let start = Instant::now();
let course_count = courses.len();
let mut tx = db_pool.begin().await?;
// Step 1: Upsert courses with CTE, returning diff rows
let diff_rows = upsert_courses(courses, &mut tx).await?;
// Step 2: Build (crn, term_code) → course_id map for instructor linking.
// RETURNING order from INSERT ... ON CONFLICT is not guaranteed to match
// the input array order, so we must key by (crn, term_code) rather than
// relying on positional correspondence.
let crn_term_to_id: HashMap<(&str, &str), i32> = diff_rows
.iter()
.map(|r| ((r.crn.as_str(), r.term_code.as_str()), r.id))
.collect();
// Step 3: Compute audit/metric diffs
let (audits, metrics) = compute_diffs(&diff_rows);
// Count courses that had at least one field change (existing rows only)
let changed_ids: HashSet<i32> = audits.iter().map(|a| a.course_id).collect();
let existing_count = diff_rows.iter().filter(|r| r.old_id.is_some()).count() as i32;
let courses_changed = changed_ids.len() as i32;
let counts = UpsertCounts {
courses_fetched: course_count as i32,
courses_changed,
courses_unchanged: existing_count - courses_changed,
audits_generated: audits.len() as i32,
metrics_generated: metrics.len() as i32,
};
// Step 4: Insert audits and metrics
insert_audits(&audits, &mut tx).await?;
insert_metrics(&metrics, &mut tx).await?;
// Step 5: Upsert instructors (returns email -> id map)
let email_to_id = upsert_instructors(courses, &mut tx).await?;
// Step 6: Link courses to instructors via junction table
upsert_course_instructors(courses, &crn_term_to_id, &email_to_id, &mut tx).await?;
tx.commit().await?;
let duration = start.elapsed();
info!(
courses_count = course_count,
courses_changed = counts.courses_changed,
courses_unchanged = counts.courses_unchanged,
audit_entries = counts.audits_generated,
metric_entries = counts.metrics_generated,
duration_ms = duration.as_millis(),
"Batch upserted courses with instructors, audits, and metrics"
);
Ok(counts)
}
// ---------------------------------------------------------------------------
// Task 2: CTE-based upsert returning old+new values
// ---------------------------------------------------------------------------
/// Upsert all courses and return diff rows with old and new values for auditing.
async fn upsert_courses(courses: &[Course], conn: &mut PgConnection) -> Result<Vec<UpsertDiffRow>> {
let crns: Vec<&str> = courses
.iter()
.map(|c| c.course_reference_number.as_str())
.collect();
let subjects: Vec<&str> = courses.iter().map(|c| c.subject.as_str()).collect();
let course_numbers: Vec<&str> = courses.iter().map(|c| c.course_number.as_str()).collect();
let titles: Vec<String> = courses
.iter()
.map(|c| decode_html_entities(&c.course_title))
.collect();
let term_codes: Vec<&str> = courses.iter().map(|c| c.term.as_str()).collect();
let enrollments: Vec<i32> = courses.iter().map(|c| c.enrollment).collect();
let max_enrollments: Vec<i32> = courses.iter().map(|c| c.maximum_enrollment).collect();
let wait_counts: Vec<i32> = courses.iter().map(|c| c.wait_count).collect();
let wait_capacities: Vec<i32> = courses.iter().map(|c| c.wait_capacity).collect();
// New scalar fields
let sequence_numbers: Vec<Option<&str>> = courses
.iter()
.map(|c| Some(c.sequence_number.as_str()))
.collect();
let parts_of_term: Vec<Option<&str>> = courses
.iter()
.map(|c| Some(c.part_of_term.as_str()))
.collect();
let instructional_methods: Vec<Option<&str>> = courses
.iter()
.map(|c| Some(c.instructional_method.as_str()))
.collect();
let campuses: Vec<Option<String>> = courses.iter().map(extract_campus_code).collect();
let credit_hours: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hours).collect();
let credit_hour_lows: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hour_low).collect();
let credit_hour_highs: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hour_high).collect();
let cross_lists: Vec<Option<&str>> = courses.iter().map(|c| c.cross_list.as_deref()).collect();
let cross_list_capacities: Vec<Option<i32>> =
courses.iter().map(|c| c.cross_list_capacity).collect();
let cross_list_counts: Vec<Option<i32>> = courses.iter().map(|c| c.cross_list_count).collect();
let link_identifiers: Vec<Option<&str>> = courses
.iter()
.map(|c| c.link_identifier.as_deref())
.collect();
let is_section_linkeds: Vec<Option<bool>> =
courses.iter().map(|c| Some(c.is_section_linked)).collect();
// JSONB fields
let meeting_times_json: Vec<serde_json::Value> =
courses.iter().map(to_db_meeting_times).collect();
let attributes_json: Vec<serde_json::Value> = courses.iter().map(to_db_attributes).collect();
let rows = sqlx::query_as::<_, UpsertDiffRow>(
r#"
WITH old_data AS (
SELECT id, enrollment, max_enrollment, wait_count, wait_capacity,
subject, course_number, title,
sequence_number, part_of_term, instructional_method, campus,
credit_hours, credit_hour_low, credit_hour_high,
cross_list, cross_list_capacity, cross_list_count,
link_identifier, is_section_linked,
meeting_times, attributes,
crn, term_code
FROM courses
WHERE (crn, term_code) IN (SELECT * FROM UNNEST($1::text[], $5::text[]))
),
upserted AS (
INSERT INTO courses (
crn, subject, course_number, title, term_code,
enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at,
sequence_number, part_of_term, instructional_method, campus,
credit_hours, credit_hour_low, credit_hour_high,
cross_list, cross_list_capacity, cross_list_count,
link_identifier, is_section_linked,
meeting_times, attributes
)
SELECT
v.crn, v.subject, v.course_number, v.title, v.term_code,
v.enrollment, v.max_enrollment, v.wait_count, v.wait_capacity, NOW(),
v.sequence_number, v.part_of_term, v.instructional_method, v.campus,
v.credit_hours, v.credit_hour_low, v.credit_hour_high,
v.cross_list, v.cross_list_capacity, v.cross_list_count,
v.link_identifier, v.is_section_linked,
v.meeting_times, v.attributes
FROM UNNEST(
$1::text[], $2::text[], $3::text[], $4::text[], $5::text[],
$6::int4[], $7::int4[], $8::int4[], $9::int4[],
$10::text[], $11::text[], $12::text[], $13::text[],
$14::int4[], $15::int4[], $16::int4[],
$17::text[], $18::int4[], $19::int4[],
$20::text[], $21::bool[],
$22::jsonb[], $23::jsonb[]
) AS v(
crn, subject, course_number, title, term_code,
enrollment, max_enrollment, wait_count, wait_capacity,
sequence_number, part_of_term, instructional_method, campus,
credit_hours, credit_hour_low, credit_hour_high,
cross_list, cross_list_capacity, cross_list_count,
link_identifier, is_section_linked,
meeting_times, attributes
)
ON CONFLICT (crn, term_code)
DO UPDATE SET
subject = EXCLUDED.subject,
course_number = EXCLUDED.course_number,
title = EXCLUDED.title,
enrollment = EXCLUDED.enrollment,
max_enrollment = EXCLUDED.max_enrollment,
wait_count = EXCLUDED.wait_count,
wait_capacity = EXCLUDED.wait_capacity,
last_scraped_at = EXCLUDED.last_scraped_at,
sequence_number = EXCLUDED.sequence_number,
part_of_term = EXCLUDED.part_of_term,
instructional_method = EXCLUDED.instructional_method,
campus = EXCLUDED.campus,
credit_hours = EXCLUDED.credit_hours,
credit_hour_low = EXCLUDED.credit_hour_low,
credit_hour_high = EXCLUDED.credit_hour_high,
cross_list = EXCLUDED.cross_list,
cross_list_capacity = EXCLUDED.cross_list_capacity,
cross_list_count = EXCLUDED.cross_list_count,
link_identifier = EXCLUDED.link_identifier,
is_section_linked = EXCLUDED.is_section_linked,
meeting_times = EXCLUDED.meeting_times,
attributes = EXCLUDED.attributes
RETURNING *
)
SELECT u.id,
o.id AS old_id,
u.crn, u.term_code,
o.enrollment AS old_enrollment, u.enrollment AS new_enrollment,
o.max_enrollment AS old_max_enrollment, u.max_enrollment AS new_max_enrollment,
o.wait_count AS old_wait_count, u.wait_count AS new_wait_count,
o.wait_capacity AS old_wait_capacity, u.wait_capacity AS new_wait_capacity,
o.subject AS old_subject, u.subject AS new_subject,
o.course_number AS old_course_number, u.course_number AS new_course_number,
o.title AS old_title, u.title AS new_title,
o.sequence_number AS old_sequence_number, u.sequence_number AS new_sequence_number,
o.part_of_term AS old_part_of_term, u.part_of_term AS new_part_of_term,
o.instructional_method AS old_instructional_method, u.instructional_method AS new_instructional_method,
o.campus AS old_campus, u.campus AS new_campus,
o.credit_hours AS old_credit_hours, u.credit_hours AS new_credit_hours,
o.credit_hour_low AS old_credit_hour_low, u.credit_hour_low AS new_credit_hour_low,
o.credit_hour_high AS old_credit_hour_high, u.credit_hour_high AS new_credit_hour_high,
o.cross_list AS old_cross_list, u.cross_list AS new_cross_list,
o.cross_list_capacity AS old_cross_list_capacity, u.cross_list_capacity AS new_cross_list_capacity,
o.cross_list_count AS old_cross_list_count, u.cross_list_count AS new_cross_list_count,
o.link_identifier AS old_link_identifier, u.link_identifier AS new_link_identifier,
o.is_section_linked AS old_is_section_linked, u.is_section_linked AS new_is_section_linked,
o.meeting_times AS old_meeting_times, u.meeting_times AS new_meeting_times,
o.attributes AS old_attributes, u.attributes AS new_attributes
FROM upserted u
LEFT JOIN old_data o ON u.crn = o.crn AND u.term_code = o.term_code
"#,
)
.bind(&crns)
.bind(&subjects)
.bind(&course_numbers)
.bind(&titles)
.bind(&term_codes)
.bind(&enrollments)
.bind(&max_enrollments)
.bind(&wait_counts)
.bind(&wait_capacities)
.bind(&sequence_numbers)
.bind(&parts_of_term)
.bind(&instructional_methods)
.bind(&campuses)
.bind(&credit_hours)
.bind(&credit_hour_lows)
.bind(&credit_hour_highs)
.bind(&cross_lists)
.bind(&cross_list_capacities)
.bind(&cross_list_counts)
.bind(&link_identifiers)
.bind(&is_section_linkeds)
.bind(&meeting_times_json)
.bind(&attributes_json)
.fetch_all(&mut *conn)
.await
.map_err(|e| anyhow::anyhow!("Failed to batch upsert courses: {}", e))?;
Ok(rows)
}
/// Deduplicate and upsert all instructors from the batch by email.
/// Returns a map of lowercased_email -> instructor id for junction linking.
async fn upsert_instructors(
courses: &[Course],
conn: &mut PgConnection,
) -> Result<HashMap<String, i32>> {
let mut seen = HashSet::new();
let mut display_names: Vec<String> = Vec::new();
let mut first_names: Vec<Option<String>> = Vec::new();
let mut last_names: Vec<Option<String>> = Vec::new();
let mut emails_lower: Vec<String> = Vec::new();
let mut skipped_no_email = 0u32;
for course in courses {
for faculty in &course.faculty {
if let Some(email) = &faculty.email_address {
let email_lower = email.to_lowercase();
if seen.insert(email_lower.clone()) {
let parts = parse_banner_name(&faculty.display_name);
display_names.push(decode_html_entities(&faculty.display_name));
first_names.push(parts.as_ref().map(|p| p.first.clone()));
last_names.push(parts.as_ref().map(|p| p.last.clone()));
emails_lower.push(email_lower);
}
} else {
skipped_no_email += 1;
}
}
}
if skipped_no_email > 0 {
tracing::warn!(
count = skipped_no_email,
"Skipped instructors with no email address"
);
}
if display_names.is_empty() {
return Ok(HashMap::new());
}
let email_refs: Vec<&str> = emails_lower.iter().map(|s| s.as_str()).collect();
let first_name_refs: Vec<Option<&str>> = first_names.iter().map(|s| s.as_deref()).collect();
let last_name_refs: Vec<Option<&str>> = last_names.iter().map(|s| s.as_deref()).collect();
let rows: Vec<(i32, String)> = sqlx::query_as(
r#"
INSERT INTO instructors (display_name, email, first_name, last_name)
SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[], $4::text[])
ON CONFLICT (email)
DO UPDATE SET
display_name = EXCLUDED.display_name,
first_name = EXCLUDED.first_name,
last_name = EXCLUDED.last_name
RETURNING id, email
"#,
)
.bind(&display_names)
.bind(&email_refs)
.bind(&first_name_refs)
.bind(&last_name_refs)
.fetch_all(&mut *conn)
.await
.map_err(|e| anyhow::anyhow!("Failed to batch upsert instructors: {}", e))?;
Ok(rows.into_iter().map(|(id, email)| (email, id)).collect())
}
/// Link courses to their instructors via the junction table.
async fn upsert_course_instructors(
courses: &[Course],
crn_term_to_id: &HashMap<(&str, &str), i32>,
email_to_id: &HashMap<String, i32>,
conn: &mut PgConnection,
) -> Result<()> {
let mut cids = Vec::new();
let mut instructor_ids: Vec<i32> = Vec::new();
let mut banner_ids: Vec<&str> = Vec::new();
let mut primaries = Vec::new();
for course in courses {
let key = (
course.course_reference_number.as_str(),
course.term.as_str(),
);
let Some(&course_id) = crn_term_to_id.get(&key) else {
tracing::warn!(
crn = %course.course_reference_number,
term = %course.term,
"No course_id found for CRN/term pair during instructor linking"
);
continue;
};
for faculty in &course.faculty {
if let Some(email) = &faculty.email_address {
let email_lower = email.to_lowercase();
if let Some(&instructor_id) = email_to_id.get(&email_lower) {
cids.push(course_id);
instructor_ids.push(instructor_id);
banner_ids.push(faculty.banner_id.as_str());
primaries.push(faculty.primary_indicator);
}
}
}
}
if cids.is_empty() {
return Ok(());
}
// Delete existing links for these courses then re-insert.
// This handles instructor changes cleanly; the ON CONFLICT below only
// guards against duplicate pairs within the batch itself.
sqlx::query("DELETE FROM course_instructors WHERE course_id = ANY($1)")
.bind(&cids)
.execute(&mut *conn)
.await?;
sqlx::query(
r#"
INSERT INTO course_instructors (course_id, instructor_id, banner_id, is_primary)
SELECT * FROM UNNEST($1::int4[], $2::int4[], $3::text[], $4::bool[])
ON CONFLICT (course_id, instructor_id)
DO UPDATE SET
banner_id = EXCLUDED.banner_id,
is_primary = EXCLUDED.is_primary
"#,
)
.bind(&cids)
.bind(&instructor_ids)
.bind(&banner_ids)
.bind(&primaries)
.execute(&mut *conn)
.await
.map_err(|e| anyhow::anyhow!("Failed to batch upsert course_instructors: {}", e))?;
Ok(())
}
@@ -0,0 +1,249 @@
//! Database query functions for courses, used by the web API.
use crate::data::models::{Course, CourseInstructorDetail};
use crate::error::Result;
use sqlx::PgPool;
use std::collections::HashMap;
/// Column to sort search results by.
#[derive(Debug, Clone, Copy, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SortColumn {
CourseCode,
Title,
Instructor,
Time,
Seats,
}
/// Sort direction.
#[derive(Debug, Clone, Copy, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SortDirection {
Asc,
Desc,
}
/// Shared WHERE clause for course search filters.
///
/// Parameters $1-$8 match the bind order in `search_courses`.
const SEARCH_WHERE: &str = r#"
WHERE term_code = $1
AND ($2::text[] IS NULL OR subject = ANY($2))
AND ($3::text IS NULL OR title_search @@ plainto_tsquery('simple', $3) OR title ILIKE '%' || $3 || '%')
AND ($4::int IS NULL OR course_number::int >= $4)
AND ($5::int IS NULL OR course_number::int <= $5)
AND ($6::bool = false OR max_enrollment > enrollment)
AND ($7::text IS NULL OR instructional_method = $7)
AND ($8::text IS NULL OR campus = $8)
"#;
/// Build a safe ORDER BY clause from typed sort parameters.
///
/// All column names are hardcoded string literals — no caller input is interpolated.
fn sort_clause(column: Option<SortColumn>, direction: Option<SortDirection>) -> String {
let dir = match direction.unwrap_or(SortDirection::Asc) {
SortDirection::Asc => "ASC",
SortDirection::Desc => "DESC",
};
match column {
Some(SortColumn::CourseCode) => {
format!("subject {dir}, course_number {dir}, sequence_number {dir}")
}
Some(SortColumn::Title) => format!("title {dir}"),
Some(SortColumn::Instructor) => {
format!(
"(SELECT i.display_name FROM course_instructors ci \
JOIN instructors i ON i.id = ci.instructor_id \
WHERE ci.course_id = courses.id AND ci.is_primary = true \
LIMIT 1) {dir} NULLS LAST"
)
}
Some(SortColumn::Time) => {
format!("(meeting_times->0->>'begin_time') {dir} NULLS LAST")
}
Some(SortColumn::Seats) => {
format!("(max_enrollment - enrollment) {dir}")
}
None => "subject ASC, course_number ASC, sequence_number ASC".to_string(),
}
}
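// A small added sketch (not part of the original file) pinning down the
// injection-safety claim above: every ORDER BY fragment is assembled from
// hardcoded literals, so the only caller-controlled choice is which variant
// and direction to pick.
#[cfg(test)]
mod sort_clause_tests {
    use super::*;

    #[test]
    fn defaults_to_course_code_ascending() {
        assert_eq!(
            sort_clause(None, None),
            "subject ASC, course_number ASC, sequence_number ASC"
        );
    }

    #[test]
    fn title_sort_respects_direction() {
        assert_eq!(
            sort_clause(Some(SortColumn::Title), Some(SortDirection::Desc)),
            "title DESC"
        );
    }
}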
/// Search courses by term with optional filters.
///
/// Returns `(courses, total_count)` for pagination. The title filter ORs an
/// FTS tsvector word match with a trigram ILIKE substring match, so either
/// can satisfy it.
#[allow(clippy::too_many_arguments)]
pub async fn search_courses(
db_pool: &PgPool,
term_code: &str,
subject: Option<&[String]>,
title_query: Option<&str>,
course_number_low: Option<i32>,
course_number_high: Option<i32>,
open_only: bool,
instructional_method: Option<&str>,
campus: Option<&str>,
limit: i32,
offset: i32,
sort_by: Option<SortColumn>,
sort_dir: Option<SortDirection>,
) -> Result<(Vec<Course>, i64)> {
let order_by = sort_clause(sort_by, sort_dir);
let data_query =
format!("SELECT * FROM courses {SEARCH_WHERE} ORDER BY {order_by} LIMIT $9 OFFSET $10");
let count_query = format!("SELECT COUNT(*) FROM courses {SEARCH_WHERE}");
let courses = sqlx::query_as::<_, Course>(&data_query)
.bind(term_code)
.bind(subject)
.bind(title_query)
.bind(course_number_low)
.bind(course_number_high)
.bind(open_only)
.bind(instructional_method)
.bind(campus)
.bind(limit)
.bind(offset)
.fetch_all(db_pool)
.await?;
let total: (i64,) = sqlx::query_as(&count_query)
.bind(term_code)
.bind(subject)
.bind(title_query)
.bind(course_number_low)
.bind(course_number_high)
.bind(open_only)
.bind(instructional_method)
.bind(campus)
.fetch_one(db_pool)
.await?;
Ok((courses, total.0))
}
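// A hedged usage sketch (added for illustration; the subject code, term code,
// and page size are assumptions, not values from this repository): fetch the
// first page of open CS sections, most seats remaining first.
#[allow(dead_code)]
async fn example_open_cs_search(db_pool: &PgPool) -> Result<Vec<Course>> {
    let subjects = vec!["CS".to_string()];
    let (courses, _total) = search_courses(
        db_pool,
        "202510", // assumed term code
        Some(&subjects[..]),
        None, // no title filter
        None, // no lower course-number bound
        None, // no upper course-number bound
        true, // open sections only
        None, // any instructional method
        None, // any campus
        25,   // page size (assumed)
        0,    // first page
        Some(SortColumn::Seats),
        Some(SortDirection::Desc),
    )
    .await?;
    Ok(courses)
}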
/// Get a single course by CRN and term.
pub async fn get_course_by_crn(
db_pool: &PgPool,
crn: &str,
term_code: &str,
) -> Result<Option<Course>> {
let course =
sqlx::query_as::<_, Course>("SELECT * FROM courses WHERE crn = $1 AND term_code = $2")
.bind(crn)
.bind(term_code)
.fetch_optional(db_pool)
.await?;
Ok(course)
}
/// Get instructors for a single course by course ID.
pub async fn get_course_instructors(
db_pool: &PgPool,
course_id: i32,
) -> Result<Vec<CourseInstructorDetail>> {
let rows = sqlx::query_as::<_, CourseInstructorDetail>(
r#"
SELECT i.id as instructor_id, ci.banner_id, i.display_name, i.email, ci.is_primary,
rmp.avg_rating, rmp.num_ratings, rmp.rmp_legacy_id,
ci.course_id
FROM course_instructors ci
JOIN instructors i ON i.id = ci.instructor_id
LEFT JOIN LATERAL (
SELECT rp.avg_rating, rp.num_ratings, rp.legacy_id as rmp_legacy_id
FROM instructor_rmp_links irl
JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
WHERE irl.instructor_id = i.id
ORDER BY rp.num_ratings DESC NULLS LAST, rp.legacy_id ASC
LIMIT 1
) rmp ON true
WHERE ci.course_id = $1
ORDER BY ci.is_primary DESC, i.display_name
"#,
)
.bind(course_id)
.fetch_all(db_pool)
.await?;
Ok(rows)
}
/// Batch-fetch instructors for multiple courses in a single query.
///
/// Returns a map of `course_id → Vec<CourseInstructorDetail>`.
pub async fn get_instructors_for_courses(
db_pool: &PgPool,
course_ids: &[i32],
) -> Result<HashMap<i32, Vec<CourseInstructorDetail>>> {
if course_ids.is_empty() {
return Ok(HashMap::new());
}
let rows = sqlx::query_as::<_, CourseInstructorDetail>(
r#"
SELECT i.id as instructor_id, ci.banner_id, i.display_name, i.email, ci.is_primary,
rmp.avg_rating, rmp.num_ratings, rmp.rmp_legacy_id,
ci.course_id
FROM course_instructors ci
JOIN instructors i ON i.id = ci.instructor_id
LEFT JOIN LATERAL (
SELECT rp.avg_rating, rp.num_ratings, rp.legacy_id as rmp_legacy_id
FROM instructor_rmp_links irl
JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
WHERE irl.instructor_id = i.id
ORDER BY rp.num_ratings DESC NULLS LAST, rp.legacy_id ASC
LIMIT 1
) rmp ON true
WHERE ci.course_id = ANY($1)
ORDER BY ci.course_id, ci.is_primary DESC, i.display_name
"#,
)
.bind(course_ids)
.fetch_all(db_pool)
.await?;
let mut map: HashMap<i32, Vec<CourseInstructorDetail>> = HashMap::new();
for row in rows {
// course_id is always present in the batch query
let cid = row.course_id.unwrap_or_default();
map.entry(cid).or_default().push(row);
}
Ok(map)
}
/// Get subjects for a term, sorted by total enrollment (descending).
///
/// Returns only subjects that have courses in the given term, with their
/// descriptions from reference_data and enrollment totals for ranking.
pub async fn get_subjects_by_enrollment(
db_pool: &PgPool,
term_code: &str,
) -> Result<Vec<(String, String, i64)>> {
let rows: Vec<(String, String, i64)> = sqlx::query_as(
r#"
SELECT c.subject,
COALESCE(rd.description, c.subject),
COALESCE(SUM(c.enrollment), 0) as total_enrollment
FROM courses c
LEFT JOIN reference_data rd ON rd.category = 'subject' AND rd.code = c.subject
WHERE c.term_code = $1
GROUP BY c.subject, rd.description
ORDER BY total_enrollment DESC
"#,
)
.bind(term_code)
.fetch_all(db_pool)
.await?;
Ok(rows)
}
/// Get all distinct term codes that have courses in the DB.
pub async fn get_available_terms(db_pool: &PgPool) -> Result<Vec<String>> {
let rows: Vec<(String,)> =
sqlx::query_as("SELECT DISTINCT term_code FROM courses ORDER BY term_code DESC")
.fetch_all(db_pool)
.await?;
Ok(rows.into_iter().map(|(tc,)| tc).collect())
}
@@ -0,0 +1,12 @@
//! Database models and schema.
pub mod batch;
pub mod courses;
pub mod models;
pub mod names;
pub mod reference;
pub mod rmp;
pub mod rmp_matching;
pub mod scrape_jobs;
pub mod sessions;
pub mod users;
@@ -0,0 +1,273 @@
//! `sqlx` models for the database schema.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde_json::Value;
use ts_rs::TS;
/// Serialize an `i64` as a string to avoid JavaScript precision loss for values exceeding 2^53.
fn serialize_i64_as_string<S: Serializer>(value: &i64, serializer: S) -> Result<S::Ok, S::Error> {
serializer.serialize_str(&value.to_string())
}
/// Deserialize an `i64` from either a number or a string.
fn deserialize_i64_from_string<'de, D: Deserializer<'de>>(
deserializer: D,
) -> Result<i64, D::Error> {
use serde::de;
struct I64OrStringVisitor;
impl<'de> de::Visitor<'de> for I64OrStringVisitor {
type Value = i64;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("an integer or a string containing an integer")
}
fn visit_i64<E: de::Error>(self, value: i64) -> Result<i64, E> {
Ok(value)
}
fn visit_u64<E: de::Error>(self, value: u64) -> Result<i64, E> {
i64::try_from(value).map_err(|_| E::custom(format!("u64 {value} out of i64 range")))
}
fn visit_str<E: de::Error>(self, value: &str) -> Result<i64, E> {
value.parse().map_err(de::Error::custom)
}
}
deserializer.deserialize_any(I64OrStringVisitor)
}
/// Represents a meeting time stored as JSONB in the courses table.
#[derive(Debug, Clone, Serialize, Deserialize, TS)]
#[ts(export)]
pub struct DbMeetingTime {
pub begin_time: Option<String>,
pub end_time: Option<String>,
pub start_date: String,
pub end_date: String,
pub monday: bool,
pub tuesday: bool,
pub wednesday: bool,
pub thursday: bool,
pub friday: bool,
pub saturday: bool,
pub sunday: bool,
pub building: Option<String>,
pub building_description: Option<String>,
pub room: Option<String>,
pub campus: Option<String>,
pub meeting_type: String,
pub meeting_schedule_type: String,
}
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct Course {
pub id: i32,
pub crn: String,
pub subject: String,
pub course_number: String,
pub title: String,
pub term_code: String,
pub enrollment: i32,
pub max_enrollment: i32,
pub wait_count: i32,
pub wait_capacity: i32,
pub last_scraped_at: DateTime<Utc>,
// New scalar fields
pub sequence_number: Option<String>,
pub part_of_term: Option<String>,
pub instructional_method: Option<String>,
pub campus: Option<String>,
pub credit_hours: Option<i32>,
pub credit_hour_low: Option<i32>,
pub credit_hour_high: Option<i32>,
pub cross_list: Option<String>,
pub cross_list_capacity: Option<i32>,
pub cross_list_count: Option<i32>,
pub link_identifier: Option<String>,
pub is_section_linked: Option<bool>,
// JSONB fields
pub meeting_times: Value,
pub attributes: Value,
}
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct Instructor {
pub id: i32,
pub display_name: String,
pub email: String,
pub rmp_match_status: String,
pub first_name: Option<String>,
pub last_name: Option<String>,
}
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseInstructor {
pub course_id: i32,
pub instructor_id: i32,
pub banner_id: String,
pub is_primary: bool,
}
/// Joined instructor data for a course (from course_instructors + instructors + rmp_professors).
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseInstructorDetail {
pub instructor_id: i32,
pub banner_id: String,
pub display_name: String,
pub email: String,
pub is_primary: bool,
pub avg_rating: Option<f32>,
pub num_ratings: Option<i32>,
pub rmp_legacy_id: Option<i32>,
/// Present when fetched via batch query; `None` for single-course queries.
pub course_id: Option<i32>,
}
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct ReferenceData {
pub category: String,
pub code: String,
pub description: String,
}
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseMetric {
pub id: i32,
pub course_id: i32,
pub timestamp: DateTime<Utc>,
pub enrollment: i32,
pub wait_count: i32,
pub seats_available: i32,
}
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseAudit {
pub id: i32,
pub course_id: i32,
pub timestamp: DateTime<Utc>,
pub field_changed: String,
pub old_value: String,
pub new_value: String,
}
/// Aggregate counts returned by batch upsert, used for scrape job result logging.
#[derive(Debug, Clone, Default)]
pub struct UpsertCounts {
pub courses_fetched: i32,
pub courses_changed: i32,
pub courses_unchanged: i32,
pub audits_generated: i32,
pub metrics_generated: i32,
}
/// The priority level of a scrape job.
#[derive(sqlx::Type, Copy, Debug, Clone)]
#[sqlx(type_name = "scrape_priority", rename_all = "PascalCase")]
pub enum ScrapePriority {
Low,
Medium,
High,
Critical,
}
/// The type of target for a scrape job, determining how the payload is interpreted.
#[derive(sqlx::Type, Copy, Debug, Clone)]
#[sqlx(type_name = "target_type", rename_all = "PascalCase")]
pub enum TargetType {
Subject,
CourseRange,
CrnList,
SingleCrn,
}
/// Computed status for a scrape job, derived from existing fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum ScrapeJobStatus {
Processing,
StaleLock,
Exhausted,
Scheduled,
Pending,
}
/// How long a lock can be held before it is considered stale (mirrors `scrape_jobs::LOCK_EXPIRY`).
const LOCK_EXPIRY_SECS: i64 = 10 * 60;
/// Represents a queryable job from the database.
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct ScrapeJob {
pub id: i32,
pub target_type: TargetType,
pub target_payload: Value,
pub priority: ScrapePriority,
pub execute_at: DateTime<Utc>,
pub created_at: DateTime<Utc>,
pub locked_at: Option<DateTime<Utc>>,
/// Number of retry attempts for this job (non-negative, enforced by CHECK constraint)
pub retry_count: i32,
/// Maximum number of retry attempts allowed (non-negative, enforced by CHECK constraint)
pub max_retries: i32,
/// When the job last entered the "ready to pick up" state.
/// Set to NOW() on creation; updated to NOW() on retry.
pub queued_at: DateTime<Utc>,
}
impl ScrapeJob {
/// Compute the current status of this job from its fields.
pub fn status(&self) -> ScrapeJobStatus {
let now = Utc::now();
match self.locked_at {
Some(locked) if (now - locked).num_seconds() < LOCK_EXPIRY_SECS => {
ScrapeJobStatus::Processing
}
Some(_) => ScrapeJobStatus::StaleLock,
None if self.retry_count >= self.max_retries && self.max_retries > 0 => {
ScrapeJobStatus::Exhausted
}
None if self.execute_at > now => ScrapeJobStatus::Scheduled,
None => ScrapeJobStatus::Pending,
}
}
}
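// An illustrative test (added; not in the original file) of how `status()`
// reads `locked_at`: a fresh lock is Processing, one older than
// LOCK_EXPIRY_SECS is StaleLock, and no lock at all is Pending.
#[cfg(test)]
mod scrape_job_status_tests {
    use super::*;
    use chrono::Duration;

    fn job(locked_at: Option<DateTime<Utc>>) -> ScrapeJob {
        let now = Utc::now();
        ScrapeJob {
            id: 1,
            target_type: TargetType::Subject,
            target_payload: serde_json::json!({}),
            priority: ScrapePriority::Low,
            execute_at: now,
            created_at: now,
            locked_at,
            retry_count: 0,
            max_retries: 3,
            queued_at: now,
        }
    }

    #[test]
    fn lock_age_determines_status() {
        let now = Utc::now();
        assert_eq!(job(Some(now)).status(), ScrapeJobStatus::Processing);
        let stale = now - Duration::seconds(LOCK_EXPIRY_SECS + 1);
        assert_eq!(job(Some(stale)).status(), ScrapeJobStatus::StaleLock);
        assert_eq!(job(None).status(), ScrapeJobStatus::Pending);
    }
}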
/// A user authenticated via Discord OAuth.
#[derive(sqlx::FromRow, Debug, Clone, Serialize, Deserialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct User {
#[serde(
serialize_with = "serialize_i64_as_string",
deserialize_with = "deserialize_i64_from_string"
)]
#[ts(type = "string")]
pub discord_id: i64,
pub discord_username: String,
pub discord_avatar_hash: Option<String>,
pub is_admin: bool,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
/// A server-side session for an authenticated user.
#[allow(dead_code)] // Fields read via sqlx::FromRow; some only used in DB queries
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct UserSession {
pub id: String,
pub user_id: i64,
pub created_at: DateTime<Utc>,
pub expires_at: DateTime<Utc>,
pub last_active_at: DateTime<Utc>,
}
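// A minimal round-trip sketch (added; not in the original file) of the
// string-encoded i64 convention used by `User::discord_id`: 2^53 + 1 is not
// representable exactly as a JavaScript number, so it travels as a string,
// while plain JSON numbers are still accepted on the way back in.
#[cfg(test)]
mod i64_string_tests {
    use super::*;

    #[derive(Serialize, Deserialize)]
    struct Wrapper {
        #[serde(
            serialize_with = "serialize_i64_as_string",
            deserialize_with = "deserialize_i64_from_string"
        )]
        id: i64,
    }

    #[test]
    fn round_trips_large_ids() {
        let big = (1i64 << 53) + 1; // exceeds Number.MAX_SAFE_INTEGER
        let json = serde_json::to_string(&Wrapper { id: big }).unwrap();
        assert_eq!(json, format!(r#"{{"id":"{big}"}}"#));
        let back: Wrapper = serde_json::from_str(&json).unwrap();
        assert_eq!(back.id, big);
        // Plain numeric input is accepted too.
        let numeric: Wrapper = serde_json::from_str(r#"{"id":42}"#).unwrap();
        assert_eq!(numeric.id, 42);
    }
}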
@@ -0,0 +1,728 @@
//! Name parsing, normalization, and matching utilities.
//!
//! Handles the mismatch between Banner's single `display_name` ("Last, First Middle")
//! and RMP's separate `first_name`/`last_name` fields, plus data quality issues
//! from both sources (HTML entities, accents, nicknames, suffixes, junk).
use sqlx::PgPool;
use tracing::{info, warn};
use unicode_normalization::UnicodeNormalization;
/// Known name suffixes to extract from the last-name portion.
const SUFFIXES: &[&str] = &["iv", "iii", "ii", "jr", "sr"];
/// Parsed, cleaned name components.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NameParts {
/// Cleaned display-quality first name(s): "H. Paul", "María"
pub first: String,
/// Cleaned display-quality last name: "O'Brien", "LeBlanc"
pub last: String,
/// Middle name/initial if detected: "Manuel", "L."
pub middle: Option<String>,
/// Suffix if detected: "III", "Jr"
pub suffix: Option<String>,
/// Nicknames extracted from parentheses: ["Ken"], ["Qian"]
pub nicknames: Vec<String>,
}
/// Decode common HTML entities found in Banner data.
///
/// Handles both named entities (`&amp;`, `&uuml;`) and numeric references
/// (`&#39;`, `&#x27;`).
pub(crate) fn decode_html_entities(s: &str) -> String {
if !s.contains('&') {
return s.to_string();
}
htmlize::unescape(s).to_string()
}
/// Extract parenthesized nicknames from a name string.
///
/// `"William (Ken)"` → `("William", vec!["Ken"])`
/// `"Guenevere (Qian)"` → `("Guenevere", vec!["Qian"])`
/// `"John (jack) C."` → `("John C.", vec!["jack"])`
fn extract_nicknames(s: &str) -> (String, Vec<String>) {
let mut nicknames = Vec::new();
let mut cleaned = String::with_capacity(s.len());
let mut chars = s.chars().peekable();
while let Some(ch) = chars.next() {
if ch == '(' {
let mut nick = String::new();
for inner in chars.by_ref() {
if inner == ')' {
break;
}
nick.push(inner);
}
let nick = nick.trim().to_string();
if !nick.is_empty() {
nicknames.push(nick);
}
} else if ch == '"' || ch == '\u{201C}' || ch == '\u{201D}' {
// Extract quoted nicknames: Thomas "Butch" → nickname "Butch"
let mut nick = String::new();
for inner in chars.by_ref() {
if inner == '"' || inner == '\u{201C}' || inner == '\u{201D}' {
break;
}
nick.push(inner);
}
let nick = nick.trim().to_string();
if !nick.is_empty() {
nicknames.push(nick);
}
} else {
cleaned.push(ch);
}
}
// Collapse multiple spaces left by extraction
let cleaned = collapse_whitespace(&cleaned);
(cleaned, nicknames)
}
/// Extract a suffix (Jr, Sr, II, III, IV) from the last-name portion.
///
/// `"LeBlanc III"` → `("LeBlanc", Some("III"))`
/// `"Smith Jr."` → `("Smith", Some("Jr."))`
fn extract_suffix(last: &str) -> (String, Option<String>) {
// Try to match the last token as a suffix
let tokens: Vec<&str> = last.split_whitespace().collect();
if tokens.len() < 2 {
return (last.to_string(), None);
}
let candidate = tokens.last().unwrap();
let candidate_normalized = candidate.to_lowercase().trim_end_matches('.').to_string();
if SUFFIXES.contains(&candidate_normalized.as_str()) {
let name_part = tokens[..tokens.len() - 1].join(" ");
return (name_part, Some(candidate.to_string()));
}
(last.to_string(), None)
}
/// Strip junk commonly found in RMP name fields.
///
/// - Trailing commas: `"Cronenberger,"` → `"Cronenberger"`
/// - Email addresses: `"Neel.Baumgardner@utsa.edu"` → `""` (returns empty)
fn strip_junk(s: &str) -> String {
let s = s.trim();
// If the string looks like an email, return empty
if s.contains('@') && s.contains('.') && !s.contains(' ') {
return String::new();
}
// Strip trailing commas
s.trim_end_matches(',').trim().to_string()
}
/// Collapse runs of whitespace into single spaces and trim.
fn collapse_whitespace(s: &str) -> String {
s.split_whitespace().collect::<Vec<_>>().join(" ")
}
/// Parse a Banner `display_name` ("Last, First Middle") into structured parts.
///
/// Handles HTML entities, suffixes, and multi-token names.
///
/// # Examples
///
/// ```
/// use banner::data::names::parse_banner_name;
///
/// let parts = parse_banner_name("O&#39;Brien, Erin").unwrap();
/// assert_eq!(parts.first, "Erin");
/// assert_eq!(parts.last, "O'Brien");
/// ```
pub fn parse_banner_name(display_name: &str) -> Option<NameParts> {
// 1. Decode HTML entities
let decoded = decode_html_entities(display_name);
// 2. Split on first comma
let (last_part, first_part) = decoded.split_once(',')?;
let last_part = last_part.trim();
let first_part = first_part.trim();
if last_part.is_empty() || first_part.is_empty() {
return None;
}
// 3. Extract suffix from last name
let (last_clean, suffix) = extract_suffix(last_part);
// 4. Parse first-name portion: first token(s) + optional middle
// Banner format is "First Middle", so we keep all tokens as first_name
// to support "H. Paul" style names
let first_clean = collapse_whitespace(first_part);
Some(NameParts {
first: first_clean,
last: last_clean,
middle: None, // Banner doesn't clearly delineate middle vs first
suffix,
nicknames: Vec::new(), // Banner doesn't include nicknames
})
}
/// Parse RMP professor name fields into structured parts.
///
/// Handles junk data, nicknames in parentheses/quotes, and suffixes.
///
/// # Examples
///
/// ```
/// use banner::data::names::parse_rmp_name;
///
/// let parts = parse_rmp_name("William (Ken)", "Burchenal").unwrap();
/// assert_eq!(parts.first, "William");
/// assert_eq!(parts.nicknames, vec!["Ken"]);
/// ```
pub fn parse_rmp_name(first_name: &str, last_name: &str) -> Option<NameParts> {
let first_cleaned = strip_junk(first_name);
let last_cleaned = strip_junk(last_name);
if first_cleaned.is_empty() || last_cleaned.is_empty() {
return None;
}
// Extract nicknames from parens/quotes in first name
let (first_no_nicks, nicknames) = extract_nicknames(&first_cleaned);
let first_final = collapse_whitespace(&first_no_nicks);
// Extract suffix from last name
let (last_final, suffix) = extract_suffix(&last_cleaned);
if first_final.is_empty() || last_final.is_empty() {
return None;
}
Some(NameParts {
first: first_final,
last: last_final,
middle: None,
suffix,
nicknames,
})
}
/// Normalize a name string for matching index keys.
///
/// Pipeline: lowercase → NFD decompose → strip combining marks →
/// strip ALL punctuation, hyphens, and whitespace.
///
/// This produces a compact, space-free string so that "Aguirre Mesa" (Banner)
/// and "Aguirre-Mesa" (RMP) both become "aguirremesa".
///
/// # Examples
///
/// ```
/// use banner::data::names::normalize_for_matching;
///
/// assert_eq!(normalize_for_matching("García"), "garcia");
/// assert_eq!(normalize_for_matching("O'Brien"), "obrien");
/// assert_eq!(normalize_for_matching("Aguirre-Mesa"), "aguirremesa");
/// assert_eq!(normalize_for_matching("Aguirre Mesa"), "aguirremesa");
/// ```
pub fn normalize_for_matching(s: &str) -> String {
s.to_lowercase()
.nfd()
.filter(|c| {
// Keep only non-combining alphabetic characters — strip everything else
c.is_alphabetic() && !unicode_normalization::char::is_combining_mark(*c)
})
.collect()
}
/// Generate all matching index keys for a parsed name.
///
/// For a name like "H. Paul" / "LeBlanc" with no nicknames, generates:
/// - `("leblanc", "h paul")` — full normalized first
/// - `("leblanc", "paul")` — individual token (if multi-token)
/// - `("leblanc", "h")` — individual token (if multi-token)
///
/// For a name like "William" / "Burchenal" with nickname "Ken":
/// - `("burchenal", "william")` — primary
/// - `("burchenal", "ken")` — nickname variant
pub fn matching_keys(parts: &NameParts) -> Vec<(String, String)> {
let norm_last = normalize_for_matching(&parts.last);
if norm_last.is_empty() {
return Vec::new();
}
let mut keys = Vec::new();
let mut seen = std::collections::HashSet::new();
// Primary key: full first name (all spaces stripped)
let norm_first_full = normalize_for_matching(&parts.first);
if !norm_first_full.is_empty() && seen.insert(norm_first_full.clone()) {
keys.push((norm_last.clone(), norm_first_full));
}
// Individual tokens from the display-form first name
// (split before full normalization so we can generate per-token keys)
let first_tokens: Vec<&str> = parts.first.split_whitespace().collect();
if first_tokens.len() > 1 {
for token in &first_tokens {
let norm_token = normalize_for_matching(token);
if !norm_token.is_empty() && seen.insert(norm_token.clone()) {
keys.push((norm_last.clone(), norm_token));
}
}
}
// Nickname variants
for nick in &parts.nicknames {
let norm_nick = normalize_for_matching(nick);
if !norm_nick.is_empty() && seen.insert(norm_nick.clone()) {
keys.push((norm_last.clone(), norm_nick));
}
}
keys
}
/// Backfill `first_name`/`last_name` columns for all instructors that have
/// a `display_name` but NULL structured name fields.
///
/// Parses each `display_name` using [`parse_banner_name`] and updates the row.
/// Logs warnings for any names that fail to parse.
pub async fn backfill_instructor_names(db_pool: &PgPool) -> crate::error::Result<()> {
let rows: Vec<(i32, String)> = sqlx::query_as(
"SELECT id, display_name FROM instructors WHERE first_name IS NULL OR last_name IS NULL",
)
.fetch_all(db_pool)
.await?;
if rows.is_empty() {
return Ok(());
}
let total = rows.len();
let mut ids: Vec<i32> = Vec::with_capacity(total);
let mut firsts: Vec<String> = Vec::with_capacity(total);
let mut lasts: Vec<String> = Vec::with_capacity(total);
let mut unparseable = 0usize;
for (id, display_name) in &rows {
match parse_banner_name(display_name) {
Some(parts) => {
ids.push(*id);
firsts.push(parts.first);
lasts.push(parts.last);
}
None => {
warn!(
id,
display_name, "Failed to parse instructor display_name during backfill"
);
unparseable += 1;
}
}
}
if !ids.is_empty() {
let first_refs: Vec<&str> = firsts.iter().map(|s| s.as_str()).collect();
let last_refs: Vec<&str> = lasts.iter().map(|s| s.as_str()).collect();
sqlx::query(
r#"
UPDATE instructors i
SET first_name = v.first_name, last_name = v.last_name
FROM UNNEST($1::int4[], $2::text[], $3::text[])
AS v(id, first_name, last_name)
WHERE i.id = v.id
"#,
)
.bind(&ids)
.bind(&first_refs)
.bind(&last_refs)
.execute(db_pool)
.await?;
}
info!(
total,
updated = ids.len(),
unparseable,
"Instructor name backfill complete"
);
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
// -----------------------------------------------------------------------
// HTML entity decoding
// -----------------------------------------------------------------------
#[test]
fn decode_apostrophe_entity() {
assert_eq!(decode_html_entities("O&#39;Brien"), "O'Brien");
}
#[test]
fn decode_umlaut_entity() {
assert_eq!(decode_html_entities("B&uuml;lent"), "Bülent");
}
#[test]
fn decode_no_entities() {
assert_eq!(decode_html_entities("Smith"), "Smith");
}
// -----------------------------------------------------------------------
// Nickname extraction
// -----------------------------------------------------------------------
#[test]
fn extract_paren_nickname() {
let (cleaned, nicks) = extract_nicknames("William (Ken)");
assert_eq!(cleaned, "William");
assert_eq!(nicks, vec!["Ken"]);
}
#[test]
fn extract_quoted_nickname() {
let (cleaned, nicks) = extract_nicknames("Thomas \"Butch\"");
assert_eq!(cleaned, "Thomas");
assert_eq!(nicks, vec!["Butch"]);
}
#[test]
fn extract_paren_with_extra_text() {
let (cleaned, nicks) = extract_nicknames("John (jack) C.");
assert_eq!(cleaned, "John C.");
assert_eq!(nicks, vec!["jack"]);
}
#[test]
fn extract_no_nicknames() {
let (cleaned, nicks) = extract_nicknames("Maria Elena");
assert_eq!(cleaned, "Maria Elena");
assert!(nicks.is_empty());
}
// -----------------------------------------------------------------------
// Suffix extraction
// -----------------------------------------------------------------------
#[test]
fn extract_suffix_iii() {
let (name, suffix) = extract_suffix("LeBlanc III");
assert_eq!(name, "LeBlanc");
assert_eq!(suffix, Some("III".to_string()));
}
#[test]
fn extract_suffix_jr_period() {
let (name, suffix) = extract_suffix("Smith Jr.");
assert_eq!(name, "Smith");
assert_eq!(suffix, Some("Jr.".to_string()));
}
#[test]
fn extract_no_suffix() {
let (name, suffix) = extract_suffix("García");
assert_eq!(name, "García");
assert_eq!(suffix, None);
}
// -----------------------------------------------------------------------
// Junk stripping
// -----------------------------------------------------------------------
#[test]
fn strip_trailing_comma() {
assert_eq!(strip_junk("Cronenberger,"), "Cronenberger");
}
#[test]
fn strip_email_address() {
assert_eq!(strip_junk("Neel.Baumgardner@utsa.edu"), "");
}
#[test]
fn strip_clean_name() {
assert_eq!(strip_junk(" Maria "), "Maria");
}
// -----------------------------------------------------------------------
// normalize_for_matching
// -----------------------------------------------------------------------
#[test]
fn normalize_strips_accents() {
assert_eq!(normalize_for_matching("García"), "garcia");
}
#[test]
fn normalize_strips_apostrophe() {
assert_eq!(normalize_for_matching("O'Brien"), "obrien");
}
#[test]
fn normalize_strips_hyphen() {
assert_eq!(normalize_for_matching("Aguirre-Mesa"), "aguirremesa");
}
#[test]
fn normalize_tilde_n() {
assert_eq!(normalize_for_matching("Muñoz"), "munoz");
}
#[test]
fn normalize_umlaut() {
assert_eq!(normalize_for_matching("Müller"), "muller");
}
#[test]
fn normalize_period() {
assert_eq!(normalize_for_matching("H. Paul"), "hpaul");
}
#[test]
fn normalize_strips_spaces() {
assert_eq!(normalize_for_matching("Mary Lou"), "marylou");
}
// -----------------------------------------------------------------------
// parse_banner_name
// -----------------------------------------------------------------------
#[test]
fn banner_standard_name() {
let p = parse_banner_name("Smith, John").unwrap();
assert_eq!(p.first, "John");
assert_eq!(p.last, "Smith");
assert_eq!(p.suffix, None);
}
#[test]
fn banner_html_entity_apostrophe() {
let p = parse_banner_name("O&#39;Brien, Erin").unwrap();
assert_eq!(p.first, "Erin");
assert_eq!(p.last, "O'Brien");
}
#[test]
fn banner_html_entity_umlaut() {
let p = parse_banner_name("Temel, B&uuml;lent").unwrap();
assert_eq!(p.first, "Bülent");
assert_eq!(p.last, "Temel");
}
#[test]
fn banner_suffix_iii() {
let p = parse_banner_name("LeBlanc III, H. Paul").unwrap();
assert_eq!(p.first, "H. Paul");
assert_eq!(p.last, "LeBlanc");
assert_eq!(p.suffix, Some("III".to_string()));
}
#[test]
fn banner_suffix_ii() {
let p = parse_banner_name("Ellis II, Ronald").unwrap();
assert_eq!(p.first, "Ronald");
assert_eq!(p.last, "Ellis");
assert_eq!(p.suffix, Some("II".to_string()));
}
#[test]
fn banner_multi_word_last() {
let p = parse_banner_name("Aguirre Mesa, Andres").unwrap();
assert_eq!(p.first, "Andres");
assert_eq!(p.last, "Aguirre Mesa");
}
#[test]
fn banner_hyphenated_last() {
let p = parse_banner_name("Abu-Lail, Nehal").unwrap();
assert_eq!(p.first, "Nehal");
assert_eq!(p.last, "Abu-Lail");
}
#[test]
fn banner_with_middle_name() {
let p = parse_banner_name("Smith, John David").unwrap();
assert_eq!(p.first, "John David");
assert_eq!(p.last, "Smith");
}
#[test]
fn banner_no_comma() {
assert!(parse_banner_name("SingleName").is_none());
}
#[test]
fn banner_empty_first() {
assert!(parse_banner_name("Smith,").is_none());
}
#[test]
fn banner_empty_last() {
assert!(parse_banner_name(", John").is_none());
}
// -----------------------------------------------------------------------
// parse_rmp_name
// -----------------------------------------------------------------------
#[test]
fn rmp_standard_name() {
let p = parse_rmp_name("John", "Smith").unwrap();
assert_eq!(p.first, "John");
assert_eq!(p.last, "Smith");
}
#[test]
fn rmp_with_nickname() {
let p = parse_rmp_name("William (Ken)", "Burchenal").unwrap();
assert_eq!(p.first, "William");
assert_eq!(p.nicknames, vec!["Ken"]);
}
#[test]
fn rmp_trailing_comma_last() {
let p = parse_rmp_name("J.", "Cronenberger,").unwrap();
assert_eq!(p.last, "Cronenberger");
}
#[test]
fn rmp_email_in_first() {
assert!(parse_rmp_name("Neel.Baumgardner@utsa.edu", "Baumgardner").is_none());
}
#[test]
fn rmp_suffix_in_last() {
let p = parse_rmp_name("H. Paul", "LeBlanc III").unwrap();
assert_eq!(p.first, "H. Paul");
assert_eq!(p.last, "LeBlanc");
assert_eq!(p.suffix, Some("III".to_string()));
}
#[test]
fn rmp_quoted_nickname() {
let p = parse_rmp_name("Thomas \"Butch\"", "Matjeka").unwrap();
assert_eq!(p.first, "Thomas");
assert_eq!(p.nicknames, vec!["Butch"]);
}
#[test]
fn rmp_accented_last() {
let p = parse_rmp_name("Liliana", "Saldaña").unwrap();
assert_eq!(p.last, "Saldaña");
}
// -----------------------------------------------------------------------
// matching_keys
// -----------------------------------------------------------------------
#[test]
fn keys_simple_name() {
let parts = NameParts {
first: "John".into(),
last: "Smith".into(),
middle: None,
suffix: None,
nicknames: vec![],
};
let keys = matching_keys(&parts);
assert_eq!(keys, vec![("smith".into(), "john".into())]);
}
#[test]
fn keys_multi_token_first() {
let parts = NameParts {
first: "H. Paul".into(),
last: "LeBlanc".into(),
middle: None,
suffix: Some("III".into()),
nicknames: vec![],
};
let keys = matching_keys(&parts);
assert!(keys.contains(&("leblanc".into(), "hpaul".into())));
assert!(keys.contains(&("leblanc".into(), "paul".into())));
assert!(keys.contains(&("leblanc".into(), "h".into())));
assert_eq!(keys.len(), 3);
}
#[test]
fn keys_with_nickname() {
let parts = NameParts {
first: "William".into(),
last: "Burchenal".into(),
middle: None,
suffix: None,
nicknames: vec!["Ken".into()],
};
let keys = matching_keys(&parts);
assert!(keys.contains(&("burchenal".into(), "william".into())));
assert!(keys.contains(&("burchenal".into(), "ken".into())));
assert_eq!(keys.len(), 2);
}
#[test]
fn keys_hyphenated_last() {
let parts = parse_banner_name("Aguirre-Mesa, Andres").unwrap();
let keys = matching_keys(&parts);
// Hyphen removed: "aguirremesa"
assert!(keys.contains(&("aguirremesa".into(), "andres".into())));
}
#[test]
fn keys_accented_name() {
let parts = parse_rmp_name("Liliana", "Saldaña").unwrap();
let keys = matching_keys(&parts);
assert!(keys.contains(&("saldana".into(), "liliana".into())));
}
#[test]
fn keys_cross_source_match() {
// Banner: "Aguirre Mesa, Andres" → last="Aguirre Mesa"
let banner = parse_banner_name("Aguirre Mesa, Andres").unwrap();
let banner_keys = matching_keys(&banner);
// RMP: "Andres" / "Aguirre-Mesa" → last="Aguirre-Mesa"
let rmp = parse_rmp_name("Andres", "Aguirre-Mesa").unwrap();
let rmp_keys = matching_keys(&rmp);
// Both should normalize to ("aguirremesa", "andres")
assert!(banner_keys.iter().any(|k| rmp_keys.contains(k)));
}
#[test]
fn keys_accent_cross_match() {
// Banner: "García, José" (if Banner ever has accents)
let banner = parse_banner_name("Garcia, Jose").unwrap();
let banner_keys = matching_keys(&banner);
// RMP: "José" / "García"
let rmp = parse_rmp_name("José", "García").unwrap();
let rmp_keys = matching_keys(&rmp);
// Both normalize to ("garcia", "jose")
assert!(banner_keys.iter().any(|k| rmp_keys.contains(k)));
}
}
@@ -0,0 +1,57 @@
//! Database operations for the `reference_data` table (code→description lookups).
use crate::data::models::ReferenceData;
use crate::error::Result;
use html_escape::decode_html_entities;
use sqlx::PgPool;
/// Batch upsert reference data entries.
pub async fn batch_upsert(entries: &[ReferenceData], db_pool: &PgPool) -> Result<()> {
if entries.is_empty() {
return Ok(());
}
let categories: Vec<&str> = entries.iter().map(|e| e.category.as_str()).collect();
let codes: Vec<&str> = entries.iter().map(|e| e.code.as_str()).collect();
let descriptions: Vec<String> = entries
.iter()
.map(|e| decode_html_entities(&e.description).into_owned())
.collect();
sqlx::query(
r#"
INSERT INTO reference_data (category, code, description)
SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[])
ON CONFLICT (category, code)
DO UPDATE SET description = EXCLUDED.description
"#,
)
.bind(&categories)
.bind(&codes)
.bind(&descriptions)
.execute(db_pool)
.await?;
Ok(())
}
/// Get all reference data entries for a category.
pub async fn get_by_category(category: &str, db_pool: &PgPool) -> Result<Vec<ReferenceData>> {
let rows = sqlx::query_as::<_, ReferenceData>(
"SELECT category, code, description FROM reference_data WHERE category = $1 ORDER BY description",
)
.bind(category)
.fetch_all(db_pool)
.await?;
Ok(rows)
}
/// Get all reference data entries (for cache initialization).
pub async fn get_all(db_pool: &PgPool) -> Result<Vec<ReferenceData>> {
let rows = sqlx::query_as::<_, ReferenceData>(
"SELECT category, code, description FROM reference_data ORDER BY category, description",
)
.fetch_all(db_pool)
.await?;
Ok(rows)
}
@@ -0,0 +1,192 @@
//! Database operations for RateMyProfessors data.
use crate::error::Result;
use crate::rmp::RmpProfessor;
use sqlx::PgPool;
use std::collections::HashSet;
/// Bulk upsert RMP professors using the UNNEST pattern.
///
/// Deduplicates by `legacy_id` before inserting — the RMP API can return
/// the same professor on multiple pages.
pub async fn batch_upsert_rmp_professors(
professors: &[RmpProfessor],
db_pool: &PgPool,
) -> Result<()> {
if professors.is_empty() {
return Ok(());
}
// Deduplicate: keep last occurrence per legacy_id (latest page wins)
let mut seen = HashSet::new();
let deduped: Vec<&RmpProfessor> = professors
.iter()
.rev()
.filter(|p| seen.insert(p.legacy_id))
.collect();
let legacy_ids: Vec<i32> = deduped.iter().map(|p| p.legacy_id).collect();
let graphql_ids: Vec<&str> = deduped.iter().map(|p| p.graphql_id.as_str()).collect();
let first_names: Vec<String> = deduped
.iter()
.map(|p| p.first_name.trim().to_string())
.collect();
let first_name_refs: Vec<&str> = first_names.iter().map(|s| s.as_str()).collect();
let last_names: Vec<String> = deduped
.iter()
.map(|p| p.last_name.trim().to_string())
.collect();
let last_name_refs: Vec<&str> = last_names.iter().map(|s| s.as_str()).collect();
let departments: Vec<Option<&str>> = deduped.iter().map(|p| p.department.as_deref()).collect();
let avg_ratings: Vec<Option<f32>> = deduped.iter().map(|p| p.avg_rating).collect();
let avg_difficulties: Vec<Option<f32>> = deduped.iter().map(|p| p.avg_difficulty).collect();
let num_ratings: Vec<i32> = deduped.iter().map(|p| p.num_ratings).collect();
let would_take_again_pcts: Vec<Option<f32>> =
deduped.iter().map(|p| p.would_take_again_pct).collect();
sqlx::query(
r#"
INSERT INTO rmp_professors (
legacy_id, graphql_id, first_name, last_name, department,
avg_rating, avg_difficulty, num_ratings, would_take_again_pct,
last_synced_at
)
SELECT
v.legacy_id, v.graphql_id, v.first_name, v.last_name, v.department,
v.avg_rating, v.avg_difficulty, v.num_ratings, v.would_take_again_pct,
NOW()
FROM UNNEST(
$1::int4[], $2::text[], $3::text[], $4::text[], $5::text[],
$6::real[], $7::real[], $8::int4[], $9::real[]
) AS v(
legacy_id, graphql_id, first_name, last_name, department,
avg_rating, avg_difficulty, num_ratings, would_take_again_pct
)
ON CONFLICT (legacy_id)
DO UPDATE SET
graphql_id = EXCLUDED.graphql_id,
first_name = EXCLUDED.first_name,
last_name = EXCLUDED.last_name,
department = EXCLUDED.department,
avg_rating = EXCLUDED.avg_rating,
avg_difficulty = EXCLUDED.avg_difficulty,
num_ratings = EXCLUDED.num_ratings,
would_take_again_pct = EXCLUDED.would_take_again_pct,
last_synced_at = EXCLUDED.last_synced_at
"#,
)
.bind(&legacy_ids)
.bind(&graphql_ids)
.bind(&first_name_refs)
.bind(&last_name_refs)
.bind(&departments)
.bind(&avg_ratings)
.bind(&avg_difficulties)
.bind(&num_ratings)
.bind(&would_take_again_pcts)
.execute(db_pool)
.await
.map_err(|e| anyhow::anyhow!("Failed to batch upsert RMP professors: {}", e))?;
Ok(())
}
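// A tiny added sketch (not in the original file) of the reverse-then-filter
// dedup idiom above: iterating in reverse means `HashSet::insert` keeps the
// *last* occurrence of each legacy_id, at the cost of reversed output order,
// which a set-based upsert does not care about.
#[cfg(test)]
mod dedup_order_tests {
    use std::collections::HashSet;

    #[test]
    fn rev_filter_keeps_last_occurrence() {
        let pages = [(1, "stale"), (2, "ok"), (1, "fresh")];
        let mut seen = HashSet::new();
        let deduped: Vec<_> = pages
            .iter()
            .rev()
            .filter(|(id, _)| seen.insert(*id))
            .collect();
        assert_eq!(deduped.len(), 2);
        assert!(deduped.contains(&&(1, "fresh")));
        assert!(!deduped.contains(&&(1, "stale")));
    }
}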
/// Retrieve RMP rating data for an instructor by instructor id.
///
/// Returns `(avg_rating, num_ratings)` for the best linked RMP profile
/// (most ratings). Returns `None` if no link exists.
#[allow(dead_code)]
pub async fn get_instructor_rmp_data(
db_pool: &PgPool,
instructor_id: i32,
) -> Result<Option<(f32, i32)>> {
let row: Option<(f32, i32)> = sqlx::query_as(
r#"
SELECT rp.avg_rating, rp.num_ratings
FROM instructor_rmp_links irl
JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
WHERE irl.instructor_id = $1
AND rp.avg_rating IS NOT NULL
ORDER BY rp.num_ratings DESC NULLS LAST
LIMIT 1
"#,
)
.bind(instructor_id)
.fetch_optional(db_pool)
.await?;
Ok(row)
}
/// Unmatch an instructor from an RMP profile.
///
/// Removes the link from `instructor_rmp_links` and updates the instructor's
/// `rmp_match_status` to 'unmatched' if no links remain.
///
/// If `rmp_legacy_id` is `Some`, removes only that specific link.
/// If `None`, removes all links for the instructor.
pub async fn unmatch_instructor(
db_pool: &PgPool,
instructor_id: i32,
rmp_legacy_id: Option<i32>,
) -> Result<()> {
let mut tx = db_pool.begin().await?;
// Delete specific link or all links
if let Some(legacy_id) = rmp_legacy_id {
sqlx::query(
"DELETE FROM instructor_rmp_links WHERE instructor_id = $1 AND rmp_legacy_id = $2",
)
.bind(instructor_id)
.bind(legacy_id)
.execute(&mut *tx)
.await?;
} else {
sqlx::query("DELETE FROM instructor_rmp_links WHERE instructor_id = $1")
.bind(instructor_id)
.execute(&mut *tx)
.await?;
}
// Check if any links remain
let (remaining,): (i64,) =
sqlx::query_as("SELECT COUNT(*) FROM instructor_rmp_links WHERE instructor_id = $1")
.bind(instructor_id)
.fetch_one(&mut *tx)
.await?;
// Update instructor status if no links remain
if remaining == 0 {
sqlx::query("UPDATE instructors SET rmp_match_status = 'unmatched' WHERE id = $1")
.bind(instructor_id)
.execute(&mut *tx)
.await?;
}
// Reset accepted candidates back to pending when unmatching
// This allows the candidates to be re-matched later
if let Some(legacy_id) = rmp_legacy_id {
// Reset only the specific candidate
sqlx::query(
"UPDATE rmp_match_candidates
SET status = 'pending', resolved_at = NULL, resolved_by = NULL
WHERE instructor_id = $1 AND rmp_legacy_id = $2 AND status = 'accepted'",
)
.bind(instructor_id)
.bind(legacy_id)
.execute(&mut *tx)
.await?;
} else {
// Reset all accepted candidates for this instructor
sqlx::query(
"UPDATE rmp_match_candidates
SET status = 'pending', resolved_at = NULL, resolved_by = NULL
WHERE instructor_id = $1 AND status = 'accepted'",
)
.bind(instructor_id)
.execute(&mut *tx)
.await?;
}
tx.commit().await?;
Ok(())
}
@@ -0,0 +1,695 @@
//! Confidence scoring and candidate generation for RMP instructor matching.
use crate::data::names::{matching_keys, parse_banner_name, parse_rmp_name};
use crate::error::Result;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use std::collections::{HashMap, HashSet};
use tracing::{debug, info};
// ---------------------------------------------------------------------------
// Scoring types
// ---------------------------------------------------------------------------
/// Breakdown of individual scoring signals.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScoreBreakdown {
pub name: f32,
pub department: f32,
pub uniqueness: f32,
pub volume: f32,
}
/// Result of scoring a single instructor-RMP candidate pair.
#[derive(Debug, Clone)]
pub struct MatchScore {
pub score: f32,
pub breakdown: ScoreBreakdown,
}
// ---------------------------------------------------------------------------
// Thresholds
// ---------------------------------------------------------------------------
/// Minimum composite score to store a candidate row.
const MIN_CANDIDATE_THRESHOLD: f32 = 0.40;
/// Score at or above which a candidate is auto-accepted.
const AUTO_ACCEPT_THRESHOLD: f32 = 0.85;
// ---------------------------------------------------------------------------
// Weights (must sum to 1.0)
// ---------------------------------------------------------------------------
const WEIGHT_NAME: f32 = 0.50;
const WEIGHT_DEPARTMENT: f32 = 0.25;
const WEIGHT_UNIQUENESS: f32 = 0.15;
const WEIGHT_VOLUME: f32 = 0.10;
// ---------------------------------------------------------------------------
// Pure scoring functions
// ---------------------------------------------------------------------------
/// Check if an instructor's subjects overlap with an RMP department.
///
/// Returns `1.0` for a match, `0.2` for a mismatch, `0.5` when the RMP
/// department is unknown.
fn department_similarity(subjects: &[String], rmp_department: Option<&str>) -> f32 {
let Some(dept) = rmp_department else {
return 0.5;
};
let dept_lower = dept.to_lowercase();
// Quick check: does any subject appear directly in the department string
// or vice-versa?
for subj in subjects {
let subj_lower = subj.to_lowercase();
if dept_lower.contains(&subj_lower) || subj_lower.contains(&dept_lower) {
return 1.0;
}
// Handle common UTSA abbreviation mappings.
if matches_known_abbreviation(&subj_lower, &dept_lower) {
return 1.0;
}
}
0.2
}
/// Expand common subject abbreviations used at UTSA and check for overlap.
fn matches_known_abbreviation(subject: &str, department: &str) -> bool {
const MAPPINGS: &[(&str, &[&str])] = &[
// Core subjects (original mappings, corrected)
("cs", &["computer science"]),
("ece", &["early childhood education", "early childhood"]),
("ee", &["electrical engineering", "electrical"]),
("me", &["mechanical engineering", "mechanical"]),
("ce", &["civil engineering", "civil"]),
("bio", &["biology", "biological"]),
("chem", &["chemistry"]),
("phys", &["physics"]),
("math", &["mathematics"]),
("sta", &["statistics"]),
("eng", &["english"]),
("his", &["history"]),
("pol", &["political science"]),
("psy", &["psychology"]),
("soc", &["sociology"]),
("mus", &["music"]),
("art", &["art"]),
("phi", &["philosophy"]),
("eco", &["economics"]),
("acc", &["accounting"]),
("fin", &["finance"]),
("mgt", &["management"]),
("mkt", &["marketing"]),
("is", &["information systems"]),
("ms", &["management science"]),
("kin", &["kinesiology"]),
("com", &["communication"]),
// Architecture & Design
("arc", &["architecture"]),
("ide", &["interior design", "design"]),
// Anthropology & Ethnic Studies
("ant", &["anthropology"]),
("aas", &["african american studies", "ethnic studies"]),
("mas", &["mexican american studies", "ethnic studies"]),
("regs", &["ethnic studies", "gender"]),
// Languages
("lng", &["linguistics", "applied linguistics"]),
("spn", &["spanish"]),
("frn", &["french"]),
("ger", &["german"]),
("chn", &["chinese"]),
("jpn", &["japanese"]),
("kor", &["korean"]),
("itl", &["italian"]),
("rus", &["russian"]),
("lat", &["latin"]),
("grk", &["greek"]),
("asl", &["american sign language", "sign language"]),
(
"fl",
&["foreign languages", "languages", "modern languages"],
),
// Education
("edu", &["education"]),
("ci", &["curriculum", "education"]),
("edl", &["educational leadership", "education"]),
("edp", &["educational psychology", "education"]),
("bbl", &["bilingual education"]),
("spe", &["special education", "education"]),
// Business
("ent", &["entrepreneurship"]),
("gba", &["general business", "business"]),
("blw", &["business law", "law"]),
("rfd", &["real estate"]),
("mot", &["management of technology", "management"]),
// Engineering
("egr", &["engineering"]),
("bme", &["biomedical engineering", "engineering"]),
("cme", &["chemical engineering", "engineering"]),
("cpe", &["computer engineering", "engineering"]),
("ise", &["industrial", "systems engineering", "engineering"]),
("mate", &["materials engineering", "engineering"]),
// Sciences
("che", &["chemistry"]),
("bch", &["biochemistry", "chemistry"]),
("geo", &["geology"]),
("phy", &["physics"]),
("ast", &["astronomy"]),
("es", &["environmental science"]),
// Social Sciences
("crj", &["criminal justice"]),
("swk", &["social work"]),
("pad", &["public administration"]),
("grg", &["geography"]),
("ges", &["geography"]),
// Humanities
("cla", &["classics"]),
("hum", &["humanities"]),
("wgss", &["women's studies"]),
// Health
("hth", &["health"]),
("hcp", &["health science", "health"]),
("ntr", &["nutrition"]),
// Military
("msc", &["military science"]),
("asc", &["aerospace"]),
// Arts
("dan", &["dance"]),
("thr", &["theater"]),
("ahc", &["art history"]),
// Other
("cou", &["counseling"]),
("hon", &["honors"]),
("csm", &["construction"]),
("wrc", &["writing"]),
("set", &["tourism management", "tourism"]),
];
for &(abbr, expansions) in MAPPINGS {
if subject == abbr {
return expansions
.iter()
.any(|expansion| department.contains(expansion));
}
}
false
}
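// An illustrative test (added; not in the original source) of the three
// department outcomes: substring/abbreviation match, unknown department,
// and mismatch.
#[cfg(test)]
mod department_similarity_tests {
    use super::*;

    #[test]
    fn match_unknown_and_mismatch() {
        let math = vec!["math".to_string()];
        assert_eq!(department_similarity(&math, Some("Mathematics")), 1.0);
        assert_eq!(department_similarity(&math, None), 0.5);
        assert_eq!(department_similarity(&math, Some("Fine Arts")), 0.2);
        // "CS" only matches via the abbreviation table, not as a substring.
        let cs = vec!["CS".to_string()];
        assert_eq!(department_similarity(&cs, Some("Computer Science")), 1.0);
    }
}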
/// Compute match confidence score (0.0-1.0) for an instructor-RMP pair.
///
/// The name signal is always 1.0 since candidates are only generated for
/// exact normalized name matches. The effective score range is 0.50-1.0.
pub fn compute_match_score(
instructor_subjects: &[String],
rmp_department: Option<&str>,
candidate_count: usize,
rmp_num_ratings: i32,
) -> MatchScore {
// --- Name (0.50) — always 1.0, candidates only exist for exact matches ---
let name_score = 1.0;
// --- Department (0.25) ---
let dept_score = department_similarity(instructor_subjects, rmp_department);
// --- Uniqueness (0.15) ---
let uniqueness_score = match candidate_count {
0 | 1 => 1.0,
2 => 0.5,
_ => 0.2,
};
// --- Volume (0.10) ---
let volume_score = ((rmp_num_ratings as f32).ln_1p() / 5.0_f32.ln_1p()).clamp(0.0, 1.0);
let composite = name_score * WEIGHT_NAME
+ dept_score * WEIGHT_DEPARTMENT
+ uniqueness_score * WEIGHT_UNIQUENESS
+ volume_score * WEIGHT_VOLUME;
MatchScore {
score: composite,
breakdown: ScoreBreakdown {
name: name_score,
department: dept_score,
uniqueness: uniqueness_score,
volume: volume_score,
},
}
}
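// A worked composite example (added; the numbers follow the weights above,
// not any fixture in the repository): a department match with a unique
// candidate and 2 ratings scores 0.50 + 0.25 + 0.15 + 0.10 * ln(3)/ln(6)
// ≈ 0.961 and clears auto-accept; the same pair among 3 candidates drops to
// ≈ 0.841, below the 0.85 threshold.
#[cfg(test)]
mod scoring_tests {
    use super::*;

    #[test]
    fn unique_department_match_clears_auto_accept() {
        let subjects = vec!["CS".to_string()];
        let ms = compute_match_score(&subjects, Some("Computer Science"), 1, 2);
        assert!(ms.score >= AUTO_ACCEPT_THRESHOLD);
    }

    #[test]
    fn ambiguity_pulls_score_below_auto_accept() {
        let subjects = vec!["CS".to_string()];
        let ms = compute_match_score(&subjects, Some("Computer Science"), 3, 2);
        assert!(ms.score < AUTO_ACCEPT_THRESHOLD);
    }
}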
// ---------------------------------------------------------------------------
// Candidate generation (DB)
// ---------------------------------------------------------------------------
/// Statistics returned from candidate generation.
#[derive(Debug)]
pub struct MatchingStats {
pub total_unmatched: usize,
pub candidates_created: usize,
pub candidates_rescored: usize,
pub auto_matched: usize,
pub skipped_unparseable: usize,
pub skipped_no_candidates: usize,
}
/// Lightweight row for building the in-memory RMP name index.
struct RmpProfForMatching {
legacy_id: i32,
department: Option<String>,
num_ratings: i32,
}
/// Generate match candidates for all unmatched instructors.
///
/// For each unmatched instructor:
/// 1. Parse `display_name` into [`NameParts`] and generate matching keys.
/// 2. Find RMP professors with matching normalized name keys.
/// 3. Score each candidate.
/// 4. Store candidates scoring above [`MIN_CANDIDATE_THRESHOLD`].
/// 5. Auto-accept if the top candidate scores ≥ [`AUTO_ACCEPT_THRESHOLD`]
/// and the pair has not previously been rejected.
///
/// Resolved (accepted/rejected) instructor-RMP pairs are skipped; pending
/// candidates are rescored so updated mappings take effect.
pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
// 1. Load unmatched instructors
let instructors: Vec<(i32, String)> = sqlx::query_as(
"SELECT id, display_name FROM instructors WHERE rmp_match_status = 'unmatched'",
)
.fetch_all(db_pool)
.await?;
if instructors.is_empty() {
info!("No unmatched instructors to generate candidates for");
return Ok(MatchingStats {
total_unmatched: 0,
candidates_created: 0,
candidates_rescored: 0,
auto_matched: 0,
skipped_unparseable: 0,
skipped_no_candidates: 0,
});
}
let instructor_ids: Vec<i32> = instructors.iter().map(|(id, _)| *id).collect();
let total_unmatched = instructors.len();
// 2. Load instructor subjects
let subject_rows: Vec<(i32, String)> = sqlx::query_as(
r#"
SELECT DISTINCT ci.instructor_id, c.subject
FROM course_instructors ci
JOIN courses c ON c.id = ci.course_id
WHERE ci.instructor_id = ANY($1)
"#,
)
.bind(&instructor_ids)
.fetch_all(db_pool)
.await?;
let mut subject_map: HashMap<i32, Vec<String>> = HashMap::new();
for (iid, subject) in subject_rows {
subject_map.entry(iid).or_default().push(subject);
}
// 3. Load all RMP professors and build multi-key name index
let prof_rows: Vec<(i32, String, String, Option<String>, i32)> = sqlx::query_as(
"SELECT legacy_id, first_name, last_name, department, num_ratings FROM rmp_professors",
)
.fetch_all(db_pool)
.await?;
// Build name index: (normalized_last, normalized_first) -> Vec<RmpProfForMatching>
// Each professor may appear under multiple keys (nicknames, token variants).
let mut name_index: HashMap<(String, String), Vec<RmpProfForMatching>> = HashMap::new();
let mut rmp_parse_failures = 0usize;
for (legacy_id, first_name, last_name, department, num_ratings) in &prof_rows {
match parse_rmp_name(first_name, last_name) {
Some(parts) => {
let keys = matching_keys(&parts);
for key in keys {
name_index.entry(key).or_default().push(RmpProfForMatching {
legacy_id: *legacy_id,
department: department.clone(),
num_ratings: *num_ratings,
});
}
}
None => {
rmp_parse_failures += 1;
debug!(
legacy_id,
first_name, last_name, "Unparseable RMP professor name, skipping"
);
}
}
}
if rmp_parse_failures > 0 {
debug!(
count = rmp_parse_failures,
"RMP professors with unparseable names"
);
}
// 4. Load existing candidate pairs — only skip resolved (accepted/rejected) pairs.
// Pending candidates are rescored so updated mappings take effect.
let candidate_rows: Vec<(i32, i32, String)> =
sqlx::query_as("SELECT instructor_id, rmp_legacy_id, status FROM rmp_match_candidates")
.fetch_all(db_pool)
.await?;
let mut resolved_pairs: HashSet<(i32, i32)> = HashSet::new();
let mut pending_pairs: HashSet<(i32, i32)> = HashSet::new();
let mut rejected_pairs: HashSet<(i32, i32)> = HashSet::new();
for (iid, lid, status) in candidate_rows {
match status.as_str() {
"accepted" | "rejected" => {
resolved_pairs.insert((iid, lid));
if status == "rejected" {
rejected_pairs.insert((iid, lid));
}
}
_ => {
pending_pairs.insert((iid, lid));
}
}
}
// 5. Score and collect candidates (new + rescored pending)
let empty_subjects: Vec<String> = Vec::new();
let mut new_candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
let mut rescored_candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
let mut auto_accept: Vec<(i32, i32)> = Vec::new(); // (instructor_id, legacy_id)
let mut skipped_unparseable = 0usize;
let mut skipped_no_candidates = 0usize;
for (instructor_id, display_name) in &instructors {
let Some(instructor_parts) = parse_banner_name(display_name) else {
skipped_unparseable += 1;
debug!(
instructor_id,
display_name, "Unparseable display name, skipping"
);
continue;
};
let subjects = subject_map.get(instructor_id).unwrap_or(&empty_subjects);
// Generate all matching keys for this instructor and collect candidate
// RMP professors across all key variants (deduplicated by legacy_id).
let instructor_keys = matching_keys(&instructor_parts);
let mut seen_profs: HashSet<i32> = HashSet::new();
let mut matched_profs: Vec<&RmpProfForMatching> = Vec::new();
for key in &instructor_keys {
if let Some(profs) = name_index.get(key) {
for prof in profs {
if seen_profs.insert(prof.legacy_id) {
matched_profs.push(prof);
}
}
}
}
if matched_profs.is_empty() {
skipped_no_candidates += 1;
continue;
}
let candidate_count = matched_profs.len();
let mut best: Option<(f32, i32)> = None;
for prof in &matched_profs {
let pair = (*instructor_id, prof.legacy_id);
if resolved_pairs.contains(&pair) {
continue;
}
let ms = compute_match_score(
subjects,
prof.department.as_deref(),
candidate_count,
prof.num_ratings,
);
if ms.score < MIN_CANDIDATE_THRESHOLD {
continue;
}
let breakdown_json =
serde_json::to_value(&ms.breakdown).unwrap_or_else(|_| serde_json::json!({}));
if pending_pairs.contains(&pair) {
rescored_candidates.push((
*instructor_id,
prof.legacy_id,
ms.score,
breakdown_json,
));
} else {
new_candidates.push((*instructor_id, prof.legacy_id, ms.score, breakdown_json));
}
match best {
Some((s, _)) if ms.score > s => best = Some((ms.score, prof.legacy_id)),
None => best = Some((ms.score, prof.legacy_id)),
_ => {}
}
}
// Auto-accept the top candidate if it meets the threshold and is not
// previously rejected.
if let Some((score, legacy_id)) = best
&& score >= AUTO_ACCEPT_THRESHOLD
&& !rejected_pairs.contains(&(*instructor_id, legacy_id))
{
auto_accept.push((*instructor_id, legacy_id));
}
}
    // 6. Write candidates, rescore, and auto-accept within a single transaction
let candidates_created = new_candidates.len();
let candidates_rescored = rescored_candidates.len();
let auto_matched = auto_accept.len();
let mut tx = db_pool.begin().await?;
// 6a. Batch-insert new candidates
if !new_candidates.is_empty() {
let c_instructor_ids: Vec<i32> = new_candidates.iter().map(|(iid, _, _, _)| *iid).collect();
let c_legacy_ids: Vec<i32> = new_candidates.iter().map(|(_, lid, _, _)| *lid).collect();
let c_scores: Vec<f32> = new_candidates.iter().map(|(_, _, s, _)| *s).collect();
let c_breakdowns: Vec<serde_json::Value> =
new_candidates.into_iter().map(|(_, _, _, b)| b).collect();
sqlx::query(
r#"
INSERT INTO rmp_match_candidates (instructor_id, rmp_legacy_id, score, score_breakdown)
SELECT v.instructor_id, v.rmp_legacy_id, v.score, v.score_breakdown
FROM UNNEST($1::int4[], $2::int4[], $3::real[], $4::jsonb[])
AS v(instructor_id, rmp_legacy_id, score, score_breakdown)
ON CONFLICT (instructor_id, rmp_legacy_id) DO NOTHING
"#,
)
.bind(&c_instructor_ids)
.bind(&c_legacy_ids)
.bind(&c_scores)
.bind(&c_breakdowns)
.execute(&mut *tx)
.await?;
}
// 6b. Batch-update rescored pending candidates
if !rescored_candidates.is_empty() {
let r_instructor_ids: Vec<i32> = rescored_candidates
.iter()
.map(|(iid, _, _, _)| *iid)
.collect();
let r_legacy_ids: Vec<i32> = rescored_candidates
.iter()
.map(|(_, lid, _, _)| *lid)
.collect();
let r_scores: Vec<f32> = rescored_candidates.iter().map(|(_, _, s, _)| *s).collect();
let r_breakdowns: Vec<serde_json::Value> = rescored_candidates
.into_iter()
.map(|(_, _, _, b)| b)
.collect();
sqlx::query(
r#"
UPDATE rmp_match_candidates mc
SET score = v.score, score_breakdown = v.score_breakdown
FROM UNNEST($1::int4[], $2::int4[], $3::real[], $4::jsonb[])
AS v(instructor_id, rmp_legacy_id, score, score_breakdown)
WHERE mc.instructor_id = v.instructor_id
AND mc.rmp_legacy_id = v.rmp_legacy_id
"#,
)
.bind(&r_instructor_ids)
.bind(&r_legacy_ids)
.bind(&r_scores)
.bind(&r_breakdowns)
.execute(&mut *tx)
.await?;
}
// 7. Auto-accept top candidates
if !auto_accept.is_empty() {
let aa_instructor_ids: Vec<i32> = auto_accept.iter().map(|(iid, _)| *iid).collect();
let aa_legacy_ids: Vec<i32> = auto_accept.iter().map(|(_, lid)| *lid).collect();
// Mark the candidate row as accepted
sqlx::query(
r#"
UPDATE rmp_match_candidates mc
SET status = 'accepted', resolved_at = NOW()
FROM UNNEST($1::int4[], $2::int4[]) AS v(instructor_id, rmp_legacy_id)
WHERE mc.instructor_id = v.instructor_id
AND mc.rmp_legacy_id = v.rmp_legacy_id
"#,
)
.bind(&aa_instructor_ids)
.bind(&aa_legacy_ids)
.execute(&mut *tx)
.await?;
// Insert links into instructor_rmp_links
sqlx::query(
r#"
INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, source)
SELECT v.instructor_id, v.rmp_legacy_id, 'auto'
FROM UNNEST($1::int4[], $2::int4[]) AS v(instructor_id, rmp_legacy_id)
ON CONFLICT (rmp_legacy_id) DO NOTHING
"#,
)
.bind(&aa_instructor_ids)
.bind(&aa_legacy_ids)
.execute(&mut *tx)
.await?;
// Update instructor match status
sqlx::query(
r#"
UPDATE instructors i
SET rmp_match_status = 'auto'
FROM UNNEST($1::int4[]) AS v(instructor_id)
WHERE i.id = v.instructor_id
"#,
)
.bind(&aa_instructor_ids)
.execute(&mut *tx)
.await?;
}
tx.commit().await?;
let stats = MatchingStats {
total_unmatched,
candidates_created,
candidates_rescored,
auto_matched,
skipped_unparseable,
skipped_no_candidates,
};
info!(
total_unmatched = stats.total_unmatched,
candidates_created = stats.candidates_created,
candidates_rescored = stats.candidates_rescored,
auto_matched = stats.auto_matched,
skipped_unparseable = stats.skipped_unparseable,
skipped_no_candidates = stats.skipped_no_candidates,
"Candidate generation complete"
);
Ok(stats)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_ideal_candidate_high_score() {
let ms = compute_match_score(
&["CS".to_string()],
Some("Computer Science"),
1, // unique candidate
50, // decent ratings
);
// name 1.0*0.50 + dept 1.0*0.25 + unique 1.0*0.15 + volume ~0.97*0.10 ≈ 0.997
assert!(ms.score >= 0.85, "Expected score >= 0.85, got {}", ms.score);
assert_eq!(ms.breakdown.name, 1.0);
assert_eq!(ms.breakdown.uniqueness, 1.0);
assert_eq!(ms.breakdown.department, 1.0);
}
#[test]
fn test_ambiguous_candidates_lower_score() {
let unique = compute_match_score(&[], None, 1, 10);
let ambiguous = compute_match_score(&[], None, 3, 10);
assert!(
unique.score > ambiguous.score,
"Unique ({}) should outscore ambiguous ({})",
unique.score,
ambiguous.score
);
assert_eq!(unique.breakdown.uniqueness, 1.0);
assert_eq!(ambiguous.breakdown.uniqueness, 0.2);
}
#[test]
fn test_no_department_neutral() {
let ms = compute_match_score(&["CS".to_string()], None, 1, 10);
assert_eq!(ms.breakdown.department, 0.5);
}
#[test]
fn test_department_match() {
let ms = compute_match_score(&["CS".to_string()], Some("Computer Science"), 1, 10);
assert_eq!(ms.breakdown.department, 1.0);
}
#[test]
fn test_department_mismatch() {
let ms = compute_match_score(&["CS".to_string()], Some("History"), 1, 10);
assert_eq!(ms.breakdown.department, 0.2);
}
#[test]
fn test_department_match_outscores_mismatch() {
let matched = compute_match_score(&["CS".to_string()], Some("Computer Science"), 1, 10);
let mismatched = compute_match_score(&["CS".to_string()], Some("History"), 1, 10);
assert!(
matched.score > mismatched.score,
"Department match ({}) should outscore mismatch ({})",
matched.score,
mismatched.score
);
}
#[test]
fn test_volume_scaling() {
let zero = compute_match_score(&[], None, 1, 0);
let many = compute_match_score(&[], None, 1, 100);
assert!(
many.breakdown.volume > zero.breakdown.volume,
"100 ratings ({}) should outscore 0 ratings ({})",
many.breakdown.volume,
zero.breakdown.volume
);
assert_eq!(zero.breakdown.volume, 0.0);
assert!(
many.breakdown.volume > 0.9,
"100 ratings should be near max"
);
}
}
@@ -0,0 +1,321 @@
//! Database operations for scrape job queue management.
use crate::data::models::{ScrapeJob, ScrapePriority, TargetType, UpsertCounts};
use crate::error::Result;
use chrono::{DateTime, Utc};
use sqlx::PgPool;
use std::collections::HashSet;
/// Force-unlock all jobs that have a non-NULL `locked_at`.
///
/// Intended to be called once at startup to recover jobs left locked by
/// a previous unclean shutdown (crash, OOM kill, etc.).
///
/// # Returns
/// The number of jobs that were unlocked.
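///
/// # Example
/// Startup-recovery sketch (mirrors how `ScraperService::start` uses this):
/// ```ignore
/// let unlocked = force_unlock_all(&db_pool).await?;
/// if unlocked > 0 {
///     tracing::warn!(unlocked, "Force-unlocked stale jobs from previous run");
/// }
/// ```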
pub async fn force_unlock_all(db_pool: &PgPool) -> Result<u64> {
let result = sqlx::query(
"UPDATE scrape_jobs SET locked_at = NULL, queued_at = NOW() WHERE locked_at IS NOT NULL",
)
.execute(db_pool)
.await?;
Ok(result.rows_affected())
}
/// How long a lock can be held before it is considered expired and reclaimable.
///
/// This acts as a safety net for cases where a worker dies without unlocking
/// (OOM kill, crash, network partition). Under normal operation, the worker's
/// own job timeout fires well before this threshold.
const LOCK_EXPIRY: std::time::Duration = std::time::Duration::from_secs(10 * 60);
/// Atomically fetch and lock the next available scrape job.
///
/// Uses `FOR UPDATE SKIP LOCKED` to allow multiple workers to poll the queue
/// concurrently without conflicts. Considers jobs that are:
/// - Unlocked and ready to execute, OR
/// - Locked but past [`LOCK_EXPIRY`] (abandoned by a dead worker)
///
/// # Arguments
/// * `db_pool` - PostgreSQL connection pool
///
/// # Returns
/// * `Ok(Some(job))` if a job was successfully fetched and locked
/// * `Ok(None)` if no jobs are available
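///
/// # Example
/// A minimal worker polling sketch (illustrative; error handling and the
/// job-processing step are elided):
/// ```ignore
/// if let Some(job) = fetch_and_lock_job(&db_pool).await? {
///     // ... process `job` ...
///     delete_job(job.id, &db_pool).await?; // success: remove from queue
/// }
/// ```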
pub async fn fetch_and_lock_job(db_pool: &PgPool) -> Result<Option<ScrapeJob>> {
let mut tx = db_pool.begin().await?;
let lock_expiry_secs = LOCK_EXPIRY.as_secs() as i32;
let job = sqlx::query_as::<_, ScrapeJob>(
"SELECT * FROM scrape_jobs \
WHERE (locked_at IS NULL OR locked_at < NOW() - make_interval(secs => $1::double precision)) \
AND execute_at <= NOW() \
ORDER BY priority DESC, execute_at ASC \
LIMIT 1 \
FOR UPDATE SKIP LOCKED"
)
.bind(lock_expiry_secs)
.fetch_optional(&mut *tx)
.await?;
if let Some(ref job) = job {
sqlx::query("UPDATE scrape_jobs SET locked_at = NOW() WHERE id = $1")
.bind(job.id)
.execute(&mut *tx)
.await?;
}
tx.commit().await?;
Ok(job)
}
/// Delete a scrape job by ID.
///
/// Typically called after a job has been successfully processed or permanently failed.
///
/// # Arguments
/// * `job_id` - The database ID of the job to delete
/// * `db_pool` - PostgreSQL connection pool
pub async fn delete_job(job_id: i32, db_pool: &PgPool) -> Result<()> {
sqlx::query("DELETE FROM scrape_jobs WHERE id = $1")
.bind(job_id)
.execute(db_pool)
.await?;
Ok(())
}
/// Unlock a scrape job by clearing its `locked_at` timestamp.
///
/// Used to release a job back to the queue, e.g. during graceful shutdown.
///
/// # Arguments
/// * `job_id` - The database ID of the job to unlock
/// * `db_pool` - PostgreSQL connection pool
pub async fn unlock_job(job_id: i32, db_pool: &PgPool) -> Result<()> {
sqlx::query("UPDATE scrape_jobs SET locked_at = NULL WHERE id = $1")
.bind(job_id)
.execute(db_pool)
.await?;
Ok(())
}
/// Atomically unlock a job, increment its retry count, and reset `queued_at`.
///
/// Returns the new `queued_at` timestamp if retries remain, or `None` if
/// the job has exhausted its retries. This is determined atomically in the
/// database to avoid race conditions between workers.
///
/// # Arguments
/// * `job_id` - The database ID of the job
/// * `max_retries` - Maximum number of retries allowed for this job
/// * `db_pool` - PostgreSQL connection pool
///
/// # Returns
/// * `Ok(Some(queued_at))` if the job was unlocked and retries remain
/// * `Ok(None)` if the job has exhausted its retries
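///
/// # Example
/// Failure-handling sketch (assumes a `job` in hand and a max-retries policy of 3):
/// ```ignore
/// match unlock_and_increment_retry(job.id, 3, &db_pool).await? {
///     Some(_queued_at) => { /* job re-queued; another worker will retry it */ }
///     None => delete_job(job.id, &db_pool).await?, // retries exhausted
/// }
/// ```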
pub async fn unlock_and_increment_retry(
job_id: i32,
max_retries: i32,
db_pool: &PgPool,
) -> Result<Option<chrono::DateTime<chrono::Utc>>> {
let result = sqlx::query_scalar::<_, Option<chrono::DateTime<chrono::Utc>>>(
"UPDATE scrape_jobs
SET locked_at = NULL, retry_count = retry_count + 1, queued_at = NOW()
WHERE id = $1
RETURNING CASE WHEN retry_count <= $2 THEN queued_at ELSE NULL END",
)
.bind(job_id)
.bind(max_retries)
.fetch_one(db_pool)
.await?;
Ok(result)
}
/// Find existing job payloads matching the given target type and candidates.
///
/// Returns a set of stringified JSON payloads that already exist in the queue
/// (both locked and unlocked), used for deduplication when scheduling new jobs.
///
/// # Arguments
/// * `target_type` - The target type to filter by
/// * `candidate_payloads` - Candidate payloads to check against existing jobs
/// * `db_pool` - PostgreSQL connection pool
///
/// # Returns
/// A `HashSet` of stringified JSON payloads that already have pending or in-progress jobs
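///
/// # Example
/// Deduplication sketch for scheduling (assumes `candidates: Vec<serde_json::Value>`):
/// ```ignore
/// let existing = find_existing_job_payloads(TargetType::Subject, &candidates, &db_pool).await?;
/// let fresh: Vec<_> = candidates
///     .into_iter()
///     .filter(|p| !existing.contains(&p.to_string()))
///     .collect();
/// ```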
pub async fn find_existing_job_payloads(
target_type: TargetType,
candidate_payloads: &[serde_json::Value],
db_pool: &PgPool,
) -> Result<HashSet<String>> {
let existing_jobs: Vec<(serde_json::Value,)> = sqlx::query_as(
"SELECT target_payload FROM scrape_jobs
WHERE target_type = $1 AND target_payload = ANY($2)",
)
.bind(target_type)
.bind(candidate_payloads)
.fetch_all(db_pool)
.await?;
let existing_payloads = existing_jobs
.into_iter()
.map(|(payload,)| payload.to_string())
.collect();
Ok(existing_payloads)
}
/// Insert a scrape job result log entry.
#[allow(clippy::too_many_arguments)]
pub async fn insert_job_result(
target_type: TargetType,
payload: serde_json::Value,
priority: ScrapePriority,
queued_at: DateTime<Utc>,
started_at: DateTime<Utc>,
duration_ms: i32,
success: bool,
error_message: Option<&str>,
retry_count: i32,
counts: Option<&UpsertCounts>,
db_pool: &PgPool,
) -> Result<()> {
sqlx::query(
r#"
INSERT INTO scrape_job_results (
target_type, payload, priority,
queued_at, started_at, duration_ms,
success, error_message, retry_count,
courses_fetched, courses_changed, courses_unchanged,
audits_generated, metrics_generated
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
"#,
)
.bind(target_type)
.bind(&payload)
.bind(priority)
.bind(queued_at)
.bind(started_at)
.bind(duration_ms)
.bind(success)
.bind(error_message)
.bind(retry_count)
.bind(counts.map(|c| c.courses_fetched))
.bind(counts.map(|c| c.courses_changed))
.bind(counts.map(|c| c.courses_unchanged))
.bind(counts.map(|c| c.audits_generated))
.bind(counts.map(|c| c.metrics_generated))
.execute(db_pool)
.await?;
Ok(())
}
/// Per-subject aggregated stats from recent scrape results.
///
/// Populated by [`fetch_subject_stats`] and converted into
/// [`crate::scraper::adaptive::SubjectStats`] for interval computation.
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct SubjectResultStats {
pub subject: String,
pub recent_runs: i64,
pub avg_change_ratio: f64,
pub consecutive_zero_changes: i64,
pub consecutive_empty_fetches: i64,
pub recent_failure_count: i64,
pub recent_success_count: i64,
pub last_completed: DateTime<Utc>,
}
/// Fetch aggregated per-subject statistics from the last 24 hours of results.
///
/// For each subject, examines the 20 most recent results and computes:
/// - Average change ratio (courses_changed / courses_fetched)
/// - Consecutive zero-change runs from the most recent result
/// - Consecutive empty-fetch runs from the most recent result
/// - Failure and success counts
/// - Last completion timestamp
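///
/// # Example
/// Feeding results into the adaptive scheduler (sketch; relies on the
/// `From<SubjectResultStats>` impl in `crate::scraper::adaptive`):
/// ```ignore
/// let rows = fetch_subject_stats(&db_pool).await?;
/// let stats: Vec<SubjectStats> = rows.into_iter().map(Into::into).collect();
/// ```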
pub async fn fetch_subject_stats(db_pool: &PgPool) -> Result<Vec<SubjectResultStats>> {
let rows = sqlx::query_as::<_, SubjectResultStats>(
r#"
WITH recent AS (
SELECT payload->>'subject' AS subject, success,
COALESCE(courses_fetched, 0) AS courses_fetched,
COALESCE(courses_changed, 0) AS courses_changed,
completed_at,
ROW_NUMBER() OVER (PARTITION BY payload->>'subject' ORDER BY completed_at DESC) AS rn
FROM scrape_job_results
WHERE target_type = 'Subject' AND completed_at > NOW() - INTERVAL '24 hours'
),
filtered AS (SELECT * FROM recent WHERE rn <= 20),
zero_break AS (
SELECT subject,
MIN(rn) FILTER (WHERE courses_changed > 0 AND success) AS first_nonzero_rn,
MIN(rn) FILTER (WHERE courses_fetched > 0 AND success) AS first_nonempty_rn
FROM filtered GROUP BY subject
)
SELECT
f.subject::TEXT AS subject,
COUNT(*)::BIGINT AS recent_runs,
COALESCE(AVG(CASE WHEN f.success AND f.courses_fetched > 0
THEN f.courses_changed::FLOAT / f.courses_fetched ELSE NULL END), 0.0)::FLOAT8 AS avg_change_ratio,
COALESCE(zb.first_nonzero_rn - 1, COUNT(*) FILTER (WHERE f.success AND f.courses_changed = 0))::BIGINT AS consecutive_zero_changes,
COALESCE(zb.first_nonempty_rn - 1, COUNT(*) FILTER (WHERE f.success AND f.courses_fetched = 0))::BIGINT AS consecutive_empty_fetches,
COUNT(*) FILTER (WHERE NOT f.success)::BIGINT AS recent_failure_count,
COUNT(*) FILTER (WHERE f.success)::BIGINT AS recent_success_count,
MAX(f.completed_at) AS last_completed
FROM filtered f
LEFT JOIN zero_break zb ON f.subject = zb.subject
GROUP BY f.subject, zb.first_nonzero_rn, zb.first_nonempty_rn
"#,
)
.fetch_all(db_pool)
.await?;
Ok(rows)
}
/// Batch insert scrape jobs using UNNEST for a single round-trip.
///
/// All jobs are inserted with `execute_at` set to the current time.
///
/// # Arguments
/// * `jobs` - Slice of `(payload, target_type, priority)` tuples to insert
/// * `db_pool` - PostgreSQL connection pool
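///
/// # Example
/// Sketch enqueuing a single subject scrape (the priority variant name here
/// is hypothetical):
/// ```ignore
/// let jobs = vec![(
///     serde_json::json!({ "subject": "CS" }),
///     TargetType::Subject,
///     ScrapePriority::High, // hypothetical variant name
/// )];
/// let inserted = batch_insert_jobs(&jobs, &db_pool).await?;
/// ```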
pub async fn batch_insert_jobs(
jobs: &[(serde_json::Value, TargetType, ScrapePriority)],
db_pool: &PgPool,
) -> Result<Vec<ScrapeJob>> {
if jobs.is_empty() {
return Ok(Vec::new());
}
let mut target_types: Vec<String> = Vec::with_capacity(jobs.len());
let mut payloads: Vec<serde_json::Value> = Vec::with_capacity(jobs.len());
let mut priorities: Vec<String> = Vec::with_capacity(jobs.len());
for (payload, target_type, priority) in jobs {
target_types.push(format!("{target_type:?}"));
payloads.push(payload.clone());
priorities.push(format!("{priority:?}"));
}
let inserted = sqlx::query_as::<_, ScrapeJob>(
r#"
INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at, queued_at)
SELECT v.target_type::target_type, v.payload, v.priority::scrape_priority, NOW(), NOW()
FROM UNNEST($1::text[], $2::jsonb[], $3::text[])
AS v(target_type, payload, priority)
RETURNING *
"#,
)
.bind(&target_types)
.bind(&payloads)
.bind(&priorities)
.fetch_all(db_pool)
.await?;
Ok(inserted)
}
@@ -0,0 +1,100 @@
//! Database query functions for user sessions.
use anyhow::Context;
use rand::Rng;
use sqlx::PgPool;
use super::models::UserSession;
use crate::error::Result;
/// Session lifetime: 7 days (in seconds).
pub const SESSION_DURATION_SECS: u64 = 7 * 24 * 3600;
/// Generate a cryptographically random token: 32 random bytes hex-encoded (64 chars).
fn generate_token() -> String {
let bytes: [u8; 32] = rand::rng().random();
bytes.iter().map(|b| format!("{b:02x}")).collect()
}
/// Create a new session for a user with the given duration.
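///
/// # Example
/// Sketch of creating a 7-day session after OAuth login:
/// ```ignore
/// let duration = std::time::Duration::from_secs(SESSION_DURATION_SECS);
/// let session = create_session(&pool, user.discord_id, duration).await?;
/// // `session.id` is the opaque token to set in the session cookie
/// ```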
pub async fn create_session(
pool: &PgPool,
user_id: i64,
duration: std::time::Duration,
) -> Result<UserSession> {
let token = generate_token();
let duration_secs = duration.as_secs() as i64;
sqlx::query_as::<_, UserSession>(
r#"
INSERT INTO user_sessions (id, user_id, expires_at)
VALUES ($1, $2, now() + make_interval(secs => $3::double precision))
RETURNING *
"#,
)
.bind(&token)
.bind(user_id)
.bind(duration_secs as f64)
.fetch_one(pool)
.await
.context("failed to create session")
}
/// Fetch a session by token, only if it has not expired.
pub async fn get_session(pool: &PgPool, token: &str) -> Result<Option<UserSession>> {
sqlx::query_as::<_, UserSession>(
"SELECT * FROM user_sessions WHERE id = $1 AND expires_at > now()",
)
.bind(token)
.fetch_optional(pool)
.await
.context("failed to get session")
}
/// Update the last-active timestamp and extend session expiry (sliding window).
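///
/// # Example
/// Typical auth-middleware usage (sketch):
/// ```ignore
/// if let Some(session) = get_session(&pool, token).await? {
///     touch_session(&pool, token).await?; // slide expiry forward on activity
/// }
/// ```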
pub async fn touch_session(pool: &PgPool, token: &str) -> Result<()> {
sqlx::query(
r#"
UPDATE user_sessions
SET last_active_at = now(),
expires_at = now() + make_interval(secs => $2::double precision)
WHERE id = $1
"#,
)
.bind(token)
.bind(SESSION_DURATION_SECS as f64)
.execute(pool)
.await
.context("failed to touch session")?;
Ok(())
}
/// Delete a session by token.
pub async fn delete_session(pool: &PgPool, token: &str) -> Result<()> {
sqlx::query("DELETE FROM user_sessions WHERE id = $1")
.bind(token)
.execute(pool)
.await
.context("failed to delete session")?;
Ok(())
}
/// Delete all sessions for a user. Returns the number of sessions deleted.
#[allow(dead_code)] // Available for admin user-deletion flow
pub async fn delete_user_sessions(pool: &PgPool, user_id: i64) -> Result<u64> {
let result = sqlx::query("DELETE FROM user_sessions WHERE user_id = $1")
.bind(user_id)
.execute(pool)
.await
.context("failed to delete user sessions")?;
Ok(result.rows_affected())
}
/// Delete all expired sessions. Returns the number of sessions cleaned up.
pub async fn cleanup_expired(pool: &PgPool) -> Result<u64> {
let result = sqlx::query("DELETE FROM user_sessions WHERE expires_at <= now()")
.execute(pool)
.await
.context("failed to cleanup expired sessions")?;
Ok(result.rows_affected())
}
@@ -0,0 +1,86 @@
//! Database query functions for users.
use anyhow::Context;
use sqlx::PgPool;
use super::models::User;
use crate::error::Result;
/// Insert a new user or update username/avatar on conflict.
pub async fn upsert_user(
pool: &PgPool,
discord_id: i64,
username: &str,
avatar_hash: Option<&str>,
) -> Result<User> {
sqlx::query_as::<_, User>(
r#"
INSERT INTO users (discord_id, discord_username, discord_avatar_hash)
VALUES ($1, $2, $3)
ON CONFLICT (discord_id) DO UPDATE
SET discord_username = EXCLUDED.discord_username,
discord_avatar_hash = EXCLUDED.discord_avatar_hash,
updated_at = now()
RETURNING *
"#,
)
.bind(discord_id)
.bind(username)
.bind(avatar_hash)
.fetch_one(pool)
.await
.context("failed to upsert user")
}
/// Fetch a user by Discord ID.
pub async fn get_user(pool: &PgPool, discord_id: i64) -> Result<Option<User>> {
sqlx::query_as::<_, User>("SELECT * FROM users WHERE discord_id = $1")
.bind(discord_id)
.fetch_optional(pool)
.await
.context("failed to get user")
}
/// List all users ordered by creation date (newest first).
pub async fn list_users(pool: &PgPool) -> Result<Vec<User>> {
sqlx::query_as::<_, User>("SELECT * FROM users ORDER BY created_at DESC")
.fetch_all(pool)
.await
.context("failed to list users")
}
/// Set the admin flag for a user, returning the updated user if found.
pub async fn set_admin(pool: &PgPool, discord_id: i64, is_admin: bool) -> Result<Option<User>> {
sqlx::query_as::<_, User>(
r#"
UPDATE users
SET is_admin = $2, updated_at = now()
WHERE discord_id = $1
RETURNING *
"#,
)
.bind(discord_id)
.bind(is_admin)
.fetch_optional(pool)
.await
.context("failed to set admin status")
}
/// Ensure a seed admin exists. Upserts with `is_admin = true` and a placeholder
/// username that will be replaced on first OAuth login.
pub async fn ensure_seed_admin(pool: &PgPool, discord_id: i64) -> Result<User> {
sqlx::query_as::<_, User>(
r#"
INSERT INTO users (discord_id, discord_username, is_admin)
VALUES ($1, 'seed-admin', true)
ON CONFLICT (discord_id) DO UPDATE
SET is_admin = true,
updated_at = now()
RETURNING *
"#,
)
.bind(discord_id)
.fetch_one(pool)
.await
.context("failed to ensure seed admin")
}
@@ -0,0 +1,4 @@
//! Application-specific error types.
pub type Error = anyhow::Error;
pub type Result<T, E = Error> = anyhow::Result<T, E>;
@@ -0,0 +1,269 @@
//! Custom tracing formatter
use serde::Serialize;
use serde_json::{Map, Value};
use std::fmt;
use time::macros::format_description;
use time::{OffsetDateTime, format_description::FormatItem};
use tracing::field::{Field, Visit};
use tracing::{Event, Level, Subscriber};
use tracing_subscriber::fmt::format::Writer;
use tracing_subscriber::fmt::{FmtContext, FormatEvent, FormatFields, FormattedFields};
use tracing_subscriber::registry::LookupSpan;
use yansi::Paint;
/// Cached format description for timestamps
const TIMESTAMP_FORMAT: &[FormatItem<'static>] =
format_description!("[hour]:[minute]:[second].[subsecond digits:5]");
/// A custom formatter with enhanced timestamp formatting
///
/// Re-implementation of the Full formatter with improved timestamp display.
pub struct CustomPrettyFormatter;
impl<S, N> FormatEvent<S, N> for CustomPrettyFormatter
where
S: Subscriber + for<'a> LookupSpan<'a>,
N: for<'a> FormatFields<'a> + 'static,
{
fn format_event(
&self,
ctx: &FmtContext<'_, S, N>,
mut writer: Writer<'_>,
event: &Event<'_>,
) -> fmt::Result {
let meta = event.metadata();
// 1) Timestamp (dimmed when ANSI)
let now = OffsetDateTime::now_utc();
let formatted_time = now.format(&TIMESTAMP_FORMAT).map_err(|e| {
eprintln!("Failed to format timestamp: {}", e);
fmt::Error
})?;
write_dimmed(&mut writer, formatted_time)?;
writer.write_char(' ')?;
// 2) Colored 5-char level like Full
write_colored_level(&mut writer, meta.level())?;
writer.write_char(' ')?;
// 3) Span scope chain (bold names, fields in braces, dimmed ':')
if let Some(scope) = ctx.event_scope() {
let mut saw_any = false;
for span in scope.from_root() {
write_bold(&mut writer, span.metadata().name())?;
saw_any = true;
write_dimmed(&mut writer, ":")?;
let ext = span.extensions();
if let Some(fields) = &ext.get::<FormattedFields<N>>()
&& !fields.fields.is_empty()
{
write_bold(&mut writer, "{")?;
writer.write_str(fields.fields.as_str())?;
write_bold(&mut writer, "}")?;
}
write_dimmed(&mut writer, ":")?;
}
if saw_any {
writer.write_char(' ')?;
}
}
// 4) Target (dimmed), then a space
if writer.has_ansi_escapes() {
write!(writer, "{}: ", Paint::new(meta.target()).dim())?;
} else {
write!(writer, "{}: ", meta.target())?;
}
// 5) Event fields
ctx.format_fields(writer.by_ref(), event)?;
// 6) Newline
writeln!(writer)
}
}
/// A custom JSON formatter that flattens fields to root level
///
/// Outputs logs in the format: { "message": "...", "level": "...", "customAttribute": "..." }
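///
/// Example output line (illustrative field values):
/// `{"message":"Candidate generation complete","level":"INFO","target":"banner::rmp","auto_matched":3}`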
pub struct CustomJsonFormatter;
impl<S, N> FormatEvent<S, N> for CustomJsonFormatter
where
S: Subscriber + for<'a> LookupSpan<'a>,
N: for<'a> FormatFields<'a> + 'static,
{
fn format_event(
&self,
ctx: &FmtContext<'_, S, N>,
mut writer: Writer<'_>,
event: &Event<'_>,
) -> fmt::Result {
let meta = event.metadata();
#[derive(Serialize)]
struct EventFields {
message: String,
level: String,
target: String,
#[serde(flatten)]
spans: Map<String, Value>,
#[serde(flatten)]
fields: Map<String, Value>,
}
let (message, fields, spans) = {
let mut message: Option<String> = None;
let mut fields: Map<String, Value> = Map::new();
let mut spans: Map<String, Value> = Map::new();
struct FieldVisitor<'a> {
message: &'a mut Option<String>,
fields: &'a mut Map<String, Value>,
}
impl<'a> Visit for FieldVisitor<'a> {
fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) {
let key = field.name();
if key == "message" {
*self.message = Some(format!("{:?}", value));
} else {
                        // Fall back to the Debug representation for non-message fields
self.fields
.insert(key.to_string(), Value::String(format!("{:?}", value)));
}
}
fn record_str(&mut self, field: &Field, value: &str) {
let key = field.name();
if key == "message" {
*self.message = Some(value.to_string());
} else {
self.fields
.insert(key.to_string(), Value::String(value.to_string()));
}
}
fn record_i64(&mut self, field: &Field, value: i64) {
let key = field.name();
if key != "message" {
self.fields.insert(
key.to_string(),
Value::Number(serde_json::Number::from(value)),
);
}
}
fn record_u64(&mut self, field: &Field, value: u64) {
let key = field.name();
if key != "message" {
self.fields.insert(
key.to_string(),
Value::Number(serde_json::Number::from(value)),
);
}
}
fn record_bool(&mut self, field: &Field, value: bool) {
let key = field.name();
if key != "message" {
self.fields.insert(key.to_string(), Value::Bool(value));
}
}
}
let mut visitor = FieldVisitor {
message: &mut message,
fields: &mut fields,
};
event.record(&mut visitor);
// Collect span information from the span hierarchy
if let Some(scope) = ctx.event_scope() {
for span in scope.from_root() {
let span_name = span.metadata().name().to_string();
let mut span_fields: Map<String, Value> = Map::new();
// Try to extract fields from FormattedFields
let ext = span.extensions();
if let Some(formatted_fields) = ext.get::<FormattedFields<N>>() {
// Try to parse as JSON first
if let Ok(json_fields) = serde_json::from_str::<Map<String, Value>>(
formatted_fields.fields.as_str(),
) {
span_fields.extend(json_fields);
} else {
// If not valid JSON, treat the entire field string as a single field
span_fields.insert(
"raw".to_string(),
Value::String(formatted_fields.fields.as_str().to_string()),
);
}
}
// Insert span as a nested object directly into the spans map
spans.insert(span_name, Value::Object(span_fields));
}
}
(message, fields, spans)
};
let json = EventFields {
message: message.unwrap_or_default(),
level: meta.level().to_string(),
target: meta.target().to_string(),
spans,
fields,
};
writeln!(
writer,
"{}",
serde_json::to_string(&json).unwrap_or_else(|_| "{}".to_string())
)
}
}
/// Write the verbosity level with the same coloring/alignment as the Full formatter.
fn write_colored_level(writer: &mut Writer<'_>, level: &Level) -> fmt::Result {
if writer.has_ansi_escapes() {
let paint = match *level {
Level::TRACE => Paint::new("TRACE").magenta(),
Level::DEBUG => Paint::new("DEBUG").blue(),
Level::INFO => Paint::new(" INFO").green(),
Level::WARN => Paint::new(" WARN").yellow(),
Level::ERROR => Paint::new("ERROR").red(),
};
write!(writer, "{}", paint)
} else {
        // Right-align to width 5 like Full's non-ANSI mode
match *level {
Level::TRACE => write!(writer, "{:>5}", "TRACE"),
Level::DEBUG => write!(writer, "{:>5}", "DEBUG"),
Level::INFO => write!(writer, "{:>5}", " INFO"),
Level::WARN => write!(writer, "{:>5}", " WARN"),
Level::ERROR => write!(writer, "{:>5}", "ERROR"),
}
}
}
fn write_dimmed(writer: &mut Writer<'_>, s: impl fmt::Display) -> fmt::Result {
if writer.has_ansi_escapes() {
write!(writer, "{}", Paint::new(s).dim())
} else {
write!(writer, "{}", s)
}
}
fn write_bold(writer: &mut Writer<'_>, s: impl fmt::Display) -> fmt::Result {
if writer.has_ansi_escapes() {
write!(writer, "{}", Paint::new(s).bold())
} else {
write!(writer, "{}", s)
}
}
@@ -1,5 +1,18 @@
pub mod app_state;
pub mod app;
pub mod banner;
pub mod bot;
pub mod calendar;
pub mod cli;
pub mod config;
pub mod data;
pub mod error;
pub mod formatter;
pub mod logging;
pub mod rmp;
pub mod scraper;
pub mod services;
pub mod signals;
pub mod state;
pub mod status;
pub mod utils;
pub mod web;
@@ -0,0 +1,47 @@
use crate::cli::TracingFormat;
use crate::config::Config;
use crate::formatter;
use tracing_subscriber::fmt::format::JsonFields;
use tracing_subscriber::{EnvFilter, FmtSubscriber};
/// Configure and initialize logging for the application
pub fn setup_logging(config: &Config, tracing_format: TracingFormat) {
// Configure logging based on config
// Note: Even when base_level is trace or debug, we suppress trace logs from noisy
// infrastructure modules to keep output readable. These modules use debug for important
// events and trace only for very detailed debugging.
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| {
let base_level = &config.log_level;
EnvFilter::new(format!(
"warn,banner={},banner::rate_limiter=warn,banner::session=debug,banner::rate_limit_middleware=warn,banner::middleware=debug",
base_level
))
});
// Select formatter based on CLI args
let use_pretty = match tracing_format {
TracingFormat::Pretty => true,
TracingFormat::Json => false,
};
let subscriber: Box<dyn tracing::Subscriber + Send + Sync> = if use_pretty {
Box::new(
FmtSubscriber::builder()
.with_target(true)
.event_format(formatter::CustomPrettyFormatter)
.with_env_filter(filter)
.finish(),
)
} else {
Box::new(
FmtSubscriber::builder()
.with_target(true)
.event_format(formatter::CustomJsonFormatter)
.fmt_fields(JsonFields::new())
.with_env_filter(filter)
.finish(),
)
};
tracing::subscriber::set_global_default(subscriber).expect("setting default subscriber failed");
}
@@ -1,169 +1,72 @@
use serenity::all::{ClientBuilder, GatewayIntents};
use tokio::signal;
use tracing::{debug, error, info, warn};
use tracing_subscriber::{EnvFilter, FmtSubscriber};
use crate::app::App;
use crate::cli::{Args, ServiceName};
use crate::logging::setup_logging;
use clap::Parser;
use std::process::ExitCode;
use tracing::info;
use crate::app_state::AppState;
use crate::banner::BannerApi;
use crate::bot::{Data, get_commands};
use crate::config::Config;
use crate::services::manager::ServiceManager;
use crate::services::{ServiceResult, bot::BotService, run_service};
use figment::{Figment, providers::Env};
mod app_state;
mod app;
mod banner;
mod bot;
mod calendar;
mod cli;
mod config;
mod data;
mod error;
mod formatter;
mod logging;
mod rmp;
mod scraper;
mod services;
mod signals;
mod state;
mod status;
mod web;
#[tokio::main]
async fn main() {
async fn main() -> ExitCode {
dotenvy::dotenv().ok();
// Configure logging
let filter =
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("warn,banner=debug"));
let subscriber = {
#[cfg(debug_assertions)]
{
FmtSubscriber::builder()
}
#[cfg(not(debug_assertions))]
{
FmtSubscriber::builder().json()
}
}
.with_env_filter(filter)
.finish();
tracing::subscriber::set_global_default(subscriber).expect("setting default subscriber failed");
// Parse CLI arguments
let args = Args::parse();
let config: Config = Figment::new()
.merge(Env::prefixed("APP_"))
.extract()
.expect("Failed to load config");
// Always run all services
let enabled_services = ServiceName::all();
// Create BannerApi and AppState
let banner_api =
BannerApi::new(config.banner_base_url.clone()).expect("Failed to create BannerApi");
banner_api
.setup()
.await
.expect("Failed to set up BannerApi session");
// Create and initialize the application
let mut app = App::new().await.expect("Failed to initialize application");
let app_state =
AppState::new(banner_api, &config.redis_url).expect("Failed to create AppState");
// Setup logging — must happen before any info!() calls to avoid silently dropped logs
setup_logging(app.config(), args.tracing);
// Configure the client with your Discord bot token in the environment
let intents = GatewayIntents::non_privileged();
info!(
enabled_services = ?enabled_services,
"services configuration loaded"
);
let bot_target_guild = config.bot_target_guild;
// Log application startup context
info!(
version = env!("CARGO_PKG_VERSION"),
environment = if cfg!(debug_assertions) {
"development"
} else {
"production"
},
"starting banner"
);
let framework = poise::Framework::builder()
.options(poise::FrameworkOptions {
commands: get_commands(),
..Default::default()
})
.setup(move |ctx, _ready, framework| {
let app_state = app_state.clone();
Box::pin(async move {
poise::builtins::register_in_guild(
ctx,
&framework.options().commands,
bot_target_guild.into(),
)
.await?;
poise::builtins::register_globally(ctx, &framework.options().commands).await?;
Ok(Data { app_state })
})
})
.build();
// Setup services (web, scraper)
app.setup_services(&enabled_services)
.expect("Failed to setup services");
let client = ClientBuilder::new(config.bot_token, intents)
.framework(framework)
.await
.expect("Failed to build client");
// Extract shutdown timeout before moving config
let shutdown_timeout = config.shutdown_timeout;
// Create service manager
let mut service_manager = ServiceManager::new();
// Create and add services
let bot_service = Box::new(BotService::new(client));
let bot_handle = tokio::spawn(run_service(bot_service, service_manager.subscribe()));
service_manager.add_service("bot".to_string(), bot_handle);
// Set up CTRL+C signal handling
let ctrl_c = async {
signal::ctrl_c()
// Setup bot service if enabled
if enabled_services.contains(&ServiceName::Bot) {
app.setup_bot_service()
.await
.expect("Failed to install CTRL+C signal handler");
info!("Received CTRL+C, gracefully shutting down...");
};
// Main application loop - wait for services or CTRL+C
let mut exit_code = 0;
let join = |strings: Vec<String>| {
strings
.iter()
.map(|s| format!("\"{}\"", s))
.collect::<Vec<_>>()
.join(", ")
};
tokio::select! {
(service_name, result) = service_manager.run() => {
// A service completed unexpectedly
match result {
ServiceResult::GracefulShutdown => {
info!(service = service_name, "Service completed gracefully");
}
ServiceResult::NormalCompletion => {
warn!(service = service_name, "Service completed unexpectedly");
exit_code = 1;
}
ServiceResult::Error(e) => {
error!(service = service_name, "Service failed: {e}");
exit_code = 1;
}
}
// Shutdown remaining services
match service_manager.shutdown(shutdown_timeout).await {
Ok(()) => {
debug!("Graceful shutdown complete");
}
Err(pending_services) => {
warn!(
"Graceful shutdown elapsed - the following service(s) did not complete: {}",
join(pending_services)
);
exit_code = if exit_code == 0 { 2 } else { exit_code };
}
}
}
_ = ctrl_c => {
// User requested shutdown
match service_manager.shutdown(shutdown_timeout).await {
Ok(()) => {
debug!("Graceful shutdown complete");
}
Err(pending_services) => {
warn!(
"Graceful shutdown elapsed - the following service(s) did not complete: {}",
join(pending_services)
);
exit_code = 2;
}
}
}
.expect("Failed to setup bot service");
}
info!(exit_code = exit_code, "Shutdown complete");
std::process::exit(exit_code);
// Start all services and run the application
app.start_services();
app.run().await
}
@@ -0,0 +1,156 @@
//! RateMyProfessors GraphQL client for bulk professor data sync.
use anyhow::Result;
use serde::{Deserialize, Serialize};
use tracing::{debug, info};
/// UTSA's school ID on RateMyProfessors (base64 of "School-1516").
const UTSA_SCHOOL_ID: &str = "U2Nob29sLTE1MTY=";
/// Basic auth header value (base64 of "test:test").
const AUTH_HEADER: &str = "Basic dGVzdDp0ZXN0";
/// GraphQL endpoint.
const GRAPHQL_URL: &str = "https://www.ratemyprofessors.com/graphql";
/// Page size for paginated fetches.
const PAGE_SIZE: u32 = 100;
/// A professor record from RateMyProfessors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RmpProfessor {
pub legacy_id: i32,
pub graphql_id: String,
pub first_name: String,
pub last_name: String,
pub department: Option<String>,
pub avg_rating: Option<f32>,
pub avg_difficulty: Option<f32>,
pub num_ratings: i32,
pub would_take_again_pct: Option<f32>,
}
/// Client for fetching professor data from RateMyProfessors.
pub struct RmpClient {
http: reqwest::Client,
}
impl Default for RmpClient {
fn default() -> Self {
Self::new()
}
}
impl RmpClient {
pub fn new() -> Self {
Self {
http: reqwest::Client::new(),
}
}
/// Fetch all professors for UTSA via paginated GraphQL queries.
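    ///
    /// # Example
    /// Bulk-sync sketch (illustrative only):
    /// ```ignore
    /// let client = RmpClient::new();
    /// let professors = client.fetch_all_professors().await?;
    /// info!(count = professors.len(), "synced RMP professors");
    /// ```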
pub async fn fetch_all_professors(&self) -> Result<Vec<RmpProfessor>> {
let mut all = Vec::new();
let mut cursor: Option<String> = None;
loop {
let after_clause = match &cursor {
Some(c) => format!(r#", after: "{}""#, c),
None => String::new(),
};
let query = format!(
r#"query {{
newSearch {{
teachers(query: {{ text: "", schoolID: "{school_id}" }}, first: {page_size}{after}) {{
edges {{
cursor
node {{
id
legacyId
firstName
lastName
department
avgRating
avgDifficulty
numRatings
wouldTakeAgainPercent
}}
}}
pageInfo {{
hasNextPage
endCursor
}}
}}
}}
}}"#,
school_id = UTSA_SCHOOL_ID,
page_size = PAGE_SIZE,
after = after_clause,
);
let body = serde_json::json!({ "query": query });
let resp = self
.http
.post(GRAPHQL_URL)
.header("Authorization", AUTH_HEADER)
.json(&body)
.send()
.await?;
let status = resp.status();
if !status.is_success() {
let text = resp.text().await.unwrap_or_default();
anyhow::bail!("RMP GraphQL request failed ({status}): {text}");
}
let json: serde_json::Value = resp.json().await?;
let teachers = &json["data"]["newSearch"]["teachers"];
let edges = teachers["edges"]
.as_array()
.ok_or_else(|| anyhow::anyhow!("Missing edges in RMP response"))?;
for edge in edges {
let node = &edge["node"];
let wta = node["wouldTakeAgainPercent"]
.as_f64()
.map(|v| v as f32)
.filter(|&v| v >= 0.0);
all.push(RmpProfessor {
legacy_id: node["legacyId"]
.as_i64()
.ok_or_else(|| anyhow::anyhow!("Missing legacyId"))?
as i32,
graphql_id: node["id"]
.as_str()
.ok_or_else(|| anyhow::anyhow!("Missing id"))?
.to_string(),
first_name: node["firstName"].as_str().unwrap_or_default().to_string(),
last_name: node["lastName"].as_str().unwrap_or_default().to_string(),
department: node["department"].as_str().map(|s| s.to_string()),
avg_rating: node["avgRating"].as_f64().map(|v| v as f32),
avg_difficulty: node["avgDifficulty"].as_f64().map(|v| v as f32),
num_ratings: node["numRatings"].as_i64().unwrap_or(0) as i32,
would_take_again_pct: wta,
});
}
let page_info = &teachers["pageInfo"];
let has_next = page_info["hasNextPage"].as_bool().unwrap_or(false);
if !has_next {
break;
}
cursor = page_info["endCursor"].as_str().map(|s| s.to_string());
debug!(fetched = all.len(), "RMP pagination: fetching next page");
}
info!(total = all.len(), "Fetched all RMP professors");
Ok(all)
}
}
@@ -0,0 +1,326 @@
//! Adaptive scraping interval computation.
//!
//! Assigns per-subject scrape intervals based on recent change rates,
//! consecutive zero-change runs, failure patterns, and time of day.
use chrono::{DateTime, Datelike, Timelike, Utc};
use chrono_tz::US::Central;
use std::time::Duration;
use crate::data::scrape_jobs::SubjectResultStats;
const FLOOR_INTERVAL: Duration = Duration::from_secs(3 * 60);
const MODERATE_HIGH_INTERVAL: Duration = Duration::from_secs(5 * 60);
const MODERATE_LOW_INTERVAL: Duration = Duration::from_secs(15 * 60);
const LOW_CHANGE_INTERVAL: Duration = Duration::from_secs(30 * 60);
const ZERO_5_INTERVAL: Duration = Duration::from_secs(60 * 60);
const ZERO_10_INTERVAL: Duration = Duration::from_secs(2 * 60 * 60);
const CEILING_INTERVAL: Duration = Duration::from_secs(4 * 60 * 60);
const COLD_START_INTERVAL: Duration = FLOOR_INTERVAL;
const PAUSE_PROBE_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60);
const EMPTY_FETCH_PAUSE_THRESHOLD: i64 = 3;
const FAILURE_PAUSE_THRESHOLD: i64 = 5;
/// Aggregated per-subject statistics derived from recent scrape results.
#[derive(Debug, Clone)]
pub struct SubjectStats {
pub subject: String,
pub recent_runs: i64,
pub avg_change_ratio: f64,
pub consecutive_zero_changes: i64,
pub consecutive_empty_fetches: i64,
pub recent_failure_count: i64,
pub recent_success_count: i64,
pub last_completed: DateTime<Utc>,
}
/// Scheduling decision for a subject.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubjectSchedule {
/// Subject is due for scraping, with the computed interval.
Eligible(Duration),
/// Subject was scraped recently; wait for the remaining cooldown.
Cooldown(Duration),
/// Subject is paused due to repeated empty fetches or failures.
Paused,
/// Subject belongs to a past term and should not be scraped.
ReadOnly,
}
impl From<SubjectResultStats> for SubjectStats {
fn from(row: SubjectResultStats) -> Self {
Self {
subject: row.subject,
recent_runs: row.recent_runs,
avg_change_ratio: row.avg_change_ratio,
consecutive_zero_changes: row.consecutive_zero_changes,
consecutive_empty_fetches: row.consecutive_empty_fetches,
recent_failure_count: row.recent_failure_count,
recent_success_count: row.recent_success_count,
last_completed: row.last_completed,
}
}
}
/// Compute the base interval tier from change-rate statistics.
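///
/// Tier mapping (derived from the constants above):
/// - no recent runs → cold start (3 min)
/// - avg change ratio ≥ 0.10 → 3 min; ≥ 0.05 → 5 min; ≥ 0.01 → 15 min; else 30 min
/// - near-zero ratio escalates with consecutive zero-change runs:
///   <5 → 30 min, 5-9 → 1 h, 10-19 → 2 h, 20+ → 4 h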
pub fn compute_base_interval(stats: &SubjectStats) -> Duration {
if stats.recent_runs == 0 {
return COLD_START_INTERVAL;
}
// Consecutive-zero tiers take precedence when change ratio is near zero
if stats.avg_change_ratio < 0.001 {
return match stats.consecutive_zero_changes {
0..5 => LOW_CHANGE_INTERVAL,
5..10 => ZERO_5_INTERVAL,
10..20 => ZERO_10_INTERVAL,
_ => CEILING_INTERVAL,
};
}
match stats.avg_change_ratio {
r if r >= 0.10 => FLOOR_INTERVAL,
r if r >= 0.05 => MODERATE_HIGH_INTERVAL,
r if r >= 0.01 => MODERATE_LOW_INTERVAL,
_ => LOW_CHANGE_INTERVAL,
}
}
/// Return a time-of-day multiplier for the given UTC timestamp.
///
/// Peak hours (weekdays 8am-6pm CT) return 1; off-peak (weekdays 6pm-midnight CT)
/// return 2; night (midnight-8am CT) and weekends return 4.
pub fn time_of_day_multiplier(now: DateTime<Utc>) -> u32 {
let ct = now.with_timezone(&Central);
let weekday = ct.weekday();
let hour = ct.hour();
// Weekends get the slowest multiplier
if matches!(weekday, chrono::Weekday::Sat | chrono::Weekday::Sun) {
return 4;
}
match hour {
8..18 => 1, // peak
18..24 => 2, // off-peak
_ => 4, // night (0..8)
}
}
/// Evaluate whether a subject should be scraped now.
///
/// Combines base interval, time-of-day multiplier, pause detection (empty
/// fetches / consecutive failures), and past-term read-only status.
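///
/// # Example
/// Dispatch sketch for the scheduler loop:
/// ```ignore
/// match evaluate_subject(&stats, Utc::now(), false) {
///     SubjectSchedule::Eligible(_interval) => { /* enqueue a scrape job */ }
///     SubjectSchedule::Cooldown(remaining) => { /* revisit after `remaining` */ }
///     SubjectSchedule::Paused | SubjectSchedule::ReadOnly => { /* skip */ }
/// }
/// ```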
pub fn evaluate_subject(
stats: &SubjectStats,
now: DateTime<Utc>,
is_past_term: bool,
) -> SubjectSchedule {
if is_past_term {
return SubjectSchedule::ReadOnly;
}
let elapsed = (now - stats.last_completed)
.to_std()
.unwrap_or(Duration::ZERO);
let probe_due = elapsed >= PAUSE_PROBE_INTERVAL;
// Pause on repeated empty fetches
if stats.consecutive_empty_fetches >= EMPTY_FETCH_PAUSE_THRESHOLD {
return if probe_due {
SubjectSchedule::Eligible(PAUSE_PROBE_INTERVAL)
} else {
SubjectSchedule::Paused
};
}
// Pause on all-failures
if stats.recent_success_count == 0 && stats.recent_failure_count >= FAILURE_PAUSE_THRESHOLD {
return if probe_due {
SubjectSchedule::Eligible(PAUSE_PROBE_INTERVAL)
} else {
SubjectSchedule::Paused
};
}
let base = compute_base_interval(stats);
let multiplier = time_of_day_multiplier(now);
let effective = base * multiplier;
if elapsed >= effective {
SubjectSchedule::Eligible(effective)
} else {
let remaining = effective - elapsed;
SubjectSchedule::Cooldown(remaining)
}
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::TimeZone;
/// Create a default `SubjectStats` for testing. Callers mutate fields as needed.
fn make_stats(subject: &str) -> SubjectStats {
SubjectStats {
subject: subject.to_string(),
recent_runs: 10,
avg_change_ratio: 0.0,
consecutive_zero_changes: 0,
consecutive_empty_fetches: 0,
recent_failure_count: 0,
recent_success_count: 10,
last_completed: Utc::now() - chrono::Duration::hours(1),
}
}
// -- compute_base_interval tests --
#[test]
fn test_cold_start_returns_floor() {
let mut stats = make_stats("CS");
stats.recent_runs = 0;
assert_eq!(compute_base_interval(&stats), COLD_START_INTERVAL);
}
#[test]
fn test_high_change_rate() {
let mut stats = make_stats("CS");
stats.avg_change_ratio = 0.15;
assert_eq!(compute_base_interval(&stats), FLOOR_INTERVAL);
}
#[test]
fn test_moderate_high_change() {
let mut stats = make_stats("CS");
stats.avg_change_ratio = 0.07;
assert_eq!(compute_base_interval(&stats), MODERATE_HIGH_INTERVAL);
}
#[test]
fn test_moderate_low_change() {
let mut stats = make_stats("CS");
stats.avg_change_ratio = 0.03;
assert_eq!(compute_base_interval(&stats), MODERATE_LOW_INTERVAL);
}
#[test]
fn test_low_change() {
let mut stats = make_stats("CS");
stats.avg_change_ratio = 0.005;
assert_eq!(compute_base_interval(&stats), LOW_CHANGE_INTERVAL);
}
#[test]
fn test_zero_5_consecutive() {
let mut stats = make_stats("CS");
stats.avg_change_ratio = 0.0;
stats.consecutive_zero_changes = 5;
assert_eq!(compute_base_interval(&stats), ZERO_5_INTERVAL);
}
#[test]
fn test_zero_10_consecutive() {
let mut stats = make_stats("CS");
stats.avg_change_ratio = 0.0;
stats.consecutive_zero_changes = 10;
assert_eq!(compute_base_interval(&stats), ZERO_10_INTERVAL);
}
#[test]
fn test_zero_20_consecutive() {
let mut stats = make_stats("CS");
stats.avg_change_ratio = 0.0;
stats.consecutive_zero_changes = 20;
assert_eq!(compute_base_interval(&stats), CEILING_INTERVAL);
}
// -- evaluate_subject tests --
#[test]
fn test_pause_empty_fetches() {
let mut stats = make_stats("CS");
stats.consecutive_empty_fetches = 3;
stats.last_completed = Utc::now() - chrono::Duration::minutes(10);
let result = evaluate_subject(&stats, Utc::now(), false);
assert_eq!(result, SubjectSchedule::Paused);
}
#[test]
fn test_pause_all_failures() {
let mut stats = make_stats("CS");
stats.recent_success_count = 0;
stats.recent_failure_count = 5;
stats.last_completed = Utc::now() - chrono::Duration::minutes(10);
let result = evaluate_subject(&stats, Utc::now(), false);
assert_eq!(result, SubjectSchedule::Paused);
}
#[test]
fn test_probe_after_pause() {
let mut stats = make_stats("CS");
stats.consecutive_empty_fetches = 5;
stats.last_completed = Utc::now() - chrono::Duration::hours(7);
let result = evaluate_subject(&stats, Utc::now(), false);
assert_eq!(result, SubjectSchedule::Eligible(PAUSE_PROBE_INTERVAL));
}
#[test]
fn test_read_only_past_term() {
let stats = make_stats("CS");
let result = evaluate_subject(&stats, Utc::now(), true);
assert_eq!(result, SubjectSchedule::ReadOnly);
}
#[test]
fn test_cooldown_not_elapsed() {
let mut stats = make_stats("CS");
stats.avg_change_ratio = 0.15; // floor = 3 min
        // Use a peak-hours timestamp so multiplier = 1
        let peak = Utc.with_ymd_and_hms(2025, 7, 14, 15, 0, 0).unwrap(); // Mon 10am CT
        stats.last_completed = peak - chrono::Duration::seconds(30);
let result = evaluate_subject(&stats, peak, false);
assert!(matches!(result, SubjectSchedule::Cooldown(_)));
}
#[test]
fn test_eligible_elapsed() {
let mut stats = make_stats("CS");
stats.avg_change_ratio = 0.15; // floor = 3 min
let peak = Utc.with_ymd_and_hms(2025, 7, 14, 15, 0, 0).unwrap(); // Mon 10am CT
stats.last_completed = peak - chrono::Duration::minutes(5);
let result = evaluate_subject(&stats, peak, false);
assert!(matches!(result, SubjectSchedule::Eligible(_)));
}
// -- time_of_day_multiplier tests --
#[test]
fn test_time_multiplier_peak() {
// Monday 10am CT = 15:00 UTC
let dt = Utc.with_ymd_and_hms(2025, 7, 14, 15, 0, 0).unwrap();
assert_eq!(time_of_day_multiplier(dt), 1);
}
#[test]
fn test_time_multiplier_offpeak() {
        // Tuesday 01:00 UTC = Monday 8pm CT (off-peak)
let dt = Utc.with_ymd_and_hms(2025, 7, 15, 1, 0, 0).unwrap();
assert_eq!(time_of_day_multiplier(dt), 2);
}
#[test]
fn test_time_multiplier_night() {
// 3am CT = 08:00 UTC
let dt = Utc.with_ymd_and_hms(2025, 7, 14, 8, 0, 0).unwrap();
assert_eq!(time_of_day_multiplier(dt), 4);
}
#[test]
fn test_time_multiplier_weekend() {
// Saturday noon CT = 17:00 UTC
let dt = Utc.with_ymd_and_hms(2025, 7, 12, 17, 0, 0).unwrap();
assert_eq!(time_of_day_multiplier(dt), 4);
}
}
@@ -0,0 +1,151 @@
pub mod subject;
use crate::banner::BannerApi;
use crate::data::models::{TargetType, UpsertCounts};
use crate::error::Result;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use thiserror::Error;
/// Errors that can occur during job parsing
#[derive(Debug, Error)]
pub enum JobParseError {
#[error("Invalid JSON in job payload: {0}")]
InvalidJson(#[from] serde_json::Error),
#[error("Unsupported target type: {0:?}")]
UnsupportedTargetType(TargetType),
}
/// Errors that can occur during job processing
#[derive(Debug, Error)]
pub enum JobError {
#[error("Recoverable error: {0}")]
Recoverable(#[source] anyhow::Error),
#[error("Unrecoverable error: {0}")]
Unrecoverable(#[source] anyhow::Error),
}
/// Common trait interface for all job types
#[async_trait::async_trait]
pub trait Job: Send + Sync {
/// The target type this job handles
#[allow(dead_code)]
fn target_type(&self) -> TargetType;
/// Process the job with the given API client and database pool.
/// Returns upsert effectiveness counts on success.
async fn process(&self, banner_api: &BannerApi, db_pool: &PgPool) -> Result<UpsertCounts>;
/// Get a human-readable description of the job
fn description(&self) -> String;
}
/// Main job enum that dispatches to specific job implementations
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum JobType {
Subject(subject::SubjectJob),
}
impl JobType {
/// Create a job from the target type and payload
pub fn from_target_type_and_payload(
target_type: TargetType,
payload: serde_json::Value,
) -> Result<Self, JobParseError> {
match target_type {
TargetType::Subject => {
let subject_job: subject::SubjectJob =
serde_json::from_value(payload).map_err(JobParseError::InvalidJson)?;
Ok(JobType::Subject(subject_job))
}
_ => Err(JobParseError::UnsupportedTargetType(target_type)),
}
}
/// Convert to a Job trait object
pub fn boxed(self) -> Box<dyn Job> {
match self {
JobType::Subject(job) => Box::new(job),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// --- Valid dispatch ---
#[test]
fn test_from_target_subject_valid() {
let result =
JobType::from_target_type_and_payload(TargetType::Subject, json!({"subject": "CS"}));
assert!(matches!(result, Ok(JobType::Subject(_))));
}
#[test]
fn test_from_target_subject_empty_string() {
let result =
JobType::from_target_type_and_payload(TargetType::Subject, json!({"subject": ""}));
assert!(matches!(result, Ok(JobType::Subject(_))));
}
// --- Invalid JSON ---
#[test]
fn test_from_target_subject_missing_field() {
let result = JobType::from_target_type_and_payload(TargetType::Subject, json!({}));
assert!(matches!(result, Err(JobParseError::InvalidJson(_))));
}
#[test]
fn test_from_target_subject_wrong_type() {
let result =
JobType::from_target_type_and_payload(TargetType::Subject, json!({"subject": 123}));
assert!(matches!(result, Err(JobParseError::InvalidJson(_))));
}
#[test]
fn test_from_target_subject_null_payload() {
let result = JobType::from_target_type_and_payload(TargetType::Subject, json!(null));
assert!(matches!(result, Err(JobParseError::InvalidJson(_))));
}
// --- Unsupported target types ---
#[test]
fn test_from_target_unsupported_variants() {
let unsupported = [
TargetType::CourseRange,
TargetType::CrnList,
TargetType::SingleCrn,
];
for target_type in unsupported {
let result =
JobType::from_target_type_and_payload(target_type, json!({"subject": "CS"}));
assert!(
matches!(result, Err(JobParseError::UnsupportedTargetType(_))),
"expected UnsupportedTargetType for {target_type:?}"
);
}
}
// --- Error Display ---
#[test]
fn test_job_parse_error_display() {
let invalid_json_err =
JobType::from_target_type_and_payload(TargetType::Subject, json!(null)).unwrap_err();
let display = invalid_json_err.to_string();
assert!(display.contains("Invalid JSON"), "got: {display}");
let unsupported_err =
JobType::from_target_type_and_payload(TargetType::CrnList, json!({})).unwrap_err();
let display = unsupported_err.to_string();
assert!(
display.contains("Unsupported target type"),
"got: {display}"
);
}
}
@@ -0,0 +1,58 @@
use super::Job;
use crate::banner::{BannerApi, SearchQuery, Term};
use crate::data::batch::batch_upsert_courses;
use crate::data::models::{TargetType, UpsertCounts};
use crate::error::Result;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use tracing::{debug, info};
/// Job implementation for scraping subject data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubjectJob {
pub subject: String,
}
impl SubjectJob {
pub fn new(subject: String) -> Self {
Self { subject }
}
}
#[async_trait::async_trait]
impl Job for SubjectJob {
fn target_type(&self) -> TargetType {
TargetType::Subject
}
#[tracing::instrument(skip(self, banner_api, db_pool), fields(subject = %self.subject))]
async fn process(&self, banner_api: &BannerApi, db_pool: &PgPool) -> Result<UpsertCounts> {
let subject_code = &self.subject;
// Get the current term
let term = Term::get_current().inner().to_string();
let query = SearchQuery::new().subject(subject_code).max_results(500);
let search_result = banner_api
.search(&term, &query, "subjectDescription", false)
.await?;
let counts = if let Some(courses_from_api) = search_result.data {
info!(
subject = %subject_code,
count = courses_from_api.len(),
"Found courses"
);
batch_upsert_courses(&courses_from_api, db_pool).await?
} else {
UpsertCounts::default()
};
debug!(subject = %subject_code, "Subject job completed");
Ok(counts)
}
fn description(&self) -> String {
format!("Scrape subject: {}", self.subject)
}
}
@@ -0,0 +1,156 @@
pub mod adaptive;
pub mod jobs;
pub mod scheduler;
pub mod worker;
use crate::banner::BannerApi;
use crate::data::scrape_jobs;
use crate::services::Service;
use crate::state::ReferenceCache;
use crate::status::{ServiceStatus, ServiceStatusRegistry};
use crate::web::ws::ScrapeJobEvent;
use sqlx::PgPool;
use std::sync::Arc;
use tokio::sync::{RwLock, broadcast};
use tokio::task::JoinHandle;
use tracing::{info, warn};
use self::scheduler::Scheduler;
use self::worker::Worker;
/// The main service that will be managed by the application's `ServiceManager`.
///
/// It holds the shared resources (database pool, API client) and manages the
/// lifecycle of the Scheduler and Worker tasks.
pub struct ScraperService {
db_pool: PgPool,
banner_api: Arc<BannerApi>,
reference_cache: Arc<RwLock<ReferenceCache>>,
service_statuses: ServiceStatusRegistry,
job_events_tx: broadcast::Sender<ScrapeJobEvent>,
scheduler_handle: Option<JoinHandle<()>>,
worker_handles: Vec<JoinHandle<()>>,
shutdown_tx: Option<broadcast::Sender<()>>,
}
impl ScraperService {
/// Creates a new `ScraperService`.
pub fn new(
db_pool: PgPool,
banner_api: Arc<BannerApi>,
reference_cache: Arc<RwLock<ReferenceCache>>,
service_statuses: ServiceStatusRegistry,
job_events_tx: broadcast::Sender<ScrapeJobEvent>,
) -> Self {
Self {
db_pool,
banner_api,
reference_cache,
service_statuses,
job_events_tx,
scheduler_handle: None,
worker_handles: Vec::new(),
shutdown_tx: None,
}
}
/// Starts the scheduler and a pool of workers.
///
/// Force-unlocks any jobs left locked by a previous unclean shutdown before
/// spawning workers, so those jobs re-enter the queue immediately.
pub async fn start(&mut self) {
// Recover jobs left locked by a previous crash/unclean shutdown
match scrape_jobs::force_unlock_all(&self.db_pool).await {
Ok(0) => {}
Ok(count) => warn!(count, "Force-unlocked stale jobs from previous run"),
Err(e) => warn!(error = ?e, "Failed to force-unlock stale jobs"),
}
info!("ScraperService starting");
// Create shutdown channel
let (shutdown_tx, _) = broadcast::channel(1);
self.shutdown_tx = Some(shutdown_tx.clone());
let scheduler = Scheduler::new(
self.db_pool.clone(),
self.banner_api.clone(),
self.reference_cache.clone(),
self.job_events_tx.clone(),
);
let shutdown_rx = shutdown_tx.subscribe();
let scheduler_handle = tokio::spawn(async move {
scheduler.run(shutdown_rx).await;
});
self.scheduler_handle = Some(scheduler_handle);
info!("Scheduler task spawned");
let worker_count = 4; // This could be configurable
for i in 0..worker_count {
let worker = Worker::new(
i,
self.db_pool.clone(),
self.banner_api.clone(),
self.job_events_tx.clone(),
);
let shutdown_rx = shutdown_tx.subscribe();
let worker_handle = tokio::spawn(async move {
worker.run(shutdown_rx).await;
});
self.worker_handles.push(worker_handle);
}
info!(
worker_count = self.worker_handles.len(),
"Spawned worker tasks"
);
self.service_statuses.set("scraper", ServiceStatus::Active);
}
}
#[async_trait::async_trait]
impl Service for ScraperService {
fn name(&self) -> &'static str {
"scraper"
}
async fn run(&mut self) -> Result<(), anyhow::Error> {
self.start().await;
// Park forever; this service only ends via shutdown(), never by returning.
std::future::pending::<()>().await;
Ok(())
}
async fn shutdown(&mut self) -> Result<(), anyhow::Error> {
self.service_statuses
.set("scraper", ServiceStatus::Disabled);
info!("Shutting down scraper service");
// Send shutdown signal to all tasks
if let Some(shutdown_tx) = self.shutdown_tx.take() {
let _ = shutdown_tx.send(());
} else {
warn!("No shutdown channel found for scraper service");
return Err(anyhow::anyhow!("No shutdown channel available"));
}
// Collect all handles
let mut all_handles = Vec::new();
if let Some(handle) = self.scheduler_handle.take() {
all_handles.push(handle);
}
all_handles.append(&mut self.worker_handles);
// Wait for all tasks to complete (no internal timeout - let ServiceManager handle it)
let results = futures::future::join_all(all_handles).await;
let failed = results.iter().filter(|r| r.is_err()).count();
if failed > 0 {
warn!(
failed_count = failed,
"Some scraper tasks panicked during shutdown"
);
return Err(anyhow::anyhow!("{} task(s) panicked", failed));
}
info!("All scraper tasks shutdown gracefully");
Ok(())
}
}
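start() and shutdown() hinge on a single broadcast channel fanning out to every task: each task subscribes before it is spawned, and one send() unblocks all receivers at once. A self-contained sketch of that pattern (names are illustrative, not taken from this codebase; assumes the tokio and futures crates already used above):
use tokio::sync::broadcast;
#[tokio::main]
async fn main() {
let (shutdown_tx, _) = broadcast::channel::<()>(1);
let mut handles = Vec::new();
for i in 0..4 {
// Each task gets its own receiver, subscribed before the send.
let mut shutdown_rx = shutdown_tx.subscribe();
handles.push(tokio::spawn(async move {
// Stand-in for a scheduler/worker loop: park until shutdown.
let _ = shutdown_rx.recv().await;
println!("task {i} exiting");
}));
}
let _ = shutdown_tx.send(()); // one send wakes every subscriber
let _ = futures::future::join_all(handles).await;
}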
+426
@@ -0,0 +1,426 @@
use crate::banner::{BannerApi, Term};
use crate::data::models::{ReferenceData, ScrapePriority, TargetType};
use crate::data::scrape_jobs;
use crate::error::Result;
use crate::rmp::RmpClient;
use crate::scraper::adaptive::{SubjectSchedule, SubjectStats, evaluate_subject};
use crate::scraper::jobs::subject::SubjectJob;
use crate::state::ReferenceCache;
use crate::web::ws::{ScrapeJobDto, ScrapeJobEvent};
use chrono::{DateTime, Utc};
use serde_json::json;
use sqlx::PgPool;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::{RwLock, broadcast};
use tokio::time;
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, warn};
/// How often reference data is re-scraped (6 hours).
const REFERENCE_DATA_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60);
/// How often RMP data is synced (24 hours).
const RMP_SYNC_INTERVAL: Duration = Duration::from_secs(24 * 60 * 60);
/// Periodically analyzes data and enqueues prioritized scrape jobs.
pub struct Scheduler {
db_pool: PgPool,
banner_api: Arc<BannerApi>,
reference_cache: Arc<RwLock<ReferenceCache>>,
job_events_tx: broadcast::Sender<ScrapeJobEvent>,
}
impl Scheduler {
pub fn new(
db_pool: PgPool,
banner_api: Arc<BannerApi>,
reference_cache: Arc<RwLock<ReferenceCache>>,
job_events_tx: broadcast::Sender<ScrapeJobEvent>,
) -> Self {
Self {
db_pool,
banner_api,
reference_cache,
job_events_tx,
}
}
/// Runs the scheduler's main loop with graceful shutdown support.
///
/// The scheduler wakes up every 60 seconds to analyze data and enqueue jobs.
/// When a shutdown signal is received:
/// 1. Any in-progress scheduling work is gracefully cancelled via CancellationToken
/// 2. The scheduler waits up to 5 seconds for work to complete
/// 3. If the timeout elapses, the task is abandoned (dropping its JoinHandle detaches the task; the cancelled work winds down in the background)
///
/// This ensures that shutdown is responsive even if scheduling work is blocked.
pub async fn run(&self, mut shutdown_rx: broadcast::Receiver<()>) {
info!("Scheduler service started");
let work_interval = Duration::from_secs(60);
let mut next_run = time::Instant::now();
let mut current_work: Option<(tokio::task::JoinHandle<()>, CancellationToken)> = None;
// Scrape reference data and sync RMP data immediately on the first cycle.
// `None` marks "never run"; subtracting the interval from Instant::now()
// can panic when the monotonic clock is younger than the interval.
let mut last_ref_scrape: Option<Instant> = None;
let mut last_rmp_sync: Option<Instant> = None;
loop {
tokio::select! {
_ = time::sleep_until(next_run) => {
let cancel_token = CancellationToken::new();
let should_scrape_ref = last_ref_scrape.is_none_or(|t| t.elapsed() >= REFERENCE_DATA_INTERVAL);
let should_sync_rmp = last_rmp_sync.is_none_or(|t| t.elapsed() >= RMP_SYNC_INTERVAL);
// Spawn work in separate task to allow graceful cancellation during shutdown.
let work_handle = tokio::spawn({
let db_pool = self.db_pool.clone();
let banner_api = self.banner_api.clone();
let cancel_token = cancel_token.clone();
let reference_cache = self.reference_cache.clone();
let job_events_tx = self.job_events_tx.clone();
async move {
tokio::select! {
_ = async {
// RMP sync is independent of Banner API — run it
// concurrently with reference data scraping so it
// doesn't wait behind rate-limited Banner calls.
let rmp_fut = async {
if should_sync_rmp
&& let Err(e) = Self::sync_rmp_data(&db_pool).await
{
error!(error = ?e, "Failed to sync RMP data");
}
};
let ref_fut = async {
if should_scrape_ref
&& let Err(e) = Self::scrape_reference_data(&db_pool, &banner_api, &reference_cache).await
{
error!(error = ?e, "Failed to scrape reference data");
}
};
tokio::join!(rmp_fut, ref_fut);
if let Err(e) = Self::schedule_jobs_impl(&db_pool, &banner_api, Some(&job_events_tx)).await {
error!(error = ?e, "Failed to schedule jobs");
}
} => {}
_ = cancel_token.cancelled() => {
debug!("Scheduling work cancelled gracefully");
}
}
}
});
if should_scrape_ref {
last_ref_scrape = Some(Instant::now());
}
if should_sync_rmp {
last_rmp_sync = Some(Instant::now());
}
current_work = Some((work_handle, cancel_token));
next_run = time::Instant::now() + work_interval;
}
_ = shutdown_rx.recv() => {
info!("Scheduler received shutdown signal");
if let Some((handle, cancel_token)) = current_work.take() {
cancel_token.cancel();
// Wait briefly for graceful completion
if tokio::time::timeout(Duration::from_secs(5), handle).await.is_err() {
warn!("Scheduling work did not complete within 5s, abandoning");
} else {
debug!("Scheduling work completed gracefully");
}
}
info!("Scheduler exiting gracefully");
break;
}
}
}
}
/// Core scheduling logic that analyzes data and creates scrape jobs.
///
/// Uses adaptive scheduling to determine per-subject scrape intervals based
/// on recent change rates, failure patterns, and time of day. Only subjects
/// that are eligible (i.e. their cooldown has elapsed) are enqueued.
///
/// This is a static method (not &self) to allow it to be called from spawned tasks.
#[tracing::instrument(skip_all, fields(term))]
async fn schedule_jobs_impl(
db_pool: &PgPool,
banner_api: &BannerApi,
job_events_tx: Option<&broadcast::Sender<ScrapeJobEvent>>,
) -> Result<()> {
let term = Term::get_current().inner().to_string();
tracing::Span::current().record("term", term.as_str());
debug!(term = term, "Enqueuing subject jobs");
let subjects = banner_api.get_subjects("", &term, 1, 500).await?;
debug!(
subject_count = subjects.len(),
"Retrieved subjects from API"
);
// Fetch per-subject stats and build a lookup map
let stats_rows = scrape_jobs::fetch_subject_stats(db_pool).await?;
let stats_map: HashMap<String, SubjectStats> = stats_rows
.into_iter()
.map(|row| {
let subject = row.subject.clone();
(subject, SubjectStats::from(row))
})
.collect();
// Evaluate each subject using adaptive scheduling
let now = Utc::now();
let is_past_term = false; // Scheduler currently only fetches current term subjects
let mut eligible_subjects: Vec<String> = Vec::new();
let mut cooldown_count: usize = 0;
let mut paused_count: usize = 0;
let mut read_only_count: usize = 0;
for subject in &subjects {
let stats = stats_map.get(&subject.code).cloned().unwrap_or_else(|| {
// Cold start: no history for this subject
SubjectStats {
subject: subject.code.clone(),
recent_runs: 0,
avg_change_ratio: 0.0,
consecutive_zero_changes: 0,
consecutive_empty_fetches: 0,
recent_failure_count: 0,
recent_success_count: 0,
last_completed: DateTime::<Utc>::MIN_UTC,
}
});
match evaluate_subject(&stats, now, is_past_term) {
SubjectSchedule::Eligible(_) => {
eligible_subjects.push(subject.code.clone());
}
SubjectSchedule::Cooldown(_) => cooldown_count += 1,
SubjectSchedule::Paused => paused_count += 1,
SubjectSchedule::ReadOnly => read_only_count += 1,
}
}
info!(
total = subjects.len(),
eligible = eligible_subjects.len(),
cooldown = cooldown_count,
paused = paused_count,
read_only = read_only_count,
"Adaptive scheduling decisions"
);
if eligible_subjects.is_empty() {
debug!("No eligible subjects to schedule");
return Ok(());
}
// Create payloads only for eligible subjects
let subject_payloads: Vec<_> = eligible_subjects
.iter()
.map(|code| json!({ "subject": code }))
.collect();
// Query existing jobs for eligible subjects only
let existing_payloads = scrape_jobs::find_existing_job_payloads(
TargetType::Subject,
&subject_payloads,
db_pool,
)
.await?;
// Filter out subjects that already have pending jobs
let mut skipped_count = 0;
let new_jobs: Vec<_> = eligible_subjects
.into_iter()
.filter_map(|subject_code| {
let job = SubjectJob::new(subject_code.clone());
let payload = serde_json::to_value(&job).unwrap();
let payload_str = payload.to_string();
if existing_payloads.contains(&payload_str) {
skipped_count += 1;
None
} else {
Some((payload, subject_code))
}
})
.collect();
if skipped_count > 0 {
debug!(count = skipped_count, "Skipped subjects with existing jobs");
}
// Insert all new jobs in a single batch
if !new_jobs.is_empty() {
for (_, subject_code) in &new_jobs {
debug!(subject = subject_code, "New job enqueued for subject");
}
let jobs: Vec<_> = new_jobs
.into_iter()
.map(|(payload, _)| (payload, TargetType::Subject, ScrapePriority::Low))
.collect();
let inserted = scrape_jobs::batch_insert_jobs(&jobs, db_pool).await?;
if let Some(tx) = job_events_tx {
inserted.iter().for_each(|job| {
debug!(job_id = job.id, "Emitting JobCreated event");
let _ = tx.send(ScrapeJobEvent::JobCreated {
job: ScrapeJobDto::from(job),
});
});
}
}
debug!("Job scheduling complete");
Ok(())
}
/// Fetch all RMP professors, upsert to DB, and auto-match against Banner instructors.
#[tracing::instrument(skip_all)]
async fn sync_rmp_data(db_pool: &PgPool) -> Result<()> {
info!("Starting RMP data sync");
let client = RmpClient::new();
let professors = client.fetch_all_professors().await?;
let total = professors.len();
crate::data::rmp::batch_upsert_rmp_professors(&professors, db_pool).await?;
info!(total, "RMP professors upserted");
let stats = crate::data::rmp_matching::generate_candidates(db_pool).await?;
info!(
total,
stats.total_unmatched,
stats.candidates_created,
stats.candidates_rescored,
stats.auto_matched,
stats.skipped_unparseable,
stats.skipped_no_candidates,
"RMP sync complete"
);
Ok(())
}
/// Scrape all reference data categories from Banner and upsert to DB, then refresh cache.
#[tracing::instrument(skip_all)]
async fn scrape_reference_data(
db_pool: &PgPool,
banner_api: &BannerApi,
reference_cache: &Arc<RwLock<ReferenceCache>>,
) -> Result<()> {
let term = Term::get_current().inner().to_string();
info!(term = %term, "Scraping reference data");
let mut all_entries = Vec::new();
// Terms (fetched via session pool, no active session needed)
match banner_api.sessions.get_terms("", 1, 500).await {
Ok(terms) => {
debug!(count = terms.len(), "Fetched terms");
all_entries.extend(terms.into_iter().map(|t| ReferenceData {
category: "term".to_string(),
code: t.code,
description: t.description,
}));
}
Err(e) => warn!(error = ?e, "Failed to fetch terms"),
}
// Subjects
match banner_api.get_subjects("", &term, 1, 500).await {
Ok(pairs) => {
debug!(count = pairs.len(), "Fetched subjects");
all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
category: "subject".to_string(),
code: p.code,
description: p.description,
}));
}
Err(e) => warn!(error = ?e, "Failed to fetch subjects"),
}
// Campuses
match banner_api.get_campuses(&term).await {
Ok(pairs) => {
debug!(count = pairs.len(), "Fetched campuses");
all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
category: "campus".to_string(),
code: p.code,
description: p.description,
}));
}
Err(e) => warn!(error = ?e, "Failed to fetch campuses"),
}
// Instructional methods
match banner_api.get_instructional_methods(&term).await {
Ok(pairs) => {
debug!(count = pairs.len(), "Fetched instructional methods");
all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
category: "instructional_method".to_string(),
code: p.code,
description: p.description,
}));
}
Err(e) => warn!(error = ?e, "Failed to fetch instructional methods"),
}
// Parts of term
match banner_api.get_parts_of_term(&term).await {
Ok(pairs) => {
debug!(count = pairs.len(), "Fetched parts of term");
all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
category: "part_of_term".to_string(),
code: p.code,
description: p.description,
}));
}
Err(e) => warn!(error = ?e, "Failed to fetch parts of term"),
}
// Attributes
match banner_api.get_attributes(&term).await {
Ok(pairs) => {
debug!(count = pairs.len(), "Fetched attributes");
all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
category: "attribute".to_string(),
code: p.code,
description: p.description,
}));
}
Err(e) => warn!(error = ?e, "Failed to fetch attributes"),
}
// Batch upsert all entries
let total = all_entries.len();
crate::data::reference::batch_upsert(&all_entries, db_pool).await?;
info!(total_entries = total, "Reference data upserted to DB");
// Refresh in-memory cache
let all = crate::data::reference::get_all(db_pool).await?;
let count = all.len();
*reference_cache.write().await = ReferenceCache::from_entries(all);
info!(entries = count, "Reference cache refreshed");
Ok(())
}
}
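evaluate_subject itself lives in scraper::adaptive and is not part of this diff. Purely as a hypothetical sketch of what such a policy could look like (field set taken from the cold-start initializer above; every threshold below is invented for illustration):
use chrono::{DateTime, Duration, Utc};
// Minimal stand-ins mirroring the adaptive types used by the scheduler.
struct SubjectStats {
avg_change_ratio: f64,
recent_failure_count: i64,
last_completed: DateTime<Utc>,
}
enum SubjectSchedule {
Eligible(Duration),
Cooldown(Duration),
Paused,
ReadOnly,
}
fn evaluate_subject_sketch(stats: &SubjectStats, now: DateTime<Utc>, is_past_term: bool) -> SubjectSchedule {
if is_past_term {
// Past terms never change, so never re-scrape them.
return SubjectSchedule::ReadOnly;
}
if stats.recent_failure_count >= 5 {
// Repeated failures: stop scheduling this subject for now.
return SubjectSchedule::Paused;
}
// Busier subjects (higher change ratio) earn shorter intervals,
// clamped between 15 minutes and 6 hours.
let minutes = (60.0 / (stats.avg_change_ratio + 0.1)).clamp(15.0, 360.0);
let interval = Duration::minutes(minutes as i64);
let since_last = now - stats.last_completed;
if since_last >= interval {
SubjectSchedule::Eligible(interval)
} else {
SubjectSchedule::Cooldown(interval - since_last)
}
}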
+425
@@ -0,0 +1,425 @@
use crate::banner::{BannerApi, BannerApiError};
use crate::data::models::{ScrapeJob, ScrapeJobStatus, UpsertCounts};
use crate::data::scrape_jobs;
use crate::error::Result;
use crate::scraper::jobs::{JobError, JobType};
use crate::web::ws::ScrapeJobEvent;
use chrono::{DateTime, Utc};
use sqlx::PgPool;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::broadcast;
use tokio::time;
use tracing::{Instrument, debug, error, info, trace, warn};
/// Maximum time a single job is allowed to run before being considered stuck.
const JOB_TIMEOUT: Duration = Duration::from_secs(5 * 60);
/// A single worker instance.
///
/// Each worker runs in its own asynchronous task and continuously polls the
/// database for scrape jobs to execute.
pub struct Worker {
id: usize, // For logging purposes
db_pool: PgPool,
banner_api: Arc<BannerApi>,
job_events_tx: broadcast::Sender<ScrapeJobEvent>,
}
impl Worker {
pub fn new(
id: usize,
db_pool: PgPool,
banner_api: Arc<BannerApi>,
job_events_tx: broadcast::Sender<ScrapeJobEvent>,
) -> Self {
Self {
id,
db_pool,
banner_api,
job_events_tx,
}
}
/// Runs the worker's main loop.
pub async fn run(&self, mut shutdown_rx: broadcast::Receiver<()>) {
info!(worker_id = self.id, "Worker started");
loop {
// Fetch and lock a job, racing against shutdown signal
let job = tokio::select! {
_ = shutdown_rx.recv() => {
info!(worker_id = self.id, "Worker received shutdown signal, exiting gracefully");
break;
}
result = self.fetch_and_lock_job() => {
match result {
Ok(Some(job)) => job,
Ok(None) => {
trace!(worker_id = self.id, "No jobs available, waiting");
time::sleep(Duration::from_secs(5)).await;
continue;
}
Err(e) => {
warn!(worker_id = self.id, error = ?e, "Failed to fetch job, waiting");
time::sleep(Duration::from_secs(10)).await;
continue;
}
}
}
};
let job_id = job.id;
let retry_count = job.retry_count;
let max_retries = job.max_retries;
let target_type = job.target_type;
let payload = job.target_payload.clone();
let priority = job.priority;
let queued_at = job.queued_at;
let started_at = Utc::now();
let start = std::time::Instant::now();
// Emit JobLocked event
let locked_at = started_at.to_rfc3339();
debug!(job_id, "Emitting JobLocked event");
let _ = self.job_events_tx.send(ScrapeJobEvent::JobLocked {
id: job_id,
locked_at,
status: ScrapeJobStatus::Processing,
});
// Process the job, racing against shutdown signal and timeout
let process_result = tokio::select! {
_ = shutdown_rx.recv() => {
self.handle_shutdown_during_processing(job_id).await;
break;
}
result = async {
match time::timeout(JOB_TIMEOUT, self.process_job(job)).await {
Ok(result) => result,
Err(_elapsed) => {
Err(JobError::Recoverable(anyhow::anyhow!(
"job timed out after {}s",
JOB_TIMEOUT.as_secs()
)))
}
}
} => result
};
let duration = start.elapsed();
// Handle the job processing result
self.handle_job_result(
job_id,
retry_count,
max_retries,
process_result,
duration,
target_type,
payload,
priority,
queued_at,
started_at,
)
.await;
}
}
/// Atomically fetches a job from the queue, locking it for processing.
///
/// This uses a `FOR UPDATE SKIP LOCKED` query to ensure that multiple
/// workers can poll the queue concurrently without conflicts.
async fn fetch_and_lock_job(&self) -> Result<Option<ScrapeJob>> {
scrape_jobs::fetch_and_lock_job(&self.db_pool).await
}
async fn process_job(&self, job: ScrapeJob) -> Result<UpsertCounts, JobError> {
// Convert the database job to our job type
let job_type = JobType::from_target_type_and_payload(job.target_type, job.target_payload)
.map_err(|e| JobError::Unrecoverable(anyhow::anyhow!(e)))?; // Parse errors are unrecoverable
// Get the job implementation
let job_impl = job_type.boxed();
// Create span with job context
let span = tracing::info_span!(
"process_job",
job_id = job.id,
job_type = job_impl.description()
);
async move {
debug!(
worker_id = self.id,
job_id = job.id,
description = job_impl.description(),
"Processing job"
);
// Process the job - API errors are recoverable
job_impl
.process(&self.banner_api, &self.db_pool)
.await
.map_err(JobError::Recoverable)
}
.instrument(span)
.await
}
async fn delete_job(&self, job_id: i32) -> Result<()> {
scrape_jobs::delete_job(job_id, &self.db_pool).await
}
async fn unlock_job(&self, job_id: i32) -> Result<()> {
scrape_jobs::unlock_job(job_id, &self.db_pool).await
}
async fn unlock_and_increment_retry(
&self,
job_id: i32,
max_retries: i32,
) -> Result<Option<chrono::DateTime<chrono::Utc>>> {
scrape_jobs::unlock_and_increment_retry(job_id, max_retries, &self.db_pool).await
}
/// Handle shutdown signal received during job processing
async fn handle_shutdown_during_processing(&self, job_id: i32) {
info!(
worker_id = self.id,
job_id, "Shutdown received during job processing"
);
if let Err(e) = self.unlock_job(job_id).await {
warn!(
worker_id = self.id,
job_id,
error = ?e,
"Failed to unlock job during shutdown"
);
} else {
debug!(worker_id = self.id, job_id, "Job unlocked during shutdown");
}
info!(worker_id = self.id, "Worker exiting gracefully");
}
/// Handle the result of job processing
#[allow(clippy::too_many_arguments)]
async fn handle_job_result(
&self,
job_id: i32,
retry_count: i32,
max_retries: i32,
result: Result<UpsertCounts, JobError>,
duration: std::time::Duration,
target_type: crate::data::models::TargetType,
payload: serde_json::Value,
priority: crate::data::models::ScrapePriority,
queued_at: DateTime<Utc>,
started_at: DateTime<Utc>,
) {
let duration_ms = duration.as_millis() as i32;
match result {
Ok(counts) => {
debug!(
worker_id = self.id,
job_id,
duration_ms = duration.as_millis(),
courses_fetched = counts.courses_fetched,
courses_changed = counts.courses_changed,
courses_unchanged = counts.courses_unchanged,
"Job completed successfully"
);
// Log the result
if let Err(e) = scrape_jobs::insert_job_result(
target_type,
payload,
priority,
queued_at,
started_at,
duration_ms,
true,
None,
retry_count,
Some(&counts),
&self.db_pool,
)
.await
{
error!(worker_id = self.id, job_id, error = ?e, "Failed to insert job result");
}
if let Err(e) = self.delete_job(job_id).await {
error!(worker_id = self.id, job_id, error = ?e, "Failed to delete completed job");
}
debug!(job_id, "Emitting JobCompleted event");
let _ = self
.job_events_tx
.send(ScrapeJobEvent::JobCompleted { id: job_id });
}
Err(JobError::Recoverable(e)) => {
self.handle_recoverable_error(
job_id,
retry_count,
max_retries,
e,
duration,
target_type,
payload,
priority,
queued_at,
started_at,
)
.await;
}
Err(JobError::Unrecoverable(e)) => {
// Log the failed result
let err_msg = format!("{e:#}");
if let Err(log_err) = scrape_jobs::insert_job_result(
target_type,
payload,
priority,
queued_at,
started_at,
duration_ms,
false,
Some(&err_msg),
retry_count,
None,
&self.db_pool,
)
.await
{
error!(worker_id = self.id, job_id, error = ?log_err, "Failed to insert job result");
}
error!(
worker_id = self.id,
job_id,
duration_ms = duration.as_millis(),
error = ?e,
"Job corrupted, deleting"
);
if let Err(e) = self.delete_job(job_id).await {
error!(worker_id = self.id, job_id, error = ?e, "Failed to delete corrupted job");
}
debug!(job_id, "Emitting JobDeleted event");
let _ = self
.job_events_tx
.send(ScrapeJobEvent::JobDeleted { id: job_id });
}
}
}
/// Handle recoverable errors by logging appropriately and unlocking the job
#[allow(clippy::too_many_arguments)]
async fn handle_recoverable_error(
&self,
job_id: i32,
retry_count: i32,
max_retries: i32,
e: anyhow::Error,
duration: std::time::Duration,
target_type: crate::data::models::TargetType,
payload: serde_json::Value,
priority: crate::data::models::ScrapePriority,
queued_at: DateTime<Utc>,
started_at: DateTime<Utc>,
) {
let next_attempt = retry_count.saturating_add(1);
let remaining_retries = max_retries.saturating_sub(next_attempt);
// Log the error appropriately based on type
if let Some(BannerApiError::InvalidSession(_)) = e.downcast_ref::<BannerApiError>() {
warn!(
worker_id = self.id,
job_id,
duration_ms = duration.as_millis(),
retry_attempt = next_attempt,
max_retries = max_retries,
remaining_retries = remaining_retries,
"Invalid session detected, will retry"
);
} else {
error!(
worker_id = self.id,
job_id,
duration_ms = duration.as_millis(),
retry_attempt = next_attempt,
max_retries = max_retries,
remaining_retries = remaining_retries,
error = ?e,
"Failed to process job, will retry"
);
}
// Atomically unlock and increment retry count, checking if retry is allowed
match self.unlock_and_increment_retry(job_id, max_retries).await {
Ok(Some(new_queued_at)) => {
debug!(
worker_id = self.id,
job_id,
retry_attempt = next_attempt,
remaining_retries = remaining_retries,
"Job unlocked for retry"
);
debug!(job_id, "Emitting JobRetried event");
let _ = self.job_events_tx.send(ScrapeJobEvent::JobRetried {
id: job_id,
retry_count: next_attempt,
queued_at: new_queued_at.to_rfc3339(),
status: ScrapeJobStatus::Pending,
});
// Don't log a result yet — the job will be retried
}
Ok(None) => {
// Max retries exceeded — log final failure result
let duration_ms = duration.as_millis() as i32;
let err_msg = format!("{e:#}");
if let Err(log_err) = scrape_jobs::insert_job_result(
target_type,
payload,
priority,
queued_at,
started_at,
duration_ms,
false,
Some(&err_msg),
next_attempt,
None,
&self.db_pool,
)
.await
{
error!(worker_id = self.id, job_id, error = ?log_err, "Failed to insert job result");
}
error!(
worker_id = self.id,
job_id,
duration_ms = duration.as_millis(),
retry_count = next_attempt,
max_retries = max_retries,
error = ?e,
"Job failed permanently (max retries exceeded), deleting"
);
if let Err(e) = self.delete_job(job_id).await {
error!(worker_id = self.id, job_id, error = ?e, "Failed to delete failed job");
}
debug!(job_id, "Emitting JobExhausted and JobDeleted events");
let _ = self
.job_events_tx
.send(ScrapeJobEvent::JobExhausted { id: job_id });
let _ = self
.job_events_tx
.send(ScrapeJobEvent::JobDeleted { id: job_id });
}
Err(e) => {
error!(worker_id = self.id, job_id, error = ?e, "Failed to unlock and increment retry count");
}
}
}
}
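fetch_and_lock_job's query lives in data::scrape_jobs and is not shown in this diff. The classic shape of such a claim query, as a hedged sketch (table and column names assumed; ScrapeJob is assumed to derive sqlx::FromRow):
// Hypothetical sketch of a SKIP LOCKED claim query. Workers that poll
// concurrently skip rows another transaction has already selected, so no
// two workers claim the same job and none of them block each other.
async fn fetch_and_lock_job_sketch(pool: &sqlx::PgPool) -> sqlx::Result<Option<ScrapeJob>> {
sqlx::query_as::<_, ScrapeJob>(
r#"
UPDATE scrape_jobs
SET locked_at = now()
WHERE id = (
SELECT id FROM scrape_jobs
WHERE locked_at IS NULL
ORDER BY priority DESC, queued_at ASC
FOR UPDATE SKIP LOCKED
LIMIT 1
)
RETURNING *
"#,
)
.fetch_optional(pool)
.await
}
Marking the claim with a locked_at timestamp, rather than holding the row lock open, matches the unlock_job and force_unlock_all calls above: the claim survives the transaction and is released explicitly.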
+212 -4
@@ -1,20 +1,205 @@
use super::Service;
use crate::bot::{Data, get_commands};
use crate::config::Config;
use crate::state::AppState;
use crate::status::{ServiceStatus, ServiceStatusRegistry};
use num_format::{Locale, ToFormattedString};
use serenity::Client;
use serenity::all::{ActivityData, ClientBuilder, GatewayIntents};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::{Mutex, broadcast};
use tokio::task::JoinHandle;
use tracing::{debug, error, info, warn};
/// Discord bot service implementation
pub struct BotService {
client: Client,
shard_manager: Arc<serenity::gateway::ShardManager>,
status_task_handle: Arc<Mutex<Option<JoinHandle<()>>>>,
status_shutdown_tx: Option<broadcast::Sender<()>>,
service_statuses: ServiceStatusRegistry,
}
impl BotService {
/// Create a new Discord bot client with full configuration
pub async fn create_client(
config: &Config,
app_state: AppState,
status_task_handle: Arc<Mutex<Option<JoinHandle<()>>>>,
status_shutdown_rx: broadcast::Receiver<()>,
) -> Result<Client, anyhow::Error> {
let intents = GatewayIntents::non_privileged();
let bot_target_guild = config.bot_target_guild;
let framework = poise::Framework::builder()
.options(poise::FrameworkOptions {
commands: get_commands(),
pre_command: |ctx| {
Box::pin(async move {
let content = match ctx {
poise::Context::Application(_) => ctx.invocation_string(),
poise::Context::Prefix(prefix) => prefix.msg.content.to_string(),
};
let channel_name = ctx
.channel_id()
.name(ctx.http())
.await
.unwrap_or("unknown".to_string());
let span = tracing::Span::current();
span.record("command_name", ctx.command().qualified_name.as_str());
span.record("invocation", ctx.invocation_string());
span.record("msg.content", content.as_str());
span.record("msg.author", ctx.author().tag().as_str());
span.record("msg.id", ctx.id());
span.record("msg.channel_id", ctx.channel_id().get());
span.record("msg.channel", channel_name.as_str());
tracing::info!(
command_name = ctx.command().qualified_name.as_str(),
invocation = ctx.invocation_string(),
msg.content = %content,
msg.author = %ctx.author().tag(),
msg.author_id = %ctx.author().id,
msg.id = %ctx.id(),
msg.channel = %channel_name.as_str(),
msg.channel_id = %ctx.channel_id(),
"{} invoked by {}",
ctx.command().name,
ctx.author().tag()
);
})
},
on_error: |error| {
Box::pin(async move {
if let Err(e) = poise::builtins::on_error(error).await {
tracing::error!(error = %e, "Fatal error while sending error message");
}
})
},
..Default::default()
})
.setup(move |ctx, _ready, framework| {
let app_state = app_state.clone();
let status_task_handle = status_task_handle.clone();
Box::pin(async move {
poise::builtins::register_in_guild(
ctx,
&framework.options().commands,
bot_target_guild.into(),
)
.await?;
poise::builtins::register_globally(ctx, &framework.options().commands).await?;
// Start status update task with shutdown support
let handle = Self::start_status_update_task(
ctx.clone(),
app_state.clone(),
status_shutdown_rx,
);
*status_task_handle.lock().await = Some(handle);
app_state.service_statuses.set("bot", ServiceStatus::Active);
Ok(Data { app_state })
})
})
.build();
Ok(ClientBuilder::new(config.bot_token.clone(), intents)
.framework(framework)
.await?)
}
/// Start the status update task for the Discord bot with graceful shutdown support
fn start_status_update_task(
ctx: serenity::client::Context,
app_state: AppState,
mut shutdown_rx: broadcast::Receiver<()>,
) -> JoinHandle<()> {
tokio::spawn(async move {
let max_interval = Duration::from_secs(300); // 5 minutes
let base_interval = Duration::from_secs(30);
let mut interval = tokio::time::interval(base_interval);
let mut previous_course_count: Option<i64> = None;
// This runs once immediately on startup, then with adaptive intervals
loop {
tokio::select! {
_ = interval.tick() => {
// Get the course count, update the activity if it has changed/hasn't been set this session
let course_count = match app_state.get_course_count().await {
Ok(count) => count,
Err(e) => {
warn!(error = %e, "Failed to fetch course count for status update");
continue;
}
};
// A fresh session (None) never equals Some(count), so this also sets the activity on startup.
if previous_course_count != Some(course_count) {
ctx.set_activity(Some(ActivityData::playing(format!(
"Querying {} classes",
course_count.to_formatted_string(&Locale::en)
))));
}
// Increase or reset the interval
interval = tokio::time::interval(
// Avoid logging the first 'change'
if course_count != previous_course_count.unwrap_or(0) {
if previous_course_count.is_some() {
debug!(
new_course_count = course_count,
last_interval = interval.period().as_secs(),
"Course count changed, resetting interval"
);
}
// Record the new course count
previous_course_count = Some(course_count);
// Reset to base interval
base_interval
} else {
// Increase interval by 10% (up to maximum)
let new_interval = interval.period().mul_f32(1.1).min(max_interval);
debug!(
current_course_count = course_count,
last_interval = interval.period().as_secs(),
new_interval = new_interval.as_secs(),
"Course count unchanged, increasing interval"
);
new_interval
},
);
// Reset the interval, otherwise it will tick again immediately
interval.reset();
}
_ = shutdown_rx.recv() => {
info!("Status update task received shutdown signal");
break;
}
}
}
})
}
pub fn new(
client: Client,
status_task_handle: Arc<Mutex<Option<JoinHandle<()>>>>,
status_shutdown_tx: broadcast::Sender<()>,
service_statuses: ServiceStatusRegistry,
) -> Self {
let shard_manager = client.shard_manager.clone();
Self {
client,
shard_manager,
status_task_handle,
status_shutdown_tx: Some(status_shutdown_tx),
service_statuses,
}
}
}
@@ -28,17 +213,40 @@ impl Service for BotService {
async fn run(&mut self) -> Result<(), anyhow::Error> {
match self.client.start().await {
Ok(()) => {
warn!(service = "bot", "Stopped early.");
warn!(service = "bot", "stopped early");
Err(anyhow::anyhow!("bot stopped early"))
}
Err(e) => {
error!(service = "bot", "Error: {e:?}");
error!(service = "bot", "error: {e:?}");
Err(e.into())
}
}
}
async fn shutdown(&mut self) -> Result<(), anyhow::Error> {
self.service_statuses.set("bot", ServiceStatus::Disabled);
// Signal status update task to stop
if let Some(status_shutdown_tx) = self.status_shutdown_tx.take() {
let _ = status_shutdown_tx.send(());
}
// Wait for status update task to complete (with timeout)
let handle = self.status_task_handle.lock().await.take();
if let Some(handle) = handle {
match tokio::time::timeout(Duration::from_secs(2), handle).await {
Ok(Ok(())) => {
debug!("Status update task completed gracefully");
}
Ok(Err(e)) => {
warn!(error = ?e, "Status update task panicked");
}
Err(_) => {
warn!("Status update task did not complete within 2s timeout");
}
}
}
// Shutdown Discord shards
self.shard_manager.shutdown_all().await;
Ok(())
}
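The status task's polling backoff is multiplicative with a cap: it starts at 30s, grows 10% on every unchanged poll, and snaps back to 30s whenever the course count changes. Since 30 * 1.1^n reaches 300 at n >= ln(10)/ln(1.1) (about 24.2), the ceiling is hit on the 25th idle poll. A standalone sketch of the same arithmetic:
use std::time::Duration;
fn main() {
let base = Duration::from_secs(30);
let max = Duration::from_secs(300);
let mut interval = base;
let mut ticks = 0;
while interval < max {
// Same step the bot uses: grow 10%, clamp to the ceiling.
interval = interval.mul_f32(1.1).min(max);
ticks += 1;
}
println!("cap reached after {ticks} unchanged polls"); // prints 25
}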
+175 -110
@@ -1,155 +1,220 @@
use std::collections::HashMap;
use std::time::Duration;
use tokio::sync::{broadcast, mpsc};
use tracing::{debug, info, trace, warn};
use crate::services::{Service, ServiceResult, run_service};
/// Manages multiple services and their lifecycle
pub struct ServiceManager {
registered_services: HashMap<String, Box<dyn Service>>,
service_handles: HashMap<String, tokio::task::AbortHandle>,
completion_rx: Option<mpsc::UnboundedReceiver<(String, ServiceResult)>>,
completion_tx: mpsc::UnboundedSender<(String, ServiceResult)>,
shutdown_tx: broadcast::Sender<()>,
}
impl Default for ServiceManager {
fn default() -> Self {
Self::new()
}
}
impl ServiceManager {
pub fn new() -> Self {
let (shutdown_tx, _) = broadcast::channel(1);
let (completion_tx, completion_rx) = mpsc::unbounded_channel();
Self {
registered_services: HashMap::new(),
service_handles: HashMap::new(),
completion_rx: Some(completion_rx),
completion_tx,
shutdown_tx,
}
}
/// Register a service to be managed (not yet spawned)
pub fn register_service(&mut self, name: &str, service: Box<dyn Service>) {
self.registered_services.insert(name.to_string(), service);
}
/// Check if there are any registered services
pub fn has_services(&self) -> bool {
!self.registered_services.is_empty()
}
/// Spawn all registered services
pub fn spawn_all(&mut self) {
let service_count = self.registered_services.len();
let service_names: Vec<_> = self.registered_services.keys().cloned().collect();
for (name, service) in self.registered_services.drain() {
let shutdown_rx = self.shutdown_tx.subscribe();
let completion_tx = self.completion_tx.clone();
let name_clone = name.clone();
// Spawn service task
let handle = tokio::spawn(async move {
let result = run_service(service, shutdown_rx).await;
// Send completion notification
let _ = completion_tx.send((name_clone, result));
});
// Store abort handle for shutdown control
self.service_handles
.insert(name.clone(), handle.abort_handle());
debug!(service = name, id = ?handle.id(), "service spawned");
}
info!(
service_count,
services = ?service_names,
"spawned {} services",
service_count
);
}
/// Run all services until one completes or fails
/// Returns the first service that completes and its result
pub async fn run(&mut self) -> (String, ServiceResult) {
if self.service_handles.is_empty() {
return (
"none".to_string(),
ServiceResult::Error(anyhow::anyhow!("No services to run")),
);
}
info!(
"servicemanager running {} services",
self.service_handles.len()
);
// Wait for any service to complete via the channel
let completion_rx = self
.completion_rx
.as_mut()
.expect("completion_rx should be available");
completion_rx
.recv()
.await
.map(|(name, result)| {
self.service_handles.remove(&name);
(name, result)
})
.unwrap_or_else(|| {
(
"channel_closed".to_string(),
ServiceResult::Error(anyhow::anyhow!("Completion channel closed")),
)
})
}
/// Shutdown all services gracefully with a timeout.
///
/// All services receive the shutdown signal simultaneously and shut down in parallel.
/// Each service gets the full timeout duration (they don't share/consume from a budget).
/// If any service fails to shutdown within the timeout, it will be aborted.
///
/// Returns the elapsed time if all succeed, or a list of failed service names.
pub async fn shutdown(&mut self, timeout: Duration) -> Result<Duration, Vec<String>> {
let service_count = self.service_handles.len();
let service_names: Vec<_> = self.service_handles.keys().cloned().collect();
info!(
service_count,
services = ?service_names,
timeout = format!("{:.2?}", timeout),
"shutting down {} services in parallel with {:?} timeout each",
service_count,
timeout
);
if service_count == 0 {
return Ok(Duration::ZERO);
}
// Send shutdown signal to all services simultaneously
let _ = self.shutdown_tx.send(());
let start_time = std::time::Instant::now();
// Collect results from all services with timeout
let completion_rx = self
.completion_rx
.as_mut()
.expect("completion_rx should be available");
// Collect all completion results with a single timeout
let collect_future = async {
let mut collected: Vec<Option<(String, ServiceResult)>> = Vec::new();
for _ in 0..service_count {
if let Some(result) = completion_rx.recv().await {
collected.push(Some(result));
} else {
// Channel closed: no further results can arrive, stop early
break;
}
}
collected
};
let results = match tokio::time::timeout(timeout, collect_future).await {
Ok(results) => results,
Err(_) => {
// Timeout exceeded - abort all remaining services
warn!(
timeout = format!("{:.2?}", timeout),
"shutdown timeout exceeded - aborting all remaining services"
);
let failed: Vec<String> = self.service_handles.keys().cloned().collect();
for handle in self.service_handles.values() {
handle.abort();
}
self.service_handles.clear();
return Err(failed);
}
};
// Process results and identify failures
let mut failed_services = Vec::new();
for (name, service_result) in results.into_iter().flatten() {
self.service_handles.remove(&name);
if matches!(service_result, ServiceResult::GracefulShutdown) {
trace!(service = name, "service shutdown completed");
} else {
warn!(
service = name,
result = ?service_result,
"service shutdown with non-graceful result"
);
failed_services.push(name);
}
}
let elapsed = start_time.elapsed();
if failed_services.is_empty() {
info!(
service_count,
elapsed = format!("{:.2?}", elapsed),
"all services shutdown successfully: {}",
service_names.join(", ")
);
Ok(elapsed)
} else {
warn!(
failed_count = failed_services.len(),
failed_services = ?failed_services,
elapsed = format!("{:.2?}", elapsed),
"{} service(s) failed to shutdown gracefully: {}",
failed_services.len(),
failed_services.join(", ")
);
Err(failed_services)
}
}
}
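End to end, the manager is driven in four steps: register, spawn, wait for the first exit, then fan out shutdown. A hedged usage sketch (import paths assumed; bot and scraper stand in for real Service implementors constructed elsewhere):
use std::time::Duration;
use crate::services::{Service, ServiceManager};
async fn run_app(mut manager: ServiceManager, bot: Box<dyn Service>, scraper: Box<dyn Service>) {
// 1. Register everything up front; nothing is spawned yet.
manager.register_service("bot", bot);
manager.register_service("scraper", scraper);
// 2. Spawn all registered services onto the runtime.
manager.spawn_all();
// 3. Block until the first service exits, normally or with an error.
let (name, result) = manager.run().await;
tracing::warn!(service = name, result = ?result, "first service exited");
// 4. Broadcast shutdown; every service gets the same timeout in parallel.
match manager.shutdown(Duration::from_secs(10)).await {
Ok(elapsed) => tracing::info!(elapsed = ?elapsed, "all services shut down"),
Err(failed) => tracing::warn!(failed = ?failed, "some services were aborted"),
}
}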

Some files were not shown because too many files have changed in this diff.