Mirror of https://github.com/Xevion/banner.git (synced 2026-01-31 14:23:36 -06:00)
Compare commits
89 Commits
| SHA1 |
|---|
| b69c1eec54 |
| 567c4aec3c |
| f5a639e88b |
| d91f7ab342 |
| 7f0f08725a |
| 02b18f0c66 |
| 106bf232c4 |
| 239f7ee38c |
| 0ee4e8a8bc |
| 5729a821d5 |
| 5134ae9388 |
| 9e825cd113 |
| ac8dbb2eef |
| 5dd35ed215 |
| 2acf52a63b |
| 47132e71d7 |
| 87db1a4ccb |
| e203e8e182 |
| cbb0a51bca |
| c533768362 |
| 16039e02a9 |
| 7d2255a988 |
| 8bfc14e55c |
| 2689587dd5 |
| 1ad614dad0 |
| ebb7a97c11 |
| 2df0ba0ec5 |
| dd148e08a0 |
| 3494341e3f |
| acccaa54d4 |
| 6863ee58d0 |
| 550401b85c |
| b02a0738e2 |
| 5d7d60cd96 |
| 1954166db6 |
| a2a9116b7a |
| a103f0643a |
| 474d519b9d |
| fb27bdc119 |
| 669dec0235 |
| 67ba63339a |
| 7b8c11ac13 |
| a767a3f8be |
| 8ce398c0e0 |
| 9fed651641 |
| 75a99c10ea |
| 857ceabcca |
| 203c337cf0 |
| 39ba131322 |
| 2fad9c969d |
| 47b4f3315f |
| fa28f13a45 |
| 5a6ea1e53a |
| ba2b2fc50a |
| cfe098d193 |
| d861888e5e |
| f0645d82d9 |
| 7a1cd2a39b |
| d2985f98ce |
| b58eb840f3 |
| 2bc6fbdf30 |
| e41b970d6e |
| e880126281 |
| db0ec1e69d |
| 2947face06 |
| 36bcc27d7f |
| 9e403e5043 |
| 98a6d978c6 |
| 4deeef2f00 |
| e008ee5a12 |
| a007ccb6a2 |
| 527cbebc6a |
| 4207783cdd |
| c90bd740de |
| 61f8bd9de7 |
| b5eaedc9bc |
| 58475c8673 |
| 78159707e2 |
| 779144a4d5 |
| 0da2e810fe |
| ed72ac6bff |
| 57b5cafb27 |
| 841191c44d |
| 67d7c81ef4 |
| d108a41f91 |
| 5fab8c216a |
| 15256ff91c |
| 6df4303bd6 |
| e3b855b956 |
@@ -0,0 +1,11 @@
# cargo-audit configuration
# https://github.com/rustsec/rustsec/tree/main/cargo-audit

[advisories]
# Transitive dependencies we can't control
ignore = [
    # rsa: Marvin Attack timing sidechannel (via sqlx-mysql, no fix available)
    "RUSTSEC-2023-0071",
    # derivative: unmaintained (via poise)
    "RUSTSEC-2024-0388",
]
@@ -0,0 +1,2 @@
[env]
TS_RS_EXPORT_DIR = { value = "web/src/lib/bindings/", relative = true }
@@ -0,0 +1,34 @@
{
  "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json",
  "changelog-sections": [
    { "type": "feat", "section": "Features" },
    { "type": "fix", "section": "Bug Fixes" },
    { "type": "perf", "section": "Performance Improvements" },
    { "type": "refactor", "section": "Code Refactoring" },
    { "type": "docs", "section": "Documentation" },
    { "type": "ci", "section": "Continuous Integration" },
    { "type": "build", "section": "Build System" },
    { "type": "chore", "section": "Miscellaneous" },
    { "type": "style", "section": "Styles", "hidden": true },
    { "type": "test", "section": "Tests", "hidden": true }
  ],
  "bump-minor-pre-major": true,
  "always-update": true,
  "bump-patch-for-minor-pre-major": true,
  "include-v-in-tag": true,
  "include-component-in-tag": false,
  "plugins": ["sentence-case"],
  "packages": {
    ".": {
      "release-type": "rust",
      "exclude-paths": [".vscode", "docs"],
      "extra-files": [
        {
          "type": "toml",
          "path": "Cargo.lock",
          "jsonpath": "$.package[?(@.name=='banner')].version"
        }
      ]
    }
  }
}
@@ -0,0 +1,3 @@
{
  ".": "0.6.2"
}
@@ -11,9 +11,9 @@ env:
   RUST_BACKTRACE: 1

 jobs:
-  check:
+  rust-quality:
+    name: Rust Quality
     runs-on: ubuntu-latest

     steps:
       - uses: actions/checkout@v4

@@ -22,44 +22,160 @@ jobs:
         with:
           components: rustfmt, clippy

       - name: Cache Rust dependencies
         uses: Swatinem/rust-cache@v2
         with:
           cache-on-failure: true

+      - name: Check formatting
+        run: |
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            cargo fmt --all -- --check
+          else
+            cargo fmt --all -- --check || echo "::warning::Rust formatting issues found (not failing on push)"
+          fi
+
+      - name: Clippy
+        run: cargo clippy --no-default-features -- -D warnings
+
+  frontend-quality:
+    name: Frontend Quality
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
       - name: Setup Bun
-        uses: oven-sh/setup-bun@v1
+        uses: oven-sh/setup-bun@v2
         with:
           bun-version: latest

       - name: Install dependencies
         working-directory: web
         run: bun install --frozen-lockfile

+      - name: Check formatting
+        working-directory: web
+        run: |
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            bun run format:check
+          else
+            bun run format:check || echo "::warning::Frontend formatting issues found (not failing on push)"
+          fi
+
+      - name: Type check
+        working-directory: web
+        run: bun run typecheck
+
+  rust-tests:
+    name: Rust Tests
+    runs-on: ubuntu-latest
+    services:
+      postgres:
+        image: postgres:17-alpine
+        env:
+          POSTGRES_USER: banner
+          POSTGRES_PASSWORD: banner
+          POSTGRES_DB: banner
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+    env:
+      DATABASE_URL: postgresql://banner:banner@localhost:5432/banner
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache Rust dependencies
+        uses: Swatinem/rust-cache@v2
+        with:
+          cache-on-failure: true
+
+      - name: Run tests
+        run: cargo test --no-default-features
+
+  frontend-tests:
+    name: Frontend Tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - name: Install dependencies
+        working-directory: web
+        run: bun install --frozen-lockfile
+
+      - name: Run tests
+        working-directory: web
+        run: bun run test
+
+  docker-build:
+    name: Docker Build
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: false
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+  security:
+    name: Security Scan
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      security-events: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install cargo-audit
+        uses: taiki-e/install-action@cargo-audit
+
+      - name: Rust security audit
+        run: cargo audit
+
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - name: Install frontend dependencies
+        working-directory: web
+        run: bun install --frozen-lockfile

-      - name: Check Rust formatting
-        run: cargo fmt --all -- --check
-
-      - name: Check TypeScript formatting
+      - name: Frontend security audit
         working-directory: web
-        run: bun run format:check
+        run: bun audit --audit-level=moderate
+        continue-on-error: true

-      - name: TypeScript type check
-        working-directory: web
-        run: bun run typecheck
+      - name: Trivy filesystem scan
+        uses: aquasecurity/trivy-action@master
+        with:
+          scan-type: fs
+          scan-ref: .
+          format: sarif
+          output: trivy-results.sarif
+          severity: CRITICAL,HIGH
+          exit-code: 0

-      - name: ESLint
-        working-directory: web
-        run: bun run lint
-
-      - name: Clippy
-        run: cargo clippy --all-features -- --deny warnings
-
-      - name: Run tests
-        run: cargo test --all-features
-
-      - name: Build frontend
-        working-directory: web
-        run: bun run build
-
-      - name: Build backend
-        run: cargo build --release --bin banner
+      - name: Upload Trivy results
+        uses: github/codeql-action/upload-sarif@v4
+        if: always() && hashFiles('trivy-results.sarif') != ''
+        with:
+          sarif_file: trivy-results.sarif
@@ -0,0 +1,27 @@
name: Release Please

on:
  workflow_dispatch:

  workflow_run:
    workflows: ["CI"]
    types:
      - completed
    branches:
      - master

permissions:
  contents: write
  pull-requests: write

jobs:
  release-please:
    name: Create Release PR
    runs-on: ubuntu-latest
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    steps:
      - uses: googleapis/release-please-action@v4
        with:
          token: ${{ secrets.RELEASE_PLEASE_TOKEN }}
          config-file: .github/release-please-config.json
          manifest-file: .github/release-please-manifest.json
@@ -1,5 +1,5 @@
.env
/target
/go/
.cargo/config.toml
src/scraper/README.md
/scripts/node_modules
@@ -0,0 +1,173 @@
# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [0.6.2](https://github.com/Xevion/Banner/compare/v0.6.1...v0.6.2) (2026-01-31)

### Features

* **web:** Add dynamic range sliders with consolidated search options API ([f5a639e](https://github.com/Xevion/Banner/commit/f5a639e88bfe03dfc635f25e06fc22208ee0c855))
* **web:** Implement aligned course codes with jetbrains mono ([567c4ae](https://github.com/Xevion/Banner/commit/567c4aec3ca7baaeb548fff2005d83f7e6228d79))
* **web:** Implement multi-dimensional course filtering system ([106bf23](https://github.com/Xevion/Banner/commit/106bf232c4b53f4ca8902a582f185e146878c54e))
* **web:** Implement smooth view transitions for search results ([5729a82](https://github.com/Xevion/Banner/commit/5729a821d54d95a00e9f4ba736a2bd884c0c409b))

### Bug Fixes

* **cli:** Add proper flag validation for check script ([2acf52a](https://github.com/Xevion/Banner/commit/2acf52a63b6dcd24ca826b99061bf7a51a9230b1))
* Re-add overflow hidden for page transitions, but with negative margin padding to avoid clipping ([9e825cd](https://github.com/Xevion/Banner/commit/9e825cd113bbc65c10f0386b5300b6aec50bf936))
* Separate Biome format and lint checks to enable auto-format ([ac8dbb2](https://github.com/Xevion/Banner/commit/ac8dbb2eefe79ec5d898cfa719e270f4713125d5))
* **web:** Prevent duplicate searches and background fetching on navigation ([5dd35ed](https://github.com/Xevion/Banner/commit/5dd35ed215d3d1f3603e67a2aa59eaddf619f5c9))
* **web:** Prevent interaction blocking during search transitions ([7f0f087](https://github.com/Xevion/Banner/commit/7f0f08725a668c5ac88c510f43791d90ce2f795e))

### Code Refactoring

* Migrate API responses from manual JSON to type-safe bindings ([0ee4e8a](https://github.com/Xevion/Banner/commit/0ee4e8a8bc1fe0b079fea84ac303674083b43a59))
* Standardize error responses with ApiError and ts-rs bindings ([239f7ee](https://github.com/Xevion/Banner/commit/239f7ee38cbc0e49d9041579fc9923fd4a4608bf))
* **web:** Consolidate tooltip implementations with shared components ([d91f7ab](https://github.com/Xevion/Banner/commit/d91f7ab34299b26dc12d629bf99d502ee05e7cfa))

### Miscellaneous

* Add aliases to Justfile ([02b18f0](https://github.com/Xevion/Banner/commit/02b18f0c66dc8b876452f35999c027475df52462))
* Add dev-build flag for embedded vite builds ([5134ae9](https://github.com/Xevion/Banner/commit/5134ae93881854ac722dc9e7f3f5040aee3e517a))

## [0.6.1](https://github.com/Xevion/Banner/compare/v0.6.0...v0.6.1) (2026-01-31)

### Features

* **build:** Auto-regenerate TypeScript bindings on source changes ([e203e8e](https://github.com/Xevion/Banner/commit/e203e8e182f7a0b0224a8f9e6bf79d15259215a2))
* **course:** Distinguish async from synchronous online courses ([8bfc14e](https://github.com/Xevion/Banner/commit/8bfc14e55c1bdf5acc2006096476e0b1eb1b7cc6))
* **scraper:** Improve dashboard clarity with stat tooltips ([1ad614d](https://github.com/Xevion/Banner/commit/1ad614dad03d3631a8d119203786718c814e72c7))
* **scraper:** Improve results visibility and loading states ([c533768](https://github.com/Xevion/Banner/commit/c53376836238f3aca92ac82cd5fd59a077bcceff))

### Bug Fixes

* Avoid status flickering on subjects table ([2689587](https://github.com/Xevion/Banner/commit/2689587dd53c572a65eeb91f74c737662e1f148b))
* **ci:** Add postgres container service for rust tests ([ebb7a97](https://github.com/Xevion/Banner/commit/ebb7a97c113fa1d4b61b8637dfe97cae5260075c))
* **ci:** Fix rust/frontend/security job failures and expand local checks ([dd148e0](https://github.com/Xevion/Banner/commit/dd148e08a0b6d5b7afe4ff614d7d6e4e4d0dfce6))
* **data:** Decode HTML entities in course titles and instructor names ([7d2255a](https://github.com/Xevion/Banner/commit/7d2255a988a23f6e1b1c8e7cb5a8ead833ad34da))
* **metrics:** Always emit baseline metrics on initial course insertion ([16039e0](https://github.com/Xevion/Banner/commit/16039e02a999c668d4969a43eb9ed1d4e8d370e1))

### Code Refactoring

* **terms:** Move term formatting from frontend to backend ([cbb0a51](https://github.com/Xevion/Banner/commit/cbb0a51bca9e4e0d6a8fcee90465c93943f2a30e))
* Use friendly term codes in URL query parameters ([550401b](https://github.com/Xevion/Banner/commit/550401b85ceb8a447e316209b479c69062c5b658))

### Continuous Integration

* Add Release Please automation for changelog and version management ([6863ee5](https://github.com/Xevion/Banner/commit/6863ee58d0a5778303af1b7626b2a9eda3043ca0))
* Split quality checks into parallel jobs with security scanning ([3494341](https://github.com/Xevion/Banner/commit/3494341e3fbe9ffd96b6fcd8abbe7f95ecec6f45))

### Miscellaneous

* Add ts-rs generated bindings ([2df0ba0](https://github.com/Xevion/Banner/commit/2df0ba0ec58155d73830a66132cb635dc819e8a9))
* Update frontend packages ([acccaa5](https://github.com/Xevion/Banner/commit/acccaa54d4455500db60d1b6437cad1c592445f1))

## [Unreleased]

## [0.6.0] - 2026-01-30

### Added

- User authentication system with Discord OAuth, sessions, admin roles, and login page with FAQ.
- Interactive timeline visualization with D3 canvas, pan/zoom, touch gestures, and enrollment aggregation API.
- Scraper analytics dashboard with timeseries charts, subject monitoring, and per-subject detail views.
- Adaptive scraper scheduling with admin endpoints for monitoring and configuration.
- Scrape job result persistence for effectiveness tracking.
- WebSocket support for real-time scrape job monitoring with connection status indicators.
- Course change auditing with field-level tracking and time-series metrics endpoint.
- Audit log UI with smart JSON diffing, conditional request caching, and auto-refresh.
- Calendar export web endpoints for ICS download and Google Calendar redirect.
- Confidence-based RMP matching with manual review workflow and admin instructor UI.
- RMP profile links and confidence-aware rating display.
- Name parsing and normalization for improved instructor-RMP matching.
- Mobile touch controls with gesture detection for timeline.
- Worker timeout protection and crash recovery for job queue.
- Build-time asset compression with encoding negotiation (gzip, brotli, zstd).
- Smart page transitions with theme-aware element transitions.
- Search duration and result count feedback.
- Root error page handling.
- Login page with FAQ section and improved styling.

### Changed

- Consolidated navigation with top nav bar and route groups.
- Centralized number formatting with locale-aware utility.
- Modernized Justfile commands and simplified service management.
- Persisted audit log state in module scope for cross-navigation caching.
- Relative time feedback and improved tooltip customization.

### Fixed

- Instructor/course mismatching via build-order-independent map for association.
- Page content clipping.
- Backend startup delays with retry logic in auth.
- Banner API timeouts increased to handle slow responses.
- i64 serialization for JavaScript compatibility, fixing avatar URL display.
- Frontend build ordering with `-e` embed flag in Justfile.
- Login page centering and unnecessary scrollbar.
- ts-rs serde warnings.

## [0.5.0] - 2026-01-29

### Added

- Multi-select subject filtering with searchable comboboxes.
- Smart instructor name abbreviation for compact table display.
- Delivery mode indicators and tooltips in location column.
- Page selector dropdown with animated pagination controls.
- FLIP animations for smooth table row transitions during pagination.
- Time tooltip with detailed meeting schedule and day abbreviations.
- Reusable SimpleTooltip component for consistent UI hints.

### Changed

- Consolidated query logic and eliminated N+1 instructor loads via batch fetching.
- Consolidated menu snippets and strengthened component type safety.
- Enhanced table scrolling with OverlayScrollbars and theme-aware styling.
- Eliminated initial theme flash on page load.

## [0.4.0] - 2026-01-28

### Added

- Web-based course search UI with interactive data table, multi-column sorting, and column visibility controls.
- TypeScript type bindings generated from Rust types via ts-rs.
- RateMyProfessors integration: bulk professor sync via GraphQL and inline rating display in search results.
- Course detail expansion panel with enrollment, meeting times, and instructor info.
- OverlayScrollbars integration for styled, theme-aware scrollable areas.
- Pagination component for navigating large search result sets.
- Footer component with version display.
- API endpoints: `/api/courses/search`, `/api/courses/:term/:crn`, `/api/terms`, `/api/subjects`, `/api/reference/:category`.
- Frontend API client with typed request/response handling and test coverage.
- Course formatting utilities with comprehensive unit tests.

## [0.3.4] - 2026-01

### Added

- Live service status tracking on web dashboard with auto-refresh and health indicators.
- DB operation extraction for improved testability.
- Unit test suite foundation covering core functionality.
- Docker support for PostgreSQL development environment.
- ICS calendar export with comprehensive holiday exclusion coverage.
- Google Calendar link generation with recurrence rules and meeting details.
- Job queue with priority-based scheduling for background scraping.
- Rate limiting with burst allowance for Banner API requests.
- Session management and caching for Banner API interactions.
- Discord bot commands: search, terms, ics, gcal.
- Intelligent scraping system with priority queues and retry tracking.

### Changed

- Type consolidation and dead code removal across the codebase.
Generated · +502 −323 · File diff suppressed because it is too large
@@ -1,6 +1,6 @@
 [package]
 name = "banner"
-version = "0.3.4"
+version = "0.6.2"
 edition = "2024"
 default-run = "banner"

@@ -11,7 +11,7 @@ embed-assets = ["dep:rust-embed", "dep:mime_guess"]
 [dependencies]
 anyhow = "1.0.99"
 async-trait = "0.1"
-axum = "0.8.4"
+axum = { version = "0.8.4", features = ["ws"] }
 bitflags = { version = "2.9.4", features = ["serde"] }
 chrono = { version = "0.4.42", features = ["serde"] }
 compile-time = "0.2.0"
@@ -48,13 +48,20 @@ url = "2.5"
 governor = "0.10.1"
 serde_path_to_error = "0.1.17"
 num-format = "0.4.4"
-tower-http = { version = "0.6.0", features = ["cors", "trace", "timeout"] }
+tower-http = { version = "0.6.0", features = ["cors", "trace", "timeout", "compression-full"] }
 rust-embed = { version = "8.0", features = ["include-exclude"], optional = true }
 mime_guess = { version = "2.0", optional = true }
 clap = { version = "4.5", features = ["derive"] }
 rapidhash = "4.1.0"
 yansi = "1.0.1"
 extension-traits = "2"
+ts-rs = { version = "11.1.0", features = ["chrono-impl", "serde-compat", "serde-json-impl", "no-serde-warnings"] }
+html-escape = "0.2.13"
+axum-extra = { version = "0.12.5", features = ["query"] }
+urlencoding = "2.1.3"
+chrono-tz = "0.10.4"
+htmlize = { version = "1.0.6", features = ["unescape"] }
+unicode-normalization = "0.1.25"

[dev-dependencies]
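
The newly enabled `ws` feature unlocks axum's WebSocket extractor (the changelog's real-time job monitoring). A minimal sketch of what the feature provides, illustrative only and not the project's actual route:

```rust
// Sketch only: requires axum 0.8 with the "ws" feature, as added above.
use axum::{
    extract::ws::{WebSocket, WebSocketUpgrade},
    response::Response,
    routing::any,
    Router,
};

async fn ws_handler(ws: WebSocketUpgrade) -> Response {
    ws.on_upgrade(handle_socket)
}

async fn handle_socket(mut socket: WebSocket) {
    // Echo frames back until the client disconnects.
    while let Some(Ok(msg)) = socket.recv().await {
        if socket.send(msg).await.is_err() {
            break;
        }
    }
}

fn router() -> Router {
    // "/api/ws" is a placeholder path, not the project's actual endpoint.
    Router::new().route("/api/ws", any(ws_handler))
}
```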
@@ -7,6 +7,9 @@ FROM oven/bun:1 AS frontend-builder

 WORKDIR /app

+# Install zstd for pre-compression
+RUN apt-get update && apt-get install -y --no-install-recommends zstd && rm -rf /var/lib/apt/lists/*
+
 # Copy backend Cargo.toml for build-time version retrieval
 COPY ./Cargo.toml ./

@@ -19,8 +22,8 @@ RUN bun install --frozen-lockfile
 # Copy frontend source code
 COPY ./web ./

-# Build frontend
-RUN bun run build
+# Build frontend, then pre-compress static assets (gzip, brotli, zstd)
+RUN bun run build && bun run scripts/compress-assets.ts

 # --- Chef Base Stage ---
 FROM lukemathwalker/cargo-chef:latest-rust-${RUST_VERSION} AS chef
@@ -112,5 +115,5 @@ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
 ENV HOSTS=0.0.0.0,[::]

 # Implicitly uses PORT environment variable
-# temporary: running without 'scraper' service
-CMD ["sh", "-c", "exec ./banner --services web,bot"]
+# Runs all services: web, bot, and scraper
+CMD ["sh", "-c", "exec ./banner"]
@@ -1,198 +1,54 @@
 set dotenv-load
-default_services := "bot,web,scraper"

+# Aliases
+alias c := check
+alias d := dev
+alias t := test
+alias f := format
+alias fmt := format
+alias s := search
+alias bld := build
+alias bind := bindings
+alias b := bun
+
 default:
     just --list

-# Run all checks (format, clippy, tests, lint)
-check:
-    cargo fmt --all -- --check
-    cargo clippy --all-features -- --deny warnings
-    cargo nextest run
-    bun run --cwd web typecheck
-    bun run --cwd web lint
-    bun run --cwd web test --run
-
-# Run all tests (Rust + frontend)
-test: test-rust test-web
-
-# Run only Rust tests
-test-rust *ARGS:
-    cargo nextest run {{ARGS}}
-
-# Run only frontend tests
-test-web:
-    bun run --cwd web test --run
-
-# Quick check: clippy + tests only (skips formatting)
-check-quick:
-    cargo clippy --all-features -- --deny warnings
-    cargo nextest run
-    bun run --cwd web typecheck
-
-# Run the Banner API search demo (hits live UTSA API, ~20s)
-search *ARGS:
-    cargo run -q --bin search -- {{ARGS}}
+# Run all checks in parallel. Pass -f/--fix to auto-format and fix first.
+check *flags:
+    bun scripts/check.ts {{flags}}

 # Format all Rust and TypeScript code
 format:
     cargo fmt --all
     bun run --cwd web format

-# Check formatting without modifying (CI-friendly)
-format-check:
-    cargo fmt --all -- --check
-    bun run --cwd web format:check
+# Run tests. Usage: just test [rust|web|<nextest filter args>]
+test *args:
+    bun scripts/test.ts {{args}}
+
+# Generate TypeScript bindings from Rust types (ts-rs)
+bindings:
+    bun scripts/bindings.ts
+
+# Run the Banner API search demo (hits live UTSA API, ~20s)
+search *ARGS:
+    cargo run -q --bin search -- {{ARGS}}
+
+# Dev server. Flags: -f(rontend) -b(ackend) -W(no-watch) -n(o-build) -r(elease) -e(mbed) --tracing <fmt>
+# Pass args to binary after --: just dev -n -- --some-flag
+dev *flags:
+    bun scripts/dev.ts {{flags}}
+
+# Production build. Flags: -d(ebug) -f(rontend-only) -b(ackend-only)
+build *flags:
+    bun scripts/build.ts {{flags}}

 # Start PostgreSQL in Docker and update .env with connection string
-[script("bun")]
+# Commands: start (default), reset, rm
 db cmd="start":
-    const fs = await import("fs/promises");
-    const { spawnSync } = await import("child_process");
-
-    const NAME = "banner-postgres";
-    const USER = "banner";
-    const PASS = "banner";
-    const DB = "banner";
-    const PORT = "59489";
-    const ENV_FILE = ".env";
-    const CMD = "{{cmd}}";
-
-    const run = (args) => spawnSync("docker", args, { encoding: "utf8" });
-    const getContainer = () => {
-        const res = run(["ps", "-a", "--filter", `name=^${NAME}$`, "--format", "json"]);
-        return res.stdout.trim() ? JSON.parse(res.stdout) : null;
-    };
-
-    const updateEnv = async () => {
-        const url = `postgresql://${USER}:${PASS}@localhost:${PORT}/${DB}`;
-        try {
-            let content = await fs.readFile(ENV_FILE, "utf8");
-            content = content.includes("DATABASE_URL=")
-                ? content.replace(/DATABASE_URL=.*$/m, `DATABASE_URL=${url}`)
-                : content.trim() + `\nDATABASE_URL=${url}\n`;
-            await fs.writeFile(ENV_FILE, content);
-        } catch {
-            await fs.writeFile(ENV_FILE, `DATABASE_URL=${url}\n`);
-        }
-    };
-
-    const create = () => {
-        run(["run", "-d", "--name", NAME, "-e", `POSTGRES_USER=${USER}`,
-            "-e", `POSTGRES_PASSWORD=${PASS}`, "-e", `POSTGRES_DB=${DB}`,
-            "-p", `${PORT}:5432`, "postgres:17-alpine"]);
-        console.log("created");
-    };
-
-    const container = getContainer();
-
-    if (CMD === "rm") {
-        if (!container) process.exit(0);
-        run(["stop", NAME]);
-        run(["rm", NAME]);
-        console.log("removed");
-    } else if (CMD === "reset") {
-        if (!container) create();
-        else {
-            run(["exec", NAME, "psql", "-U", USER, "-d", "postgres", "-c", `DROP DATABASE IF EXISTS ${DB}`]);
-            run(["exec", NAME, "psql", "-U", USER, "-d", "postgres", "-c", `CREATE DATABASE ${DB}`]);
-            console.log("reset");
-        }
-        await updateEnv();
-    } else {
-        if (!container) {
-            create();
-        } else if (container.State !== "running") {
-            run(["start", NAME]);
-            console.log("started");
-        } else {
-            console.log("running");
-        }
-        await updateEnv();
-    }
-
-# Auto-reloading frontend server
-frontend:
-    bun run --cwd web dev
-
-# Production build of frontend
-build-frontend:
-    bun run --cwd web build
-
-# Auto-reloading backend server (with embedded assets)
-backend *ARGS:
-    bacon --headless run -- -- {{ARGS}}
-
-# Auto-reloading backend server (no embedded assets, for dev proxy mode)
-backend-dev *ARGS:
-    bacon --headless run -- --no-default-features -- {{ARGS}}
-
-# Production build
-build:
-    bun run --cwd web build
-    cargo build --release --bin banner
-
-# Run auto-reloading development build with release characteristics
-dev-build *ARGS='--services web --tracing pretty': build-frontend
-    bacon --headless run -- --profile dev-release -- {{ARGS}}
-
-# Auto-reloading development build: Vite frontend + backend (no embedded assets, proxies to Vite)
-[parallel]
-dev *ARGS='--services web,bot': frontend (backend-dev ARGS)
-
-# Smoke test: start web server, hit API endpoints, verify responses
-[script("bash")]
-test-smoke port="18080":
-    set -euo pipefail
-    PORT={{port}}
-
-    cleanup() { kill "$SERVER_PID" 2>/dev/null; wait "$SERVER_PID" 2>/dev/null; }
-
-    # Start server in background
-    PORT=$PORT cargo run -q --no-default-features -- --services web --tracing json &
-    SERVER_PID=$!
-    trap cleanup EXIT
-
-    # Wait for server to be ready (up to 15s)
-    for i in $(seq 1 30); do
-        if curl -sf "http://localhost:$PORT/api/health" >/dev/null 2>&1; then break; fi
-        if ! kill -0 "$SERVER_PID" 2>/dev/null; then echo "FAIL: server exited early"; exit 1; fi
-        sleep 0.5
-    done
-
-    PASS=0; FAIL=0
-    check() {
-        local label="$1" url="$2" expected="$3"
-        body=$(curl -sf "$url") || { echo "FAIL: $label - request failed"; FAIL=$((FAIL+1)); return; }
-        if echo "$body" | grep -q "$expected"; then
-            echo "PASS: $label"
-            PASS=$((PASS+1))
-        else
-            echo "FAIL: $label - expected '$expected' in: $body"
-            FAIL=$((FAIL+1))
-        fi
-    }
-
-    check "GET /api/health" "http://localhost:$PORT/api/health" '"status":"healthy"'
-    check "GET /api/status" "http://localhost:$PORT/api/status" '"version"'
-    check "GET /api/metrics" "http://localhost:$PORT/api/metrics" '"banner_api"'
-
-    # Test 404
-    STATUS=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$PORT/api/nonexistent")
-    if [ "$STATUS" = "404" ]; then
-        echo "PASS: 404 on unknown route"
-        PASS=$((PASS+1))
-    else
-        echo "FAIL: expected 404, got $STATUS"
-        FAIL=$((FAIL+1))
-    fi
-
-    echo ""
-    echo "Results: $PASS passed, $FAIL failed"
-    [ "$FAIL" -eq 0 ]
-
-alias b := bun
+    bun scripts/db.ts {{cmd}}
+
 bun *ARGS:
     cd web && bun {{ ARGS }}
@@ -29,8 +29,7 @@ The application consists of three modular services that can be run independently
 bun install --cwd web # Install frontend dependencies
 cargo build # Build the backend

-just dev # Runs auto-reloading dev build
-just dev --services bot,web # Runs auto-reloading dev build, running only the bot and web services
+just dev # Runs auto-reloading dev build with all services
+just dev-build # Development build with release characteristics (frontend is embedded, non-auto-reloading)

 just build # Production build that embeds assets
@@ -4,10 +4,33 @@

 The Banner project is built as a multi-service application with the following components:

-- **Discord Bot Service**: Handles Discord interactions and commands
-- **Web Service**: Serves the React frontend and provides API endpoints
-- **Scraper Service**: Background data collection and synchronization
-- **Database Layer**: PostgreSQL for persistent storage
+- **Discord Bot Service**: Handles Discord interactions and commands (Serenity/Poise)
+- **Web Service**: Axum HTTP server serving the SvelteKit frontend and REST API endpoints
+- **Scraper Service**: Background data collection and synchronization with job queue
+- **Database Layer**: PostgreSQL 17 for persistent storage (SQLx with compile-time verification)
+- **RateMyProfessors Client**: GraphQL-based bulk sync of professor ratings
+
+### Frontend Stack
+
+- **SvelteKit** with Svelte 5 runes (`$state`, `$derived`, `$effect`)
+- **Tailwind CSS v4** via `@tailwindcss/vite`
+- **bits-ui** for headless UI primitives (comboboxes, tooltips, dropdowns)
+- **TanStack Table** for interactive data tables with sorting and column control
+- **OverlayScrollbars** for styled, theme-aware scrollable areas
+- **ts-rs** generates TypeScript type bindings from Rust structs
+
+### API Endpoints
+
+| Endpoint | Description |
+|---|---|
+| `GET /api/health` | Health check |
+| `GET /api/status` | Service status, version, and commit hash |
+| `GET /api/metrics` | Basic metrics |
+| `GET /api/courses/search` | Paginated course search with filters (term, subject, query, open-only, sort) |
+| `GET /api/courses/:term/:crn` | Single course detail with instructors and RMP ratings |
+| `GET /api/terms` | Available terms from reference cache |
+| `GET /api/subjects?term=` | Subjects for a term, ordered by enrollment |
+| `GET /api/reference/:category` | Reference data lookups (campuses, instructional methods, etc.) |
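
To make the table concrete, here is a minimal sketch of calling the search endpoint from Rust. The query parameters and response fields are assumptions inferred from the table and the ts-rs bindings mentioned above, not the project's exact shapes:

```rust
// Hypothetical client call; requires reqwest with the "blocking" and
// "json" features, plus serde/serde_json. Field names are assumed.
use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct SearchResponse {
    total: u64,
    courses: Vec<serde_json::Value>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Term code, subject, and port are illustrative values.
    let url = "http://localhost:8080/api/courses/search?term=202510&subject=CS&query=networks";
    let resp: SearchResponse = reqwest::blocking::get(url)?.json()?;
    println!("{} total results", resp.total);
    Ok(())
}
```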
## Technical Analysis
||||
@@ -1,58 +0,0 @@
|
||||
# Features
|
||||
|
||||
## Current Features
|
||||
|
||||
### Discord Bot Commands
|
||||
|
||||
- **search** - Search for courses with various filters (title, course code, keywords)
|
||||
- **terms** - List available terms or search for a specific term
|
||||
- **time** - Get meeting times for a specific course (CRN)
|
||||
- **ics** - Generate ICS calendar file for a course with holiday exclusions
|
||||
- **gcal** - Generate Google Calendar link for a course
|
||||
|
||||
### Data Pipeline
|
||||
|
||||
- Intelligent scraping system with priority queues
|
||||
- Rate limiting and burst handling
|
||||
- Background data synchronization
|
||||
|
||||
## Feature Wishlist
|
||||
|
||||
### Commands
|
||||
|
||||
- ICS Download (get a ICS download of your classes with location & timing perfectly - set for every class you're in)
|
||||
- Classes Now (find classes happening)
|
||||
- Autocomplete
|
||||
- Class Title
|
||||
- Course Number
|
||||
- Term/Part of Term
|
||||
- Professor
|
||||
- Attribute
|
||||
- Component Pagination
|
||||
- RateMyProfessor Integration (Linked/Embedded)
|
||||
- Smart term selection (i.e. Summer 2024 will be selected automatically when opened)
|
||||
- Rate Limiting (bursting with global/user limits)
|
||||
- DMs Integration (allow usage of the bot in DMs)
|
||||
- Class Change Notifications (get notified when details about a class change)
|
||||
- Multi-term Querying (currently the backend for searching is kinda weird)
|
||||
- Full Autocomplete for Every Search Option
|
||||
- Metrics, Log Query, Privileged Error Feedback
|
||||
- Search for Classes
|
||||
- Major, Professor, Location, Name, Time of Day
|
||||
- Subscribe to Classes
|
||||
- Availability (seat, pre-seat)
|
||||
- Waitlist Movement
|
||||
- Detail Changes (meta, time, location, seats, professor)
|
||||
- `time` Start, End, Days of Week
|
||||
- `seats` Any change in seat/waitlist data
|
||||
- `meta`
|
||||
- Lookup via Course Reference Number (CRN)
|
||||
- Smart Time of Day Handling
|
||||
- "2 PM" -> Start within 2:00 PM to 2:59 PM
|
||||
- "2-3 PM" -> Start within 2:00 PM to 3:59 PM
|
||||
- "ends by 2 PM" -> Ends within 12:00 AM to 2:00 PM
|
||||
- "after 2 PM" -> Start within 2:01 PM to 11:59 PM
|
||||
- "before 2 PM" -> Ends within 12:00 AM to 1:59 PM
|
||||
- Get By Section Command
|
||||
- CS 4393 001 =>
|
||||
- Will require SQL to be able to search for a class by its section number
|
||||
@@ -4,7 +4,8 @@ This folder contains detailed documentation for the Banner project. This file acts

 ## Files

-- [`FEATURES.md`](FEATURES.md) - Current features, implemented functionality, and future roadmap
+- [`CHANGELOG.md`](CHANGELOG.md) - Notable changes by version
+- [`ROADMAP.md`](ROADMAP.md) - Planned features and priorities
 - [`BANNER.md`](BANNER.md) - General API documentation on the Banner system
 - [`ARCHITECTURE.md`](ARCHITECTURE.md) - Technical implementation details, system design, and analysis
@@ -0,0 +1,44 @@
# Roadmap

## Now

- **Discord bot revival** - Audit and fix all existing commands (search, terms, ics, gcal) against the current data model. Add test coverage. Bot has been untouched since ~0.3.4 and commands may be broken.
- **Notification and subscription system** - Subscribe to courses and get alerts on seat availability, waitlist movement, and detail changes (time, location, professor, seats). Deliver via Discord bot and web dashboard.
- **Mobile/responsive redesign** - Hamburger nav for sidebar, responsive table column hiding, mobile-friendly admin pages. Timeline is the only area with solid mobile support; most pages need work.
- **Professor name search filter** - Filter search results by instructor. Backend code exists but is commented out.
- **Search field autocomplete** - Typeahead for course titles, course numbers, professors, and terms.
- **Large component extraction** - Break down CourseTable, Instructors page, and TimelineCanvas into smaller, testable subcomponents.

## Soon

- **Bot slash command parity** - Keep Discord bot commands in sync with web features: timeline summaries, RMP lookups, audit log highlights, notification management via bot.
- **E2E test suite** - Playwright tests for critical user flows: search, login, admin pages, timeline interaction.
- **Settings page** - Replace placeholder with theme preferences, notification settings, default term/subject selection.
- **Profile enhancements** - Expand from read-only stub to subscription management, saved searches, and course watchlists.
- **Smart time-of-day search parsing** - Support natural queries like "2 PM", "ends by 2 PM", "after 2 PM" mapped to time ranges.
- **Multi-term querying** - Query across multiple terms in a single search instead of one at a time.
- **Historical analytics visualization** - Build trend UI on top of existing course metrics and timeline API. Fill-rate charts per course or professor.
- **Schedule builder** - Visual weekly schedule tool for assembling a conflict-free course lineup. Timeline visualization serves as a foundation.

## Eventually

- **API rate limiting** - Rate limiter on public API endpoints. Needed before any public or external exposure.
- **Bulk admin operations** - Batch RMP match/reject, bulk user management, data export from admin pages.
- **Degree audit helper** - Map available courses to degree requirements and suggest what to take next.
- **DM support** - Allow the Discord bot to respond in direct messages, not just guild channels.
- **"Classes Now" command** - Find classes currently in session based on the current day and time.
- **Privileged error feedback** - Detailed error information surfaced to bot admins when commands fail.

## Done

- **Interactive timeline visualization** - D3 canvas with pan/zoom, touch gestures, and enrollment aggregation API. *(0.6.0)*
- **Scraper analytics dashboard** - Timeseries charts, subject monitoring, adaptive scheduling, and admin endpoints. *(0.6.0)*
- **WebSocket job monitoring** - Real-time scrape job queue with live connection status indicators. *(0.6.0)*
- **Course change audit log** - Field-level change tracking with smart diffing, conditional caching, and auto-refresh. *(0.6.0)*
- **User authentication system** - Discord OAuth, sessions, admin roles, and login page. *(0.6.0)*
- **Dynamic scraper scheduling** - Adaptive scrape intervals based on change frequency and course volume. *(0.6.0)*
- **Metrics dashboard** - Scraper and service metrics surfaced on the web dashboard. *(0.6.0)*
- **Subject/major search filter** - Multi-select subject filtering with searchable comboboxes. *(0.5.0)*
- **Web course search UI** - Browser-based course search with interactive data table, sorting, pagination, and column controls. *(0.4.0)*
- **RateMyProfessor integration** - Bulk professor sync via GraphQL with inline ratings in search results. *(0.4.0)*
- **Test coverage expansion** - Unit tests for course formatting, API client, query builder, CLI args, and config parsing. *(0.3.4–0.4.0)*
@@ -0,0 +1,83 @@
-- ============================================================
-- Expand courses table with rich Banner API fields
-- ============================================================

-- Section identifiers
ALTER TABLE courses ADD COLUMN sequence_number VARCHAR;
ALTER TABLE courses ADD COLUMN part_of_term VARCHAR;

-- Schedule & delivery (store codes, descriptions come from reference_data)
ALTER TABLE courses ADD COLUMN instructional_method VARCHAR;
ALTER TABLE courses ADD COLUMN campus VARCHAR;

-- Credit hours
ALTER TABLE courses ADD COLUMN credit_hours INTEGER;
ALTER TABLE courses ADD COLUMN credit_hour_low INTEGER;
ALTER TABLE courses ADD COLUMN credit_hour_high INTEGER;

-- Cross-listing
ALTER TABLE courses ADD COLUMN cross_list VARCHAR;
ALTER TABLE courses ADD COLUMN cross_list_capacity INTEGER;
ALTER TABLE courses ADD COLUMN cross_list_count INTEGER;

-- Section linking
ALTER TABLE courses ADD COLUMN link_identifier VARCHAR;
ALTER TABLE courses ADD COLUMN is_section_linked BOOLEAN;

-- JSONB columns for 1-to-many data
ALTER TABLE courses ADD COLUMN meeting_times JSONB NOT NULL DEFAULT '[]'::jsonb;
ALTER TABLE courses ADD COLUMN attributes JSONB NOT NULL DEFAULT '[]'::jsonb;

-- ============================================================
-- Full-text search support
-- ============================================================

-- Generated tsvector for word-based search on title
ALTER TABLE courses ADD COLUMN title_search tsvector
    GENERATED ALWAYS AS (to_tsvector('simple', coalesce(title, ''))) STORED;

CREATE INDEX idx_courses_title_search ON courses USING GIN (title_search);

-- Trigram index for substring/ILIKE search on title
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX idx_courses_title_trgm ON courses USING GIN (title gin_trgm_ops);

-- ============================================================
-- New filter indexes
-- ============================================================

CREATE INDEX idx_courses_instructional_method ON courses(instructional_method);
CREATE INDEX idx_courses_campus ON courses(campus);

-- Composite for "open CS courses in Fall 2024" pattern
CREATE INDEX idx_courses_term_subject_avail ON courses(term_code, subject, max_enrollment, enrollment);

-- ============================================================
-- Instructors table (normalized, deduplicated)
-- ============================================================

CREATE TABLE instructors (
    banner_id VARCHAR PRIMARY KEY,
    display_name VARCHAR NOT NULL,
    email VARCHAR
);

CREATE TABLE course_instructors (
    course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
    instructor_id VARCHAR NOT NULL REFERENCES instructors(banner_id) ON DELETE CASCADE,
    is_primary BOOLEAN NOT NULL DEFAULT false,
    PRIMARY KEY (course_id, instructor_id)
);

CREATE INDEX idx_course_instructors_instructor ON course_instructors(instructor_id);

-- ============================================================
-- Reference data table (all code→description lookups)
-- ============================================================

CREATE TABLE reference_data (
    category VARCHAR NOT NULL,
    code VARCHAR NOT NULL,
    description VARCHAR NOT NULL,
    PRIMARY KEY (category, code)
);
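
For context on how the two title indexes divide the work, here is a hedged sketch of a title search; the application's real query builder is not part of this diff, and the column choices are illustrative:

```rust
// Sketch only: word-based matches go through the generated tsvector
// (idx_courses_title_search), while the ILIKE fallback can be served
// by the trigram index (idx_courses_title_trgm).
use sqlx::PgPool;

async fn search_titles(pool: &PgPool, term: &str) -> sqlx::Result<Vec<(i32, String)>> {
    sqlx::query_as(
        "SELECT id, title FROM courses \
         WHERE title_search @@ plainto_tsquery('simple', $1) \
            OR title ILIKE '%' || $1 || '%' \
         LIMIT 50",
    )
    .bind(term)
    .fetch_all(pool)
    .await
}
```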
@@ -0,0 +1,17 @@
-- RMP professor data (bulk synced from RateMyProfessors)
CREATE TABLE rmp_professors (
    legacy_id INTEGER PRIMARY KEY,
    graphql_id VARCHAR NOT NULL,
    first_name VARCHAR NOT NULL,
    last_name VARCHAR NOT NULL,
    department VARCHAR,
    avg_rating REAL,
    avg_difficulty REAL,
    num_ratings INTEGER NOT NULL DEFAULT 0,
    would_take_again_pct REAL,
    last_synced_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Link Banner instructors to RMP professors
ALTER TABLE instructors ADD COLUMN rmp_legacy_id INTEGER REFERENCES rmp_professors(legacy_id);
ALTER TABLE instructors ADD COLUMN rmp_match_status VARCHAR NOT NULL DEFAULT 'pending';
@@ -0,0 +1,7 @@
-- Add queued_at column to track when a job last entered the "ready to pick up" state.
-- For fresh jobs this equals execute_at; for retried jobs it is updated to NOW().
ALTER TABLE scrape_jobs
    ADD COLUMN queued_at TIMESTAMPTZ NOT NULL DEFAULT NOW();

-- Backfill existing rows: set queued_at = execute_at (best approximation)
UPDATE scrape_jobs SET queued_at = execute_at;
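
A sketch of the retry path the comment above implies; the column refresh is the point, everything else (the function name, the assumption that `scrape_jobs.id` is a bigint) is hypothetical:

```rust
// Hypothetical requeue: refresh queued_at so queue-latency measurements
// reflect the latest wait, not the job's original enqueue time.
use sqlx::PgPool;

async fn requeue(pool: &PgPool, job_id: i64) -> sqlx::Result<()> {
    sqlx::query("UPDATE scrape_jobs SET queued_at = NOW() WHERE id = $1")
        .bind(job_id)
        .execute(pool)
        .await?;
    Ok(())
}
```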
@@ -0,0 +1,19 @@
CREATE TABLE users (
    discord_id BIGINT PRIMARY KEY,
    discord_username TEXT NOT NULL,
    discord_avatar_hash TEXT,
    is_admin BOOLEAN NOT NULL DEFAULT false,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE TABLE user_sessions (
    id TEXT PRIMARY KEY,
    user_id BIGINT NOT NULL REFERENCES users(discord_id) ON DELETE CASCADE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    expires_at TIMESTAMPTZ NOT NULL,
    last_active_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX idx_user_sessions_user_id ON user_sessions(user_id);
CREATE INDEX idx_user_sessions_expires_at ON user_sessions(expires_at);
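
The `expires_at` index supports lookups like the following sketch; the project's real auth middleware is not in this diff, so the function is purely illustrative:

```rust
// Hypothetical session check: resolve a session cookie to a user id,
// skipping expired rows (served by idx_user_sessions_expires_at).
use sqlx::PgPool;

async fn user_for_session(pool: &PgPool, session_id: &str) -> sqlx::Result<Option<i64>> {
    sqlx::query_scalar(
        "SELECT user_id FROM user_sessions WHERE id = $1 AND expires_at > NOW()",
    )
    .bind(session_id)
    .fetch_optional(pool)
    .await
}
```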
@@ -0,0 +1,80 @@
-- Collapse instructors from per-banner-id rows to per-person rows (deduped by lowercased email).
-- All existing RMP matches are deliberately dropped; the new auto-matcher will re-score from scratch.

-- 1. Create the new instructors table (1 row per person, keyed by email)
CREATE TABLE instructors_new (
    id SERIAL PRIMARY KEY,
    display_name VARCHAR NOT NULL,
    email VARCHAR NOT NULL,
    rmp_professor_id INTEGER UNIQUE REFERENCES rmp_professors(legacy_id),
    rmp_match_status VARCHAR NOT NULL DEFAULT 'unmatched',
    CONSTRAINT instructors_email_unique UNIQUE (email)
);

-- 2. Populate from existing data, deduplicating by lowercased email.
--    For each email, pick the display_name from the row with the highest banner_id
--    (deterministic tiebreaker). All rmp fields start fresh (NULL / 'unmatched').
INSERT INTO instructors_new (display_name, email)
SELECT DISTINCT ON (LOWER(email))
    display_name,
    LOWER(email)
FROM instructors
ORDER BY LOWER(email), banner_id DESC;

-- 3. Create the new course_instructors table with integer FK and banner_id column
CREATE TABLE course_instructors_new (
    course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
    instructor_id INTEGER NOT NULL REFERENCES instructors_new(id) ON DELETE CASCADE,
    banner_id VARCHAR NOT NULL,
    is_primary BOOLEAN NOT NULL DEFAULT false,
    PRIMARY KEY (course_id, instructor_id)
);

-- 4. Populate from old data, mapping old banner_id → new instructor id via lowercased email.
--    Use DISTINCT ON to handle cases where multiple old banner_ids for the same person
--    taught the same course (would cause duplicate (course_id, instructor_id) pairs).
INSERT INTO course_instructors_new (course_id, instructor_id, banner_id, is_primary)
SELECT DISTINCT ON (ci.course_id, inew.id)
    ci.course_id,
    inew.id,
    ci.instructor_id, -- old banner_id
    ci.is_primary
FROM course_instructors ci
JOIN instructors iold ON iold.banner_id = ci.instructor_id
JOIN instructors_new inew ON inew.email = LOWER(iold.email)
ORDER BY ci.course_id, inew.id, ci.is_primary DESC;

-- 5. Drop old tables (course_instructors first due to FK dependency)
DROP TABLE course_instructors;
DROP TABLE instructors;

-- 6. Rename new tables into place
ALTER TABLE instructors_new RENAME TO instructors;
ALTER TABLE course_instructors_new RENAME TO course_instructors;

-- 7. Rename constraints to match the final table names
ALTER TABLE instructors RENAME CONSTRAINT instructors_new_pkey TO instructors_pkey;
ALTER TABLE instructors RENAME CONSTRAINT instructors_new_rmp_professor_id_key TO instructors_rmp_professor_id_key;
ALTER TABLE course_instructors RENAME CONSTRAINT course_instructors_new_pkey TO course_instructors_pkey;

-- 8. Recreate indexes
CREATE INDEX idx_course_instructors_instructor ON course_instructors (instructor_id);
CREATE INDEX idx_instructors_rmp_status ON instructors (rmp_match_status);
CREATE INDEX idx_instructors_email ON instructors (email);

-- 9. Create rmp_match_candidates table
CREATE TABLE rmp_match_candidates (
    id SERIAL PRIMARY KEY,
    instructor_id INTEGER NOT NULL REFERENCES instructors(id) ON DELETE CASCADE,
    rmp_legacy_id INTEGER NOT NULL REFERENCES rmp_professors(legacy_id),
    score REAL NOT NULL,
    score_breakdown JSONB NOT NULL DEFAULT '{}',
    status VARCHAR NOT NULL DEFAULT 'pending',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    resolved_at TIMESTAMPTZ,
    resolved_by BIGINT REFERENCES users(discord_id),
    CONSTRAINT uq_candidate_pair UNIQUE (instructor_id, rmp_legacy_id)
);

CREATE INDEX idx_match_candidates_instructor ON rmp_match_candidates (instructor_id);
CREATE INDEX idx_match_candidates_status ON rmp_match_candidates (status);
@@ -0,0 +1,24 @@
-- Multi-RMP profile support: allow many RMP profiles per instructor.
-- Each RMP profile still links to at most one instructor (rmp_legacy_id UNIQUE).

-- 1. Create junction table
CREATE TABLE instructor_rmp_links (
    id SERIAL PRIMARY KEY,
    instructor_id INTEGER NOT NULL REFERENCES instructors(id) ON DELETE CASCADE,
    rmp_legacy_id INTEGER NOT NULL UNIQUE REFERENCES rmp_professors(legacy_id),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_by BIGINT REFERENCES users(discord_id),
    source VARCHAR NOT NULL DEFAULT 'manual' -- 'auto' | 'manual'
);

CREATE INDEX idx_instructor_rmp_links_instructor ON instructor_rmp_links (instructor_id);

-- 2. Migrate existing matches
INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, source)
SELECT id, rmp_professor_id,
       CASE rmp_match_status WHEN 'auto' THEN 'auto' ELSE 'manual' END
FROM instructors
WHERE rmp_professor_id IS NOT NULL;

-- 3. Drop old column (and its unique constraint)
ALTER TABLE instructors DROP COLUMN rmp_professor_id;
@@ -0,0 +1,31 @@
-- Scrape job results log: one row per completed (or failed) job for effectiveness tracking.
CREATE TABLE scrape_job_results (
    id BIGSERIAL PRIMARY KEY,
    target_type target_type NOT NULL,
    payload JSONB NOT NULL,
    priority scrape_priority NOT NULL,

    -- Timing
    queued_at TIMESTAMPTZ NOT NULL,
    started_at TIMESTAMPTZ NOT NULL,
    completed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    duration_ms INT NOT NULL,

    -- Outcome
    success BOOLEAN NOT NULL,
    error_message TEXT,
    retry_count INT NOT NULL DEFAULT 0,

    -- Effectiveness (NULL when success = false)
    courses_fetched INT,
    courses_changed INT,
    courses_unchanged INT,
    audits_generated INT,
    metrics_generated INT
);

CREATE INDEX idx_scrape_job_results_target_time
    ON scrape_job_results (target_type, completed_at);

CREATE INDEX idx_scrape_job_results_completed
    ON scrape_job_results (completed_at);
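
As an illustration of the kind of effectiveness rollup this log enables (a sketch, not an endpoint from this diff):

```rust
// Hypothetical rollup: what fraction of fetched courses actually changed,
// per target type, over the last day (served by the target/time index).
use sqlx::PgPool;

async fn change_rate_last_day(pool: &PgPool) -> sqlx::Result<Vec<(String, f64)>> {
    sqlx::query_as(
        "SELECT target_type::TEXT, \
                COALESCE(AVG(courses_changed::FLOAT8 / NULLIF(courses_fetched, 0)), 0.0) \
         FROM scrape_job_results \
         WHERE success AND completed_at > NOW() - INTERVAL '1 day' \
         GROUP BY target_type",
    )
    .fetch_all(pool)
    .await
}
```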
@@ -0,0 +1,13 @@
-- Indexes for the timeline aggregation endpoint.
-- The query buckets course_metrics by 15-minute intervals, joins to courses
-- for subject, and aggregates enrollment. These indexes support efficient
-- time-range scans and the join.

-- Primary access pattern: scan course_metrics by timestamp range
CREATE INDEX IF NOT EXISTS idx_course_metrics_timestamp
    ON course_metrics (timestamp);

-- Composite index for the DISTINCT ON (bucket, course_id) ordered by timestamp DESC
-- to efficiently pick the latest metric per course per bucket.
CREATE INDEX IF NOT EXISTS idx_course_metrics_course_timestamp
    ON course_metrics (course_id, timestamp DESC);
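
A sketch of the aggregation shape those comments describe, written against these indexes; the endpoint's actual SQL may differ:

```rust
// Hypothetical 15-minute bucketing: keep the latest metric per
// (bucket, course), then sum enrollment per subject per bucket.
// Requires sqlx with the chrono feature; date_bin needs Postgres 14+.
use chrono::{DateTime, Utc};
use sqlx::PgPool;

async fn enrollment_by_subject(
    pool: &PgPool,
    from: DateTime<Utc>,
    to: DateTime<Utc>,
) -> sqlx::Result<Vec<(DateTime<Utc>, String, i64)>> {
    sqlx::query_as(
        r#"
        WITH latest AS (
            SELECT DISTINCT ON (date_bin('15 minutes', timestamp, TIMESTAMPTZ 'epoch'), course_id)
                   date_bin('15 minutes', timestamp, TIMESTAMPTZ 'epoch') AS bucket,
                   course_id,
                   enrollment
            FROM course_metrics
            WHERE timestamp BETWEEN $1 AND $2
            ORDER BY date_bin('15 minutes', timestamp, TIMESTAMPTZ 'epoch'), course_id, timestamp DESC
        )
        SELECT l.bucket, c.subject, SUM(l.enrollment)::BIGINT
        FROM latest l
        JOIN courses c ON c.id = l.course_id
        GROUP BY l.bucket, c.subject
        ORDER BY l.bucket
        "#,
    )
    .bind(from)
    .bind(to)
    .fetch_all(pool)
    .await
}
```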
@@ -0,0 +1,5 @@
-- Add structured first/last name columns to instructors.
-- Populated by Rust-side backfill (parse_banner_name) since we need
-- HTML entity decoding and suffix extraction that SQL can't handle well.
ALTER TABLE instructors ADD COLUMN first_name VARCHAR;
ALTER TABLE instructors ADD COLUMN last_name VARCHAR;
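
The Rust-side backfill itself is not part of this diff. The following is a hypothetical sketch of the kind of parsing the comment describes (not the project's actual `parse_banner_name`), using the `htmlize` dependency added in Cargo.toml:

```rust
// Hypothetical name split: decode HTML entities, split "Last, First",
// and naively strip a couple of suffixes. Illustrative only.
fn parse_name(display_name: &str) -> (String, String) {
    // Banner names can arrive HTML-escaped, e.g. "O&#39;Brien, Patrick Jr."
    let decoded = htmlize::unescape(display_name);
    let (last, first) = decoded
        .split_once(", ")
        .unwrap_or((decoded.trim(), ""));
    let first = first.trim().trim_end_matches(" Jr.").trim_end_matches(" III");
    (first.trim().to_string(), last.trim().to_string())
}
```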
@@ -0,0 +1,32 @@
/**
 * Generate TypeScript bindings from Rust types (ts-rs).
 *
 * Usage: bun scripts/bindings.ts
 */

import { readdirSync, writeFileSync, rmSync } from "fs";
import { run } from "./lib/proc";

const BINDINGS_DIR = "web/src/lib/bindings";

// Build test binary first (slow part) — fail before deleting anything
run(["cargo", "test", "--no-run"]);

// Clean slate
rmSync(BINDINGS_DIR, { recursive: true, force: true });

// Run the export (fast, already compiled)
run(["cargo", "test", "export_bindings"]);

// Auto-generate index.ts from emitted .ts files
const types = readdirSync(BINDINGS_DIR)
  .filter((f) => f.endsWith(".ts") && f !== "index.ts")
  .map((f) => f.replace(/\.ts$/, ""))
  .sort();

writeFileSync(
  `${BINDINGS_DIR}/index.ts`,
  types.map((t) => `export type { ${t} } from "./${t}";`).join("\n") + "\n",
);

console.log(`Generated ${BINDINGS_DIR}/index.ts (${types.length} types)`);
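
On the Rust side, the bindings this script exports come from ts-rs derives. A minimal sketch with a hypothetical type (the real exported types are not shown in this diff); the `#[ts(export)]` attribute is what generates the `export_bindings` tests the script runs, and output lands in the TS_RS_EXPORT_DIR configured earlier:

```rust
// Hypothetical example of a ts-rs annotated type. Running
// `cargo test export_bindings` writes a matching .ts file.
use serde::Serialize;
use ts_rs::TS;

#[derive(Serialize, TS)]
#[ts(export)]
pub struct CourseSummary {
    pub crn: String,
    pub title: String,
    pub enrollment: i32,
}
```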
@@ -0,0 +1,45 @@
/**
 * Production build.
 *
 * Usage: bun scripts/build.ts [flags]
 *
 * Flags:
 *   -d, --debug          Debug build instead of release
 *   -f, --frontend-only  Frontend only
 *   -b, --backend-only   Backend only
 */

import { parseFlags, c } from "./lib/fmt";
import { run } from "./lib/proc";

const { flags } = parseFlags(
  process.argv.slice(2),
  {
    debug: "bool",
    "frontend-only": "bool",
    "backend-only": "bool",
  } as const,
  { d: "debug", f: "frontend-only", b: "backend-only" },
  { debug: false, "frontend-only": false, "backend-only": false },
);

if (flags["frontend-only"] && flags["backend-only"]) {
  console.error("Cannot use -f and -b together");
  process.exit(1);
}

const buildFrontend = !flags["backend-only"];
const buildBackend = !flags["frontend-only"];
const profile = flags.debug ? "debug" : "release";

if (buildFrontend) {
  console.log(c("1;36", "→ Building frontend..."));
  run(["bun", "run", "--cwd", "web", "build"]);
}

if (buildBackend) {
  console.log(c("1;36", `→ Building backend (${profile})...`));
  const cmd = ["cargo", "build", "--bin", "banner"];
  if (!flags.debug) cmd.push("--release");
  run(cmd);
}
@@ -0,0 +1,21 @@
{
  "lockfileVersion": 1,
  "configVersion": 1,
  "workspaces": {
    "": {
      "name": "banner-scripts",
      "devDependencies": {
        "@types/bun": "^1.3.8",
      },
    },
  },
  "packages": {
    "@types/bun": ["@types/bun@1.3.8", "", { "dependencies": { "bun-types": "1.3.8" } }, "sha512-3LvWJ2q5GerAXYxO2mffLTqOzEu5qnhEAlh48Vnu8WQfnmSwbgagjGZV6BoHKJztENYEDn6QmVd949W4uESRJA=="],

    "@types/node": ["@types/node@25.1.0", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-t7frlewr6+cbx+9Ohpl0NOTKXZNV9xHRmNOvql47BFJKcEG1CxtxlPEEe+gR9uhVWM4DwhnvTF110mIL4yP9RA=="],

    "bun-types": ["bun-types@1.3.8", "", { "dependencies": { "@types/node": "*" } }, "sha512-fL99nxdOWvV4LqjmC+8Q9kW3M4QTtTR1eePs94v5ctGqU8OeceWrSUaRw3JYb7tU3FkMIAjkueehrHPPPGKi5Q=="],

    "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
  }
}
@@ -0,0 +1,250 @@
/**
 * Run all project checks in parallel. Auto-fixes formatting when safe.
 *
 * Usage: bun scripts/check.ts [--fix|-f]
 */

import { c, elapsed, isStderrTTY } from "./lib/fmt";
import { run, runPiped, spawnCollect, raceInOrder, type CollectResult } from "./lib/proc";
import { existsSync, statSync, readdirSync, writeFileSync, rmSync } from "fs";

const args = process.argv.slice(2);
let fix = false;

for (const arg of args) {
  if (arg === "-f" || arg === "--fix") {
    fix = true;
  } else {
    console.error(`Unknown flag: ${arg}`);
    process.exit(1);
  }
}

// ---------------------------------------------------------------------------
// Fix path: format + clippy fix, then fall through to verification
// ---------------------------------------------------------------------------

if (fix) {
  console.log(c("1;36", "→ Fixing..."));
  run(["cargo", "fmt", "--all"]);
  run(["bun", "run", "--cwd", "web", "format"]);
  run([
    "cargo", "clippy", "--all-features", "--fix", "--allow-dirty", "--allow-staged",
    "--", "--deny", "warnings",
  ]);
  console.log(c("1;36", "→ Verifying..."));
}

// ---------------------------------------------------------------------------
// Ensure TypeScript bindings are up-to-date before frontend checks
// ---------------------------------------------------------------------------

{
  const BINDINGS_DIR = "web/src/lib/bindings";

  let newestSrcMtime = 0;
  for (const file of new Bun.Glob("src/**/*.rs").scanSync(".")) {
    const mt = statSync(file).mtimeMs;
    if (mt > newestSrcMtime) newestSrcMtime = mt;
  }
  for (const f of ["Cargo.toml", "Cargo.lock"]) {
    if (existsSync(f)) {
      const mt = statSync(f).mtimeMs;
      if (mt > newestSrcMtime) newestSrcMtime = mt;
    }
  }

  let newestBindingMtime = 0;
  if (existsSync(BINDINGS_DIR)) {
    for (const file of new Bun.Glob("**/*").scanSync(BINDINGS_DIR)) {
      const mt = statSync(`${BINDINGS_DIR}/${file}`).mtimeMs;
      if (mt > newestBindingMtime) newestBindingMtime = mt;
    }
  }

  const stale = newestBindingMtime === 0 || newestSrcMtime > newestBindingMtime;
  if (stale) {
    const t = Date.now();
    process.stdout.write(
      c("1;36", "→ Regenerating TypeScript bindings (Rust sources changed)...") + "\n",
    );
    run(["cargo", "test", "--no-run"]);
    rmSync(BINDINGS_DIR, { recursive: true, force: true });
    run(["cargo", "test", "export_bindings"]);

    const types = readdirSync(BINDINGS_DIR)
      .filter((f) => f.endsWith(".ts") && f !== "index.ts")
      .map((f) => f.replace(/\.ts$/, ""))
      .sort();
    writeFileSync(
      `${BINDINGS_DIR}/index.ts`,
      types.map((t) => `export type { ${t} } from "./${t}";`).join("\n") + "\n",
    );

    process.stdout.write(c("32", "✓ bindings") + ` (${elapsed(t)}s, ${types.length} types)\n`);
  } else {
    process.stdout.write(c("2", "· bindings up-to-date, skipped") + "\n");
  }
}

// ---------------------------------------------------------------------------
// Check definitions
// ---------------------------------------------------------------------------

interface Check {
  name: string;
  cmd: string[];
  hint?: string;
}

const checks: Check[] = [
  {
    name: "rust-format",
    cmd: ["cargo", "fmt", "--all", "--", "--check"],
    hint: "Run 'cargo fmt --all' to see and fix formatting issues.",
  },
  { name: "rust-lint", cmd: ["cargo", "clippy", "--all-features", "--", "--deny", "warnings"] },
  { name: "rust-check", cmd: ["cargo", "check", "--all-features"] },
  { name: "rust-test", cmd: ["cargo", "nextest", "run", "-E", "not test(export_bindings)"] },
  { name: "svelte-check", cmd: ["bun", "run", "--cwd", "web", "check"] },
  { name: "web-format", cmd: ["bun", "run", "--cwd", "web", "format:check"] },
  { name: "web-test", cmd: ["bun", "run", "--cwd", "web", "test"] },
  { name: "actionlint", cmd: ["actionlint"] },
];

// ---------------------------------------------------------------------------
// Domain groups: formatter → { peers, format command, sanity rechecks }
// ---------------------------------------------------------------------------

const domains: Record<
  string,
  {
    peers: string[];
    format: () => ReturnType<typeof runPiped>;
    recheck: Check[];
  }
> = {
  "rust-format": {
    peers: ["rust-lint", "rust-check", "rust-test"],
    format: () => runPiped(["cargo", "fmt", "--all"]),
    recheck: [
      { name: "rust-format", cmd: ["cargo", "fmt", "--all", "--", "--check"] },
      { name: "rust-check", cmd: ["cargo", "check", "--all-features"] },
    ],
  },
  "web-format": {
    peers: ["svelte-check", "web-test"],
    format: () => runPiped(["bun", "run", "--cwd", "web", "format"]),
    recheck: [
      { name: "web-format", cmd: ["bun", "run", "--cwd", "web", "format:check"] },
      { name: "svelte-check", cmd: ["bun", "run", "--cwd", "web", "check"] },
    ],
  },
};

// ---------------------------------------------------------------------------
// Phase 1: run all checks in parallel, display in completion order
// ---------------------------------------------------------------------------

const start = Date.now();
const remaining = new Set(checks.map((ch) => ch.name));

const promises = checks.map(async (check) => ({
  ...check,
  ...(await spawnCollect(check.cmd, start)),
}));

const interval = isStderrTTY
  ? setInterval(() => {
      process.stderr.write(`\r\x1b[K${elapsed(start)}s [${Array.from(remaining).join(", ")}]`);
    }, 100)
  : null;

const results: Record<string, Check & CollectResult> = {};

await raceInOrder(promises, checks, (r) => {
  results[r.name] = r;
  remaining.delete(r.name);
  if (isStderrTTY) process.stderr.write("\r\x1b[K");

  if (r.exitCode !== 0) {
    process.stdout.write(c("31", `✗ ${r.name}`) + ` (${r.elapsed}s)\n`);
    if (r.hint) {
      process.stdout.write(c("2", `  ${r.hint}`) + "\n");
    } else {
      if (r.stdout) process.stdout.write(r.stdout);
      if (r.stderr) process.stderr.write(r.stderr);
    }
  } else {
    process.stdout.write(c("32", `✓ ${r.name}`) + ` (${r.elapsed}s)\n`);
  }
});

if (interval) clearInterval(interval);
if (isStderrTTY) process.stderr.write("\r\x1b[K");

// ---------------------------------------------------------------------------
// Phase 2: auto-fix formatting if it's the only failure in its domain
// ---------------------------------------------------------------------------

const autoFixedDomains = new Set<string>();

for (const [fmtName, domain] of Object.entries(domains)) {
  const fmtResult = results[fmtName];
  if (!fmtResult || fmtResult.exitCode === 0) continue;
  if (!domain.peers.every((p) => results[p]?.exitCode === 0)) continue;

  process.stdout.write(
    "\n" +
      c("1;36", `→ Auto-formatting ${fmtName} (peers passed, only formatting failed)...`) +
      "\n",
  );
  const fmtOut = domain.format();
  if (fmtOut.exitCode !== 0) {
    process.stdout.write(c("31", `  ✗ ${fmtName} formatter failed`) + "\n");
    if (fmtOut.stdout) process.stdout.write(fmtOut.stdout);
    if (fmtOut.stderr) process.stderr.write(fmtOut.stderr);
    continue;
  }

  const recheckStart = Date.now();
  const recheckPromises = domain.recheck.map(async (ch) => ({
    ...ch,
    ...(await spawnCollect(ch.cmd, recheckStart)),
  }));

  let recheckFailed = false;
  await raceInOrder(recheckPromises, domain.recheck, (r) => {
    if (r.exitCode !== 0) {
      recheckFailed = true;
      process.stdout.write(c("31", `  ✗ ${r.name}`) + ` (${r.elapsed}s)\n`);
      if (r.stdout) process.stdout.write(r.stdout);
      if (r.stderr) process.stderr.write(r.stderr);
    } else {
      process.stdout.write(c("32", `  ✓ ${r.name}`) + ` (${r.elapsed}s)\n`);
    }
  });

  if (!recheckFailed) {
    process.stdout.write(c("32", `  ✓ ${fmtName} auto-fix succeeded`) + "\n");
    autoFixedDomains.add(fmtName);
  } else {
    process.stdout.write(c("31", `  ✗ ${fmtName} auto-fix failed sanity check`) + "\n");
  }
}

// ---------------------------------------------------------------------------
// Final verdict
// ---------------------------------------------------------------------------

const finalFailed = Object.entries(results).some(
  ([name, r]) => r.exitCode !== 0 && !autoFixedDomains.has(name),
);

if (autoFixedDomains.size > 0 && !finalFailed) {
  process.stdout.write(
    "\n" + c("1;32", "✓ All checks passed (formatting was auto-fixed)") + "\n",
  );
}

process.exit(finalFailed ? 1 : 0);
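The auto-fix predicate in isolation (an illustrative sketch, not code from this repo):

// Auto-fix fires only when the formatter check failed AND every peer passed.
const shouldAutoFix = (fmtExit: number, peerExits: number[]): boolean =>
  fmtExit !== 0 && peerExits.every((code) => code === 0);

console.log(shouldAutoFix(1, [0, 0, 0])); // true  (formatting-only failure)
console.log(shouldAutoFix(1, [0, 1, 0])); // false (a peer failed too)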
@@ -0,0 +1,79 @@
/**
 * PostgreSQL Docker container management.
 *
 * Usage: bun scripts/db.ts [start|reset|rm]
 */

import { readFile, writeFile } from "fs/promises";
import { spawnSync } from "child_process";

const NAME = "banner-postgres";
const USER = "banner";
const PASS = "banner";
const DB = "banner";
const PORT = "59489";
const ENV_FILE = ".env";

const cmd = process.argv[2] || "start";

function docker(...args: string[]) {
  return spawnSync("docker", args, { encoding: "utf8" });
}

function getContainer() {
  const res = docker("ps", "-a", "--filter", `name=^${NAME}$`, "--format", "json");
  return res.stdout.trim() ? JSON.parse(res.stdout) : null;
}

async function updateEnv() {
  const url = `postgresql://${USER}:${PASS}@localhost:${PORT}/${DB}`;
  try {
    let content = await readFile(ENV_FILE, "utf8");
    content = content.includes("DATABASE_URL=")
      ? content.replace(/DATABASE_URL=.*$/m, `DATABASE_URL=${url}`)
      : content.trim() + `\nDATABASE_URL=${url}\n`;
    await writeFile(ENV_FILE, content);
  } catch {
    await writeFile(ENV_FILE, `DATABASE_URL=${url}\n`);
  }
}

function create() {
  docker(
    "run", "-d", "--name", NAME,
    "-e", `POSTGRES_USER=${USER}`,
    "-e", `POSTGRES_PASSWORD=${PASS}`,
    "-e", `POSTGRES_DB=${DB}`,
    "-p", `${PORT}:5432`,
    "postgres:17-alpine",
  );
  console.log("created");
}

const container = getContainer();

if (cmd === "rm") {
  if (!container) process.exit(0);
  docker("stop", NAME);
  docker("rm", NAME);
  console.log("removed");
} else if (cmd === "reset") {
  if (!container) {
    create();
  } else {
    docker("exec", NAME, "psql", "-U", USER, "-d", "postgres", "-c", `DROP DATABASE IF EXISTS ${DB}`);
    docker("exec", NAME, "psql", "-U", USER, "-d", "postgres", "-c", `CREATE DATABASE ${DB}`);
    console.log("reset");
  }
  await updateEnv();
} else {
  if (!container) {
    create();
  } else if (container.State !== "running") {
    docker("start", NAME);
    console.log("started");
  } else {
    console.log("running");
  }
  await updateEnv();
}
+119
@@ -0,0 +1,119 @@
/**
 * Dev server orchestrator.
 *
 * Usage: bun scripts/dev.ts [flags] [-- passthrough-args]
 *
 * Flags:
 *   -f, --frontend-only  Frontend only (Vite dev server)
 *   -b, --backend-only   Backend only (bacon watch)
 *   -W, --no-watch       Build once + run (no watch)
 *   -n, --no-build       Run last compiled binary (no rebuild)
 *   -r, --release        Use release profile
 *   -e, --embed          Embed assets (implies -b)
 *   -d, --dev-build      Use dev build for frontend (faster, no minification)
 *   --tracing <fmt>      Tracing format (default: pretty)
 */

import { existsSync } from "fs";
import { parseFlags, c } from "./lib/fmt";
import { run, ProcessGroup } from "./lib/proc";

const { flags, passthrough } = parseFlags(
  process.argv.slice(2),
  {
    "frontend-only": "bool",
    "backend-only": "bool",
    "no-watch": "bool",
    "no-build": "bool",
    release: "bool",
    embed: "bool",
    "dev-build": "bool",
    tracing: "string",
  } as const,
  { f: "frontend-only", b: "backend-only", W: "no-watch", n: "no-build", r: "release", e: "embed", d: "dev-build" },
  {
    "frontend-only": false,
    "backend-only": false,
    "no-watch": false,
    "no-build": false,
    release: false,
    embed: false,
    "dev-build": false,
    tracing: "pretty",
  },
);

let frontendOnly = flags["frontend-only"];
let backendOnly = flags["backend-only"];
let noWatch = flags["no-watch"];
const noBuild = flags["no-build"];
const release = flags.release;
const embed = flags.embed;
const devBuild = flags["dev-build"];
const tracing = flags.tracing as string;

// -e implies -b
if (embed) backendOnly = true;
// -n implies -W
if (noBuild) noWatch = true;

if (frontendOnly && backendOnly) {
  console.error("Cannot use -f and -b together (or -e implies -b)");
  process.exit(1);
}

const runFrontend = !backendOnly;
const runBackend = !frontendOnly;
const profile = release ? "release" : "dev";
const profileDir = release ? "release" : "debug";
const group = new ProcessGroup();

// Build frontend first when embedding assets
if (embed && !noBuild) {
  const buildMode = devBuild ? "development" : "production";
  console.log(c("1;36", `→ Building frontend (${buildMode}, for embedding)...`));
  const buildArgs = ["bun", "run", "--cwd", "web", "build"];
  if (devBuild) buildArgs.push("--", "--mode", "development");
  run(buildArgs);
}

// Frontend: Vite dev server
if (runFrontend) {
  group.spawn(["bun", "run", "--cwd", "web", "dev"]);
}

// Backend
if (runBackend) {
  const backendArgs = ["--tracing", tracing, ...passthrough];
  const bin = `target/${profileDir}/banner`;

  if (noWatch) {
    if (!noBuild) {
      console.log(c("1;36", `→ Building backend (${profile})...`));
      const cargoArgs = ["cargo", "build", "--bin", "banner"];
      if (!embed) cargoArgs.push("--no-default-features");
      if (release) cargoArgs.push("--release");
      run(cargoArgs);
    }

    if (!existsSync(bin)) {
      console.error(`Binary not found: ${bin}`);
      console.error(`Run 'just build${release ? "" : " -d"}' first, or remove -n to use bacon.`);
      await group.killAll();
      process.exit(1);
    }

    console.log(c("1;36", `→ Running ${bin} (no watch)`));
    group.spawn([bin, ...backendArgs]);
  } else {
    // Bacon watch mode
    const baconArgs = ["bacon", "--headless", "run", "--"];
    if (!embed) baconArgs.push("--no-default-features");
    if (release) baconArgs.push("--profile", "release");
    baconArgs.push("--", ...backendArgs);
    group.spawn(baconArgs);
  }
}

const code = await group.waitForFirst();
process.exit(code);
@@ -0,0 +1,96 @@
/**
 * Shared formatting, color, and CLI argument parsing utilities.
 */

const isTTY = process.stdout.isTTY ?? false;
const isStderrTTY = process.stderr.isTTY ?? false;

/** ANSI color wrapper — no-op when stdout is not a TTY. */
export function c(code: string, text: string): string {
  return isTTY ? `\x1b[${code}m${text}\x1b[0m` : text;
}

/** Elapsed seconds since `start` as a formatted string. */
export function elapsed(start: number): string {
  return ((Date.now() - start) / 1000).toFixed(1);
}

/** Whether stderr is a TTY (for progress spinners). */
export { isStderrTTY };

/**
 * Parse short and long CLI flags from a flat argument array.
 *
 * `spec` maps flag names to their type:
 *  - `"bool"` — presence sets the value to `true`
 *  - `"string"` — consumes the next argument as the value
 *
 * Short flags can be combined: `-fbW` expands to `-f -b -W`.
 * Long flags: `--frontend-only`, `--tracing pretty`.
 * `--` terminates flag parsing; remaining args go to `passthrough`.
 *
 * Returns `{ flags, passthrough }`.
 */
export function parseFlags<T extends Record<string, "bool" | "string">>(
  argv: string[],
  spec: T,
  shortMap: Record<string, keyof T>,
  defaults: { [K in keyof T]: T[K] extends "bool" ? boolean : string },
): { flags: typeof defaults; passthrough: string[] } {
  const flags = { ...defaults };
  const passthrough: string[] = [];
  let i = 0;

  while (i < argv.length) {
    const arg = argv[i];

    if (arg === "--") {
      passthrough.push(...argv.slice(i + 1));
      break;
    }

    if (arg.startsWith("--")) {
      const name = arg.slice(2);
      if (!(name in spec)) {
        console.error(`Unknown flag: ${arg}`);
        process.exit(1);
      }
      if (spec[name] === "string") {
        (flags as Record<string, unknown>)[name] = argv[++i] || "";
      } else {
        (flags as Record<string, unknown>)[name] = true;
      }
    } else if (arg.startsWith("-") && arg.length > 1) {
      for (const ch of arg.slice(1)) {
        const mapped = shortMap[ch];
        if (!mapped) {
          console.error(`Unknown flag: -${ch}`);
          process.exit(1);
        }
        if (spec[mapped as string] === "string") {
          (flags as Record<string, unknown>)[mapped as string] = argv[++i] || "";
        } else {
          (flags as Record<string, unknown>)[mapped as string] = true;
        }
      }
    } else {
      console.error(`Unknown argument: ${arg}`);
      process.exit(1);
    }

    i++;
  }

  return { flags, passthrough };
}

/**
 * Split a raw argument string into whitespace-separated tokens,
 * dropping empty entries.
 */
export function parseArgs(raw: string): string[] {
  return raw
    .trim()
    .split(/\s+/)
    .filter(Boolean);
}
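A usage sketch for parseFlags mirroring the call sites in build.ts and dev.ts (illustrative; the argv literal is made up):

import { parseFlags } from "./lib/fmt";

// "-fW --tracing json -- --port 8080" parses to:
//   flags = { "frontend-only": true, "no-watch": true, tracing: "json" }
//   passthrough = ["--port", "8080"]
const { flags, passthrough } = parseFlags(
  ["-fW", "--tracing", "json", "--", "--port", "8080"],
  { "frontend-only": "bool", "no-watch": "bool", tracing: "string" } as const,
  { f: "frontend-only", W: "no-watch" },
  { "frontend-only": false, "no-watch": false, tracing: "pretty" },
);
console.log(flags, passthrough);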
@@ -0,0 +1,113 @@
/**
 * Shared process spawning utilities for project scripts.
 */

import { elapsed } from "./fmt";

export interface CollectResult {
  stdout: string;
  stderr: string;
  exitCode: number;
  elapsed: string;
}

/** Sync spawn with inherited stdio. Exits process on failure. */
export function run(cmd: string[]): void {
  const proc = Bun.spawnSync(cmd, { stdio: ["inherit", "inherit", "inherit"] });
  if (proc.exitCode !== 0) process.exit(proc.exitCode);
}

/** Sync spawn with piped stdio. Returns captured output. */
export function runPiped(cmd: string[]): { exitCode: number; stdout: string; stderr: string } {
  const proc = Bun.spawnSync(cmd, { stdout: "pipe", stderr: "pipe" });
  return {
    exitCode: proc.exitCode,
    stdout: proc.stdout?.toString() ?? "",
    stderr: proc.stderr?.toString() ?? "",
  };
}

/**
 * Async spawn that collects stdout/stderr. Returns a result object.
 * Catches spawn failures (e.g. missing binary) instead of throwing.
 */
export async function spawnCollect(cmd: string[], startTime: number): Promise<CollectResult> {
  try {
    const proc = Bun.spawn(cmd, {
      env: { ...process.env, FORCE_COLOR: "1" },
      stdout: "pipe",
      stderr: "pipe",
    });
    const [stdout, stderr] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
    ]);
    await proc.exited;
    return { stdout, stderr, exitCode: proc.exitCode, elapsed: elapsed(startTime) };
  } catch (err) {
    return { stdout: "", stderr: String(err), exitCode: 1, elapsed: elapsed(startTime) };
  }
}

/**
 * Race all promises, yielding results in completion order via callback.
 * Spawn failures become results, not unhandled rejections.
 */
export async function raceInOrder<T extends { name: string }>(
  promises: Promise<T & CollectResult>[],
  fallbacks: T[],
  onResult: (r: T & CollectResult) => void,
): Promise<void> {
  const tagged = promises.map((p, i) =>
    p
      .then((r) => ({ i, r }))
      .catch((err) => ({
        i,
        r: {
          ...fallbacks[i],
          exitCode: 1,
          stdout: "",
          stderr: String(err),
          elapsed: "?",
        } as T & CollectResult,
      })),
  );
  for (let n = 0; n < promises.length; n++) {
    const { i, r } = await Promise.race(tagged);
    tagged[i] = new Promise(() => {}); // sentinel: never resolves
    onResult(r);
  }
}

/** Spawn managed processes with coordinated cleanup on exit. */
export class ProcessGroup {
  private procs: ReturnType<typeof Bun.spawn>[] = [];

  constructor() {
    const cleanup = async () => {
      await this.killAll();
      process.exit(0);
    };
    process.on("SIGINT", cleanup);
    process.on("SIGTERM", cleanup);
  }

  spawn(cmd: string[]): ReturnType<typeof Bun.spawn> {
    const proc = Bun.spawn(cmd, { stdio: ["inherit", "inherit", "inherit"] });
    this.procs.push(proc);
    return proc;
  }

  async killAll(): Promise<void> {
    for (const p of this.procs) p.kill();
    await Promise.all(this.procs.map((p) => p.exited));
  }

  /** Wait for any process to exit, kill the rest, return exit code. */
  async waitForFirst(): Promise<number> {
    const results = this.procs.map((p, i) => p.exited.then((code) => ({ i, code })));
    const first = await Promise.race(results);
    await this.killAll();
    return first.code;
  }
}
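A minimal raceInOrder demonstration (illustrative; the fake checks below are stand-ins for real spawnCollect results):

import { raceInOrder, type CollectResult } from "./lib/proc";

const fake = (name: string, ms: number): Promise<{ name: string } & CollectResult> =>
  new Promise((resolve) =>
    setTimeout(() => resolve({ name, stdout: "", stderr: "", exitCode: 0, elapsed: "0.0" }), ms),
  );

const defs = [{ name: "a" }, { name: "b" }, { name: "c" }];
// onResult fires in completion order: b, then a, then c.
await raceInOrder([fake("a", 30), fake("b", 10), fake("c", 50)], defs, (r) => {
  console.log(r.name);
});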
@@ -0,0 +1,8 @@
{
  "name": "banner-scripts",
  "private": true,
  "type": "module",
  "devDependencies": {
    "@types/bun": "^1.3.8"
  }
}
@@ -0,0 +1,20 @@
/**
 * Run project tests.
 *
 * Usage: bun scripts/test.ts [rust|web|<nextest filter args>]
 */

import { run } from "./lib/proc";

const input = process.argv.slice(2).join(" ").trim();

if (input === "web") {
  run(["bun", "run", "--cwd", "web", "test"]);
} else if (input === "rust") {
  run(["cargo", "nextest", "run", "-E", "not test(export_bindings)"]);
} else if (input === "") {
  run(["cargo", "nextest", "run", "-E", "not test(export_bindings)"]);
  run(["bun", "run", "--cwd", "web", "test"]);
} else {
  run(["cargo", "nextest", "run", ...input.split(/\s+/)]);
}
@@ -0,0 +1,15 @@
{
  "compilerOptions": {
    "target": "ESNext",
    "module": "ESNext",
    "moduleResolution": "bundler",
    "strict": true,
    "noEmit": true,
    "skipLibCheck": true,
    "types": ["bun-types"],
    "paths": {
      "#lib/*": ["./lib/*"]
    }
  },
  "include": ["**/*.ts"]
}
+38
-3
@@ -6,14 +6,15 @@ use crate::services::bot::BotService;
use crate::services::manager::ServiceManager;
use crate::services::web::WebService;
use crate::state::AppState;
use crate::web::auth::AuthConfig;
use anyhow::Context;
use figment::value::UncasedStr;
use figment::{Figment, providers::Env};
use sqlx::postgres::PgPoolOptions;
use std::process::ExitCode;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Context;
use tracing::{error, info};
use tracing::{error, info, warn};

/// Main application struct containing all necessary components
pub struct App {
@@ -69,6 +70,11 @@ impl App {
            .context("Failed to run database migrations")?;
        info!("Database migrations completed successfully");

        // Backfill structured name columns for existing instructors
        if let Err(e) = crate::data::names::backfill_instructor_names(&db_pool).await {
            warn!(error = ?e, "Failed to backfill instructor names (non-fatal)");
        }

        // Create BannerApi and AppState
        let banner_api = BannerApi::new_with_config(
            config.banner_base_url.clone(),
@@ -79,6 +85,24 @@ impl App {
        let banner_api_arc = Arc::new(banner_api);
        let app_state = AppState::new(banner_api_arc.clone(), db_pool.clone());

        // Load reference data cache from DB (may be empty on first run)
        if let Err(e) = app_state.load_reference_cache().await {
            info!(error = ?e, "Could not load reference cache on startup (may be empty)");
        }

        // Load schedule cache for timeline enrollment queries
        if let Err(e) = app_state.schedule_cache.load().await {
            info!(error = ?e, "Could not load schedule cache on startup (may be empty)");
        }

        // Seed the initial admin user if configured
        if let Some(admin_id) = config.admin_discord_id {
            let user = crate::data::users::ensure_seed_admin(&db_pool, admin_id as i64)
                .await
                .context("Failed to seed admin user")?;
            info!(discord_id = admin_id, username = %user.discord_username, "Seed admin ensured");
        }

        Ok(App {
            config,
            db_pool,
@@ -92,7 +116,16 @@ impl App {
    pub fn setup_services(&mut self, services: &[ServiceName]) -> Result<(), anyhow::Error> {
        // Register enabled services with the manager
        if services.contains(&ServiceName::Web) {
            let web_service = Box::new(WebService::new(self.config.port, self.app_state.clone()));
            let auth_config = AuthConfig {
                client_id: self.config.discord_client_id.clone(),
                client_secret: self.config.discord_client_secret.clone(),
                redirect_base: self.config.discord_redirect_uri.clone(),
            };
            let web_service = Box::new(WebService::new(
                self.config.port,
                self.app_state.clone(),
                auth_config,
            ));
            self.service_manager
                .register_service(ServiceName::Web.as_str(), web_service);
        }
@@ -101,7 +134,9 @@ impl App {
        let scraper_service = Box::new(ScraperService::new(
            self.db_pool.clone(),
            self.banner_api.clone(),
            self.app_state.reference_cache.clone(),
            self.app_state.service_statuses.clone(),
            self.app_state.scrape_job_tx.clone(),
        ));
        self.service_manager
            .register_service(ServiceName::Scraper.as_str(), scraper_service);
+26
-3
@@ -40,9 +40,9 @@ impl BannerApi {
                .cookie_store(false)
                .user_agent(user_agent())
                .tcp_keepalive(Some(std::time::Duration::from_secs(60 * 5)))
                .read_timeout(std::time::Duration::from_secs(10))
                .connect_timeout(std::time::Duration::from_secs(10))
                .timeout(std::time::Duration::from_secs(30))
                .read_timeout(std::time::Duration::from_secs(20))
                .connect_timeout(std::time::Duration::from_secs(15))
                .timeout(std::time::Duration::from_secs(40))
                .build()
                .context("Failed to create HTTP client")?,
        )
@@ -228,6 +228,29 @@ impl BannerApi {
            .await
    }

    /// Retrieves campus codes and descriptions.
    pub async fn get_campuses(&self, term: &str) -> Result<Vec<Pair>> {
        self.get_list_endpoint("get_campus", "", term, 1, 500).await
    }

    /// Retrieves instructional method codes and descriptions.
    pub async fn get_instructional_methods(&self, term: &str) -> Result<Vec<Pair>> {
        self.get_list_endpoint("get_instructionalMethod", "", term, 1, 500)
            .await
    }

    /// Retrieves part-of-term codes and descriptions.
    pub async fn get_parts_of_term(&self, term: &str) -> Result<Vec<Pair>> {
        self.get_list_endpoint("get_partOfTerm", "", term, 1, 500)
            .await
    }

    /// Retrieves section attribute codes and descriptions.
    pub async fn get_attributes(&self, term: &str) -> Result<Vec<Pair>> {
        self.get_list_endpoint("get_attribute", "", term, 1, 500)
            .await
    }

    /// Retrieves meeting time information for a course.
    pub async fn get_course_meeting_time(
        &self,
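A call-site sketch for the new list endpoints (illustrative; assumes a constructed BannerApi, and that Pair exposes code/description fields, which is an assumption):

// Illustrative only: dump campus reference data for a term.
async fn dump_campuses(api: &BannerApi) -> anyhow::Result<()> {
    for pair in api.get_campuses("202620").await? {
        println!("{}: {}", pair.code, pair.description);
    }
    Ok(())
}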
@@ -325,6 +325,7 @@
    fn test_parse_json_with_context_null_value() {
        #[derive(Debug, Deserialize)]
        struct TestStruct {
            #[allow(dead_code)]
            name: String,
        }

@@ -363,12 +364,14 @@ mod tests {
            #[allow(dead_code)]
            #[serde(rename = "courseTitle")]
            course_title: String,
            #[allow(dead_code)]
            faculty: Vec<Faculty>,
        }

        #[derive(Debug, Deserialize)]
        struct Faculty {
            #[serde(rename = "displayName")]
            #[allow(dead_code)]
            display_name: String,
            #[allow(dead_code)]
            email: String,
@@ -376,6 +379,7 @@ mod tests {

        #[derive(Debug, Deserialize)]
        struct SearchResult {
            #[allow(dead_code)]
            data: Vec<Course>,
        }
@@ -1,4 +1,4 @@
use bitflags::{bitflags, Flags};
use bitflags::{Flags, bitflags};
use chrono::{DateTime, NaiveDate, NaiveTime, Timelike, Utc, Weekday};
use extension_traits::extension;
use serde::{Deserialize, Deserializer, Serialize};
@@ -320,10 +320,11 @@ pub enum MeetingType {
    Unknown(String),
}

impl MeetingType {
    /// Parse from the meeting type string
    pub fn from_string(s: &str) -> Self {
        match s {
impl std::str::FromStr for MeetingType {
    type Err = std::convert::Infallible;

    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        Ok(match s {
            "HB" | "H2" | "H1" => MeetingType::HybridBlended,
            "OS" => MeetingType::OnlineSynchronous,
            "OA" => MeetingType::OnlineAsynchronous,
@@ -331,9 +332,11 @@ impl MeetingType {
            "ID" => MeetingType::IndependentStudy,
            "FF" => MeetingType::FaceToFace,
            other => MeetingType::Unknown(other.to_string()),
        }
        })
    }
}

impl MeetingType {
    /// Get description for the meeting type
    pub fn description(&self) -> &'static str {
        match self {
@@ -424,7 +427,7 @@ impl MeetingScheduleInfo {
                end: now,
            }
        });
        let meeting_type = MeetingType::from_string(&meeting_time.meeting_type);
        let meeting_type: MeetingType = meeting_time.meeting_type.parse().unwrap();
        let location = MeetingLocation::from_meeting_time(meeting_time);
        let duration_weeks = date_range.weeks_duration();
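Because `Err` is `Infallible`, call sites can unwrap the parse without risk; a sketch, assuming it lives alongside MeetingType:

#[test]
fn meeting_type_parse_never_fails() {
    // Known code maps to its variant; unknown codes fall through to Unknown.
    let mt: MeetingType = "OA".parse().unwrap();
    assert!(matches!(mt, MeetingType::OnlineAsynchronous));
    let other: MeetingType = "ZZ".parse().unwrap();
    assert!(matches!(other, MeetingType::Unknown(code) if code == "ZZ"));
}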
@@ -147,6 +147,37 @@ impl Term {
            },
        }
    }

    /// URL-friendly slug, e.g. "spring-2026"
    pub fn slug(&self) -> String {
        format!("{}-{}", self.season.slug(), self.year)
    }

    /// Parse a slug like "spring-2026" into a Term
    pub fn from_slug(s: &str) -> Option<Self> {
        let (season_str, year_str) = s.rsplit_once('-')?;
        let season = Season::from_slug(season_str)?;
        let year = year_str.parse::<u32>().ok()?;
        if !VALID_YEARS.contains(&year) {
            return None;
        }
        Some(Term { year, season })
    }

    /// Human-readable description, e.g. "Spring 2026"
    pub fn description(&self) -> String {
        format!("{} {}", self.season, self.year)
    }

    /// Resolve a string that is either a term code ("202620") or a slug ("spring-2026") to a term code.
    pub fn resolve_to_code(s: &str) -> Option<String> {
        // Try parsing as a 6-digit code first
        if let Ok(term) = s.parse::<Term>() {
            return Some(term.to_string());
        }
        // Try parsing as a slug
        Term::from_slug(s).map(|t| t.to_string())
    }
}

impl TermPoint {
@@ -195,6 +226,25 @@ impl Season {
            Season::Summer => "30",
        }
    }

    /// Returns the lowercase slug for URL-friendly representation
    pub fn slug(self) -> &'static str {
        match self {
            Season::Fall => "fall",
            Season::Spring => "spring",
            Season::Summer => "summer",
        }
    }

    /// Parse a slug like "spring", "summer", "fall" into a Season
    pub fn from_slug(s: &str) -> Option<Self> {
        match s {
            "fall" => Some(Season::Fall),
            "spring" => Some(Season::Spring),
            "summer" => Some(Season::Summer),
            _ => None,
        }
    }
}

impl std::fmt::Display for Season {
@@ -445,4 +495,79 @@ mod tests {
            }
        );
    }

    // --- Season::slug / from_slug ---

    #[test]
    fn test_season_slug_roundtrip() {
        for season in [Season::Fall, Season::Spring, Season::Summer] {
            assert_eq!(Season::from_slug(season.slug()), Some(season));
        }
    }

    #[test]
    fn test_season_from_slug_invalid() {
        assert_eq!(Season::from_slug("winter"), None);
        assert_eq!(Season::from_slug(""), None);
        assert_eq!(Season::from_slug("Spring"), None); // case-sensitive
    }

    // --- Term::slug / from_slug ---

    #[test]
    fn test_term_slug() {
        let term = Term {
            year: 2026,
            season: Season::Spring,
        };
        assert_eq!(term.slug(), "spring-2026");
    }

    #[test]
    fn test_term_from_slug_roundtrip() {
        for code in ["202510", "202520", "202530"] {
            let term = Term::from_str(code).unwrap();
            let slug = term.slug();
            let parsed = Term::from_slug(&slug).unwrap();
            assert_eq!(parsed, term);
        }
    }

    #[test]
    fn test_term_from_slug_invalid() {
        assert_eq!(Term::from_slug("winter-2026"), None);
        assert_eq!(Term::from_slug("spring"), None);
        assert_eq!(Term::from_slug(""), None);
    }

    // --- Term::description ---

    #[test]
    fn test_term_description() {
        let term = Term {
            year: 2026,
            season: Season::Spring,
        };
        assert_eq!(term.description(), "Spring 2026");
    }

    // --- Term::resolve_to_code ---

    #[test]
    fn test_resolve_to_code_from_code() {
        assert_eq!(Term::resolve_to_code("202620"), Some("202620".to_string()));
    }

    #[test]
    fn test_resolve_to_code_from_slug() {
        assert_eq!(
            Term::resolve_to_code("spring-2026"),
            Some("202620".to_string())
        );
    }

    #[test]
    fn test_resolve_to_code_invalid() {
        assert_eq!(Term::resolve_to_code("garbage"), None);
    }
}
+3
-14
@@ -10,8 +10,9 @@ pub struct Range {
    pub high: i32,
}

/// Builder for constructing Banner API search queries
/// Builder for constructing Banner API search queries.
#[derive(Debug, Clone, Default)]
#[allow(dead_code)]
pub struct SearchQuery {
    subject: Option<String>,
    title: Option<String>,
@@ -32,6 +33,7 @@ pub struct SearchQuery {
    course_number_range: Option<Range>,
}

#[allow(dead_code)]
impl SearchQuery {
    /// Creates a new SearchQuery with default values
    pub fn new() -> Self {
@@ -67,7 +69,6 @@ impl SearchQuery {
    }

    /// Adds a keyword to the query
    #[allow(dead_code)]
    pub fn keyword<S: Into<String>>(mut self, keyword: S) -> Self {
        match &mut self.keywords {
            Some(keywords) => keywords.push(keyword.into()),
@@ -77,63 +78,54 @@ impl SearchQuery {
    }

    /// Sets whether to search for open courses only
    #[allow(dead_code)]
    pub fn open_only(mut self, open_only: bool) -> Self {
        self.open_only = Some(open_only);
        self
    }

    /// Sets the term part for the query
    #[allow(dead_code)]
    pub fn term_part(mut self, term_part: Vec<String>) -> Self {
        self.term_part = Some(term_part);
        self
    }

    /// Sets the campuses for the query
    #[allow(dead_code)]
    pub fn campus(mut self, campus: Vec<String>) -> Self {
        self.campus = Some(campus);
        self
    }

    /// Sets the instructional methods for the query
    #[allow(dead_code)]
    pub fn instructional_method(mut self, instructional_method: Vec<String>) -> Self {
        self.instructional_method = Some(instructional_method);
        self
    }

    /// Sets the attributes for the query
    #[allow(dead_code)]
    pub fn attributes(mut self, attributes: Vec<String>) -> Self {
        self.attributes = Some(attributes);
        self
    }

    /// Sets the instructors for the query
    #[allow(dead_code)]
    pub fn instructor(mut self, instructor: Vec<u64>) -> Self {
        self.instructor = Some(instructor);
        self
    }

    /// Sets the start time for the query
    #[allow(dead_code)]
    pub fn start_time(mut self, start_time: Duration) -> Self {
        self.start_time = Some(start_time);
        self
    }

    /// Sets the end time for the query
    #[allow(dead_code)]
    pub fn end_time(mut self, end_time: Duration) -> Self {
        self.end_time = Some(end_time);
        self
    }

    /// Sets the credit range for the query
    #[allow(dead_code)]
    pub fn credits(mut self, low: i32, high: i32) -> Self {
        self.min_credits = Some(low);
        self.max_credits = Some(high);
@@ -141,14 +133,12 @@ impl SearchQuery {
    }

    /// Sets the minimum credits for the query
    #[allow(dead_code)]
    pub fn min_credits(mut self, value: i32) -> Self {
        self.min_credits = Some(value);
        self
    }

    /// Sets the maximum credits for the query
    #[allow(dead_code)]
    pub fn max_credits(mut self, value: i32) -> Self {
        self.max_credits = Some(value);
        self
@@ -161,7 +151,6 @@ impl SearchQuery {
    }

    /// Sets the offset for pagination
    #[allow(dead_code)]
    pub fn offset(mut self, offset: i32) -> Self {
        self.offset = offset;
        self
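With the attribute hoisted to the impl block, builder call sites are unchanged; an illustrative chain, assuming it sits in the same module as SearchQuery and uses only the setters shown above:

fn example_query() -> SearchQuery {
    SearchQuery::new()
        .open_only(true)
        .campus(vec!["M".to_string()])
        .credits(3, 4)
        .offset(0)
}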
+117
-65
@@ -11,7 +11,9 @@ use rand::distr::{Alphanumeric, SampleString};
use reqwest_middleware::ClientWithMiddleware;
use std::collections::{HashMap, VecDeque};

use std::mem::ManuallyDrop;
use std::ops::{Deref, DerefMut};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, LazyLock};
use std::time::{Duration, Instant};
use tokio::sync::{Mutex, Notify};
@@ -121,6 +123,64 @@ impl BannerSession {
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Verifies that cancelling `acquire()` mid-session-creation resets `is_creating`,
    /// allowing subsequent callers to proceed rather than deadlocking.
    #[tokio::test]
    async fn test_acquire_not_deadlocked_after_cancellation() {
        use tokio::sync::mpsc;

        let (tx, mut rx) = mpsc::channel::<()>(10);

        // Local server: /registration signals arrival via `tx`, then hangs forever.
        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();

        let app = axum::Router::new().route(
            "/StudentRegistrationSsb/registration",
            axum::routing::get(move || {
                let tx = tx.clone();
                async move {
                    let _ = tx.send(()).await;
                    std::future::pending::<&str>().await
                }
            }),
        );
        tokio::spawn(async move {
            axum::serve(listener, app).await.unwrap();
        });

        let base_url = format!("http://{}/StudentRegistrationSsb", addr);
        let client = reqwest_middleware::ClientBuilder::new(
            reqwest::Client::builder()
                .timeout(Duration::from_secs(300))
                .build()
                .unwrap(),
        )
        .build();

        let pool = SessionPool::new(client, base_url);
        let term: Term = "202620".parse().unwrap();

        // First acquire: cancel once the request reaches the server.
        tokio::select! {
            _ = pool.acquire(term) => panic!("server hangs — acquire should never complete"),
            _ = rx.recv() => {} // Request arrived; dropping the future simulates timeout cancellation.
        }

        // Second acquire: verify it reaches the server (i.e., is_creating was reset).
        // The global rate limiter has a 10s period, so allow 15s for the second attempt.
        tokio::select! {
            _ = pool.acquire(term) => {}
            result = tokio::time::timeout(Duration::from_secs(15), rx.recv()) => {
                assert!(
                    result.is_ok(),
                    "acquire() deadlocked — is_creating was not reset after cancellation"
                );
            }
        }
    }

    #[test]
    fn test_new_session_creates_session() {
@@ -200,50 +260,53 @@ mod tests {
    }
}

/// A smart pointer that returns a BannerSession to the pool when dropped.
/// A smart pointer that returns a `BannerSession` to the pool when dropped.
pub struct PooledSession {
    session: Option<BannerSession>,
    // This Arc points directly to the term-specific pool.
    session: ManuallyDrop<BannerSession>,
    pool: Arc<TermPool>,
}

impl PooledSession {
    pub fn been_used(&self) -> bool {
        self.session.as_ref().unwrap().been_used()
    }
}

impl Deref for PooledSession {
    type Target = BannerSession;
    fn deref(&self) -> &Self::Target {
        // The option is only ever None after drop is called, so this is safe.
        self.session.as_ref().unwrap()
        &self.session
    }
}

impl DerefMut for PooledSession {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.session.as_mut().unwrap()
        &mut self.session
    }
}

/// The magic happens here: when the guard goes out of scope, this is called.
impl Drop for PooledSession {
    fn drop(&mut self) {
        if let Some(session) = self.session.take() {
            let pool = self.pool.clone();
            // Since drop() cannot be async, we spawn a task to return the session.
            tokio::spawn(async move {
                pool.release(session).await;
            });
        }
        // SAFETY: `drop` is called exactly once by Rust's drop semantics,
        // so `ManuallyDrop::take` is guaranteed to see a valid value.
        let session = unsafe { ManuallyDrop::take(&mut self.session) };
        let pool = self.pool.clone();
        tokio::spawn(async move {
            pool.release(session).await;
        });
    }
}

pub struct TermPool {
    sessions: Mutex<VecDeque<BannerSession>>,
    notifier: Notify,
    is_creating: Mutex<bool>,
    is_creating: AtomicBool,
}

/// RAII guard ensuring `is_creating` is reset on drop for cancellation safety.
/// Without this, a cancelled `acquire()` future would leave the flag set permanently,
/// deadlocking all subsequent callers.
struct CreatingGuard(Arc<TermPool>);

impl Drop for CreatingGuard {
    fn drop(&mut self) {
        self.0.is_creating.store(false, Ordering::Release);
        self.0.notifier.notify_waiters();
    }
}

impl TermPool {
@@ -251,7 +314,7 @@ impl TermPool {
        Self {
            sessions: Mutex::new(VecDeque::new()),
            notifier: Notify::new(),
            is_creating: Mutex::new(false),
            is_creating: AtomicBool::new(false),
        }
    }

@@ -308,7 +371,7 @@ impl SessionPool {
            if let Some(session) = queue.pop_front() {
                if !session.is_expired() {
                    return Ok(PooledSession {
                        session: Some(session),
                        session: ManuallyDrop::new(session),
                        pool: Arc::clone(&term_pool),
                    });
                } else {
@@ -317,45 +380,38 @@ impl SessionPool {
                }
            } // MutexGuard is dropped, lock is released.

            // Slow path: No sessions available. We must either wait or become the creator.
            let mut is_creating_guard = term_pool.is_creating.lock().await;
            if *is_creating_guard {
                // Another task is already creating a session. Release the lock and wait.
                drop(is_creating_guard);
            // Slow path: wait for an in-progress creation, or become the creator.
            if term_pool.is_creating.load(Ordering::Acquire) {
                if !waited_for_creation {
                    trace!("Waiting for another task to create session");
                    waited_for_creation = true;
                }
                term_pool.notifier.notified().await;
                // Loop back to the top to try the fast path again.
                continue;
            }

            // This task is now the designated creator.
            *is_creating_guard = true;
            drop(is_creating_guard);
            // CAS to become the designated creator.
            if term_pool
                .is_creating
                .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
                .is_err()
            {
                continue; // Lost the race — loop back and wait.
            }

            // Guard resets is_creating on drop (including cancellation).
            let creating_guard = CreatingGuard(Arc::clone(&term_pool));

            // Race: wait for a session to be returned OR for the rate limiter to allow a new one.
            trace!("Pool empty, creating new session");
            tokio::select! {
                _ = term_pool.notifier.notified() => {
                    // A session was returned while we were waiting!
                    // We are no longer the creator. Reset the flag and loop to race for the new session.
                    let mut guard = term_pool.is_creating.lock().await;
                    *guard = false;
                    drop(guard);
                    // A session was returned — release creator role and race for it.
                    drop(creating_guard);
                    continue;
                }
                _ = SESSION_CREATION_RATE_LIMITER.until_ready() => {
                    // The rate limit has elapsed. It's our job to create the session.
                    let new_session_result = self.create_session(&term).await;

                    // After creation, we are no longer the creator. Reset the flag
                    // and notify all other waiting tasks.
                    let mut guard = term_pool.is_creating.lock().await;
                    *guard = false;
                    drop(guard);
                    term_pool.notifier.notify_waiters();
                    drop(creating_guard);

                    match new_session_result {
                        Ok(new_session) => {
@@ -366,12 +422,11 @@ impl SessionPool {
                                "Created new session"
                            );
                            return Ok(PooledSession {
                                session: Some(new_session),
                                session: ManuallyDrop::new(new_session),
                                pool: term_pool,
                            });
                        }
                        Err(e) => {
                            // Propagate the error if session creation failed.
                            return Err(e.context("Failed to create new session in pool"));
                        }
                    }
@@ -380,8 +435,8 @@ impl SessionPool {
        }
    }

    /// Sets up initial session cookies by making required Banner API requests
    pub async fn create_session(&self, term: &Term) -> Result<BannerSession> {
    /// Sets up initial session cookies by making required Banner API requests.
    async fn create_session(&self, term: &Term) -> Result<BannerSession> {
        info!(term = %term, "setting up banner session");

        // The 'register' or 'search' registration page
@@ -392,22 +447,15 @@ impl SessionPool {
            .await?;
        // TODO: Validate success

        let cookies = initial_registration
        let cookies: HashMap<String, String> = initial_registration
            .headers()
            .get_all("Set-Cookie")
            .iter()
            .filter_map(|header_value| {
                if let Ok(cookie_str) = header_value.to_str() {
                    if let Ok(cookie) = Cookie::parse(cookie_str) {
                        Some((cookie.name().to_string(), cookie.value().to_string()))
                    } else {
                        None
                    }
                } else {
                    None
                }
            .filter_map(|v| {
                let c = Cookie::parse(v.to_str().ok()?).ok()?;
                Some((c.name().to_string(), c.value().to_string()))
            })
            .collect::<HashMap<String, String>>();
            .collect();

        let jsessionid = cookies
            .get("JSESSIONID")
@@ -452,7 +500,11 @@ impl SessionPool {
        self.select_term(&term.to_string(), &unique_session_id, &cookie_header)
            .await?;

        Ok(BannerSession::new(&unique_session_id, jsessionid, ssb_cookie))
        Ok(BannerSession::new(
            &unique_session_id,
            jsessionid,
            ssb_cookie,
        ))
    }

    /// Retrieves a list of terms from the Banner API.
@@ -490,8 +542,8 @@ impl SessionPool {
        Ok(terms)
    }

    /// Selects a term for the current session
    pub async fn select_term(
    /// Selects a term for the current session.
    async fn select_term(
        &self,
        term: &str,
        unique_session_id: &str,
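The cancellation-safety pattern from CreatingGuard, reduced to a standalone sketch (illustrative only; names simplified and no pool involved):

use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};

struct ResetOnDrop(Arc<AtomicBool>);

impl Drop for ResetOnDrop {
    fn drop(&mut self) {
        // Runs on normal exit and when the owning future is cancelled,
        // so the flag can never be left stuck at `true`.
        self.0.store(false, Ordering::Release);
    }
}

fn main() {
    let creating = Arc::new(AtomicBool::new(false));
    // CAS: exactly one caller wins the creator role.
    if creating
        .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
        .is_ok()
    {
        let _guard = ResetOnDrop(creating.clone());
        // ... perform the guarded work here; dropping _guard resets the flag ...
    }
    assert!(!creating.load(Ordering::Acquire));
}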
@@ -18,7 +18,9 @@ fn nth_weekday_of_month(year: i32, month: u32, weekday: Weekday, n: u32) -> Opti

/// Compute a consecutive range of dates starting from `start` for `count` days.
fn date_range(start: NaiveDate, count: i64) -> Vec<NaiveDate> {
    (0..count).filter_map(|i| start.checked_add_signed(Duration::days(i))).collect()
    (0..count)
        .filter_map(|i| start.checked_add_signed(Duration::days(i)))
        .collect()
}

/// Compute university holidays for a given year.
+462
@@ -0,0 +1,462 @@
|
||||
//! Shared calendar generation logic for ICS files and Google Calendar URLs.
|
||||
//!
|
||||
//! Used by both the Discord bot commands and the web API endpoints.
|
||||
|
||||
use crate::data::models::DbMeetingTime;
|
||||
use chrono::{Datelike, Duration, NaiveDate, NaiveTime, Weekday};
|
||||
|
||||
/// Course metadata needed for calendar generation (shared interface between bot and web).
|
||||
pub struct CalendarCourse {
|
||||
pub crn: String,
|
||||
pub subject: String,
|
||||
pub course_number: String,
|
||||
pub title: String,
|
||||
pub sequence_number: Option<String>,
|
||||
pub primary_instructor: Option<String>,
|
||||
}
|
||||
|
||||
impl CalendarCourse {
|
||||
/// Display title like "CS 1083 - Introduction to Computer Science"
|
||||
pub fn display_title(&self) -> String {
|
||||
format!("{} {} - {}", self.subject, self.course_number, self.title)
|
||||
}
|
||||
|
||||
/// Filename-safe identifier: "CS_1083_001"
|
||||
pub fn filename_stem(&self) -> String {
|
||||
format!(
|
||||
"{}_{}{}",
|
||||
self.subject.replace(' ', "_"),
|
||||
self.course_number,
|
||||
self.sequence_number
|
||||
.as_deref()
|
||||
.map(|s| format!("_{s}"))
|
||||
.unwrap_or_default()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Date parsing helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Parse a date string in either MM/DD/YYYY or YYYY-MM-DD format.
|
||||
fn parse_date(s: &str) -> Option<NaiveDate> {
|
||||
NaiveDate::parse_from_str(s, "%m/%d/%Y")
|
||||
.or_else(|_| NaiveDate::parse_from_str(s, "%Y-%m-%d"))
|
||||
.ok()
|
||||
}
|
||||
|
||||
/// Parse an HHMM time string into `NaiveTime`.
|
||||
fn parse_hhmm(s: &str) -> Option<NaiveTime> {
|
||||
if s.len() != 4 {
|
||||
return None;
|
||||
}
|
||||
let hours = s[..2].parse::<u32>().ok()?;
|
||||
let minutes = s[2..].parse::<u32>().ok()?;
|
||||
NaiveTime::from_hms_opt(hours, minutes, 0)
|
||||
}
|
||||
|
||||
/// Active weekdays for a meeting time.
|
||||
fn active_weekdays(mt: &DbMeetingTime) -> Vec<Weekday> {
|
||||
let mapping: [(bool, Weekday); 7] = [
|
||||
(mt.monday, Weekday::Mon),
|
||||
(mt.tuesday, Weekday::Tue),
|
||||
(mt.wednesday, Weekday::Wed),
|
||||
(mt.thursday, Weekday::Thu),
|
||||
(mt.friday, Weekday::Fri),
|
||||
(mt.saturday, Weekday::Sat),
|
||||
(mt.sunday, Weekday::Sun),
|
||||
];
|
||||
mapping
|
||||
.iter()
|
||||
.filter(|(active, _)| *active)
|
||||
.map(|(_, day)| *day)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// ICS two-letter day code for RRULE BYDAY.
|
||||
fn ics_day_code(day: Weekday) -> &'static str {
|
||||
match day {
|
||||
Weekday::Mon => "MO",
|
||||
Weekday::Tue => "TU",
|
||||
Weekday::Wed => "WE",
|
||||
Weekday::Thu => "TH",
|
||||
Weekday::Fri => "FR",
|
||||
Weekday::Sat => "SA",
|
||||
Weekday::Sun => "SU",
|
||||
}
|
||||
}
|
||||
|
||||
/// Location string from a `DbMeetingTime`.
|
||||
fn location_string(mt: &DbMeetingTime) -> String {
|
||||
let building = mt
|
||||
.building_description
|
||||
.as_deref()
|
||||
.or(mt.building.as_deref())
|
||||
.unwrap_or("");
|
||||
let room = mt.room.as_deref().unwrap_or("");
|
||||
let combined = format!("{building} {room}").trim().to_string();
|
||||
if combined.is_empty() {
|
||||
"Online".to_string()
|
||||
} else {
|
||||
combined
|
||||
}
|
||||
}
|
||||
|
||||
/// Days display string (e.g. "MWF", "TTh").
|
||||
fn days_display(mt: &DbMeetingTime) -> String {
|
||||
let weekdays = active_weekdays(mt);
|
||||
if weekdays.is_empty() {
|
||||
return "TBA".to_string();
|
||||
}
|
||||
weekdays
|
||||
.iter()
|
||||
.map(|d| ics_day_code(*d))
|
||||
.collect::<Vec<_>>()
|
||||
.join("")
|
||||
}

/// Escape text for ICS property values.
fn escape_ics(text: &str) -> String {
    text.replace('\\', "\\\\")
        .replace(';', "\\;")
        .replace(',', "\\,")
        .replace('\n', "\\n")
        .replace('\r', "")
}
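RFC 5545 requires backslash-escaping of semicolons, commas, backslashes, and newlines in property values. A small sketch (not from the source):

```rust
#[test]
fn escape_ics_sketch() {
    assert_eq!(
        escape_ics("Room 1.02; Bldg A, North\n"),
        "Room 1.02\\; Bldg A\\, North\\n"
    );
}
```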

// ---------------------------------------------------------------------------
// University holidays (ported from bot/commands/ics.rs)
// ---------------------------------------------------------------------------

/// Find the nth occurrence of a weekday in a given month/year (1-based).
fn nth_weekday_of_month(year: i32, month: u32, weekday: Weekday, n: u32) -> Option<NaiveDate> {
    let first = NaiveDate::from_ymd_opt(year, month, 1)?;
    let days_ahead = (weekday.num_days_from_monday() as i64
        - first.weekday().num_days_from_monday() as i64)
        .rem_euclid(7) as u32;
    let day = 1 + days_ahead + 7 * (n - 1);
    NaiveDate::from_ymd_opt(year, month, day)
}
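Two worked checks of the arithmetic (not from the source): `days_ahead` is the offset from the first of the month to the first matching weekday, and each further occurrence adds seven days.

```rust
#[test]
fn nth_weekday_sketch() {
    use chrono::{NaiveDate, Weekday};

    // Labor Day 2025: first Monday of September is Sept 1.
    assert_eq!(
        nth_weekday_of_month(2025, 9, Weekday::Mon, 1),
        NaiveDate::from_ymd_opt(2025, 9, 1)
    );
    // MLK Day 2026: third Monday of January is Jan 19.
    assert_eq!(
        nth_weekday_of_month(2026, 1, Weekday::Mon, 3),
        NaiveDate::from_ymd_opt(2026, 1, 19)
    );
}
```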

/// Compute a consecutive range of dates starting from `start` for `count` days.
fn date_range(start: NaiveDate, count: i64) -> Vec<NaiveDate> {
    (0..count)
        .filter_map(|i| start.checked_add_signed(Duration::days(i)))
        .collect()
}

/// Compute university holidays for a given year.
fn compute_holidays_for_year(year: i32) -> Vec<(&'static str, Vec<NaiveDate>)> {
    let mut holidays = Vec::new();

    // Labor Day: 1st Monday of September
    if let Some(d) = nth_weekday_of_month(year, 9, Weekday::Mon, 1) {
        holidays.push(("Labor Day", vec![d]));
    }

    // Fall Break: Mon-Tue of Columbus Day week
    if let Some(mon) = nth_weekday_of_month(year, 10, Weekday::Mon, 2) {
        holidays.push(("Fall Break", date_range(mon, 2)));
    }

    // Day before Thanksgiving
    if let Some(thu) = nth_weekday_of_month(year, 11, Weekday::Thu, 4)
        && let Some(wed) = thu.checked_sub_signed(Duration::days(1))
    {
        holidays.push(("Day Before Thanksgiving", vec![wed]));
    }

    // Thanksgiving: 4th Thursday + Friday
    if let Some(thu) = nth_weekday_of_month(year, 11, Weekday::Thu, 4) {
        holidays.push(("Thanksgiving", date_range(thu, 2)));
    }

    // Winter Holiday: Dec 23-31
    if let Some(start) = NaiveDate::from_ymd_opt(year, 12, 23) {
        holidays.push(("Winter Holiday", date_range(start, 9)));
    }

    // New Year's Day
    if let Some(d) = NaiveDate::from_ymd_opt(year, 1, 1) {
        holidays.push(("New Year's Day", vec![d]));
    }

    // MLK Day: 3rd Monday of January
    if let Some(d) = nth_weekday_of_month(year, 1, Weekday::Mon, 3) {
        holidays.push(("MLK Day", vec![d]));
    }

    // Spring Break: full week starting 2nd Monday of March
    if let Some(mon) = nth_weekday_of_month(year, 3, Weekday::Mon, 2) {
        holidays.push(("Spring Break", date_range(mon, 6)));
    }

    holidays
}

/// Get holiday dates within a date range that fall on specific weekdays.
fn holiday_exceptions(start: NaiveDate, end: NaiveDate, weekdays: &[Weekday]) -> Vec<NaiveDate> {
    let start_year = start.year();
    let end_year = end.year();

    (start_year..=end_year)
        .flat_map(compute_holidays_for_year)
        .flat_map(|(_, dates)| dates)
        .filter(|&date| date >= start && date <= end && weekdays.contains(&date.weekday()))
        .collect()
}

/// Names of excluded holidays (for user-facing messages).
fn excluded_holiday_names(
    start: NaiveDate,
    end: NaiveDate,
    exceptions: &[NaiveDate],
) -> Vec<String> {
    let start_year = start.year();
    let end_year = end.year();
    let all_holidays: Vec<_> = (start_year..=end_year)
        .flat_map(compute_holidays_for_year)
        .collect();

    let mut names = Vec::new();
    for (holiday_name, holiday_dates) in &all_holidays {
        for &exc in exceptions {
            if holiday_dates.contains(&exc) {
                names.push(format!("{} ({})", holiday_name, exc.format("%a, %b %d")));
            }
        }
    }
    names.sort();
    names.dedup();
    names
}

// ---------------------------------------------------------------------------
// ICS generation
// ---------------------------------------------------------------------------

/// Result from ICS generation, including the file content and excluded holiday names.
pub struct IcsResult {
    pub content: String,
    pub filename: String,
    /// Holiday dates excluded via EXDATE rules, for user-facing messages.
    #[allow(dead_code)]
    pub excluded_holidays: Vec<String>,
}

/// Generate an ICS calendar file for a course.
pub fn generate_ics(
    course: &CalendarCourse,
    meeting_times: &[DbMeetingTime],
) -> Result<IcsResult, anyhow::Error> {
    let mut ics = String::new();
    let mut all_excluded = Vec::new();

    // Header
    ics.push_str("BEGIN:VCALENDAR\r\n");
    ics.push_str("VERSION:2.0\r\n");
    ics.push_str("PRODID:-//Banner Bot//Course Calendar//EN\r\n");
    ics.push_str("CALSCALE:GREGORIAN\r\n");
    ics.push_str("METHOD:PUBLISH\r\n");
    ics.push_str(&format!(
        "X-WR-CALNAME:{}\r\n",
        escape_ics(&course.display_title())
    ));

    for (index, mt) in meeting_times.iter().enumerate() {
        let (event, holidays) = generate_ics_event(course, mt, index)?;
        ics.push_str(&event);
        all_excluded.extend(holidays);
    }

    ics.push_str("END:VCALENDAR\r\n");

    Ok(IcsResult {
        content: ics,
        filename: format!("{}.ics", course.filename_stem()),
        excluded_holidays: all_excluded,
    })
}
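An end-to-end sketch (not from the source): one hypothetical MWF meeting fed through `generate_ics`. This assumes `DbMeetingTime`'s fields are public and typed as shown in the batch module below; the building, room, and campus codes are made up for illustration.

```rust
#[test]
fn generate_ics_sketch() -> anyhow::Result<()> {
    let course = CalendarCourse {
        crn: "26010".into(), // hypothetical
        subject: "CS".into(),
        course_number: "1083".into(),
        title: "Introduction to Computer Science".into(),
        sequence_number: Some("001".into()),
        primary_instructor: Some("Jane Doe".into()), // hypothetical
    };
    let mt = DbMeetingTime {
        begin_time: Some("0900".into()),
        end_time: Some("0950".into()),
        start_date: "08/25/2025".into(),
        end_date: "12/12/2025".into(),
        monday: true,
        tuesday: false,
        wednesday: true,
        thursday: false,
        friday: true,
        saturday: false,
        sunday: false,
        building: Some("NPB".into()),                          // hypothetical
        building_description: Some("North Paseo Building".into()),
        room: Some("1.226".into()),
        campus: Some("1UC".into()),
        meeting_type: "AFF".into(),
        meeting_schedule_type: "AFF".into(),
    };
    let result = generate_ics(&course, &[mt])?;
    assert_eq!(result.filename, "CS_1083_001.ics");
    // Mon/Wed/Fri with a begin time yields a weekly RRULE.
    assert!(result.content.contains("BYDAY=MO,WE,FR"));
    Ok(())
}
```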

/// Generate a single VEVENT for one meeting time.
fn generate_ics_event(
    course: &CalendarCourse,
    mt: &DbMeetingTime,
    index: usize,
) -> Result<(String, Vec<String>), anyhow::Error> {
    let start_date = parse_date(&mt.start_date)
        .ok_or_else(|| anyhow::anyhow!("Invalid start_date: {}", mt.start_date))?;
    let end_date = parse_date(&mt.end_date)
        .ok_or_else(|| anyhow::anyhow!("Invalid end_date: {}", mt.end_date))?;

    let start_time = mt.begin_time.as_deref().and_then(parse_hhmm);
    let end_time = mt.end_time.as_deref().and_then(parse_hhmm);

    // DTSTART/DTEND: first occurrence with time, or all-day on start_date
    let (dtstart, dtend) = match (start_time, end_time) {
        (Some(st), Some(et)) => {
            let s = start_date.and_time(st).and_utc();
            let e = start_date.and_time(et).and_utc();
            (
                s.format("%Y%m%dT%H%M%SZ").to_string(),
                e.format("%Y%m%dT%H%M%SZ").to_string(),
            )
        }
        _ => {
            let s = start_date.and_hms_opt(0, 0, 0).unwrap().and_utc();
            let e = start_date.and_hms_opt(0, 0, 0).unwrap().and_utc();
            (
                s.format("%Y%m%dT%H%M%SZ").to_string(),
                e.format("%Y%m%dT%H%M%SZ").to_string(),
            )
        }
    };

    let event_title = if index > 0 {
        format!("{} (Meeting {})", course.display_title(), index + 1)
    } else {
        course.display_title()
    };

    let instructor = course.primary_instructor.as_deref().unwrap_or("Staff");

    // Use real newlines here; escape_ics converts them to the ICS "\n"
    // escape sequence. Pre-escaped "\\n" would be double-escaped.
    let description = format!(
        "CRN: {}\nInstructor: {}\nDays: {}\nMeeting Type: {}",
        course.crn,
        instructor,
        days_display(mt),
        mt.meeting_type,
    );

    let location = location_string(mt);

    let uid = format!(
        "{}-{}-{}@banner-bot.local",
        course.crn,
        index,
        start_date
            .and_hms_opt(0, 0, 0)
            .unwrap()
            .and_utc()
            .timestamp()
    );

    let mut event = String::new();
    event.push_str("BEGIN:VEVENT\r\n");
    event.push_str(&format!("UID:{uid}\r\n"));
    event.push_str(&format!("DTSTART:{dtstart}\r\n"));
    event.push_str(&format!("DTEND:{dtend}\r\n"));
    event.push_str(&format!("SUMMARY:{}\r\n", escape_ics(&event_title)));
    event.push_str(&format!("DESCRIPTION:{}\r\n", escape_ics(&description)));
    event.push_str(&format!("LOCATION:{}\r\n", escape_ics(&location)));

    let weekdays = active_weekdays(mt);
    let mut holiday_names = Vec::new();

    if let (false, Some(st)) = (weekdays.is_empty(), start_time) {
        let by_day: Vec<&str> = weekdays.iter().map(|d| ics_day_code(*d)).collect();
        let until = end_date.format("%Y%m%dT000000Z").to_string();

        event.push_str(&format!(
            "RRULE:FREQ=WEEKLY;BYDAY={};UNTIL={}\r\n",
            by_day.join(","),
            until,
        ));

        // Holiday exceptions
        let exceptions = holiday_exceptions(start_date, end_date, &weekdays);
        if !exceptions.is_empty() {
            let start_utc = start_date.and_time(st).and_utc();
            let exdates: Vec<String> = exceptions
                .iter()
                .map(|&d| {
                    d.and_time(start_utc.time())
                        .and_utc()
                        .format("%Y%m%dT%H%M%SZ")
                        .to_string()
                })
                .collect();
            event.push_str(&format!("EXDATE:{}\r\n", exdates.join(",")));
        }

        holiday_names = excluded_holiday_names(start_date, end_date, &exceptions);
    }

    event.push_str("END:VEVENT\r\n");
    Ok((event, holiday_names))
}

// ---------------------------------------------------------------------------
// Google Calendar URL generation
// ---------------------------------------------------------------------------

/// Generate a Google Calendar "add event" URL for a single meeting time.
pub fn generate_gcal_url(
    course: &CalendarCourse,
    mt: &DbMeetingTime,
) -> Result<String, anyhow::Error> {
    let start_date = parse_date(&mt.start_date)
        .ok_or_else(|| anyhow::anyhow!("Invalid start_date: {}", mt.start_date))?;
    let end_date = parse_date(&mt.end_date)
        .ok_or_else(|| anyhow::anyhow!("Invalid end_date: {}", mt.end_date))?;

    let start_time = mt.begin_time.as_deref().and_then(parse_hhmm);
    let end_time = mt.end_time.as_deref().and_then(parse_hhmm);

    let dates_text = match (start_time, end_time) {
        (Some(st), Some(et)) => {
            let s = start_date.and_time(st);
            let e = start_date.and_time(et);
            format!(
                "{}/{}",
                s.format("%Y%m%dT%H%M%S"),
                e.format("%Y%m%dT%H%M%S")
            )
        }
        _ => {
            let s = start_date.format("%Y%m%d").to_string();
            format!("{s}/{s}")
        }
    };

    let instructor = course.primary_instructor.as_deref().unwrap_or("Staff");

    let details = format!(
        "CRN: {}\nInstructor: {}\nDays: {}",
        course.crn,
        instructor,
        days_display(mt),
    );

    let location = location_string(mt);

    let weekdays = active_weekdays(mt);
    let recur = if !weekdays.is_empty() && start_time.is_some() {
        let by_day: Vec<&str> = weekdays.iter().map(|d| ics_day_code(*d)).collect();
        let until = end_date.format("%Y%m%dT000000Z").to_string();
        format!(
            "RRULE:FREQ=WEEKLY;BYDAY={};UNTIL={}",
            by_day.join(","),
            until
        )
    } else {
        String::new()
    };

    let course_text = course.display_title();

    let params: Vec<(&str, &str)> = vec![
        ("action", "TEMPLATE"),
        ("text", &course_text),
        ("dates", &dates_text),
        ("details", &details),
        ("location", &location),
        ("trp", "true"),
        ("ctz", "America/Chicago"),
        ("recur", &recur),
    ];

    let url = url::Url::parse_with_params("https://calendar.google.com/calendar/render", &params)?;
    Ok(url.to_string())
}
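For reference, a sketch of the URL shape this produces (not from the source; `course` and `mt` are the hypothetical values from the sketches above, and the exact percent-encoding comes from `Url::parse_with_params`):

```rust
#[test]
fn gcal_url_sketch() -> anyhow::Result<()> {
    // `course` and `mt` as constructed in the earlier sketches.
    let url = generate_gcal_url(&course, &mt)?;
    assert!(url.starts_with("https://calendar.google.com/calendar/render?action=TEMPLATE"));
    // Form-encoding turns '/' into %2F in query values.
    assert!(url.contains("ctz=America%2FChicago"));
    Ok(())
}
```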
+1
-105
@@ -2,34 +2,16 @@ use clap::Parser;

/// Banner Discord Bot - Course availability monitoring
///
/// This application runs multiple services that can be controlled via CLI arguments:
/// This application runs all services:
/// - bot: Discord bot for course monitoring commands
/// - web: HTTP server for web interface and API
/// - scraper: Background service for scraping course data
///
/// Use --services to specify which services to run, or --disable-services to exclude specific services.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
pub struct Args {
    /// Log formatter to use
    #[arg(long, value_enum, default_value_t = default_tracing_format())]
    pub tracing: TracingFormat,

    /// Services to run (comma-separated). Default: all services
    ///
    /// Examples:
    ///   --services bot,web  # Run only bot and web services
    ///   --services scraper  # Run only the scraper service
    #[arg(long, value_delimiter = ',', conflicts_with = "disable_services")]
    pub services: Option<Vec<ServiceName>>,

    /// Services to disable (comma-separated)
    ///
    /// Examples:
    ///   --disable-services bot      # Run web and scraper only
    ///   --disable-services bot,web  # Run only the scraper service
    #[arg(long, value_delimiter = ',', conflicts_with = "services")]
    pub disable_services: Option<Vec<ServiceName>>,
}

#[derive(clap::ValueEnum, Clone, Debug)]
@@ -66,34 +48,6 @@ impl ServiceName {
    }
}

/// Determine which services should be enabled based on CLI arguments
pub fn determine_enabled_services(args: &Args) -> Result<Vec<ServiceName>, anyhow::Error> {
    match (&args.services, &args.disable_services) {
        (Some(services), None) => {
            // User specified which services to run
            Ok(services.clone())
        }
        (None, Some(disabled)) => {
            // User specified which services to disable
            let enabled: Vec<ServiceName> = ServiceName::all()
                .into_iter()
                .filter(|s| !disabled.contains(s))
                .collect();
            Ok(enabled)
        }
        (None, None) => {
            // Default: run all services
            Ok(ServiceName::all())
        }
        (Some(_), Some(_)) => {
            // This should be prevented by clap's conflicts_with, but just in case
            Err(anyhow::anyhow!(
                "Cannot specify both --services and --disable-services"
            ))
        }
    }
}

#[cfg(debug_assertions)]
const DEFAULT_TRACING_FORMAT: TracingFormat = TracingFormat::Pretty;
#[cfg(not(debug_assertions))]
@@ -107,64 +61,6 @@ fn default_tracing_format() -> TracingFormat {
mod tests {
    use super::*;

    fn args_with_services(
        services: Option<Vec<ServiceName>>,
        disable: Option<Vec<ServiceName>>,
    ) -> Args {
        Args {
            tracing: TracingFormat::Pretty,
            services,
            disable_services: disable,
        }
    }

    #[test]
    fn test_default_enables_all_services() {
        let result = determine_enabled_services(&args_with_services(None, None)).unwrap();
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn test_explicit_services_only_those() {
        let result =
            determine_enabled_services(&args_with_services(Some(vec![ServiceName::Web]), None))
                .unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].as_str(), "web");
    }

    #[test]
    fn test_disable_bot_leaves_web_and_scraper() {
        let result =
            determine_enabled_services(&args_with_services(None, Some(vec![ServiceName::Bot])))
                .unwrap();
        assert_eq!(result.len(), 2);
        assert!(result.iter().all(|s| s.as_str() != "bot"));
    }

    #[test]
    fn test_disable_all_leaves_empty() {
        let result = determine_enabled_services(&args_with_services(
            None,
            Some(vec![
                ServiceName::Bot,
                ServiceName::Web,
                ServiceName::Scraper,
            ]),
        ))
        .unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_both_specified_returns_error() {
        let result = determine_enabled_services(&args_with_services(
            Some(vec![ServiceName::Web]),
            Some(vec![ServiceName::Bot]),
        ));
        assert!(result.is_err());
    }

    #[test]
    fn test_service_name_as_str() {
        assert_eq!(ServiceName::Bot.as_str(), "bot");
@@ -47,6 +47,19 @@ pub struct Config {
    /// Rate limiting configuration for Banner API requests
    #[serde(default = "default_rate_limiting")]
    pub rate_limiting: RateLimitingConfig,

    /// Discord OAuth2 client ID for web authentication
    #[serde(deserialize_with = "deserialize_string_or_uint")]
    pub discord_client_id: String,
    /// Discord OAuth2 client secret for web authentication
    pub discord_client_secret: String,
    /// Optional base URL override for OAuth2 redirect (e.g. "https://banner.xevion.dev").
    /// When unset, the redirect URI is derived from the incoming request's Origin/Host.
    #[serde(default)]
    pub discord_redirect_uri: Option<String>,
    /// Discord user ID to seed as initial admin on startup (optional)
    #[serde(default)]
    pub admin_discord_id: Option<u64>,
}

/// Default log level of "info"
@@ -216,6 +229,43 @@ where
    deserializer.deserialize_any(DurationVisitor)
}

/// Deserializes a value that may arrive as either a string or unsigned integer.
///
/// Figment's env provider infers types from raw values, so numeric-looking strings
/// like Discord client IDs get parsed as integers. This accepts both forms.
fn deserialize_string_or_uint<'de, D>(deserializer: D) -> Result<String, D::Error>
where
    D: Deserializer<'de>,
{
    use serde::de::Visitor;

    struct StringOrUintVisitor;

    impl<'de> Visitor<'de> for StringOrUintVisitor {
        type Value = String;

        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
            formatter.write_str("a string or unsigned integer")
        }

        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            Ok(value.to_owned())
        }

        fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            Ok(value.to_string())
        }
    }

    deserializer.deserialize_any(StringOrUintVisitor)
}
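A minimal sketch of the visitor in action (not from the source; it assumes `serde_json` is available as a dev-dependency):

```rust
#[derive(serde::Deserialize)]
struct Probe {
    #[serde(deserialize_with = "deserialize_string_or_uint")]
    discord_client_id: String,
}

#[test]
fn string_or_uint_sketch() {
    // Both JSON forms deserialize to the same string.
    let a: Probe =
        serde_json::from_str(r#"{ "discord_client_id": 123456789012345678 }"#).unwrap();
    let b: Probe =
        serde_json::from_str(r#"{ "discord_client_id": "123456789012345678" }"#).unwrap();
    assert_eq!(a.discord_client_id, b.discord_client_id);
}
```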

#[cfg(test)]
mod tests {
    use super::*;

+712
-70
@@ -1,98 +1,597 @@
//! Batch database operations for improved performance.

use crate::banner::Course;
use crate::data::models::{DbMeetingTime, UpsertCounts};
use crate::data::names::{decode_html_entities, parse_banner_name};
use crate::error::Result;
use sqlx::PgConnection;
use sqlx::PgPool;
use std::collections::{HashMap, HashSet};
use std::time::Instant;
use tracing::info;

/// Convert a Banner API course's meeting times to the DB JSONB shape.
fn to_db_meeting_times(course: &Course) -> serde_json::Value {
    let meetings: Vec<DbMeetingTime> = course
        .meetings_faculty
        .iter()
        .map(|mf| {
            let mt = &mf.meeting_time;
            DbMeetingTime {
                begin_time: mt.begin_time.clone(),
                end_time: mt.end_time.clone(),
                start_date: mt.start_date.clone(),
                end_date: mt.end_date.clone(),
                monday: mt.monday,
                tuesday: mt.tuesday,
                wednesday: mt.wednesday,
                thursday: mt.thursday,
                friday: mt.friday,
                saturday: mt.saturday,
                sunday: mt.sunday,
                building: mt.building.clone(),
                building_description: mt.building_description.clone(),
                room: mt.room.clone(),
                campus: mt.campus.clone(),
                meeting_type: mt.meeting_type.clone(),
                meeting_schedule_type: mt.meeting_schedule_type.clone(),
            }
        })
        .collect();
    serde_json::to_value(meetings).unwrap_or_default()
}

/// Convert a Banner API course's section attributes to a JSONB array of code strings.
fn to_db_attributes(course: &Course) -> serde_json::Value {
    let codes: Vec<&str> = course
        .section_attributes
        .iter()
        .map(|a| a.code.as_str())
        .collect();
    serde_json::to_value(codes).unwrap_or_default()
}

/// Extract the campus code from the first meeting time (Banner doesn't put it on the course directly).
fn extract_campus_code(course: &Course) -> Option<String> {
    course
        .meetings_faculty
        .first()
        .and_then(|mf| mf.meeting_time.campus.clone())
}

// ---------------------------------------------------------------------------
// Task 1: UpsertDiffRow — captures pre- and post-upsert state for diffing
// ---------------------------------------------------------------------------

/// Row returned by the CTE-based upsert query, carrying both old and new values
/// for every auditable field. `old_id` is `None` for fresh inserts.
#[derive(sqlx::FromRow, Debug)]
struct UpsertDiffRow {
    id: i32,
    old_id: Option<i32>,
    crn: String,
    term_code: String,

    // enrollment fields
    old_enrollment: Option<i32>,
    new_enrollment: i32,
    old_max_enrollment: Option<i32>,
    new_max_enrollment: i32,
    old_wait_count: Option<i32>,
    new_wait_count: i32,
    old_wait_capacity: Option<i32>,
    new_wait_capacity: i32,

    // text fields (non-nullable in DB)
    old_subject: Option<String>,
    new_subject: String,
    old_course_number: Option<String>,
    new_course_number: String,
    old_title: Option<String>,
    new_title: String,

    // nullable text fields
    old_sequence_number: Option<String>,
    new_sequence_number: Option<String>,
    old_part_of_term: Option<String>,
    new_part_of_term: Option<String>,
    old_instructional_method: Option<String>,
    new_instructional_method: Option<String>,
    old_campus: Option<String>,
    new_campus: Option<String>,

    // nullable int fields
    old_credit_hours: Option<i32>,
    new_credit_hours: Option<i32>,
    old_credit_hour_low: Option<i32>,
    new_credit_hour_low: Option<i32>,
    old_credit_hour_high: Option<i32>,
    new_credit_hour_high: Option<i32>,

    // cross-list fields
    old_cross_list: Option<String>,
    new_cross_list: Option<String>,
    old_cross_list_capacity: Option<i32>,
    new_cross_list_capacity: Option<i32>,
    old_cross_list_count: Option<i32>,
    new_cross_list_count: Option<i32>,

    // link fields
    old_link_identifier: Option<String>,
    new_link_identifier: Option<String>,
    old_is_section_linked: Option<bool>,
    new_is_section_linked: Option<bool>,

    // JSONB fields
    old_meeting_times: Option<serde_json::Value>,
    new_meeting_times: serde_json::Value,
    old_attributes: Option<serde_json::Value>,
    new_attributes: serde_json::Value,
}

// ---------------------------------------------------------------------------
// Task 3: Entry types and diff logic
// ---------------------------------------------------------------------------

struct AuditEntry {
    course_id: i32,
    field_changed: &'static str,
    old_value: String,
    new_value: String,
}

struct MetricEntry {
    course_id: i32,
    enrollment: i32,
    wait_count: i32,
    seats_available: i32,
}

/// Compare old vs new for a single field, pushing an `AuditEntry` when they differ.
///
/// Three variants:
/// - `diff_field!(audits, row, field_name, old_field, new_field)` — `Option<T>` old vs `T` new
/// - `diff_field!(opt audits, row, field_name, old_field, new_field)` — `Option<T>` old vs `Option<T>` new
/// - `diff_field!(json audits, row, field_name, old_field, new_field)` — `Option<Value>` old vs `Value` new
///
/// All variants skip when `old_id` is None (fresh insert).
macro_rules! diff_field {
    // Standard: Option<T> old vs T new (non-nullable columns)
    ($audits:ident, $row:ident, $field:expr, $old:ident, $new:ident) => {
        if $row.old_id.is_some() {
            let old_str = $row
                .$old
                .as_ref()
                .map(|v| v.to_string())
                .unwrap_or_default();
            let new_str = $row.$new.to_string();
            if old_str != new_str {
                $audits.push(AuditEntry {
                    course_id: $row.id,
                    field_changed: $field,
                    old_value: old_str,
                    new_value: new_str,
                });
            }
        }
    };
    // Nullable: Option<T> old vs Option<T> new
    (opt $audits:ident, $row:ident, $field:expr, $old:ident, $new:ident) => {
        if $row.old_id.is_some() {
            let old_str = $row
                .$old
                .as_ref()
                .map(|v| v.to_string())
                .unwrap_or_default();
            let new_str = $row
                .$new
                .as_ref()
                .map(|v| v.to_string())
                .unwrap_or_default();
            if old_str != new_str {
                $audits.push(AuditEntry {
                    course_id: $row.id,
                    field_changed: $field,
                    old_value: old_str,
                    new_value: new_str,
                });
            }
        }
    };
    // JSONB: Option<Value> old vs Value new
    (json $audits:ident, $row:ident, $field:expr, $old:ident, $new:ident) => {
        if $row.old_id.is_some() {
            let old_val = $row
                .$old
                .as_ref()
                .cloned()
                .unwrap_or(serde_json::Value::Null);
            let new_val = &$row.$new;
            if old_val != *new_val {
                $audits.push(AuditEntry {
                    course_id: $row.id,
                    field_changed: $field,
                    old_value: old_val.to_string(),
                    new_value: new_val.to_string(),
                });
            }
        }
    };
}
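For orientation, the standard variant of the macro expands to roughly this for one invocation (illustrative expansion, not standalone code; `row` and `audits` come from the surrounding loop):

```rust
// diff_field!(audits, row, "title", old_title, new_title) becomes:
if row.old_id.is_some() {
    let old_str = row
        .old_title
        .as_ref()
        .map(|v| v.to_string())
        .unwrap_or_default();
    let new_str = row.new_title.to_string();
    if old_str != new_str {
        audits.push(AuditEntry {
            course_id: row.id,
            field_changed: "title",
            old_value: old_str,
            new_value: new_str,
        });
    }
}
```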

/// Compute audit entries (field-level diffs) and metric entries from upsert diff rows.
fn compute_diffs(rows: &[UpsertDiffRow]) -> (Vec<AuditEntry>, Vec<MetricEntry>) {
    let mut audits = Vec::new();
    let mut metrics = Vec::new();

    for row in rows {
        // Non-nullable fields
        diff_field!(audits, row, "enrollment", old_enrollment, new_enrollment);
        diff_field!(
            audits,
            row,
            "max_enrollment",
            old_max_enrollment,
            new_max_enrollment
        );
        diff_field!(audits, row, "wait_count", old_wait_count, new_wait_count);
        diff_field!(
            audits,
            row,
            "wait_capacity",
            old_wait_capacity,
            new_wait_capacity
        );
        diff_field!(audits, row, "subject", old_subject, new_subject);
        diff_field!(
            audits,
            row,
            "course_number",
            old_course_number,
            new_course_number
        );
        diff_field!(audits, row, "title", old_title, new_title);

        // Nullable text fields
        diff_field!(opt audits, row, "sequence_number", old_sequence_number, new_sequence_number);
        diff_field!(opt audits, row, "part_of_term", old_part_of_term, new_part_of_term);
        diff_field!(opt audits, row, "instructional_method", old_instructional_method, new_instructional_method);
        diff_field!(opt audits, row, "campus", old_campus, new_campus);

        // Nullable int fields
        diff_field!(opt audits, row, "credit_hours", old_credit_hours, new_credit_hours);
        diff_field!(opt audits, row, "credit_hour_low", old_credit_hour_low, new_credit_hour_low);
        diff_field!(opt audits, row, "credit_hour_high", old_credit_hour_high, new_credit_hour_high);

        // Cross-list fields
        diff_field!(opt audits, row, "cross_list", old_cross_list, new_cross_list);
        diff_field!(opt audits, row, "cross_list_capacity", old_cross_list_capacity, new_cross_list_capacity);
        diff_field!(opt audits, row, "cross_list_count", old_cross_list_count, new_cross_list_count);

        // Link fields
        diff_field!(opt audits, row, "link_identifier", old_link_identifier, new_link_identifier);
        diff_field!(opt audits, row, "is_section_linked", old_is_section_linked, new_is_section_linked);

        // JSONB fields
        diff_field!(json audits, row, "meeting_times", old_meeting_times, new_meeting_times);
        diff_field!(json audits, row, "attributes", old_attributes, new_attributes);

        // Emit a metric entry on fresh insert (baseline) or when enrollment data changed
        let is_new = row.old_id.is_none();
        let enrollment_changed = row.old_id.is_some()
            && (row.old_enrollment != Some(row.new_enrollment)
                || row.old_wait_count != Some(row.new_wait_count)
                || row.old_max_enrollment != Some(row.new_max_enrollment));

        if is_new || enrollment_changed {
            metrics.push(MetricEntry {
                course_id: row.id,
                enrollment: row.new_enrollment,
                wait_count: row.new_wait_count,
                seats_available: row.new_max_enrollment - row.new_enrollment,
            });
        }
    }

    (audits, metrics)
}

// ---------------------------------------------------------------------------
// Task 4: Batch insert functions for audits and metrics
// ---------------------------------------------------------------------------

async fn insert_audits(audits: &[AuditEntry], conn: &mut PgConnection) -> Result<()> {
    if audits.is_empty() {
        return Ok(());
    }

    let course_ids: Vec<i32> = audits.iter().map(|a| a.course_id).collect();
    let fields: Vec<&str> = audits.iter().map(|a| a.field_changed).collect();
    let old_values: Vec<&str> = audits.iter().map(|a| a.old_value.as_str()).collect();
    let new_values: Vec<&str> = audits.iter().map(|a| a.new_value.as_str()).collect();

    sqlx::query(
        r#"
        INSERT INTO course_audits (course_id, timestamp, field_changed, old_value, new_value)
        SELECT v.course_id, NOW(), v.field_changed, v.old_value, v.new_value
        FROM UNNEST($1::int4[], $2::text[], $3::text[], $4::text[])
            AS v(course_id, field_changed, old_value, new_value)
        "#,
    )
    .bind(&course_ids)
    .bind(&fields)
    .bind(&old_values)
    .bind(&new_values)
    .execute(&mut *conn)
    .await
    .map_err(|e| anyhow::anyhow!("Failed to batch insert course_audits: {}", e))?;

    Ok(())
}

async fn insert_metrics(metrics: &[MetricEntry], conn: &mut PgConnection) -> Result<()> {
    if metrics.is_empty() {
        return Ok(());
    }

    let course_ids: Vec<i32> = metrics.iter().map(|m| m.course_id).collect();
    let enrollments: Vec<i32> = metrics.iter().map(|m| m.enrollment).collect();
    let wait_counts: Vec<i32> = metrics.iter().map(|m| m.wait_count).collect();
    let seats_available: Vec<i32> = metrics.iter().map(|m| m.seats_available).collect();

    sqlx::query(
        r#"
        INSERT INTO course_metrics (course_id, timestamp, enrollment, wait_count, seats_available)
        SELECT v.course_id, NOW(), v.enrollment, v.wait_count, v.seats_available
        FROM UNNEST($1::int4[], $2::int4[], $3::int4[], $4::int4[])
            AS v(course_id, enrollment, wait_count, seats_available)
        "#,
    )
    .bind(&course_ids)
    .bind(&enrollments)
    .bind(&wait_counts)
    .bind(&seats_available)
    .execute(&mut *conn)
    .await
    .map_err(|e| anyhow::anyhow!("Failed to batch insert course_metrics: {}", e))?;

    Ok(())
}

// ---------------------------------------------------------------------------
// Core upsert functions (updated to use &mut PgConnection)
// ---------------------------------------------------------------------------

/// Batch upsert courses in a single database query.
///
/// This function performs a bulk INSERT...ON CONFLICT DO UPDATE for all courses
/// in a single round-trip to the database, significantly reducing overhead compared
/// to individual inserts.
/// Performs a bulk INSERT...ON CONFLICT DO UPDATE for all courses, including
/// new fields (meeting times, attributes, instructor data). Captures pre-update
/// state for audit/metric tracking, all within a single transaction.
///
/// # Performance
/// - Reduces N database round-trips to 1
/// - Reduces N database round-trips to 5 (old-data CTE + upsert, audits, metrics, instructors, junction)
/// - Typical usage: 50-200 courses per batch
/// - PostgreSQL parameter limit: 65,535 (we use ~10 per course)
///
/// # Arguments
/// * `courses` - Slice of Course structs from the Banner API
/// * `db_pool` - PostgreSQL connection pool
///
/// # Returns
/// * `Ok(())` on success
/// * `Err(_)` if the database operation fails
///
/// # Example
/// ```no_run
/// use banner::data::batch::batch_upsert_courses;
/// use banner::banner::Course;
/// use sqlx::PgPool;
///
/// async fn example(courses: &[Course], pool: &PgPool) -> anyhow::Result<()> {
///     batch_upsert_courses(courses, pool).await?;
///     Ok(())
/// }
/// ```
pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<()> {
    // Early return for empty batches
pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<UpsertCounts> {
    if courses.is_empty() {
        info!("No courses to upsert, skipping batch operation");
        return Ok(());
        return Ok(UpsertCounts::default());
    }

    let start = Instant::now();
    let course_count = courses.len();

    // Extract course fields into vectors for UNNEST
    let mut tx = db_pool.begin().await?;

    // Step 1: Upsert courses with CTE, returning diff rows
    let diff_rows = upsert_courses(courses, &mut tx).await?;

    // Step 2: Build (crn, term_code) → course_id map for instructor linking.
    // RETURNING order from INSERT ... ON CONFLICT is not guaranteed to match
    // the input array order, so we must key by (crn, term_code) rather than
    // relying on positional correspondence.
    let crn_term_to_id: HashMap<(&str, &str), i32> = diff_rows
        .iter()
        .map(|r| ((r.crn.as_str(), r.term_code.as_str()), r.id))
        .collect();

    // Step 3: Compute audit/metric diffs
    let (audits, metrics) = compute_diffs(&diff_rows);

    // Count courses that had at least one field change (existing rows only)
    let changed_ids: HashSet<i32> = audits.iter().map(|a| a.course_id).collect();
    let existing_count = diff_rows.iter().filter(|r| r.old_id.is_some()).count() as i32;
    let courses_changed = changed_ids.len() as i32;

    let counts = UpsertCounts {
        courses_fetched: course_count as i32,
        courses_changed,
        courses_unchanged: existing_count - courses_changed,
        audits_generated: audits.len() as i32,
        metrics_generated: metrics.len() as i32,
    };

    // Step 4: Insert audits and metrics
    insert_audits(&audits, &mut tx).await?;
    insert_metrics(&metrics, &mut tx).await?;

    // Step 5: Upsert instructors (returns email -> id map)
    let email_to_id = upsert_instructors(courses, &mut tx).await?;

    // Step 6: Link courses to instructors via junction table
    upsert_course_instructors(courses, &crn_term_to_id, &email_to_id, &mut tx).await?;

    tx.commit().await?;

    let duration = start.elapsed();
    info!(
        courses_count = course_count,
        courses_changed = counts.courses_changed,
        courses_unchanged = counts.courses_unchanged,
        audit_entries = counts.audits_generated,
        metric_entries = counts.metrics_generated,
        duration_ms = duration.as_millis(),
        "Batch upserted courses with instructors, audits, and metrics"
    );

    Ok(counts)
}

// ---------------------------------------------------------------------------
// Task 2: CTE-based upsert returning old+new values
// ---------------------------------------------------------------------------

/// Upsert all courses and return diff rows with old and new values for auditing.
async fn upsert_courses(courses: &[Course], conn: &mut PgConnection) -> Result<Vec<UpsertDiffRow>> {
    let crns: Vec<&str> = courses
        .iter()
        .map(|c| c.course_reference_number.as_str())
        .collect();

    let subjects: Vec<&str> = courses.iter().map(|c| c.subject.as_str()).collect();

    let course_numbers: Vec<&str> = courses.iter().map(|c| c.course_number.as_str()).collect();

    let titles: Vec<&str> = courses.iter().map(|c| c.course_title.as_str()).collect();

    let titles: Vec<String> = courses
        .iter()
        .map(|c| decode_html_entities(&c.course_title))
        .collect();
    let term_codes: Vec<&str> = courses.iter().map(|c| c.term.as_str()).collect();

    let enrollments: Vec<i32> = courses.iter().map(|c| c.enrollment).collect();

    let max_enrollments: Vec<i32> = courses.iter().map(|c| c.maximum_enrollment).collect();

    let wait_counts: Vec<i32> = courses.iter().map(|c| c.wait_count).collect();

    let wait_capacities: Vec<i32> = courses.iter().map(|c| c.wait_capacity).collect();

    // Perform batch upsert using UNNEST for efficient bulk insertion
    let result = sqlx::query(
    // New scalar fields
    let sequence_numbers: Vec<Option<&str>> = courses
        .iter()
        .map(|c| Some(c.sequence_number.as_str()))
        .collect();
    let parts_of_term: Vec<Option<&str>> = courses
        .iter()
        .map(|c| Some(c.part_of_term.as_str()))
        .collect();
    let instructional_methods: Vec<Option<&str>> = courses
        .iter()
        .map(|c| Some(c.instructional_method.as_str()))
        .collect();
    let campuses: Vec<Option<String>> = courses.iter().map(extract_campus_code).collect();
    let credit_hours: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hours).collect();
    let credit_hour_lows: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hour_low).collect();
    let credit_hour_highs: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hour_high).collect();
    let cross_lists: Vec<Option<&str>> = courses.iter().map(|c| c.cross_list.as_deref()).collect();
    let cross_list_capacities: Vec<Option<i32>> =
        courses.iter().map(|c| c.cross_list_capacity).collect();
    let cross_list_counts: Vec<Option<i32>> = courses.iter().map(|c| c.cross_list_count).collect();
    let link_identifiers: Vec<Option<&str>> = courses
        .iter()
        .map(|c| c.link_identifier.as_deref())
        .collect();
    let is_section_linkeds: Vec<Option<bool>> =
        courses.iter().map(|c| Some(c.is_section_linked)).collect();

    // JSONB fields
    let meeting_times_json: Vec<serde_json::Value> =
        courses.iter().map(to_db_meeting_times).collect();
    let attributes_json: Vec<serde_json::Value> = courses.iter().map(to_db_attributes).collect();

    let rows = sqlx::query_as::<_, UpsertDiffRow>(
        r#"
        INSERT INTO courses (
            crn, subject, course_number, title, term_code,
            enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at
        WITH old_data AS (
            SELECT id, enrollment, max_enrollment, wait_count, wait_capacity,
                   subject, course_number, title,
                   sequence_number, part_of_term, instructional_method, campus,
                   credit_hours, credit_hour_low, credit_hour_high,
                   cross_list, cross_list_capacity, cross_list_count,
                   link_identifier, is_section_linked,
                   meeting_times, attributes,
                   crn, term_code
            FROM courses
            WHERE (crn, term_code) IN (SELECT * FROM UNNEST($1::text[], $5::text[]))
        ),
        upserted AS (
            INSERT INTO courses (
                crn, subject, course_number, title, term_code,
                enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at,
                sequence_number, part_of_term, instructional_method, campus,
                credit_hours, credit_hour_low, credit_hour_high,
                cross_list, cross_list_capacity, cross_list_count,
                link_identifier, is_section_linked,
                meeting_times, attributes
            )
            SELECT
                v.crn, v.subject, v.course_number, v.title, v.term_code,
                v.enrollment, v.max_enrollment, v.wait_count, v.wait_capacity, NOW(),
                v.sequence_number, v.part_of_term, v.instructional_method, v.campus,
                v.credit_hours, v.credit_hour_low, v.credit_hour_high,
                v.cross_list, v.cross_list_capacity, v.cross_list_count,
                v.link_identifier, v.is_section_linked,
                v.meeting_times, v.attributes
            FROM UNNEST(
                $1::text[], $2::text[], $3::text[], $4::text[], $5::text[],
                $6::int4[], $7::int4[], $8::int4[], $9::int4[],
                $10::text[], $11::text[], $12::text[], $13::text[],
                $14::int4[], $15::int4[], $16::int4[],
                $17::text[], $18::int4[], $19::int4[],
                $20::text[], $21::bool[],
                $22::jsonb[], $23::jsonb[]
            ) AS v(
                crn, subject, course_number, title, term_code,
                enrollment, max_enrollment, wait_count, wait_capacity,
                sequence_number, part_of_term, instructional_method, campus,
                credit_hours, credit_hour_low, credit_hour_high,
                cross_list, cross_list_capacity, cross_list_count,
                link_identifier, is_section_linked,
                meeting_times, attributes
            )
            ON CONFLICT (crn, term_code)
            DO UPDATE SET
                subject = EXCLUDED.subject,
                course_number = EXCLUDED.course_number,
                title = EXCLUDED.title,
                enrollment = EXCLUDED.enrollment,
                max_enrollment = EXCLUDED.max_enrollment,
                wait_count = EXCLUDED.wait_count,
                wait_capacity = EXCLUDED.wait_capacity,
                last_scraped_at = EXCLUDED.last_scraped_at,
                sequence_number = EXCLUDED.sequence_number,
                part_of_term = EXCLUDED.part_of_term,
                instructional_method = EXCLUDED.instructional_method,
                campus = EXCLUDED.campus,
                credit_hours = EXCLUDED.credit_hours,
                credit_hour_low = EXCLUDED.credit_hour_low,
                credit_hour_high = EXCLUDED.credit_hour_high,
                cross_list = EXCLUDED.cross_list,
                cross_list_capacity = EXCLUDED.cross_list_capacity,
                cross_list_count = EXCLUDED.cross_list_count,
                link_identifier = EXCLUDED.link_identifier,
                is_section_linked = EXCLUDED.is_section_linked,
                meeting_times = EXCLUDED.meeting_times,
                attributes = EXCLUDED.attributes
            RETURNING *
        )
        SELECT * FROM UNNEST(
            $1::text[], $2::text[], $3::text[], $4::text[], $5::text[],
            $6::int4[], $7::int4[], $8::int4[], $9::int4[],
            array_fill(NOW()::timestamptz, ARRAY[$10])
        ) AS t(
            crn, subject, course_number, title, term_code,
            enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at
        )
        ON CONFLICT (crn, term_code)
        DO UPDATE SET
            subject = EXCLUDED.subject,
            course_number = EXCLUDED.course_number,
            title = EXCLUDED.title,
            enrollment = EXCLUDED.enrollment,
            max_enrollment = EXCLUDED.max_enrollment,
            wait_count = EXCLUDED.wait_count,
            wait_capacity = EXCLUDED.wait_capacity,
            last_scraped_at = EXCLUDED.last_scraped_at
        SELECT u.id,
               o.id AS old_id,
               u.crn, u.term_code,
               o.enrollment AS old_enrollment, u.enrollment AS new_enrollment,
               o.max_enrollment AS old_max_enrollment, u.max_enrollment AS new_max_enrollment,
               o.wait_count AS old_wait_count, u.wait_count AS new_wait_count,
               o.wait_capacity AS old_wait_capacity, u.wait_capacity AS new_wait_capacity,
               o.subject AS old_subject, u.subject AS new_subject,
               o.course_number AS old_course_number, u.course_number AS new_course_number,
               o.title AS old_title, u.title AS new_title,
               o.sequence_number AS old_sequence_number, u.sequence_number AS new_sequence_number,
               o.part_of_term AS old_part_of_term, u.part_of_term AS new_part_of_term,
               o.instructional_method AS old_instructional_method, u.instructional_method AS new_instructional_method,
               o.campus AS old_campus, u.campus AS new_campus,
               o.credit_hours AS old_credit_hours, u.credit_hours AS new_credit_hours,
               o.credit_hour_low AS old_credit_hour_low, u.credit_hour_low AS new_credit_hour_low,
               o.credit_hour_high AS old_credit_hour_high, u.credit_hour_high AS new_credit_hour_high,
               o.cross_list AS old_cross_list, u.cross_list AS new_cross_list,
               o.cross_list_capacity AS old_cross_list_capacity, u.cross_list_capacity AS new_cross_list_capacity,
               o.cross_list_count AS old_cross_list_count, u.cross_list_count AS new_cross_list_count,
               o.link_identifier AS old_link_identifier, u.link_identifier AS new_link_identifier,
               o.is_section_linked AS old_is_section_linked, u.is_section_linked AS new_is_section_linked,
               o.meeting_times AS old_meeting_times, u.meeting_times AS new_meeting_times,
               o.attributes AS old_attributes, u.attributes AS new_attributes
        FROM upserted u
        LEFT JOIN old_data o ON u.crn = o.crn AND u.term_code = o.term_code
        "#,
    )
    .bind(&crns)
@@ -104,19 +603,162 @@ pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Resul
    .bind(&max_enrollments)
    .bind(&wait_counts)
    .bind(&wait_capacities)
    .bind(course_count as i32)
    .execute(db_pool)
    .bind(&sequence_numbers)
    .bind(&parts_of_term)
    .bind(&instructional_methods)
    .bind(&campuses)
    .bind(&credit_hours)
    .bind(&credit_hour_lows)
    .bind(&credit_hour_highs)
    .bind(&cross_lists)
    .bind(&cross_list_capacities)
    .bind(&cross_list_counts)
    .bind(&link_identifiers)
    .bind(&is_section_linkeds)
    .bind(&meeting_times_json)
    .bind(&attributes_json)
    .fetch_all(&mut *conn)
    .await
    .map_err(|e| anyhow::anyhow!("Failed to batch upsert courses: {}", e))?;

    let duration = start.elapsed();
    Ok(rows)
}

    info!(
        courses_count = course_count,
        rows_affected = result.rows_affected(),
        duration_ms = duration.as_millis(),
        "Batch upserted courses"
    );
/// Deduplicate and upsert all instructors from the batch by email.
/// Returns a map of lowercased_email -> instructor id for junction linking.
async fn upsert_instructors(
    courses: &[Course],
    conn: &mut PgConnection,
) -> Result<HashMap<String, i32>> {
    let mut seen = HashSet::new();
    let mut display_names: Vec<String> = Vec::new();
    let mut first_names: Vec<Option<String>> = Vec::new();
    let mut last_names: Vec<Option<String>> = Vec::new();
    let mut emails_lower: Vec<String> = Vec::new();
    let mut skipped_no_email = 0u32;

    for course in courses {
        for faculty in &course.faculty {
            if let Some(email) = &faculty.email_address {
                let email_lower = email.to_lowercase();
                if seen.insert(email_lower.clone()) {
                    let parts = parse_banner_name(&faculty.display_name);
                    display_names.push(decode_html_entities(&faculty.display_name));
                    first_names.push(parts.as_ref().map(|p| p.first.clone()));
                    last_names.push(parts.as_ref().map(|p| p.last.clone()));
                    emails_lower.push(email_lower);
                }
            } else {
                skipped_no_email += 1;
            }
        }
    }

    if skipped_no_email > 0 {
        tracing::warn!(
            count = skipped_no_email,
            "Skipped instructors with no email address"
        );
    }

    if display_names.is_empty() {
        return Ok(HashMap::new());
    }

    let email_refs: Vec<&str> = emails_lower.iter().map(|s| s.as_str()).collect();
    let first_name_refs: Vec<Option<&str>> = first_names.iter().map(|s| s.as_deref()).collect();
    let last_name_refs: Vec<Option<&str>> = last_names.iter().map(|s| s.as_deref()).collect();

    let rows: Vec<(i32, String)> = sqlx::query_as(
        r#"
        INSERT INTO instructors (display_name, email, first_name, last_name)
        SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[], $4::text[])
        ON CONFLICT (email)
        DO UPDATE SET
            display_name = EXCLUDED.display_name,
            first_name = EXCLUDED.first_name,
            last_name = EXCLUDED.last_name
        RETURNING id, email
        "#,
    )
    .bind(&display_names)
    .bind(&email_refs)
    .bind(&first_name_refs)
    .bind(&last_name_refs)
    .fetch_all(&mut *conn)
    .await
    .map_err(|e| anyhow::anyhow!("Failed to batch upsert instructors: {}", e))?;

    Ok(rows.into_iter().map(|(id, email)| (email, id)).collect())
}

/// Link courses to their instructors via the junction table.
async fn upsert_course_instructors(
    courses: &[Course],
    crn_term_to_id: &HashMap<(&str, &str), i32>,
    email_to_id: &HashMap<String, i32>,
    conn: &mut PgConnection,
) -> Result<()> {
    let mut cids = Vec::new();
    let mut instructor_ids: Vec<i32> = Vec::new();
    let mut banner_ids: Vec<&str> = Vec::new();
    let mut primaries = Vec::new();

    for course in courses {
        let key = (
            course.course_reference_number.as_str(),
            course.term.as_str(),
        );
        let Some(&course_id) = crn_term_to_id.get(&key) else {
            tracing::warn!(
                crn = %course.course_reference_number,
                term = %course.term,
                "No course_id found for CRN/term pair during instructor linking"
            );
            continue;
        };

        for faculty in &course.faculty {
            if let Some(email) = &faculty.email_address {
                let email_lower = email.to_lowercase();
                if let Some(&instructor_id) = email_to_id.get(&email_lower) {
                    cids.push(course_id);
                    instructor_ids.push(instructor_id);
                    banner_ids.push(faculty.banner_id.as_str());
                    primaries.push(faculty.primary_indicator);
                }
            }
        }
    }

    if cids.is_empty() {
        return Ok(());
    }

    // Delete existing links for these courses then re-insert.
    // This handles instructor changes cleanly.
    sqlx::query("DELETE FROM course_instructors WHERE course_id = ANY($1)")
        .bind(&cids)
        .execute(&mut *conn)
        .await?;

    sqlx::query(
        r#"
        INSERT INTO course_instructors (course_id, instructor_id, banner_id, is_primary)
        SELECT * FROM UNNEST($1::int4[], $2::int4[], $3::text[], $4::bool[])
        ON CONFLICT (course_id, instructor_id)
        DO UPDATE SET
            banner_id = EXCLUDED.banner_id,
            is_primary = EXCLUDED.is_primary
        "#,
    )
    .bind(&cids)
    .bind(&instructor_ids)
    .bind(&banner_ids)
    .bind(&primaries)
    .execute(&mut *conn)
    .await
    .map_err(|e| anyhow::anyhow!("Failed to batch upsert course_instructors: {}", e))?;

    Ok(())
}

@@ -0,0 +1,376 @@
//! Database query functions for courses, used by the web API.

use crate::data::models::{Course, CourseInstructorDetail};
use crate::error::Result;
use sqlx::PgPool;
use std::collections::HashMap;
use ts_rs::TS;

/// Column to sort search results by.
#[derive(Debug, Clone, Copy, serde::Deserialize, serde::Serialize, TS)]
#[serde(rename_all = "snake_case")]
#[ts(export)]
pub enum SortColumn {
    CourseCode,
    Title,
    Instructor,
    Time,
    Seats,
}

/// Sort direction.
#[derive(Debug, Clone, Copy, serde::Deserialize, serde::Serialize, TS)]
#[serde(rename_all = "snake_case")]
#[ts(export)]
pub enum SortDirection {
    Asc,
    Desc,
}

/// Aggregate min/max ranges for filter sliders, computed per-term.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, ts_rs::TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct FilterRanges {
    pub course_number_min: i32,
    pub course_number_max: i32,
    pub credit_hour_min: i32,
    pub credit_hour_max: i32,
    pub wait_count_max: i32,
}

/// Shared WHERE clause for course search filters.
///
/// Parameters $1-$17 match the bind order in `search_courses`.
const SEARCH_WHERE: &str = r#"
    WHERE term_code = $1
      AND ($2::text[] IS NULL OR subject = ANY($2))
      AND ($3::text IS NULL OR title_search @@ plainto_tsquery('simple', $3) OR title ILIKE '%' || $3 || '%')
      AND ($4::int IS NULL OR course_number::int >= $4)
      AND ($5::int IS NULL OR course_number::int <= $5)
      AND ($6::bool = false OR max_enrollment > enrollment)
      AND ($7::text[] IS NULL OR instructional_method = ANY($7))
      AND ($8::text[] IS NULL OR campus = ANY($8))
      AND ($9::int IS NULL OR wait_count <= $9)
      AND ($10::text[] IS NULL OR EXISTS (
          SELECT 1 FROM jsonb_array_elements(meeting_times) AS mt
          WHERE (NOT 'monday' = ANY($10) OR (mt->>'monday')::bool)
            AND (NOT 'tuesday' = ANY($10) OR (mt->>'tuesday')::bool)
            AND (NOT 'wednesday' = ANY($10) OR (mt->>'wednesday')::bool)
            AND (NOT 'thursday' = ANY($10) OR (mt->>'thursday')::bool)
            AND (NOT 'friday' = ANY($10) OR (mt->>'friday')::bool)
            AND (NOT 'saturday' = ANY($10) OR (mt->>'saturday')::bool)
            AND (NOT 'sunday' = ANY($10) OR (mt->>'sunday')::bool)
      ))
      AND ($11::text IS NULL OR EXISTS (
          SELECT 1 FROM jsonb_array_elements(meeting_times) AS mt
          WHERE (mt->>'begin_time') >= $11
      ))
      AND ($12::text IS NULL OR EXISTS (
          SELECT 1 FROM jsonb_array_elements(meeting_times) AS mt
          WHERE (mt->>'end_time') <= $12
      ))
      AND ($13::text[] IS NULL OR part_of_term = ANY($13))
      AND ($14::text[] IS NULL OR EXISTS (
          SELECT 1 FROM jsonb_array_elements_text(attributes) a
          WHERE a = ANY($14)
      ))
      AND ($15::int IS NULL OR COALESCE(credit_hours, credit_hour_low, 0) >= $15)
      AND ($16::int IS NULL OR COALESCE(credit_hours, credit_hour_high, 0) <= $16)
      AND ($17::text IS NULL OR EXISTS (
          SELECT 1 FROM course_instructors ci
          JOIN instructors i ON i.id = ci.instructor_id
          WHERE ci.course_id = courses.id
            AND i.display_name ILIKE '%' || $17 || '%'
      ))
"#;

/// Build a safe ORDER BY clause from typed sort parameters.
///
/// All column names are hardcoded string literals — no caller input is interpolated.
fn sort_clause(column: Option<SortColumn>, direction: Option<SortDirection>) -> String {
    let dir = match direction.unwrap_or(SortDirection::Asc) {
        SortDirection::Asc => "ASC",
        SortDirection::Desc => "DESC",
    };

    match column {
        Some(SortColumn::CourseCode) => {
            format!("subject {dir}, course_number {dir}, sequence_number {dir}")
        }
        Some(SortColumn::Title) => format!("title {dir}"),
        Some(SortColumn::Instructor) => {
            format!(
                "(SELECT i.display_name FROM course_instructors ci \
                 JOIN instructors i ON i.id = ci.instructor_id \
                 WHERE ci.course_id = courses.id AND ci.is_primary = true \
                 LIMIT 1) {dir} NULLS LAST"
            )
        }
        Some(SortColumn::Time) => {
            format!("(meeting_times->0->>'begin_time') {dir} NULLS LAST")
        }
        Some(SortColumn::Seats) => {
            format!("(max_enrollment - enrollment) {dir}")
        }
        None => "subject ASC, course_number ASC, sequence_number ASC".to_string(),
    }
}
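A quick sketch of the clause builder's outputs (not from the source):

```rust
#[test]
fn sort_clause_sketch() {
    assert_eq!(
        sort_clause(Some(SortColumn::Seats), Some(SortDirection::Desc)),
        "(max_enrollment - enrollment) DESC"
    );
    // No column defaults to the course-code ordering.
    assert_eq!(
        sort_clause(None, None),
        "subject ASC, course_number ASC, sequence_number ASC"
    );
}
```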
|
||||

/// Search courses by term with optional filters.
///
/// Returns `(courses, total_count)` for pagination. Uses FTS tsvector for word
/// search and falls back to trigram ILIKE for substring matching.
#[allow(clippy::too_many_arguments)]
pub async fn search_courses(
    db_pool: &PgPool,
    term_code: &str,
    subject: Option<&[String]>,
    title_query: Option<&str>,
    course_number_low: Option<i32>,
    course_number_high: Option<i32>,
    open_only: bool,
    instructional_method: Option<&[String]>,
    campus: Option<&[String]>,
    wait_count_max: Option<i32>,
    days: Option<&[String]>,
    time_start: Option<&str>,
    time_end: Option<&str>,
    part_of_term: Option<&[String]>,
    attributes: Option<&[String]>,
    credit_hour_min: Option<i32>,
    credit_hour_max: Option<i32>,
    instructor: Option<&str>,
    limit: i32,
    offset: i32,
    sort_by: Option<SortColumn>,
    sort_dir: Option<SortDirection>,
) -> Result<(Vec<Course>, i64)> {
    let order_by = sort_clause(sort_by, sort_dir);

    let data_query =
        format!("SELECT * FROM courses {SEARCH_WHERE} ORDER BY {order_by} LIMIT $18 OFFSET $19");
    let count_query = format!("SELECT COUNT(*) FROM courses {SEARCH_WHERE}");

    let courses = sqlx::query_as::<_, Course>(&data_query)
        .bind(term_code) // $1
        .bind(subject) // $2
        .bind(title_query) // $3
        .bind(course_number_low) // $4
        .bind(course_number_high) // $5
        .bind(open_only) // $6
        .bind(instructional_method) // $7
        .bind(campus) // $8
        .bind(wait_count_max) // $9
        .bind(days) // $10
        .bind(time_start) // $11
        .bind(time_end) // $12
        .bind(part_of_term) // $13
        .bind(attributes) // $14
        .bind(credit_hour_min) // $15
        .bind(credit_hour_max) // $16
        .bind(instructor) // $17
        .bind(limit) // $18
        .bind(offset) // $19
        .fetch_all(db_pool)
        .await?;

    let total: (i64,) = sqlx::query_as(&count_query)
        .bind(term_code) // $1
        .bind(subject) // $2
        .bind(title_query) // $3
        .bind(course_number_low) // $4
        .bind(course_number_high) // $5
        .bind(open_only) // $6
        .bind(instructional_method) // $7
        .bind(campus) // $8
        .bind(wait_count_max) // $9
        .bind(days) // $10
        .bind(time_start) // $11
        .bind(time_end) // $12
        .bind(part_of_term) // $13
        .bind(attributes) // $14
        .bind(credit_hour_min) // $15
        .bind(credit_hour_max) // $16
        .bind(instructor) // $17
        .fetch_one(db_pool)
        .await?;

    Ok((courses, total.0))
}
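
A hedged usage sketch (the term code and subject are hypothetical, and the surrounding async handler is assumed; `pool` is a connected `PgPool`): fetch the first page of open CS sections.

// Sketch only: argument values are hypothetical.
let subjects = vec!["CS".to_string()];
let (courses, total) = search_courses(
    &pool,
    "202510",                  // term_code (hypothetical)
    Some(subjects.as_slice()), // subject
    None,                      // title_query
    None, None,                // course_number_low / high
    true,                      // open_only
    None, None, None,          // instructional_method, campus, wait_count_max
    None, None, None,          // days, time_start, time_end
    None, None,                // part_of_term, attributes
    None, None,                // credit_hour_min / max
    None,                      // instructor
    25, 0,                     // limit, offset
    Some(SortColumn::CourseCode),
    Some(SortDirection::Asc),
)
.await?;
assert!(courses.len() as i64 <= total);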

/// Get a single course by CRN and term.
pub async fn get_course_by_crn(
    db_pool: &PgPool,
    crn: &str,
    term_code: &str,
) -> Result<Option<Course>> {
    let course =
        sqlx::query_as::<_, Course>("SELECT * FROM courses WHERE crn = $1 AND term_code = $2")
            .bind(crn)
            .bind(term_code)
            .fetch_optional(db_pool)
            .await?;
    Ok(course)
}

/// Get instructors for a single course by course ID.
pub async fn get_course_instructors(
    db_pool: &PgPool,
    course_id: i32,
) -> Result<Vec<CourseInstructorDetail>> {
    let rows = sqlx::query_as::<_, CourseInstructorDetail>(
        r#"
        SELECT i.id as instructor_id, ci.banner_id, i.display_name, i.email, ci.is_primary,
               rmp.avg_rating, rmp.num_ratings, rmp.rmp_legacy_id,
               ci.course_id
        FROM course_instructors ci
        JOIN instructors i ON i.id = ci.instructor_id
        LEFT JOIN LATERAL (
            SELECT rp.avg_rating, rp.num_ratings, rp.legacy_id as rmp_legacy_id
            FROM instructor_rmp_links irl
            JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
            WHERE irl.instructor_id = i.id
            ORDER BY rp.num_ratings DESC NULLS LAST, rp.legacy_id ASC
            LIMIT 1
        ) rmp ON true
        WHERE ci.course_id = $1
        ORDER BY ci.is_primary DESC, i.display_name
        "#,
    )
    .bind(course_id)
    .fetch_all(db_pool)
    .await?;
    Ok(rows)
}

/// Batch-fetch instructors for multiple courses in a single query.
///
/// Returns a map of `course_id → Vec<CourseInstructorDetail>`.
pub async fn get_instructors_for_courses(
    db_pool: &PgPool,
    course_ids: &[i32],
) -> Result<HashMap<i32, Vec<CourseInstructorDetail>>> {
    if course_ids.is_empty() {
        return Ok(HashMap::new());
    }

    let rows = sqlx::query_as::<_, CourseInstructorDetail>(
        r#"
        SELECT i.id as instructor_id, ci.banner_id, i.display_name, i.email, ci.is_primary,
               rmp.avg_rating, rmp.num_ratings, rmp.rmp_legacy_id,
               ci.course_id
        FROM course_instructors ci
        JOIN instructors i ON i.id = ci.instructor_id
        LEFT JOIN LATERAL (
            SELECT rp.avg_rating, rp.num_ratings, rp.legacy_id as rmp_legacy_id
            FROM instructor_rmp_links irl
            JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
            WHERE irl.instructor_id = i.id
            ORDER BY rp.num_ratings DESC NULLS LAST, rp.legacy_id ASC
            LIMIT 1
        ) rmp ON true
        WHERE ci.course_id = ANY($1)
        ORDER BY ci.course_id, ci.is_primary DESC, i.display_name
        "#,
    )
    .bind(course_ids)
    .fetch_all(db_pool)
    .await?;

    let mut map: HashMap<i32, Vec<CourseInstructorDetail>> = HashMap::new();
    for row in rows {
        // course_id is always present in the batch query
        let cid = row.course_id.unwrap_or_default();
        map.entry(cid).or_default().push(row);
    }
    Ok(map)
}
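
The returned map slots naturally into a search handler without an N+1 query; a sketch (assumes `Course` exposes its `id` and reuses the `courses` page from the search sketch above):

let ids: Vec<i32> = courses.iter().map(|c| c.id).collect();
let by_course = get_instructors_for_courses(&pool, &ids).await?;
for course in &courses {
    // A missing entry just means the course has no instructors on record.
    let instructors: &[CourseInstructorDetail] =
        by_course.get(&course.id).map(|v| v.as_slice()).unwrap_or(&[]);
    // ... attach `instructors` to the serialized course
}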

/// Get subjects for a term, sorted by total enrollment (descending).
///
/// Returns only subjects that have courses in the given term, with their
/// descriptions from reference_data and enrollment totals for ranking.
pub async fn get_subjects_by_enrollment(
    db_pool: &PgPool,
    term_code: &str,
) -> Result<Vec<(String, String, i64)>> {
    let rows: Vec<(String, String, i64)> = sqlx::query_as(
        r#"
        SELECT c.subject,
               COALESCE(rd.description, c.subject),
               COALESCE(SUM(c.enrollment), 0) as total_enrollment
        FROM courses c
        LEFT JOIN reference_data rd ON rd.category = 'subject' AND rd.code = c.subject
        WHERE c.term_code = $1
        GROUP BY c.subject, rd.description
        ORDER BY total_enrollment DESC
        "#,
    )
    .bind(term_code)
    .fetch_all(db_pool)
    .await?;
    Ok(rows)
}

/// Get all distinct term codes that have courses in the DB.
pub async fn get_available_terms(db_pool: &PgPool) -> Result<Vec<String>> {
    let rows: Vec<(String,)> =
        sqlx::query_as("SELECT DISTINCT term_code FROM courses ORDER BY term_code DESC")
            .fetch_all(db_pool)
            .await?;
    Ok(rows.into_iter().map(|(tc,)| tc).collect())
}

type RangeRow = (
    Option<i32>,
    Option<i32>,
    Option<i32>,
    Option<i32>,
    Option<i32>,
);

/// Get aggregate filter ranges for a term (course number, credit hours, waitlist).
pub async fn get_filter_ranges(db_pool: &PgPool, term_code: &str) -> Result<FilterRanges> {
    let row: RangeRow = sqlx::query_as(
        r#"
        SELECT
            MIN(course_number::int),
            MAX(course_number::int),
            MIN(COALESCE(credit_hours, credit_hour_low, 0)),
            MAX(COALESCE(credit_hours, credit_hour_high, 0)),
            MAX(wait_count)
        FROM courses
        WHERE term_code = $1
          AND course_number ~ '^\d+$'
        "#,
    )
    .bind(term_code)
    .fetch_one(db_pool)
    .await?;

    let cn_min = row.0.unwrap_or(1000);
    let cn_max = row.1.unwrap_or(9000);
    let ch_min = row.2.unwrap_or(0);
    let ch_max = row.3.unwrap_or(8);
    let wc_max_raw = row.4.unwrap_or(0);

    // Round course number to hundreds: floor min, ceil max
    let cn_min_rounded = (cn_min / 100) * 100;
    let cn_max_rounded = ((cn_max + 99) / 100) * 100;

    // Waitlist ceiling: (max / 10 + 1) * 10
    let wc_max = if wc_max_raw > 0 {
        (wc_max_raw / 10 + 1) * 10
    } else {
        0
    };

    Ok(FilterRanges {
        course_number_min: cn_min_rounded,
        course_number_max: cn_max_rounded,
        credit_hour_min: ch_min,
        credit_hour_max: ch_max,
        wait_count_max: wc_max,
    })
}
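
The rounding above is plain truncating integer arithmetic; a worked example with hypothetical values:

// cn_min = 1011   → (1011 / 100) * 100        == 1000  (floor to hundreds)
// cn_max = 4973   → ((4973 + 99) / 100) * 100 == 5000  (ceil to hundreds)
// wc_max_raw = 23 → (23 / 10 + 1) * 10        == 30    (next tens bucket)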

@@ -1,5 +1,12 @@
//! Database models and schema.

pub mod batch;
pub mod courses;
pub mod models;
pub mod names;
pub mod reference;
pub mod rmp;
pub mod rmp_matching;
pub mod scrape_jobs;
pub mod sessions;
pub mod users;

@@ -1,7 +1,68 @@
//! `sqlx` models for the database schema.

use chrono::{DateTime, Utc};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde_json::Value;
use ts_rs::TS;

/// Serialize an `i64` as a string to avoid JavaScript precision loss for values exceeding 2^53.
fn serialize_i64_as_string<S: Serializer>(value: &i64, serializer: S) -> Result<S::Ok, S::Error> {
    serializer.serialize_str(&value.to_string())
}

/// Deserialize an `i64` from either a number or a string.
fn deserialize_i64_from_string<'de, D: Deserializer<'de>>(
    deserializer: D,
) -> Result<i64, D::Error> {
    use serde::de;

    struct I64OrStringVisitor;

    impl<'de> de::Visitor<'de> for I64OrStringVisitor {
        type Value = i64;

        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
            formatter.write_str("an integer or a string containing an integer")
        }

        fn visit_i64<E: de::Error>(self, value: i64) -> Result<i64, E> {
            Ok(value)
        }

        fn visit_u64<E: de::Error>(self, value: u64) -> Result<i64, E> {
            i64::try_from(value).map_err(|_| E::custom(format!("u64 {value} out of i64 range")))
        }

        fn visit_str<E: de::Error>(self, value: &str) -> Result<i64, E> {
            value.parse().map_err(de::Error::custom)
        }
    }

    deserializer.deserialize_any(I64OrStringVisitor)
}
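
A minimal round-trip sketch of the two helpers (the `IdOnly` wrapper is hypothetical, defined in the same module so the attribute paths resolve):

#[derive(Serialize, Deserialize)]
struct IdOnly {
    #[serde(
        serialize_with = "serialize_i64_as_string",
        deserialize_with = "deserialize_i64_from_string"
    )]
    id: i64,
}

// serde_json::to_string(&IdOnly { id: 1234567890123456789 })
//   → r#"{"id":"1234567890123456789"}"#   (safe past JavaScript's 2^53 limit)
// Both {"id":"123"} and {"id":123} deserialize back to the same i64.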

/// Represents a meeting time stored as JSONB in the courses table.
#[derive(Debug, Clone, Serialize, Deserialize, TS)]
#[ts(export)]
pub struct DbMeetingTime {
    pub begin_time: Option<String>,
    pub end_time: Option<String>,
    pub start_date: String,
    pub end_date: String,
    pub monday: bool,
    pub tuesday: bool,
    pub wednesday: bool,
    pub thursday: bool,
    pub friday: bool,
    pub saturday: bool,
    pub sunday: bool,
    pub building: Option<String>,
    pub building_description: Option<String>,
    pub room: Option<String>,
    pub campus: Option<String>,
    pub meeting_type: String,
    pub meeting_schedule_type: String,
}

#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
@@ -17,6 +78,65 @@ pub struct Course
    pub wait_count: i32,
    pub wait_capacity: i32,
    pub last_scraped_at: DateTime<Utc>,
    // New scalar fields
    pub sequence_number: Option<String>,
    pub part_of_term: Option<String>,
    pub instructional_method: Option<String>,
    pub campus: Option<String>,
    pub credit_hours: Option<i32>,
    pub credit_hour_low: Option<i32>,
    pub credit_hour_high: Option<i32>,
    pub cross_list: Option<String>,
    pub cross_list_capacity: Option<i32>,
    pub cross_list_count: Option<i32>,
    pub link_identifier: Option<String>,
    pub is_section_linked: Option<bool>,
    // JSONB fields
    pub meeting_times: Value,
    pub attributes: Value,
}

#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct Instructor {
    pub id: i32,
    pub display_name: String,
    pub email: String,
    pub rmp_match_status: String,
    pub first_name: Option<String>,
    pub last_name: Option<String>,
}

#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseInstructor {
    pub course_id: i32,
    pub instructor_id: i32,
    pub banner_id: String,
    pub is_primary: bool,
}

/// Joined instructor data for a course (from course_instructors + instructors + rmp_professors).
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseInstructorDetail {
    pub instructor_id: i32,
    pub banner_id: String,
    pub display_name: String,
    pub email: String,
    pub is_primary: bool,
    pub avg_rating: Option<f32>,
    pub num_ratings: Option<i32>,
    pub rmp_legacy_id: Option<i32>,
    /// Present when fetched via batch query; `None` for single-course queries.
    pub course_id: Option<i32>,
}

#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct ReferenceData {
    pub category: String,
    pub code: String,
    pub description: String,
}

#[allow(dead_code)]
@@ -41,6 +161,16 @@ pub struct CourseAudit
    pub new_value: String,
}

/// Aggregate counts returned by batch upsert, used for scrape job result logging.
#[derive(Debug, Clone, Default)]
pub struct UpsertCounts {
    pub courses_fetched: i32,
    pub courses_changed: i32,
    pub courses_unchanged: i32,
    pub audits_generated: i32,
    pub metrics_generated: i32,
}

/// The priority level of a scrape job.
#[derive(sqlx::Type, Copy, Debug, Clone)]
#[sqlx(type_name = "scrape_priority", rename_all = "PascalCase")]
@@ -61,6 +191,21 @@ pub enum TargetType
    SingleCrn,
}

/// Computed status for a scrape job, derived from existing fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub enum ScrapeJobStatus {
    Processing,
    StaleLock,
    Exhausted,
    Scheduled,
    Pending,
}

/// How long a lock can be held before it is considered stale (mirrors `scrape_jobs::LOCK_EXPIRY`).
const LOCK_EXPIRY_SECS: i64 = 10 * 60;

/// Represents a queryable job from the database.
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
@@ -76,4 +221,54 @@ pub struct ScrapeJob
    pub retry_count: i32,
    /// Maximum number of retry attempts allowed (non-negative, enforced by CHECK constraint)
    pub max_retries: i32,
    /// When the job last entered the "ready to pick up" state.
    /// Set to NOW() on creation; updated to NOW() on retry.
    pub queued_at: DateTime<Utc>,
}

impl ScrapeJob {
    /// Compute the current status of this job from its fields.
    pub fn status(&self) -> ScrapeJobStatus {
        let now = Utc::now();
        match self.locked_at {
            Some(locked) if (now - locked).num_seconds() < LOCK_EXPIRY_SECS => {
                ScrapeJobStatus::Processing
            }
            Some(_) => ScrapeJobStatus::StaleLock,
            None if self.retry_count >= self.max_retries && self.max_retries > 0 => {
                ScrapeJobStatus::Exhausted
            }
            None if self.execute_at > now => ScrapeJobStatus::Scheduled,
            None => ScrapeJobStatus::Pending,
        }
    }
}
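
Branch order matters here: a live lock always reads as `Processing`, and exhaustion is only reported for unlocked jobs. A worked example with hypothetical field values (LOCK_EXPIRY_SECS is 600):

// locked_at = Some(now - 2 min)  → 120 s < 600  → Processing
// locked_at = Some(now - 11 min) → 660 s ≥ 600  → StaleLock
// locked_at = None, retry_count = 3, max_retries = 3 → Exhausted
// locked_at = None, execute_at = now + 1 h           → Scheduled
// locked_at = None, execute_at ≤ now, retries left   → Pending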

/// A user authenticated via Discord OAuth.
#[derive(sqlx::FromRow, Debug, Clone, Serialize, Deserialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct User {
    #[serde(
        serialize_with = "serialize_i64_as_string",
        deserialize_with = "deserialize_i64_from_string"
    )]
    #[ts(type = "string")]
    pub discord_id: i64,
    pub discord_username: String,
    pub discord_avatar_hash: Option<String>,
    pub is_admin: bool,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
}

/// A server-side session for an authenticated user.
#[allow(dead_code)] // Fields read via sqlx::FromRow; some only used in DB queries
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct UserSession {
    pub id: String,
    pub user_id: i64,
    pub created_at: DateTime<Utc>,
    pub expires_at: DateTime<Utc>,
    pub last_active_at: DateTime<Utc>,
}

@@ -0,0 +1,728 @@
//! Name parsing, normalization, and matching utilities.
//!
//! Handles the mismatch between Banner's single `display_name` ("Last, First Middle")
//! and RMP's separate `first_name`/`last_name` fields, plus data quality issues
//! from both sources (HTML entities, accents, nicknames, suffixes, junk).

use sqlx::PgPool;
use tracing::{info, warn};
use unicode_normalization::UnicodeNormalization;

/// Known name suffixes to extract from the last-name portion.
const SUFFIXES: &[&str] = &["iv", "iii", "ii", "jr", "sr"];

/// Parsed, cleaned name components.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NameParts {
    /// Cleaned display-quality first name(s): "H. Paul", "María"
    pub first: String,
    /// Cleaned display-quality last name: "O'Brien", "LeBlanc"
    pub last: String,
    /// Middle name/initial if detected: "Manuel", "L."
    pub middle: Option<String>,
    /// Suffix if detected: "III", "Jr"
    pub suffix: Option<String>,
    /// Nicknames extracted from parentheses: ["Ken"], ["Qian"]
    pub nicknames: Vec<String>,
}

/// Decode common HTML entities found in Banner data.
///
/// Handles both named entities (`&amp;`, `&uuml;`) and numeric references
/// (`&#39;`, `&#8217;`).
pub(crate) fn decode_html_entities(s: &str) -> String {
    if !s.contains('&') {
        return s.to_string();
    }
    htmlize::unescape(s).to_string()
}

/// Extract parenthesized nicknames from a name string.
///
/// `"William (Ken)"` → `("William", vec!["Ken"])`
/// `"Guenevere (Qian)"` → `("Guenevere", vec!["Qian"])`
/// `"John (jack) C."` → `("John C.", vec!["jack"])`
fn extract_nicknames(s: &str) -> (String, Vec<String>) {
    let mut nicknames = Vec::new();
    let mut cleaned = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch == '(' {
            let mut nick = String::new();
            for inner in chars.by_ref() {
                if inner == ')' {
                    break;
                }
                nick.push(inner);
            }
            let nick = nick.trim().to_string();
            if !nick.is_empty() {
                nicknames.push(nick);
            }
        } else if ch == '"' || ch == '\u{201C}' || ch == '\u{201D}' {
            // Extract quoted nicknames: Thomas "Butch" → nickname "Butch"
            let mut nick = String::new();
            for inner in chars.by_ref() {
                if inner == '"' || inner == '\u{201C}' || inner == '\u{201D}' {
                    break;
                }
                nick.push(inner);
            }
            let nick = nick.trim().to_string();
            if !nick.is_empty() {
                nicknames.push(nick);
            }
        } else {
            cleaned.push(ch);
        }
    }

    // Collapse multiple spaces left by extraction
    let cleaned = collapse_whitespace(&cleaned);
    (cleaned, nicknames)
}

/// Extract a suffix (Jr, Sr, II, III, IV) from the last-name portion.
///
/// `"LeBlanc III"` → `("LeBlanc", Some("III"))`
/// `"Smith Jr."` → `("Smith", Some("Jr."))`
fn extract_suffix(last: &str) -> (String, Option<String>) {
    // Try to match the last token as a suffix
    let tokens: Vec<&str> = last.split_whitespace().collect();
    if tokens.len() < 2 {
        return (last.to_string(), None);
    }

    let candidate = tokens.last().unwrap();
    let candidate_normalized = candidate.to_lowercase().trim_end_matches('.').to_string();

    if SUFFIXES.contains(&candidate_normalized.as_str()) {
        let name_part = tokens[..tokens.len() - 1].join(" ");
        return (name_part, Some(candidate.to_string()));
    }

    (last.to_string(), None)
}

/// Strip junk commonly found in RMP name fields.
///
/// - Trailing commas: `"Cronenberger,"` → `"Cronenberger"`
/// - Email addresses: `"Neel.Baumgardner@utsa.edu"` → `""` (returns empty)
fn strip_junk(s: &str) -> String {
    let s = s.trim();

    // If the string looks like an email, return empty
    if s.contains('@') && s.contains('.') && !s.contains(' ') {
        return String::new();
    }

    // Strip trailing commas
    s.trim_end_matches(',').trim().to_string()
}

/// Collapse runs of whitespace into single spaces and trim.
fn collapse_whitespace(s: &str) -> String {
    s.split_whitespace().collect::<Vec<_>>().join(" ")
}

/// Parse a Banner `display_name` ("Last, First Middle") into structured parts.
///
/// Handles HTML entities, suffixes, and multi-token names.
///
/// # Examples
///
/// ```
/// use banner::data::names::parse_banner_name;
///
/// let parts = parse_banner_name("O'Brien, Erin").unwrap();
/// assert_eq!(parts.first, "Erin");
/// assert_eq!(parts.last, "O'Brien");
/// ```
pub fn parse_banner_name(display_name: &str) -> Option<NameParts> {
    // 1. Decode HTML entities
    let decoded = decode_html_entities(display_name);

    // 2. Split on first comma
    let (last_part, first_part) = decoded.split_once(',')?;
    let last_part = last_part.trim();
    let first_part = first_part.trim();

    if last_part.is_empty() || first_part.is_empty() {
        return None;
    }

    // 3. Extract suffix from last name
    let (last_clean, suffix) = extract_suffix(last_part);

    // 4. Parse first-name portion: first token(s) + optional middle.
    //    Banner format is "First Middle", so we keep all tokens as first_name
    //    to support "H. Paul" style names
    let first_clean = collapse_whitespace(first_part);

    Some(NameParts {
        first: first_clean,
        last: last_clean,
        middle: None, // Banner doesn't clearly delineate middle vs first
        suffix,
        nicknames: Vec::new(), // Banner doesn't include nicknames
    })
}

/// Parse RMP professor name fields into structured parts.
///
/// Handles junk data, nicknames in parentheses/quotes, and suffixes.
///
/// # Examples
///
/// ```
/// use banner::data::names::parse_rmp_name;
///
/// let parts = parse_rmp_name("William (Ken)", "Burchenal").unwrap();
/// assert_eq!(parts.first, "William");
/// assert_eq!(parts.nicknames, vec!["Ken"]);
/// ```
pub fn parse_rmp_name(first_name: &str, last_name: &str) -> Option<NameParts> {
    let first_cleaned = strip_junk(first_name);
    let last_cleaned = strip_junk(last_name);

    if first_cleaned.is_empty() || last_cleaned.is_empty() {
        return None;
    }

    // Extract nicknames from parens/quotes in first name
    let (first_no_nicks, nicknames) = extract_nicknames(&first_cleaned);
    let first_final = collapse_whitespace(&first_no_nicks);

    // Extract suffix from last name
    let (last_final, suffix) = extract_suffix(&last_cleaned);

    if first_final.is_empty() || last_final.is_empty() {
        return None;
    }

    Some(NameParts {
        first: first_final,
        last: last_final,
        middle: None,
        suffix,
        nicknames,
    })
}

/// Normalize a name string for matching index keys.
///
/// Pipeline: lowercase → NFD decompose → strip combining marks →
/// strip ALL punctuation, hyphens, and whitespace.
///
/// This produces a compact, space-free string so that "Aguirre Mesa" (Banner)
/// and "Aguirre-Mesa" (RMP) both become "aguirremesa".
///
/// # Examples
///
/// ```
/// use banner::data::names::normalize_for_matching;
///
/// assert_eq!(normalize_for_matching("García"), "garcia");
/// assert_eq!(normalize_for_matching("O'Brien"), "obrien");
/// assert_eq!(normalize_for_matching("Aguirre-Mesa"), "aguirremesa");
/// assert_eq!(normalize_for_matching("Aguirre Mesa"), "aguirremesa");
/// ```
pub fn normalize_for_matching(s: &str) -> String {
    s.to_lowercase()
        .nfd()
        .filter(|c| {
            // Keep only non-combining alphabetic characters — strip everything else
            c.is_alphabetic() && !unicode_normalization::char::is_combining_mark(*c)
        })
        .collect()
}

/// Generate all matching index keys for a parsed name.
///
/// For a name like "H. Paul" / "LeBlanc" with no nicknames, generates:
/// - `("leblanc", "hpaul")` — full normalized first
/// - `("leblanc", "paul")` — individual token (if multi-token)
/// - `("leblanc", "h")` — individual token (if multi-token)
///
/// For a name like "William" / "Burchenal" with nickname "Ken":
/// - `("burchenal", "william")` — primary
/// - `("burchenal", "ken")` — nickname variant
pub fn matching_keys(parts: &NameParts) -> Vec<(String, String)> {
    let norm_last = normalize_for_matching(&parts.last);
    if norm_last.is_empty() {
        return Vec::new();
    }

    let mut keys = Vec::new();
    let mut seen = std::collections::HashSet::new();

    // Primary key: full first name (all spaces stripped)
    let norm_first_full = normalize_for_matching(&parts.first);
    if !norm_first_full.is_empty() && seen.insert(norm_first_full.clone()) {
        keys.push((norm_last.clone(), norm_first_full));
    }

    // Individual tokens from the display-form first name
    // (split before full normalization so we can generate per-token keys)
    let first_tokens: Vec<&str> = parts.first.split_whitespace().collect();
    if first_tokens.len() > 1 {
        for token in &first_tokens {
            let norm_token = normalize_for_matching(token);
            if !norm_token.is_empty() && seen.insert(norm_token.clone()) {
                keys.push((norm_last.clone(), norm_token));
            }
        }
    }

    // Nickname variants
    for nick in &parts.nicknames {
        let norm_nick = normalize_for_matching(nick);
        if !norm_nick.is_empty() && seen.insert(norm_nick.clone()) {
            keys.push((norm_last.clone(), norm_nick));
        }
    }

    keys
}

/// Backfill `first_name`/`last_name` columns for all instructors that have
/// a `display_name` but NULL structured name fields.
///
/// Parses each `display_name` using [`parse_banner_name`] and updates the row.
/// Logs warnings for any names that fail to parse.
pub async fn backfill_instructor_names(db_pool: &PgPool) -> crate::error::Result<()> {
    let rows: Vec<(i32, String)> = sqlx::query_as(
        "SELECT id, display_name FROM instructors WHERE first_name IS NULL OR last_name IS NULL",
    )
    .fetch_all(db_pool)
    .await?;

    if rows.is_empty() {
        return Ok(());
    }

    let total = rows.len();
    let mut ids: Vec<i32> = Vec::with_capacity(total);
    let mut firsts: Vec<String> = Vec::with_capacity(total);
    let mut lasts: Vec<String> = Vec::with_capacity(total);
    let mut unparseable = 0usize;

    for (id, display_name) in &rows {
        match parse_banner_name(display_name) {
            Some(parts) => {
                ids.push(*id);
                firsts.push(parts.first);
                lasts.push(parts.last);
            }
            None => {
                warn!(
                    id,
                    display_name, "Failed to parse instructor display_name during backfill"
                );
                unparseable += 1;
            }
        }
    }

    if !ids.is_empty() {
        let first_refs: Vec<&str> = firsts.iter().map(|s| s.as_str()).collect();
        let last_refs: Vec<&str> = lasts.iter().map(|s| s.as_str()).collect();

        sqlx::query(
            r#"
            UPDATE instructors i
            SET first_name = v.first_name, last_name = v.last_name
            FROM UNNEST($1::int4[], $2::text[], $3::text[])
                 AS v(id, first_name, last_name)
            WHERE i.id = v.id
            "#,
        )
        .bind(&ids)
        .bind(&first_refs)
        .bind(&last_refs)
        .execute(db_pool)
        .await?;
    }

    info!(
        total,
        updated = ids.len(),
        unparseable,
        "Instructor name backfill complete"
    );

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // HTML entity decoding
    // -----------------------------------------------------------------------

    #[test]
    fn decode_apostrophe_entity() {
        assert_eq!(decode_html_entities("O&#39;Brien"), "O'Brien");
    }

    #[test]
    fn decode_umlaut_entity() {
        assert_eq!(decode_html_entities("B&uuml;lent"), "Bülent");
    }

    #[test]
    fn decode_no_entities() {
        assert_eq!(decode_html_entities("Smith"), "Smith");
    }

    // -----------------------------------------------------------------------
    // Nickname extraction
    // -----------------------------------------------------------------------

    #[test]
    fn extract_paren_nickname() {
        let (cleaned, nicks) = extract_nicknames("William (Ken)");
        assert_eq!(cleaned, "William");
        assert_eq!(nicks, vec!["Ken"]);
    }

    #[test]
    fn extract_quoted_nickname() {
        let (cleaned, nicks) = extract_nicknames("Thomas \"Butch\"");
        assert_eq!(cleaned, "Thomas");
        assert_eq!(nicks, vec!["Butch"]);
    }

    #[test]
    fn extract_paren_with_extra_text() {
        let (cleaned, nicks) = extract_nicknames("John (jack) C.");
        assert_eq!(cleaned, "John C.");
        assert_eq!(nicks, vec!["jack"]);
    }

    #[test]
    fn extract_no_nicknames() {
        let (cleaned, nicks) = extract_nicknames("Maria Elena");
        assert_eq!(cleaned, "Maria Elena");
        assert!(nicks.is_empty());
    }

    // -----------------------------------------------------------------------
    // Suffix extraction
    // -----------------------------------------------------------------------

    #[test]
    fn extract_suffix_iii() {
        let (name, suffix) = extract_suffix("LeBlanc III");
        assert_eq!(name, "LeBlanc");
        assert_eq!(suffix, Some("III".to_string()));
    }

    #[test]
    fn extract_suffix_jr_period() {
        let (name, suffix) = extract_suffix("Smith Jr.");
        assert_eq!(name, "Smith");
        assert_eq!(suffix, Some("Jr.".to_string()));
    }

    #[test]
    fn extract_no_suffix() {
        let (name, suffix) = extract_suffix("García");
        assert_eq!(name, "García");
        assert_eq!(suffix, None);
    }

    // -----------------------------------------------------------------------
    // Junk stripping
    // -----------------------------------------------------------------------

    #[test]
    fn strip_trailing_comma() {
        assert_eq!(strip_junk("Cronenberger,"), "Cronenberger");
    }

    #[test]
    fn strip_email_address() {
        assert_eq!(strip_junk("Neel.Baumgardner@utsa.edu"), "");
    }

    #[test]
    fn strip_clean_name() {
        assert_eq!(strip_junk(" Maria "), "Maria");
    }

    // -----------------------------------------------------------------------
    // normalize_for_matching
    // -----------------------------------------------------------------------

    #[test]
    fn normalize_strips_accents() {
        assert_eq!(normalize_for_matching("García"), "garcia");
    }

    #[test]
    fn normalize_strips_apostrophe() {
        assert_eq!(normalize_for_matching("O'Brien"), "obrien");
    }

    #[test]
    fn normalize_strips_hyphen() {
        assert_eq!(normalize_for_matching("Aguirre-Mesa"), "aguirremesa");
    }

    #[test]
    fn normalize_tilde_n() {
        assert_eq!(normalize_for_matching("Muñoz"), "munoz");
    }

    #[test]
    fn normalize_umlaut() {
        assert_eq!(normalize_for_matching("Müller"), "muller");
    }

    #[test]
    fn normalize_period() {
        assert_eq!(normalize_for_matching("H. Paul"), "hpaul");
    }

    #[test]
    fn normalize_strips_spaces() {
        assert_eq!(normalize_for_matching("Mary Lou"), "marylou");
    }

    // -----------------------------------------------------------------------
    // parse_banner_name
    // -----------------------------------------------------------------------

    #[test]
    fn banner_standard_name() {
        let p = parse_banner_name("Smith, John").unwrap();
        assert_eq!(p.first, "John");
        assert_eq!(p.last, "Smith");
        assert_eq!(p.suffix, None);
    }

    #[test]
    fn banner_html_entity_apostrophe() {
        let p = parse_banner_name("O&#39;Brien, Erin").unwrap();
        assert_eq!(p.first, "Erin");
        assert_eq!(p.last, "O'Brien");
    }

    #[test]
    fn banner_html_entity_umlaut() {
        let p = parse_banner_name("Temel, B&uuml;lent").unwrap();
        assert_eq!(p.first, "Bülent");
        assert_eq!(p.last, "Temel");
    }

    #[test]
    fn banner_suffix_iii() {
        let p = parse_banner_name("LeBlanc III, H. Paul").unwrap();
        assert_eq!(p.first, "H. Paul");
        assert_eq!(p.last, "LeBlanc");
        assert_eq!(p.suffix, Some("III".to_string()));
    }

    #[test]
    fn banner_suffix_ii() {
        let p = parse_banner_name("Ellis II, Ronald").unwrap();
        assert_eq!(p.first, "Ronald");
        assert_eq!(p.last, "Ellis");
        assert_eq!(p.suffix, Some("II".to_string()));
    }

    #[test]
    fn banner_multi_word_last() {
        let p = parse_banner_name("Aguirre Mesa, Andres").unwrap();
        assert_eq!(p.first, "Andres");
        assert_eq!(p.last, "Aguirre Mesa");
    }

    #[test]
    fn banner_hyphenated_last() {
        let p = parse_banner_name("Abu-Lail, Nehal").unwrap();
        assert_eq!(p.first, "Nehal");
        assert_eq!(p.last, "Abu-Lail");
    }

    #[test]
    fn banner_with_middle_name() {
        let p = parse_banner_name("Smith, John David").unwrap();
        assert_eq!(p.first, "John David");
        assert_eq!(p.last, "Smith");
    }

    #[test]
    fn banner_no_comma() {
        assert!(parse_banner_name("SingleName").is_none());
    }

    #[test]
    fn banner_empty_first() {
        assert!(parse_banner_name("Smith,").is_none());
    }

    #[test]
    fn banner_empty_last() {
        assert!(parse_banner_name(", John").is_none());
    }

    // -----------------------------------------------------------------------
    // parse_rmp_name
    // -----------------------------------------------------------------------

    #[test]
    fn rmp_standard_name() {
        let p = parse_rmp_name("John", "Smith").unwrap();
        assert_eq!(p.first, "John");
        assert_eq!(p.last, "Smith");
    }

    #[test]
    fn rmp_with_nickname() {
        let p = parse_rmp_name("William (Ken)", "Burchenal").unwrap();
        assert_eq!(p.first, "William");
        assert_eq!(p.nicknames, vec!["Ken"]);
    }

    #[test]
    fn rmp_trailing_comma_last() {
        let p = parse_rmp_name("J.", "Cronenberger,").unwrap();
        assert_eq!(p.last, "Cronenberger");
    }

    #[test]
    fn rmp_email_in_first() {
        assert!(parse_rmp_name("Neel.Baumgardner@utsa.edu", "Baumgardner").is_none());
    }

    #[test]
    fn rmp_suffix_in_last() {
        let p = parse_rmp_name("H. Paul", "LeBlanc III").unwrap();
        assert_eq!(p.first, "H. Paul");
        assert_eq!(p.last, "LeBlanc");
        assert_eq!(p.suffix, Some("III".to_string()));
    }

    #[test]
    fn rmp_quoted_nickname() {
        let p = parse_rmp_name("Thomas \"Butch\"", "Matjeka").unwrap();
        assert_eq!(p.first, "Thomas");
        assert_eq!(p.nicknames, vec!["Butch"]);
    }

    #[test]
    fn rmp_accented_last() {
        let p = parse_rmp_name("Liliana", "Saldaña").unwrap();
        assert_eq!(p.last, "Saldaña");
    }

    // -----------------------------------------------------------------------
    // matching_keys
    // -----------------------------------------------------------------------

    #[test]
    fn keys_simple_name() {
        let parts = NameParts {
            first: "John".into(),
            last: "Smith".into(),
            middle: None,
            suffix: None,
            nicknames: vec![],
        };
        let keys = matching_keys(&parts);
        assert_eq!(keys, vec![("smith".into(), "john".into())]);
    }

    #[test]
    fn keys_multi_token_first() {
        let parts = NameParts {
            first: "H. Paul".into(),
            last: "LeBlanc".into(),
            middle: None,
            suffix: Some("III".into()),
            nicknames: vec![],
        };
        let keys = matching_keys(&parts);
        assert!(keys.contains(&("leblanc".into(), "hpaul".into())));
        assert!(keys.contains(&("leblanc".into(), "paul".into())));
        assert!(keys.contains(&("leblanc".into(), "h".into())));
        assert_eq!(keys.len(), 3);
    }

    #[test]
    fn keys_with_nickname() {
        let parts = NameParts {
            first: "William".into(),
            last: "Burchenal".into(),
            middle: None,
            suffix: None,
            nicknames: vec!["Ken".into()],
        };
        let keys = matching_keys(&parts);
        assert!(keys.contains(&("burchenal".into(), "william".into())));
        assert!(keys.contains(&("burchenal".into(), "ken".into())));
        assert_eq!(keys.len(), 2);
    }

    #[test]
    fn keys_hyphenated_last() {
        let parts = parse_banner_name("Aguirre-Mesa, Andres").unwrap();
        let keys = matching_keys(&parts);
        // Hyphen removed: "aguirremesa"
        assert!(keys.contains(&("aguirremesa".into(), "andres".into())));
    }

    #[test]
    fn keys_accented_name() {
        let parts = parse_rmp_name("Liliana", "Saldaña").unwrap();
        let keys = matching_keys(&parts);
        assert!(keys.contains(&("saldana".into(), "liliana".into())));
    }

    #[test]
    fn keys_cross_source_match() {
        // Banner: "Aguirre Mesa, Andres" → last="Aguirre Mesa"
        let banner = parse_banner_name("Aguirre Mesa, Andres").unwrap();
        let banner_keys = matching_keys(&banner);

        // RMP: "Andres" / "Aguirre-Mesa" → last="Aguirre-Mesa"
        let rmp = parse_rmp_name("Andres", "Aguirre-Mesa").unwrap();
        let rmp_keys = matching_keys(&rmp);

        // Both should normalize to ("aguirremesa", "andres")
        assert!(banner_keys.iter().any(|k| rmp_keys.contains(k)));
    }

    #[test]
    fn keys_accent_cross_match() {
        // Banner: "García, José" (if Banner ever has accents)
        let banner = parse_banner_name("Garcia, Jose").unwrap();
        let banner_keys = matching_keys(&banner);

        // RMP: "José" / "García"
        let rmp = parse_rmp_name("José", "García").unwrap();
        let rmp_keys = matching_keys(&rmp);

        // Both normalize to ("garcia", "jose")
        assert!(banner_keys.iter().any(|k| rmp_keys.contains(k)));
    }
}

@@ -0,0 +1,57 @@
//! Database operations for the `reference_data` table (code→description lookups).

use crate::data::models::ReferenceData;
use crate::error::Result;
use html_escape::decode_html_entities;
use sqlx::PgPool;

/// Batch upsert reference data entries.
pub async fn batch_upsert(entries: &[ReferenceData], db_pool: &PgPool) -> Result<()> {
    if entries.is_empty() {
        return Ok(());
    }

    let categories: Vec<&str> = entries.iter().map(|e| e.category.as_str()).collect();
    let codes: Vec<&str> = entries.iter().map(|e| e.code.as_str()).collect();
    let descriptions: Vec<String> = entries
        .iter()
        .map(|e| decode_html_entities(&e.description).into_owned())
        .collect();

    sqlx::query(
        r#"
        INSERT INTO reference_data (category, code, description)
        SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[])
        ON CONFLICT (category, code)
        DO UPDATE SET description = EXCLUDED.description
        "#,
    )
    .bind(&categories)
    .bind(&codes)
    .bind(&descriptions)
    .execute(db_pool)
    .await?;

    Ok(())
}

/// Get all reference data entries for a category.
pub async fn get_by_category(category: &str, db_pool: &PgPool) -> Result<Vec<ReferenceData>> {
    let rows = sqlx::query_as::<_, ReferenceData>(
        "SELECT category, code, description FROM reference_data WHERE category = $1 ORDER BY description",
    )
    .bind(category)
    .fetch_all(db_pool)
    .await?;
    Ok(rows)
}

/// Get all reference data entries (for cache initialization).
pub async fn get_all(db_pool: &PgPool) -> Result<Vec<ReferenceData>> {
    let rows = sqlx::query_as::<_, ReferenceData>(
        "SELECT category, code, description FROM reference_data ORDER BY category, description",
    )
    .fetch_all(db_pool)
    .await?;
    Ok(rows)
}

@@ -0,0 +1,192 @@
//! Database operations for RateMyProfessors data.

use crate::error::Result;
use crate::rmp::RmpProfessor;
use sqlx::PgPool;
use std::collections::HashSet;

/// Bulk upsert RMP professors using the UNNEST pattern.
///
/// Deduplicates by `legacy_id` before inserting — the RMP API can return
/// the same professor on multiple pages.
pub async fn batch_upsert_rmp_professors(
    professors: &[RmpProfessor],
    db_pool: &PgPool,
) -> Result<()> {
    if professors.is_empty() {
        return Ok(());
    }

    // Deduplicate: keep last occurrence per legacy_id (latest page wins)
    let mut seen = HashSet::new();
    let deduped: Vec<&RmpProfessor> = professors
        .iter()
        .rev()
        .filter(|p| seen.insert(p.legacy_id))
        .collect();

    let legacy_ids: Vec<i32> = deduped.iter().map(|p| p.legacy_id).collect();
    let graphql_ids: Vec<&str> = deduped.iter().map(|p| p.graphql_id.as_str()).collect();
    let first_names: Vec<String> = deduped
        .iter()
        .map(|p| p.first_name.trim().to_string())
        .collect();
    let first_name_refs: Vec<&str> = first_names.iter().map(|s| s.as_str()).collect();
    let last_names: Vec<String> = deduped
        .iter()
        .map(|p| p.last_name.trim().to_string())
        .collect();
    let last_name_refs: Vec<&str> = last_names.iter().map(|s| s.as_str()).collect();
    let departments: Vec<Option<&str>> = deduped.iter().map(|p| p.department.as_deref()).collect();
    let avg_ratings: Vec<Option<f32>> = deduped.iter().map(|p| p.avg_rating).collect();
    let avg_difficulties: Vec<Option<f32>> = deduped.iter().map(|p| p.avg_difficulty).collect();
    let num_ratings: Vec<i32> = deduped.iter().map(|p| p.num_ratings).collect();
    let would_take_again_pcts: Vec<Option<f32>> =
        deduped.iter().map(|p| p.would_take_again_pct).collect();

    sqlx::query(
        r#"
        INSERT INTO rmp_professors (
            legacy_id, graphql_id, first_name, last_name, department,
            avg_rating, avg_difficulty, num_ratings, would_take_again_pct,
            last_synced_at
        )
        SELECT
            v.legacy_id, v.graphql_id, v.first_name, v.last_name, v.department,
            v.avg_rating, v.avg_difficulty, v.num_ratings, v.would_take_again_pct,
            NOW()
        FROM UNNEST(
            $1::int4[], $2::text[], $3::text[], $4::text[], $5::text[],
            $6::real[], $7::real[], $8::int4[], $9::real[]
        ) AS v(
            legacy_id, graphql_id, first_name, last_name, department,
            avg_rating, avg_difficulty, num_ratings, would_take_again_pct
        )
        ON CONFLICT (legacy_id)
        DO UPDATE SET
            graphql_id = EXCLUDED.graphql_id,
            first_name = EXCLUDED.first_name,
            last_name = EXCLUDED.last_name,
            department = EXCLUDED.department,
            avg_rating = EXCLUDED.avg_rating,
            avg_difficulty = EXCLUDED.avg_difficulty,
            num_ratings = EXCLUDED.num_ratings,
            would_take_again_pct = EXCLUDED.would_take_again_pct,
            last_synced_at = EXCLUDED.last_synced_at
        "#,
    )
    .bind(&legacy_ids)
    .bind(&graphql_ids)
    .bind(&first_name_refs)
    .bind(&last_name_refs)
    .bind(&departments)
    .bind(&avg_ratings)
    .bind(&avg_difficulties)
    .bind(&num_ratings)
    .bind(&would_take_again_pcts)
    .execute(db_pool)
    .await
    .map_err(|e| anyhow::anyhow!("Failed to batch upsert RMP professors: {}", e))?;

    Ok(())
}
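
The `.rev()` plus `HashSet::insert` combination above is what makes the last page win; a self-contained sketch with toy data:

use std::collections::HashSet;

let pages = [(1, "stale"), (2, "only"), (1, "fresh")]; // (legacy_id, payload)
let mut seen = HashSet::new();
let kept: Vec<_> = pages.iter().rev().filter(|(id, _)| seen.insert(*id)).collect();
// Reversed iteration sees (1, "fresh") first, so (1, "stale") is filtered out:
assert_eq!(kept, vec![&(1, "fresh"), &(2, "only")]);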

/// Retrieve RMP rating data for an instructor by instructor id.
///
/// Returns `(avg_rating, num_ratings)` for the best linked RMP profile
/// (most ratings). Returns `None` if no link exists.
#[allow(dead_code)]
pub async fn get_instructor_rmp_data(
    db_pool: &PgPool,
    instructor_id: i32,
) -> Result<Option<(f32, i32)>> {
    let row: Option<(f32, i32)> = sqlx::query_as(
        r#"
        SELECT rp.avg_rating, rp.num_ratings
        FROM instructor_rmp_links irl
        JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
        WHERE irl.instructor_id = $1
          AND rp.avg_rating IS NOT NULL
        ORDER BY rp.num_ratings DESC NULLS LAST
        LIMIT 1
        "#,
    )
    .bind(instructor_id)
    .fetch_optional(db_pool)
    .await?;
    Ok(row)
}

/// Unmatch an instructor from an RMP profile.
///
/// Removes the link from `instructor_rmp_links` and updates the instructor's
/// `rmp_match_status` to 'unmatched' if no links remain.
///
/// If `rmp_legacy_id` is `Some`, removes only that specific link.
/// If `None`, removes all links for the instructor.
pub async fn unmatch_instructor(
    db_pool: &PgPool,
    instructor_id: i32,
    rmp_legacy_id: Option<i32>,
) -> Result<()> {
    let mut tx = db_pool.begin().await?;

    // Delete specific link or all links
    if let Some(legacy_id) = rmp_legacy_id {
        sqlx::query(
            "DELETE FROM instructor_rmp_links WHERE instructor_id = $1 AND rmp_legacy_id = $2",
        )
        .bind(instructor_id)
        .bind(legacy_id)
        .execute(&mut *tx)
        .await?;
    } else {
        sqlx::query("DELETE FROM instructor_rmp_links WHERE instructor_id = $1")
            .bind(instructor_id)
            .execute(&mut *tx)
            .await?;
    }

    // Check if any links remain
    let (remaining,): (i64,) =
        sqlx::query_as("SELECT COUNT(*) FROM instructor_rmp_links WHERE instructor_id = $1")
            .bind(instructor_id)
            .fetch_one(&mut *tx)
            .await?;

    // Update instructor status if no links remain
    if remaining == 0 {
        sqlx::query("UPDATE instructors SET rmp_match_status = 'unmatched' WHERE id = $1")
            .bind(instructor_id)
            .execute(&mut *tx)
            .await?;
    }

    // Reset accepted candidates back to pending when unmatching.
    // This allows the candidates to be re-matched later.
    if let Some(legacy_id) = rmp_legacy_id {
        // Reset only the specific candidate
        sqlx::query(
            "UPDATE rmp_match_candidates
             SET status = 'pending', resolved_at = NULL, resolved_by = NULL
             WHERE instructor_id = $1 AND rmp_legacy_id = $2 AND status = 'accepted'",
        )
        .bind(instructor_id)
        .bind(legacy_id)
        .execute(&mut *tx)
        .await?;
    } else {
        // Reset all accepted candidates for this instructor
        sqlx::query(
            "UPDATE rmp_match_candidates
             SET status = 'pending', resolved_at = NULL, resolved_by = NULL
             WHERE instructor_id = $1 AND status = 'accepted'",
        )
        .bind(instructor_id)
        .execute(&mut *tx)
        .await?;
    }

    tx.commit().await?;
    Ok(())
}
@@ -0,0 +1,695 @@
|
||||
//! Confidence scoring and candidate generation for RMP instructor matching.
|
||||
|
||||
use crate::data::names::{matching_keys, parse_banner_name, parse_rmp_name};
|
||||
use crate::error::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::PgPool;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use tracing::{debug, info};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scoring types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Breakdown of individual scoring signals.
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct ScoreBreakdown {
|
||||
pub name: f32,
|
||||
pub department: f32,
|
||||
pub uniqueness: f32,
|
||||
pub volume: f32,
|
||||
}
|
||||
|
||||
/// Result of scoring a single instructor–RMP candidate pair.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MatchScore {
|
||||
pub score: f32,
|
||||
pub breakdown: ScoreBreakdown,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Thresholds
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Minimum composite score to store a candidate row.
|
||||
const MIN_CANDIDATE_THRESHOLD: f32 = 0.40;
|
||||
|
||||
/// Score at or above which a candidate is auto-accepted.
|
||||
const AUTO_ACCEPT_THRESHOLD: f32 = 0.85;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Weights (must sum to 1.0)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const WEIGHT_NAME: f32 = 0.50;
|
||||
const WEIGHT_DEPARTMENT: f32 = 0.25;
|
||||
const WEIGHT_UNIQUENESS: f32 = 0.15;
|
||||
const WEIGHT_VOLUME: f32 = 0.10;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pure scoring functions
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Check if an instructor's subjects overlap with an RMP department.
|
||||
///
|
||||
/// Returns `1.0` for a match, `0.2` for a mismatch, `0.5` when the RMP
|
||||
/// department is unknown.
|
||||
fn department_similarity(subjects: &[String], rmp_department: Option<&str>) -> f32 {
|
||||
let Some(dept) = rmp_department else {
|
||||
return 0.5;
|
||||
};
|
||||
let dept_lower = dept.to_lowercase();
|
||||
|
||||
// Quick check: does any subject appear directly in the department string
|
||||
// or vice-versa?
|
||||
for subj in subjects {
|
||||
let subj_lower = subj.to_lowercase();
|
||||
if dept_lower.contains(&subj_lower) || subj_lower.contains(&dept_lower) {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
// Handle common UTSA abbreviation mappings.
|
||||
if matches_known_abbreviation(&subj_lower, &dept_lower) {
|
||||
return 1.0;
|
||||
}
|
||||
}
|
||||
    0.2
}

/// Expand common subject abbreviations used at UTSA and check for overlap.
fn matches_known_abbreviation(subject: &str, department: &str) -> bool {
    const MAPPINGS: &[(&str, &[&str])] = &[
        // Core subjects (original mappings, corrected)
        ("cs", &["computer science"]),
        ("ece", &["early childhood education", "early childhood"]),
        ("ee", &["electrical engineering", "electrical"]),
        ("me", &["mechanical engineering", "mechanical"]),
        ("ce", &["civil engineering", "civil"]),
        ("bio", &["biology", "biological"]),
        ("chem", &["chemistry"]),
        ("phys", &["physics"]),
        ("math", &["mathematics"]),
        ("sta", &["statistics"]),
        ("eng", &["english"]),
        ("his", &["history"]),
        ("pol", &["political science"]),
        ("psy", &["psychology"]),
        ("soc", &["sociology"]),
        ("mus", &["music"]),
        ("art", &["art"]),
        ("phi", &["philosophy"]),
        ("eco", &["economics"]),
        ("acc", &["accounting"]),
        ("fin", &["finance"]),
        ("mgt", &["management"]),
        ("mkt", &["marketing"]),
        ("is", &["information systems"]),
        ("ms", &["management science"]),
        ("kin", &["kinesiology"]),
        ("com", &["communication"]),
        // Architecture & Design
        ("arc", &["architecture"]),
        ("ide", &["interior design", "design"]),
        // Anthropology & Ethnic Studies
        ("ant", &["anthropology"]),
        ("aas", &["african american studies", "ethnic studies"]),
        ("mas", &["mexican american studies", "ethnic studies"]),
        ("regs", &["ethnic studies", "gender"]),
        // Languages
        ("lng", &["linguistics", "applied linguistics"]),
        ("spn", &["spanish"]),
        ("frn", &["french"]),
        ("ger", &["german"]),
        ("chn", &["chinese"]),
        ("jpn", &["japanese"]),
        ("kor", &["korean"]),
        ("itl", &["italian"]),
        ("rus", &["russian"]),
        ("lat", &["latin"]),
        ("grk", &["greek"]),
        ("asl", &["american sign language", "sign language"]),
        (
            "fl",
            &["foreign languages", "languages", "modern languages"],
        ),
        // Education
        ("edu", &["education"]),
        ("ci", &["curriculum", "education"]),
        ("edl", &["educational leadership", "education"]),
        ("edp", &["educational psychology", "education"]),
        ("bbl", &["bilingual education"]),
        ("spe", &["special education", "education"]),
        // Business
        ("ent", &["entrepreneurship"]),
        ("gba", &["general business", "business"]),
        ("blw", &["business law", "law"]),
        ("rfd", &["real estate"]),
        ("mot", &["management of technology", "management"]),
        // Engineering
        ("egr", &["engineering"]),
        ("bme", &["biomedical engineering", "engineering"]),
        ("cme", &["chemical engineering", "engineering"]),
        ("cpe", &["computer engineering", "engineering"]),
        ("ise", &["industrial", "systems engineering", "engineering"]),
        ("mate", &["materials engineering", "engineering"]),
        // Sciences
        ("che", &["chemistry"]),
        ("bch", &["biochemistry", "chemistry"]),
        ("geo", &["geology"]),
        ("phy", &["physics"]),
        ("ast", &["astronomy"]),
        ("es", &["environmental science"]),
        // Social Sciences
        ("crj", &["criminal justice"]),
        ("swk", &["social work"]),
        ("pad", &["public administration"]),
        ("grg", &["geography"]),
        ("ges", &["geography"]),
        // Humanities
        ("cla", &["classics"]),
        ("hum", &["humanities"]),
        ("wgss", &["women's studies"]),
        // Health
        ("hth", &["health"]),
        ("hcp", &["health science", "health"]),
        ("ntr", &["nutrition"]),
        // Military
        ("msc", &["military science"]),
        ("asc", &["aerospace"]),
        // Arts
        ("dan", &["dance"]),
        ("thr", &["theater"]),
        ("ahc", &["art history"]),
        // Other
        ("cou", &["counseling"]),
        ("hon", &["honors"]),
        ("csm", &["construction"]),
        ("wrc", &["writing"]),
        ("set", &["tourism management", "tourism"]),
    ];

    for &(abbr, expansions) in MAPPINGS {
        if subject == abbr {
            return expansions
                .iter()
                .any(|expansion| department.contains(expansion));
        }
    }
    false
}
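
// A minimal sketch of the expected behavior, assuming the caller has already
// lowercased both inputs; the sample departments below are illustrative, not
// taken from the test suite.
#[cfg(test)]
mod abbreviation_sketch {
    use super::*;

    #[test]
    fn matches_when_any_expansion_appears_in_department() {
        assert!(matches_known_abbreviation("cs", "computer science"));
        assert!(matches_known_abbreviation("bch", "department of chemistry"));
        assert!(!matches_known_abbreviation("cs", "history"));
        assert!(!matches_known_abbreviation("zzz", "computer science"));
    }
}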

/// Compute match confidence score (0.0–1.0) for an instructor–RMP pair.
///
/// The name signal is always 1.0 since candidates are only generated for
/// exact normalized name matches. The effective score range is 0.50–1.0.
pub fn compute_match_score(
    instructor_subjects: &[String],
    rmp_department: Option<&str>,
    candidate_count: usize,
    rmp_num_ratings: i32,
) -> MatchScore {
    // --- Name (0.50) — always 1.0, candidates only exist for exact matches ---
    let name_score = 1.0;

    // --- Department (0.25) ---
    let dept_score = department_similarity(instructor_subjects, rmp_department);

    // --- Uniqueness (0.15) ---
    let uniqueness_score = match candidate_count {
        0 | 1 => 1.0,
        2 => 0.5,
        _ => 0.2,
    };

    // --- Volume (0.10) ---
    let volume_score = ((rmp_num_ratings as f32).ln_1p() / 5.0_f32.ln_1p()).clamp(0.0, 1.0);

    let composite = name_score * WEIGHT_NAME
        + dept_score * WEIGHT_DEPARTMENT
        + uniqueness_score * WEIGHT_UNIQUENESS
        + volume_score * WEIGHT_VOLUME;

    MatchScore {
        score: composite,
        breakdown: ScoreBreakdown {
            name: name_score,
            department: dept_score,
            uniqueness: uniqueness_score,
            volume: volume_score,
        },
    }
}
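
// Worked example of the blend, using the weights documented in the comments
// above (name 0.50, department 0.25, uniqueness 0.15, volume 0.10): a two-way
// name collision (uniqueness 0.5) with an unknown department (neutral 0.5)
// and 3 ratings scores
//     0.50*1.0 + 0.25*0.5 + 0.15*0.5 + 0.10*(ln(4)/ln(6)) ≈ 0.78,
// likely below an auto-accept bar but high enough to keep as a candidate.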

// ---------------------------------------------------------------------------
// Candidate generation (DB)
// ---------------------------------------------------------------------------

/// Statistics returned from candidate generation.
#[derive(Debug)]
pub struct MatchingStats {
    pub total_unmatched: usize,
    pub candidates_created: usize,
    pub candidates_rescored: usize,
    pub auto_matched: usize,
    pub skipped_unparseable: usize,
    pub skipped_no_candidates: usize,
}

/// Lightweight row for building the in-memory RMP name index.
struct RmpProfForMatching {
    legacy_id: i32,
    department: Option<String>,
    num_ratings: i32,
}

/// Generate match candidates for all unmatched instructors.
///
/// For each unmatched instructor:
/// 1. Parse `display_name` into [`NameParts`] and generate matching keys.
/// 2. Find RMP professors with matching normalized name keys.
/// 3. Score each candidate.
/// 4. Store candidates scoring above [`MIN_CANDIDATE_THRESHOLD`].
/// 5. Auto-accept if the top candidate scores ≥ [`AUTO_ACCEPT_THRESHOLD`]
///    and no existing rejected candidate exists for that pair.
///
/// Already-evaluated instructor–RMP pairs (any status) are skipped.
pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
    // 1. Load unmatched instructors
    let instructors: Vec<(i32, String)> = sqlx::query_as(
        "SELECT id, display_name FROM instructors WHERE rmp_match_status = 'unmatched'",
    )
    .fetch_all(db_pool)
    .await?;

    if instructors.is_empty() {
        info!("No unmatched instructors to generate candidates for");
        return Ok(MatchingStats {
            total_unmatched: 0,
            candidates_created: 0,
            candidates_rescored: 0,
            auto_matched: 0,
            skipped_unparseable: 0,
            skipped_no_candidates: 0,
        });
    }

    let instructor_ids: Vec<i32> = instructors.iter().map(|(id, _)| *id).collect();
    let total_unmatched = instructors.len();

    // 2. Load instructor subjects
    let subject_rows: Vec<(i32, String)> = sqlx::query_as(
        r#"
        SELECT DISTINCT ci.instructor_id, c.subject
        FROM course_instructors ci
        JOIN courses c ON c.id = ci.course_id
        WHERE ci.instructor_id = ANY($1)
        "#,
    )
    .bind(&instructor_ids)
    .fetch_all(db_pool)
    .await?;

    let mut subject_map: HashMap<i32, Vec<String>> = HashMap::new();
    for (iid, subject) in subject_rows {
        subject_map.entry(iid).or_default().push(subject);
    }

    // 3. Load all RMP professors and build multi-key name index
    let prof_rows: Vec<(i32, String, String, Option<String>, i32)> = sqlx::query_as(
        "SELECT legacy_id, first_name, last_name, department, num_ratings FROM rmp_professors",
    )
    .fetch_all(db_pool)
    .await?;

    // Build name index: (normalized_last, normalized_first) -> Vec<RmpProfForMatching>
    // Each professor may appear under multiple keys (nicknames, token variants).
    let mut name_index: HashMap<(String, String), Vec<RmpProfForMatching>> = HashMap::new();
    let mut rmp_parse_failures = 0usize;
    for (legacy_id, first_name, last_name, department, num_ratings) in &prof_rows {
        match parse_rmp_name(first_name, last_name) {
            Some(parts) => {
                let keys = matching_keys(&parts);
                for key in keys {
                    name_index.entry(key).or_default().push(RmpProfForMatching {
                        legacy_id: *legacy_id,
                        department: department.clone(),
                        num_ratings: *num_ratings,
                    });
                }
            }
            None => {
                rmp_parse_failures += 1;
                debug!(
                    legacy_id,
                    first_name, last_name, "Unparseable RMP professor name, skipping"
                );
            }
        }
    }

    if rmp_parse_failures > 0 {
        debug!(
            count = rmp_parse_failures,
            "RMP professors with unparseable names"
        );
    }

    // 4. Load existing candidate pairs — only skip resolved (accepted/rejected) pairs.
    //    Pending candidates are rescored so updated mappings take effect.
    let candidate_rows: Vec<(i32, i32, String)> =
        sqlx::query_as("SELECT instructor_id, rmp_legacy_id, status FROM rmp_match_candidates")
            .fetch_all(db_pool)
            .await?;

    let mut resolved_pairs: HashSet<(i32, i32)> = HashSet::new();
    let mut pending_pairs: HashSet<(i32, i32)> = HashSet::new();
    let mut rejected_pairs: HashSet<(i32, i32)> = HashSet::new();
    for (iid, lid, status) in candidate_rows {
        match status.as_str() {
            "accepted" | "rejected" => {
                resolved_pairs.insert((iid, lid));
                if status == "rejected" {
                    rejected_pairs.insert((iid, lid));
                }
            }
            _ => {
                pending_pairs.insert((iid, lid));
            }
        }
    }

    // 5. Score and collect candidates (new + rescored pending)
    let empty_subjects: Vec<String> = Vec::new();
    let mut new_candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
    let mut rescored_candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
    let mut auto_accept: Vec<(i32, i32)> = Vec::new(); // (instructor_id, legacy_id)
    let mut skipped_unparseable = 0usize;
    let mut skipped_no_candidates = 0usize;

    for (instructor_id, display_name) in &instructors {
        let Some(instructor_parts) = parse_banner_name(display_name) else {
            skipped_unparseable += 1;
            debug!(
                instructor_id,
                display_name, "Unparseable display name, skipping"
            );
            continue;
        };

        let subjects = subject_map.get(instructor_id).unwrap_or(&empty_subjects);

        // Generate all matching keys for this instructor and collect candidate
        // RMP professors across all key variants (deduplicated by legacy_id).
        let instructor_keys = matching_keys(&instructor_parts);
        let mut seen_profs: HashSet<i32> = HashSet::new();
        let mut matched_profs: Vec<&RmpProfForMatching> = Vec::new();

        for key in &instructor_keys {
            if let Some(profs) = name_index.get(key) {
                for prof in profs {
                    if seen_profs.insert(prof.legacy_id) {
                        matched_profs.push(prof);
                    }
                }
            }
        }

        if matched_profs.is_empty() {
            skipped_no_candidates += 1;
            continue;
        }

        let candidate_count = matched_profs.len();
        let mut best: Option<(f32, i32)> = None;

        for prof in &matched_profs {
            let pair = (*instructor_id, prof.legacy_id);
            if resolved_pairs.contains(&pair) {
                continue;
            }

            let ms = compute_match_score(
                subjects,
                prof.department.as_deref(),
                candidate_count,
                prof.num_ratings,
            );

            if ms.score < MIN_CANDIDATE_THRESHOLD {
                continue;
            }

            let breakdown_json =
                serde_json::to_value(&ms.breakdown).unwrap_or_else(|_| serde_json::json!({}));

            if pending_pairs.contains(&pair) {
                rescored_candidates.push((
                    *instructor_id,
                    prof.legacy_id,
                    ms.score,
                    breakdown_json,
                ));
            } else {
                new_candidates.push((*instructor_id, prof.legacy_id, ms.score, breakdown_json));
            }

            match best {
                Some((s, _)) if ms.score > s => best = Some((ms.score, prof.legacy_id)),
                None => best = Some((ms.score, prof.legacy_id)),
                _ => {}
            }
        }

        // Auto-accept the top candidate if it meets the threshold and is not
        // previously rejected.
        if let Some((score, legacy_id)) = best
            && score >= AUTO_ACCEPT_THRESHOLD
            && !rejected_pairs.contains(&(*instructor_id, legacy_id))
        {
            auto_accept.push((*instructor_id, legacy_id));
        }
    }

    // 6–7. Write candidates, rescore, and auto-accept within a single transaction
    let candidates_created = new_candidates.len();
    let candidates_rescored = rescored_candidates.len();
    let auto_matched = auto_accept.len();

    let mut tx = db_pool.begin().await?;

    // 6a. Batch-insert new candidates
    if !new_candidates.is_empty() {
        let c_instructor_ids: Vec<i32> = new_candidates.iter().map(|(iid, _, _, _)| *iid).collect();
        let c_legacy_ids: Vec<i32> = new_candidates.iter().map(|(_, lid, _, _)| *lid).collect();
        let c_scores: Vec<f32> = new_candidates.iter().map(|(_, _, s, _)| *s).collect();
        let c_breakdowns: Vec<serde_json::Value> =
            new_candidates.into_iter().map(|(_, _, _, b)| b).collect();

        sqlx::query(
            r#"
            INSERT INTO rmp_match_candidates (instructor_id, rmp_legacy_id, score, score_breakdown)
            SELECT v.instructor_id, v.rmp_legacy_id, v.score, v.score_breakdown
            FROM UNNEST($1::int4[], $2::int4[], $3::real[], $4::jsonb[])
                AS v(instructor_id, rmp_legacy_id, score, score_breakdown)
            ON CONFLICT (instructor_id, rmp_legacy_id) DO NOTHING
            "#,
        )
        .bind(&c_instructor_ids)
        .bind(&c_legacy_ids)
        .bind(&c_scores)
        .bind(&c_breakdowns)
        .execute(&mut *tx)
        .await?;
    }

    // 6b. Batch-update rescored pending candidates
    if !rescored_candidates.is_empty() {
        let r_instructor_ids: Vec<i32> = rescored_candidates
            .iter()
            .map(|(iid, _, _, _)| *iid)
            .collect();
        let r_legacy_ids: Vec<i32> = rescored_candidates
            .iter()
            .map(|(_, lid, _, _)| *lid)
            .collect();
        let r_scores: Vec<f32> = rescored_candidates.iter().map(|(_, _, s, _)| *s).collect();
        let r_breakdowns: Vec<serde_json::Value> = rescored_candidates
            .into_iter()
            .map(|(_, _, _, b)| b)
            .collect();

        sqlx::query(
            r#"
            UPDATE rmp_match_candidates mc
            SET score = v.score, score_breakdown = v.score_breakdown
            FROM UNNEST($1::int4[], $2::int4[], $3::real[], $4::jsonb[])
                AS v(instructor_id, rmp_legacy_id, score, score_breakdown)
            WHERE mc.instructor_id = v.instructor_id
              AND mc.rmp_legacy_id = v.rmp_legacy_id
            "#,
        )
        .bind(&r_instructor_ids)
        .bind(&r_legacy_ids)
        .bind(&r_scores)
        .bind(&r_breakdowns)
        .execute(&mut *tx)
        .await?;
    }

    // 7. Auto-accept top candidates
    if !auto_accept.is_empty() {
        let aa_instructor_ids: Vec<i32> = auto_accept.iter().map(|(iid, _)| *iid).collect();
        let aa_legacy_ids: Vec<i32> = auto_accept.iter().map(|(_, lid)| *lid).collect();

        // Mark the candidate row as accepted
        sqlx::query(
            r#"
            UPDATE rmp_match_candidates mc
            SET status = 'accepted', resolved_at = NOW()
            FROM UNNEST($1::int4[], $2::int4[]) AS v(instructor_id, rmp_legacy_id)
            WHERE mc.instructor_id = v.instructor_id
              AND mc.rmp_legacy_id = v.rmp_legacy_id
            "#,
        )
        .bind(&aa_instructor_ids)
        .bind(&aa_legacy_ids)
        .execute(&mut *tx)
        .await?;

        // Insert links into instructor_rmp_links
        sqlx::query(
            r#"
            INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, source)
            SELECT v.instructor_id, v.rmp_legacy_id, 'auto'
            FROM UNNEST($1::int4[], $2::int4[]) AS v(instructor_id, rmp_legacy_id)
            ON CONFLICT (rmp_legacy_id) DO NOTHING
            "#,
        )
        .bind(&aa_instructor_ids)
        .bind(&aa_legacy_ids)
        .execute(&mut *tx)
        .await?;

        // Update instructor match status
        sqlx::query(
            r#"
            UPDATE instructors i
            SET rmp_match_status = 'auto'
            FROM UNNEST($1::int4[]) AS v(instructor_id)
            WHERE i.id = v.instructor_id
            "#,
        )
        .bind(&aa_instructor_ids)
        .execute(&mut *tx)
        .await?;
    }

    tx.commit().await?;

    let stats = MatchingStats {
        total_unmatched,
        candidates_created,
        candidates_rescored,
        auto_matched,
        skipped_unparseable,
        skipped_no_candidates,
    };

    info!(
        total_unmatched = stats.total_unmatched,
        candidates_created = stats.candidates_created,
        candidates_rescored = stats.candidates_rescored,
        auto_matched = stats.auto_matched,
        skipped_unparseable = stats.skipped_unparseable,
        skipped_no_candidates = stats.skipped_no_candidates,
        "Candidate generation complete"
    );

    Ok(stats)
}
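
// A minimal sketch of driving a matching pass from an admin task; the caller
// shape (`run_matching_pass`) is an assumption for illustration, not part of
// this module.
async fn run_matching_pass(pool: &sqlx::PgPool) -> Result<()> {
    let stats = generate_candidates(pool).await?;
    info!(
        auto_matched = stats.auto_matched,
        pending_review = stats.candidates_created,
        "matching pass finished"
    );
    Ok(())
}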

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_ideal_candidate_high_score() {
        let ms = compute_match_score(
            &["CS".to_string()],
            Some("Computer Science"),
            1,  // unique candidate
            50, // decent ratings
        );
        // name 1.0*0.50 + dept 1.0*0.25 + unique 1.0*0.15 + volume 1.0*0.10 = 1.0
        // (the volume term saturates once a professor has ≥ 5 ratings)
        assert!(ms.score >= 0.85, "Expected score >= 0.85, got {}", ms.score);
        assert_eq!(ms.breakdown.name, 1.0);
        assert_eq!(ms.breakdown.uniqueness, 1.0);
        assert_eq!(ms.breakdown.department, 1.0);
    }

    #[test]
    fn test_ambiguous_candidates_lower_score() {
        let unique = compute_match_score(&[], None, 1, 10);
        let ambiguous = compute_match_score(&[], None, 3, 10);
        assert!(
            unique.score > ambiguous.score,
            "Unique ({}) should outscore ambiguous ({})",
            unique.score,
            ambiguous.score
        );
        assert_eq!(unique.breakdown.uniqueness, 1.0);
        assert_eq!(ambiguous.breakdown.uniqueness, 0.2);
    }

    #[test]
    fn test_no_department_neutral() {
        let ms = compute_match_score(&["CS".to_string()], None, 1, 10);
        assert_eq!(ms.breakdown.department, 0.5);
    }

    #[test]
    fn test_department_match() {
        let ms = compute_match_score(&["CS".to_string()], Some("Computer Science"), 1, 10);
        assert_eq!(ms.breakdown.department, 1.0);
    }

    #[test]
    fn test_department_mismatch() {
        let ms = compute_match_score(&["CS".to_string()], Some("History"), 1, 10);
        assert_eq!(ms.breakdown.department, 0.2);
    }

    #[test]
    fn test_department_match_outscores_mismatch() {
        let matched = compute_match_score(&["CS".to_string()], Some("Computer Science"), 1, 10);
        let mismatched = compute_match_score(&["CS".to_string()], Some("History"), 1, 10);
        assert!(
            matched.score > mismatched.score,
            "Department match ({}) should outscore mismatch ({})",
            matched.score,
            mismatched.score
        );
    }

    #[test]
    fn test_volume_scaling() {
        let zero = compute_match_score(&[], None, 1, 0);
        let many = compute_match_score(&[], None, 1, 100);
        assert!(
            many.breakdown.volume > zero.breakdown.volume,
            "100 ratings ({}) should outscore 0 ratings ({})",
            many.breakdown.volume,
            zero.breakdown.volume
        );
        assert_eq!(zero.breakdown.volume, 0.0);
        assert!(
            many.breakdown.volume > 0.9,
            "100 ratings should be near max"
        );
    }
}

+186 -35
@@ -1,15 +1,40 @@
//! Database operations for scrape job queue management.

-use crate::data::models::{ScrapeJob, ScrapePriority, TargetType};
+use crate::data::models::{ScrapeJob, ScrapePriority, TargetType, UpsertCounts};
use crate::error::Result;
use chrono::{DateTime, Utc};
use sqlx::PgPool;
use std::collections::HashSet;

/// Force-unlock all jobs that have a non-NULL `locked_at`.
///
/// Intended to be called once at startup to recover jobs left locked by
/// a previous unclean shutdown (crash, OOM kill, etc.).
///
/// # Returns
/// The number of jobs that were unlocked.
pub async fn force_unlock_all(db_pool: &PgPool) -> Result<u64> {
    let result = sqlx::query(
        "UPDATE scrape_jobs SET locked_at = NULL, queued_at = NOW() WHERE locked_at IS NOT NULL",
    )
    .execute(db_pool)
    .await?;
    Ok(result.rows_affected())
}

/// How long a lock can be held before it is considered expired and reclaimable.
///
/// This acts as a safety net for cases where a worker dies without unlocking
/// (OOM kill, crash, network partition). Under normal operation, the worker's
/// own job timeout fires well before this threshold.
const LOCK_EXPIRY: std::time::Duration = std::time::Duration::from_secs(10 * 60);

/// Atomically fetch and lock the next available scrape job.
///
/// Uses `FOR UPDATE SKIP LOCKED` to allow multiple workers to poll the queue
-/// concurrently without conflicts. Only jobs that are unlocked and ready to
-/// execute (based on `execute_at`) are considered.
+/// concurrently without conflicts. Considers jobs that are:
+/// - Unlocked and ready to execute, OR
+/// - Locked but past [`LOCK_EXPIRY`] (abandoned by a dead worker)
///
/// # Arguments
/// * `db_pool` - PostgreSQL connection pool
@@ -20,9 +45,16 @@ use std::collections::HashSet;
pub async fn fetch_and_lock_job(db_pool: &PgPool) -> Result<Option<ScrapeJob>> {
    let mut tx = db_pool.begin().await?;

+    let lock_expiry_secs = LOCK_EXPIRY.as_secs() as i32;
    let job = sqlx::query_as::<_, ScrapeJob>(
-        "SELECT * FROM scrape_jobs WHERE locked_at IS NULL AND execute_at <= NOW() ORDER BY priority DESC, execute_at ASC LIMIT 1 FOR UPDATE SKIP LOCKED"
+        "SELECT * FROM scrape_jobs \
+         WHERE (locked_at IS NULL OR locked_at < NOW() - make_interval(secs => $1::double precision)) \
+         AND execute_at <= NOW() \
+         ORDER BY priority DESC, execute_at ASC \
+         LIMIT 1 \
+         FOR UPDATE SKIP LOCKED"
    )
+    .bind(lock_expiry_secs)
    .fetch_optional(&mut *tx)
    .await?;
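
// Sketch of the polling loop a worker might run on top of fetch_and_lock_job;
// the loop shape, sleep interval, and `handle_job` helper are assumptions for
// illustration, not code from this changeset.
async fn poll_loop(pool: &sqlx::PgPool) -> crate::error::Result<()> {
    loop {
        match fetch_and_lock_job(pool).await? {
            // `handle_job` stands in for the worker's real processing path
            Some(job) => handle_job(job, pool).await?,
            None => tokio::time::sleep(std::time::Duration::from_secs(5)).await,
        }
    }
}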

@@ -68,10 +100,11 @@ pub async fn unlock_job(job_id: i32, db_pool: &PgPool) -> Result<()> {
    Ok(())
}

-/// Atomically unlock a job and increment its retry count.
+/// Atomically unlock a job, increment its retry count, and reset `queued_at`.
///
-/// Returns whether the job still has retries remaining. This is determined
-/// atomically in the database to avoid race conditions between workers.
+/// Returns the new `queued_at` timestamp if retries remain, or `None` if
+/// the job has exhausted its retries. This is determined atomically in the
+/// database to avoid race conditions between workers.
///
/// # Arguments
/// * `job_id` - The database ID of the job
@@ -79,31 +112,31 @@ pub async fn unlock_job(job_id: i32, db_pool: &PgPool) -> Result<()> {
/// * `db_pool` - PostgreSQL connection pool
///
/// # Returns
-/// * `Ok(true)` if the job was unlocked and retries remain
-/// * `Ok(false)` if the job has exhausted its retries
+/// * `Ok(Some(queued_at))` if the job was unlocked and retries remain
+/// * `Ok(None)` if the job has exhausted its retries
pub async fn unlock_and_increment_retry(
    job_id: i32,
    max_retries: i32,
    db_pool: &PgPool,
-) -> Result<bool> {
-    let result = sqlx::query_scalar::<_, Option<i32>>(
+) -> Result<Option<chrono::DateTime<chrono::Utc>>> {
+    let result = sqlx::query_scalar::<_, Option<chrono::DateTime<chrono::Utc>>>(
        "UPDATE scrape_jobs
-         SET locked_at = NULL, retry_count = retry_count + 1
+         SET locked_at = NULL, retry_count = retry_count + 1, queued_at = NOW()
         WHERE id = $1
-         RETURNING CASE WHEN retry_count < $2 THEN retry_count ELSE NULL END",
+         RETURNING CASE WHEN retry_count <= $2 THEN queued_at ELSE NULL END",
    )
    .bind(job_id)
    .bind(max_retries)
    .fetch_one(db_pool)
    .await?;

-    Ok(result.is_some())
+    Ok(result)
}
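
// How a worker might consume the new return value (sketch; `job`, `max_retries`,
// and `db_pool` are assumed to be in scope in the caller):
match unlock_and_increment_retry(job.id, max_retries, db_pool).await? {
    Some(queued_at) => tracing::debug!(%queued_at, "job re-queued for retry"),
    None => tracing::warn!(job_id = job.id, "retries exhausted, giving up"),
}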

-/// Find existing unlocked job payloads matching the given target type and candidates.
+/// Find existing job payloads matching the given target type and candidates.
///
-/// Returns a set of stringified JSON payloads that already exist in the queue,
-/// used for deduplication when scheduling new jobs.
+/// Returns a set of stringified JSON payloads that already exist in the queue
+/// (both locked and unlocked), used for deduplication when scheduling new jobs.
///
/// # Arguments
/// * `target_type` - The target type to filter by
@@ -111,7 +144,7 @@ pub async fn unlock_and_increment_retry(
/// * `db_pool` - PostgreSQL connection pool
///
/// # Returns
-/// A `HashSet` of stringified JSON payloads that already have pending jobs
+/// A `HashSet` of stringified JSON payloads that already have pending or in-progress jobs
pub async fn find_existing_job_payloads(
    target_type: TargetType,
    candidate_payloads: &[serde_json::Value],
@@ -119,7 +152,7 @@ pub async fn find_existing_job_payloads(
) -> Result<HashSet<String>> {
    let existing_jobs: Vec<(serde_json::Value,)> = sqlx::query_as(
        "SELECT target_payload FROM scrape_jobs
-         WHERE target_type = $1 AND target_payload = ANY($2) AND locked_at IS NULL",
+         WHERE target_type = $1 AND target_payload = ANY($2)",
    )
    .bind(target_type)
    .bind(candidate_payloads)
@@ -134,7 +167,117 @@ pub async fn find_existing_job_payloads(
    Ok(existing_payloads)
}

-/// Batch insert scrape jobs in a single transaction.
+/// Insert a scrape job result log entry.
#[allow(clippy::too_many_arguments)]
pub async fn insert_job_result(
    target_type: TargetType,
    payload: serde_json::Value,
    priority: ScrapePriority,
    queued_at: DateTime<Utc>,
    started_at: DateTime<Utc>,
    duration_ms: i32,
    success: bool,
    error_message: Option<&str>,
    retry_count: i32,
    counts: Option<&UpsertCounts>,
    db_pool: &PgPool,
) -> Result<()> {
    sqlx::query(
        r#"
        INSERT INTO scrape_job_results (
            target_type, payload, priority,
            queued_at, started_at, duration_ms,
            success, error_message, retry_count,
            courses_fetched, courses_changed, courses_unchanged,
            audits_generated, metrics_generated
        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
        "#,
    )
    .bind(target_type)
    .bind(&payload)
    .bind(priority)
    .bind(queued_at)
    .bind(started_at)
    .bind(duration_ms)
    .bind(success)
    .bind(error_message)
    .bind(retry_count)
    .bind(counts.map(|c| c.courses_fetched))
    .bind(counts.map(|c| c.courses_changed))
    .bind(counts.map(|c| c.courses_unchanged))
    .bind(counts.map(|c| c.audits_generated))
    .bind(counts.map(|c| c.metrics_generated))
    .execute(db_pool)
    .await?;

    Ok(())
}

/// Per-subject aggregated stats from recent scrape results.
///
/// Populated by [`fetch_subject_stats`] and converted into
/// [`crate::scraper::adaptive::SubjectStats`] for interval computation.
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct SubjectResultStats {
    pub subject: String,
    pub recent_runs: i64,
    pub avg_change_ratio: f64,
    pub consecutive_zero_changes: i64,
    pub consecutive_empty_fetches: i64,
    pub recent_failure_count: i64,
    pub recent_success_count: i64,
    pub last_completed: DateTime<Utc>,
}

/// Fetch aggregated per-subject statistics from the last 24 hours of results.
///
/// For each subject, examines the 20 most recent results and computes:
/// - Average change ratio (courses_changed / courses_fetched)
/// - Consecutive zero-change runs from the most recent result
/// - Consecutive empty-fetch runs from the most recent result
/// - Failure and success counts
/// - Last completion timestamp
pub async fn fetch_subject_stats(db_pool: &PgPool) -> Result<Vec<SubjectResultStats>> {
    let rows = sqlx::query_as::<_, SubjectResultStats>(
        r#"
        WITH recent AS (
            SELECT payload->>'subject' AS subject, success,
                   COALESCE(courses_fetched, 0) AS courses_fetched,
                   COALESCE(courses_changed, 0) AS courses_changed,
                   completed_at,
                   ROW_NUMBER() OVER (PARTITION BY payload->>'subject' ORDER BY completed_at DESC) AS rn
            FROM scrape_job_results
            WHERE target_type = 'Subject' AND completed_at > NOW() - INTERVAL '24 hours'
        ),
        filtered AS (SELECT * FROM recent WHERE rn <= 20),
        zero_break AS (
            SELECT subject,
                   MIN(rn) FILTER (WHERE courses_changed > 0 AND success) AS first_nonzero_rn,
                   MIN(rn) FILTER (WHERE courses_fetched > 0 AND success) AS first_nonempty_rn
            FROM filtered GROUP BY subject
        )
        SELECT
            f.subject::TEXT AS subject,
            COUNT(*)::BIGINT AS recent_runs,
            COALESCE(AVG(CASE WHEN f.success AND f.courses_fetched > 0
                THEN f.courses_changed::FLOAT / f.courses_fetched ELSE NULL END), 0.0)::FLOAT8 AS avg_change_ratio,
            COALESCE(zb.first_nonzero_rn - 1, COUNT(*) FILTER (WHERE f.success AND f.courses_changed = 0))::BIGINT AS consecutive_zero_changes,
            COALESCE(zb.first_nonempty_rn - 1, COUNT(*) FILTER (WHERE f.success AND f.courses_fetched = 0))::BIGINT AS consecutive_empty_fetches,
            COUNT(*) FILTER (WHERE NOT f.success)::BIGINT AS recent_failure_count,
            COUNT(*) FILTER (WHERE f.success)::BIGINT AS recent_success_count,
            MAX(f.completed_at) AS last_completed
        FROM filtered f
        LEFT JOIN zero_break zb ON f.subject = zb.subject
        GROUP BY f.subject, zb.first_nonzero_rn, zb.first_nonempty_rn
        "#,
    )
    .fetch_all(db_pool)
    .await?;

    Ok(rows)
}
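
// Worked example of the zero_break logic above: with most-recent-first row
// numbers rn = 1..=5 and courses_changed = [0, 0, 0, 7, 2], the first
// successful nonzero row is rn = 4, so consecutive_zero_changes = 4 - 1 = 3.
// If nothing in the window changed at all, the COALESCE falls back to
// counting every successful zero-change run instead.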

+/// Batch insert scrape jobs using UNNEST for a single round-trip.
///
/// All jobs are inserted with `execute_at` set to the current time.
///
@@ -144,27 +287,35 @@ pub async fn find_existing_job_payloads(
pub async fn batch_insert_jobs(
    jobs: &[(serde_json::Value, TargetType, ScrapePriority)],
    db_pool: &PgPool,
-) -> Result<()> {
+) -> Result<Vec<ScrapeJob>> {
    if jobs.is_empty() {
-        return Ok(());
+        return Ok(Vec::new());
    }

-    let now = chrono::Utc::now();
-    let mut tx = db_pool.begin().await?;
+    let mut target_types: Vec<String> = Vec::with_capacity(jobs.len());
+    let mut payloads: Vec<serde_json::Value> = Vec::with_capacity(jobs.len());
+    let mut priorities: Vec<String> = Vec::with_capacity(jobs.len());

    for (payload, target_type, priority) in jobs {
-        sqlx::query(
-            "INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at) VALUES ($1, $2, $3, $4)"
-        )
-        .bind(target_type)
-        .bind(payload)
-        .bind(priority)
-        .bind(now)
-        .execute(&mut *tx)
-        .await?;
+        target_types.push(format!("{target_type:?}"));
+        payloads.push(payload.clone());
+        priorities.push(format!("{priority:?}"));
    }

-    tx.commit().await?;
+    let inserted = sqlx::query_as::<_, ScrapeJob>(
+        r#"
+        INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at, queued_at)
+        SELECT v.target_type::target_type, v.payload, v.priority::scrape_priority, NOW(), NOW()
+        FROM UNNEST($1::text[], $2::jsonb[], $3::text[])
+            AS v(target_type, payload, priority)
+        RETURNING *
+        "#,
+    )
+    .bind(&target_types)
+    .bind(&payloads)
+    .bind(&priorities)
+    .fetch_all(db_pool)
+    .await?;

-    Ok(())
+    Ok(inserted)
}
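
// Sketch of scheduling a batch and using the returned rows; the payload shape
// and the `Normal` priority variant are assumptions for illustration.
let jobs = vec![(
    serde_json::json!({ "subject": "CS" }),
    TargetType::Subject,
    ScrapePriority::Normal,
)];
let inserted = batch_insert_jobs(&jobs, &db_pool).await?;
tracing::info!(count = inserted.len(), "scheduled scrape jobs");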

@@ -0,0 +1,100 @@
//! Database query functions for user sessions.

use anyhow::Context;
use rand::Rng;
use sqlx::PgPool;

use super::models::UserSession;
use crate::error::Result;

/// Session lifetime: 7 days (in seconds).
pub const SESSION_DURATION_SECS: u64 = 7 * 24 * 3600;

/// Generate a cryptographically random 32-byte hex token.
fn generate_token() -> String {
    let bytes: [u8; 32] = rand::rng().random();
    bytes.iter().map(|b| format!("{b:02x}")).collect()
}

/// Create a new session for a user with the given duration.
pub async fn create_session(
    pool: &PgPool,
    user_id: i64,
    duration: std::time::Duration,
) -> Result<UserSession> {
    let token = generate_token();
    let duration_secs = duration.as_secs() as i64;

    sqlx::query_as::<_, UserSession>(
        r#"
        INSERT INTO user_sessions (id, user_id, expires_at)
        VALUES ($1, $2, now() + make_interval(secs => $3::double precision))
        RETURNING *
        "#,
    )
    .bind(&token)
    .bind(user_id)
    .bind(duration_secs as f64)
    .fetch_one(pool)
    .await
    .context("failed to create session")
}

/// Fetch a session by token, only if it has not expired.
pub async fn get_session(pool: &PgPool, token: &str) -> Result<Option<UserSession>> {
    sqlx::query_as::<_, UserSession>(
        "SELECT * FROM user_sessions WHERE id = $1 AND expires_at > now()",
    )
    .bind(token)
    .fetch_optional(pool)
    .await
    .context("failed to get session")
}

/// Update the last-active timestamp and extend session expiry (sliding window).
pub async fn touch_session(pool: &PgPool, token: &str) -> Result<()> {
    sqlx::query(
        r#"
        UPDATE user_sessions
        SET last_active_at = now(),
            expires_at = now() + make_interval(secs => $2::double precision)
        WHERE id = $1
        "#,
    )
    .bind(token)
    .bind(SESSION_DURATION_SECS as f64)
    .execute(pool)
    .await
    .context("failed to touch session")?;
    Ok(())
}

/// Delete a session by token.
pub async fn delete_session(pool: &PgPool, token: &str) -> Result<()> {
    sqlx::query("DELETE FROM user_sessions WHERE id = $1")
        .bind(token)
        .execute(pool)
        .await
        .context("failed to delete session")?;
    Ok(())
}

/// Delete all sessions for a user. Returns the number of sessions deleted.
#[allow(dead_code)] // Available for admin user-deletion flow
pub async fn delete_user_sessions(pool: &PgPool, user_id: i64) -> Result<u64> {
    let result = sqlx::query("DELETE FROM user_sessions WHERE user_id = $1")
        .bind(user_id)
        .execute(pool)
        .await
        .context("failed to delete user sessions")?;
    Ok(result.rows_affected())
}

/// Delete all expired sessions. Returns the number of sessions cleaned up.
pub async fn cleanup_expired(pool: &PgPool) -> Result<u64> {
    let result = sqlx::query("DELETE FROM user_sessions WHERE expires_at <= now()")
        .execute(pool)
        .await
        .context("failed to cleanup expired sessions")?;
    Ok(result.rows_affected())
}
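
// Intended lifecycle, as a sketch (handler wiring and the `user` value are
// assumed, not shown in this file): create_session on login and store
// `session.id` in a cookie; get_session + touch_session on each authenticated
// request for the sliding window; delete_session on logout; cleanup_expired
// from a periodic task.
let session = create_session(
    &pool,
    user.discord_id,
    std::time::Duration::from_secs(SESSION_DURATION_SECS),
)
.await?;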

@@ -0,0 +1,86 @@
//! Database query functions for users.

use anyhow::Context;
use sqlx::PgPool;

use super::models::User;
use crate::error::Result;

/// Insert a new user or update username/avatar on conflict.
pub async fn upsert_user(
    pool: &PgPool,
    discord_id: i64,
    username: &str,
    avatar_hash: Option<&str>,
) -> Result<User> {
    sqlx::query_as::<_, User>(
        r#"
        INSERT INTO users (discord_id, discord_username, discord_avatar_hash)
        VALUES ($1, $2, $3)
        ON CONFLICT (discord_id) DO UPDATE
        SET discord_username = EXCLUDED.discord_username,
            discord_avatar_hash = EXCLUDED.discord_avatar_hash,
            updated_at = now()
        RETURNING *
        "#,
    )
    .bind(discord_id)
    .bind(username)
    .bind(avatar_hash)
    .fetch_one(pool)
    .await
    .context("failed to upsert user")
}

/// Fetch a user by Discord ID.
pub async fn get_user(pool: &PgPool, discord_id: i64) -> Result<Option<User>> {
    sqlx::query_as::<_, User>("SELECT * FROM users WHERE discord_id = $1")
        .bind(discord_id)
        .fetch_optional(pool)
        .await
        .context("failed to get user")
}

/// List all users ordered by creation date (newest first).
pub async fn list_users(pool: &PgPool) -> Result<Vec<User>> {
    sqlx::query_as::<_, User>("SELECT * FROM users ORDER BY created_at DESC")
        .fetch_all(pool)
        .await
        .context("failed to list users")
}

/// Set the admin flag for a user, returning the updated user if found.
pub async fn set_admin(pool: &PgPool, discord_id: i64, is_admin: bool) -> Result<Option<User>> {
    sqlx::query_as::<_, User>(
        r#"
        UPDATE users
        SET is_admin = $2, updated_at = now()
        WHERE discord_id = $1
        RETURNING *
        "#,
    )
    .bind(discord_id)
    .bind(is_admin)
    .fetch_optional(pool)
    .await
    .context("failed to set admin status")
}

/// Ensure a seed admin exists. Upserts with `is_admin = true` and a placeholder
/// username that will be replaced on first OAuth login.
pub async fn ensure_seed_admin(pool: &PgPool, discord_id: i64) -> Result<User> {
    sqlx::query_as::<_, User>(
        r#"
        INSERT INTO users (discord_id, discord_username, is_admin)
        VALUES ($1, 'seed-admin', true)
        ON CONFLICT (discord_id) DO UPDATE
        SET is_admin = true,
            updated_at = now()
        RETURNING *
        "#,
    )
    .bind(discord_id)
    .fetch_one(pool)
    .await
    .context("failed to ensure seed admin")
}
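
// Sketch of the OAuth callback this supports; the `discord_user` field names
// are assumptions about the fetched Discord profile, not code from this diff.
let user = upsert_user(
    &pool,
    discord_user.id,
    &discord_user.username,
    discord_user.avatar.as_deref(),
)
.await?;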

@@ -1,12 +1,14 @@
pub mod app;
pub mod banner;
pub mod bot;
pub mod calendar;
pub mod cli;
pub mod config;
pub mod data;
pub mod error;
pub mod formatter;
pub mod logging;
pub mod rmp;
pub mod scraper;
pub mod services;
pub mod signals;

+5 -5
@@ -1,5 +1,5 @@
use crate::app::App;
-use crate::cli::{Args, ServiceName, determine_enabled_services};
+use crate::cli::{Args, ServiceName};
use crate::logging::setup_logging;
use clap::Parser;
use std::process::ExitCode;
@@ -8,17 +8,18 @@ use tracing::info;
mod app;
mod banner;
mod bot;
mod calendar;
mod cli;
mod config;
mod data;
mod error;
mod formatter;
mod logging;
mod rmp;
mod scraper;
mod services;
mod signals;
mod state;
#[allow(dead_code)]
mod status;
mod web;

@@ -29,9 +30,8 @@ async fn main() -> ExitCode {
    // Parse CLI arguments
    let args = Args::parse();

-    // Determine which services should be enabled
-    let enabled_services: Vec<ServiceName> =
-        determine_enabled_services(&args).expect("Failed to determine enabled services");
+    // Always run all services
+    let enabled_services = ServiceName::all();

    // Create and initialize the application
    let mut app = App::new().await.expect("Failed to initialize application");

+156
@@ -0,0 +1,156 @@
//! RateMyProfessors GraphQL client for bulk professor data sync.

use anyhow::Result;
use serde::{Deserialize, Serialize};
use tracing::{debug, info};

/// UTSA's school ID on RateMyProfessors (base64 of "School-1516").
const UTSA_SCHOOL_ID: &str = "U2Nob29sLTE1MTY=";

/// Basic auth header value (base64 of "test:test").
const AUTH_HEADER: &str = "Basic dGVzdDp0ZXN0";

/// GraphQL endpoint.
const GRAPHQL_URL: &str = "https://www.ratemyprofessors.com/graphql";

/// Page size for paginated fetches.
const PAGE_SIZE: u32 = 100;

/// A professor record from RateMyProfessors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RmpProfessor {
    pub legacy_id: i32,
    pub graphql_id: String,
    pub first_name: String,
    pub last_name: String,
    pub department: Option<String>,
    pub avg_rating: Option<f32>,
    pub avg_difficulty: Option<f32>,
    pub num_ratings: i32,
    pub would_take_again_pct: Option<f32>,
}

/// Client for fetching professor data from RateMyProfessors.
pub struct RmpClient {
    http: reqwest::Client,
}

impl Default for RmpClient {
    fn default() -> Self {
        Self::new()
    }
}

impl RmpClient {
    pub fn new() -> Self {
        Self {
            http: reqwest::Client::new(),
        }
    }

    /// Fetch all professors for UTSA via paginated GraphQL queries.
    pub async fn fetch_all_professors(&self) -> Result<Vec<RmpProfessor>> {
        let mut all = Vec::new();
        let mut cursor: Option<String> = None;

        loop {
            let after_clause = match &cursor {
                Some(c) => format!(r#", after: "{}""#, c),
                None => String::new(),
            };

            let query = format!(
                r#"query {{
                    newSearch {{
                        teachers(query: {{ text: "", schoolID: "{school_id}" }}, first: {page_size}{after}) {{
                            edges {{
                                cursor
                                node {{
                                    id
                                    legacyId
                                    firstName
                                    lastName
                                    department
                                    avgRating
                                    avgDifficulty
                                    numRatings
                                    wouldTakeAgainPercent
                                }}
                            }}
                            pageInfo {{
                                hasNextPage
                                endCursor
                            }}
                        }}
                    }}
                }}"#,
                school_id = UTSA_SCHOOL_ID,
                page_size = PAGE_SIZE,
                after = after_clause,
            );

            let body = serde_json::json!({ "query": query });

            let resp = self
                .http
                .post(GRAPHQL_URL)
                .header("Authorization", AUTH_HEADER)
                .json(&body)
                .send()
                .await?;

            let status = resp.status();
            if !status.is_success() {
                let text = resp.text().await.unwrap_or_default();
                anyhow::bail!("RMP GraphQL request failed ({status}): {text}");
            }

            let json: serde_json::Value = resp.json().await?;

            let teachers = &json["data"]["newSearch"]["teachers"];
            let edges = teachers["edges"]
                .as_array()
                .ok_or_else(|| anyhow::anyhow!("Missing edges in RMP response"))?;

            for edge in edges {
                let node = &edge["node"];
                let wta = node["wouldTakeAgainPercent"]
                    .as_f64()
                    .map(|v| v as f32)
                    .filter(|&v| v >= 0.0);

                all.push(RmpProfessor {
                    legacy_id: node["legacyId"]
                        .as_i64()
                        .ok_or_else(|| anyhow::anyhow!("Missing legacyId"))?
                        as i32,
                    graphql_id: node["id"]
                        .as_str()
                        .ok_or_else(|| anyhow::anyhow!("Missing id"))?
                        .to_string(),
                    first_name: node["firstName"].as_str().unwrap_or_default().to_string(),
                    last_name: node["lastName"].as_str().unwrap_or_default().to_string(),
                    department: node["department"].as_str().map(|s| s.to_string()),
                    avg_rating: node["avgRating"].as_f64().map(|v| v as f32),
                    avg_difficulty: node["avgDifficulty"].as_f64().map(|v| v as f32),
                    num_ratings: node["numRatings"].as_i64().unwrap_or(0) as i32,
                    would_take_again_pct: wta,
                });
            }

            let page_info = &teachers["pageInfo"];
            let has_next = page_info["hasNextPage"].as_bool().unwrap_or(false);

            if !has_next {
                break;
            }

            cursor = page_info["endCursor"].as_str().map(|s| s.to_string());

            debug!(fetched = all.len(), "RMP pagination: fetching next page");
        }

        info!(total = all.len(), "Fetched all RMP professors");
        Ok(all)
    }
}
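
// Sketch of a sync pass using the client; what happens with the fetched rows
// (the upsert into rmp_professors) is assumed, not shown in this file.
let client = RmpClient::new();
let professors = client.fetch_all_professors().await?;
info!(count = professors.len(), "syncing RMP professors");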

@@ -0,0 +1,326 @@
//! Adaptive scraping interval computation.
//!
//! Assigns per-subject scrape intervals based on recent change rates,
//! consecutive zero-change runs, failure patterns, and time of day.

use chrono::{DateTime, Datelike, Timelike, Utc};
use chrono_tz::US::Central;
use std::time::Duration;

use crate::data::scrape_jobs::SubjectResultStats;

const FLOOR_INTERVAL: Duration = Duration::from_secs(3 * 60);
const MODERATE_HIGH_INTERVAL: Duration = Duration::from_secs(5 * 60);
const MODERATE_LOW_INTERVAL: Duration = Duration::from_secs(15 * 60);
const LOW_CHANGE_INTERVAL: Duration = Duration::from_secs(30 * 60);
const ZERO_5_INTERVAL: Duration = Duration::from_secs(60 * 60);
const ZERO_10_INTERVAL: Duration = Duration::from_secs(2 * 60 * 60);
const CEILING_INTERVAL: Duration = Duration::from_secs(4 * 60 * 60);
const COLD_START_INTERVAL: Duration = FLOOR_INTERVAL;
const PAUSE_PROBE_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60);
const EMPTY_FETCH_PAUSE_THRESHOLD: i64 = 3;
const FAILURE_PAUSE_THRESHOLD: i64 = 5;

/// Aggregated per-subject statistics derived from recent scrape results.
#[derive(Debug, Clone)]
pub struct SubjectStats {
    pub subject: String,
    pub recent_runs: i64,
    pub avg_change_ratio: f64,
    pub consecutive_zero_changes: i64,
    pub consecutive_empty_fetches: i64,
    pub recent_failure_count: i64,
    pub recent_success_count: i64,
    pub last_completed: DateTime<Utc>,
}

/// Scheduling decision for a subject.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubjectSchedule {
    /// Subject is due for scraping, with the computed interval.
    Eligible(Duration),
    /// Subject was scraped recently; wait for the remaining cooldown.
    Cooldown(Duration),
    /// Subject is paused due to repeated empty fetches or failures.
    Paused,
    /// Subject belongs to a past term and should not be scraped.
    ReadOnly,
}

impl From<SubjectResultStats> for SubjectStats {
    fn from(row: SubjectResultStats) -> Self {
        Self {
            subject: row.subject,
            recent_runs: row.recent_runs,
            avg_change_ratio: row.avg_change_ratio,
            consecutive_zero_changes: row.consecutive_zero_changes,
            consecutive_empty_fetches: row.consecutive_empty_fetches,
            recent_failure_count: row.recent_failure_count,
            recent_success_count: row.recent_success_count,
            last_completed: row.last_completed,
        }
    }
}

/// Compute the base interval tier from change-rate statistics.
pub fn compute_base_interval(stats: &SubjectStats) -> Duration {
    if stats.recent_runs == 0 {
        return COLD_START_INTERVAL;
    }

    // Consecutive-zero tiers take precedence when change ratio is near zero
    if stats.avg_change_ratio < 0.001 {
        return match stats.consecutive_zero_changes {
            0..5 => LOW_CHANGE_INTERVAL,
            5..10 => ZERO_5_INTERVAL,
            10..20 => ZERO_10_INTERVAL,
            _ => CEILING_INTERVAL,
        };
    }

    match stats.avg_change_ratio {
        r if r >= 0.10 => FLOOR_INTERVAL,
        r if r >= 0.05 => MODERATE_HIGH_INTERVAL,
        r if r >= 0.01 => MODERATE_LOW_INTERVAL,
        _ => LOW_CHANGE_INTERVAL,
    }
}

/// Return a time-of-day multiplier for the given UTC timestamp.
///
/// Peak hours (weekdays 8am-6pm CT) return 1; off-peak (weekdays 6pm-midnight CT)
/// return 2; night (midnight-8am CT) and weekends return 4.
pub fn time_of_day_multiplier(now: DateTime<Utc>) -> u32 {
    let ct = now.with_timezone(&Central);
    let weekday = ct.weekday();
    let hour = ct.hour();

    // Weekends get the slowest multiplier
    if matches!(weekday, chrono::Weekday::Sat | chrono::Weekday::Sun) {
        return 4;
    }

    match hour {
        8..18 => 1,  // peak
        18..24 => 2, // off-peak
        _ => 4,      // night (0..8)
    }
}

/// Evaluate whether a subject should be scraped now.
///
/// Combines base interval, time-of-day multiplier, pause detection (empty
/// fetches / consecutive failures), and past-term read-only status.
pub fn evaluate_subject(
    stats: &SubjectStats,
    now: DateTime<Utc>,
    is_past_term: bool,
) -> SubjectSchedule {
    if is_past_term {
        return SubjectSchedule::ReadOnly;
    }

    let elapsed = (now - stats.last_completed)
        .to_std()
        .unwrap_or(Duration::ZERO);
    let probe_due = elapsed >= PAUSE_PROBE_INTERVAL;

    // Pause on repeated empty fetches
    if stats.consecutive_empty_fetches >= EMPTY_FETCH_PAUSE_THRESHOLD {
        return if probe_due {
            SubjectSchedule::Eligible(PAUSE_PROBE_INTERVAL)
        } else {
            SubjectSchedule::Paused
        };
    }

    // Pause on all-failures
    if stats.recent_success_count == 0 && stats.recent_failure_count >= FAILURE_PAUSE_THRESHOLD {
        return if probe_due {
            SubjectSchedule::Eligible(PAUSE_PROBE_INTERVAL)
        } else {
            SubjectSchedule::Paused
        };
    }

    let base = compute_base_interval(stats);
    let multiplier = time_of_day_multiplier(now);
    let effective = base * multiplier;

    if elapsed >= effective {
        SubjectSchedule::Eligible(effective)
    } else {
        let remaining = effective - elapsed;
        SubjectSchedule::Cooldown(remaining)
    }
}
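
// Worked example: avg_change_ratio = 0.03 lands in the 15-minute tier; on a
// weekday evening the multiplier is 2, so the effective interval is 30
// minutes. A subject last completed 20 minutes earlier would therefore sit
// in Cooldown for another 10 minutes before becoming Eligible.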

#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;

    /// Create a default `SubjectStats` for testing. Callers mutate fields as needed.
    fn make_stats(subject: &str) -> SubjectStats {
        SubjectStats {
            subject: subject.to_string(),
            recent_runs: 10,
            avg_change_ratio: 0.0,
            consecutive_zero_changes: 0,
            consecutive_empty_fetches: 0,
            recent_failure_count: 0,
            recent_success_count: 10,
            last_completed: Utc::now() - chrono::Duration::hours(1),
        }
    }

    // -- compute_base_interval tests --

    #[test]
    fn test_cold_start_returns_floor() {
        let mut stats = make_stats("CS");
        stats.recent_runs = 0;
        assert_eq!(compute_base_interval(&stats), COLD_START_INTERVAL);
    }

    #[test]
    fn test_high_change_rate() {
        let mut stats = make_stats("CS");
        stats.avg_change_ratio = 0.15;
        assert_eq!(compute_base_interval(&stats), FLOOR_INTERVAL);
    }

    #[test]
    fn test_moderate_high_change() {
        let mut stats = make_stats("CS");
        stats.avg_change_ratio = 0.07;
        assert_eq!(compute_base_interval(&stats), MODERATE_HIGH_INTERVAL);
    }

    #[test]
    fn test_moderate_low_change() {
        let mut stats = make_stats("CS");
        stats.avg_change_ratio = 0.03;
        assert_eq!(compute_base_interval(&stats), MODERATE_LOW_INTERVAL);
    }

    #[test]
    fn test_low_change() {
        let mut stats = make_stats("CS");
        stats.avg_change_ratio = 0.005;
        assert_eq!(compute_base_interval(&stats), LOW_CHANGE_INTERVAL);
    }

    #[test]
    fn test_zero_5_consecutive() {
        let mut stats = make_stats("CS");
        stats.avg_change_ratio = 0.0;
        stats.consecutive_zero_changes = 5;
        assert_eq!(compute_base_interval(&stats), ZERO_5_INTERVAL);
    }

    #[test]
    fn test_zero_10_consecutive() {
        let mut stats = make_stats("CS");
        stats.avg_change_ratio = 0.0;
        stats.consecutive_zero_changes = 10;
        assert_eq!(compute_base_interval(&stats), ZERO_10_INTERVAL);
    }

    #[test]
    fn test_zero_20_consecutive() {
        let mut stats = make_stats("CS");
        stats.avg_change_ratio = 0.0;
        stats.consecutive_zero_changes = 20;
        assert_eq!(compute_base_interval(&stats), CEILING_INTERVAL);
    }

    // -- evaluate_subject tests --

    #[test]
    fn test_pause_empty_fetches() {
        let mut stats = make_stats("CS");
        stats.consecutive_empty_fetches = 3;
        stats.last_completed = Utc::now() - chrono::Duration::minutes(10);
        let result = evaluate_subject(&stats, Utc::now(), false);
        assert_eq!(result, SubjectSchedule::Paused);
    }

    #[test]
    fn test_pause_all_failures() {
        let mut stats = make_stats("CS");
        stats.recent_success_count = 0;
        stats.recent_failure_count = 5;
        stats.last_completed = Utc::now() - chrono::Duration::minutes(10);
        let result = evaluate_subject(&stats, Utc::now(), false);
        assert_eq!(result, SubjectSchedule::Paused);
    }

    #[test]
    fn test_probe_after_pause() {
        let mut stats = make_stats("CS");
        stats.consecutive_empty_fetches = 5;
        stats.last_completed = Utc::now() - chrono::Duration::hours(7);
        let result = evaluate_subject(&stats, Utc::now(), false);
        assert_eq!(result, SubjectSchedule::Eligible(PAUSE_PROBE_INTERVAL));
    }

    #[test]
    fn test_read_only_past_term() {
        let stats = make_stats("CS");
        let result = evaluate_subject(&stats, Utc::now(), true);
        assert_eq!(result, SubjectSchedule::ReadOnly);
    }

    #[test]
    fn test_cooldown_not_elapsed() {
        let mut stats = make_stats("CS");
        stats.avg_change_ratio = 0.15; // floor = 3 min
        // Use a peak-hours timestamp so the time-of-day multiplier is 1
        let peak = Utc.with_ymd_and_hms(2025, 7, 14, 15, 0, 0).unwrap(); // Mon 10am CT
        stats.last_completed = peak - chrono::Duration::seconds(30);
        let result = evaluate_subject(&stats, peak, false);
        assert!(matches!(result, SubjectSchedule::Cooldown(_)));
    }

    #[test]
    fn test_eligible_elapsed() {
        let mut stats = make_stats("CS");
        stats.avg_change_ratio = 0.15; // floor = 3 min
        let peak = Utc.with_ymd_and_hms(2025, 7, 14, 15, 0, 0).unwrap(); // Mon 10am CT
        stats.last_completed = peak - chrono::Duration::minutes(5);
        let result = evaluate_subject(&stats, peak, false);
        assert!(matches!(result, SubjectSchedule::Eligible(_)));
    }

    // -- time_of_day_multiplier tests --

    #[test]
    fn test_time_multiplier_peak() {
        // Monday 10am CT = 15:00 UTC
        let dt = Utc.with_ymd_and_hms(2025, 7, 14, 15, 0, 0).unwrap();
        assert_eq!(time_of_day_multiplier(dt), 1);
    }

    #[test]
    fn test_time_multiplier_offpeak() {
        // Monday 8pm CT = Tuesday 01:00 UTC
|
||||
let dt = Utc.with_ymd_and_hms(2025, 7, 15, 1, 0, 0).unwrap();
|
||||
assert_eq!(time_of_day_multiplier(dt), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_time_multiplier_night() {
|
||||
// 3am CT = 08:00 UTC
|
||||
let dt = Utc.with_ymd_and_hms(2025, 7, 14, 8, 0, 0).unwrap();
|
||||
assert_eq!(time_of_day_multiplier(dt), 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_time_multiplier_weekend() {
|
||||
// Saturday noon CT = 17:00 UTC
|
||||
let dt = Utc.with_ymd_and_hms(2025, 7, 12, 17, 0, 0).unwrap();
|
||||
assert_eq!(time_of_day_multiplier(dt), 4);
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
pub mod subject;
|
||||
|
||||
use crate::banner::BannerApi;
|
||||
use crate::data::models::TargetType;
|
||||
use crate::data::models::{TargetType, UpsertCounts};
|
||||
use crate::error::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::PgPool;
|
||||
@@ -32,8 +32,9 @@ pub trait Job: Send + Sync {
|
||||
#[allow(dead_code)]
|
||||
fn target_type(&self) -> TargetType;
|
||||
|
||||
/// Process the job with the given API client and database pool
|
||||
async fn process(&self, banner_api: &BannerApi, db_pool: &PgPool) -> Result<()>;
|
||||
/// Process the job with the given API client and database pool.
|
||||
/// Returns upsert effectiveness counts on success.
|
||||
async fn process(&self, banner_api: &BannerApi, db_pool: &PgPool) -> Result<UpsertCounts>;
|
||||
|
||||
/// Get a human-readable description of the job
|
||||
fn description(&self) -> String;
|
||||
|
||||
@@ -1,7 +1,7 @@
use super::Job;
use crate::banner::{BannerApi, SearchQuery, Term};
use crate::data::batch::batch_upsert_courses;
-use crate::data::models::TargetType;
+use crate::data::models::{TargetType, UpsertCounts};
use crate::error::Result;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
@@ -26,7 +26,7 @@ impl Job for SubjectJob {
    }

    #[tracing::instrument(skip(self, banner_api, db_pool), fields(subject = %self.subject))]
-    async fn process(&self, banner_api: &BannerApi, db_pool: &PgPool) -> Result<()> {
+    async fn process(&self, banner_api: &BannerApi, db_pool: &PgPool) -> Result<UpsertCounts> {
        let subject_code = &self.subject;

        // Get the current term
@@ -37,17 +37,19 @@ impl Job for SubjectJob {
            .search(&term, &query, "subjectDescription", false)
            .await?;

-        if let Some(courses_from_api) = search_result.data {
+        let counts = if let Some(courses_from_api) = search_result.data {
            info!(
                subject = %subject_code,
                count = courses_from_api.len(),
                "Found courses"
            );
-            batch_upsert_courses(&courses_from_api, db_pool).await?;
-        }
+            batch_upsert_courses(&courses_from_api, db_pool).await?
+        } else {
+            UpsertCounts::default()
+        };

        debug!(subject = %subject_code, "Subject job completed");
-        Ok(())
+        Ok(counts)
    }

    fn description(&self) -> String {
+42 -7
@@ -1,13 +1,17 @@
+pub mod adaptive;
pub mod jobs;
pub mod scheduler;
pub mod worker;

use crate::banner::BannerApi;
+use crate::data::scrape_jobs;
use crate::services::Service;
+use crate::state::ReferenceCache;
use crate::status::{ServiceStatus, ServiceStatusRegistry};
+use crate::web::ws::ScrapeJobEvent;
use sqlx::PgPool;
use std::sync::Arc;
-use tokio::sync::broadcast;
+use tokio::sync::{RwLock, broadcast};
use tokio::task::JoinHandle;
use tracing::{info, warn};
@@ -21,7 +25,9 @@ use self::worker::Worker;
pub struct ScraperService {
    db_pool: PgPool,
    banner_api: Arc<BannerApi>,
+    reference_cache: Arc<RwLock<ReferenceCache>>,
    service_statuses: ServiceStatusRegistry,
+    job_events_tx: broadcast::Sender<ScrapeJobEvent>,
    scheduler_handle: Option<JoinHandle<()>>,
    worker_handles: Vec<JoinHandle<()>>,
    shutdown_tx: Option<broadcast::Sender<()>>,
@@ -29,11 +35,19 @@ pub struct ScraperService {

impl ScraperService {
    /// Creates a new `ScraperService`.
-    pub fn new(db_pool: PgPool, banner_api: Arc<BannerApi>, service_statuses: ServiceStatusRegistry) -> Self {
+    pub fn new(
+        db_pool: PgPool,
+        banner_api: Arc<BannerApi>,
+        reference_cache: Arc<RwLock<ReferenceCache>>,
+        service_statuses: ServiceStatusRegistry,
+        job_events_tx: broadcast::Sender<ScrapeJobEvent>,
+    ) -> Self {
        Self {
            db_pool,
            banner_api,
+            reference_cache,
            service_statuses,
+            job_events_tx,
            scheduler_handle: None,
            worker_handles: Vec::new(),
            shutdown_tx: None,
@@ -41,14 +55,29 @@ impl ScraperService {
    }

    /// Starts the scheduler and a pool of workers.
-    pub fn start(&mut self) {
+    ///
+    /// Force-unlocks any jobs left locked by a previous unclean shutdown before
+    /// spawning workers, so those jobs re-enter the queue immediately.
+    pub async fn start(&mut self) {
+        // Recover jobs left locked by a previous crash/unclean shutdown
+        match scrape_jobs::force_unlock_all(&self.db_pool).await {
+            Ok(0) => {}
+            Ok(count) => warn!(count, "Force-unlocked stale jobs from previous run"),
+            Err(e) => warn!(error = ?e, "Failed to force-unlock stale jobs"),
+        }
+
        info!("ScraperService starting");

        // Create shutdown channel
        let (shutdown_tx, _) = broadcast::channel(1);
        self.shutdown_tx = Some(shutdown_tx.clone());

-        let scheduler = Scheduler::new(self.db_pool.clone(), self.banner_api.clone());
+        let scheduler = Scheduler::new(
+            self.db_pool.clone(),
+            self.banner_api.clone(),
+            self.reference_cache.clone(),
+            self.job_events_tx.clone(),
+        );
        let shutdown_rx = shutdown_tx.subscribe();
        let scheduler_handle = tokio::spawn(async move {
            scheduler.run(shutdown_rx).await;
@@ -58,7 +87,12 @@ impl ScraperService {

        let worker_count = 4; // This could be configurable
        for i in 0..worker_count {
-            let worker = Worker::new(i, self.db_pool.clone(), self.banner_api.clone());
+            let worker = Worker::new(
+                i,
+                self.db_pool.clone(),
+                self.banner_api.clone(),
+                self.job_events_tx.clone(),
+            );
            let shutdown_rx = shutdown_tx.subscribe();
            let worker_handle = tokio::spawn(async move {
                worker.run(shutdown_rx).await;
@@ -80,13 +114,14 @@ impl Service for ScraperService {
    }

    async fn run(&mut self) -> Result<(), anyhow::Error> {
-        self.start();
+        self.start().await;
        std::future::pending::<()>().await;
        Ok(())
    }

    async fn shutdown(&mut self) -> Result<(), anyhow::Error> {
-        self.service_statuses.set("scraper", ServiceStatus::Disabled);
+        self.service_statuses
+            .set("scraper", ServiceStatus::Disabled);
        info!("Shutting down scraper service");

        // Send shutdown signal to all tasks
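scrape_jobs::force_unlock_all is not shown in this diff; a sketch of the recovery query it plausibly runs, with table and column names assumed from the rest of the PR:

use sqlx::PgPool;

// Assumed schema: clearing locked_at returns a job to the pending queue.
pub async fn force_unlock_all(pool: &PgPool) -> sqlx::Result<u64> {
    let res = sqlx::query("UPDATE scrape_jobs SET locked_at = NULL WHERE locked_at IS NOT NULL")
        .execute(pool)
        .await?;
    Ok(res.rows_affected()) // how many stale jobs were recovered
}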
+288 -35
@@ -1,28 +1,49 @@
use crate::banner::{BannerApi, Term};
-use crate::data::models::{ScrapePriority, TargetType};
+use crate::data::models::{ReferenceData, ScrapePriority, TargetType};
use crate::data::scrape_jobs;
use crate::error::Result;
+use crate::rmp::RmpClient;
+use crate::scraper::adaptive::{SubjectSchedule, SubjectStats, evaluate_subject};
use crate::scraper::jobs::subject::SubjectJob;
+use crate::state::ReferenceCache;
+use crate::web::ws::{ScrapeJobDto, ScrapeJobEvent};
+use chrono::{DateTime, Utc};
use serde_json::json;
use sqlx::PgPool;
+use std::collections::HashMap;
use std::sync::Arc;
-use std::time::Duration;
-use tokio::sync::broadcast;
+use std::time::{Duration, Instant};
+use tokio::sync::{RwLock, broadcast};
use tokio::time;
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, warn};

+/// How often reference data is re-scraped (6 hours).
+const REFERENCE_DATA_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60);
+
+/// How often RMP data is synced (24 hours).
+const RMP_SYNC_INTERVAL: Duration = Duration::from_secs(24 * 60 * 60);
+
/// Periodically analyzes data and enqueues prioritized scrape jobs.
pub struct Scheduler {
    db_pool: PgPool,
    banner_api: Arc<BannerApi>,
+    reference_cache: Arc<RwLock<ReferenceCache>>,
+    job_events_tx: broadcast::Sender<ScrapeJobEvent>,
}

impl Scheduler {
-    pub fn new(db_pool: PgPool, banner_api: Arc<BannerApi>) -> Self {
+    pub fn new(
+        db_pool: PgPool,
+        banner_api: Arc<BannerApi>,
+        reference_cache: Arc<RwLock<ReferenceCache>>,
+        job_events_tx: broadcast::Sender<ScrapeJobEvent>,
+    ) -> Self {
        Self {
            db_pool,
            banner_api,
+            reference_cache,
+            job_events_tx,
        }
    }
@@ -41,33 +62,69 @@ impl Scheduler {
        let work_interval = Duration::from_secs(60);
        let mut next_run = time::Instant::now();
        let mut current_work: Option<(tokio::task::JoinHandle<()>, CancellationToken)> = None;
+        // Scrape reference data immediately on first cycle
+        let mut last_ref_scrape = Instant::now() - REFERENCE_DATA_INTERVAL;
+        // Sync RMP data immediately on first cycle
+        let mut last_rmp_sync = Instant::now() - RMP_SYNC_INTERVAL;

        loop {
            tokio::select! {
                _ = time::sleep_until(next_run) => {
                    let cancel_token = CancellationToken::new();

+                    let should_scrape_ref = last_ref_scrape.elapsed() >= REFERENCE_DATA_INTERVAL;
+                    let should_sync_rmp = last_rmp_sync.elapsed() >= RMP_SYNC_INTERVAL;
+
                    // Spawn work in separate task to allow graceful cancellation during shutdown.
                    // Without this, shutdown would have to wait for the full scheduling cycle.
                    let work_handle = tokio::spawn({
                        let db_pool = self.db_pool.clone();
                        let banner_api = self.banner_api.clone();
                        let cancel_token = cancel_token.clone();
+                        let reference_cache = self.reference_cache.clone();
+                        let job_events_tx = self.job_events_tx.clone();

-                        async move {
-                            tokio::select! {
-                                result = Self::schedule_jobs_impl(&db_pool, &banner_api) => {
-                                    if let Err(e) = result {
-                                        error!(error = ?e, "Failed to schedule jobs");
-                                    }
-                                }
-                                _ = cancel_token.cancelled() => {
-                                    debug!("Scheduling work cancelled gracefully");
-                                }
-                            }
-                        }
+                        async move {
+                            tokio::select! {
+                                _ = async {
+                                    // RMP sync is independent of Banner API — run it
+                                    // concurrently with reference data scraping so it
+                                    // doesn't wait behind rate-limited Banner calls.
+                                    let rmp_fut = async {
+                                        if should_sync_rmp
+                                            && let Err(e) = Self::sync_rmp_data(&db_pool).await
+                                        {
+                                            error!(error = ?e, "Failed to sync RMP data");
+                                        }
+                                    };
+
+                                    let ref_fut = async {
+                                        if should_scrape_ref
+                                            && let Err(e) = Self::scrape_reference_data(&db_pool, &banner_api, &reference_cache).await
+                                        {
+                                            error!(error = ?e, "Failed to scrape reference data");
+                                        }
+                                    };
+
+                                    tokio::join!(rmp_fut, ref_fut);
+
+                                    if let Err(e) = Self::schedule_jobs_impl(&db_pool, &banner_api, Some(&job_events_tx)).await {
+                                        error!(error = ?e, "Failed to schedule jobs");
+                                    }
+                                } => {}
+                                _ = cancel_token.cancelled() => {
+                                    debug!("Scheduling work cancelled gracefully");
+                                }
+                            }
+                        }
                    });

+                    if should_scrape_ref {
+                        last_ref_scrape = Instant::now();
+                    }
+                    if should_sync_rmp {
+                        last_rmp_sync = Instant::now();
+                    }
+
                    current_work = Some((work_handle, cancel_token));
                    next_run = time::Instant::now() + work_interval;
                }
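One detail worth calling out: initializing last_ref_scrape and last_rmp_sync one full interval in the past is what makes both tasks fire on the very first cycle. The trick in isolation:

use std::time::{Duration, Instant};

fn main() {
    const INTERVAL: Duration = Duration::from_secs(6 * 60 * 60);
    // Backdate the marker so the very first elapsed() check passes.
    let last_run = Instant::now() - INTERVAL;
    assert!(last_run.elapsed() >= INTERVAL); // due immediately on startup
}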
@@ -94,18 +151,17 @@ impl Scheduler {

    /// Core scheduling logic that analyzes data and creates scrape jobs.
    ///
-    /// Strategy:
-    /// 1. Fetch all subjects for the current term from Banner API
-    /// 2. Query existing jobs in a single batch query
-    /// 3. Create jobs only for subjects that don't have pending jobs
+    /// Uses adaptive scheduling to determine per-subject scrape intervals based
+    /// on recent change rates, failure patterns, and time of day. Only subjects
+    /// that are eligible (i.e. their cooldown has elapsed) are enqueued.
    ///
    /// This is a static method (not &self) to allow it to be called from spawned tasks.
    #[tracing::instrument(skip_all, fields(term))]
-    async fn schedule_jobs_impl(db_pool: &PgPool, banner_api: &BannerApi) -> Result<()> {
-        // For now, we will implement a simple baseline scheduling strategy:
-        // 1. Get a list of all subjects from the Banner API.
-        // 2. Query existing jobs for all subjects in a single query.
-        // 3. Create new jobs only for subjects that don't have existing jobs.
+    async fn schedule_jobs_impl(
+        db_pool: &PgPool,
+        banner_api: &BannerApi,
+        job_events_tx: Option<&broadcast::Sender<ScrapeJobEvent>>,
+    ) -> Result<()> {
        let term = Term::get_current().inner().to_string();

        tracing::Span::current().record("term", term.as_str());
@@ -117,13 +173,70 @@ impl Scheduler {
            "Retrieved subjects from API"
        );

-        // Create payloads for all subjects
-        let subject_payloads: Vec<_> = subjects
-            .iter()
-            .map(|subject| json!({ "subject": subject.code }))
-            .collect();
-
-        // Query existing jobs for all subjects in a single query
+        // Fetch per-subject stats and build a lookup map
+        let stats_rows = scrape_jobs::fetch_subject_stats(db_pool).await?;
+        let stats_map: HashMap<String, SubjectStats> = stats_rows
+            .into_iter()
+            .map(|row| {
+                let subject = row.subject.clone();
+                (subject, SubjectStats::from(row))
+            })
+            .collect();
+
+        // Evaluate each subject using adaptive scheduling
+        let now = Utc::now();
+        let is_past_term = false; // Scheduler currently only fetches current term subjects
+        let mut eligible_subjects: Vec<String> = Vec::new();
+        let mut cooldown_count: usize = 0;
+        let mut paused_count: usize = 0;
+        let mut read_only_count: usize = 0;
+
+        for subject in &subjects {
+            let stats = stats_map.get(&subject.code).cloned().unwrap_or_else(|| {
+                // Cold start: no history for this subject
+                SubjectStats {
+                    subject: subject.code.clone(),
+                    recent_runs: 0,
+                    avg_change_ratio: 0.0,
+                    consecutive_zero_changes: 0,
+                    consecutive_empty_fetches: 0,
+                    recent_failure_count: 0,
+                    recent_success_count: 0,
+                    last_completed: DateTime::<Utc>::MIN_UTC,
+                }
+            });
+
+            match evaluate_subject(&stats, now, is_past_term) {
+                SubjectSchedule::Eligible(_) => {
+                    eligible_subjects.push(subject.code.clone());
+                }
+                SubjectSchedule::Cooldown(_) => cooldown_count += 1,
+                SubjectSchedule::Paused => paused_count += 1,
+                SubjectSchedule::ReadOnly => read_only_count += 1,
+            }
+        }
+
+        info!(
+            total = subjects.len(),
+            eligible = eligible_subjects.len(),
+            cooldown = cooldown_count,
+            paused = paused_count,
+            read_only = read_only_count,
+            "Adaptive scheduling decisions"
+        );
+
+        if eligible_subjects.is_empty() {
+            debug!("No eligible subjects to schedule");
+            return Ok(());
+        }
+
+        // Create payloads only for eligible subjects
+        let subject_payloads: Vec<_> = eligible_subjects
+            .iter()
+            .map(|code| json!({ "subject": code }))
+            .collect();
+
+        // Query existing jobs for eligible subjects only
        let existing_payloads = scrape_jobs::find_existing_job_payloads(
            TargetType::Subject,
            &subject_payloads,
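For reference while reading the match above, the variant set of SubjectSchedule implied by these arms (the payloads inside Eligible and Cooldown are not shown anywhere in this diff; Duration is only a guess at what they carry):

use std::time::Duration;

pub enum SubjectSchedule {
    Eligible(Duration), // cooldown elapsed; payload type assumed
    Cooldown(Duration), // still cooling down; payload type assumed
    Paused,             // plausibly: too many failures or empty fetches
    ReadOnly,           // plausibly: past terms that should not be rescraped
}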
@@ -131,12 +244,12 @@ impl Scheduler {
        )
        .await?;

-        // Filter out subjects that already have jobs and prepare new jobs
+        // Filter out subjects that already have pending jobs
        let mut skipped_count = 0;
-        let new_jobs: Vec<_> = subjects
+        let new_jobs: Vec<_> = eligible_subjects
            .into_iter()
-            .filter_map(|subject| {
-                let job = SubjectJob::new(subject.code.clone());
+            .filter_map(|subject_code| {
+                let job = SubjectJob::new(subject_code.clone());
                let payload = serde_json::to_value(&job).unwrap();
                let payload_str = payload.to_string();

@@ -144,7 +257,7 @@ impl Scheduler {
                    skipped_count += 1;
                    None
                } else {
-                    Some((payload, subject.code))
+                    Some((payload, subject_code))
                }
            })
            .collect();
@@ -164,10 +277,150 @@ impl Scheduler {
            .map(|(payload, _)| (payload, TargetType::Subject, ScrapePriority::Low))
            .collect();

-        scrape_jobs::batch_insert_jobs(&jobs, db_pool).await?;
+        let inserted = scrape_jobs::batch_insert_jobs(&jobs, db_pool).await?;
+
+        if let Some(tx) = job_events_tx {
+            inserted.iter().for_each(|job| {
+                debug!(job_id = job.id, "Emitting JobCreated event");
+                let _ = tx.send(ScrapeJobEvent::JobCreated {
+                    job: ScrapeJobDto::from(job),
+                });
+            });
+        }
    }

    debug!("Job scheduling complete");
    Ok(())
}

+    /// Fetch all RMP professors, upsert to DB, and auto-match against Banner instructors.
+    #[tracing::instrument(skip_all)]
+    async fn sync_rmp_data(db_pool: &PgPool) -> Result<()> {
+        info!("Starting RMP data sync");
+
+        let client = RmpClient::new();
+        let professors = client.fetch_all_professors().await?;
+        let total = professors.len();
+
+        crate::data::rmp::batch_upsert_rmp_professors(&professors, db_pool).await?;
+        info!(total, "RMP professors upserted");
+
+        let stats = crate::data::rmp_matching::generate_candidates(db_pool).await?;
+        info!(
+            total,
+            stats.total_unmatched,
+            stats.candidates_created,
+            stats.candidates_rescored,
+            stats.auto_matched,
+            stats.skipped_unparseable,
+            stats.skipped_no_candidates,
+            "RMP sync complete"
+        );
+
+        Ok(())
+    }
+
+    /// Scrape all reference data categories from Banner and upsert to DB, then refresh cache.
+    #[tracing::instrument(skip_all)]
+    async fn scrape_reference_data(
+        db_pool: &PgPool,
+        banner_api: &BannerApi,
+        reference_cache: &Arc<RwLock<ReferenceCache>>,
+    ) -> Result<()> {
+        let term = Term::get_current().inner().to_string();
+        info!(term = %term, "Scraping reference data");
+
+        let mut all_entries = Vec::new();
+
+        // Terms (fetched via session pool, no active session needed)
+        match banner_api.sessions.get_terms("", 1, 500).await {
+            Ok(terms) => {
+                debug!(count = terms.len(), "Fetched terms");
+                all_entries.extend(terms.into_iter().map(|t| ReferenceData {
+                    category: "term".to_string(),
+                    code: t.code,
+                    description: t.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch terms"),
+        }
+
+        // Subjects
+        match banner_api.get_subjects("", &term, 1, 500).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched subjects");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "subject".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch subjects"),
+        }
+
+        // Campuses
+        match banner_api.get_campuses(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched campuses");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "campus".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch campuses"),
+        }
+
+        // Instructional methods
+        match banner_api.get_instructional_methods(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched instructional methods");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "instructional_method".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch instructional methods"),
+        }
+
+        // Parts of term
+        match banner_api.get_parts_of_term(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched parts of term");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "part_of_term".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch parts of term"),
+        }
+
+        // Attributes
+        match banner_api.get_attributes(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched attributes");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "attribute".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch attributes"),
+        }
+
+        // Batch upsert all entries
+        let total = all_entries.len();
+        crate::data::reference::batch_upsert(&all_entries, db_pool).await?;
+        info!(total_entries = total, "Reference data upserted to DB");
+
+        // Refresh in-memory cache
+        let all = crate::data::reference::get_all(db_pool).await?;
+        let count = all.len();
+        *reference_cache.write().await = ReferenceCache::from_entries(all);
+        info!(entries = count, "Reference cache refreshed");
+
+        Ok(())
+    }
}
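The six match blocks in scrape_reference_data differ only in the getter called and the category string; if the Banner getters share a code/description pair type, they could collapse into a helper along these lines (a sketch only; Pair stands in for whatever type the getters actually return, and ReferenceData is the model shown above):

struct Pair {
    code: String,
    description: String,
}

fn extend_category(all: &mut Vec<ReferenceData>, category: &str, pairs: Vec<Pair>) {
    // Tag each (code, description) pair with its category before collecting.
    all.extend(pairs.into_iter().map(|p| ReferenceData {
        category: category.to_string(),
        code: p.code,
        description: p.description,
    }));
}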
+178 -18
@@ -1,8 +1,10 @@
use crate::banner::{BannerApi, BannerApiError};
-use crate::data::models::ScrapeJob;
+use crate::data::models::{ScrapeJob, ScrapeJobStatus, UpsertCounts};
use crate::data::scrape_jobs;
use crate::error::Result;
use crate::scraper::jobs::{JobError, JobType};
+use crate::web::ws::ScrapeJobEvent;
+use chrono::{DateTime, Utc};
use sqlx::PgPool;
use std::sync::Arc;
use std::time::Duration;
@@ -10,6 +12,9 @@ use tokio::sync::broadcast;
use tokio::time;
use tracing::{Instrument, debug, error, info, trace, warn};

+/// Maximum time a single job is allowed to run before being considered stuck.
+const JOB_TIMEOUT: Duration = Duration::from_secs(5 * 60);
+
/// A single worker instance.
///
/// Each worker runs in its own asynchronous task and continuously polls the
@@ -18,14 +23,21 @@ pub struct Worker {
    id: usize, // For logging purposes
    db_pool: PgPool,
    banner_api: Arc<BannerApi>,
+    job_events_tx: broadcast::Sender<ScrapeJobEvent>,
}

impl Worker {
-    pub fn new(id: usize, db_pool: PgPool, banner_api: Arc<BannerApi>) -> Self {
+    pub fn new(
+        id: usize,
+        db_pool: PgPool,
+        banner_api: Arc<BannerApi>,
+        job_events_tx: broadcast::Sender<ScrapeJobEvent>,
+    ) -> Self {
        Self {
            id,
            db_pool,
            banner_api,
+            job_events_tx,
        }
    }

@@ -60,22 +72,57 @@ impl Worker {
        let job_id = job.id;
        let retry_count = job.retry_count;
        let max_retries = job.max_retries;
+        let target_type = job.target_type;
+        let payload = job.target_payload.clone();
+        let priority = job.priority;
+        let queued_at = job.queued_at;
+        let started_at = Utc::now();
        let start = std::time::Instant::now();

-        // Process the job, racing against shutdown signal
+        // Emit JobLocked event
+        let locked_at = started_at.to_rfc3339();
+        debug!(job_id, "Emitting JobLocked event");
+        let _ = self.job_events_tx.send(ScrapeJobEvent::JobLocked {
+            id: job_id,
+            locked_at,
+            status: ScrapeJobStatus::Processing,
+        });
+
+        // Process the job, racing against shutdown signal and timeout
        let process_result = tokio::select! {
            _ = shutdown_rx.recv() => {
                self.handle_shutdown_during_processing(job_id).await;
                break;
            }
-            result = self.process_job(job) => result
+            result = async {
+                match time::timeout(JOB_TIMEOUT, self.process_job(job)).await {
+                    Ok(result) => result,
+                    Err(_elapsed) => {
+                        Err(JobError::Recoverable(anyhow::anyhow!(
+                            "job timed out after {}s",
+                            JOB_TIMEOUT.as_secs()
+                        )))
+                    }
+                }
+            } => result
        };

        let duration = start.elapsed();

        // Handle the job processing result
-        self.handle_job_result(job_id, retry_count, max_retries, process_result, duration)
-            .await;
+        self.handle_job_result(
+            job_id,
+            retry_count,
+            max_retries,
+            process_result,
+            duration,
+            target_type,
+            payload,
+            priority,
+            queued_at,
+            started_at,
+        )
+        .await;
    }
}

@@ -87,7 +134,7 @@ impl Worker {
        scrape_jobs::fetch_and_lock_job(&self.db_pool).await
    }

-    async fn process_job(&self, job: ScrapeJob) -> Result<(), JobError> {
+    async fn process_job(&self, job: ScrapeJob) -> Result<UpsertCounts, JobError> {
        // Convert the database job to our job type
        let job_type = JobType::from_target_type_and_payload(job.target_type, job.target_payload)
            .map_err(|e| JobError::Unrecoverable(anyhow::anyhow!(e)))?; // Parse errors are unrecoverable
@@ -114,9 +161,7 @@ impl Worker {
        job_impl
            .process(&self.banner_api, &self.db_pool)
            .await
-            .map_err(JobError::Recoverable)?;
-
-        Ok(())
+            .map_err(JobError::Recoverable)
    }
    .instrument(span)
    .await
@@ -130,7 +175,11 @@ impl Worker {
        scrape_jobs::unlock_job(job_id, &self.db_pool).await
    }

-    async fn unlock_and_increment_retry(&self, job_id: i32, max_retries: i32) -> Result<bool> {
+    async fn unlock_and_increment_retry(
+        &self,
+        job_id: i32,
+        max_retries: i32,
+    ) -> Result<Option<chrono::DateTime<chrono::Utc>>> {
        scrape_jobs::unlock_and_increment_retry(job_id, max_retries, &self.db_pool).await
    }
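The Option<DateTime<Utc>> return makes the retry decision atomic: one statement either re-queues the job (returning its new queued_at) or signals exhaustion with None, so the caller never needs a second round-trip. A sketch of the kind of query this implies, with assumed table and column names; the real query lives in crate::data::scrape_jobs:

use chrono::{DateTime, Utc};
use sqlx::PgPool;

pub async fn unlock_and_increment_retry(
    job_id: i32,
    max_retries: i32,
    pool: &PgPool,
) -> sqlx::Result<Option<DateTime<Utc>>> {
    // Only rows still under the retry budget match; exhausted jobs return no row.
    sqlx::query_scalar(
        "UPDATE scrape_jobs \
         SET locked_at = NULL, retry_count = retry_count + 1, queued_at = now() \
         WHERE id = $1 AND retry_count < $2 \
         RETURNING queued_at",
    )
    .bind(job_id)
    .bind(max_retries)
    .fetch_optional(pool)
    .await
}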
@@ -156,31 +205,97 @@ impl Worker {
    }

    /// Handle the result of job processing
+    #[allow(clippy::too_many_arguments)]
    async fn handle_job_result(
        &self,
        job_id: i32,
        retry_count: i32,
        max_retries: i32,
-        result: Result<(), JobError>,
+        result: Result<UpsertCounts, JobError>,
        duration: std::time::Duration,
+        target_type: crate::data::models::TargetType,
+        payload: serde_json::Value,
+        priority: crate::data::models::ScrapePriority,
+        queued_at: DateTime<Utc>,
+        started_at: DateTime<Utc>,
    ) {
+        let duration_ms = duration.as_millis() as i32;
+
        match result {
-            Ok(()) => {
+            Ok(counts) => {
                debug!(
                    worker_id = self.id,
                    job_id,
                    duration_ms = duration.as_millis(),
+                    courses_fetched = counts.courses_fetched,
+                    courses_changed = counts.courses_changed,
+                    courses_unchanged = counts.courses_unchanged,
                    "Job completed successfully"
                );

+                // Log the result
+                if let Err(e) = scrape_jobs::insert_job_result(
+                    target_type,
+                    payload,
+                    priority,
+                    queued_at,
+                    started_at,
+                    duration_ms,
+                    true,
+                    None,
+                    retry_count,
+                    Some(&counts),
+                    &self.db_pool,
+                )
+                .await
+                {
+                    error!(worker_id = self.id, job_id, error = ?e, "Failed to insert job result");
+                }
+
                if let Err(e) = self.delete_job(job_id).await {
                    error!(worker_id = self.id, job_id, error = ?e, "Failed to delete completed job");
                }
+                debug!(job_id, "Emitting JobCompleted event");
+                let _ = self
+                    .job_events_tx
+                    .send(ScrapeJobEvent::JobCompleted { id: job_id });
            }
            Err(JobError::Recoverable(e)) => {
-                self.handle_recoverable_error(job_id, retry_count, max_retries, e, duration)
-                    .await;
+                self.handle_recoverable_error(
+                    job_id,
+                    retry_count,
+                    max_retries,
+                    e,
+                    duration,
+                    target_type,
+                    payload,
+                    priority,
+                    queued_at,
+                    started_at,
+                )
+                .await;
            }
            Err(JobError::Unrecoverable(e)) => {
+                // Log the failed result
+                let err_msg = format!("{e:#}");
+                if let Err(log_err) = scrape_jobs::insert_job_result(
+                    target_type,
+                    payload,
+                    priority,
+                    queued_at,
+                    started_at,
+                    duration_ms,
+                    false,
+                    Some(&err_msg),
+                    retry_count,
+                    None,
+                    &self.db_pool,
+                )
+                .await
+                {
+                    error!(worker_id = self.id, job_id, error = ?log_err, "Failed to insert job result");
+                }
+
                error!(
                    worker_id = self.id,
                    job_id,
@@ -191,11 +306,16 @@ impl Worker {
                if let Err(e) = self.delete_job(job_id).await {
                    error!(worker_id = self.id, job_id, error = ?e, "Failed to delete corrupted job");
                }
+                debug!(job_id, "Emitting JobDeleted event");
+                let _ = self
+                    .job_events_tx
+                    .send(ScrapeJobEvent::JobDeleted { id: job_id });
            }
        }
    }

    /// Handle recoverable errors by logging appropriately and unlocking the job
+    #[allow(clippy::too_many_arguments)]
    async fn handle_recoverable_error(
        &self,
        job_id: i32,
@@ -203,6 +323,11 @@ impl Worker {
        max_retries: i32,
        e: anyhow::Error,
        duration: std::time::Duration,
+        target_type: crate::data::models::TargetType,
+        payload: serde_json::Value,
+        priority: crate::data::models::ScrapePriority,
+        queued_at: DateTime<Utc>,
+        started_at: DateTime<Utc>,
    ) {
        let next_attempt = retry_count.saturating_add(1);
        let remaining_retries = max_retries.saturating_sub(next_attempt);
@@ -233,7 +358,7 @@ impl Worker {

        // Atomically unlock and increment retry count, checking if retry is allowed
        match self.unlock_and_increment_retry(job_id, max_retries).await {
-            Ok(can_retry) if can_retry => {
+            Ok(Some(new_queued_at)) => {
                debug!(
                    worker_id = self.id,
                    job_id,
@@ -241,9 +366,37 @@ impl Worker {
                    remaining_retries = remaining_retries,
                    "Job unlocked for retry"
                );
+                debug!(job_id, "Emitting JobRetried event");
+                let _ = self.job_events_tx.send(ScrapeJobEvent::JobRetried {
+                    id: job_id,
+                    retry_count: next_attempt,
+                    queued_at: new_queued_at.to_rfc3339(),
+                    status: ScrapeJobStatus::Pending,
+                });
                // Don't log a result yet — the job will be retried
            }
-            Ok(_) => {
-                // Max retries exceeded (detected atomically)
+            Ok(None) => {
+                // Max retries exceeded — log final failure result
+                let duration_ms = duration.as_millis() as i32;
+                let err_msg = format!("{e:#}");
+                if let Err(log_err) = scrape_jobs::insert_job_result(
+                    target_type,
+                    payload,
+                    priority,
+                    queued_at,
+                    started_at,
+                    duration_ms,
+                    false,
+                    Some(&err_msg),
+                    next_attempt,
+                    None,
+                    &self.db_pool,
+                )
+                .await
+                {
+                    error!(worker_id = self.id, job_id, error = ?log_err, "Failed to insert job result");
+                }
+
                error!(
                    worker_id = self.id,
                    job_id,
@@ -256,6 +409,13 @@ impl Worker {
                if let Err(e) = self.delete_job(job_id).await {
                    error!(worker_id = self.id, job_id, error = ?e, "Failed to delete failed job");
                }
+                debug!(job_id, "Emitting JobExhausted and JobDeleted events");
+                let _ = self
+                    .job_events_tx
+                    .send(ScrapeJobEvent::JobExhausted { id: job_id });
+                let _ = self
+                    .job_events_tx
+                    .send(ScrapeJobEvent::JobDeleted { id: job_id });
            }
            Err(e) => {
                error!(worker_id = self.id, job_id, error = ?e, "Failed to unlock and increment retry count");
+43 -7
@@ -1,6 +1,7 @@
use super::Service;
use crate::state::AppState;
use crate::status::ServiceStatus;
+use crate::web::auth::AuthConfig;
use crate::web::create_router;
use std::net::SocketAddr;
use tokio::net::TcpListener;
@@ -11,22 +12,21 @@ use tracing::{info, trace, warn};
pub struct WebService {
    port: u16,
    app_state: AppState,
+    auth_config: AuthConfig,
    shutdown_tx: Option<broadcast::Sender<()>>,
}

impl WebService {
-    pub fn new(port: u16, app_state: AppState) -> Self {
+    pub fn new(port: u16, app_state: AppState, auth_config: AuthConfig) -> Self {
        Self {
            port,
            app_state,
+            auth_config,
            shutdown_tx: None,
        }
    }
    /// Periodically pings the database and updates the "database" service status.
-    async fn db_health_check_loop(
-        state: AppState,
-        mut shutdown_rx: broadcast::Receiver<()>,
-    ) {
+    async fn db_health_check_loop(state: AppState, mut shutdown_rx: broadcast::Receiver<()>) {
        use std::time::Duration;
        let mut interval = tokio::time::interval(Duration::from_secs(30));

@@ -51,6 +51,33 @@ impl WebService {
            }
        }
    }
+
+    /// Periodically cleans up expired sessions from the database and in-memory cache.
+    async fn session_cleanup_loop(state: AppState, mut shutdown_rx: broadcast::Receiver<()>) {
+        use std::time::Duration;
+        // Run every hour
+        let mut interval = tokio::time::interval(Duration::from_secs(3600));
+
+        loop {
+            tokio::select! {
+                _ = interval.tick() => {
+                    match state.session_cache.cleanup_expired().await {
+                        Ok(deleted) => {
+                            if deleted > 0 {
+                                info!(deleted, "cleaned up expired sessions");
+                            }
+                        }
+                        Err(e) => {
+                            warn!(error = %e, "session cleanup failed");
+                        }
+                    }
+                }
+                _ = shutdown_rx.recv() => {
+                    break;
+                }
+            }
+        }
+    }
}

#[async_trait::async_trait]
@@ -61,12 +88,14 @@ impl Service for WebService {

    async fn run(&mut self) -> Result<(), anyhow::Error> {
        // Create the main router with Banner API routes
-        let app = create_router(self.app_state.clone());
+        let app = create_router(self.app_state.clone(), self.auth_config.clone());

        let addr = SocketAddr::from(([0, 0, 0, 0], self.port));

        let listener = TcpListener::bind(addr).await?;
-        self.app_state.service_statuses.set("web", ServiceStatus::Active);
+        self.app_state
+            .service_statuses
+            .set("web", ServiceStatus::Active);
        info!(
            service = "web",
            address = %addr,
@@ -85,6 +114,13 @@ impl Service for WebService {
            Self::db_health_check_loop(health_state, health_shutdown_rx).await;
        });

+        // Spawn session cleanup task
+        let cleanup_state = self.app_state.clone();
+        let cleanup_shutdown_rx = shutdown_tx.subscribe();
+        tokio::spawn(async move {
+            Self::session_cleanup_loop(cleanup_state, cleanup_shutdown_rx).await;
+        });
+
        // Use axum's graceful shutdown with the internal shutdown signal
        axum::serve(listener, app)
            .with_graceful_shutdown(async move {
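SessionCache::cleanup_expired, used by the cleanup loop above, is not shown in this diff; a sketch of the query it plausibly runs, with the table name assumed from the expires_at > now() predicate the admin status handler uses later in this PR:

use sqlx::PgPool;

pub async fn cleanup_expired(pool: &PgPool) -> sqlx::Result<u64> {
    // Drop expired session rows; the in-memory side would be evicted separately.
    let res = sqlx::query("DELETE FROM user_sessions WHERE expires_at <= now()")
        .execute(pool)
        .await?;
    Ok(res.rows_affected())
}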
@@ -2,27 +2,120 @@

use crate::banner::BannerApi;
use crate::banner::Course;
use crate::data::models::ReferenceData;
use crate::status::ServiceStatusRegistry;
use crate::web::schedule_cache::ScheduleCache;
use crate::web::session_cache::{OAuthStateStore, SessionCache};
use crate::web::ws::ScrapeJobEvent;
use anyhow::Result;
use dashmap::DashMap;
use sqlx::PgPool;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;
use tokio::sync::{RwLock, broadcast};

/// In-memory cache for reference data (code→description lookups).
///
/// Loaded from the `reference_data` table on startup and refreshed periodically.
/// Uses a two-level HashMap so lookups take `&str` without allocating.
pub struct ReferenceCache {
    /// category → (code → description)
    data: HashMap<String, HashMap<String, String>>,
}

impl Default for ReferenceCache {
    fn default() -> Self {
        Self::new()
    }
}

impl ReferenceCache {
    /// Create an empty cache.
    pub fn new() -> Self {
        Self {
            data: HashMap::new(),
        }
    }

    /// Build cache from a list of reference data entries.
    pub fn from_entries(entries: Vec<ReferenceData>) -> Self {
        let mut data: HashMap<String, HashMap<String, String>> = HashMap::new();
        for e in entries {
            data.entry(e.category)
                .or_default()
                .insert(e.code, e.description);
        }
        Self { data }
    }

    /// Look up a description by category and code. Zero allocations.
    pub fn lookup(&self, category: &str, code: &str) -> Option<&str> {
        self.data
            .get(category)
            .and_then(|codes| codes.get(code))
            .map(|s| s.as_str())
    }

    /// Get all `(code, description)` pairs for a category, sorted by description.
    pub fn entries_for_category(&self, category: &str) -> Vec<(&str, &str)> {
        let Some(codes) = self.data.get(category) else {
            return Vec::new();
        };
        let mut entries: Vec<(&str, &str)> = codes
            .iter()
            .map(|(code, desc)| (code.as_str(), desc.as_str()))
            .collect();
        entries.sort_by(|a, b| a.1.cmp(b.1));
        entries
    }
}
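Hypothetical usage of the cache above (the category, code, and description values are illustrative, not real data):

let cache = ReferenceCache::from_entries(vec![ReferenceData {
    category: "subject".to_string(),
    code: "CS".to_string(),
    description: "Computer Science".to_string(),
}]);
assert_eq!(cache.lookup("subject", "CS"), Some("Computer Science"));
assert_eq!(cache.lookup("subject", "MATH"), None); // unknown code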
#[derive(Clone)]
pub struct AppState {
    pub banner_api: Arc<BannerApi>,
    pub db_pool: PgPool,
    pub service_statuses: ServiceStatusRegistry,
    pub reference_cache: Arc<RwLock<ReferenceCache>>,
    pub session_cache: SessionCache,
    pub oauth_state_store: OAuthStateStore,
    pub schedule_cache: ScheduleCache,
    pub scrape_job_tx: broadcast::Sender<ScrapeJobEvent>,
    pub search_options_cache: Arc<DashMap<String, (Instant, serde_json::Value)>>,
}

impl AppState {
    pub fn new(banner_api: Arc<BannerApi>, db_pool: PgPool) -> Self {
        let (scrape_job_tx, _) = broadcast::channel(64);
        let schedule_cache = ScheduleCache::new(db_pool.clone());
        Self {
            session_cache: SessionCache::new(db_pool.clone()),
            oauth_state_store: OAuthStateStore::new(),
            banner_api,
            db_pool,
            service_statuses: ServiceStatusRegistry::new(),
            reference_cache: Arc::new(RwLock::new(ReferenceCache::new())),
            schedule_cache,
            scrape_job_tx,
            search_options_cache: Arc::new(DashMap::new()),
        }
    }

    /// Subscribe to scrape job lifecycle events.
    pub fn scrape_job_events(&self) -> broadcast::Receiver<ScrapeJobEvent> {
        self.scrape_job_tx.subscribe()
    }

    /// Initialize the reference cache from the database.
    pub async fn load_reference_cache(&self) -> Result<()> {
        let entries = crate::data::reference::get_all(&self.db_pool).await?;
        let count = entries.len();
        let cache = ReferenceCache::from_entries(entries);
        *self.reference_cache.write().await = cache;
        tracing::info!(entries = count, "Reference cache loaded");
        Ok(())
    }

    /// Get a course by CRN directly from Banner API
    pub async fn get_course_or_fetch(&self, term: &str, crn: &str) -> Result<Course> {
        self.banner_api
+6 -1
@@ -3,11 +3,14 @@ use std::time::Instant;

use dashmap::DashMap;
use serde::Serialize;
+use ts_rs::TS;

/// Health status of a service.
-#[derive(Debug, Clone, Serialize, PartialEq)]
+#[derive(Debug, Clone, Serialize, PartialEq, TS)]
#[serde(rename_all = "lowercase")]
+#[ts(export)]
pub enum ServiceStatus {
+    #[allow(dead_code)]
    Starting,
    Active,
    Connected,
@@ -19,6 +22,7 @@ pub enum ServiceStatus {
#[derive(Debug, Clone)]
pub struct StatusEntry {
    pub status: ServiceStatus,
+    #[allow(dead_code)]
    pub updated_at: Instant,
}

@@ -46,6 +50,7 @@ impl ServiceStatusRegistry {
    }

    /// Returns the current status of a named service, if present.
+    #[allow(dead_code)]
    pub fn get(&self, name: &str) -> Option<ServiceStatus> {
        self.inner.get(name).map(|entry| entry.status.clone())
    }
@@ -0,0 +1,301 @@
//! Admin API handlers.
//!
//! All endpoints require the `AdminUser` extractor, returning 401/403 as needed.

use axum::extract::{Path, State};
use axum::http::{HeaderMap, StatusCode, header};
use axum::response::{IntoResponse, Json, Response};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use ts_rs::TS;

use crate::data::models::User;
use crate::state::AppState;
use crate::status::ServiceStatus;
use crate::web::extractors::AdminUser;
use crate::web::ws::ScrapeJobDto;

#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct ScrapeJobsResponse {
    pub jobs: Vec<ScrapeJobDto>,
}

#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct AdminServiceInfo {
    name: String,
    status: ServiceStatus,
}

#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct AdminStatusResponse {
    #[ts(type = "number")]
    user_count: i64,
    #[ts(type = "number")]
    session_count: i64,
    #[ts(type = "number")]
    course_count: i64,
    #[ts(type = "number")]
    scrape_job_count: i64,
    services: Vec<AdminServiceInfo>,
}

/// `GET /api/admin/status` — Enhanced system status for admins.
pub async fn admin_status(
    AdminUser(_user): AdminUser,
    State(state): State<AppState>,
) -> Result<Json<AdminStatusResponse>, (StatusCode, Json<Value>)> {
    let (user_count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM users")
        .fetch_one(&state.db_pool)
        .await
        .map_err(|e| {
            tracing::error!(error = %e, "failed to count users");
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(json!({"error": "failed to count users"})),
            )
        })?;

    let (session_count,): (i64,) =
        sqlx::query_as("SELECT COUNT(*) FROM user_sessions WHERE expires_at > now()")
            .fetch_one(&state.db_pool)
            .await
            .map_err(|e| {
                tracing::error!(error = %e, "failed to count sessions");
                (
                    StatusCode::INTERNAL_SERVER_ERROR,
                    Json(json!({"error": "failed to count sessions"})),
                )
            })?;

    let course_count = state.get_course_count().await.map_err(|e| {
        tracing::error!(error = %e, "failed to count courses");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(json!({"error": "failed to count courses"})),
        )
    })?;

    let (scrape_job_count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM scrape_jobs")
        .fetch_one(&state.db_pool)
        .await
        .map_err(|e| {
            tracing::error!(error = %e, "failed to count scrape jobs");
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(json!({"error": "failed to count scrape jobs"})),
            )
        })?;

    let services: Vec<AdminServiceInfo> = state
        .service_statuses
        .all()
        .into_iter()
        .map(|(name, status)| AdminServiceInfo { name, status })
        .collect();

    Ok(Json(AdminStatusResponse {
        user_count,
        session_count,
        course_count,
        scrape_job_count,
        services,
    }))
}

/// `GET /api/admin/users` — List all users.
pub async fn list_users(
    AdminUser(_user): AdminUser,
    State(state): State<AppState>,
) -> Result<Json<Vec<User>>, (StatusCode, Json<Value>)> {
    let users = crate::data::users::list_users(&state.db_pool)
        .await
        .map_err(|e| {
            tracing::error!(error = %e, "failed to list users");
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(json!({"error": "failed to list users"})),
            )
        })?;

    Ok(Json(users))
}

#[derive(Deserialize)]
pub struct SetAdminBody {
    is_admin: bool,
}

/// `PUT /api/admin/users/{discord_id}/admin` — Set admin status for a user.
pub async fn set_user_admin(
    AdminUser(_user): AdminUser,
    State(state): State<AppState>,
    Path(discord_id): Path<i64>,
    Json(body): Json<SetAdminBody>,
) -> Result<Json<User>, (StatusCode, Json<Value>)> {
    let user = crate::data::users::set_admin(&state.db_pool, discord_id, body.is_admin)
        .await
        .map_err(|e| {
            tracing::error!(error = %e, "failed to set admin status");
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(json!({"error": "failed to set admin status"})),
            )
        })?
        .ok_or_else(|| {
            (
                StatusCode::NOT_FOUND,
                Json(json!({"error": "user not found"})),
            )
        })?;

    state.session_cache.evict_user(discord_id);

    Ok(Json(user))
}

/// `GET /api/admin/scrape-jobs` — List scrape jobs.
pub async fn list_scrape_jobs(
    AdminUser(_user): AdminUser,
    State(state): State<AppState>,
) -> Result<Json<ScrapeJobsResponse>, (StatusCode, Json<Value>)> {
    let rows = sqlx::query_as::<_, crate::data::models::ScrapeJob>(
        "SELECT * FROM scrape_jobs ORDER BY priority DESC, execute_at ASC LIMIT 100",
    )
    .fetch_all(&state.db_pool)
    .await
    .map_err(|e| {
        tracing::error!(error = %e, "failed to list scrape jobs");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(json!({"error": "failed to list scrape jobs"})),
        )
    })?;

    let jobs: Vec<ScrapeJobDto> = rows.iter().map(ScrapeJobDto::from).collect();

    Ok(Json(ScrapeJobsResponse { jobs }))
}

/// Row returned by the audit-log query (audit + joined course fields).
#[derive(sqlx::FromRow, Debug)]
struct AuditRow {
    id: i32,
    course_id: i32,
    timestamp: chrono::DateTime<chrono::Utc>,
    field_changed: String,
    old_value: String,
    new_value: String,
    // Joined from courses table (nullable in case the course was deleted)
    subject: Option<String>,
    course_number: Option<String>,
    crn: Option<String>,
    title: Option<String>,
}

#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct AuditLogEntry {
    pub id: i32,
    pub course_id: i32,
    pub timestamp: String,
    pub field_changed: String,
    pub old_value: String,
    pub new_value: String,
    pub subject: Option<String>,
    pub course_number: Option<String>,
    pub crn: Option<String>,
    pub course_title: Option<String>,
}

#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct AuditLogResponse {
    pub entries: Vec<AuditLogEntry>,
}

/// Format a `DateTime<Utc>` as an HTTP-date (RFC 2822) for Last-Modified headers.
fn to_http_date(dt: &DateTime<Utc>) -> String {
    dt.format("%a, %d %b %Y %H:%M:%S GMT").to_string()
}

/// Parse an `If-Modified-Since` header value into a `DateTime<Utc>`.
fn parse_if_modified_since(headers: &HeaderMap) -> Option<DateTime<Utc>> {
    let val = headers.get(header::IF_MODIFIED_SINCE)?.to_str().ok()?;
    DateTime::parse_from_rfc2822(val)
        .ok()
        .map(|dt| dt.with_timezone(&Utc))
}

/// `GET /api/admin/audit-log` — List recent audit entries.
///
/// Supports `If-Modified-Since`: returns 304 when the newest entry hasn't changed.
pub async fn list_audit_log(
    AdminUser(_user): AdminUser,
    headers: HeaderMap,
    State(state): State<AppState>,
) -> Result<Response, (StatusCode, Json<Value>)> {
    let rows = sqlx::query_as::<_, AuditRow>(
        "SELECT a.id, a.course_id, a.timestamp, a.field_changed, a.old_value, a.new_value, \
         c.subject, c.course_number, c.crn, c.title \
         FROM course_audits a \
         LEFT JOIN courses c ON c.id = a.course_id \
         ORDER BY a.timestamp DESC LIMIT 200",
    )
    .fetch_all(&state.db_pool)
    .await
    .map_err(|e| {
        tracing::error!(error = %e, "failed to list audit log");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(json!({"error": "failed to list audit log"})),
        )
    })?;

    // Determine the latest timestamp across all rows (query is DESC so first row is newest)
    let latest = rows.first().map(|r| r.timestamp);

    // If the client sent If-Modified-Since and our data hasn't changed, return 304
    if let (Some(since), Some(latest_ts)) = (parse_if_modified_since(&headers), latest) {
        // Truncate to seconds for comparison (HTTP dates have second precision)
        if latest_ts.timestamp() <= since.timestamp() {
            let mut resp = StatusCode::NOT_MODIFIED.into_response();
            if let Ok(val) = to_http_date(&latest_ts).parse() {
                resp.headers_mut().insert(header::LAST_MODIFIED, val);
            }
            return Ok(resp);
        }
    }

    let entries: Vec<AuditLogEntry> = rows
        .iter()
        .map(|a| AuditLogEntry {
            id: a.id,
            course_id: a.course_id,
            timestamp: a.timestamp.to_rfc3339(),
            field_changed: a.field_changed.clone(),
            old_value: a.old_value.clone(),
            new_value: a.new_value.clone(),
            subject: a.subject.clone(),
            course_number: a.course_number.clone(),
            crn: a.crn.clone(),
            course_title: a.title.clone(),
        })
        .collect();

    let mut resp = Json(AuditLogResponse { entries }).into_response();
    if let Some(latest_ts) = latest
        && let Ok(val) = to_http_date(&latest_ts).parse()
    {
        resp.headers_mut().insert(header::LAST_MODIFIED, val);
    }
    Ok(resp)
}
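How a client would exercise list_audit_log's conditional-GET path (a sketch using reqwest; the URL and port are illustrative): replay the Last-Modified value from a previous response in If-Modified-Since and treat 304 as "nothing new".

async fn audit_log_unchanged(
    client: &reqwest::Client,
    last_modified: &str,
) -> reqwest::Result<bool> {
    let resp = client
        .get("http://localhost:8080/api/admin/audit-log")
        .header(reqwest::header::IF_MODIFIED_SINCE, last_modified)
        .send()
        .await?;
    // 304 means the newest audit entry hasn't changed since last_modified.
    Ok(resp.status() == reqwest::StatusCode::NOT_MODIFIED)
}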
@@ -0,0 +1,836 @@
//! Admin API handlers for RMP instructor matching management.

use axum::extract::{Path, Query, State};
use axum::http::StatusCode;
use axum::response::Json;
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use ts_rs::TS;

use crate::state::AppState;
use crate::web::extractors::AdminUser;

// ---------------------------------------------------------------------------
// Query / body types
// ---------------------------------------------------------------------------

#[derive(Deserialize, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct ListInstructorsParams {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub status: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub search: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub page: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub per_page: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sort: Option<String>,
}

#[derive(Deserialize, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct MatchBody {
    pub rmp_legacy_id: i32,
}

#[derive(Deserialize, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct RejectCandidateBody {
    pub rmp_legacy_id: i32,
}

// ---------------------------------------------------------------------------
// Response types
// ---------------------------------------------------------------------------

/// Simple acknowledgement response for mutating operations.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct OkResponse {
    pub ok: bool,
}

/// A top-candidate summary shown in the instructor list view.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct TopCandidateResponse {
    pub rmp_legacy_id: i32,
    pub score: Option<f32>,
    #[ts(as = "Option<std::collections::HashMap<String, f32>>")]
    pub score_breakdown: Option<serde_json::Value>,
    pub first_name: Option<String>,
    pub last_name: Option<String>,
    pub department: Option<String>,
    pub avg_rating: Option<f32>,
    pub num_ratings: Option<i32>,
}

/// An instructor row in the paginated list.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct InstructorListItem {
    pub id: i32,
    pub display_name: String,
    pub email: String,
    pub rmp_match_status: String,
    #[ts(as = "i32")]
    pub rmp_link_count: i64,
    #[ts(as = "i32")]
    pub candidate_count: i64,
    #[ts(as = "i32")]
    pub course_subject_count: i64,
    pub top_candidate: Option<TopCandidateResponse>,
}

/// Aggregate status counts for the instructor list.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct InstructorStats {
    #[ts(as = "i32")]
    pub total: i64,
    #[ts(as = "i32")]
    pub unmatched: i64,
    #[ts(as = "i32")]
    pub auto: i64,
    #[ts(as = "i32")]
    pub confirmed: i64,
    #[ts(as = "i32")]
    pub rejected: i64,
    #[ts(as = "i32")]
    pub with_candidates: i64,
}

/// Response for `GET /api/admin/instructors`.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct ListInstructorsResponse {
    pub instructors: Vec<InstructorListItem>,
    #[ts(as = "i32")]
    pub total: i64,
    pub page: i32,
    pub per_page: i32,
    pub stats: InstructorStats,
}

/// Instructor summary in the detail view.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct InstructorDetail {
    pub id: i32,
    pub display_name: String,
    pub email: String,
    pub rmp_match_status: String,
    pub subjects_taught: Vec<String>,
    #[ts(as = "i32")]
    pub course_count: i64,
}

/// A linked RMP profile in the detail view.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct LinkedRmpProfile {
    pub link_id: i32,
    pub legacy_id: i32,
    pub first_name: Option<String>,
    pub last_name: Option<String>,
    pub department: Option<String>,
    pub avg_rating: Option<f32>,
    pub avg_difficulty: Option<f32>,
    pub num_ratings: Option<i32>,
    pub would_take_again_pct: Option<f32>,
}

/// A match candidate in the detail view.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct CandidateResponse {
    pub id: i32,
    pub rmp_legacy_id: i32,
    pub first_name: Option<String>,
    pub last_name: Option<String>,
    pub department: Option<String>,
    pub avg_rating: Option<f32>,
    pub avg_difficulty: Option<f32>,
    pub num_ratings: Option<i32>,
    pub would_take_again_pct: Option<f32>,
    pub score: Option<f32>,
    #[ts(as = "Option<std::collections::HashMap<String, f32>>")]
    pub score_breakdown: Option<serde_json::Value>,
    pub status: String,
}

/// Response for `GET /api/admin/instructors/{id}` and `POST .../match`.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct InstructorDetailResponse {
    pub instructor: InstructorDetail,
    pub current_matches: Vec<LinkedRmpProfile>,
    pub candidates: Vec<CandidateResponse>,
}

/// Response for `POST /api/admin/rmp/rescore`.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct RescoreResponse {
    pub total_unmatched: usize,
    pub candidates_created: usize,
    pub candidates_rescored: usize,
    pub auto_matched: usize,
    pub skipped_unparseable: usize,
    pub skipped_no_candidates: usize,
}

// ---------------------------------------------------------------------------
// Helper: map sqlx errors to the standard admin error tuple
// ---------------------------------------------------------------------------

fn db_error(context: &str, e: sqlx::Error) -> (StatusCode, Json<Value>) {
    tracing::error!(error = %e, "{context}");
    (
        StatusCode::INTERNAL_SERVER_ERROR,
        Json(json!({"error": context})),
    )
}

// ---------------------------------------------------------------------------
// Row types for SQL queries
// ---------------------------------------------------------------------------

#[derive(sqlx::FromRow)]
struct InstructorRow {
    id: i32,
    display_name: String,
    email: String,
    rmp_match_status: String,
    rmp_link_count: Option<i64>,
    top_candidate_rmp_id: Option<i32>,
    top_candidate_score: Option<f32>,
    top_candidate_breakdown: Option<serde_json::Value>,
    tc_first_name: Option<String>,
    tc_last_name: Option<String>,
    tc_department: Option<String>,
    tc_avg_rating: Option<f32>,
    tc_num_ratings: Option<i32>,
    candidate_count: Option<i64>,
    course_subject_count: Option<i64>,
}

#[derive(sqlx::FromRow)]
struct StatusCount {
    rmp_match_status: String,
    count: i64,
}

#[derive(sqlx::FromRow)]
struct CandidateRow {
    id: i32,
    rmp_legacy_id: i32,
    score: Option<f32>,
    score_breakdown: Option<serde_json::Value>,
    status: String,
    first_name: Option<String>,
    last_name: Option<String>,
    department: Option<String>,
    avg_rating: Option<f32>,
    avg_difficulty: Option<f32>,
    num_ratings: Option<i32>,
    would_take_again_pct: Option<f32>,
}

#[derive(sqlx::FromRow)]
struct LinkedRmpProfileRow {
    link_id: i32,
    legacy_id: i32,
    first_name: Option<String>,
    last_name: Option<String>,
    department: Option<String>,
    avg_rating: Option<f32>,
    avg_difficulty: Option<f32>,
    num_ratings: Option<i32>,
    would_take_again_pct: Option<f32>,
}

// ---------------------------------------------------------------------------
// 1. GET /api/admin/instructors — paginated list with filtering
// ---------------------------------------------------------------------------

/// `GET /api/admin/instructors` — List instructors with filtering and pagination.
pub async fn list_instructors(
    AdminUser(_user): AdminUser,
    State(state): State<AppState>,
    Query(params): Query<ListInstructorsParams>,
) -> Result<Json<ListInstructorsResponse>, (StatusCode, Json<Value>)> {
    let page = params.page.unwrap_or(1).max(1);
    let per_page = params.per_page.unwrap_or(50).clamp(1, 100);
    let offset = (page - 1) * per_page;

    let sort_clause = match params.sort.as_deref() {
        Some("name_asc") => "i.display_name ASC",
        Some("name_desc") => "i.display_name DESC",
        Some("status") => "i.rmp_match_status ASC, i.display_name ASC",
        _ => "tc.score DESC NULLS LAST, i.display_name ASC",
    };

    // Build WHERE clause
    let mut conditions = Vec::new();
    let mut bind_idx = 0u32;

    if params.status.is_some() {
        bind_idx += 1;
        conditions.push(format!("i.rmp_match_status = ${bind_idx}"));
    }
    if params.search.is_some() {
        bind_idx += 1;
        conditions.push(format!(
            "(i.display_name ILIKE ${bind_idx} OR i.email ILIKE ${bind_idx})"
        ));
    }

    let where_clause = if conditions.is_empty() {
        String::new()
    } else {
        format!("WHERE {}", conditions.join(" AND "))
    };

    let query_str = format!(
        r#"
        SELECT
            i.id, i.display_name, i.email, i.rmp_match_status,
            (SELECT COUNT(*) FROM instructor_rmp_links irl WHERE irl.instructor_id = i.id) as rmp_link_count,
            tc.rmp_legacy_id as top_candidate_rmp_id,
            tc.score as top_candidate_score,
            tc.score_breakdown as top_candidate_breakdown,
            rp.first_name as tc_first_name,
            rp.last_name as tc_last_name,
            rp.department as tc_department,
            rp.avg_rating as tc_avg_rating,
            rp.num_ratings as tc_num_ratings,
|
||||
(SELECT COUNT(*) FROM rmp_match_candidates mc WHERE mc.instructor_id = i.id AND mc.status = 'pending') as candidate_count,
|
||||
(SELECT COUNT(DISTINCT c.subject) FROM course_instructors ci JOIN courses c ON c.id = ci.course_id WHERE ci.instructor_id = i.id) as course_subject_count
|
||||
FROM instructors i
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT mc.rmp_legacy_id, mc.score, mc.score_breakdown
|
||||
FROM rmp_match_candidates mc
|
||||
WHERE mc.instructor_id = i.id AND mc.status = 'pending'
|
||||
ORDER BY mc.score DESC
|
||||
LIMIT 1
|
||||
) tc ON true
|
||||
LEFT JOIN rmp_professors rp ON rp.legacy_id = tc.rmp_legacy_id
|
||||
{where_clause}
|
||||
ORDER BY {sort_clause}
|
||||
LIMIT {per_page} OFFSET {offset}
|
||||
"#
|
||||
);
|
||||
|
||||
// Build the query with dynamic binds
|
||||
let mut query = sqlx::query_as::<_, InstructorRow>(&query_str);
|
||||
if let Some(ref status) = params.status {
|
||||
query = query.bind(status);
|
||||
}
|
||||
if let Some(ref search) = params.search {
|
||||
query = query.bind(format!("%{search}%"));
|
||||
}
|
||||
|
||||
let rows = query
|
||||
.fetch_all(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to list instructors", e))?;
|
||||
|
||||
// Count total with filters
|
||||
let count_query_str = format!("SELECT COUNT(*) FROM instructors i {where_clause}");
|
||||
let mut count_query = sqlx::query_as::<_, (i64,)>(&count_query_str);
|
||||
if let Some(ref status) = params.status {
|
||||
count_query = count_query.bind(status);
|
||||
}
|
||||
if let Some(ref search) = params.search {
|
||||
count_query = count_query.bind(format!("%{search}%"));
|
||||
}
|
||||
|
||||
let (total,) = count_query
|
||||
.fetch_one(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to count instructors", e))?;
|
||||
|
||||
// Aggregate stats (unfiltered)
|
||||
let stats_rows = sqlx::query_as::<_, StatusCount>(
|
||||
"SELECT rmp_match_status, COUNT(*) as count FROM instructors GROUP BY rmp_match_status",
|
||||
)
|
||||
.fetch_all(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to get instructor stats", e))?;
|
||||
|
||||
// Count instructors with at least one candidate (for progress bar denominator)
|
||||
let (with_candidates,): (i64,) =
|
||||
sqlx::query_as("SELECT COUNT(DISTINCT instructor_id) FROM rmp_match_candidates")
|
||||
.fetch_one(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to count instructors with candidates", e))?;
|
||||
|
||||
let mut stats = InstructorStats {
|
||||
total: 0,
|
||||
unmatched: 0,
|
||||
auto: 0,
|
||||
confirmed: 0,
|
||||
rejected: 0,
|
||||
with_candidates,
|
||||
};
|
||||
for row in &stats_rows {
|
||||
stats.total += row.count;
|
||||
match row.rmp_match_status.as_str() {
|
||||
"unmatched" => stats.unmatched = row.count,
|
||||
"auto" => stats.auto = row.count,
|
||||
"confirmed" => stats.confirmed = row.count,
|
||||
"rejected" => stats.rejected = row.count,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let instructors: Vec<InstructorListItem> = rows
|
||||
.iter()
|
||||
.map(|r| {
|
||||
let top_candidate = r.top_candidate_rmp_id.map(|rmp_id| TopCandidateResponse {
|
||||
rmp_legacy_id: rmp_id,
|
||||
score: r.top_candidate_score,
|
||||
score_breakdown: r.top_candidate_breakdown.clone(),
|
||||
first_name: r.tc_first_name.clone(),
|
||||
last_name: r.tc_last_name.clone(),
|
||||
department: r.tc_department.clone(),
|
||||
avg_rating: r.tc_avg_rating,
|
||||
num_ratings: r.tc_num_ratings,
|
||||
});
|
||||
|
||||
InstructorListItem {
|
||||
id: r.id,
|
||||
display_name: r.display_name.clone(),
|
||||
email: r.email.clone(),
|
||||
rmp_match_status: r.rmp_match_status.clone(),
|
||||
rmp_link_count: r.rmp_link_count.unwrap_or(0),
|
||||
candidate_count: r.candidate_count.unwrap_or(0),
|
||||
course_subject_count: r.course_subject_count.unwrap_or(0),
|
||||
top_candidate,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Json(ListInstructorsResponse {
|
||||
instructors,
|
||||
total,
|
||||
page,
|
||||
per_page,
|
||||
stats,
|
||||
}))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 2. GET /api/admin/instructors/{id} — full detail
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// `GET /api/admin/instructors/{id}` — Full instructor detail with candidates.
|
||||
pub async fn get_instructor(
|
||||
AdminUser(_user): AdminUser,
|
||||
State(state): State<AppState>,
|
||||
Path(id): Path<i32>,
|
||||
) -> Result<Json<InstructorDetailResponse>, (StatusCode, Json<Value>)> {
|
||||
build_instructor_detail(&state, id).await
|
||||
}
|
||||
|
||||
/// Shared helper that builds the full instructor detail response.
|
||||
async fn build_instructor_detail(
|
||||
state: &AppState,
|
||||
id: i32,
|
||||
) -> Result<Json<InstructorDetailResponse>, (StatusCode, Json<Value>)> {
|
||||
// Fetch instructor
|
||||
let instructor: Option<(i32, String, String, String)> = sqlx::query_as(
|
||||
"SELECT id, display_name, email, rmp_match_status FROM instructors WHERE id = $1",
|
||||
)
|
||||
.bind(id)
|
||||
.fetch_optional(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to fetch instructor", e))?;
|
||||
|
||||
let (inst_id, display_name, email, rmp_match_status) = instructor.ok_or_else(|| {
|
||||
(
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(json!({"error": "instructor not found"})),
|
||||
)
|
||||
})?;
|
||||
|
||||
// Subjects taught
|
||||
let subjects: Vec<(String,)> = sqlx::query_as(
|
||||
"SELECT DISTINCT c.subject FROM course_instructors ci JOIN courses c ON c.id = ci.course_id WHERE ci.instructor_id = $1 ORDER BY c.subject",
|
||||
)
|
||||
.bind(inst_id)
|
||||
.fetch_all(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to fetch subjects", e))?;
|
||||
|
||||
// Course count
|
||||
let (course_count,): (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(DISTINCT ci.course_id) FROM course_instructors ci WHERE ci.instructor_id = $1",
|
||||
)
|
||||
.bind(inst_id)
|
||||
.fetch_one(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to count courses", e))?;
|
||||
|
||||
// Candidates with RMP professor info
|
||||
let candidates = sqlx::query_as::<_, CandidateRow>(
|
||||
r#"
|
||||
SELECT mc.id, mc.rmp_legacy_id, mc.score, mc.score_breakdown, mc.status,
|
||||
rp.first_name, rp.last_name, rp.department,
|
||||
rp.avg_rating, rp.avg_difficulty, rp.num_ratings, rp.would_take_again_pct
|
||||
FROM rmp_match_candidates mc
|
||||
JOIN rmp_professors rp ON rp.legacy_id = mc.rmp_legacy_id
|
||||
WHERE mc.instructor_id = $1
|
||||
ORDER BY mc.score DESC
|
||||
"#,
|
||||
)
|
||||
.bind(inst_id)
|
||||
.fetch_all(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to fetch candidates", e))?;
|
||||
|
||||
// Current matches (all linked RMP profiles)
|
||||
let current_matches = sqlx::query_as::<_, LinkedRmpProfileRow>(
|
||||
r#"
|
||||
SELECT irl.id as link_id,
|
||||
rp.legacy_id, rp.first_name, rp.last_name, rp.department,
|
||||
rp.avg_rating, rp.avg_difficulty, rp.num_ratings, rp.would_take_again_pct
|
||||
FROM instructor_rmp_links irl
|
||||
JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
|
||||
WHERE irl.instructor_id = $1
|
||||
ORDER BY rp.num_ratings DESC NULLS LAST
|
||||
"#,
|
||||
)
|
||||
.bind(inst_id)
|
||||
.fetch_all(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to fetch linked rmp profiles", e))?;
|
||||
|
||||
let current_matches_resp: Vec<LinkedRmpProfile> = current_matches
|
||||
.into_iter()
|
||||
.map(|p| LinkedRmpProfile {
|
||||
link_id: p.link_id,
|
||||
legacy_id: p.legacy_id,
|
||||
first_name: p.first_name,
|
||||
last_name: p.last_name,
|
||||
department: p.department,
|
||||
avg_rating: p.avg_rating,
|
||||
avg_difficulty: p.avg_difficulty,
|
||||
num_ratings: p.num_ratings,
|
||||
would_take_again_pct: p.would_take_again_pct,
|
||||
})
|
||||
.collect();
|
||||
|
||||
let candidates_resp: Vec<CandidateResponse> = candidates
|
||||
.into_iter()
|
||||
.map(|c| CandidateResponse {
|
||||
id: c.id,
|
||||
rmp_legacy_id: c.rmp_legacy_id,
|
||||
first_name: c.first_name,
|
||||
last_name: c.last_name,
|
||||
department: c.department,
|
||||
avg_rating: c.avg_rating,
|
||||
avg_difficulty: c.avg_difficulty,
|
||||
num_ratings: c.num_ratings,
|
||||
would_take_again_pct: c.would_take_again_pct,
|
||||
score: c.score,
|
||||
score_breakdown: c.score_breakdown,
|
||||
status: c.status,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Json(InstructorDetailResponse {
|
||||
instructor: InstructorDetail {
|
||||
id: inst_id,
|
||||
display_name,
|
||||
email,
|
||||
rmp_match_status,
|
||||
subjects_taught: subjects.into_iter().map(|(s,)| s).collect(),
|
||||
course_count,
|
||||
},
|
||||
current_matches: current_matches_resp,
|
||||
candidates: candidates_resp,
|
||||
}))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 3. POST /api/admin/instructors/{id}/match — accept a candidate
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// `POST /api/admin/instructors/{id}/match` — Accept a candidate match.
|
||||
pub async fn match_instructor(
|
||||
AdminUser(user): AdminUser,
|
||||
State(state): State<AppState>,
|
||||
Path(id): Path<i32>,
|
||||
Json(body): Json<MatchBody>,
|
||||
) -> Result<Json<InstructorDetailResponse>, (StatusCode, Json<Value>)> {
|
||||
// Verify the candidate exists and is pending
|
||||
let candidate: Option<(i32,)> = sqlx::query_as(
|
||||
"SELECT id FROM rmp_match_candidates WHERE instructor_id = $1 AND rmp_legacy_id = $2 AND status = 'pending'",
|
||||
)
|
||||
.bind(id)
|
||||
.bind(body.rmp_legacy_id)
|
||||
.fetch_optional(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to check candidate", e))?;
|
||||
|
||||
if candidate.is_none() {
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(json!({"error": "pending candidate not found for this instructor"})),
|
||||
));
|
||||
}
|
||||
|
||||
// Check if this RMP profile is already linked to a different instructor
|
||||
let conflict: Option<(i32,)> = sqlx::query_as(
|
||||
"SELECT instructor_id FROM instructor_rmp_links WHERE rmp_legacy_id = $1 AND instructor_id != $2",
|
||||
)
|
||||
.bind(body.rmp_legacy_id)
|
||||
.bind(id)
|
||||
.fetch_optional(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to check rmp uniqueness", e))?;
|
||||
|
||||
if let Some((other_id,)) = conflict {
|
||||
return Err((
|
||||
StatusCode::CONFLICT,
|
||||
Json(json!({
|
||||
"error": "RMP profile already linked to another instructor",
|
||||
"conflictingInstructorId": other_id,
|
||||
})),
|
||||
));
|
||||
}
|
||||
|
||||
let mut tx = state
|
||||
.db_pool
|
||||
.begin()
|
||||
.await
|
||||
.map_err(|e| db_error("failed to begin transaction", e))?;
|
||||
|
||||
// Insert link into instructor_rmp_links
|
||||
sqlx::query(
|
||||
"INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, created_by, source) VALUES ($1, $2, $3, 'manual') ON CONFLICT (rmp_legacy_id) DO NOTHING",
|
||||
)
|
||||
.bind(id)
|
||||
.bind(body.rmp_legacy_id)
|
||||
.bind(user.discord_id)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to insert rmp link", e))?;
|
||||
|
||||
// Update instructor match status
|
||||
sqlx::query("UPDATE instructors SET rmp_match_status = 'confirmed' WHERE id = $1")
|
||||
.bind(id)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to update instructor match status", e))?;
|
||||
|
||||
// Accept the candidate
|
||||
sqlx::query(
|
||||
"UPDATE rmp_match_candidates SET status = 'accepted', resolved_at = NOW(), resolved_by = $1 WHERE instructor_id = $2 AND rmp_legacy_id = $3",
|
||||
)
|
||||
.bind(user.discord_id)
|
||||
.bind(id)
|
||||
.bind(body.rmp_legacy_id)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to accept candidate", e))?;
|
||||
|
||||
tx.commit()
|
||||
.await
|
||||
.map_err(|e| db_error("failed to commit transaction", e))?;
|
||||
|
||||
build_instructor_detail(&state, id).await
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 4. POST /api/admin/instructors/{id}/reject-candidate — reject one candidate
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// `POST /api/admin/instructors/{id}/reject-candidate` — Reject a single candidate.
|
||||
pub async fn reject_candidate(
|
||||
AdminUser(user): AdminUser,
|
||||
State(state): State<AppState>,
|
||||
Path(id): Path<i32>,
|
||||
Json(body): Json<RejectCandidateBody>,
|
||||
) -> Result<Json<OkResponse>, (StatusCode, Json<Value>)> {
|
||||
let result = sqlx::query(
|
||||
"UPDATE rmp_match_candidates SET status = 'rejected', resolved_at = NOW(), resolved_by = $1 WHERE instructor_id = $2 AND rmp_legacy_id = $3 AND status = 'pending'",
|
||||
)
|
||||
.bind(user.discord_id)
|
||||
.bind(id)
|
||||
.bind(body.rmp_legacy_id)
|
||||
.execute(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to reject candidate", e))?;
|
||||
|
||||
if result.rows_affected() == 0 {
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(json!({"error": "pending candidate not found"})),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(Json(OkResponse { ok: true }))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 5. POST /api/admin/instructors/{id}/reject-all — no valid match
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// `POST /api/admin/instructors/{id}/reject-all` — Mark instructor as having no valid RMP match.
|
||||
pub async fn reject_all(
|
||||
AdminUser(user): AdminUser,
|
||||
State(state): State<AppState>,
|
||||
Path(id): Path<i32>,
|
||||
) -> Result<Json<OkResponse>, (StatusCode, Json<Value>)> {
|
||||
let mut tx = state
|
||||
.db_pool
|
||||
.begin()
|
||||
.await
|
||||
.map_err(|e| db_error("failed to begin transaction", e))?;
|
||||
|
||||
// Check current status — cannot reject an instructor with confirmed matches
|
||||
let current_status: Option<(String,)> =
|
||||
sqlx::query_as("SELECT rmp_match_status FROM instructors WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_optional(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to fetch instructor status", e))?;
|
||||
|
||||
let (status,) = current_status.ok_or_else(|| {
|
||||
(
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(json!({"error": "instructor not found"})),
|
||||
)
|
||||
})?;
|
||||
|
||||
if status == "confirmed" {
|
||||
return Err((
|
||||
StatusCode::CONFLICT,
|
||||
Json(
|
||||
json!({"error": "cannot reject instructor with confirmed matches — unmatch first"}),
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
// Update instructor status
|
||||
sqlx::query("UPDATE instructors SET rmp_match_status = 'rejected' WHERE id = $1")
|
||||
.bind(id)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to update instructor status", e))?;
|
||||
|
||||
// Reject all pending candidates
|
||||
sqlx::query(
|
||||
"UPDATE rmp_match_candidates SET status = 'rejected', resolved_at = NOW(), resolved_by = $1 WHERE instructor_id = $2 AND status = 'pending'",
|
||||
)
|
||||
.bind(user.discord_id)
|
||||
.bind(id)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to reject candidates", e))?;
|
||||
|
||||
tx.commit()
|
||||
.await
|
||||
.map_err(|e| db_error("failed to commit transaction", e))?;
|
||||
|
||||
Ok(Json(OkResponse { ok: true }))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 6. POST /api/admin/instructors/{id}/unmatch — remove current match
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Body for unmatch — optional `rmpLegacyId` to remove a specific link.
|
||||
/// If omitted (or null), all links are removed.
|
||||
#[derive(Deserialize, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct UnmatchBody {
|
||||
rmp_legacy_id: Option<i32>,
|
||||
}
|
||||
|
||||
/// `POST /api/admin/instructors/{id}/unmatch` — Remove RMP link(s).
|
||||
///
|
||||
/// Send `{ "rmpLegacyId": N }` to remove a specific link, or an empty body / `{}`
|
||||
/// to remove all links for the instructor.
|
||||
pub async fn unmatch_instructor(
|
||||
AdminUser(_user): AdminUser,
|
||||
State(state): State<AppState>,
|
||||
Path(id): Path<i32>,
|
||||
body: Option<Json<UnmatchBody>>,
|
||||
) -> Result<Json<OkResponse>, (StatusCode, Json<Value>)> {
|
||||
let rmp_legacy_id = body.and_then(|b| b.rmp_legacy_id);
|
||||
|
||||
// Verify instructor exists
|
||||
let exists: Option<(i32,)> = sqlx::query_as("SELECT id FROM instructors WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_optional(&state.db_pool)
|
||||
.await
|
||||
.map_err(|e| db_error("failed to check instructor", e))?;
|
||||
|
||||
if exists.is_none() {
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(json!({"error": "instructor not found"})),
|
||||
));
|
||||
}
|
||||
|
||||
// Use the data layer function to perform the unmatch
|
||||
crate::data::rmp::unmatch_instructor(&state.db_pool, id, rmp_legacy_id)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!(error = %e, "failed to unmatch instructor");
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(json!({"error": "failed to unmatch instructor"})),
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(Json(OkResponse { ok: true }))
|
||||
}
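
// A minimal sketch of how `UnmatchBody` deserializes (illustrative test;
// assumes serde_json, which this module already uses): `rmpLegacyId` maps to
// `rmp_legacy_id` under the camelCase rename, and `{}` means "remove all".
#[cfg(test)]
mod unmatch_body_tests {
    use super::UnmatchBody;

    #[test]
    fn parses_specific_and_empty_bodies() {
        let specific: UnmatchBody = serde_json::from_str(r#"{"rmpLegacyId": 42}"#).unwrap();
        assert_eq!(specific.rmp_legacy_id, Some(42));

        let empty: UnmatchBody = serde_json::from_str("{}").unwrap();
        assert_eq!(empty.rmp_legacy_id, None);
    }
}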

// ---------------------------------------------------------------------------
// 7. POST /api/admin/rmp/rescore — re-run candidate generation
// ---------------------------------------------------------------------------

/// `POST /api/admin/rmp/rescore` — Re-run RMP candidate generation.
pub async fn rescore(
    AdminUser(_user): AdminUser,
    State(state): State<AppState>,
) -> Result<Json<RescoreResponse>, (StatusCode, Json<Value>)> {
    let stats = crate::data::rmp_matching::generate_candidates(&state.db_pool)
        .await
        .map_err(|e| {
            tracing::error!(error = %e, "failed to run candidate generation");
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(json!({"error": "candidate generation failed"})),
            )
        })?;

    Ok(Json(RescoreResponse {
        total_unmatched: stats.total_unmatched,
        candidates_created: stats.candidates_created,
        candidates_rescored: stats.candidates_rescored,
        auto_matched: stats.auto_matched,
        skipped_unparseable: stats.skipped_unparseable,
        skipped_no_candidates: stats.skipped_no_candidates,
    }))
}

@@ -0,0 +1,538 @@
//! Admin API handlers for scraper observability.
//!
//! All endpoints require the `AdminUser` extractor, returning 401/403 as needed.

use axum::extract::{Path, Query, State};
use axum::http::StatusCode;
use axum::response::Json;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::json;
use sqlx::Row;
use ts_rs::TS;

use crate::banner::models::terms::Term;
use crate::data::scrape_jobs;
use crate::scraper::adaptive::{self, SubjectSchedule, SubjectStats};
use crate::state::AppState;
use crate::web::extractors::AdminUser;

type ApiError = (StatusCode, Json<serde_json::Value>);

fn parse_period(period: &str) -> Result<chrono::Duration, ApiError> {
    match period {
        "1h" => Ok(chrono::Duration::hours(1)),
        "6h" => Ok(chrono::Duration::hours(6)),
        "24h" => Ok(chrono::Duration::hours(24)),
        "7d" => Ok(chrono::Duration::days(7)),
        "30d" => Ok(chrono::Duration::days(30)),
        _ => Err((
            StatusCode::BAD_REQUEST,
            Json(
                json!({"error": format!("Invalid period '{period}'. Valid: 1h, 6h, 24h, 7d, 30d")}),
            ),
        )),
    }
}

fn period_to_interval_str(period: &str) -> &'static str {
    match period {
        "1h" => "1 hour",
        "6h" => "6 hours",
        "24h" => "24 hours",
        "7d" => "7 days",
        "30d" => "30 days",
        _ => "24 hours",
    }
}

fn parse_bucket(bucket: &str) -> Result<&'static str, ApiError> {
    match bucket {
        "1m" => Ok("1 minute"),
        "5m" => Ok("5 minutes"),
        "15m" => Ok("15 minutes"),
        "1h" => Ok("1 hour"),
        "6h" => Ok("6 hours"),
        _ => Err((
            StatusCode::BAD_REQUEST,
            Json(
                json!({"error": format!("Invalid bucket '{bucket}'. Valid: 1m, 5m, 15m, 1h, 6h")}),
            ),
        )),
    }
}

fn default_bucket_for_period(period: &str) -> &'static str {
    match period {
        "1h" => "1m",
        "6h" => "5m",
        "24h" => "15m",
        "7d" => "1h",
        "30d" => "6h",
        _ => "15m",
    }
}
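
// Sanity checks for the period/bucket helpers above (a minimal illustrative
// sketch assuming the standard `cargo test` harness): every valid period maps
// to a parseable default bucket, and unknown inputs are rejected.
#[cfg(test)]
mod interval_tests {
    use super::{default_bucket_for_period, parse_bucket, parse_period};

    #[test]
    fn default_buckets_are_valid() {
        for period in ["1h", "6h", "24h", "7d", "30d"] {
            assert!(parse_period(period).is_ok());
            assert!(parse_bucket(default_bucket_for_period(period)).is_ok());
        }
        assert!(parse_period("90d").is_err());
        assert!(parse_bucket("2m").is_err());
    }
}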

// ---------------------------------------------------------------------------
// Endpoint 1: GET /api/admin/scraper/stats
// ---------------------------------------------------------------------------

#[derive(Deserialize, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct StatsParams {
    #[serde(default = "default_period")]
    pub period: String,
}

fn default_period() -> String {
    "24h".to_string()
}

#[derive(Serialize, TS)]
#[ts(export)]
#[serde(rename_all = "camelCase")]
pub struct ScraperStatsResponse {
    period: String,
    #[ts(type = "number")]
    total_scrapes: i64,
    #[ts(type = "number")]
    successful_scrapes: i64,
    #[ts(type = "number")]
    failed_scrapes: i64,
    success_rate: Option<f64>,
    avg_duration_ms: Option<f64>,
    #[ts(type = "number")]
    total_courses_changed: i64,
    #[ts(type = "number")]
    total_courses_fetched: i64,
    #[ts(type = "number")]
    total_audits_generated: i64,
    #[ts(type = "number")]
    pending_jobs: i64,
    #[ts(type = "number")]
    locked_jobs: i64,
}

pub async fn scraper_stats(
    _admin: AdminUser,
    State(state): State<AppState>,
    Query(params): Query<StatsParams>,
) -> Result<Json<ScraperStatsResponse>, ApiError> {
    let _duration = parse_period(&params.period)?;
    let interval_str = period_to_interval_str(&params.period);

    let row = sqlx::query(
        "SELECT \
            COUNT(*) AS total_scrapes, \
            COUNT(*) FILTER (WHERE success) AS successful_scrapes, \
            COUNT(*) FILTER (WHERE NOT success) AS failed_scrapes, \
            (AVG(duration_ms) FILTER (WHERE success))::FLOAT8 AS avg_duration_ms, \
            COALESCE(SUM(courses_changed) FILTER (WHERE success), 0) AS total_courses_changed, \
            COALESCE(SUM(courses_fetched) FILTER (WHERE success), 0) AS total_courses_fetched, \
            COALESCE(SUM(audits_generated) FILTER (WHERE success), 0) AS total_audits_generated \
        FROM scrape_job_results \
        WHERE completed_at > NOW() - $1::interval",
    )
    .bind(interval_str)
    .fetch_one(&state.db_pool)
    .await
    .map_err(|e| {
        tracing::error!(error = %e, "Failed to fetch scraper stats");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(json!({"error": "Failed to fetch scraper stats"})),
        )
    })?;

    let total_scrapes: i64 = row.get("total_scrapes");
    let successful_scrapes: i64 = row.get("successful_scrapes");
    let failed_scrapes: i64 = row.get("failed_scrapes");
    let avg_duration_ms: Option<f64> = row.get("avg_duration_ms");
    let total_courses_changed: i64 = row.get("total_courses_changed");
    let total_courses_fetched: i64 = row.get("total_courses_fetched");
    let total_audits_generated: i64 = row.get("total_audits_generated");

    let queue_row = sqlx::query(
        "SELECT \
            COUNT(*) FILTER (WHERE locked_at IS NULL) AS pending_jobs, \
            COUNT(*) FILTER (WHERE locked_at IS NOT NULL) AS locked_jobs \
        FROM scrape_jobs",
    )
    .fetch_one(&state.db_pool)
    .await
    .map_err(|e| {
        tracing::error!(error = %e, "Failed to fetch queue stats");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(json!({"error": "Failed to fetch queue stats"})),
        )
    })?;

    let pending_jobs: i64 = queue_row.get("pending_jobs");
    let locked_jobs: i64 = queue_row.get("locked_jobs");

    let success_rate = if total_scrapes > 0 {
        Some(successful_scrapes as f64 / total_scrapes as f64)
    } else {
        None
    };

    Ok(Json(ScraperStatsResponse {
        period: params.period,
        total_scrapes,
        successful_scrapes,
        failed_scrapes,
        success_rate,
        avg_duration_ms,
        total_courses_changed,
        total_courses_fetched,
        total_audits_generated,
        pending_jobs,
        locked_jobs,
    }))
}

// ---------------------------------------------------------------------------
// Endpoint 2: GET /api/admin/scraper/timeseries
// ---------------------------------------------------------------------------

#[derive(Deserialize, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct TimeseriesParams {
    #[serde(default = "default_period")]
    pub period: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bucket: Option<String>,
}

#[derive(Serialize, TS)]
#[ts(export)]
#[serde(rename_all = "camelCase")]
pub struct TimeseriesResponse {
    period: String,
    bucket: String,
    points: Vec<TimeseriesPoint>,
}

#[derive(Serialize, TS)]
#[ts(export)]
#[serde(rename_all = "camelCase")]
pub struct TimeseriesPoint {
    /// ISO-8601 UTC timestamp for this data point (e.g., "2024-01-15T10:00:00Z")
    #[ts(type = "string")]
    timestamp: DateTime<Utc>,
    #[ts(type = "number")]
    scrape_count: i64,
    #[ts(type = "number")]
    success_count: i64,
    #[ts(type = "number")]
    error_count: i64,
    #[ts(type = "number")]
    courses_changed: i64,
    avg_duration_ms: f64,
}

pub async fn scraper_timeseries(
    _admin: AdminUser,
    State(state): State<AppState>,
    Query(params): Query<TimeseriesParams>,
) -> Result<Json<TimeseriesResponse>, ApiError> {
    let _duration = parse_period(&params.period)?;
    let period_interval = period_to_interval_str(&params.period);

    let bucket_code = match &params.bucket {
        Some(b) => {
            // Validate the bucket
            parse_bucket(b)?;
            b.as_str()
        }
        None => default_bucket_for_period(&params.period),
    };
    let bucket_interval = parse_bucket(bucket_code)?;

    let rows = sqlx::query(
        "WITH buckets AS ( \
            SELECT generate_series( \
                date_bin($1::interval, NOW() - $2::interval, '2020-01-01'::timestamptz), \
                date_bin($1::interval, NOW(), '2020-01-01'::timestamptz), \
                $1::interval \
            ) AS bucket_start \
        ), \
        raw AS ( \
            SELECT date_bin($1::interval, completed_at, '2020-01-01'::timestamptz) AS bucket_start, \
                COUNT(*)::BIGINT AS scrape_count, \
                COUNT(*) FILTER (WHERE success)::BIGINT AS success_count, \
                COUNT(*) FILTER (WHERE NOT success)::BIGINT AS error_count, \
                COALESCE(SUM(courses_changed) FILTER (WHERE success), 0)::BIGINT AS courses_changed, \
                COALESCE(AVG(duration_ms) FILTER (WHERE success), 0)::FLOAT8 AS avg_duration_ms \
            FROM scrape_job_results \
            WHERE completed_at > NOW() - $2::interval \
            GROUP BY 1 \
        ) \
        SELECT b.bucket_start, \
            COALESCE(r.scrape_count, 0) AS scrape_count, \
            COALESCE(r.success_count, 0) AS success_count, \
            COALESCE(r.error_count, 0) AS error_count, \
            COALESCE(r.courses_changed, 0) AS courses_changed, \
            COALESCE(r.avg_duration_ms, 0) AS avg_duration_ms \
        FROM buckets b \
        LEFT JOIN raw r ON b.bucket_start = r.bucket_start \
        ORDER BY b.bucket_start",
    )
    .bind(bucket_interval)
    .bind(period_interval)
    .fetch_all(&state.db_pool)
    .await
    .map_err(|e| {
        tracing::error!(error = %e, "Failed to fetch scraper timeseries");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(json!({"error": "Failed to fetch scraper timeseries"})),
        )
    })?;

    let points = rows
        .iter()
        .map(|row| TimeseriesPoint {
            timestamp: row.get("bucket_start"),
            scrape_count: row.get("scrape_count"),
            success_count: row.get("success_count"),
            error_count: row.get("error_count"),
            courses_changed: row.get("courses_changed"),
            avg_duration_ms: row.get("avg_duration_ms"),
        })
        .collect();

    Ok(Json(TimeseriesResponse {
        period: params.period,
        bucket: bucket_code.to_string(),
        points,
    }))
}

// ---------------------------------------------------------------------------
// Endpoint 3: GET /api/admin/scraper/subjects
// ---------------------------------------------------------------------------

#[derive(Serialize, TS)]
#[ts(export)]
#[serde(rename_all = "camelCase")]
pub struct SubjectsResponse {
    subjects: Vec<SubjectSummary>,
}

#[derive(Serialize, TS)]
#[ts(export)]
#[serde(rename_all = "camelCase")]
pub struct SubjectSummary {
    subject: String,
    subject_description: Option<String>,
    #[ts(type = "number")]
    tracked_course_count: i64,
    schedule_state: String,
    #[ts(type = "number")]
    current_interval_secs: u64,
    time_multiplier: u32,
    /// ISO-8601 UTC timestamp of last scrape (e.g., "2024-01-15T10:30:00Z")
    #[ts(type = "string")]
    last_scraped: DateTime<Utc>,
    /// ISO-8601 UTC timestamp when next scrape is eligible (e.g., "2024-01-15T11:00:00Z")
    #[ts(type = "string | null")]
    next_eligible_at: Option<DateTime<Utc>>,
    #[ts(type = "number | null")]
    cooldown_remaining_secs: Option<u64>,
    avg_change_ratio: f64,
    #[ts(type = "number")]
    consecutive_zero_changes: i64,
    #[ts(type = "number")]
    recent_runs: i64,
    #[ts(type = "number")]
    recent_failures: i64,
}

pub async fn scraper_subjects(
    _admin: AdminUser,
    State(state): State<AppState>,
) -> Result<Json<SubjectsResponse>, ApiError> {
    let raw_stats = scrape_jobs::fetch_subject_stats(&state.db_pool)
        .await
        .map_err(|e| {
            tracing::error!(error = %e, "Failed to fetch subject stats");
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(json!({"error": "Failed to fetch subject stats"})),
            )
        })?;

    let now = Utc::now();
    let multiplier = adaptive::time_of_day_multiplier(now);

    // Look up subject descriptions from the reference cache
    let ref_cache = state.reference_cache.read().await;

    // Count tracked courses per subject for the current term
    let term = Term::get_current().inner().to_string();
    let course_counts: std::collections::HashMap<String, i64> = sqlx::query_as(
        "SELECT subject, COUNT(*)::BIGINT AS cnt FROM courses WHERE term_code = $1 GROUP BY subject",
    )
    .bind(&term)
    .fetch_all(&state.db_pool)
    .await
    .map_err(|e| {
        tracing::error!(error = %e, "Failed to fetch course counts");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(json!({"error": "Failed to fetch course counts"})),
        )
    })?
    .into_iter()
    .map(|(subject, cnt): (String, i64)| (subject, cnt))
    .collect();

    let subjects = raw_stats
        .into_iter()
        .map(|row| {
            let stats: SubjectStats = row.into();
            let schedule = adaptive::evaluate_subject(&stats, now, false);
            let base_interval = adaptive::compute_base_interval(&stats);

            let schedule_state = match &schedule {
                SubjectSchedule::Eligible(_) => "eligible",
                SubjectSchedule::Cooldown(_) => "cooldown",
                SubjectSchedule::Paused => "paused",
                SubjectSchedule::ReadOnly => "read_only",
            };

            let current_interval_secs = base_interval.as_secs() * multiplier as u64;

            let (next_eligible_at, cooldown_remaining_secs) = match &schedule {
                SubjectSchedule::Eligible(_) => (Some(now), Some(0)),
                SubjectSchedule::Cooldown(remaining) => {
                    let remaining_secs = remaining.as_secs();
                    (
                        Some(now + chrono::Duration::seconds(remaining_secs as i64)),
                        Some(remaining_secs),
                    )
                }
                SubjectSchedule::Paused | SubjectSchedule::ReadOnly => (None, None),
            };

            let subject_description = ref_cache
                .lookup("subject", &stats.subject)
                .map(|s| s.to_string());

            let tracked_course_count = course_counts.get(&stats.subject).copied().unwrap_or(0);

            SubjectSummary {
                subject: stats.subject,
                subject_description,
                tracked_course_count,
                schedule_state: schedule_state.to_string(),
                current_interval_secs,
                time_multiplier: multiplier,
                last_scraped: stats.last_completed,
                next_eligible_at,
                cooldown_remaining_secs,
                avg_change_ratio: stats.avg_change_ratio,
                consecutive_zero_changes: stats.consecutive_zero_changes,
                recent_runs: stats.recent_runs,
                recent_failures: stats.recent_failure_count,
            }
        })
        .collect();

    Ok(Json(SubjectsResponse { subjects }))
}

// ---------------------------------------------------------------------------
// Endpoint 4: GET /api/admin/scraper/subjects/{subject}
// ---------------------------------------------------------------------------

#[derive(Deserialize, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct SubjectDetailParams {
    #[serde(default = "default_detail_limit")]
    pub limit: i32,
}

fn default_detail_limit() -> i32 {
    50
}

#[derive(Serialize, TS)]
#[ts(export)]
#[serde(rename_all = "camelCase")]
pub struct SubjectDetailResponse {
    subject: String,
    results: Vec<SubjectResultEntry>,
}

#[derive(Serialize, TS)]
#[ts(export)]
#[serde(rename_all = "camelCase")]
pub struct SubjectResultEntry {
    #[ts(type = "number")]
    id: i64,
    /// ISO-8601 UTC timestamp when the scrape job completed (e.g., "2024-01-15T10:30:00Z")
    #[ts(type = "string")]
    completed_at: DateTime<Utc>,
    duration_ms: i32,
    success: bool,
    error_message: Option<String>,
    courses_fetched: Option<i32>,
    courses_changed: Option<i32>,
    courses_unchanged: Option<i32>,
    audits_generated: Option<i32>,
    metrics_generated: Option<i32>,
}

pub async fn scraper_subject_detail(
    _admin: AdminUser,
    State(state): State<AppState>,
    Path(subject): Path<String>,
    Query(params): Query<SubjectDetailParams>,
) -> Result<Json<SubjectDetailResponse>, ApiError> {
    let limit = params.limit.clamp(1, 200);

    let rows = sqlx::query(
        "SELECT id, completed_at, duration_ms, success, error_message, \
            courses_fetched, courses_changed, courses_unchanged, \
            audits_generated, metrics_generated \
        FROM scrape_job_results \
        WHERE target_type = 'Subject' AND payload->>'subject' = $1 \
        ORDER BY completed_at DESC \
        LIMIT $2",
    )
    .bind(&subject)
    .bind(limit)
    .fetch_all(&state.db_pool)
    .await
    .map_err(|e| {
        tracing::error!(error = %e, subject = %subject, "Failed to fetch subject detail");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(json!({"error": "Failed to fetch subject detail"})),
        )
    })?;

    let results = rows
        .iter()
        .map(|row| SubjectResultEntry {
            id: row.get("id"),
            completed_at: row.get("completed_at"),
            duration_ms: row.get("duration_ms"),
            success: row.get("success"),
            error_message: row.get("error_message"),
            courses_fetched: row.get("courses_fetched"),
            courses_changed: row.get("courses_changed"),
            courses_unchanged: row.get("courses_unchanged"),
            audits_generated: row.get("audits_generated"),
            metrics_generated: row.get("metrics_generated"),
        })
        .collect();

    Ok(Json(SubjectDetailResponse { subject, results }))
}

+114 -19
@@ -1,14 +1,18 @@
//! Embedded assets for the web frontend.
//!
//! Serves static assets embedded into the binary at compile time using rust-embed.
//! Supports content negotiation for pre-compressed variants (.br, .gz, .zst)
//! generated at build time by `web/scripts/compress-assets.ts`.

use axum::http::{HeaderMap, HeaderValue, header};
use dashmap::DashMap;
use rapidhash::v3::rapidhash_v3;
use rust_embed::RustEmbed;
use std::fmt;
use std::sync::LazyLock;

use super::encoding::{COMPRESSION_MIN_SIZE, ContentEncoding, parse_accepted_encodings};

/// Embedded web assets from the dist directory
#[derive(RustEmbed)]
#[folder = "web/dist/"]
@@ -21,17 +25,15 @@ pub struct WebAssets;
pub struct AssetHash(u64);

impl AssetHash {
    /// Create a new AssetHash from u64 value
    pub fn new(hash: u64) -> Self {
        Self(hash)
    }

    /// Get the hash as a hex string
    pub fn to_hex(&self) -> String {
        format!("{:016x}", self.0)
    }

    /// Get the hash as a quoted hex string (for ETag headers)
    pub fn quoted(&self) -> String {
        format!("\"{}\"", self.to_hex())
    }
@@ -51,12 +53,8 @@ pub struct AssetMetadata {
}

impl AssetMetadata {
    /// Check if the etag matches the asset hash
    pub fn etag_matches(&self, etag: &str) -> bool {
        // Remove quotes if present (ETags are typically quoted)
        let etag = etag.trim_matches('"');

        // ETags generated from u64 hex should be 16 characters
        etag.len() == 16
            && u64::from_str_radix(etag, 16)
                .map(|parsed| parsed == self.hash.0)
@@ -68,28 +66,125 @@ impl AssetMetadata {
static ASSET_CACHE: LazyLock<DashMap<String, AssetMetadata>> = LazyLock::new(DashMap::new);

/// Get cached asset metadata for a file path, caching on-demand
/// Returns AssetMetadata containing MIME type and RapidHash hash
pub fn get_asset_metadata_cached(path: &str, content: &[u8]) -> AssetMetadata {
    // Check cache first
    if let Some(cached) = ASSET_CACHE.get(path) {
        return cached.value().clone();
    }

    // Calculate MIME type
    let mime_type = mime_guess::from_path(path)
        .first()
        .map(|mime| mime.to_string());

    // Calculate RapidHash hash (using u64 native output size)
    let hash = AssetHash::new(rapidhash_v3(content));
    let metadata = AssetMetadata { mime_type, hash };

    // Only cache if we haven't exceeded the limit
    if ASSET_CACHE.len() < 1000 {
        ASSET_CACHE.insert(path.to_string(), metadata.clone());
    }

    metadata
}

/// Set appropriate `Cache-Control` header based on the asset path.
///
/// SvelteKit outputs fingerprinted assets under `_app/immutable/` which are
/// safe to cache indefinitely. Other assets get shorter cache durations.
fn set_cache_control(headers: &mut HeaderMap, path: &str) {
    let cache_control = if path.contains("immutable/") {
        // SvelteKit fingerprinted assets — cache forever
        "public, max-age=31536000, immutable"
    } else if path == "index.html" || path.ends_with(".html") {
        "public, max-age=300"
    } else {
        match path.rsplit_once('.').map(|(_, ext)| ext) {
            Some("css" | "js") => "public, max-age=86400",
            Some("png" | "jpg" | "jpeg" | "gif" | "svg" | "ico") => "public, max-age=2592000",
            _ => "public, max-age=3600",
        }
    };

    if let Ok(value) = HeaderValue::from_str(cache_control) {
        headers.insert(header::CACHE_CONTROL, value);
    }
}
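
// A minimal sketch exercising the path-based cache policy above (illustrative
// test; relies only on the `axum::http` re-exports this module already uses).
#[cfg(test)]
mod cache_control_tests {
    use super::set_cache_control;
    use axum::http::{HeaderMap, header};

    fn policy_for(path: &str) -> String {
        let mut headers = HeaderMap::new();
        set_cache_control(&mut headers, path);
        headers
            .get(header::CACHE_CONTROL)
            .and_then(|v| v.to_str().ok())
            .unwrap_or_default()
            .to_owned()
    }

    #[test]
    fn picks_duration_by_path() {
        assert_eq!(
            policy_for("_app/immutable/chunks/entry.js"),
            "public, max-age=31536000, immutable"
        );
        assert_eq!(policy_for("index.html"), "public, max-age=300");
        assert_eq!(policy_for("favicon.ico"), "public, max-age=2592000");
        assert_eq!(policy_for("data/terms.json"), "public, max-age=3600");
    }
}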

/// Serve an embedded asset with content encoding negotiation.
///
/// Tries pre-compressed variants (.br, .gz, .zst) in the order preferred by
/// the client's `Accept-Encoding` header, falling back to the uncompressed
/// original. Returns `None` if the asset doesn't exist at all.
pub fn try_serve_asset_with_encoding(
    path: &str,
    request_headers: &HeaderMap,
) -> Option<axum::response::Response> {
    use axum::response::IntoResponse;

    let asset_path = path.strip_prefix('/').unwrap_or(path);

    // Get the uncompressed original first (for metadata: MIME type, ETag)
    let original = WebAssets::get(asset_path)?;
    let metadata = get_asset_metadata_cached(asset_path, &original.data);

    // Check ETag for conditional requests (304 Not Modified)
    if let Some(etag) = request_headers.get(header::IF_NONE_MATCH)
        && etag.to_str().is_ok_and(|s| metadata.etag_matches(s))
    {
        return Some(axum::http::StatusCode::NOT_MODIFIED.into_response());
    }

    let mime_type = metadata
        .mime_type
        .unwrap_or_else(|| "application/octet-stream".to_string());

    // Only attempt pre-compressed variants for files above the compression
    // threshold — the build script skips smaller files too.
    let accepted_encodings = if original.data.len() >= COMPRESSION_MIN_SIZE {
        parse_accepted_encodings(request_headers)
    } else {
        vec![ContentEncoding::Identity]
    };

    for encoding in &accepted_encodings {
        if *encoding == ContentEncoding::Identity {
            continue;
        }

        let compressed_path = format!("{}{}", asset_path, encoding.extension());
        if let Some(compressed) = WebAssets::get(&compressed_path) {
            let mut response_headers = HeaderMap::new();

            if let Ok(ct) = HeaderValue::from_str(&mime_type) {
                response_headers.insert(header::CONTENT_TYPE, ct);
            }
            if let Some(ce) = encoding.header_value() {
                response_headers.insert(header::CONTENT_ENCODING, ce);
            }
            if let Ok(etag_val) = HeaderValue::from_str(&metadata.hash.quoted()) {
                response_headers.insert(header::ETAG, etag_val);
            }
            // Vary so caches distinguish by encoding
            response_headers.insert(header::VARY, HeaderValue::from_static("Accept-Encoding"));
            set_cache_control(&mut response_headers, asset_path);

            return Some(
                (
                    axum::http::StatusCode::OK,
                    response_headers,
                    compressed.data,
                )
                    .into_response(),
            );
        }
    }

    // No compressed variant found — serve uncompressed original
    let mut response_headers = HeaderMap::new();
    if let Ok(ct) = HeaderValue::from_str(&mime_type) {
        response_headers.insert(header::CONTENT_TYPE, ct);
    }
    if let Ok(etag_val) = HeaderValue::from_str(&metadata.hash.quoted()) {
        response_headers.insert(header::ETAG, etag_val);
    }
    set_cache_control(&mut response_headers, asset_path);

    Some((axum::http::StatusCode::OK, response_headers, original.data).into_response())
}

+304
@@ -0,0 +1,304 @@
//! Discord OAuth2 authentication handlers.
//!
//! Provides login, callback, logout, and session introspection endpoints
//! for Discord OAuth2 authentication flow.

use axum::extract::{Extension, Query, State};
use axum::http::{HeaderMap, StatusCode, header};
use axum::response::{IntoResponse, Json, Redirect, Response};
use serde::Deserialize;
use serde_json::{Value, json};
use std::time::Duration;
use tracing::{error, info, warn};

use crate::state::AppState;

/// OAuth configuration passed as an Axum Extension.
#[derive(Clone)]
pub struct AuthConfig {
    pub client_id: String,
    pub client_secret: String,
    /// Optional base URL override (e.g. "https://banner.xevion.dev").
    /// When `None`, the redirect URI is derived from the request's Origin/Host header.
    pub redirect_base: Option<String>,
}

const CALLBACK_PATH: &str = "/api/auth/callback";

/// Derive the origin (scheme + host + port) the user's browser is actually on.
///
/// Priority:
/// 1. Configured `redirect_base` (production override)
/// 2. `Referer` header — preserves the real browser origin even through
///    reverse proxies that rewrite `Host` (e.g. Vite dev proxy with
///    `changeOrigin: true`)
/// 3. `Origin` header (present on POST / CORS requests)
/// 4. `Host` header (last resort, may be rewritten by proxies)
fn resolve_origin(auth_config: &AuthConfig, headers: &HeaderMap) -> String {
    if let Some(base) = &auth_config.redirect_base {
        return base.trim_end_matches('/').to_owned();
    }

    // Referer carries the full browser URL; extract just the origin.
    if let Some(referer) = headers.get(header::REFERER).and_then(|v| v.to_str().ok())
        && let Ok(parsed) = url::Url::parse(referer)
    {
        let origin = parsed.origin().unicode_serialization();
        if origin != "null" {
            return origin;
        }
    }

    if let Some(origin) = headers.get("origin").and_then(|v| v.to_str().ok()) {
        return origin.trim_end_matches('/').to_owned();
    }

    if let Some(host) = headers.get(header::HOST).and_then(|v| v.to_str().ok()) {
        return format!("http://{host}");
    }

    "http://localhost:8080".to_owned()
}
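
// A minimal sketch of the resolution order above (illustrative test; the
// header and config values here are hypothetical examples): a configured
// `redirect_base` always wins, and `Host` is the last-resort fallback.
#[cfg(test)]
mod origin_tests {
    use super::{AuthConfig, resolve_origin};
    use axum::http::{HeaderMap, HeaderValue, header};

    fn config(redirect_base: Option<&str>) -> AuthConfig {
        AuthConfig {
            client_id: String::new(),
            client_secret: String::new(),
            redirect_base: redirect_base.map(str::to_owned),
        }
    }

    #[test]
    fn configured_base_wins_and_host_is_fallback() {
        let headers = HeaderMap::new();
        assert_eq!(
            resolve_origin(&config(Some("https://banner.xevion.dev/")), &headers),
            "https://banner.xevion.dev"
        );

        let mut headers = HeaderMap::new();
        headers.insert(header::HOST, HeaderValue::from_static("localhost:3000"));
        assert_eq!(
            resolve_origin(&config(None), &headers),
            "http://localhost:3000"
        );
    }
}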

#[derive(Deserialize)]
pub struct CallbackParams {
    code: String,
    state: String,
}

#[derive(Deserialize)]
struct TokenResponse {
    access_token: String,
}

#[derive(Deserialize)]
struct DiscordUser {
    id: String,
    username: String,
    avatar: Option<String>,
}

/// Extract the `session` cookie value from request headers.
fn extract_session_token(headers: &HeaderMap) -> Option<String> {
    headers
        .get(header::COOKIE)?
        .to_str()
        .ok()?
        .split(';')
        .find_map(|cookie| {
            let cookie = cookie.trim();
            cookie.strip_prefix("session=").map(|v| v.to_owned())
        })
}

/// Build a `Set-Cookie` header value for the session cookie.
fn session_cookie(token: &str, max_age: i64, secure: bool) -> String {
    let mut cookie = format!("session={token}; HttpOnly; SameSite=Lax; Path=/; Max-Age={max_age}");
    if secure {
        cookie.push_str("; Secure");
    }
    cookie
}
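
// A minimal sketch showing the session token round-tripping through the
// `Cookie` header format a browser would send back (illustrative test).
#[cfg(test)]
mod cookie_tests {
    use super::{extract_session_token, session_cookie};
    use axum::http::{HeaderMap, HeaderValue, header};

    #[test]
    fn builds_and_extracts_session_cookie() {
        assert_eq!(
            session_cookie("tok123", 3600, true),
            "session=tok123; HttpOnly; SameSite=Lax; Path=/; Max-Age=3600; Secure"
        );

        let mut headers = HeaderMap::new();
        headers.insert(
            header::COOKIE,
            HeaderValue::from_static("theme=dark; session=tok123"),
        );
        assert_eq!(extract_session_token(&headers), Some("tok123".to_owned()));
    }
}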
|
||||
|
||||
/// `GET /api/auth/login` — Redirect to Discord OAuth2 authorization page.
|
||||
pub async fn auth_login(
|
||||
State(state): State<AppState>,
|
||||
Extension(auth_config): Extension<AuthConfig>,
|
||||
headers: HeaderMap,
|
||||
) -> Redirect {
|
||||
let origin = resolve_origin(&auth_config, &headers);
|
||||
let redirect_uri = format!("{origin}{CALLBACK_PATH}");
|
||||
let csrf_state = state.oauth_state_store.generate(origin);
|
||||
let redirect_uri_encoded = urlencoding::encode(&redirect_uri);
|
||||
|
||||
let url = format!(
|
||||
"https://discord.com/oauth2/authorize\
|
||||
?client_id={}\
|
||||
&redirect_uri={redirect_uri_encoded}\
|
||||
&response_type=code\
|
||||
&scope=identify\
|
||||
&state={csrf_state}",
|
||||
auth_config.client_id,
|
||||
);
|
||||
|
||||
Redirect::temporary(&url)
|
||||
}
|
||||
|
||||
/// `GET /api/auth/callback` — Handle Discord OAuth2 callback.
|
||||
pub async fn auth_callback(
|
||||
State(state): State<AppState>,
|
||||
Extension(auth_config): Extension<AuthConfig>,
|
||||
Query(params): Query<CallbackParams>,
|
||||
) -> Result<Response, (StatusCode, Json<Value>)> {
|
||||
// 1. Validate CSRF state and recover the origin used during login
|
||||
let origin = state
|
||||
.oauth_state_store
|
||||
.validate(¶ms.state)
|
||||
.ok_or_else(|| {
|
||||
        warn!("OAuth callback with invalid CSRF state");
        (
            StatusCode::BAD_REQUEST,
            Json(json!({ "error": "Invalid OAuth state" })),
        )
    })?;

    // 2. Exchange authorization code for access token
    let redirect_uri = format!("{origin}{CALLBACK_PATH}");
    let client = reqwest::Client::new();
    let token_response = client
        .post("https://discord.com/api/oauth2/token")
        .form(&[
            ("client_id", auth_config.client_id.as_str()),
            ("client_secret", auth_config.client_secret.as_str()),
            ("grant_type", "authorization_code"),
            ("code", params.code.as_str()),
            ("redirect_uri", redirect_uri.as_str()),
        ])
        .send()
        .await
        .map_err(|e| {
            error!(error = %e, "failed to exchange OAuth code for token");
            (
                StatusCode::BAD_GATEWAY,
                Json(json!({ "error": "Failed to exchange code with Discord" })),
            )
        })?;

    if !token_response.status().is_success() {
        let status = token_response.status();
        let body = token_response.text().await.unwrap_or_default();
        error!(%status, %body, "Discord token exchange returned error");
        return Err((
            StatusCode::BAD_GATEWAY,
            Json(json!({ "error": "Discord token exchange failed" })),
        ));
    }

    let token_data: TokenResponse = token_response.json().await.map_err(|e| {
        error!(error = %e, "failed to parse Discord token response");
        (
            StatusCode::BAD_GATEWAY,
            Json(json!({ "error": "Invalid token response from Discord" })),
        )
    })?;

    // 3. Fetch Discord user profile
    let discord_user: DiscordUser = client
        .get("https://discord.com/api/users/@me")
        .bearer_auth(&token_data.access_token)
        .send()
        .await
        .map_err(|e| {
            error!(error = %e, "failed to fetch Discord user profile");
            (
                StatusCode::BAD_GATEWAY,
                Json(json!({ "error": "Failed to fetch Discord profile" })),
            )
        })?
        .json()
        .await
        .map_err(|e| {
            error!(error = %e, "failed to parse Discord user profile");
            (
                StatusCode::BAD_GATEWAY,
                Json(json!({ "error": "Invalid user profile from Discord" })),
            )
        })?;

    let discord_id: i64 = discord_user.id.parse().map_err(|_| {
        error!(id = %discord_user.id, "Discord user ID is not a valid i64");
        (
            StatusCode::BAD_GATEWAY,
            Json(json!({ "error": "Invalid Discord user ID" })),
        )
    })?;

    // 4. Upsert user
    let user = crate::data::users::upsert_user(
        &state.db_pool,
        discord_id,
        &discord_user.username,
        discord_user.avatar.as_deref(),
    )
    .await
    .map_err(|e| {
        error!(error = %e, "failed to upsert user");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(json!({ "error": "Database error" })),
        )
    })?;

    info!(discord_id, username = %user.discord_username, "user authenticated via OAuth");

    // 5. Create session
    let session = crate::data::sessions::create_session(
        &state.db_pool,
        discord_id,
        Duration::from_secs(crate::data::sessions::SESSION_DURATION_SECS),
    )
    .await
    .map_err(|e| {
        error!(error = %e, "failed to create session");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(json!({ "error": "Failed to create session" })),
        )
    })?;

    // 6. Build response with session cookie
    let secure = redirect_uri.starts_with("https://");
    let cookie = session_cookie(
        &session.id,
        crate::data::sessions::SESSION_DURATION_SECS as i64,
        secure,
    );

    let redirect_to = if user.is_admin { "/admin" } else { "/" };

    Ok((
        [(header::SET_COOKIE, cookie)],
        Redirect::temporary(redirect_to),
    )
        .into_response())
}

/// `POST /api/auth/logout` — Destroy the current session.
pub async fn auth_logout(State(state): State<AppState>, headers: HeaderMap) -> Response {
    if let Some(token) = extract_session_token(&headers) {
        if let Err(e) = crate::data::sessions::delete_session(&state.db_pool, &token).await {
            warn!(error = %e, "failed to delete session from database");
        }
        state.session_cache.evict(&token);
    }

    let cookie = session_cookie("", 0, false);

    (
        StatusCode::OK,
        [(header::SET_COOKIE, cookie)],
        Json(json!({ "ok": true })),
    )
        .into_response()
}

/// `GET /api/auth/me` — Return the current authenticated user's info.
pub async fn auth_me(
    State(state): State<AppState>,
    headers: HeaderMap,
) -> Result<Json<Value>, StatusCode> {
    let token = extract_session_token(&headers).ok_or(StatusCode::UNAUTHORIZED)?;

    let user = state
        .session_cache
        .get_user(&token)
        .await
        .ok_or(StatusCode::UNAUTHORIZED)?;

    Ok(Json(json!({
        "discordId": user.discord_id.to_string(),
        "username": user.discord_username,
        "avatarHash": user.discord_avatar_hash,
        "isAdmin": user.is_admin,
    })))
}
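A minimal sketch of what the `session_cookie` helper used above might look like. The helper itself is defined earlier in this file and not shown in this hunk, so the attribute choices below are assumptions, not the confirmed implementation:

// Hedged sketch: HttpOnly keeps the token away from scripts, SameSite=Lax
// still lets the OAuth redirect carry the cookie, and Secure is toggled from
// the origin scheme exactly as the callback handler above does. The
// "session=" name matches what the extractors strip on the way back in.
fn session_cookie_sketch(token: &str, max_age_secs: i64, secure: bool) -> String {
    let mut cookie =
        format!("session={token}; Path=/; Max-Age={max_age_secs}; HttpOnly; SameSite=Lax");
    if secure {
        cookie.push_str("; Secure");
    }
    cookie
}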
@@ -0,0 +1,136 @@
//! Web API endpoints for calendar export (ICS download + Google Calendar redirect).

use axum::{
    extract::{Path, State},
    http::{StatusCode, header},
    response::{IntoResponse, Redirect, Response},
};

use crate::calendar::{CalendarCourse, generate_gcal_url, generate_ics};
use crate::data::models::DbMeetingTime;
use crate::state::AppState;

/// Fetch course + meeting times, build a `CalendarCourse`.
async fn load_calendar_course(
    state: &AppState,
    term: &str,
    crn: &str,
) -> Result<(CalendarCourse, Vec<DbMeetingTime>), (StatusCode, String)> {
    let course = crate::data::courses::get_course_by_crn(&state.db_pool, crn, term)
        .await
        .map_err(|e| {
            tracing::error!(error = %e, "Calendar: course lookup failed");
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                "Lookup failed".to_string(),
            )
        })?
        .ok_or_else(|| (StatusCode::NOT_FOUND, "Course not found".to_string()))?;

    let instructors = crate::data::courses::get_course_instructors(&state.db_pool, course.id)
        .await
        .unwrap_or_default();

    let primary_instructor = instructors
        .iter()
        .find(|i| i.is_primary)
        .or(instructors.first())
        .map(|i| i.display_name.clone());

    let meeting_times: Vec<DbMeetingTime> =
        serde_json::from_value(course.meeting_times.clone()).unwrap_or_default();

    let cal_course = CalendarCourse {
        crn: course.crn.clone(),
        subject: course.subject.clone(),
        course_number: course.course_number.clone(),
        title: course.title.clone(),
        sequence_number: course.sequence_number.clone(),
        primary_instructor,
    };

    Ok((cal_course, meeting_times))
}

/// `GET /api/courses/{term}/{crn}/calendar.ics`
///
/// Returns an ICS file download for the course.
pub async fn course_ics(
    State(state): State<AppState>,
    Path((term, crn)): Path<(String, String)>,
) -> Result<Response, (StatusCode, String)> {
    let (cal_course, meeting_times) = load_calendar_course(&state, &term, &crn).await?;

    if meeting_times.is_empty() {
        return Err((
            StatusCode::NOT_FOUND,
            "No meeting times found for this course".to_string(),
        ));
    }

    let result = generate_ics(&cal_course, &meeting_times).map_err(|e| {
        tracing::error!(error = %e, "ICS generation failed");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            "Failed to generate ICS file".to_string(),
        )
    })?;

    let response = (
        [
            (header::CONTENT_TYPE, "text/calendar; charset=utf-8"),
            (
                header::CONTENT_DISPOSITION,
                &format!("attachment; filename=\"{}\"", result.filename),
            ),
            (header::CACHE_CONTROL, "no-cache"),
        ],
        result.content,
    )
        .into_response();

    Ok(response)
}

/// `GET /api/courses/{term}/{crn}/gcal`
///
/// Redirects to Google Calendar with a pre-filled event for the first meeting time.
/// If multiple meeting times exist, uses the first one with scheduled days/times.
pub async fn course_gcal(
    State(state): State<AppState>,
    Path((term, crn)): Path<(String, String)>,
) -> Result<Response, (StatusCode, String)> {
    let (cal_course, meeting_times) = load_calendar_course(&state, &term, &crn).await?;

    if meeting_times.is_empty() {
        return Err((
            StatusCode::NOT_FOUND,
            "No meeting times found for this course".to_string(),
        ));
    }

    // Prefer the first meeting time that has actual days/times scheduled
    let mt = meeting_times
        .iter()
        .find(|mt| {
            mt.begin_time.is_some()
                && (mt.monday
                    || mt.tuesday
                    || mt.wednesday
                    || mt.thursday
                    || mt.friday
                    || mt.saturday
                    || mt.sunday)
        })
        .unwrap_or(&meeting_times[0]);

    let url = generate_gcal_url(&cal_course, mt).map_err(|e| {
        tracing::error!(error = %e, "Google Calendar URL generation failed");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            "Failed to generate Google Calendar URL".to_string(),
        )
    })?;

    Ok(Redirect::temporary(&url).into_response())
}
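For context, `generate_gcal_url` (defined elsewhere in the crate) has to produce Google Calendar's public event-template URL. A hedged sketch of that URL shape, assuming the documented `render?action=TEMPLATE` format; the caller is responsible for percent-encoding, and the exact construction in this repo may differ:

// Illustrative only: the parameter names (text, dates, recur) are Google's
// public template format, not taken from this codebase. `encoded_title` must
// already be percent-encoded; `start`/`end` use the compact local form Google
// expects, e.g. "20250826T100000".
fn gcal_template_url_sketch(encoded_title: &str, start: &str, end: &str, rrule: &str) -> String {
    format!(
        "https://calendar.google.com/calendar/render?action=TEMPLATE&text={encoded_title}&dates={start}/{end}&recur=RRULE:{rrule}"
    )
}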
@@ -0,0 +1,196 @@
//! Content encoding negotiation for pre-compressed asset serving.
//!
//! Parses Accept-Encoding headers with quality values and returns
//! supported encodings in priority order for content negotiation.

use axum::http::{HeaderMap, HeaderValue, header};

/// Minimum size threshold for compression (bytes).
///
/// Must match `MIN_SIZE` in `web/scripts/compress-assets.ts`.
pub const COMPRESSION_MIN_SIZE: usize = 512;

/// Supported content encodings in priority order (best compression first).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ContentEncoding {
    Zstd,
    Brotli,
    Gzip,
    Identity,
}

impl ContentEncoding {
    /// File extension suffix for pre-compressed variant lookup.
    #[inline]
    pub fn extension(&self) -> &'static str {
        match self {
            Self::Zstd => ".zst",
            Self::Brotli => ".br",
            Self::Gzip => ".gz",
            Self::Identity => "",
        }
    }

    /// `Content-Encoding` header value, or `None` for identity.
    #[inline]
    pub fn header_value(&self) -> Option<HeaderValue> {
        match self {
            Self::Zstd => Some(HeaderValue::from_static("zstd")),
            Self::Brotli => Some(HeaderValue::from_static("br")),
            Self::Gzip => Some(HeaderValue::from_static("gzip")),
            Self::Identity => None,
        }
    }

    /// Default priority when quality values are equal (higher = better).
    #[inline]
    fn default_priority(&self) -> u8 {
        match self {
            Self::Zstd => 4,
            Self::Brotli => 3,
            Self::Gzip => 2,
            Self::Identity => 1,
        }
    }
}

/// Parse `Accept-Encoding` header and return supported encodings in priority order.
///
/// Supports quality values: `Accept-Encoding: gzip;q=0.8, br;q=1.0, zstd`
/// When quality values are equal: zstd > brotli > gzip > identity.
/// Encodings with `q=0` are excluded.
pub fn parse_accepted_encodings(headers: &HeaderMap) -> Vec<ContentEncoding> {
    let Some(accept) = headers
        .get(header::ACCEPT_ENCODING)
        .and_then(|v| v.to_str().ok())
    else {
        return vec![ContentEncoding::Identity];
    };

    let mut encodings: Vec<(ContentEncoding, f32)> = Vec::new();

    for part in accept.split(',') {
        let part = part.trim();
        if part.is_empty() {
            continue;
        }

        let (encoding_str, quality) = if let Some((enc, params)) = part.split_once(';') {
            let q = params
                .split(';')
                .find_map(|p| p.trim().strip_prefix("q="))
                .and_then(|q| q.parse::<f32>().ok())
                .unwrap_or(1.0);
            (enc.trim(), q)
        } else {
            (part, 1.0)
        };

        if quality == 0.0 {
            continue;
        }

        let encoding = match encoding_str.to_lowercase().as_str() {
            "zstd" => ContentEncoding::Zstd,
            "br" | "brotli" => ContentEncoding::Brotli,
            "gzip" | "x-gzip" => ContentEncoding::Gzip,
            "*" => ContentEncoding::Gzip,
            "identity" => ContentEncoding::Identity,
            _ => continue,
        };

        encodings.push((encoding, quality));
    }

    // Sort by quality (desc), then default priority (desc)
    encodings.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| b.0.default_priority().cmp(&a.0.default_priority()))
    });

    if encodings.is_empty() {
        vec![ContentEncoding::Identity]
    } else {
        encodings.into_iter().map(|(e, _)| e).collect()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_all_encodings() {
        let mut headers = HeaderMap::new();
        headers.insert(header::ACCEPT_ENCODING, "gzip, br, zstd".parse().unwrap());
        let encodings = parse_accepted_encodings(&headers);
        assert_eq!(encodings[0], ContentEncoding::Zstd);
        assert_eq!(encodings[1], ContentEncoding::Brotli);
        assert_eq!(encodings[2], ContentEncoding::Gzip);
    }

    #[test]
    fn test_parse_with_quality_values() {
        let mut headers = HeaderMap::new();
        headers.insert(
            header::ACCEPT_ENCODING,
            "gzip;q=1.0, br;q=0.5, zstd;q=0.8".parse().unwrap(),
        );
        let encodings = parse_accepted_encodings(&headers);
        assert_eq!(encodings[0], ContentEncoding::Gzip);
        assert_eq!(encodings[1], ContentEncoding::Zstd);
        assert_eq!(encodings[2], ContentEncoding::Brotli);
    }

    #[test]
    fn test_no_header_returns_identity() {
        let headers = HeaderMap::new();
        let encodings = parse_accepted_encodings(&headers);
        assert_eq!(encodings, vec![ContentEncoding::Identity]);
    }

    #[test]
    fn test_disabled_encoding_excluded() {
        let mut headers = HeaderMap::new();
        headers.insert(
            header::ACCEPT_ENCODING,
            "zstd;q=0, br, gzip".parse().unwrap(),
        );
        let encodings = parse_accepted_encodings(&headers);
        assert_eq!(encodings[0], ContentEncoding::Brotli);
        assert_eq!(encodings[1], ContentEncoding::Gzip);
        assert!(!encodings.contains(&ContentEncoding::Zstd));
    }

    #[test]
    fn test_real_chrome_header() {
        let mut headers = HeaderMap::new();
        headers.insert(
            header::ACCEPT_ENCODING,
            "gzip, deflate, br, zstd".parse().unwrap(),
        );
        assert_eq!(parse_accepted_encodings(&headers)[0], ContentEncoding::Zstd);
    }

    #[test]
    fn test_extensions() {
        assert_eq!(ContentEncoding::Zstd.extension(), ".zst");
        assert_eq!(ContentEncoding::Brotli.extension(), ".br");
        assert_eq!(ContentEncoding::Gzip.extension(), ".gz");
        assert_eq!(ContentEncoding::Identity.extension(), "");
    }

    #[test]
    fn test_header_values() {
        assert_eq!(
            ContentEncoding::Zstd.header_value().unwrap(),
            HeaderValue::from_static("zstd")
        );
        assert_eq!(
            ContentEncoding::Brotli.header_value().unwrap(),
            HeaderValue::from_static("br")
        );
        assert!(ContentEncoding::Identity.header_value().is_none());
    }
}
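A short sketch tying `parse_accepted_encodings` to the pre-compressed variant lookup the module docs describe. In this codebase the actual consumer is `try_serve_asset_with_encoding`; here `asset_exists` is a hypothetical stand-in for the embedded-asset lookup:

use axum::http::HeaderMap;

// Hypothetical stand-in for the real embedded-asset lookup.
fn asset_exists(_path: &str) -> bool {
    false
}

// Walk the negotiated encodings in priority order and pick the first
// pre-compressed variant that exists. Identity has an empty extension,
// so the uncompressed file is naturally the last candidate tried.
fn pick_variant(path: &str, headers: &HeaderMap) -> Option<(String, ContentEncoding)> {
    parse_accepted_encodings(headers)
        .into_iter()
        .map(|enc| (format!("{path}{}", enc.extension()), enc))
        .find(|(candidate, _)| asset_exists(candidate))
}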
@@ -0,0 +1,90 @@
//! Standardized API error responses.

use axum::Json;
use axum::http::StatusCode;
use axum::response::{IntoResponse, Response};
use serde::Serialize;
use ts_rs::TS;

/// Standardized error response for all API endpoints.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct ApiError {
    /// Machine-readable error code (e.g., "NOT_FOUND", "INVALID_TERM")
    pub code: String,
    /// Human-readable error message
    pub message: String,
    /// Optional additional details (validation errors, field info, etc.)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<serde_json::Value>,
}

impl ApiError {
    pub fn new(code: impl Into<String>, message: impl Into<String>) -> Self {
        Self {
            code: code.into(),
            message: message.into(),
            details: None,
        }
    }

    #[allow(dead_code)]
    pub fn with_details(mut self, details: serde_json::Value) -> Self {
        self.details = Some(details);
        self
    }

    pub fn not_found(message: impl Into<String>) -> Self {
        Self::new("NOT_FOUND", message)
    }

    pub fn bad_request(message: impl Into<String>) -> Self {
        Self::new("BAD_REQUEST", message)
    }

    pub fn internal_error(message: impl Into<String>) -> Self {
        Self::new("INTERNAL_ERROR", message)
    }

    pub fn invalid_term(term: impl std::fmt::Display) -> Self {
        Self::new("INVALID_TERM", format!("Invalid term: {}", term))
    }

    fn status_code(&self) -> StatusCode {
        match self.code.as_str() {
            "NOT_FOUND" => StatusCode::NOT_FOUND,
            "BAD_REQUEST" | "INVALID_TERM" | "INVALID_RANGE" => StatusCode::BAD_REQUEST,
            "UNAUTHORIZED" => StatusCode::UNAUTHORIZED,
            "FORBIDDEN" => StatusCode::FORBIDDEN,
            _ => StatusCode::INTERNAL_SERVER_ERROR,
        }
    }
}

impl IntoResponse for ApiError {
    fn into_response(self) -> Response {
        let status = self.status_code();
        (status, Json(self)).into_response()
    }
}

/// Convert `(StatusCode, String)` tuple errors to ApiError
impl From<(StatusCode, String)> for ApiError {
    fn from((status, message): (StatusCode, String)) -> Self {
        let code = match status {
            StatusCode::NOT_FOUND => "NOT_FOUND",
            StatusCode::BAD_REQUEST => "BAD_REQUEST",
            StatusCode::UNAUTHORIZED => "UNAUTHORIZED",
            StatusCode::FORBIDDEN => "FORBIDDEN",
            _ => "INTERNAL_ERROR",
        };
        Self::new(code, message)
    }
}

/// Helper for converting database errors to ApiError
pub fn db_error(context: &str, error: anyhow::Error) -> ApiError {
    tracing::error!(error = %error, context = context, "Database error");
    ApiError::internal_error(format!("{} failed", context))
}
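Usage example for the types above, with this module's imports in scope. A handler returns `Result<_, ApiError>`, and the `IntoResponse` impl maps the machine-readable code to the HTTP status. The handler and `find_note` below are illustrative, not part of this codebase:

async fn get_note(id: i32) -> Result<Json<serde_json::Value>, ApiError> {
    // The error branch serializes (camelCase per the derive) as
    // {"code":"NOT_FOUND","message":"Note 0 not found"} with `details`
    // omitted when None, and is sent with HTTP 404 via status_code().
    let note = find_note(id).ok_or_else(|| ApiError::not_found(format!("Note {id} not found")))?;
    Ok(Json(serde_json::json!({ "note": note })))
}

// Hypothetical lookup used only for the example.
fn find_note(id: i32) -> Option<String> {
    (id > 0).then(|| format!("note-{id}"))
}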
@@ -0,0 +1,74 @@
//! Axum extractors for authentication and authorization.

use axum::extract::FromRequestParts;
use axum::http::{StatusCode, header};
use axum::response::Json;
use http::request::Parts;
use serde_json::json;

use crate::data::models::User;
use crate::state::AppState;

/// Extractor that resolves the session cookie to an authenticated [`User`].
///
/// Returns 401 if no valid session cookie is present.
pub struct AuthUser(pub User);

impl FromRequestParts<AppState> for AuthUser {
    type Rejection = (StatusCode, Json<serde_json::Value>);

    async fn from_request_parts(
        parts: &mut Parts,
        state: &AppState,
    ) -> Result<Self, Self::Rejection> {
        let token = parts
            .headers
            .get(header::COOKIE)
            .and_then(|v| v.to_str().ok())
            .and_then(|cookies| {
                cookies
                    .split(';')
                    .find_map(|c| c.trim().strip_prefix("session=").map(|v| v.to_owned()))
            })
            .ok_or_else(|| {
                (
                    StatusCode::UNAUTHORIZED,
                    Json(json!({"error": "unauthorized", "message": "No session cookie"})),
                )
            })?;

        let user = state.session_cache.get_user(&token).await.ok_or_else(|| {
            (
                StatusCode::UNAUTHORIZED,
                Json(json!({"error": "unauthorized", "message": "Invalid or expired session"})),
            )
        })?;

        Ok(AuthUser(user))
    }
}

/// Extractor that requires an authenticated admin user.
///
/// Returns 401 if not authenticated, 403 if not admin.
pub struct AdminUser(pub User);

impl FromRequestParts<AppState> for AdminUser {
    type Rejection = (StatusCode, Json<serde_json::Value>);

    async fn from_request_parts(
        parts: &mut Parts,
        state: &AppState,
    ) -> Result<Self, Self::Rejection> {
        let AuthUser(user) = AuthUser::from_request_parts(parts, state).await?;

        if !user.is_admin {
            return Err((
                StatusCode::FORBIDDEN,
                Json(json!({"error": "forbidden", "message": "Admin access required"})),
            ));
        }

        Ok(AdminUser(user))
    }
}
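Because both extractors implement `FromRequestParts<AppState>`, guarding a route is just a matter of the handler signature: axum runs the extractor and rejects with 401/403 before the handler body ever executes. The handler below is illustrative, not from this codebase:

use axum::response::Json;
use serde_json::json;

async fn admin_ping(AdminUser(user): AdminUser) -> Json<serde_json::Value> {
    // Only reached for a valid session whose user has `is_admin` set.
    Json(json!({ "ok": true, "admin": user.discord_username }))
}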
@@ -1,7 +1,20 @@
//! Web API module for the banner application.

pub mod admin;
pub mod admin_rmp;
pub mod admin_scraper;
#[cfg(feature = "embed-assets")]
pub mod assets;
pub mod auth;
pub mod calendar;
#[cfg(feature = "embed-assets")]
pub mod encoding;
pub mod error;
pub mod extractors;
pub mod routes;
pub mod schedule_cache;
pub mod session_cache;
pub mod timeline;
pub mod ws;

pub use routes::*;
+716
-115
@@ -1,79 +1,118 @@
//! Web API endpoints for Banner bot monitoring and metrics.

use axum::{
    Router,
    Extension, Router,
    body::Body,
    extract::{Request, State},
    extract::{Path, Query, Request, State},
    response::{Json, Response},
    routing::get,
    routing::{get, post, put},
};

use crate::web::admin_scraper;
use crate::web::auth::{self, AuthConfig};
use crate::web::calendar;
use crate::web::error::{ApiError, db_error};
use crate::web::timeline;
use crate::web::ws;
use crate::{data, web::admin};
use crate::{data::models, web::admin_rmp};
#[cfg(feature = "embed-assets")]
use axum::{
    http::{HeaderMap, HeaderValue, StatusCode, Uri},
    response::{Html, IntoResponse},
    http::{HeaderMap, StatusCode, Uri},
    response::IntoResponse,
};
#[cfg(feature = "embed-assets")]
use http::header;
use serde::Serialize;
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use std::{collections::BTreeMap, time::Duration};
use ts_rs::TS;

use crate::state::AppState;
use crate::status::ServiceStatus;
#[cfg(not(feature = "embed-assets"))]
use tower_http::cors::{Any, CorsLayer};
use tower_http::{classify::ServerErrorsFailureClass, timeout::TimeoutLayer, trace::TraceLayer};
use tower_http::{
    classify::ServerErrorsFailureClass, compression::CompressionLayer, timeout::TimeoutLayer,
    trace::TraceLayer,
};
use tracing::{Span, debug, trace, warn};

#[cfg(feature = "embed-assets")]
use crate::web::assets::{WebAssets, get_asset_metadata_cached};

/// Set appropriate caching headers based on asset type
#[cfg(feature = "embed-assets")]
fn set_caching_headers(response: &mut Response, path: &str, etag: &str) {
    let headers = response.headers_mut();

    // Set ETag
    if let Ok(etag_value) = HeaderValue::from_str(etag) {
        headers.insert(header::ETAG, etag_value);
    }

    // Set Cache-Control based on asset type
    let cache_control = if path.starts_with("assets/") {
        // Static assets with hashed filenames - long-term cache
        "public, max-age=31536000, immutable"
    } else if path == "index.html" {
        // HTML files - short-term cache
        "public, max-age=300"
    } else {
        match path.split_once('.').map(|(_, extension)| extension) {
            Some(ext) => match ext {
                // CSS/JS files - medium-term cache
                "css" | "js" => "public, max-age=86400",
                // Images - long-term cache
                "png" | "jpg" | "jpeg" | "gif" | "svg" | "ico" => "public, max-age=2592000",
                // Default for other files
                _ => "public, max-age=3600",
            },
            // Default for files without an extension
            None => "public, max-age=3600",
        }
    };

    if let Ok(cache_control_value) = HeaderValue::from_str(cache_control) {
        headers.insert(header::CACHE_CONTROL, cache_control_value);
    }
}
use crate::web::assets::try_serve_asset_with_encoding;

/// Creates the web server router
pub fn create_router(app_state: AppState) -> Router {
pub fn create_router(app_state: AppState, auth_config: AuthConfig) -> Router {
    let api_router = Router::new()
        .route("/health", get(health))
        .route("/status", get(status))
        .route("/metrics", get(metrics))
        .route("/courses/search", get(search_courses))
        .route("/courses/{term}/{crn}", get(get_course))
        .route(
            "/courses/{term}/{crn}/calendar.ics",
            get(calendar::course_ics),
        )
        .route("/courses/{term}/{crn}/gcal", get(calendar::course_gcal))
        .route("/reference/{category}", get(get_reference))
        .route("/search-options", get(get_search_options))
        .route("/timeline", post(timeline::timeline))
        .with_state(app_state.clone());

    let auth_router = Router::new()
        .route("/auth/login", get(auth::auth_login))
        .route("/auth/callback", get(auth::auth_callback))
        .route("/auth/logout", post(auth::auth_logout))
        .route("/auth/me", get(auth::auth_me))
        .layer(Extension(auth_config))
        .with_state(app_state.clone());

    let admin_router = Router::new()
        .route("/admin/status", get(admin::admin_status))
        .route("/admin/users", get(admin::list_users))
        .route(
            "/admin/users/{discord_id}/admin",
            put(admin::set_user_admin),
        )
        .route("/admin/scrape-jobs", get(admin::list_scrape_jobs))
        .route("/admin/scrape-jobs/ws", get(ws::scrape_jobs_ws))
        .route("/admin/audit-log", get(admin::list_audit_log))
        .route("/admin/instructors", get(admin_rmp::list_instructors))
        .route("/admin/instructors/{id}", get(admin_rmp::get_instructor))
        .route(
            "/admin/instructors/{id}/match",
            post(admin_rmp::match_instructor),
        )
        .route(
            "/admin/instructors/{id}/reject-candidate",
            post(admin_rmp::reject_candidate),
        )
        .route(
            "/admin/instructors/{id}/reject-all",
            post(admin_rmp::reject_all),
        )
        .route(
            "/admin/instructors/{id}/unmatch",
            post(admin_rmp::unmatch_instructor),
        )
        .route("/admin/rmp/rescore", post(admin_rmp::rescore))
        .route("/admin/scraper/stats", get(admin_scraper::scraper_stats))
        .route(
            "/admin/scraper/timeseries",
            get(admin_scraper::scraper_timeseries),
        )
        .route(
            "/admin/scraper/subjects",
            get(admin_scraper::scraper_subjects),
        )
        .route(
            "/admin/scraper/subjects/{subject}",
            get(admin_scraper::scraper_subject_detail),
        )
        .with_state(app_state);

    let mut router = Router::new().nest("/api", api_router);
    let mut router = Router::new()
        .nest("/api", api_router)
        .nest("/api", auth_router)
        .nest("/api", admin_router);

    // When embed-assets feature is enabled, serve embedded static assets
    #[cfg(feature = "embed-assets")]
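With the new `create_router` signature, startup wiring looks roughly like the sketch below. This is illustrative: the bind address is a placeholder and the serve loop assumes the axum 0.7-style `axum::serve` API, which may differ from how this repo actually boots:

async fn run_web(app_state: AppState, auth_config: AuthConfig) -> anyhow::Result<()> {
    let app = create_router(app_state, auth_config);
    // Placeholder address; real code would read this from configuration.
    let listener = tokio::net::TcpListener::bind("0.0.0.0:8080").await?;
    axum::serve(listener, app).await?;
    Ok(())
}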
@@ -93,6 +132,13 @@ pub fn create_router(app_state: AppState) -> Router {
    }

    router.layer((
        // Compress API responses (gzip/brotli/zstd). Pre-compressed static
        // assets already have Content-Encoding set, so tower-http skips them.
        CompressionLayer::new()
            .zstd(true)
            .br(true)
            .gzip(true)
            .quality(tower_http::CompressionLevel::Fastest),
        TraceLayer::new_for_http()
            .make_span_with(|request: &Request<Body>| {
                tracing::debug_span!("request", path = request.uri().path())
@@ -139,71 +185,35 @@ pub fn create_router(app_state: AppState) -> Router {
    ))
}

/// Handler that extracts request information for caching
/// SPA fallback handler with content encoding negotiation.
///
/// Serves embedded static assets with pre-compressed variants when available,
/// falling back to `index.html` for SPA client-side routing.
#[cfg(feature = "embed-assets")]
async fn fallback(request: Request) -> Response {
async fn fallback(request: Request) -> axum::response::Response {
    let uri = request.uri().clone();
    let headers = request.headers().clone();
    handle_spa_fallback_with_headers(uri, headers).await
    handle_spa_fallback(uri, headers).await
}

/// Handles SPA routing by serving index.html for non-API, non-asset requests
/// This version includes HTTP caching headers and ETag support
#[cfg(feature = "embed-assets")]
async fn handle_spa_fallback_with_headers(uri: Uri, request_headers: HeaderMap) -> Response {
    let path = uri.path().trim_start_matches('/');

    if let Some(content) = WebAssets::get(path) {
        // Get asset metadata (MIME type and hash) with caching
        let metadata = get_asset_metadata_cached(path, &content.data);

        // Check if client has a matching ETag (conditional request)
        if let Some(etag) = request_headers.get(header::IF_NONE_MATCH)
            && etag.to_str().is_ok_and(|s| metadata.etag_matches(s))
        {
            return StatusCode::NOT_MODIFIED.into_response();
        }

        // Use cached MIME type, only set Content-Type if we have a valid MIME type
        let mut response = (
            [(
                header::CONTENT_TYPE,
                // For unknown types, set to application/octet-stream
                metadata
                    .mime_type
                    .unwrap_or("application/octet-stream".to_string()),
            )],
            content.data,
        )
            .into_response();

        // Set caching headers
        set_caching_headers(&mut response, path, &metadata.hash.quoted());
async fn handle_spa_fallback(uri: Uri, request_headers: HeaderMap) -> axum::response::Response {
    let path = uri.path();

    // Try serving the exact asset (with encoding negotiation)
    if let Some(response) = try_serve_asset_with_encoding(path, &request_headers) {
        return response;
    } else {
        // Any assets that are not found should be treated as a 404, not falling back to the SPA index.html
        if path.starts_with("assets/") {
            return (StatusCode::NOT_FOUND, "Asset not found").into_response();
        }
    }

    // Fall back to the SPA index.html
    match WebAssets::get("index.html") {
        Some(content) => {
            let metadata = get_asset_metadata_cached("index.html", &content.data);
    // SvelteKit assets under _app/ that don't exist are a hard 404
    let trimmed = path.trim_start_matches('/');
    if trimmed.starts_with("_app/") || trimmed.starts_with("assets/") {
        return (StatusCode::NOT_FOUND, "Asset not found").into_response();
    }

            // Check if client has a matching ETag for index.html
            if let Some(etag) = request_headers.get(header::IF_NONE_MATCH)
                && etag.to_str().is_ok_and(|s| metadata.etag_matches(s))
            {
                return StatusCode::NOT_MODIFIED.into_response();
            }

            let mut response = Html(content.data).into_response();
            set_caching_headers(&mut response, "index.html", &metadata.hash.quoted());
            response
        }
    // SPA fallback: serve index.html with encoding negotiation
    match try_serve_asset_with_encoding("/index.html", &request_headers) {
        Some(response) => response,
        None => (
            StatusCode::INTERNAL_SERVER_ERROR,
            "Failed to load index.html",
@@ -221,14 +231,16 @@ async fn health() -> Json<Value> {
    }))
}

#[derive(Serialize)]
struct ServiceInfo {
#[derive(Serialize, TS)]
#[ts(export)]
pub struct ServiceInfo {
    name: String,
    status: ServiceStatus,
}

#[derive(Serialize)]
struct StatusResponse {
#[derive(Serialize, TS)]
#[ts(export)]
pub struct StatusResponse {
    status: ServiceStatus,
    version: String,
    commit: String,
@@ -249,7 +261,10 @@ async fn status(State(state): State<AppState>) -> Json<StatusResponse> {
        );
    }

    let overall_status = if services.values().any(|s| matches!(s.status, ServiceStatus::Error)) {
    let overall_status = if services
        .values()
        .any(|s| matches!(s.status, ServiceStatus::Error))
    {
        ServiceStatus::Error
    } else if !services.is_empty()
        && services
@@ -272,12 +287,598 @@ async fn status(State(state): State<AppState>) -> Json<StatusResponse> {
}

/// Metrics endpoint for monitoring
async fn metrics() -> Json<Value> {
    // For now, return basic metrics structure
    Json(json!({
        "banner_api": {
            "status": "connected"
        },
        "timestamp": chrono::Utc::now().to_rfc3339()
async fn metrics(
    State(state): State<AppState>,
    Query(params): Query<MetricsParams>,
) -> Result<Json<MetricsResponse>, ApiError> {
    let limit = params.limit.clamp(1, 5000);

    // Parse range shorthand, defaulting to 24h
    let range_str = params.range.as_deref().unwrap_or("24h");
    let duration = match range_str {
        "1h" => chrono::Duration::hours(1),
        "6h" => chrono::Duration::hours(6),
        "24h" => chrono::Duration::hours(24),
        "7d" => chrono::Duration::days(7),
        "30d" => chrono::Duration::days(30),
        _ => {
            return Err(ApiError::new(
                "INVALID_RANGE",
                format!("Invalid range '{range_str}'. Valid: 1h, 6h, 24h, 7d, 30d"),
            ));
        }
    };
    let since = chrono::Utc::now() - duration;

    // Resolve course_id: explicit param takes priority, then term+crn lookup
    let course_id = if let Some(id) = params.course_id {
        Some(id)
    } else if let (Some(term), Some(crn)) = (params.term.as_deref(), params.crn.as_deref()) {
        let row: Option<(i32,)> =
            sqlx::query_as("SELECT id FROM courses WHERE term_code = $1 AND crn = $2")
                .bind(term)
                .bind(crn)
                .fetch_optional(&state.db_pool)
                .await
                .map_err(|e| db_error("Course lookup for metrics", e.into()))?;
        row.map(|(id,)| id)
    } else {
        None
    };

    // Build query dynamically based on filters
    let metrics: Vec<(i32, i32, chrono::DateTime<chrono::Utc>, i32, i32, i32)> =
        if let Some(cid) = course_id {
            sqlx::query_as(
                "SELECT id, course_id, timestamp, enrollment, wait_count, seats_available \
                 FROM course_metrics \
                 WHERE course_id = $1 AND timestamp >= $2 \
                 ORDER BY timestamp DESC \
                 LIMIT $3",
            )
            .bind(cid)
            .bind(since)
            .bind(limit)
            .fetch_all(&state.db_pool)
            .await
        } else {
            sqlx::query_as(
                "SELECT id, course_id, timestamp, enrollment, wait_count, seats_available \
                 FROM course_metrics \
                 WHERE timestamp >= $1 \
                 ORDER BY timestamp DESC \
                 LIMIT $2",
            )
            .bind(since)
            .bind(limit)
            .fetch_all(&state.db_pool)
            .await
        }
        .map_err(|e| db_error("Metrics query", e.into()))?;

    let count = metrics.len();
    let metrics_entries: Vec<MetricEntry> = metrics
        .into_iter()
        .map(
            |(id, course_id, timestamp, enrollment, wait_count, seats_available)| MetricEntry {
                id,
                course_id,
                timestamp: timestamp.to_rfc3339(),
                enrollment,
                wait_count,
                seats_available,
            },
        )
        .collect();

    Ok(Json(MetricsResponse {
        metrics: metrics_entries,
        count,
        timestamp: chrono::Utc::now().to_rfc3339(),
    }))
}

// ============================================================
// Course search & detail API
// ============================================================

#[derive(Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct MetricEntry {
    pub id: i32,
    pub course_id: i32,
    pub timestamp: String,
    pub enrollment: i32,
    pub wait_count: i32,
    pub seats_available: i32,
}

#[derive(Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct MetricsResponse {
    pub metrics: Vec<MetricEntry>,
    pub count: usize,
    pub timestamp: String,
}

#[derive(Deserialize, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct MetricsParams {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub course_id: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub term: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub crn: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub range: Option<String>,
    #[serde(default = "default_metrics_limit")]
    pub limit: i32,
}

fn default_metrics_limit() -> i32 {
    500
}

#[derive(Deserialize, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct SearchParams {
    pub term: String,
    #[serde(default)]
    pub subject: Vec<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub q: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none", alias = "course_number_low")]
    pub course_number_low: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none", alias = "course_number_high")]
    pub course_number_high: Option<i32>,
    #[serde(default, alias = "open_only")]
    pub open_only: bool,
    #[serde(default, alias = "instructional_method")]
    pub instructional_method: Vec<String>,
    #[serde(default)]
    pub campus: Vec<String>,
    #[serde(default = "default_limit")]
    pub limit: i32,
    #[serde(default)]
    pub offset: i32,
    #[serde(skip_serializing_if = "Option::is_none", alias = "sort_by")]
    pub sort_by: Option<SortColumn>,
    #[serde(skip_serializing_if = "Option::is_none", alias = "sort_dir")]
    pub sort_dir: Option<SortDirection>,
    #[serde(skip_serializing_if = "Option::is_none", alias = "wait_count_max")]
    pub wait_count_max: Option<i32>,
    #[serde(default)]
    pub days: Vec<String>,
    #[serde(skip_serializing_if = "Option::is_none", alias = "time_start")]
    pub time_start: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none", alias = "time_end")]
    pub time_end: Option<String>,
    #[serde(default, alias = "part_of_term")]
    pub part_of_term: Vec<String>,
    #[serde(default)]
    pub attributes: Vec<String>,
    #[serde(skip_serializing_if = "Option::is_none", alias = "credit_hour_min")]
    pub credit_hour_min: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none", alias = "credit_hour_max")]
    pub credit_hour_max: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructor: Option<String>,
}

use crate::data::courses::{SortColumn, SortDirection};

fn default_limit() -> i32 {
    25
}

#[derive(Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct CourseResponse {
    crn: String,
    subject: String,
    course_number: String,
    title: String,
    term_code: String,
    sequence_number: Option<String>,
    instructional_method: Option<String>,
    campus: Option<String>,
    enrollment: i32,
    max_enrollment: i32,
    wait_count: i32,
    wait_capacity: i32,
    credit_hours: Option<i32>,
    credit_hour_low: Option<i32>,
    credit_hour_high: Option<i32>,
    cross_list: Option<String>,
    cross_list_capacity: Option<i32>,
    cross_list_count: Option<i32>,
    link_identifier: Option<String>,
    is_section_linked: Option<bool>,
    part_of_term: Option<String>,
    meeting_times: Vec<models::DbMeetingTime>,
    attributes: Vec<String>,
    instructors: Vec<InstructorResponse>,
}

#[derive(Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct InstructorResponse {
    instructor_id: i32,
    banner_id: String,
    display_name: String,
    email: String,
    is_primary: bool,
    rmp_rating: Option<f32>,
    rmp_num_ratings: Option<i32>,
    rmp_legacy_id: Option<i32>,
}

#[derive(Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct SearchResponse {
    courses: Vec<CourseResponse>,
    total_count: i32,
}

#[derive(Debug, Clone, Serialize, Deserialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct CodeDescription {
    code: String,
    description: String,
}

#[derive(Debug, Clone, Serialize, Deserialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct TermResponse {
    code: String,
    slug: String,
    description: String,
}

/// Response for the consolidated search-options endpoint.
#[derive(Debug, Clone, Serialize, Deserialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct SearchOptionsResponse {
    pub terms: Vec<TermResponse>,
    pub subjects: Vec<CodeDescription>,
    pub reference: SearchOptionsReference,
    pub ranges: data::courses::FilterRanges,
}

#[derive(Debug, Clone, Serialize, Deserialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct SearchOptionsReference {
    pub instructional_methods: Vec<CodeDescription>,
    pub campuses: Vec<CodeDescription>,
    pub parts_of_term: Vec<CodeDescription>,
    pub attributes: Vec<CodeDescription>,
}

#[derive(Debug, Deserialize)]
pub struct SearchOptionsParams {
    pub term: Option<String>,
}

/// Build a `CourseResponse` from a DB course with pre-fetched instructor details.
fn build_course_response(
    course: &models::Course,
    instructors: Vec<models::CourseInstructorDetail>,
) -> CourseResponse {
    let instructors = instructors
        .into_iter()
        .map(|i| InstructorResponse {
            instructor_id: i.instructor_id,
            banner_id: i.banner_id,
            display_name: i.display_name,
            email: i.email,
            is_primary: i.is_primary,
            rmp_rating: i.avg_rating,
            rmp_num_ratings: i.num_ratings,
            rmp_legacy_id: i.rmp_legacy_id,
        })
        .collect();

    let meeting_times = serde_json::from_value(course.meeting_times.clone())
        .map_err(|e| {
            tracing::error!(
                course_id = course.id,
                crn = %course.crn,
                term = %course.term_code,
                error = %e,
                "Failed to deserialize meeting_times JSONB"
            );
            e
        })
        .unwrap_or_default();

    let attributes = serde_json::from_value(course.attributes.clone())
        .map_err(|e| {
            tracing::error!(
                course_id = course.id,
                crn = %course.crn,
                term = %course.term_code,
                error = %e,
                "Failed to deserialize attributes JSONB"
            );
            e
        })
        .unwrap_or_default();

    CourseResponse {
        crn: course.crn.clone(),
        subject: course.subject.clone(),
        course_number: course.course_number.clone(),
        title: course.title.clone(),
        term_code: course.term_code.clone(),
        sequence_number: course.sequence_number.clone(),
        instructional_method: course.instructional_method.clone(),
        campus: course.campus.clone(),
        enrollment: course.enrollment,
        max_enrollment: course.max_enrollment,
        wait_count: course.wait_count,
        wait_capacity: course.wait_capacity,
        credit_hours: course.credit_hours,
        credit_hour_low: course.credit_hour_low,
        credit_hour_high: course.credit_hour_high,
        cross_list: course.cross_list.clone(),
        cross_list_capacity: course.cross_list_capacity,
        cross_list_count: course.cross_list_count,
        link_identifier: course.link_identifier.clone(),
        is_section_linked: course.is_section_linked,
        part_of_term: course.part_of_term.clone(),
        meeting_times,
        attributes,
        instructors,
    }
}

/// `GET /api/courses/search`
async fn search_courses(
    State(state): State<AppState>,
    axum_extra::extract::Query(params): axum_extra::extract::Query<SearchParams>,
) -> Result<Json<SearchResponse>, ApiError> {
    use crate::banner::models::terms::Term;

    let term_code =
        Term::resolve_to_code(&params.term).ok_or_else(|| ApiError::invalid_term(&params.term))?;
    let limit = params.limit.clamp(1, 100);
    let offset = params.offset.max(0);

    let (courses, total_count) = data::courses::search_courses(
        &state.db_pool,
        &term_code,
        if params.subject.is_empty() {
            None
        } else {
            Some(&params.subject)
        },
        params.q.as_deref(),
        params.course_number_low,
        params.course_number_high,
        params.open_only,
        if params.instructional_method.is_empty() {
            None
        } else {
            Some(&params.instructional_method)
        },
        if params.campus.is_empty() {
            None
        } else {
            Some(&params.campus)
        },
        params.wait_count_max,
        if params.days.is_empty() {
            None
        } else {
            Some(&params.days)
        },
        params.time_start.as_deref(),
        params.time_end.as_deref(),
        if params.part_of_term.is_empty() {
            None
        } else {
            Some(&params.part_of_term)
        },
        if params.attributes.is_empty() {
            None
        } else {
            Some(&params.attributes)
        },
        params.credit_hour_min,
        params.credit_hour_max,
        params.instructor.as_deref(),
        limit,
        offset,
        params.sort_by,
        params.sort_dir,
    )
    .await
    .map_err(|e| db_error("Course search", e))?;

    // Batch-fetch all instructors in a single query instead of N+1
    let course_ids: Vec<i32> = courses.iter().map(|c| c.id).collect();
    let mut instructor_map =
        data::courses::get_instructors_for_courses(&state.db_pool, &course_ids)
            .await
            .unwrap_or_default();

    let course_responses: Vec<CourseResponse> = courses
        .iter()
        .map(|course| {
            let instructors = instructor_map.remove(&course.id).unwrap_or_default();
            build_course_response(course, instructors)
        })
        .collect();

    Ok(Json(SearchResponse {
        courses: course_responses,
        total_count: total_count as i32,
    }))
}

/// `GET /api/courses/:term/:crn`
async fn get_course(
    State(state): State<AppState>,
    Path((term, crn)): Path<(String, String)>,
) -> Result<Json<CourseResponse>, ApiError> {
    let course = data::courses::get_course_by_crn(&state.db_pool, &crn, &term)
        .await
        .map_err(|e| db_error("Course lookup", e))?
        .ok_or_else(|| ApiError::not_found("Course not found"))?;

    let instructors = data::courses::get_course_instructors(&state.db_pool, course.id)
        .await
        .unwrap_or_default();
    Ok(Json(build_course_response(&course, instructors)))
}

/// `GET /api/reference/:category`
async fn get_reference(
    State(state): State<AppState>,
    Path(category): Path<String>,
) -> Result<Json<Vec<CodeDescription>>, ApiError> {
    let cache = state.reference_cache.read().await;
    let entries = cache.entries_for_category(&category);

    if entries.is_empty() {
        // Fall back to DB query in case cache doesn't have this category
        drop(cache);
        let rows = data::reference::get_by_category(&category, &state.db_pool)
            .await
            .map_err(|e| db_error(&format!("Reference lookup for {}", category), e))?;

        return Ok(Json(
            rows.into_iter()
                .map(|r| CodeDescription {
                    code: r.code,
                    description: r.description,
                })
                .collect(),
        ));
    }

    Ok(Json(
        entries
            .into_iter()
            .map(|(code, desc)| CodeDescription {
                code: code.to_string(),
                description: desc.to_string(),
            })
            .collect(),
    ))
}

/// `GET /api/search-options?term={slug}` (term optional, defaults to latest)
async fn get_search_options(
    State(state): State<AppState>,
    Query(params): Query<SearchOptionsParams>,
) -> Result<Json<SearchOptionsResponse>, ApiError> {
    use crate::banner::models::terms::Term;
    use std::time::Instant;

    // If no term specified, get the latest term
    let term_slug = if let Some(ref t) = params.term {
        t.clone()
    } else {
        // Fetch available terms to get the default (latest)
        let term_codes = data::courses::get_available_terms(&state.db_pool)
            .await
            .map_err(|e| db_error("Get terms for default", e))?;

        let first_term: Term = term_codes
            .first()
            .and_then(|code| code.parse().ok())
            .ok_or_else(|| ApiError::new("NO_TERMS", "No terms available".to_string()))?;

        first_term.slug()
    };

    let term_code =
        Term::resolve_to_code(&term_slug).ok_or_else(|| ApiError::invalid_term(&term_slug))?;

    // Check cache (10-minute TTL)
    if let Some(entry) = state.search_options_cache.get(&term_code) {
        let (cached_at, ref cached_value) = *entry;
        if cached_at.elapsed() < Duration::from_secs(600) {
            let response: SearchOptionsResponse = serde_json::from_value(cached_value.clone())
                .map_err(|e| {
                    ApiError::internal_error(format!("Cache deserialization error: {e}"))
                })?;
            return Ok(Json(response));
        }
    }

    // Fetch all data in parallel
    let (term_codes, subject_rows, ranges) = tokio::try_join!(
        data::courses::get_available_terms(&state.db_pool),
        data::courses::get_subjects_by_enrollment(&state.db_pool, &term_code),
        data::courses::get_filter_ranges(&state.db_pool, &term_code),
    )
    .map_err(|e| db_error("Search options", e))?;

    // Build terms
    let terms: Vec<TermResponse> = term_codes
        .into_iter()
        .filter_map(|code| {
            let term: Term = code.parse().ok()?;
            Some(TermResponse {
                code,
                slug: term.slug(),
                description: term.description(),
            })
        })
        .collect();

    // Build subjects
    let subjects: Vec<CodeDescription> = subject_rows
        .into_iter()
        .map(|(code, description, _enrollment)| CodeDescription { code, description })
        .collect();

    // Build reference data from in-memory cache
    let ref_cache = state.reference_cache.read().await;
    let build_ref = |category: &str| -> Vec<CodeDescription> {
        ref_cache
            .entries_for_category(category)
            .into_iter()
            .map(|(code, desc)| CodeDescription {
                code: code.to_string(),
                description: desc.to_string(),
            })
            .collect()
    };

    let reference = SearchOptionsReference {
        instructional_methods: build_ref("instructional_method"),
        campuses: build_ref("campus"),
        parts_of_term: build_ref("part_of_term"),
        attributes: build_ref("attribute"),
    };

    let response = SearchOptionsResponse {
        terms,
        subjects,
        reference,
        ranges,
    };

    // Cache the response
    let cached_value = serde_json::to_value(&response).unwrap_or_default();
    state
        .search_options_cache
        .insert(term_code, (Instant::now(), cached_value));

    Ok(Json(response))
}
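A hedged example of calling the reworked metrics endpoint from a client. The base URL is a placeholder; note that `MetricsParams` is camelCase-renamed, so the course filter key on the query string is `courseId`:

async fn fetch_recent_metrics() -> Result<serde_json::Value, reqwest::Error> {
    // 24-hour window capped at 100 rows; append `&courseId=...` or
    // `&term=...&crn=...` to narrow the results to a single course.
    reqwest::get("http://localhost:8080/api/metrics?range=24h&limit=100")
        .await?
        .json()
        .await
}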
@@ -0,0 +1,443 @@
//! ISR-style schedule cache for timeline enrollment queries.
//!
//! Loads all courses with their meeting times from the database, parses the
//! JSONB meeting times into a compact in-memory representation, and caches
//! the result. The cache is refreshed in the background every hour using a
//! stale-while-revalidate pattern with singleflight deduplication — readers
//! always get the current cached value instantly, never blocking on a refresh.

use chrono::NaiveDate;
use serde_json::Value;
use sqlx::PgPool;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use tokio::sync::watch;
use tracing::{debug, error, info};

/// How often the cache is considered fresh (1 hour).
const REFRESH_INTERVAL: std::time::Duration = std::time::Duration::from_secs(60 * 60);

// ── Compact schedule representation ─────────────────────────────────

/// A single meeting time block, pre-parsed for fast filtering.
#[derive(Debug, Clone)]
pub(crate) struct ParsedSchedule {
    /// Bitmask of days: bit 0 = Monday, bit 6 = Sunday.
    days: u8,
    /// Minutes since midnight for start (e.g. 600 = 10:00).
    begin_minutes: u16,
    /// Minutes since midnight for end (e.g. 650 = 10:50).
    end_minutes: u16,
    /// First day the meeting pattern is active.
    start_date: NaiveDate,
    /// Last day the meeting pattern is active.
    end_date: NaiveDate,
}

/// A course with its enrollment and pre-parsed schedule blocks.
#[derive(Debug, Clone)]
pub(crate) struct CachedCourse {
    pub(crate) subject: String,
    pub(crate) enrollment: i32,
    pub(crate) schedules: Vec<ParsedSchedule>,
}

/// The immutable snapshot of all courses, swapped atomically on refresh.
#[derive(Debug, Clone)]
pub(crate) struct ScheduleSnapshot {
    pub(crate) courses: Vec<CachedCourse>,
    refreshed_at: std::time::Instant,
}

// ── Cache handle ────────────────────────────────────────────────────

/// Shared schedule cache. Clone-cheap (all `Arc`-wrapped internals).
#[derive(Clone)]
pub struct ScheduleCache {
    /// Current snapshot, updated via `watch` channel for lock-free reads.
    rx: watch::Receiver<Arc<ScheduleSnapshot>>,
    /// Sender side, held to push new snapshots.
    tx: Arc<watch::Sender<Arc<ScheduleSnapshot>>>,
    /// Singleflight guard — true while a refresh task is in flight.
    refreshing: Arc<AtomicBool>,
    /// Database pool for refresh queries.
    pool: PgPool,
}

impl ScheduleCache {
    /// Create a new cache with an empty initial snapshot.
    pub(crate) fn new(pool: PgPool) -> Self {
        let empty = Arc::new(ScheduleSnapshot {
            courses: Vec::new(),
            refreshed_at: std::time::Instant::now(),
        });
        let (tx, rx) = watch::channel(empty);
        Self {
            rx,
            tx: Arc::new(tx),
            refreshing: Arc::new(AtomicBool::new(false)),
            pool,
        }
    }

    /// Get the current snapshot. Never blocks on refresh.
    pub(crate) fn snapshot(&self) -> Arc<ScheduleSnapshot> {
        self.rx.borrow().clone()
    }

    /// Check freshness and trigger a background refresh if stale.
    /// Always returns immediately — the caller uses the current snapshot.
    pub(crate) fn ensure_fresh(&self) {
        let snap = self.rx.borrow();
        if snap.refreshed_at.elapsed() < REFRESH_INTERVAL {
            return;
        }
        // Singleflight: only one refresh at a time.
        if self
            .refreshing
            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
            .is_err()
        {
            debug!("Schedule cache refresh already in flight, skipping");
            return;
        }
        let cache = self.clone();
        tokio::spawn(async move {
            match load_snapshot(&cache.pool).await {
                Ok(snap) => {
                    let count = snap.courses.len();
                    let _ = cache.tx.send(Arc::new(snap));
                    info!(courses = count, "Schedule cache refreshed");
                }
                Err(e) => {
                    error!(error = %e, "Failed to refresh schedule cache");
                }
            }
            cache.refreshing.store(false, Ordering::Release);
        });
    }

    /// Force an initial load (blocking). Call once at startup.
    pub(crate) async fn load(&self) -> anyhow::Result<()> {
        let snap = load_snapshot(&self.pool).await?;
        let count = snap.courses.len();
        let _ = self.tx.send(Arc::new(snap));
        info!(courses = count, "Schedule cache initially loaded");
        Ok(())
    }
}
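The intended read path for this cache, shown as a small sketch (illustrative; the real caller is the timeline endpoint): take the current snapshot immediately and only trigger a refresh when stale, so no reader ever waits on the database.

fn count_cached_courses(cache: &ScheduleCache) -> usize {
    cache.ensure_fresh(); // may spawn a background refresh; never blocks
    let snapshot = cache.snapshot(); // lock-free clone of the current Arc
    snapshot.courses.len()
}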
||||
// ── Database loading ────────────────────────────────────────────────
|
||||
|
||||
/// Row returned from the lightweight schedule query.
|
||||
#[derive(sqlx::FromRow)]
|
||||
struct ScheduleRow {
|
||||
subject: String,
|
||||
enrollment: i32,
|
||||
meeting_times: Value,
|
||||
}
|
||||
|
||||
/// Load all courses and parse their meeting times into a snapshot.
|
||||
async fn load_snapshot(pool: &PgPool) -> anyhow::Result<ScheduleSnapshot> {
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let rows: Vec<ScheduleRow> =
|
||||
sqlx::query_as("SELECT subject, enrollment, meeting_times FROM courses")
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let courses: Vec<CachedCourse> = rows
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
let schedules = parse_meeting_times(&row.meeting_times);
|
||||
CachedCourse {
|
||||
subject: row.subject,
|
||||
enrollment: row.enrollment,
|
||||
schedules,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
debug!(
|
||||
courses = courses.len(),
|
||||
elapsed_ms = start.elapsed().as_millis(),
|
||||
"Schedule snapshot built"
|
||||
);
|
||||
|
||||
Ok(ScheduleSnapshot {
|
||||
courses,
|
||||
refreshed_at: std::time::Instant::now(),
|
||||
})
|
||||
}
|
||||
|
||||
// ── Meeting time parsing ────────────────────────────────────────────
|
||||
|
||||
/// Parse the JSONB `meeting_times` array into compact `ParsedSchedule` values.
|
||||
fn parse_meeting_times(value: &Value) -> Vec<ParsedSchedule> {
|
||||
let Value::Array(arr) = value else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
arr.iter().filter_map(parse_one_meeting).collect()
|
||||
}
|
||||
|
||||
fn parse_one_meeting(mt: &Value) -> Option<ParsedSchedule> {
|
||||
let begin_time = mt.get("begin_time")?.as_str()?;
|
||||
let end_time = mt.get("end_time")?.as_str()?;
|
||||
|
||||
let begin_minutes = parse_hhmm(begin_time)?;
|
||||
let end_minutes = parse_hhmm(end_time)?;
|
||||
|
||||
if end_minutes <= begin_minutes {
|
||||
return None;
|
||||
}
|
||||
|
||||
let start_date = parse_date(mt.get("start_date")?.as_str()?)?;
|
||||
let end_date = parse_date(mt.get("end_date")?.as_str()?)?;
|
||||
|
||||
const DAY_KEYS: [&str; 7] = [
|
||||
"monday",
|
||||
"tuesday",
|
||||
"wednesday",
|
||||
"thursday",
|
||||
"friday",
|
||||
"saturday",
|
||||
"sunday",
|
||||
];
|
||||
let mut days: u8 = 0;
|
||||
for (bit, key) in DAY_KEYS.iter().enumerate() {
|
||||
if mt.get(*key).and_then(Value::as_bool).unwrap_or(false) {
|
||||
days |= 1 << bit;
|
||||
}
|
||||
}
|
||||
|
||||
// Skip meetings with no days (online async, etc.)
|
||||
if days == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(ParsedSchedule {
|
||||
days,
|
||||
begin_minutes,
|
||||
end_minutes,
|
||||
start_date,
|
||||
end_date,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse "HHMM" → minutes since midnight.
|
||||
fn parse_hhmm(s: &str) -> Option<u16> {
|
||||
if s.len() != 4 {
|
||||
return None;
|
||||
}
|
||||
let hours: u16 = s[..2].parse().ok()?;
|
||||
let mins: u16 = s[2..].parse().ok()?;
|
||||
if hours >= 24 || mins >= 60 {
|
||||
return None;
|
||||
}
|
||||
Some(hours * 60 + mins)
|
||||
}
|
||||
|
||||
/// Parse "MM/DD/YYYY" → NaiveDate.
|
||||
fn parse_date(s: &str) -> Option<NaiveDate> {
|
||||
NaiveDate::parse_from_str(s, "%m/%d/%Y").ok()
|
||||
}
|
||||
|
||||
// ── Slot matching ───────────────────────────────────────────────────
|
||||
|
||||
/// Day-of-week as our bitmask index (Monday = 0 .. Sunday = 6).
|
||||
/// Chrono's `weekday().num_days_from_monday()` already gives 0=Mon..6=Sun.
|
||||
pub(crate) fn weekday_bit(day: chrono::Weekday) -> u8 {
|
||||
1 << day.num_days_from_monday()
|
||||
}
|
||||
|
||||
impl ParsedSchedule {
|
||||
/// Check if this schedule is active during a given slot.
|
||||
///
|
||||
/// `slot_date` is the calendar date of the slot.
|
||||
/// `slot_start` / `slot_end` are minutes since midnight for the 15-min window.
|
||||
#[inline]
|
||||
pub(crate) fn active_during(
|
||||
&self,
|
||||
slot_date: NaiveDate,
|
||||
slot_weekday_bit: u8,
|
||||
slot_start_minutes: u16,
|
||||
slot_end_minutes: u16,
|
||||
) -> bool {
|
||||
// Day-of-week check
|
||||
if self.days & slot_weekday_bit == 0 {
|
||||
return false;
|
||||
}
|
||||
// Date range check
|
||||
if slot_date < self.start_date || slot_date > self.end_date {
|
||||
return false;
|
||||
}
|
||||
// Time overlap: meeting [begin, end) overlaps slot [start, end)
|
||||
self.begin_minutes < slot_end_minutes && self.end_minutes > slot_start_minutes
|
||||
}
|
||||
}
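
// Worked example of the half-open overlap rule above: a 10:00-10:50 meeting
// (begin=600, end=650) is active during the 10:45-11:00 slot (600 < 660 and
// 650 > 645), but not during 9:45-10:00 (the slot ends exactly as the meeting
// starts) or 10:50-11:05 (the slot starts exactly as the meeting ends). The
// tests below pin down exactly these edge cases.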

#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;
    use serde_json::json;

    #[test]
    fn parse_hhmm_valid() {
        assert_eq!(parse_hhmm("0000"), Some(0));
        assert_eq!(parse_hhmm("0930"), Some(570));
        assert_eq!(parse_hhmm("1350"), Some(830));
        assert_eq!(parse_hhmm("2359"), Some(1439));
    }

    #[test]
    fn parse_hhmm_invalid() {
        assert_eq!(parse_hhmm(""), None);
        assert_eq!(parse_hhmm("abc"), None);
        assert_eq!(parse_hhmm("2500"), None);
        assert_eq!(parse_hhmm("0060"), None);
    }

    #[test]
    fn parse_date_valid() {
        assert_eq!(
            parse_date("08/26/2025"),
            Some(NaiveDate::from_ymd_opt(2025, 8, 26).unwrap())
        );
    }
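
    // Added check (a small addition, not part of the original change set):
    // pins down the Monday-is-bit-0 convention that the parser and
    // `active_during` both rely on.
    #[test]
    fn weekday_bit_mapping() {
        assert_eq!(weekday_bit(chrono::Weekday::Mon), 0b0000001);
        assert_eq!(weekday_bit(chrono::Weekday::Wed), 0b0000100);
        assert_eq!(weekday_bit(chrono::Weekday::Sun), 0b1000000);
    }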

    #[test]
    fn parse_meeting_times_basic() {
        let json = json!([{
            "begin_time": "1000",
            "end_time": "1050",
            "start_date": "08/26/2025",
            "end_date": "12/13/2025",
            "monday": true,
            "tuesday": false,
            "wednesday": true,
            "thursday": false,
            "friday": true,
            "saturday": false,
            "sunday": false,
            "building": "NPB",
            "building_description": "North Paseo Building",
            "room": "1.238",
            "campus": "11",
            "meeting_type": "FF",
            "meeting_schedule_type": "AFF"
        }]);

        let schedules = parse_meeting_times(&json);
        assert_eq!(schedules.len(), 1);

        let s = &schedules[0];
        assert_eq!(s.begin_minutes, 600); // 10:00
        assert_eq!(s.end_minutes, 650); // 10:50
        assert_eq!(s.days, 0b0010101); // Mon, Wed, Fri
    }

    #[test]
    fn parse_meeting_times_skips_null_times() {
        let json = json!([{
            "begin_time": null,
            "end_time": null,
            "start_date": "08/26/2025",
            "end_date": "12/13/2025",
            "monday": false,
            "tuesday": false,
            "wednesday": false,
            "thursday": false,
            "friday": false,
            "saturday": false,
            "sunday": false,
            "meeting_type": "OS",
            "meeting_schedule_type": "AFF"
        }]);

        let schedules = parse_meeting_times(&json);
        assert!(schedules.is_empty());
    }

    #[test]
    fn active_during_matching_slot() {
        let sched = ParsedSchedule {
            days: 0b0000001, // Monday
            begin_minutes: 600,
            end_minutes: 650,
            start_date: NaiveDate::from_ymd_opt(2025, 8, 26).unwrap(),
            end_date: NaiveDate::from_ymd_opt(2025, 12, 13).unwrap(),
        };

        // Monday Sept 1 2025, 10:00-10:15 slot
        let date = NaiveDate::from_ymd_opt(2025, 9, 1).unwrap();
        assert!(sched.active_during(date, weekday_bit(chrono::Weekday::Mon), 600, 615));
    }

    #[test]
    fn active_during_wrong_day() {
        let sched = ParsedSchedule {
            days: 0b0000001, // Monday only
            begin_minutes: 600,
            end_minutes: 650,
            start_date: NaiveDate::from_ymd_opt(2025, 8, 26).unwrap(),
            end_date: NaiveDate::from_ymd_opt(2025, 12, 13).unwrap(),
        };

        // Tuesday Sept 2 2025
        let date = NaiveDate::from_ymd_opt(2025, 9, 2).unwrap();
        assert!(!sched.active_during(date, weekday_bit(chrono::Weekday::Tue), 600, 615));
    }

    #[test]
    fn active_during_no_time_overlap() {
        let sched = ParsedSchedule {
            days: 0b0000001,
            begin_minutes: 600, // 10:00
            end_minutes: 650,   // 10:50
            start_date: NaiveDate::from_ymd_opt(2025, 8, 26).unwrap(),
            end_date: NaiveDate::from_ymd_opt(2025, 12, 13).unwrap(),
        };

        let date = NaiveDate::from_ymd_opt(2025, 9, 1).unwrap(); // Monday
        // Slot 11:00-11:15 — after the meeting ends
        assert!(!sched.active_during(date, weekday_bit(chrono::Weekday::Mon), 660, 675));
        // Slot 9:45-10:00 — just before meeting starts (end=600, begin=600 → no overlap)
        assert!(!sched.active_during(date, weekday_bit(chrono::Weekday::Mon), 585, 600));
    }

    #[test]
    fn active_during_outside_date_range() {
        let sched = ParsedSchedule {
            days: 0b0000001,
            begin_minutes: 600,
            end_minutes: 650,
            start_date: NaiveDate::from_ymd_opt(2025, 8, 26).unwrap(),
            end_date: NaiveDate::from_ymd_opt(2025, 12, 13).unwrap(),
        };

        // Monday Jan 6 2025 — before semester
        let date = NaiveDate::from_ymd_opt(2025, 1, 6).unwrap();
        assert!(!sched.active_during(date, weekday_bit(chrono::Weekday::Mon), 600, 615));
    }

    #[test]
    fn active_during_edge_overlap() {
        let sched = ParsedSchedule {
            days: 0b0000001,
            begin_minutes: 600,
            end_minutes: 650,
            start_date: NaiveDate::from_ymd_opt(2025, 8, 26).unwrap(),
            end_date: NaiveDate::from_ymd_opt(2025, 12, 13).unwrap(),
        };

        let date = NaiveDate::from_ymd_opt(2025, 9, 1).unwrap();
        // Slot 10:45-11:00 — overlaps last 5 minutes of meeting
        assert!(sched.active_during(date, weekday_bit(chrono::Weekday::Mon), 645, 660));
        // Slot 9:45-10:00 — ends exactly when meeting starts, no overlap
        assert!(!sched.active_during(date, weekday_bit(chrono::Weekday::Mon), 585, 600));
        // Slot 10:50-11:05 — starts exactly when meeting ends, no overlap
        assert!(!sched.active_during(date, weekday_bit(chrono::Weekday::Mon), 650, 665));
    }
}

@@ -0,0 +1,187 @@
//! In-memory caches for session resolution and OAuth CSRF state.

use chrono::{DateTime, Utc};
use dashmap::DashMap;
use rand::Rng;
use sqlx::PgPool;
use std::sync::Arc;
use std::time::{Duration, Instant};

use crate::data::models::User;

/// Cached session entry with TTL.
#[derive(Debug, Clone)]
struct CachedSession {
    user: User,
    session_expires_at: DateTime<Utc>,
    cached_at: Instant,
}

/// In-memory session cache backed by PostgreSQL.
///
/// Provides fast session resolution without a DB round-trip on every request.
/// Cache entries expire after a configurable TTL (default 5 minutes).
#[derive(Clone)]
pub struct SessionCache {
    cache: Arc<DashMap<String, CachedSession>>,
    db_pool: PgPool,
    cache_ttl: Duration,
}

impl SessionCache {
    /// Create a new session cache with a 5-minute default TTL.
    pub fn new(db_pool: PgPool) -> Self {
        Self {
            cache: Arc::new(DashMap::new()),
            db_pool,
            cache_ttl: Duration::from_secs(5 * 60),
        }
    }

    /// Resolve a session token to a [`User`], using the cache when possible.
    ///
    /// On cache hit (entry present, not stale, session not expired), returns the
    /// cached user immediately. On miss or stale entry, queries the database for
    /// the session and user, populates the cache, and fire-and-forgets a
    /// `touch_session` call to update `last_active_at`.
    pub async fn get_user(&self, token: &str) -> Option<User> {
        // Check cache first
        if let Some(entry) = self.cache.get(token) {
            let now_instant = Instant::now();
            let now_utc = Utc::now();

            let cache_fresh = entry.cached_at + self.cache_ttl > now_instant;
            let session_valid = entry.session_expires_at > now_utc;

            if cache_fresh && session_valid {
                return Some(entry.user.clone());
            }

            // Stale or expired — drop the ref before removing
            drop(entry);
            self.cache.remove(token);
        }

        // Cache miss — query DB
        let session = crate::data::sessions::get_session(&self.db_pool, token)
            .await
            .ok()
            .flatten()?;

        let user = crate::data::users::get_user(&self.db_pool, session.user_id)
            .await
            .ok()
            .flatten()?;

        self.cache.insert(
            token.to_owned(),
            CachedSession {
                user: user.clone(),
                session_expires_at: session.expires_at,
                cached_at: Instant::now(),
            },
        );

        // Fire-and-forget touch to update last_active_at
        let pool = self.db_pool.clone();
        let token_owned = token.to_owned();
        tokio::spawn(async move {
            if let Err(e) = crate::data::sessions::touch_session(&pool, &token_owned).await {
                tracing::warn!(error = %e, "failed to touch session");
            }
        });

        Some(user)
    }

    /// Remove a single session from the cache (e.g. on logout).
    pub fn evict(&self, token: &str) {
        self.cache.remove(token);
    }

    /// Remove all cached sessions belonging to a user.
    pub fn evict_user(&self, discord_id: i64) {
        self.cache
            .retain(|_, entry| entry.user.discord_id != discord_id);
    }

    /// Delete expired sessions from the database and sweep the in-memory cache.
    ///
    /// Returns the number of sessions deleted from the database.
    pub async fn cleanup_expired(&self) -> anyhow::Result<u64> {
        let deleted = crate::data::sessions::cleanup_expired(&self.db_pool).await?;

        let now = Utc::now();
        self.cache.retain(|_, entry| entry.session_expires_at > now);

        Ok(deleted)
    }
}
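
// Illustrative call site (a sketch, not part of this change; the surrounding
// handler wiring is assumed): resolve the session cookie through the cache
// and treat `None` as an unauthenticated request.
//
//     match state.session_cache.get_user(&token).await {
//         Some(user) => { /* proceed as `user` */ }
//         None => { /* respond with 401 */ }
//     }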

/// Data stored alongside each OAuth CSRF state token.
struct OAuthStateEntry {
    created_at: Instant,
    /// The browser origin that initiated the login flow, so the callback
    /// can reconstruct the exact redirect_uri Discord expects.
    origin: String,
}

/// Ephemeral store for OAuth CSRF state tokens.
///
/// Tokens are stored with creation time and expire after a configurable TTL.
/// Each token is single-use: validation consumes it.
#[derive(Clone)]
pub struct OAuthStateStore {
    states: Arc<DashMap<String, OAuthStateEntry>>,
    ttl: Duration,
}

impl Default for OAuthStateStore {
    fn default() -> Self {
        Self::new()
    }
}

impl OAuthStateStore {
    /// Create a new store with a 10-minute TTL.
    pub fn new() -> Self {
        Self {
            states: Arc::new(DashMap::new()),
            ttl: Duration::from_secs(10 * 60),
        }
    }

    /// Generate a random 16-byte hex CSRF token, store it with the given
    /// origin, and return the token.
    pub fn generate(&self, origin: String) -> String {
        let bytes: [u8; 16] = rand::rng().random();
        let token: String = bytes.iter().map(|b| format!("{b:02x}")).collect();
        self.states.insert(
            token.clone(),
            OAuthStateEntry {
                created_at: Instant::now(),
                origin,
            },
        );
        token
    }

    /// Validate and consume a CSRF token. Returns the stored origin if the
    /// token was present and not expired.
    pub fn validate(&self, state: &str) -> Option<String> {
        let (_, entry) = self.states.remove(state)?;
        if entry.created_at.elapsed() < self.ttl {
            Some(entry.origin)
        } else {
            None
        }
    }

    /// Remove all expired entries from the store.
    #[allow(dead_code)] // Intended for periodic cleanup task (not yet wired)
    pub fn cleanup(&self) {
        let ttl = self.ttl;
        self.states
            .retain(|_, entry| entry.created_at.elapsed() < ttl);
    }
}
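
// Flow sketch (illustrative; the handler names are assumptions): the login
// route calls `generate(origin)` and sends the returned token as the OAuth
// `state` query parameter; the callback route calls `validate(state)` and
// rejects the request when it returns `None` (unknown, already-used, or
// expired token).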

@@ -0,0 +1,239 @@
//! Timeline API endpoint for enrollment aggregation by subject over time.
//!
//! Accepts multiple time ranges, merges overlaps, aligns to 15-minute
//! slot boundaries, and returns per-subject enrollment totals for each slot.
//! Only courses whose meeting times overlap a given slot contribute to that
//! slot's totals — so the chart reflects the actual class schedule rhythm.
//!
//! Course data is served from an ISR-style in-memory cache (see
//! [`ScheduleCache`]) that refreshes hourly in the background with
//! stale-while-revalidate semantics.

use axum::{extract::State, response::Json};
use chrono::{DateTime, Datelike, Duration, NaiveTime, Timelike, Utc};
use chrono_tz::US::Central;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
use ts_rs::TS;

use crate::state::AppState;
use crate::web::error::ApiError;
use crate::web::schedule_cache::weekday_bit;

/// 15 minutes in seconds, matching the frontend `SLOT_INTERVAL_MS`.
const SLOT_SECONDS: i64 = 15 * 60;
const SLOT_MINUTES: u16 = 15;

/// Maximum number of ranges in a single request.
const MAX_RANGES: usize = 20;

/// Maximum span of a single range (72 hours).
const MAX_RANGE_SPAN: Duration = Duration::hours(72);

/// Maximum total span across all ranges to prevent excessive queries.
const MAX_TOTAL_SPAN: Duration = Duration::hours(168); // 1 week

// ── Request / Response types ────────────────────────────────────────

#[derive(Debug, Deserialize, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct TimelineRequest {
    ranges: Vec<TimeRange>,
}

#[derive(Debug, Deserialize, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct TimeRange {
    /// ISO-8601 UTC timestamp (e.g., "2024-01-15T10:30:00Z")
    #[ts(type = "string")]
    start: DateTime<Utc>,
    /// ISO-8601 UTC timestamp (e.g., "2024-01-15T12:30:00Z")
    #[ts(type = "string")]
    end: DateTime<Utc>,
}

#[derive(Debug, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct TimelineResponse {
    /// 15-minute slots with per-subject enrollment totals, sorted by time.
    slots: Vec<TimelineSlot>,
    /// All subject codes present in the returned data.
    subjects: Vec<String>,
}

#[derive(Debug, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct TimelineSlot {
    /// ISO-8601 UTC timestamp at the start of this 15-minute bucket (e.g., "2024-01-15T10:30:00Z")
    #[ts(type = "string")]
    time: DateTime<Utc>,
    /// Subject code → total enrollment in this slot.
    #[ts(type = "Record<string, number>")]
    subjects: BTreeMap<String, i64>,
}

// ── Alignment helpers ───────────────────────────────────────────────

/// Floor a timestamp to the nearest 15-minute boundary.
fn align_floor(ts: DateTime<Utc>) -> DateTime<Utc> {
    let secs = ts.timestamp();
    let aligned = (secs / SLOT_SECONDS) * SLOT_SECONDS;
    DateTime::from_timestamp(aligned, 0).unwrap_or(ts)
}

/// Ceil a timestamp to the nearest 15-minute boundary.
fn align_ceil(ts: DateTime<Utc>) -> DateTime<Utc> {
    let secs = ts.timestamp();
    let aligned = ((secs + SLOT_SECONDS - 1) / SLOT_SECONDS) * SLOT_SECONDS;
    DateTime::from_timestamp(aligned, 0).unwrap_or(ts)
}
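
// For example: 10:07:30Z floors to 10:00:00Z and ceils to 10:15:00Z, so a
// requested range of 10:07-10:22 widens to the aligned range 10:00-10:30.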

// ── Range merging ───────────────────────────────────────────────────

/// Aligned, validated range.
#[derive(Debug, Clone, Copy)]
struct AlignedRange {
    start: DateTime<Utc>,
    end: DateTime<Utc>,
}

/// Merge overlapping/adjacent ranges into a minimal set.
fn merge_ranges(mut ranges: Vec<AlignedRange>) -> Vec<AlignedRange> {
    if ranges.is_empty() {
        return ranges;
    }
    ranges.sort_by_key(|r| r.start);
    let mut merged: Vec<AlignedRange> = vec![ranges[0]];
    for r in &ranges[1..] {
        let last = merged.last_mut().unwrap();
        if r.start <= last.end {
            last.end = last.end.max(r.end);
        } else {
            merged.push(*r);
        }
    }
    merged
}
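
// For example: [09:00, 10:00] and [09:45, 11:00] merge into [09:00, 11:00].
// A range that starts exactly where the previous one ends (r.start == last.end)
// counts as adjacent and is coalesced as well.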

/// Generate all aligned slot timestamps within the merged ranges.
fn generate_slots(merged: &[AlignedRange]) -> BTreeSet<DateTime<Utc>> {
    let mut slots = BTreeSet::new();
    for range in merged {
        let mut t = range.start;
        while t < range.end {
            slots.insert(t);
            t += Duration::seconds(SLOT_SECONDS);
        }
    }
    slots
}

// ── Handler ─────────────────────────────────────────────────────────

/// `POST /api/timeline`
///
/// Accepts a JSON body with multiple time ranges. Returns per-subject
/// enrollment totals bucketed into 15-minute slots. Only courses whose
/// meeting schedule overlaps a slot contribute to that slot's count.
pub(crate) async fn timeline(
    State(state): State<AppState>,
    Json(body): Json<TimelineRequest>,
) -> Result<Json<TimelineResponse>, ApiError> {
    // ── Validate ────────────────────────────────────────────────────
    if body.ranges.is_empty() {
        return Err(ApiError::bad_request("At least one range is required"));
    }
    if body.ranges.len() > MAX_RANGES {
        return Err(ApiError::bad_request(format!(
            "Too many ranges (max {MAX_RANGES})"
        )));
    }

    let mut aligned: Vec<AlignedRange> = Vec::with_capacity(body.ranges.len());
    for r in &body.ranges {
        if r.end <= r.start {
            return Err(ApiError::bad_request(format!(
                "Range end ({}) must be after start ({})",
                r.end, r.start
            )));
        }
        let span = r.end - r.start;
        if span > MAX_RANGE_SPAN {
            return Err(ApiError::bad_request(format!(
                "Range span ({} hours) exceeds maximum ({} hours)",
                span.num_hours(),
                MAX_RANGE_SPAN.num_hours()
            )));
        }
        aligned.push(AlignedRange {
            start: align_floor(r.start),
            end: align_ceil(r.end),
        });
    }

    let merged = merge_ranges(aligned);

    // Validate total span
    let total_span: Duration = merged.iter().map(|r| r.end - r.start).sum();
    if total_span > MAX_TOTAL_SPAN {
        return Err(ApiError::bad_request(format!(
            "Total time span ({} hours) exceeds maximum ({} hours)",
            total_span.num_hours(),
            MAX_TOTAL_SPAN.num_hours()
        )));
    }

    // ── Get cached schedule data (ISR: stale-while-revalidate) ───────
    state.schedule_cache.ensure_fresh();
    let snapshot = state.schedule_cache.snapshot();

    // ── Build per-slot enrollment by filtering on meeting times ──────
    let slot_times = generate_slots(&merged);
    let mut all_subjects: BTreeSet<String> = BTreeSet::new();

    let slots: Vec<TimelineSlot> = slot_times
        .into_iter()
        .map(|utc_time| {
            // Convert UTC slot to Central time for local day-of-week and time-of-day
            let local = utc_time.with_timezone(&Central);
            let local_date = local.date_naive();
            let local_time = local.time();
            let weekday = local.weekday();
            let wday_bit = weekday_bit(weekday);
            let slot_start_minutes = time_to_minutes(local_time);
            let slot_end_minutes = slot_start_minutes + SLOT_MINUTES;

            let mut subject_totals: BTreeMap<String, i64> = BTreeMap::new();

            for course in &snapshot.courses {
                let active = course.schedules.iter().any(|s| {
                    s.active_during(local_date, wday_bit, slot_start_minutes, slot_end_minutes)
                });
                if active {
                    *subject_totals.entry(course.subject.clone()).or_default() +=
                        course.enrollment as i64;
                }
            }

            all_subjects.extend(subject_totals.keys().cloned());

            TimelineSlot {
                time: utc_time,
                subjects: subject_totals,
            }
        })
        .collect();

    let subjects: Vec<String> = all_subjects.into_iter().collect();

    Ok(Json(TimelineResponse { slots, subjects }))
}

/// Convert a `NaiveTime` to minutes since midnight.
fn time_to_minutes(t: NaiveTime) -> u16 {
    (t.hour() * 60 + t.minute()) as u16
}

+211
@@ -0,0 +1,211 @@
//! WebSocket event types and handler for real-time scrape job updates.

use axum::{
    extract::{
        State,
        ws::{Message, WebSocket, WebSocketUpgrade},
    },
    response::IntoResponse,
};
use futures::{SinkExt, StreamExt};
use serde::Serialize;
use sqlx::PgPool;
use tokio::sync::broadcast;
use tracing::debug;
use ts_rs::TS;

use crate::data::models::{ScrapeJob, ScrapeJobStatus};
use crate::state::AppState;
use crate::web::extractors::AdminUser;

/// A serializable DTO for `ScrapeJob` with computed `status`.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct ScrapeJobDto {
    pub id: i32,
    pub target_type: String,
    pub target_payload: serde_json::Value,
    pub priority: String,
    pub execute_at: String,
    pub created_at: String,
    pub locked_at: Option<String>,
    pub retry_count: i32,
    pub max_retries: i32,
    pub queued_at: String,
    pub status: ScrapeJobStatus,
}

impl From<&ScrapeJob> for ScrapeJobDto {
    fn from(job: &ScrapeJob) -> Self {
        Self {
            id: job.id,
            target_type: format!("{:?}", job.target_type),
            target_payload: job.target_payload.clone(),
            priority: format!("{:?}", job.priority),
            execute_at: job.execute_at.to_rfc3339(),
            created_at: job.created_at.to_rfc3339(),
            locked_at: job.locked_at.map(|t| t.to_rfc3339()),
            retry_count: job.retry_count,
            max_retries: job.max_retries,
            queued_at: job.queued_at.to_rfc3339(),
            status: job.status(),
        }
    }
}

/// Events broadcast when scrape job state changes.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(tag = "type", rename_all = "camelCase")]
#[ts(export)]
pub enum ScrapeJobEvent {
    Init {
        jobs: Vec<ScrapeJobDto>,
    },
    JobCreated {
        job: ScrapeJobDto,
    },
    JobLocked {
        id: i32,
        #[serde(rename = "lockedAt")]
        locked_at: String,
        status: ScrapeJobStatus,
    },
    JobCompleted {
        id: i32,
    },
    JobRetried {
        id: i32,
        #[serde(rename = "retryCount")]
        retry_count: i32,
        #[serde(rename = "queuedAt")]
        queued_at: String,
        status: ScrapeJobStatus,
    },
    JobExhausted {
        id: i32,
    },
    JobDeleted {
        id: i32,
    },
}
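
// Wire-format sketch (a reading of the serde attributes above; the exact
// `status` value depends on how `ScrapeJobStatus` serializes):
//
//     {"type":"jobLocked","id":42,"lockedAt":"2025-08-26T10:00:00+00:00","status":...}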

/// Fetch current scrape jobs from the DB and build an `Init` event.
async fn build_init_event(db_pool: &PgPool) -> Result<ScrapeJobEvent, sqlx::Error> {
    let rows = sqlx::query_as::<_, ScrapeJob>(
        "SELECT * FROM scrape_jobs ORDER BY priority DESC, execute_at ASC LIMIT 100",
    )
    .fetch_all(db_pool)
    .await?;

    let jobs = rows.iter().map(ScrapeJobDto::from).collect();
    Ok(ScrapeJobEvent::Init { jobs })
}

/// WebSocket endpoint for real-time scrape job updates.
///
/// Auth is checked via `AdminUser` before the upgrade occurs — if rejected,
/// a 401/403 is returned and the upgrade never happens.
pub async fn scrape_jobs_ws(
    ws: WebSocketUpgrade,
    AdminUser(_user): AdminUser,
    State(state): State<AppState>,
) -> impl IntoResponse {
    ws.on_upgrade(|socket| handle_scrape_jobs_ws(socket, state))
}

/// Serialize an event and send it over the WebSocket sink.
/// Returns `true` if the message was sent, `false` if the client disconnected.
async fn send_event(
    sink: &mut futures::stream::SplitSink<WebSocket, Message>,
    event: &ScrapeJobEvent,
) -> bool {
    let Ok(json) = serde_json::to_string(event) else {
        return true; // serialization failed, but connection is still alive
    };
    sink.send(Message::Text(json.into())).await.is_ok()
}

async fn handle_scrape_jobs_ws(socket: WebSocket, state: AppState) {
    debug!("scrape-jobs WebSocket connected");

    let (mut sink, mut stream) = socket.split();

    // Send initial state
    let init_event = match build_init_event(&state.db_pool).await {
        Ok(event) => event,
        Err(e) => {
            debug!(error = %e, "failed to build init event, closing WebSocket");
            return;
        }
    };
    if !send_event(&mut sink, &init_event).await {
        debug!("client disconnected during init send");
        return;
    }

    // Subscribe to broadcast events
    let mut rx = state.scrape_job_events();

    loop {
        tokio::select! {
            result = rx.recv() => {
                match result {
                    Ok(ref event) => {
                        if !send_event(&mut sink, event).await {
                            debug!("client disconnected during event send");
                            break;
                        }
                    }
                    Err(broadcast::error::RecvError::Lagged(n)) => {
                        debug!(missed = n, "broadcast lagged, resyncing");
                        match build_init_event(&state.db_pool).await {
                            Ok(ref event) => {
                                if !send_event(&mut sink, event).await {
                                    debug!("client disconnected during resync send");
                                    break;
                                }
                            }
                            Err(e) => {
                                debug!(error = %e, "failed to build resync init event");
                            }
                        }
                    }
                    Err(broadcast::error::RecvError::Closed) => {
                        debug!("broadcast channel closed");
                        break;
                    }
                }
            }
            msg = stream.next() => {
                match msg {
                    Some(Ok(Message::Text(text))) => {
                        if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&text)
                            && parsed.get("type").and_then(|t| t.as_str()) == Some("resync")
                        {
                            debug!("client requested resync");
                            match build_init_event(&state.db_pool).await {
                                Ok(ref event) => {
                                    if !send_event(&mut sink, event).await {
                                        debug!("client disconnected during resync send");
                                        break;
                                    }
                                }
                                Err(e) => {
                                    debug!(error = %e, "failed to build resync init event");
                                }
                            }
                        }
                    }
                    Some(Ok(Message::Close(_))) | None => {
                        debug!("client disconnected");
                        break;
                    }
                    _ => {}
                }
            }
        }
    }

    debug!("scrape-jobs WebSocket disconnected");
}

@@ -0,0 +1,103 @@
#[allow(dead_code)]
mod helpers;

use banner::data::rmp::unmatch_instructor;
use sqlx::PgPool;

/// Test that unmatching an instructor resets accepted candidates back to pending.
///
/// When a user unmatches an instructor, accepted candidates should be reset to
/// 'pending' so they can be re-matched later. This prevents the bug where
/// candidates remain 'accepted' but have no corresponding link.
#[sqlx::test]
async fn unmatch_resets_accepted_candidates_to_pending(pool: PgPool) {
    // ARRANGE: Create an instructor
    let (instructor_id,): (i32,) = sqlx::query_as(
        "INSERT INTO instructors (display_name, email)
         VALUES ('Test, Instructor', 'test@utsa.edu')
         RETURNING id",
    )
    .fetch_one(&pool)
    .await
    .expect("failed to create instructor");

    // ARRANGE: Create an RMP professor
    let (rmp_legacy_id,): (i32,) = sqlx::query_as(
        "INSERT INTO rmp_professors (legacy_id, graphql_id, first_name, last_name, num_ratings)
         VALUES (9999999, 'test-graphql-id', 'Test', 'Professor', 10)
         RETURNING legacy_id",
    )
    .fetch_one(&pool)
    .await
    .expect("failed to create rmp professor");

    // ARRANGE: Create a match candidate with 'accepted' status
    sqlx::query(
        "INSERT INTO rmp_match_candidates (instructor_id, rmp_legacy_id, score, status)
         VALUES ($1, $2, 0.85, 'accepted')",
    )
    .bind(instructor_id)
    .bind(rmp_legacy_id)
    .execute(&pool)
    .await
    .expect("failed to create candidate");

    // ARRANGE: Create a link in instructor_rmp_links
    sqlx::query(
        "INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, source)
         VALUES ($1, $2, 'manual')",
    )
    .bind(instructor_id)
    .bind(rmp_legacy_id)
    .execute(&pool)
    .await
    .expect("failed to create link");

    // ARRANGE: Update instructor status to 'confirmed'
    sqlx::query("UPDATE instructors SET rmp_match_status = 'confirmed' WHERE id = $1")
        .bind(instructor_id)
        .execute(&pool)
        .await
        .expect("failed to update instructor status");

    // ACT: Unmatch the specific RMP profile
    unmatch_instructor(&pool, instructor_id, Some(rmp_legacy_id))
        .await
        .expect("unmatch should succeed");

    // ASSERT: Candidate should be reset to pending
    let (candidate_status,): (String,) = sqlx::query_as(
        "SELECT status FROM rmp_match_candidates
         WHERE instructor_id = $1 AND rmp_legacy_id = $2",
    )
    .bind(instructor_id)
    .bind(rmp_legacy_id)
    .fetch_one(&pool)
    .await
    .expect("failed to fetch candidate status");
    assert_eq!(
        candidate_status, "pending",
        "candidate should be reset to pending after unmatch"
    );

    // ASSERT: Link should be deleted
    let (link_count,): (i64,) =
        sqlx::query_as("SELECT COUNT(*) FROM instructor_rmp_links WHERE instructor_id = $1")
            .bind(instructor_id)
            .fetch_one(&pool)
            .await
            .expect("failed to count links");
    assert_eq!(link_count, 0, "link should be deleted");

    // ASSERT: Instructor status should be unmatched
    let (instructor_status,): (String,) =
        sqlx::query_as("SELECT rmp_match_status FROM instructors WHERE id = $1")
            .bind(instructor_id)
            .fetch_one(&pool)
            .await
            .expect("failed to fetch instructor status");
    assert_eq!(
        instructor_status, "unmatched",
        "instructor should be unmatched"
    );
}

@@ -1,3 +1,4 @@
+#[allow(dead_code)]
 mod helpers;
 
 use banner::data::batch::batch_upsert_courses;
@@ -210,3 +211,127 @@ async fn test_batch_upsert_unique_constraint_crn_term(pool: PgPool) {
     assert_eq!(rows[1].0, "202520");
     assert_eq!(rows[1].1, 10);
 }
+
+#[sqlx::test]
+async fn test_batch_upsert_creates_audit_and_metric_entries(pool: PgPool) {
+    // Insert initial data — should create a baseline metric but no audits
+    let initial = vec![helpers::make_course(
+        "50001",
+        "202510",
+        "CS",
+        "3443",
+        "App Programming",
+        10,
+        35,
+        0,
+        5,
+    )];
+    batch_upsert_courses(&initial, &pool).await.unwrap();
+
+    let (audit_count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM course_audits")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+    assert_eq!(
+        audit_count, 0,
+        "initial insert should not create audit entries"
+    );
+
+    let (metric_count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM course_metrics")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+    assert_eq!(
+        metric_count, 1,
+        "initial insert should create a baseline metric"
+    );
+
+    // Verify baseline metric values
+    let (enrollment, wait_count, seats): (i32, i32, i32) = sqlx::query_as(
+        "SELECT enrollment, wait_count, seats_available FROM course_metrics ORDER BY timestamp LIMIT 1",
+    )
+    .fetch_one(&pool)
+    .await
+    .unwrap();
+    assert_eq!(enrollment, 10);
+    assert_eq!(wait_count, 0);
+    assert_eq!(seats, 25); // 35 - 10
+
+    // Update enrollment and wait_count
+    let updated = vec![helpers::make_course(
+        "50001",
+        "202510",
+        "CS",
+        "3443",
+        "App Programming",
+        20,
+        35,
+        2,
+        5,
+    )];
+    batch_upsert_courses(&updated, &pool).await.unwrap();
+
+    // Should have audit entries for enrollment and wait_count changes
+    let (audit_count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM course_audits")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+    assert!(
+        audit_count >= 2,
+        "should have audit entries for enrollment and wait_count changes, got {audit_count}"
+    );
+
+    // Should have 2 metric entries: baseline + change
+    let (metric_count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM course_metrics")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+    assert_eq!(metric_count, 2, "should have baseline + 1 change metric");
+
+    // Verify the latest metric values
+    let (enrollment, wait_count, seats): (i32, i32, i32) = sqlx::query_as(
+        "SELECT enrollment, wait_count, seats_available FROM course_metrics ORDER BY timestamp DESC LIMIT 1",
+    )
+    .fetch_one(&pool)
+    .await
+    .unwrap();
+    assert_eq!(enrollment, 20);
+    assert_eq!(wait_count, 2);
+    assert_eq!(seats, 15); // 35 - 20
+}
+
+#[sqlx::test]
+async fn test_batch_upsert_no_change_no_audit(pool: PgPool) {
+    // Insert then re-insert identical data — should produce baseline metric but no audits or extra metrics
+    let course = vec![helpers::make_course(
+        "60001",
+        "202510",
+        "CS",
+        "1083",
+        "Intro to CS",
+        25,
+        30,
+        0,
+        5,
+    )];
+    batch_upsert_courses(&course, &pool).await.unwrap();
+    batch_upsert_courses(&course, &pool).await.unwrap();
+
+    let (audit_count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM course_audits")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+    assert_eq!(
+        audit_count, 0,
+        "identical re-upsert should not create audit entries"
+    );
+
+    let (metric_count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM course_metrics")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+    assert_eq!(
+        metric_count, 1,
+        "identical re-upsert should only have the baseline metric"
+    );
+}

+18 -11
@@ -1,3 +1,4 @@
+#[allow(dead_code)]
 mod helpers;
 
 use banner::data::models::{ScrapePriority, TargetType};
@@ -217,10 +218,13 @@ async fn unlock_and_increment_retry_has_retries_remaining(pool: PgPool) {
     )
     .await;
 
-    let has_retries = scrape_jobs::unlock_and_increment_retry(id, 3, &pool)
+    let result = scrape_jobs::unlock_and_increment_retry(id, 3, &pool)
         .await
         .unwrap();
-    assert!(has_retries, "should have retries remaining (0→1, max=3)");
+    assert!(
+        result.is_some(),
+        "should have retries remaining (0→1, max=3)"
+    );
 
     // Verify state in DB
     let (retry_count, locked_at): (i32, Option<chrono::DateTime<chrono::Utc>>) =
@@ -241,17 +245,17 @@ async fn unlock_and_increment_retry_exhausted(pool: PgPool) {
         json!({"subject": "CS"}),
         ScrapePriority::Medium,
         true,
-        2, // retry_count
+        3, // retry_count (already used all 3 retries)
         3, // max_retries
     )
     .await;
 
-    let has_retries = scrape_jobs::unlock_and_increment_retry(id, 3, &pool)
+    let result = scrape_jobs::unlock_and_increment_retry(id, 3, &pool)
        .await
        .unwrap();
     assert!(
-        !has_retries,
-        "should NOT have retries remaining (2→3, max=3)"
+        result.is_none(),
+        "should NOT have retries remaining (3→4, max=3)"
     );
 
     let (retry_count,): (i32,) =
@@ -260,7 +264,7 @@ async fn unlock_and_increment_retry_exhausted(pool: PgPool) {
         .fetch_one(&pool)
         .await
         .unwrap();
-    assert_eq!(retry_count, 3);
+    assert_eq!(retry_count, 4);
 }
 
 #[sqlx::test]
@@ -276,11 +280,11 @@ async fn unlock_and_increment_retry_already_exceeded(pool: PgPool) {
     )
     .await;
 
-    let has_retries = scrape_jobs::unlock_and_increment_retry(id, 3, &pool)
+    let result = scrape_jobs::unlock_and_increment_retry(id, 3, &pool)
         .await
         .unwrap();
     assert!(
-        !has_retries,
+        result.is_none(),
         "should NOT have retries remaining (5→6, max=3)"
     );
 
@@ -346,7 +350,7 @@ async fn find_existing_payloads_returns_matching(pool: PgPool) {
 }
 
 #[sqlx::test]
-async fn find_existing_payloads_ignores_locked(pool: PgPool) {
+async fn find_existing_payloads_includes_locked(pool: PgPool) {
     let payload = json!({"subject": "CS"});
 
     helpers::insert_scrape_job(
@@ -365,7 +369,10 @@ async fn find_existing_payloads_ignores_locked(pool: PgPool) {
     .await
     .unwrap();
 
-    assert!(existing.is_empty(), "locked jobs should be ignored");
+    assert!(
+        existing.contains(&payload.to_string()),
+        "locked jobs should be included in deduplication"
+    );
 }
 
 #[sqlx::test]

Vendored +1 -2
@@ -5,5 +5,4 @@ dist-ssr
 *.local
 count.txt
 .env
-.nitro
-.tanstack
+.svelte-kit

+1 -1
@@ -7,7 +7,7 @@
   },
   "files": {
     "ignoreUnknown": false,
-    "ignore": ["dist/", "node_modules/", ".tanstack/"]
+    "ignore": ["dist/", "node_modules/", ".svelte-kit/", "src/lib/bindings/"]
   },
   "formatter": {
     "enabled": true,

+276 -767
File diff suppressed because it is too large

@@ -1,60 +0,0 @@
import js from "@eslint/js";
import tseslint from "typescript-eslint";
import react from "eslint-plugin-react";
import reactHooks from "eslint-plugin-react-hooks";
import reactRefresh from "eslint-plugin-react-refresh";

export default tseslint.config(
  // Ignore generated files and build outputs
  {
    ignores: ["dist", "node_modules", "src/routeTree.gen.ts", "*.config.js"],
  },
  // Base configs
  js.configs.recommended,
  ...tseslint.configs.recommendedTypeChecked,
  // React plugin configuration
  {
    files: ["**/*.{ts,tsx}"],
    plugins: {
      react,
      "react-hooks": reactHooks,
      "react-refresh": reactRefresh,
    },
    languageOptions: {
      parserOptions: {
        project: true,
        tsconfigRootDir: import.meta.dirname,
        ecmaFeatures: {
          jsx: true,
        },
      },
    },
    settings: {
      react: {
        version: "19.0",
      },
    },
    rules: {
      // React rules
      ...react.configs.recommended.rules,
      ...react.configs["jsx-runtime"].rules,
      ...reactHooks.configs.recommended.rules,

      // React Refresh
      "react-refresh/only-export-components": ["warn", { allowConstantExport: true }],

      // TypeScript overrides
      "@typescript-eslint/no-unused-vars": [
        "error",
        {
          argsIgnorePattern: "^_",
          varsIgnorePattern: "^_",
        },
      ],
      "@typescript-eslint/no-explicit-any": "warn",

      // Disable prop-types since we're using TypeScript
      "react/prop-types": "off",
    },
  }
);

@@ -1,20 +0,0 @@
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="icon" href="/favicon.ico" />
    <meta name="theme-color" content="#000000" />
    <meta
      name="description"
      content="Banner, a Discord bot and web interface for UTSA Course Monitoring"
    />
    <link rel="apple-touch-icon" href="/logo192.png" />
    <link rel="manifest" href="/manifest.json" />
    <title>Banner</title>
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="/src/main.tsx"></script>
  </body>
</html>

+38 -37
@@ -3,48 +3,49 @@
   "private": true,
   "type": "module",
   "scripts": {
-    "dev": "vite --port 3000",
-    "start": "vite --port 3000",
-    "build": "vite build && tsc",
-    "serve": "vite preview",
+    "dev": "vite dev --port 3000",
+    "build": "vite build",
+    "preview": "vite preview",
+    "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
+    "typecheck": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
+    "lint": "biome lint .",
     "test": "vitest run",
-    "lint": "tsc && eslint . --ext .ts,.tsx",
-    "typecheck": "tsc --noEmit",
     "format": "biome format --write .",
     "format:check": "biome format ."
   },
-  "dependencies": {
-    "@radix-ui/themes": "^3.2.1",
-    "@tanstack/react-devtools": "^0.2.2",
-    "@tanstack/react-router": "^1.157.16",
-    "@tanstack/react-router-devtools": "^1.157.16",
-    "@tanstack/router-plugin": "^1.157.16",
-    "lucide-react": "^0.544.0",
-    "next-themes": "^0.4.6",
-    "react": "^19.2.4",
-    "react-dom": "^19.2.4",
-    "react-timeago": "^8.3.0",
-    "recharts": "^3.7.0"
-  },
   "devDependencies": {
     "@biomejs/biome": "^1.9.4",
-    "@eslint/js": "^9.39.2",
-    "@testing-library/dom": "^10.4.0",
-    "@testing-library/react": "^16.3.2",
-    "@types/node": "^24.10.9",
-    "@types/react": "^19.2.10",
-    "@types/react-dom": "^19.0.3",
-    "@vitejs/plugin-react": "^4.3.4",
-    "baseline-browser-mapping": "^2.9.19",
-    "eslint": "^9.39.2",
-    "eslint-plugin-react": "^7.37.5",
-    "eslint-plugin-react-hooks": "^7.0.1",
-    "eslint-plugin-react-refresh": "^0.4.26",
-    "jsdom": "^26.0.0",
-    "typescript": "^5.7.2",
-    "typescript-eslint": "^8.54.0",
-    "vite": "^6.3.5",
-    "vitest": "^3.0.5",
-    "web-vitals": "^4.2.4"
+    "@fontsource-variable/inter": "^5.2.8",
+    "@lucide/svelte": "^0.563.1",
+    "@sveltejs/adapter-static": "^3.0.10",
+    "@sveltejs/kit": "^2.50.1",
+    "@sveltejs/vite-plugin-svelte": "^5.1.1",
+    "@tailwindcss/vite": "^4.1.18",
+    "@tanstack/table-core": "^8.21.3",
+    "@types/d3-scale": "^4.0.9",
+    "@types/d3-shape": "^3.1.8",
+    "@types/d3-time-format": "^4.0.3",
+    "@types/node": "^25.1.0",
+    "bits-ui": "^1.8.0",
+    "clsx": "^2.1.1",
+    "jsdom": "^26.1.0",
+    "svelte": "^5.49.1",
+    "svelte-check": "^4.3.5",
+    "tailwind-merge": "^3.4.0",
+    "tailwindcss": "^4.1.18",
+    "typescript": "^5.9.3",
+    "vite": "^6.4.1",
+    "vitest": "^3.2.4"
   },
+  "dependencies": {
+    "@fontsource-variable/jetbrains-mono": "^5.2.8",
+    "@icons-pack/svelte-simple-icons": "^6.5.0",
+    "d3-scale": "^4.0.2",
+    "d3-shape": "^3.2.0",
+    "d3-time-format": "^4.1.0",
+    "date-fns": "^4.1.0",
+    "layerchart": "^1.0.13",
+    "overlayscrollbars": "^2.14.0",
+    "overlayscrollbars-svelte": "^0.5.5"
+  }
 }

@@ -0,0 +1,148 @@
#!/usr/bin/env bun
/**
 * Pre-compress static assets with maximum compression levels.
 * Run after `bun run build`.
 *
 * Generates .gz, .br, .zst variants for compressible files ≥ MIN_SIZE bytes.
 * These are embedded alongside originals by rust-embed and served via
 * content negotiation in src/web/assets.rs.
 */
import { readFile, readdir, stat, writeFile } from "fs/promises";
import { extname, join } from "path";
import { constants, brotliCompressSync, gzipSync } from "zlib";
import { $ } from "bun";

// Must match COMPRESSION_MIN_SIZE in src/web/encoding.rs
const MIN_SIZE = 512;

const COMPRESSIBLE_EXTENSIONS = new Set([
  ".js",
  ".css",
  ".html",
  ".json",
  ".svg",
  ".txt",
  ".xml",
  ".map",
]);

// Check if zstd CLI is available
let hasZstd = false;
try {
  await $`which zstd`.quiet();
  hasZstd = true;
} catch {
  console.warn("Warning: zstd not found, skipping .zst generation");
}

async function* walkDir(dir: string): AsyncGenerator<string> {
  try {
    const entries = await readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      const path = join(dir, entry.name);
      if (entry.isDirectory()) {
        yield* walkDir(path);
      } else if (entry.isFile()) {
        yield path;
      }
    }
  } catch {
    // Directory doesn't exist, skip
  }
}

async function compressFile(path: string): Promise<void> {
  const ext = extname(path);

  if (!COMPRESSIBLE_EXTENSIONS.has(ext)) return;
  if (path.endsWith(".br") || path.endsWith(".gz") || path.endsWith(".zst")) return;

  const stats = await stat(path);
  if (stats.size < MIN_SIZE) return;

  const [brExists, gzExists, zstExists] = await Promise.all([
    stat(`${path}.br`).then(
      () => true,
      () => false
    ),
    stat(`${path}.gz`).then(
      () => true,
      () => false
    ),
    hasZstd
      ? stat(`${path}.zst`).then(
          () => true,
          () => false
        )
      : Promise.resolve(false),
  ]);

  // Skip only when every applicable variant already exists: .br and .gz
  // always, .zst only when the zstd CLI is available.
  if (brExists && gzExists && (!hasZstd || zstExists)) {
    return;
  }

  const content = await readFile(path);
  const originalSize = content.length;

  // Brotli (maximum quality = 11)
  const brContent = brotliCompressSync(content, {
    params: {
      [constants.BROTLI_PARAM_QUALITY]: 11,
    },
  });
  await writeFile(`${path}.br`, brContent);

  // Gzip (level 9)
  const gzContent = gzipSync(content, { level: 9 });
  await writeFile(`${path}.gz`, gzContent);

  // Zstd (level 19 - maximum)
  if (hasZstd) {
    try {
      await $`zstd -19 -q -f -o ${path}.zst ${path}`.quiet();
    } catch (e) {
      console.warn(`Warning: Failed to compress ${path} with zstd: ${e}`);
    }
  }

  const brRatio = ((brContent.length / originalSize) * 100).toFixed(1);
  const gzRatio = ((gzContent.length / originalSize) * 100).toFixed(1);
  console.log(`Compressed: ${path} (br: ${brRatio}%, gz: ${gzRatio}%, ${originalSize} bytes)`);
}

async function main() {
  console.log("Pre-compressing static assets...");

  // Banner uses adapter-static with output in dist/
  const dirs = ["dist"];
  let scannedFiles = 0;
  let compressedFiles = 0;

  for (const dir of dirs) {
    for await (const file of walkDir(dir)) {
      const ext = extname(file);
      scannedFiles++;

      if (
        COMPRESSIBLE_EXTENSIONS.has(ext) &&
        !file.endsWith(".br") &&
        !file.endsWith(".gz") &&
        !file.endsWith(".zst")
      ) {
        const stats = await stat(file);
        if (stats.size >= MIN_SIZE) {
          await compressFile(file);
          compressedFiles++;
        }
      }
    }
  }

  console.log(`Done! Scanned ${scannedFiles} files, compressed ${compressedFiles} files.`);
}

main().catch((e) => {
  console.error("Compression failed:", e);
  process.exit(1);
});

@@ -1,54 +0,0 @@
.App {
  min-height: 100vh;
  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", "Ubuntu",
    "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", sans-serif;
  background-color: var(--color-background);
  color: var(--color-text);
}

@keyframes pulse {
  0%,
  100% {
    opacity: 0.2;
  }
  50% {
    opacity: 0.4;
  }
}

.animate-pulse {
  animation: pulse 2s ease-in-out infinite;
}

/* Theme toggle button */
.theme-toggle {
  cursor: pointer;
  background-color: transparent;
  border: none;
  margin: 4px;
  padding: 7px;
  border-radius: 6px;
  display: flex;
  align-items: center;
  justify-content: center;
  color: var(--gray-11);
  transition: background-color 0.2s, color 0.2s;
  transform: scale(1.25);
}

.theme-toggle:hover {
  background-color: var(--gray-4);
}

/* Screen reader only text */
.sr-only {
  position: absolute;
  width: 1px;
  height: 1px;
  padding: 0;
  margin: -1px;
  overflow: hidden;
  clip: rect(0, 0, 0, 0);
  white-space: nowrap;
  border: 0;
}

Vendored +11
@@ -0,0 +1,11 @@
/// <reference types="@sveltejs/kit" />

declare const __APP_VERSION__: string;

declare namespace App {
  // interface Error {}
  // interface Locals {}
  // interface PageData {}
  // interface PageState {}
  // interface Platform {}
}

@@ -0,0 +1,32 @@
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <link rel="icon" href="%sveltekit.assets%/favicon.ico" />
    <meta name="theme-color" content="#000000" />
    <meta
      name="description"
      content="Banner, a Discord bot and web interface for UTSA Course Monitoring"
    />
    <link rel="apple-touch-icon" href="%sveltekit.assets%/logo192.png" />
    <link rel="manifest" href="%sveltekit.assets%/manifest.json" />
    <title>Banner</title>
    <script>
      (function () {
        var stored = localStorage.getItem("theme");
        var isDark =
          stored === "dark" ||
          (stored !== "light" &&
            window.matchMedia("(prefers-color-scheme: dark)").matches);
        if (isDark) {
          document.documentElement.classList.add("dark");
        }
      })();
    </script>
    %sveltekit.head%
  </head>
  <body data-sveltekit-preload-data="hover">
    <div style="display: contents">%sveltekit.body%</div>
  </body>
</html>

@@ -1,36 +0,0 @@
import { Button } from "@radix-ui/themes";
import { Monitor, Moon, Sun } from "lucide-react";
import { useTheme } from "next-themes";
import { useMemo } from "react";

export function ThemeToggle() {
  const { theme, setTheme } = useTheme();

  const nextTheme = useMemo(() => {
    switch (theme) {
      case "light":
        return "dark";
      case "dark":
        return "system";
      case "system":
        return "light";
      default:
        console.error(`Invalid theme: ${theme}`);
        return "system";
    }
  }, [theme]);

  const icon = useMemo(() => {
    if (nextTheme === "system") {
      return <Monitor size={18} />;
    }
    return nextTheme === "dark" ? <Moon size={18} /> : <Sun size={18} />;
  }, [nextTheme]);

  return (
    <Button variant="ghost" size="3" onClick={() => setTheme(nextTheme)} className="theme-toggle">
      {icon}
      <span className="sr-only">Toggle theme</span>
    </Button>
  );
}

Some files were not shown because too many files have changed in this diff