Mirror of https://github.com/Xevion/banner.git, synced 2026-01-31 08:23:35 -06:00

Compare commits: 474d519b9d .. v0.6.0 (5 commits)
b02a0738e2, 5d7d60cd96, 1954166db6, a2a9116b7a, a103f0643a

Cargo.lock (generated) · +64 -5

@@ -272,7 +272,7 @@ dependencies = [

 [[package]]
 name = "banner"
-version = "0.5.0"
+version = "0.6.0"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -292,6 +292,7 @@ dependencies = [
  "futures",
  "governor",
  "html-escape",
+ "htmlize",
  "http 1.3.1",
  "mime_guess",
  "num-format",
@@ -315,6 +316,7 @@ dependencies = [
  "tracing",
  "tracing-subscriber",
  "ts-rs",
+ "unicode-normalization",
  "url",
  "urlencoding",
  "yansi",
@@ -492,7 +494,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3"
 dependencies = [
  "chrono",
- "phf",
+ "phf 0.12.1",
 ]

 [[package]]
@@ -1348,6 +1350,19 @@ dependencies = [
  "utf8-width",
 ]

+[[package]]
+name = "htmlize"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d347c0de239be20ba0982e4822de3124404281e119ae3e11f5d7425a414e1935"
+dependencies = [
+ "memchr",
+ "pastey",
+ "phf 0.11.3",
+ "phf_codegen",
+ "serde_json",
+]
+
 [[package]]
 name = "http"
 version = "0.2.12"
@@ -2100,6 +2115,12 @@ dependencies = [
  "windows-targets 0.52.6",
 ]

+[[package]]
+name = "pastey"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec"
+
 [[package]]
 name = "pear"
 version = "0.2.9"
@@ -2138,13 +2159,51 @@ version = "2.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"

+[[package]]
+name = "phf"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
+dependencies = [
+ "phf_shared 0.11.3",
+]
+
 [[package]]
 name = "phf"
 version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7"
 dependencies = [
- "phf_shared",
+ "phf_shared 0.12.1",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
+dependencies = [
+ "phf_generator",
+ "phf_shared 0.11.3",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
+dependencies = [
+ "phf_shared 0.11.3",
+ "rand 0.8.5",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
+dependencies = [
+ "siphasher",
 ]

 [[package]]
@@ -3982,9 +4041,9 @@ checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"

 [[package]]
 name = "unicode-normalization"
-version = "0.1.24"
+version = "0.1.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
+checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
 dependencies = [
  "tinyvec",
 ]

Cargo.toml · +3 -1

@@ -1,6 +1,6 @@
 [package]
 name = "banner"
-version = "0.5.0"
+version = "0.6.0"
 edition = "2024"
 default-run = "banner"

@@ -60,6 +60,8 @@ html-escape = "0.2.13"
 axum-extra = { version = "0.12.5", features = ["query"] }
 urlencoding = "2.1.3"
 chrono-tz = "0.10.4"
+htmlize = { version = "1.0.6", features = ["unescape"] }
+unicode-normalization = "0.1.25"

 [dev-dependencies]

CHANGELOG.md · +43 -0

@@ -6,6 +6,49 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

 ## [Unreleased]

+## [0.6.0] - 2026-01-30
+
+### Added
+
+- User authentication system with Discord OAuth, sessions, admin roles, and login page with FAQ.
+- Interactive timeline visualization with D3 canvas, pan/zoom, touch gestures, and enrollment aggregation API.
+- Scraper analytics dashboard with timeseries charts, subject monitoring, and per-subject detail views.
+- Adaptive scraper scheduling with admin endpoints for monitoring and configuration.
+- Scrape job result persistence for effectiveness tracking.
+- WebSocket support for real-time scrape job monitoring with connection status indicators.
+- Course change auditing with field-level tracking and time-series metrics endpoint.
+- Audit log UI with smart JSON diffing, conditional request caching, and auto-refresh.
+- Calendar export web endpoints for ICS download and Google Calendar redirect.
+- Confidence-based RMP matching with manual review workflow and admin instructor UI.
+- RMP profile links and confidence-aware rating display.
+- Name parsing and normalization for improved instructor-RMP matching.
+- Mobile touch controls with gesture detection for timeline.
+- Worker timeout protection and crash recovery for job queue.
+- Build-time asset compression with encoding negotiation (gzip, brotli, zstd).
+- Smart page transitions with theme-aware element transitions.
+- Search duration and result count feedback.
+- Root error page handling.
+- Login page with FAQ section and improved styling.
+
+### Changed
+
+- Consolidated navigation with top nav bar and route groups.
+- Centralized number formatting with locale-aware utility.
+- Modernized Justfile commands and simplified service management.
+- Persisted audit log state in module scope for cross-navigation caching.
+- Relative time feedback and improved tooltip customization.
+
+### Fixed
+
+- Instructor/course mismatching via build-order-independent map for association.
+- Page content clipping.
+- Backend startup delays with retry logic in auth.
+- Banner API timeouts increased to handle slow responses.
+- i64 serialization for JavaScript compatibility, fixing avatar URL display.
+- Frontend build ordering with `-e` embed flag in Justfile.
+- Login page centering and unnecessary scrollbar.
+- ts-rs serde warnings.
+
 ## [0.5.0] - 2026-01-29

 ### Added

Roadmap · +23 -14

@@ -2,34 +2,43 @@

 ## Now

-- **Notification and subscription system** - Subscribe to courses and get alerts on seat availability, waitlist movement, and detail changes (time, location, professor, seats). DB schema exists.
+- **Discord bot revival** - Audit and fix all existing commands (search, terms, ics, gcal) against the current data model. Add test coverage. Bot has been untouched since ~0.3.4 and commands may be broken.
+- **Notification and subscription system** - Subscribe to courses and get alerts on seat availability, waitlist movement, and detail changes (time, location, professor, seats). Deliver via Discord bot and web dashboard.
+- **Mobile/responsive redesign** - Hamburger nav for sidebar, responsive table column hiding, mobile-friendly admin pages. Timeline is the only area with solid mobile support; most pages need work.
 - **Professor name search filter** - Filter search results by instructor. Backend code exists but is commented out.
-- **Autocomplete for search fields** - Typeahead for course titles, course numbers, professors, and terms.
-- **Test coverage expansion** - Broaden coverage with session/rate-limiter tests and more DB integration tests.
+- **Search field autocomplete** - Typeahead for course titles, course numbers, professors, and terms.
+- **Large component extraction** - Break down CourseTable, Instructors page, and TimelineCanvas into smaller, testable subcomponents.

 ## Soon

-- **Smart time-of-day search parsing** - Support natural queries like "2 PM", "2-3 PM", "ends by 2 PM", "after 2 PM", "before 2 PM" mapped to time ranges.
-- **Section-based lookup** - Search by full section identifier, e.g. "CS 4393 001".
-- **Search result pagination** - Paginated embeds for large result sets in Discord.
+- **Bot slash command parity** - Keep Discord bot commands in sync with web features: timeline summaries, RMP lookups, audit log highlights, notification management via bot.
+- **E2E test suite** - Playwright tests for critical user flows: search, login, admin pages, timeline interaction.
+- **Settings page** - Replace placeholder with theme preferences, notification settings, default term/subject selection.
+- **Profile enhancements** - Expand from read-only stub to subscription management, saved searches, and course watchlists.
+- **Smart time-of-day search parsing** - Support natural queries like "2 PM", "ends by 2 PM", "after 2 PM" mapped to time ranges.
 - **Multi-term querying** - Query across multiple terms in a single search instead of one at a time.
-- **Historical analytics** - Track seat availability over time and visualize fill-rate trends per course or professor.
-- **Schedule builder** - Visual weekly schedule tool for assembling a conflict-free course lineup.
-- **Professor stats** - Aggregate data views: average class size, typical waitlist length, schedule patterns across semesters.
+- **Historical analytics visualization** - Build trend UI on top of existing course metrics and timeline API. Fill-rate charts per course or professor.
+- **Schedule builder** - Visual weekly schedule tool for assembling a conflict-free course lineup. Timeline visualization serves as a foundation.

 ## Eventually

+- **API rate limiting** - Rate limiter on public API endpoints. Needed before any public or external exposure.
+- **Bulk admin operations** - Batch RMP match/reject, bulk user management, data export from admin pages.
 - **Degree audit helper** - Map available courses to degree requirements and suggest what to take next.
-- **Dynamic scraper scheduling** - Adjust scrape intervals based on change frequency and course count (e.g. 2 hours per 500 courses, shorter intervals when changes are detected).
 - **DM support** - Allow the Discord bot to respond in direct messages, not just guild channels.
 - **"Classes Now" command** - Find classes currently in session based on the current day and time.
-- **CRN direct lookup** - Look up a course by its CRN without going through search.
-- **Metrics dashboard** - Surface scraper and service metrics visually on the web dashboard.
 - **Privileged error feedback** - Detailed error information surfaced to bot admins when commands fail.

 ## Done

+- **Interactive timeline visualization** - D3 canvas with pan/zoom, touch gestures, and enrollment aggregation API. *(0.6.0)*
+- **Scraper analytics dashboard** - Timeseries charts, subject monitoring, adaptive scheduling, and admin endpoints. *(0.6.0)*
+- **WebSocket job monitoring** - Real-time scrape job queue with live connection status indicators. *(0.6.0)*
+- **Course change audit log** - Field-level change tracking with smart diffing, conditional caching, and auto-refresh. *(0.6.0)*
+- **User authentication system** - Discord OAuth, sessions, admin roles, and login page. *(0.6.0)*
+- **Dynamic scraper scheduling** - Adaptive scrape intervals based on change frequency and course volume. *(0.6.0)*
+- **Metrics dashboard** - Scraper and service metrics surfaced on the web dashboard. *(0.6.0)*
+- **Subject/major search filter** - Multi-select subject filtering with searchable comboboxes. *(0.5.0)*
 - **Web course search UI** - Browser-based course search with interactive data table, sorting, pagination, and column controls. *(0.4.0)*
 - **RateMyProfessor integration** - Bulk professor sync via GraphQL with inline ratings in search results. *(0.4.0)*
-- **Subject/major search filter** - Multi-select subject filtering with searchable comboboxes. *(0.5.0)*
-- **Test coverage expansion** - Unit tests for course formatting, API client, query builder, CLI args, and config parsing. *(0.3.4–0.4.0)*
+- **Test coverage expansion** - Unit tests for course formatting, API client, query builder, CLI args, and config parsing. *(0.3.4–0.4.0)*

New SQL migration (instructor name columns) · +5 -0

@@ -0,0 +1,5 @@
+-- Add structured first/last name columns to instructors.
+-- Populated by Rust-side backfill (parse_banner_name) since we need
+-- HTML entity decoding and suffix extraction that SQL can't handle well.
+ALTER TABLE instructors ADD COLUMN first_name VARCHAR;
+ALTER TABLE instructors ADD COLUMN last_name VARCHAR;
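
For a concrete sense of what that backfill writes into the new columns, here is an illustrative snippet built on the parse_banner_name behavior shown later in this compare (the function and its expectations are real; the snippet itself is not part of the change):

use banner::data::names::parse_banner_name;

fn main() {
    // A raw Banner display_name with a suffix embedded in the last-name portion.
    let parts = parse_banner_name("LeBlanc III, H. Paul").unwrap();
    assert_eq!(parts.first, "H. Paul"); // value written to instructors.first_name
    assert_eq!(parts.last, "LeBlanc"); // value written to instructors.last_name
    assert_eq!(parts.suffix.as_deref(), Some("III")); // parsed, but this migration adds no column for it
}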

App startup · +6 -1

@@ -14,7 +14,7 @@ use sqlx::postgres::PgPoolOptions;
 use std::process::ExitCode;
 use std::sync::Arc;
 use std::time::Duration;
-use tracing::{error, info};
+use tracing::{error, info, warn};

 /// Main application struct containing all necessary components
 pub struct App {
@@ -70,6 +70,11 @@ impl App {
             .context("Failed to run database migrations")?;
         info!("Database migrations completed successfully");

+        // Backfill structured name columns for existing instructors
+        if let Err(e) = crate::data::names::backfill_instructor_names(&db_pool).await {
+            warn!(error = ?e, "Failed to backfill instructor names (non-fatal)");
+        }
+
         // Create BannerApi and AppState
         let banner_api = BannerApi::new_with_config(
             config.banner_base_url.clone(),

Tests (parse_json_with_context) · +4 -0

@@ -325,6 +325,7 @@ mod tests {
     fn test_parse_json_with_context_null_value() {
         #[derive(Debug, Deserialize)]
         struct TestStruct {
+            #[allow(dead_code)]
             name: String,
         }

@@ -363,12 +364,14 @@ mod tests {
         #[allow(dead_code)]
         #[serde(rename = "courseTitle")]
         course_title: String,
+        #[allow(dead_code)]
         faculty: Vec<Faculty>,
     }

     #[derive(Debug, Deserialize)]
     struct Faculty {
         #[serde(rename = "displayName")]
+        #[allow(dead_code)]
         display_name: String,
         #[allow(dead_code)]
         email: String,
@@ -376,6 +379,7 @@ mod tests {

     #[derive(Debug, Deserialize)]
     struct SearchResult {
+        #[allow(dead_code)]
         data: Vec<Course>,
     }


Session pool · +112 -64

@@ -11,7 +11,9 @@ use rand::distr::{Alphanumeric, SampleString};
 use reqwest_middleware::ClientWithMiddleware;
 use std::collections::{HashMap, VecDeque};
+use std::mem::ManuallyDrop;
 use std::ops::{Deref, DerefMut};
+use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, LazyLock};
 use std::time::{Duration, Instant};
 use tokio::sync::{Mutex, Notify};
@@ -121,6 +123,64 @@ impl BannerSession {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::time::Duration;
+
+    /// Verifies that cancelling `acquire()` mid-session-creation resets `is_creating`,
+    /// allowing subsequent callers to proceed rather than deadlocking.
+    #[tokio::test]
+    async fn test_acquire_not_deadlocked_after_cancellation() {
+        use tokio::sync::mpsc;
+
+        let (tx, mut rx) = mpsc::channel::<()>(10);
+
+        // Local server: /registration signals arrival via `tx`, then hangs forever.
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let addr = listener.local_addr().unwrap();
+
+        let app = axum::Router::new().route(
+            "/StudentRegistrationSsb/registration",
+            axum::routing::get(move || {
+                let tx = tx.clone();
+                async move {
+                    let _ = tx.send(()).await;
+                    std::future::pending::<&str>().await
+                }
+            }),
+        );
+        tokio::spawn(async move {
+            axum::serve(listener, app).await.unwrap();
+        });
+
+        let base_url = format!("http://{}/StudentRegistrationSsb", addr);
+        let client = reqwest_middleware::ClientBuilder::new(
+            reqwest::Client::builder()
+                .timeout(Duration::from_secs(300))
+                .build()
+                .unwrap(),
+        )
+        .build();
+
+        let pool = SessionPool::new(client, base_url);
+        let term: Term = "202620".parse().unwrap();
+
+        // First acquire: cancel once the request reaches the server.
+        tokio::select! {
+            _ = pool.acquire(term) => panic!("server hangs — acquire should never complete"),
+            _ = rx.recv() => {} // Request arrived; dropping the future simulates timeout cancellation.
+        }
+
+        // Second acquire: verify it reaches the server (i.e., is_creating was reset).
+        // The global rate limiter has a 10s period, so allow 15s for the second attempt.
+        tokio::select! {
+            _ = pool.acquire(term) => {}
+            result = tokio::time::timeout(Duration::from_secs(15), rx.recv()) => {
+                assert!(
+                    result.is_ok(),
+                    "acquire() deadlocked — is_creating was not reset after cancellation"
+                );
+            }
+        }
+    }
+
     #[test]
     fn test_new_session_creates_session() {
@@ -200,50 +260,53 @@ mod tests {
     }
 }

-/// A smart pointer that returns a BannerSession to the pool when dropped.
+/// A smart pointer that returns a `BannerSession` to the pool when dropped.
 pub struct PooledSession {
-    session: Option<BannerSession>,
-    // This Arc points directly to the term-specific pool.
+    session: ManuallyDrop<BannerSession>,
     pool: Arc<TermPool>,
 }

-impl PooledSession {
-    pub fn been_used(&self) -> bool {
-        self.session.as_ref().unwrap().been_used()
-    }
-}
-
 impl Deref for PooledSession {
     type Target = BannerSession;
     fn deref(&self) -> &Self::Target {
-        // The option is only ever None after drop is called, so this is safe.
-        self.session.as_ref().unwrap()
+        &self.session
     }
 }

 impl DerefMut for PooledSession {
     fn deref_mut(&mut self) -> &mut Self::Target {
-        self.session.as_mut().unwrap()
+        &mut self.session
     }
 }

-/// The magic happens here: when the guard goes out of scope, this is called.
 impl Drop for PooledSession {
     fn drop(&mut self) {
-        if let Some(session) = self.session.take() {
-            let pool = self.pool.clone();
-            // Since drop() cannot be async, we spawn a task to return the session.
-            tokio::spawn(async move {
-                pool.release(session).await;
-            });
-        }
+        // SAFETY: `drop` is called exactly once by Rust's drop semantics,
+        // so `ManuallyDrop::take` is guaranteed to see a valid value.
+        let session = unsafe { ManuallyDrop::take(&mut self.session) };
+        let pool = self.pool.clone();
+        tokio::spawn(async move {
+            pool.release(session).await;
+        });
     }
 }

 pub struct TermPool {
     sessions: Mutex<VecDeque<BannerSession>>,
     notifier: Notify,
-    is_creating: Mutex<bool>,
+    is_creating: AtomicBool,
+}
+
+/// RAII guard ensuring `is_creating` is reset on drop for cancellation safety.
+/// Without this, a cancelled `acquire()` future would leave the flag set permanently,
+/// deadlocking all subsequent callers.
+struct CreatingGuard(Arc<TermPool>);
+
+impl Drop for CreatingGuard {
+    fn drop(&mut self) {
+        self.0.is_creating.store(false, Ordering::Release);
+        self.0.notifier.notify_waiters();
+    }
 }

 impl TermPool {
@@ -251,7 +314,7 @@ impl TermPool {
         Self {
             sessions: Mutex::new(VecDeque::new()),
             notifier: Notify::new(),
-            is_creating: Mutex::new(false),
+            is_creating: AtomicBool::new(false),
         }
     }

@@ -308,7 +371,7 @@ impl SessionPool {
             if let Some(session) = queue.pop_front() {
                 if !session.is_expired() {
                     return Ok(PooledSession {
-                        session: Some(session),
+                        session: ManuallyDrop::new(session),
                         pool: Arc::clone(&term_pool),
                     });
                 } else {
@@ -317,45 +380,38 @@ impl SessionPool {
             }
         } // MutexGuard is dropped, lock is released.

-        // Slow path: No sessions available. We must either wait or become the creator.
-        let mut is_creating_guard = term_pool.is_creating.lock().await;
-        if *is_creating_guard {
-            // Another task is already creating a session. Release the lock and wait.
-            drop(is_creating_guard);
+        // Slow path: wait for an in-progress creation, or become the creator.
+        if term_pool.is_creating.load(Ordering::Acquire) {
             if !waited_for_creation {
                 trace!("Waiting for another task to create session");
                 waited_for_creation = true;
             }
             term_pool.notifier.notified().await;
-            // Loop back to the top to try the fast path again.
             continue;
         }

-        // This task is now the designated creator.
-        *is_creating_guard = true;
-        drop(is_creating_guard);
+        // CAS to become the designated creator.
+        if term_pool
+            .is_creating
+            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
+            .is_err()
+        {
+            continue; // Lost the race — loop back and wait.
+        }
+
+        // Guard resets is_creating on drop (including cancellation).
+        let creating_guard = CreatingGuard(Arc::clone(&term_pool));

-        // Race: wait for a session to be returned OR for the rate limiter to allow a new one.
         trace!("Pool empty, creating new session");
         tokio::select! {
             _ = term_pool.notifier.notified() => {
-                // A session was returned while we were waiting!
-                // We are no longer the creator. Reset the flag and loop to race for the new session.
-                let mut guard = term_pool.is_creating.lock().await;
-                *guard = false;
-                drop(guard);
+                // A session was returned — release creator role and race for it.
+                drop(creating_guard);
                 continue;
             }
             _ = SESSION_CREATION_RATE_LIMITER.until_ready() => {
-                // The rate limit has elapsed. It's our job to create the session.
                 let new_session_result = self.create_session(&term).await;
-                // After creation, we are no longer the creator. Reset the flag
-                // and notify all other waiting tasks.
-                let mut guard = term_pool.is_creating.lock().await;
-                *guard = false;
-                drop(guard);
-                term_pool.notifier.notify_waiters();
+                drop(creating_guard);

                 match new_session_result {
                     Ok(new_session) => {
@@ -366,12 +422,11 @@ impl SessionPool {
                             "Created new session"
                         );
                         return Ok(PooledSession {
-                            session: Some(new_session),
+                            session: ManuallyDrop::new(new_session),
                             pool: term_pool,
                         });
                     }
                     Err(e) => {
-                        // Propagate the error if session creation failed.
                         return Err(e.context("Failed to create new session in pool"));
                     }
                 }
@@ -380,8 +435,8 @@ impl SessionPool {
         }
     }

-    /// Sets up initial session cookies by making required Banner API requests
-    pub async fn create_session(&self, term: &Term) -> Result<BannerSession> {
+    /// Sets up initial session cookies by making required Banner API requests.
+    async fn create_session(&self, term: &Term) -> Result<BannerSession> {
         info!(term = %term, "setting up banner session");

         // The 'register' or 'search' registration page
@@ -392,22 +447,15 @@ impl SessionPool {
             .await?;
         // TODO: Validate success

-        let cookies = initial_registration
+        let cookies: HashMap<String, String> = initial_registration
             .headers()
             .get_all("Set-Cookie")
             .iter()
-            .filter_map(|header_value| {
-                if let Ok(cookie_str) = header_value.to_str() {
-                    if let Ok(cookie) = Cookie::parse(cookie_str) {
-                        Some((cookie.name().to_string(), cookie.value().to_string()))
-                    } else {
-                        None
-                    }
-                } else {
-                    None
-                }
-            })
-            .collect::<HashMap<String, String>>();
+            .filter_map(|v| {
+                let c = Cookie::parse(v.to_str().ok()?).ok()?;
+                Some((c.name().to_string(), c.value().to_string()))
+            })
+            .collect();

         let jsessionid = cookies
             .get("JSESSIONID")
@@ -494,8 +542,8 @@ impl SessionPool {
         Ok(terms)
     }

-    /// Selects a term for the current session
-    pub async fn select_term(
+    /// Selects a term for the current session.
+    async fn select_term(
         &self,
         term: &str,
         unique_session_id: &str,
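
The CreatingGuard plus AtomicBool change is the core of the deadlock fix: the flag reset lives in Drop, so it runs whether the creator task completes, fails, or is cancelled mid-await, and the compare-and-swap guarantees a single creator. A minimal standalone sketch of the pattern (names are illustrative, not this crate's API):

use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};

/// Resets the shared flag when dropped. Drop runs even if the owning
/// future is cancelled, which is what makes the pattern deadlock-free.
struct FlagGuard(Arc<AtomicBool>);

impl Drop for FlagGuard {
    fn drop(&mut self) {
        self.0.store(false, Ordering::Release);
    }
}

/// Compare-and-swap so that exactly one caller flips false -> true
/// and walks away holding the guard.
fn try_become_creator(flag: &Arc<AtomicBool>) -> Option<FlagGuard> {
    flag.compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
        .ok()
        .map(|_| FlagGuard(Arc::clone(flag)))
}

fn main() {
    let flag = Arc::new(AtomicBool::new(false));
    let guard = try_become_creator(&flag).expect("first caller becomes creator");
    assert!(try_become_creator(&flag).is_none()); // everyone else must wait
    drop(guard); // completion, failure, and cancellation all funnel through here
    assert!(try_become_creator(&flag).is_some()); // flag was reset: no deadlock
}

In the pool itself the guard also calls notify_waiters() on drop, so blocked acquirers immediately re-check the flag instead of sleeping on a notification that already fired.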

Instructor batch upsert · +16 -3

@@ -2,6 +2,7 @@

 use crate::banner::Course;
 use crate::data::models::{DbMeetingTime, UpsertCounts};
+use crate::data::names::parse_banner_name;
 use crate::error::Result;
 use sqlx::PgConnection;
 use sqlx::PgPool;
@@ -628,6 +629,8 @@ async fn upsert_instructors(
 ) -> Result<HashMap<String, i32>> {
     let mut seen = HashSet::new();
     let mut display_names: Vec<&str> = Vec::new();
+    let mut first_names: Vec<Option<String>> = Vec::new();
+    let mut last_names: Vec<Option<String>> = Vec::new();
     let mut emails_lower: Vec<String> = Vec::new();
     let mut skipped_no_email = 0u32;

@@ -636,7 +639,10 @@ async fn upsert_instructors(
         if let Some(email) = &faculty.email_address {
             let email_lower = email.to_lowercase();
             if seen.insert(email_lower.clone()) {
+                let parts = parse_banner_name(&faculty.display_name);
                 display_names.push(faculty.display_name.as_str());
+                first_names.push(parts.as_ref().map(|p| p.first.clone()));
+                last_names.push(parts.as_ref().map(|p| p.last.clone()));
                 emails_lower.push(email_lower);
             }
         } else {
@@ -657,18 +663,25 @@ async fn upsert_instructors(
     }

     let email_refs: Vec<&str> = emails_lower.iter().map(|s| s.as_str()).collect();
+    let first_name_refs: Vec<Option<&str>> = first_names.iter().map(|s| s.as_deref()).collect();
+    let last_name_refs: Vec<Option<&str>> = last_names.iter().map(|s| s.as_deref()).collect();
+
     let rows: Vec<(i32, String)> = sqlx::query_as(
         r#"
-        INSERT INTO instructors (display_name, email)
-        SELECT * FROM UNNEST($1::text[], $2::text[])
+        INSERT INTO instructors (display_name, email, first_name, last_name)
+        SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[], $4::text[])
         ON CONFLICT (email)
-        DO UPDATE SET display_name = EXCLUDED.display_name
+        DO UPDATE SET
+            display_name = EXCLUDED.display_name,
+            first_name = EXCLUDED.first_name,
+            last_name = EXCLUDED.last_name
         RETURNING id, email
         "#,
     )
     .bind(&display_names)
     .bind(&email_refs)
+    .bind(&first_name_refs)
+    .bind(&last_name_refs)
     .fetch_all(&mut *conn)
     .await
    .map_err(|e| anyhow::anyhow!("Failed to batch upsert instructors: {}", e))?;
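
The UNNEST form keeps the whole batch in one round trip: Postgres zips the parallel arrays positionally into rows, so adding a column is just one more array bind and one more ::text[] in the statement, which is exactly what this diff does for first_name and last_name. A reduced sketch of the same pattern against a hypothetical people(email, name) table (table and column names are illustrative):

use sqlx::PgPool;

/// Batch upsert: one statement regardless of how many rows are in the batch.
async fn upsert_people(
    pool: &PgPool,
    emails: &[String],
    names: &[String],
) -> sqlx::Result<Vec<(i32, String)>> {
    // UNNEST($1, $2) pairs emails[i] with names[i] to form the i-th row.
    sqlx::query_as(
        r#"
        INSERT INTO people (email, name)
        SELECT * FROM UNNEST($1::text[], $2::text[])
        ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name
        RETURNING id, email
        "#,
    )
    .bind(emails)
    .bind(names)
    .fetch_all(pool)
    .await
}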

Data module declarations · +1 -0

@@ -3,6 +3,7 @@
 pub mod batch;
 pub mod courses;
 pub mod models;
+pub mod names;
 pub mod reference;
 pub mod rmp;
 pub mod rmp_matching;

Instructor model · +2 -0

@@ -103,6 +103,8 @@ pub struct Instructor {
     pub display_name: String,
     pub email: String,
     pub rmp_match_status: String,
+    pub first_name: Option<String>,
+    pub last_name: Option<String>,
 }

 #[allow(dead_code)]

New module: name parsing (crate::data::names) · +728 -0 (listing truncated in the mirrored compare view)

@@ -0,0 +1,728 @@
+//! Name parsing, normalization, and matching utilities.
+//!
+//! Handles the mismatch between Banner's single `display_name` ("Last, First Middle")
+//! and RMP's separate `first_name`/`last_name` fields, plus data quality issues
+//! from both sources (HTML entities, accents, nicknames, suffixes, junk).
+
+use sqlx::PgPool;
+use tracing::{info, warn};
+use unicode_normalization::UnicodeNormalization;
+
+/// Known name suffixes to extract from the last-name portion.
+const SUFFIXES: &[&str] = &["iv", "iii", "ii", "jr", "sr"];
+
+/// Parsed, cleaned name components.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct NameParts {
+    /// Cleaned display-quality first name(s): "H. Paul", "María"
+    pub first: String,
+    /// Cleaned display-quality last name: "O'Brien", "LeBlanc"
+    pub last: String,
+    /// Middle name/initial if detected: "Manuel", "L."
+    pub middle: Option<String>,
+    /// Suffix if detected: "III", "Jr"
+    pub suffix: Option<String>,
+    /// Nicknames extracted from parentheses: ["Ken"], ["Qian"]
+    pub nicknames: Vec<String>,
+}
+
+/// Decode common HTML entities found in Banner data.
+///
+/// Handles both named entities (`&amp;`, `&uuml;`) and numeric references
+/// (`&#39;`, `&#8217;`).
+fn decode_html_entities(s: &str) -> String {
+    if !s.contains('&') {
+        return s.to_string();
+    }
+    htmlize::unescape(s).to_string()
+}
+
+/// Extract parenthesized nicknames from a name string.
+///
+/// `"William (Ken)"` → `("William", vec!["Ken"])`
+/// `"Guenevere (Qian)"` → `("Guenevere", vec!["Qian"])`
+/// `"John (jack) C."` → `("John C.", vec!["jack"])`
+fn extract_nicknames(s: &str) -> (String, Vec<String>) {
+    let mut nicknames = Vec::new();
+    let mut cleaned = String::with_capacity(s.len());
+    let mut chars = s.chars().peekable();
+
+    while let Some(ch) = chars.next() {
+        if ch == '(' {
+            let mut nick = String::new();
+            for inner in chars.by_ref() {
+                if inner == ')' {
+                    break;
+                }
+                nick.push(inner);
+            }
+            let nick = nick.trim().to_string();
+            if !nick.is_empty() {
+                nicknames.push(nick);
+            }
+        } else if ch == '"' || ch == '\u{201C}' || ch == '\u{201D}' {
+            // Extract quoted nicknames: Thomas "Butch" → nickname "Butch"
+            let mut nick = String::new();
+            for inner in chars.by_ref() {
+                if inner == '"' || inner == '\u{201C}' || inner == '\u{201D}' {
+                    break;
+                }
+                nick.push(inner);
+            }
+            let nick = nick.trim().to_string();
+            if !nick.is_empty() {
+                nicknames.push(nick);
+            }
+        } else {
+            cleaned.push(ch);
+        }
+    }
+
+    // Collapse multiple spaces left by extraction
+    let cleaned = collapse_whitespace(&cleaned);
+    (cleaned, nicknames)
+}
+
+/// Extract a suffix (Jr, Sr, II, III, IV) from the last-name portion.
+///
+/// `"LeBlanc III"` → `("LeBlanc", Some("III"))`
+/// `"Smith Jr."` → `("Smith", Some("Jr."))`
+fn extract_suffix(last: &str) -> (String, Option<String>) {
+    // Try to match the last token as a suffix
+    let tokens: Vec<&str> = last.split_whitespace().collect();
+    if tokens.len() < 2 {
+        return (last.to_string(), None);
+    }
+
+    let candidate = tokens.last().unwrap();
+    let candidate_normalized = candidate.to_lowercase().trim_end_matches('.').to_string();
+
+    if SUFFIXES.contains(&candidate_normalized.as_str()) {
+        let name_part = tokens[..tokens.len() - 1].join(" ");
+        return (name_part, Some(candidate.to_string()));
+    }
+
+    (last.to_string(), None)
+}
+
+/// Strip junk commonly found in RMP name fields.
+///
+/// - Trailing commas: `"Cronenberger,"` → `"Cronenberger"`
+/// - Email addresses: `"Neel.Baumgardner@utsa.edu"` → `""` (returns empty)
+fn strip_junk(s: &str) -> String {
+    let s = s.trim();
+
+    // If the string looks like an email, return empty
+    if s.contains('@') && s.contains('.') && !s.contains(' ') {
+        return String::new();
+    }
+
+    // Strip trailing commas
+    s.trim_end_matches(',').trim().to_string()
+}
+
+/// Collapse runs of whitespace into single spaces and trim.
+fn collapse_whitespace(s: &str) -> String {
+    s.split_whitespace().collect::<Vec<_>>().join(" ")
+}
+
+/// Parse a Banner `display_name` ("Last, First Middle") into structured parts.
+///
+/// Handles HTML entities, suffixes, and multi-token names.
+///
+/// # Examples
+///
+/// ```
+/// use banner::data::names::parse_banner_name;
+///
+/// let parts = parse_banner_name("O'Brien, Erin").unwrap();
+/// assert_eq!(parts.first, "Erin");
+/// assert_eq!(parts.last, "O'Brien");
+/// ```
+pub fn parse_banner_name(display_name: &str) -> Option<NameParts> {
+    // 1. Decode HTML entities
+    let decoded = decode_html_entities(display_name);
+
+    // 2. Split on first comma
+    let (last_part, first_part) = decoded.split_once(',')?;
+    let last_part = last_part.trim();
+    let first_part = first_part.trim();
+
+    if last_part.is_empty() || first_part.is_empty() {
+        return None;
+    }
+
+    // 3. Extract suffix from last name
+    let (last_clean, suffix) = extract_suffix(last_part);
+
+    // 4. Parse first-name portion: first token(s) + optional middle
+    //    Banner format is "First Middle", so we keep all tokens as first_name
+    //    to support "H. Paul" style names
+    let first_clean = collapse_whitespace(first_part);
+
+    Some(NameParts {
+        first: first_clean,
+        last: last_clean,
+        middle: None, // Banner doesn't clearly delineate middle vs first
+        suffix,
+        nicknames: Vec::new(), // Banner doesn't include nicknames
+    })
+}
+
+/// Parse RMP professor name fields into structured parts.
+///
+/// Handles junk data, nicknames in parentheses/quotes, and suffixes.
+///
+/// # Examples
+///
+/// ```
+/// use banner::data::names::parse_rmp_name;
+///
+/// let parts = parse_rmp_name("William (Ken)", "Burchenal").unwrap();
+/// assert_eq!(parts.first, "William");
+/// assert_eq!(parts.nicknames, vec!["Ken"]);
+/// ```
+pub fn parse_rmp_name(first_name: &str, last_name: &str) -> Option<NameParts> {
+    let first_cleaned = strip_junk(first_name);
+    let last_cleaned = strip_junk(last_name);
+
+    if first_cleaned.is_empty() || last_cleaned.is_empty() {
+        return None;
+    }
+
+    // Extract nicknames from parens/quotes in first name
+    let (first_no_nicks, nicknames) = extract_nicknames(&first_cleaned);
+    let first_final = collapse_whitespace(&first_no_nicks);
+
+    // Extract suffix from last name
+    let (last_final, suffix) = extract_suffix(&last_cleaned);
+
+    if first_final.is_empty() || last_final.is_empty() {
+        return None;
+    }
+
+    Some(NameParts {
+        first: first_final,
+        last: last_final,
+        middle: None,
+        suffix,
+        nicknames,
+    })
+}
+
/// Normalize a name string for matching comparison.
|
||||||
|
///
|
||||||
|
/// Pipeline: lowercase → NFD decompose → strip combining marks →
|
||||||
|
/// strip punctuation/hyphens → collapse whitespace → trim.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use banner::data::names::normalize_for_matching;
|
||||||
|
///
|
||||||
|
/// assert_eq!(normalize_for_matching("García"), "garcia");
|
||||||
|
/// assert_eq!(normalize_for_matching("O'Brien"), "obrien");
|
||||||
|
/// assert_eq!(normalize_for_matching("Aguirre-Mesa"), "aguirremesa");
|
||||||
|
/// ```
|
||||||
|
/// Normalize a name string for matching index keys.
|
||||||
|
///
|
||||||
|
/// Pipeline: lowercase → NFD decompose → strip combining marks →
|
||||||
|
/// strip ALL punctuation, hyphens, and whitespace.
|
||||||
|
///
|
||||||
|
/// This produces a compact, space-free string so that "Aguirre Mesa" (Banner)
|
||||||
|
/// and "Aguirre-Mesa" (RMP) both become "aguirremesa".
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use banner::data::names::normalize_for_matching;
|
||||||
|
///
|
||||||
|
/// assert_eq!(normalize_for_matching("García"), "garcia");
|
||||||
|
/// assert_eq!(normalize_for_matching("O'Brien"), "obrien");
|
||||||
|
/// assert_eq!(normalize_for_matching("Aguirre-Mesa"), "aguirremesa");
|
||||||
|
/// assert_eq!(normalize_for_matching("Aguirre Mesa"), "aguirremesa");
|
||||||
|
/// ```
|
||||||
|
pub fn normalize_for_matching(s: &str) -> String {
|
||||||
|
s.to_lowercase()
|
||||||
|
.nfd()
|
||||||
|
.filter(|c| {
|
||||||
|
// Keep only non-combining alphabetic characters — strip everything else
|
||||||
|
c.is_alphabetic() && !unicode_normalization::char::is_combining_mark(*c)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate all matching index keys for a parsed name.
|
||||||
|
///
|
||||||
|
/// For a name like "H. Paul" / "LeBlanc" with no nicknames, generates:
|
||||||
|
/// - `("leblanc", "h paul")` — full normalized first
|
||||||
|
/// - `("leblanc", "paul")` — individual token (if multi-token)
|
||||||
|
/// - `("leblanc", "h")` — individual token (if multi-token)
|
||||||
|
///
|
||||||
|
/// For a name like "William" / "Burchenal" with nickname "Ken":
|
||||||
|
/// - `("burchenal", "william")` — primary
|
||||||
|
/// - `("burchenal", "ken")` — nickname variant
|
||||||
|
pub fn matching_keys(parts: &NameParts) -> Vec<(String, String)> {
|
||||||
|
let norm_last = normalize_for_matching(&parts.last);
|
||||||
|
if norm_last.is_empty() {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut keys = Vec::new();
|
||||||
|
let mut seen = std::collections::HashSet::new();
|
||||||
|
|
||||||
|
// Primary key: full first name (all spaces stripped)
|
||||||
|
let norm_first_full = normalize_for_matching(&parts.first);
|
||||||
|
if !norm_first_full.is_empty() && seen.insert(norm_first_full.clone()) {
|
||||||
|
keys.push((norm_last.clone(), norm_first_full));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Individual tokens from the display-form first name
|
||||||
|
// (split before full normalization so we can generate per-token keys)
|
||||||
|
let first_tokens: Vec<&str> = parts.first.split_whitespace().collect();
|
||||||
|
if first_tokens.len() > 1 {
|
||||||
|
for token in &first_tokens {
|
||||||
|
let norm_token = normalize_for_matching(token);
|
||||||
|
if !norm_token.is_empty() && seen.insert(norm_token.clone()) {
|
||||||
|
keys.push((norm_last.clone(), norm_token));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nickname variants
|
||||||
|
for nick in &parts.nicknames {
|
||||||
|
let norm_nick = normalize_for_matching(nick);
|
||||||
|
if !norm_nick.is_empty() && seen.insert(norm_nick.clone()) {
|
||||||
|
keys.push((norm_last.clone(), norm_nick));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
keys
|
||||||
|
}
|
||||||
|
|
||||||
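Those keys are what makes nickname matching work downstream. A small illustrative usage, assuming the banner::data::names paths from the doc examples above:

use banner::data::names::{matching_keys, parse_rmp_name};

fn main() {
    // RMP lists the professor as "William (Ken) Burchenal".
    let rmp = parse_rmp_name("William (Ken)", "Burchenal").unwrap();
    let keys = matching_keys(&rmp);

    // Both the formal first name and the extracted nickname become index
    // keys, so a Banner record using either spelling can find this person.
    assert!(keys.contains(&("burchenal".to_string(), "william".to_string())));
    assert!(keys.contains(&("burchenal".to_string(), "ken".to_string())));
}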
+/// Backfill `first_name`/`last_name` columns for all instructors that have
+/// a `display_name` but NULL structured name fields.
+///
+/// Parses each `display_name` using [`parse_banner_name`] and updates the row.
+/// Logs warnings for any names that fail to parse.
+pub async fn backfill_instructor_names(db_pool: &PgPool) -> crate::error::Result<()> {
+    let rows: Vec<(i32, String)> = sqlx::query_as(
+        "SELECT id, display_name FROM instructors WHERE first_name IS NULL OR last_name IS NULL",
+    )
+    .fetch_all(db_pool)
+    .await?;
+
+    if rows.is_empty() {
+        return Ok(());
+    }
+
+    let total = rows.len();
+    let mut ids: Vec<i32> = Vec::with_capacity(total);
+    let mut firsts: Vec<String> = Vec::with_capacity(total);
+    let mut lasts: Vec<String> = Vec::with_capacity(total);
+    let mut unparseable = 0usize;
+
+    for (id, display_name) in &rows {
+        match parse_banner_name(display_name) {
+            Some(parts) => {
+                ids.push(*id);
+                firsts.push(parts.first);
+                lasts.push(parts.last);
+            }
+            None => {
+                warn!(
+                    id,
+                    display_name, "Failed to parse instructor display_name during backfill"
+                );
+                unparseable += 1;
+            }
+        }
+    }
+
+    if !ids.is_empty() {
+        let first_refs: Vec<&str> = firsts.iter().map(|s| s.as_str()).collect();
+        let last_refs: Vec<&str> = lasts.iter().map(|s| s.as_str()).collect();
+
+        sqlx::query(
+            r#"
+            UPDATE instructors i
+            SET first_name = v.first_name, last_name = v.last_name
+            FROM UNNEST($1::int4[], $2::text[], $3::text[])
+                AS v(id, first_name, last_name)
+            WHERE i.id = v.id
+            "#,
+        )
+        .bind(&ids)
+        .bind(&first_refs)
+        .bind(&last_refs)
+        .execute(db_pool)
+        .await?;
+    }
+
+    info!(
+        total,
+        updated = ids.len(),
+        unparseable,
+        "Instructor name backfill complete"
+    );
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // -----------------------------------------------------------------------
+    // HTML entity decoding
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn decode_apostrophe_entity() {
+        assert_eq!(decode_html_entities("O&#39;Brien"), "O'Brien");
+    }
+
+    #[test]
+    fn decode_umlaut_entity() {
+        assert_eq!(decode_html_entities("B&uuml;lent"), "Bülent");
+    }
+
+    #[test]
+    fn decode_no_entities() {
+        assert_eq!(decode_html_entities("Smith"), "Smith");
+    }
+
+    // -----------------------------------------------------------------------
+    // Nickname extraction
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn extract_paren_nickname() {
+        let (cleaned, nicks) = extract_nicknames("William (Ken)");
+        assert_eq!(cleaned, "William");
+        assert_eq!(nicks, vec!["Ken"]);
+    }
+
+    #[test]
+    fn extract_quoted_nickname() {
+        let (cleaned, nicks) = extract_nicknames("Thomas \"Butch\"");
+        assert_eq!(cleaned, "Thomas");
+        assert_eq!(nicks, vec!["Butch"]);
+    }
+
+    #[test]
+    fn extract_paren_with_extra_text() {
+        let (cleaned, nicks) = extract_nicknames("John (jack) C.");
+        assert_eq!(cleaned, "John C.");
+        assert_eq!(nicks, vec!["jack"]);
+    }
+
+    #[test]
+    fn extract_no_nicknames() {
+        let (cleaned, nicks) = extract_nicknames("Maria Elena");
+        assert_eq!(cleaned, "Maria Elena");
+        assert!(nicks.is_empty());
+    }
+
+    // -----------------------------------------------------------------------
+    // Suffix extraction
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn extract_suffix_iii() {
+        let (name, suffix) = extract_suffix("LeBlanc III");
+        assert_eq!(name, "LeBlanc");
+        assert_eq!(suffix, Some("III".to_string()));
+    }
+
+    #[test]
+    fn extract_suffix_jr_period() {
+        let (name, suffix) = extract_suffix("Smith Jr.");
+        assert_eq!(name, "Smith");
+        assert_eq!(suffix, Some("Jr.".to_string()));
+    }
+
+    #[test]
+    fn extract_no_suffix() {
+        let (name, suffix) = extract_suffix("García");
+        assert_eq!(name, "García");
+        assert_eq!(suffix, None);
+    }
+
+    // -----------------------------------------------------------------------
+    // Junk stripping
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn strip_trailing_comma() {
+        assert_eq!(strip_junk("Cronenberger,"), "Cronenberger");
+    }
+
+    #[test]
+    fn strip_email_address() {
+        assert_eq!(strip_junk("Neel.Baumgardner@utsa.edu"), "");
+    }
+
+    #[test]
+    fn strip_clean_name() {
+        assert_eq!(strip_junk(" Maria "), "Maria");
+    }
+
+    // -----------------------------------------------------------------------
+    // normalize_for_matching
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn normalize_strips_accents() {
+        assert_eq!(normalize_for_matching("García"), "garcia");
+    }
+
+    #[test]
+    fn normalize_strips_apostrophe() {
+        assert_eq!(normalize_for_matching("O'Brien"), "obrien");
+    }
+
+    #[test]
+    fn normalize_strips_hyphen() {
+        assert_eq!(normalize_for_matching("Aguirre-Mesa"), "aguirremesa");
+    }
+
+    #[test]
+    fn normalize_tilde_n() {
+        assert_eq!(normalize_for_matching("Muñoz"), "munoz");
+    }
+
+    #[test]
+    fn normalize_umlaut() {
+        assert_eq!(normalize_for_matching("Müller"), "muller");
+    }
+
+    #[test]
+    fn normalize_period() {
+        assert_eq!(normalize_for_matching("H. Paul"), "hpaul");
+    }
+
+    #[test]
+    fn normalize_strips_spaces() {
+        assert_eq!(normalize_for_matching("Mary Lou"), "marylou");
+    }
+
+    // -----------------------------------------------------------------------
+    // parse_banner_name
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn banner_standard_name() {
+        let p = parse_banner_name("Smith, John").unwrap();
+        assert_eq!(p.first, "John");
+        assert_eq!(p.last, "Smith");
+        assert_eq!(p.suffix, None);
+    }
+
+    #[test]
+    fn banner_html_entity_apostrophe() {
+        let p = parse_banner_name("O&#39;Brien, Erin").unwrap();
+        assert_eq!(p.first, "Erin");
+        assert_eq!(p.last, "O'Brien");
+    }
+
+    #[test]
+    fn banner_html_entity_umlaut() {
+        let p = parse_banner_name("Temel, B&uuml;lent").unwrap();
+        assert_eq!(p.first, "Bülent");
+        assert_eq!(p.last, "Temel");
+    }
+
+    #[test]
+    fn banner_suffix_iii() {
+        let p = parse_banner_name("LeBlanc III, H. Paul").unwrap();
+        assert_eq!(p.first, "H. Paul");
+        assert_eq!(p.last, "LeBlanc");
+        assert_eq!(p.suffix, Some("III".to_string()));
+    }
+
+    #[test]
+    fn banner_suffix_ii() {
+        let p = parse_banner_name("Ellis II, Ronald").unwrap();
+        assert_eq!(p.first, "Ronald");
+        assert_eq!(p.last, "Ellis");
+        assert_eq!(p.suffix, Some("II".to_string()));
+    }
+
+    #[test]
+    fn banner_multi_word_last() {
+        let p = parse_banner_name("Aguirre Mesa, Andres").unwrap();
+        assert_eq!(p.first, "Andres");
+        assert_eq!(p.last, "Aguirre Mesa");
+    }
+
+    #[test]
+    fn banner_hyphenated_last() {
+        let p = parse_banner_name("Abu-Lail, Nehal").unwrap();
+        assert_eq!(p.first, "Nehal");
+        assert_eq!(p.last, "Abu-Lail");
+    }
+
+    #[test]
+    fn banner_with_middle_name() {
+        let p = parse_banner_name("Smith, John David").unwrap();
+        assert_eq!(p.first, "John David");
+        assert_eq!(p.last, "Smith");
+    }
+
+    #[test]
+    fn banner_no_comma() {
+        assert!(parse_banner_name("SingleName").is_none());
+    }
+
+    #[test]
+    fn banner_empty_first() {
+        assert!(parse_banner_name("Smith,").is_none());
+    }
+
+    #[test]
+    fn banner_empty_last() {
+        assert!(parse_banner_name(", John").is_none());
+    }
+
+    // -----------------------------------------------------------------------
+    // parse_rmp_name
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn rmp_standard_name() {
+        let p = parse_rmp_name("John", "Smith").unwrap();
+        assert_eq!(p.first, "John");
+        assert_eq!(p.last, "Smith");
+    }
+
+    #[test]
+    fn rmp_with_nickname() {
+        let p = parse_rmp_name("William (Ken)", "Burchenal").unwrap();
+        assert_eq!(p.first, "William");
+        assert_eq!(p.nicknames, vec!["Ken"]);
+    }
+
+    #[test]
+    fn rmp_trailing_comma_last() {
+        let p = parse_rmp_name("J.", "Cronenberger,").unwrap();
+        assert_eq!(p.last, "Cronenberger");
+    }
+
+    #[test]
+    fn rmp_email_in_first() {
+        assert!(parse_rmp_name("Neel.Baumgardner@utsa.edu", "Baumgardner").is_none());
+    }
+
+    #[test]
+    fn rmp_suffix_in_last() {
+        let p = parse_rmp_name("H. Paul", "LeBlanc III").unwrap();
+        assert_eq!(p.first, "H. Paul");
+        assert_eq!(p.last, "LeBlanc");
+        assert_eq!(p.suffix, Some("III".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rmp_quoted_nickname() {
|
||||||
|
let p = parse_rmp_name("Thomas \"Butch\"", "Matjeka").unwrap();
|
||||||
|
assert_eq!(p.first, "Thomas");
|
||||||
|
assert_eq!(p.nicknames, vec!["Butch"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rmp_accented_last() {
|
||||||
|
let p = parse_rmp_name("Liliana", "Saldaña").unwrap();
|
||||||
|
assert_eq!(p.last, "Saldaña");
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// matching_keys
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keys_simple_name() {
|
||||||
|
let parts = NameParts {
|
||||||
|
first: "John".into(),
|
||||||
|
last: "Smith".into(),
|
||||||
|
middle: None,
|
||||||
|
suffix: None,
|
||||||
|
nicknames: vec![],
|
||||||
|
};
|
||||||
|
let keys = matching_keys(&parts);
|
||||||
|
assert_eq!(keys, vec![("smith".into(), "john".into())]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keys_multi_token_first() {
|
||||||
|
let parts = NameParts {
|
||||||
|
first: "H. Paul".into(),
|
||||||
|
last: "LeBlanc".into(),
|
||||||
|
middle: None,
|
||||||
|
suffix: Some("III".into()),
|
||||||
|
nicknames: vec![],
|
||||||
|
};
|
||||||
|
let keys = matching_keys(&parts);
|
||||||
|
assert!(keys.contains(&("leblanc".into(), "hpaul".into())));
|
||||||
|
assert!(keys.contains(&("leblanc".into(), "paul".into())));
|
||||||
|
assert!(keys.contains(&("leblanc".into(), "h".into())));
|
||||||
|
assert_eq!(keys.len(), 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keys_with_nickname() {
|
||||||
|
let parts = NameParts {
|
||||||
|
first: "William".into(),
|
||||||
|
last: "Burchenal".into(),
|
||||||
|
middle: None,
|
||||||
|
suffix: None,
|
||||||
|
nicknames: vec!["Ken".into()],
|
||||||
|
};
|
||||||
|
let keys = matching_keys(&parts);
|
||||||
|
assert!(keys.contains(&("burchenal".into(), "william".into())));
|
||||||
|
assert!(keys.contains(&("burchenal".into(), "ken".into())));
|
||||||
|
assert_eq!(keys.len(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keys_hyphenated_last() {
|
||||||
|
let parts = parse_banner_name("Aguirre-Mesa, Andres").unwrap();
|
||||||
|
let keys = matching_keys(&parts);
|
||||||
|
// Hyphen removed: "aguirremesa"
|
||||||
|
assert!(keys.contains(&("aguirremesa".into(), "andres".into())));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keys_accented_name() {
|
||||||
|
let parts = parse_rmp_name("Liliana", "Saldaña").unwrap();
|
||||||
|
let keys = matching_keys(&parts);
|
||||||
|
assert!(keys.contains(&("saldana".into(), "liliana".into())));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keys_cross_source_match() {
|
||||||
|
// Banner: "Aguirre Mesa, Andres" → last="Aguirre Mesa"
|
||||||
|
let banner = parse_banner_name("Aguirre Mesa, Andres").unwrap();
|
||||||
|
let banner_keys = matching_keys(&banner);
|
||||||
|
|
||||||
|
// RMP: "Andres" / "Aguirre-Mesa" → last="Aguirre-Mesa"
|
||||||
|
let rmp = parse_rmp_name("Andres", "Aguirre-Mesa").unwrap();
|
||||||
|
let rmp_keys = matching_keys(&rmp);
|
||||||
|
|
||||||
|
// Both should normalize to ("aguirremesa", "andres")
|
||||||
|
assert!(banner_keys.iter().any(|k| rmp_keys.contains(k)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keys_accent_cross_match() {
|
||||||
|
// Banner: "García, José" (if Banner ever has accents)
|
||||||
|
let banner = parse_banner_name("Garcia, Jose").unwrap();
|
||||||
|
let banner_keys = matching_keys(&banner);
|
||||||
|
|
||||||
|
// RMP: "José" / "García"
|
||||||
|
let rmp = parse_rmp_name("José", "García").unwrap();
|
||||||
|
let rmp_keys = matching_keys(&rmp);
|
||||||
|
|
||||||
|
// Both normalize to ("garcia", "jose")
|
||||||
|
assert!(banner_keys.iter().any(|k| rmp_keys.contains(k)));
|
||||||
|
}
|
||||||
|
}
|
||||||
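The implementation under test sits outside this diff view. One way to satisfy exactly these assertions, assuming the unicode-normalization crate is available (a sketch, not the committed code):

    use unicode_normalization::char::is_combining_mark;
    use unicode_normalization::UnicodeNormalization;

    // Hypothetical reconstruction of normalize_for_matching, inferred from the
    // tests above: decompose accents, then keep only lowercased alphanumerics.
    fn normalize_for_matching(s: &str) -> String {
        s.nfd() // "ñ" decomposes into "n" plus a combining tilde
            .filter(|c| !is_combining_mark(*c)) // drop the combining marks
            .filter(|c| c.is_alphanumeric()) // drop periods, hyphens, apostrophes, spaces
            .flat_map(char::to_lowercase)
            .collect()
    }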
+65
-82
@@ -91,25 +91,6 @@ pub async fn batch_upsert_rmp_professors(
     Ok(())
 }
 
-/// Normalize a name for matching: lowercase, trim, strip trailing periods.
-pub(crate) fn normalize(s: &str) -> String {
-    s.trim().to_lowercase().trim_end_matches('.').to_string()
-}
-
-/// Parse Banner's "Last, First Middle" display name into (last, first) tokens.
-///
-/// Returns `None` if the format is unparseable (no comma, empty parts).
-pub(crate) fn parse_display_name(display_name: &str) -> Option<(String, String)> {
-    let (last_part, first_part) = display_name.split_once(',')?;
-    let last = normalize(last_part);
-    // Take only the first token of the first-name portion to drop middle names/initials.
-    let first = normalize(first_part.split_whitespace().next()?);
-    if last.is_empty() || first.is_empty() {
-        return None;
-    }
-    Some((last, first))
-}
-
 /// Retrieve RMP rating data for an instructor by instructor id.
 ///
 /// Returns `(avg_rating, num_ratings)` for the best linked RMP profile
@@ -136,74 +117,76 @@ pub async fn get_instructor_rmp_data(
     Ok(row)
 }
 
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn parse_standard_name() {
-        assert_eq!(
-            parse_display_name("Smith, John"),
-            Some(("smith".into(), "john".into()))
-        );
-    }
-
-    #[test]
-    fn parse_name_with_middle() {
-        assert_eq!(
-            parse_display_name("Smith, John David"),
-            Some(("smith".into(), "john".into()))
-        );
-    }
-
-    #[test]
-    fn parse_name_with_middle_initial() {
-        assert_eq!(
-            parse_display_name("Garcia, Maria L."),
-            Some(("garcia".into(), "maria".into()))
-        );
-    }
-
-    #[test]
-    fn parse_name_with_suffix_in_last() {
-        // Banner may encode "Jr." as part of the last name.
-        // normalize() strips trailing periods so "Jr." becomes "jr".
-        assert_eq!(
-            parse_display_name("Smith Jr., James"),
-            Some(("smith jr".into(), "james".into()))
-        );
-    }
-
-    #[test]
-    fn parse_no_comma_returns_none() {
-        assert_eq!(parse_display_name("SingleName"), None);
-    }
-
-    #[test]
-    fn parse_empty_first_returns_none() {
-        assert_eq!(parse_display_name("Smith,"), None);
-    }
-
-    #[test]
-    fn parse_empty_last_returns_none() {
-        assert_eq!(parse_display_name(", John"), None);
-    }
-
-    #[test]
-    fn parse_extra_whitespace() {
-        assert_eq!(
-            parse_display_name(" Doe , Jane Marie "),
-            Some(("doe".into(), "jane".into()))
-        );
-    }
-
-    #[test]
-    fn normalize_trims_and_lowercases() {
-        assert_eq!(normalize(" FOO "), "foo");
-    }
-
-    #[test]
-    fn normalize_strips_trailing_period() {
-        assert_eq!(normalize("Jr."), "jr");
-    }
-}
+/// Unmatch an instructor from an RMP profile.
+///
+/// Removes the link from `instructor_rmp_links` and updates the instructor's
+/// `rmp_match_status` to 'unmatched' if no links remain.
+///
+/// If `rmp_legacy_id` is `Some`, removes only that specific link.
+/// If `None`, removes all links for the instructor.
+pub async fn unmatch_instructor(
+    db_pool: &PgPool,
+    instructor_id: i32,
+    rmp_legacy_id: Option<i32>,
+) -> Result<()> {
+    let mut tx = db_pool.begin().await?;
+
+    // Delete specific link or all links
+    if let Some(legacy_id) = rmp_legacy_id {
+        sqlx::query(
+            "DELETE FROM instructor_rmp_links WHERE instructor_id = $1 AND rmp_legacy_id = $2",
+        )
+        .bind(instructor_id)
+        .bind(legacy_id)
+        .execute(&mut *tx)
+        .await?;
+    } else {
+        sqlx::query("DELETE FROM instructor_rmp_links WHERE instructor_id = $1")
+            .bind(instructor_id)
+            .execute(&mut *tx)
+            .await?;
+    }
+
+    // Check if any links remain
+    let (remaining,): (i64,) =
+        sqlx::query_as("SELECT COUNT(*) FROM instructor_rmp_links WHERE instructor_id = $1")
+            .bind(instructor_id)
+            .fetch_one(&mut *tx)
+            .await?;
+
+    // Update instructor status if no links remain
+    if remaining == 0 {
+        sqlx::query("UPDATE instructors SET rmp_match_status = 'unmatched' WHERE id = $1")
+            .bind(instructor_id)
+            .execute(&mut *tx)
+            .await?;
+    }
+
+    // Reset accepted candidates back to pending when unmatching
+    // This allows the candidates to be re-matched later
+    if let Some(legacy_id) = rmp_legacy_id {
+        // Reset only the specific candidate
+        sqlx::query(
+            "UPDATE rmp_match_candidates
+             SET status = 'pending', resolved_at = NULL, resolved_by = NULL
+             WHERE instructor_id = $1 AND rmp_legacy_id = $2 AND status = 'accepted'",
+        )
+        .bind(instructor_id)
+        .bind(legacy_id)
+        .execute(&mut *tx)
+        .await?;
+    } else {
+        // Reset all accepted candidates for this instructor
+        sqlx::query(
+            "UPDATE rmp_match_candidates
+             SET status = 'pending', resolved_at = NULL, resolved_by = NULL
+             WHERE instructor_id = $1 AND status = 'accepted'",
+        )
+        .bind(instructor_id)
+        .execute(&mut *tx)
+        .await?;
+    }
+
+    tx.commit().await?;
+    Ok(())
+}
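A minimal usage sketch of the new function, using the identifiers from the diff above:

    // Remove one specific link and send its accepted candidate back to review:
    unmatch_instructor(&db_pool, instructor_id, Some(rmp_legacy_id)).await?;
    // Or clear every RMP link for the instructor:
    unmatch_instructor(&db_pool, instructor_id, None).await?;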
+229
-47
@@ -1,6 +1,6 @@
 //! Confidence scoring and candidate generation for RMP instructor matching.
 
-use crate::data::rmp::{normalize, parse_display_name};
+use crate::data::names::{matching_keys, parse_banner_name, parse_rmp_name};
 use crate::error::Result;
 use serde::{Deserialize, Serialize};
 use sqlx::PgPool;
@@ -14,6 +14,7 @@ use tracing::{debug, info};
 /// Breakdown of individual scoring signals.
 #[derive(Debug, Clone, Default, Serialize, Deserialize)]
 pub struct ScoreBreakdown {
+    pub name: f32,
     pub department: f32,
     pub uniqueness: f32,
     pub volume: f32,
@@ -37,12 +38,13 @@ const MIN_CANDIDATE_THRESHOLD: f32 = 0.40;
 const AUTO_ACCEPT_THRESHOLD: f32 = 0.85;
 
 // ---------------------------------------------------------------------------
-// Weights
+// Weights (must sum to 1.0)
 // ---------------------------------------------------------------------------
 
-const WEIGHT_DEPARTMENT: f32 = 0.50;
-const WEIGHT_UNIQUENESS: f32 = 0.30;
-const WEIGHT_VOLUME: f32 = 0.20;
+const WEIGHT_NAME: f32 = 0.50;
+const WEIGHT_DEPARTMENT: f32 = 0.25;
+const WEIGHT_UNIQUENESS: f32 = 0.15;
+const WEIGHT_VOLUME: f32 = 0.10;
 
 // ---------------------------------------------------------------------------
 // Pure scoring functions
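Since the weights are now documented as summing to 1.0, a small guard test (hypothetical, not part of this commit) would keep a future re-tuning honest:

    #[test]
    fn weights_sum_to_one() {
        // 0.50 + 0.25 + 0.15 + 0.10 = 1.00
        let sum = WEIGHT_NAME + WEIGHT_DEPARTMENT + WEIGHT_UNIQUENESS + WEIGHT_VOLUME;
        assert!((sum - 1.0).abs() < 1e-6);
    }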
@@ -78,8 +80,9 @@ fn department_similarity(subjects: &[String], rmp_department: Option<&str>) -> f
 /// Expand common subject abbreviations used at UTSA and check for overlap.
 fn matches_known_abbreviation(subject: &str, department: &str) -> bool {
     const MAPPINGS: &[(&str, &[&str])] = &[
+        // Core subjects (original mappings, corrected)
         ("cs", &["computer science"]),
-        ("ece", &["electrical", "computer engineering"]),
+        ("ece", &["early childhood education", "early childhood"]),
         ("ee", &["electrical engineering", "electrical"]),
         ("me", &["mechanical engineering", "mechanical"]),
         ("ce", &["civil engineering", "civil"]),
@@ -105,6 +108,85 @@ fn matches_known_abbreviation(subject: &str, department: &str) -> bool {
         ("ms", &["management science"]),
         ("kin", &["kinesiology"]),
         ("com", &["communication"]),
+        // Architecture & Design
+        ("arc", &["architecture"]),
+        ("ide", &["interior design", "design"]),
+        // Anthropology & Ethnic Studies
+        ("ant", &["anthropology"]),
+        ("aas", &["african american studies", "ethnic studies"]),
+        ("mas", &["mexican american studies", "ethnic studies"]),
+        ("regs", &["ethnic studies", "gender"]),
+        // Languages
+        ("lng", &["linguistics", "applied linguistics"]),
+        ("spn", &["spanish"]),
+        ("frn", &["french"]),
+        ("ger", &["german"]),
+        ("chn", &["chinese"]),
+        ("jpn", &["japanese"]),
+        ("kor", &["korean"]),
+        ("itl", &["italian"]),
+        ("rus", &["russian"]),
+        ("lat", &["latin"]),
+        ("grk", &["greek"]),
+        ("asl", &["american sign language", "sign language"]),
+        (
+            "fl",
+            &["foreign languages", "languages", "modern languages"],
+        ),
+        // Education
+        ("edu", &["education"]),
+        ("ci", &["curriculum", "education"]),
+        ("edl", &["educational leadership", "education"]),
+        ("edp", &["educational psychology", "education"]),
+        ("bbl", &["bilingual education"]),
+        ("spe", &["special education", "education"]),
+        // Business
+        ("ent", &["entrepreneurship"]),
+        ("gba", &["general business", "business"]),
+        ("blw", &["business law", "law"]),
+        ("rfd", &["real estate"]),
+        ("mot", &["management of technology", "management"]),
+        // Engineering
+        ("egr", &["engineering"]),
+        ("bme", &["biomedical engineering", "engineering"]),
+        ("cme", &["chemical engineering", "engineering"]),
+        ("cpe", &["computer engineering", "engineering"]),
+        ("ise", &["industrial", "systems engineering", "engineering"]),
+        ("mate", &["materials engineering", "engineering"]),
+        // Sciences
+        ("che", &["chemistry"]),
+        ("bch", &["biochemistry", "chemistry"]),
+        ("geo", &["geology"]),
+        ("phy", &["physics"]),
+        ("ast", &["astronomy"]),
+        ("es", &["environmental science"]),
+        // Social Sciences
+        ("crj", &["criminal justice"]),
+        ("swk", &["social work"]),
+        ("pad", &["public administration"]),
+        ("grg", &["geography"]),
+        ("ges", &["geography"]),
+        // Humanities
+        ("cla", &["classics"]),
+        ("hum", &["humanities"]),
+        ("wgss", &["women's studies"]),
+        // Health
+        ("hth", &["health"]),
+        ("hcp", &["health science", "health"]),
+        ("ntr", &["nutrition"]),
+        // Military
+        ("msc", &["military science"]),
+        ("asc", &["aerospace"]),
+        // Arts
+        ("dan", &["dance"]),
+        ("thr", &["theater"]),
+        ("ahc", &["art history"]),
+        // Other
+        ("cou", &["counseling"]),
+        ("hon", &["honors"]),
+        ("csm", &["construction"]),
+        ("wrc", &["writing"]),
+        ("set", &["tourism management", "tourism"]),
     ];
 
     for &(abbr, expansions) in MAPPINGS {
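The body of the loop that consumes MAPPINGS falls outside this hunk; judging from the signature and doc comment, a plausible shape (a sketch under that assumption, not the committed code) is:

    for &(abbr, expansions) in MAPPINGS {
        if subject == abbr {
            // Subject abbreviation matched; accept if the RMP department
            // string mentions any known expansion.
            return expansions.iter().any(|e| department.contains(e));
        }
    }
    false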
@@ -119,35 +201,39 @@ fn matches_known_abbreviation(subject: &str, department: &str) -> bool {
 
 /// Compute match confidence score (0.0–1.0) for an instructor–RMP pair.
 ///
-/// Name matching is handled by the caller via pre-filtering on exact
-/// normalized `(last, first)`, so only department, uniqueness, and volume
-/// signals are scored here.
+/// The name signal is always 1.0 since candidates are only generated for
+/// exact normalized name matches. The effective score range is 0.50–1.0.
 pub fn compute_match_score(
     instructor_subjects: &[String],
     rmp_department: Option<&str>,
     candidate_count: usize,
     rmp_num_ratings: i32,
 ) -> MatchScore {
-    // --- Department (0.50) ---
+    // --- Name (0.50) — always 1.0, candidates only exist for exact matches ---
+    let name_score = 1.0;
+
+    // --- Department (0.25) ---
     let dept_score = department_similarity(instructor_subjects, rmp_department);
 
-    // --- Uniqueness (0.30) ---
+    // --- Uniqueness (0.15) ---
     let uniqueness_score = match candidate_count {
         0 | 1 => 1.0,
         2 => 0.5,
         _ => 0.2,
     };
 
-    // --- Volume (0.20) ---
+    // --- Volume (0.10) ---
     let volume_score = ((rmp_num_ratings as f32).ln_1p() / 5.0_f32.ln_1p()).clamp(0.0, 1.0);
 
-    let composite = dept_score * WEIGHT_DEPARTMENT
+    let composite = name_score * WEIGHT_NAME
+        + dept_score * WEIGHT_DEPARTMENT
         + uniqueness_score * WEIGHT_UNIQUENESS
         + volume_score * WEIGHT_VOLUME;
 
     MatchScore {
         score: composite,
         breakdown: ScoreBreakdown {
+            name: name_score,
             department: dept_score,
             uniqueness: uniqueness_score,
             volume: volume_score,
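A quick worked example of the rebalanced composite, with hypothetical inputs and assuming department_similarity scores a mapped subject/department pair as 1.0:

    let ms = compute_match_score(&["cs".into()], Some("computer science"), 1, 50);
    // volume: ln(1 + 50) / ln(1 + 5) ≈ 2.19, clamped to 1.0
    // composite: 1.0*0.50 + 1.0*0.25 + 1.0*0.15 + 1.0*0.10 = 1.00
    assert!(ms.score >= AUTO_ACCEPT_THRESHOLD);

Because the name signal is constant, the lowest possible composite is 0.50 + 0.2*0.15 = 0.53 before any department or volume contribution, so every generated candidate now clears MIN_CANDIDATE_THRESHOLD (0.40) on its own.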
@@ -164,6 +250,7 @@ pub fn compute_match_score(
 pub struct MatchingStats {
     pub total_unmatched: usize,
     pub candidates_created: usize,
+    pub candidates_rescored: usize,
     pub auto_matched: usize,
     pub skipped_unparseable: usize,
     pub skipped_no_candidates: usize,
@@ -179,8 +266,8 @@ struct RmpProfForMatching {
 /// Generate match candidates for all unmatched instructors.
 ///
 /// For each unmatched instructor:
-/// 1. Parse `display_name` into (last, first).
-/// 2. Find RMP professors with matching normalized name.
+/// 1. Parse `display_name` into [`NameParts`] and generate matching keys.
+/// 2. Find RMP professors with matching normalized name keys.
 /// 3. Score each candidate.
 /// 4. Store candidates scoring above [`MIN_CANDIDATE_THRESHOLD`].
 /// 5. Auto-accept if the top candidate scores ≥ [`AUTO_ACCEPT_THRESHOLD`]
@@ -200,6 +287,7 @@ pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
         return Ok(MatchingStats {
             total_unmatched: 0,
             candidates_created: 0,
+            candidates_rescored: 0,
             auto_matched: 0,
             skipped_unparseable: 0,
             skipped_no_candidates: 0,
@@ -227,7 +315,7 @@ pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
         subject_map.entry(iid).or_default().push(subject);
     }
 
-    // 3. Load all RMP professors
+    // 3. Load all RMP professors and build multi-key name index
     let prof_rows: Vec<(i32, String, String, Option<String>, i32)> = sqlx::query_as(
         "SELECT legacy_id, first_name, last_name, department, num_ratings FROM rmp_professors",
     )
@@ -235,40 +323,72 @@ pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
     .await?;
 
     // Build name index: (normalized_last, normalized_first) -> Vec<RmpProfForMatching>
+    // Each professor may appear under multiple keys (nicknames, token variants).
     let mut name_index: HashMap<(String, String), Vec<RmpProfForMatching>> = HashMap::new();
-    for (legacy_id, first_name, last_name, department, num_ratings) in prof_rows {
-        let key = (normalize(&last_name), normalize(&first_name));
-        name_index.entry(key).or_default().push(RmpProfForMatching {
-            legacy_id,
-            department,
-            num_ratings,
-        });
+    let mut rmp_parse_failures = 0usize;
+    for (legacy_id, first_name, last_name, department, num_ratings) in &prof_rows {
+        match parse_rmp_name(first_name, last_name) {
+            Some(parts) => {
+                let keys = matching_keys(&parts);
+                for key in keys {
+                    name_index.entry(key).or_default().push(RmpProfForMatching {
+                        legacy_id: *legacy_id,
+                        department: department.clone(),
+                        num_ratings: *num_ratings,
+                    });
+                }
+            }
+            None => {
+                rmp_parse_failures += 1;
+                debug!(
+                    legacy_id,
+                    first_name, last_name, "Unparseable RMP professor name, skipping"
+                );
+            }
+        }
     }
 
-    // 4. Load existing candidate pairs (and rejected subset) in a single query
+    if rmp_parse_failures > 0 {
+        debug!(
+            count = rmp_parse_failures,
+            "RMP professors with unparseable names"
+        );
+    }
+
+    // 4. Load existing candidate pairs — only skip resolved (accepted/rejected) pairs.
+    // Pending candidates are rescored so updated mappings take effect.
     let candidate_rows: Vec<(i32, i32, String)> =
         sqlx::query_as("SELECT instructor_id, rmp_legacy_id, status FROM rmp_match_candidates")
             .fetch_all(db_pool)
             .await?;
 
-    let mut existing_pairs: HashSet<(i32, i32)> = HashSet::with_capacity(candidate_rows.len());
+    let mut resolved_pairs: HashSet<(i32, i32)> = HashSet::new();
+    let mut pending_pairs: HashSet<(i32, i32)> = HashSet::new();
     let mut rejected_pairs: HashSet<(i32, i32)> = HashSet::new();
     for (iid, lid, status) in candidate_rows {
-        existing_pairs.insert((iid, lid));
-        if status == "rejected" {
-            rejected_pairs.insert((iid, lid));
+        match status.as_str() {
+            "accepted" | "rejected" => {
+                resolved_pairs.insert((iid, lid));
+                if status == "rejected" {
+                    rejected_pairs.insert((iid, lid));
+                }
+            }
+            _ => {
+                pending_pairs.insert((iid, lid));
+            }
         }
     }
 
-    // 5. Score and collect candidates
+    // 5. Score and collect candidates (new + rescored pending)
     let empty_subjects: Vec<String> = Vec::new();
-    let mut candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
+    let mut new_candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
+    let mut rescored_candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
    let mut auto_accept: Vec<(i32, i32)> = Vec::new(); // (instructor_id, legacy_id)
     let mut skipped_unparseable = 0usize;
     let mut skipped_no_candidates = 0usize;
 
     for (instructor_id, display_name) in &instructors {
-        let Some((norm_last, norm_first)) = parse_display_name(display_name) else {
+        let Some(instructor_parts) = parse_banner_name(display_name) else {
             skipped_unparseable += 1;
             debug!(
                 instructor_id,
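The multi-key index is what makes nickname and initial variants land on the same professor: one RMP profile can be pushed under several (last, first) keys. As an illustration drawn from the tests earlier in this diff, a profile for William (Ken) Burchenal is indexed under both ("burchenal", "william") and ("burchenal", "ken"), so a Banner roster using either first name resolves to the same legacy_id.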
@@ -279,18 +399,33 @@ pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
 
         let subjects = subject_map.get(instructor_id).unwrap_or(&empty_subjects);
 
-        let key = (norm_last.clone(), norm_first.clone());
-        let Some(rmp_candidates) = name_index.get(&key) else {
+        // Generate all matching keys for this instructor and collect candidate
+        // RMP professors across all key variants (deduplicated by legacy_id).
+        let instructor_keys = matching_keys(&instructor_parts);
+        let mut seen_profs: HashSet<i32> = HashSet::new();
+        let mut matched_profs: Vec<&RmpProfForMatching> = Vec::new();
+
+        for key in &instructor_keys {
+            if let Some(profs) = name_index.get(key) {
+                for prof in profs {
+                    if seen_profs.insert(prof.legacy_id) {
+                        matched_profs.push(prof);
+                    }
+                }
+            }
+        }
+
+        if matched_profs.is_empty() {
             skipped_no_candidates += 1;
             continue;
-        };
+        }
 
-        let candidate_count = rmp_candidates.len();
+        let candidate_count = matched_profs.len();
         let mut best: Option<(f32, i32)> = None;
 
-        for prof in rmp_candidates {
+        for prof in &matched_profs {
             let pair = (*instructor_id, prof.legacy_id);
-            if existing_pairs.contains(&pair) {
+            if resolved_pairs.contains(&pair) {
                 continue;
             }
 
@@ -308,7 +443,16 @@ pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
             let breakdown_json =
                 serde_json::to_value(&ms.breakdown).unwrap_or_else(|_| serde_json::json!({}));
 
-            candidates.push((*instructor_id, prof.legacy_id, ms.score, breakdown_json));
+            if pending_pairs.contains(&pair) {
+                rescored_candidates.push((
+                    *instructor_id,
+                    prof.legacy_id,
+                    ms.score,
+                    breakdown_json,
+                ));
+            } else {
+                new_candidates.push((*instructor_id, prof.legacy_id, ms.score, breakdown_json));
+            }
 
             match best {
                 Some((s, _)) if ms.score > s => best = Some((ms.score, prof.legacy_id)),
@@ -327,19 +471,20 @@ pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
         }
     }
 
-    // 6–7. Write candidates and auto-accept within a single transaction
-    let candidates_created = candidates.len();
+    // 6–7. Write candidates, rescore, and auto-accept within a single transaction
+    let candidates_created = new_candidates.len();
+    let candidates_rescored = rescored_candidates.len();
     let auto_matched = auto_accept.len();
 
     let mut tx = db_pool.begin().await?;
 
-    // 6. Batch-insert candidates
-    if !candidates.is_empty() {
-        let c_instructor_ids: Vec<i32> = candidates.iter().map(|(iid, _, _, _)| *iid).collect();
-        let c_legacy_ids: Vec<i32> = candidates.iter().map(|(_, lid, _, _)| *lid).collect();
-        let c_scores: Vec<f32> = candidates.iter().map(|(_, _, s, _)| *s).collect();
+    // 6a. Batch-insert new candidates
+    if !new_candidates.is_empty() {
+        let c_instructor_ids: Vec<i32> = new_candidates.iter().map(|(iid, _, _, _)| *iid).collect();
+        let c_legacy_ids: Vec<i32> = new_candidates.iter().map(|(_, lid, _, _)| *lid).collect();
+        let c_scores: Vec<f32> = new_candidates.iter().map(|(_, _, s, _)| *s).collect();
         let c_breakdowns: Vec<serde_json::Value> =
-            candidates.into_iter().map(|(_, _, _, b)| b).collect();
+            new_candidates.into_iter().map(|(_, _, _, b)| b).collect();
 
         sqlx::query(
             r#"
@@ -358,6 +503,40 @@ pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
         .await?;
     }
 
+    // 6b. Batch-update rescored pending candidates
+    if !rescored_candidates.is_empty() {
+        let r_instructor_ids: Vec<i32> = rescored_candidates
+            .iter()
+            .map(|(iid, _, _, _)| *iid)
+            .collect();
+        let r_legacy_ids: Vec<i32> = rescored_candidates
+            .iter()
+            .map(|(_, lid, _, _)| *lid)
+            .collect();
+        let r_scores: Vec<f32> = rescored_candidates.iter().map(|(_, _, s, _)| *s).collect();
+        let r_breakdowns: Vec<serde_json::Value> = rescored_candidates
+            .into_iter()
+            .map(|(_, _, _, b)| b)
+            .collect();
+
+        sqlx::query(
+            r#"
+            UPDATE rmp_match_candidates mc
+            SET score = v.score, score_breakdown = v.score_breakdown
+            FROM UNNEST($1::int4[], $2::int4[], $3::real[], $4::jsonb[])
+                AS v(instructor_id, rmp_legacy_id, score, score_breakdown)
+            WHERE mc.instructor_id = v.instructor_id
+              AND mc.rmp_legacy_id = v.rmp_legacy_id
+            "#,
+        )
+        .bind(&r_instructor_ids)
+        .bind(&r_legacy_ids)
+        .bind(&r_scores)
+        .bind(&r_breakdowns)
+        .execute(&mut *tx)
+        .await?;
+    }
+
     // 7. Auto-accept top candidates
     if !auto_accept.is_empty() {
         let aa_instructor_ids: Vec<i32> = auto_accept.iter().map(|(iid, _)| *iid).collect();
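The UNNEST update in step 6b is the batch analogue of the insert in 6a: PostgreSQL zips the four parallel arrays positionally into the virtual table v, so every rescored pending candidate receives its new score and score_breakdown in a single statement instead of one UPDATE round-trip per row.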
@@ -411,6 +590,7 @@ pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
     let stats = MatchingStats {
         total_unmatched,
         candidates_created,
+        candidates_rescored,
         auto_matched,
         skipped_unparseable,
         skipped_no_candidates,
@@ -419,6 +599,7 @@ pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
     info!(
         total_unmatched = stats.total_unmatched,
         candidates_created = stats.candidates_created,
+        candidates_rescored = stats.candidates_rescored,
         auto_matched = stats.auto_matched,
         skipped_unparseable = stats.skipped_unparseable,
         skipped_no_candidates = stats.skipped_no_candidates,
@@ -444,8 +625,9 @@ mod tests {
             1, // unique candidate
             50, // decent ratings
         );
-        // dept 1.0*0.50 + unique 1.0*0.30 + volume ~0.97*0.20 ≈ 0.99
+        // name 1.0*0.50 + dept 1.0*0.25 + unique 1.0*0.15 + volume 1.0*0.10 = 1.00
+        // (50 ratings saturates the volume term: ln(51)/ln(6) clamps to 1.0)
         assert!(ms.score >= 0.85, "Expected score >= 0.85, got {}", ms.score);
+        assert_eq!(ms.breakdown.name, 1.0);
         assert_eq!(ms.breakdown.uniqueness, 1.0);
         assert_eq!(ms.breakdown.department, 1.0);
     }
@@ -310,6 +310,7 @@ impl Scheduler {
             total,
             stats.total_unmatched,
             stats.candidates_created,
+            stats.candidates_rescored,
             stats.auto_matched,
             stats.skipped_unparseable,
             stats.skipped_no_candidates,
+12
-50
@@ -180,6 +180,7 @@ pub struct InstructorDetailResponse {
 pub struct RescoreResponse {
     pub total_unmatched: usize,
     pub candidates_created: usize,
+    pub candidates_rescored: usize,
     pub auto_matched: usize,
     pub skipped_unparseable: usize,
     pub skipped_no_candidates: usize,
@@ -768,16 +769,10 @@ pub async fn unmatch_instructor(
 ) -> Result<Json<OkResponse>, (StatusCode, Json<Value>)> {
     let rmp_legacy_id = body.and_then(|b| b.rmp_legacy_id);
 
-    let mut tx = state
-        .db_pool
-        .begin()
-        .await
-        .map_err(|e| db_error("failed to begin transaction", e))?;
-
     // Verify instructor exists
     let exists: Option<(i32,)> = sqlx::query_as("SELECT id FROM instructors WHERE id = $1")
         .bind(id)
-        .fetch_optional(&mut *tx)
+        .fetch_optional(&state.db_pool)
         .await
         .map_err(|e| db_error("failed to check instructor", e))?;
 
@@ -788,50 +783,16 @@ pub async fn unmatch_instructor(
         ));
     }
 
-    // Delete specific link or all links
-    if let Some(legacy_id) = rmp_legacy_id {
-        let result = sqlx::query(
-            "DELETE FROM instructor_rmp_links WHERE instructor_id = $1 AND rmp_legacy_id = $2",
-        )
-        .bind(id)
-        .bind(legacy_id)
-        .execute(&mut *tx)
-        .await
-        .map_err(|e| db_error("failed to remove rmp link", e))?;
-
-        if result.rows_affected() == 0 {
-            return Err((
-                StatusCode::NOT_FOUND,
-                Json(json!({"error": "link not found for this instructor"})),
-            ));
-        }
-    } else {
-        sqlx::query("DELETE FROM instructor_rmp_links WHERE instructor_id = $1")
-            .bind(id)
-            .execute(&mut *tx)
-            .await
-            .map_err(|e| db_error("failed to remove rmp links", e))?;
-    }
-
-    // Check if any links remain; update status accordingly
-    let (remaining,): (i64,) =
-        sqlx::query_as("SELECT COUNT(*) FROM instructor_rmp_links WHERE instructor_id = $1")
-            .bind(id)
-            .fetch_one(&mut *tx)
-            .await
-            .map_err(|e| db_error("failed to count remaining links", e))?;
-
-    if remaining == 0 {
-        sqlx::query("UPDATE instructors SET rmp_match_status = 'unmatched' WHERE id = $1")
-            .bind(id)
-            .execute(&mut *tx)
-            .await
-            .map_err(|e| db_error("failed to update instructor status", e))?;
-    }
-
-    tx.commit()
-        .await
-        .map_err(|e| db_error("failed to commit transaction", e))?;
-
+    // Use the data layer function to perform the unmatch
+    crate::data::rmp::unmatch_instructor(&state.db_pool, id, rmp_legacy_id)
+        .await
+        .map_err(|e| {
+            tracing::error!(error = %e, "failed to unmatch instructor");
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                Json(json!({"error": "failed to unmatch instructor"})),
+            )
+        })?;
+
     Ok(Json(OkResponse { ok: true }))
 }
@@ -858,6 +819,7 @@ pub async fn rescore(
     Ok(Json(RescoreResponse {
         total_unmatched: stats.total_unmatched,
         candidates_created: stats.candidates_created,
+        candidates_rescored: stats.candidates_rescored,
         auto_matched: stats.auto_matched,
         skipped_unparseable: stats.skipped_unparseable,
        skipped_no_candidates: stats.skipped_no_candidates,
@@ -0,0 +1,103 @@
#[allow(dead_code)]
mod helpers;

use banner::data::rmp::unmatch_instructor;
use sqlx::PgPool;

/// Test that unmatching an instructor resets accepted candidates back to pending.
///
/// When a user unmatches an instructor, accepted candidates should be reset to
/// 'pending' so they can be re-matched later. This prevents the bug where
/// candidates remain 'accepted' but have no corresponding link.
#[sqlx::test]
async fn unmatch_resets_accepted_candidates_to_pending(pool: PgPool) {
    // ARRANGE: Create an instructor
    let (instructor_id,): (i32,) = sqlx::query_as(
        "INSERT INTO instructors (display_name, email)
         VALUES ('Test, Instructor', 'test@utsa.edu')
         RETURNING id",
    )
    .fetch_one(&pool)
    .await
    .expect("failed to create instructor");

    // ARRANGE: Create an RMP professor
    let (rmp_legacy_id,): (i32,) = sqlx::query_as(
        "INSERT INTO rmp_professors (legacy_id, graphql_id, first_name, last_name, num_ratings)
         VALUES (9999999, 'test-graphql-id', 'Test', 'Professor', 10)
         RETURNING legacy_id",
    )
    .fetch_one(&pool)
    .await
    .expect("failed to create rmp professor");

    // ARRANGE: Create a match candidate with 'accepted' status
    sqlx::query(
        "INSERT INTO rmp_match_candidates (instructor_id, rmp_legacy_id, score, status)
         VALUES ($1, $2, 0.85, 'accepted')",
    )
    .bind(instructor_id)
    .bind(rmp_legacy_id)
    .execute(&pool)
    .await
    .expect("failed to create candidate");

    // ARRANGE: Create a link in instructor_rmp_links
    sqlx::query(
        "INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, source)
         VALUES ($1, $2, 'manual')",
    )
    .bind(instructor_id)
    .bind(rmp_legacy_id)
    .execute(&pool)
    .await
    .expect("failed to create link");

    // ARRANGE: Update instructor status to 'confirmed'
    sqlx::query("UPDATE instructors SET rmp_match_status = 'confirmed' WHERE id = $1")
        .bind(instructor_id)
        .execute(&pool)
        .await
        .expect("failed to update instructor status");

    // ACT: Unmatch the specific RMP profile
    unmatch_instructor(&pool, instructor_id, Some(rmp_legacy_id))
        .await
        .expect("unmatch should succeed");

    // ASSERT: Candidate should be reset to pending
    let (candidate_status,): (String,) = sqlx::query_as(
        "SELECT status FROM rmp_match_candidates
         WHERE instructor_id = $1 AND rmp_legacy_id = $2",
    )
    .bind(instructor_id)
    .bind(rmp_legacy_id)
    .fetch_one(&pool)
    .await
    .expect("failed to fetch candidate status");
    assert_eq!(
        candidate_status, "pending",
        "candidate should be reset to pending after unmatch"
    );

    // ASSERT: Link should be deleted
    let (link_count,): (i64,) =
        sqlx::query_as("SELECT COUNT(*) FROM instructor_rmp_links WHERE instructor_id = $1")
            .bind(instructor_id)
            .fetch_one(&pool)
            .await
            .expect("failed to count links");
    assert_eq!(link_count, 0, "link should be deleted");

    // ASSERT: Instructor status should be unmatched
    let (instructor_status,): (String,) =
        sqlx::query_as("SELECT rmp_match_status FROM instructors WHERE id = $1")
            .bind(instructor_id)
            .fetch_one(&pool)
            .await
            .expect("failed to fetch instructor status");
    assert_eq!(
        instructor_status, "unmatched",
        "instructor should be unmatched"
    );
}
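The #[sqlx::test] attribute is what makes this runnable without fixtures: sqlx provisions an isolated test database for each test and hands it its own PgPool (and applies migrations, assuming the crate's migrate support is configured in the usual way), so the ARRANGE inserts cannot collide with other tests.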
@@ -1,3 +1,4 @@
+#[allow(dead_code)]
 mod helpers;
 
 use banner::data::batch::batch_upsert_courses;
@@ -1,3 +1,4 @@
+#[allow(dead_code)]
 mod helpers;
 
 use banner::data::models::{ScrapePriority, TargetType};
@@ -67,7 +67,7 @@ function outTransition(_node: HTMLElement): TransitionConfig {
 }
 </script>
 
-<div class="relative flex flex-1 flex-col overflow-hidden">
+<div class="relative flex flex-1 flex-col">
     {#key key}
         <div in:inTransition out:outTransition class="flex flex-1 flex-col">
             {@render children()}
File diff suppressed because it is too large
@@ -0,0 +1,160 @@
<script lang="ts">
    import type { CandidateResponse } from "$lib/api";
    import { isRatingValid, ratingStyle, rmpUrl } from "$lib/course";
    import { Check, ExternalLink, LoaderCircle, X, XCircle } from "@lucide/svelte";
    import ScoreBreakdown from "./ScoreBreakdown.svelte";

    let {
        candidate,
        isMatched = false,
        isRejected = false,
        disabled = false,
        actionLoading = null,
        isDark = false,
        onmatch,
        onreject,
        onunmatch,
    }: {
        candidate: CandidateResponse;
        isMatched?: boolean;
        isRejected?: boolean;
        disabled?: boolean;
        actionLoading?: string | null;
        isDark?: boolean;
        onmatch?: () => void;
        onreject?: () => void;
        onunmatch?: () => void;
    } = $props();

    const isPending = $derived(!isMatched && !isRejected);
    const isMatchLoading = $derived(actionLoading === `match-${candidate.rmpLegacyId}`);
    const isRejectLoading = $derived(actionLoading === `reject-${candidate.rmpLegacyId}`);
    const isUnmatchLoading = $derived(actionLoading === `unmatch-${candidate.rmpLegacyId}`);
</script>

<div
    class="rounded-md border p-3 transition-all duration-200
    {isMatched
        ? 'border-l-4 border-l-green-500 bg-green-500/5 border-border'
        : isRejected
          ? 'border-border bg-card opacity-50'
          : 'border-border bg-card hover:shadow-sm'}"
>
    <div class="flex items-start justify-between gap-2">
        <div class="min-w-0">
            <div class="flex items-center gap-2 flex-wrap">
                <span class="font-medium text-foreground text-sm">
                    {candidate.firstName} {candidate.lastName}
                </span>
                {#if isMatched}
                    <span
                        class="text-[10px] rounded px-1.5 py-0.5 bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400 font-medium"
                    >
                        Matched
                    </span>
                {:else if isRejected}
                    <span
                        class="text-[10px] rounded px-1.5 py-0.5 bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400 font-medium"
                    >
                        Rejected
                    </span>
                {/if}
            </div>
            {#if candidate.department}
                <div class="text-xs text-muted-foreground mt-0.5">{candidate.department}</div>
            {/if}
        </div>

        <div class="flex items-center gap-0.5 shrink-0">
            {#if isMatched}
                <button
                    onclick={(e) => {
                        e.stopPropagation();
                        onunmatch?.();
                    }}
                    {disabled}
                    class="inline-flex items-center gap-1 rounded px-1.5 py-1 text-xs text-red-500 hover:bg-red-100 dark:hover:bg-red-900/30 transition-colors disabled:opacity-50 cursor-pointer"
                    title="Remove match"
                >
                    {#if isUnmatchLoading}
                        <LoaderCircle size={14} class="animate-spin" />
                    {:else}
                        <XCircle size={14} />
                    {/if}
                    Unmatch
                </button>
            {:else if isPending}
                <button
                    onclick={(e) => {
                        e.stopPropagation();
                        onmatch?.();
                    }}
                    {disabled}
                    class="rounded p-1 text-green-600 hover:bg-green-100 dark:hover:bg-green-900/30 transition-colors disabled:opacity-50 cursor-pointer"
                    title="Accept match"
                >
                    {#if isMatchLoading}
                        <LoaderCircle size={14} class="animate-spin" />
                    {:else}
                        <Check size={14} />
                    {/if}
                </button>
                <button
                    onclick={(e) => {
                        e.stopPropagation();
                        onreject?.();
                    }}
                    {disabled}
                    class="rounded p-1 text-red-500 hover:bg-red-100 dark:hover:bg-red-900/30 transition-colors disabled:opacity-50 cursor-pointer"
                    title="Reject candidate"
                >
                    {#if isRejectLoading}
                        <LoaderCircle size={14} class="animate-spin" />
                    {:else}
                        <X size={14} />
                    {/if}
                </button>
            {/if}
            <a
                href={rmpUrl(candidate.rmpLegacyId)}
                target="_blank"
                rel="noopener noreferrer"
                onclick={(e) => e.stopPropagation()}
                class="rounded p-1 text-muted-foreground hover:bg-muted hover:text-foreground transition-colors cursor-pointer"
                title="View on RateMyProfessors"
            >
                <ExternalLink size={14} />
            </a>
        </div>
    </div>

    <!-- Rating stats -->
    <div class="mt-2 flex items-center gap-3 text-xs flex-wrap">
        {#if isRatingValid(candidate.avgRating, candidate.numRatings ?? 0)}
            <span
                class="font-semibold tabular-nums"
                style={ratingStyle(candidate.avgRating!, isDark)}
            >
                {candidate.avgRating!.toFixed(1)}
            </span>
        {:else}
            <span class="text-muted-foreground">No rating</span>
        {/if}
        {#if candidate.avgDifficulty !== null}
            <span class="text-muted-foreground tabular-nums"
                >{candidate.avgDifficulty.toFixed(1)} diff</span
            >
        {/if}
        <span class="text-muted-foreground tabular-nums">{candidate.numRatings} ratings</span>
        {#if candidate.wouldTakeAgainPct !== null}
            <span class="text-muted-foreground tabular-nums"
                >{candidate.wouldTakeAgainPct.toFixed(0)}% again</span
            >
        {/if}
    </div>

    <!-- Score breakdown -->
    <div class="mt-2">
        <ScoreBreakdown breakdown={candidate.scoreBreakdown} score={candidate.score ?? 0} />
    </div>
</div>
@@ -0,0 +1,73 @@
<script lang="ts">
    import SimpleTooltip from "$lib/components/SimpleTooltip.svelte";

    let {
        breakdown = null,
        score = 0,
    }: {
        breakdown?: { [key in string]?: number } | null;
        score?: number;
    } = $props();

    const weights: Record<string, number> = {
        name: 0.5,
        department: 0.25,
        uniqueness: 0.15,
        volume: 0.1,
    };

    const colors: Record<string, string> = {
        name: "bg-blue-500",
        department: "bg-purple-500",
        uniqueness: "bg-amber-500",
        volume: "bg-emerald-500",
    };

    const labels: Record<string, string> = {
        name: "Name",
        department: "Dept",
        uniqueness: "Unique",
        volume: "Volume",
    };

    function fmt(v: number): string {
        return (v * 100).toFixed(0);
    }

    const segments = $derived(
        Object.entries(breakdown ?? {})
            .filter(([_, value]) => value != null)
            .map(([key, value]) => ({
                key,
                label: labels[key] ?? key,
                color: colors[key] ?? "bg-primary",
                weight: weights[key] ?? 0,
                raw: value!,
                pct: value! * (weights[key] ?? 0) * 100,
            }))
    );

    const tooltipText = $derived(
        segments.map((s) => `${s.label}: ${fmt(s.raw)}% \u00d7 ${fmt(s.weight)}%`).join("\n") +
            `\nTotal: ${fmt(score)}%`
    );
</script>

<div class="flex items-center gap-2 text-xs">
    <span class="text-muted-foreground shrink-0">Score:</span>
    <div class="bg-muted h-2 flex-1 rounded-full overflow-hidden flex">
        {#each segments as seg (seg.key)}
            <div
                class="{seg.color} h-full transition-all duration-300"
                style="width: {seg.pct}%"
            ></div>
        {/each}
    </div>
    <SimpleTooltip text={tooltipText} side="top">
        <span
            class="tabular-nums font-medium text-foreground cursor-help border-b border-dotted border-muted-foreground/40"
        >
            {fmt(score)}%
        </span>
    </SimpleTooltip>
</div>
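The bar math mirrors the Rust weights: each segment's width is the raw signal times its weight, so a department signal of 0.8 renders as 0.8 × 25% = 20% of the bar, and the tooltip line reads "Dept: 80% × 25%". Four fully saturated segments together fill exactly 100%.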