feat: add confidence-based RMP matching with manual review workflow

Replace simple auto-matching with scored candidate generation that
considers department overlap, name uniqueness, and rating volume.
Candidates above 0.85 auto-accept; others require admin approval.
This commit is contained in:
2026-01-30 01:31:11 -06:00
parent 39ba131322
commit 203c337cf0
19 changed files with 2428 additions and 175 deletions
+59 -33
View File
@@ -392,11 +392,11 @@ pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Resul
insert_audits(&audits, &mut tx).await?;
insert_metrics(&metrics, &mut tx).await?;
// Step 5: Upsert instructors (deduplicated across batch)
upsert_instructors(courses, &mut tx).await?;
// Step 5: Upsert instructors (returns email -> id map)
let email_to_id = upsert_instructors(courses, &mut tx).await?;
// Step 6: Link courses to instructors via junction table
upsert_course_instructors(courses, &course_ids, &mut tx).await?;
upsert_course_instructors(courses, &course_ids, &email_to_id, &mut tx).await?;
tx.commit().await?;
@@ -596,62 +596,85 @@ async fn upsert_courses(courses: &[Course], conn: &mut PgConnection) -> Result<V
Ok(rows)
}
/// Deduplicate and upsert all instructors from the batch.
async fn upsert_instructors(courses: &[Course], conn: &mut PgConnection) -> Result<()> {
/// Deduplicate and upsert all instructors from the batch by email.
/// Returns a map of lowercased_email -> instructor id for junction linking.
async fn upsert_instructors(
courses: &[Course],
conn: &mut PgConnection,
) -> Result<HashMap<String, i32>> {
let mut seen = HashSet::new();
let mut banner_ids = Vec::new();
let mut display_names = Vec::new();
let mut emails: Vec<Option<&str>> = Vec::new();
let mut display_names: Vec<&str> = Vec::new();
let mut emails_lower: Vec<String> = Vec::new();
let mut skipped_no_email = 0u32;
for course in courses {
for faculty in &course.faculty {
if seen.insert(faculty.banner_id.as_str()) {
banner_ids.push(faculty.banner_id.as_str());
display_names.push(faculty.display_name.as_str());
emails.push(faculty.email_address.as_deref());
if let Some(email) = &faculty.email_address {
let email_lower = email.to_lowercase();
if seen.insert(email_lower.clone()) {
display_names.push(faculty.display_name.as_str());
emails_lower.push(email_lower);
}
} else {
skipped_no_email += 1;
}
}
}
if banner_ids.is_empty() {
return Ok(());
if skipped_no_email > 0 {
tracing::warn!(
count = skipped_no_email,
"Skipped instructors with no email address"
);
}
sqlx::query(
if display_names.is_empty() {
return Ok(HashMap::new());
}
let email_refs: Vec<&str> = emails_lower.iter().map(|s| s.as_str()).collect();
let rows: Vec<(i32, String)> = sqlx::query_as(
r#"
INSERT INTO instructors (banner_id, display_name, email)
SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[])
ON CONFLICT (banner_id)
DO UPDATE SET
display_name = EXCLUDED.display_name,
email = COALESCE(EXCLUDED.email, instructors.email)
INSERT INTO instructors (display_name, email)
SELECT * FROM UNNEST($1::text[], $2::text[])
ON CONFLICT (email)
DO UPDATE SET display_name = EXCLUDED.display_name
RETURNING id, email
"#,
)
.bind(&banner_ids)
.bind(&display_names)
.bind(&emails)
.execute(&mut *conn)
.bind(&email_refs)
.fetch_all(&mut *conn)
.await
.map_err(|e| anyhow::anyhow!("Failed to batch upsert instructors: {}", e))?;
Ok(())
Ok(rows.into_iter().map(|(id, email)| (email, id)).collect())
}
/// Link courses to their instructors via the junction table.
async fn upsert_course_instructors(
courses: &[Course],
course_ids: &[i32],
email_to_id: &HashMap<String, i32>,
conn: &mut PgConnection,
) -> Result<()> {
let mut cids = Vec::new();
let mut iids = Vec::new();
let mut instructor_ids: Vec<i32> = Vec::new();
let mut banner_ids: Vec<&str> = Vec::new();
let mut primaries = Vec::new();
for (course, &course_id) in courses.iter().zip(course_ids) {
for faculty in &course.faculty {
cids.push(course_id);
iids.push(faculty.banner_id.as_str());
primaries.push(faculty.primary_indicator);
if let Some(email) = &faculty.email_address {
let email_lower = email.to_lowercase();
if let Some(&instructor_id) = email_to_id.get(&email_lower) {
cids.push(course_id);
instructor_ids.push(instructor_id);
banner_ids.push(faculty.banner_id.as_str());
primaries.push(faculty.primary_indicator);
}
}
}
}
@@ -668,14 +691,17 @@ async fn upsert_course_instructors(
sqlx::query(
r#"
INSERT INTO course_instructors (course_id, instructor_id, is_primary)
SELECT * FROM UNNEST($1::int4[], $2::text[], $3::bool[])
INSERT INTO course_instructors (course_id, instructor_id, banner_id, is_primary)
SELECT * FROM UNNEST($1::int4[], $2::int4[], $3::text[], $4::bool[])
ON CONFLICT (course_id, instructor_id)
DO UPDATE SET is_primary = EXCLUDED.is_primary
DO UPDATE SET
banner_id = EXCLUDED.banner_id,
is_primary = EXCLUDED.is_primary
"#,
)
.bind(&cids)
.bind(&iids)
.bind(&instructor_ids)
.bind(&banner_ids)
.bind(&primaries)
.execute(&mut *conn)
.await
+23 -9
View File
@@ -55,7 +55,7 @@ fn sort_clause(column: Option<SortColumn>, direction: Option<SortDirection>) ->
Some(SortColumn::Instructor) => {
format!(
"(SELECT i.display_name FROM course_instructors ci \
JOIN instructors i ON i.banner_id = ci.instructor_id \
JOIN instructors i ON i.id = ci.instructor_id \
WHERE ci.course_id = courses.id AND ci.is_primary = true \
LIMIT 1) {dir} NULLS LAST"
)
@@ -147,12 +147,19 @@ pub async fn get_course_instructors(
) -> Result<Vec<CourseInstructorDetail>> {
let rows = sqlx::query_as::<_, CourseInstructorDetail>(
r#"
SELECT i.banner_id, i.display_name, i.email, ci.is_primary,
rp.avg_rating, rp.num_ratings, i.rmp_legacy_id,
SELECT i.id as instructor_id, ci.banner_id, i.display_name, i.email, ci.is_primary,
rmp.avg_rating, rmp.num_ratings, rmp.rmp_legacy_id,
ci.course_id
FROM course_instructors ci
JOIN instructors i ON i.banner_id = ci.instructor_id
LEFT JOIN rmp_professors rp ON rp.legacy_id = i.rmp_legacy_id
JOIN instructors i ON i.id = ci.instructor_id
LEFT JOIN LATERAL (
SELECT rp.avg_rating, rp.num_ratings, rp.legacy_id as rmp_legacy_id
FROM instructor_rmp_links irl
JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
WHERE irl.instructor_id = i.id
ORDER BY rp.num_ratings DESC NULLS LAST, rp.legacy_id ASC
LIMIT 1
) rmp ON true
WHERE ci.course_id = $1
ORDER BY ci.is_primary DESC, i.display_name
"#,
@@ -176,12 +183,19 @@ pub async fn get_instructors_for_courses(
let rows = sqlx::query_as::<_, CourseInstructorDetail>(
r#"
SELECT i.banner_id, i.display_name, i.email, ci.is_primary,
rp.avg_rating, rp.num_ratings, i.rmp_legacy_id,
SELECT i.id as instructor_id, ci.banner_id, i.display_name, i.email, ci.is_primary,
rmp.avg_rating, rmp.num_ratings, rmp.rmp_legacy_id,
ci.course_id
FROM course_instructors ci
JOIN instructors i ON i.banner_id = ci.instructor_id
LEFT JOIN rmp_professors rp ON rp.legacy_id = i.rmp_legacy_id
JOIN instructors i ON i.id = ci.instructor_id
LEFT JOIN LATERAL (
SELECT rp.avg_rating, rp.num_ratings, rp.legacy_id as rmp_legacy_id
FROM instructor_rmp_links irl
JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
WHERE irl.instructor_id = i.id
ORDER BY rp.num_ratings DESC NULLS LAST, rp.legacy_id ASC
LIMIT 1
) rmp ON true
WHERE ci.course_id = ANY($1)
ORDER BY ci.course_id, ci.is_primary DESC, i.display_name
"#,
+1
View File
@@ -5,6 +5,7 @@ pub mod courses;
pub mod models;
pub mod reference;
pub mod rmp;
pub mod rmp_matching;
pub mod scrape_jobs;
pub mod sessions;
pub mod users;
+7 -4
View File
@@ -99,25 +99,28 @@ pub struct Course {
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct Instructor {
pub banner_id: String,
pub id: i32,
pub display_name: String,
pub email: Option<String>,
pub email: String,
pub rmp_match_status: String,
}
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseInstructor {
pub course_id: i32,
pub instructor_id: String,
pub instructor_id: i32,
pub banner_id: String,
pub is_primary: bool,
}
/// Joined instructor data for a course (from course_instructors + instructors + rmp_professors).
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseInstructorDetail {
pub instructor_id: i32,
pub banner_id: String,
pub display_name: String,
pub email: Option<String>,
pub email: String,
pub is_primary: bool,
pub avg_rating: Option<f32>,
pub num_ratings: Option<i32>,
+13 -115
View File
@@ -3,8 +3,7 @@
use crate::error::Result;
use crate::rmp::RmpProfessor;
use sqlx::PgPool;
use std::collections::{HashMap, HashSet};
use tracing::{debug, info, warn};
use std::collections::HashSet;
/// Bulk upsert RMP professors using the UNNEST pattern.
///
@@ -93,14 +92,14 @@ pub async fn batch_upsert_rmp_professors(
}
/// Normalize a name for matching: lowercase, trim, strip trailing periods.
fn normalize(s: &str) -> String {
pub(crate) fn normalize(s: &str) -> String {
s.trim().to_lowercase().trim_end_matches('.').to_string()
}
/// Parse Banner's "Last, First Middle" display name into (last, first) tokens.
///
/// Returns `None` if the format is unparseable (no comma, empty parts).
fn parse_display_name(display_name: &str) -> Option<(String, String)> {
pub(crate) fn parse_display_name(display_name: &str) -> Option<(String, String)> {
let (last_part, first_part) = display_name.split_once(',')?;
let last = normalize(last_part);
// Take only the first token of the first-name portion to drop middle names/initials.
@@ -111,128 +110,27 @@ fn parse_display_name(display_name: &str) -> Option<(String, String)> {
Some((last, first))
}
/// Auto-match instructors to RMP professors by normalized name.
/// Retrieve RMP rating data for an instructor by instructor id.
///
/// Loads all pending instructors and all RMP professors, then matches in Rust
/// using normalized name comparison. Only assigns a match when exactly one RMP
/// professor matches a given instructor.
pub async fn auto_match_instructors(db_pool: &PgPool) -> Result<u64> {
// Load pending instructors
let instructors: Vec<(String, String)> = sqlx::query_as(
"SELECT banner_id, display_name FROM instructors WHERE rmp_match_status = 'pending'",
)
.fetch_all(db_pool)
.await?;
if instructors.is_empty() {
info!(matched = 0, "No pending instructors to match");
return Ok(0);
}
// Load all RMP professors
let professors: Vec<(i32, String, String)> =
sqlx::query_as("SELECT legacy_id, first_name, last_name FROM rmp_professors")
.fetch_all(db_pool)
.await?;
// Build a lookup: (normalized_last, normalized_first) -> list of legacy_ids
let mut rmp_index: HashMap<(String, String), Vec<i32>> = HashMap::new();
for (legacy_id, first, last) in &professors {
let key = (normalize(last), normalize(first));
rmp_index.entry(key).or_default().push(*legacy_id);
}
// Match each instructor
let mut matches: Vec<(i32, String)> = Vec::new(); // (legacy_id, banner_id)
let mut no_comma = 0u64;
let mut no_match = 0u64;
let mut ambiguous = 0u64;
for (banner_id, display_name) in &instructors {
let Some((last, first)) = parse_display_name(display_name) else {
no_comma += 1;
continue;
};
let key = (last, first);
match rmp_index.get(&key) {
Some(ids) if ids.len() == 1 => {
matches.push((ids[0], banner_id.clone()));
}
Some(ids) => {
ambiguous += 1;
debug!(
banner_id,
display_name,
candidates = ids.len(),
"Ambiguous RMP match, skipping"
);
}
None => {
no_match += 1;
}
}
}
if no_comma > 0 || ambiguous > 0 {
warn!(
total_pending = instructors.len(),
no_comma,
no_match,
ambiguous,
matched = matches.len(),
"RMP matching diagnostics"
);
}
// Batch update matches
if matches.is_empty() {
info!(matched = 0, "Auto-matched instructors to RMP professors");
return Ok(0);
}
let legacy_ids: Vec<i32> = matches.iter().map(|(id, _)| *id).collect();
let banner_ids: Vec<&str> = matches.iter().map(|(_, bid)| bid.as_str()).collect();
let result = sqlx::query(
r#"
UPDATE instructors i
SET
rmp_legacy_id = m.legacy_id,
rmp_match_status = 'auto'
FROM UNNEST($1::int4[], $2::text[]) AS m(legacy_id, banner_id)
WHERE i.banner_id = m.banner_id
"#,
)
.bind(&legacy_ids)
.bind(&banner_ids)
.execute(db_pool)
.await
.map_err(|e| anyhow::anyhow!("Failed to update instructor RMP matches: {}", e))?;
let matched = result.rows_affected();
info!(matched, "Auto-matched instructors to RMP professors");
Ok(matched)
}
/// Retrieve RMP rating data for an instructor by banner_id.
///
/// Returns `(avg_rating, num_ratings)` if the instructor has an RMP match.
/// Returns `(avg_rating, num_ratings)` for the best linked RMP profile
/// (most ratings). Returns `None` if no link exists.
#[allow(dead_code)]
pub async fn get_instructor_rmp_data(
db_pool: &PgPool,
banner_id: &str,
instructor_id: i32,
) -> Result<Option<(f32, i32)>> {
let row: Option<(f32, i32)> = sqlx::query_as(
r#"
SELECT rp.avg_rating, rp.num_ratings
FROM instructors i
JOIN rmp_professors rp ON rp.legacy_id = i.rmp_legacy_id
WHERE i.banner_id = $1
FROM instructor_rmp_links irl
JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
WHERE irl.instructor_id = $1
AND rp.avg_rating IS NOT NULL
ORDER BY rp.num_ratings DESC NULLS LAST
LIMIT 1
"#,
)
.bind(banner_id)
.bind(instructor_id)
.fetch_optional(db_pool)
.await?;
Ok(row)
+513
View File
@@ -0,0 +1,513 @@
//! Confidence scoring and candidate generation for RMP instructor matching.
use crate::data::rmp::{normalize, parse_display_name};
use crate::error::Result;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use std::collections::{HashMap, HashSet};
use tracing::{debug, info};
// ---------------------------------------------------------------------------
// Scoring types
// ---------------------------------------------------------------------------
/// Breakdown of individual scoring signals.
///
/// Each field holds the raw (unweighted) signal value that
/// `compute_match_score` combined into the composite score. The struct is
/// serialized to JSON and stored alongside each candidate row.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScoreBreakdown {
/// Department-overlap signal (`1.0` match, `0.5` unknown, `0.2` mismatch).
pub department: f32,
/// Name-uniqueness signal; lower when several RMP profiles share the name.
pub uniqueness: f32,
/// Rating-volume signal, clamped to `0.0..=1.0`.
pub volume: f32,
}
/// Result of scoring a single instructor-RMP candidate pair.
#[derive(Debug, Clone)]
pub struct MatchScore {
/// Weighted composite of the three signals in `breakdown`.
pub score: f32,
/// The individual signal values the composite was built from.
pub breakdown: ScoreBreakdown,
}
// ---------------------------------------------------------------------------
// Thresholds
// ---------------------------------------------------------------------------
/// Minimum composite score to store a candidate row.
///
/// Candidates scoring strictly below this value are discarded during
/// candidate generation; a score equal to it is kept.
const MIN_CANDIDATE_THRESHOLD: f32 = 0.40;
/// Score at or above which a candidate is auto-accepted.
///
/// Only the best-scoring new candidate of an instructor is compared against
/// this value.
const AUTO_ACCEPT_THRESHOLD: f32 = 0.85;
// ---------------------------------------------------------------------------
// Weights
// ---------------------------------------------------------------------------
// The three weights sum to 1.0, so the composite score stays within
// 0.0..=1.0 as long as every individual signal does.
/// Weight of the department-overlap signal in the composite score.
const WEIGHT_DEPARTMENT: f32 = 0.50;
/// Weight of the name-uniqueness signal in the composite score.
const WEIGHT_UNIQUENESS: f32 = 0.30;
/// Weight of the rating-volume signal in the composite score.
const WEIGHT_VOLUME: f32 = 0.20;
// ---------------------------------------------------------------------------
// Pure scoring functions
// ---------------------------------------------------------------------------
/// Check if an instructor's subjects overlap with an RMP department.
///
/// Returns `1.0` for a match, `0.2` for a mismatch, and `0.5` when the RMP
/// department is unknown (`None` or blank).
///
/// Comparison is case-insensitive and anchored at word starts: a subject
/// matches when it begins a word of the department name (or vice versa), or
/// when it is a known abbreviation of it. Plain substring matching is
/// deliberately avoided because short subject codes produce false positives
/// ("cs" is a substring of "physics", "is" of "history").
fn department_similarity(subjects: &[String], rmp_department: Option<&str>) -> f32 {
    let dept_lower = match rmp_department.map(str::trim) {
        Some(dept) if !dept.is_empty() => dept.to_lowercase(),
        // A missing or blank department carries no signal either way.
        _ => return 0.5,
    };

    let overlaps = subjects
        .iter()
        .map(|subj| subj.trim().to_lowercase())
        // A blank subject would trivially "match" everything; ignore it.
        .filter(|subj| !subj.is_empty())
        .any(|subj| {
            occurs_at_word_start(&dept_lower, &subj)
                || occurs_at_word_start(&subj, &dept_lower)
                // Handle common UTSA abbreviation mappings.
                || matches_known_abbreviation(&subj, &dept_lower)
        });

    if overlaps { 1.0 } else { 0.2 }
}

/// Report whether `needle` occurs in `haystack` at the start of a word.
///
/// A word start is the beginning of the string or any position whose
/// preceding character is not alphanumeric. An empty `needle` never matches.
fn occurs_at_word_start(haystack: &str, needle: &str) -> bool {
    if needle.is_empty() {
        return false;
    }
    haystack.match_indices(needle).any(|(start, _)| {
        // `start` is a char boundary, so slicing up to it is always valid.
        !haystack[..start]
            .chars()
            .next_back()
            .is_some_and(char::is_alphanumeric)
    })
}

/// Expand common subject abbreviations used at UTSA and check for overlap.
///
/// Both arguments are expected to be lowercase already. Returns `true` when
/// `subject` is a known abbreviation and one of its expansions starts a word
/// in `department`; unknown abbreviations return `false`.
fn matches_known_abbreviation(subject: &str, department: &str) -> bool {
    const MAPPINGS: &[(&str, &[&str])] = &[
        ("cs", &["computer science"]),
        ("ece", &["electrical", "computer engineering"]),
        ("ee", &["electrical engineering", "electrical"]),
        ("me", &["mechanical engineering", "mechanical"]),
        ("ce", &["civil engineering", "civil"]),
        ("bio", &["biology", "biological"]),
        ("chem", &["chemistry"]),
        ("phys", &["physics"]),
        ("math", &["mathematics"]),
        ("sta", &["statistics"]),
        ("eng", &["english"]),
        ("his", &["history"]),
        ("pol", &["political science"]),
        ("psy", &["psychology"]),
        ("soc", &["sociology"]),
        ("mus", &["music"]),
        ("art", &["art"]),
        ("phi", &["philosophy"]),
        ("eco", &["economics"]),
        ("acc", &["accounting"]),
        ("fin", &["finance"]),
        ("mgt", &["management"]),
        ("mkt", &["marketing"]),
        ("is", &["information systems"]),
        ("ms", &["management science"]),
        ("kin", &["kinesiology"]),
        ("com", &["communication"]),
    ];

    MAPPINGS
        .iter()
        .find(|(abbr, _)| *abbr == subject)
        .is_some_and(|(_, expansions)| {
            expansions
                .iter()
                .any(|&expansion| occurs_at_word_start(department, expansion))
        })
}
/// Compute match confidence score (0.0 to 1.0) for an instructor-RMP pair.
///
/// Name matching is handled by the caller via pre-filtering on exact
/// normalized `(last, first)`, so only department, uniqueness, and volume
/// signals are scored here.
///
/// * `instructor_subjects` - subject codes the instructor teaches.
/// * `rmp_department` - department of the RMP profile, if known.
/// * `candidate_count` - number of RMP profiles sharing the instructor's name.
/// * `rmp_num_ratings` - rating count of the RMP profile; negative values
///   are treated as zero.
///
/// Returns the weighted composite together with the raw signal values.
pub fn compute_match_score(
    instructor_subjects: &[String],
    rmp_department: Option<&str>,
    candidate_count: usize,
    rmp_num_ratings: i32,
) -> MatchScore {
    // --- Department (0.50) ---
    let dept_score = department_similarity(instructor_subjects, rmp_department);

    // --- Uniqueness (0.30) ---
    let uniqueness_score = match candidate_count {
        0 | 1 => 1.0,
        2 => 0.5,
        _ => 0.2,
    };

    // --- Volume (0.20) ---
    // Logarithmic scale that saturates at 5 ratings. The count is clamped at
    // zero first: `ln_1p` of a value below -1 is NaN, and a NaN composite
    // would slip past every `<` threshold comparison downstream.
    let ratings = rmp_num_ratings.max(0) as f32;
    let volume_score = (ratings.ln_1p() / 5.0_f32.ln_1p()).clamp(0.0, 1.0);

    let composite = dept_score * WEIGHT_DEPARTMENT
        + uniqueness_score * WEIGHT_UNIQUENESS
        + volume_score * WEIGHT_VOLUME;

    MatchScore {
        score: composite,
        breakdown: ScoreBreakdown {
            department: dept_score,
            uniqueness: uniqueness_score,
            volume: volume_score,
        },
    }
}
// ---------------------------------------------------------------------------
// Candidate generation (DB)
// ---------------------------------------------------------------------------
/// Statistics returned from candidate generation.
#[derive(Debug)]
pub struct MatchingStats {
/// Number of instructors with status `'unmatched'` when the run started.
pub total_unmatched: usize,
/// Number of new candidate rows produced by the run.
pub candidates_created: usize,
/// Number of instructors auto-accepted during the run.
pub auto_matched: usize,
/// Instructors skipped because their display name could not be parsed.
pub skipped_unparseable: usize,
/// Instructors skipped because no RMP professor shares their normalized name.
pub skipped_no_candidates: usize,
}
/// Lightweight row for building the in-memory RMP name index.
///
/// Holds only the `rmp_professors` columns needed for scoring; the name
/// itself is the index key and is therefore not stored here.
struct RmpProfForMatching {
/// `rmp_professors.legacy_id` of the profile.
legacy_id: i32,
/// Department reported for the profile, if any.
department: Option<String>,
/// Rating count of the profile.
num_ratings: i32,
}
/// Generate match candidates for all unmatched instructors.
///
/// For each unmatched instructor:
/// 1. Parse `display_name` into (last, first).
/// 2. Find RMP professors with matching normalized name.
/// 3. Score each candidate.
/// 4. Store candidates scoring at or above [`MIN_CANDIDATE_THRESHOLD`].
/// 5. Auto-accept if the top candidate scores >= [`AUTO_ACCEPT_THRESHOLD`]
///    and no existing rejected candidate exists for that pair.
///
/// Already-evaluated instructor-RMP pairs (any status) are skipped.
///
/// An auto-accept only takes effect when the link row is actually inserted.
/// If the RMP profile is already linked (the insert hits the
/// `rmp_legacy_id` conflict), the candidate stays in its initial state for
/// manual review and the instructor remains unmatched.
///
/// All writes happen in a single transaction.
///
/// # Errors
///
/// Returns an error if any query, or the transaction begin/commit, fails.
pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
    // 1. Load unmatched instructors
    let instructors: Vec<(i32, String)> = sqlx::query_as(
        "SELECT id, display_name FROM instructors WHERE rmp_match_status = 'unmatched'",
    )
    .fetch_all(db_pool)
    .await?;

    if instructors.is_empty() {
        info!("No unmatched instructors to generate candidates for");
        return Ok(MatchingStats {
            total_unmatched: 0,
            candidates_created: 0,
            auto_matched: 0,
            skipped_unparseable: 0,
            skipped_no_candidates: 0,
        });
    }

    let instructor_ids: Vec<i32> = instructors.iter().map(|(id, _)| *id).collect();
    let total_unmatched = instructors.len();

    // 2. Load instructor subjects
    let subject_rows: Vec<(i32, String)> = sqlx::query_as(
        r#"
        SELECT DISTINCT ci.instructor_id, c.subject
        FROM course_instructors ci
        JOIN courses c ON c.id = ci.course_id
        WHERE ci.instructor_id = ANY($1)
        "#,
    )
    .bind(&instructor_ids)
    .fetch_all(db_pool)
    .await?;

    let mut subject_map: HashMap<i32, Vec<String>> = HashMap::new();
    for (iid, subject) in subject_rows {
        subject_map.entry(iid).or_default().push(subject);
    }

    // 3. Load all RMP professors
    let prof_rows: Vec<(i32, String, String, Option<String>, i32)> = sqlx::query_as(
        "SELECT legacy_id, first_name, last_name, department, num_ratings FROM rmp_professors",
    )
    .fetch_all(db_pool)
    .await?;

    // Build name index: (normalized_last, normalized_first) -> Vec<RmpProfForMatching>
    let mut name_index: HashMap<(String, String), Vec<RmpProfForMatching>> = HashMap::new();
    for (legacy_id, first_name, last_name, department, num_ratings) in prof_rows {
        let key = (normalize(&last_name), normalize(&first_name));
        name_index.entry(key).or_default().push(RmpProfForMatching {
            legacy_id,
            department,
            num_ratings,
        });
    }

    // 4. Load existing candidate pairs (and rejected subset) in a single query
    let candidate_rows: Vec<(i32, i32, String)> =
        sqlx::query_as("SELECT instructor_id, rmp_legacy_id, status FROM rmp_match_candidates")
            .fetch_all(db_pool)
            .await?;

    let mut existing_pairs: HashSet<(i32, i32)> = HashSet::with_capacity(candidate_rows.len());
    let mut rejected_pairs: HashSet<(i32, i32)> = HashSet::new();
    for (iid, lid, status) in candidate_rows {
        existing_pairs.insert((iid, lid));
        if status == "rejected" {
            rejected_pairs.insert((iid, lid));
        }
    }

    // 5. Score and collect candidates
    let empty_subjects: Vec<String> = Vec::new();
    let mut candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
    let mut auto_accept: Vec<(i32, i32)> = Vec::new(); // (instructor_id, legacy_id)
    let mut skipped_unparseable = 0usize;
    let mut skipped_no_candidates = 0usize;

    for (instructor_id, display_name) in &instructors {
        let Some((norm_last, norm_first)) = parse_display_name(display_name) else {
            skipped_unparseable += 1;
            debug!(
                instructor_id,
                display_name, "Unparseable display name, skipping"
            );
            continue;
        };

        let subjects = subject_map.get(instructor_id).unwrap_or(&empty_subjects);

        let Some(rmp_candidates) = name_index.get(&(norm_last, norm_first)) else {
            skipped_no_candidates += 1;
            continue;
        };

        // Uniqueness is judged over every profile sharing the name, including
        // pairs that were already evaluated in an earlier run.
        let candidate_count = rmp_candidates.len();
        let mut best: Option<(f32, i32)> = None;

        for prof in rmp_candidates {
            let pair = (*instructor_id, prof.legacy_id);
            if existing_pairs.contains(&pair) {
                continue;
            }

            let ms = compute_match_score(
                subjects,
                prof.department.as_deref(),
                candidate_count,
                prof.num_ratings,
            );

            if ms.score < MIN_CANDIDATE_THRESHOLD {
                continue;
            }

            let breakdown_json =
                serde_json::to_value(&ms.breakdown).unwrap_or_else(|_| serde_json::json!({}));
            candidates.push((*instructor_id, prof.legacy_id, ms.score, breakdown_json));

            // Keep the first candidate seen on ties.
            match best {
                Some((s, _)) if ms.score > s => best = Some((ms.score, prof.legacy_id)),
                None => best = Some((ms.score, prof.legacy_id)),
                _ => {}
            }
        }

        // Auto-accept the top candidate if it meets the threshold and is not
        // previously rejected. (Rejected pairs are also in `existing_pairs`
        // and so were skipped above; the check is kept as a safeguard.)
        if let Some((score, legacy_id)) = best
            && score >= AUTO_ACCEPT_THRESHOLD
            && !rejected_pairs.contains(&(*instructor_id, legacy_id))
        {
            auto_accept.push((*instructor_id, legacy_id));
        }
    }

    // 6-7. Write candidates and auto-accept within a single transaction
    let candidates_created = candidates.len();
    let mut auto_matched = 0usize;
    let mut tx = db_pool.begin().await?;

    // 6. Batch-insert candidates
    if !candidates.is_empty() {
        let c_instructor_ids: Vec<i32> = candidates.iter().map(|(iid, _, _, _)| *iid).collect();
        let c_legacy_ids: Vec<i32> = candidates.iter().map(|(_, lid, _, _)| *lid).collect();
        let c_scores: Vec<f32> = candidates.iter().map(|(_, _, s, _)| *s).collect();
        let c_breakdowns: Vec<serde_json::Value> =
            candidates.into_iter().map(|(_, _, _, b)| b).collect();

        sqlx::query(
            r#"
            INSERT INTO rmp_match_candidates (instructor_id, rmp_legacy_id, score, score_breakdown)
            SELECT v.instructor_id, v.rmp_legacy_id, v.score, v.score_breakdown
            FROM UNNEST($1::int4[], $2::int4[], $3::real[], $4::jsonb[])
                AS v(instructor_id, rmp_legacy_id, score, score_breakdown)
            ON CONFLICT (instructor_id, rmp_legacy_id) DO NOTHING
            "#,
        )
        .bind(&c_instructor_ids)
        .bind(&c_legacy_ids)
        .bind(&c_scores)
        .bind(&c_breakdowns)
        .execute(&mut *tx)
        .await?;
    }

    // 7. Auto-accept top candidates
    if !auto_accept.is_empty() {
        let (aa_instructor_ids, aa_legacy_ids): (Vec<i32>, Vec<i32>) =
            auto_accept.iter().copied().unzip();

        // Insert the links first. A profile that is already linked (or that
        // two instructors in this batch compete for) is skipped by the
        // conflict clause, so RETURNING yields only the links that really
        // exist now. Everything below is restricted to those pairs; marking
        // the other instructors as matched would leave them without a link.
        let linked: Vec<(i32, i32)> = sqlx::query_as(
            r#"
            INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, source)
            SELECT v.instructor_id, v.rmp_legacy_id, 'auto'
            FROM UNNEST($1::int4[], $2::int4[]) AS v(instructor_id, rmp_legacy_id)
            ON CONFLICT (rmp_legacy_id) DO NOTHING
            RETURNING instructor_id, rmp_legacy_id
            "#,
        )
        .bind(&aa_instructor_ids)
        .bind(&aa_legacy_ids)
        .fetch_all(&mut *tx)
        .await?;

        auto_matched = linked.len();

        if !linked.is_empty() {
            let (linked_instructor_ids, linked_legacy_ids): (Vec<i32>, Vec<i32>) =
                linked.iter().copied().unzip();

            // Mark the candidate row as accepted
            sqlx::query(
                r#"
                UPDATE rmp_match_candidates mc
                SET status = 'accepted', resolved_at = NOW()
                FROM UNNEST($1::int4[], $2::int4[]) AS v(instructor_id, rmp_legacy_id)
                WHERE mc.instructor_id = v.instructor_id
                  AND mc.rmp_legacy_id = v.rmp_legacy_id
                "#,
            )
            .bind(&linked_instructor_ids)
            .bind(&linked_legacy_ids)
            .execute(&mut *tx)
            .await?;

            // Update instructor match status
            sqlx::query(
                r#"
                UPDATE instructors i
                SET rmp_match_status = 'auto'
                FROM UNNEST($1::int4[]) AS v(instructor_id)
                WHERE i.id = v.instructor_id
                "#,
            )
            .bind(&linked_instructor_ids)
            .execute(&mut *tx)
            .await?;
        }
    }

    tx.commit().await?;

    let stats = MatchingStats {
        total_unmatched,
        candidates_created,
        auto_matched,
        skipped_unparseable,
        skipped_no_candidates,
    };

    info!(
        total_unmatched = stats.total_unmatched,
        candidates_created = stats.candidates_created,
        auto_matched = stats.auto_matched,
        skipped_unparseable = stats.skipped_unparseable,
        skipped_no_candidates = stats.skipped_no_candidates,
        "Candidate generation complete"
    );

    Ok(stats)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Unit tests for the pure scoring function; no database is involved.
#[cfg(test)]
mod tests {
use super::*;
// A unique name, matching department, and plenty of ratings must clear the
// auto-accept bar.
#[test]
fn test_ideal_candidate_high_score() {
let ms = compute_match_score(
&["CS".to_string()],
Some("Computer Science"),
1, // unique candidate
50, // decent ratings
);
// dept 1.0*0.50 + unique 1.0*0.30 + volume 1.0*0.20 = 1.0
// (the volume signal clamps to 1.0 from 5 ratings upward)
assert!(ms.score >= 0.85, "Expected score >= 0.85, got {}", ms.score);
assert_eq!(ms.breakdown.uniqueness, 1.0);
assert_eq!(ms.breakdown.department, 1.0);
}
// With everything else equal, three same-name profiles score below one.
#[test]
fn test_ambiguous_candidates_lower_score() {
let unique = compute_match_score(&[], None, 1, 10);
let ambiguous = compute_match_score(&[], None, 3, 10);
assert!(
unique.score > ambiguous.score,
"Unique ({}) should outscore ambiguous ({})",
unique.score,
ambiguous.score
);
assert_eq!(unique.breakdown.uniqueness, 1.0);
assert_eq!(ambiguous.breakdown.uniqueness, 0.2);
}
// An unknown RMP department yields the neutral 0.5 signal.
#[test]
fn test_no_department_neutral() {
let ms = compute_match_score(&["CS".to_string()], None, 1, 10);
assert_eq!(ms.breakdown.department, 0.5);
}
// "CS" resolves to "Computer Science" through the abbreviation table.
#[test]
fn test_department_match() {
let ms = compute_match_score(&["CS".to_string()], Some("Computer Science"), 1, 10);
assert_eq!(ms.breakdown.department, 1.0);
}
// A known but unrelated department yields the 0.2 mismatch signal.
#[test]
fn test_department_mismatch() {
let ms = compute_match_score(&["CS".to_string()], Some("History"), 1, 10);
assert_eq!(ms.breakdown.department, 0.2);
}
// The department signal must be reflected in the composite score.
#[test]
fn test_department_match_outscores_mismatch() {
let matched = compute_match_score(&["CS".to_string()], Some("Computer Science"), 1, 10);
let mismatched = compute_match_score(&["CS".to_string()], Some("History"), 1, 10);
assert!(
matched.score > mismatched.score,
"Department match ({}) should outscore mismatch ({})",
matched.score,
mismatched.score
);
}
// Zero ratings give a volume signal of exactly 0.0; many ratings approach 1.0.
#[test]
fn test_volume_scaling() {
let zero = compute_match_score(&[], None, 1, 0);
let many = compute_match_score(&[], None, 1, 100);
assert!(
many.breakdown.volume > zero.breakdown.volume,
"100 ratings ({}) should outscore 0 ratings ({})",
many.breakdown.volume,
zero.breakdown.volume
);
assert_eq!(zero.breakdown.volume, 0.0);
assert!(
many.breakdown.volume > 0.9,
"100 ratings should be near max"
);
}
}
+10 -2
View File
@@ -250,8 +250,16 @@ impl Scheduler {
crate::data::rmp::batch_upsert_rmp_professors(&professors, db_pool).await?;
info!(total, "RMP professors upserted");
let matched = crate::data::rmp::auto_match_instructors(db_pool).await?;
info!(total, matched, "RMP sync complete");
let stats = crate::data::rmp_matching::generate_candidates(db_pool).await?;
info!(
total,
stats.total_unmatched,
stats.candidates_created,
stats.auto_matched,
stats.skipped_unparseable,
stats.skipped_no_candidates,
"RMP sync complete"
);
Ok(())
}
+865
View File
@@ -0,0 +1,865 @@
//! Admin API handlers for RMP instructor matching management.
use axum::extract::{Path, Query, State};
use axum::http::StatusCode;
use axum::response::Json;
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use ts_rs::TS;
use crate::state::AppState;
use crate::web::extractors::AdminUser;
// ---------------------------------------------------------------------------
// Query / body types
// ---------------------------------------------------------------------------
// Parameters for the instructor list endpoint. All fields are optional.
// NOTE(review): presumably extracted from the URL query string; the handler
// that consumes this type is not visible here, so defaults and accepted
// values for `status` / `sort` should be confirmed against it.
#[derive(Deserialize)]
pub struct ListInstructorsParams {
// Optional filter value (likely an rmp_match_status) - confirm in handler.
status: Option<String>,
// Optional free-text search term.
search: Option<String>,
// Requested page number.
page: Option<i32>,
// Requested page size.
per_page: Option<i32>,
// Optional sort key.
sort: Option<String>,
}
// Request body identifying the RMP profile to match an instructor with.
// Because of `rename_all = "camelCase"`, the JSON key is `rmpLegacyId`.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MatchBody {
rmp_legacy_id: i32,
}
// Request body identifying the RMP candidate to reject for an instructor.
// Because of `rename_all = "camelCase"`, the JSON key is `rmpLegacyId`.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RejectCandidateBody {
rmp_legacy_id: i32,
}
// ---------------------------------------------------------------------------
// Response types
// ---------------------------------------------------------------------------
/// Simple acknowledgement response for mutating operations.
// Serialized as `{"ok": <bool>}`; a TypeScript binding is exported via ts-rs.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct OkResponse {
pub ok: bool,
}
/// A top-candidate summary shown in the instructor list view.
// Field names are emitted in camelCase (`rmpLegacyId`, `scoreBreakdown`, ...).
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct TopCandidateResponse {
// Legacy id of the candidate RMP profile.
pub rmp_legacy_id: i32,
// Composite match score, when one is available.
pub score: Option<f32>,
// Raw JSON on the Rust side; the exported TypeScript type is declared as an
// optional string-to-number map instead of an arbitrary JSON value.
#[ts(as = "Option<std::collections::HashMap<String, f32>>")]
pub score_breakdown: Option<serde_json::Value>,
// Profile details of the candidate; every one of them may be absent.
pub first_name: Option<String>,
pub last_name: Option<String>,
pub department: Option<String>,
pub avg_rating: Option<f32>,
pub num_ratings: Option<i32>,
}
/// An instructor row in the paginated list.
// The `i64` counters carry `#[ts(as = "i32")]`, so the exported TypeScript
// binding uses the type ts-rs generates for `i32` rather than the one for `i64`.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct InstructorListItem {
pub id: i32,
pub display_name: String,
pub email: String,
// Match status string of the instructor (e.g. 'unmatched' or 'auto').
pub rmp_match_status: String,
// Number of RMP profiles linked to this instructor.
#[ts(as = "i32")]
pub rmp_link_count: i64,
// Number of match candidates recorded for this instructor.
#[ts(as = "i32")]
pub candidate_count: i64,
// NOTE(review): presumably the number of distinct subjects taught - confirm
// against the query that fills it.
#[ts(as = "i32")]
pub course_subject_count: i64,
// Best candidate summary, if the instructor has any candidate.
pub top_candidate: Option<TopCandidateResponse>,
}
/// Aggregate status counts for the instructor list.
// Every counter is `i64` in Rust but exported to TypeScript with the `i32`
// mapping (see the `#[ts(as = "i32")]` attributes).
// NOTE(review): the per-status fields presumably count instructors by
// `rmp_match_status`; the query producing them is not visible here.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct InstructorStats {
#[ts(as = "i32")]
pub total: i64,
#[ts(as = "i32")]
pub unmatched: i64,
#[ts(as = "i32")]
pub auto: i64,
#[ts(as = "i32")]
pub confirmed: i64,
#[ts(as = "i32")]
pub rejected: i64,
#[ts(as = "i32")]
pub with_candidates: i64,
}
/// Response for `GET /api/admin/instructors`.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct ListInstructorsResponse {
// The instructors on the returned page.
pub instructors: Vec<InstructorListItem>,
// Total count; exported to TypeScript with the `i32` mapping.
// NOTE(review): presumably the number of rows matching the filters across
// all pages - confirm in the handler.
#[ts(as = "i32")]
pub total: i64,
// Page number and page size of this response.
pub page: i32,
pub per_page: i32,
// Aggregate status counts shown alongside the list.
pub stats: InstructorStats,
}
/// Instructor summary in the detail view.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct InstructorDetail {
pub id: i32,
pub display_name: String,
pub email: String,
// Match status string of the instructor (e.g. 'unmatched' or 'auto').
pub rmp_match_status: String,
// Subject codes the instructor teaches.
pub subjects_taught: Vec<String>,
// Number of courses; exported to TypeScript with the `i32` mapping.
#[ts(as = "i32")]
pub course_count: i64,
}
/// A linked RMP profile in the detail view.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct LinkedRmpProfile {
// Identifier of the link row itself, as opposed to the profile's `legacy_id`.
pub link_id: i32,
// Legacy id of the linked RMP profile.
pub legacy_id: i32,
// Profile details; every one of them may be absent.
pub first_name: Option<String>,
pub last_name: Option<String>,
pub department: Option<String>,
pub avg_rating: Option<f32>,
pub avg_difficulty: Option<f32>,
pub num_ratings: Option<i32>,
pub would_take_again_pct: Option<f32>,
}
/// A match candidate in the detail view.
///
/// One entry per `rmp_match_candidates` row for the instructor (any status),
/// joined to the candidate's `rmp_professors` row for the profile fields.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct CandidateResponse {
/// `rmp_match_candidates.id`.
pub id: i32,
/// `rmp_match_candidates.rmp_legacy_id`, the RMP profile this candidate points at.
pub rmp_legacy_id: i32,
pub first_name: Option<String>,
pub last_name: Option<String>,
pub department: Option<String>,
pub avg_rating: Option<f32>,
pub avg_difficulty: Option<f32>,
pub num_ratings: Option<i32>,
pub would_take_again_pct: Option<f32>,
/// `rmp_match_candidates.score`.
pub score: Option<f32>,
/// `rmp_match_candidates.score_breakdown` passed through as raw JSON.
/// The TypeScript binding declares it as a string-to-number map.
// NOTE(review): the actual JSON shape is produced elsewhere — confirm it
// really is a flat map of numbers.
#[ts(as = "Option<std::collections::HashMap<String, f32>>")]
pub score_breakdown: Option<serde_json::Value>,
/// Raw `rmp_match_candidates.status`; handlers in this file write
/// `accepted` and `rejected` and look for `pending`.
pub status: String,
}
/// Response for `GET /api/admin/instructors/{id}` and `POST .../match`.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct InstructorDetailResponse {
/// The instructor's own fields plus teaching aggregates.
pub instructor: InstructorDetail,
/// All RMP profiles currently linked to the instructor.
pub current_matches: Vec<LinkedRmpProfile>,
/// All match candidates for the instructor, in every status.
pub candidates: Vec<CandidateResponse>,
}
/// Response for `POST /api/admin/rmp/rescore`.
///
/// Every field is copied from the same-named field of the stats value
/// returned by `crate::data::rmp_matching::generate_candidates`.
// NOTE(review): the exact meaning of each counter is defined by
// `generate_candidates`, which is outside this file — confirm there.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct RescoreResponse {
pub total_unmatched: usize,
pub candidates_created: usize,
pub auto_matched: usize,
pub skipped_unparseable: usize,
pub skipped_no_candidates: usize,
}
// ---------------------------------------------------------------------------
// Helper: map sqlx errors to the standard admin error tuple
// ---------------------------------------------------------------------------
fn db_error(context: &str, e: sqlx::Error) -> (StatusCode, Json<Value>) {
tracing::error!(error = %e, "{context}");
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": context})),
)
}
// ---------------------------------------------------------------------------
// Row types for SQL queries
// ---------------------------------------------------------------------------
/// One row of the main `list_instructors` query: an `instructors` row plus
/// aggregate counts and the columns of its top pending candidate.
///
/// Field names must match the column aliases in that query, because
/// `sqlx::FromRow` maps by name.
#[derive(sqlx::FromRow)]
struct InstructorRow {
id: i32,
display_name: String,
email: String,
rmp_match_status: String,
// Scalar subquery: COUNT(*) over `instructor_rmp_links`.
rmp_link_count: Option<i64>,
// `top_candidate_*` come from the LEFT JOIN LATERAL on pending candidates
// and are NULL when the instructor has none.
top_candidate_rmp_id: Option<i32>,
top_candidate_score: Option<f32>,
top_candidate_breakdown: Option<serde_json::Value>,
// `tc_*` come from the `rmp_professors` row LEFT JOINed to the top candidate.
tc_first_name: Option<String>,
tc_last_name: Option<String>,
tc_department: Option<String>,
tc_avg_rating: Option<f32>,
tc_num_ratings: Option<i32>,
// Scalar subquery: COUNT(*) of pending `rmp_match_candidates`.
candidate_count: Option<i64>,
// Scalar subquery: COUNT(DISTINCT subject) over the instructor's courses.
course_subject_count: Option<i64>,
}
/// One group of the `GROUP BY rmp_match_status` stats query.
#[derive(sqlx::FromRow)]
struct StatusCount {
// The status value shared by the group.
rmp_match_status: String,
// Number of instructors in the group (`COUNT(*) as count`).
count: i64,
}
/// One row of the candidates query in `build_instructor_detail`:
/// an `rmp_match_candidates` row joined to its `rmp_professors` row.
#[derive(sqlx::FromRow)]
struct CandidateRow {
// Columns selected from `rmp_match_candidates` (alias `mc`).
id: i32,
rmp_legacy_id: i32,
score: Option<f32>,
score_breakdown: Option<serde_json::Value>,
status: String,
// Columns selected from `rmp_professors` (alias `rp`).
first_name: Option<String>,
last_name: Option<String>,
department: Option<String>,
avg_rating: Option<f32>,
avg_difficulty: Option<f32>,
num_ratings: Option<i32>,
would_take_again_pct: Option<f32>,
}
/// One row of the linked-profiles query in `build_instructor_detail`:
/// an `instructor_rmp_links` row joined to its `rmp_professors` row.
#[derive(sqlx::FromRow)]
struct LinkedRmpProfileRow {
// `instructor_rmp_links.id`, selected as `link_id`.
link_id: i32,
// Remaining columns are selected from `rmp_professors` (alias `rp`).
legacy_id: i32,
first_name: Option<String>,
last_name: Option<String>,
department: Option<String>,
avg_rating: Option<f32>,
avg_difficulty: Option<f32>,
num_ratings: Option<i32>,
would_take_again_pct: Option<f32>,
}
// ---------------------------------------------------------------------------
// 1. GET /api/admin/instructors — paginated list with filtering
// ---------------------------------------------------------------------------
/// `GET /api/admin/instructors` — List instructors with filtering and pagination.
pub async fn list_instructors(
AdminUser(_user): AdminUser,
State(state): State<AppState>,
Query(params): Query<ListInstructorsParams>,
) -> Result<Json<ListInstructorsResponse>, (StatusCode, Json<Value>)> {
let page = params.page.unwrap_or(1).max(1);
let per_page = params.per_page.unwrap_or(50).clamp(1, 100);
let offset = (page - 1) * per_page;
let sort_clause = match params.sort.as_deref() {
Some("name_asc") => "i.display_name ASC",
Some("name_desc") => "i.display_name DESC",
Some("status") => "i.rmp_match_status ASC, i.display_name ASC",
_ => "tc.score DESC NULLS LAST, i.display_name ASC",
};
// Build WHERE clause
let mut conditions = Vec::new();
let mut bind_idx = 0u32;
if params.status.is_some() {
bind_idx += 1;
conditions.push(format!("i.rmp_match_status = ${bind_idx}"));
}
if params.search.is_some() {
bind_idx += 1;
conditions.push(format!(
"(i.display_name ILIKE ${bind_idx} OR i.email ILIKE ${bind_idx})"
));
}
let where_clause = if conditions.is_empty() {
String::new()
} else {
format!("WHERE {}", conditions.join(" AND "))
};
let query_str = format!(
r#"
SELECT
i.id, i.display_name, i.email, i.rmp_match_status,
(SELECT COUNT(*) FROM instructor_rmp_links irl WHERE irl.instructor_id = i.id) as rmp_link_count,
tc.rmp_legacy_id as top_candidate_rmp_id,
tc.score as top_candidate_score,
tc.score_breakdown as top_candidate_breakdown,
rp.first_name as tc_first_name,
rp.last_name as tc_last_name,
rp.department as tc_department,
rp.avg_rating as tc_avg_rating,
rp.num_ratings as tc_num_ratings,
(SELECT COUNT(*) FROM rmp_match_candidates mc WHERE mc.instructor_id = i.id AND mc.status = 'pending') as candidate_count,
(SELECT COUNT(DISTINCT c.subject) FROM course_instructors ci JOIN courses c ON c.id = ci.course_id WHERE ci.instructor_id = i.id) as course_subject_count
FROM instructors i
LEFT JOIN LATERAL (
SELECT mc.rmp_legacy_id, mc.score, mc.score_breakdown
FROM rmp_match_candidates mc
WHERE mc.instructor_id = i.id AND mc.status = 'pending'
ORDER BY mc.score DESC
LIMIT 1
) tc ON true
LEFT JOIN rmp_professors rp ON rp.legacy_id = tc.rmp_legacy_id
{where_clause}
ORDER BY {sort_clause}
LIMIT {per_page} OFFSET {offset}
"#
);
// Build the query with dynamic binds
let mut query = sqlx::query_as::<_, InstructorRow>(&query_str);
if let Some(ref status) = params.status {
query = query.bind(status);
}
if let Some(ref search) = params.search {
query = query.bind(format!("%{search}%"));
}
let rows = query
.fetch_all(&state.db_pool)
.await
.map_err(|e| db_error("failed to list instructors", e))?;
// Count total with filters
let count_query_str = format!("SELECT COUNT(*) FROM instructors i {where_clause}");
let mut count_query = sqlx::query_as::<_, (i64,)>(&count_query_str);
if let Some(ref status) = params.status {
count_query = count_query.bind(status);
}
if let Some(ref search) = params.search {
count_query = count_query.bind(format!("%{search}%"));
}
let (total,) = count_query
.fetch_one(&state.db_pool)
.await
.map_err(|e| db_error("failed to count instructors", e))?;
// Aggregate stats (unfiltered)
let stats_rows = sqlx::query_as::<_, StatusCount>(
"SELECT rmp_match_status, COUNT(*) as count FROM instructors GROUP BY rmp_match_status",
)
.fetch_all(&state.db_pool)
.await
.map_err(|e| db_error("failed to get instructor stats", e))?;
// Count instructors with at least one candidate (for progress bar denominator)
let (with_candidates,): (i64,) =
sqlx::query_as("SELECT COUNT(DISTINCT instructor_id) FROM rmp_match_candidates")
.fetch_one(&state.db_pool)
.await
.map_err(|e| db_error("failed to count instructors with candidates", e))?;
let mut stats = InstructorStats {
total: 0,
unmatched: 0,
auto: 0,
confirmed: 0,
rejected: 0,
with_candidates,
};
for row in &stats_rows {
stats.total += row.count;
match row.rmp_match_status.as_str() {
"unmatched" => stats.unmatched = row.count,
"auto" => stats.auto = row.count,
"confirmed" => stats.confirmed = row.count,
"rejected" => stats.rejected = row.count,
_ => {}
}
}
let instructors: Vec<InstructorListItem> = rows
.iter()
.map(|r| {
let top_candidate = r.top_candidate_rmp_id.map(|rmp_id| TopCandidateResponse {
rmp_legacy_id: rmp_id,
score: r.top_candidate_score,
score_breakdown: r.top_candidate_breakdown.clone(),
first_name: r.tc_first_name.clone(),
last_name: r.tc_last_name.clone(),
department: r.tc_department.clone(),
avg_rating: r.tc_avg_rating,
num_ratings: r.tc_num_ratings,
});
InstructorListItem {
id: r.id,
display_name: r.display_name.clone(),
email: r.email.clone(),
rmp_match_status: r.rmp_match_status.clone(),
rmp_link_count: r.rmp_link_count.unwrap_or(0),
candidate_count: r.candidate_count.unwrap_or(0),
course_subject_count: r.course_subject_count.unwrap_or(0),
top_candidate,
}
})
.collect();
Ok(Json(ListInstructorsResponse {
instructors,
total,
page,
per_page,
stats,
}))
}
// ---------------------------------------------------------------------------
// 2. GET /api/admin/instructors/{id} — full detail
// ---------------------------------------------------------------------------
/// `GET /api/admin/instructors/{id}` — Full instructor detail with candidates.
///
/// Thin route handler: requires an admin user and delegates to
/// `build_instructor_detail`, passing its errors through unchanged.
pub async fn get_instructor(
    AdminUser(_user): AdminUser,
    State(state): State<AppState>,
    Path(id): Path<i32>,
) -> Result<Json<InstructorDetailResponse>, (StatusCode, Json<Value>)> {
    let detail = build_instructor_detail(&state, id).await?;
    Ok(detail)
}
/// Shared helper that builds the full instructor detail response.
///
/// Loads the instructor row, the distinct subjects and course count from
/// `course_instructors`, every match candidate (any status) and every linked
/// RMP profile. Candidates are ordered by score, highest first, with unscored
/// candidates last; linked profiles are ordered by rating count, highest first.
///
/// # Errors
///
/// - 404 when no instructor has the given `id`.
/// - 500 when any query fails.
async fn build_instructor_detail(
    state: &AppState,
    id: i32,
) -> Result<Json<InstructorDetailResponse>, (StatusCode, Json<Value>)> {
    // Fetch instructor
    let instructor: Option<(i32, String, String, String)> = sqlx::query_as(
        "SELECT id, display_name, email, rmp_match_status FROM instructors WHERE id = $1",
    )
    .bind(id)
    .fetch_optional(&state.db_pool)
    .await
    .map_err(|e| db_error("failed to fetch instructor", e))?;
    let (inst_id, display_name, email, rmp_match_status) = instructor.ok_or_else(|| {
        (
            StatusCode::NOT_FOUND,
            Json(json!({"error": "instructor not found"})),
        )
    })?;

    // Subjects taught
    let subjects: Vec<(String,)> = sqlx::query_as(
        "SELECT DISTINCT c.subject FROM course_instructors ci JOIN courses c ON c.id = ci.course_id WHERE ci.instructor_id = $1 ORDER BY c.subject",
    )
    .bind(inst_id)
    .fetch_all(&state.db_pool)
    .await
    .map_err(|e| db_error("failed to fetch subjects", e))?;

    // Course count
    let (course_count,): (i64,) = sqlx::query_as(
        "SELECT COUNT(DISTINCT ci.course_id) FROM course_instructors ci WHERE ci.instructor_id = $1",
    )
    .bind(inst_id)
    .fetch_one(&state.db_pool)
    .await
    .map_err(|e| db_error("failed to count courses", e))?;

    // Candidates with RMP professor info. PostgreSQL sorts NULLs first under
    // DESC, so NULLS LAST keeps unscored candidates from leading the list.
    let candidates = sqlx::query_as::<_, CandidateRow>(
        r#"
        SELECT mc.id, mc.rmp_legacy_id, mc.score, mc.score_breakdown, mc.status,
               rp.first_name, rp.last_name, rp.department,
               rp.avg_rating, rp.avg_difficulty, rp.num_ratings, rp.would_take_again_pct
        FROM rmp_match_candidates mc
        JOIN rmp_professors rp ON rp.legacy_id = mc.rmp_legacy_id
        WHERE mc.instructor_id = $1
        ORDER BY mc.score DESC NULLS LAST
        "#,
    )
    .bind(inst_id)
    .fetch_all(&state.db_pool)
    .await
    .map_err(|e| db_error("failed to fetch candidates", e))?;

    // Current matches (all linked RMP profiles)
    let current_matches = sqlx::query_as::<_, LinkedRmpProfileRow>(
        r#"
        SELECT irl.id as link_id,
               rp.legacy_id, rp.first_name, rp.last_name, rp.department,
               rp.avg_rating, rp.avg_difficulty, rp.num_ratings, rp.would_take_again_pct
        FROM instructor_rmp_links irl
        JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
        WHERE irl.instructor_id = $1
        ORDER BY rp.num_ratings DESC NULLS LAST
        "#,
    )
    .bind(inst_id)
    .fetch_all(&state.db_pool)
    .await
    .map_err(|e| db_error("failed to fetch linked rmp profiles", e))?;

    // Convert the private row types into the exported response types.
    let current_matches_resp: Vec<LinkedRmpProfile> = current_matches
        .into_iter()
        .map(|p| LinkedRmpProfile {
            link_id: p.link_id,
            legacy_id: p.legacy_id,
            first_name: p.first_name,
            last_name: p.last_name,
            department: p.department,
            avg_rating: p.avg_rating,
            avg_difficulty: p.avg_difficulty,
            num_ratings: p.num_ratings,
            would_take_again_pct: p.would_take_again_pct,
        })
        .collect();
    let candidates_resp: Vec<CandidateResponse> = candidates
        .into_iter()
        .map(|c| CandidateResponse {
            id: c.id,
            rmp_legacy_id: c.rmp_legacy_id,
            first_name: c.first_name,
            last_name: c.last_name,
            department: c.department,
            avg_rating: c.avg_rating,
            avg_difficulty: c.avg_difficulty,
            num_ratings: c.num_ratings,
            would_take_again_pct: c.would_take_again_pct,
            score: c.score,
            score_breakdown: c.score_breakdown,
            status: c.status,
        })
        .collect();

    Ok(Json(InstructorDetailResponse {
        instructor: InstructorDetail {
            id: inst_id,
            display_name,
            email,
            rmp_match_status,
            subjects_taught: subjects.into_iter().map(|(s,)| s).collect(),
            course_count,
        },
        current_matches: current_matches_resp,
        candidates: candidates_resp,
    }))
}
// ---------------------------------------------------------------------------
// 3. POST /api/admin/instructors/{id}/match — accept a candidate
// ---------------------------------------------------------------------------
/// `POST /api/admin/instructors/{id}/match` — Accept a candidate match.
pub async fn match_instructor(
AdminUser(user): AdminUser,
State(state): State<AppState>,
Path(id): Path<i32>,
Json(body): Json<MatchBody>,
) -> Result<Json<InstructorDetailResponse>, (StatusCode, Json<Value>)> {
// Verify the candidate exists and is pending
let candidate: Option<(i32,)> = sqlx::query_as(
"SELECT id FROM rmp_match_candidates WHERE instructor_id = $1 AND rmp_legacy_id = $2 AND status = 'pending'",
)
.bind(id)
.bind(body.rmp_legacy_id)
.fetch_optional(&state.db_pool)
.await
.map_err(|e| db_error("failed to check candidate", e))?;
if candidate.is_none() {
return Err((
StatusCode::NOT_FOUND,
Json(json!({"error": "pending candidate not found for this instructor"})),
));
}
// Check if this RMP profile is already linked to a different instructor
let conflict: Option<(i32,)> = sqlx::query_as(
"SELECT instructor_id FROM instructor_rmp_links WHERE rmp_legacy_id = $1 AND instructor_id != $2",
)
.bind(body.rmp_legacy_id)
.bind(id)
.fetch_optional(&state.db_pool)
.await
.map_err(|e| db_error("failed to check rmp uniqueness", e))?;
if let Some((other_id,)) = conflict {
return Err((
StatusCode::CONFLICT,
Json(json!({
"error": "RMP profile already linked to another instructor",
"conflictingInstructorId": other_id,
})),
));
}
let mut tx = state
.db_pool
.begin()
.await
.map_err(|e| db_error("failed to begin transaction", e))?;
// Insert link into instructor_rmp_links
sqlx::query(
"INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, created_by, source) VALUES ($1, $2, $3, 'manual') ON CONFLICT (rmp_legacy_id) DO NOTHING",
)
.bind(id)
.bind(body.rmp_legacy_id)
.bind(user.discord_id)
.execute(&mut *tx)
.await
.map_err(|e| db_error("failed to insert rmp link", e))?;
// Update instructor match status
sqlx::query("UPDATE instructors SET rmp_match_status = 'confirmed' WHERE id = $1")
.bind(id)
.execute(&mut *tx)
.await
.map_err(|e| db_error("failed to update instructor match status", e))?;
// Accept the candidate
sqlx::query(
"UPDATE rmp_match_candidates SET status = 'accepted', resolved_at = NOW(), resolved_by = $1 WHERE instructor_id = $2 AND rmp_legacy_id = $3",
)
.bind(user.discord_id)
.bind(id)
.bind(body.rmp_legacy_id)
.execute(&mut *tx)
.await
.map_err(|e| db_error("failed to accept candidate", e))?;
tx.commit()
.await
.map_err(|e| db_error("failed to commit transaction", e))?;
build_instructor_detail(&state, id).await
}
// ---------------------------------------------------------------------------
// 4. POST /api/admin/instructors/{id}/reject-candidate — reject one candidate
// ---------------------------------------------------------------------------
/// `POST /api/admin/instructors/{id}/reject-candidate` — Reject a single candidate.
///
/// Flips the instructor's pending candidate for the given RMP profile to
/// `rejected`, stamping the resolution time and the acting admin.
/// Responds 404 when no pending candidate matched, 500 on a query failure.
pub async fn reject_candidate(
    AdminUser(user): AdminUser,
    State(state): State<AppState>,
    Path(id): Path<i32>,
    Json(body): Json<RejectCandidateBody>,
) -> Result<Json<OkResponse>, (StatusCode, Json<Value>)> {
    let updated_rows = sqlx::query(
        "UPDATE rmp_match_candidates SET status = 'rejected', resolved_at = NOW(), resolved_by = $1 WHERE instructor_id = $2 AND rmp_legacy_id = $3 AND status = 'pending'",
    )
    .bind(user.discord_id)
    .bind(id)
    .bind(body.rmp_legacy_id)
    .execute(&state.db_pool)
    .await
    .map_err(|e| db_error("failed to reject candidate", e))?
    .rows_affected();

    // Zero updated rows means there was no pending candidate to reject.
    match updated_rows {
        0 => Err((
            StatusCode::NOT_FOUND,
            Json(json!({"error": "pending candidate not found"})),
        )),
        _ => Ok(Json(OkResponse { ok: true })),
    }
}
// ---------------------------------------------------------------------------
// 5. POST /api/admin/instructors/{id}/reject-all — no valid match
// ---------------------------------------------------------------------------
/// `POST /api/admin/instructors/{id}/reject-all` — Mark instructor as having no valid RMP match.
pub async fn reject_all(
AdminUser(user): AdminUser,
State(state): State<AppState>,
Path(id): Path<i32>,
) -> Result<Json<OkResponse>, (StatusCode, Json<Value>)> {
let mut tx = state
.db_pool
.begin()
.await
.map_err(|e| db_error("failed to begin transaction", e))?;
// Check current status — cannot reject an instructor with confirmed matches
let current_status: Option<(String,)> =
sqlx::query_as("SELECT rmp_match_status FROM instructors WHERE id = $1")
.bind(id)
.fetch_optional(&mut *tx)
.await
.map_err(|e| db_error("failed to fetch instructor status", e))?;
let (status,) = current_status.ok_or_else(|| {
(
StatusCode::NOT_FOUND,
Json(json!({"error": "instructor not found"})),
)
})?;
if status == "confirmed" {
return Err((
StatusCode::CONFLICT,
Json(
json!({"error": "cannot reject instructor with confirmed matches — unmatch first"}),
),
));
}
// Update instructor status
sqlx::query("UPDATE instructors SET rmp_match_status = 'rejected' WHERE id = $1")
.bind(id)
.execute(&mut *tx)
.await
.map_err(|e| db_error("failed to update instructor status", e))?;
// Reject all pending candidates
sqlx::query(
"UPDATE rmp_match_candidates SET status = 'rejected', resolved_at = NOW(), resolved_by = $1 WHERE instructor_id = $2 AND status = 'pending'",
)
.bind(user.discord_id)
.bind(id)
.execute(&mut *tx)
.await
.map_err(|e| db_error("failed to reject candidates", e))?;
tx.commit()
.await
.map_err(|e| db_error("failed to commit transaction", e))?;
Ok(Json(OkResponse { ok: true }))
}
// ---------------------------------------------------------------------------
// 6. POST /api/admin/instructors/{id}/unmatch — remove current match
// ---------------------------------------------------------------------------
/// Body for unmatch — optional `rmpLegacyId` to remove a specific link.
/// If omitted (or null), all links are removed.
///
/// Deserialized with camelCase keys, so the JSON field is `rmpLegacyId`.
#[derive(Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct UnmatchBody {
// `Some(n)` removes only the link to RMP profile `n`; `None` removes every link.
rmp_legacy_id: Option<i32>,
}
/// `POST /api/admin/instructors/{id}/unmatch` — Remove RMP link(s).
///
/// Send `{ "rmpLegacyId": N }` to remove a specific link, or an empty body / `{}`
/// to remove all links for the instructor.
pub async fn unmatch_instructor(
AdminUser(_user): AdminUser,
State(state): State<AppState>,
Path(id): Path<i32>,
body: Option<Json<UnmatchBody>>,
) -> Result<Json<OkResponse>, (StatusCode, Json<Value>)> {
let rmp_legacy_id = body.and_then(|b| b.rmp_legacy_id);
let mut tx = state
.db_pool
.begin()
.await
.map_err(|e| db_error("failed to begin transaction", e))?;
// Verify instructor exists
let exists: Option<(i32,)> = sqlx::query_as("SELECT id FROM instructors WHERE id = $1")
.bind(id)
.fetch_optional(&mut *tx)
.await
.map_err(|e| db_error("failed to check instructor", e))?;
if exists.is_none() {
return Err((
StatusCode::NOT_FOUND,
Json(json!({"error": "instructor not found"})),
));
}
// Delete specific link or all links
if let Some(legacy_id) = rmp_legacy_id {
let result = sqlx::query(
"DELETE FROM instructor_rmp_links WHERE instructor_id = $1 AND rmp_legacy_id = $2",
)
.bind(id)
.bind(legacy_id)
.execute(&mut *tx)
.await
.map_err(|e| db_error("failed to remove rmp link", e))?;
if result.rows_affected() == 0 {
return Err((
StatusCode::NOT_FOUND,
Json(json!({"error": "link not found for this instructor"})),
));
}
} else {
sqlx::query("DELETE FROM instructor_rmp_links WHERE instructor_id = $1")
.bind(id)
.execute(&mut *tx)
.await
.map_err(|e| db_error("failed to remove rmp links", e))?;
}
// Check if any links remain; update status accordingly
let (remaining,): (i64,) =
sqlx::query_as("SELECT COUNT(*) FROM instructor_rmp_links WHERE instructor_id = $1")
.bind(id)
.fetch_one(&mut *tx)
.await
.map_err(|e| db_error("failed to count remaining links", e))?;
if remaining == 0 {
sqlx::query("UPDATE instructors SET rmp_match_status = 'unmatched' WHERE id = $1")
.bind(id)
.execute(&mut *tx)
.await
.map_err(|e| db_error("failed to update instructor status", e))?;
}
tx.commit()
.await
.map_err(|e| db_error("failed to commit transaction", e))?;
Ok(Json(OkResponse { ok: true }))
}
// ---------------------------------------------------------------------------
// 7. POST /api/admin/rmp/rescore — re-run candidate generation
// ---------------------------------------------------------------------------
/// `POST /api/admin/rmp/rescore` — Re-run RMP candidate generation.
///
/// Calls `crate::data::rmp_matching::generate_candidates` on the shared pool
/// and echoes its counters back. A failure is logged and reported as a 500
/// with a generic error body.
pub async fn rescore(
    AdminUser(_user): AdminUser,
    State(state): State<AppState>,
) -> Result<Json<RescoreResponse>, (StatusCode, Json<Value>)> {
    let outcome = crate::data::rmp_matching::generate_candidates(&state.db_pool).await;
    let stats = match outcome {
        Ok(stats) => stats,
        Err(e) => {
            tracing::error!(error = %e, "failed to run candidate generation");
            return Err((
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(json!({"error": "candidate generation failed"})),
            ));
        }
    };

    let response = RescoreResponse {
        total_unmatched: stats.total_unmatched,
        candidates_created: stats.candidates_created,
        auto_matched: stats.auto_matched,
        skipped_unparseable: stats.skipped_unparseable,
        skipped_no_candidates: stats.skipped_no_candidates,
    };
    Ok(Json(response))
}
+1
View File
@@ -1,6 +1,7 @@
//! Web API module for the banner application.
pub mod admin;
pub mod admin_rmp;
#[cfg(feature = "embed-assets")]
pub mod assets;
pub mod auth;
+23 -1
View File
@@ -10,6 +10,7 @@ use axum::{
};
use crate::web::admin;
use crate::web::admin_rmp;
use crate::web::auth::{self, AuthConfig};
use crate::web::ws;
#[cfg(feature = "embed-assets")]
@@ -66,6 +67,25 @@ pub fn create_router(app_state: AppState, auth_config: AuthConfig) -> Router {
.route("/admin/scrape-jobs", get(admin::list_scrape_jobs))
.route("/admin/scrape-jobs/ws", get(ws::scrape_jobs_ws))
.route("/admin/audit-log", get(admin::list_audit_log))
.route("/admin/instructors", get(admin_rmp::list_instructors))
.route("/admin/instructors/{id}", get(admin_rmp::get_instructor))
.route(
"/admin/instructors/{id}/match",
post(admin_rmp::match_instructor),
)
.route(
"/admin/instructors/{id}/reject-candidate",
post(admin_rmp::reject_candidate),
)
.route(
"/admin/instructors/{id}/reject-all",
post(admin_rmp::reject_all),
)
.route(
"/admin/instructors/{id}/unmatch",
post(admin_rmp::unmatch_instructor),
)
.route("/admin/rmp/rescore", post(admin_rmp::rescore))
.with_state(app_state);
let mut router = Router::new()
@@ -435,9 +455,10 @@ pub struct CourseResponse {
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct InstructorResponse {
instructor_id: i32,
banner_id: String,
display_name: String,
email: Option<String>,
email: String,
is_primary: bool,
rmp_rating: Option<f32>,
rmp_num_ratings: Option<i32>,
@@ -470,6 +491,7 @@ fn build_course_response(
let instructors = instructors
.into_iter()
.map(|i| InstructorResponse {
instructor_id: i.instructor_id,
banner_id: i.banner_id,
display_name: i.display_name,
email: i.email,