feat: add confidence-based RMP matching with manual review workflow

Replace simple auto-matching with scored candidate generation that considers department overlap, name uniqueness, and rating volume. Candidates scoring at or above 0.85 are auto-accepted; all others require admin approval.
2026-01-31 06:23:37 -06:00 · 2026-01-30 01:31:11 -06:00
parent 39ba131322
commit 203c337cf0
19 changed files with 2428 additions and 175 deletions
@@ -392,11 +392,11 @@ pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Resul
    insert_audits(&audits, &mut tx).await?;
    insert_metrics(&metrics, &mut tx).await?;

-    // Step 5: Upsert instructors (deduplicated across batch)
-    upsert_instructors(courses, &mut tx).await?;
+    // Step 5: Upsert instructors (returns email -> id map)
+    let email_to_id = upsert_instructors(courses, &mut tx).await?;

    // Step 6: Link courses to instructors via junction table
-    upsert_course_instructors(courses, &course_ids, &mut tx).await?;
+    upsert_course_instructors(courses, &course_ids, &email_to_id, &mut tx).await?;

    tx.commit().await?;

@@ -596,62 +596,85 @@ async fn upsert_courses(courses: &[Course], conn: &mut PgConnection) -> Result<V
    Ok(rows)
 }

-/// Deduplicate and upsert all instructors from the batch.
-async fn upsert_instructors(courses: &[Course], conn: &mut PgConnection) -> Result<()> {
+/// Deduplicate and upsert all instructors from the batch by email.
+/// Returns a map of lowercased_email -> instructor id for junction linking.
+async fn upsert_instructors(
+    courses: &[Course],
+    conn: &mut PgConnection,
+) -> Result<HashMap<String, i32>> {
    let mut seen = HashSet::new();
-    let mut banner_ids = Vec::new();
-    let mut display_names = Vec::new();
-    let mut emails: Vec<Option<&str>> = Vec::new();
+    let mut display_names: Vec<&str> = Vec::new();
+    let mut emails_lower: Vec<String> = Vec::new();
+    let mut skipped_no_email = 0u32;

    for course in courses {
        for faculty in &course.faculty {
-            if seen.insert(faculty.banner_id.as_str()) {
-                banner_ids.push(faculty.banner_id.as_str());
-                display_names.push(faculty.display_name.as_str());
-                emails.push(faculty.email_address.as_deref());
+            if let Some(email) = &faculty.email_address {
+                let email_lower = email.to_lowercase();
+                if seen.insert(email_lower.clone()) {
+                    display_names.push(faculty.display_name.as_str());
+                    emails_lower.push(email_lower);
+                }
+            } else {
+                skipped_no_email += 1;
            }
        }
    }

-    if banner_ids.is_empty() {
-        return Ok(());
+    if skipped_no_email > 0 {
+        tracing::warn!(
+            count = skipped_no_email,
+            "Skipped instructors with no email address"
+        );
    }

-    sqlx::query(
+    if display_names.is_empty() {
+        return Ok(HashMap::new());
+    }
+
+    let email_refs: Vec<&str> = emails_lower.iter().map(|s| s.as_str()).collect();
+
+    let rows: Vec<(i32, String)> = sqlx::query_as(
        r#"
-        INSERT INTO instructors (banner_id, display_name, email)
-        SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[])
-        ON CONFLICT (banner_id)
-        DO UPDATE SET
-            display_name = EXCLUDED.display_name,
-            email = COALESCE(EXCLUDED.email, instructors.email)
+        INSERT INTO instructors (display_name, email)
+        SELECT * FROM UNNEST($1::text[], $2::text[])
+        ON CONFLICT (email)
+        DO UPDATE SET display_name = EXCLUDED.display_name
+        RETURNING id, email
        "#,
    )
-    .bind(&banner_ids)
    .bind(&display_names)
-    .bind(&emails)
-    .execute(&mut *conn)
+    .bind(&email_refs)
+    .fetch_all(&mut *conn)
    .await
    .map_err(|e| anyhow::anyhow!("Failed to batch upsert instructors: {}", e))?;

-    Ok(())
+    Ok(rows.into_iter().map(|(id, email)| (email, id)).collect())
 }

 /// Link courses to their instructors via the junction table.
 async fn upsert_course_instructors(
    courses: &[Course],
    course_ids: &[i32],
+    email_to_id: &HashMap<String, i32>,
    conn: &mut PgConnection,
 ) -> Result<()> {
    let mut cids = Vec::new();
-    let mut iids = Vec::new();
+    let mut instructor_ids: Vec<i32> = Vec::new();
+    let mut banner_ids: Vec<&str> = Vec::new();
    let mut primaries = Vec::new();

    for (course, &course_id) in courses.iter().zip(course_ids) {
        for faculty in &course.faculty {
-            cids.push(course_id);
-            iids.push(faculty.banner_id.as_str());
-            primaries.push(faculty.primary_indicator);
+            if let Some(email) = &faculty.email_address {
+                let email_lower = email.to_lowercase();
+                if let Some(&instructor_id) = email_to_id.get(&email_lower) {
+                    cids.push(course_id);
+                    instructor_ids.push(instructor_id);
+                    banner_ids.push(faculty.banner_id.as_str());
+                    primaries.push(faculty.primary_indicator);
+                }
+            }
        }
    }

@@ -668,14 +691,17 @@ async fn upsert_course_instructors(

    sqlx::query(
        r#"
-        INSERT INTO course_instructors (course_id, instructor_id, is_primary)
-        SELECT * FROM UNNEST($1::int4[], $2::text[], $3::bool[])
+        INSERT INTO course_instructors (course_id, instructor_id, banner_id, is_primary)
+        SELECT * FROM UNNEST($1::int4[], $2::int4[], $3::text[], $4::bool[])
        ON CONFLICT (course_id, instructor_id)
-        DO UPDATE SET is_primary = EXCLUDED.is_primary
+        DO UPDATE SET
+            banner_id = EXCLUDED.banner_id,
+            is_primary = EXCLUDED.is_primary
        "#,
    )
    .bind(&cids)
-    .bind(&iids)
+    .bind(&instructor_ids)
+    .bind(&banner_ids)
    .bind(&primaries)
    .execute(&mut *conn)
    .await
@@ -55,7 +55,7 @@ fn sort_clause(column: Option<SortColumn>, direction: Option<SortDirection>) ->
        Some(SortColumn::Instructor) => {
            format!(
                "(SELECT i.display_name FROM course_instructors ci \
-                 JOIN instructors i ON i.banner_id = ci.instructor_id \
+                 JOIN instructors i ON i.id = ci.instructor_id \
                 WHERE ci.course_id = courses.id AND ci.is_primary = true \
                 LIMIT 1) {dir} NULLS LAST"
            )
@@ -147,12 +147,19 @@ pub async fn get_course_instructors(
 ) -> Result<Vec<CourseInstructorDetail>> {
    let rows = sqlx::query_as::<_, CourseInstructorDetail>(
        r#"
-        SELECT i.banner_id, i.display_name, i.email, ci.is_primary,
-               rp.avg_rating, rp.num_ratings, i.rmp_legacy_id,
+        SELECT i.id as instructor_id, ci.banner_id, i.display_name, i.email, ci.is_primary,
+               rmp.avg_rating, rmp.num_ratings, rmp.rmp_legacy_id,
               ci.course_id
        FROM course_instructors ci
-        JOIN instructors i ON i.banner_id = ci.instructor_id
-        LEFT JOIN rmp_professors rp ON rp.legacy_id = i.rmp_legacy_id
+        JOIN instructors i ON i.id = ci.instructor_id
+        LEFT JOIN LATERAL (
+            SELECT rp.avg_rating, rp.num_ratings, rp.legacy_id as rmp_legacy_id
+            FROM instructor_rmp_links irl
+            JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
+            WHERE irl.instructor_id = i.id
+            ORDER BY rp.num_ratings DESC NULLS LAST, rp.legacy_id ASC
+            LIMIT 1
+        ) rmp ON true
        WHERE ci.course_id = $1
        ORDER BY ci.is_primary DESC, i.display_name
        "#,
@@ -176,12 +183,19 @@ pub async fn get_instructors_for_courses(

    let rows = sqlx::query_as::<_, CourseInstructorDetail>(
        r#"
-        SELECT i.banner_id, i.display_name, i.email, ci.is_primary,
-               rp.avg_rating, rp.num_ratings, i.rmp_legacy_id,
+        SELECT i.id as instructor_id, ci.banner_id, i.display_name, i.email, ci.is_primary,
+               rmp.avg_rating, rmp.num_ratings, rmp.rmp_legacy_id,
               ci.course_id
        FROM course_instructors ci
-        JOIN instructors i ON i.banner_id = ci.instructor_id
-        LEFT JOIN rmp_professors rp ON rp.legacy_id = i.rmp_legacy_id
+        JOIN instructors i ON i.id = ci.instructor_id
+        LEFT JOIN LATERAL (
+            SELECT rp.avg_rating, rp.num_ratings, rp.legacy_id as rmp_legacy_id
+            FROM instructor_rmp_links irl
+            JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
+            WHERE irl.instructor_id = i.id
+            ORDER BY rp.num_ratings DESC NULLS LAST, rp.legacy_id ASC
+            LIMIT 1
+        ) rmp ON true
        WHERE ci.course_id = ANY($1)
        ORDER BY ci.course_id, ci.is_primary DESC, i.display_name
        "#,
@@ -5,6 +5,7 @@ pub mod courses;
 pub mod models;
 pub mod reference;
 pub mod rmp;
+pub mod rmp_matching;
 pub mod scrape_jobs;
 pub mod sessions;
 pub mod users;
@@ -99,25 +99,28 @@ pub struct Course {
 #[allow(dead_code)]
 #[derive(sqlx::FromRow, Debug, Clone)]
 pub struct Instructor {
-    pub banner_id: String,
+    pub id: i32,
    pub display_name: String,
-    pub email: Option<String>,
+    pub email: String,
+    pub rmp_match_status: String,
 }

 #[allow(dead_code)]
 #[derive(sqlx::FromRow, Debug, Clone)]
 pub struct CourseInstructor {
    pub course_id: i32,
-    pub instructor_id: String,
+    pub instructor_id: i32,
+    pub banner_id: String,
    pub is_primary: bool,
 }

 /// Joined instructor data for a course (from course_instructors + instructors + rmp_professors).
 #[derive(sqlx::FromRow, Debug, Clone)]
 pub struct CourseInstructorDetail {
+    pub instructor_id: i32,
    pub banner_id: String,
    pub display_name: String,
-    pub email: Option<String>,
+    pub email: String,
    pub is_primary: bool,
    pub avg_rating: Option<f32>,
    pub num_ratings: Option<i32>,
@@ -3,8 +3,7 @@
 use crate::error::Result;
 use crate::rmp::RmpProfessor;
 use sqlx::PgPool;
-use std::collections::{HashMap, HashSet};
-use tracing::{debug, info, warn};
+use std::collections::HashSet;

 /// Bulk upsert RMP professors using the UNNEST pattern.
 ///
@@ -93,14 +92,14 @@ pub async fn batch_upsert_rmp_professors(
 }

 /// Normalize a name for matching: lowercase, trim, strip trailing periods.
-fn normalize(s: &str) -> String {
+pub(crate) fn normalize(s: &str) -> String {
    s.trim().to_lowercase().trim_end_matches('.').to_string()
 }

 /// Parse Banner's "Last, First Middle" display name into (last, first) tokens.
 ///
 /// Returns `None` if the format is unparseable (no comma, empty parts).
-fn parse_display_name(display_name: &str) -> Option<(String, String)> {
+pub(crate) fn parse_display_name(display_name: &str) -> Option<(String, String)> {
    let (last_part, first_part) = display_name.split_once(',')?;
    let last = normalize(last_part);
    // Take only the first token of the first-name portion to drop middle names/initials.
@@ -111,128 +110,27 @@ fn parse_display_name(display_name: &str) -> Option<(String, String)> {
    Some((last, first))
 }

-/// Auto-match instructors to RMP professors by normalized name.
+/// Retrieve RMP rating data for an instructor by instructor id.
 ///
-/// Loads all pending instructors and all RMP professors, then matches in Rust
-/// using normalized name comparison. Only assigns a match when exactly one RMP
-/// professor matches a given instructor.
-pub async fn auto_match_instructors(db_pool: &PgPool) -> Result<u64> {
-    // Load pending instructors
-    let instructors: Vec<(String, String)> = sqlx::query_as(
-        "SELECT banner_id, display_name FROM instructors WHERE rmp_match_status = 'pending'",
-    )
-    .fetch_all(db_pool)
-    .await?;
-
-    if instructors.is_empty() {
-        info!(matched = 0, "No pending instructors to match");
-        return Ok(0);
-    }
-
-    // Load all RMP professors
-    let professors: Vec<(i32, String, String)> =
-        sqlx::query_as("SELECT legacy_id, first_name, last_name FROM rmp_professors")
-            .fetch_all(db_pool)
-            .await?;
-
-    // Build a lookup: (normalized_last, normalized_first) -> list of legacy_ids
-    let mut rmp_index: HashMap<(String, String), Vec<i32>> = HashMap::new();
-    for (legacy_id, first, last) in &professors {
-        let key = (normalize(last), normalize(first));
-        rmp_index.entry(key).or_default().push(*legacy_id);
-    }
-
-    // Match each instructor
-    let mut matches: Vec<(i32, String)> = Vec::new(); // (legacy_id, banner_id)
-    let mut no_comma = 0u64;
-    let mut no_match = 0u64;
-    let mut ambiguous = 0u64;
-
-    for (banner_id, display_name) in &instructors {
-        let Some((last, first)) = parse_display_name(display_name) else {
-            no_comma += 1;
-            continue;
-        };
-
-        let key = (last, first);
-        match rmp_index.get(&key) {
-            Some(ids) if ids.len() == 1 => {
-                matches.push((ids[0], banner_id.clone()));
-            }
-            Some(ids) => {
-                ambiguous += 1;
-                debug!(
-                    banner_id,
-                    display_name,
-                    candidates = ids.len(),
-                    "Ambiguous RMP match, skipping"
-                );
-            }
-            None => {
-                no_match += 1;
-            }
-        }
-    }
-
-    if no_comma > 0 || ambiguous > 0 {
-        warn!(
-            total_pending = instructors.len(),
-            no_comma,
-            no_match,
-            ambiguous,
-            matched = matches.len(),
-            "RMP matching diagnostics"
-        );
-    }
-
-    // Batch update matches
-    if matches.is_empty() {
-        info!(matched = 0, "Auto-matched instructors to RMP professors");
-        return Ok(0);
-    }
-
-    let legacy_ids: Vec<i32> = matches.iter().map(|(id, _)| *id).collect();
-    let banner_ids: Vec<&str> = matches.iter().map(|(_, bid)| bid.as_str()).collect();
-
-    let result = sqlx::query(
-        r#"
-        UPDATE instructors i
-        SET
-            rmp_legacy_id = m.legacy_id,
-            rmp_match_status = 'auto'
-        FROM UNNEST($1::int4[], $2::text[]) AS m(legacy_id, banner_id)
-        WHERE i.banner_id = m.banner_id
-        "#,
-    )
-    .bind(&legacy_ids)
-    .bind(&banner_ids)
-    .execute(db_pool)
-    .await
-    .map_err(|e| anyhow::anyhow!("Failed to update instructor RMP matches: {}", e))?;
-
-    let matched = result.rows_affected();
-    info!(matched, "Auto-matched instructors to RMP professors");
-    Ok(matched)
-}
-
-/// Retrieve RMP rating data for an instructor by banner_id.
-///
-/// Returns `(avg_rating, num_ratings)` if the instructor has an RMP match.
+/// Returns `(avg_rating, num_ratings)` for the best linked RMP profile
+/// (most ratings). Returns `None` if no link exists.
 #[allow(dead_code)]
 pub async fn get_instructor_rmp_data(
    db_pool: &PgPool,
-    banner_id: &str,
+    instructor_id: i32,
 ) -> Result<Option<(f32, i32)>> {
    let row: Option<(f32, i32)> = sqlx::query_as(
        r#"
        SELECT rp.avg_rating, rp.num_ratings
-        FROM instructors i
-        JOIN rmp_professors rp ON rp.legacy_id = i.rmp_legacy_id
-        WHERE i.banner_id = $1
+        FROM instructor_rmp_links irl
+        JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
+        WHERE irl.instructor_id = $1
          AND rp.avg_rating IS NOT NULL
+        ORDER BY rp.num_ratings DESC NULLS LAST
+        LIMIT 1
        "#,
    )
-    .bind(banner_id)
+    .bind(instructor_id)
    .fetch_optional(db_pool)
    .await?;
    Ok(row)
@@ -0,0 +1,513 @@
+//! Confidence scoring and candidate generation for RMP instructor matching.
+
+use crate::data::rmp::{normalize, parse_display_name};
+use crate::error::Result;
+use serde::{Deserialize, Serialize};
+use sqlx::PgPool;
+use std::collections::{HashMap, HashSet};
+use tracing::{debug, info};
+
+// ---------------------------------------------------------------------------
+// Scoring types
+// ---------------------------------------------------------------------------
+
+/// Breakdown of individual scoring signals.
+///
+/// Each field holds the raw (unweighted) value of one signal as produced by
+/// [`compute_match_score`]. The struct is serialized to JSON and stored with
+/// each candidate row as its `score_breakdown`.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ScoreBreakdown {
+    /// Department-overlap signal, as returned by `department_similarity`.
+    pub department: f32,
+    /// Name-uniqueness signal, derived from how many RMP professors share
+    /// the instructor's normalized name.
+    pub uniqueness: f32,
+    /// Rating-volume signal, clamped to `0.0..=1.0`.
+    pub volume: f32,
+}
+
+/// Result of scoring a single instructor–RMP candidate pair.
+#[derive(Debug, Clone)]
+pub struct MatchScore {
+    /// Weighted composite of the individual signals in `breakdown`.
+    pub score: f32,
+    /// The unweighted per-signal values that produced `score`.
+    pub breakdown: ScoreBreakdown,
+}
+
+// ---------------------------------------------------------------------------
+// Thresholds
+// ---------------------------------------------------------------------------
+
+/// Minimum composite score to store a candidate row.
+///
+/// Candidates scoring strictly below this value are discarded; a score equal
+/// to the threshold is kept.
+const MIN_CANDIDATE_THRESHOLD: f32 = 0.40;
+
+/// Score at or above which a candidate is auto-accepted.
+const AUTO_ACCEPT_THRESHOLD: f32 = 0.85;
+
+// ---------------------------------------------------------------------------
+// Weights
+// ---------------------------------------------------------------------------
+
+/// Weight of the department-overlap signal in the composite score.
+const WEIGHT_DEPARTMENT: f32 = 0.50;
+/// Weight of the name-uniqueness signal in the composite score.
+const WEIGHT_UNIQUENESS: f32 = 0.30;
+/// Weight of the rating-volume signal in the composite score.
+const WEIGHT_VOLUME: f32 = 0.20;
+
+// ---------------------------------------------------------------------------
+// Pure scoring functions
+// ---------------------------------------------------------------------------
+
+/// Check if an instructor's subjects overlap with an RMP department.
+///
+/// A subject counts as overlapping when any of the following holds
+/// (case-insensitively):
+///
+/// * the subject occurs in the department name starting at a word boundary,
+/// * the department name occurs in the subject starting at a word boundary,
+/// * the subject is a known abbreviation of the department
+///   (see [`matches_known_abbreviation`]).
+///
+/// Matching is anchored to word boundaries on purpose: a plain substring test
+/// would treat `CS` as part of "Physi**cs**" or `IS` as part of "H**is**tory",
+/// and the resulting department score of `1.0` is large enough to push a wrong
+/// pairing over the auto-accept threshold.
+///
+/// Returns `1.0` for a match, `0.2` for a mismatch, and `0.5` when the RMP
+/// department is unknown (`None` or blank).
+fn department_similarity(subjects: &[String], rmp_department: Option<&str>) -> f32 {
+    // A blank department carries no information, so treat it like `None`.
+    let dept_lower = match rmp_department.map(str::trim) {
+        Some(dept) if !dept.is_empty() => dept.to_lowercase(),
+        _ => return 0.5,
+    };
+
+    let overlaps = subjects.iter().any(|subj| {
+        let subj_lower = subj.trim().to_lowercase();
+        contains_at_word_start(&dept_lower, &subj_lower)
+            || contains_at_word_start(&subj_lower, &dept_lower)
+            // Handle common UTSA abbreviation mappings.
+            || matches_known_abbreviation(&subj_lower, &dept_lower)
+    });
+
+    if overlaps { 1.0 } else { 0.2 }
+}
+
+/// Report whether `needle` occurs in `haystack` beginning at a word boundary.
+///
+/// A word boundary is the start of the string or any non-alphanumeric
+/// character. Needles shorter than three characters must additionally end at
+/// a word boundary (i.e. match a whole word), because two-letter codes are
+/// prefixes of too many unrelated words. An empty needle never matches.
+fn contains_at_word_start(haystack: &str, needle: &str) -> bool {
+    if needle.is_empty() {
+        return false;
+    }
+    let whole_word_only = needle.chars().count() < 3;
+
+    haystack.match_indices(needle).any(|(start, matched)| {
+        let preceded_by_word_char = haystack[..start]
+            .chars()
+            .next_back()
+            .is_some_and(char::is_alphanumeric);
+        let followed_by_word_char = haystack[start + matched.len()..]
+            .chars()
+            .next()
+            .is_some_and(char::is_alphanumeric);
+
+        !preceded_by_word_char && !(whole_word_only && followed_by_word_char)
+    })
+}
+
+/// Expand common subject abbreviations used at UTSA and check for overlap.
+///
+/// `subject` and `department` are expected to be lowercase already. Returns
+/// `true` when `subject` is one of the known abbreviations and any of its
+/// expansions occurs in `department` starting at a word boundary (so `art`
+/// matches "art history" but not "department" or "cartography").
+fn matches_known_abbreviation(subject: &str, department: &str) -> bool {
+    const MAPPINGS: &[(&str, &[&str])] = &[
+        ("cs", &["computer science"]),
+        ("ece", &["electrical", "computer engineering"]),
+        ("ee", &["electrical engineering", "electrical"]),
+        ("me", &["mechanical engineering", "mechanical"]),
+        ("ce", &["civil engineering", "civil"]),
+        ("bio", &["biology", "biological"]),
+        ("chem", &["chemistry"]),
+        ("phys", &["physics"]),
+        ("math", &["mathematics"]),
+        ("sta", &["statistics"]),
+        ("eng", &["english"]),
+        ("his", &["history"]),
+        ("pol", &["political science"]),
+        ("psy", &["psychology"]),
+        ("soc", &["sociology"]),
+        ("mus", &["music"]),
+        ("art", &["art"]),
+        ("phi", &["philosophy"]),
+        ("eco", &["economics"]),
+        ("acc", &["accounting"]),
+        ("fin", &["finance"]),
+        ("mgt", &["management"]),
+        ("mkt", &["marketing"]),
+        ("is", &["information systems"]),
+        ("ms", &["management science"]),
+        ("kin", &["kinesiology"]),
+        ("com", &["communication"]),
+    ];
+
+    for &(abbr, expansions) in MAPPINGS {
+        if subject == abbr {
+            // Abbreviations are unique in the table, so the first hit decides.
+            return expansions
+                .iter()
+                .any(|expansion| contains_at_word_start(department, expansion));
+        }
+    }
+    false
+}
+
+/// Compute match confidence score (0.0–1.0) for an instructor–RMP pair.
+///
+/// Name matching is handled by the caller via pre-filtering on exact
+/// normalized `(last, first)`, so only department, uniqueness, and volume
+/// signals are scored here.
+///
+/// # Parameters
+///
+/// * `instructor_subjects` – subject codes the instructor teaches.
+/// * `rmp_department` – department reported by RMP, if any.
+/// * `candidate_count` – number of RMP professors sharing the instructor's
+///   normalized name (`0` and `1` are both treated as unique).
+/// * `rmp_num_ratings` – number of ratings on the RMP profile; negative
+///   values are treated as `0`.
+///
+/// # Returns
+///
+/// A [`MatchScore`] whose `score` is the weighted sum of the three signals
+/// and whose `breakdown` carries the unweighted signal values.
+pub fn compute_match_score(
+    instructor_subjects: &[String],
+    rmp_department: Option<&str>,
+    candidate_count: usize,
+    rmp_num_ratings: i32,
+) -> MatchScore {
+    // --- Department (0.50) ---
+    let dept_score = department_similarity(instructor_subjects, rmp_department);
+
+    // --- Uniqueness (0.30) ---
+    let uniqueness_score = match candidate_count {
+        0 | 1 => 1.0,
+        2 => 0.5,
+        _ => 0.2,
+    };
+
+    // --- Volume (0.20) ---
+    // Logarithmic scale that saturates at 1.0 once a profile has 5 ratings.
+    // Negative counts are clamped to zero first: `ln_1p` yields NaN below -1,
+    // `clamp` propagates NaN, and a NaN composite would slip past every
+    // `score < threshold` comparison made by callers.
+    let ratings = rmp_num_ratings.max(0) as f32;
+    let volume_score = (ratings.ln_1p() / 5.0_f32.ln_1p()).clamp(0.0, 1.0);
+
+    let composite = dept_score * WEIGHT_DEPARTMENT
+        + uniqueness_score * WEIGHT_UNIQUENESS
+        + volume_score * WEIGHT_VOLUME;
+
+    MatchScore {
+        score: composite,
+        breakdown: ScoreBreakdown {
+            department: dept_score,
+            uniqueness: uniqueness_score,
+            volume: volume_score,
+        },
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Candidate generation (DB)
+// ---------------------------------------------------------------------------
+
+/// Statistics returned from candidate generation.
+#[derive(Debug)]
+pub struct MatchingStats {
+    /// Number of instructors with `rmp_match_status = 'unmatched'` that were
+    /// considered in this run.
+    pub total_unmatched: usize,
+    /// Number of new candidate rows submitted for insertion in this run.
+    pub candidates_created: usize,
+    /// Number of instructors auto-accepted in this run.
+    pub auto_matched: usize,
+    /// Instructors skipped because their display name could not be parsed
+    /// into `(last, first)`.
+    pub skipped_unparseable: usize,
+    /// Instructors skipped because no RMP professor shares their normalized
+    /// name.
+    pub skipped_no_candidates: usize,
+}
+
+/// Lightweight row for building the in-memory RMP name index.
+///
+/// Holds only the columns needed for scoring; the professor's name is used as
+/// the index key and therefore not repeated here.
+struct RmpProfForMatching {
+    /// RMP `legacy_id` of the professor.
+    legacy_id: i32,
+    /// Department as reported by RMP, if present.
+    department: Option<String>,
+    /// Number of ratings on the RMP profile.
+    num_ratings: i32,
+}
+
+/// Generate match candidates for all unmatched instructors.
+///
+/// For each unmatched instructor:
+/// 1. Parse `display_name` into (last, first).
+/// 2. Find RMP professors with matching normalized name.
+/// 3. Score each candidate.
+/// 4. Store candidates scoring at or above [`MIN_CANDIDATE_THRESHOLD`].
+/// 5. Auto-accept if the top candidate scores ≥ [`AUTO_ACCEPT_THRESHOLD`]
+///    and no existing rejected candidate exists for that pair.
+///
+/// Already-evaluated instructor–RMP pairs (any status) are skipped.
+///
+/// An auto-accept only takes effect when the link row is actually inserted.
+/// If the RMP profile is already linked elsewhere (the insert hits
+/// `ON CONFLICT (rmp_legacy_id)`), the candidate stays pending and the
+/// instructor stays `unmatched`, so the pair is left for manual review
+/// instead of being reported as matched without a link.
+///
+/// All writes happen in a single transaction.
+///
+/// # Errors
+///
+/// Returns an error if any query or the transaction commit fails.
+pub async fn generate_candidates(db_pool: &PgPool) -> Result<MatchingStats> {
+    // 1. Load unmatched instructors
+    let instructors: Vec<(i32, String)> = sqlx::query_as(
+        "SELECT id, display_name FROM instructors WHERE rmp_match_status = 'unmatched'",
+    )
+    .fetch_all(db_pool)
+    .await?;
+
+    if instructors.is_empty() {
+        info!("No unmatched instructors to generate candidates for");
+        return Ok(MatchingStats {
+            total_unmatched: 0,
+            candidates_created: 0,
+            auto_matched: 0,
+            skipped_unparseable: 0,
+            skipped_no_candidates: 0,
+        });
+    }
+
+    let instructor_ids: Vec<i32> = instructors.iter().map(|(id, _)| *id).collect();
+    let total_unmatched = instructors.len();
+
+    // 2. Load instructor subjects
+    let subject_rows: Vec<(i32, String)> = sqlx::query_as(
+        r#"
+        SELECT DISTINCT ci.instructor_id, c.subject
+        FROM course_instructors ci
+        JOIN courses c ON c.id = ci.course_id
+        WHERE ci.instructor_id = ANY($1)
+        "#,
+    )
+    .bind(&instructor_ids)
+    .fetch_all(db_pool)
+    .await?;
+
+    let mut subject_map: HashMap<i32, Vec<String>> = HashMap::new();
+    for (iid, subject) in subject_rows {
+        subject_map.entry(iid).or_default().push(subject);
+    }
+
+    // 3. Load all RMP professors
+    let prof_rows: Vec<(i32, String, String, Option<String>, i32)> = sqlx::query_as(
+        "SELECT legacy_id, first_name, last_name, department, num_ratings FROM rmp_professors",
+    )
+    .fetch_all(db_pool)
+    .await?;
+
+    // Build name index: (normalized_last, normalized_first) -> Vec<RmpProfForMatching>
+    let mut name_index: HashMap<(String, String), Vec<RmpProfForMatching>> = HashMap::new();
+    for (legacy_id, first_name, last_name, department, num_ratings) in prof_rows {
+        let key = (normalize(&last_name), normalize(&first_name));
+        name_index.entry(key).or_default().push(RmpProfForMatching {
+            legacy_id,
+            department,
+            num_ratings,
+        });
+    }
+
+    // 4. Load existing candidate pairs (and rejected subset) in a single query
+    let candidate_rows: Vec<(i32, i32, String)> =
+        sqlx::query_as("SELECT instructor_id, rmp_legacy_id, status FROM rmp_match_candidates")
+            .fetch_all(db_pool)
+            .await?;
+
+    let mut existing_pairs: HashSet<(i32, i32)> = HashSet::with_capacity(candidate_rows.len());
+    let mut rejected_pairs: HashSet<(i32, i32)> = HashSet::new();
+    for (iid, lid, status) in candidate_rows {
+        existing_pairs.insert((iid, lid));
+        if status == "rejected" {
+            rejected_pairs.insert((iid, lid));
+        }
+    }
+
+    // 5. Score and collect candidates
+    let empty_subjects: Vec<String> = Vec::new();
+    let mut candidates: Vec<(i32, i32, f32, serde_json::Value)> = Vec::new();
+    let mut auto_accept: Vec<(i32, i32)> = Vec::new(); // (instructor_id, legacy_id)
+    let mut skipped_unparseable = 0usize;
+    let mut skipped_no_candidates = 0usize;
+
+    for (instructor_id, display_name) in &instructors {
+        let Some((norm_last, norm_first)) = parse_display_name(display_name) else {
+            skipped_unparseable += 1;
+            debug!(
+                instructor_id,
+                display_name, "Unparseable display name, skipping"
+            );
+            continue;
+        };
+
+        let subjects = subject_map.get(instructor_id).unwrap_or(&empty_subjects);
+
+        let key = (norm_last.clone(), norm_first.clone());
+        let Some(rmp_candidates) = name_index.get(&key) else {
+            skipped_no_candidates += 1;
+            continue;
+        };
+
+        // Uniqueness is judged against every same-name RMP professor, including
+        // pairs that were already evaluated in an earlier run.
+        let candidate_count = rmp_candidates.len();
+        let mut best: Option<(f32, i32)> = None;
+
+        for prof in rmp_candidates {
+            let pair = (*instructor_id, prof.legacy_id);
+            if existing_pairs.contains(&pair) {
+                continue;
+            }
+
+            let ms = compute_match_score(
+                subjects,
+                prof.department.as_deref(),
+                candidate_count,
+                prof.num_ratings,
+            );
+
+            if ms.score < MIN_CANDIDATE_THRESHOLD {
+                continue;
+            }
+
+            let breakdown_json =
+                serde_json::to_value(&ms.breakdown).unwrap_or_else(|_| serde_json::json!({}));
+
+            candidates.push((*instructor_id, prof.legacy_id, ms.score, breakdown_json));
+
+            // Keep the first candidate seen on ties (strict `>`).
+            match best {
+                Some((s, _)) if ms.score > s => best = Some((ms.score, prof.legacy_id)),
+                None => best = Some((ms.score, prof.legacy_id)),
+                _ => {}
+            }
+        }
+
+        // Auto-accept the top candidate if it meets the threshold and is not
+        // previously rejected.
+        if let Some((score, legacy_id)) = best
+            && score >= AUTO_ACCEPT_THRESHOLD
+            && !rejected_pairs.contains(&(*instructor_id, legacy_id))
+        {
+            auto_accept.push((*instructor_id, legacy_id));
+        }
+    }
+
+    // 6–7. Write candidates and auto-accept within a single transaction
+    let candidates_created = candidates.len();
+
+    let mut tx = db_pool.begin().await?;
+
+    // 6. Batch-insert candidates
+    if !candidates.is_empty() {
+        let c_instructor_ids: Vec<i32> = candidates.iter().map(|(iid, _, _, _)| *iid).collect();
+        let c_legacy_ids: Vec<i32> = candidates.iter().map(|(_, lid, _, _)| *lid).collect();
+        let c_scores: Vec<f32> = candidates.iter().map(|(_, _, s, _)| *s).collect();
+        let c_breakdowns: Vec<serde_json::Value> =
+            candidates.into_iter().map(|(_, _, _, b)| b).collect();
+
+        sqlx::query(
+            r#"
+            INSERT INTO rmp_match_candidates (instructor_id, rmp_legacy_id, score, score_breakdown)
+            SELECT v.instructor_id, v.rmp_legacy_id, v.score, v.score_breakdown
+            FROM UNNEST($1::int4[], $2::int4[], $3::real[], $4::jsonb[])
+                AS v(instructor_id, rmp_legacy_id, score, score_breakdown)
+            ON CONFLICT (instructor_id, rmp_legacy_id) DO NOTHING
+            "#,
+        )
+        .bind(&c_instructor_ids)
+        .bind(&c_legacy_ids)
+        .bind(&c_scores)
+        .bind(&c_breakdowns)
+        .execute(&mut *tx)
+        .await?;
+    }
+
+    // 7. Auto-accept top candidates.
+    //
+    // The link insert runs first and reports, via RETURNING, which pairs were
+    // really linked. Pairs skipped by ON CONFLICT (the RMP profile is already
+    // linked) must not be marked accepted / auto, otherwise the instructor
+    // would leave the review queue without having any link.
+    let auto_matched = if auto_accept.is_empty() {
+        0
+    } else {
+        let aa_instructor_ids: Vec<i32> = auto_accept.iter().map(|(iid, _)| *iid).collect();
+        let aa_legacy_ids: Vec<i32> = auto_accept.iter().map(|(_, lid)| *lid).collect();
+
+        // Insert links into instructor_rmp_links
+        let linked: Vec<(i32, i32)> = sqlx::query_as(
+            r#"
+            INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, source)
+            SELECT v.instructor_id, v.rmp_legacy_id, 'auto'
+            FROM UNNEST($1::int4[], $2::int4[]) AS v(instructor_id, rmp_legacy_id)
+            ON CONFLICT (rmp_legacy_id) DO NOTHING
+            RETURNING instructor_id, rmp_legacy_id
+            "#,
+        )
+        .bind(&aa_instructor_ids)
+        .bind(&aa_legacy_ids)
+        .fetch_all(&mut *tx)
+        .await?;
+
+        if !linked.is_empty() {
+            let linked_instructor_ids: Vec<i32> = linked.iter().map(|(iid, _)| *iid).collect();
+            let linked_legacy_ids: Vec<i32> = linked.iter().map(|(_, lid)| *lid).collect();
+
+            // Mark the candidate row as accepted
+            sqlx::query(
+                r#"
+                UPDATE rmp_match_candidates mc
+                SET status = 'accepted', resolved_at = NOW()
+                FROM UNNEST($1::int4[], $2::int4[]) AS v(instructor_id, rmp_legacy_id)
+                WHERE mc.instructor_id = v.instructor_id
+                  AND mc.rmp_legacy_id = v.rmp_legacy_id
+                "#,
+            )
+            .bind(&linked_instructor_ids)
+            .bind(&linked_legacy_ids)
+            .execute(&mut *tx)
+            .await?;
+
+            // Update instructor match status
+            sqlx::query(
+                r#"
+                UPDATE instructors i
+                SET rmp_match_status = 'auto'
+                FROM UNNEST($1::int4[]) AS v(instructor_id)
+                WHERE i.id = v.instructor_id
+                "#,
+            )
+            .bind(&linked_instructor_ids)
+            .execute(&mut *tx)
+            .await?;
+        }
+
+        linked.len()
+    };
+
+    tx.commit().await?;
+
+    let stats = MatchingStats {
+        total_unmatched,
+        candidates_created,
+        auto_matched,
+        skipped_unparseable,
+        skipped_no_candidates,
+    };
+
+    info!(
+        total_unmatched = stats.total_unmatched,
+        candidates_created = stats.candidates_created,
+        auto_matched = stats.auto_matched,
+        skipped_unparseable = stats.skipped_unparseable,
+        skipped_no_candidates = stats.skipped_no_candidates,
+        "Candidate generation complete"
+    );
+
+    Ok(stats)
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A unique, same-department candidate with plenty of ratings must clear
+    /// the 0.85 auto-accept bar.
+    #[test]
+    fn test_ideal_candidate_high_score() {
+        let ms = compute_match_score(
+            &["CS".to_string()],
+            Some("Computer Science"),
+            1,  // unique candidate
+            50, // decent ratings
+        );
+        // dept 1.0*0.50 + unique 1.0*0.30 + volume 1.0*0.20 ≈ 1.0
+        // (the volume signal saturates at 1.0 from 5 ratings upward)
+        assert!(ms.score >= 0.85, "Expected score >= 0.85, got {}", ms.score);
+        assert_eq!(ms.breakdown.uniqueness, 1.0);
+        assert_eq!(ms.breakdown.department, 1.0);
+    }
+
+    /// Three same-name candidates score lower than a single one, all else equal.
+    #[test]
+    fn test_ambiguous_candidates_lower_score() {
+        let unique = compute_match_score(&[], None, 1, 10);
+        let ambiguous = compute_match_score(&[], None, 3, 10);
+        assert!(
+            unique.score > ambiguous.score,
+            "Unique ({}) should outscore ambiguous ({})",
+            unique.score,
+            ambiguous.score
+        );
+        assert_eq!(unique.breakdown.uniqueness, 1.0);
+        assert_eq!(ambiguous.breakdown.uniqueness, 0.2);
+    }
+
+    /// A missing RMP department yields the neutral department signal.
+    #[test]
+    fn test_no_department_neutral() {
+        let ms = compute_match_score(&["CS".to_string()], None, 1, 10);
+        assert_eq!(ms.breakdown.department, 0.5);
+    }
+
+    /// The `CS` subject code matches the "Computer Science" department.
+    #[test]
+    fn test_department_match() {
+        let ms = compute_match_score(&["CS".to_string()], Some("Computer Science"), 1, 10);
+        assert_eq!(ms.breakdown.department, 1.0);
+    }
+
+    /// The `CS` subject code does not match the "History" department.
+    #[test]
+    fn test_department_mismatch() {
+        let ms = compute_match_score(&["CS".to_string()], Some("History"), 1, 10);
+        assert_eq!(ms.breakdown.department, 0.2);
+    }
+
+    /// A department match raises the composite score over a mismatch.
+    #[test]
+    fn test_department_match_outscores_mismatch() {
+        let matched = compute_match_score(&["CS".to_string()], Some("Computer Science"), 1, 10);
+        let mismatched = compute_match_score(&["CS".to_string()], Some("History"), 1, 10);
+        assert!(
+            matched.score > mismatched.score,
+            "Department match ({}) should outscore mismatch ({})",
+            matched.score,
+            mismatched.score
+        );
+    }
+
+    /// Zero ratings give a volume signal of exactly 0.0; 100 ratings give more
+    /// than 0.9.
+    #[test]
+    fn test_volume_scaling() {
+        let zero = compute_match_score(&[], None, 1, 0);
+        let many = compute_match_score(&[], None, 1, 100);
+        assert!(
+            many.breakdown.volume > zero.breakdown.volume,
+            "100 ratings ({}) should outscore 0 ratings ({})",
+            many.breakdown.volume,
+            zero.breakdown.volume
+        );
+        assert_eq!(zero.breakdown.volume, 0.0);
+        assert!(
+            many.breakdown.volume > 0.9,
+            "100 ratings should be near max"
+        );
+    }
+}
@@ -250,8 +250,16 @@ impl Scheduler {
        crate::data::rmp::batch_upsert_rmp_professors(&professors, db_pool).await?;
        info!(total, "RMP professors upserted");

-        let matched = crate::data::rmp::auto_match_instructors(db_pool).await?;
-        info!(total, matched, "RMP sync complete");
+        let stats = crate::data::rmp_matching::generate_candidates(db_pool).await?;
+        info!(
+            total,
+            stats.total_unmatched,
+            stats.candidates_created,
+            stats.auto_matched,
+            stats.skipped_unparseable,
+            stats.skipped_no_candidates,
+            "RMP sync complete"
+        );

        Ok(())
    }
@@ -0,0 +1,865 @@
+//! Admin API handlers for RMP instructor matching management.
+
+use axum::extract::{Path, Query, State};
+use axum::http::StatusCode;
+use axum::response::Json;
+use serde::{Deserialize, Serialize};
+use serde_json::{Value, json};
+use ts_rs::TS;
+
+use crate::state::AppState;
+use crate::web::extractors::AdminUser;
+
+// ---------------------------------------------------------------------------
+// Query / body types
+// ---------------------------------------------------------------------------
+
+/// Query-string parameters accepted by `list_instructors`. Every field is optional.
+#[derive(Deserialize)]
+pub struct ListInstructorsParams {
+    /// When present, only instructors whose `rmp_match_status` equals this value are listed.
+    status: Option<String>,
+    /// When present, wrapped in `%…%` and matched with `ILIKE` against display name and email.
+    search: Option<String>,
+    /// 1-based page number; defaults to 1 and is raised to at least 1.
+    page: Option<i32>,
+    /// Page size; defaults to 50 and is clamped to `1..=100`.
+    per_page: Option<i32>,
+    /// Sort key: `name_asc`, `name_desc` or `status`. Any other value (or none)
+    /// sorts by the top pending candidate's score, highest first.
+    sort: Option<String>,
+}
+
+/// JSON body for `match_instructor`; the field is sent as `rmpLegacyId` (camelCase rename).
+#[derive(Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct MatchBody {
+    /// `rmp_legacy_id` of the pending candidate to accept.
+    rmp_legacy_id: i32,
+}
+
+/// JSON body for `reject_candidate`; the field is sent as `rmpLegacyId` (camelCase rename).
+#[derive(Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct RejectCandidateBody {
+    /// `rmp_legacy_id` of the pending candidate to reject.
+    rmp_legacy_id: i32,
+}
+
+// ---------------------------------------------------------------------------
+// Response types
+// ---------------------------------------------------------------------------
+
+/// Simple acknowledgement response for mutating operations.
+#[derive(Debug, Clone, Serialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export)]
+pub struct OkResponse {
+    /// The handlers in this module always set this to `true`; failures are
+    /// reported through the error tuple instead.
+    pub ok: bool,
+}
+
+/// A top-candidate summary shown in the instructor list view.
+///
+/// Built from the highest-scoring `pending` row in `rmp_match_candidates` for an
+/// instructor, left-joined to `rmp_professors`.
+#[derive(Debug, Clone, Serialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export)]
+pub struct TopCandidateResponse {
+    /// RMP legacy id of the candidate profile.
+    pub rmp_legacy_id: i32,
+    /// Candidate score as stored in `rmp_match_candidates.score`.
+    pub score: Option<f32>,
+    /// Raw `score_breakdown` JSON from the candidate row; the TypeScript binding
+    /// is typed as an optional string-to-number map via the `ts(as = …)` override.
+    #[ts(as = "Option<std::collections::HashMap<String, f32>>")]
+    pub score_breakdown: Option<serde_json::Value>,
+    // The remaining fields come from the LEFT JOIN on `rmp_professors`, so they
+    // are `None` when no professor row matches the candidate's legacy id.
+    pub first_name: Option<String>,
+    pub last_name: Option<String>,
+    pub department: Option<String>,
+    pub avg_rating: Option<f32>,
+    pub num_ratings: Option<i32>,
+}
+
+/// An instructor row in the paginated list.
+#[derive(Debug, Clone, Serialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export)]
+pub struct InstructorListItem {
+    /// Primary key of the `instructors` row.
+    pub id: i32,
+    pub display_name: String,
+    pub email: String,
+    /// Raw `instructors.rmp_match_status`; this module reads or writes the
+    /// values `unmatched`, `auto`, `confirmed` and `rejected`.
+    pub rmp_match_status: String,
+    /// Number of `instructor_rmp_links` rows for this instructor.
+    #[ts(as = "i32")]
+    pub rmp_link_count: i64,
+    /// Number of candidates for this instructor that are still `pending`.
+    #[ts(as = "i32")]
+    pub candidate_count: i64,
+    /// Number of distinct course subjects linked to this instructor.
+    #[ts(as = "i32")]
+    pub course_subject_count: i64,
+    /// Highest-scoring pending candidate, or `None` when there is none.
+    pub top_candidate: Option<TopCandidateResponse>,
+}
+
+/// Aggregate status counts for the instructor list.
+///
+/// Computed over all instructors, ignoring the list's `status`/`search` filters.
+#[derive(Debug, Clone, Serialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export)]
+pub struct InstructorStats {
+    /// Sum of every per-status count, including statuses not broken out below.
+    #[ts(as = "i32")]
+    pub total: i64,
+    /// Instructors whose `rmp_match_status` is `unmatched`.
+    #[ts(as = "i32")]
+    pub unmatched: i64,
+    /// Instructors whose `rmp_match_status` is `auto`.
+    #[ts(as = "i32")]
+    pub auto: i64,
+    /// Instructors whose `rmp_match_status` is `confirmed`.
+    #[ts(as = "i32")]
+    pub confirmed: i64,
+    /// Instructors whose `rmp_match_status` is `rejected`.
+    #[ts(as = "i32")]
+    pub rejected: i64,
+    /// Distinct instructors that have at least one candidate row of any status.
+    #[ts(as = "i32")]
+    pub with_candidates: i64,
+}
+
+/// Response for `GET /api/admin/instructors`.
+#[derive(Debug, Clone, Serialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export)]
+pub struct ListInstructorsResponse {
+    /// The instructors on the requested page.
+    pub instructors: Vec<InstructorListItem>,
+    /// Number of instructors matching the filters, across all pages.
+    #[ts(as = "i32")]
+    pub total: i64,
+    /// Effective page number after defaulting and clamping.
+    pub page: i32,
+    /// Effective page size after defaulting and clamping.
+    pub per_page: i32,
+    /// Unfiltered per-status totals.
+    pub stats: InstructorStats,
+}
+
+/// Instructor summary in the detail view.
+#[derive(Debug, Clone, Serialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export)]
+pub struct InstructorDetail {
+    /// Primary key of the `instructors` row.
+    pub id: i32,
+    pub display_name: String,
+    pub email: String,
+    /// Raw `instructors.rmp_match_status` value.
+    pub rmp_match_status: String,
+    /// Distinct course subjects linked to the instructor, sorted ascending.
+    pub subjects_taught: Vec<String>,
+    /// Number of distinct courses linked to the instructor.
+    #[ts(as = "i32")]
+    pub course_count: i64,
+}
+
+/// A linked RMP profile in the detail view.
+///
+/// One entry per `instructor_rmp_links` row joined to its `rmp_professors` row.
+#[derive(Debug, Clone, Serialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export)]
+pub struct LinkedRmpProfile {
+    /// Primary key of the `instructor_rmp_links` row.
+    pub link_id: i32,
+    /// `rmp_professors.legacy_id` of the linked profile.
+    pub legacy_id: i32,
+    // Profile attributes copied as-is from `rmp_professors`.
+    pub first_name: Option<String>,
+    pub last_name: Option<String>,
+    pub department: Option<String>,
+    pub avg_rating: Option<f32>,
+    pub avg_difficulty: Option<f32>,
+    pub num_ratings: Option<i32>,
+    pub would_take_again_pct: Option<f32>,
+}
+
+/// A match candidate in the detail view.
+///
+/// One entry per `rmp_match_candidates` row (any status) joined to its
+/// `rmp_professors` row.
+#[derive(Debug, Clone, Serialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export)]
+pub struct CandidateResponse {
+    /// Primary key of the `rmp_match_candidates` row.
+    pub id: i32,
+    /// RMP legacy id of the candidate profile.
+    pub rmp_legacy_id: i32,
+    // Profile attributes copied as-is from `rmp_professors`.
+    pub first_name: Option<String>,
+    pub last_name: Option<String>,
+    pub department: Option<String>,
+    pub avg_rating: Option<f32>,
+    pub avg_difficulty: Option<f32>,
+    pub num_ratings: Option<i32>,
+    pub would_take_again_pct: Option<f32>,
+    /// Candidate score as stored in `rmp_match_candidates.score`.
+    pub score: Option<f32>,
+    /// Raw `score_breakdown` JSON from the candidate row; the TypeScript binding
+    /// is typed as an optional string-to-number map via the `ts(as = …)` override.
+    #[ts(as = "Option<std::collections::HashMap<String, f32>>")]
+    pub score_breakdown: Option<serde_json::Value>,
+    /// Candidate status; this module queries or writes `pending`, `accepted` and `rejected`.
+    pub status: String,
+}
+
+/// Response for `GET /api/admin/instructors/{id}` and `POST .../match`.
+#[derive(Debug, Clone, Serialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export)]
+pub struct InstructorDetailResponse {
+    /// The instructor itself plus teaching summary.
+    pub instructor: InstructorDetail,
+    /// All RMP profiles currently linked to the instructor, most-rated first.
+    pub current_matches: Vec<LinkedRmpProfile>,
+    /// All candidates for the instructor regardless of status, highest score first.
+    pub candidates: Vec<CandidateResponse>,
+}
+
+/// Response for `POST /api/admin/rmp/rescore`.
+///
+/// Each field is copied from the same-named field of the stats value returned
+/// by `crate::data::rmp_matching::generate_candidates`; the precise meaning of
+/// each counter is defined there.
+#[derive(Debug, Clone, Serialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export)]
+pub struct RescoreResponse {
+    pub total_unmatched: usize,
+    pub candidates_created: usize,
+    pub auto_matched: usize,
+    pub skipped_unparseable: usize,
+    pub skipped_no_candidates: usize,
+}
+
+// ---------------------------------------------------------------------------
+// Helper: map sqlx errors to the standard admin error tuple
+// ---------------------------------------------------------------------------
+
+fn db_error(context: &str, e: sqlx::Error) -> (StatusCode, Json<Value>) {
+    tracing::error!(error = %e, "{context}");
+    (
+        StatusCode::INTERNAL_SERVER_ERROR,
+        Json(json!({"error": context})),
+    )
+}
+
+// ---------------------------------------------------------------------------
+// Row types for SQL queries
+// ---------------------------------------------------------------------------
+
+/// One row of the list query in `list_instructors`: an instructor plus its
+/// aggregate counts and (optionally) its top pending candidate.
+#[derive(sqlx::FromRow)]
+struct InstructorRow {
+    id: i32,
+    display_name: String,
+    email: String,
+    rmp_match_status: String,
+    /// `COUNT(*)` of `instructor_rmp_links` rows for the instructor.
+    rmp_link_count: Option<i64>,
+    // `top_candidate_*` come from the LATERAL subquery and are all NULL when
+    // the instructor has no pending candidate.
+    top_candidate_rmp_id: Option<i32>,
+    top_candidate_score: Option<f32>,
+    top_candidate_breakdown: Option<serde_json::Value>,
+    // `tc_*` come from the LEFT JOIN on `rmp_professors` for the top candidate.
+    tc_first_name: Option<String>,
+    tc_last_name: Option<String>,
+    tc_department: Option<String>,
+    tc_avg_rating: Option<f32>,
+    tc_num_ratings: Option<i32>,
+    /// `COUNT(*)` of the instructor's candidates with status `pending`.
+    candidate_count: Option<i64>,
+    /// `COUNT(DISTINCT c.subject)` over the instructor's linked courses.
+    course_subject_count: Option<i64>,
+}
+
+/// One group of the `GROUP BY rmp_match_status` stats query over `instructors`.
+#[derive(sqlx::FromRow)]
+struct StatusCount {
+    /// The status value shared by the group.
+    rmp_match_status: String,
+    /// Number of instructors with that status.
+    count: i64,
+}
+
+/// One row of the candidates query in `build_instructor_detail`:
+/// `rmp_match_candidates` joined to `rmp_professors`.
+#[derive(sqlx::FromRow)]
+struct CandidateRow {
+    // Columns selected from `rmp_match_candidates`.
+    id: i32,
+    rmp_legacy_id: i32,
+    score: Option<f32>,
+    score_breakdown: Option<serde_json::Value>,
+    status: String,
+    // Columns selected from `rmp_professors`.
+    first_name: Option<String>,
+    last_name: Option<String>,
+    department: Option<String>,
+    avg_rating: Option<f32>,
+    avg_difficulty: Option<f32>,
+    num_ratings: Option<i32>,
+    would_take_again_pct: Option<f32>,
+}
+
+/// One row of the linked-profiles query in `build_instructor_detail`:
+/// `instructor_rmp_links` joined to `rmp_professors`.
+#[derive(sqlx::FromRow)]
+struct LinkedRmpProfileRow {
+    /// `instructor_rmp_links.id`, selected under the alias `link_id`.
+    link_id: i32,
+    // Columns selected from `rmp_professors`.
+    legacy_id: i32,
+    first_name: Option<String>,
+    last_name: Option<String>,
+    department: Option<String>,
+    avg_rating: Option<f32>,
+    avg_difficulty: Option<f32>,
+    num_ratings: Option<i32>,
+    would_take_again_pct: Option<f32>,
+}
+
+// ---------------------------------------------------------------------------
+// 1. GET /api/admin/instructors — paginated list with filtering
+// ---------------------------------------------------------------------------
+
+/// `GET /api/admin/instructors` — List instructors with filtering and pagination.
+///
+/// Query parameters (all optional): `status` (exact match on `rmp_match_status`),
+/// `search` (case-insensitive substring of display name or email), `page`
+/// (default 1, minimum 1), `per_page` (default 50, clamped to `1..=100`) and
+/// `sort` (`name_asc`, `name_desc`, `status`; anything else sorts by the top
+/// pending candidate's score, highest first).
+///
+/// The response contains the requested page, the number of rows matching the
+/// filters, and unfiltered per-status totals.
+///
+/// # Errors
+///
+/// Returns `500` with a JSON `error` message if any of the queries fails.
+pub async fn list_instructors(
+    AdminUser(_user): AdminUser,
+    State(state): State<AppState>,
+    Query(params): Query<ListInstructorsParams>,
+) -> Result<Json<ListInstructorsResponse>, (StatusCode, Json<Value>)> {
+    let page = params.page.unwrap_or(1).max(1);
+    let per_page = params.per_page.unwrap_or(50).clamp(1, 100);
+    // `page` has no upper bound, so widen to i64 before multiplying: in i32 a
+    // large `?page=` overflows (panic in debug; wrap-around in release, giving
+    // a wrong or negative OFFSET).
+    let offset = i64::from(page - 1) * i64::from(per_page);
+
+    // Only these fixed fragments are ever interpolated into the SQL text; the
+    // user-supplied sort key merely selects one of them.
+    let sort_clause = match params.sort.as_deref() {
+        Some("name_asc") => "i.display_name ASC",
+        Some("name_desc") => "i.display_name DESC",
+        Some("status") => "i.rmp_match_status ASC, i.display_name ASC",
+        _ => "tc.score DESC NULLS LAST, i.display_name ASC",
+    };
+
+    // Build WHERE clause. Placeholders are numbered in the same order the
+    // values are bound below (status first, then search).
+    let mut conditions = Vec::new();
+    let mut bind_idx = 0u32;
+
+    if params.status.is_some() {
+        bind_idx += 1;
+        conditions.push(format!("i.rmp_match_status = ${bind_idx}"));
+    }
+    if params.search.is_some() {
+        bind_idx += 1;
+        conditions.push(format!(
+            "(i.display_name ILIKE ${bind_idx} OR i.email ILIKE ${bind_idx})"
+        ));
+    }
+
+    let where_clause = if conditions.is_empty() {
+        String::new()
+    } else {
+        format!("WHERE {}", conditions.join(" AND "))
+    };
+
+    // `per_page` and `offset` are integers computed above, so interpolating
+    // them into the statement cannot inject SQL.
+    let query_str = format!(
+        r#"
+        SELECT
+            i.id, i.display_name, i.email, i.rmp_match_status,
+            (SELECT COUNT(*) FROM instructor_rmp_links irl WHERE irl.instructor_id = i.id) as rmp_link_count,
+            tc.rmp_legacy_id as top_candidate_rmp_id,
+            tc.score as top_candidate_score,
+            tc.score_breakdown as top_candidate_breakdown,
+            rp.first_name as tc_first_name,
+            rp.last_name as tc_last_name,
+            rp.department as tc_department,
+            rp.avg_rating as tc_avg_rating,
+            rp.num_ratings as tc_num_ratings,
+            (SELECT COUNT(*) FROM rmp_match_candidates mc WHERE mc.instructor_id = i.id AND mc.status = 'pending') as candidate_count,
+            (SELECT COUNT(DISTINCT c.subject) FROM course_instructors ci JOIN courses c ON c.id = ci.course_id WHERE ci.instructor_id = i.id) as course_subject_count
+        FROM instructors i
+        LEFT JOIN LATERAL (
+            SELECT mc.rmp_legacy_id, mc.score, mc.score_breakdown
+            FROM rmp_match_candidates mc
+            WHERE mc.instructor_id = i.id AND mc.status = 'pending'
+            ORDER BY mc.score DESC
+            LIMIT 1
+        ) tc ON true
+        LEFT JOIN rmp_professors rp ON rp.legacy_id = tc.rmp_legacy_id
+        {where_clause}
+        ORDER BY {sort_clause}
+        LIMIT {per_page} OFFSET {offset}
+        "#
+    );
+
+    // Build the query with dynamic binds
+    let mut query = sqlx::query_as::<_, InstructorRow>(&query_str);
+    if let Some(ref status) = params.status {
+        query = query.bind(status);
+    }
+    if let Some(ref search) = params.search {
+        // NOTE(review): `%` and `_` inside `search` are not escaped, so they
+        // act as LIKE wildcards.
+        query = query.bind(format!("%{search}%"));
+    }
+
+    let rows = query
+        .fetch_all(&state.db_pool)
+        .await
+        .map_err(|e| db_error("failed to list instructors", e))?;
+
+    // Count total with filters (same WHERE clause, same bind order)
+    let count_query_str = format!("SELECT COUNT(*) FROM instructors i {where_clause}");
+    let mut count_query = sqlx::query_as::<_, (i64,)>(&count_query_str);
+    if let Some(ref status) = params.status {
+        count_query = count_query.bind(status);
+    }
+    if let Some(ref search) = params.search {
+        count_query = count_query.bind(format!("%{search}%"));
+    }
+
+    let (total,) = count_query
+        .fetch_one(&state.db_pool)
+        .await
+        .map_err(|e| db_error("failed to count instructors", e))?;
+
+    // Aggregate stats (unfiltered)
+    let stats_rows = sqlx::query_as::<_, StatusCount>(
+        "SELECT rmp_match_status, COUNT(*) as count FROM instructors GROUP BY rmp_match_status",
+    )
+    .fetch_all(&state.db_pool)
+    .await
+    .map_err(|e| db_error("failed to get instructor stats", e))?;
+
+    // Count instructors with at least one candidate (for progress bar denominator)
+    let (with_candidates,): (i64,) =
+        sqlx::query_as("SELECT COUNT(DISTINCT instructor_id) FROM rmp_match_candidates")
+            .fetch_one(&state.db_pool)
+            .await
+            .map_err(|e| db_error("failed to count instructors with candidates", e))?;
+
+    let mut stats = InstructorStats {
+        total: 0,
+        unmatched: 0,
+        auto: 0,
+        confirmed: 0,
+        rejected: 0,
+        with_candidates,
+    };
+    for row in &stats_rows {
+        // Unrecognised statuses still count towards `total`.
+        stats.total += row.count;
+        match row.rmp_match_status.as_str() {
+            "unmatched" => stats.unmatched = row.count,
+            "auto" => stats.auto = row.count,
+            "confirmed" => stats.confirmed = row.count,
+            "rejected" => stats.rejected = row.count,
+            _ => {}
+        }
+    }
+
+    let instructors: Vec<InstructorListItem> = rows
+        .iter()
+        .map(|r| {
+            // A top candidate exists exactly when the LATERAL subquery produced a row.
+            let top_candidate = r.top_candidate_rmp_id.map(|rmp_id| TopCandidateResponse {
+                rmp_legacy_id: rmp_id,
+                score: r.top_candidate_score,
+                score_breakdown: r.top_candidate_breakdown.clone(),
+                first_name: r.tc_first_name.clone(),
+                last_name: r.tc_last_name.clone(),
+                department: r.tc_department.clone(),
+                avg_rating: r.tc_avg_rating,
+                num_ratings: r.tc_num_ratings,
+            });
+
+            InstructorListItem {
+                id: r.id,
+                display_name: r.display_name.clone(),
+                email: r.email.clone(),
+                rmp_match_status: r.rmp_match_status.clone(),
+                rmp_link_count: r.rmp_link_count.unwrap_or(0),
+                candidate_count: r.candidate_count.unwrap_or(0),
+                course_subject_count: r.course_subject_count.unwrap_or(0),
+                top_candidate,
+            }
+        })
+        .collect();
+
+    Ok(Json(ListInstructorsResponse {
+        instructors,
+        total,
+        page,
+        per_page,
+        stats,
+    }))
+}
+
+// ---------------------------------------------------------------------------
+// 2. GET /api/admin/instructors/{id} — full detail
+// ---------------------------------------------------------------------------
+
+/// `GET /api/admin/instructors/{id}` — Full instructor detail with candidates.
+///
+/// Requires an admin user; all the work is delegated to `build_instructor_detail`.
+pub async fn get_instructor(
+    AdminUser(_user): AdminUser,
+    State(state): State<AppState>,
+    Path(id): Path<i32>,
+) -> Result<Json<InstructorDetailResponse>, (StatusCode, Json<Value>)> {
+    let detail = build_instructor_detail(&state, id).await?;
+    Ok(detail)
+}
+
+/// Assembles the detail payload for instructor `id`: the instructor row, the
+/// subjects and number of courses linked to it, every match candidate, and
+/// every linked RMP profile.
+///
+/// Returns `404` when no instructor has that id and `500` when a query fails.
+/// The queries run one after another on the pool, outside any transaction.
+async fn build_instructor_detail(
+    state: &AppState,
+    id: i32,
+) -> Result<Json<InstructorDetailResponse>, (StatusCode, Json<Value>)> {
+    let pool = &state.db_pool;
+
+    // Instructor row; a missing row is a 404 rather than a database error.
+    let found = sqlx::query_as::<_, (i32, String, String, String)>(
+        "SELECT id, display_name, email, rmp_match_status FROM instructors WHERE id = $1",
+    )
+    .bind(id)
+    .fetch_optional(pool)
+    .await
+    .map_err(|e| db_error("failed to fetch instructor", e))?;
+
+    let Some((inst_id, display_name, email, rmp_match_status)) = found else {
+        return Err((
+            StatusCode::NOT_FOUND,
+            Json(json!({"error": "instructor not found"})),
+        ));
+    };
+
+    // Distinct subjects across the instructor's courses, alphabetical.
+    let subjects_taught: Vec<String> = sqlx::query_as::<_, (String,)>(
+        "SELECT DISTINCT c.subject FROM course_instructors ci JOIN courses c ON c.id = ci.course_id WHERE ci.instructor_id = $1 ORDER BY c.subject",
+    )
+    .bind(inst_id)
+    .fetch_all(pool)
+    .await
+    .map_err(|e| db_error("failed to fetch subjects", e))?
+    .into_iter()
+    .map(|(subject,)| subject)
+    .collect();
+
+    // Number of distinct courses linked to the instructor.
+    let (course_count,) = sqlx::query_as::<_, (i64,)>(
+        "SELECT COUNT(DISTINCT ci.course_id) FROM course_instructors ci WHERE ci.instructor_id = $1",
+    )
+    .bind(inst_id)
+    .fetch_one(pool)
+    .await
+    .map_err(|e| db_error("failed to count courses", e))?;
+
+    // Every candidate (any status) with its RMP professor data, best score first.
+    let candidate_rows = sqlx::query_as::<_, CandidateRow>(
+        r#"
+        SELECT mc.id, mc.rmp_legacy_id, mc.score, mc.score_breakdown, mc.status,
+               rp.first_name, rp.last_name, rp.department,
+               rp.avg_rating, rp.avg_difficulty, rp.num_ratings, rp.would_take_again_pct
+        FROM rmp_match_candidates mc
+        JOIN rmp_professors rp ON rp.legacy_id = mc.rmp_legacy_id
+        WHERE mc.instructor_id = $1
+        ORDER BY mc.score DESC
+        "#,
+    )
+    .bind(inst_id)
+    .fetch_all(pool)
+    .await
+    .map_err(|e| db_error("failed to fetch candidates", e))?;
+
+    // Every RMP profile currently linked to the instructor, most-rated first.
+    let link_rows = sqlx::query_as::<_, LinkedRmpProfileRow>(
+        r#"
+        SELECT irl.id as link_id,
+               rp.legacy_id, rp.first_name, rp.last_name, rp.department,
+               rp.avg_rating, rp.avg_difficulty, rp.num_ratings, rp.would_take_again_pct
+        FROM instructor_rmp_links irl
+        JOIN rmp_professors rp ON rp.legacy_id = irl.rmp_legacy_id
+        WHERE irl.instructor_id = $1
+        ORDER BY rp.num_ratings DESC NULLS LAST
+        "#,
+    )
+    .bind(inst_id)
+    .fetch_all(pool)
+    .await
+    .map_err(|e| db_error("failed to fetch linked rmp profiles", e))?;
+
+    // Row structs -> API structs; a plain field-for-field move.
+    let mut current_matches = Vec::with_capacity(link_rows.len());
+    for row in link_rows {
+        current_matches.push(LinkedRmpProfile {
+            link_id: row.link_id,
+            legacy_id: row.legacy_id,
+            first_name: row.first_name,
+            last_name: row.last_name,
+            department: row.department,
+            avg_rating: row.avg_rating,
+            avg_difficulty: row.avg_difficulty,
+            num_ratings: row.num_ratings,
+            would_take_again_pct: row.would_take_again_pct,
+        });
+    }
+
+    let mut candidates = Vec::with_capacity(candidate_rows.len());
+    for row in candidate_rows {
+        candidates.push(CandidateResponse {
+            id: row.id,
+            rmp_legacy_id: row.rmp_legacy_id,
+            first_name: row.first_name,
+            last_name: row.last_name,
+            department: row.department,
+            avg_rating: row.avg_rating,
+            avg_difficulty: row.avg_difficulty,
+            num_ratings: row.num_ratings,
+            would_take_again_pct: row.would_take_again_pct,
+            score: row.score,
+            score_breakdown: row.score_breakdown,
+            status: row.status,
+        });
+    }
+
+    let instructor = InstructorDetail {
+        id: inst_id,
+        display_name,
+        email,
+        rmp_match_status,
+        subjects_taught,
+        course_count,
+    };
+
+    Ok(Json(InstructorDetailResponse {
+        instructor,
+        current_matches,
+        candidates,
+    }))
+}
+
+// ---------------------------------------------------------------------------
+// 3. POST /api/admin/instructors/{id}/match — accept a candidate
+// ---------------------------------------------------------------------------
+
+/// `POST /api/admin/instructors/{id}/match` — Accept a candidate match.
+///
+/// Within a single transaction this:
+/// 1. checks that a `pending` candidate exists for (`id`, `rmpLegacyId`) and
+///    locks that row,
+/// 2. inserts the link into `instructor_rmp_links` with source `manual`,
+/// 3. sets the instructor's `rmp_match_status` to `confirmed`,
+/// 4. marks the candidate `accepted`, recording who resolved it and when.
+///
+/// On success the refreshed instructor detail is returned.
+///
+/// # Errors
+///
+/// * `404` — no pending candidate for this instructor and RMP profile.
+/// * `409` — the RMP profile is linked to a different instructor; the body
+///   carries `conflictingInstructorId`.
+/// * `500` — a database operation failed.
+pub async fn match_instructor(
+    AdminUser(user): AdminUser,
+    State(state): State<AppState>,
+    Path(id): Path<i32>,
+    Json(body): Json<MatchBody>,
+) -> Result<Json<InstructorDetailResponse>, (StatusCode, Json<Value>)> {
+    // The checks run inside the same transaction as the writes so the state
+    // they observe cannot change before the writes are applied. Every early
+    // return below drops `tx` uncommitted, which discards any partial work.
+    let mut tx = state
+        .db_pool
+        .begin()
+        .await
+        .map_err(|e| db_error("failed to begin transaction", e))?;
+
+    // Verify the candidate exists and is pending. `FOR UPDATE` locks the row so
+    // a concurrent accept/reject of the same candidate waits for this transaction.
+    let candidate: Option<(i32,)> = sqlx::query_as(
+        "SELECT id FROM rmp_match_candidates WHERE instructor_id = $1 AND rmp_legacy_id = $2 AND status = 'pending' FOR UPDATE",
+    )
+    .bind(id)
+    .bind(body.rmp_legacy_id)
+    .fetch_optional(&mut *tx)
+    .await
+    .map_err(|e| db_error("failed to check candidate", e))?;
+
+    if candidate.is_none() {
+        return Err((
+            StatusCode::NOT_FOUND,
+            Json(json!({"error": "pending candidate not found for this instructor"})),
+        ));
+    }
+
+    // Insert link into instructor_rmp_links. With `ON CONFLICT DO NOTHING` an
+    // already-linked profile is not an error; it just affects zero rows.
+    let inserted = sqlx::query(
+        "INSERT INTO instructor_rmp_links (instructor_id, rmp_legacy_id, created_by, source) VALUES ($1, $2, $3, 'manual') ON CONFLICT (rmp_legacy_id) DO NOTHING",
+    )
+    .bind(id)
+    .bind(body.rmp_legacy_id)
+    .bind(user.discord_id)
+    .execute(&mut *tx)
+    .await
+    .map_err(|e| db_error("failed to insert rmp link", e))?;
+
+    if inserted.rows_affected() == 0 {
+        // Nothing was inserted, so a link for this RMP profile already exists.
+        // That is fine only if it already belongs to this instructor; otherwise
+        // confirming the match would leave the instructor `confirmed` without a link.
+        let owner: Option<(i32,)> = sqlx::query_as(
+            "SELECT instructor_id FROM instructor_rmp_links WHERE rmp_legacy_id = $1",
+        )
+        .bind(body.rmp_legacy_id)
+        .fetch_optional(&mut *tx)
+        .await
+        .map_err(|e| db_error("failed to check rmp uniqueness", e))?;
+
+        let owner_id = owner.map(|(instructor_id,)| instructor_id);
+        if owner_id != Some(id) {
+            // `owner_id` is `None` only if the conflicting link vanished between
+            // the INSERT and this SELECT; the id is then reported as null.
+            return Err((
+                StatusCode::CONFLICT,
+                Json(json!({
+                    "error": "RMP profile already linked to another instructor",
+                    "conflictingInstructorId": owner_id,
+                })),
+            ));
+        }
+    }
+
+    // Update instructor match status
+    sqlx::query("UPDATE instructors SET rmp_match_status = 'confirmed' WHERE id = $1")
+        .bind(id)
+        .execute(&mut *tx)
+        .await
+        .map_err(|e| db_error("failed to update instructor match status", e))?;
+
+    // Accept the candidate
+    sqlx::query(
+        "UPDATE rmp_match_candidates SET status = 'accepted', resolved_at = NOW(), resolved_by = $1 WHERE instructor_id = $2 AND rmp_legacy_id = $3",
+    )
+    .bind(user.discord_id)
+    .bind(id)
+    .bind(body.rmp_legacy_id)
+    .execute(&mut *tx)
+    .await
+    .map_err(|e| db_error("failed to accept candidate", e))?;
+
+    tx.commit()
+        .await
+        .map_err(|e| db_error("failed to commit transaction", e))?;
+
+    build_instructor_detail(&state, id).await
+}
+
+// ---------------------------------------------------------------------------
+// 4. POST /api/admin/instructors/{id}/reject-candidate — reject one candidate
+// ---------------------------------------------------------------------------
+
+/// `POST /api/admin/instructors/{id}/reject-candidate` — Reject a single candidate.
+///
+/// Marks the `pending` candidate identified by (`id`, `rmpLegacyId`) as
+/// `rejected`, recording the acting admin and the resolution time. Responds
+/// `404` when no pending candidate matched and `500` on a database failure.
+pub async fn reject_candidate(
+    AdminUser(user): AdminUser,
+    State(state): State<AppState>,
+    Path(id): Path<i32>,
+    Json(body): Json<RejectCandidateBody>,
+) -> Result<Json<OkResponse>, (StatusCode, Json<Value>)> {
+    let outcome = sqlx::query(
+        "UPDATE rmp_match_candidates SET status = 'rejected', resolved_at = NOW(), resolved_by = $1 WHERE instructor_id = $2 AND rmp_legacy_id = $3 AND status = 'pending'",
+    )
+    .bind(user.discord_id)
+    .bind(id)
+    .bind(body.rmp_legacy_id)
+    .execute(&state.db_pool)
+    .await
+    .map_err(|e| db_error("failed to reject candidate", e))?;
+
+    // The UPDATE doubles as the existence check: zero affected rows means no
+    // pending candidate matched.
+    if outcome.rows_affected() > 0 {
+        Ok(Json(OkResponse { ok: true }))
+    } else {
+        Err((
+            StatusCode::NOT_FOUND,
+            Json(json!({"error": "pending candidate not found"})),
+        ))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// 5. POST /api/admin/instructors/{id}/reject-all — no valid match
+// ---------------------------------------------------------------------------
+
+/// `POST /api/admin/instructors/{id}/reject-all` — Mark instructor as having no valid RMP match.
+///
+/// In one transaction, sets the instructor's `rmp_match_status` to `rejected`
+/// and rejects every candidate of theirs that is still `pending`.
+///
+/// Responds `404` if the instructor does not exist, `409` if their status is
+/// `confirmed` (the link must be removed first), and `500` on a database failure.
+pub async fn reject_all(
+    AdminUser(user): AdminUser,
+    State(state): State<AppState>,
+    Path(id): Path<i32>,
+) -> Result<Json<OkResponse>, (StatusCode, Json<Value>)> {
+    let mut tx = state
+        .db_pool
+        .begin()
+        .await
+        .map_err(|e| db_error("failed to begin transaction", e))?;
+
+    // Look up the current status; this also tells us whether the instructor exists.
+    let status_row = sqlx::query_as::<_, (String,)>(
+        "SELECT rmp_match_status FROM instructors WHERE id = $1",
+    )
+    .bind(id)
+    .fetch_optional(&mut *tx)
+    .await
+    .map_err(|e| db_error("failed to fetch instructor status", e))?;
+
+    let Some((status,)) = status_row else {
+        return Err((
+            StatusCode::NOT_FOUND,
+            Json(json!({"error": "instructor not found"})),
+        ));
+    };
+
+    // Only `confirmed` blocks the rejection.
+    // NOTE(review): an `auto` instructor passes this guard and nothing here
+    // removes rows from `instructor_rmp_links` — confirm that is intended.
+    if status.as_str() == "confirmed" {
+        return Err((
+            StatusCode::CONFLICT,
+            Json(
+                json!({"error": "cannot reject instructor with confirmed matches — unmatch first"}),
+            ),
+        ));
+    }
+
+    // Flip the instructor first, then its pending candidates.
+    sqlx::query("UPDATE instructors SET rmp_match_status = 'rejected' WHERE id = $1")
+        .bind(id)
+        .execute(&mut *tx)
+        .await
+        .map_err(|e| db_error("failed to update instructor status", e))?;
+
+    sqlx::query(
+        "UPDATE rmp_match_candidates SET status = 'rejected', resolved_at = NOW(), resolved_by = $1 WHERE instructor_id = $2 AND status = 'pending'",
+    )
+    .bind(user.discord_id)
+    .bind(id)
+    .execute(&mut *tx)
+    .await
+    .map_err(|e| db_error("failed to reject candidates", e))?;
+
+    tx.commit()
+        .await
+        .map_err(|e| db_error("failed to commit transaction", e))?;
+
+    Ok(Json(OkResponse { ok: true }))
+}
+
+// ---------------------------------------------------------------------------
+// 6. POST /api/admin/instructors/{id}/unmatch — remove current match
+// ---------------------------------------------------------------------------
+
+/// Body for unmatch — optional `rmpLegacyId` to remove a specific link.
+/// If omitted (or null), all links are removed.
+#[derive(Deserialize, Default)]
+#[serde(rename_all = "camelCase")]
+pub struct UnmatchBody {
+    /// RMP legacy id of the single link to delete; `None` means "delete every
+    /// link of the instructor".
+    rmp_legacy_id: Option<i32>,
+}
+
+/// `POST /api/admin/instructors/{id}/unmatch` — Remove RMP link(s).
+///
+/// Send `{ "rmpLegacyId": N }` to remove a specific link, or an empty body / `{}`
+/// to remove all links for the instructor.
+///
+/// Runs in one transaction. If no link remains afterwards, the instructor's
+/// `rmp_match_status` is reset to `unmatched`. Responds `404` when the
+/// instructor or the requested link does not exist, `500` on a database failure.
+pub async fn unmatch_instructor(
+    AdminUser(_user): AdminUser,
+    State(state): State<AppState>,
+    Path(id): Path<i32>,
+    body: Option<Json<UnmatchBody>>,
+) -> Result<Json<OkResponse>, (StatusCode, Json<Value>)> {
+    // An absent body and a body without `rmpLegacyId` both mean "all links".
+    let target_link = match body {
+        Some(Json(payload)) => payload.rmp_legacy_id,
+        None => None,
+    };
+
+    let mut tx = state
+        .db_pool
+        .begin()
+        .await
+        .map_err(|e| db_error("failed to begin transaction", e))?;
+
+    // The instructor must exist before anything is deleted.
+    let instructor_row = sqlx::query_as::<_, (i32,)>("SELECT id FROM instructors WHERE id = $1")
+        .bind(id)
+        .fetch_optional(&mut *tx)
+        .await
+        .map_err(|e| db_error("failed to check instructor", e))?;
+
+    if instructor_row.is_none() {
+        return Err((
+            StatusCode::NOT_FOUND,
+            Json(json!({"error": "instructor not found"})),
+        ));
+    }
+
+    match target_link {
+        // Targeted removal: deleting nothing means the link was not there.
+        Some(legacy_id) => {
+            let deleted = sqlx::query(
+                "DELETE FROM instructor_rmp_links WHERE instructor_id = $1 AND rmp_legacy_id = $2",
+            )
+            .bind(id)
+            .bind(legacy_id)
+            .execute(&mut *tx)
+            .await
+            .map_err(|e| db_error("failed to remove rmp link", e))?;
+
+            if deleted.rows_affected() == 0 {
+                return Err((
+                    StatusCode::NOT_FOUND,
+                    Json(json!({"error": "link not found for this instructor"})),
+                ));
+            }
+        }
+        // Bulk removal: succeeds even if the instructor had no links.
+        None => {
+            sqlx::query("DELETE FROM instructor_rmp_links WHERE instructor_id = $1")
+                .bind(id)
+                .execute(&mut *tx)
+                .await
+                .map_err(|e| db_error("failed to remove rmp links", e))?;
+        }
+    }
+
+    // With no links left the instructor goes back to `unmatched`; otherwise
+    // the status is left untouched.
+    let (links_left,) = sqlx::query_as::<_, (i64,)>(
+        "SELECT COUNT(*) FROM instructor_rmp_links WHERE instructor_id = $1",
+    )
+    .bind(id)
+    .fetch_one(&mut *tx)
+    .await
+    .map_err(|e| db_error("failed to count remaining links", e))?;
+
+    if links_left == 0 {
+        sqlx::query("UPDATE instructors SET rmp_match_status = 'unmatched' WHERE id = $1")
+            .bind(id)
+            .execute(&mut *tx)
+            .await
+            .map_err(|e| db_error("failed to update instructor status", e))?;
+    }
+
+    tx.commit()
+        .await
+        .map_err(|e| db_error("failed to commit transaction", e))?;
+
+    Ok(Json(OkResponse { ok: true }))
+}
+
+// ---------------------------------------------------------------------------
+// 7. POST /api/admin/rmp/rescore — re-run candidate generation
+// ---------------------------------------------------------------------------
+
+/// `POST /api/admin/rmp/rescore` — Re-run RMP candidate generation.
+///
+/// Calls `crate::data::rmp_matching::generate_candidates` on the shared pool
+/// and returns its counters. Any failure is logged and reported as a `500`
+/// with a generic message.
+pub async fn rescore(
+    AdminUser(_user): AdminUser,
+    State(state): State<AppState>,
+) -> Result<Json<RescoreResponse>, (StatusCode, Json<Value>)> {
+    let stats = match crate::data::rmp_matching::generate_candidates(&state.db_pool).await {
+        Ok(stats) => stats,
+        Err(e) => {
+            tracing::error!(error = %e, "failed to run candidate generation");
+            return Err((
+                StatusCode::INTERNAL_SERVER_ERROR,
+                Json(json!({"error": "candidate generation failed"})),
+            ));
+        }
+    };
+
+    let response = RescoreResponse {
+        total_unmatched: stats.total_unmatched,
+        candidates_created: stats.candidates_created,
+        auto_matched: stats.auto_matched,
+        skipped_unparseable: stats.skipped_unparseable,
+        skipped_no_candidates: stats.skipped_no_candidates,
+    };
+    Ok(Json(response))
+}
@@ -1,6 +1,7 @@
 //! Web API module for the banner application.

 pub mod admin;
+pub mod admin_rmp;
 #[cfg(feature = "embed-assets")]
 pub mod assets;
 pub mod auth;
@@ -10,6 +10,7 @@ use axum::{
 };

 use crate::web::admin;
+use crate::web::admin_rmp;
 use crate::web::auth::{self, AuthConfig};
 use crate::web::ws;
 #[cfg(feature = "embed-assets")]
@@ -66,6 +67,25 @@ pub fn create_router(app_state: AppState, auth_config: AuthConfig) -> Router {
        .route("/admin/scrape-jobs", get(admin::list_scrape_jobs))
        .route("/admin/scrape-jobs/ws", get(ws::scrape_jobs_ws))
        .route("/admin/audit-log", get(admin::list_audit_log))
+        .route("/admin/instructors", get(admin_rmp::list_instructors))
+        .route("/admin/instructors/{id}", get(admin_rmp::get_instructor))
+        .route(
+            "/admin/instructors/{id}/match",
+            post(admin_rmp::match_instructor),
+        )
+        .route(
+            "/admin/instructors/{id}/reject-candidate",
+            post(admin_rmp::reject_candidate),
+        )
+        .route(
+            "/admin/instructors/{id}/reject-all",
+            post(admin_rmp::reject_all),
+        )
+        .route(
+            "/admin/instructors/{id}/unmatch",
+            post(admin_rmp::unmatch_instructor),
+        )
+        .route("/admin/rmp/rescore", post(admin_rmp::rescore))
        .with_state(app_state);

    let mut router = Router::new()
@@ -435,9 +455,10 @@ pub struct CourseResponse {
 #[serde(rename_all = "camelCase")]
 #[ts(export)]
 pub struct InstructorResponse {
+    instructor_id: i32,
    banner_id: String,
    display_name: String,
-    email: Option<String>,
+    email: String,
    is_primary: bool,
    rmp_rating: Option<f32>,
    rmp_num_ratings: Option<i32>,
@@ -470,6 +491,7 @@ fn build_course_response(
    let instructors = instructors
        .into_iter()
        .map(|i| InstructorResponse {
+            instructor_id: i.instructor_id,
            banner_id: i.banner_id,
            display_name: i.display_name,
            email: i.email,