mirror of
https://github.com/Xevion/banner.git
synced 2026-01-30 22:23:32 -06:00
feat: sync RMP professor ratings and display in course search interface
This commit is contained in:
+12
-9
@@ -98,23 +98,26 @@ pub async fn get_course_by_crn(
|
||||
|
||||
/// Get instructors for a course by course ID.
|
||||
///
|
||||
/// Returns `(banner_id, display_name, email, is_primary)` tuples.
|
||||
/// Returns `(banner_id, display_name, email, is_primary, rmp_avg_rating, rmp_num_ratings)` tuples.
|
||||
pub async fn get_course_instructors(
|
||||
db_pool: &PgPool,
|
||||
course_id: i32,
|
||||
) -> Result<Vec<(String, String, Option<String>, bool)>> {
|
||||
let rows: Vec<(String, String, Option<String>, bool)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT i.banner_id, i.display_name, i.email, ci.is_primary
|
||||
) -> Result<Vec<(String, String, Option<String>, bool, Option<f32>, Option<i32>)>> {
|
||||
let rows: Vec<(String, String, Option<String>, bool, Option<f32>, Option<i32>)> =
|
||||
sqlx::query_as(
|
||||
r#"
|
||||
SELECT i.banner_id, i.display_name, i.email, ci.is_primary,
|
||||
rp.avg_rating, rp.num_ratings
|
||||
FROM course_instructors ci
|
||||
JOIN instructors i ON i.banner_id = ci.instructor_id
|
||||
LEFT JOIN rmp_professors rp ON rp.legacy_id = i.rmp_legacy_id
|
||||
WHERE ci.course_id = $1
|
||||
ORDER BY ci.is_primary DESC, i.display_name
|
||||
"#,
|
||||
)
|
||||
.bind(course_id)
|
||||
.fetch_all(db_pool)
|
||||
.await?;
|
||||
)
|
||||
.bind(course_id)
|
||||
.fetch_all(db_pool)
|
||||
.await?;
|
||||
Ok(rows)
|
||||
}
|
||||
|
||||
|
||||
@@ -4,4 +4,5 @@ pub mod batch;
|
||||
pub mod courses;
|
||||
pub mod models;
|
||||
pub mod reference;
|
||||
pub mod rmp;
|
||||
pub mod scrape_jobs;
|
||||
|
||||
+311
@@ -0,0 +1,311 @@
|
||||
//! Database operations for RateMyProfessors data.
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::rmp::RmpProfessor;
|
||||
use sqlx::PgPool;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
/// Bulk upsert RMP professors using the UNNEST pattern.
|
||||
///
|
||||
/// Deduplicates by `legacy_id` before inserting — the RMP API can return
|
||||
/// the same professor on multiple pages.
|
||||
pub async fn batch_upsert_rmp_professors(
|
||||
professors: &[RmpProfessor],
|
||||
db_pool: &PgPool,
|
||||
) -> Result<()> {
|
||||
if professors.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Deduplicate: keep last occurrence per legacy_id (latest page wins)
|
||||
let mut seen = HashSet::new();
|
||||
let deduped: Vec<&RmpProfessor> = professors
|
||||
.iter()
|
||||
.rev()
|
||||
.filter(|p| seen.insert(p.legacy_id))
|
||||
.collect();
|
||||
|
||||
let legacy_ids: Vec<i32> = deduped.iter().map(|p| p.legacy_id).collect();
|
||||
let graphql_ids: Vec<&str> = deduped.iter().map(|p| p.graphql_id.as_str()).collect();
|
||||
let first_names: Vec<String> = deduped.iter().map(|p| p.first_name.trim().to_string()).collect();
|
||||
let first_name_refs: Vec<&str> = first_names.iter().map(|s| s.as_str()).collect();
|
||||
let last_names: Vec<String> = deduped.iter().map(|p| p.last_name.trim().to_string()).collect();
|
||||
let last_name_refs: Vec<&str> = last_names.iter().map(|s| s.as_str()).collect();
|
||||
let departments: Vec<Option<&str>> = deduped
|
||||
.iter()
|
||||
.map(|p| p.department.as_deref())
|
||||
.collect();
|
||||
let avg_ratings: Vec<Option<f32>> = deduped.iter().map(|p| p.avg_rating).collect();
|
||||
let avg_difficulties: Vec<Option<f32>> = deduped.iter().map(|p| p.avg_difficulty).collect();
|
||||
let num_ratings: Vec<i32> = deduped.iter().map(|p| p.num_ratings).collect();
|
||||
let would_take_again_pcts: Vec<Option<f32>> = deduped
|
||||
.iter()
|
||||
.map(|p| p.would_take_again_pct)
|
||||
.collect();
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO rmp_professors (
|
||||
legacy_id, graphql_id, first_name, last_name, department,
|
||||
avg_rating, avg_difficulty, num_ratings, would_take_again_pct,
|
||||
last_synced_at
|
||||
)
|
||||
SELECT
|
||||
v.legacy_id, v.graphql_id, v.first_name, v.last_name, v.department,
|
||||
v.avg_rating, v.avg_difficulty, v.num_ratings, v.would_take_again_pct,
|
||||
NOW()
|
||||
FROM UNNEST(
|
||||
$1::int4[], $2::text[], $3::text[], $4::text[], $5::text[],
|
||||
$6::real[], $7::real[], $8::int4[], $9::real[]
|
||||
) AS v(
|
||||
legacy_id, graphql_id, first_name, last_name, department,
|
||||
avg_rating, avg_difficulty, num_ratings, would_take_again_pct
|
||||
)
|
||||
ON CONFLICT (legacy_id)
|
||||
DO UPDATE SET
|
||||
graphql_id = EXCLUDED.graphql_id,
|
||||
first_name = EXCLUDED.first_name,
|
||||
last_name = EXCLUDED.last_name,
|
||||
department = EXCLUDED.department,
|
||||
avg_rating = EXCLUDED.avg_rating,
|
||||
avg_difficulty = EXCLUDED.avg_difficulty,
|
||||
num_ratings = EXCLUDED.num_ratings,
|
||||
would_take_again_pct = EXCLUDED.would_take_again_pct,
|
||||
last_synced_at = EXCLUDED.last_synced_at
|
||||
"#,
|
||||
)
|
||||
.bind(&legacy_ids)
|
||||
.bind(&graphql_ids)
|
||||
.bind(&first_name_refs)
|
||||
.bind(&last_name_refs)
|
||||
.bind(&departments)
|
||||
.bind(&avg_ratings)
|
||||
.bind(&avg_difficulties)
|
||||
.bind(&num_ratings)
|
||||
.bind(&would_take_again_pcts)
|
||||
.execute(db_pool)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("Failed to batch upsert RMP professors: {}", e))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Normalize a name for matching: lowercase, trim, strip trailing periods.
///
/// Any whitespace left dangling after the periods are stripped (e.g. the
/// input `"Jr ."`) is also removed, so lookup keys never carry stray
/// trailing spaces.
fn normalize(s: &str) -> String {
    s.trim()
        .to_lowercase()
        .trim_end_matches('.')
        .trim_end()
        .to_string()
}

/// Parse Banner's "Last, First Middle" display name into (last, first) tokens.
///
/// Only the first whitespace-separated token of the first-name portion is
/// kept, which drops middle names and initials. Returns `None` if the format
/// is unparseable: no comma, or an empty last/first part after normalization.
fn parse_display_name(display_name: &str) -> Option<(String, String)> {
    let (last_part, first_part) = display_name.split_once(',')?;
    let last = normalize(last_part);
    // Take only the first token of the first-name portion to drop middle names/initials.
    let first = normalize(first_part.split_whitespace().next()?);
    if last.is_empty() || first.is_empty() {
        return None;
    }
    Some((last, first))
}
|
||||
|
||||
/// Auto-match instructors to RMP professors by normalized name.
|
||||
///
|
||||
/// Loads all pending instructors and all RMP professors, then matches in Rust
|
||||
/// using normalized name comparison. Only assigns a match when exactly one RMP
|
||||
/// professor matches a given instructor.
|
||||
pub async fn auto_match_instructors(db_pool: &PgPool) -> Result<u64> {
|
||||
// Load pending instructors
|
||||
let instructors: Vec<(String, String)> = sqlx::query_as(
|
||||
"SELECT banner_id, display_name FROM instructors WHERE rmp_match_status = 'pending'",
|
||||
)
|
||||
.fetch_all(db_pool)
|
||||
.await?;
|
||||
|
||||
if instructors.is_empty() {
|
||||
info!(matched = 0, "No pending instructors to match");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Load all RMP professors
|
||||
let professors: Vec<(i32, String, String)> = sqlx::query_as(
|
||||
"SELECT legacy_id, first_name, last_name FROM rmp_professors",
|
||||
)
|
||||
.fetch_all(db_pool)
|
||||
.await?;
|
||||
|
||||
// Build a lookup: (normalized_last, normalized_first) -> list of legacy_ids
|
||||
let mut rmp_index: HashMap<(String, String), Vec<i32>> = HashMap::new();
|
||||
for (legacy_id, first, last) in &professors {
|
||||
let key = (normalize(last), normalize(first));
|
||||
rmp_index.entry(key).or_default().push(*legacy_id);
|
||||
}
|
||||
|
||||
// Match each instructor
|
||||
let mut matches: Vec<(i32, String)> = Vec::new(); // (legacy_id, banner_id)
|
||||
let mut no_comma = 0u64;
|
||||
let mut no_match = 0u64;
|
||||
let mut ambiguous = 0u64;
|
||||
|
||||
for (banner_id, display_name) in &instructors {
|
||||
let Some((last, first)) = parse_display_name(display_name) else {
|
||||
no_comma += 1;
|
||||
continue;
|
||||
};
|
||||
|
||||
let key = (last, first);
|
||||
match rmp_index.get(&key) {
|
||||
Some(ids) if ids.len() == 1 => {
|
||||
matches.push((ids[0], banner_id.clone()));
|
||||
}
|
||||
Some(ids) => {
|
||||
ambiguous += 1;
|
||||
debug!(
|
||||
banner_id,
|
||||
display_name,
|
||||
candidates = ids.len(),
|
||||
"Ambiguous RMP match, skipping"
|
||||
);
|
||||
}
|
||||
None => {
|
||||
no_match += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if no_comma > 0 || ambiguous > 0 {
|
||||
warn!(
|
||||
total_pending = instructors.len(),
|
||||
no_comma,
|
||||
no_match,
|
||||
ambiguous,
|
||||
matched = matches.len(),
|
||||
"RMP matching diagnostics"
|
||||
);
|
||||
}
|
||||
|
||||
// Batch update matches
|
||||
if matches.is_empty() {
|
||||
info!(matched = 0, "Auto-matched instructors to RMP professors");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let legacy_ids: Vec<i32> = matches.iter().map(|(id, _)| *id).collect();
|
||||
let banner_ids: Vec<&str> = matches.iter().map(|(_, bid)| bid.as_str()).collect();
|
||||
|
||||
let result = sqlx::query(
|
||||
r#"
|
||||
UPDATE instructors i
|
||||
SET
|
||||
rmp_legacy_id = m.legacy_id,
|
||||
rmp_match_status = 'auto'
|
||||
FROM UNNEST($1::int4[], $2::text[]) AS m(legacy_id, banner_id)
|
||||
WHERE i.banner_id = m.banner_id
|
||||
"#,
|
||||
)
|
||||
.bind(&legacy_ids)
|
||||
.bind(&banner_ids)
|
||||
.execute(db_pool)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("Failed to update instructor RMP matches: {}", e))?;
|
||||
|
||||
let matched = result.rows_affected();
|
||||
info!(matched, "Auto-matched instructors to RMP professors");
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
/// Retrieve RMP rating data for an instructor by banner_id.
|
||||
///
|
||||
/// Returns `(avg_rating, num_ratings)` if the instructor has an RMP match.
|
||||
#[allow(dead_code)]
|
||||
pub async fn get_instructor_rmp_data(
|
||||
db_pool: &PgPool,
|
||||
banner_id: &str,
|
||||
) -> Result<Option<(f32, i32)>> {
|
||||
let row: Option<(f32, i32)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT rp.avg_rating, rp.num_ratings
|
||||
FROM instructors i
|
||||
JOIN rmp_professors rp ON rp.legacy_id = i.rmp_legacy_id
|
||||
WHERE i.banner_id = $1
|
||||
AND rp.avg_rating IS NOT NULL
|
||||
"#,
|
||||
)
|
||||
.bind(banner_id)
|
||||
.fetch_optional(db_pool)
|
||||
.await?;
|
||||
Ok(row)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Canonical "Last, First" Banner format.
    #[test]
    fn parse_standard_name() {
        assert_eq!(
            parse_display_name("Smith, John"),
            Some(("smith".into(), "john".into()))
        );
    }

    // Middle names are dropped: only the first token after the comma is kept.
    #[test]
    fn parse_name_with_middle() {
        assert_eq!(
            parse_display_name("Smith, John David"),
            Some(("smith".into(), "john".into()))
        );
    }

    // Middle initials (with periods) are likewise dropped.
    #[test]
    fn parse_name_with_middle_initial() {
        assert_eq!(
            parse_display_name("Garcia, Maria L."),
            Some(("garcia".into(), "maria".into()))
        );
    }

    #[test]
    fn parse_name_with_suffix_in_last() {
        // Banner may encode "Jr." as part of the last name.
        // normalize() strips trailing periods so "Jr." becomes "jr".
        assert_eq!(
            parse_display_name("Smith Jr., James"),
            Some(("smith jr".into(), "james".into()))
        );
    }

    // No comma means the "Last, First" format cannot be parsed.
    #[test]
    fn parse_no_comma_returns_none() {
        assert_eq!(parse_display_name("SingleName"), None);
    }

    // A comma with nothing after it yields no first-name token.
    #[test]
    fn parse_empty_first_returns_none() {
        assert_eq!(parse_display_name("Smith,"), None);
    }

    // A comma with nothing before it yields an empty last name.
    #[test]
    fn parse_empty_last_returns_none() {
        assert_eq!(parse_display_name(", John"), None);
    }

    // Surrounding/internal padding is normalized away on both parts.
    #[test]
    fn parse_extra_whitespace() {
        assert_eq!(
            parse_display_name("  Doe  ,  Jane Marie  "),
            Some(("doe".into(), "jane".into()))
        );
    }

    #[test]
    fn normalize_trims_and_lowercases() {
        assert_eq!(normalize("  FOO  "), "foo");
    }

    #[test]
    fn normalize_strips_trailing_period() {
        assert_eq!(normalize("Jr."), "jr");
    }
}
|
||||
@@ -7,6 +7,7 @@ pub mod data;
|
||||
pub mod error;
|
||||
pub mod formatter;
|
||||
pub mod logging;
|
||||
pub mod rmp;
|
||||
pub mod scraper;
|
||||
pub mod services;
|
||||
pub mod signals;
|
||||
|
||||
@@ -14,6 +14,7 @@ mod data;
|
||||
mod error;
|
||||
mod formatter;
|
||||
mod logging;
|
||||
mod rmp;
|
||||
mod scraper;
|
||||
mod services;
|
||||
mod signals;
|
||||
|
||||
+161
@@ -0,0 +1,161 @@
|
||||
//! RateMyProfessors GraphQL client for bulk professor data sync.
|
||||
|
||||
use anyhow::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{debug, info};
|
||||
|
||||
/// UTSA's school ID on RateMyProfessors (base64 of "School-1516").
|
||||
const UTSA_SCHOOL_ID: &str = "U2Nob29sLTE1MTY=";
|
||||
|
||||
/// Basic auth header value (base64 of "test:test").
|
||||
const AUTH_HEADER: &str = "Basic dGVzdDp0ZXN0";
|
||||
|
||||
/// GraphQL endpoint.
|
||||
const GRAPHQL_URL: &str = "https://www.ratemyprofessors.com/graphql";
|
||||
|
||||
/// Page size for paginated fetches.
|
||||
const PAGE_SIZE: u32 = 100;
|
||||
|
||||
/// A professor record from RateMyProfessors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RmpProfessor {
    /// Numeric RMP ID; used as the conflict key when upserting to the DB.
    pub legacy_id: i32,
    /// Opaque GraphQL node ID (the `id` field of the teacher node).
    pub graphql_id: String,
    /// First name as reported by RMP; may be empty if the API omits it.
    pub first_name: String,
    /// Last name as reported by RMP; may be empty if the API omits it.
    pub last_name: String,
    /// Department string as reported by RMP; not normalized.
    pub department: Option<String>,
    /// Average rating; `None` when the API returns no value.
    /// NOTE(review): presumably a 1.0–5.0 scale — not verifiable from here.
    pub avg_rating: Option<f32>,
    /// Average difficulty; `None` when the API returns no value.
    pub avg_difficulty: Option<f32>,
    /// Number of ratings; defaults to 0 when missing from the API response.
    pub num_ratings: i32,
    /// "Would take again" percentage; negative sentinel values from the API
    /// are filtered to `None` during fetch.
    pub would_take_again_pct: Option<f32>,
}

/// Client for fetching professor data from RateMyProfessors.
pub struct RmpClient {
    // Reused across paginated requests so connections can be pooled.
    http: reqwest::Client,
}
||||
|
||||
impl RmpClient {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
http: reqwest::Client::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch all professors for UTSA via paginated GraphQL queries.
|
||||
pub async fn fetch_all_professors(&self) -> Result<Vec<RmpProfessor>> {
|
||||
let mut all = Vec::new();
|
||||
let mut cursor: Option<String> = None;
|
||||
|
||||
loop {
|
||||
let after_clause = match &cursor {
|
||||
Some(c) => format!(r#", after: "{}""#, c),
|
||||
None => String::new(),
|
||||
};
|
||||
|
||||
let query = format!(
|
||||
r#"query {{
|
||||
newSearch {{
|
||||
teachers(query: {{ text: "", schoolID: "{school_id}" }}, first: {page_size}{after}) {{
|
||||
edges {{
|
||||
cursor
|
||||
node {{
|
||||
id
|
||||
legacyId
|
||||
firstName
|
||||
lastName
|
||||
department
|
||||
avgRating
|
||||
avgDifficulty
|
||||
numRatings
|
||||
wouldTakeAgainPercent
|
||||
}}
|
||||
}}
|
||||
pageInfo {{
|
||||
hasNextPage
|
||||
endCursor
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
}}"#,
|
||||
school_id = UTSA_SCHOOL_ID,
|
||||
page_size = PAGE_SIZE,
|
||||
after = after_clause,
|
||||
);
|
||||
|
||||
let body = serde_json::json!({ "query": query });
|
||||
|
||||
let resp = self
|
||||
.http
|
||||
.post(GRAPHQL_URL)
|
||||
.header("Authorization", AUTH_HEADER)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let status = resp.status();
|
||||
if !status.is_success() {
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
anyhow::bail!("RMP GraphQL request failed ({status}): {text}");
|
||||
}
|
||||
|
||||
let json: serde_json::Value = resp.json().await?;
|
||||
|
||||
let teachers = &json["data"]["newSearch"]["teachers"];
|
||||
let edges = teachers["edges"]
|
||||
.as_array()
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing edges in RMP response"))?;
|
||||
|
||||
for edge in edges {
|
||||
let node = &edge["node"];
|
||||
let wta = node["wouldTakeAgainPercent"]
|
||||
.as_f64()
|
||||
.map(|v| v as f32)
|
||||
.filter(|&v| v >= 0.0);
|
||||
|
||||
all.push(RmpProfessor {
|
||||
legacy_id: node["legacyId"]
|
||||
.as_i64()
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing legacyId"))?
|
||||
as i32,
|
||||
graphql_id: node["id"]
|
||||
.as_str()
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing id"))?
|
||||
.to_string(),
|
||||
first_name: node["firstName"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string(),
|
||||
last_name: node["lastName"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string(),
|
||||
department: node["department"].as_str().map(|s| s.to_string()),
|
||||
avg_rating: node["avgRating"].as_f64().map(|v| v as f32),
|
||||
avg_difficulty: node["avgDifficulty"].as_f64().map(|v| v as f32),
|
||||
num_ratings: node["numRatings"].as_i64().unwrap_or(0) as i32,
|
||||
would_take_again_pct: wta,
|
||||
});
|
||||
}
|
||||
|
||||
let page_info = &teachers["pageInfo"];
|
||||
let has_next = page_info["hasNextPage"].as_bool().unwrap_or(false);
|
||||
|
||||
if !has_next {
|
||||
break;
|
||||
}
|
||||
|
||||
cursor = page_info["endCursor"]
|
||||
.as_str()
|
||||
.map(|s| s.to_string());
|
||||
|
||||
debug!(
|
||||
fetched = all.len(),
|
||||
"RMP pagination: fetching next page"
|
||||
);
|
||||
}
|
||||
|
||||
info!(total = all.len(), "Fetched all RMP professors");
|
||||
Ok(all)
|
||||
}
|
||||
}
|
||||
+59
-15
@@ -2,6 +2,7 @@ use crate::banner::{BannerApi, Term};
|
||||
use crate::data::models::{ReferenceData, ScrapePriority, TargetType};
|
||||
use crate::data::scrape_jobs;
|
||||
use crate::error::Result;
|
||||
use crate::rmp::RmpClient;
|
||||
use crate::scraper::jobs::subject::SubjectJob;
|
||||
use crate::state::ReferenceCache;
|
||||
use serde_json::json;
|
||||
@@ -16,6 +17,9 @@ use tracing::{debug, error, info, warn};
|
||||
/// How often reference data is re-scraped (6 hours).
|
||||
const REFERENCE_DATA_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60);
|
||||
|
||||
/// How often RMP data is synced (24 hours).
|
||||
const RMP_SYNC_INTERVAL: Duration = Duration::from_secs(24 * 60 * 60);
|
||||
|
||||
/// Periodically analyzes data and enqueues prioritized scrape jobs.
|
||||
pub struct Scheduler {
|
||||
db_pool: PgPool,
|
||||
@@ -53,6 +57,8 @@ impl Scheduler {
|
||||
let mut current_work: Option<(tokio::task::JoinHandle<()>, CancellationToken)> = None;
|
||||
// Scrape reference data immediately on first cycle
|
||||
let mut last_ref_scrape = Instant::now() - REFERENCE_DATA_INTERVAL;
|
||||
// Sync RMP data immediately on first cycle
|
||||
let mut last_rmp_sync = Instant::now() - RMP_SYNC_INTERVAL;
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
@@ -60,6 +66,7 @@ impl Scheduler {
|
||||
let cancel_token = CancellationToken::new();
|
||||
|
||||
let should_scrape_ref = last_ref_scrape.elapsed() >= REFERENCE_DATA_INTERVAL;
|
||||
let should_sync_rmp = last_rmp_sync.elapsed() >= RMP_SYNC_INTERVAL;
|
||||
|
||||
// Spawn work in separate task to allow graceful cancellation during shutdown.
|
||||
let work_handle = tokio::spawn({
|
||||
@@ -68,28 +75,47 @@ impl Scheduler {
|
||||
let cancel_token = cancel_token.clone();
|
||||
let reference_cache = self.reference_cache.clone();
|
||||
|
||||
async move {
|
||||
tokio::select! {
|
||||
_ = async {
|
||||
if should_scrape_ref
|
||||
&& let Err(e) = Self::scrape_reference_data(&db_pool, &banner_api, &reference_cache).await
|
||||
{
|
||||
error!(error = ?e, "Failed to scrape reference data");
|
||||
async move {
|
||||
tokio::select! {
|
||||
_ = async {
|
||||
// RMP sync is independent of Banner API — run it
|
||||
// concurrently with reference data scraping so it
|
||||
// doesn't wait behind rate-limited Banner calls.
|
||||
let rmp_fut = async {
|
||||
if should_sync_rmp
|
||||
&& let Err(e) = Self::sync_rmp_data(&db_pool).await
|
||||
{
|
||||
error!(error = ?e, "Failed to sync RMP data");
|
||||
}
|
||||
};
|
||||
|
||||
let ref_fut = async {
|
||||
if should_scrape_ref
|
||||
&& let Err(e) = Self::scrape_reference_data(&db_pool, &banner_api, &reference_cache).await
|
||||
{
|
||||
error!(error = ?e, "Failed to scrape reference data");
|
||||
}
|
||||
};
|
||||
|
||||
tokio::join!(rmp_fut, ref_fut);
|
||||
|
||||
if let Err(e) = Self::schedule_jobs_impl(&db_pool, &banner_api).await {
|
||||
error!(error = ?e, "Failed to schedule jobs");
|
||||
}
|
||||
} => {}
|
||||
_ = cancel_token.cancelled() => {
|
||||
debug!("Scheduling work cancelled gracefully");
|
||||
}
|
||||
}
|
||||
if let Err(e) = Self::schedule_jobs_impl(&db_pool, &banner_api).await {
|
||||
error!(error = ?e, "Failed to schedule jobs");
|
||||
}
|
||||
} => {}
|
||||
_ = cancel_token.cancelled() => {
|
||||
debug!("Scheduling work cancelled gracefully");
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if should_scrape_ref {
|
||||
last_ref_scrape = Instant::now();
|
||||
}
|
||||
if should_sync_rmp {
|
||||
last_rmp_sync = Instant::now();
|
||||
}
|
||||
|
||||
current_work = Some((work_handle, cancel_token));
|
||||
next_run = time::Instant::now() + work_interval;
|
||||
@@ -194,6 +220,24 @@ impl Scheduler {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
    /// Fetch all RMP professors, upsert to DB, and auto-match against Banner instructors.
    ///
    /// Runs as a three-step pipeline: fetch every professor from the RMP
    /// GraphQL API, bulk-upsert them into `rmp_professors`, then attempt
    /// name-based auto-matching of pending instructors. Any step failing
    /// aborts the sync via `?`.
    #[tracing::instrument(skip_all)]
    async fn sync_rmp_data(db_pool: &PgPool) -> Result<()> {
        info!("Starting RMP data sync");

        // Fetch the full professor roster (paginated internally by the client).
        let client = RmpClient::new();
        let professors = client.fetch_all_professors().await?;
        let total = professors.len();

        crate::data::rmp::batch_upsert_rmp_professors(&professors, db_pool).await?;
        info!(total, "RMP professors upserted");

        // Matching only touches instructors still marked 'pending'.
        let matched = crate::data::rmp::auto_match_instructors(db_pool).await?;
        info!(total, matched, "RMP sync complete");

        Ok(())
    }
|
||||
|
||||
/// Scrape all reference data categories from Banner and upsert to DB, then refresh cache.
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn scrape_reference_data(
|
||||
|
||||
+11
-5
@@ -357,6 +357,8 @@ pub struct InstructorResponse {
|
||||
display_name: String,
|
||||
email: Option<String>,
|
||||
is_primary: bool,
|
||||
rmp_rating: Option<f32>,
|
||||
rmp_num_ratings: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, TS)]
|
||||
@@ -387,11 +389,15 @@ async fn build_course_response(
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(
|
||||
|(banner_id, display_name, email, is_primary)| InstructorResponse {
|
||||
banner_id,
|
||||
display_name,
|
||||
email,
|
||||
is_primary,
|
||||
|(banner_id, display_name, email, is_primary, rmp_rating, rmp_num_ratings)| {
|
||||
InstructorResponse {
|
||||
banner_id,
|
||||
display_name,
|
||||
email,
|
||||
is_primary,
|
||||
rmp_rating,
|
||||
rmp_num_ratings,
|
||||
}
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
Reference in New Issue
Block a user