refactor: consolidate query logic and eliminate N+1 instructor loads

This commit is contained in:
2026-01-29 12:03:06 -06:00
parent 61f8bd9de7
commit c90bd740de
22 changed files with 414 additions and 398 deletions
+10 -7
View File
@@ -1,4 +1,4 @@
use bitflags::{Flags, bitflags};
use bitflags::{bitflags, Flags};
use chrono::{DateTime, NaiveDate, NaiveTime, Timelike, Utc, Weekday};
use extension_traits::extension;
use serde::{Deserialize, Deserializer, Serialize};
@@ -320,10 +320,11 @@ pub enum MeetingType {
Unknown(String),
}
impl MeetingType {
/// Parse from the meeting type string
pub fn from_string(s: &str) -> Self {
match s {
impl std::str::FromStr for MeetingType {
type Err = std::convert::Infallible;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
Ok(match s {
"HB" | "H2" | "H1" => MeetingType::HybridBlended,
"OS" => MeetingType::OnlineSynchronous,
"OA" => MeetingType::OnlineAsynchronous,
@@ -331,9 +332,11 @@ impl MeetingType {
"ID" => MeetingType::IndependentStudy,
"FF" => MeetingType::FaceToFace,
other => MeetingType::Unknown(other.to_string()),
}
})
}
}
impl MeetingType {
/// Get description for the meeting type
pub fn description(&self) -> &'static str {
match self {
@@ -424,7 +427,7 @@ impl MeetingScheduleInfo {
end: now,
}
});
let meeting_type = MeetingType::from_string(&meeting_time.meeting_type);
let meeting_type: MeetingType = meeting_time.meeting_type.parse().unwrap();
let location = MeetingLocation::from_meeting_time(meeting_time);
let duration_weeks = date_range.weeks_duration();
+3 -14
View File
@@ -10,8 +10,9 @@ pub struct Range {
pub high: i32,
}
/// Builder for constructing Banner API search queries
/// Builder for constructing Banner API search queries.
#[derive(Debug, Clone, Default)]
#[allow(dead_code)]
pub struct SearchQuery {
subject: Option<String>,
title: Option<String>,
@@ -32,6 +33,7 @@ pub struct SearchQuery {
course_number_range: Option<Range>,
}
#[allow(dead_code)]
impl SearchQuery {
/// Creates a new SearchQuery with default values
pub fn new() -> Self {
@@ -67,7 +69,6 @@ impl SearchQuery {
}
/// Adds a keyword to the query
#[allow(dead_code)]
pub fn keyword<S: Into<String>>(mut self, keyword: S) -> Self {
match &mut self.keywords {
Some(keywords) => keywords.push(keyword.into()),
@@ -77,63 +78,54 @@ impl SearchQuery {
}
/// Sets whether to search for open courses only
#[allow(dead_code)]
pub fn open_only(mut self, open_only: bool) -> Self {
self.open_only = Some(open_only);
self
}
/// Sets the term part for the query
#[allow(dead_code)]
pub fn term_part(mut self, term_part: Vec<String>) -> Self {
self.term_part = Some(term_part);
self
}
/// Sets the campuses for the query
#[allow(dead_code)]
pub fn campus(mut self, campus: Vec<String>) -> Self {
self.campus = Some(campus);
self
}
/// Sets the instructional methods for the query
#[allow(dead_code)]
pub fn instructional_method(mut self, instructional_method: Vec<String>) -> Self {
self.instructional_method = Some(instructional_method);
self
}
/// Sets the attributes for the query
#[allow(dead_code)]
pub fn attributes(mut self, attributes: Vec<String>) -> Self {
self.attributes = Some(attributes);
self
}
/// Sets the instructors for the query
#[allow(dead_code)]
pub fn instructor(mut self, instructor: Vec<u64>) -> Self {
self.instructor = Some(instructor);
self
}
/// Sets the start time for the query
#[allow(dead_code)]
pub fn start_time(mut self, start_time: Duration) -> Self {
self.start_time = Some(start_time);
self
}
/// Sets the end time for the query
#[allow(dead_code)]
pub fn end_time(mut self, end_time: Duration) -> Self {
self.end_time = Some(end_time);
self
}
/// Sets the credit range for the query
#[allow(dead_code)]
pub fn credits(mut self, low: i32, high: i32) -> Self {
self.min_credits = Some(low);
self.max_credits = Some(high);
@@ -141,14 +133,12 @@ impl SearchQuery {
}
/// Sets the minimum credits for the query
#[allow(dead_code)]
pub fn min_credits(mut self, value: i32) -> Self {
self.min_credits = Some(value);
self
}
/// Sets the maximum credits for the query
#[allow(dead_code)]
pub fn max_credits(mut self, value: i32) -> Self {
self.max_credits = Some(value);
self
@@ -161,7 +151,6 @@ impl SearchQuery {
}
/// Sets the offset for pagination
#[allow(dead_code)]
pub fn offset(mut self, offset: i32) -> Self {
self.offset = offset;
self
+128 -71
View File
@@ -1,8 +1,74 @@
//! Database query functions for courses, used by the web API.
use crate::data::models::Course;
use crate::data::models::{Course, CourseInstructorDetail};
use crate::error::Result;
use sqlx::PgPool;
use std::collections::HashMap;
/// Column to sort search results by.
#[derive(Debug, Clone, Copy, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SortColumn {
CourseCode,
Title,
Instructor,
Time,
Seats,
}
/// Sort direction.
#[derive(Debug, Clone, Copy, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SortDirection {
Asc,
Desc,
}
/// Shared WHERE clause for course search filters.
///
/// Parameters $1-$8 match the bind order in `search_courses`.
const SEARCH_WHERE: &str = r#"
WHERE term_code = $1
AND ($2::text[] IS NULL OR subject = ANY($2))
AND ($3::text IS NULL OR title_search @@ plainto_tsquery('simple', $3) OR title ILIKE '%' || $3 || '%')
AND ($4::int IS NULL OR course_number::int >= $4)
AND ($5::int IS NULL OR course_number::int <= $5)
AND ($6::bool = false OR max_enrollment > enrollment)
AND ($7::text IS NULL OR instructional_method = $7)
AND ($8::text IS NULL OR campus = $8)
"#;
/// Build a safe ORDER BY clause from typed sort parameters.
///
/// All column names are hardcoded string literals — no caller input is interpolated.
fn sort_clause(column: Option<SortColumn>, direction: Option<SortDirection>) -> String {
let dir = match direction.unwrap_or(SortDirection::Asc) {
SortDirection::Asc => "ASC",
SortDirection::Desc => "DESC",
};
match column {
Some(SortColumn::CourseCode) => {
format!("subject {dir}, course_number {dir}, sequence_number {dir}")
}
Some(SortColumn::Title) => format!("title {dir}"),
Some(SortColumn::Instructor) => {
format!(
"(SELECT i.display_name FROM course_instructors ci \
JOIN instructors i ON i.banner_id = ci.instructor_id \
WHERE ci.course_id = courses.id AND ci.is_primary = true \
LIMIT 1) {dir} NULLS LAST"
)
}
Some(SortColumn::Time) => {
format!("(meeting_times->0->>'begin_time') {dir} NULLS LAST")
}
Some(SortColumn::Seats) => {
format!("(max_enrollment - enrollment) {dir}")
}
None => "subject ASC, course_number ASC, sequence_number ASC".to_string(),
}
}
/// Search courses by term with optional filters.
///
@@ -21,32 +87,17 @@ pub async fn search_courses(
campus: Option<&str>,
limit: i32,
offset: i32,
order_by: &str,
sort_by: Option<SortColumn>,
sort_dir: Option<SortDirection>,
) -> Result<(Vec<Course>, i64)> {
// Build WHERE clauses dynamically via parameter binding + COALESCE trick:
// each optional filter uses ($N IS NULL OR column = $N) so NULL means "no filter".
//
// ORDER BY is interpolated as a string since column names can't be bound as
// parameters. The caller must provide a safe, pre-validated clause (see
// `sort_clause` in routes.rs).
let query = format!(
r#"
SELECT *
FROM courses
WHERE term_code = $1
AND ($2::text[] IS NULL OR subject = ANY($2))
AND ($3::text IS NULL OR title_search @@ plainto_tsquery('simple', $3) OR title ILIKE '%' || $3 || '%')
AND ($4::int IS NULL OR course_number::int >= $4)
AND ($5::int IS NULL OR course_number::int <= $5)
AND ($6::bool = false OR max_enrollment > enrollment)
AND ($7::text IS NULL OR instructional_method = $7)
AND ($8::text IS NULL OR campus = $8)
ORDER BY {order_by}
LIMIT $9 OFFSET $10
"#
);
let order_by = sort_clause(sort_by, sort_dir);
let courses = sqlx::query_as::<_, Course>(&query)
let data_query = format!(
"SELECT * FROM courses {SEARCH_WHERE} ORDER BY {order_by} LIMIT $9 OFFSET $10"
);
let count_query = format!("SELECT COUNT(*) FROM courses {SEARCH_WHERE}");
let courses = sqlx::query_as::<_, Course>(&data_query)
.bind(term_code)
.bind(subject)
.bind(title_query)
@@ -60,30 +111,17 @@ pub async fn search_courses(
.fetch_all(db_pool)
.await?;
let total: (i64,) = sqlx::query_as(
r#"
SELECT COUNT(*)
FROM courses
WHERE term_code = $1
AND ($2::text[] IS NULL OR subject = ANY($2))
AND ($3::text IS NULL OR title_search @@ plainto_tsquery('simple', $3) OR title ILIKE '%' || $3 || '%')
AND ($4::int IS NULL OR course_number::int >= $4)
AND ($5::int IS NULL OR course_number::int <= $5)
AND ($6::bool = false OR max_enrollment > enrollment)
AND ($7::text IS NULL OR instructional_method = $7)
AND ($8::text IS NULL OR campus = $8)
"#,
)
.bind(term_code)
.bind(subject)
.bind(title_query)
.bind(course_number_low)
.bind(course_number_high)
.bind(open_only)
.bind(instructional_method)
.bind(campus)
.fetch_one(db_pool)
.await?;
let total: (i64,) = sqlx::query_as(&count_query)
.bind(term_code)
.bind(subject)
.bind(title_query)
.bind(course_number_low)
.bind(course_number_high)
.bind(open_only)
.bind(instructional_method)
.bind(campus)
.fetch_one(db_pool)
.await?;
Ok((courses, total.0))
}
@@ -103,33 +141,16 @@ pub async fn get_course_by_crn(
Ok(course)
}
/// Get instructors for a course by course ID.
///
/// Returns `(banner_id, display_name, email, is_primary, rmp_avg_rating, rmp_num_ratings)` tuples.
/// Get instructors for a single course by course ID.
pub async fn get_course_instructors(
db_pool: &PgPool,
course_id: i32,
) -> Result<
Vec<(
String,
String,
Option<String>,
bool,
Option<f32>,
Option<i32>,
)>,
> {
let rows: Vec<(
String,
String,
Option<String>,
bool,
Option<f32>,
Option<i32>,
)> = sqlx::query_as(
) -> Result<Vec<CourseInstructorDetail>> {
let rows = sqlx::query_as::<_, CourseInstructorDetail>(
r#"
SELECT i.banner_id, i.display_name, i.email, ci.is_primary,
rp.avg_rating, rp.num_ratings
rp.avg_rating, rp.num_ratings,
ci.course_id
FROM course_instructors ci
JOIN instructors i ON i.banner_id = ci.instructor_id
LEFT JOIN rmp_professors rp ON rp.legacy_id = i.rmp_legacy_id
@@ -143,6 +164,42 @@ pub async fn get_course_instructors(
Ok(rows)
}
/// Batch-fetch instructors for multiple courses in a single query.
///
/// Returns a map of `course_id → Vec<CourseInstructorDetail>`.
pub async fn get_instructors_for_courses(
db_pool: &PgPool,
course_ids: &[i32],
) -> Result<HashMap<i32, Vec<CourseInstructorDetail>>> {
if course_ids.is_empty() {
return Ok(HashMap::new());
}
let rows = sqlx::query_as::<_, CourseInstructorDetail>(
r#"
SELECT i.banner_id, i.display_name, i.email, ci.is_primary,
rp.avg_rating, rp.num_ratings,
ci.course_id
FROM course_instructors ci
JOIN instructors i ON i.banner_id = ci.instructor_id
LEFT JOIN rmp_professors rp ON rp.legacy_id = i.rmp_legacy_id
WHERE ci.course_id = ANY($1)
ORDER BY ci.course_id, ci.is_primary DESC, i.display_name
"#,
)
.bind(course_ids)
.fetch_all(db_pool)
.await?;
let mut map: HashMap<i32, Vec<CourseInstructorDetail>> = HashMap::new();
for row in rows {
// course_id is always present in the batch query
let cid = row.course_id.unwrap_or_default();
map.entry(cid).or_default().push(row);
}
Ok(map)
}
/// Get subjects for a term, sorted by total enrollment (descending).
///
/// Returns only subjects that have courses in the given term, with their
+13
View File
@@ -76,6 +76,19 @@ pub struct CourseInstructor {
pub is_primary: bool,
}
/// Joined instructor data for a course (from course_instructors + instructors + rmp_professors).
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseInstructorDetail {
pub banner_id: String,
pub display_name: String,
pub email: Option<String>,
pub is_primary: bool,
pub avg_rating: Option<f32>,
pub num_ratings: Option<i32>,
/// Present when fetched via batch query; `None` for single-course queries.
pub course_id: Option<i32>,
}
#[allow(dead_code)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct ReferenceData {
+20 -13
View File
@@ -134,7 +134,7 @@ pub async fn find_existing_job_payloads(
Ok(existing_payloads)
}
/// Batch insert scrape jobs in a single transaction.
/// Batch insert scrape jobs using UNNEST for a single round-trip.
///
/// All jobs are inserted with `execute_at` set to the current time.
///
@@ -149,22 +149,29 @@ pub async fn batch_insert_jobs(
return Ok(());
}
let now = chrono::Utc::now();
let mut tx = db_pool.begin().await?;
let mut target_types: Vec<String> = Vec::with_capacity(jobs.len());
let mut payloads: Vec<serde_json::Value> = Vec::with_capacity(jobs.len());
let mut priorities: Vec<String> = Vec::with_capacity(jobs.len());
for (payload, target_type, priority) in jobs {
sqlx::query(
"INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at) VALUES ($1, $2, $3, $4)"
)
.bind(target_type)
.bind(payload)
.bind(priority)
.bind(now)
.execute(&mut *tx)
.await?;
target_types.push(format!("{target_type:?}"));
payloads.push(payload.clone());
priorities.push(format!("{priority:?}"));
}
tx.commit().await?;
sqlx::query(
r#"
INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at)
SELECT v.target_type::target_type, v.payload, v.priority::scrape_priority, NOW()
FROM UNNEST($1::text[], $2::jsonb[], $3::text[])
AS v(target_type, payload, priority)
"#,
)
.bind(&target_types)
.bind(&payloads)
.bind(&priorities)
.execute(db_pool)
.await?;
Ok(())
}
-1
View File
@@ -19,7 +19,6 @@ mod scraper;
mod services;
mod signals;
mod state;
#[allow(dead_code)]
mod status;
mod web;
+6
View File
@@ -35,6 +35,12 @@ pub struct RmpClient {
http: reqwest::Client,
}
impl Default for RmpClient {
fn default() -> Self {
Self::new()
}
}
impl RmpClient {
pub fn new() -> Self {
Self {
+17 -12
View File
@@ -13,9 +13,10 @@ use tokio::sync::RwLock;
/// In-memory cache for reference data (code→description lookups).
///
/// Loaded from the `reference_data` table on startup and refreshed periodically.
/// Uses a two-level HashMap so lookups take `&str` without allocating.
pub struct ReferenceCache {
/// `(category, code)``description`
data: HashMap<(String, String), String>,
/// category → (code → description)
data: HashMap<String, HashMap<String, String>>,
}
impl Default for ReferenceCache {
@@ -34,27 +35,31 @@ impl ReferenceCache {
/// Build cache from a list of reference data entries.
pub fn from_entries(entries: Vec<ReferenceData>) -> Self {
let data = entries
.into_iter()
.map(|e| ((e.category, e.code), e.description))
.collect();
let mut data: HashMap<String, HashMap<String, String>> = HashMap::new();
for e in entries {
data.entry(e.category)
.or_default()
.insert(e.code, e.description);
}
Self { data }
}
/// Look up a description by category and code.
/// Look up a description by category and code. Zero allocations.
pub fn lookup(&self, category: &str, code: &str) -> Option<&str> {
self.data
.get(&(category.to_string(), code.to_string()))
.get(category)
.and_then(|codes| codes.get(code))
.map(|s| s.as_str())
}
/// Get all `(code, description)` pairs for a category, sorted by description.
pub fn entries_for_category(&self, category: &str) -> Vec<(&str, &str)> {
let mut entries: Vec<(&str, &str)> = self
.data
let Some(codes) = self.data.get(category) else {
return Vec::new();
};
let mut entries: Vec<(&str, &str)> = codes
.iter()
.filter(|((cat, _), _)| cat == category)
.map(|((_, code), desc)| (code.as_str(), desc.as_str()))
.map(|(code, desc)| (code.as_str(), desc.as_str()))
.collect();
entries.sort_by(|a, b| a.1.cmp(b.1));
entries
+3
View File
@@ -10,6 +10,7 @@ use ts_rs::TS;
#[serde(rename_all = "lowercase")]
#[ts(export)]
pub enum ServiceStatus {
#[allow(dead_code)]
Starting,
Active,
Connected,
@@ -21,6 +22,7 @@ pub enum ServiceStatus {
#[derive(Debug, Clone)]
pub struct StatusEntry {
pub status: ServiceStatus,
#[allow(dead_code)]
pub updated_at: Instant,
}
@@ -48,6 +50,7 @@ impl ServiceStatusRegistry {
}
/// Returns the current status of a named service, if present.
#[allow(dead_code)]
pub fn get(&self, name: &str) -> Option<ServiceStatus> {
self.inner.get(name).map(|entry| entry.status.clone())
}
+33 -74
View File
@@ -323,59 +323,12 @@ struct SearchParams {
sort_dir: Option<SortDirection>,
}
#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "snake_case")]
enum SortColumn {
CourseCode,
Title,
Instructor,
Time,
Seats,
}
#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "snake_case")]
enum SortDirection {
Asc,
Desc,
}
use crate::data::courses::{SortColumn, SortDirection};
fn default_limit() -> i32 {
25
}
/// Build a safe ORDER BY clause from the validated sort column and direction.
fn sort_clause(column: Option<SortColumn>, direction: Option<SortDirection>) -> String {
let dir = match direction.unwrap_or(SortDirection::Asc) {
SortDirection::Asc => "ASC",
SortDirection::Desc => "DESC",
};
match column {
Some(SortColumn::CourseCode) => {
format!("subject {dir}, course_number {dir}, sequence_number {dir}")
}
Some(SortColumn::Title) => format!("title {dir}"),
Some(SortColumn::Instructor) => {
// Sort by primary instructor display name via a subquery
format!(
"(SELECT i.display_name FROM course_instructors ci \
JOIN instructors i ON i.banner_id = ci.instructor_id \
WHERE ci.course_id = courses.id AND ci.is_primary = true \
LIMIT 1) {dir} NULLS LAST"
)
}
Some(SortColumn::Time) => {
// Sort by first meeting time's begin_time via JSONB
format!("(meeting_times->0->>'begin_time') {dir} NULLS LAST")
}
Some(SortColumn::Seats) => {
format!("(max_enrollment - enrollment) {dir}")
}
None => "subject ASC, course_number ASC, sequence_number ASC".to_string(),
}
}
#[derive(Serialize, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
@@ -436,27 +389,21 @@ pub struct CodeDescription {
description: String,
}
/// Build a `CourseResponse` from a DB course, fetching its instructors.
async fn build_course_response(
/// Build a `CourseResponse` from a DB course with pre-fetched instructor details.
fn build_course_response(
course: &crate::data::models::Course,
db_pool: &sqlx::PgPool,
instructors: Vec<crate::data::models::CourseInstructorDetail>,
) -> CourseResponse {
let instructors = crate::data::courses::get_course_instructors(db_pool, course.id)
.await
.unwrap_or_default()
let instructors = instructors
.into_iter()
.map(
|(banner_id, display_name, email, is_primary, rmp_rating, rmp_num_ratings)| {
InstructorResponse {
banner_id,
display_name,
email,
is_primary,
rmp_rating,
rmp_num_ratings,
}
},
)
.map(|i| InstructorResponse {
banner_id: i.banner_id,
display_name: i.display_name,
email: i.email,
is_primary: i.is_primary,
rmp_rating: i.avg_rating,
rmp_num_ratings: i.num_ratings,
})
.collect();
CourseResponse {
@@ -495,8 +442,6 @@ async fn search_courses(
let limit = params.limit.clamp(1, 100);
let offset = params.offset.max(0);
let order_by = sort_clause(params.sort_by, params.sort_dir);
let (courses, total_count) = crate::data::courses::search_courses(
&state.db_pool,
&params.term,
@@ -513,7 +458,8 @@ async fn search_courses(
params.campus.as_deref(),
limit,
offset,
&order_by,
params.sort_by,
params.sort_dir,
)
.await
.map_err(|e| {
@@ -524,10 +470,20 @@ async fn search_courses(
)
})?;
let mut course_responses = Vec::with_capacity(courses.len());
for course in &courses {
course_responses.push(build_course_response(course, &state.db_pool).await);
}
// Batch-fetch all instructors in a single query instead of N+1
let course_ids: Vec<i32> = courses.iter().map(|c| c.id).collect();
let mut instructor_map =
crate::data::courses::get_instructors_for_courses(&state.db_pool, &course_ids)
.await
.unwrap_or_default();
let course_responses: Vec<CourseResponse> = courses
.iter()
.map(|course| {
let instructors = instructor_map.remove(&course.id).unwrap_or_default();
build_course_response(course, instructors)
})
.collect();
Ok(Json(SearchResponse {
courses: course_responses,
@@ -553,7 +509,10 @@ async fn get_course(
})?
.ok_or_else(|| (AxumStatusCode::NOT_FOUND, "Course not found".to_string()))?;
Ok(Json(build_course_response(&course, &state.db_pool).await))
let instructors = crate::data::courses::get_course_instructors(&state.db_pool, course.id)
.await
.unwrap_or_default();
Ok(Json(build_course_response(&course, instructors)))
}
/// `GET /api/terms`