Set up expiry time on major scrape, improve logs, use ResetDataForm(), fix query

This commit is contained in:
2024-01-29 15:56:13 -06:00
parent a785196437
commit 2783162b2b
2 changed files with 41 additions and 9 deletions

View File

@@ -274,6 +274,12 @@ func main() {
log.Info().Str("term", term).Str("sessionID", sessionID).Msg("Setting selected term") log.Info().Str("term", term).Str("sessionID", sessionID).Msg("Setting selected term")
SelectTerm(term) SelectTerm(term)
// Scrape on startup
err = Scrape()
if err != nil {
log.Fatal().Err(err).Msg("Startup Scrape Failed")
}
// Close session, ensure http client closes idle connections // Close session, ensure http client closes idle connections
defer session.Close() defer session.Close()
defer client.CloseIdleConnections() defer client.CloseIdleConnections()

View File

@@ -2,6 +2,7 @@ package main
import ( import (
"fmt" "fmt"
"math/rand"
"time" "time"
"github.com/redis/go-redis/v9" "github.com/redis/go-redis/v9"
@@ -90,12 +91,13 @@ func CanScrape(subject string) bool {
// ScrapeMajor is the scraping invocation for a specific major. // ScrapeMajor is the scraping invocation for a specific major.
// This function does not check whether scraping is required at this time, it is assumed that the caller has already done so. // This function does not check whether scraping is required at this time, it is assumed that the caller has already done so.
func ScrapeMajor(subject string) error { func ScrapeMajor(subject string) error {
offset := 1 offset := 0
totalClassCount := 0 totalClassCount := 0
log.Info().Str("subject", subject).Msg("Scraping Major")
for { for {
query := NewQuery().Offset(offset).MaxResults(MaxPageSize) query := NewQuery().Offset(offset).MaxResults(MaxPageSize).Subject(subject)
result, err := Search(query, "", false) result, err := Search(query, "subjectDescription", false)
if err != nil { if err != nil {
return fmt.Errorf("failed to search for classes on page %d: %w", offset, err) return fmt.Errorf("failed to search for classes on page %d: %w", offset, err)
} }
@@ -105,8 +107,13 @@ func ScrapeMajor(subject string) error {
return fmt.Errorf("search for classes on page %d was not successful", offset) return fmt.Errorf("search for classes on page %d was not successful", offset)
} }
classCount := len(result.Data)
totalClassCount += classCount
log.Debug().Str("subject", subject).Int("count", classCount).Int("offset", offset).Msg("Placing classes in Redis")
// Process each class and store it in Redis // Process each class and store it in Redis
for _, class := range result.Data { for _, class := range result.Data {
// TODO: Move this into a separate function to allow future comparison/SQLite intake
// Store class in Redis // Store class in Redis
err := kv.Set(ctx, fmt.Sprintf("class:%s", class.CourseReferenceNumber), class, 0).Err() err := kv.Set(ctx, fmt.Sprintf("class:%s", class.CourseReferenceNumber), class, 0).Err()
if err != nil { if err != nil {
@@ -114,9 +121,6 @@ func ScrapeMajor(subject string) error {
} }
} }
classCount := len(result.Data)
totalClassCount += classCount
// Increment and continue if the results are full // Increment and continue if the results are full
if classCount >= MaxPageSize { if classCount >= MaxPageSize {
// This is unlikely to happen, but log it just in case // This is unlikely to happen, but log it just in case
@@ -127,20 +131,42 @@ func ScrapeMajor(subject string) error {
offset += MaxPageSize offset += MaxPageSize
// TODO: Replace sleep with smarter rate limiting // TODO: Replace sleep with smarter rate limiting
time.Sleep(time.Second * 7) log.Debug().Str("subject", subject).Int("nextOffset", offset).Msg("Sleeping before next page")
time.Sleep(time.Second * 3)
continue continue
} else { } else {
// Log the number of classes scraped // Log the number of classes scraped
log.Info().Str("subject", subject).Int("count", totalClassCount).Int("offset", offset).Int("finalOffset", offset+classCount).Msg("Scraped classes") log.Info().Str("subject", subject).Int("total", totalClassCount).Msg("Major Scraped")
break break
} }
} }
// Calculate the expiry time for the scrape (1 hour for every 500 classes, random +-15%) with a minimum of 1 hour
scrapeExpiry := time.Hour * time.Duration(totalClassCount/500)
partial := scrapeExpiry.Seconds() * 0.15
if rand.Intn(2) == 0 {
scrapeExpiry -= time.Duration(partial) * time.Second
} else {
scrapeExpiry += time.Duration(partial) * time.Second
}
// Ensure the expiry is at least 1 hour with up to 15 extra minutes
if scrapeExpiry < time.Hour {
scrapeExpiry = time.Hour + time.Duration(rand.Intn(60*15))*time.Second
}
// If the subject is a priority, then the expiry is halved
if lo.Contains(PriorityMajors, subject) {
scrapeExpiry /= 2
}
// Mark the major as scraped // Mark the major as scraped
term := Default(time.Now()).ToString() term := Default(time.Now()).ToString()
err := kv.Set(ctx, fmt.Sprintf("scraped:%s:%s", subject, term), "1", 0).Err() err := kv.Set(ctx, fmt.Sprintf("scraped:%s:%s", subject, term), "1", scrapeExpiry).Err()
if err != nil { if err != nil {
log.Error().Err(err).Msg("failed to mark major as scraped") log.Error().Err(err).Msg("failed to mark major as scraped")
} else {
log.Debug().Str("subject", subject).Str("expiry", scrapeExpiry.String()).Msg("Marked major as scraped")
} }
return nil return nil