mirror of
https://github.com/Xevion/banner.git
synced 2025-12-09 10:06:34 -06:00
Set up expiry time on major scrape, improve logs, use ResetDataForm(), fix query
This commit is contained in:
6
main.go
6
main.go
@@ -274,6 +274,12 @@ func main() {
|
|||||||
log.Info().Str("term", term).Str("sessionID", sessionID).Msg("Setting selected term")
|
log.Info().Str("term", term).Str("sessionID", sessionID).Msg("Setting selected term")
|
||||||
SelectTerm(term)
|
SelectTerm(term)
|
||||||
|
|
||||||
|
// Scrape on startup
|
||||||
|
err = Scrape()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal().Err(err).Msg("Startup Scrape Failed")
|
||||||
|
}
|
||||||
|
|
||||||
// Close session, ensure http client closes idle connections
|
// Close session, ensure http client closes idle connections
|
||||||
defer session.Close()
|
defer session.Close()
|
||||||
defer client.CloseIdleConnections()
|
defer client.CloseIdleConnections()
|
||||||
|
|||||||
44
scrape.go
44
scrape.go
@@ -2,6 +2,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/redis/go-redis/v9"
|
"github.com/redis/go-redis/v9"
|
||||||
@@ -90,12 +91,13 @@ func CanScrape(subject string) bool {
|
|||||||
// ScrapeMajor is the scraping invocation for a specific major.
|
// ScrapeMajor is the scraping invocation for a specific major.
|
||||||
// This function does not check whether scraping is required at this time, it is assumed that the caller has already done so.
|
// This function does not check whether scraping is required at this time, it is assumed that the caller has already done so.
|
||||||
func ScrapeMajor(subject string) error {
|
func ScrapeMajor(subject string) error {
|
||||||
offset := 1
|
offset := 0
|
||||||
totalClassCount := 0
|
totalClassCount := 0
|
||||||
|
log.Info().Str("subject", subject).Msg("Scraping Major")
|
||||||
|
|
||||||
for {
|
for {
|
||||||
query := NewQuery().Offset(offset).MaxResults(MaxPageSize)
|
query := NewQuery().Offset(offset).MaxResults(MaxPageSize).Subject(subject)
|
||||||
result, err := Search(query, "", false)
|
result, err := Search(query, "subjectDescription", false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to search for classes on page %d: %w", offset, err)
|
return fmt.Errorf("failed to search for classes on page %d: %w", offset, err)
|
||||||
}
|
}
|
||||||
@@ -105,8 +107,13 @@ func ScrapeMajor(subject string) error {
|
|||||||
return fmt.Errorf("search for classes on page %d was not successful", offset)
|
return fmt.Errorf("search for classes on page %d was not successful", offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
classCount := len(result.Data)
|
||||||
|
totalClassCount += classCount
|
||||||
|
log.Debug().Str("subject", subject).Int("count", classCount).Int("offset", offset).Msg("Placing classes in Redis")
|
||||||
|
|
||||||
// Process each class and store it in Redis
|
// Process each class and store it in Redis
|
||||||
for _, class := range result.Data {
|
for _, class := range result.Data {
|
||||||
|
// TODO: Move this into a separate function to allow future comparison/SQLite intake
|
||||||
// Store class in Redis
|
// Store class in Redis
|
||||||
err := kv.Set(ctx, fmt.Sprintf("class:%s", class.CourseReferenceNumber), class, 0).Err()
|
err := kv.Set(ctx, fmt.Sprintf("class:%s", class.CourseReferenceNumber), class, 0).Err()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -114,9 +121,6 @@ func ScrapeMajor(subject string) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
classCount := len(result.Data)
|
|
||||||
totalClassCount += classCount
|
|
||||||
|
|
||||||
// Increment and continue if the results are full
|
// Increment and continue if the results are full
|
||||||
if classCount >= MaxPageSize {
|
if classCount >= MaxPageSize {
|
||||||
// This is unlikely to happen, but log it just in case
|
// This is unlikely to happen, but log it just in case
|
||||||
@@ -127,20 +131,42 @@ func ScrapeMajor(subject string) error {
|
|||||||
offset += MaxPageSize
|
offset += MaxPageSize
|
||||||
|
|
||||||
// TODO: Replace sleep with smarter rate limiting
|
// TODO: Replace sleep with smarter rate limiting
|
||||||
time.Sleep(time.Second * 7)
|
log.Debug().Str("subject", subject).Int("nextOffset", offset).Msg("Sleeping before next page")
|
||||||
|
time.Sleep(time.Second * 3)
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
// Log the number of classes scraped
|
// Log the number of classes scraped
|
||||||
log.Info().Str("subject", subject).Int("count", totalClassCount).Int("offset", offset).Int("finalOffset", offset+classCount).Msg("Scraped classes")
|
log.Info().Str("subject", subject).Int("total", totalClassCount).Msg("Major Scraped")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate the expiry time for the scrape (1 hour for every 500 classes, random +-15%) with a minimum of 1 hour
|
||||||
|
scrapeExpiry := time.Hour * time.Duration(totalClassCount/500)
|
||||||
|
partial := scrapeExpiry.Seconds() * 0.15
|
||||||
|
if rand.Intn(2) == 0 {
|
||||||
|
scrapeExpiry -= time.Duration(partial) * time.Second
|
||||||
|
} else {
|
||||||
|
scrapeExpiry += time.Duration(partial) * time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure the expiry is at least 1 hour with up to 15 extra minutes
|
||||||
|
if scrapeExpiry < time.Hour {
|
||||||
|
scrapeExpiry = time.Hour + time.Duration(rand.Intn(60*15))*time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the subject is a priority, then the expiry is halved
|
||||||
|
if lo.Contains(PriorityMajors, subject) {
|
||||||
|
scrapeExpiry /= 2
|
||||||
|
}
|
||||||
|
|
||||||
// Mark the major as scraped
|
// Mark the major as scraped
|
||||||
term := Default(time.Now()).ToString()
|
term := Default(time.Now()).ToString()
|
||||||
err := kv.Set(ctx, fmt.Sprintf("scraped:%s:%s", subject, term), "1", 0).Err()
|
err := kv.Set(ctx, fmt.Sprintf("scraped:%s:%s", subject, term), "1", scrapeExpiry).Err()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Msg("failed to mark major as scraped")
|
log.Error().Err(err).Msg("failed to mark major as scraped")
|
||||||
|
} else {
|
||||||
|
log.Debug().Str("subject", subject).Str("expiry", scrapeExpiry.String()).Msg("Marked major as scraped")
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
Reference in New Issue
Block a user