Files
scla-unsubscribe/helpers.go

203 lines
6.3 KiB
Go

package main
import (
"context"
"fmt"
"io"
"math/rand"
"net/http"
"regexp"
"strconv"
"strings"
"time"
"github.com/dustin/go-humanize"
"github.com/icrowley/fake"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
"golang.org/x/time/rate"
)
var DomainLimiters = map[string]*rate.Limiter{
"utsa.edu": rate.NewLimiter(2, 5),
"thescla.org": rate.NewLimiter(3, 7),
}
func GetLimiter(domain string) *rate.Limiter {
// Naively simplify the domain
simplifiedDomain := SimplifyUrlToDomain(domain)
if simplifiedDomain != domain {
log.Debug().Str("domain", domain).Str("simplified", simplifiedDomain).Msg("Domain Simplified")
}
// Get the limiter
limiter, ok := DomainLimiters[simplifiedDomain]
// Create a new limiter if one does not exist
if !ok {
limiter = rate.NewLimiter(1, 3)
DomainLimiters[simplifiedDomain] = limiter
log.Debug().Str("domain", domain).Msg("New Limiter Created")
}
return limiter
}
// This will select multiple groups, but the first group is all that matters
var DomainPattern = regexp.MustCompile(`(?:\w+\.)*(\w+\.\w+)(?:\/)?`)
// SimplifyUrlToDomain transforms a url into a common simplified domain
// This is not the same as the host, as it removes subdomains (www, asap, etc.)
// This helps me group together domains that are related to eachother, such as those at UTSA.
func SimplifyUrlToDomain(url string) string {
// Find the domain
matches := DomainPattern.FindStringSubmatch(url)
if len(matches) == 0 {
return ""
}
return matches[1]
}
// Wait waits for a token from the limiter
func Wait(limiter *rate.Limiter, ctx context.Context) {
r := limiter.Reserve()
if !r.OK() {
log.Warn().Msg("Rate Limit Exceeded")
return
}
// Wait for the limiter
if r.Delay() > 0 {
log.Debug().Str("delay", r.Delay().String()).Msg("Waiting")
time.Sleep(r.Delay())
}
}
// DoRequestNoRead makes a request and returns the response
// Compared to DoRequest, this function does not read the response body, and it uses the Content-Length header for the associated log attribute.
// This function encapsulates the boilerplate for logging.
func DoRequestNoRead(req *http.Request) (*http.Response, error) {
// Acquire the limiter, and wait for a token
limiter := GetLimiter(req.URL.Host)
Wait(limiter, req.Context())
// Log the request
log.Debug().Str("method", req.Method).Str("host", req.Host).Str("url", req.URL.String()).Msg("Request")
// Send the request (while acquiring timings)
start := time.Now()
resp, err := client.Do(req)
duration := time.Since(start)
if err != nil {
log.Error().Err(err).Msg("Request Error")
return nil, err
}
contentLength, err := strconv.ParseUint(resp.Header.Get("Content-Length"), 10, 64)
if err != nil {
contentLength = 0
}
log.Debug().Int("code", resp.StatusCode).Str("content-type", resp.Header.Get("Content-Type")).Str("content-length", Bytes(contentLength)).
Str("duration", duration.String()).Msg("Response")
return resp, nil
}
// DoRequest makes a request and returns the response and body
// This function encapsulates the boilerplate for logging and reading the response body
func DoRequest(req *http.Request) (*http.Response, []byte, error) {
// Acquire the limiter, and wait for a token
limiter := GetLimiter(req.URL.Host)
Wait(limiter, req.Context())
// Log the request
log.Debug().Str("method", req.Method).Str("host", req.Host).Str("url", req.URL.String()).Msg("Request")
// Send the request (while acquiring timings)
start := time.Now()
resp, err := client.Do(req)
duration := time.Since(start)
// Handle errors
if err != nil {
log.Error().Err(err).Msg("Request Error")
return nil, nil, err
}
// Read the body
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
log.Err(err).Int("code", resp.StatusCode).Str("content-type", resp.Header.Get("Content-Type")).Str("content-length", Bytes(uint64(len(body)))).
Str("duration", duration.String()).Msg("Response (Unable to Read Body)")
return nil, nil, err
}
log.Debug().Int("code", resp.StatusCode).Str("content-type", resp.Header.Get("Content-Type")).Str("content-length", Bytes(uint64(len(body)))).
Str("duration", duration.String()).Msg("Response")
return resp, body, nil
}
const userAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0"
// ApplySclaHeaders applies headers to a request for thescla.org
func ApplySclaHeaders(req *http.Request) {
req.Header.Set("Origin", "http://www2.thescla.org")
req.Header.Set("User-Agent", userAgent)
req.Header.Set("Accept", "application/json, text/javascript, */*; q=0.01")
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
req.Header.Set("Accept-Encoding", "gzip, deflate")
}
// ApplyUtsaHeaders applies headers to a request for utsa.edu
func ApplyUtsaHeaders(req *http.Request) {
req.Header.Set("User-Agent", userAgent)
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8")
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
req.Header.Set("Accept-Encoding", "gzip, deflate")
}
var nonAlphaNumeric = regexp.MustCompile(`[^a-zA-Z0-9]+`)
var continuousWhitespace = regexp.MustCompile(`\s+`)
// NormalizeTitle creates a normalized title from a string, providing a consistent format regardless of whitespace or non-alphanumeric characters
//
// Non-alphanumeric characters are removed
// Capital letters are converted to lowercase
// Whitespace at the beginning or end of the string is removed
// Whitespace of any continuous length is replaced with a single dash
//
// Examples:
//
// "Mailing Address" => "mailing-address"
// "Mailing | Address" => "mailing-address"
// " Mailing Address " => "mailing-address"
// " Mailing | Address " => "mailing-address"
func NormalizeTitle(title string) string {
return continuousWhitespace.ReplaceAllString(
strings.TrimSpace(
strings.ToLower(
nonAlphaNumeric.ReplaceAllString(
title, " ",
),
),
),
"-",
)
}
// Bytes calls humanize.Bytes and removes space characters
func Bytes(bytes uint64) string {
return strings.Replace(humanize.Bytes(bytes), " ", "", -1)
}
// RandBool returns a random boolean
func RandBool() bool {
return rand.Intn(2) == 0
}
// FakeEmail generates a fake email address
func FakeEmail() string {
return strings.ToLower(fmt.Sprintf("%s.%s@%sutsa.edu", fake.FirstName(), fake.LastName(), lo.Ternary(RandBool(), "my.", "")))
}