mirror of
https://github.com/Xevion/the-office.git
synced 2025-12-10 10:08:57 -06:00
fix assert, fix quote pattern, fix Section builder with deleted scene handling, added clearing methods, added Section repr
This commit is contained in:
@@ -5,14 +5,14 @@ from bs4 import BeautifulSoup
|
|||||||
from app import db, login
|
from app import db, login
|
||||||
|
|
||||||
episodes = [5, 6, 22, 23, 14, 26, 24, 24, 24, 23]
|
episodes = [5, 6, 22, 23, 14, 26, 24, 24, 24, 23]
|
||||||
quotePattern = r'(\w+):.+'
|
quotePattern = r'([\w\s]+):(.+)'
|
||||||
|
|
||||||
class Season(db.Model):
|
class Season(db.Model):
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
id = db.Column(db.Integer, primary_key=True)
|
||||||
episodes = db.relationship('Episode', backref='season', lazy='dynamic')
|
episodes = db.relationship('Episode', backref='season', lazy='dynamic')
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
assert 0 >= kwargs.get('id') <= 9, "Season ID must be 0-9 inclusive"
|
assert 0 <= kwargs.get('id') <= 9, "Season ID must be 0-9 inclusive"
|
||||||
super(Season, self).__init__(**kwargs)
|
super(Season, self).__init__(**kwargs)
|
||||||
|
|
||||||
def build(self):
|
def build(self):
|
||||||
@@ -50,20 +50,30 @@ class Episode(db.Model):
|
|||||||
|
|
||||||
def build(self):
|
def build(self):
|
||||||
"""downloads, processes, and automatically creates Sections and Quotes"""
|
"""downloads, processes, and automatically creates Sections and Quotes"""
|
||||||
link = f'http://officequotes.net/no{self.season_id}-{str(self.episode).zfill(2)}.php'
|
link = f'http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php'
|
||||||
data = requests.get(link).text
|
data = requests.get(link).text
|
||||||
soup = BeautifulSoup(data, 'html.parser')
|
soup = BeautifulSoup(data, 'html.parser')
|
||||||
|
|
||||||
sections = soup.find_all(attrs={'class' : 'quote'})
|
sections = soup.find_all(attrs={'class' : 'quote'})
|
||||||
deleted = 0
|
deleted = 0
|
||||||
|
|
||||||
for section in sections:
|
for section in sections:
|
||||||
quotes = [quote.string + quote.next_sibling.string for quote in section.find_all('b')]
|
quotes = [quote.string + quote.next_sibling.string for quote in section.find_all('b')]
|
||||||
isDeletedScene = quotes[0].lower().startswith('deleted scene')
|
isDeletedScene = quotes[0].lower().startswith('deleted scene')
|
||||||
if isDeletedScene: deleted += 1
|
if isDeletedScene:
|
||||||
s = Section(episode=self, deleted=deleted if isDeletedScene else -1, quotes=quotes)
|
deleted += 1
|
||||||
|
s = Section(episode_id=self.id, deleted=deleted if isDeletedScene else -1)
|
||||||
|
s.build(quotes[1:] if isDeletedScene else quotes)
|
||||||
db.session.add(s)
|
db.session.add(s)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
def clear(self):
    """clears the stored contents of every Section belonging to this Episode

    Looks up all Sections whose ``episode_id`` matches this Episode, asks each
    one to clear itself without committing, then commits once at the end so
    the whole episode is handled in a single transaction.
    """
    sections = Section.query.filter_by(episode_id=self.id).all()
    print(f'Clearing {len(sections)} sections from Database for Episode {self.number} of Season {self.season_id}')
    for section in sections:
        # commit=False: defer to the single commit below instead of one per Section
        section.clear(commit=False)
    # NOTE(review): the Section rows themselves are not deleted here, only
    # whatever Section.clear removes - confirm this is intended before a rebuild.
    db.session.commit()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def scrapeURL(self):
|
def scrapeURL(self):
|
||||||
return f'http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php'
|
return f'http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php'
|
||||||
@@ -75,17 +85,29 @@ class Section(db.Model):
|
|||||||
deleted = db.Column(db.Integer, default=-1)
|
deleted = db.Column(db.Integer, default=-1)
|
||||||
quotes = db.relationship('Quote', backref='section', lazy='dynamic')
|
quotes = db.relationship('Quote', backref='section', lazy='dynamic')
|
||||||
|
|
||||||
def build(self, quotes, commit=False):
|
def build(self, quotes, commit=False, reset=False):
|
||||||
"""given an List of unformatted script quotes, automatically creates Quotes assigned to this Section"""
|
"""given an List of unformatted script quotes, automatically creates Quotes assigned to this Section"""
|
||||||
for quote in quotes:
|
for quote in quotes:
|
||||||
if quote.lower().startswith('deleted scene'):
|
if quote.lower().startswith('deleted scene'):
|
||||||
raise Exception(f'Deleted Scene Quote passed to Section Builder: "{quote}"')
|
raise Exception(f'Deleted Scene Quote passed to Section Builder: "{quote}"')
|
||||||
match = re.match(quotePattern, quote)
|
match = re.match(quotePattern, quote)
|
||||||
assert match != None, f"Quote '{quote}' could not be processed."
|
assert match != None, f"Quote '{quote}' could not be processed."
|
||||||
q = Quote(section=self, speaker=match[1], text=match[2])
|
q = Quote(section=self, speaker=match[1].strip(), text=match[2].strip())
|
||||||
db.session.add(q)
|
db.session.add(q)
|
||||||
if commit: db.session.commit()
|
if commit: db.session.commit()
|
||||||
|
|
||||||
|
def clear(self, commit=True):
    """deletes every Quote attached to this Section

    commit: when truthy (the default) the session is committed right away;
    pass False to batch several clears into one commit made by the caller.
    """
    stale = Quote.query.filter_by(section_id=self.id).all()
    print(f'Clearing {len(stale)} quotes from Section ID {self.id}')
    for row in stale:
        db.session.delete(row)
    if commit:
        db.session.commit()
|
||||||
|
|
||||||
|
def __repr__(self):
    """debug representation: section id, parent episode id, that episode's season id and quote count

    ``season`` is None when the Section has no episode_id yet or the Episode
    row cannot be found, so repr() never raises on a half-built object.
    """
    episode = None
    if self.episode_id is not None:
        # Query.get() returns the Episode instance itself (or None), not a
        # query, so there is nothing to call .first() on.
        episode = Episode.query.get(self.episode_id)
    season = episode.season_id if episode is not None else None
    # count in the database instead of loading every Quote just to len() it
    quotes = Quote.query.filter_by(section_id=self.id).count()
    return f'Section(id={self.id} episode={self.episode_id} season={season} quotes=[{quotes}...])'
|
||||||
|
|
||||||
class Quote(db.Model):
|
class Quote(db.Model):
|
||||||
"""represents a specific quote by a specific speaker"""
|
"""represents a specific quote by a specific speaker"""
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
id = db.Column(db.Integer, primary_key=True)
|
||||||
|
|||||||
Reference in New Issue
Block a user