fix assert, fix quote pattern, fix Section builder with deleted scene handling, added clearing methods, added Section repr

This commit is contained in:
Xevion
2020-01-20 06:08:16 -06:00
parent 9c00745503
commit 94099c364d

View File

@@ -5,14 +5,14 @@ from bs4 import BeautifulSoup
from app import db, login from app import db, login
episodes = [5, 6, 22, 23, 14, 26, 24, 24, 24, 23] episodes = [5, 6, 22, 23, 14, 26, 24, 24, 24, 23]
quotePattern = r'(\w+):.+' quotePattern = r'([\w\s]+):(.+)'
class Season(db.Model): class Season(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
episodes = db.relationship('Episode', backref='season', lazy='dynamic') episodes = db.relationship('Episode', backref='season', lazy='dynamic')
def __init__(self, **kwargs): def __init__(self, **kwargs):
assert 0 >= kwargs.get('id') <= 9, "Season ID must be 0-9 inclusive" assert 0 <= kwargs.get('id') <= 9, "Season ID must be 0-9 inclusive"
super(Season, self).__init__(**kwargs) super(Season, self).__init__(**kwargs)
def build(self): def build(self):
@@ -50,20 +50,30 @@ class Episode(db.Model):
def build(self): def build(self):
"""downloads, processes, and automatically creates Sections and Quotes""" """downloads, processes, and automatically creates Sections and Quotes"""
link = f'http://officequotes.net/no{self.season_id}-{str(self.episode).zfill(2)}.php' link = f'http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php'
data = requests.get(link).text data = requests.get(link).text
soup = BeautifulSoup(data, 'html.parser') soup = BeautifulSoup(data, 'html.parser')
sections = soup.find_all(attrs={'class' : 'quote'}) sections = soup.find_all(attrs={'class' : 'quote'})
deleted = 0 deleted = 0
for section in sections: for section in sections:
quotes = [quote.string + quote.next_sibling.string for quote in section.find_all('b')] quotes = [quote.string + quote.next_sibling.string for quote in section.find_all('b')]
isDeletedScene = quotes[0].lower().startswith('deleted scene') isDeletedScene = quotes[0].lower().startswith('deleted scene')
if isDeletedScene: deleted += 1 if isDeletedScene:
s = Section(episode=self, deleted=deleted if isDeletedScene else -1, quotes=quotes) deleted += 1
s = Section(episode_id=self.id, deleted=deleted if isDeletedScene else -1)
s.build(quotes[1:] if isDeletedScene else quotes)
db.session.add(s) db.session.add(s)
db.session.commit() db.session.commit()
def clear(self):
sections = Section.query.filter_by(episode_id=self.id).all()
print(f'Clearing {len(sections)} from Databse for Episode {self.number} of Season {self.season_id}')
for section in sections:
section.clear(commit=False)
db.session.commit()
@property @property
def scrapeURL(self): def scrapeURL(self):
return f'http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php' return f'http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php'
@@ -75,17 +85,29 @@ class Section(db.Model):
deleted = db.Column(db.Integer, default=-1) deleted = db.Column(db.Integer, default=-1)
quotes = db.relationship('Quote', backref='section', lazy='dynamic') quotes = db.relationship('Quote', backref='section', lazy='dynamic')
def build(self, quotes, commit=False): def build(self, quotes, commit=False, reset=False):
"""given an List of unformatted script quotes, automatically creates Quotes assigned to this Section""" """given an List of unformatted script quotes, automatically creates Quotes assigned to this Section"""
for quote in quotes: for quote in quotes:
if quote.lower().startswith('deleted scene'): if quote.lower().startswith('deleted scene'):
raise Exception(f'Deleted Scene Quote passed to Section Builder: "{quote}"') raise Exception(f'Deleted Scene Quote passed to Section Builder: "{quote}"')
match = re.match(quotePattern, quote) match = re.match(quotePattern, quote)
assert match != None, f"Quote '{quote}' could not be processed." assert match != None, f"Quote '{quote}' could not be processed."
q = Quote(section=self, speaker=match[1], text=match[2]) q = Quote(section=self, speaker=match[1].strip(), text=match[2].strip())
db.session.add(q) db.session.add(q)
if commit: db.session.commit() if commit: db.session.commit()
def clear(self, commit=True):
quotes = Quote.query.filter_by(section_id=self.id).all()
print(f'Clearing {len(quotes)} quotes from Section ID {self.id}')
for quote in quotes:
db.session.delete(quote)
if commit: db.session.commit()
def __repr__(self):
season = Episode.query.get(self.episode_id).first().id
quotes = len(Quote.query.filter_by(section_id=self.id).all())
return f'Section(id={self.id} episode={self.episode_id} season={season} quotes=[{quotes}...])'
class Quote(db.Model): class Quote(db.Model):
"""represents a specific quote by a specific speaker""" """represents a specific quote by a specific speaker"""
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)