diff --git a/.gitignore b/.gitignore index 0eec845..f51df2f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ migrations/** app.db keys.json process.py +app.db-journal +test.html # Byte-compiled / optimized / DLL files __pycache__/ @@ -133,4 +135,4 @@ venv.bak/ dmypy.json # Pyre type checker -.pyre/ \ No newline at end of file +.pyre/ diff --git a/app/models.py b/app/models.py index 4c9976e..aa454a4 100644 --- a/app/models.py +++ b/app/models.py @@ -4,37 +4,56 @@ import re from bs4 import BeautifulSoup from app import db, login -episodes = [5, 6, 22, 23, 14, 26, 24, 24, 24, 23] -quotePattern = r'(\w+):.+' +episodes = [5, 6, 22, 23, 14, 26, 24, 24, 24, 23] # Episode counts. Index 0 is for Webisodes. +quotePattern = r'([\w\s\.\',-\[\]\d&\"#]+):(.+)' class Season(db.Model): id = db.Column(db.Integer, primary_key=True) episodes = db.relationship('Episode', backref='season', lazy='dynamic') def __init__(self, **kwargs): - assert 0 >= kwargs.get('id') <= 9, "Season ID must be 0-9 inclusive" + assert 0 <= kwargs.get('id') <= 9, "Season ID must be 0-9 inclusive" super(Season, self).__init__(**kwargs) - def build(self): + def build(self, rebuild=False): """runs build operations on every Episode under this season""" + print(f'Running build() on Season {self.id}') for episode in range(1, episodes[self.id - 1] + 1): - ep = Episode.query.filter_by(season=self, number=episode).first() + ep = Episode.query.filter_by(season_id=self.id, number=episode).first() if ep is None: # Add the episode, then build print(f'Creating new Episode, Season {self.id}, Episode {episode}') - ep = Episode(season=self, number=episode) + ep = Episode(season_id=self.id, number=episode) db.session.add(ep) # I'm commiting early, which is a bit taboo, but I'm more worried about what the Episode object will need while building. db.session.commit() + ep.build() else: - # Regardless of whether it existended before hand, the episode will be built. + print(f'Rebuilding Season {self.id}, Episode {episode}') + if rebuild: + ep.build() pass - ep.build() + + @staticmethod + def create_all(build=True): + """creates new Season objects and runs build() on them""" + for i in range(1, 10): + if Season.query.get(i) is None: + s = Season(id=i) + db.session.add(s) + if build: s.build() + db.session.commit() + + @staticmethod + def rebuild_all(): + """runs build() on all Season objects in database""" + for season in Season.query.all(): + season.build(rebuild=True) @property - def episodes(self): + def episodes(self): """returns a List of Episodes under this Season""" - return Episode.query.filter_by(season=self).all() + return Episode.query.filter_by(season_id=self.id).all() @property def characters(self, sort): @@ -50,43 +69,96 @@ class Episode(db.Model): def build(self): """downloads, processes, and automatically creates Sections and Quotes""" - link = f'http://officequotes.net/no{self.season_id}-{str(self.episode).zfill(2)}.php' + link = f'http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php' data = requests.get(link).text + open('test.html', 'w+', encoding='utf-8').write(data) soup = BeautifulSoup(data, 'html.parser') - sections = soup.find_all(attrs={'class' : 'quote'}) - for section in sections: - quotes = [] - for quote in section.find_all("b"): - quotes.append(quote.string + quote.next_sibling.string) - deleted = quotes[0].startswith('Deleted Scene'): + sections = soup.find_all(attrs={'class' : 'quote'}) + deleted = 0 + + for section in sections: + isNewpeat = False + quotes = [] + for quote in section.find_all('b'): + if 'Newpeat' in quote.string: + quote = quote.next_sibling + isNewpeat = True + if quote is None or quote.next_sibling is None: + print('Quote is None or next sibling is None') + continue + quotes.append(quote.string + quote.next_sibling.string) + if len(quotes) == 0: + print(f'Section found with Zero quotes. Newpeat: {isNewpeat}') + continue + isDeletedScene = quotes[0].lower().startswith('deleted scene') + if isDeletedScene: + deleted += 1 + s = Section(episode_id=self.id, deleted=deleted if isDeletedScene else -1, newpeat=isNewpeat) + s.build(quotes[1:] if isDeletedScene else quotes) + db.session.add(s) + db.session.commit() + + def clear(self): + """delete all sections relevant to this episode in order to reprocess""" + sections = Section.query.filter_by(episode_id=self.id).all() + print(f'Clearing {len(sections)} Sections of Ep {self.number} Season {self.season_id}') + for section in sections: + section.clear(commit=False) + db.session.delete(section) + db.session.commit() + + @staticmethod + def clear_all(): + """runs clear() on every episode in the database""" + for episode in Episode.query.all(): + episode.clear() + + def __repr__(self): + sections = len(Section.query.filter_by(episode_id=self.id).all()) + return f'Episode(id={self.id} s={self.season_id} ep={self.number} sects=[{sections}...])' - @property - def scrapeURL(self): - return f'http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php' - class Section(db.Model): """represents a Section of Quotes, a specific scene with relevant dialog""" id = db.Column(db.Integer, primary_key=True) episode_id = db.Column(db.Integer, db.ForeignKey('episode.id')) - deleted = db.Column(db.Boolean) + deleted = db.Column(db.Integer, default=-1) + newpeat = db.Column(db.Boolean, default=False) quotes = db.relationship('Quote', backref='section', lazy='dynamic') - def build(self, quotes, commit=False): + def build(self, quotes, commit=False, reset=False): """given an List of unformatted script quotes, automatically creates Quotes assigned to this Section""" - for quote in quotes: + for i, quote in enumerate(quotes): if quote.lower().startswith('deleted scene'): raise Exception(f'Deleted Scene Quote passed to Section Builder: "{quote}"') - match = re.match(quotePattern, quote) - assert match != None, f"Quote '{quote}' could not be processed." - q = Quote(section=self, speaker=match[1], text=match[2]) + # match = re.match(quotePattern, quote) + # assert match != None, f"Quote '{quote}' could not be processed." + # q = Quote(section=self, speaker=match[1].strip(), text=match[2].strip()) + mark = quote.find(':') + q = Quote(section=self, speaker=quote[:mark], text=quote[mark + 1:], section_index=i) db.session.add(q) if commit: db.session.commit() + def clear(self, doprint=True, commit=True): + """delete all quotes relevant to this section""" + quotes = Quote.query.filter_by(section_id=self.id).all() + if doprint: print(f'Clearing {len(quotes)} quotes from Section ID {self.id}') + for quote in quotes: + db.session.delete(quote) + if commit: db.session.commit() + + def __repr__(self): + season = Episode.query.get(self.episode_id).id + quotes = len(Quote.query.filter_by(section_id=self.id).all()) + return f'Section(id={self.id} S-EP={season}/{self.episode_id} quotes=[{quotes}...])' + class Quote(db.Model): """represents a specific quote by a specific speaker""" id = db.Column(db.Integer, primary_key=True) section_id = db.Column(db.Integer, db.ForeignKey('section.id')) # The section this quote belongs to. speaker = db.Column(db.String(32)) # The name of a character text = db.Column(db.String(512)) # The content of the Quote. Usually a sentence, sometimes more. - section_index = db.Column(db.Integer) # The index of this quote in the section \ No newline at end of file + section_index = db.Column(db.Integer) # The index of this quote in the section + + def __repr__(self): + return f"Quote(speaker='{self.speaker}' text='{self.text[:50]}{'...' if len(self.text) > 51 else ''}')" \ No newline at end of file diff --git a/app/routes.py b/app/routes.py index 91130d5..9f5ae46 100644 --- a/app/routes.py +++ b/app/routes.py @@ -1,6 +1,19 @@ +from flask import send_from_directory, redirect, url_for, render_template, request +from app.models import Season, Episode from app import app -from flask import send_from_directory, redirect, url_for, render_template @app.route('/') def index(): - return 'WIP' \ No newline at end of file + return 'WIP' + +@app.route('/view') +def view(): + season = request.args.get('season', default=-1, type=int) + episode = request.args.get('episode', default=-1, type=int) + + if season != -1: + if episode != -1: + return render_template('episode.html', episode=Episode.query.filter_by(season_id=season, number=episode).first_or_404()) + else: + return render_template('season.html', season=Season.query.filter_by(id=season).first_or_404()) + return render_template('view.html', seasons=Season.query.all()) \ No newline at end of file diff --git a/app/templates/base.html b/app/templates/base.html index e69de29..4cbd66b 100644 --- a/app/templates/base.html +++ b/app/templates/base.html @@ -0,0 +1,13 @@ + + + + The Office Quotes{% if title %} - {{ title }}{% endif %} + {% block head %} + {% endblock head %} + + + + {% block body %} + + {% endblock body %} + \ No newline at end of file diff --git a/app/templates/content.html b/app/templates/content.html new file mode 100644 index 0000000..a185b0d --- /dev/null +++ b/app/templates/content.html @@ -0,0 +1,4 @@ +{% extends 'base.html' %} +{% block body %} +{{ super() }} +{% endblock body %} \ No newline at end of file diff --git a/app/templates/episode.html b/app/templates/episode.html new file mode 100644 index 0000000..bbdfcf8 --- /dev/null +++ b/app/templates/episode.html @@ -0,0 +1,10 @@ +{% extends 'content.html' %} +{% block body %} +{% for section in episode.sections %} +{% for quote in section.quotes %} +{{ quote.speaker }}: {{ quote.text }} +
+{% endfor %} +
+{% endfor %} +{% endblock body %} \ No newline at end of file diff --git a/app/templates/season.html b/app/templates/season.html new file mode 100644 index 0000000..aec2de8 --- /dev/null +++ b/app/templates/season.html @@ -0,0 +1,11 @@ +{% extends 'base.html' %} +{% block body %} +{{ super() }} +Season {{ season.id }} +
+{% for episode in season.episodes %} +Episode {{ episode.number }} +
+{% endfor %} + +{% endblock body %} \ No newline at end of file diff --git a/app/templates/view.html b/app/templates/view.html new file mode 100644 index 0000000..2af8502 --- /dev/null +++ b/app/templates/view.html @@ -0,0 +1,9 @@ +{% extends 'base.html' %} +{% block body %} +{{ super() }} +{% for season in seasons %} +Season {{ season.id }} +
+{% endfor %} + +{% endblock body %} \ No newline at end of file