Merge pull request #1 from Xevion/database

Database Creation
This commit is contained in:
Xevion
2020-01-21 06:28:09 -06:00
committed by GitHub
8 changed files with 165 additions and 31 deletions

4
.gitignore vendored
View File

@@ -4,6 +4,8 @@ migrations/**
app.db app.db
keys.json keys.json
process.py process.py
app.db-journal
test.html
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/
@@ -133,4 +135,4 @@ venv.bak/
dmypy.json dmypy.json
# Pyre type checker # Pyre type checker
.pyre/ .pyre/

View File

@@ -4,37 +4,56 @@ import re
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from app import db, login from app import db, login
episodes = [5, 6, 22, 23, 14, 26, 24, 24, 24, 23] episodes = [5, 6, 22, 23, 14, 26, 24, 24, 24, 23] # Episode counts. Index 0 is for Webisodes.
quotePattern = r'(\w+):.+' quotePattern = r'([\w\s\.\',-\[\]\d&\"#]+):(.+)'
class Season(db.Model): class Season(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
episodes = db.relationship('Episode', backref='season', lazy='dynamic') episodes = db.relationship('Episode', backref='season', lazy='dynamic')
def __init__(self, **kwargs): def __init__(self, **kwargs):
assert 0 >= kwargs.get('id') <= 9, "Season ID must be 0-9 inclusive" assert 0 <= kwargs.get('id') <= 9, "Season ID must be 0-9 inclusive"
super(Season, self).__init__(**kwargs) super(Season, self).__init__(**kwargs)
def build(self): def build(self, rebuild=False):
"""runs build operations on every Episode under this season""" """runs build operations on every Episode under this season"""
print(f'Running build() on Season {self.id}')
for episode in range(1, episodes[self.id - 1] + 1): for episode in range(1, episodes[self.id - 1] + 1):
ep = Episode.query.filter_by(season=self, number=episode).first() ep = Episode.query.filter_by(season_id=self.id, number=episode).first()
if ep is None: if ep is None:
# Add the episode, then build # Add the episode, then build
print(f'Creating new Episode, Season {self.id}, Episode {episode}') print(f'Creating new Episode, Season {self.id}, Episode {episode}')
ep = Episode(season=self, number=episode) ep = Episode(season_id=self.id, number=episode)
db.session.add(ep) db.session.add(ep)
# I'm committing early, which is a bit taboo, but I'm more worried about what the Episode object will need while building. # I'm committing early, which is a bit taboo, but I'm more worried about what the Episode object will need while building.
db.session.commit() db.session.commit()
ep.build()
else: else:
# Regardless of whether it existed beforehand, the episode will be built. print(f'Rebuilding Season {self.id}, Episode {episode}')
if rebuild:
ep.build()
pass pass
ep.build()
@staticmethod
def create_all(build=True):
    """Ensure a Season row exists for each id from 1 to 9.

    Only Seasons missing from the database are created; when `build` is
    true, build() is run on each newly created Season. Existing rows are
    left untouched.
    """
    for season_id in range(1, 10):
        if Season.query.get(season_id) is not None:
            continue
        created = Season(id=season_id)
        db.session.add(created)
        if build:
            created.build()
    db.session.commit()
@staticmethod
def rebuild_all():
    """Call build(rebuild=True) on each Season returned by Season.query.all()."""
    stored_seasons = Season.query.all()
    for entry in stored_seasons:
        entry.build(rebuild=True)
@property @property
def episodes(self): def episodes(self):
"""returns a List of Episodes under this Season""" """returns a List of Episodes under this Season"""
return Episode.query.filter_by(season=self).all() return Episode.query.filter_by(season_id=self.id).all()
@property @property
def characters(self, sort): def characters(self, sort):
@@ -50,43 +69,96 @@ class Episode(db.Model):
def build(self): def build(self):
"""downloads, processes, and automatically creates Sections and Quotes""" """downloads, processes, and automatically creates Sections and Quotes"""
link = f'http://officequotes.net/no{self.season_id}-{str(self.episode).zfill(2)}.php' link = f'http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php'
data = requests.get(link).text data = requests.get(link).text
open('test.html', 'w+', encoding='utf-8').write(data)
soup = BeautifulSoup(data, 'html.parser') soup = BeautifulSoup(data, 'html.parser')
sections = soup.find_all(attrs={'class' : 'quote'}) sections = soup.find_all(attrs={'class' : 'quote'})
for section in sections: deleted = 0
quotes = []
for quote in section.find_all("b"): for section in sections:
quotes.append(quote.string + quote.next_sibling.string) isNewpeat = False
deleted = quotes[0].startswith('Deleted Scene'): quotes = []
for quote in section.find_all('b'):
if 'Newpeat' in quote.string:
quote = quote.next_sibling
isNewpeat = True
if quote is None or quote.next_sibling is None:
print('Quote is None or next sibling is None')
continue
quotes.append(quote.string + quote.next_sibling.string)
if len(quotes) == 0:
print(f'Section found with Zero quotes. Newpeat: {isNewpeat}')
continue
isDeletedScene = quotes[0].lower().startswith('deleted scene')
if isDeletedScene:
deleted += 1
s = Section(episode_id=self.id, deleted=deleted if isDeletedScene else -1, newpeat=isNewpeat)
s.build(quotes[1:] if isDeletedScene else quotes)
db.session.add(s)
db.session.commit()
def clear(self):
    """Delete every Section of this episode (and their Quotes) so it can be reprocessed."""
    stale = Section.query.filter_by(episode_id=self.id).all()
    print(f'Clearing {len(stale)} Sections of Ep {self.number} Season {self.season_id}')
    for sect in stale:
        # Each Section removes its own Quotes without committing; the single
        # commit below covers the whole episode.
        sect.clear(commit=False)
        db.session.delete(sect)
    db.session.commit()
@staticmethod
def clear_all():
    """Call clear() on each Episode returned by Episode.query.all()."""
    every_episode = Episode.query.all()
    for ep in every_episode:
        ep.clear()
def __repr__(self):
    """Return a debug string with this episode's id, season id, number and Section count."""
    # Let the database count the rows instead of loading every Section
    # object only to measure the length of the resulting list.
    section_count = Section.query.filter_by(episode_id=self.id).count()
    return f'Episode(id={self.id} s={self.season_id} ep={self.number} sects=[{section_count}...])'
@property
def scrapeURL(self):
    """URL built from this episode's season id and zero-padded episode number."""
    padded_number = str(self.number).zfill(2)  # pad the episode number to two characters
    return f'http://officequotes.net/no{self.season_id}-{padded_number}.php'
class Section(db.Model): class Section(db.Model):
"""represents a Section of Quotes, a specific scene with relevant dialog""" """represents a Section of Quotes, a specific scene with relevant dialog"""
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
episode_id = db.Column(db.Integer, db.ForeignKey('episode.id')) episode_id = db.Column(db.Integer, db.ForeignKey('episode.id'))
deleted = db.Column(db.Boolean) deleted = db.Column(db.Integer, default=-1)
newpeat = db.Column(db.Boolean, default=False)
quotes = db.relationship('Quote', backref='section', lazy='dynamic') quotes = db.relationship('Quote', backref='section', lazy='dynamic')
def build(self, quotes, commit=False): def build(self, quotes, commit=False, reset=False):
"""given an List of unformatted script quotes, automatically creates Quotes assigned to this Section""" """given an List of unformatted script quotes, automatically creates Quotes assigned to this Section"""
for quote in quotes: for i, quote in enumerate(quotes):
if quote.lower().startswith('deleted scene'): if quote.lower().startswith('deleted scene'):
raise Exception(f'Deleted Scene Quote passed to Section Builder: "{quote}"') raise Exception(f'Deleted Scene Quote passed to Section Builder: "{quote}"')
match = re.match(quotePattern, quote) # match = re.match(quotePattern, quote)
assert match != None, f"Quote '{quote}' could not be processed." # assert match != None, f"Quote '{quote}' could not be processed."
q = Quote(section=self, speaker=match[1], text=match[2]) # q = Quote(section=self, speaker=match[1].strip(), text=match[2].strip())
mark = quote.find(':')
q = Quote(section=self, speaker=quote[:mark], text=quote[mark + 1:], section_index=i)
db.session.add(q) db.session.add(q)
if commit: db.session.commit() if commit: db.session.commit()
def clear(self, doprint=True, commit=True):
    """Delete all Quotes whose section_id matches this Section.

    doprint: print how many quotes are about to be removed.
    commit: commit the session afterwards; pass False to let the caller commit.
    """
    doomed = Quote.query.filter_by(section_id=self.id).all()
    if doprint:
        print(f'Clearing {len(doomed)} quotes from Section ID {self.id}')
    for entry in doomed:
        db.session.delete(entry)
    if commit:
        db.session.commit()
def __repr__(self):
    """Return a debug string: Section id, owning season id / episode id, and Quote count."""
    episode = Episode.query.get(self.episode_id)
    # Report the season the episode belongs to. Reading `.id` here would only
    # repeat the episode id that is already printed after the slash.
    # A Section whose episode row is missing is shown with season None
    # rather than making repr() raise.
    season = episode.season_id if episode is not None else None
    # Count in the database instead of materialising every Quote row.
    quotes = Quote.query.filter_by(section_id=self.id).count()
    return f'Section(id={self.id} S-EP={season}/{self.episode_id} quotes=[{quotes}...])'
class Quote(db.Model): class Quote(db.Model):
"""represents a specific quote by a specific speaker""" """represents a specific quote by a specific speaker"""
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
section_id = db.Column(db.Integer, db.ForeignKey('section.id')) # The section this quote belongs to. section_id = db.Column(db.Integer, db.ForeignKey('section.id')) # The section this quote belongs to.
speaker = db.Column(db.String(32)) # The name of a character speaker = db.Column(db.String(32)) # The name of a character
text = db.Column(db.String(512)) # The content of the Quote. Usually a sentence, sometimes more. text = db.Column(db.String(512)) # The content of the Quote. Usually a sentence, sometimes more.
section_index = db.Column(db.Integer) # The index of this quote in the section section_index = db.Column(db.Integer) # The index of this quote in the section
def __repr__(self):
    """Return a debug string with the speaker and at most the first 50 characters of text."""
    # The column is not declared NOT NULL, so text may be None; treat it as empty.
    text = self.text or ''
    # Append the ellipsis whenever anything was cut off. Comparing against 51
    # would silently drop the last character of a 51-character text.
    suffix = '...' if len(text) > 50 else ''
    return f"Quote(speaker='{self.speaker}' text='{text[:50]}{suffix}')"

View File

@@ -1,6 +1,19 @@
from flask import send_from_directory, redirect, url_for, render_template, request
from app.models import Season, Episode
from app import app from app import app
from flask import send_from_directory, redirect, url_for, render_template
@app.route('/') @app.route('/')
def index(): def index():
return 'WIP' return 'WIP'
@app.route('/view')
def view():
    """Render the season list, one season, or one episode, chosen by query arguments.

    `season` selects a season and `episode` additionally selects one of its
    episodes. Without a season the list of all seasons is rendered.
    """
    season_id = request.args.get('season', default=-1, type=int)
    episode_number = request.args.get('episode', default=-1, type=int)
    if season_id == -1:
        # No season requested; the episode argument is ignored in this case.
        return render_template('view.html', seasons=Season.query.all())
    if episode_number == -1:
        found_season = Season.query.filter_by(id=season_id).first_or_404()
        return render_template('season.html', season=found_season)
    found_episode = Episode.query.filter_by(season_id=season_id, number=episode_number).first_or_404()
    return render_template('episode.html', episode=found_episode)

View File

@@ -0,0 +1,13 @@
<!DOCTYPE html>
<html lang="en">
<head>
{# Base layout: child templates fill the `head` and `body` blocks; `title` is optional. #}
<meta charset="utf-8">
<title>The Office Quotes{% if title %} - {{ title }}{% endif %}</title>
{% block head %}
{% endblock head %}
</head>
<body>
{% block body %}
{% endblock body %}
</body>
</html>

View File

@@ -0,0 +1,4 @@
{% extends 'base.html' %}
{# Intermediate layout: overrides `body` and renders the parent's body block via super(). #}
{% block body %}
{{ super() }}
{% endblock body %}

View File

@@ -0,0 +1,10 @@
{% extends 'content.html' %}
{# Renders every quote of `episode` as "<b>speaker:</b> text", with a blank line between sections. #}
{# NOTE(review): relies on `episode.sections`, which is not defined in the visible model code - confirm it exists. #}
{% block body %}
{% for section in episode.sections %}
{% for quote in section.quotes %}
<b>{{ quote.speaker }}:</b> {{ quote.text }}
<br>
{% endfor %}
<br>
{% endfor %}
{% endblock body %}

11
app/templates/season.html Normal file
View File

@@ -0,0 +1,11 @@
{% extends 'base.html' %}
{# Shows the heading for `season` and one link per episode in `season.episodes`. #}
{# Links are built with url_for so the query string is generated (and its "&" escaped) by Flask/Jinja rather than by hand. #}
{% block body %}
{{ super() }}
Season {{ season.id }}
<br>
{% for episode in season.episodes %}
<a href="{{ url_for('view', season=season.id, episode=episode.number) }}">Episode {{ episode.number }}</a>
<br>
{% endfor %}
{% endblock body %}

9
app/templates/view.html Normal file
View File

@@ -0,0 +1,9 @@
{% extends 'base.html' %}
{# Lists one link per entry in `seasons`; url_for resolves the `view` endpoint instead of hard-coding its path. #}
{% block body %}
{{ super() }}
{% for season in seasons %}
<a href="{{ url_for('view', season=season.id) }}">Season {{ season.id }}</a>
<br>
{% endfor %}
{% endblock body %}