Add Episode build scaffolding and a Quote section index column, with comments on the Quote columns

Still needs a lot of work before testing can even be attempted.
This commit is contained in:
Xevion
2020-01-19 22:50:06 -06:00
parent 81cdba5fb6
commit 8d720f27e9

View File

@@ -1,5 +1,7 @@
import requests
import re import re
from bs4 import BeautifulSoup
from app import db, login from app import db, login
episodes = [5, 6, 22, 23, 14, 26, 24, 24, 24, 23] episodes = [5, 6, 22, 23, 14, 26, 24, 24, 24, 23]
@@ -47,7 +49,17 @@ class Episode(db.Model):
sections = db.relationship('Section', backref='episode', lazy='dynamic') # sections of quotes under this episode sections = db.relationship('Section', backref='episode', lazy='dynamic') # sections of quotes under this episode
def build(self):
    """downloads and parses this episode's quote page (work in progress)

    Fetches the officequotes.net page whose URL is derived from
    ``self.season_id`` and ``self.episode``, then walks every element
    carrying the ``quote`` CSS class. Within each such element, the text of
    every ``<b>`` tag is joined with the text of the node that immediately
    follows it (presumably speaker name + spoken line -- confirm against the
    live markup). A section is considered deleted when its first joined
    entry starts with 'Deleted Scene'.

    NOTE: nothing is persisted or returned yet; the parsed ``quotes`` and
    the ``deleted`` flag are computed per section and then discarded.

    Raises:
        requests.RequestException: if the page cannot be fetched, the
            request times out, or the server answers with an error status.
    """
    link = f'http://officequotes.net/no{self.season_id}-{str(self.episode).zfill(2)}.php'
    # A timeout keeps a stalled server from hanging the caller forever, and
    # an error status is surfaced instead of parsing an error page.
    response = requests.get(link, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    for section in soup.find_all(attrs={'class': 'quote'}):
        quotes = []
        for quote in section.find_all('b'):
            # ``.string`` is None when a tag has several children, and a <b>
            # at the very end of its parent has no next sibling; fall back
            # to an empty string rather than raising.
            speaker = quote.string or ''
            sibling = quote.next_sibling
            spoken = (sibling.string or '') if sibling is not None else ''
            quotes.append(speaker + spoken)

        if not quotes:
            # No <b> tags in this element, so there is nothing to classify.
            continue

        # TODO: hand ``quotes`` and ``deleted`` over to a Section once that
        # part of the pipeline exists; for now the flag is only computed.
        deleted = quotes[0].startswith('Deleted Scene')
@property @property
def scrapeURL(self): def scrapeURL(self):
@@ -57,6 +69,7 @@ class Section(db.Model):
"""represents a Section of Quotes, a specific scene with relevant dialog""" """represents a Section of Quotes, a specific scene with relevant dialog"""
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
episode_id = db.Column(db.Integer, db.ForeignKey('episode.id')) episode_id = db.Column(db.Integer, db.ForeignKey('episode.id'))
deleted = db.Column(db.Boolean)
quotes = db.relationship('Quote', backref='section', lazy='dynamic') quotes = db.relationship('Quote', backref='section', lazy='dynamic')
def build(self, quotes, commit=False): def build(self, quotes, commit=False):
@@ -73,6 +86,7 @@ class Section(db.Model):
class Quote(db.Model): class Quote(db.Model):
"""represents a specific quote by a specific speaker""" """represents a specific quote by a specific speaker"""
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
section_id = db.Column(db.Integer, db.ForeignKey('section.id')) section_id = db.Column(db.Integer, db.ForeignKey('section.id')) # The section this quote belongs to.
speaker = db.Column(db.String(32)) speaker = db.Column(db.String(32)) # The name of a character
text = db.Column(db.String(512)) text = db.Column(db.String(512)) # The content of the Quote. Usually a sentence, sometimes more.
section_index = db.Column(db.Integer) # The index of this quote in the section