mirror of
https://github.com/Xevion/the-office.git
synced 2025-12-10 06:08:51 -06:00
change from XML to JSON (changed my mind)
This commit is contained in:
@@ -105,41 +105,46 @@ class Episode(db.Model):
|
|||||||
return open(self.HTMLpath, "r", encoding="utf-8").read()
|
return open(self.HTMLpath, "r", encoding="utf-8").read()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def XMLpath(self):
|
def JSONpath(self):
|
||||||
return os.path.join("app", "data", "preprocess", f"{self.season_id}-{self.number}.xml")
|
"""
|
||||||
|
returns the path for the JSON file with data for this episode
|
||||||
|
@return: a path
|
||||||
|
"""
|
||||||
|
return os.path.join("app", "data", "preprocess", f"{self.season_id}-{self.number}.json")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def XMLdata(self):
|
def JSONdata(self):
|
||||||
return open(self.XMLpath, "r", encoding="utf-8").read()
|
"""
|
||||||
|
Returns the raw JSON data for this episode
|
||||||
|
"""
|
||||||
|
return open(self.JSONpath, "r", encoding="utf-8").read()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def downloaded(self):
|
def downloaded(self):
|
||||||
return os.path.exists(self.path)
|
"""
|
||||||
|
Checks whether the raw episode script data has been downloaded.
|
||||||
|
@return: boolean stating the existence (and thus likely properly downloaded) of raw data
|
||||||
|
"""
|
||||||
|
return os.path.exists(self.HTMLpath)
|
||||||
|
|
||||||
def download(self, force=False):
|
def download(self, force=False):
|
||||||
"""downloads data"""
|
"""downloads data"""
|
||||||
if not self.downloaded or force:
|
if not self.downloaded or force:
|
||||||
print(f"Downloading e{self.number}/s{self.season_id} from {self.link}")
|
print(f"Downloading e{self.number}/s{self.season_id} from {self.link}")
|
||||||
data = requests.get(self.link).text
|
data = requests.get(self.link).text
|
||||||
open(self.path, "w+", encoding="utf-8").write(data)
|
open(self.HTMLpath, "w+", encoding="utf-8").write(data)
|
||||||
|
|
||||||
def preprocess(self):
|
def preprocess(self):
|
||||||
"""
|
"""
|
||||||
Runs pre-processing on this Episode, which creates and automatically formats a XML file full of the data
|
Runs pre-processing on this Episode, which creates and automatically builds a JSON file full of the data
|
||||||
required to create a Episode properly, right before the Developer edits a episode and then enters it into the
|
required to create a Episode properly, right before the Developer edits a episode and then enters it into the
|
||||||
database as a full fledged 'processed' episode.
|
database as a full fledged 'processed' episode.
|
||||||
"""
|
"""
|
||||||
print(f'Pre-processing data for {self}')
|
print(f'Pre-processing data for {self}')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def build(self):
|
|
||||||
"""downloads, processes, and automatically creates Sections and Quotes"""
|
|
||||||
print(f'Rebuilding s{self.season_id} e{self.number}')
|
print(f'Rebuilding s{self.season_id} e{self.number}')
|
||||||
self.download()
|
self.download()
|
||||||
soup = BeautifulSoup(self.data, "html.parser")
|
|
||||||
|
|
||||||
|
soup = BeautifulSoup(self.data, "html.parser")
|
||||||
sections = soup.find_all(attrs={"class": "quote"})
|
sections = soup.find_all(attrs={"class": "quote"})
|
||||||
deleted = 0
|
deleted = 0
|
||||||
|
|
||||||
@@ -167,6 +172,11 @@ class Episode(db.Model):
|
|||||||
)
|
)
|
||||||
s.build(quotes[1:] if isDeletedScene else quotes)
|
s.build(quotes[1:] if isDeletedScene else quotes)
|
||||||
db.session.add(s)
|
db.session.add(s)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def build(self):
|
||||||
|
"""downloads, processes, and automatically creates Sections and Quotes"""
|
||||||
self.built = True
|
self.built = True
|
||||||
self.title = titles[self.season_id][self.number - 1]
|
self.title = titles[self.season_id][self.number - 1]
|
||||||
print(self.title)
|
print(self.title)
|
||||||
|
|||||||
Reference in New Issue
Block a user