change from XML to JSON (changed my mind)

This commit is contained in:
Xevion
2020-03-10 17:47:56 -05:00
parent c0d099ddb1
commit a04a211c55

View File

@@ -105,41 +105,46 @@ class Episode(db.Model):
return open(self.HTMLpath, "r", encoding="utf-8").read() return open(self.HTMLpath, "r", encoding="utf-8").read()
@property @property
def XMLpath(self): def JSONpath(self):
return os.path.join("app", "data", "preprocess", f"{self.season_id}-{self.number}.xml") """
Returns the path of the JSON file that holds the data for this episode.
@return: a path inside app/data/preprocess, named "<season_id>-<number>.json"
"""
return os.path.join("app", "data", "preprocess", f"{self.season_id}-{self.number}.json")
@property @property
def XMLdata(self): def JSONdata(self):
return open(self.XMLpath, "r", encoding="utf-8").read() """
Returns the raw JSON data for this episode
"""
return open(self.JSONpath, "r", encoding="utf-8").read()
@property @property
def downloaded(self): def downloaded(self):
return os.path.exists(self.path) """
Checks whether the raw episode script data has been downloaded.
@return: True if the raw HTML file exists on disk (and so was most likely downloaded properly), False otherwise
"""
return os.path.exists(self.HTMLpath)
def download(self, force=False): def download(self, force=False):
"""downloads data""" """downloads data"""
if not self.downloaded or force: if not self.downloaded or force:
print(f"Downloading e{self.number}/s{self.season_id} from {self.link}") print(f"Downloading e{self.number}/s{self.season_id} from {self.link}")
data = requests.get(self.link).text data = requests.get(self.link).text
open(self.path, "w+", encoding="utf-8").write(data) open(self.HTMLpath, "w+", encoding="utf-8").write(data)
def preprocess(self): def preprocess(self):
""" """
Runs pre-processing on this Episode, which creates and automatically formats an XML file full of the data Runs pre-processing on this Episode, which creates and automatically builds a JSON file full of the data
required to create an Episode properly, right before the Developer edits an episode and then enters it into the required to create an Episode properly, right before the Developer edits an episode and then enters it into the
database as a fully fledged 'processed' episode. database as a fully fledged 'processed' episode.
""" """
print(f'Pre-processing data for {self}') print(f'Pre-processing data for {self}')
def build(self):
"""downloads, processes, and automatically creates Sections and Quotes"""
print(f'Rebuilding s{self.season_id} e{self.number}') print(f'Rebuilding s{self.season_id} e{self.number}')
self.download() self.download()
soup = BeautifulSoup(self.data, "html.parser")
soup = BeautifulSoup(self.data, "html.parser")
sections = soup.find_all(attrs={"class": "quote"}) sections = soup.find_all(attrs={"class": "quote"})
deleted = 0 deleted = 0
@@ -167,6 +172,11 @@ class Episode(db.Model):
) )
s.build(quotes[1:] if isDeletedScene else quotes) s.build(quotes[1:] if isDeletedScene else quotes)
db.session.add(s) db.session.add(s)
def build(self):
"""downloads, processes, and automatically creates Sections and Quotes"""
self.built = True self.built = True
self.title = titles[self.season_id][self.number - 1] self.title = titles[self.season_id][self.number - 1]
print(self.title) print(self.title)