Finish off the "download once" protocol; ignore downloaded .html files in app/data

This commit is contained in:
Xevion
2020-03-09 02:53:51 -05:00
parent b4d2922163
commit edd5b49dfe
2 changed files with 14 additions and 6 deletions

1
.gitignore vendored
View File

@@ -7,6 +7,7 @@ keys.json
process.py process.py
app.db-journal app.db-journal
test.html test.html
app/data/*.html
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/

View File

@@ -90,25 +90,32 @@ class Episode(db.Model):
) # sections of quotes under this episode ) # sections of quotes under this episode
@property @property
def links(self): def link(self):
return f"http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php" return f"http://officequotes.net/no{self.season_id}-{str(self.number).zfill(2)}.php"
@property @property
def path(self): def path(self):
return os.path.join('app', 'data', f'{self.season_id}-{self.number}') return os.path.join('app', 'data', f'{self.season_id}-{self.number}.html')
@property @property
def downloaded(self): def downloaded(self):
return os.path.exists(self.path) return os.path.exists(self.path)
def download(self): def download(self, force=False):
"""downloads data""" """downloads data"""
data = requests.get(link).text print(f'Downloading e{self.number}/s{self.season_id} from {self.link}')
open(self.path, "w+", encoding="utf-8").write(data) if not self.downloaded or force:
data = requests.get(self.link).text
open(self.path, "w+", encoding="utf-8").write(data)
@property
def data(self):
return open(self.path, 'r', encoding="utf-8").read()
def build(self): def build(self):
"""downloads, processes, and automatically creates Sections and Quotes""" """downloads, processes, and automatically creates Sections and Quotes"""
soup = BeautifulSoup(data, "html.parser") self.download()
soup = BeautifulSoup(self.data, "html.parser")
sections = soup.find_all(attrs={"class": "quote"}) sections = soup.find_all(attrs={"class": "quote"})
deleted = 0 deleted = 0