docstrings, come up with more ideal system for processing Blogs, process, getPage, pageURL docstrings and typehints

This commit is contained in:
Xevion
2020-05-04 06:04:30 -05:00
parent 7b9cf60091
commit 7810a1930b
+35 -6
View File
@@ -3,7 +3,13 @@ main.py
Contains classes for managing and downloading media from Tumblr
"""
from typing import List
class Blog:
"""
A Blog object assists with downloading media from a specific blog.
It holds very basic information for cycling through all
"""
def __init__(self, blogid, download: bool = True, max_pages: int = 99999):
"""
:param download: If true, begin downloading immediately following initialization.
@@ -13,15 +19,38 @@ class Blog:
self.id = blogid
self.max_pages = max_pages
self.media_urls = []
self.pagenum = 0
self.media_queue = [] # Stores URLs to media
self.processed_media = [] # Stores URLs for successfully processed media
self.pages = [] # Stores Request objects for every page. Index 0 => Page 1
if download:
self.process()
def process(self) -> None:
def process(self, require_download: int = -1) -> None:
"""
Processes the entire Tumblr blog acquiring all Media URLs
:param require_download: The number of media endpoints the function will pass before downloading media early
"""
for page in range(1, self.max_pages):
url = self.pageURL(page)
urls = self.getPage(page)
if urls:
else:
print(f'Last page processed ({page}).')
break
def pageURL(self, page):
return f'https://{self.id}.tumblr.com' + f'/page/{page}' if page > 0 else ''
def getPage(self, page: int) -> List[str]:
"""
Processes a Tumblr page on a blog, locating all media URLs.
:param page: The page index
:return: A list of URLs for pictures or videos found on the associated page
"""
pass
def pageURL(self, page) -> str:
"""
Returns the appropriate URL for a given page, for a given Tumblr blog
:param page: The page index
:return: The URL-like string containing the URL to the given page
"""
return f'https://{self.id}.tumblr.com{f"/page/{page}" if page > 0 else ""}'