import datetime
import os
import pprint
import re
import sys
import time

import bs4
import requests
import progressbar as pb
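
# Third-party dependencies: requests, bs4 (beautifulsoup4), and a progress bar
# library; the `pb.progressbar(...)` call below matches the progressbar2 API,
# so that is most likely the package to install.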


# A basic helper around the requests module: fetches a URL, raises on any
# non-200 status, and records the timing and byte size of each page downloaded.
def simpleReq(url):
    start = time.time()
    data = requests.get(url)
    if data.status_code != 200:
        raise ConnectionError(f'A status code other than 200 was received. ({data.status_code} @ {url})')
    end = time.time()
    # len(data.content) is the raw body size in bytes, which reflects the
    # actual download better than re-encoding the decoded text would.
    request_timings.append((end - start, len(data.content)))
    return data.text
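
# Note: `request_timings` is defined in the constants section below; simpleReq
# is only ever called after that point, so the forward reference is safe.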


def parseComment(name, url, soup):
    # The reflection block holds the user's own comment on their solution.
    # soup.find() returns None when no such tag exists, so guard against that.
    comment = soup.find(get_reflection)
    if comment is not None and comment['class'] == ['reflection']:
        return '\n\n'.join(p.text for p in comment.find_all('p'))
    return ''
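
# parseComment takes (name, url, soup) so it can be splatted straight from the
# solution tuples built below, even though only `soup` is actually used.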


# This script works on the assumption that you have a setup similar to mine:
# a GitHub repository linked so that my Exercism progress is kept in one place
# and available on any computer. It may break if a started track has no
# matching folder on disk (e.g. a `java` track started on the site but no
# `java` folder locally).


# Constants & Reused Lambdas
username = 'Xevion'  # CaSe SeNsItIvE username from `Exercism.io`.
t1 = time.time()
request_timings = []
descend = lambda thing: list(thing.children)
get_solutions = lambda tag: ['solution'] == tag['class'] if tag.has_attr('class') else False
get_reflection = lambda tag: any('reflection' in classtag for classtag in tag['class']) if tag.has_attr('class') else False
get_url = lambda url: re.findall(r'exercism\.io/tracks/([a-z-]+)/exercises/', url)[0]
get_name_from_url = lambda url: re.search(r'exercism\.io/tracks/[a-z-]+/exercises/([a-z-]+)/solutions/', url).group(1)
pp = pprint.PrettyPrinter()
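
# For example, given a solution URL shaped like the patterns above
# (hypothetical IDs):
#   get_url('https://exercism.io/tracks/python/exercises/leap/solutions/abc123')           -> 'python'
#   get_name_from_url('https://exercism.io/tracks/python/exercises/leap/solutions/abc123') -> 'leap'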

print('Requesting Profile Page Data')
data = simpleReq('https://exercism.io/profiles/{}'.format(username))
soup = bs4.BeautifulSoup(data, 'html.parser')

comments_top = """# {0} Track Comments\n\nThis page represents all my comments from my solutions currently hosted on [Exercism.io](https://exercism.io/). You can view my profile [here](https://exercism.io/profiles/Xevion).

The reason for this is simply to have a place where I can collect my comments, as well as to have some fun with Python and webscraping. Exercise file and exercise submission links will be provided for each and every exercise.

This file is for the **{0}** track and contains **{1}** submissions, **{2}** of which have comments. This file was built {3}.\n\n"""


# Find all completed exercises, extract each solution's display name and URL,
# and queue them for parsing. Also builds a dictionary of all tracks found.
solutions = soup.find_all(get_solutions)
solutions = [(descend(descend(solution)[2])[1].text,
              'https://exercism.io{}'.format(descend(solution.parent)[1]['href']))
             for solution in solutions]
tracks = {track: [] for track in dict.fromkeys(get_url(url) for name, url in solutions)}
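
# The nested descend() indexing above walks the exact markup of the profile
# page's solution cards; if Exercism ever changes that HTML, these indices are
# the first thing that will break.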


# Get all comment data & parse, then put into the track dictionary.
print('Requesting Page Data for {} solution{} from {} {}'.format(
    len(solutions), 's' if len(solutions) != 1 else '',
    len(tracks), 'different tracks' if len(tracks) != 1 else 'track'))

# Download each solution page, with a progress bar.
temp = []
for solution in pb.progressbar(solutions):
    soup = bs4.BeautifulSoup(simpleReq(solution[1]), 'html.parser')
    temp.append((solution[0], solution[1], soup))
solutions = temp
solutions = [{'name': name, 'url': url, 'comment': parseComment(name, url, soup)}
             for name, url, soup in solutions]
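
# Each entry now has the shape (values hypothetical):
#   {'name': 'Leap', 'url': 'https://exercism.io/tracks/python/exercises/leap/solutions/...', 'comment': '...'}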


# Send each solution to its appropriate track.
for solution in solutions:
    track = get_url(solution['url'])
    tracks[track].append(solution)


# Parse into a readable markdown format.
print('Parsing all solution comments')
for track in tracks:
    # Build the path and format the top portion of the markdown file.
    path = os.path.join(sys.path[0], track, 'COMMENTS.md')
    submission_comments = len([item for item in tracks[track] if item['comment'] != ''])
    top = comments_top.format(track.title(), len(tracks[track]), submission_comments,
                              datetime.datetime.utcnow().strftime('on **%d-%m-%Y** at **%H:%M:%S UTC**'))
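
    # The timestamp renders day-month-year, e.g. 'on **06-12-2025** at **15:15:01 UTC**'.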

    # Add all the comments with proper formatting and links.
    markdown_comments = []
    for submission in tracks[track]:
        true_name = get_name_from_url(submission['url'])
        # Note: this assumes Python-style solution filenames ('.py', with
        # hyphens swapped for underscores); other tracks would need their own
        # extension and naming convention.
        file_url = './{}/{}'.format(true_name, true_name.replace('-', '_') + '.py')
        comment = "## {}\n\n[Link to File]({}) | [Link to Submission]({})\n\n{}".format(
            submission['name'], file_url, submission['url'], submission['comment'])
        markdown_comments.append(comment)

    # Join into a single string, then write it to the file.
    markdown = top + '\n\n'.join(markdown_comments)
    with open(path, 'w+') as file:
        file.write(markdown)
    print('Wrote {} KiB file for {} track'.format(round(os.path.getsize(path) / 1024, 2), track))

t2 = time.time()

# Summary statistics for the whole run.
total_mib = round(sum(size for duration, size in request_timings) / (1024 ** 2), 2)
avg_ms = round(sum(duration for duration, size in request_timings) / len(request_timings) * 1000, 2)
print('Downloaded {} MiB in webpages.\nDownloaded & parsed in {} seconds with an average request time of {} ms.'.format(
    total_mib, round(t2 - t1, 2), avg_ms))