properly working refactor of markdown

2026-01-31 06:24:15 -06:00 · 2019-07-14 00:13:30 -05:00
parent a3ef604afa
commit 6cfda7de65
1 changed files with 47 additions and 36 deletions
@@ -2,52 +2,63 @@ import re

 def parse(markdown):
    lines = markdown.split('\n')
-    res = ''
-    in_list = False
-    in_list_append = False
+    result, in_list, finish_list = '', False, False
+
    for line in lines:
+        # Heading formatting
        if re.match('###### (.*)', line) is not None:
            line = '<h6>' + line[7:] + '</h6>'
        elif re.match('## (.*)', line) is not None:
            line = '<h2>' + line[3:] + '</h2>'
        elif re.match('# (.*)', line) is not None:
            line = '<h1>' + line[2:] + '</h1>'
-        m = re.match(r'\* (.*)', line)
-        if m:
-            in_list, is_bold, is_italic = True, False, False
-            current_line = m.group(1)
-            # Bold
-            strongmatch = re.match('(.*)__(.*)__(.*)', current_line)
-            if strongmatch:
-                current_line = strongmatch.group(1) + '<strong>' + \
-                    strongmatch.group(2) + '</strong>' + strongmatch.group(3)
-                is_bold = True
-            # Italics
-            italicmatch = re.match('(.*)_(.*)_(.*)', current_line)
-            if italicmatch:
-                current_line = italicmatch.group(1) + '<em>' + italicmatch.group(2) + \
-                    '</em>' + italicmatch.group(3)
-                is_italic = True

-            line = '<ul><li>' + current_line + '</li></ul>' if not in_list else '<li>' + current_line + '</li>'
+        # List detection  
+        is_list = re.match(r'\* (.*)', line) or False
+        line = is_list.group(1) if is_list else line
+
+        # Bold
+        strongmatch = re.match('(.*)__(.*)__(.*)', line)
+        if strongmatch:
+            line = strongmatch.group(1) + '<strong>' + \
+                strongmatch.group(2) + '</strong>' + strongmatch.group(3)
+
+        # Italics
+        italicmatch = re.match('(.*)_(.*)_(.*)', line)
+        if italicmatch:
+            line = italicmatch.group(1) + '<em>' + italicmatch.group(2) + \
+                '</em>' + italicmatch.group(3)
+        
+        # If a list has been detected
+        if is_list:
+            if not in_list:
+                line = '<ul><li>' + line + '</li>'
+                in_list = True
+            else:
+                line = '<li>' + line + '</li>'
        else:
+            # This line is not a list item; if a list is still open, flag it to be closed
            if in_list:
-                in_list_append = True
+                finish_list = True
                in_list = False

-        m = re.match('<h|<ul|<p|<li', line)
-        if not m:
+        # Detect whether a heading, list or text paragraph has already been started
+        # This is just to ensure it's wrapped in something at the very least, and follows HTML syntax.
+        occupied = re.match('<h|<ul|<p|<li', line)
+        if not occupied:
            line = '<p>' + line + '</p>'    
-        m = re.match('(.*)__(.*)__(.*)', line)
-        if m:
-            line = m.group(1) + '<strong>' + m.group(2) + '</strong>' + m.group(3)
-        m = re.match('(.*)_(.*)_(.*)', line)
-        if m:
-            line = m.group(1) + '<em>' + m.group(2) + '</em>' + m.group(3)
-        if in_list_append:
+        
+        # If a list has ended and it needs to be formally ended
+        if finish_list:
            line = '</ul>' + line
-            in_list_append = False
-        res += line
+            is_list = False
+            finish_list = False
+
+        # Finish this line and add it to the result.  
+        result += line
+
+    # If we have somehow not ended a list yet, complete it now.
    if in_list:
-        res += '</ul>'
-    return res
+        result += '</ul>'
+    
+    return result