mirror of https://github.com/XRPLF/xrpl-dev-portal.git

parse_pages - code quality improvements
@@ -7,8 +7,8 @@
 # Optionally pre-compile them to HTML (using pandoc & a custom filter)
 ################################################################################
 
-from jinja2 import Environment, FileSystemLoader
-import os, sys, re
+import os
+import re
 import json
 import argparse
 
@@ -18,12 +18,13 @@ import subprocess
 #Fetch markdown sources from another repo
 import requests
 
-#Used for processing and post-processing of markdown
+#Used for processing and pre/post-processing of markdown
+from jinja2 import Environment, FileSystemLoader
 from markdown import markdown
 from bs4 import BeautifulSoup
 
 #Watchdog stuff
-import time#, logging
+import time
 from watchdog.observers import Observer
 from watchdog.events import PatternMatchingEventHandler
 
@@ -38,7 +39,7 @@ PRINCE_PAGE_MANIFEST_FILE = "/tmp/devportal-pages.txt"
 
 PDF_TARGET = "pdf"
 DEFAULT_TARGET = "local"
-MULTICODE_TAB_TARGETS = ["local","ripple.com"]
+MULTICODE_TAB_TARGETS = ["local", "ripple.com"]
 
 
 MC_START_REGEX = re.compile("<!-- *<div class='multicode'[^>]*> *-->")
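Aside: a minimal sketch of what these patterns drive later in parse_markdown — comment-wrapped divs get flipped into real markup for tab-capable targets. The MC_END_REGEX pattern is an assumption; this diff only shows it being used, not defined.

    import re

    MC_START_REGEX = re.compile("<!-- *<div class='multicode'[^>]*> *-->")
    MC_END_REGEX = re.compile("<!-- *</div> *-->")  # assumed counterpart pattern

    html = "<!-- <div class='multicode'> -->\n<pre>tab body</pre>\n<!-- </div> -->"
    # Uncomment the wrappers, as parse_markdown does when tabs are enabled
    html = MC_START_REGEX.sub("<div class='multicode'>", html)
    html = MC_END_REGEX.sub("</div>", html)
    print(html)  # the comments are now a real <div class='multicode'> wrapper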
@@ -47,6 +48,7 @@ DOCTOC_START = "<!-- START doctoc generated TOC please keep comment here to allo
 DOCTOC_END = "<!-- END doctoc generated TOC please keep comment here to allow auto update -->"
 
 def parse_markdown(md, target=DEFAULT_TARGET, pages=None):
+    """Take a markdown string and output HTML for that content"""
     ## Python markdown requires markdown="1" on HTML block elements
     ## that contain markdown. AND there's a bug where if you use
     ## markdown.extensions.extra, it replaces code fences in HTML
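An illustration of the Python-Markdown behavior those comments describe (a standalone sketch, assuming the markdown package): text inside a raw HTML block is passed through unparsed unless the element opts in with markdown="1".

    from markdown import markdown

    plain = "<div>\n*stays literal*\n</div>"
    tagged = '<div markdown="1">\n*rendered as emphasis*\n</div>'
    # The first div passes through untouched; the second gets its contents parsed
    print(markdown(plain, extensions=["markdown.extensions.extra"]))
    print(markdown(tagged, extensions=["markdown.extensions.extra"]))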
@@ -57,38 +59,38 @@ def parse_markdown(md, target=DEFAULT_TARGET, pages=None):
 #            return m.group(1) + ' markdown="1">'
 #        else:
 #            return m.group(0)
 #
 #    md = re.sub("(<div[^>]*)>", add_markdown_class, md)
 #    print("done")
 
     #Strip out doctoc Table of Contents for RippleAPI
     doctoc_start_i = md.find(DOCTOC_START)
     doctoc_end_i = md.find(DOCTOC_END)
     if doctoc_start_i != -1 and doctoc_end_i != -1:
         md = md[:doctoc_start_i]+md[doctoc_end_i+len(DOCTOC_END):]
 
     #the actual markdown parsing is the easy part
     print("parsing markdown...")
     html = markdown(md, extensions=["markdown.extensions.extra",
                                     "markdown.extensions.toc"])
     print("done")
 
     #if target uses multicode tabs, uncomment the divs
     if target in MULTICODE_TAB_TARGETS:
         print("enabling multicode tabs...")
         html = re.sub(MC_START_REGEX, "<div class='multicode'>", html)
         html = re.sub(MC_END_REGEX, "</div>", html)
         print("done")
 
     #replace underscores with dashes in h1,h2,etc. for Flatdoc compatibility
     print("tweaking header IDs...")
     soup = BeautifulSoup(html, "html.parser")
     headers = soup.find_all(name=re.compile("h[0-9]"), id=True)
     for h in headers:
         if "_" in h["id"]:
-            h["id"] = h["id"].replace("_","-")
+            h["id"] = h["id"].replace("_", "-")
     print("done")
 
     #buttonize links ending in >
     print("buttonizing try-it links...")
     buttonlinks = soup.find_all("a", string=re.compile(">$"))
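A standalone sketch of the header-ID tweak in this hunk (assuming beautifulsoup4): Flatdoc expects dashes, so underscores in h1-h6 id attributes are swapped out.

    import re
    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<h2 id='some_header_id'>Some Header</h2>", "html.parser")
    # Find every header element that carries an id attribute
    for h in soup.find_all(name=re.compile("h[0-9]"), id=True):
        h["id"] = h["id"].replace("_", "-")
    print(soup)  # <h2 id="some-header-id">Some Header</h2>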
@@ -98,14 +100,14 @@ def parse_markdown(md, target=DEFAULT_TARGET, pages=None):
         else:
             link["class"] = "button"
     print("done")
 
     #Replace links for live site
     if target != DEFAULT_TARGET:
-        print("modifying links for target",target)
+        print("modifying links for target", target)
         if not pages:
             pages = get_pages()
 
-        links = soup.find_all("a",href=re.compile("^[^.]+\.html"))
+        links = soup.find_all("a", href=re.compile(r"^[^.]+\.html"))
         for link in links:
             for page in pages:
                 if target in page:
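Why the r-prefix added above matters: in a plain string literal "\." happens to survive as backslash-dot, but unrecognized escapes in non-raw strings are fragile (and a DeprecationWarning in newer Pythons), so raw strings are the idiomatic form for regexes. A quick demo of the pattern itself:

    import re

    pattern = re.compile(r"^[^.]+\.html")  # a relative link ending in .html
    print(bool(pattern.match("tutorial.html")))        # True
    print(bool(pattern.match("https://example.com")))  # False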
@@ -116,7 +118,7 @@ def parse_markdown(md, target=DEFAULT_TARGET, pages=None):
                     link["href"] = link["href"].replace(local_url,
                                                         target_url)
     print("done")
 
     print("re-rendering HTML")
     html2 = str(soup)
     print("done")
@@ -124,10 +126,12 @@ def parse_markdown(md, target=DEFAULT_TARGET, pages=None):
+
+MARKDOWN_LINK_REGEX = re.compile(r"(\[([^\]]+)\]\(([^:)]+)\)|\[([^\]]+)\]:\s*(\S+)$)", re.MULTILINE)
 def githubify_markdown(md, target=DEFAULT_TARGET, pages=None):
     """Github-friendly markdown has absolute links, no md in divs"""
     if not pages:
         pages = get_pages()
 
     class MDLink:
         """A markdown link, either a reference link or inline link"""
         def __init__(self, fullmatch, label, url, label2, url2):
             self.fullmatch = fullmatch
             if label:
@@ -138,22 +142,18 @@ def githubify_markdown(md, target=DEFAULT_TARGET, pages=None):
                 self.label = label2
                 self.url = url2
                 self.is_reflink = True
 
         def to_markdown(self):
-            s = "["
-            s += self.label
-            s += "]"
+            """Re-represent self as a link in markdown syntax"""
+            s = "[" + self.label + "]"
             if self.is_reflink:
-                s += ": "
-                s += self.url
+                s += ": " + self.url
             else:
-                s += "("
-                s += self.url
-                s += ")"
+                s += "(" + self.url + ")"
             return s
 
     links = [MDLink(*m) for m in MARKDOWN_LINK_REGEX.findall(md)]
 
     for link in links:
         for page in pages:
             if target in page:
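How the regex feeds MDLink(*m): findall returns 5-tuples — the full match, then label/url for an inline link, or label2/url2 for a reference link, with the unused branch left empty. A self-contained sketch:

    import re

    MARKDOWN_LINK_REGEX = re.compile(
        r"(\[([^\]]+)\]\(([^:)]+)\)|\[([^\]]+)\]:\s*(\S+)$)", re.MULTILINE)

    md = "See [the docs](tutorial.html) and a reflink:\n[docs]: tutorial.html"
    for m in MARKDOWN_LINK_REGEX.findall(md):
        print(m)
    # ('[the docs](tutorial.html)', 'the docs', 'tutorial.html', '', '')
    # ('[docs]: tutorial.html', '', '', 'docs', 'tutorial.html')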
@@ -163,138 +163,158 @@ def githubify_markdown(md, target=DEFAULT_TARGET, pages=None):
                 if link.url[:len(local_url)] == local_url:
                     link.url = link.url.replace(local_url, target_url)
         md = md.replace(link.fullmatch, link.to_markdown())
 
     return md
 
-def get_pages(target=None,verbose=True):
+def get_pages(target=None):
     """Read pages.json and return an object, optionally filtered
        to just the pages that this target cares about"""
     with open(PAGE_MANIFEST_FILE) as f:
         pages = json.load(f)
 
     if target:
         #filter pages that aren't part of this target
+        def should_include(page, target):
+            if "targets" not in page:
+                return True
+            if target in page["targets"]:
+                return True
+            else:
+                return False
         pages = [page for page in pages
-                 if "targets" not in page or target in page["targets"]
-                 ]
+                 if should_include(page, target)]
     return pages
 
 def render_pages(precompiled, target=DEFAULT_TARGET):
     """The main work function. Reads pages.json, runs the pre-parser,
        runs the markdown parser, and writes the resulting
        HTML files and maybe PDF."""
     pages = get_pages(target)
     categories = []#ordered, de-duplicated list
     for page in pages:
         if "category" in page and page["category"] not in categories:
             categories.append(page["category"])
-    print("categories:",categories)
+    print("categories:", categories)
 
     env = Environment(loader=FileSystemLoader(os.path.curdir))
     env.lstrip_blocks = True
     env.trim_blocks = True
 
     pp_env = Environment(loader=FileSystemLoader(CONTENT_PATH))
     #Example: if we want to add custom functions to the md files
     #pp_env.globals['foo'] = lambda x: "FOO %s"%x
 
     for currentpage in pages:
 
         if "md" in currentpage:
             # Documentation file
 
             print("reading template file...")
             doc_template = env.get_template(DOC_TEMPLATE_FILE)
             if target == PDF_TARGET:
                 doc_template = env.get_template(PDF_TEMPLATE_FILE)
             print("done")
 
             if precompiled:
                 if "http:" in currentpage["md"] or "https:" in currentpage["md"]:
-                    #No pre-processing for remote pages
-                    print("fetching remote page",currentpage["name"])
+                    # No pre-processing for remote pages
+                    print("fetching remote page",
+                          currentpage["name"])
                     try:
-                        r = requests.get(currentpage["md"])
-                        if r.status_code == 200:
-                            md_in = r.text
+                        response = requests.get(currentpage["md"])
+                        if response.status_code == 200:
+                            md_in = response.text
                         else:
                             raise requests.RequestException("Status code for page was not 200")
                     except:
-                        print("Skipping page",currentpage["name"],"due to error fetching contents")
+                        print("Skipping page",
+                              currentpage["name"],
+                              "due to error fetching contents")
                         continue
                     print("done")
 
                 else:
-                    ## Read markdown as a template
-                    filein = os.path.join(CONTENT_PATH, currentpage["md"])
-                    print("pre-processing markdown file",currentpage["md"])
+                    # Read markdown as a template
+                    print("pre-processing markdown file",
+                          currentpage["md"])
                     md_raw = pp_env.get_template(currentpage["md"])
-                    md_in = md_raw.render(target=target,pages=pages)
+                    md_in = md_raw.render(target=target,
+                                          pages=pages)
 
                 print("parsing markdown for", currentpage["name"])
                 doc_html = parse_markdown(md_in, target, pages)
                 print("done")
 
-                print("rendering page",currentpage["name"],"...")
-                out_html = doc_template.render(currentpage=currentpage,
-                                               categories=categories,
-                                               pages=pages,
-                                               content=doc_html,
-                                               precompiled=precompiled)
+                print("rendering page", currentpage["name"], "...")
+                out_html = doc_template.render(
+                    currentpage=currentpage,
+                    categories=categories,
+                    pages=pages,
+                    content=doc_html,
+                    precompiled=precompiled)
                 print("done")
 
             else:
                 print("compiling skipped")
-                print("rendering page",currentpage["name"],"...")
-                out_html = doc_template.render(currentpage=currentpage,
-                                               categories=categories,
-                                               pages=pages,
-                                               content="",
-                                               precompiled=precompiled)
+                print("rendering page", currentpage["name"], "...")
+                out_html = doc_template.render(
+                    currentpage=currentpage,
+                    categories=categories,
+                    pages=pages,
+                    content="",
+                    precompiled=precompiled)
                 print("done")
 
         else:
             # Not a documentation page
             print("reading template file...")
             template = env.get_template(currentpage["template"])
             print("done")
 
-            print("rendering page",currentpage["name"],"...")
+            print("rendering page", currentpage["name"], "...")
             out_html = template.render(currentpage=currentpage,
                                        categories=categories,
                                        pages=pages)
             print("done")
 
         fileout = os.path.join(BUILD_PATH, currentpage["html"])
-        if (not os.path.isdir(BUILD_PATH)):
-            print("creating build folder",BUILD_PATH)
+        if not os.path.isdir(BUILD_PATH):
+            print("creating build folder", BUILD_PATH)
             os.makedirs(BUILD_PATH)
         with open(fileout, "w") as f:
-            print("writing to file:",fileout,"...")
+            print("writing to file:", fileout, "...")
             f.write(out_html)
         print("done")
 
 def watch(pre_parse, pdf, target):
     """Look for changed files and re-generate HTML (and optionally
        PDF whenever there's an update. Runs until interrupted."""
     path = ".."
     class UpdaterHandler(PatternMatchingEventHandler):
         """Updates to pattern-matched files means rendering."""
         def on_any_event(self, event):
             print("got event!")
             if pdf:
                 make_pdf(pdf)
             render_pages(pre_parse, target)
             print("done rendering")
 
-    patterns = ["*tool/pages.json","*tool/template-*.html"]
+    patterns = ["*tool/pages.json", "*tool/template-*.html"]
     if pre_parse:
         #md only prompts HTML change if pre-parsed
-        patterns.append("*content/*.md",)
+        patterns.extend(["*content/*.md", "content/code_samples/*"])
     event_handler = UpdaterHandler(patterns=patterns)
     observer = Observer()
     observer.schedule(event_handler, path, recursive=True)
     observer.start()
-    #The above starts an observing thread, so the main thread can just wait
+    # The above starts an observing thread,
+    # so the main thread can just wait
     try:
         while True:
             time.sleep(1)
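The watch loop in miniature — a sketch assuming the watchdog package: an Observer thread fires the handler on matching file changes while the main thread sleeps until interrupted.

    import time
    from watchdog.observers import Observer
    from watchdog.events import PatternMatchingEventHandler

    class PrintHandler(PatternMatchingEventHandler):
        def on_any_event(self, event):
            print("changed:", event.src_path)

    observer = Observer()
    observer.schedule(PrintHandler(patterns=["*.md"]), ".", recursive=True)
    observer.start()  # watching happens on a background thread
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:  # Ctrl-C stops the watcher cleanly
        observer.stop()
    observer.join()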
@@ -303,18 +323,21 @@ def watch(pre_parse, pdf, target):
     observer.join()
 
 def make_pdf(outfile):
     """Use prince to convert several HTML files into a PDF"""
     print("rendering PDF-able versions of pages...")
     render_pages(precompiled=True, target=PDF_TARGET)
     print("done")
 
     args = ['prince', '-o', outfile]
     pages = get_pages(PDF_TARGET)
     args += ["../"+p["html"] for p in pages]
-    print("generating PDF: running ", " ".join(args),"...")
+    print("generating PDF: running ", " ".join(args), "...")
     prince_resp = subprocess.check_output(args, universal_newlines=True)
     print(prince_resp)
 
 def githubify(md_file_name, target=DEFAULT_TARGET):
     """Wrapper - make the markdown resemble GitHub flavor"""
     filein = os.path.join(CONTENT_PATH, md_file_name)
     with open(filein, "r") as f:
         md = f.read()
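A sketch of the prince invocation pattern make_pdf uses: build an argv list and let check_output raise on a non-zero exit. The Prince binary must be on PATH, and the HTML file names here are examples.

    import subprocess

    args = ["prince", "-o", "out.pdf", "../index.html", "../tutorial.html"]
    print("generating PDF: running", " ".join(args), "...")
    # Raises FileNotFoundError if prince is absent, CalledProcessError on failure
    prince_resp = subprocess.check_output(args, universal_newlines=True)
    print(prince_resp)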
@@ -324,35 +347,37 @@ def githubify(md_file_name, target=DEFAULT_TARGET):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description='Generate static site from markdown and templates.')
     parser.add_argument("-f", "--flatdoc", action="store_true",
-        help="Use Flatdoc instead of parsing pages")
-    parser.add_argument("-w","--watch", action="store_true",
-        help="Watch for changes and re-generate the files. This runs until force-quit.")
-    parser.add_argument("--pdf", type=str,
-        help="Generate a PDF, too. Requires Prince.")
-    parser.add_argument("-g","--githubify", type=str, help="Output md prepared for GitHub")
-    parser.add_argument("--target", "-t", type=str, default=DEFAULT_TARGET)
-    args = parser.parse_args()
-    pre_parse = not args.flatdoc
-
-    if args.githubify:
-        githubify(args.githubify, args.target)
+                        help="Use Flatdoc instead of parsing pages")
+    parser.add_argument("-w", "--watch", action="store_true",
+                        help="Watch for changes and re-generate the files. This runs until force-quit.")
+    parser.add_argument("--pdf", type=str,
+                        help="Generate a PDF, too. Requires Prince.")
+    parser.add_argument("-g", "--githubify", type=str,
+                        help="Output md prepared for GitHub")
+    parser.add_argument("--target", "-t", type=str,
+                        default=DEFAULT_TARGET)
+    cli_args = parser.parse_args()
+    pre_parse = not cli_args.flatdoc
+
+    if cli_args.githubify:
+        githubify(cli_args.githubify, cli_args.target)
         exit(0)
 
-    if args.pdf:
-        if args.pdf[-4:] != ".pdf":
+    if cli_args.pdf:
+        if cli_args.pdf[-4:] != ".pdf":
             exit("PDF filename must end in .pdf")
         print("making a pdf...")
-        make_pdf(args.pdf)
+        make_pdf(cli_args.pdf)
         print("pdf done")
 
     #Not an accident that we go on to re-gen files in non-PDF format
     print("rendering pages now")
-    render_pages(precompiled=pre_parse, target=args.target)
+    render_pages(precompiled=pre_parse, target=cli_args.target)
     print("all done")
 
-    if args.watch:
+    if cli_args.watch:
         print("watching for changes...")
-        watch(pre_parse, args.pdf, args.target)
+        watch(pre_parse, cli_args.pdf, cli_args.target)
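The cli_args rename, exercised in isolation: parse_args accepts an explicit argv list, which makes the flag wiring easy to test without a shell. A minimal sketch mirroring the flags above:

    import argparse

    parser = argparse.ArgumentParser(
        description="Generate static site from markdown and templates.")
    parser.add_argument("-w", "--watch", action="store_true")
    parser.add_argument("--pdf", type=str)
    parser.add_argument("--target", "-t", type=str, default="local")
    # Parse a hand-built argv instead of sys.argv
    cli_args = parser.parse_args(["-w", "--pdf", "site.pdf"])
    print(cli_args.watch, cli_args.pdf, cli_args.target)  # True site.pdf local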