Files
xrpl-dev-portal/tool/parse_pages.py

249 lines
8.9 KiB
Python
Executable File

#!/usr/bin/env python3
################################################################################
# ripple-dev-portal doc parser
#
# Generate the html for all the Ripple Dev Portal files from a template
# Optionally pre-compile them to HTML (using Python-Markdown; the older
# pandoc + custom filter path is kept only as commented-out code)
################################################################################
from jinja2 import Environment, FileSystemLoader
import os, sys, re
import json
import argparse
##Necessary for pandoc, prince
import subprocess
#Python markdown works instead of pandoc
from markdown import markdown
from bs4 import BeautifulSoup
#Watchdog stuff
import time#, logging
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
# Template used for documentation pages (manifest entries with an "md" key).
DOC_TEMPLATE_FILE = "template-doc.html"
# Template used instead of DOC_TEMPLATE_FILE when rendering pages for PDF.
PDF_TEMPLATE_FILE = "template-forpdf.html"
# JSON manifest describing the pages to build; read by get_pages().
PAGE_MANIFEST_FILE = "pages.json"
# Directory the rendered HTML files are written to.
BUILD_PATH = ".."
# Directory the markdown source files are read from.
CONTENT_PATH = "../content"
# Pandoc filter script; only referenced by the commented-out pandoc code path.
BUTTONIZE_FILTER = "buttonize.py"
# NOTE(review): not referenced anywhere in the visible code -- confirm whether
# this is still needed.
PRINCE_PAGE_MANIFEST_FILE = "/tmp/devportal-pages.txt"
def parse_markdown(md, environment="local", pages=None):
    """Convert markdown text to HTML and post-process the result.

    Steps, in order:
      1. Add markdown="1" to every opening <div> tag that lacks a
         "markdown=" attribute, so markdown nested in divs gets parsed.
      2. Run Python-Markdown with the "extra" and "toc" extensions.
      3. Replace underscores with dashes in the id of each header element.
      4. Add the "button" class to links whose text ends in ">".
      5. Unless environment is "local", rewrite links to other pages using
         the per-environment replacement given in the page manifest.

    Args:
        md: The markdown source text.
        environment: Name of the target environment. For any value other
            than "local", a page that has a key with this name gets links
            to its "html" value replaced by the value under that key.
        pages: List of page dictionaries from the manifest. If falsy and
            needed, it is loaded with get_pages().

    Returns:
        The resulting HTML as a string.
    """
    ## Python markdown requires markdown="1" on HTML block elements
    ## that contain markdown. AND there's a bug where if you use
    ## markdown.extensions.extra, it replaces code fences in HTML
    ## block elements with garbled text
    print("adding markdown class to embedded divs...")

    def add_markdown_class(m):
        """Return the matched <div ...> tag with markdown="1" added if absent."""
        if "markdown=" in m.group(0):
            return m.group(0)
        return m.group(1) + ' markdown="1">'

    md = re.sub(r"(<div[^>]*)>", add_markdown_class, md)
    print("done")

    #the actual markdown parsing is the easy part
    print("parsing markdown...")
    html = markdown(md, extensions=["markdown.extensions.extra",
                                    "markdown.extensions.toc"])
    print("done")

    #replace underscores with dashes in h1,h2,etc. for Flatdoc compatibility
    print("tweaking header IDs...")
    soup = BeautifulSoup(html, "html.parser")
    for h in soup.find_all(name=re.compile("h[0-9]"), id=True):
        if "_" in h["id"]:
            h["id"] = h["id"].replace("_", "-")
    print("done")

    #buttonize links ending in >
    print("buttonizing try-it links...")
    for link in soup.find_all("a", string=re.compile(">$")):
        if "class" in link.attrs:
            link["class"].append("button")
        else:
            link["class"] = "button"
    print("done")

    #Replace links for live site
    if environment != "local":
        print("modifying links for environment", environment)
        if not pages:
            pages = get_pages()
        # Only pages that define a replacement for this environment matter;
        # collect them once instead of re-checking for every link.
        replacements = [(page["html"], page[environment])
                        for page in pages if environment in page]
        # Raw string: "\." in a normal string literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        for link in soup.find_all("a", href=re.compile(r"^[^.]+\.html")):
            for local_href, env_href in replacements:
                if local_href in link["href"]:
                    link["href"] = link["href"].replace(local_href, env_href)
        print("done")

    print("re-rendering HTML")
    #html2 = soup.prettify()
    html2 = str(soup)
    print("done")
    return html2
def get_pages():
    """Load and return the contents of the page manifest.

    Reads PAGE_MANIFEST_FILE (resolved against the current working
    directory) and returns the parsed JSON value. The rest of this module
    iterates over it as a list of page dictionaries.

    Raises:
        OSError: If the manifest file cannot be opened.
        json.JSONDecodeError: If the manifest is not valid JSON.
    """
    print("reading page manifest...")
    # JSON text is UTF-8; don't let the platform's locale encoding decide.
    with open(PAGE_MANIFEST_FILE, encoding="utf-8") as f:
        pages = json.load(f)
    print("done")
    return pages
def render_pages(precompiled, pdf=False, environment="local"):
    """Render every page in the manifest to an HTML file under BUILD_PATH.

    Pages with an "md" key are documentation pages: they are rendered with
    DOC_TEMPLATE_FILE (or PDF_TEMPLATE_FILE when pdf is truthy). All other
    pages are rendered with the template named by their "template" key.

    Args:
        precompiled: If truthy, each documentation page's markdown file is
            read from CONTENT_PATH, converted with parse_markdown() and
            passed to the template as "content". Otherwise "content" is an
            empty string.
        pdf: Any truthy value selects the PDF template for documentation
            pages.
        environment: Passed through to parse_markdown().
    """
    pages = get_pages()
    env = Environment(loader=FileSystemLoader(os.path.curdir),
                      lstrip_blocks=True, trim_blocks=True)

    for currentpage in pages:
        if "md" in currentpage:
            # Documentation file
            print("reading template file...")
            doc_template = env.get_template(
                PDF_TEMPLATE_FILE if pdf else DOC_TEMPLATE_FILE)
            print("done")

            if precompiled:
                filein = os.path.join(CONTENT_PATH, currentpage["md"])
                print("parsing markdown for", currentpage)
                # Templates are loaded by Jinja2 as UTF-8 by default, so
                # read the content the same way instead of relying on the
                # locale's encoding.
                with open(filein, encoding="utf-8") as f:
                    md_text = f.read()
                doc_html = parse_markdown(md_text, environment, pages)
                print("done")
            else:
                print("compiling skipped")
                doc_html = ""

            print("rendering page", currentpage["name"], "...")
            out_html = doc_template.render(currentpage=currentpage,
                                           pages=pages,
                                           content=doc_html,
                                           precompiled=precompiled)
            print("done")
        else:
            # Not a documentation page
            print("reading template file...")
            template = env.get_template(currentpage["template"])
            print("done")

            print("rendering page", currentpage["name"], "...")
            out_html = template.render(currentpage=currentpage, pages=pages)
            print("done")

        fileout = os.path.join(BUILD_PATH, currentpage["html"])
        # Create the directory the file actually lands in, so an "html"
        # value containing a subdirectory works too.
        out_dir = os.path.dirname(fileout)
        if out_dir and not os.path.isdir(out_dir):
            print("creating build folder", out_dir)
            os.makedirs(out_dir, exist_ok=True)
        with open(fileout, "w", encoding="utf-8") as f:
            print("writing to file:", fileout, "...")
            f.write(out_html)
        print("done")
def watch(pre_parse, pdf, environment):
    """Watch the source files and re-generate output when they change.

    Watches ".." recursively for changes to the page manifest and the
    template files, plus the markdown content when pre_parse is truthy.
    Blocks until interrupted with Ctrl-C.

    Args:
        pre_parse: Passed to render_pages() as its "precompiled" argument.
        pdf: PDF output filename; if truthy, the PDF is rebuilt with
            make_pdf() on every change before the HTML is re-rendered.
        environment: Passed to render_pages().
    """
    path = ".."

    class UpdaterHandler(PatternMatchingEventHandler):
        """Re-generate the output on any event for a watched file."""

        def on_any_event(self, event):
            print("got event!")
            if pdf:
                make_pdf(pdf)
            # make_pdf() writes PDF-template HTML into the build folder, so
            # always finish by re-rendering the regular (non-PDF) pages,
            # the same way the one-shot code path does.
            render_pages(pre_parse, environment=environment)

    patterns = ["*tool/pages.json", "*tool/template-*.html"]
    if pre_parse:
        #md only prompts HTML change if pre-parsed
        patterns.append("*content/*.md")

    event_handler = UpdaterHandler(patterns=patterns)
    observer = Observer()
    observer.schedule(event_handler, path, recursive=True)
    observer.start()
    #The above starts an observing thread, so the main thread can just wait
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        # Shut the observer thread down however the wait loop ends.
        observer.stop()
        observer.join()
def make_pdf(outfile):
    """Render the pages with the PDF template, then run Prince on them.

    Args:
        outfile: Path of the PDF file Prince should write.

    Raises:
        subprocess.CalledProcessError: If prince exits with a non-zero status.
    """
    print("rendering PDF-able versions of pages...")
    render_pages(True, pdf=outfile)
    print("done")

    # Prince gets the index page first, followed by every documentation
    # page (manifest entries with an "md" key) in manifest order.
    command = ['prince', '-o', outfile, "../index.html"]
    for entry in get_pages():
        if "md" in entry:
            command.append("../" + entry["html"])

    print("generating PDF: running ", " ".join(command), "...")
    # The captured output is not used; only the exit status matters here.
    subprocess.check_output(command, universal_newlines=True)
if __name__ == "__main__":
    # Command-line entry point: optionally build a PDF, then either render
    # the pages once or keep watching the sources for changes.
    parser = argparse.ArgumentParser(
        description='Generate static site from markdown and templates.')
    parser.add_argument("-p", "--pre_parse", action="store_true",
                        help="Parse markdown; otherwise, use Flatdoc")
    parser.add_argument("-w", "--watch", action="store_true",
                        help="Watch for changes and re-generate the files. "
                             "This runs until force-quit.")
    parser.add_argument("--pdf", type=str,
                        help="Generate a PDF, too. Requires Prince.")
    parser.add_argument("--environment", "-e", type=str, default="local",
                        choices=["local", "ripple.com"])
    args = parser.parse_args()

    if args.pdf:
        if not args.pdf.endswith(".pdf"):
            # sys.exit rather than the site-provided exit(), which is meant
            # for interactive use and is not guaranteed to exist.
            sys.exit("PDF filename must end in .pdf")
        print("making a pdf...")
        make_pdf(args.pdf)
        print("pdf done")
        #Not an accident that we go on to re-gen files in non-PDF format

    if args.watch:
        print("watching for changes...")
        watch(args.pre_parse, args.pdf, args.environment)
    else:
        print("rendering pages now")
        render_pages(args.pre_parse, environment=args.environment)
        print("all done")