#!/usr/bin/env python3

################################################################################
# ripple-dev-portal doc parser
#
# Generate the html for all the Ripple Dev Portal files from a template
# Optionally pre-compile them to HTML (using pandoc & a custom filter)
################################################################################

from jinja2 import Environment, FileSystemLoader
import os
import sys
import re
import json
import argparse

##Necessary for pandoc, prince
import subprocess

#Python markdown works instead of pandoc
from markdown import markdown
from bs4 import BeautifulSoup

#Watchdog stuff
import time#, logging
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler

DOC_TEMPLATE_FILE = "template-doc.html"
PDF_TEMPLATE_FILE = "template-forpdf.html"
PAGE_MANIFEST_FILE = "pages.json"
BUILD_PATH = ".."
CONTENT_PATH = "../content"
BUTTONIZE_FILTER = "buttonize.py"
PRINCE_PAGE_MANIFEST_FILE = "/tmp/devportal-pages.txt"

# Opening <div ...> tag; group 1 is everything before the closing ">".
_DIV_OPEN_TAG = re.compile(r"(<div[^>]*)>")
# Header tags (h1, h2, ...) whose IDs get rewritten.
_HEADER_TAG = re.compile(r"h[0-9]")
# Link text ending in ">" marks a "try it" link that becomes a button.
_BUTTON_LINK_TEXT = re.compile(r">$")
# hrefs of the form "<something-without-dots>.html..."
_LOCAL_HTML_HREF = re.compile(r"^[^.]+\.html")


def _add_markdown_class(m):
    """Return the matched <div> opening tag with markdown="1" added.

    Tags that already carry a ``markdown=`` attribute are returned unchanged.
    """
    if "markdown=" in m.group(0):
        return m.group(0)
    return m.group(1) + ' markdown="1">'


def parse_markdown(md, environment="local", pages=None):
    """Convert markdown text to HTML and post-process the result.

    Steps, in order: tag embedded <div> elements with markdown="1", run
    Python-Markdown (extra + toc extensions), replace underscores with dashes
    in header IDs, add the "button" class to links whose text ends in ">",
    and -- for any environment other than "local" -- rewrite links to other
    pages using the per-environment replacement from the page manifest.

    Args:
        md: markdown source text.
        environment: name of the target environment; "local" leaves links
            untouched.
        pages: list of page dicts from the manifest; loaded with get_pages()
            when empty/None and link rewriting is needed.

    Returns:
        The resulting HTML as a string.
    """
    ## Python markdown requires markdown="1" on HTML block elements
    ## that contain markdown. AND there's a bug where if you use
    ## markdown.extensions.extra, it replaces code fences in HTML
    ## block elements with garbled text
    print("adding markdown class to embedded divs...")
    md = _DIV_OPEN_TAG.sub(_add_markdown_class, md)
    print("done")

    #the actual markdown parsing is the easy part
    print("parsing markdown...")
    html = markdown(md, extensions=["markdown.extensions.extra",
                                    "markdown.extensions.toc"])
    print("done")

    #replace underscores with dashes in h1,h2,etc. for Flatdoc compatibility
    print("tweaking header IDs...")
    soup = BeautifulSoup(html, "html.parser")
    for h in soup.find_all(name=_HEADER_TAG, id=True):
        if "_" in h["id"]:
            h["id"] = h["id"].replace("_", "-")
    print("done")

    #buttonize links ending in >
    print("buttonizing try-it links...")
    for link in soup.find_all("a", string=_BUTTON_LINK_TEXT):
        if "class" in link.attrs:
            link["class"].append("button")
        else:
            # Use a list so both branches leave "class" in the same shape.
            link["class"] = ["button"]
    print("done")

    #Replace links for live site
    if environment != "local":
        print("modifying links for environment",environment)
        if not pages:
            pages = get_pages()
        # Only pages that define a replacement for this environment matter.
        env_pages = [page for page in pages if environment in page]
        for link in soup.find_all("a", href=_LOCAL_HTML_HREF):
            for page in env_pages:
                if page["html"] in link["href"]:
                    link["href"] = link["href"].replace(page["html"],
                                                        page[environment])
        print("done")

    print("re-rendering HTML")
    #html2 = soup.prettify()
    html2 = str(soup)
    print("done")
    return html2


def get_pages():
    """Load and return the page manifest (PAGE_MANIFEST_FILE) as parsed JSON."""
    print("reading page manifest...")
    with open(PAGE_MANIFEST_FILE, encoding="utf-8") as f:
        pages = json.load(f)
    print("done")
    return pages


def render_pages(precompiled, pdf=False, environment="local"):
    """Render every page in the manifest to an HTML file under BUILD_PATH.

    Pages with an "md" key are documentation pages: they use the doc template
    (or the PDF template when ``pdf`` is truthy) and, when ``precompiled`` is
    truthy, get their markdown converted to HTML and passed to the template as
    ``content``; otherwise ``content`` is the empty string. All other pages
    are rendered from the template named by their "template" key.

    Args:
        precompiled: whether to parse the markdown into the output HTML.
        pdf: truthy to use PDF_TEMPLATE_FILE for documentation pages.
        environment: passed through to parse_markdown() for link rewriting.
    """
    pages = get_pages()
    env = Environment(loader=FileSystemLoader(os.path.curdir),
                      lstrip_blocks=True,
                      trim_blocks=True)

    for currentpage in pages:
        if "md" in currentpage:
            # Documentation file
            print("reading template file...")
            template_name = PDF_TEMPLATE_FILE if pdf else DOC_TEMPLATE_FILE
            doc_template = env.get_template(template_name)
            print("done")

            if precompiled:
                filein = os.path.join(CONTENT_PATH, currentpage["md"])
                print("parsing markdown for", currentpage)
                ## New markdown module way
                with open(filein, encoding="utf-8") as f:
                    s = f.read()
                doc_html = parse_markdown(s, environment, pages)
                # ## Old Pandoc way
                # args = ['pandoc', filein, '-F', BUTTONIZE_FILTER, '-t', 'html']
                # print("compiling: running ", " ".join(args),"...")
                # doc_html = subprocess.check_output(args, universal_newlines=True)
                print("done")
            else:
                print("compiling skipped")
                doc_html = ""

            print("rendering page",currentpage["name"],"...")
            out_html = doc_template.render(currentpage=currentpage,
                                           pages=pages,
                                           content=doc_html,
                                           precompiled=precompiled)
            print("done")
        else:
            # Not a documentation page
            print("reading template file...")
            template = env.get_template(currentpage["template"])
            print("done")

            print("rendering page",currentpage["name"],"...")
            out_html = template.render(currentpage=currentpage, pages=pages)
            print("done")

        fileout = os.path.join(BUILD_PATH, currentpage["html"])
        if not os.path.isdir(BUILD_PATH):
            print("creating build folder",BUILD_PATH)
            os.makedirs(BUILD_PATH)
        with open(fileout, "w", encoding="utf-8") as f:
            print("writing to file:",fileout,"...")
            f.write(out_html)
        print("done")


def watch(pre_parse, pdf, environment):
    """Re-render the site whenever a watched file under ".." changes.

    Watches the page manifest and template files, plus the markdown content
    files when ``pre_parse`` is truthy. Blocks until interrupted with
    Ctrl-C (KeyboardInterrupt), then stops the observer.

    Args:
        pre_parse: passed to render_pages() as ``precompiled``.
        pdf: output PDF filename, or a falsy value for no PDF.
        environment: passed to render_pages().
    """
    path = ".."

    class UpdaterHandler(PatternMatchingEventHandler):
        def on_any_event(self, event):
            print("got event!")
            if pdf:
                make_pdf(pdf)
            # NOTE(review): the non-watch path re-renders WITHOUT the pdf
            # flag after making a PDF; here pdf is passed through, so the
            # PDF template is used again -- confirm this is intended.
            render_pages(pre_parse, pdf, environment)

    patterns = ["*tool/pages.json","*tool/template-*.html"]
    if pre_parse:
        #md only prompts HTML change if pre-parsed
        patterns.append("*content/*.md")

    event_handler = UpdaterHandler(patterns=patterns)
    observer = Observer()
    observer.schedule(event_handler, path, recursive=True)
    observer.start()
    #The above starts an observing thread, so the main thread can just wait
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()


def make_pdf(outfile):
    """Render PDF-template versions of the pages and run Prince on them.

    Args:
        outfile: path of the PDF file Prince should write.

    Raises:
        subprocess.CalledProcessError: if the prince command exits non-zero.
    """
    print("rendering PDF-able versions of pages...")
    render_pages(True, pdf=outfile)
    print("done")

    args = ['prince', '-o', outfile, "../index.html"]
    pages = get_pages()
    # Only documentation pages (those with an "md" key) go into the PDF.
    args += ["../"+p["html"] for p in pages if "md" in p]
    print("generating PDF: running ", " ".join(args),"...")
    subprocess.check_output(args, universal_newlines=True)


def main():
    """Parse command-line arguments and build (or watch and rebuild) the site."""
    parser = argparse.ArgumentParser(
        description='Generate static site from markdown and templates.')
    parser.add_argument("-p", "--pre_parse", action="store_true",
                        help="Parse markdown; otherwise, use Flatdoc")
    parser.add_argument("-w","--watch", action="store_true",
                        help="Watch for changes and re-generate the files. This runs until force-quit.")
    parser.add_argument("--pdf", type=str,
                        help="Generate a PDF, too. Requires Prince.")
    parser.add_argument("--environment", "-e", type=str, default="local",
                        choices=["local","ripple.com"])
    args = parser.parse_args()

    if args.pdf:
        if not args.pdf.endswith(".pdf"):
            # sys.exit rather than the site-provided exit(), which is not
            # guaranteed to exist in every interpreter configuration.
            sys.exit("PDF filename must end in .pdf")
        print("making a pdf...")
        make_pdf(args.pdf)
        print("pdf done")
        #Not an accident that we go on to re-gen files in non-PDF format

    if args.watch:
        print("watching for changes...")
        watch(args.pre_parse, args.pdf, args.environment)
    else:
        print("rendering pages now")
        render_pages(args.pre_parse, environment=args.environment)
        print("all done")


if __name__ == "__main__":
    main()