#!/usr/bin/env python3
################################################################################
# ripple-dev-portal doc parser
#
# Generate the html for all the Ripple Dev Portal files from a template
# Optionally pre-compile them to HTML (using the Python markdown module; the
# older pandoc + custom filter route is no longer called from this script)
################################################################################

import argparse
import json
import os
import re
import subprocess  # Necessary for prince (and formerly pandoc)
import sys
import time

from bs4 import BeautifulSoup
from jinja2 import Environment, FileSystemLoader
from markdown import markdown  # Python markdown works instead of pandoc
from watchdog.events import PatternMatchingEventHandler
from watchdog.observers import Observer

DOC_TEMPLATE_FILE = "template-doc.html"
PDF_TEMPLATE_FILE = "template-forpdf.html"
PAGE_MANIFEST_FILE = "pages.json"
BUILD_PATH = ".."
CONTENT_PATH = "../content"
BUTTONIZE_FILTER = "buttonize.py"
PRINCE_PAGE_MANIFEST_FILE = "/tmp/devportal-pages.txt"

# Matches an opening <div ...> tag; group 1 is the tag without its closing ">".
# NOTE(review): this pattern was reconstructed from a garbled one that matched
# every ">" in the document. "<div" is the presumed original target -- confirm
# against the content files before relying on it.
_DIV_OPEN_TAG = re.compile(r"(<div\b[^>]*)>")

# Heading tags (h1-h6) as emitted by the markdown toc extension.
_HEADING_TAG = re.compile(r"^h[1-6]$")


def parse_markdown(md):
    """Convert a markdown string to prettified HTML.

    Args:
        md: Markdown source text.

    Returns:
        The HTML produced by the markdown module (with the "extra" and "toc"
        extensions), re-serialized by BeautifulSoup's prettify(), with
        underscores in heading ids replaced by dashes.
    """
    ## Python markdown requires markdown="1" on HTML block elements
    ##     that contain markdown. AND there's a bug where if you use
    ##     markdown.extensions.extra, it replaces code fences in HTML
    ##     block elements with garbled text
    def add_markdown_class(m):
        # Leave tags alone if the author already set a markdown= attribute.
        if "markdown=" in m.group(0):
            return m.group(0)
        return m.group(1) + ' markdown="1">'

    md = _DIV_OPEN_TAG.sub(add_markdown_class, md)

    # the actual markdown parsing is the easy part
    html = markdown(md, extensions=["markdown.extensions.extra",
                                    "markdown.extensions.toc"])

    # replace underscores with dashes in h1,h2,etc. for Flatdoc compatibility
    soup = BeautifulSoup(html, "html.parser")
    for heading in soup.find_all(name=_HEADING_TAG, id=True):
        if "_" in heading["id"]:
            heading["id"] = heading["id"].replace("_", "-")

    return soup.prettify()


def get_pages():
    """Load and return the page manifest parsed from PAGE_MANIFEST_FILE."""
    print("reading page manifest...")
    with open(PAGE_MANIFEST_FILE, encoding="utf-8") as f:
        pages = json.load(f)
    print("done")
    return pages


def render_pages(precompiled, pdf=False):
    """Render every page in the manifest and write it under BUILD_PATH.

    Pages with an "md" key are documentation pages and use the doc template
    (or the PDF template when ``pdf`` is truthy); all other pages use the
    template named by their own "template" key.

    Args:
        precompiled: If truthy, parse each page's markdown to HTML and pass it
            to the template as ``content``; otherwise pass an empty string.
        pdf: If truthy, render documentation pages with PDF_TEMPLATE_FILE
            instead of DOC_TEMPLATE_FILE.
    """
    pages = get_pages()

    env = Environment(loader=FileSystemLoader(os.path.curdir),
                      lstrip_blocks=True, trim_blocks=True)
    # Pick the doc template once so the unused one is never loaded.
    doc_template_name = PDF_TEMPLATE_FILE if pdf else DOC_TEMPLATE_FILE

    for currentpage in pages:
        if "md" in currentpage:
            # Documentation file
            print("reading template file...")
            doc_template = env.get_template(doc_template_name)
            print("done")

            if precompiled:
                filein = os.path.join(CONTENT_PATH, currentpage["md"])
                print("parsing markdown for", currentpage)
                # Explicit UTF-8 so the result does not depend on the locale.
                with open(filein, encoding="utf-8") as f:
                    doc_html = parse_markdown(f.read())
                print("done")
            else:
                print("compiling skipped")
                doc_html = ""

            print("rendering page", currentpage, "...")
            out_html = doc_template.render(currentpage=currentpage,
                                           pages=pages,
                                           content=doc_html,
                                           precompiled=precompiled)
            print("done")
        else:
            # Not a documentation page
            print("reading template file...")
            template = env.get_template(currentpage["template"])
            print("done")

            print("rendering page", currentpage, "...")
            out_html = template.render(currentpage=currentpage, pages=pages)
            print("done")

        fileout = os.path.join(BUILD_PATH, currentpage["html"])
        if not os.path.isdir(BUILD_PATH):
            print("creating build folder", BUILD_PATH)
            os.makedirs(BUILD_PATH)
        with open(fileout, "w", encoding="utf-8") as f:
            print("writing to file:", fileout, "...")
            f.write(out_html)
        print("done")


def watch(pre_parse, pdf):
    """Re-generate the site whenever a watched source file changes.

    Blocks until interrupted with Ctrl-C (KeyboardInterrupt).

    Args:
        pre_parse: Passed to render_pages() as ``precompiled``; also decides
            whether markdown content files are watched.
        pdf: PDF output filename, or a falsy value to skip PDF generation.
    """
    path = ".."

    class UpdaterHandler(PatternMatchingEventHandler):
        def on_any_event(self, event):
            print("got event!")
            if pdf:
                make_pdf(pdf)
            # make_pdf() leaves the PDF-template pages in the build folder,
            # so always finish by re-generating them in non-PDF format.
            render_pages(pre_parse)

    patterns = ["*tool/pages.json", "*tool/template-*.html"]
    if pre_parse:
        # md only prompts HTML change if pre-parsed
        patterns.append("*content/*.md")

    event_handler = UpdaterHandler(patterns=patterns)
    observer = Observer()
    observer.schedule(event_handler, path, recursive=True)
    observer.start()
    # The above starts an observing thread, so the main thread can just wait
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()


def make_pdf(outfile):
    """Render the pages with the PDF template and run Prince on them.

    Args:
        outfile: Path of the PDF file Prince should write.

    Raises:
        subprocess.CalledProcessError: If prince exits with a non-zero status.
    """
    print("rendering PDF-able versions of pages...")
    render_pages(True, pdf=True)
    print("done")

    pages = get_pages()
    args = ["prince", "-o", outfile, os.path.join(BUILD_PATH, "index.html")]
    # Only documentation pages (those with markdown sources) go into the PDF.
    args += [os.path.join(BUILD_PATH, p["html"]) for p in pages if "md" in p]
    print("generating PDF: running ", " ".join(args), "...")
    subprocess.check_output(args, universal_newlines=True)


def main():
    """Parse command-line options and run the requested build steps."""
    parser = argparse.ArgumentParser(
        description='Generate static site from markdown and templates.')
    parser.add_argument("-p", "--pre_parse", action="store_true",
                        help="Parse markdown; otherwise, use Flatdoc")
    parser.add_argument("-w", "--watch", action="store_true",
                        help="Watch for changes and re-generate the files. This runs until force-quit.")
    parser.add_argument("--pdf", type=str,
                        help="Generate a PDF, too. Requires Prince.")
    args = parser.parse_args()

    if args.pdf:
        if not args.pdf.endswith(".pdf"):
            sys.exit("PDF filename must end in .pdf")
        make_pdf(args.pdf)
        # Not an accident that we go on to re-gen files in non-PDF format

    if args.watch:
        watch(args.pre_parse, args.pdf)
    else:
        render_pages(args.pre_parse)


if __name__ == "__main__":
    main()