Move to stand-alone Dactyl package

mDuo13
2017-02-21 17:05:34 -08:00
parent 745c9fde06
commit ef8d773680
16 changed files with 21 additions and 1804 deletions

View File

@@ -29,6 +29,12 @@ default_filters:
- callouts
- badges
cover_page:
name: Overview
html: index.html
sidebar: false
template: template-index.html
targets:
# The first member is the default that gets built when no target is specified
- name: local
@@ -60,15 +66,6 @@ targets:
display_name: rippled Setup Guide
pages:
# Intro pages are not directly replicated on ripple.com at this time
- name: Overview
html: index.html
sidebar: false
template: template-index.html
targets:
- local
- ripple.com
# References are exhaustive lists of commands and options
- name: RippleAPI
category: References

View File

@@ -1,834 +0,0 @@
#!/usr/bin/env python3
################################################################################
# Dactyl - a tool for heroic epics of documentation
#
# Generates a website from Markdown and Jinja templates, with filtering
# along the way.
################################################################################
DEFAULT_CONFIG_FILE = "dactyl-config.yml"
import os
import re
import yaml
import argparse
import logging
import traceback
# Necessary to copy static files to the output dir
from distutils.dir_util import copy_tree
# Used to import filters.
from importlib import import_module
# Necessary for prince
import subprocess
# Used to fetch markdown sources from GitHub repos
import requests
# Various content and template processing stuff
from jinja2 import Environment, FileSystemLoader, TemplateError
from markdown import markdown
from bs4 import BeautifulSoup
# Watchdog stuff
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
# The log level is configurable at runtime (see __main__ below)
logger = logging.getLogger()
# These fields are special, and pages don't inherit them directly
RESERVED_KEYS_TARGET = [
"name",
"display_name",
"filters",
"image_subs",
"pages",
]
ADHOC_TARGET = "__ADHOC__"
DEFAULT_PDF_FILE = "__DEFAULT_FILENAME__"
NO_PDF = "__NO_PDF__"
filters = {}
def load_config(config_file=DEFAULT_CONFIG_FILE):
"""Reload config from a YAML file."""
global config, filters
logger.info("loading config file %s..." % config_file)
with open(config_file, "r") as f:
config = yaml.load(f)
assert(config["targets"])
assert(config["pages"])
assert(config["pdf_template"])
assert(config["default_template"])
assert(config["content_path"])
assert(config["out_path"])
assert(config["temporary_files_path"])
assert(config["template_static_path"])
assert(config["content_static_path"])
if "prince_executable" not in config or not config["prince_executable"]:
config["prince_executable"] = "prince" # A reasonable default
if "default_filters" not in config:
config["default_filters"] = []
if "skip_preprocessor" not in config:
config["skip_preprocessor"] = False
if "pdf_filename_fields" not in config:
config["pdf_filename_fields"] = "display_name"
if "pdf_filename_separator" not in config:
config["pdf_filename_separator"] = "-"
# Warn if any pages aren't part of a target
for page in config["pages"]:
if "targets" not in page:
if "name" in page:
logging.warn("Page %s is not part of any targets." %
page["name"])
else:
logging.warn("Page %s is not part of any targets." % page)
if "md" in page and "name" not in page:
logging.debug("Guessing page name for page %s" % page)
page_path = os.path.join(config["content_path"], page["md"])
page["name"] = guess_title_from_md_file(page_path)
# Figure out which filters we need and import them
filternames = set(config["default_filters"])
for target in config["targets"]:
if "filters" in target:
filternames.update(target["filters"])
for page in config["pages"]:
if "filters" in page:
filternames.update(page["filters"])
for filter_name in filternames:
filters[filter_name] = import_module("filter_"+filter_name)
def default_pdf_name(target):
target = get_target(target)
filename_segments = []
for fieldname in config["pdf_filename_fields"]:
if fieldname in target.keys():
filename_segments.append(slugify(target[fieldname]))
if filename_segments:
return config["pdf_filename_separator"].join(filename_segments) + ".pdf"
else:
return slugify(target["name"])+".pdf"
# old default_pdf_name(target)
# if {"product","version","guide"} <= set(target.keys()):
# p_name = slugify(target["product"])
# v_num = slugify(target["version"])
# g_name = slugify(target["guide"])
# return p_name+"-"+v_num+"-"+g_name+".pdf"
# elif "display_name" in target:
# return slugify(target["display_name"])+".pdf"
# else:
# return slugify(target["name"])+".pdf"
# Note: this regex strips non-ASCII characters from filenames, which is
# undesirable when generating non-English filenames.
unacceptable_chars = re.compile(r"[^A-Za-z0-9._ ]+")
whitespace_regex = re.compile(r"\s+")
def slugify(s):
s = re.sub(unacceptable_chars, "", s)
s = re.sub(whitespace_regex, "_", s)
if not s:
s = "_"
return s
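# Illustrative examples (values hypothetical), assuming the default
# pdf_filename_fields (["display_name"]) and separator ("-") from load_config():
#   slugify("rippled Setup Guide")  ->  "rippled_Setup_Guide"
#   slugify("日本語")                ->  "_"   (all non-ASCII stripped, so the fallback applies)
#   default_pdf_name({"name": "local", "display_name": "rippled Setup Guide"})
#       ->  "rippled_Setup_Guide.pdf"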
def substitute_links_for_target(soup, target):
"""Replaces local-html-links with appropriate substitutions
for the given target, and images likewise"""
target = get_target(target)
logger.info("... modifying links for target: %s" % target["name"])
# We actually want to get all pages, even the ones that aren't built as
# part of this target, in case those pages have replacement links.
pages = get_pages()
links = soup.find_all("a", href=re.compile(r"^[^.]+\.html"))
for link in links:
for page in pages:
if target["name"] in page:
#There's a replacement link for this env
local_url = page["html"]
target_url = page[target["name"]]
if link["href"][:len(local_url)] == local_url:
link["href"] = link["href"].replace(local_url,
target_url)
if "image_subs" in target:
images = soup.find_all("img")
for img in images:
local_path = img["src"]
if local_path in target["image_subs"]:
logger.info("... replacing image path '%s' with '%s'" %
(local_path, target["image_subs"][local_path]))
img["src"] = target["image_subs"][local_path]
image_links = soup.find_all("a",
href=re.compile(r"^[^.]+\.(png|jpg|jpeg|gif|svg)"))
for img_link in image_links:
local_path = img_link["href"]
if local_path in target["image_subs"]:
logger.info("... replacing image link '%s' with '%s'" %
(local_path, target["image_subs"][local_path]))
img_link["href"] = target["image_subs"][local_path]
def substitute_parameter_links(link_parameter, currentpage, target):
"""Some templates have links in page parameters. Do link substitution for
the target on one of those parameters."""
target = get_target(target)
# We actually want to get all pages, even the ones that aren't built as
# part of this target, in case those pages have replacement links.
pages = get_pages()
if link_parameter in currentpage:
linked_page = next(p for p in pages
if p["html"] == currentpage[link_parameter])
if target["name"] in linked_page:
#there's a link substitution available
currentpage[link_parameter] = linked_page[target["name"]]
## We could warn here, but it would frequently be a false alarm
# else:
# logging.warning("No substitution for %s[%s] for this target" %
# (currentpage["html"],link_parameter))
def get_target(target):
"""Get a target by name, or return the default target object.
We can't use default args in function defs because the default is
set at runtime based on config"""
if target == None:
return config["targets"][0]
if type(target) == str:
try:
return next(t for t in config["targets"] if t["name"] == target)
except StopIteration:
logger.critical("Unknown target: %s" % target)
exit(1)
if "name" in target:
# Eh, it's probably a target, just return it
return target
def make_adhoc_target(inpages, no_cover):
t = {
"name": ADHOC_TARGET,
"display_name": "(Untitled)",
}
if not no_cover:
indexpage = next(p for p in config["pages"]
if p["html"] == "index.html")
indexpage["targets"].append(ADHOC_TARGET)
if len(inpages) == 1:
t["display_name"] = guess_title_from_md_file(inpages[0])
for inpage in inpages:
# Figure out the actual filename and location of this infile
# and set the content source dir appropriately
in_dir, in_file = os.path.split(inpage)
config["content_path"] = in_dir
# Figure out what html file to output to
ENDS_IN_MD = re.compile("\.md$", re.I)
if re.search(ENDS_IN_MD, in_file):
out_html_file = re.sub(ENDS_IN_MD, ".html", in_file)
else:
out_html_file = in_file+".html"
# Try to come up with a reasonable page title
page_title = guess_title_from_md_file(inpage)
new_page = {
"name": page_title,
"md": in_file,
"html": out_html_file,
"targets": [ADHOC_TARGET],
"category": "Pages",
"pp_env": in_dir,
}
config["pages"].append(new_page)
config["targets"].append(t)
return t
def guess_title_from_md_file(filepath):
with open(filepath, "r") as f:
line1 = f.readline()
line2 = f.readline()
# look for headers in the "followed by ----- or ===== format"
ALT_HEADER_REGEX = re.compile("^[=-]{3,}$")
if ALT_HEADER_REGEX.match(line2):
possible_header = line1
if possible_header.strip():
return possible_header.strip()
# look for headers in the "## abc ## format"
HEADER_REGEX = re.compile("^#+\s*(.+[^#\s])\s*#*$")
m = HEADER_REGEX.match(line1)
if m:
possible_header = m.group(1)
if possible_header.strip():
return possible_header.strip()
#basically if the first line's not a markdown header, we give up and use
# the filename instead
return os.path.basename(filepath)
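# Illustrative examples (titles hypothetical):
#   a file starting with "# Some Title"                      -> "Some Title"
#   a file whose first line is underlined by ===== or -----  -> that first line
#   anything else                                            -> the file's basename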
def get_filters_for_page(page, target=None):
ffp = set(config["default_filters"])
target = get_target(target)
if "filters" in target:
ffp.update(target["filters"])
if "filters" in page:
ffp.update(page["filters"])
return ffp
def parse_markdown(page, target=None, pages=None, bypass_errors=False):
"""Take a markdown string and output HTML for that content"""
target = get_target(target)
logging.info("Preparing page %s" % page["name"])
# Preprocess Markdown using this Jinja environment
pp_env = setup_pp_env(page)
# We'll apply these filters to the page
page_filters = get_filters_for_page(page, target)
md = get_markdown_for_page(page["md"], pp_env=pp_env, target=target,
bypass_errors=bypass_errors, currentpage=page)
# Apply markdown-based filters here
for filter_name in page_filters:
if "filter_markdown" in dir(filters[filter_name]):
logging.info("... applying markdown filter %s" % filter_name)
md = filters[filter_name].filter_markdown(md, target=target,
page=page, config=config)
# Actually parse the markdown
logger.info("... parsing markdown...")
html = markdown(md, extensions=["markdown.extensions.extra",
"markdown.extensions.toc"],
lazy_ol=False)
# Apply raw-HTML-string-based filters here
for filter_name in page_filters:
if "filter_html" in dir(filters[filter_name]):
logging.info("... applying HTML filter %s" % filter_name)
html = filters[filter_name].filter_html(html, target=target,
page=page, config=config)
# Some filters would rather operate on a soup than a string.
# May as well parse once and re-serialize once.
soup = BeautifulSoup(html, "html.parser")
# Apply soup-based filters here
for filter_name in page_filters:
if "filter_soup" in dir(filters[filter_name]):
logging.info("... applying soup filter %s" % filter_name)
filters[filter_name].filter_soup(soup, target=target,
page=page, config=config)
# ^ the soup filters apply to the same object, passed by reference
# Replace links for any non-default target
if target["name"] != config["targets"][0]["name"]:
substitute_links_for_target(soup, target)
logging.info("... re-rendering HTML from soup...")
html2 = str(soup)
return html2
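# For reference, a filter is just a module named filter_<name>.py (imported in
# load_config above) that defines whichever of these hooks it needs; this is a
# minimal sketch of a hypothetical filter_example.py, called in the order shown:
#   def filter_markdown(md, target=None, page=None, config=None):
#       return md        # operates on the raw markdown text
#   def filter_html(html, target=None, page=None, config=None):
#       return html      # operates on the rendered HTML string
#   def filter_soup(soup, target=None, page=None, config=None):
#       pass             # modifies the BeautifulSoup tree in place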
def githubify_markdown(md, target=None, pages=None):
"""Github-friendly markdown has absolute links, no md in divs"""
MARKDOWN_LINK_REGEX = re.compile(
r"(\[([^\]]+)\]\(([^:)]+)\)|\[([^\]]+)\]:\s*(\S+)$)", re.MULTILINE)
target = get_target(target)
if not pages:
pages = get_pages(target["name"])
class MDLink:
"""A markdown link, either a reference link or inline link"""
def __init__(self, fullmatch, label, url, label2, url2):
self.fullmatch = fullmatch
if label:
self.label = label
self.url = url
self.is_reflink = False
elif label2:
self.label = label2
self.url = url2
self.is_reflink = True
def to_markdown(self):
"""Re-represent self as a link in markdown syntax"""
s = "[" + self.label + "]"
if self.is_reflink:
s += ": " + self.url
else:
s += "(" + self.url + ")"
return s
links = [MDLink(*m) for m in MARKDOWN_LINK_REGEX.findall(md)]
for link in links:
for page in pages:
if target["name"] in page:
#There's a replacement link for this
local_url = page["html"]
target_url = page[target["name"]]
if link.url[:len(local_url)] == local_url:
link.url = link.url.replace(local_url, target_url)
md = md.replace(link.fullmatch, link.to_markdown())
return md
def get_pages(target=None):
"""Read pages from config and return an object, optionally filtered
to just the pages that this target cares about"""
target = get_target(target)
pages = config["pages"]
if target["name"]:
#filter pages that aren't part of this target
def should_include(page, target_name):
if "targets" not in page:
return False
if target_name in page["targets"]:
return True
else:
return False
pages = [page for page in pages
if should_include(page, target["name"])]
# Pages should inherit non-reserved keys from the target
for p in pages:
for key,val in target.items():
if key in RESERVED_KEYS_TARGET:
continue
elif key not in p:
p[key] = val
return pages
def get_categories(pages):
"""Produce an ordered, de-duplicated list of categories from
the page list"""
categories = []
for page in pages:
if "category" in page and page["category"] not in categories:
categories.append(page["category"])
logger.info("categories: %s" % categories)
return categories
def read_markdown_local(filename, pp_env, target=None, bypass_errors=False, currentpage={}):
"""Read in a markdown file and pre-process any templating lang in it,
returning the parsed contents."""
target = get_target(target)
pages = get_pages(target)
logging.info("reading markdown from file: %s" % filename)
if config["skip_preprocessor"]:
fpath = pp_env.loader.searchpath[0]
with open(os.path.join(fpath,filename), "r") as f:
md_out = f.read()
else:
try:
md_raw = pp_env.get_template(filename)
md_out = md_raw.render(target=target, pages=pages, currentpage=currentpage)
except TemplateError as e:
traceback.print_tb(e.__traceback__)
if bypass_errors:
logging.warn("Error pre-processing page %s; trying to load it raw"
% filename)
fpath = pp_env.loader.searchpath[0]
with open(os.path.join(fpath,filename), "r") as f:
md_out = f.read()
else:
exit("Error pre-processing page %s: %s" % (filename, e))
return md_out
def read_markdown_remote(url):
"""Fetch a remote markdown file and return its contents"""
response = requests.get(url)
if response.status_code == 200:
return response.text
else:
raise requests.RequestException("Status code for page was not 200")
def get_markdown_for_page(md_where, pp_env=None, target=None, bypass_errors=False, currentpage={}):
"""Read/Fetch and pre-process markdown file"""
target = get_target(target)
if "http:" in md_where or "https:" in md_where:
try:
mdr = read_markdown_remote(md_where)
except requests.RequestException as e:
if bypass_errors:
mdr = ""
else:
traceback.print_tb(e.__traceback__)
exit("Error fetching page %s: %s" % (md_where, e))
return mdr
else:
return read_markdown_local(md_where, pp_env, target, bypass_errors, currentpage=currentpage)
def copy_static_files(template_static=True, content_static=True, out_path=None):
"""Copy static files to the output directory."""
if out_path == None:
out_path = config["out_path"]
if template_static:
template_static_src = config["template_static_path"]
template_static_dst = os.path.join(out_path,
os.path.basename(template_static_src))
copy_tree(template_static_src, template_static_dst)
if content_static:
content_static_src = config["content_static_path"]
content_static_dst = os.path.join(out_path,
os.path.basename(content_static_src))
copy_tree(content_static_src, content_static_dst)
def setup_pp_env(page=None):
if not page or "pp_dir" not in page:
pp_env = Environment(loader=FileSystemLoader(config["content_path"]))
else:
pp_env = Environment(loader=FileSystemLoader(page["pp_dir"]))
#Example: if we want to add custom functions to the md files
#pp_env.globals['foo'] = lambda x: "FOO %s"%x
return pp_env
def setup_html_env():
env = Environment(loader=FileSystemLoader(config["template_path"]))
env.lstrip_blocks = True
env.trim_blocks = True
return env
def toc_from_headers(html_string):
"""make a table of contents from headers"""
soup = BeautifulSoup(html_string, "html.parser")
headers = soup.find_all(name=re.compile("h[1-3]"), id=True)
toc_s = ""
for h in headers:
if h.name == "h1":
toc_level = "level-1"
elif h.name == "h2":
toc_level = "level-2"
else:
toc_level = "level-3"
new_a = soup.new_tag("a", href="#"+h["id"])
if h.string:
new_a.string = h.string
else:
new_a.string = " ".join(h.strings)
new_li = soup.new_tag("li")
new_li["class"] = toc_level
new_li.append(new_a)
toc_s += str(new_li)+"\n"
return str(toc_s)
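# Illustrative example (header hypothetical): a rendered header such as
#   <h2 id="get-started">Get Started</h2>
# produces the sidebar entry
#   <li class="level-2"><a href="#get-started">Get Started</a></li>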
def render_pages(target=None, for_pdf=False, bypass_errors=False):
"""Parse and render all pages in target, writing files to out_path."""
target = get_target(target)
pages = get_pages(target)
categories = get_categories(pages)
# Insert generated HTML into templates using this Jinja environment
env = setup_html_env()
if for_pdf:
if "pdf_template" in target:
logging.debug("reading pdf template %s from target..." % target["pdf_template"])
default_template = env.get_template(target["pdf_template"])
else:
logging.debug("reading default pdf template %s..." % config["pdf_template"])
default_template = env.get_template(config["pdf_template"])
else:
if "template" in target:
logging.debug("reading HTML template %s from target..." % target["template"])
default_template = env.get_template(target["template"])
else:
logging.debug("reading default HTML template %s..." % config["default_template"])
default_template = env.get_template(config["default_template"])
for currentpage in pages:
if "md" in currentpage:
# Read and parse the markdown
try:
html_content = parse_markdown(currentpage, target=target,
pages=pages, bypass_errors=bypass_errors)
except Exception as e:
if bypass_errors:
traceback.print_tb(e.__traceback__)
logging.warning( ("Skipping page %s " +
"due to error fetching contents: %s") %
(currentpage["name"], e) )
continue
else:
traceback.print_tb(e.__traceback__)
exit("Error when fetching page %s: %s" %
(currentpage["name"], e) )
else:
html_content = ""
# default to a table-of-contents sidebar...
if "sidebar" not in currentpage:
currentpage["sidebar"] = "toc"
if currentpage["sidebar"] == "toc":
sidebar_content = toc_from_headers(html_content)
else:
sidebar_content = None
# Prepare some parameters for rendering
substitute_parameter_links("doc_page", currentpage, target)
current_time = time.strftime("%B %d, %Y")
# Figure out which template to use
if "template" in currentpage and not for_pdf:
logging.info("using template %s from page" % currentpage["template"])
use_template = env.get_template(currentpage["template"])
elif "pdf_template" in currentpage and for_pdf:
logging.info("using pdf_template %s from page" % currentpage["pdf_template"])
use_template = env.get_template(currentpage["pdf_template"])
else:
use_template = default_template
# Render the content into the appropriate template
out_html = use_template.render(currentpage=currentpage,
categories=categories,
pages=pages,
content=html_content,
target=target,
current_time=current_time,
sidebar_content=sidebar_content)
if for_pdf:
out_path = config["temporary_files_path"]
else:
out_path = config["out_path"]
fileout = os.path.join(out_path, currentpage["html"])
if not os.path.isdir(out_path):
logging.info("creating build folder %s" % out_path)
os.makedirs(out_path)
with open(fileout, "w") as f:
logging.info("writing to file: %s..." % fileout)
f.write(out_html)
def watch(pdf_file, target):
"""Look for changed files and re-generate HTML (and optionally
PDF whenever there's an update. Runs until interrupted."""
target = get_target(target)
class UpdaterHandler(PatternMatchingEventHandler):
"""Updates to pattern-matched files means rendering."""
def on_any_event(self, event):
logging.info("got event!")
# bypass_errors=True because Watch shouldn't
# just die if a file is temporarily not found
if pdf_file:
make_pdf(pdf_file, target=target, bypass_errors=True)
else:
render_pages(target, bypass_errors=True)
logging.info("done rendering")
patterns = ["*template-*.html",
"*.md",
"*code_samples/*"]
event_handler = UpdaterHandler(patterns=patterns)
observer = Observer()
observer.schedule(event_handler, config["template_path"], recursive=True)
observer.schedule(event_handler, config["content_path"], recursive=True)
observer.start()
# The above starts an observing thread,
# so the main thread can just wait
try:
while True:
time.sleep(1)
except KeyboardInterrupt:
observer.stop()
observer.join()
def make_pdf(outfile, target=None, bypass_errors=False):
"""Use prince to convert several HTML files into a PDF"""
logging.info("rendering PDF-able versions of pages...")
target = get_target(target)
render_pages(target=target, for_pdf=True, bypass_errors=bypass_errors)
temp_files_path = config["temporary_files_path"]
# Prince will need the static files, so copy them over
copy_static_files(out_path=temp_files_path)
# Make sure the path we're going to write the PDF to exists
if not os.path.isdir(config["out_path"]):
logging.info("creating build folder %s" % config["out_path"])
os.makedirs(config["out_path"])
# Start preparing the prince command
args = [config["prince_executable"], '--javascript', '-o', outfile]
# Each HTML output file in the target is another arg to prince
pages = get_pages(target)
args += [os.path.join(temp_files_path, p["html"]) for p in pages]
logger.info("generating PDF: running %s..." % " ".join(args))
prince_resp = subprocess.check_output(args, universal_newlines=True)
print(prince_resp)
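# Illustrative command line (paths and filenames hypothetical): for a target
# whose pages render to index.html and concepts.html, the subprocess call above
# is equivalent to running
#   prince --javascript -o out/rippled_Setup_Guide.pdf tmp/index.html tmp/concepts.html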
def githubify(md_file_name, target=None):
"""Wrapper - make the markdown resemble GitHub flavor"""
target = get_target(target)
pages = get_pages()
logging.info("getting markdown for page %s" % md_file_name)
md = get_markdown_for_page(md_file_name,
pp_env=setup_pp_env(),
target=target)
logging.info("githubifying markdown...")
rendered_md = githubify_markdown(md, target=target, pages=pages)
if not os.path.isdir(config["out_path"]):
logging.info("creating build folder %s" % config["out_path"])
os.makedirs(config["out_path"])
fileout = os.path.join(config["out_path"], md_file_name)
logging.info("writing generated file to path: %s"%fileout)
with open(fileout, "w") as f:
f.write(rendered_md)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='Generate static site from markdown and templates.')
parser.add_argument("--watch", "-w", action="store_true",
help="Watch for changes and re-generate output. "+\
"This runs until force-quit.")
parser.add_argument("--pdf", nargs="?", type=str,
const=DEFAULT_PDF_FILE, default=NO_PDF,
help="Output a PDF to this file. Requires Prince.")
parser.add_argument("--githubify", "-g", type=str,
help="Output md prepared for GitHub")
parser.add_argument("--target", "-t", type=str,
help="Build for the specified target.")
parser.add_argument("--out_dir", "-o", type=str,
help="Output to this folder (overrides config file)")
parser.add_argument("--quiet", "-q", action="store_true",
help="Suppress status messages")
parser.add_argument("--bypass_errors", "-b", action="store_true",
help="Continue building if some contents not found")
parser.add_argument("--config", "-c", type=str,
help="Specify path to an alternate config file.")
parser.add_argument("--copy_static", "-s", action="store_true",
help="Copy static files to the out dir",
default=False)
parser.add_argument("--pages", type=str, help="Build markdown page(s) "+\
"that aren't described in the config.", nargs="+")
parser.add_argument("--no_cover", "-n", action="store_true",
help="(with --pages only) Don't automatically add a "+\
"cover page / index.html file.")
parser.add_argument("--skip_preprocessor", action="store_true", default=False,
help="Don't pre-process Jinja syntax in markdown files")
parser.add_argument("--title", type=str, help="Override target display "+\
"name. Useful when passing multiple args to --pages.")
parser.add_argument("--list_targets_only", "-l", action="store_true",
help="Don't build anything, just display list of "+
"known targets from the config file.")
cli_args = parser.parse_args()
if not cli_args.quiet:
logging.basicConfig(level=logging.INFO)
if cli_args.config:
load_config(cli_args.config)
else:
load_config()
if cli_args.list_targets_only:
for t in config["targets"]:
if "display_name" in t:
display_name = t["display_name"]
elif {"product","version","guide"} <= set(t.keys()):
display_name = " ".join([t["product"],t["version"],t["guide"]])
else:
display_name = ""
print("%s\t\t%s" % (t["name"], display_name))
#print(" ".join([t["name"] for t in config["targets"]]))
exit(0)
if cli_args.out_dir:
config["out_path"] = cli_args.out_dir
config["skip_preprocessor"] = cli_args.skip_preprocessor
if cli_args.pages:
make_adhoc_target(cli_args.pages, cli_args.no_cover)
cli_args.target = ADHOC_TARGET
if cli_args.title:
target = get_target(cli_args.target)
target["display_name"] = cli_args.title
if cli_args.githubify:
githubify(cli_args.githubify, cli_args.target)
if cli_args.copy_static:
copy_static_files(template_static=False, content_static=True)
exit(0)
if cli_args.pdf != NO_PDF:
if cli_args.pdf == DEFAULT_PDF_FILE:
pdf_path = os.path.join(config["out_path"],
default_pdf_name(cli_args.target))
elif cli_args.pdf[-4:] != ".pdf":
exit("PDF filename must end in .pdf")
else:
pdf_path = os.path.join(config["out_path"], cli_args.pdf)
logging.info("making a pdf...")
make_pdf(pdf_path, target=cli_args.target,
bypass_errors=cli_args.bypass_errors)
logging.info("pdf done")
else:
logging.info("rendering pages...")
render_pages(target=cli_args.target,
bypass_errors=cli_args.bypass_errors)
logging.info("done rendering")
if cli_args.copy_static:
logging.info("copying static pages...")
copy_static_files()
if cli_args.watch:
logging.info("watching for changes...")
if cli_args.pdf != NO_PDF:
# pdf_path was already set above when the PDF build ran
watch(pdf_path, cli_args.target)
else:
watch(None, cli_args.target)

View File

@@ -1,288 +0,0 @@
#!/usr/bin/env python3
import requests
import os
import yaml
import argparse
import logging
import re
from bs4 import BeautifulSoup
from time import time, sleep
DEFAULT_CONFIG_FILE = "dactyl-config.yml"
TIMEOUT_SECS = 9.1
CHECK_IN_INTERVAL = 30
FINAL_RETRY_DELAY = 4 * CHECK_IN_INTERVAL
soupsCache = {}
def getSoup(fullPath):
if fullPath in soupsCache.keys():
soup = soupsCache[fullPath]
else:
with open(fullPath, 'r') as f:
soup = BeautifulSoup(f.read(), "html.parser")
soupsCache[fullPath] = soup
return soup
def check_for_unparsed_reference_links(soup):
#unmatched_reflink_regex = re.compile(r"\[[^\]]+\]\[(\w| )*\]")
unmatched_reflink_regex = re.compile(r"(\[[^\]]+)?\]\[(\w| )*\]")
unparsed_links = []
for s in soup.strings:
m = re.search(unmatched_reflink_regex, s)
if m:
unparsed_links.append(m.group(0))
return unparsed_links
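# Illustrative example (link text hypothetical): if a reference-style link such
# as [the overview][] survives into the rendered HTML (usually because its
# "[the overview]: ..." definition was missing), the regex above flags it.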
def check_remote_url(endpoint, fullPath, broken_links, externalCache, isImg=False):
if isImg:
linkword = "image"
else:
linkword = "link"
if endpoint in [v for k,v in broken_links]:
# We already confirmed this was broken, so just add another instance
logging.warning("Broken %s %s appears again in %s" % (linkword, endpoint, fullPath))
broken_links.append( (fullPath, endpoint) )
return False
if endpoint in externalCache:
logging.debug("Skipping cached %s %s" % (linkword, endpoint))
return True
if endpoint in config["known_broken_links"]:
logging.warning("Skipping known broken %s %s in %s" % (linkword, endpoint, fullPath))
return True
logging.info("Testing remote %s URL %s"%(linkword, endpoint))
try:
code = requests.head(endpoint, timeout=TIMEOUT_SECS).status_code
except Exception as e:
logging.warning("Error occurred: %s" % e)
code = 500
if code == 405 or code == 404:
#HEAD didn't work, maybe GET will?
try:
code = requests.get(endpoint, timeout=TIMEOUT_SECS).status_code
except Exception as e:
logging.warning("Error occurred: %s" % e)
code = 500
if code < 200 or code >= 400:
logging.warning("Broken remote %s in %s to %s"%(linkword, fullPath, endpoint))
broken_links.append( (fullPath, endpoint) )
return False
else:
logging.info("...success.")
externalCache.append(endpoint)
return True
def checkLinks(offline=False):
externalCache = []
broken_links = []
num_links_checked = 0
last_checkin = time()
for dirpath, dirnames, filenames in os.walk(config["out_path"]):
if time() - last_checkin > CHECK_IN_INTERVAL:
last_checkin = time()
print("... still working (dirpath: %s) ..." % dirpath)
if os.path.abspath(dirpath) == os.path.abspath(config["template_path"]):
# don't try to parse and linkcheck the templates
continue
for fname in filenames:
if time() - last_checkin > CHECK_IN_INTERVAL:
last_checkin = time()
print("... still working (file: %s) ..." % fname)
fullPath = os.path.join(dirpath, fname)
if "/node_modules/" in fullPath or ".git" in fullPath:
logging.debug("skipping ignored dir: %s" % fullPath)
continue
if fullPath.endswith(".html"):
soup = getSoup(fullPath)
unparsed_links = check_for_unparsed_reference_links(soup)
if unparsed_links:
logging.warning("Found %d unparsed Markdown reference links: %s" %
(len(unparsed_links), "\n... ".join(unparsed_links)))
[broken_links.append( (fullPath, u) ) for u in unparsed_links]
links = soup.find_all('a')
for link in links:
if time() - last_checkin > CHECK_IN_INTERVAL:
last_checkin = time()
print("... still working (link: %s) ..." % link)
if "href" not in link.attrs:
#probably an <a name> type anchor, skip
continue
endpoint = link['href']
if not endpoint.strip():
logging.warning("Empty link in %s" % fullPath)
broken_links.append( (fullPath, endpoint) )
num_links_checked += 1
elif endpoint == "#":
continue
elif "mailto:" in endpoint:
logging.info("Skipping email link in %s to %s"%(fullPath, endpoint))
continue
elif "://" in endpoint:
if offline:
logging.info("Offline - Skipping remote URL %s"%(endpoint))
continue
num_links_checked += 1
check_remote_url(endpoint, fullPath, broken_links, externalCache)
elif '#' in endpoint:
if fname in config["ignore_anchors_in"]:
logging.info("Ignoring anchor %s in dynamic page %s"%(endpoint,fname))
continue
logging.info("Testing local link %s from %s"%(endpoint, fullPath))
num_links_checked += 1
filename,anchor = endpoint.split("#",1)
if filename == "":
fullTargetPath = fullPath
else:
fullTargetPath = os.path.join(dirpath, filename)
if not os.path.exists(fullTargetPath):
logging.warning("Broken local link in %s to %s"%(fullPath, endpoint))
broken_links.append( (fullPath, endpoint) )
elif filename in config["ignore_anchors_in"]:
#Some pages are populated dynamically, so BeautifulSoup wouldn't
# be able to find anchors in them anyway
logging.info("Skipping anchor link in %s to dynamic page %s" %
(fullPath, endpoint))
continue
elif fullTargetPath != "../":
num_links_checked += 1
targetSoup = getSoup(fullTargetPath)
if not targetSoup.find(id=anchor) and not targetSoup.find(
"a",attrs={"name":anchor}):
logging.warning("Broken anchor link in %s to %s"%(fullPath, endpoint))
broken_links.append( (fullPath, endpoint) )
else:
logging.info("...anchor found.")
continue
elif endpoint[0] == '/':
#can't really test links outside of the local files
logging.info("Skipping absolute link in %s to %s"%(fullPath, endpoint))
continue
else:
num_links_checked += 1
if not os.path.exists(os.path.join(dirpath, endpoint)):
logging.warning("Broken local link in %s to %s"%(fullPath, endpoint))
broken_links.append( (fullPath, endpoint) )
#Now check images
imgs = soup.find_all('img')
for img in imgs:
num_links_checked += 1
if "src" not in img.attrs or not img["src"].strip():
logging.warning("Broken image with no src in %s" % fullPath)
broken_links.append( (fullPath, img["src"]) )
continue
src = img["src"]
if "://" in src:
if offline:
logging.info("Offline - Skipping remote image %s"%(endpoint))
continue
check_remote_url(src, fullPath, broken_links, externalCache, isImg=True)
else:
logging.info("Checking local image %s in %s" % (src, fullPath))
if os.path.exists(os.path.join(dirpath, src)):
logging.info("...success")
else:
logging.warning("Broken local image %s in %s" % (src, fullPath))
broken_links.append( (fullPath, src) )
return broken_links, num_links_checked
def final_retry_links(broken_links):
"""Give the broken remote links a little while to recover in case they're just flaps"""
broken_remote_links = [ (page,link) for page,link in broken_links
if re.match(r"^https?://", link) ]
if not broken_remote_links:
logging.info("(no http/https broken links to retry)")
return
logging.info("Waiting %d seconds to retry broken %d remote links..."
% (FINAL_RETRY_DELAY, len(broken_remote_links)))
start_wait = time()
elapsed = 0
while elapsed < FINAL_RETRY_DELAY:
sleep(CHECK_IN_INTERVAL)
print("...")
elapsed = time() - start_wait
retry_cache = []
retry_broken = []
for page, link in broken_remote_links:
link_works = check_remote_url(link, page, retry_broken, retry_cache)
if link_works:
logging.info("Link %s in page %s is back online" % (link, page))
broken_links.remove( (page,link) )
else:
logging.info("Link %s in page %s is still down." % (link, page))
def load_config(config_file=DEFAULT_CONFIG_FILE):
"""Reload config from a YAML file."""
global config
logging.info("loading config file %s..." % config_file)
with open(config_file, "r") as f:
config = yaml.load(f)
assert(config["out_path"])
assert(type(config["known_broken_links"]) == list)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='Check files in this repository for broken links.')
parser.add_argument("-o", "--offline", action="store_true",
help="Check local anchors only")
parser.add_argument("-s", "--strict", action="store_true",
help="Exit with error even on known problems")
parser.add_argument("--config", "-c", type=str,
help="Specify path to an alternate config file.")
parser.add_argument("-n", "--no_final_retry", action="store_true",
help="Don't wait and retry failed remote links at the end.")
parser.add_argument("--quiet", "-q", action="store_true",
help="Reduce output to just failures and final report")
args = parser.parse_args()
if not args.quiet:
logging.basicConfig(level=logging.INFO)
if args.config:
load_config(args.config)
else:
load_config()
broken_links, num_links_checked = checkLinks(args.offline)
if not args.no_final_retry and not args.offline:
final_retry_links(broken_links)
#^ sleeps for FINAL_RETRY_DELAY and then retries remote links
# Automatically removes from broken_links if they work now
print("---------------------------------------")
print("Link check report. %d links checked."%num_links_checked)
if not args.strict:
unknown_broken_links = [ (page,link) for page,link in broken_links
if link not in config["known_broken_links"] ]
if not broken_links:
print("Success! No broken links found.")
else:
print("%d broken links found:"%(len(broken_links)))
[print("File:",fname,"Link:",link) for fname,link in broken_links]
if args.strict or unknown_broken_links:
exit(1)
print("Success - all broken links are known problems.")

View File

@@ -1,170 +0,0 @@
#!/usr/bin/env python3
###############################################################################
## Dactyl Style Police ##
## Author: Rome Reginelli ##
## Copyright: Ripple Labs, Inc. 2016 ##
## ##
## Reads the markdown files to try and enforce elements of good style. ##
###############################################################################
import logging
import argparse
#import nltk
import re
import collections
import yaml
from bs4 import BeautifulSoup
from bs4 import Comment
from bs4 import NavigableString
import dactyl_build
DEFAULT_CONFIG_FILE = "dactyl-config.yml"
OVERRIDE_COMMENT_REGEX = r" *STYLE_OVERRIDE: *([\w, -]+)"
logger = logging.getLogger()
def load_config(config_file=DEFAULT_CONFIG_FILE):
global config
dactyl_build.load_config(config_file)
config = dactyl_build.config
if "word_substitutions_file" in config:
with open(config["word_substitutions_file"], "r") as f:
config["disallowed_words"] = yaml.load(f)
else:
logging.warning("No 'word_substitutions_file' found in config.")
if "phrase_substitutions_file" in config:
with open(config["phrase_substitutions_file"], "r") as f:
config["disallowed_phrases"] = yaml.load(f)
else:
logging.warning("No 'phrase_substitutions_file' found in config.")
def tokenize(passage):
words = re.split(r"[\s,.;()!'\"]+", passage)
return [w for w in words if w]
def depunctuate(passage):
punctuation = re.compile(r"[,.;()!'\"]")
return re.sub(punctuation, "", passage)
def check_all_pages(target=None):
"""Reads all pages for a target and checks them for style."""
target = dactyl_build.get_target(target)
pages = dactyl_build.get_pages(target)
pp_env = dactyl_build.setup_pp_env()
print("Style Checker - checking all pages in target %s" % target["name"])
style_issues = []
for page in pages:
if "md" not in page:
# Not a doc page, move on
continue
logging.info("Checking page %s" % page["name"])
page_issues = []
html = dactyl_build.parse_markdown(page, pages=pages, target=target)
soup = BeautifulSoup(html, "html.parser")
overrides = get_overrides(soup)
content_elements = ["p","li","a","em","strong","th","td",
"h1","h2","h3","h4","h5","h6"]
for el in soup.descendants:
if (type(el) == NavigableString and
el.parent.name in content_elements and
str(el).strip()):
passage = str(el).strip()
passage_issues = check_passage(passage, overrides)
if passage_issues:
page_issues += passage_issues
#print("'%s' (%s)" % (el, el.parent.name))
# for el in soup.find_all(content_elements):
# for passage in el.stripped_strings:
# passage_issues = check_passage(passage, overrides)
# if passage_issues:
# page_issues += passage_issues
if page_issues:
style_issues.append( (page["name"], page_issues) )
return style_issues
def get_overrides(soup):
overrides = []
comments = soup.find_all(string=lambda text:isinstance(text,Comment))
for comment in comments:
m = re.match(OVERRIDE_COMMENT_REGEX, comment)
if m:
new_overrides = m.group(1).split(",")
new_overrides = [o.strip() for o in new_overrides]
logging.info("Overrides found: %s" % new_overrides)
overrides += new_overrides
return overrides
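# Illustrative example (rule names hypothetical): a page can silence specific
# style rules with an HTML comment in its source, for example
#   <!-- STYLE_OVERRIDE: utilize, in order to -->
# which this function returns as ["utilize", "in order to"].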
def check_passage(passage, overrides):
"""Checks an individual string of text for style issues."""
issues = []
logging.debug("Checking passage %s" % passage)
#tokens = nltk.word_tokenize(passage)
tokens = tokenize(passage)
for t in tokens:
if t.lower() in config["disallowed_words"]:
if t.lower() in overrides:
logging.info("Unplain word violation %s overridden" % t)
continue
issues.append( ("Unplain Word", t.lower()) )
for phrase,sub in config["disallowed_phrases"].items():
if phrase.lower() in depunctuate(passage).lower():
if phrase.lower() in overrides:
logging.info("Unplain phrase violation %s overridden" % t)
continue
#logging.warn("Unplain phrase: %s; suggest %s instead" % (phrase, sub))
issues.append( ("Unplain Phrase", phrase.lower()) )
return issues
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Check content files for style issues.")
parser.add_argument("--config", "-c", type=str,
help="Specify path to an alternate config file.")
parser.add_argument("--verbose", "-v", action="store_true",
help="Show status messages")
parser.add_argument("--target", "-t", type=str,
help="Check the specified target.")
cli_args = parser.parse_args()
if cli_args.verbose:
logging.basicConfig(level=logging.INFO)
if cli_args.config:
load_config(cli_args.config)
else:
load_config()
issues = check_all_pages(target=cli_args.target)
if issues:
num_issues = sum(len(p[1]) for p in issues)
print("Found %d issues:" % num_issues)
for pagename,issuelist in issues:
print("Page: %s" % pagename)
c = collections.Counter(issuelist)
for i, count_i in c.items():
if i[0]=="Unplain Phrase":
print(" Discouraged phrase: %s (%d instances); suggest '%s' instead." %
( i[1], count_i, config["disallowed_phrases"][i[1].lower()] ))
elif i[0]=="Unplain Word":
print(" Discouraged word: %s (%d instances); suggest '%s' instead." %
( i[1], count_i, config["disallowed_words"][i[1].lower()] ))
else:
print(" %s: %s (%d instances)" % (i[0], i[1], count_i))
exit(1)
else:
print("Style check passed with flying colors!")
exit(0)

View File

@@ -1,31 +0,0 @@
################################################################################
## Add version to markdown filter ##
## Author: Rome Reginelli ##
## Copyright: Ripple Labs, Inc. 2016 ##
## ##
## Adds a message to the beginning of a file with a version number, based on ##
## the URL of the remotely-fetched markdown. ##
################################################################################
import re
import logging
def filter_markdown(md, target=None, page=None, config=None):
"""Finds the version number and adds it to the start of the page."""
version_regex = r"https://raw.githubusercontent.com/([A-Za-z0-9_.-]+)/([A-Za-z0-9_.-]+)/([A-Za-z0-9_-]+\.[A-Za-z0-9_.-]+)/.+\.md"
try:
version_match = re.match(version_regex, page["md"])
except (TypeError, KeyError):
logging.warning("couldn't get MD path from page %s" % page)
return md
try:
github_owner = version_match.group(1)
github_project = version_match.group(2)
vnum = version_match.group(3)
url = "https://github.com/%s/%s/releases/%s" % (github_owner, github_project, vnum)
md = ("<p style='margin-top: 1em; font-style: italic'>Updated for <a href='%s' title='view on GitHub'>version %s</a></p>"%(url, vnum))+md
except AttributeError:
logging.warning("version regex didn't match: %s" % version_match)
return md
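# Illustrative example (URL hypothetical): for a page fetched from
#   https://raw.githubusercontent.com/ripple/ripple-lib/0.17.2/docs/index.md
# the regex captures owner "ripple", project "ripple-lib", and version "0.17.2",
# so the filter prepends an "Updated for version 0.17.2" notice linking to
#   https://github.com/ripple/ripple-lib/releases/0.17.2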

View File

@@ -1,44 +0,0 @@
################################################################################
## Badges filter ##
## Author: Rome Reginelli ##
## Copyright: Ripple Labs, Inc. 2016 ##
## ##
## Looks for links whose title text matches BADGE_<COLOR> and makes them into ##
## badges. The link text must be in the form <badgelefthalf>:<badgerighthalf>; ##
## the left half can't contain a colon. ##
################################################################################
import re
import logging
from urllib.parse import quote as urlescape
BADGE_REGEX = re.compile("BADGE_(BRIGHTGREEN|GREEN|YELLOWGREEN|YELLOW|ORANGE|RED|LIGHTGREY|BLUE|[0-9A-Fa-f]{6})")
def filter_soup(soup, target=None, page=None, config=None):
"""replace underscores with dashes in h1,h2,etc. for backwards compatibility"""
badge_links = soup.find_all(name="a", title=BADGE_REGEX)
for b in badge_links:
badge_label = b.string
if not badge_label:
badge_label = "".join(b.strings)
if not badge_label:
logging.warning("Badge link with no string: %s" % b)
continue
if ":" not in badge_label:
logging.warning("Badge link specified with no ':' in link: %s" % b.string)
continue
badge_color = BADGE_REGEX.match(b["title"]).group(1).lower()
badge_left, badge_right = [urlescape(s.strip()).replace("-","--")
for s in badge_label.split(":", 1)]
badge_url = "https://img.shields.io/badge/%s-%s-%s.svg" % (
badge_left, badge_right, badge_color)
img = soup.new_tag("img", src=badge_url, alt=badge_label)
img["class"]="dactyl_badge"
b.clear()
b.append(img)
b["title"] = badge_label
if not b["href"]:
del b["href"]

View File

@@ -1,18 +0,0 @@
################################################################################
## Buttonize links ##
## Author: Rome Reginelli ##
## Copyright: Ripple Labs, Inc. 2016 ##
## ##
## Looks for links ending in >, and adds a "button" class to those links so ##
## they can be styled like buttons in the page. ##
################################################################################
import re
def filter_soup(soup, target=None, page=None, config=None):
"""make links ending in > render like buttons"""
buttonlinks = soup.find_all("a", string=re.compile(">$"))
for link in buttonlinks:
if "class" in link.attrs:
link["class"].append("button")
else:
link["class"] = "button"

View File

@@ -1,27 +0,0 @@
################################################################################
## Callouts filter ##
## Author: Rome Reginelli ##
## Copyright: Ripple Labs, Inc. 2016 ##
## ##
## Looks for sections starting **Note:** or **Caution:** and gives them CSS ##
## classes like "callout note" so they can be styled accordinglyselfselfself. ##
################################################################################
import re
CALLOUT_CLASS_MAPPING = {
"note": "devportal-callout note",
"warning": "devportal-callout warning",
"caution": "devportal-callout caution",
"tip": "devportal-callout tip",
}
def filter_soup(soup, target=None, page=None, config=None):
"""replace underscores with dashes in h1,h2,etc. for backwards compatibility"""
callout_intro = re.compile(r"(Note|Warning|Tip|Caution):?$", re.I)
callouts = soup.find_all(name=["strong","em"], string=callout_intro)
for c in callouts:
if not c.previous_sibling: #This callout starts a block
callout_type = c.string.replace(":","").lower()
if callout_type in CALLOUT_CLASS_MAPPING:
c.parent["class"] = CALLOUT_CLASS_MAPPING[callout_type]
#c.parent["class"] = "callout %s" % callout_type

View File

@@ -1,25 +0,0 @@
################################################################################
## Add Markdown Class to Divs filter ##
## Author: Rome Reginelli ##
## Copyright: Ripple Labs, Inc. 2016 ##
## ##
## Finds raw divs in the markdown and adds the markdown=1 attribute to them ##
## so that HTML inside those divs gets parsed as markdown. ##
## Some flavors of markdown do this automatically, so this provides ##
## compatibility with those. ##
################################################################################
import re
import logging
def filter_markdown(md, target=None, page=None, config=None):
"""Python markdown requires markdown="1" on HTML block elements
that contain markdown. AND there's a bug where if you use
markdown.extensions.extra, it replaces code fences in HTML
block elements with garbled text."""
def add_markdown_class(m):
if m.group(0).find("markdown=") == -1:
return m.group(1) + ' markdown="1">'
else:
return m.group(0)
logger.info("... adding markdown class to embedded divs...")
md = re.sub(r"(<div[^>]*)>", add_markdown_class, md)
return md
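# Illustrative example (attributes hypothetical):
#   <div class="interactive">   becomes   <div class="interactive" markdown="1">
# while a div that already has a markdown= attribute is left unchanged.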

View File

@@ -1,61 +0,0 @@
################################################################################
## Multicode Tabs 2 filter ##
## Author: Rome Reginelli ##
## Copyright: Ripple Labs, Inc. 2016 ##
## ##
## Finds multicode tab sections and turns them into properly-formatted ##
## HTML syntax to use with minitabs jQuery ##
################################################################################
import re
import logging
def filter_html(html, target=None, page=None, config=None):
"""Turn multicode comments into a div (after markdown inside is parsed)"""
MC_START_REGEX = re.compile(r"<!--\s*MULTICODE_BLOCK_START\s*-->")
MC_END_REGEX = re.compile(r"<!--\s*MULTICODE_BLOCK_END\s*-->")
html = re.sub(MC_START_REGEX, "<div class='multicode'>", html)
html = re.sub(MC_END_REGEX, "</div>", html)
return html
def filter_soup(soup, target=None, page=None, config=None):
"""Turn a multicode block into the correct syntax for minitabs"""
multicodes = soup.find_all(class_="multicode")
index1 = 0
for cb_area in multicodes:
cb_area["id"] = "code-%d" % index1
codetabs_ul = soup.new_tag("ul")
codetabs_ul["class"] = "codetabs"
cb_area.insert(0,codetabs_ul)
pres = cb_area.find_all("pre")
index2 = 0
for pre in pres:
#make a unique ID for this code sample
linkid = "code-%d-%d" % (index1, index2)
#wrap this code sample in an ID'd div
code_sample_wrapper = soup.new_tag("div", id=linkid)
code_sample_wrapper["class"] = "code_sample"
code_sample_wrapper["style"] = "position: static;"
pre.wrap(code_sample_wrapper)
#add a link to the tabs ul
linkback = soup.new_tag("a", href=("#%s" % linkid))
linkback_li = soup.new_tag("li")
linkback_li.append(linkback)
codetabs_ul.append(linkback_li)
#find the text label for this sample
prev_p = code_sample_wrapper.find_previous_sibling("p")
try:
label = "".join(prev_p.em.strings)
except AttributeError:
label = "Code Sample %d-%d" % (index1, index2)
linkback.string = label
prev_p.decompose()
index2 += 1
index1 += 1
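# Illustrative source layout (labels and samples hypothetical): in the Markdown,
# a tabbed code area is written roughly as
#   <!-- MULTICODE_BLOCK_START -->
#   *WebSocket*
#       {"command": "server_info"}
#   *JSON-RPC*
#       {"method": "server_info"}
#   <!-- MULTICODE_BLOCK_END -->
# After rendering, each <pre> is wrapped in a div.code_sample, the *emphasized*
# paragraph before it becomes that tab's label, and a <ul class="codetabs"> of
# links to the samples is inserted at the top of the div.multicode.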

View File

@@ -1,20 +0,0 @@
################################################################################
## Remove doctoc filter ##
## Author: Rome Reginelli ##
## Copyright: Ripple Labs, Inc. 2016 ##
## ##
## Removes an automatically-generated "doctoc" table of contents, as ##
## delineated by HTML comments, from the markdown source. ##
################################################################################
def filter_markdown(md, target=None, page=None, config=None):
"""Strip out doctoc Table of Contents for RippleAPI"""
DOCTOC_START = "<!-- START doctoc generated TOC please keep comment here to allow auto update -->"
DOCTOC_END = "<!-- END doctoc generated TOC please keep comment here to allow auto update -->"
doctoc_start_i = md.find(DOCTOC_START)
doctoc_end_i = md.find(DOCTOC_END)
if doctoc_start_i != -1 and doctoc_end_i != -1:
md = md[:doctoc_start_i]+md[doctoc_end_i+len(DOCTOC_END):]
return md

View File

@@ -1,17 +0,0 @@
################################################################################
## Standardize header IDs ##
## Author: Rome Reginelli ##
## Copyright: Ripple Labs, Inc. 2016 ##
## ##
## Replaces underscores with dashes in h1,h2,... element IDs. This provides ##
## compatibility with some other flavors of markdown that generate HTML IDs ##
## differently. ##
################################################################################
import re
def filter_soup(soup, target=None, page=None, config=None):
"""replace underscores with dashes in h1,h2,etc. for backwards compatibility"""
headers = soup.find_all(name=re.compile("h[0-9]"), id=True)
for h in headers:
if "_" in h["id"]:
h["id"] = h["id"].replace("_", "-")

View File

@@ -1,142 +0,0 @@
################################################################################
## XRefs: Intelligent Crossreferences filter ##
## Author: Rome Reginelli ##
## Copyright: Ripple Labs, Inc. 2017 ##
## ##
## Looks for syntax matching the following format: ##
## [optional text](XREF: some-link.html#fragment) ##
## and interprets it as cross-references. If some-link.html is a file in the ##
## current target it becomes a normal hyperlink. If the link text is [] (that ##
## is, blank) it gets replaced with the title of the page. ##
## (Note: we can't look up section titles as that would require parsing the ##
## cross-referenced page and could lead to an infinite recursion loop if two ##
## pages cross-referenced each other.) ##
## If the file isn't part of the current target but is part of another ##
## target, it becomes a non-hyperlink cross reference to the page in the ##
## first target that DOES have it. For example: ##
## "Some Link Title" in _A Target Containing Some Link_ ##
################################################################################
import re
from logging import warning
# match anything starting with XREF:/xref:, split by the # if there is one
# dropping any excess whitespace
xref_regex = re.compile(r"^\s*xref:\s*(?P<xref_file>[^#]+)(?P<xref_frag>#\S+)?\s*?$", re.I)
def find_file_in_target(fname, targetname, config):
if fname[-3:] == ".md":
# look by markdown file first
for page in config["pages"]:
if "md" not in page:
continue
elif ("/" in fname and page["md"] == fname # try to match md file by exact path
and targetname in page.get("targets",[]) # the page appears in this target
and page.get("html","") ): # and finally, the page has an html filename
return page
elif ( page["md"].split("/")[-1] == fname # match md filename in any directory
and targetname in page.get("targets",[])
and page.get("html","") ):
return page
for page in config["pages"]:
if "html" not in page:
continue
elif page["html"] != fname:
continue
if targetname in page["targets"]:
return page
else:
return False
def find_file_in_any_target(fname, config):
if fname[-3:] == ".md":
#print("finding in any target by md")
# look by markdown file first
for page in config["pages"]:
if "md" not in page:
continue
elif ("/" in fname and page["md"] == fname # try to match md file by exact path
and page.get("targets",[]) # page must appear in some target
and page.get("html","") ): # and page must have an html filename
return page
elif ( page["md"].split("/")[-1] == fname # match md filename in any folder
and page.get("targets",[])
and page.get("html","") ):
return page
# look by HTML file if it didn't end in .md or if we didn't find it yet
for page in config["pages"]:
if "html" not in page:
continue
elif page["html"] == fname and page["targets"]:
#page has to have "some" target(s) for it to be worthwhile
return page
else:
return False
def lookup_display_name(targetname, config):
for t in config["targets"]:
if "name" in t and t["name"] == targetname:
display_name = "%s %s %s %s %s" % (
t.get("display_name", ""),
t.get("product", ""),
t.get("version", ""),
t.get("guide", ""),
t.get("subtitle", "")
)
if display_name.strip():
return display_name
else:
warning("Target has no display_name/product/version/guide: %s" % targetname)
return targetname
else:
warning("Target not found: %s" % targetname)
return targetname
def filter_soup(soup, target={"name":""}, page=None, config={"pages":[]}):
"""Look for cross-references and replace them with not-hyperlinks if they
don't exist in the current target."""
xrefs = soup.find_all(href=xref_regex)
#print("Crossreferences:", xrefs)
#print("Target pages:", target["pages"])
for xref in xrefs:
m = xref_regex.match(xref.attrs["href"])
xref_file = m.group("xref_file")
xref_frag = m.group("xref_frag") or ""
xref_page = find_file_in_target(xref_file, target["name"], config)
if xref_page == False:
# Cross-referenced page isn't part of this target
xref_page = find_file_in_any_target(xref_file, config)
if not xref_page:
raise KeyError(("xref to missing file: '%s'. Maybe it's not in the Dactyl config file?")%xref_file)
xref_target_shortname = xref_page["targets"][0]
ref_target = lookup_display_name(xref_target_shortname, config)
link_label = " ".join([s for s in xref.stripped_strings])
# If a link label wasn't provided, generate one from the page name
if not link_label.strip():
link_label = xref_page["name"]
link_label = link_label.strip()
# "Link Label" in _Target Display Name_
span = soup.new_tag("span")
span.attrs["class"] = "dactyl_xref"
span.string = '"%s" in the ' % link_label
em = soup.new_tag("em")
em.string = ref_target
span.append(em)
xref.replace_with(span)
else:
# The xref is on-target
# First fix the hyperlink. Use the HTML (in case of link-by-md):
xref.attrs["href"] = xref_page["html"]+xref_frag
# If this link's label is only whitespace, fix it
if not [s for s in xref.stripped_strings]:
#print("replacing label for xref", xref)
#print("stripped_strings was", [s for s in xref.stripped_strings])
xref.string = xref_page["name"]

View File

@@ -1,5 +0,0 @@
Jinja2==2.8
Markdown==2.6.2
watchdog==0.8.3
requests==2.8.1
beautifulsoup4==4.4.1