From 9609ccdff310606f7ba0adb1ada6d72fc2e4e937 Mon Sep 17 00:00:00 2001 From: mDuo13 Date: Mon, 6 Feb 2017 17:42:06 -0800 Subject: [PATCH] Dactyl - xrefs filter; pass config to filters; fix pages inheriting pages array --- tool/dactyl_build.py | 10 +- tool/filter_add_version.py | 2 +- tool/filter_badges.py | 2 +- tool/filter_buttonize.py | 2 +- tool/filter_callouts.py | 2 +- tool/filter_markdown_in_divs.py | 2 +- tool/filter_multicode_tabs.py | 4 +- tool/filter_remove_doctoc.py | 2 +- tool/filter_standardize_header_ids.py | 2 +- tool/filter_xrefs.py | 142 ++++++++++++++++++++++++++ 10 files changed, 158 insertions(+), 12 deletions(-) create mode 100644 tool/filter_xrefs.py diff --git a/tool/dactyl_build.py b/tool/dactyl_build.py index e5b2be33a7..ef1aaccc9a 100755 --- a/tool/dactyl_build.py +++ b/tool/dactyl_build.py @@ -47,6 +47,7 @@ RESERVED_KEYS_TARGET = [ "display_name", "filters", "image_subs", + "pages", ] ADHOC_TARGET = "__ADHOC__" DEFAULT_PDF_FILE = "__DEFAULT_FILENAME__" @@ -310,7 +311,8 @@ def parse_markdown(page, target=None, pages=None, bypass_errors=False): for filter_name in page_filters: if "filter_markdown" in dir(filters[filter_name]): logging.info("... applying markdown filter %s" % filter_name) - md = filters[filter_name].filter_markdown(md, target=target, page=page) + md = filters[filter_name].filter_markdown(md, target=target, + page=page, config=config) # Actually parse the markdown logger.info("... parsing markdown...") @@ -322,7 +324,8 @@ def parse_markdown(page, target=None, pages=None, bypass_errors=False): for filter_name in page_filters: if "filter_html" in dir(filters[filter_name]): logging.info("... applying HTML filter %s" % filter_name) - html = filters[filter_name].filter_html(html, target=target, page=page) + html = filters[filter_name].filter_html(html, target=target, + page=page, config=config) # Some filters would rather operate on a soup than a string. # May as well parse once and re-serialize once. 
@@ -332,7 +335,8 @@ def parse_markdown(page, target=None, pages=None, bypass_errors=False): for filter_name in page_filters: if "filter_soup" in dir(filters[filter_name]): logging.info("... applying soup filter %s" % filter_name) - filters[filter_name].filter_soup(soup, target=target, page=page) + filters[filter_name].filter_soup(soup, target=target, + page=page, config=config) # ^ the soup filters apply to the same object, passed by reference # Replace links for any non-default target diff --git a/tool/filter_add_version.py b/tool/filter_add_version.py index 5b6776a249..289a182f33 100644 --- a/tool/filter_add_version.py +++ b/tool/filter_add_version.py @@ -9,7 +9,7 @@ import re import logging -def filter_markdown(md, target=None, page=None): +def filter_markdown(md, target=None, page=None, config=None): """Finds the version number and adds it to the start of the page.""" version_regex = r"https://raw.githubusercontent.com/([A-Za-z0-9_.-]+)/([A-Za-z0-9_.-]+)/([A-Za-z0-9_-]+\.[A-Za-z0-9_.-]+)/.+\.md" diff --git a/tool/filter_badges.py b/tool/filter_badges.py index 434d15bf3c..589da446c2 100644 --- a/tool/filter_badges.py +++ b/tool/filter_badges.py @@ -13,7 +13,7 @@ from urllib.parse import quote as urlescape BADGE_REGEX = re.compile("BADGE_(BRIGHTGREEN|GREEN|YELLOWGREEN|YELLOW|ORANGE|RED|LIGHTGREY|BLUE|[0-9A-Fa-f]{6})") -def filter_soup(soup, target=None, page=None): +def filter_soup(soup, target=None, page=None, config=None): """replace underscores with dashes in h1,h2,etc. 
for backwards compatibility""" badge_links = soup.find_all(name="a", title=BADGE_REGEX) diff --git a/tool/filter_buttonize.py b/tool/filter_buttonize.py index 2a0d5ad24d..edda8d69a5 100644 --- a/tool/filter_buttonize.py +++ b/tool/filter_buttonize.py @@ -8,7 +8,7 @@ ################################################################################ import re -def filter_soup(soup, target=None, page=None): +def filter_soup(soup, target=None, page=None, config=None): """make links ending in > render like buttons""" buttonlinks = soup.find_all("a", string=re.compile(">$")) for link in buttonlinks: diff --git a/tool/filter_callouts.py b/tool/filter_callouts.py index 5175e280ef..6fa83e423c 100644 --- a/tool/filter_callouts.py +++ b/tool/filter_callouts.py @@ -15,7 +15,7 @@ CALLOUT_CLASS_MAPPING = { "tip": "devportal-callout tip", } -def filter_soup(soup, target=None, page=None): +def filter_soup(soup, target=None, page=None, config=None): """replace underscores with dashes in h1,h2,etc. for backwards compatibility""" callout_intro = re.compile(r"(Note|Warning|Tip|Caution):?$", re.I) callouts = soup.find_all(name=["strong","em"], string=callout_intro) diff --git a/tool/filter_markdown_in_divs.py b/tool/filter_markdown_in_divs.py index 6618828f52..890b71c2f7 100644 --- a/tool/filter_markdown_in_divs.py +++ b/tool/filter_markdown_in_divs.py @@ -9,7 +9,7 @@ ## compatibility with those. ## ################################################################################ -def filter_markdown(md, target=None, page=None): +def filter_markdown(md, target=None, page=None, config=None): """Python markdown requires markdown="1" on HTML block elements that contain markdown. 
AND there's a bug where if you use markdown.extensions.extra, it replaces code fences in HTML diff --git a/tool/filter_multicode_tabs.py b/tool/filter_multicode_tabs.py index 078e1ee256..5c5ed9819c 100644 --- a/tool/filter_multicode_tabs.py +++ b/tool/filter_multicode_tabs.py @@ -9,7 +9,7 @@ import re import logging -def filter_html(html, target=None, page=None): +def filter_html(html, target=None, page=None, config=None): """Turn multicode comments into a div (after markdown inside is parsed)""" MC_START_REGEX = re.compile(r"") MC_END_REGEX = re.compile(r"") @@ -18,7 +18,7 @@ def filter_html(html, target=None, page=None): html = re.sub(MC_END_REGEX, "", html) return html -def filter_soup(soup, target=None, page=None): +def filter_soup(soup, target=None, page=None, config=None): """Turn a multicode block into the correct syntax for minitabs""" multicodes = soup.find_all(class_="multicode") index1 = 0 diff --git a/tool/filter_remove_doctoc.py b/tool/filter_remove_doctoc.py index 64acf75f9b..25b52cdf75 100644 --- a/tool/filter_remove_doctoc.py +++ b/tool/filter_remove_doctoc.py @@ -8,7 +8,7 @@ ################################################################################ -def filter_markdown(md, target=None, page=None): +def filter_markdown(md, target=None, page=None, config=None): """Strip out doctoc Table of Contents for RippleAPI""" DOCTOC_START = "" DOCTOC_END = "" diff --git a/tool/filter_standardize_header_ids.py b/tool/filter_standardize_header_ids.py index 2748065374..7cb3d8c4fe 100644 --- a/tool/filter_standardize_header_ids.py +++ b/tool/filter_standardize_header_ids.py @@ -9,7 +9,7 @@ ################################################################################ import re -def filter_soup(soup, target=None, page=None): +def filter_soup(soup, target=None, page=None, config=None): """replace underscores with dashes in h1,h2,etc. 
for backwards compatibility""" headers = soup.find_all(name=re.compile("h[0-9]"), id=True) for h in headers: diff --git a/tool/filter_xrefs.py b/tool/filter_xrefs.py new file mode 100644 index 0000000000..e7ae1abe72 --- /dev/null +++ b/tool/filter_xrefs.py @@ -0,0 +1,142 @@ +################################################################################ +## XRefs: Intelligent Crossreferences filter ## +## Author: Rome Reginelli ## +## Copyright: Ripple Labs, Inc. 2017 ## +## ## +## Looks for syntax matching the following format: ## +## [optional text](XREF: some-link.html#fragment) ## +## and interprets it as cross-references. If some-link.html is a file in the ## +## current target it becomes a normal hyperlink. If the link text is [] (that ## +## is, blank) it gets replaced with the title of the page. ## +## (Note: we can't look up section titles as that would require parsing the ## +## cross-referenced page and could lead to an infinite recursion loop if two ## +## pages cross-referenced each other.) ## +## If the file isn't part of the current target but is part of another ## +## target, it becomes a non-hyperlink cross reference to the page in the ## +## first target that DOES have it. 
## For example:                                                               ##
##     "Some Link Title" in _A Target Containing Some Link_                   ##
################################################################################
import re
from logging import warning

# Matches an href of the form "XREF: some-file.html#fragment" (case-insensitive
# prefix). The part before any "#" is captured as "xref_file" and the optional
# "#fragment" as "xref_frag". "xref_file" can still carry trailing whitespace,
# so callers strip it before using it as a file name.
xref_regex = re.compile(
    r"^\s*xref:\s*(?P<xref_file>[^#]+)(?P<xref_frag>#\S+)?\s*?$", re.I)

# Target fields that are combined, in this order, into a human-readable name.
_DISPLAY_NAME_FIELDS = ("display_name", "product", "version", "guide",
                        "subtitle")


def _find_page(fname, config, targets_ok):
    """Return the first page of config["pages"] that fname refers to.

    A name ending in ".md" is first compared against each page's "md" value,
    both as a full path and as a bare file name. If that finds nothing, or the
    name does not end in ".md", it is compared against each page's "html"
    value. A page only qualifies if it has a non-empty "html" value and
    targets_ok(page's target list) is true; a page without a "targets" key is
    treated as having an empty list.

    Returns the page dict, or False when nothing qualifies.
    """
    pages = config.get("pages", [])

    def usable(page):
        return bool(page.get("html")) and targets_ok(page.get("targets") or [])

    if fname.endswith(".md"):
        for page in pages:
            md_path = page.get("md")
            if not md_path:
                continue
            # Accept either the exact path or the file name in any directory.
            if fname in (md_path, md_path.split("/")[-1]) and usable(page):
                return page

    for page in pages:
        if page.get("html") == fname and usable(page):
            return page
    return False


def find_file_in_target(fname, targetname, config):
    """Find the page named fname (md or html file) within one target.

    Returns the page dict whose "targets" list contains targetname, or False
    if there is no such page.
    """
    return _find_page(fname, config, lambda targets: targetname in targets)


def find_file_in_any_target(fname, config):
    """Find the page named fname (md or html file) in any target.

    Returns the first matching page dict that belongs to at least one target,
    or False if there is no such page.
    """
    return _find_page(fname, config, bool)


def lookup_display_name(targetname, config):
    """Return a human-readable name for the target called targetname.

    The name is the target's non-empty display_name, product, version, guide
    and subtitle values joined by single spaces. If the target defines none of
    them, or no target has that name, a warning is logged and targetname itself
    is returned.
    """
    for t in config.get("targets", []):
        if "name" in t and t["name"] == targetname:
            parts = []
            for field in _DISPLAY_NAME_FIELDS:
                value = t.get(field)
                if value is None:
                    continue
                text = str(value).strip()
                if text:
                    parts.append(text)
            if parts:
                # Joining only the non-empty parts avoids the stray spaces a
                # fixed "%s %s %s %s %s" template leaves for missing fields.
                return " ".join(parts)
            warning("Target has no display_name/product/version/guide: %s" % targetname)
            return targetname

    warning("Target not found: %s" % targetname)
    return targetname


def filter_soup(soup, target=None, page=None, config=None):
    """Resolve XREF links in soup, modifying it in place.

    Every element whose href matches xref_regex is handled as follows:

    * If the referenced page is part of the current target, the href is
      rewritten to the page's HTML file plus the fragment, and a blank link
      label is replaced by the page's name.
    * Otherwise, if the page exists in some other target, the link is replaced
      by a <span class="dactyl_xref"> reading
      '"<label>" in the <em><target display name></em>', using the first
      target listed for that page.

    Raises KeyError if the referenced file is not found in any target.
    """
    # Defaults are created per call instead of sharing one dict between calls.
    if target is None:
        target = {"name": ""}
    if config is None:
        config = {"pages": []}
    target_name = target.get("name", "")

    for xref in soup.find_all(href=xref_regex):
        m = xref_regex.match(xref.attrs["href"])
        # [^#]+ also swallows whitespace before "#" or the end of the href.
        xref_file = m.group("xref_file").strip()
        xref_frag = m.group("xref_frag") or ""

        xref_page = find_file_in_target(xref_file, target_name, config)
        if xref_page:
            # On-target: point at the HTML file (the xref may have named the
            # markdown file) and fill in a label if there is none.
            xref.attrs["href"] = xref_page["html"] + xref_frag
            if not list(xref.stripped_strings):
                xref.string = xref_page["name"]
            continue

        # Cross-referenced page isn't part of this target.
        xref_page = find_file_in_any_target(xref_file, config)
        if not xref_page:
            raise KeyError(("xref to missing file: '%s'. Maybe it's not in the Dactyl config file?") % xref_file)
        xref_target_shortname = xref_page["targets"][0]
        ref_target = lookup_display_name(xref_target_shortname, config)

        link_label = " ".join(xref.stripped_strings)
        # If a link label wasn't provided, generate one from the page name.
        if not link_label.strip():
            link_label = xref_page["name"]
        link_label = link_label.strip()

        # "Link Label" in the _Target Display Name_
        span = soup.new_tag("span")
        span.attrs["class"] = "dactyl_xref"
        span.string = '"%s" in the ' % link_label
        em = soup.new_tag("em")
        em.string = ref_target
        span.append(em)
        xref.replace_with(span)