Dactyl - xrefs filter; pass config to filters; fix pages inheriting pages array

mDuo13
2017-02-06 17:42:06 -08:00
parent 1102ff13df
commit 9609ccdff3
10 changed files with 158 additions and 12 deletions

View File

@@ -47,6 +47,7 @@ RESERVED_KEYS_TARGET = [
     "display_name",
     "filters",
     "image_subs",
+    "pages",
 ]
 ADHOC_TARGET = "__ADHOC__"
 DEFAULT_PDF_FILE = "__DEFAULT_FILENAME__"
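
The "pages" entry above is the fix for pages inheriting the target's pages array. A hypothetical sketch of the inheritance step this guards (illustrative names, not Dactyl's actual code):

    # Pages inherit fields from their target, except reserved keys. With
    # "pages" reserved, a page can no longer inherit the target's page list.
    def inherit_target_fields(page, target):
        inherited = {k: v for k, v in target.items()
                     if k not in RESERVED_KEYS_TARGET}
        inherited.update(page)  # the page's own fields take precedence
        return inherited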
@@ -310,7 +311,8 @@ def parse_markdown(page, target=None, pages=None, bypass_errors=False):
     for filter_name in page_filters:
         if "filter_markdown" in dir(filters[filter_name]):
             logging.info("... applying markdown filter %s" % filter_name)
-            md = filters[filter_name].filter_markdown(md, target=target, page=page)
+            md = filters[filter_name].filter_markdown(md, target=target,
+                    page=page, config=config)

     # Actually parse the markdown
     logger.info("... parsing markdown...")
@@ -322,7 +324,8 @@ def parse_markdown(page, target=None, pages=None, bypass_errors=False):
     for filter_name in page_filters:
         if "filter_html" in dir(filters[filter_name]):
             logging.info("... applying HTML filter %s" % filter_name)
-            html = filters[filter_name].filter_html(html, target=target, page=page)
+            html = filters[filter_name].filter_html(html, target=target,
+                    page=page, config=config)

     # Some filters would rather operate on a soup than a string.
     # May as well parse once and re-serialize once.
@@ -332,7 +335,8 @@ def parse_markdown(page, target=None, pages=None, bypass_errors=False):
     for filter_name in page_filters:
         if "filter_soup" in dir(filters[filter_name]):
             logging.info("... applying soup filter %s" % filter_name)
-            filters[filter_name].filter_soup(soup, target=target, page=page)
+            filters[filter_name].filter_soup(soup, target=target,
+                    page=page, config=config)
             # ^ the soup filters apply to the same object, passed by reference

     # Replace links for any non-default target
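
With these changes, each filter hook (filter_markdown, filter_html, filter_soup) now receives the loaded config as a keyword argument. A minimal sketch of a filter module under the new signatures (the "banner_text" config key is an invented example, not part of Dactyl):

    def filter_markdown(md, target=None, page=None, config=None):
        """Prepend a banner defined in the config, if any."""
        banner = (config or {}).get("banner_text", "")
        return (banner + "\n\n" + md) if banner else md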

View File

@@ -9,7 +9,7 @@
 import re
 import logging

-def filter_markdown(md, target=None, page=None):
+def filter_markdown(md, target=None, page=None, config=None):
     """Finds the version number and adds it to the start of the page."""
     version_regex = r"https://raw.githubusercontent.com/([A-Za-z0-9_.-]+)/([A-Za-z0-9_.-]+)/([A-Za-z0-9_-]+\.[A-Za-z0-9_.-]+)/.+\.md"

View File

@@ -13,7 +13,7 @@ from urllib.parse import quote as urlescape
 BADGE_REGEX = re.compile("BADGE_(BRIGHTGREEN|GREEN|YELLOWGREEN|YELLOW|ORANGE|RED|LIGHTGREY|BLUE|[0-9A-Fa-f]{6})")

-def filter_soup(soup, target=None, page=None):
+def filter_soup(soup, target=None, page=None, config=None):
     """replace underscores with dashes in h1,h2,etc. for backwards compatibility"""
     badge_links = soup.find_all(name="a", title=BADGE_REGEX)

View File

@@ -8,7 +8,7 @@
 ################################################################################
 import re

-def filter_soup(soup, target=None, page=None):
+def filter_soup(soup, target=None, page=None, config=None):
     """make links ending in > render like buttons"""
     buttonlinks = soup.find_all("a", string=re.compile(">$"))
     for link in buttonlinks:

View File

@@ -15,7 +15,7 @@ CALLOUT_CLASS_MAPPING = {
     "tip": "devportal-callout tip",
 }

-def filter_soup(soup, target=None, page=None):
+def filter_soup(soup, target=None, page=None, config=None):
     """replace underscores with dashes in h1,h2,etc. for backwards compatibility"""
     callout_intro = re.compile(r"(Note|Warning|Tip|Caution):?$", re.I)
     callouts = soup.find_all(name=["strong","em"], string=callout_intro)

View File

@@ -9,7 +9,7 @@
 ## compatibility with those. ##
 ################################################################################

-def filter_markdown(md, target=None, page=None):
+def filter_markdown(md, target=None, page=None, config=None):
     """Python markdown requires markdown="1" on HTML block elements
     that contain markdown. AND there's a bug where if you use
     markdown.extensions.extra, it replaces code fences in HTML
View File

@@ -9,7 +9,7 @@
 import re
 import logging

-def filter_html(html, target=None, page=None):
+def filter_html(html, target=None, page=None, config=None):
     """Turn multicode comments into a div (after markdown inside is parsed)"""
     MC_START_REGEX = re.compile(r"<!--\s*MULTICODE_BLOCK_START\s*-->")
     MC_END_REGEX = re.compile(r"<!--\s*MULTICODE_BLOCK_END\s*-->")
@@ -18,7 +18,7 @@ def filter_html(html, target=None, page=None):
     html = re.sub(MC_END_REGEX, "</div>", html)
     return html

-def filter_soup(soup, target=None, page=None):
+def filter_soup(soup, target=None, page=None, config=None):
     """Turn a multicode block into the correct syntax for minitabs"""
     multicodes = soup.find_all(class_="multicode")
     index1 = 0

View File

@@ -8,7 +8,7 @@
 ################################################################################

-def filter_markdown(md, target=None, page=None):
+def filter_markdown(md, target=None, page=None, config=None):
     """Strip out doctoc Table of Contents for RippleAPI"""
     DOCTOC_START = "<!-- START doctoc generated TOC please keep comment here to allow auto update -->"
     DOCTOC_END = "<!-- END doctoc generated TOC please keep comment here to allow auto update -->"

View File

@@ -9,7 +9,7 @@
 ################################################################################
 import re

-def filter_soup(soup, target=None, page=None):
+def filter_soup(soup, target=None, page=None, config=None):
     """replace underscores with dashes in h1,h2,etc. for backwards compatibility"""
     headers = soup.find_all(name=re.compile("h[0-9]"), id=True)
     for h in headers:

tool/filter_xrefs.py (new file, +142 lines)
View File

@@ -0,0 +1,142 @@
################################################################################
## XRefs: Intelligent Cross-references filter                                 ##
## Author: Rome Reginelli                                                     ##
## Copyright: Ripple Labs, Inc. 2017                                          ##
##                                                                            ##
## Looks for syntax matching the following format:                            ##
## [optional text](XREF: some-link.html#fragment)                             ##
## and interprets it as a cross-reference. If some-link.html is a file in    ##
## the current target, it becomes a normal hyperlink. If the link text       ##
## is [] (that is, blank), it gets replaced with the title of the page.      ##
## (Note: we can't look up section titles, as that would require parsing     ##
## the cross-referenced page and could lead to an infinite recursion loop    ##
## if two pages cross-reference each other.)                                 ##
## If the file isn't part of the current target but is part of another       ##
## target, it becomes a non-hyperlink cross-reference to the page in the     ##
## first target that DOES have it. For example:                              ##
## "Some Link Title" in _A Target Containing Some Link_                      ##
################################################################################
import re
from logging import warning

# Match anything starting with XREF:/xref:, split at the # if there is one,
# dropping any excess whitespace.
xref_regex = re.compile(r"^\s*xref:\s*(?P<xref_file>[^#]+)(?P<xref_frag>#\S+)?\s*?$", re.I)
def find_file_in_target(fname, targetname, config):
    if fname[-3:] == ".md":
        # look by markdown file first
        for page in config["pages"]:
            if "md" not in page:
                continue
            elif ("/" in fname and page["md"] == fname  # try to match md file by exact path
                  and targetname in page.get("targets", [])  # the page appears in this target
                  and page.get("html", "")):  # and finally, the page has an html filename
                return page
            elif (page["md"].split("/")[-1] == fname  # match md filename in any directory
                  and targetname in page.get("targets", [])
                  and page.get("html", "")):
                return page
    for page in config["pages"]:
        if "html" not in page:
            continue
        elif page["html"] != fname:
            continue
        if targetname in page["targets"]:
            return page
    else:
        return False
def find_file_in_any_target(fname, config):
    if fname[-3:] == ".md":
        print("finding in any target by md")
        # look by markdown file first
        for page in config["pages"]:
            if "md" not in page:
                continue
            elif ("/" in fname and page["md"] == fname  # try to match md file by exact path
                  and page.get("targets", [])  # page must appear in some target
                  and page.get("html", "")):  # and page must have an html filename
                return page
            elif (page["md"].split("/")[-1] == fname  # match md filename in any folder
                  and page.get("targets", [])
                  and page.get("html", "")):
                return page
    # look by HTML file if it didn't end in .md or if we didn't find it yet
    for page in config["pages"]:
        if "html" not in page:
            continue
        elif page["html"] == fname and page["targets"]:
            # page has to have "some" target(s) for it to be worthwhile
            return page
    else:
        return False
def lookup_display_name(targetname, config):
    for t in config["targets"]:
        if "name" in t and t["name"] == targetname:
            display_name = "%s %s %s %s %s" % (
                t.get("display_name", ""),
                t.get("product", ""),
                t.get("version", ""),
                t.get("guide", ""),
                t.get("subtitle", "")
            )
            if display_name.strip():
                return display_name
            else:
                warning("Target has no display_name/product/version/guide: %s" % targetname)
                return targetname
    else:
        warning("Target not found: %s" % targetname)
        return targetname
def filter_soup(soup, target={"name": ""}, page=None, config={"pages": []}):
    """Look for cross-references and replace them with not-hyperlinks if they
    don't exist in the current target."""
    xrefs = soup.find_all(href=xref_regex)
    #print("Crossreferences:", xrefs)
    #print("Target pages:", target["pages"])
    for xref in xrefs:
        m = xref_regex.match(xref.attrs["href"])
        xref_file = m.group("xref_file")
        xref_frag = m.group("xref_frag") or ""
        xref_page = find_file_in_target(xref_file, target["name"], config)
        if xref_page == False:
            # Cross-referenced page isn't part of this target
            xref_page = find_file_in_any_target(xref_file, config)
            if not xref_page:
                raise KeyError("xref to missing file: '%s'. Maybe it's not in the Dactyl config file?" % xref_file)
            xref_target_shortname = xref_page["targets"][0]
            ref_target = lookup_display_name(xref_target_shortname, config)
            link_label = " ".join([s for s in xref.stripped_strings])
            # If a link label wasn't provided, generate one from the page name
            if not link_label.strip():
                link_label = xref_page["name"]
            link_label = link_label.strip()
            # "Link Label" in _Target Display Name_
            span = soup.new_tag("span")
            span.attrs["class"] = "dactyl_xref"
            span.string = '"%s" in the ' % link_label
            em = soup.new_tag("em")
            em.string = ref_target
            span.append(em)
            xref.replace_with(span)
        else:
            # The xref is on-target.
            # First fix the hyperlink. Use the HTML file (in case of link-by-md):
            xref.attrs["href"] = xref_page["html"] + xref_frag
            # If this link's label is only whitespace, fix it
            if not [s for s in xref.stripped_strings]:
                #print("replacing label for xref", xref)
                #print("stripped_strings was", [s for s in xref.stripped_strings])
                xref.string = xref_page["name"]
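
A minimal usage sketch of the new xrefs filter, assuming BeautifulSoup 4 and an invented two-entry config. An empty-label xref pointing outside the current target is replaced with the page name plus the owning target's display name:

    from bs4 import BeautifulSoup
    import filter_xrefs

    config = {
        "targets": [{"name": "guide", "display_name": "Some Guide"}],
        "pages": [{"name": "Some Page", "html": "some-page.html",
                   "targets": ["guide"]}],
    }
    soup = BeautifulSoup('<a href="xref: some-page.html#usage"></a>',
                         "html.parser")
    filter_xrefs.filter_soup(soup, target={"name": "other-target"},
                             config=config)
    print(soup)
    # The empty link becomes: "Some Page" in the <em>Some Guide</em>
    # (wrapped in <span class="dactyl_xref">, with some padding left over
    # from the target's empty product/version/guide/subtitle fields)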