From 9609ccdff310606f7ba0adb1ada6d72fc2e4e937 Mon Sep 17 00:00:00 2001
From: mDuo13 <mduo13@gmail.com>
Date: Mon, 6 Feb 2017 17:42:06 -0800
Subject: [PATCH] Dactyl - xrefs filter; pass config to filters; fix pages
 inheriting pages array

---
 tool/dactyl_build.py                  |  10 +-
 tool/filter_add_version.py            |   2 +-
 tool/filter_badges.py                 |   2 +-
 tool/filter_buttonize.py              |   2 +-
 tool/filter_callouts.py               |   2 +-
 tool/filter_markdown_in_divs.py       |   2 +-
 tool/filter_multicode_tabs.py         |   4 +-
 tool/filter_remove_doctoc.py          |   2 +-
 tool/filter_standardize_header_ids.py |   2 +-
 tool/filter_xrefs.py                  | 142 ++++++++++++++++++++++++++
 10 files changed, 158 insertions(+), 12 deletions(-)
 create mode 100644 tool/filter_xrefs.py

diff --git a/tool/dactyl_build.py b/tool/dactyl_build.py
index e5b2be33a7..ef1aaccc9a 100755
--- a/tool/dactyl_build.py
+++ b/tool/dactyl_build.py
@@ -47,6 +47,7 @@ RESERVED_KEYS_TARGET = [
     "display_name",
     "filters",
     "image_subs",
+    "pages",
 ]
 ADHOC_TARGET = "__ADHOC__"
 DEFAULT_PDF_FILE = "__DEFAULT_FILENAME__"
@@ -310,7 +311,8 @@ def parse_markdown(page, target=None, pages=None, bypass_errors=False):
     for filter_name in page_filters:
         if "filter_markdown" in dir(filters[filter_name]):
             logging.info("... applying markdown filter %s" % filter_name)
-            md = filters[filter_name].filter_markdown(md, target=target, page=page)
+            md = filters[filter_name].filter_markdown(md, target=target,
+                            page=page, config=config)
 
     # Actually parse the markdown
     logger.info("... parsing markdown...")
@@ -322,7 +324,8 @@ def parse_markdown(page, target=None, pages=None, bypass_errors=False):
     for filter_name in page_filters:
         if "filter_html" in dir(filters[filter_name]):
             logging.info("... applying HTML filter %s" % filter_name)
-            html = filters[filter_name].filter_html(html, target=target, page=page)
+            html = filters[filter_name].filter_html(html, target=target,
+                            page=page, config=config)
 
     # Some filters would rather operate on a soup than a string.
     # May as well parse once and re-serialize once.
@@ -332,7 +335,8 @@ def parse_markdown(page, target=None, pages=None, bypass_errors=False):
     for filter_name in page_filters:
         if "filter_soup" in dir(filters[filter_name]):
             logging.info("... applying soup filter %s" % filter_name)
-            filters[filter_name].filter_soup(soup, target=target, page=page)
+            filters[filter_name].filter_soup(soup, target=target,
+                            page=page, config=config)
             # ^ the soup filters apply to the same object, passed by reference
 
     # Replace links for any non-default target
diff --git a/tool/filter_add_version.py b/tool/filter_add_version.py
index 5b6776a249..289a182f33 100644
--- a/tool/filter_add_version.py
+++ b/tool/filter_add_version.py
@@ -9,7 +9,7 @@
 import re
 import logging
 
-def filter_markdown(md, target=None, page=None):
+def filter_markdown(md, target=None, page=None, config=None):
     """Finds the version number and adds it to the start of the page."""
     version_regex = r"https://raw.githubusercontent.com/([A-Za-z0-9_.-]+)/([A-Za-z0-9_.-]+)/([A-Za-z0-9_-]+\.[A-Za-z0-9_.-]+)/.+\.md"
 
diff --git a/tool/filter_badges.py b/tool/filter_badges.py
index 434d15bf3c..589da446c2 100644
--- a/tool/filter_badges.py
+++ b/tool/filter_badges.py
@@ -13,7 +13,7 @@ from urllib.parse import quote as urlescape
 
 BADGE_REGEX = re.compile("BADGE_(BRIGHTGREEN|GREEN|YELLOWGREEN|YELLOW|ORANGE|RED|LIGHTGREY|BLUE|[0-9A-Fa-f]{6})")
 
-def filter_soup(soup, target=None, page=None):
+def filter_soup(soup, target=None, page=None, config=None):
     """replace underscores with dashes in h1,h2,etc. for backwards compatibility"""
 
     badge_links = soup.find_all(name="a", title=BADGE_REGEX)
diff --git a/tool/filter_buttonize.py b/tool/filter_buttonize.py
index 2a0d5ad24d..edda8d69a5 100644
--- a/tool/filter_buttonize.py
+++ b/tool/filter_buttonize.py
@@ -8,7 +8,7 @@
 ################################################################################
 import re
 
-def filter_soup(soup, target=None, page=None):
+def filter_soup(soup, target=None, page=None, config=None):
     """make links ending in > render like buttons"""
     buttonlinks = soup.find_all("a", string=re.compile(">$"))
     for link in buttonlinks:
diff --git a/tool/filter_callouts.py b/tool/filter_callouts.py
index 5175e280ef..6fa83e423c 100644
--- a/tool/filter_callouts.py
+++ b/tool/filter_callouts.py
@@ -15,7 +15,7 @@ CALLOUT_CLASS_MAPPING = {
     "tip": "devportal-callout tip",
 }
 
-def filter_soup(soup, target=None, page=None):
+def filter_soup(soup, target=None, page=None, config=None):
     """replace underscores with dashes in h1,h2,etc. for backwards compatibility"""
     callout_intro = re.compile(r"(Note|Warning|Tip|Caution):?$", re.I)
     callouts = soup.find_all(name=["strong","em"], string=callout_intro)
diff --git a/tool/filter_markdown_in_divs.py b/tool/filter_markdown_in_divs.py
index 6618828f52..890b71c2f7 100644
--- a/tool/filter_markdown_in_divs.py
+++ b/tool/filter_markdown_in_divs.py
@@ -9,7 +9,7 @@
 ## compatibility with those.                                                  ##
 ################################################################################
 
-def filter_markdown(md, target=None, page=None):
+def filter_markdown(md, target=None, page=None, config=None):
     """Python markdown requires markdown="1" on HTML block elements
        that contain markdown. AND there's a bug where if you use
        markdown.extensions.extra, it replaces code fences in HTML
diff --git a/tool/filter_multicode_tabs.py b/tool/filter_multicode_tabs.py
index 078e1ee256..5c5ed9819c 100644
--- a/tool/filter_multicode_tabs.py
+++ b/tool/filter_multicode_tabs.py
@@ -9,7 +9,7 @@
 import re
 import logging
 
-def filter_html(html, target=None, page=None):
+def filter_html(html, target=None, page=None, config=None):
     """Turn multicode comments into a div (after markdown inside is parsed)"""
     MC_START_REGEX = re.compile(r"<!--\s*MULTICODE_BLOCK_START\s*-->")
     MC_END_REGEX = re.compile(r"<!--\s*MULTICODE_BLOCK_END\s*-->")
@@ -18,7 +18,7 @@ def filter_html(html, target=None, page=None):
     html = re.sub(MC_END_REGEX, "</div>", html)
     return html
 
-def filter_soup(soup, target=None, page=None):
+def filter_soup(soup, target=None, page=None, config=None):
     """Turn a multicode block into the correct syntax for minitabs"""
     multicodes = soup.find_all(class_="multicode")
     index1 = 0
diff --git a/tool/filter_remove_doctoc.py b/tool/filter_remove_doctoc.py
index 64acf75f9b..25b52cdf75 100644
--- a/tool/filter_remove_doctoc.py
+++ b/tool/filter_remove_doctoc.py
@@ -8,7 +8,7 @@
 ################################################################################
 
 
-def filter_markdown(md, target=None, page=None):
+def filter_markdown(md, target=None, page=None, config=None):
     """Strip out doctoc Table of Contents for RippleAPI"""
     DOCTOC_START = "<!-- START doctoc generated TOC please keep comment here to allow auto update -->"
     DOCTOC_END = "<!-- END doctoc generated TOC please keep comment here to allow auto update -->"
diff --git a/tool/filter_standardize_header_ids.py b/tool/filter_standardize_header_ids.py
index 2748065374..7cb3d8c4fe 100644
--- a/tool/filter_standardize_header_ids.py
+++ b/tool/filter_standardize_header_ids.py
@@ -9,7 +9,7 @@
 ################################################################################
 import re
 
-def filter_soup(soup, target=None, page=None):
+def filter_soup(soup, target=None, page=None, config=None):
     """replace underscores with dashes in h1,h2,etc. for backwards compatibility"""
     headers = soup.find_all(name=re.compile("h[0-9]"), id=True)
     for h in headers:
diff --git a/tool/filter_xrefs.py b/tool/filter_xrefs.py
new file mode 100644
index 0000000000..e7ae1abe72
--- /dev/null
+++ b/tool/filter_xrefs.py
@@ -0,0 +1,142 @@
+################################################################################
+## XRefs: Intelligent Crossreferences filter                                  ##
+## Author: Rome Reginelli                                                     ##
+## Copyright: Ripple Labs, Inc. 2017                                          ##
+##                                                                            ##
+## Looks for syntax matching the following format:                            ##
+##     [optional text](XREF: some-link.html#fragment)                         ##
+## and interprets it as cross-references. If some-link.html is a file in the  ##
+## current target it becomes a normal hyperlink. If the link text is [] (that ##
+## is, blank) it gets replaced with the title of the page.                    ##
+## (Note: we can't look up section titles as that would require parsing the   ##
+## cross-referenced page and could lead to an infinite recursion loop if two  ##
+## pages cross-referenced each other.)                                        ##
+##   If the file isn't part of the current target but is part of another      ##
+## target, it becomes a non-hyperlink cross reference to the page in the      ##
+## first target that DOES have it. For example:                               ##
+##      "Some Link Title" in _A Target Containing Some Link_                  ##
+################################################################################
+import re
+from logging import warning
+
+# match anything starting with XREF:/xref:, split by the # if there is one
+# dropping any excess whitespace
+xref_regex = re.compile(r"^\s*xref:\s*(?P<xref_file>[^#]+)(?P<xref_frag>#\S+)?\s*?$", re.I)
+
+def find_file_in_target(fname, targetname, config):
+    if fname[-3:] == ".md":
+        # look by markdown file first
+        for page in config["pages"]:
+            if "md" not in page:
+                continue
+            elif ("/" in fname and page["md"] == fname # try to match md file by exact path
+                    and targetname in page.get("targets",[]) # the page appears in this target
+                    and page.get("html","") ): # and finally, the page has an html filename
+                return page
+            elif ( page["md"].split("/")[-1] == fname # match md filename in any directory
+                    and targetname in page.get("targets",[])
+                    and page.get("html","") ):
+                return page
+
+    for page in config["pages"]:
+        if "html" not in page:
+            continue
+        elif page["html"] != fname:
+            continue
+        if targetname in page["targets"]:
+            return page
+    else:
+        return False
+
+def find_file_in_any_target(fname, config):
+    if fname[-3:] == ".md":
+        print("finding in any target by md")
+        # look by markdown file first
+        for page in config["pages"]:
+            if "md" not in page:
+                continue
+            elif ("/" in fname and page["md"] == fname # try to match md file by exact path
+                   and page.get("targets",[]) # page must appear in some target
+                   and page.get("html","") ): # and page must have an html filename
+                return page
+            elif ( page["md"].split("/")[-1] == fname # match md filename in any folder
+                   and page.get("targets",[])
+                   and page.get("html","") ):
+                return page
+
+    # look by HTML file if it didn't end in .md or if we didn't find it yet
+    for page in config["pages"]:
+        if "html" not in page:
+            continue
+        elif page["html"] == fname and page["targets"]:
+            #page has to have "some" target(s) for it to be worthwhile
+            return page
+    else:
+        return False
+
+def lookup_display_name(targetname, config):
+    for t in config["targets"]:
+        if "name" in t and t["name"] == targetname:
+            display_name = "%s %s %s %s %s" % (
+                t.get("display_name", ""),
+                t.get("product", ""),
+                t.get("version", ""),
+                t.get("guide", ""),
+                t.get("subtitle", "")
+            )
+            if display_name.strip():
+                return display_name
+            else:
+                warning("Target has no display_name/product/version/guide: %s" % targetname)
+                return targetname
+    else:
+        warning("Target not found: %s" % targetname)
+        return targetname
+
+def filter_soup(soup, target={"name":""}, page=None, config={"pages":[]}):
+    """Look for cross-references and replace them with not-hyperlinks if they
+       don't exist in the current target."""
+
+    xrefs = soup.find_all(href=xref_regex)
+    #print("Crossreferences:", xrefs)
+    #print("Target pages:", target["pages"])
+
+    for xref in xrefs:
+        m = xref_regex.match(xref.attrs["href"])
+        xref_file = m.group("xref_file")
+        xref_frag = m.group("xref_frag") or ""
+
+        xref_page = find_file_in_target(xref_file, target["name"], config)
+        if xref_page == False:
+            # Cross-referenced page isn't part of this target
+            xref_page = find_file_in_any_target(xref_file, config)
+            if not xref_page:
+                raise KeyError(("xref to missing file: '%s'. Maybe it's not in the Dactyl config file?")%xref_file)
+            xref_target_shortname = xref_page["targets"][0]
+
+            ref_target = lookup_display_name(xref_target_shortname, config)
+
+            link_label = " ".join([s for s in xref.stripped_strings])
+            # If a link label wasn't provided, generate one from the page name
+            if not link_label.strip():
+                link_label = xref_page["name"]
+            link_label = link_label.strip()
+
+            # "Link Label" in the _Target Display Name_
+            span = soup.new_tag("span")
+            span.attrs["class"] = "dactyl_xref"
+            span.string = '"%s" in the ' % link_label
+            em = soup.new_tag("em")
+            em.string = ref_target
+            span.append(em)
+            xref.replace_with(span)
+
+        else:
+            # The xref is on-target
+            # First fix the hyperlink. Use the HTML (in case of link-by-md):
+            xref.attrs["href"] = xref_page["html"]+xref_frag
+            # If this link's label is only whitespace, fix it
+            if not [s for s in xref.stripped_strings]:
+                #print("replacing label for xref", xref)
+                #print("stripped_strings was", [s for s in xref.stripped_strings])
+                xref.string = xref_page["name"]