Markdown syntax migration script

Script replacing include_code with code-snippet; Migration script: partials, some variables; Add variables to conversion script; Draft repo-link component; Complete repo-link component; Migration script: handle GitHub links; Draft include_svg→inline-svg (non-functional): currently doesn't work due to image path issues, and captions and custom classes (for inlining) are not implemented yet; Conversion script: refactor & add code-page-name (the custom code-page-name component works around a Markdoc limitation where variables can't be used in `inline code` sections); Migration script: handle more code includes correctly; Migration script: tabs and tabbed code samples; Child pages macro & conversion script (adapted from 70cffa67ed); Migration script: update with some partial fixes; Migration script: callouts→admonitions; Fix auto-generation of index pages; Migration script: fix SVG migration; Migration script: fix code block prefixes & indentation — use the Redocly 0.66 feature for code block prefixes, and update the script for converting indented code blocks to fences with Roman's latest fixes (now uses 4 spaces per level, for consistency).
2025-11-04 20:05:50 +00:00 · 2023-10-31 16:05:38 -07:00
parent cb9f332d78
commit 817ea3732f
8 changed files with 17422 additions and 1 deletions
--- a/content/.env
+++ b/content/.env
@@ -0,0 +1,4 @@
+PUBLIC_GITHUB_FORK=https://github.com/XRPLF/xrpl-dev-portal
+PUBLIC_GITHUB_BRANCH=master
+PUBLIC_OWNER_RESERVE=2 XRP
+PUBLIC_BASE_RESERVE=10 XRP
--- a/content/@theme/markdoc/components.tsx
+++ b/content/@theme/markdoc/components.tsx
@@ -0,0 +1,44 @@
+import * as React from 'react';
+// @ts-ignore
+import dynamicReact from '@markdoc/markdoc/dist/react';
+import { usePageSharedData } from '@portal/hooks';
+import { Link } from '@portal/Link';
+
+/**
+ * Renders the list of child pages for an index page.
+ *
+ * Reads the entries stored under the `index-page-items` shared-data key and
+ * shows each one as a link (its `slug` and `title`) followed by its `blurb`.
+ * Each list item is keyed by slug so React can reconcile the list.
+ */
+export function IndexPageItems() {
+    // NOTE(review): presumably nothing is returned when no data was stored for
+    // this page; fall back to an empty list rather than crashing the render.
+    const data = (usePageSharedData('index-page-items') as any[]) ?? [];
+    return (
+        <div className="children-display">
+            <ul>
+              {data.map((item: any) => (
+                <li className="level-1" key={item.slug}>
+                  <Link to={item.slug}>{item.title}</Link>
+                  <p className="blurb child-blurb">{item.blurb}</p>
+                </li>
+              ))}
+            </ul>
+        </div>
+    );
+}
+
+/**
+ * Renders a link to a file or folder in the GitHub repository.
+ *
+ * The URL is built as `<github_fork>/<blob|tree>/<github_branch>/<path>`.
+ * A path that contains a dot is linked as a file ("blob"); any other path is
+ * linked as a directory ("tree").
+ *
+ * @param props.children - Markdoc-rendered link text.
+ * @param props.path - Path inside the repository.
+ * @param props.github_fork - Base URL of the repository, with or without a trailing slash.
+ * @param props.github_branch - Branch name to link to.
+ */
+export function RepoLink(props: {
+        children: React.ReactNode;
+        path: string;
+        github_fork: string;
+        github_branch: string
+    }) {
+    const treeblob = props.path.includes(".") ? "blob/" : "tree/"
+    // JavaScript strings have no negative indexing (`str[-1]` is undefined),
+    // so test the trailing slash explicitly to avoid producing "//".
+    const sep = props.github_fork.endsWith("/") ? "" : "/"
+    const href = props.github_fork+sep+treeblob+props.github_branch+"/"+props.path
+
+    return (
+        <Link to={href}>{dynamicReact(props.children, React, {})}</Link>
+    )
+}
+
+/**
+ * Renders the given name as inline code.
+ *
+ * @param name - Text to show inside the `<code>` element.
+ */
+export function CodePageName({ name }: { name: string }) {
+    return <code>{name}</code>;
+}
--- a/content/@theme/markdoc/schema.ts
+++ b/content/@theme/markdoc/schema.ts
@@ -0,0 +1,50 @@
+import { Schema, Tag } from '@markdoc/markdoc';
+
+/**
+ * Markdoc schema for the self-closing `child-pages` tag, which is rendered by
+ * the `IndexPageItems` component. The tag takes no attributes.
+ */
+export const indexPageList: Schema & { tagName: string } = {
+  tagName: 'child-pages',
+  render: 'IndexPageItems',
+  selfClosing: true,
+};
+
+/**
+ * Markdoc schema for the `repo-link` tag, rendered by the `RepoLink` component.
+ *
+ * `path` is required. `github_fork` and `github_branch` are optional; when
+ * they are missing or empty the transform fills them in from the
+ * `PUBLIC_GITHUB_FORK` / `PUBLIC_GITHUB_BRANCH` entries of the `env` variables.
+ */
+export const repoLink: Schema & { tagName: string } = {
+    tagName: 'repo-link',
+    render: 'RepoLink',
+    attributes: {
+      path: { type: 'String', required: true },
+      github_fork: { type: 'String', required: false },
+      github_branch: { type: 'String', required: false },
+    },
+    transform(node, config) {
+        const attrs = node.transformAttributes(config);
+        // Only read the env defaults when the page did not supply a value.
+        if (!attrs.github_fork) {
+            attrs.github_fork = config.variables.env.PUBLIC_GITHUB_FORK;
+        }
+        if (!attrs.github_branch) {
+            attrs.github_branch = config.variables.env.PUBLIC_GITHUB_BRANCH;
+        }
+        const linkText = node.transformChildren(config);
+        return new Tag(this.render, attrs, linkText);
+    },
+};
+
+/**
+ * Markdoc schema for the self-closing `code-page-name` tag, rendered by the
+ * `CodePageName` component.
+ *
+ * An explicit `name` attribute is used as given. Otherwise the name defaults
+ * to the page's `seo.title` frontmatter value, or to an empty string when the
+ * page has no such value.
+ */
+export const codePageName: Schema & { tagName: string } = {
+  tagName: 'code-page-name',
+  attributes: {
+    name: {
+      type: 'String',
+      required: false,
+    },
+  },
+  transform(node, config) {
+    const attributes = node.transformAttributes(config);
+    // Optional chaining keeps pages without `seo` frontmatter from throwing.
+    attributes["name"] = attributes["name"] || config.variables?.frontmatter?.seo?.title || "";
+    return new Tag(this.render, attributes);
+  },
+  render: 'CodePageName',
+  selfClosing: true,
+};
--- a/content/@theme/plugins/index-pages.js
+++ b/content/@theme/plugins/index-pages.js
@@ -0,0 +1,63 @@
+// @ts-check
+import { readSharedData } from '@redocly/portal/dist/server/utils/shared-data.js'; // TODO: export function from root package
+const INDEX_PAGE_INFO_DATA_KEY = 'index-page-items';
+
+export function indexPages() {
+  /** @type {import("@redocly/portal/dist/server/plugins/types").PluginInstance } */
+  const instance = {
+    // hook that gets executed after all routes were created
+    async afterRoutesCreated(contentProvider, actions) {
+      // get all the routes that are ind pages
+      const indexRoutes = actions.getAllRoutes().filter(route => route.metadata?.indexPage);
+
+      for (const route of indexRoutes) {
+        // @ts-ignore this uses some internals, we will expose them in nicer way in the future releases
+        const sidebarId = actions.routesSharedData.get(route.slug)?.['sidebar']; // TODO: implement a helper function for this
+        /** @type {any} */
+        const sidebar = await readSharedData(sidebarId, actions.outdir);
+
+        if (!sidebar) {
+          console.log('[warn] Index route used with no sidebar', route.fsPath);
+          continue;
+        }
+
+        const item = findItemDeep(sidebar.items, route.fsPath);
+        const childrenPaths = (item.items || []).map(item => item.fsPath).filter(Boolean);
+
+        const childRoutes = childrenPaths.map(fsPath => actions.getRouteByFsPath(fsPath));
+        const childRoutesData = await Promise.all(
+          childRoutes.map(async route => {
+            const { parsed } = contentProvider.loadContent(route.fsPath, 'frontmatter');
+            return {
+              ...parsed?.data,
+              slug: route.slug,
+              title: await route.getNavText(),
+            };
+          })
+        );
+
+        const sharedDataId = await actions.createSharedData(
+          route.slug + '_' + INDEX_PAGE_INFO_DATA_KEY,
+          childRoutesData
+        );
+        actions.addRouteSharedData(route.slug, INDEX_PAGE_INFO_DATA_KEY, sharedDataId);
+      }
+    },
+  };
+  return instance;
+}
+
+function findItemDeep(items, fsPath) {
+  for (const item of items) {
+    if (item.fsPath === fsPath) {
+      return item;
+    }
+
+    if (item.items) {
+      const found = findItemDeep(item.items, fsPath);
+      if (found) {
+        return found;
+      }
+    }
+  }
+}
--- a/content/redocly.yaml
+++ b/content/redocly.yaml
@@ -34,6 +34,8 @@ theme:
    # hide:
    items:
      $ref: top-nav.yaml
+  markdown:
+    partialsFolders: ["_snippets", "_code-samples", "_api-examples"]
  # footer:
  #   # hide:
  #   copyrightText: © 2022-2023, Redocly Inc. All right reserved.
--- a/package.json
+++ b/package.json
@@ -10,7 +10,7 @@
  "keywords": [],
  "license": "MIT",
  "dependencies": {
-    "@redocly/portal": "0.57.0",
+    "@redocly/portal": "^0.65.3",
    "lottie-react": "^2.4.0",
    "moment": "^2.29.4"
  },
--- a/tool/convert-code-blocks.cjs
+++ b/tool/convert-code-blocks.cjs
--- a/tool/md_dactyl_to_redocly.py
+++ b/tool/md_dactyl_to_redocly.py
@@ -0,0 +1,277 @@
+#!/usr/bin/env python
+###############################################################################
+## Markdown files: Dactyl (Jinja) to Redocly (Markdoc) syntax converter
+## Author: mDuo13
+## License: MIT
+##
+## Searches md files in the content dir for specific syntax and converts it to
+## a format that should work under Redocly.
+##
+## 1) includes → partials
+## 2) variables:
+##    a) github_fork / github_branch variables → repo-link component
+##    b) `{{currentpage.name}}` → code-page-name component
+##    c) {{currentpage.name}} → frontmatter.seo.title variable
+##    d) owner / account reserve variables → env variables
+## 3) include_code() macro → code-snippet component
+## 4) category template → child pages component
+## 5) include_svg() → inline-svg component
+## 6) callouts → admonitions
+## 7) code tabs → tabs component
+###############################################################################
+
+import os
+import os.path
+import re
+import ruamel.yaml
+yaml = ruamel.yaml.YAML(typ="safe")
+
+
+def should_include(fname):
+    """
+    Return True if the given file/folder name should be checked.
+    Otherwise return False.
+    """
+    if fname == "node_modules":
+        return False
+    if fname == "_snippets":
+        return True
+    if fname[:1] == "_":
+        return False
+    if ".git" in fname:
+        return False
+    return True
+
+
+def list_mds(content_dir):
+    all_mds = []
+    for dirpath, dirnames, filenames in os.walk(content_dir, topdown=True):
+        dirnames[:] = [d for d in dirnames if should_include(d)]
+        filenames[:] = [f for f in filenames if should_include(f)]
+        for filename in filenames:
+            if filename[-3:] == ".md":
+                #all_mds.append(os.path.relpath(os.path.join(dirpath,filename), content_dir))
+                all_mds.append(os.path.join(dirpath,filename))
+    return all_mds
+
+COMMON_LINKS_INCLUDES = [
+    "<!--{# common link defs #}-->",
+    "<!--_ -->",
+    "<!--{#_ #}-->",
+    "{% include '_snippets/rippled-api-links.md' %}",
+    "{% include '_snippets/tx-type-links.md' %}",
+    "{% include '_snippets/rippled_versions.md' %}",
+]
+def rm_common_links_includes(ftext):
+    """
+    Remove (with no replacement) the includes that define common links at the
+    end of a file. Trim out extra whitespace (except last \n)
+    """
+    for s in COMMON_LINKS_INCLUDES:
+        ftext = ftext.replace(s, "")
+    ftext = ftext.strip()+"\n"
+    return ftext
+
+class RegexReplacer():
+    """
+    Prototype class for replacing instances of a pattern throughout text
+    """
+
+    regex: re.compile('')
+
+    @staticmethod
+    def replace(m: re.Match):
+        """
+        To be overridden. Text that should replace an instance of the regex
+        """
+        return ""
+
+    def replace_all(self, ftext):
+        ftext2 = ftext
+        for m in re.finditer(self.regex, ftext):
+            raw_string = m.group(0)
+            repl_string = self.replace(m)
+            ftext2 = ftext2.replace(raw_string, repl_string)
+        return ftext2
+
+regex_todos = [] # List of RegexReplacer child instances to run, in order, on each page
+
+class TabsToSpaces(RegexReplacer):
+    regex = re.compile(r'\t')
+    replace = staticmethod(lambda m: "    ")
+regex_todos.append(TabsToSpaces())
+
+class IncludeCodeReplacer(RegexReplacer):
+    regex = re.compile(r'\{\{ *include_code\( *"(?P<fname>[^"]+)"[,\s]*(start_with="(?P<start_with>[^"]+)"[,\s]*|end_before="(?P<end_before>[^"]+)"[,\s]*|language="(?P<language>[^"]+)"[,\s]*)* *\) *\}\}')
+    @staticmethod
+    def replace(m: re.Match):
+        """
+        Convert instances of the include_code() filter to instances
+        of the code-snippet Redocly component.
+        """
+        repl_string = '{% code-snippet file="/'+m.group("fname")+'" '
+        if m.group("start_with"):
+            repl_string += 'from="'+m.group("start_with")+'" '
+        if m.group("end_before"):
+            repl_string += 'before="'+m.group("end_before")+'" '
+        if m.group("language"):
+            repl_string += 'language="'+m.group("language")+'" '
+        repl_string += '/%}'
+        return repl_string
+regex_todos.append(IncludeCodeReplacer())
+
+class IncludeSvgReplacer(RegexReplacer):
+    regex = re.compile(r'\{\{ *include_svg\( *"(?P<fname>[^"]+)"[, ]*("(?P<caption>[^"]*)")?[, ]*(classes="(?P<classes>[^"]*)")?\) *}}')
+    @staticmethod
+    def replace(m):
+        return '[{% inline-svg file="/' + m.group("fname") + '" /%}](/'+m.group("fname")+' "'+m.group("caption")+'")'
+regex_todos.append(IncludeSvgReplacer())
+
+class PrefixedCodeSnippetReplacer(RegexReplacer):
+    regex = re.compile(r"""```(?P<language>\w*)\n(?P<prefix>[^{`]+)\n\{% include ['"](?P<path>[^'"]+)['"] %\}\s*```""")
+    @staticmethod
+    def replace(m: re.Match):
+        escaped_prefix = m.group("prefix").replace("\n","\\n").replace('"', '\\"')+"\\n"
+        return '{% code-snippet file="/'+m.group("path")+'" language="'+m.group("language")+'" prefix="'+escaped_prefix+'" /%}'
+regex_todos.append(PrefixedCodeSnippetReplacer())
+
+class PlainCodeIncludeReplacer(RegexReplacer):
+    regex = re.compile(re.compile(r"""```(?P<language>\w*)\n\{% include ['"](?P<path>[^'"]+)['"] %\}\s*```"""))
+    replace = staticmethod(lambda m: '{% code-snippet file="/'+m.group("path")+'" language="'+m.group("language")+'" /%}')
+regex_todos.append(PlainCodeIncludeReplacer())
+
+class SnippetReplacer(RegexReplacer):
+    # Redocly requires partials to end in md due to Mardoc limitations.
+    # Other includes need to be converted to code-snippet instances instead.
+    regex = re.compile(r"\{% *include *'(?P<path>_[^']+\.md)' *%\}")
+    replace = staticmethod(lambda m: '{{% partial file="/{fpath}" /%}}'.format(fpath=m.group("path")))
+regex_todos.append(SnippetReplacer())
+
+class RepoLinkReplacer(RegexReplacer):
+    """
+    Replacement for links that use {{github_forkurl}} and {{github_branch}}.
+    Uses a custom repo-link component to pull info from .env, since variables
+    can't be evaluated inside the href of a link.
+
+    Note, this has to be run before general vars replacement since it covers a
+    special case that's larger than one variable.
+    """
+    regex = re.compile(r"\[(?P<linktext>[^\]]+)\]\(\{\{ *target\.github_forkurl *\}\}/(tree|blob)/\{\{ *target\.github_branch *\}\}/(?P<path>[^\)]+)\)")
+    replace = staticmethod(lambda m: '{% repo-link path="'+m.group("path")+'" %}'+m.group("linktext")+'{% /repo-link %}')
+regex_todos.append(RepoLinkReplacer())
+
+class VarReplacer(RegexReplacer):
+    regex = re.compile(r"\{\{ *(target|currentpage)\.(?P<var>[a-z_]+) *}}")
+    @staticmethod
+    def replace(m):
+        if m.group("var") == "name":
+            return '{% $frontmatter.seo.title %}'
+        else:
+            return '{% $env.PUBLIC_'+m.group("var").upper()+" %}"
+regex_todos.append(VarReplacer())
+
+class TabsReplacer(RegexReplacer):
+    """
+    Meant to run after all the code block replacers.
+
+    Converts everything between the MULTICODE_BLOCK_START and
+    MULTICODE_BLOCK_END comment markers into a tabs component.
+    """
+    # DOTALL lets the lazy group span the multiple lines between the markers.
+    regex = re.compile(r'<!-- MULTICODE_BLOCK_START -->(.*?)<!-- MULTICODE_BLOCK_END -->', re.DOTALL)
+    @staticmethod
+    def replace(m: re.Match):
+        """
+        Build the replacement for one multicode block. Prints an error and
+        exits with status 1 when no tab can be found in the block.
+        """
+        repl_string = "{% tabs %}\n\n"
+        indent = ""
+        # An emphasized tab name at the start of a line, followed by either a
+        # fenced code block or a code-snippet tag.
+        code_tab_regex = re.compile(r'^[*_](?P<tabname>[^_*]+)[*_]\n+(?P<codeblock>```.+?```|\{% code-snippet .+? /%\}$)', re.MULTILINE|re.DOTALL)
+        if not code_tab_regex.search(m.group(1)):
+            # No fenced tabs: look for tab names indented by 4+ spaces whose
+            # code is an indented (not fenced) block one level deeper.
+            indented_code_tab_regex = re.compile(r'^(?P<indentation> {4,})[*_](?P<tabname>[^_*]+)[*_]\n\n(?P<codeblock>( {8,}.*|\n)+)\n\n', re.MULTILINE)
+            double_indented_code_tab_regex = re.compile(r'^(?P<indentation> {8,})[*_](?P<tabname>[^_*]+)[*_]\n\n(?P<codeblock>( {12,}.*|\n)+)\n\n', re.MULTILINE) # Same as above except one level of indent more.
+
+            if indented_code_tab_regex.search(m.group(1)):
+                use_regex = indented_code_tab_regex
+                # Prefer the deeper pattern whenever it also matches.
+                if double_indented_code_tab_regex.search(m.group(1)):
+                    use_regex = double_indented_code_tab_regex
+                for m2 in re.finditer(use_regex, m.group(1)):
+                    indent = m2.group("indentation")
+                    # Each indented tab becomes a fence labelled with the tab name.
+                    repl_string += indent + '```{% label="'+m2.group("tabname")+'" %}\n'
+                    for codeline in m2.group("codeblock").split("\n"):
+                        if not codeline.strip():
+                            repl_string += "\n"
+                        else:
+                            # Remove extra level of indentation since we're changing it to a fence.
+                            # If the codeline isn't long enough, the md file probably has a syntax error.
+                            repl_string += codeline[4:]+"\n"
+
+                    # trim any excess trailing newlines
+                    repl_string = repl_string.rstrip()+"\n"
+                    repl_string += indent+'```\n\n'
+            else:
+                print("ERROR, no tab found in code tabs")
+                print(m.group(1))
+                exit(1)
+        # Fenced or code-snippet tabs are each wrapped in a tab tag. This loop
+        # finds nothing when the indented branch above handled the block.
+        for m2 in re.finditer(code_tab_regex, m.group(1)):
+            repl_string += '{% tab label="'+m2.group("tabname")+'" %}\n'
+            repl_string += m2.group("codeblock").strip() + "\n"
+            repl_string += '{% /tab %}\n\n'
+        # The closing tag reuses the indentation of the last indented tab, if any.
+        repl_string += indent+"{% /tabs %}"
+        return repl_string
+regex_todos.append(TabsReplacer())
+
+callout_mapping = {
+    # lowercase callout name → admonition type
+    "tip": "success",
+    "note": "info",
+    "caution": "warning",
+    "warning": "danger",
+    "ヒント": "success",
+    "注記": "info",
+    "注意": "warning",
+    "警告": "danger",
+}
+class BQCalloutReplacer(RegexReplacer):
+    regex = re.compile(r'^\> [*_]{1,2}(?P<label>Tip|Note|Caution|Warning|ヒント|注記|注意|警告):?[*_]{1,2} (?P<content>(.*)(\n\> ?.*)*)$', re.MULTILINE|re.I)
+    @staticmethod
+    def replace(m: re.Match):
+        admontype = callout_mapping[m.group("label").lower()]
+        bq_start = re.compile(r'^\> |^\>$', re.MULTILINE)
+        content = bq_start.sub('', m.group("content"))
+        repl_string = '{% admonition type="'+admontype+'" name="'+m.group("label")+'" %}\n'+content+'\n{% /admonition %}'
+        return repl_string
+regex_todos.append(BQCalloutReplacer())
+
+class OnelineCalloutReplacer(RegexReplacer):
+    regex = re.compile(r'^(?P<indentation>\s*)[*_]{1,2}(?P<label>Tip|Note|Caution|Warning|ヒント|注記|注意|警告):?[*_]{1,2} (?P<content>.*)$', re.I)
+    @staticmethod
+    def replace(m: re.Match):
+        admontype = callout_mapping[m.group("label").lower()]
+        if m.group("indentation"):
+            repl_string = m.group("indentation")+'{% admonition type="'+admontype+'" name="'+m.group("label")+'" %}'+m.group("content")+'{% /admonition %}'
+        else:
+            repl_string = '{% admonition type="'+admontype+'" name="'+m.group("label")+'" %}\n'+m.group("content")+'\n{% /admonition %}'
+        return repl_string
+regex_todos.append(OnelineCalloutReplacer())
+
+
+category_regex = re.compile(r'^#?template: *pagetype-category\.html\.jinja\n', re.MULTILINE)
+def convert_category_page(ftext):
+    if not category_regex.search(ftext):
+        return ftext
+    ftext2 = re.sub(category_regex, "metadata:\n  indexPage: true\n", ftext)
+    ftext2 = ftext2 + "\n{% child-pages /%}\n"
+    return ftext2
+
+def main():
+    all_mds = list_mds("content")
+    for fname in all_mds:
+        with open(fname) as f:
+            ftext = f.read()
+        ftext2 = rm_common_links_includes(ftext)
+
+        for replacer in regex_todos:
+            ftext2 = replacer.replace_all(ftext2)
+
+        ftext2 = convert_category_page(ftext2)
+
+        if ftext2 != ftext:
+            #print("performing syntax conversion in", fname)
+            with open(fname, "w") as f:
+                f.write(ftext2)
+
+main()