mirror of
https://github.com/XRPLF/xrpl-dev-portal.git
synced 2025-11-17 10:15:50 +00:00
link checker - add quiet option
This commit is contained in:
@@ -3,6 +3,7 @@ import requests
|
|||||||
import os
|
import os
|
||||||
import yaml
|
import yaml
|
||||||
import argparse
|
import argparse
|
||||||
|
import logging
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
DEFAULT_CONFIG_FILE = "dactyl-config.yml"
|
DEFAULT_CONFIG_FILE = "dactyl-config.yml"
|
||||||
@@ -28,7 +29,7 @@ def checkLinks(offline=False):
|
|||||||
for fname in filenames:
|
for fname in filenames:
|
||||||
fullPath = os.path.join(dirpath, fname)
|
fullPath = os.path.join(dirpath, fname)
|
||||||
if "/node_modules/" in fullPath or ".git" in fullPath:
|
if "/node_modules/" in fullPath or ".git" in fullPath:
|
||||||
print("skipping ignored dir:", fullPath)
|
logging.info("skipping ignored dir: %s" % fullPath)
|
||||||
continue
|
continue
|
||||||
if fullPath.endswith(".html"):
|
if fullPath.endswith(".html"):
|
||||||
soup = getSoup(fullPath)
|
soup = getSoup(fullPath)
|
||||||
@@ -40,7 +41,7 @@ def checkLinks(offline=False):
|
|||||||
|
|
||||||
endpoint = link['href']
|
endpoint = link['href']
|
||||||
if not endpoint.strip():
|
if not endpoint.strip():
|
||||||
print("Empty link in",fullPath)
|
logging.warning("Empty link in %s" % fullPath)
|
||||||
broken_links.append( (fullPath, endpoint) )
|
broken_links.append( (fullPath, endpoint) )
|
||||||
num_links_checked += 1
|
num_links_checked += 1
|
||||||
|
|
||||||
@@ -48,43 +49,43 @@ def checkLinks(offline=False):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
elif "mailto:" in endpoint:
|
elif "mailto:" in endpoint:
|
||||||
print("Skipping email link in %s to %s"%(fullPath, endpoint))
|
logging.info("Skipping email link in %s to %s"%(fullPath, endpoint))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
elif "://" in endpoint:
|
elif "://" in endpoint:
|
||||||
if offline:
|
if offline:
|
||||||
print("Offline - Skipping remote URL %s"%(endpoint))
|
logging.info("Offline - Skipping remote URL %s"%(endpoint))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
num_links_checked += 1
|
num_links_checked += 1
|
||||||
if endpoint not in externalCache:
|
if endpoint not in externalCache:
|
||||||
print("Testing remote URL %s"%(endpoint))
|
logging.info("Testing remote URL %s"%(endpoint))
|
||||||
try:
|
try:
|
||||||
code = requests.head(endpoint).status_code
|
code = requests.head(endpoint).status_code
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error occurred:",e)
|
logging.warning("Error occurred: %s" % e)
|
||||||
code = 500
|
code = 500
|
||||||
if code == 405 or code == 404:
|
if code == 405 or code == 404:
|
||||||
#HEAD didn't work, maybe GET will?
|
#HEAD didn't work, maybe GET will?
|
||||||
try:
|
try:
|
||||||
code = requests.get(endpoint).status_code
|
code = requests.get(endpoint).status_code
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error occurred:",e)
|
logging.warning("Error occurred: %s" % e)
|
||||||
code = 500
|
code = 500
|
||||||
|
|
||||||
if code < 200 or code >= 400:
|
if code < 200 or code >= 400:
|
||||||
print("Broken remote link in %s to %s"%(fullPath, endpoint))
|
logging.warning("Broken remote link in %s to %s"%(fullPath, endpoint))
|
||||||
broken_links.append( (fullPath, endpoint) )
|
broken_links.append( (fullPath, endpoint) )
|
||||||
else:
|
else:
|
||||||
print("...success.")
|
logging.info("...success.")
|
||||||
externalCache.append(endpoint)
|
externalCache.append(endpoint)
|
||||||
|
|
||||||
|
|
||||||
elif '#' in endpoint:
|
elif '#' in endpoint:
|
||||||
if fname in config["ignore_anchors_in"]:
|
if fname in config["ignore_anchors_in"]:
|
||||||
print("Ignoring anchor %s in dynamic page %s"%(endpoint,fname))
|
logging.info("Ignoring anchor %s in dynamic page %s"%(endpoint,fname))
|
||||||
continue
|
continue
|
||||||
print("Testing local link %s from %s"%(endpoint, fullPath))
|
logging.info("Testing local link %s from %s"%(endpoint, fullPath))
|
||||||
num_links_checked += 1
|
num_links_checked += 1
|
||||||
filename,anchor = endpoint.split("#",1)
|
filename,anchor = endpoint.split("#",1)
|
||||||
if filename == "":
|
if filename == "":
|
||||||
@@ -92,13 +93,13 @@ def checkLinks(offline=False):
|
|||||||
else:
|
else:
|
||||||
fullTargetPath = os.path.join(dirpath, filename)
|
fullTargetPath = os.path.join(dirpath, filename)
|
||||||
if not os.path.exists(fullTargetPath):
|
if not os.path.exists(fullTargetPath):
|
||||||
print("Broken local link in %s to %s"%(fullPath, endpoint))
|
logging.warning("Broken local link in %s to %s"%(fullPath, endpoint))
|
||||||
broken_links.append( (fullPath, endpoint) )
|
broken_links.append( (fullPath, endpoint) )
|
||||||
|
|
||||||
elif filename in config["ignore_anchors_in"]:
|
elif filename in config["ignore_anchors_in"]:
|
||||||
#Some pages are populated dynamically, so BeatifulSoup wouldn't
|
#Some pages are populated dynamically, so BeatifulSoup wouldn't
|
||||||
# be able to find anchors in them anyway
|
# be able to find anchors in them anyway
|
||||||
print("Skipping anchor link in %s to dynamic page %s" %
|
logging.info("Skipping anchor link in %s to dynamic page %s" %
|
||||||
(fullPath, endpoint))
|
(fullPath, endpoint))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -107,21 +108,21 @@ def checkLinks(offline=False):
|
|||||||
targetSoup = getSoup(fullTargetPath)
|
targetSoup = getSoup(fullTargetPath)
|
||||||
if not targetSoup.find(id=anchor) and not targetSoup.find(
|
if not targetSoup.find(id=anchor) and not targetSoup.find(
|
||||||
"a",attrs={"name":anchor}):
|
"a",attrs={"name":anchor}):
|
||||||
print("Broken anchor link in %s to %s"%(fullPath, endpoint))
|
logging.warning("Broken anchor link in %s to %s"%(fullPath, endpoint))
|
||||||
broken_links.append( (fullPath, endpoint) )
|
broken_links.append( (fullPath, endpoint) )
|
||||||
else:
|
else:
|
||||||
print("...anchor found.")
|
logging.info("...anchor found.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
elif endpoint[0] == '/':
|
elif endpoint[0] == '/':
|
||||||
#can't really test links out of the local field
|
#can't really test links out of the local field
|
||||||
print("Skipping absolute link in %s to %s"%(fullPath, endpoint))
|
logging.info("Skipping absolute link in %s to %s"%(fullPath, endpoint))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
else:
|
else:
|
||||||
num_links_checked += 1
|
num_links_checked += 1
|
||||||
if not os.path.exists(os.path.join(dirpath, endpoint)):
|
if not os.path.exists(os.path.join(dirpath, endpoint)):
|
||||||
print("Broken local link in %s to %s"%(fullPath, endpoint))
|
logging.warning("Broken local link in %s to %s"%(fullPath, endpoint))
|
||||||
broken_links.append( (fullPath, endpoint) )
|
broken_links.append( (fullPath, endpoint) )
|
||||||
return broken_links, num_links_checked
|
return broken_links, num_links_checked
|
||||||
|
|
||||||
@@ -130,7 +131,7 @@ def checkLinks(offline=False):
|
|||||||
def load_config(config_file=DEFAULT_CONFIG_FILE):
|
def load_config(config_file=DEFAULT_CONFIG_FILE):
|
||||||
"""Reload config from a YAML file."""
|
"""Reload config from a YAML file."""
|
||||||
global config
|
global config
|
||||||
print("loading config file %s..." % config_file)
|
logging.info("loading config file %s..." % config_file)
|
||||||
with open(config_file, "r") as f:
|
with open(config_file, "r") as f:
|
||||||
config = yaml.load(f)
|
config = yaml.load(f)
|
||||||
assert(config["out_path"])
|
assert(config["out_path"])
|
||||||
@@ -147,15 +148,18 @@ if __name__ == "__main__":
|
|||||||
help="Exit with error even on known problems")
|
help="Exit with error even on known problems")
|
||||||
parser.add_argument("--config", "-c", type=str,
|
parser.add_argument("--config", "-c", type=str,
|
||||||
help="Specify path to an alternate config file.")
|
help="Specify path to an alternate config file.")
|
||||||
|
parser.add_argument("--quiet", "-q", action="store_true",
|
||||||
|
help="Reduce output to just failures and final report")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if not args.quiet:
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
if args.config:
|
if args.config:
|
||||||
load_config(args.config)
|
load_config(args.config)
|
||||||
else:
|
else:
|
||||||
load_config()
|
load_config()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
broken_links, num_links_checked = checkLinks(args.offline)
|
broken_links, num_links_checked = checkLinks(args.offline)
|
||||||
|
|
||||||
print("---------------------------------------")
|
print("---------------------------------------")
|
||||||
|
|||||||
Reference in New Issue
Block a user