Dactyl Link Checker - longer timeout, retry remote links after a delay

mDuo13
2016-10-10 15:18:53 -07:00
parent 5562f07ffa
commit 3c800a1da5


@@ -4,12 +4,14 @@ import os
 import yaml
 import argparse
 import logging
+import re
 from bs4 import BeautifulSoup
-from time import time
+from time import time, sleep

 DEFAULT_CONFIG_FILE = "dactyl-config.yml"
-TIMEOUT_SECS = 5
+TIMEOUT_SECS = 9.1
 CHECK_IN_INTERVAL = 30
+FINAL_RETRY_DELAY = 4 * CHECK_IN_INTERVAL

 soupsCache = {}
 def getSoup(fullPath):
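A note on the new constants: the per-request timeout nearly doubles, from 5 to 9.1 seconds, and since CHECK_IN_INTERVAL is unchanged at 30, the new FINAL_RETRY_DELAY evaluates to 4 × 30 = 120 seconds, so the final retry pass starts roughly two minutes after the main check finishes.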
@@ -180,7 +182,32 @@ def checkLinks(offline=False):
             broken_links.append( (fullPath, src) )
     return broken_links, num_links_checked

+def final_retry_links(broken_links):
+    """Give the broken remote links a little while to recover in case they're just transient flaps."""
+    broken_remote_links = [ (page, link) for page, link in broken_links
+                            if re.match(r"^https?://", link) ]
+    if not broken_remote_links:
+        logging.info("(no http/https broken links to retry)")
+        return
+    logging.info("Waiting %d seconds to retry %d broken remote links..."
+                 % (FINAL_RETRY_DELAY, len(broken_remote_links)))
+    start_wait = time()
+    elapsed = 0
+    while elapsed < FINAL_RETRY_DELAY:
+        sleep(CHECK_IN_INTERVAL)
+        print("...")
+        elapsed = time() - start_wait
+    retry_cache = []
+    retry_broken = []
+    for page, link in broken_remote_links:
+        link_works = check_remote_url(link, page, retry_broken, retry_cache)
+        if link_works:
+            logging.info("Link %s in page %s is back online" % (link, page))
+            broken_links.remove( (page, link) )
+        else:
+            logging.info("Link %s in page %s is still down." % (link, page))
+
 def load_config(config_file=DEFAULT_CONFIG_FILE):
     """Reload config from a YAML file."""
@@ -202,6 +229,8 @@ if __name__ == "__main__":
help="Exit with error even on known problems") help="Exit with error even on known problems")
parser.add_argument("--config", "-c", type=str, parser.add_argument("--config", "-c", type=str,
help="Specify path to an alternate config file.") help="Specify path to an alternate config file.")
parser.add_argument("-n", "--no_final_retry", action="store_true",
help="Don't wait and retry failed remote links at the end.")
parser.add_argument("--quiet", "-q", action="store_true", parser.add_argument("--quiet", "-q", action="store_true",
help="Reduce output to just failures and final report") help="Reduce output to just failures and final report")
args = parser.parse_args() args = parser.parse_args()
@@ -216,6 +245,11 @@ if __name__ == "__main__":
     broken_links, num_links_checked = checkLinks(args.offline)
+    if not args.no_final_retry and not args.offline:
+        final_retry_links(broken_links)
+        #^ sleeps for FINAL_RETRY_DELAY and then retries remote links.
+        # Automatically removes them from broken_links if they work now.
+
print("---------------------------------------") print("---------------------------------------")
print("Link check report. %d links checked."%num_links_checked) print("Link check report. %d links checked."%num_links_checked)