diff options
Diffstat (limited to 'codegen/vulkan/scripts/check_html_xrefs.py')
-rwxr-xr-x | codegen/vulkan/scripts/check_html_xrefs.py | 93 |
1 file changed, 0 insertions, 93 deletions
#!/usr/bin/python3
#
# Copyright 2020-2021 The Khronos Group Inc.
#
# SPDX-License-Identifier: Apache-2.0

# check_html_xrefs - simple-minded check for internal xrefs in spec HTML
# that don't exist.

# Usage: check_html_xrefs file
# Just reports bad xrefs, not where they occur

import argparse
import re

from lxml import etree

# Matches asciidoctor section <div> classes: 'sect1', 'sect2', ...
SECTNAME = re.compile(r'sect(?P<level>\d+)')


def find_parent_ids(elem, href):
    """Find section titles in parents, which are the 'id' elements of '<hN'
    children of '<div class="sectM"' tags, and N = M + 1. This may be
    specific to the Vulkan spec, though - hierarchy could be different in
    other asciidoctor documents.

    elem - lxml element node of the bad reference
    href - href link text of elem (currently unused)

    Returns a list of [ anchor, title ]."""

    # Walk up looking for an enclosing <div> with class="sect#"
    parent = elem.getparent()
    while parent is not None:
        if parent.tag == 'div':
            # get() returns None for a <div> with no class attribute;
            # default to '' so the regex match never sees None
            matches = SECTNAME.match(parent.get('class', ''))
            if matches is not None:
                level = int(matches.group('level'))
                # Look for the corresponding header tag in this div
                helem = parent.find('./h{}'.format(level + 1))
                if helem is not None:
                    return [helem.get('id'), ''.join(helem.itertext())]
        parent = parent.getparent()
    return ['** NO PARENT NODE IDENTIFIED **', '']


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('files', metavar='filename', nargs='*',
                        help='Path to spec HTML file to check')
    args = parser.parse_args()

    for filename in args.files:
        # Don't rebind 'parser' (the argparse object) - it is still
        # needed on later loop iterations in principle
        htmlparser = etree.HTMLParser()
        tree = etree.parse(filename, htmlparser)

        # Collect all 'id' attributes in the document.
        # Duplicate anchors are tolerated but not reported.
        ids = set()
        for elem in tree.findall('.//*[@id]'):
            anchor = elem.get('id')
            ids.add(anchor)

        # Find all internal 'href' attributes and see if they're valid.
        # Keep an (element, href) list for tracking parents.
        # Also keep a count of each bad href.
        refs = []
        count = {}
        for elem in tree.findall('.//a[@href]'):
            href = elem.get('href')
            # If not a local href, skip it.
            # startswith() is safe for an empty href, unlike href[0].
            if not href.startswith('#'):
                continue
            # If there's a corresponding id, skip it
            href = href[1:]
            if href not in ids:
                # Record every occurrence of a bad reference
                refs.append((elem, href))
                count[href] = count.get(href, 0) + 1

        # Check for hrefs not found in ids
        print('Bad links in {}:'.format(filename))
        for (elem, href) in refs:
            parents = find_parent_ids(elem, href)
            print('{:<40} in {:<28} ({})'.format(href, parents[0], parents[1]))