#!/usr/bin/env python3 """ refguide_check.py [OPTIONS] [-- ARGS] - Check for a NumPy submodule whether the objects in its __all__ dict correspond to the objects included in the reference guide. - Check docstring examples - Check example blocks in RST files Example of usage:: $ python tools/refguide_check.py Note that this is a helper script to be able to check if things are missing; the output of this script does need to be checked manually. In some cases objects are left out of the refguide for a good reason (it's an alias of another function, or deprecated, or ...) Another use of this helper script is to check validity of code samples in docstrings:: $ python tools/refguide_check.py --doctests ma or in RST-based documentations:: $ python tools/refguide_check.py --rst doc/source """ import copy import inspect import io import os import re import sys import warnings from argparse import ArgumentParser import docutils.core from docutils.parsers.rst import directives sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'doc', 'sphinxext')) from numpydoc.docscrape_sphinx import get_doc_object # Enable specific Sphinx directives from sphinx.directives.other import Only, SeeAlso directives.register_directive('seealso', SeeAlso) directives.register_directive('only', Only) BASE_MODULE = "numpy" PUBLIC_SUBMODULES = [ "f2py", "linalg", "lib", "lib.format", "lib.mixins", "lib.recfunctions", "lib.scimath", "lib.stride_tricks", "lib.npyio", "lib.introspect", "lib.array_utils", "fft", "char", "rec", "ma", "ma.extras", "ma.mrecords", "polynomial", "polynomial.chebyshev", "polynomial.hermite", "polynomial.hermite_e", "polynomial.laguerre", "polynomial.legendre", "polynomial.polynomial", "matrixlib", "random", "strings", "testing", ] # Docs for these modules are included in the parent module OTHER_MODULE_DOCS = { 'fftpack.convolve': 'fftpack', 'io.wavfile': 'io', 'io.arff': 'io', } # these names are not required to be present in ALL despite being in # autosummary:: listing REFGUIDE_ALL_SKIPLIST = [ r'scipy\.sparse\.linalg', r'scipy\.spatial\.distance', r'scipy\.linalg\.blas\.[sdczi].*', r'scipy\.linalg\.lapack\.[sdczi].*', ] # these names are not required to be in an autosummary:: listing # despite being in ALL REFGUIDE_AUTOSUMMARY_SKIPLIST = [ # NOTE: should NumPy have a better match between autosummary # listings and __all__? For now, TR isn't convinced this is a # priority -- focus on just getting docstrings executed / correct r'numpy\.*', ] def short_path(path, cwd=None): """ Return relative or absolute path name, whichever is shortest. Parameters ---------- path : str or None cwd : str or None Returns ------- str Relative path or absolute path based on current working directory """ if not isinstance(path, str): return path if cwd is None: cwd = os.getcwd() abspath = os.path.abspath(path) relpath = os.path.relpath(path, cwd) if len(abspath) <= len(relpath): return abspath return relpath def find_names(module, names_dict): """ Finds the occurrences of function names, special directives like data and functions and scipy constants in the docstrings of `module`. The following patterns are searched for: * 3 spaces followed by function name, and maybe some spaces, some dashes, and an explanation; only function names listed in refguide are formatted like this (mostly, there may be some false positives * special directives, such as data and function * (scipy.constants only): quoted list The `names_dict` is updated by reference and accessible in calling method Parameters ---------- module : ModuleType The module, whose docstrings is to be searched names_dict : dict Dictionary which contains module name as key and a set of found function names and directives as value Returns ------- None """ patterns = [ r"^\s\s\s([a-z_0-9A-Z]+)(\s+-+.*)?$", r"^\.\. (?:data|function)::\s*([a-z_0-9A-Z]+)\s*$" ] if module.__name__ == 'scipy.constants': patterns += ["^``([a-z_0-9A-Z]+)``"] patterns = [re.compile(pattern) for pattern in patterns] module_name = module.__name__ for line in module.__doc__.splitlines(): res = re.search(r"^\s*\.\. (?:currentmodule|module):: ([a-z0-9A-Z_.]+)\s*$", line) if res: module_name = res.group(1) continue for pattern in patterns: res = re.match(pattern, line) if res is not None: name = res.group(1) entry = f'{module_name}.{name}' names_dict.setdefault(module_name, set()).add(name) break def get_all_dict(module): """ Return a copy of the __all__ dict with irrelevant items removed. Parameters ---------- module : ModuleType The module whose __all__ dict has to be processed Returns ------- deprecated : list List of callable and deprecated sub modules not_deprecated : list List of non callable or non deprecated sub modules others : list List of remaining types of sub modules """ if hasattr(module, "__all__"): all_dict = copy.deepcopy(module.__all__) else: all_dict = copy.deepcopy(dir(module)) all_dict = [name for name in all_dict if not name.startswith("_")] for name in ['absolute_import', 'division', 'print_function']: try: all_dict.remove(name) except ValueError: pass if not all_dict: # Must be a pure documentation module all_dict.append('__doc__') # Modules are almost always private; real submodules need a separate # run of refguide_check. all_dict = [name for name in all_dict if not inspect.ismodule(getattr(module, name, None))] deprecated = [] not_deprecated = [] for name in all_dict: f = getattr(module, name, None) if callable(f) and is_deprecated(f): deprecated.append(name) else: not_deprecated.append(name) others = set(dir(module)).difference(set(deprecated)).difference(set(not_deprecated)) # noqa: E501 return not_deprecated, deprecated, others def compare(all_dict, others, names, module_name): """ Return sets of objects from all_dict. Will return three sets: {in module_name.__all__}, {in REFGUIDE*}, and {missing from others} Parameters ---------- all_dict : list List of non deprecated sub modules for module_name others : list List of sub modules for module_name names : set Set of function names or special directives present in docstring of module_name module_name : ModuleType Returns ------- only_all : set only_ref : set missing : set """ only_all = set() for name in all_dict: if name not in names: for pat in REFGUIDE_AUTOSUMMARY_SKIPLIST: if re.match(pat, module_name + '.' + name): break else: only_all.add(name) only_ref = set() missing = set() for name in names: if name not in all_dict: for pat in REFGUIDE_ALL_SKIPLIST: if re.match(pat, module_name + '.' + name): if name not in others: missing.add(name) break else: only_ref.add(name) return only_all, only_ref, missing def is_deprecated(f): """ Check if module `f` is deprecated Parameters ---------- f : ModuleType Returns ------- bool """ with warnings.catch_warnings(record=True) as w: warnings.simplefilter("error") try: f(**{"not a kwarg": None}) except DeprecationWarning: return True except Exception: pass return False def check_items(all_dict, names, deprecated, others, module_name, dots=True): """ Check that `all_dict` is consistent with the `names` in `module_name` For instance, that there are no deprecated or extra objects. Parameters ---------- all_dict : list names : set deprecated : list others : list module_name : ModuleType dots : bool Whether to print a dot for each check Returns ------- list List of [(name, success_flag, output)...] """ num_all = len(all_dict) num_ref = len(names) output = "" output += f"Non-deprecated objects in __all__: {num_all}\n" output += f"Objects in refguide: {num_ref}\n\n" only_all, only_ref, missing = compare(all_dict, others, names, module_name) dep_in_ref = only_ref.intersection(deprecated) only_ref = only_ref.difference(deprecated) if len(dep_in_ref) > 0: output += "Deprecated objects in refguide::\n\n" for name in sorted(deprecated): output += " " + name + "\n" if len(only_all) == len(only_ref) == len(missing) == 0: if dots: output_dot('.') return [(None, True, output)] else: if len(only_all) > 0: output += f"ERROR: objects in {module_name}.__all__ but not in refguide::\n\n" # noqa: E501 for name in sorted(only_all): output += " " + name + "\n" output += "\nThis issue can be fixed by adding these objects to\n" output += "the function listing in __init__.py for this module\n" if len(only_ref) > 0: output += f"ERROR: objects in refguide but not in {module_name}.__all__::\n\n" # noqa: E501 for name in sorted(only_ref): output += " " + name + "\n" output += "\nThis issue should likely be fixed by removing these objects\n" output += "from the function listing in __init__.py for this module\n" output += "or adding them to __all__.\n" if len(missing) > 0: output += "ERROR: missing objects::\n\n" for name in sorted(missing): output += " " + name + "\n" if dots: output_dot('F') return [(None, False, output)] def validate_rst_syntax(text, name, dots=True): """ Validates the doc string in a snippet of documentation `text` from file `name` Parameters ---------- text : str Docstring text name : str File name for which the doc string is to be validated dots : bool Whether to print a dot symbol for each check Returns ------- (bool, str) """ if text is None: if dots: output_dot('E') return False, f"ERROR: {name}: no documentation" ok_unknown_items = { 'mod', 'doc', 'currentmodule', 'autosummary', 'data', 'attr', 'obj', 'versionadded', 'versionchanged', 'module', 'class', 'ref', 'func', 'toctree', 'moduleauthor', 'term', 'c:member', 'sectionauthor', 'codeauthor', 'eq', 'doi', 'DOI', 'arXiv', 'arxiv' } # Run through docutils error_stream = io.StringIO() def resolve(name, is_label=False): return ("http://foo", name) token = '' docutils.core.publish_doctree( text, token, settings_overrides={'halt_level': 5, 'traceback': True, 'default_reference_context': 'title-reference', 'default_role': 'emphasis', 'link_base': '', 'resolve_name': resolve, 'stylesheet_path': '', 'raw_enabled': 0, 'file_insertion_enabled': 0, 'warning_stream': error_stream}) # Print errors, disregarding unimportant ones error_msg = error_stream.getvalue() errors = error_msg.split(token) success = True output = "" for error in errors: lines = error.splitlines() if not lines: continue m = re.match(r'.*Unknown (?:interpreted text role|directive type) "(.*)".*$', lines[0]) # noqa: E501 if m: if m.group(1) in ok_unknown_items: continue m = re.match(r'.*Error in "math" directive:.*unknown option: "label"', " ".join(lines), re.S) # noqa: E501 if m: continue output += name + lines[0] + "::\n " + "\n ".join(lines[1:]).rstrip() + "\n" # noqa: E501 success = False if not success: output += " " + "-" * 72 + "\n" for lineno, line in enumerate(text.splitlines()): output += " %-4d %s\n" % (lineno + 1, line) output += " " + "-" * 72 + "\n\n" if dots: output_dot('.' if success else 'F') return success, output def output_dot(msg='.', stream=sys.stderr): stream.write(msg) stream.flush() def check_rest(module, names, dots=True): """ Check reStructuredText formatting of docstrings Parameters ---------- module : ModuleType names : set Returns ------- result : list List of [(module_name, success_flag, output),...] """ skip_types = (dict, str, float, int) results = [] if module.__name__[6:] not in OTHER_MODULE_DOCS: results += [(module.__name__,) + validate_rst_syntax(inspect.getdoc(module), module.__name__, dots=dots)] for name in names: full_name = module.__name__ + '.' + name obj = getattr(module, name, None) if obj is None: results.append((full_name, False, f"{full_name} has no docstring")) continue elif isinstance(obj, skip_types): continue if inspect.ismodule(obj): text = inspect.getdoc(obj) else: try: text = str(get_doc_object(obj)) except Exception: import traceback results.append((full_name, False, "Error in docstring format!\n" + traceback.format_exc())) continue m = re.search("([\x00-\x09\x0b-\x1f])", text) # noqa: RUF039 if m: msg = ("Docstring contains a non-printable character %r! " "Maybe forgot r\"\"\"?" % (m.group(1),)) results.append((full_name, False, msg)) continue try: src_file = short_path(inspect.getsourcefile(obj)) except TypeError: src_file = None if src_file: file_full_name = src_file + ':' + full_name else: file_full_name = full_name results.append((full_name,) + validate_rst_syntax(text, file_full_name, dots=dots)) return results def main(argv): """ Validates the docstrings of all the pre decided set of modules for errors and docstring standards. """ parser = ArgumentParser(usage=__doc__.lstrip()) parser.add_argument("module_names", metavar="SUBMODULES", default=[], nargs='*', help="Submodules to check (default: all public)") parser.add_argument("-v", "--verbose", action="count", default=0) args = parser.parse_args(argv) modules = [] names_dict = {} if not args.module_names: args.module_names = list(PUBLIC_SUBMODULES) + [BASE_MODULE] module_names = args.module_names + [ OTHER_MODULE_DOCS[name] for name in args.module_names if name in OTHER_MODULE_DOCS ] # remove duplicates while maintaining order module_names = list(dict.fromkeys(module_names)) dots = True success = True results = [] errormsgs = [] for submodule_name in module_names: prefix = BASE_MODULE + '.' if not ( submodule_name.startswith(prefix) or submodule_name == BASE_MODULE ): module_name = prefix + submodule_name else: module_name = submodule_name __import__(module_name) module = sys.modules[module_name] if submodule_name not in OTHER_MODULE_DOCS: find_names(module, names_dict) if submodule_name in args.module_names: modules.append(module) if modules: print(f"Running checks for {len(modules)} modules:") for module in modules: if dots: sys.stderr.write(module.__name__ + ' ') sys.stderr.flush() all_dict, deprecated, others = get_all_dict(module) names = names_dict.get(module.__name__, set()) mod_results = [] mod_results += check_items(all_dict, names, deprecated, others, module.__name__) mod_results += check_rest(module, set(names).difference(deprecated), dots=dots) for v in mod_results: assert isinstance(v, tuple), v results.append((module, mod_results)) if dots: sys.stderr.write('\n') sys.stderr.flush() # Report results for module, mod_results in results: success = all(x[1] for x in mod_results) if not success: errormsgs.append(f'failed checking {module.__name__}') if success and args.verbose == 0: continue print("") print("=" * len(module.__name__)) print(module.__name__) print("=" * len(module.__name__)) print("") for name, success, output in mod_results: if name is None: if not success or args.verbose >= 1: print(output.strip()) print("") elif not success or (args.verbose >= 2 and output.strip()): print(name) print("-" * len(name)) print("") print(output.strip()) print("") if len(errormsgs) == 0: print("\nOK: all checks passed!") sys.exit(0) else: print('\nERROR: ', '\n '.join(errormsgs)) sys.exit(1) if __name__ == '__main__': main(argv=sys.argv[1:])