# Source code for sphinx_gallery.gen_rst

# Author: Óscar Nájera
# License: 3-clause BSD
"""reST file generator.

Generate the rst files for the examples by iterating over the python
example files.

Files that generate images should start with 'plot'.
"""

import ast
import codeop
import contextlib
import copy
import gc
import importlib
import inspect
import os
import re
import stat
import sys
import traceback
import warnings
from functools import lru_cache
from io import StringIO
from pathlib import Path
from shutil import copyfile
from textwrap import indent
from time import time

import sphinx.util
from sphinx.errors import ConfigError, ExtensionError
from sphinx.util.console import blue, bold, red

from . import glr_path_static, py_source_parser
from .backreferences import (
    THUMBNAIL_PARENT_DIV,
    THUMBNAIL_PARENT_DIV_CLOSE,
    _make_ref_regex,
    _thumbnail_div,
    _write_backreferences,
    identify_names,
)
from .block_parser import BlockParser
from .docs_resolv import _write_code_obj
from .interactive_example import (
    _add_jupyterlite_badge_logo,
    gen_binder_rst,
    gen_jupyterlite_rst,
)
from .notebook import jupyter_notebook, save_notebook
from .scrapers import (
    ImagePathIterator,
    _find_image_ext,
    _reset_dict,
    _scraper_dict,
    clean_modules,
    save_figures,
)
from .utils import (
    _W_KW,
    _collect_gallery_files,
    _format_toctree,
    _replace_md5,
    get_md5sum,
    optipng,
    scale_image,
    status_iterator,
    zip_files,
)

# Module-level logger named "sphinx-gallery"; used by `_LoggingTee` and the
# error reporting in this module.
logger = sphinx.util.logging.getLogger("sphinx-gallery")


###############################################################################


class _LoggingTee:
    """A tee object to redirect streams to the logger.

    Creating an instance installs it as both ``sys.stdout`` and ``sys.stderr``.
    Everything written is accumulated in ``self.output`` (a fresh ``StringIO``
    after every `set_std_and_reset_position` call) and forwarded, one complete
    line at a time, to ``logger.verbose``.

    Parameters
    ----------
    src_filename : str
        Name used in the "Output from ..." message logged on the first write
        after each reset.
    """

    def __init__(self, src_filename):
        self.logger = logger
        self.src_filename = src_filename
        self.logger_buffer = ""
        self.set_std_and_reset_position()

        # For TextIO compatibility
        self.closed = False
        self.encoding = "utf-8"

    def set_std_and_reset_position(self):
        """Install this tee as stdout/stderr and start a fresh capture buffer.

        Returns
        -------
        self : _LoggingTee
        """
        current = sys.stdout
        if not isinstance(current, _LoggingTee):
            # Remember the real streams so `restore_std` can put them back.
            self.origs = (sys.stdout, sys.stderr)
        elif not hasattr(self, "origs"):
            # This tee was created while another tee was already installed.
            # Inherit the streams that one saved, otherwise `restore_std`
            # would fail with AttributeError.
            self.origs = getattr(
                current, "origs", (sys.__stdout__, sys.__stderr__)
            )
        sys.stdout = sys.stderr = self
        self.first_write = True
        self.output = StringIO()
        return self

    def restore_std(self):
        """Flush the current streams and reinstall the saved originals."""
        sys.stdout.flush()
        sys.stderr.flush()
        sys.stdout, sys.stderr = self.origs

    def write(self, data):
        """Capture `data` and log every complete line it finishes.

        Returns
        -------
        int
            Number of characters written, as text streams are expected to
            report.
        """
        written = self.output.write(data)

        if self.first_write:
            self.logger.verbose("Output from %s", self.src_filename, color="brown")
            self.first_write = False

        data = self.logger_buffer + data
        lines = data.splitlines()
        if data and data[-1] not in "\r\n":
            # Wait to write last line if it's incomplete. It will write next
            # time or when the LoggingTee is flushed.
            self.logger_buffer = lines[-1]
            lines = lines[:-1]
        else:
            self.logger_buffer = ""

        for line in lines:
            self.logger.verbose("%s", line)
        return written

    def flush(self):
        """Flush the capture buffer and log any pending partial line."""
        self.output.flush()
        if self.logger_buffer:
            self.logger.verbose("%s", self.logger_buffer)
            self.logger_buffer = ""

    # For TextIO compatibility
    def close(self):
        pass

    def fileno(self):
        return self.output.fileno()

    def isatty(self):
        return self.output.isatty()

    def readable(self):
        return False

    def seekable(self):
        return False

    def tell(self):
        return self.output.tell()

    def writable(self):
        return True

    @property
    def errors(self):
        return self.output.errors

    @property
    def newlines(self):
        return self.output.newlines

    # When called in gen_rst, conveniently use context managing
    def __enter__(self):
        return self

    def __exit__(self, type_, value, tb):
        self.restore_std()


###############################################################################
# The following strings are used when we have several pictures: we use
# an html div tag that our CSS uses to turn the lists into horizontal
# lists.
# Applies the "sphx-glr-horizontal" class to the element that follows it.
HLIST_HEADER = """
.. rst-class:: sphx-glr-horizontal

"""

# One bullet-list item holding a single image; ``%s`` is the image path
# (written after a leading "/").
HLIST_IMAGE_TEMPLATE = """
    *

      .. image:: /%s
            :class: sphx-glr-multi-img
"""

# A single stand-alone image; ``%s`` is the image path (written after a
# leading "/").
SINGLE_IMAGE = """
.. image:: /%s
    :class: sphx-glr-single-img
"""

# Literal block for captured script output; ``{0}`` is the output text, which
# `_get_code_output` indents by four spaces before formatting.
CODE_OUTPUT = """.. rst-class:: sphx-glr-script-out

 .. code-block:: none

{0}\n"""

# Running-time footer; ``{0}`` is the minutes part and ``{1}`` the seconds part.
TIMING_CONTENT = """
.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ({0:.0f} minutes {1:.3f} seconds)

"""

# HTML-only "Gallery generated by Sphinx-Gallery" signature.
SPHX_GLR_SIG = """\n
.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_
"""

# Header used to include raw html
# ``{0}`` is the HTML, which `_get_code_output` indents by four spaces.
HTML_HEADER = """.. raw:: html

    <div class="output_subarea output_html rendered_html output_result">
{0}
    </div>
    <br />
    <br />"""

# Opens the HTML-only download footer; ``{0}`` completes the
# ``_sphx_glr_download_`` reference label.
DOWNLOAD_LINKS_HEADER = """
.. _sphx_glr_download_{0}:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example
"""

# Download link for the example source; ``{0}`` is used as both link text and
# target, ``{1}`` is inserted before "source code" (presumably the language
# name -- confirm against the caller).
CODE_DOWNLOAD = """
    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download {1} source code: {0} <{0}>`
"""

# Download link for the Jupyter notebook; ``{0}`` is both link text and target.
NOTEBOOK_DOWNLOAD = """
    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: {0} <{0}>`
"""

# Download link for the zipped example; ``{0}`` is both link text and target.
ZIP_DOWNLOAD = """
    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: {0} <{0}>`
"""

# Includes the file named ``{0}.recommendations``.
RECOMMENDATIONS_INCLUDE = """\n
.. include:: {0}.recommendations
"""


def codestr2rst(codestr, lang="python", lineno=None):
    """Return reStructuredText code block from code string.

    Parameters
    ----------
    codestr : str
        Source code to wrap in a ``code-block`` directive.
    lang : str
        Language name placed after ``.. code-block::``.
    lineno : int or None
        Line number of the first line of `codestr`. When given, a
        ``:lineno-start:`` option is emitted, shifted by the number of leading
        blank lines. When None, no line-number option is written.

    Returns
    -------
    str
        The directive line, its options, a blank line and `codestr` indented
        by four spaces.
    """
    if lineno is not None:
        # Sphinx only starts numbering from the first non-empty line, so skip
        # past the newlines in the leading whitespace. The prefix length is
        # computed explicitly: a negative slice end built from
        # ``-len(codestr.lstrip())`` collapses to 0 for all-whitespace input.
        n_leading = len(codestr) - len(codestr.lstrip())
        blank_lines = codestr.count("\n", 0, n_leading)
        # Options are indented like the directive content (four spaces) so
        # they belong to the same directive block as the code.
        lineno_option = f"    :lineno-start: {lineno + blank_lines}\n"
    else:
        lineno_option = ""

    # If the whole block is indented, prevent Sphinx from removing too much
    # whitespace
    wholly_indented = all(
        not line or line.startswith((" ", "\t")) for line in codestr.splitlines()
    )
    dedent_option = "    :dedent: 1\n" if wholly_indented else ""

    code_directive = f".. code-block:: {lang}\n{dedent_option}{lineno_option}\n"
    return code_directive + indent(codestr, " " * 4)
def _regroup(x):
    """Rebuild a three-group match, keeping only the last dotted name part.

    The first and third groups are kept as they are; the second group is
    reduced to whatever follows its final ``.``.
    """
    groups = x.groups()
    return "".join((groups[0], groups[1].split(".")[-1], groups[2]))


def _sanitize_rst(string):
    """Use regex to remove at least some sphinx directives.

    Parameters
    ----------
    string : str
        Text that may contain reST roles, inline literals, emphasis and links.

    Returns
    -------
    str
        `string` with that markup replaced by its visible text.
    """
    role_open = r"(\s|^):[^:\s]+:`"
    tick_open = r"(\s|^)`"
    closing = r"`(\W|$)"
    keep_inner = r"\1\2\3"

    # Applied strictly in this order: later patterns rely on earlier ones
    # having already consumed the more specific forms.
    substitutions = (
        # :class:`a.b.c <thing here>`, :ref:`abc <thing here>` --> thing here
        (role_open + r"\S+\s*<([^>`]+)>" + closing, keep_inner),
        # :class:`~a.b.c` --> c
        (role_open + r"~([^`]+)" + closing, _regroup),
        # :class:`a.b.c` --> a.b.c
        (role_open + r"([^`]+)" + closing, keep_inner),
        # ``whatever thing`` --> whatever thing
        (tick_open + r"`([^`]+)`" + closing, keep_inner),
        # `~mymodule.MyClass` --> MyClass
        (tick_open + r"~([^`]+)" + closing, _regroup),
        # `whatever thing` --> whatever thing
        # `.MyClass` --> MyClass
        (tick_open + r"\.?([^`]+)" + closing, keep_inner),
        # **string** --> string
        (r"\*\*([^\*]*)\*\*", r"\1"),
        # *string* --> string
        (r"\*([^\*]*)\*", r"\1"),
        # `link text <url>`_ --> link text
        (r"`([^`<>]+) <[^`<>]+>`\_\_?", r"\1"),
        # :anchor:`the term` --> the term
        (r":[a-z]+:`([^`<>]+)( <[^`<>]+>)?`", r"\1"),
    )
    for pattern, replacement in substitutions:
        string = re.sub(pattern, replacement, string)

    # r'\\dfrac' --> r'\dfrac'
    return string.replace("\\\\", "\\")
def extract_intro_and_title(filename, docstring):
    """Extract and clean the first paragraph of module-level docstring.

    Parameters
    ----------
    filename : str
        Example file name; only used in the error message raised when the
        docstring contains no usable paragraph.
    docstring : str
        Module-level docstring of the example.

    Returns
    -------
    intro : str
        Second paragraph joined onto a single line and sanitized, or the title
        when the docstring has only one paragraph.
    title : str
        Sanitized title line taken from the first paragraph.

    Raises
    ------
    ExtensionError
        If no paragraph is left after filtering, or if no title line can be
        found in the first paragraph.
    """
    # lstrip is just in case docstring has a '\n\n' at the beginning
    candidates = docstring.lstrip().split("\n\n")
    # remove empty paragraphs, comments and other syntax like `.. _link:`
    paragraphs = [para for para in candidates if para and not para.startswith(".. ")]
    if not paragraphs:
        raise ExtensionError(
            "Example docstring should have a header for the example title. "
            "Please check the example file:\n {}\n".format(filename)
        )

    # The title is the first line of the first paragraph that does not begin
    # with four or more repetitions of the same non-word, space or underscore
    # character, i.e. the first line that is not a reST title adornment.
    # This conditional is not perfect but should hopefully be good enough.
    title_paragraph = paragraphs[0]
    match = re.search(r"^(?!([\W _])\1{3,})(.+)", title_paragraph, re.MULTILINE)
    if match is None:
        raise ExtensionError(
            f"Could not find a title in first paragraph:\n{title_paragraph}"
        )
    title = match.group(0).strip()

    # Use the title if no other paragraphs are provided
    if len(paragraphs) < 2:
        intro_paragraph = title
    else:
        intro_paragraph = paragraphs[1]

    # Concatenate all lines of the intro paragraph, then strip reST markup
    intro = _sanitize_rst(intro_paragraph.replace("\n", " "))
    title = _sanitize_rst(title)
    return intro, title
def md5sum_is_current(src_file, mode="b"):
    """Check whether src_file has the same md5 hash as the one on disk.

    Parameters
    ----------
    src_file : str or path-like
        File whose checksum is computed with ``get_md5sum``.
    mode : str
        Passed through to ``get_md5sum``.

    Returns
    -------
    bool
        True when ``<src_file>.md5`` exists and its content equals the freshly
        computed checksum; False otherwise.
    """
    current_md5 = get_md5sum(src_file, mode=mode)
    md5_path = f"{src_file}.md5"
    if os.path.exists(md5_path):
        with open(md5_path) as md5_file:
            return current_md5 == md5_file.read()
    return False
def save_thumbnail(image_path_template, src_file, script_vars, file_conf, gallery_conf):
    """Generate and Save the thumbnail image.

    Parameters
    ----------
    image_path_template : str
        holds the template where to save and how to name the image
    src_file : str
        path to source python file
    script_vars : dict
        Configuration and run time variables
    file_conf : dict
        File-specific settings given in source file comments as:
        ``# sphinx_gallery_<name> = <value>``
    gallery_conf : dict
        Sphinx-Gallery configuration dictionary
    """
    thumb_dir = os.path.join(os.path.dirname(image_path_template), "thumb")
    os.makedirs(thumb_dir, exist_ok=True)

    # read specification of the figure to display as thumbnail from main text
    number = file_conf.get("thumbnail_number")
    custom_path = file_conf.get("thumbnail_path")
    if number is None and custom_path is None:
        # If no number AND no path, fall back to the first figure
        number = 1

    # thumbnail_number has priority over thumbnail_path.
    if number is not None:
        if not isinstance(number, int):
            raise ExtensionError(
                "sphinx_gallery_thumbnail_number setting is not a number, "
                f"got {number!r}"
            )
        # negative index means counting from the last one
        if number < 0:
            number += len(script_vars["image_path_iterator"]) + 1
        image_path = image_path_template.format(number)
    else:
        image_path = os.path.join(gallery_conf["src_dir"], custom_path)

    thumbnail_image_path, ext = _find_image_ext(image_path)
    example_stem = os.path.splitext(os.path.basename(src_file))[0]
    thumb_file = os.path.join(thumb_dir, f"sphx_glr_{example_stem}_thumb.{ext}")

    if "formatted_exception" in script_vars and file_conf.get(
        "failing_thumbnail", True
    ):
        img = os.path.join(glr_path_static(), "broken_example.png")
    elif os.path.exists(thumbnail_image_path):
        img = thumbnail_image_path
    elif os.path.exists(thumb_file):
        # A thumbnail is already on disk and there is nothing newer to use.
        return
    else:
        # create something to replace the thumbnail
        default_thumb_path = gallery_conf["default_thumb_file"]
        if default_thumb_path is None:
            default_thumb_path = os.path.join(glr_path_static(), "no_image.png")
        img, ext = _find_image_ext(default_thumb_path)

    # update extension, since gallery_conf setting can be different
    # from file_conf
    # Here we have to do .new.ext so that optipng and PIL behave well
    thumb_file = f"{os.path.splitext(thumb_file)[0]}.new.{ext}"
    if ext in ("svg", "gif"):
        copyfile(img, thumb_file)
    else:
        scale_image(img, thumb_file, *gallery_conf["thumbnail_size"])
        if "thumbnails" in gallery_conf["compress_images"]:
            optipng(thumb_file, gallery_conf["compress_images_args"])

    # Drop the trailing "new" (but keep the dot) to get the final file name.
    new_stem = os.path.splitext(thumb_file)[0]
    fname_old = f"{new_stem[:-3]}{ext}"
    _replace_md5(thumb_file, fname_old=fname_old)
def _get_gallery_header(dir_, gallery_conf, raise_error=True):
    """Get gallery header from GALLERY_HEADER.[ext] or README.[ext] file.

    Returns `None` if user supplied an index.rst or no gallery header file
    found and `raise_error=False`.
    """
    # First check if user supplies an index.rst and that index.rst is in the
    # copyfile regexp.
    # NOTE(review): an empty `copyfile_regex` also matches here, unlike in
    # `_copy_non_example_files` where an empty regex disables copying --
    # confirm this is intended.
    index_is_copied = re.match(gallery_conf["copyfile_regex"], "index.rst")
    if index_is_copied and os.path.isfile(os.path.join(dir_, "index.rst")):
        return None

    # Next look for GALLERY_HEADER.[ext] (and for backward-compatibility
    # README.[ext])
    extensions = [".txt"] + sorted(gallery_conf["source_suffix"])
    candidates = (
        os.path.join(dir_, stem + ext)
        for ext in extensions
        for stem in ("GALLERY_HEADER", "README", "readme")
    )
    found = next((path for path in candidates if os.path.isfile(path)), None)
    if found is not None:
        return found

    if not raise_error:
        return None
    raise ExtensionError(
        "Example directory {} does not have a GALLERY_HEADER file with "
        "one of the expected file extensions {}. Please write one to "
        "introduce your gallery.".format(dir_, extensions)
    )


def _write_subsection_index(
    gallery_conf,
    user_index_rst,
    is_subsection,
    target_dir,
    index_content,
    toctree_filenames,
):
    """Write `index.rst` file for subsection if user has not provided index file.

    Returns path to index file written or `None` if no index file written as
    user provided one.
    """
    wanted = gallery_conf["nested_sections"] and not user_index_rst and is_subsection
    if not wanted:
        return None

    index_path = os.path.join(target_dir, "index.rst.new")
    head_ref = os.path.relpath(target_dir, gallery_conf["src_dir"])
    label = "\n\n.. _sphx_glr_{}:\n\n".format(head_ref.replace(os.sep, "_"))
    with open(index_path, "w", **_W_KW) as findex:
        findex.write(label)
        findex.write(index_content)
        # Create toctree with all gallery examples and add to index file
        if len(toctree_filenames) > 0:
            findex.write(_format_toctree(toctree_filenames))
    return index_path


def _copy_non_example_files(gallery_conf, src_dir, header_fname, target_dir):
    """Copy non-example files to `target_dir`."""
    copyregex = gallery_conf["copyfile_regex"]
    if not copyregex:
        return

    # Don't copy over the gallery_header file
    skipped_name = Path(header_fname).name if header_fname else None
    for fname in os.listdir(src_dir):
        if not re.match(copyregex, fname):
            continue
        if header_fname and fname == skipped_name:
            continue
        src_file = os.path.normpath(os.path.join(src_dir, fname))
        target_file = os.path.join(target_dir, fname)
        _replace_md5(src_file, fname_old=target_file, method="copy")
def generate_dir_rst(
    src_dir,
    target_dir,
    gallery_conf,
    seen_backrefs,
    is_subsection=True,
):
    """Generate the gallery reStructuredText for an example directory.

    Parameters
    ----------
    src_dir : str
        Path to root or sub gallery directory containing example files.
    target_dir : str
        Path where parsed examples (rst, python files, etc) will be outputted.
    gallery_conf : Dict[str, Any]
        Gallery configurations.
    seen_backrefs : set
        Back references encountered when parsing this gallery will be stored
        in this set.
    is_subsection : bool
        Whether `src_dir` is a subsection dir. If subsection dir, we write a
        `index.rst` file with toctree listing every example file.
        Default=True.

    Returns
    -------
    index_path : str or None
        Path to index rst file for the `src_dir`. None if user provided own
        index.
    index_content : str or None
        Gallery header content followed by the thumbnail entries. `None` when
        user provided own index.rst.
    costs : List[Dict]
        List of dicts of costs for building each element of the gallery with
        keys "t", "mem", "src_file", and "target_dir".
    toctree_items : list
        List of example file names we generated ReST for.
    """
    # `_get_gallery_header` returns `None` if user supplied `index.rst`
    header_fname = _get_gallery_header(src_dir, gallery_conf)
    user_index_rst = not header_fname
    index_content = ""
    if header_fname:
        with open(header_fname, "r", encoding="utf-8") as fid:
            index_content += fid.read()
    # Add empty lines to avoid bug in issue #165
    index_content += "\n\n"

    # Make all dirs ahead of time to avoid collisions in parallel processing
    image_dir = os.path.join(target_dir, "images")
    thumb_dir = os.path.join(image_dir, "thumb")
    for directory in (target_dir, image_dir, thumb_dir):
        os.makedirs(directory, exist_ok=True)
    if gallery_conf["jupyterlite"] is not None:
        _add_jupyterlite_badge_logo(image_dir)

    # Get example filenames from `src_dir` and sort them
    listdir = _collect_gallery_files([src_dir], gallery_conf)
    sort_key = _get_callables(gallery_conf, "within_subsection_order", src_dir)[0]
    sorted_listdir = sorted(listdir, key=sort_key)

    # Add div containing all thumbnails;
    # this is helpful for controlling grid or flexbox behaviours
    index_content += THUMBNAIL_PARENT_DIV

    build_target_dir = os.path.relpath(target_dir, gallery_conf["src_dir"])
    iterator = status_iterator(
        sorted_listdir,
        f"generating gallery for {build_target_dir}... ",
        length=len(sorted_listdir),
    )

    if gallery_conf["parallel"]:
        from joblib import Parallel, delayed

        p_fun = delayed(generate_file_rst)
        parallel = Parallel(
            n_jobs=gallery_conf["parallel"],
            pre_dispatch="n_jobs",
            batch_size=1,
            backend="loky",
        )
    else:
        # Serial build: call the function directly and collect into a list.
        p_fun = generate_file_rst
        parallel = list

    results = parallel(
        p_fun(fname, target_dir, src_dir, gallery_conf) for fname in iterator
    )

    costs = []
    toctree_filenames = []
    # Results come back in the same order as `sorted_listdir`.
    for fname, (intro, title, (t, mem), out_vars) in zip(sorted_listdir, results):
        src_file = os.path.normpath(os.path.join(src_dir, fname))
        gallery_conf["titles"][src_file] = title

        # n.b. non-executable files have none of these three variables defined,
        # so the last conditional must be "elif" not just "else"
        if "formatted_exception" in out_vars:
            assert "passing" not in out_vars
            assert "stale" not in out_vars
            gallery_conf["failing_examples"][src_file] = out_vars["formatted_exception"]
        elif "passing" in out_vars:
            assert "stale" not in out_vars
            gallery_conf["passing_examples"].append(src_file)
        elif "stale" in out_vars:
            gallery_conf["stale_examples"].append(out_vars["stale"])

        costs.append(dict(t=t, mem=mem, src_file=src_file, target_dir=target_dir))

        gallery_item_filename = (
            (Path(build_target_dir) / fname).with_suffix("").as_posix()
        )
        index_content += _thumbnail_div(
            target_dir, gallery_conf["src_dir"], fname, intro, title
        )
        toctree_filenames.append("/" + gallery_item_filename)

        # Write backreferences
        if "backrefs" in out_vars:
            _write_backreferences(
                out_vars["backrefs"],
                seen_backrefs,
                gallery_conf,
                target_dir,
                fname,
                intro,
                title,
            )

    # Close thumbnail parent div
    index_content += THUMBNAIL_PARENT_DIV_CLOSE

    # Write index file if required
    index_path = _write_subsection_index(
        gallery_conf,
        user_index_rst,
        is_subsection,
        target_dir,
        index_content,
        toctree_filenames,
    )
    if user_index_rst:
        # User has supplied index.rst, so blank out the content
        index_content = None

    # Copy over any other (non-gallery-example) files.
    _copy_non_example_files(gallery_conf, src_dir, header_fname, target_dir)

    return (
        index_path,
        index_content,
        costs,
        toctree_filenames,
    )
def handle_exception(exc_info, src_file, script_vars, gallery_conf):
    """Trim and format exception, maybe raise error, etc.

    Parameters
    ----------
    exc_info : tuple
        ``(type, value, traceback)`` as returned by ``sys.exc_info()``.
    src_file : str
        Path of the example that failed.
    script_vars : dict
        Run time variables; ``"formatted_exception"`` is stored in it and
        ``"execute_script"`` is set to False.
    gallery_conf : dict
        Sphinx-Gallery configuration dictionary.

    Returns
    -------
    except_rst : str
        reST ``pytb`` code block holding the trimmed traceback.

    Notes
    -----
    When ``gallery_conf["abort_on_example_error"]`` is set, the exception
    currently being handled is re-raised, so this must be called from inside
    an ``except`` block.
    """
    from .gen_gallery import _expected_failing_examples

    etype, exc, tb = exc_info
    stack = traceback.extract_tb(tb)
    # The full traceback will look something like:
    #
    #   File "/home/larsoner/python/sphinx-gallery/sphinx_gallery/gen_rst.py...
    #     mem_max, _ = gallery_conf['call_memory'](
    #   File "/home/larsoner/python/sphinx-gallery/sphinx_gallery/gen_galler...
    #     mem, out = memory_usage(func, max_usage=True, retval=True,
    #   File "/home/larsoner/.local/lib/python3.8/site-packages/memory_profi...
    #     returned = f(*args, **kw)
    #   File "/home/larsoner/python/sphinx-gallery/sphinx_gallery/gen_rst.py...
    #     exec(self.code, self.fake_main.__dict__)
    #   File "/home/larsoner/python/sphinx-gallery/sphinx_gallery/tests/tiny...
    #     raise RuntimeError('some error')
    # RuntimeError: some error
    #
    # But we should trim these to just the relevant trace at the user level,
    # so we inspect the traceback to find the start and stop points.
    first_kept = 0
    end_kept = len(stack)
    this_module = os.path.dirname(__file__) + os.sep + "gen_rst.py"
    # Positions are 1-based and increasing, so assigning the position always
    # moves the start forward to just past the latest internal frame.
    for position, frame in enumerate(stack, 1):
        # Trim our internal stack
        if frame.name.startswith("_sg_call_memory"):
            first_kept = position
            continue
        if not frame.filename.startswith(this_module):
            continue
        if frame.name == "execute_code_block" and (
            "compile(" in frame.line or "save_figures" in frame.line
        ):
            # SyntaxError
            first_kept = position
        elif frame.name == "__call__":
            # Any other error
            first_kept = position
        elif frame.name == "_check_input" and position == len(stack):
            # Our internal input() check
            end_kept = position - 1
    stack = stack[first_kept:end_kept]

    formatted_exception = "Traceback (most recent call last):\n" + "".join(
        traceback.format_list(stack) + traceback.format_exception_only(etype, exc)
    )

    src_file_rel = os.path.relpath(src_file, gallery_conf["src_dir"])
    if src_file in _expected_failing_examples(gallery_conf):
        func = logger.info
        color = blue
        kind = "expectedly"
    else:
        func = logger.warning
        color = red
        kind = "unexpectedly"
    func(
        # needs leading newline to get away from iterator
        f"\n{bold(color('%s'))} {kind} failed to execute correctly:\n\n%s",
        src_file_rel,
        color(indent(formatted_exception, " ")),
    )

    except_rst = codestr2rst(formatted_exception, lang="pytb")

    # Breaks build on first example error
    if gallery_conf["abort_on_example_error"]:
        raise

    # Stores failing file
    script_vars["formatted_exception"] = formatted_exception
    script_vars["execute_script"] = False

    # Ensure it's marked as our style
    return ".. rst-class:: sphx-glr-script-out\n\n" + except_rst
# Adapted from github.com/python/cpython/blob/3.7/Lib/warnings.py
def _showwarning(message, category, filename, lineno, file=None, line=None):
    """Write a formatted warning to `file`, defaulting to ``sys.stderr``.

    Nothing is written when no stream is available, and an ``OSError`` raised
    by the stream's ``write`` is ignored.
    """
    stream = sys.stderr if file is None else file
    if stream is None:
        # sys.stderr is None when run with pythonw.exe:
        # warnings get lost
        return
    text = warnings.formatwarning(message, category, filename, lineno, line)
    # the file (probably stderr) may be invalid - then this warning gets lost.
    with contextlib.suppress(OSError):
        stream.write(text)
@contextlib.contextmanager
def patch_warnings():
    """Patch warnings.showwarning to actually write out the warning.

    While the context is active ``warnings.showwarning`` is `_showwarning`;
    the previous hook is put back on exit, even if the body raises.
    """
    # Sphinx or logging or someone is patching warnings, but we want to
    # capture them, so let's patch over their patch...
    previous_hook = warnings.showwarning
    warnings.showwarning = _showwarning
    try:
        yield
    finally:
        warnings.showwarning = previous_hook
class _exec_once:
    """Deal with memory_usage calling functions more than once (argh).

    Calling the instance executes `code` in the namespace of `fake_main` the
    first time only; later calls do nothing.
    """

    def __init__(self, code, fake_main):
        self.code = code
        self.fake_main = fake_main
        self.run = False

    def __call__(self):
        if not self.run:
            self.run = True
            old_main = sys.modules.get("__main__", None)
            with patch_warnings():
                # Examples must see the fake module as ``__main__`` while
                # they run; the previous one is put back afterwards.
                sys.modules["__main__"] = self.fake_main
                try:
                    exec(self.code, self.fake_main.__dict__)
                finally:
                    if old_main is not None:
                        sys.modules["__main__"] = old_main


def _get_memory_base():
    """Get the base amount of memory used by the current Python process."""
    from memory_profiler import memory_usage

    memory_base = memory_usage(max_usage=True)
    return memory_base


def _get_parser(fname, gallery_conf):
    """Get parser and language.

    ``.py`` files use the ``py_source_parser`` module with language "Python";
    any other file gets a ``BlockParser`` and that parser's language.
    """
    if fname.endswith(".py"):
        parser = py_source_parser
        language = "Python"
    else:
        parser = BlockParser(fname, gallery_conf)
        language = parser.language
    return parser, language


def _check_reset_logging_tee(src_file):
    """Return the active `_LoggingTee`, creating one if needed, freshly reset."""
    # Helper to deal with our tests not necessarily calling execute_script
    # but rather execute_code_block directly
    if isinstance(sys.stdout, _LoggingTee):
        logging_tee = sys.stdout
    else:
        logging_tee = _LoggingTee(src_file)
    logging_tee.set_std_and_reset_position()
    return logging_tee


def _exec_and_get_memory(compiler, *, code_ast, gallery_conf, script_vars):
    """Execute ast, capturing output if last line expression and get max mem usage.

    Parameters
    ----------
    compiler : codeop.Compile
        Compiler to compile AST of code block.
    code_ast : ast.Module
        AST parsed code to execute.
    gallery_conf : Dict[str, Any]
        Gallery configurations.
    script_vars : Dict[str, Any]
        Configuration and runtime variables.

    Returns
    -------
    is_last_expr : bool
        Whether the last expression in `code_ast` is an ast.Expr.
    mem_max : float
        Max memory used during execution.
    """
    src_file = script_vars["src_file"]
    # capture output if last line is expression
    is_last_expr = False
    call_memory, _ = _get_call_memory_and_base(gallery_conf)
    if len(code_ast.body) and isinstance(code_ast.body[-1], ast.Expr):
        is_last_expr = True
        last_val = code_ast.body.pop().value
        # exec body minus last expression
        mem_body, _ = call_memory(
            _exec_once(compiler(code_ast, src_file, "exec"), script_vars["fake_main"])
        )
        # exec last expression, made into assignment
        body = [
            ast.Assign(targets=[ast.Name(id="___", ctx=ast.Store())], value=last_val)
        ]
        # `type_ignores` empty list deals with: https://bugs.python.org/issue3589
        last_val_ast = ast.Module(body=body, type_ignores=[])
        ast.fix_missing_locations(last_val_ast)
        mem_last, _ = call_memory(
            _exec_once(
                compiler(last_val_ast, src_file, "exec"), script_vars["fake_main"]
            )
        )
        mem_max = max(mem_body, mem_last)
    else:
        mem_max, _ = call_memory(
            _exec_once(compiler(code_ast, src_file, "exec"), script_vars["fake_main"])
        )
    return is_last_expr, mem_max


def _get_last_repr(capture_repr, ___):
    """Get repr of last expression, using first method in 'capture_repr' available.

    Parameters
    ----------
    capture_repr : iterable of str
        Method names to try on `___`, in order.
    ___ : object
        Value of the last expression of the code block.

    Returns
    -------
    last_repr : object
        Result of the first method that returned a string; otherwise the
        result of the last method tried, or None if it raised or no method
        was tried.
    repr_meth : str or None
        Name of the method that produced `last_repr`. None when that call
        raised, returned None or returned the string ``"None"``.
    """
    # Defaults cover an empty `capture_repr`, for which the loop never runs.
    last_repr = None
    repr_meth = None
    for meth in capture_repr:
        try:
            last_repr = getattr(___, meth)()
            # for case when last statement is print()
            if last_repr is None or last_repr == "None":
                repr_meth = None
            else:
                repr_meth = meth
        except Exception:
            last_repr = None
            repr_meth = None
        else:
            if isinstance(last_repr, str):
                break
    return last_repr, repr_meth


def _get_code_output(
    is_last_expr, example_globals, gallery_conf, logging_tee, images_rst, capture_repr
):
    """Obtain standard output and html output in reST.

    Parameters
    ----------
    is_last_expr : bool
        Whether the last expression in executed code is an ast.Expr.
    example_globals : Dict[str, Any]
        Global variables for examples.
    gallery_conf : Dict[str, Any]
        Gallery configurations.
    logging_tee : _LoggingTee
        Logging tee.
    images_rst : str
        rst code to embed the images in the document.
    capture_repr : tuple of str
        Representation methods to try on the last expression; an empty tuple
        disables capturing.

    Returns
    -------
    code_output : str
        reST of output of executed code block, including images and captured
        output.
    """
    last_repr = None
    repr_meth = None
    if is_last_expr:
        # capture the last repr variable
        ___ = example_globals["___"]
        ignore_repr = False
        if gallery_conf["ignore_repr_types"]:
            ignore_repr = re.search(gallery_conf["ignore_repr_types"], str(type(___)))
        if capture_repr != () and not ignore_repr:
            last_repr, repr_meth = _get_last_repr(capture_repr, ___)

    captured_std = logging_tee.output.getvalue().expandtabs()
    # normal string output
    if repr_meth in ["__repr__", "__str__"] and last_repr:
        captured_std = f"{captured_std}\n{last_repr}"
    if captured_std and not captured_std.isspace():
        captured_std = CODE_OUTPUT.format(indent(captured_std, " " * 4))
    else:
        captured_std = ""
    # Sanitize ANSI escape characters for reST output
    ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
    captured_std = ansi_escape.sub("", captured_std)

    # give html output its own header
    if repr_meth == "_repr_html_":
        captured_html = HTML_HEADER.format(indent(last_repr, " " * 4))
    else:
        captured_html = ""

    code_output = f"\n{images_rst}\n\n{captured_std}\n{captured_html}\n\n"
    return code_output


def _reset_cwd_syspath(cwd, sys_path):
    """Reset cwd and sys.path."""
    os.chdir(cwd)
    sys.path = sys_path
def execute_code_block(
    compiler, block, example_globals, script_vars, gallery_conf, file_conf
):
    """Execute the code block of the example file.

    Parameters
    ----------
    compiler : codeop.Compile
        Compiler to compile AST of code block.
    block : sphinx_gallery.py_source_parser.Block
        The code block to be executed.
    example_globals : Dict[str, Any]
        Global variables for examples.
    script_vars : Dict[str, Any]
        Configuration and runtime variables.
    gallery_conf : Dict[str, Any]
        Gallery configurations.
    file_conf : Dict[str, Any]
        File-specific settings given in source file comments as:
        ``# sphinx_gallery_<name> = <value>``.

    Returns
    -------
    code_output : str
        Output of executing code in reST.
    """
    if example_globals is None:  # testing shortcut
        example_globals = script_vars["fake_main"].__dict__

    # If example is not suitable to run, skip executing its blocks
    if not script_vars["execute_script"] or block.type == "text":
        return ""

    original_cwd = os.getcwd()
    # Route stdout/stderr through the logging tee
    src_file = script_vars["src_file"]
    logging_tee = _check_reset_logging_tee(src_file)
    assert isinstance(logging_tee, _LoggingTee)

    # First cd in the original example dir, so that any file
    # created by the example get created in this directory
    os.chdir(os.path.dirname(src_file))
    saved_sys_path = copy.deepcopy(sys.path)
    sys.path.append(os.getcwd())

    # Save figures unless there is a `sphinx_gallery_defer_figures` flag
    deferred = re.search(
        r"^[\ \t]*#\s*sphinx_gallery_defer_figures[\ \t]*\n?",
        block.content,
        re.MULTILINE,
    )
    need_save_figures = deferred is None

    block_conf = py_source_parser.extract_file_config(block.content)
    # Add `sphinx_gallery_multi_image_block` setting to block variables
    # (extract config rather than just regex search since the option's value
    # is needed)
    script_vars["multi_image"] = block_conf.get("multi_image_block")

    # Determine the block's `capture_repr` value by prioritising the block-level
    # `sphinx_gallery_capture_repr_block` setting, then the file-level
    # `sphinx_gallery_capture_repr` setting, and finally the
    # global `capture_repr` gallery setting.
    capture_repr = block_conf.get(
        "capture_repr_block",
        file_conf.get("capture_repr", gallery_conf["capture_repr"]),
    )

    # Add file_conf to script_vars to be read by image scrapers
    script_vars["file_conf"] = file_conf

    # NOTE: `handle_exception` trims tracebacks by looking for the text
    # "compile(" and "save_figures" on lines of this function, so those calls
    # have to stay directly in this body.
    try:
        # The "compile" step itself can fail on a SyntaxError, so just prepend
        # newlines to get the correct failing line to show up in the traceback
        code_ast = compile(
            "\n" * (block.lineno - 1) + block.content,
            src_file,
            "exec",
            flags=ast.PyCF_ONLY_AST | compiler.flags,
            dont_inherit=1,
        )
        is_last_expr, mem_max = _exec_and_get_memory(
            compiler,
            code_ast=code_ast,
            gallery_conf=gallery_conf,
            script_vars=script_vars,
        )
        script_vars["memory_delta"].append(mem_max)
        # This should be inside the try block, e.g., in case of a savefig error
        logging_tee.restore_std()
        if need_save_figures:
            need_save_figures = False
            images_rst = save_figures(block, script_vars, gallery_conf)
        else:
            images_rst = ""
    except Exception:
        logging_tee.restore_std()
        except_rst = handle_exception(
            sys.exc_info(), src_file, script_vars, gallery_conf
        )
        code_output = f"\n{except_rst}\n\n\n\n"
        # still call this even though we won't use the images so that
        # figures are closed
        if need_save_figures:
            save_figures(block, script_vars, gallery_conf)
    else:
        _reset_cwd_syspath(original_cwd, saved_sys_path)
        code_output = _get_code_output(
            is_last_expr,
            example_globals,
            gallery_conf,
            logging_tee,
            images_rst,
            capture_repr,
        )
    finally:
        _reset_cwd_syspath(original_cwd, saved_sys_path)
        logging_tee.restore_std()

    return code_output
def executable_script(src_file, gallery_conf):
    """Validate if script has to be run according to gallery configuration.

    Parameters
    ----------
    src_file : str
        path to python script
    gallery_conf : dict
        Contains the configuration of Sphinx-Gallery

    Returns
    -------
    bool
        True if script has to be executed, i.e. `src_file` matches
        ``gallery_conf["filename_pattern"]`` and ``gallery_conf["plot_gallery"]``
        is truthy.
    """
    filename_pattern = gallery_conf["filename_pattern"]
    matches_pattern = re.search(filename_pattern, src_file) is not None
    # Coerce to a real bool so the documented return type holds; without this
    # the result would be a Match object, None or the raw config value.
    return bool(matches_pattern and gallery_conf["plot_gallery"])
def _check_input(prompt=None):
    """Stand-in for the ``input`` builtin inside examples; always raises.

    `execute_script` installs it as ``input`` in the example globals.
    """
    message = "Cannot use input() builtin function in Sphinx-Gallery examples"
    raise ExtensionError(message)
def execute_script(script_blocks, script_vars, gallery_conf, file_conf):
    """Execute and capture output from python script already in block structure.

    Parameters
    ----------
    script_blocks : list
        (label, content, line_number)
        List where each element is a tuple with the label ('text' or 'code'),
        the corresponding content string of block and the leading line number
    script_vars : dict
        Configuration and run time variables. The keys "example_globals",
        "memory_delta" and "fake_main" are set here, and "passing" is set when
        the example was meant to run and still is after execution.
    gallery_conf : dict
        Contains the configuration of Sphinx-Gallery
    file_conf : dict
        File-specific settings given in source file comments as:
        ``# sphinx_gallery_<name> = <value>``

    Returns
    -------
    output_blocks : list
        List of strings where each element is the restructured text
        representation of the output of each block
    time_elapsed : float
        Time elapsed during execution
    """
    # Examples may contain if __name__ == '__main__' guards
    # for in example scikit-learn if the example uses multiprocessing.
    # Here we create a new __main__ module, and temporarily change
    # sys.modules when running our example
    call_memory, _ = _get_call_memory_and_base(gallery_conf)
    fake_main = importlib.util.module_from_spec(
        importlib.util.spec_from_loader("__main__", None)
    )
    example_globals = fake_main.__dict__

    example_globals.update(
        {
            # A lot of examples contains 'print(__doc__)' for example in
            # scikit-learn so that running the example prints some useful
            # information. Because the docstring has been separated from
            # the code blocks in sphinx-gallery, __doc__ is actually
            # __builtin__.__doc__ in the execution context and we do not
            # want to print it
            "__doc__": "",
            # Don't ever support __file__: Issues #166 #212
            # Don't let them use input()
            "input": _check_input,
        }
    )
    script_vars["example_globals"] = example_globals

    argv_orig = sys.argv[:]
    # The try/finally guarantees sys.argv is restored even when executing a
    # block raises (for instance with `abort_on_example_error` enabled).
    try:
        if script_vars["execute_script"]:
            # We want to run the example without arguments. See
            # https://github.com/sphinx-gallery/sphinx-gallery/pull/252
            # for more details.
            sys.argv[0] = script_vars["src_file"]
            (reset_argv,) = _get_callables(gallery_conf, "reset_argv")
            sys.argv[1:] = reset_argv(gallery_conf, script_vars)
            gc.collect()
            memory_start, _ = call_memory(lambda: None)
        else:
            memory_start = 0.0

        t_start = time()
        compiler = codeop.Compile()
        # include at least one entry to avoid max() ever failing
        script_vars["memory_delta"] = [memory_start]
        script_vars["fake_main"] = fake_main
        output_blocks = list()
        with _LoggingTee(script_vars.get("src_file", "")) as logging_tee:
            for block in script_blocks:
                logging_tee.set_std_and_reset_position()
                output_blocks.append(
                    execute_code_block(
                        compiler,
                        block,
                        example_globals,
                        script_vars,
                        gallery_conf,
                        file_conf,
                    )
                )
        time_elapsed = time() - t_start
    finally:
        sys.argv = argv_orig

    script_vars["memory_delta"] = max(script_vars["memory_delta"])
    if script_vars["execute_script"]:
        script_vars["memory_delta"] -= memory_start
        # Write md5 checksum if the example was meant to run (no-plot
        # shall not cache md5sum) and has built correctly
        with open(script_vars["target_file"] + ".md5", "w") as file_cs:
            file_cs.write(get_md5sum(script_vars["target_file"], mode="t"))
        script_vars["passing"] = True

    return output_blocks, time_elapsed
def _make_dummy_images(executable, file_conf, script_vars):
    """Make dummy images when not executing the example.

    Parameters
    ----------
    executable : bool
        Whether the example is executed; nothing is done when True.
    file_conf : dict
        File-specific settings; only the ``"dummy_images"`` entry is read.
    script_vars : dict
        Run time variables; ``script_vars["image_path_iterator"]`` supplies
        the target image paths.

    Raises
    ------
    ExtensionError
        If ``dummy_images`` is set but is not an integer (bools are rejected).
    """
    if executable:
        return
    dummy_image = file_conf.get("dummy_images", None)
    if dummy_image is None:
        return
    # bool is a subclass of int, so it has to be rejected explicitly
    if isinstance(dummy_image, bool) or not isinstance(dummy_image, int):
        raise ExtensionError(
            "sphinx_gallery_dummy_images setting is not an integer, "
            f"got {dummy_image!r}"
        )

    image_path_iterator = script_vars["image_path_iterator"]
    stock_img = os.path.join(glr_path_static(), "no_image.png")
    # range() comes first so zip stops before pulling an extra path
    for _, path in zip(range(dummy_image), image_path_iterator):
        if not os.path.isfile(path):
            copyfile(stock_img, path)


def _clean_script_blocks(gallery_conf, parser, script_blocks, output_blocks):
    """Remove ignore blocks, config comments and final empty blocks.

    Returns the cleaned ``script_blocks`` and the matching ``output_blocks``
    (whose last entry is dropped together with a whitespace-only last block).
    """
    # Ignore blocks must be processed before the
    # remaining config comments are removed.
    script_blocks = [
        py_source_parser.Block(label, parser.remove_ignore_blocks(content), line_number)
        for label, content, line_number in script_blocks
    ]

    if gallery_conf["remove_config_comments"]:
        script_blocks = [
            py_source_parser.Block(
                label, parser.remove_config_comments(content), line_number
            )
            for label, content, line_number in script_blocks
        ]

    # Remove final empty block, (can occur after config comments removed)
    if script_blocks[-1].content.isspace():
        script_blocks = script_blocks[:-1]
        output_blocks = output_blocks[:-1]

    return script_blocks, output_blocks


def _get_backreferences(gallery_conf, script_vars, script_blocks, node, target_file):
    """Get example backreferences for `script_blocks` and write _codeobj.

    Returns a set of names: the full ``module.name`` when it matches one of
    the ``prefer_full_module`` patterns, otherwise ``module_short.name``.
    """
    if gallery_conf["inspect_global_variables"]:
        global_variables = script_vars["example_globals"]
    else:
        global_variables = None

    ref_regex = _make_ref_regex(gallery_conf["default_role"])
    example_code_obj = identify_names(script_blocks, ref_regex, global_variables, node)
    if example_code_obj:
        _write_code_obj(target_file, example_code_obj)
    exclude_regex = gallery_conf["exclude_implicit_doc_regex"]

    def _normalize_name(cobj):
        full_name = "{module}.{name}".format(**cobj)
        for pattern in gallery_conf["prefer_full_module"]:
            if re.search(pattern, full_name):
                return full_name
        return "{module_short}.{name}".format(**cobj)

    # Keep objects from the documented module(s); implicit references are
    # dropped when they match the exclusion regex.
    backrefs = {
        _normalize_name(cobj)
        for cobjs in example_code_obj.values()
        for cobj in cobjs
        if cobj["module"].startswith(gallery_conf["doc_module"])
        and (
            cobj["is_explicit"]
            or (not exclude_regex)
            or (not exclude_regex.search("{module}.{name}".format(**cobj)))
        )
    }

    # This can help with garbage collection in some instances
    if global_variables is not None and "___" in global_variables:
        del global_variables["___"]
    del global_variables
    return backrefs
def generate_file_rst(fname, target_dir, src_dir, gallery_conf):
    """Generate the rst file for a given example.

    Parameters
    ----------
    fname : str
        Filename of python script.
    target_dir : str
        Absolute path to directory in documentation where examples are saved.
    src_dir : str
        Absolute path to directory where source examples are stored.
    gallery_conf : dict
        Contains the configuration of Sphinx-Gallery.

    Returns
    -------
    intro: str
        The introduction of the example.
    title : str
        The example title.
    cost : tuple
        A tuple containing the ``(time_elapsed, memory_used)`` required
        to run the script.
    out_vars : dict
        Variables used to run the script, possibly with entries:

        "stale"
            True if the example was stale.
        "backrefs"
            The backreferences.
        "passing"
            True if the example passed.
        "formatted_exception"
            Formatted string of the exception.
    """
    src_file = os.path.normpath(os.path.join(src_dir, fname))
    out_vars = {}
    target_file = Path(target_dir) / fname
    _replace_md5(src_file, target_file, method="copy", mode="t")

    parser, language = _get_parser(fname, gallery_conf)
    file_conf, script_blocks, node = parser.split_code_and_text_blocks(
        src_file, return_node=True
    )
    intro, title = extract_intro_and_title(fname, script_blocks[0].content)
    executable = executable_script(src_file, gallery_conf)

    # Unchanged source: bail out early unless a stale executable example
    # has been explicitly requested to run again.
    if md5sum_is_current(target_file, mode="t"):
        logger.debug(f"md5sum is current: {target_file}")
        rerun = executable and gallery_conf["run_stale_examples"]
        if not rerun:
            if executable:
                out_vars["stale"] = str(target_file)
            return intro, title, (0, 0), out_vars

    image_dir = os.path.join(target_dir, "images")
    os.makedirs(image_dir, exist_ok=True)
    image_fname = "sphx_glr_" + os.path.splitext(fname)[0] + "_{0:03}.png"
    image_path_template = os.path.join(image_dir, image_fname)

    script_vars = {
        "execute_script": executable,
        "image_path_iterator": ImagePathIterator(image_path_template),
        "src_file": src_file,
        "target_file": str(target_file),
    }

    if executable and gallery_conf["reset_modules_order"] in ("before", "both"):
        clean_modules(gallery_conf, fname, "before")

    output_blocks, time_elapsed = execute_script(
        script_blocks, script_vars, gallery_conf, file_conf
    )
    logger.debug("%s ran in : %.2g seconds", src_file, time_elapsed)

    # Create dummy images
    _make_dummy_images(executable, file_conf, script_vars)

    script_blocks, output_blocks = _clean_script_blocks(
        gallery_conf, parser, script_blocks, output_blocks
    )
    example_rst = rst_blocks(
        script_blocks, output_blocks, file_conf, gallery_conf, language=language
    )

    _, memory_base = _get_call_memory_and_base(gallery_conf)
    memory_used = memory_base + script_vars["memory_delta"]
    if not executable:
        # don't let the output change
        time_elapsed = 0.0
        memory_used = 0.0

    save_rst_example(
        example_rst,
        target_file,
        time_elapsed,
        memory_used,
        gallery_conf,
        language=language,
    )
    save_thumbnail(image_path_template, src_file, script_vars, file_conf, gallery_conf)

    files_to_zip = [target_file]
    if target_file.suffix in gallery_conf["notebook_extensions"]:
        example_nb = jupyter_notebook(script_blocks, gallery_conf, target_dir)
        ipy_fname = target_file.with_suffix(".ipynb.new")
        save_notebook(example_nb, ipy_fname)
        _replace_md5(ipy_fname, mode="t")
        files_to_zip.append(target_file.with_suffix(".ipynb"))

    # Produce the zip file of all sources
    zip_files(files_to_zip, target_file.with_suffix(".zip"), target_dir)

    # Get names
    out_vars["backrefs"] = _get_backreferences(
        gallery_conf, script_vars, script_blocks, node, target_file
    )

    # Forward the execution status only when execute_script recorded it.
    out_vars.update(
        {
            key: script_vars[key]
            for key in ("passing", "formatted_exception")
            if key in script_vars
        }
    )

    # don't keep this during reset
    del script_vars
    if executable and gallery_conf["reset_modules_order"] in ("after", "both"):
        clean_modules(gallery_conf, fname, "after")

    return intro, title, (time_elapsed, memory_used), out_vars
# EXAMPLE_HEADER: reST preamble prepended to every generated example by
# save_rst_example via str.format: {0} is the example file name, {1} the
# reference label built from it, {2} the optional "run in your browser" text.
# RST_BLOCK_HEADER: reST comment emitted by rst_blocks before every block
# except the first; {0}-{1} is the block's source line range.
# NOTE(review): both literals appear whitespace-collapsed in this view (several
# reST directives share one line) -- confirm the line breaks and indentation
# against the canonical file before relying on the rendered output.
EXAMPLE_HEADER = """ .. DO NOT EDIT. .. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. .. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: .. "{0}" .. LINE NUMBERS ARE GIVEN BELOW. .. only:: html .. note:: :class: sphx-glr-download-link-note :ref:`Go to the end <sphx_glr_download_{1}>` to download the full example code.{2} .. rst-class:: sphx-glr-example-title .. _sphx_glr_{1}: """ RST_BLOCK_HEADER = """\ .. GENERATED FROM PYTHON SOURCE LINES {0}-{1} """
def rst_blocks(
    script_blocks, output_blocks, file_conf, gallery_conf, *, language="python"
):
    """Generate the rst string containing the script prose, code and output.

    Parameters
    ----------
    script_blocks : list
        (label, content, line_number)
        List where each element is a tuple with the label ('text' or 'code'),
        the corresponding content string of block and the leading line number
    output_blocks : list
        List of strings where each element is the restructured text
        representation of the output of each block
    file_conf : dict
        File-specific settings given in source file comments as:
        ``# sphinx_gallery_<name> = <value>``
    language : str
        The language to be used for syntax highlighting in code blocks. Must be
        a name or alias recognized by Pygments.
    gallery_conf : dict
        Contains the configuration of Sphinx-Gallery

    Returns
    -------
    out : str
        rst notebook
    """
    # A simple example has two blocks: one for the
    # example introduction/explanation and one for the code
    notebook_like = len(script_blocks) > 2

    pieces = []
    for index, (blk, rendered_output) in enumerate(zip(script_blocks, output_blocks)):
        # do not add comment to the title block, otherwise the linking does
        # not work properly
        if index > 0:
            last_line = blk.lineno + blk.content.count("\n")
            pieces.append(RST_BLOCK_HEADER.format(blk.lineno, last_line))

        if blk.type != "code":
            # Text block: make sure it is followed by a blank line.
            pieces.append(blk.content)
            pieces.append("\n" if blk.content.endswith("\n") else "\n\n")
            continue

        # The per-file setting overrides the gallery-wide one.
        if file_conf.get("line_numbers", gallery_conf["line_numbers"]):
            lineno = blk.lineno
        else:
            lineno = None
        code_rst = codestr2rst(blk.content, lang=language, lineno=lineno) + "\n"

        if notebook_like:
            pieces.append(code_rst)
            pieces.append(rendered_output)
        else:
            # Simple scripts show their output above the code.
            pieces.append(rendered_output)
            if "sphx-glr-script-out" in rendered_output:
                # Add some vertical space after output
                pieces.append("\n\n|\n\n")
            pieces.append(code_rst)

    return "".join(pieces)
def save_rst_example(
    example_rst,
    example_file,
    time_elapsed,
    memory_used,
    gallery_conf,
    *,
    language="python",
):
    """Saves the rst notebook to example_file including header & footer.

    The text is written to ``<example_file stem>.rst.new`` and then handed to
    ``_replace_md5``.

    Parameters
    ----------
    example_rst : str
        rst containing the executed file content
    example_file : str
        Filename with full path of python example file in documentation folder
    language : str
        Name of the programming language the example is in
    time_elapsed : float
        Time elapsed in seconds while executing file
    memory_used : float
        Additional memory used during the run.
    gallery_conf : dict
        Sphinx-Gallery configuration dictionary
    """
    example_file = Path(example_file)
    example_fname = str(example_file.relative_to(gallery_conf["src_dir"]))
    ref_fname = example_fname.replace(os.sep, "_")

    binder_conf = gallery_conf["binder"]
    is_binder_enabled = len(binder_conf) > 0

    jupyterlite_conf = gallery_conf["jupyterlite"]
    is_jupyterlite_enabled = jupyterlite_conf is not None

    # Sentence appended to the "Go to the end" note in the header.
    interactive_example_text = ""
    if is_binder_enabled or is_jupyterlite_enabled:
        interactive_example_text += " or to run this example in your browser via "

    if is_binder_enabled and is_jupyterlite_enabled:
        interactive_example_text += "JupyterLite or Binder"
    elif is_binder_enabled:
        interactive_example_text += "Binder"
    elif is_jupyterlite_enabled:
        interactive_example_text += "JupyterLite"

    example_rst = (
        EXAMPLE_HEADER.format(example_fname, ref_fname, interactive_example_text)
        + example_rst
    )

    if time_elapsed > gallery_conf["min_reported_time"]:
        time_m, time_s = divmod(time_elapsed, 60)
        example_rst += TIMING_CONTENT.format(time_m, time_s)
    if gallery_conf["show_memory"]:
        example_rst += f"**Estimated memory usage:** {memory_used: .0f} MB\n\n"

    example_rst += DOWNLOAD_LINKS_HEADER.format(ref_fname)

    save_notebook = example_file.suffix in gallery_conf["notebook_extensions"]

    # Generate a binder URL if specified
    if is_binder_enabled and save_notebook:
        binder_badge_rst = gen_binder_rst(example_file, binder_conf, gallery_conf)
        binder_badge_rst = indent(binder_badge_rst, "  ")  # need an extra two
        example_rst += binder_badge_rst

    if is_jupyterlite_enabled and save_notebook:
        jupyterlite_rst = gen_jupyterlite_rst(example_file, gallery_conf)
        jupyterlite_rst = indent(jupyterlite_rst, "  ")  # need an extra two
        example_rst += jupyterlite_rst

    if save_notebook:
        ipynb_download_file = example_file.with_suffix(".ipynb").name
        example_rst += NOTEBOOK_DOWNLOAD.format(ipynb_download_file)

    example_rst += CODE_DOWNLOAD.format(example_file.name, language)
    example_rst += ZIP_DOWNLOAD.format(example_file.with_suffix(".zip").name)

    if gallery_conf["recommender"]["enable"]:
        # extract the filename without the extension
        recommend_fname = Path(example_fname).stem
        example_rst += RECOMMENDATIONS_INCLUDE.format(recommend_fname)

    if gallery_conf["show_signature"]:
        example_rst += SPHX_GLR_SIG

    write_file_new = example_file.with_suffix(".rst.new")
    with open(write_file_new, "w", **_W_KW) as f:
        f.write(example_rst)
    # make it read-only so that people don't try to edit it
    mode = os.stat(write_file_new).st_mode
    # NOTE(review): 0x777 is a hexadecimal literal (== 0o3567), not 0o777, so
    # the resulting mask keeps the owner write bit and the file stays writable
    # by its owner. Left untouched on purpose: making the file truly read-only
    # could change how later rewrites behave -- confirm before changing it.
    ro_mask = 0x777 ^ (stat.S_IWRITE | stat.S_IWGRP | stat.S_IWOTH)
    os.chmod(write_file_new, mode & ro_mask)
    # in case it wasn't in our pattern, only replace the file if it's
    # still stale.
    _replace_md5(write_file_new, mode="t")
def _get_callables(gallery_conf, key, src_dir=None):
    """Get callables for the given conf key, returning tuple of callable(s).

    If value is a string, import, with the following exceptions:

    * `within_subsection_order` - add full path if value is a built-in aliases,
      instantiate if value is a class
    * `image_scrapers` - get value from `_scraper_dict` or import module and get
      `_get_sg_image_scraper` attribute
    * `reset_modules` - get value from `_reset_dict`
    """
    builtin_aliases = (
        "ExampleTitleSortKey",
        "FileNameSortKey",
        "FileSizeSortKey",
        "NumberOfCodeLinesSortKey",
    )
    singletons = (
        "reset_argv",
        "minigallery_sort_order",
        "subsection_order",
        "within_subsection_order",
    )
    # the following should be the case (internal use only):
    assert key in ("image_scrapers", "reset_modules", "jupyterlite") + singletons, key

    # Get correct config value and ensure it's a list
    configured = gallery_conf[key]
    if key == "jupyterlite":
        entries = [configured["notebook_modification_function"]]
    elif key in singletons:
        entries = [configured]
    elif isinstance(configured, (tuple, list)):
        entries = list(configured)
    else:
        entries = [configured]

    for index, entry in enumerate(entries):
        # Get readable string, for error message
        if key == "jupyterlite":
            readable = f"{key}['notebook_modification_function']"
        elif key in singletons:
            readable = f"{key}={repr(entry)}"
        else:
            readable = f"{key}[{index}]={repr(entry)}"

        # Import string FQNs or get value from dict
        if isinstance(entry, str):
            # use fully qualified name to resolve builtin callable classes
            # (otherwise not serializable)
            if entry in builtin_aliases:
                entry = f"sphinx_gallery.sorting.{entry}"
            if "." in entry:
                mod, attr = entry.rsplit(".", 1)
                try:
                    entry = getattr(importlib.import_module(mod), attr)
                except Exception:
                    raise ConfigError(
                        f"Unknown string option {readable} "
                        f"when importing {attr} from {mod}"
                    )
            elif key == "image_scrapers":
                if entry in _scraper_dict:
                    entry = _scraper_dict[entry]
                else:
                    try:
                        scraper_module = importlib.import_module(entry)
                        factory = getattr(scraper_module, "_get_sg_image_scraper")
                        entry = factory()
                    except Exception:
                        raise ConfigError(f"Unknown string option for {readable}")
            elif key == "reset_modules":
                if entry not in _reset_dict:
                    raise ConfigError(f"Unknown string option for {readable}: {entry}")
                entry = _reset_dict[entry]
            entries[index] = entry

        # make sure classes get instantiated (so they become callable)
        if key == "within_subsection_order" and inspect.isclass(entry):
            entry = entry(src_dir)
            entries[index] = entry
        if inspect.isclass(entry):
            raise ConfigError(
                f"Got class rather than callable instance for {readable}: {entry}"
            )
        if not callable(entry):
            raise ConfigError(f"{readable} must be callable")
    return tuple(entries)


# Default no-op version
def _sg_call_memory_noop(func):
    """Call ``func`` and report 0.0 as its memory usage."""
    result = func()
    return 0.0, result


def _get_call_memory_and_base(gallery_conf, *, update=False):
    """Return the memory-measuring wrapper and the base memory value.

    With ``update=True``, ``gallery_conf["show_memory"]`` is set to False when
    measuring is not possible (parallel build or missing profiler).
    """
    show_memory = gallery_conf["show_memory"]

    # Default to no-op version
    call_memory, memory_base = _sg_call_memory_noop, 0.0

    if show_memory and gallery_conf["plot_gallery"]:
        if callable(show_memory):
            # user supplied their own measuring function
            call_memory = show_memory
        elif gallery_conf["parallel"]:
            if update:
                logger.warning(
                    f"{gallery_conf['show_memory']=} disabled due to "
                    f"{gallery_conf['parallel']=}."
                )
                gallery_conf["show_memory"] = False
        else:
            memprof = _get_memprof_call_memory()
            if memprof is not None:
                call_memory, memory_base = memprof
            elif update:
                gallery_conf["show_memory"] = False

    assert callable(call_memory)
    return call_memory, memory_base


def _sg_call_memory_memprof(func):
    """Call ``func`` under ``memory_profiler`` and return ``(memory, result)``."""
    from memory_profiler import memory_usage  # noqa

    mem, out = memory_usage(func, max_usage=True, retval=True, multiprocess=True)
    try:
        mem = mem[0]  # old MP always returned a list
    except TypeError:  # 'float' object is not subscriptable
        pass
    return mem, out


@lru_cache()
def _get_memprof_call_memory():
    """Return ``(call_memory, memory_base)``, or None without memory_profiler."""
    try:
        from memory_profiler import memory_usage  # noqa
    except ImportError:
        logger.warning(
            "Please install 'memory_profiler' to enable peak memory measurements."
        )
        return None
    else:
        return _sg_call_memory_memprof, _get_memory_base()