From b014dbb9927bc1f7c67028eb85354418b043bcfb Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Mon, 12 May 2025 22:46:26 +0200 Subject: [PATCH 01/11] Some quality-of-life enhancement to toc.py script Add proper error handling, logging, command-line options, and improved documentation. Clean anchor generation, add version, and enhance CLI help text with better descriptions and examples. --- tutorial/toc.py | 182 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 159 insertions(+), 23 deletions(-) diff --git a/tutorial/toc.py b/tutorial/toc.py index c1a382ed..444a36dd 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -1,39 +1,86 @@ #!/usr/bin/env python +# ruff: noqa G004 """CLI script to build a table of contents for an IPython notebook""" import argparse as ap +import logging import pathlib import re +import sys from collections import namedtuple import nbformat from nbformat import NotebookNode +__version__ = "0.1.1" + +# Set up logging +logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") +logger = logging.getLogger("toc") + TocEntry = namedtuple("TocEntry", ["level", "text", "anchor"]) def extract_markdown_cells(notebook: NotebookNode) -> str: - """Extract the markdown cells from a notebook""" + """Extract the markdown cells from a notebook + + Args: + notebook: A notebook object + + Returns: + str: Concatenated content of all markdown cells + """ return "\n".join( [cell.source for cell in notebook.cells if cell.cell_type == "markdown"] ) def extract_toc(notebook: str) -> list[TocEntry]: - """Extract the table of contents from a markdown string""" + """Extract the table of contents from a markdown string + + Parses markdown headings (lines starting with #) and converts them to TOC entries. + Each entry includes the heading level, text, and an anchor derived from the text. + + Args: + notebook: String containing markdown content + + Returns: + list[TocEntry]: List of table of contents entries + """ toc = [] line_re = re.compile(r"(#+)\s+(.+)") + line_num = 0 + for line in notebook.splitlines(): + line_num += 1 if groups := re.match(line_re, line): - heading, text, *_ = groups.groups() - level = len(heading) - anchor = "-".join(text.replace("`", "").split()) - toc.append(TocEntry(level, text, anchor)) + try: + heading, text, *_ = groups.groups() + level = len(heading) + + # Clean the text to make a proper anchor + clean_text = text.replace("`", "") + # Remove any other special characters that might break anchors + clean_text = re.sub(r"[^\w\s-]", "", clean_text) + anchor = "-".join(clean_text.lower().split()) + + toc.append(TocEntry(level, text, anchor)) + logger.debug(f"Found heading (level {level}): {text}") + except Exception as e: + logger.warning(f"Error processing heading at line {line_num}: {e}") + return toc def markdown_toc(toc: list[TocEntry]) -> str: - """Build a string representation of the toc as a nested markdown list""" + """Build a string representation of the toc as a nested markdown list + + Args: + toc: List of TocEntry objects + + Returns: + str: Markdown-formatted table of contents with proper indentation + """ lines = [] for entry in toc: line = f"{' ' * entry.level}- [{entry.text}](#{entry.anchor})" @@ -41,33 +88,69 @@ def markdown_toc(toc: list[TocEntry]) -> str: return "\n".join(lines) -def build_toc(nb_path: pathlib.Path, placeholder: str = "[TOC]") -> NotebookNode: - """Build a table of contents for a notebook and insert it at the location of a placeholder""" +def build_toc( + nb_path: pathlib.Path, + placeholder: str = "[TOC]", + toc_header: str = "# Table of Contents", +) -> tuple[NotebookNode, bool]: + """Build a table of contents for a notebook and insert it at the location of a placeholder + + Args: + nb_path: Path to the notebook file + placeholder: The text to replace with the generated TOC (default: "[TOC]") + toc_header: The header text to use for the TOC (default: "# Table of Contents") + + Returns: + tuple[NotebookNode, bool]: The notebook with TOC inserted and a boolean indicating if placeholder was found + """ # Read the notebook - nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT) + try: + nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT) + except Exception as e: + logger.error(f"Failed to read notebook '{nb_path}': {e}") + raise + md_cells = extract_markdown_cells(nb_obj) # Build tree toc_tree = extract_toc(md_cells) + if not toc_tree: + logger.warning(f"No headings found in notebook '{nb_path}'") + # Build toc representation toc_repr = markdown_toc(toc_tree) - # Insert it a the location of a placeholder - toc_header = "# Table of Contents" + # Insert it at the location of a placeholder + toc_replaced = False for cell in nb_obj.cells: if cell.source.startswith((placeholder, toc_header)): cell.source = f"{toc_header}\n{toc_repr}" cell.cell_type = "markdown" + toc_replaced = True + break + + if not toc_replaced: + logger.warning( + f"Placeholder '{placeholder}' or heading '{toc_header}' not found in notebook" + ) - return nb_obj + return nb_obj, toc_replaced def main(): """CLI entry point""" parser = ap.ArgumentParser( - description="Build a table of contents for an IPython notebook" + description="Build a table of contents for an IPython notebook", + epilog=""" + This script extracts headings from markdown cells in a Jupyter notebook and + generates a markdown-formatted table of contents. The TOC is inserted into + the notebook at the location of a placeholder (default: '[TOC]') or where + a '# Table of Contents' heading exists. Links in the TOC point to notebook + anchors created from the heading text. + """, + formatter_class=ap.RawDescriptionHelpFormatter, ) parser.add_argument("notebook", type=str, help="Path to the notebook to process") parser.add_argument( @@ -80,22 +163,75 @@ def main(): default=False, help="Force overwrite of original notebook", ) + parser.add_argument( + "--placeholder", + "-p", + type=str, + default="[TOC]", + help="Placeholder text to replace with the TOC (default: '[TOC]')", + ) + parser.add_argument( + "--header", + type=str, + default="# Table of Contents", + help="Header text for the TOC (default: '# Table of Contents')", + ) + parser.add_argument( + "--verbose", "-v", action="store_true", help="Enable verbose output" + ) + parser.add_argument( + "--version", action="version", version=f"%(prog)s {__version__}" + ) args = parser.parse_args() - if not (input_nb := pathlib.Path(args.notebook)).exists(): - raise FileNotFoundError(input_nb) - + # Set logging level based on verbosity + if args.verbose: + logger.setLevel(logging.DEBUG) + + # Validate input file + try: + input_nb = pathlib.Path(args.notebook) + if not input_nb.exists(): + logger.error(f"Input file not found: {input_nb}") + sys.exit(1) + if not input_nb.is_file(): + logger.error(f"Input path is not a file: {input_nb}") + sys.exit(1) + except Exception as e: + logger.error(f"Error processing input path: {e}") + sys.exit(1) + + # Set output file path if args.output is None: output_nb = input_nb.with_suffix(".toc.ipynb") else: output_nb = pathlib.Path(args.output) - with output_nb.open("w", encoding="utf-8") as file: - nbformat.write(build_toc(input_nb), file) - - if args.force: - input_nb.unlink() - output_nb.rename(input_nb) + # Create output directory if it doesn't exist + output_nb.parent.mkdir(parents=True, exist_ok=True) + + try: + # Generate TOC and write to output file + logger.info(f"Processing notebook: {input_nb}") + toc_notebook, toc_replaced = build_toc(input_nb, args.placeholder, args.header) + + if not toc_replaced: + logger.warning("Skipping output - no placeholder found in notebook") + sys.exit(0) # Exit with success code since it's not an error + + with output_nb.open("w", encoding="utf-8") as file: + nbformat.write(toc_notebook, file) + logger.info(f"TOC written to: {output_nb}") + + # Handle force option + if args.force: + logger.info(f"Replacing original notebook with TOC version") + input_nb.unlink() + output_nb.rename(input_nb) + logger.info(f"Original notebook replaced with: {input_nb}") + except Exception as e: + logger.error(f"Error processing notebook: {e}") + sys.exit(1) if __name__ == "__main__": From ab79959717bdfb007f6114b4c1f8fd97832fa5f6 Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Tue, 13 May 2025 07:31:23 +0200 Subject: [PATCH 02/11] [skip ci] Improve logging Replace f-strings with %-style formatting in logger calls for better performance. Use logger.exception() for better error reporting with tracebacks. --- tutorial/toc.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/tutorial/toc.py b/tutorial/toc.py index 444a36dd..c020c567 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# ruff: noqa G004 """CLI script to build a table of contents for an IPython notebook""" import argparse as ap @@ -12,7 +11,7 @@ import nbformat from nbformat import NotebookNode -__version__ = "0.1.1" +__version__ = "0.1.2" # Set up logging logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") @@ -65,9 +64,9 @@ def extract_toc(notebook: str) -> list[TocEntry]: anchor = "-".join(clean_text.lower().split()) toc.append(TocEntry(level, text, anchor)) - logger.debug(f"Found heading (level {level}): {text}") + logger.debug("Found heading (level %d): %s", level, text) except Exception as e: - logger.warning(f"Error processing heading at line {line_num}: {e}") + logger.warning("Error processing heading at line %d: %s", line_num, e) return toc @@ -106,8 +105,8 @@ def build_toc( # Read the notebook try: nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT) - except Exception as e: - logger.error(f"Failed to read notebook '{nb_path}': {e}") + except Exception: + logger.exception("Failed to read notebook '%s'", nb_path) raise md_cells = extract_markdown_cells(nb_obj) @@ -116,7 +115,7 @@ def build_toc( toc_tree = extract_toc(md_cells) if not toc_tree: - logger.warning(f"No headings found in notebook '{nb_path}'") + logger.warning("No headings found in notebook '%s'", nb_path) # Build toc representation toc_repr = markdown_toc(toc_tree) @@ -133,7 +132,9 @@ def build_toc( if not toc_replaced: logger.warning( - f"Placeholder '{placeholder}' or heading '{toc_header}' not found in notebook" + "Placeholder '%s' or heading '%s' not found in notebook", + placeholder, + toc_header, ) return nb_obj, toc_replaced @@ -192,13 +193,13 @@ def main(): try: input_nb = pathlib.Path(args.notebook) if not input_nb.exists(): - logger.error(f"Input file not found: {input_nb}") + logger.error("Input file not found: %s", input_nb) sys.exit(1) if not input_nb.is_file(): - logger.error(f"Input path is not a file: {input_nb}") + logger.error("Input path is not a file: %s", input_nb) sys.exit(1) - except Exception as e: - logger.error(f"Error processing input path: {e}") + except Exception: + logger.exception("Error processing input path") sys.exit(1) # Set output file path @@ -212,7 +213,7 @@ def main(): try: # Generate TOC and write to output file - logger.info(f"Processing notebook: {input_nb}") + logger.info("Processing notebook: %s", input_nb) toc_notebook, toc_replaced = build_toc(input_nb, args.placeholder, args.header) if not toc_replaced: @@ -221,16 +222,16 @@ def main(): with output_nb.open("w", encoding="utf-8") as file: nbformat.write(toc_notebook, file) - logger.info(f"TOC written to: {output_nb}") + logger.info("TOC written to: %s", output_nb) # Handle force option if args.force: - logger.info(f"Replacing original notebook with TOC version") + logger.info("Replacing original notebook with TOC version") input_nb.unlink() output_nb.rename(input_nb) - logger.info(f"Original notebook replaced with: {input_nb}") - except Exception as e: - logger.error(f"Error processing notebook: {e}") + logger.info("Original notebook replaced with: %s", input_nb) + except Exception: + logger.exception("Error processing notebook") sys.exit(1) From a6a2c55f1384ef9e9aba88359b93d7dff670f9eb Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Tue, 13 May 2025 21:43:02 +0200 Subject: [PATCH 03/11] Fix TOC generation for headings inside code blocks Upgrade TocEntry from namedtuple to NamedTuple with type hints. Improve logic for detecting and ignoring markdown headers inside code blocks to prevent incorrect TOC entries. Also refactor the file output handling for better organization when using --force. --- tutorial/toc.py | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/tutorial/toc.py b/tutorial/toc.py index c020c567..ec5c682a 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -6,7 +6,7 @@ import pathlib import re import sys -from collections import namedtuple +from typing import NamedTuple import nbformat from nbformat import NotebookNode @@ -17,7 +17,13 @@ logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") logger = logging.getLogger("toc") -TocEntry = namedtuple("TocEntry", ["level", "text", "anchor"]) + +class TocEntry(NamedTuple): + """Table of contents entry""" + + level: int + text: str + anchor: str def extract_markdown_cells(notebook: NotebookNode) -> str: @@ -39,6 +45,7 @@ def extract_toc(notebook: str) -> list[TocEntry]: Parses markdown headings (lines starting with #) and converts them to TOC entries. Each entry includes the heading level, text, and an anchor derived from the text. + Ignores '#' symbols inside code blocks. Args: notebook: String containing markdown content @@ -49,9 +56,21 @@ def extract_toc(notebook: str) -> list[TocEntry]: toc = [] line_re = re.compile(r"(#+)\s+(.+)") line_num = 0 + is_code_block = False for line in notebook.splitlines(): line_num += 1 + + # Check if we're entering or exiting a code block + if line.strip().startswith("```"): + is_code_block = not is_code_block + continue + + # Skip header processing if we're in a code block + if is_code_block: + continue + + # Process headers only when not in a code block if groups := re.match(line_re, line): try: heading, text, *_ = groups.groups() @@ -220,16 +239,21 @@ def main(): logger.warning("Skipping output - no placeholder found in notebook") sys.exit(0) # Exit with success code since it's not an error - with output_nb.open("w", encoding="utf-8") as file: - nbformat.write(toc_notebook, file) - logger.info("TOC written to: %s", output_nb) + if not args.force: + logger.debug("Ignoring output file: %s", output_nb) - # Handle force option - if args.force: + with output_nb.open("w", encoding="utf-8") as file: + nbformat.write(toc_notebook, file) + + logger.info("TOC written to: %s", output_nb) + else: logger.info("Replacing original notebook with TOC version") - input_nb.unlink() - output_nb.rename(input_nb) + + with input_nb.open("w", encoding="utf-8") as file: + nbformat.write(toc_notebook, file) + logger.info("Original notebook replaced with: %s", input_nb) + except Exception: logger.exception("Error processing notebook") sys.exit(1) From 2024e0688067c2b428792008fa955e431cbfe5df Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Tue, 13 May 2025 21:49:37 +0200 Subject: [PATCH 04/11] Skip TOC header when generating table of contents --- tutorial/toc.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tutorial/toc.py b/tutorial/toc.py index ec5c682a..f95831e9 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -40,7 +40,7 @@ def extract_markdown_cells(notebook: NotebookNode) -> str: ) -def extract_toc(notebook: str) -> list[TocEntry]: +def extract_toc(notebook: str, toc_header: str) -> list[TocEntry]: """Extract the table of contents from a markdown string Parses markdown headings (lines starting with #) and converts them to TOC entries. @@ -49,17 +49,19 @@ def extract_toc(notebook: str) -> list[TocEntry]: Args: notebook: String containing markdown content + toc_header: Header text for the table of contents Returns: list[TocEntry]: List of table of contents entries """ toc = [] line_re = re.compile(r"(#+)\s+(.+)") - line_num = 0 is_code_block = False - for line in notebook.splitlines(): - line_num += 1 + for line_num, line in enumerate(notebook.splitlines(), start=1): + # Skip line if contains exactly the toc header + if line.strip() == toc_header: + continue # Check if we're entering or exiting a code block if line.strip().startswith("```"): @@ -70,7 +72,7 @@ def extract_toc(notebook: str) -> list[TocEntry]: if is_code_block: continue - # Process headers only when not in a code block + # Process headers if groups := re.match(line_re, line): try: heading, text, *_ = groups.groups() @@ -84,6 +86,7 @@ def extract_toc(notebook: str) -> list[TocEntry]: toc.append(TocEntry(level, text, anchor)) logger.debug("Found heading (level %d): %s", level, text) + except Exception as e: logger.warning("Error processing heading at line %d: %s", line_num, e) @@ -131,7 +134,7 @@ def build_toc( md_cells = extract_markdown_cells(nb_obj) # Build tree - toc_tree = extract_toc(md_cells) + toc_tree = extract_toc(md_cells, toc_header) if not toc_tree: logger.warning("No headings found in notebook '%s'", nb_path) From 0a0c145b5920c5f45fb1bb2d27627fb77d962a57 Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Fri, 27 Mar 2026 11:48:01 +0100 Subject: [PATCH 05/11] Rewrite toc.py as a uv inline script - Add PEP 723 inline script metadata so the script runs standalone via `uv run toc.py` without any manual dependency installation - Replace argparse with Typer: grouped help panels (Output / TOC Options / Misc), path validation delegated to Typer/Click, eager --version callback - Replace logging with Rich: console for stdout, stderr console for warnings, print_exception() for errors - Add APP_HELP embedded documentation covering how the tool works, the placeholder cell requirement, all output modes, and usage examples - build_toc() now returns a 3-tuple (notebook, toc_replaced, has_headings) so all I/O stays in main(); remove try/except from extract_toc() - Add toc_fixture.ipynb test fixture covering: fenced code blocks with # comments, multiple consecutive code blocks, language-spec fences, anchor generation edge cases, non-heading # patterns, and deep nesting - Bump version to 0.2.0 --- tutorial/tests/toc_fixture.ipynb | 172 +++++++++++++++ tutorial/toc.py | 365 ++++++++++++++++--------------- 2 files changed, 365 insertions(+), 172 deletions(-) create mode 100644 tutorial/tests/toc_fixture.ipynb diff --git a/tutorial/tests/toc_fixture.ipynb b/tutorial/tests/toc_fixture.ipynb new file mode 100644 index 00000000..0a806f77 --- /dev/null +++ b/tutorial/tests/toc_fixture.ipynb @@ -0,0 +1,172 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "[TOC]" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "# Introduction\n", + "\n", + "This notebook is a fixture for testing `toc.py`. It exercises heading detection,\n", + "code block skipping, anchor generation, and edge cases.\n", + "\n", + "## Why this matters\n", + "\n", + "The TOC generator must correctly distinguish real Markdown headings from `#` characters\n", + "that appear inside fenced code blocks." + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "## Code Blocks with `#` Comments\n", + "\n", + "The lines below are inside a fenced Python block. They must **not** appear in the TOC.\n", + "\n", + "```python\n", + "# This looks like a heading but is a comment\n", + "## So does this\n", + "### And this\n", + "x = 1 # inline comment\n", + "print(x)\n", + "```\n", + "\n", + "This paragraph is after the closing fence — headings here are real again.\n", + "\n", + "### Real Heading After a Code Block\n", + "\n", + "This heading immediately follows a closing fence and must be captured." + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "## Fences With Language Specifiers\n", + "\n", + "Verify that fences with a language tag (` ```bash `, ` ```python `, ` ```text `) still\n", + "toggle the code-block state correctly.\n", + "\n", + "```bash\n", + "# bash comment — not a heading\n", + "echo \"hello world\"\n", + "```\n", + "\n", + "```python\n", + "# another comment — not a heading\n", + "print(\"world\")\n", + "```\n", + "\n", + "### Heading Between Two Code Blocks\n", + "\n", + "This heading sits between two code blocks and must be captured.\n", + "\n", + "```text\n", + "# text block comment — not a heading\n", + "```\n", + "\n", + "### Heading After the Third Block\n", + "\n", + "Three code blocks closed — heading detection must still be active." + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## Anchor Generation Edge Cases\n", + "\n", + "### Heading with `inline code` in backticks\n", + "\n", + "The anchor should strip backtick characters and produce `heading-with-inline-code-in-backticks`.\n", + "\n", + "### Special Characters: Hello, World!\n", + "\n", + "Commas, exclamation marks, and colons must be stripped from anchors.\n", + "\n", + "### Hyphens-are-preserved and Spaces Become Hyphens\n", + "\n", + "Hyphens that are already in the heading text should survive into the anchor." + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "## Non-Heading `#` Patterns\n", + "\n", + "The following lines must **not** produce TOC entries:\n", + "\n", + "#notaheading (no space after `#`)\n", + "\n", + " # indented with spaces (indented code block style)\n", + "\n", + "Some prose with a `#hashtag` inside a sentence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "# This is a Python code cell, not a markdown cell.\n", + "# Its content must be completely ignored by the TOC generator.\n", + "\n", + "## Also not a heading\n", + "print('hello')" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "## Deep Nesting\n", + "\n", + "### Level Three\n", + "\n", + "#### Level Four\n", + "\n", + "##### Level Five\n", + "\n", + "Nesting beyond h2 tests that the indentation in the generated TOC list scales with level." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorial/toc.py b/tutorial/toc.py index f95831e9..89e46b71 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -1,25 +1,85 @@ #!/usr/bin/env python -"""CLI script to build a table of contents for an IPython notebook""" +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "typer", +# "rich", +# "nbformat" +# ] +# /// +"""CLI script to build a table of contents for a Jupyter notebook.""" -import argparse as ap -import logging import pathlib import re -import sys -from typing import NamedTuple +from typing import Annotated, NamedTuple import nbformat +import typer from nbformat import NotebookNode +from rich.console import Console -__version__ = "0.1.2" +__version__ = "0.2.0" -# Set up logging -logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") -logger = logging.getLogger("toc") +console = Console() +err_console = Console(stderr=True) + +APP_HELP = """\ +Generate a Markdown table of contents from a Jupyter notebook's headings and +insert it into a designated cell. + +[bold]How it works[/bold] + +Scans all [italic]markdown cells[/italic] in the notebook for ATX headings +([cyan]#[/cyan], [cyan]##[/cyan], [cyan]###[/cyan] …), skipping headings inside fenced code blocks and +ignoring the TOC header itself to avoid self-referential entries. + +For each heading it produces a linked list item whose anchor is derived from +the heading text (lowercased, spaces → hyphens, most punctuation stripped). + +[bold]Placeholder cell requirement[/bold] + +The TOC is inserted into the first cell whose source starts with either: + + • the placeholder string (default: [cyan]\\[TOC\\][/cyan]) + • an existing [cyan]# Table of Contents[/cyan] heading (allows regeneration) + +If no such cell is found the script exits without writing any output. + +[bold]Output modes[/bold] + + [green]default[/green] Writes [cyan].toc.ipynb[/cyan] alongside the original file. + [green]-o PATH[/green] Writes to an explicit output path. + [green]--force[/green] Overwrites the original notebook in-place. + +[bold]Examples[/bold] + + [dim]# Generate TOC, write to my_notebook.toc.ipynb[/dim] + uv run toc.py my_notebook.ipynb + + [dim]# Update the notebook in-place[/dim] + uv run toc.py my_notebook.ipynb --force + + [dim]# Custom placeholder and output path[/dim] + uv run toc.py my_notebook.ipynb -p "" -o out/notebook.ipynb +""" + + +def _version_callback(value: bool) -> None: + if value: + console.print(f"toc {__version__}") + raise typer.Exit() + + +app = typer.Typer( + name="toc", + help=APP_HELP, + add_completion=False, + rich_markup_mode="rich", +) class TocEntry(NamedTuple): - """Table of contents entry""" + """Table of contents entry.""" level: int text: str @@ -27,81 +87,54 @@ class TocEntry(NamedTuple): def extract_markdown_cells(notebook: NotebookNode) -> str: - """Extract the markdown cells from a notebook - - Args: - notebook: A notebook object - - Returns: - str: Concatenated content of all markdown cells - """ + """Return concatenated content of all markdown cells in the notebook.""" return "\n".join( - [cell.source for cell in notebook.cells if cell.cell_type == "markdown"] + cell.source for cell in notebook.cells if cell.cell_type == "markdown" ) def extract_toc(notebook: str, toc_header: str) -> list[TocEntry]: - """Extract the table of contents from a markdown string + """Parse markdown headings from a string and return TOC entries. - Parses markdown headings (lines starting with #) and converts them to TOC entries. - Each entry includes the heading level, text, and an anchor derived from the text. - Ignores '#' symbols inside code blocks. + Ignores headings inside fenced code blocks and skips the TOC header itself. Args: - notebook: String containing markdown content - toc_header: Header text for the table of contents + notebook: String containing markdown content. + toc_header: Header text for the table of contents (excluded from output). Returns: - list[TocEntry]: List of table of contents entries + List of TocEntry objects (level, text, anchor). """ toc = [] line_re = re.compile(r"(#+)\s+(.+)") is_code_block = False - for line_num, line in enumerate(notebook.splitlines(), start=1): - # Skip line if contains exactly the toc header + for line in notebook.splitlines(): if line.strip() == toc_header: continue - # Check if we're entering or exiting a code block if line.strip().startswith("```"): is_code_block = not is_code_block continue - # Skip header processing if we're in a code block if is_code_block: continue - # Process headers if groups := re.match(line_re, line): - try: - heading, text, *_ = groups.groups() - level = len(heading) + heading, text, *_ = groups.groups() + level = len(heading) - # Clean the text to make a proper anchor - clean_text = text.replace("`", "") - # Remove any other special characters that might break anchors - clean_text = re.sub(r"[^\w\s-]", "", clean_text) - anchor = "-".join(clean_text.lower().split()) + clean_text = text.replace("`", "") + clean_text = re.sub(r"[^\w\s-]", "", clean_text) + anchor = "-".join(clean_text.lower().split()) - toc.append(TocEntry(level, text, anchor)) - logger.debug("Found heading (level %d): %s", level, text) - - except Exception as e: - logger.warning("Error processing heading at line %d: %s", line_num, e) + toc.append(TocEntry(level, text, anchor)) return toc def markdown_toc(toc: list[TocEntry]) -> str: - """Build a string representation of the toc as a nested markdown list - - Args: - toc: List of TocEntry objects - - Returns: - str: Markdown-formatted table of contents with proper indentation - """ + """Return a nested markdown list representation of the TOC entries.""" lines = [] for entry in toc: line = f"{' ' * entry.level}- [{entry.text}](#{entry.anchor})" @@ -113,36 +146,24 @@ def build_toc( nb_path: pathlib.Path, placeholder: str = "[TOC]", toc_header: str = "# Table of Contents", -) -> tuple[NotebookNode, bool]: - """Build a table of contents for a notebook and insert it at the location of a placeholder +) -> tuple[NotebookNode, bool, bool]: + """Read a notebook, generate a TOC, and insert it at the placeholder cell. Args: - nb_path: Path to the notebook file - placeholder: The text to replace with the generated TOC (default: "[TOC]") - toc_header: The header text to use for the TOC (default: "# Table of Contents") + nb_path: Path to the notebook file. + placeholder: Text to replace with the generated TOC. + toc_header: Header text for the TOC section. Returns: - tuple[NotebookNode, bool]: The notebook with TOC inserted and a boolean indicating if placeholder was found + Tuple of (notebook, toc_replaced, has_headings). """ - # Read the notebook - try: - nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT) - except Exception: - logger.exception("Failed to read notebook '%s'", nb_path) - raise + nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT) md_cells = extract_markdown_cells(nb_obj) - - # Build tree toc_tree = extract_toc(md_cells, toc_header) + has_headings = bool(toc_tree) - if not toc_tree: - logger.warning("No headings found in notebook '%s'", nb_path) - - # Build toc representation toc_repr = markdown_toc(toc_tree) - - # Insert it at the location of a placeholder toc_replaced = False for cell in nb_obj.cells: @@ -152,115 +173,115 @@ def build_toc( toc_replaced = True break - if not toc_replaced: - logger.warning( - "Placeholder '%s' or heading '%s' not found in notebook", - placeholder, - toc_header, - ) - - return nb_obj, toc_replaced - - -def main(): - """CLI entry point""" - parser = ap.ArgumentParser( - description="Build a table of contents for an IPython notebook", - epilog=""" - This script extracts headings from markdown cells in a Jupyter notebook and - generates a markdown-formatted table of contents. The TOC is inserted into - the notebook at the location of a placeholder (default: '[TOC]') or where - a '# Table of Contents' heading exists. Links in the TOC point to notebook - anchors created from the heading text. - """, - formatter_class=ap.RawDescriptionHelpFormatter, - ) - parser.add_argument("notebook", type=str, help="Path to the notebook to process") - parser.add_argument( - "--output", "-o", type=str, default=None, help="Path to the output notebook" - ) - parser.add_argument( - "--force", - "-f", - action="store_true", - default=False, - help="Force overwrite of original notebook", - ) - parser.add_argument( - "--placeholder", - "-p", - type=str, - default="[TOC]", - help="Placeholder text to replace with the TOC (default: '[TOC]')", - ) - parser.add_argument( - "--header", - type=str, - default="# Table of Contents", - help="Header text for the TOC (default: '# Table of Contents')", - ) - parser.add_argument( - "--verbose", "-v", action="store_true", help="Enable verbose output" - ) - parser.add_argument( - "--version", action="version", version=f"%(prog)s {__version__}" - ) - args = parser.parse_args() - - # Set logging level based on verbosity - if args.verbose: - logger.setLevel(logging.DEBUG) - - # Validate input file - try: - input_nb = pathlib.Path(args.notebook) - if not input_nb.exists(): - logger.error("Input file not found: %s", input_nb) - sys.exit(1) - if not input_nb.is_file(): - logger.error("Input path is not a file: %s", input_nb) - sys.exit(1) - except Exception: - logger.exception("Error processing input path") - sys.exit(1) - - # Set output file path - if args.output is None: - output_nb = input_nb.with_suffix(".toc.ipynb") + return nb_obj, toc_replaced, has_headings + + +@app.command(help=APP_HELP) +def main( + notebook: Annotated[ + pathlib.Path, + typer.Argument( + help="Path to the Jupyter notebook (.ipynb) to process.", + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + ), + ], + output: Annotated[ + pathlib.Path | None, + typer.Option( + "--output", + "-o", + help="Output path for the processed notebook. Defaults to [cyan].toc.ipynb[/cyan].", + rich_help_panel="Output", + ), + ] = None, + force: Annotated[ + bool, + typer.Option( + "--force", + "-f", + help="Overwrite the [bold]original[/bold] notebook in-place instead of writing a new file.", + rich_help_panel="Output", + ), + ] = False, + placeholder: Annotated[ + str, + typer.Option( + "--placeholder", + "-p", + help=r"Placeholder text in a cell to replace with the generated TOC.", + rich_help_panel="TOC Options", + ), + ] = "[TOC]", + header: Annotated[ + str, + typer.Option( + "--header", + help="Markdown heading to use as the TOC section header.", + rich_help_panel="TOC Options", + ), + ] = "# Table of Contents", + verbose: Annotated[ + bool, + typer.Option( + "--verbose", + "-v", + help="Print debug information during processing.", + rich_help_panel="Misc", + ), + ] = False, + version: Annotated[ # noqa: ARG001 + bool, + typer.Option( + "--version", + help="Show the version and exit.", + callback=_version_callback, + is_eager=True, + rich_help_panel="Misc", + ), + ] = False, +) -> None: + if force: + output_nb = notebook + elif output is not None: + output_nb = output else: - output_nb = pathlib.Path(args.output) + output_nb = notebook.with_suffix(".toc.ipynb") - # Create output directory if it doesn't exist output_nb.parent.mkdir(parents=True, exist_ok=True) - try: - # Generate TOC and write to output file - logger.info("Processing notebook: %s", input_nb) - toc_notebook, toc_replaced = build_toc(input_nb, args.placeholder, args.header) - - if not toc_replaced: - logger.warning("Skipping output - no placeholder found in notebook") - sys.exit(0) # Exit with success code since it's not an error + if verbose: + console.print(f"[dim]Processing[/dim] [cyan]{notebook}[/cyan] …") - if not args.force: - logger.debug("Ignoring output file: %s", output_nb) - - with output_nb.open("w", encoding="utf-8") as file: - nbformat.write(toc_notebook, file) + try: + toc_notebook, toc_replaced, has_headings = build_toc( + notebook, placeholder, header + ) + except Exception: + err_console.print_exception() + raise typer.Exit(1) from None - logger.info("TOC written to: %s", output_nb) - else: - logger.info("Replacing original notebook with TOC version") + if not has_headings: + err_console.print( + f"[yellow]Warning:[/yellow] No headings found in [cyan]{notebook}[/cyan]." + ) - with input_nb.open("w", encoding="utf-8") as file: - nbformat.write(toc_notebook, file) + if not toc_replaced: + err_console.print( + "[yellow]Warning:[/yellow] No placeholder or TOC cell found — skipping output." + ) + raise typer.Exit(0) - logger.info("Original notebook replaced with: %s", input_nb) + with output_nb.open("w", encoding="utf-8") as file: + nbformat.write(toc_notebook, file) - except Exception: - logger.exception("Error processing notebook") - sys.exit(1) + if force: + console.print(f"[green]Updated in-place:[/green] {notebook}") + else: + console.print(f"[green]TOC written to:[/green] {output_nb}") if __name__ == "__main__": - main() + app() From c3811119ee87b044f98ad9b03dbf1673b23fa36d Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Fri, 27 Mar 2026 11:53:13 +0100 Subject: [PATCH 06/11] Add --split-cells flag to fix Jupyter TOC anchor links Jupyter only generates working anchor IDs for headings when each heading is in its own markdown cell. This adds --split-cells / -s which rewrites multi-heading cells into one cell per heading before TOC generation, so every link in the table of contents works correctly. New functions: - split_cell(): splits a single cell source at each heading boundary, respecting fenced code blocks and skipping the TOC header line - split_multi_heading_cells(): applies split_cell() across all markdown cells in a notebook, returning the modified notebook and a count of cells that were split build_toc() gains a split_cells parameter and returns a 4-tuple that includes the split count. main() passes the new --split-cells flag through and prints a summary line when cells were split. Also adds a dedicated multi-heading cell to toc_fixture.ipynb to exercise the splitting path, and bumps the version to 0.3.0. --- tutorial/tests/toc_fixture.ipynb | 17 ++++++ tutorial/toc.py | 99 +++++++++++++++++++++++++++++--- 2 files changed, 109 insertions(+), 7 deletions(-) diff --git a/tutorial/tests/toc_fixture.ipynb b/tutorial/tests/toc_fixture.ipynb index 0a806f77..2e1e0549 100644 --- a/tutorial/tests/toc_fixture.ipynb +++ b/tutorial/tests/toc_fixture.ipynb @@ -135,6 +135,23 @@ "cell_type": "markdown", "id": "7", "metadata": {}, + "source": [ + "## Cell Splitting\n", + "\n", + "This cell intentionally contains **two headings** to test `--split-cells`.\n", + "Without splitting, the TOC link for the second heading below will not work in\n", + "Jupyter because only the first heading in a cell receives a working anchor.\n", + "\n", + "### Second Heading in the Same Cell\n", + "\n", + "With `--split-cells`, this cell is rewritten into two separate cells before\n", + "the TOC is generated, so both anchor links work." + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, "source": [ "## Deep Nesting\n", "\n", diff --git a/tutorial/toc.py b/tutorial/toc.py index 89e46b71..de1ab6b1 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -18,7 +18,7 @@ from nbformat import NotebookNode from rich.console import Console -__version__ = "0.2.0" +__version__ = "0.3.0" console = Console() err_console = Console(stderr=True) @@ -49,7 +49,8 @@ [green]default[/green] Writes [cyan].toc.ipynb[/cyan] alongside the original file. [green]-o PATH[/green] Writes to an explicit output path. - [green]--force[/green] Overwrites the original notebook in-place. + [green]--force[/green] Overwrites the original notebook in-place. + [green]--split-cells[/green] Split multi-heading cells so all TOC links work in Jupyter. [bold]Examples[/bold] @@ -142,23 +143,94 @@ def markdown_toc(toc: list[TocEntry]) -> str: return "\n".join(lines) +def split_cell(source: str, toc_header: str) -> list[str]: + """Split a markdown cell source at each heading boundary. + + Respects fenced code blocks (headings inside them are not split points). + The TOC header line itself is never a split point. + + Returns a list with one entry per segment. Returns ``[source]`` unchanged + when no split is needed (zero or one heading found). + """ + line_re = re.compile(r"^(#+)\s+.+") + is_code_block = False + segments: list[str] = [] + current_lines: list[str] = [] + + for line in source.splitlines(keepends=True): + stripped = line.strip() + + if stripped.startswith("```"): + is_code_block = not is_code_block + current_lines.append(line) + continue + + if is_code_block or stripped == toc_header: + current_lines.append(line) + continue + + if re.match(line_re, line) and current_lines: + segments.append("".join(current_lines).strip()) + current_lines = [line] + else: + current_lines.append(line) + + if current_lines: + segments.append("".join(current_lines).strip()) + + return segments if len(segments) > 1 else [source] + + +def split_multi_heading_cells( + nb_obj: NotebookNode, toc_header: str +) -> tuple[NotebookNode, int]: + """Replace each markdown cell that contains multiple headings with one cell per heading. + + Returns ``(nb_obj, cells_split_count)``. + """ + new_cells: list[NotebookNode] = [] + cells_split = 0 + + for cell in nb_obj.cells: + if cell.cell_type != "markdown": + new_cells.append(cell) + continue + + segments = split_cell(cell.source, toc_header) + if len(segments) == 1: + new_cells.append(cell) + else: + cells_split += 1 + for seg in segments: + new_cells.append(nbformat.v4.new_markdown_cell(seg)) + + nb_obj.cells = new_cells + return nb_obj, cells_split + + def build_toc( nb_path: pathlib.Path, placeholder: str = "[TOC]", toc_header: str = "# Table of Contents", -) -> tuple[NotebookNode, bool, bool]: + split_cells: bool = False, +) -> tuple[NotebookNode, bool, bool, int]: """Read a notebook, generate a TOC, and insert it at the placeholder cell. Args: nb_path: Path to the notebook file. placeholder: Text to replace with the generated TOC. toc_header: Header text for the TOC section. + split_cells: If True, split multi-heading cells before generating the TOC. Returns: - Tuple of (notebook, toc_replaced, has_headings). + Tuple of (notebook, toc_replaced, has_headings, cells_split). """ nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT) + cells_split = 0 + if split_cells: + nb_obj, cells_split = split_multi_heading_cells(nb_obj, toc_header) + md_cells = extract_markdown_cells(nb_obj) toc_tree = extract_toc(md_cells, toc_header) has_headings = bool(toc_tree) @@ -173,7 +245,7 @@ def build_toc( toc_replaced = True break - return nb_obj, toc_replaced, has_headings + return nb_obj, toc_replaced, has_headings, cells_split @app.command(help=APP_HELP) @@ -223,6 +295,16 @@ def main( rich_help_panel="TOC Options", ), ] = "# Table of Contents", + split_cells: Annotated[ + bool, + typer.Option( + "--split-cells", + "-s", + help="Split markdown cells that contain multiple headings into one cell per heading. " + "Recommended — required for TOC links to work correctly in Jupyter.", + rich_help_panel="TOC Options", + ), + ] = False, verbose: Annotated[ bool, typer.Option( @@ -256,8 +338,8 @@ def main( console.print(f"[dim]Processing[/dim] [cyan]{notebook}[/cyan] …") try: - toc_notebook, toc_replaced, has_headings = build_toc( - notebook, placeholder, header + toc_notebook, toc_replaced, has_headings, cells_split = build_toc( + notebook, placeholder, header, split_cells ) except Exception: err_console.print_exception() @@ -277,6 +359,9 @@ def main( with output_nb.open("w", encoding="utf-8") as file: nbformat.write(toc_notebook, file) + if split_cells and cells_split: + console.print(f"[dim]Split {cells_split} cell(s) with multiple headings.[/dim]") + if force: console.print(f"[green]Updated in-place:[/green] {notebook}") else: From 6b5e21bae8795c89db03d9a461a302e9be6a51c0 Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Fri, 27 Mar 2026 12:25:28 +0100 Subject: [PATCH 07/11] Fix anchor generation and add CLI input validation Anchor format fix: JupyterLab sets data-jupyter-id on headings by replacing spaces with hyphens and preserving everything else verbatim. The previous implementation lowercased text and stripped all non-word characters, producing anchors that never matched. Fix: strip backticks only (they are markdown syntax, absent from rendered text) and replace spaces with hyphens, preserving case and all other characters. CLI improvements: - --output and --force are now mutually exclusive; passing both exits with a clear error message - --output paths missing the .ipynb extension have it appended automatically --- tutorial/toc.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tutorial/toc.py b/tutorial/toc.py index de1ab6b1..de8a36a0 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -125,9 +125,7 @@ def extract_toc(notebook: str, toc_header: str) -> list[TocEntry]: heading, text, *_ = groups.groups() level = len(heading) - clean_text = text.replace("`", "") - clean_text = re.sub(r"[^\w\s-]", "", clean_text) - anchor = "-".join(clean_text.lower().split()) + anchor = text.replace("`", "").replace(" ", "-") toc.append(TocEntry(level, text, anchor)) @@ -325,9 +323,17 @@ def main( ), ] = False, ) -> None: + if force and output is not None: + err_console.print( + "[red]Error:[/red] --output and --force are mutually exclusive." + ) + raise typer.Exit(1) + if force: output_nb = notebook elif output is not None: + if output.suffix != ".ipynb": + output = output.with_suffix(output.suffix + ".ipynb") output_nb = output else: output_nb = notebook.with_suffix(".toc.ipynb") From b4a71584290bad5793935c4e84e21513072f5ed5 Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Fri, 27 Mar 2026 12:35:37 +0100 Subject: [PATCH 08/11] Use Rich Markdown formatting for Typer's help and messages --- tutorial/toc.py | 50 +++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/tutorial/toc.py b/tutorial/toc.py index de8a36a0..a0af88f7 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -27,41 +27,43 @@ Generate a Markdown table of contents from a Jupyter notebook's headings and insert it into a designated cell. -[bold]How it works[/bold] +## How it works -Scans all [italic]markdown cells[/italic] in the notebook for ATX headings -([cyan]#[/cyan], [cyan]##[/cyan], [cyan]###[/cyan] …), skipping headings inside fenced code blocks and -ignoring the TOC header itself to avoid self-referential entries. +Scans all *markdown cells* in the notebook for ATX headings (`#`, `##`, `###` …), +skipping headings inside fenced code blocks and ignoring the TOC header itself +to avoid self-referential entries. -For each heading it produces a linked list item whose anchor is derived from -the heading text (lowercased, spaces → hyphens, most punctuation stripped). +For each heading it produces a linked list item whose anchor is derived from the +heading text (spaces → hyphens, backticks stripped, everything else preserved). -[bold]Placeholder cell requirement[/bold] +## Placeholder cell requirement The TOC is inserted into the first cell whose source starts with either: - • the placeholder string (default: [cyan]\\[TOC\\][/cyan]) - • an existing [cyan]# Table of Contents[/cyan] heading (allows regeneration) +- the placeholder string (default: `[TOC]`) +- an existing `# Table of Contents` heading (allows regeneration) If no such cell is found the script exits without writing any output. -[bold]Output modes[/bold] +## Output modes - [green]default[/green] Writes [cyan].toc.ipynb[/cyan] alongside the original file. - [green]-o PATH[/green] Writes to an explicit output path. - [green]--force[/green] Overwrites the original notebook in-place. - [green]--split-cells[/green] Split multi-heading cells so all TOC links work in Jupyter. +| Flag | Behaviour | +|---|---| +| *(default)* | Writes `.toc.ipynb` alongside the original | +| `-o PATH` | Writes to an explicit output path | +| `--force` | Overwrites the original notebook in-place | +| `--split-cells` | Splits multi-heading cells so all TOC links work | -[bold]Examples[/bold] +## Examples - [dim]# Generate TOC, write to my_notebook.toc.ipynb[/dim] - uv run toc.py my_notebook.ipynb + # Generate TOC, write to my_notebook.toc.ipynb + uv run toc.py my_notebook.ipynb - [dim]# Update the notebook in-place[/dim] - uv run toc.py my_notebook.ipynb --force + # Update the notebook in-place + uv run toc.py my_notebook.ipynb --force - [dim]# Custom placeholder and output path[/dim] - uv run toc.py my_notebook.ipynb -p "" -o out/notebook.ipynb + # Custom placeholder and output path + uv run toc.py my_notebook.ipynb -p "" -o out/notebook.ipynb """ @@ -75,7 +77,7 @@ def _version_callback(value: bool) -> None: name="toc", help=APP_HELP, add_completion=False, - rich_markup_mode="rich", + rich_markup_mode="markdown", ) @@ -263,7 +265,7 @@ def main( typer.Option( "--output", "-o", - help="Output path for the processed notebook. Defaults to [cyan].toc.ipynb[/cyan].", + help="Output path for the processed notebook. Defaults to `.toc.ipynb`.", rich_help_panel="Output", ), ] = None, @@ -272,7 +274,7 @@ def main( typer.Option( "--force", "-f", - help="Overwrite the [bold]original[/bold] notebook in-place instead of writing a new file.", + help="Overwrite the **original** notebook in-place instead of writing a new file.", rich_help_panel="Output", ), ] = False, From 72ff4cea02749bd885738db83e647875e5bd89c1 Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Fri, 27 Mar 2026 12:41:16 +0100 Subject: [PATCH 09/11] Better help --- tutorial/toc.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tutorial/toc.py b/tutorial/toc.py index a0af88f7..c9b9c087 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -52,7 +52,7 @@ | *(default)* | Writes `.toc.ipynb` alongside the original | | `-o PATH` | Writes to an explicit output path | | `--force` | Overwrites the original notebook in-place | -| `--split-cells` | Splits multi-heading cells so all TOC links work | +| `--split-cells` | Splits multi-heading cells to have only one heading per cell | ## Examples @@ -300,8 +300,7 @@ def main( typer.Option( "--split-cells", "-s", - help="Split markdown cells that contain multiple headings into one cell per heading. " - "Recommended — required for TOC links to work correctly in Jupyter.", + help="Split markdown cells that contain multiple headings into one cell per heading.", rich_help_panel="TOC Options", ), ] = False, From 885a95a074677b2a88f8387207f080b0a5419922 Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Fri, 27 Mar 2026 12:44:46 +0100 Subject: [PATCH 10/11] Update shebang in toc.py to uv run --- tutorial/toc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorial/toc.py b/tutorial/toc.py index c9b9c087..665568c3 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env -S uv run -s # /// script # requires-python = ">=3.10" # dependencies = [ From a9cbc7b5f444ebd2129725c94d00609609cb71f7 Mon Sep 17 00:00:00 2001 From: Edoardo Baldi Date: Fri, 27 Mar 2026 16:23:08 +0100 Subject: [PATCH 11/11] Reformat all docstrings --- tutorial/toc.py | 81 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 19 deletions(-) diff --git a/tutorial/toc.py b/tutorial/toc.py index 665568c3..10d64d52 100755 --- a/tutorial/toc.py +++ b/tutorial/toc.py @@ -82,7 +82,14 @@ def _version_callback(value: bool) -> None: class TocEntry(NamedTuple): - """Table of contents entry.""" + """A single table of contents entry parsed from a notebook heading. + + Attributes: + level: Heading depth (1 for h1, 2 for h2, etc.). + text: Raw heading text as written in the markdown source. + anchor: URL-safe anchor derived from the text (spaces → hyphens, + backticks stripped, everything else preserved). + """ level: int text: str @@ -90,23 +97,31 @@ class TocEntry(NamedTuple): def extract_markdown_cells(notebook: NotebookNode) -> str: - """Return concatenated content of all markdown cells in the notebook.""" + """Return the concatenated source of all markdown cells in the notebook. + + Args: + notebook: A parsed notebook object. + + Returns: + A single string with all markdown cell sources joined by newlines. + """ return "\n".join( cell.source for cell in notebook.cells if cell.cell_type == "markdown" ) def extract_toc(notebook: str, toc_header: str) -> list[TocEntry]: - """Parse markdown headings from a string and return TOC entries. + """Parse ATX headings from a markdown string and return TOC entries. - Ignores headings inside fenced code blocks and skips the TOC header itself. + Headings inside fenced code blocks are ignored. The TOC header line + itself is skipped to prevent self-referential entries. Args: - notebook: String containing markdown content. - toc_header: Header text for the table of contents (excluded from output). + notebook: Concatenated markdown content from notebook cells. + toc_header: The TOC section header line to exclude from entries. Returns: - List of TocEntry objects (level, text, anchor). + A list of TocEntry objects, one per heading found. """ toc = [] line_re = re.compile(r"(#+)\s+(.+)") @@ -135,7 +150,17 @@ def extract_toc(notebook: str, toc_header: str) -> list[TocEntry]: def markdown_toc(toc: list[TocEntry]) -> str: - """Return a nested markdown list representation of the TOC entries.""" + """Format a list of TOC entries as a nested Markdown list. + + Each entry is indented by two spaces per heading level and rendered + as a Markdown link pointing to its anchor. + + Args: + toc: TOC entries to format. + + Returns: + A Markdown string with one linked list item per entry. + """ lines = [] for entry in toc: line = f"{' ' * entry.level}- [{entry.text}](#{entry.anchor})" @@ -144,13 +169,18 @@ def markdown_toc(toc: list[TocEntry]) -> str: def split_cell(source: str, toc_header: str) -> list[str]: - """Split a markdown cell source at each heading boundary. + """Split a markdown cell source into segments at each heading boundary. - Respects fenced code blocks (headings inside them are not split points). - The TOC header line itself is never a split point. + Headings inside fenced code blocks are not treated as split points. + The TOC header line is also excluded from splitting. + + Args: + source: Raw source text of a single markdown cell. + toc_header: The TOC section header line; never used as a split point. - Returns a list with one entry per segment. Returns ``[source]`` unchanged - when no split is needed (zero or one heading found). + Returns: + A list of source segments, one per heading. Returns ``[source]`` + unchanged when the cell contains zero or one heading. """ line_re = re.compile(r"^(#+)\s+.+") is_code_block = False @@ -184,9 +214,17 @@ def split_cell(source: str, toc_header: str) -> list[str]: def split_multi_heading_cells( nb_obj: NotebookNode, toc_header: str ) -> tuple[NotebookNode, int]: - """Replace each markdown cell that contains multiple headings with one cell per heading. + """Split every markdown cell that contains multiple headings into one cell per heading. + + Non-markdown cells are passed through unchanged. - Returns ``(nb_obj, cells_split_count)``. + Args: + nb_obj: The notebook to process (modified in place). + toc_header: The TOC section header line; passed through to ``split_cell``. + + Returns: + A tuple of ``(notebook, cells_split)`` where ``cells_split`` is the + number of cells that were split. """ new_cells: list[NotebookNode] = [] cells_split = 0 @@ -218,12 +256,17 @@ def build_toc( Args: nb_path: Path to the notebook file. - placeholder: Text to replace with the generated TOC. - toc_header: Header text for the TOC section. - split_cells: If True, split multi-heading cells before generating the TOC. + placeholder: Cell source prefix that marks the TOC insertion point. + toc_header: Markdown heading used as the TOC section title. + split_cells: When True, split multi-heading cells before generating + the TOC so that every heading gets its own cell. Returns: - Tuple of (notebook, toc_replaced, has_headings, cells_split). + A tuple of ``(notebook, toc_replaced, has_headings, cells_split)`` + where ``toc_replaced`` is True if the placeholder was found and + replaced, ``has_headings`` is True if any headings were found, and + ``cells_split`` is the number of cells split (0 when split_cells + is False). """ nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT)