Skip to content

Commit b014dbb

Browse files
committed
Some quality-of-life enhancements to the toc.py script
Add proper error handling, logging, command-line options, and improved documentation. Clean anchor generation, add version, and enhance CLI help text with better descriptions and examples.
1 parent cbd5572 commit b014dbb

1 file changed

Lines changed: 159 additions & 23 deletions

File tree

tutorial/toc.py

Lines changed: 159 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,156 @@
11
#!/usr/bin/env python
2+
# ruff: noqa: G004
23
"""CLI script to build a table of contents for an IPython notebook"""
34

45
import argparse as ap
6+
import logging
57
import pathlib
68
import re
9+
import sys
710
from collections import namedtuple
811

912
import nbformat
1013
from nbformat import NotebookNode
1114

15+
__version__ = "0.1.1"

# Set up logging: configured once at import time so every message is printed
# as "LEVEL: message"; the named logger below is the one used by this script.
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
logger = logging.getLogger("toc")

# One table-of-contents row: heading level, heading text, and link anchor.
TocEntry = namedtuple("TocEntry", ["level", "text", "anchor"])
1322

1423

1524
def extract_markdown_cells(notebook: NotebookNode) -> str:
    """Extract the markdown cells from a notebook.

    Args:
        notebook: A notebook object whose ``cells`` items expose
            ``cell_type`` and ``source`` attributes.

    Returns:
        The ``source`` of every cell whose ``cell_type`` is ``"markdown"``,
        joined with newlines in notebook order (empty string if there are
        none).
    """
    return "\n".join(
        cell.source for cell in notebook.cells if cell.cell_type == "markdown"
    )
2036

2137

2238
def extract_toc(notebook: str) -> list[TocEntry]:
    """Extract the table of contents from a markdown string.

    Each line that starts with one or more ``#`` followed by whitespace and
    some text becomes one entry. The anchor is derived from the heading text
    by dropping backticks, removing every character that is not a word
    character, whitespace or ``-``, lower-casing, and joining the remaining
    words with ``-``.

    Args:
        notebook: String containing markdown content.

    Returns:
        The entries in the order the headings appear. A heading that fails to
        process is logged as a warning and skipped.
    """
    toc = []
    line_re = re.compile(r"(#+)\s+(.+)")

    # Count from 1 so that warnings report human-friendly line numbers.
    for line_num, line in enumerate(notebook.splitlines(), start=1):
        groups = line_re.match(line)
        if not groups:
            continue
        try:
            heading, text, *_ = groups.groups()
            level = len(heading)

            # Clean the text to make a proper anchor
            clean_text = text.replace("`", "")
            # Remove any other special characters that might break anchors
            clean_text = re.sub(r"[^\w\s-]", "", clean_text)
            # NOTE(review): anchors are lower-cased here; confirm the target
            # notebook renderer generates heading ids the same way.
            anchor = "-".join(clean_text.lower().split())

            toc.append(TocEntry(level, text, anchor))
            logger.debug(f"Found heading (level {level}): {text}")
        except Exception as e:
            # Best effort: one bad heading must not abort the whole TOC.
            logger.warning(f"Error processing heading at line {line_num}: {e}")

    return toc
3373

3474

3575
def markdown_toc(toc: list[TocEntry]) -> str:
    """Build a string representation of the toc as a nested markdown list.

    Args:
        toc: Entries exposing ``level``, ``text`` and ``anchor``.

    Returns:
        One ``- [text](#anchor)`` bullet per entry, joined with newlines and
        indented by ``level`` repetitions of the indent unit (empty string
        for an empty ``toc``).
    """
    # NOTE(review): the indent unit is a single space as shown in this view;
    # whitespace in the captured diff is unreliable, so confirm against the
    # repository before relying on the nesting depth.
    return "\n".join(
        f"{' ' * entry.level}- [{entry.text}](#{entry.anchor})" for entry in toc
    )
4289

4390

44-
def build_toc(
    nb_path: pathlib.Path,
    placeholder: str = "[TOC]",
    toc_header: str = "# Table of Contents",
) -> tuple[NotebookNode, bool]:
    """Build a table of contents for a notebook and insert it at a placeholder.

    The first cell whose source starts with ``placeholder`` or ``toc_header``
    is turned into a markdown cell holding ``toc_header`` followed by the
    generated list. Only that first matching cell is rewritten.

    Args:
        nb_path: Path to the notebook file.
        placeholder: The text to replace with the generated TOC
            (default: "[TOC]").
        toc_header: The header text to use for the TOC
            (default: "# Table of Contents").

    Returns:
        The notebook object and a boolean telling whether a matching cell was
        found and replaced.

    Raises:
        Exception: Any error raised while reading the notebook is logged and
            re-raised unchanged.
    """
    # Read the notebook
    try:
        nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT)
    except Exception as e:
        logger.error(f"Failed to read notebook '{nb_path}': {e}")
        raise

    md_cells = extract_markdown_cells(nb_obj)

    # Build tree
    toc_tree = extract_toc(md_cells)

    if not toc_tree:
        logger.warning(f"No headings found in notebook '{nb_path}'")

    # Build toc representation
    toc_repr = markdown_toc(toc_tree)

    # An empty marker would make ``startswith`` match every cell and clobber
    # the first one, so only non-empty markers take part in the search.
    markers = tuple(marker for marker in (placeholder, toc_header) if marker)

    # Insert it at the location of a placeholder
    toc_replaced = False

    for cell in nb_obj.cells:
        if cell.source.startswith(markers):
            cell.source = f"{toc_header}\n{toc_repr}"
            cell.cell_type = "markdown"
            toc_replaced = True
            break

    if not toc_replaced:
        logger.warning(
            f"Placeholder '{placeholder}' or heading '{toc_header}' not found in notebook"
        )

    return nb_obj, toc_replaced
65140

66141

67142
def main():
68143
"""CLI entry point"""
69144
parser = ap.ArgumentParser(
70-
description="Build a table of contents for an IPython notebook"
145+
description="Build a table of contents for an IPython notebook",
146+
epilog="""
147+
This script extracts headings from markdown cells in a Jupyter notebook and
148+
generates a markdown-formatted table of contents. The TOC is inserted into
149+
the notebook at the location of a placeholder (default: '[TOC]') or where
150+
a '# Table of Contents' heading exists. Links in the TOC point to notebook
151+
anchors created from the heading text.
152+
""",
153+
formatter_class=ap.RawDescriptionHelpFormatter,
71154
)
72155
parser.add_argument("notebook", type=str, help="Path to the notebook to process")
73156
parser.add_argument(
@@ -80,22 +163,75 @@ def main():
80163
default=False,
81164
help="Force overwrite of original notebook",
82165
)
166+
parser.add_argument(
167+
"--placeholder",
168+
"-p",
169+
type=str,
170+
default="[TOC]",
171+
help="Placeholder text to replace with the TOC (default: '[TOC]')",
172+
)
173+
parser.add_argument(
174+
"--header",
175+
type=str,
176+
default="# Table of Contents",
177+
help="Header text for the TOC (default: '# Table of Contents')",
178+
)
179+
parser.add_argument(
180+
"--verbose", "-v", action="store_true", help="Enable verbose output"
181+
)
182+
parser.add_argument(
183+
"--version", action="version", version=f"%(prog)s {__version__}"
184+
)
83185
args = parser.parse_args()
84186

85-
if not (input_nb := pathlib.Path(args.notebook)).exists():
86-
raise FileNotFoundError(input_nb)
87-
187+
# Set logging level based on verbosity
188+
if args.verbose:
189+
logger.setLevel(logging.DEBUG)
190+
191+
# Validate input file
192+
try:
193+
input_nb = pathlib.Path(args.notebook)
194+
if not input_nb.exists():
195+
logger.error(f"Input file not found: {input_nb}")
196+
sys.exit(1)
197+
if not input_nb.is_file():
198+
logger.error(f"Input path is not a file: {input_nb}")
199+
sys.exit(1)
200+
except Exception as e:
201+
logger.error(f"Error processing input path: {e}")
202+
sys.exit(1)
203+
204+
# Set output file path
88205
if args.output is None:
89206
output_nb = input_nb.with_suffix(".toc.ipynb")
90207
else:
91208
output_nb = pathlib.Path(args.output)
92209

93-
with output_nb.open("w", encoding="utf-8") as file:
94-
nbformat.write(build_toc(input_nb), file)
95-
96-
if args.force:
97-
input_nb.unlink()
98-
output_nb.rename(input_nb)
210+
# Create output directory if it doesn't exist
211+
output_nb.parent.mkdir(parents=True, exist_ok=True)
212+
213+
try:
214+
# Generate TOC and write to output file
215+
logger.info(f"Processing notebook: {input_nb}")
216+
toc_notebook, toc_replaced = build_toc(input_nb, args.placeholder, args.header)
217+
218+
if not toc_replaced:
219+
logger.warning("Skipping output - no placeholder found in notebook")
220+
sys.exit(0) # Exit with success code since it's not an error
221+
222+
with output_nb.open("w", encoding="utf-8") as file:
223+
nbformat.write(toc_notebook, file)
224+
logger.info(f"TOC written to: {output_nb}")
225+
226+
# Handle force option
227+
if args.force:
228+
logger.info(f"Replacing original notebook with TOC version")
229+
input_nb.unlink()
230+
output_nb.rename(input_nb)
231+
logger.info(f"Original notebook replaced with: {input_nb}")
232+
except Exception as e:
233+
logger.error(f"Error processing notebook: {e}")
234+
sys.exit(1)
99235

100236

101237
if __name__ == "__main__":

0 commit comments

Comments
 (0)