11#!/usr/bin/env python
2+ # ruff: noqa: G004
23"""CLI script to build a table of contents for an IPython notebook"""
34
45import argparse as ap
6+ import logging
57import pathlib
68import re
9+ import sys
710from collections import namedtuple
811
912import nbformat
1013from nbformat import NotebookNode
1114
__version__ = "0.1.1"

# Logging: "LEVEL: message" lines, INFO and above unless raised elsewhere.
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
logger = logging.getLogger("toc")

# One table-of-contents row: heading depth, display text, and link target.
TocEntry = namedtuple("TocEntry", "level text anchor")
1322
1423
def extract_markdown_cells(notebook: NotebookNode) -> str:
    """Extract the markdown cells from a notebook.

    Args:
        notebook: A notebook object exposing a ``cells`` sequence whose items
            have ``cell_type`` and ``source`` attributes.

    Returns:
        The sources of all markdown cells, in notebook order, joined by a
        single newline. An empty string when the notebook has no markdown
        cells.
    """
    # Join with a bare newline: any padding after it would push the first line
    # of each following cell away from column 0, which is where headings are
    # recognised when the text is scanned line by line.
    return "\n".join(
        cell.source for cell in notebook.cells if cell.cell_type == "markdown"
    )
2036
2137
def extract_toc(notebook: str) -> list[TocEntry]:
    """Extract the table of contents from a markdown string.

    Scans the text line by line for ATX headings (one to six ``#`` at the
    start of the line, followed by whitespace and the heading text) and turns
    each one into a ``TocEntry``. Lines inside fenced code blocks are skipped,
    so a ``# comment`` in an embedded code sample is not mistaken for a
    heading.

    Args:
        notebook: String containing markdown content.

    Returns:
        One ``TocEntry(level, text, anchor)`` per heading, in document order.
        ``level`` is the number of ``#`` characters, ``text`` is the heading
        text without trailing whitespace, and ``anchor`` is the lower-cased
        text with every character other than word characters, whitespace and
        ``-`` removed and each whitespace run replaced by a single ``-``.
    """
    heading_re = re.compile(r"(#{1,6})\s+(.+)")
    # A fence is three or more backticks or tildes, indented at most 3 spaces.
    fence_re = re.compile(r" {0,3}(`{3,}|~{3,})")

    toc = []
    open_fence = None  # marker that opened the current code fence, if any

    for line in notebook.splitlines():
        if fence := fence_re.match(line):
            marker, rest = fence.group(1), line[fence.end():]
            if open_fence is None:
                # A backtick fence may not have backticks after the marker;
                # such a line is an inline code span, not a fence opener.
                if marker[0] != "`" or "`" not in rest:
                    open_fence = marker
            elif marker.startswith(open_fence) and not rest.strip():
                # Closing fence: same character, at least as long, nothing after.
                open_fence = None
            continue  # a fence-like line can never be a heading itself
        if open_fence is not None:
            continue

        found = heading_re.match(line)
        if not found:
            continue

        hashes, text = found.groups()
        level = len(hashes)
        text = text.rstrip()

        # Backticks and other punctuation are dropped by the character filter.
        # NOTE(review): anchors are lower-cased here; confirm the notebook
        # front end derives heading ids the same way.
        clean_text = re.sub(r"[^\w\s-]", "", text)
        anchor = "-".join(clean_text.lower().split())

        toc.append(TocEntry(level, text, anchor))
        logger.debug(f"Found heading (level {level}): {text}")

    return toc
3373
3474
def markdown_toc(toc: list[TocEntry], *, indent: str = "  ") -> str:
    """Build a string representation of the toc as a nested markdown list.

    Each entry becomes one bullet of the form ``- [text](#anchor)``, preceded
    by ``indent`` repeated ``entry.level`` times.

    Args:
        toc: List of TocEntry objects (anything with ``level``, ``text`` and
            ``anchor`` attributes works).
        indent: Whitespace added per heading level. The two-space default
            puts a child bullet exactly under its parent's text, which is the
            minimum indentation for it to count as nested in CommonMark.

    Returns:
        The bullets joined by newlines, or an empty string for an empty toc.
    """
    return "\n".join(
        f"{indent * entry.level}- [{entry.text}](#{entry.anchor})" for entry in toc
    )
4289
4390
def build_toc(
    nb_path: pathlib.Path,
    placeholder: str = "[TOC]",
    toc_header: str = "# Table of Contents",
) -> tuple[NotebookNode, bool]:
    """Build a table of contents for a notebook and insert it at a placeholder.

    The notebook is read from ``nb_path``, the headings of its markdown cells
    are collected, and the first cell whose source starts with ``placeholder``
    or ``toc_header`` is replaced by ``toc_header`` followed by the generated
    list. Only the in-memory notebook is modified; nothing is written to disk
    here.

    Args:
        nb_path: Path to the notebook file.
        placeholder: The text to replace with the generated TOC
            (default: "[TOC]"). An empty string is ignored.
        toc_header: The header text to use for the TOC
            (default: "# Table of Contents"). A cell that already starts with
            it is treated as an existing TOC and regenerated. An empty string
            is not used for matching.

    Returns:
        tuple[NotebookNode, bool]: The notebook object and a flag telling
        whether a cell was found and replaced.

    Raises:
        Exception: Any error raised by ``nbformat.read`` is logged and
            re-raised unchanged.
    """
    # Read the notebook
    try:
        nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT)
    except Exception as e:
        logger.error(f"Failed to read notebook '{nb_path}': {e}")
        raise

    md_cells = extract_markdown_cells(nb_obj)

    # Build tree. A previously generated TOC cell is itself markdown that
    # starts with the TOC heading; leave that heading out so that re-running
    # gives the same result as the first run instead of a self-referencing row.
    own_heading = toc_header.lstrip("#").strip()
    toc_tree = [
        entry for entry in extract_toc(md_cells) if entry.text.strip() != own_heading
    ]

    if not toc_tree:
        logger.warning(f"No headings found in notebook '{nb_path}'")

    # Build toc representation
    toc_repr = markdown_toc(toc_tree)

    # Every string starts with "", so an empty marker would match (and
    # overwrite) the very first cell; only search for non-empty markers.
    markers = tuple(marker for marker in (placeholder, toc_header) if marker)
    target = next(
        (cell for cell in nb_obj.cells if cell.source.startswith(markers)), None
    )

    if target is None:
        logger.warning(
            f"Placeholder '{placeholder}' or heading '{toc_header}' not found in notebook"
        )
        return nb_obj, False

    # Insert it at the location of the placeholder
    target.source = f"{toc_header}\n{toc_repr}"
    if target.cell_type != "markdown":
        target.cell_type = "markdown"
        # The placeholder may sit in a code cell; drop the fields the notebook
        # format defines only for code cells so the converted cell stays valid.
        for key in ("outputs", "execution_count"):
            target.pop(key, None)

    return nb_obj, True
65140
66141
67142def main ():
68143 """CLI entry point"""
69144 parser = ap .ArgumentParser (
70- description = "Build a table of contents for an IPython notebook"
145+ description = "Build a table of contents for an IPython notebook" ,
146+ epilog = """
147+ This script extracts headings from markdown cells in a Jupyter notebook and
148+ generates a markdown-formatted table of contents. The TOC is inserted into
149+ the notebook at the location of a placeholder (default: '[TOC]') or where
150+ a '# Table of Contents' heading exists. Links in the TOC point to notebook
151+ anchors created from the heading text.
152+ """ ,
153+ formatter_class = ap .RawDescriptionHelpFormatter ,
71154 )
72155 parser .add_argument ("notebook" , type = str , help = "Path to the notebook to process" )
73156 parser .add_argument (
@@ -80,22 +163,75 @@ def main():
80163 default = False ,
81164 help = "Force overwrite of original notebook" ,
82165 )
166+ parser .add_argument (
167+ "--placeholder" ,
168+ "-p" ,
169+ type = str ,
170+ default = "[TOC]" ,
171+ help = "Placeholder text to replace with the TOC (default: '[TOC]')" ,
172+ )
173+ parser .add_argument (
174+ "--header" ,
175+ type = str ,
176+ default = "# Table of Contents" ,
177+ help = "Header text for the TOC (default: '# Table of Contents')" ,
178+ )
179+ parser .add_argument (
180+ "--verbose" , "-v" , action = "store_true" , help = "Enable verbose output"
181+ )
182+ parser .add_argument (
183+ "--version" , action = "version" , version = f"%(prog)s { __version__ } "
184+ )
83185 args = parser .parse_args ()
84186
85- if not (input_nb := pathlib .Path (args .notebook )).exists ():
86- raise FileNotFoundError (input_nb )
87-
187+ # Set logging level based on verbosity
188+ if args .verbose :
189+ logger .setLevel (logging .DEBUG )
190+
191+ # Validate input file
192+ try :
193+ input_nb = pathlib .Path (args .notebook )
194+ if not input_nb .exists ():
195+ logger .error (f"Input file not found: { input_nb } " )
196+ sys .exit (1 )
197+ if not input_nb .is_file ():
198+ logger .error (f"Input path is not a file: { input_nb } " )
199+ sys .exit (1 )
200+ except Exception as e :
201+ logger .error (f"Error processing input path: { e } " )
202+ sys .exit (1 )
203+
204+ # Set output file path
88205 if args .output is None :
89206 output_nb = input_nb .with_suffix (".toc.ipynb" )
90207 else :
91208 output_nb = pathlib .Path (args .output )
92209
93- with output_nb .open ("w" , encoding = "utf-8" ) as file :
94- nbformat .write (build_toc (input_nb ), file )
95-
96- if args .force :
97- input_nb .unlink ()
98- output_nb .rename (input_nb )
210+ # Create output directory if it doesn't exist
211+ output_nb .parent .mkdir (parents = True , exist_ok = True )
212+
213+ try :
214+ # Generate TOC and write to output file
215+ logger .info (f"Processing notebook: { input_nb } " )
216+ toc_notebook , toc_replaced = build_toc (input_nb , args .placeholder , args .header )
217+
218+ if not toc_replaced :
219+ logger .warning ("Skipping output - no placeholder found in notebook" )
220+ sys .exit (0 ) # Exit with success code since it's not an error
221+
222+ with output_nb .open ("w" , encoding = "utf-8" ) as file :
223+ nbformat .write (toc_notebook , file )
224+ logger .info (f"TOC written to: { output_nb } " )
225+
226+ # Handle force option
227+ if args .force :
228+ logger .info (f"Replacing original notebook with TOC version" )
229+ input_nb .unlink ()
230+ output_nb .rename (input_nb )
231+ logger .info (f"Original notebook replaced with: { input_nb } " )
232+ except Exception as e :
233+ logger .error (f"Error processing notebook: { e } " )
234+ sys .exit (1 )
99235
100236
101237if __name__ == "__main__" :
0 commit comments