Skip to content

Commit 5d6a289

Browse files
committed
dotfiles: add coqui-read helper
1 parent 58e45bd commit 5d6a289

File tree

1 file changed

+171
-0
lines changed

1 file changed

+171
-0
lines changed

dotfiles/lib/functions/coqui-read

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
#!/usr/bin/env bash
2+
3+
# coqui-read: run the embedded Python text-to-speech helper.
#
# The Python program below is written to a temporary script file and executed
# with the caller's arguments. When called with no arguments while stdin is
# not a terminal, stdin is first copied to a temporary file and passed to the
# helper through its hidden --stdin-file option.
function coqui-read {
  local script_file stdin_file

  # Stop early if no temp file can be created; otherwise the redirections
  # below would target an empty path.
  script_file="$(mktemp)" || {
    echo "coqui-read: could not create a temporary script file" >&2
    return 1
  }
  stdin_file=""

  if [[ "$#" -eq 0 && ! -t 0 ]]; then
    stdin_file="$(mktemp)" || {
      echo "coqui-read: could not create a temporary file for stdin" >&2
      rm -f "$script_file"
      return 1
    }
    cat > "$stdin_file"
    set -- --stdin-file "$stdin_file"
  fi

  # Quoted delimiter: the Python source is written verbatim, without any
  # shell expansion.
  cat > "$script_file" <<'PY'
15+
import argparse
16+
import re
17+
import subprocess
18+
import sys
19+
import tempfile
20+
import urllib.parse
21+
import urllib.request
22+
from pathlib import Path
23+
24+
25+
DEFAULT_HOST = "http://[::1]:11115"
26+
27+
28+
def split_sentences(text: str) -> list[str]:
    """Split *text* after '.', '!' or '?' followed by whitespace.

    Surrounding whitespace is stripped from every piece and empty pieces are
    discarded, so blank input yields an empty list.
    """
    sentences: list[str] = []
    for piece in re.split(r"(?<=[.!?])\s+", text.strip()):
        cleaned = piece.strip()
        if cleaned:
            sentences.append(cleaned)
    return sentences
31+
32+
33+
def _wrap_words(text: str, max_chars: int) -> list[str]:
    """Break *text* into pieces of at most *max_chars* characters.

    Pieces are packed word by word (runs of whitespace collapse to a single
    space); a word that is itself longer than the limit is sliced mid-word.
    """
    pieces: list[str] = []
    current = ""
    for word in text.split():
        while len(word) > max_chars:
            # Flush what has been packed so far, then emit a full-width slice.
            if current:
                pieces.append(current)
                current = ""
            pieces.append(word[:max_chars])
            word = word[max_chars:]
        if not word:
            continue
        candidate = word if not current else f"{current} {word}"
        if len(candidate) <= max_chars:
            current = candidate
        else:
            # `current` is non-empty here: a lone word always fits by now.
            pieces.append(current)
            current = word
    if current:
        pieces.append(current)
    return pieces


def split_text(text: str, mode: str, max_chars: int) -> list[str]:
    """Split *text* into chunks of at most *max_chars* characters.

    CR and CRLF line endings are normalized to LF first. In ``"sentences"``
    mode every sentence is a unit; in any other mode the units are paragraphs
    separated by blank lines. A unit that fits the limit is emitted unchanged.
    A longer unit is re-packed sentence by sentence, and a single sentence
    that still exceeds the limit is wrapped on whitespace so that no chunk
    is ever longer than *max_chars*.

    Args:
        text: Raw input text.
        mode: ``"sentences"`` or anything else for paragraph chunking.
        max_chars: Upper bound for the length of each returned chunk.

    Returns:
        The chunks in reading order; an empty list for blank input.

    Raises:
        ValueError: If *max_chars* is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError(f"max_chars must be at least 1, got {max_chars}")

    normalized = re.sub(r"\r\n?", "\n", text).strip()
    if not normalized:
        return []

    if mode == "sentences":
        units = split_sentences(normalized)
    else:
        units = [chunk.strip() for chunk in re.split(r"\n\s*\n+", normalized) if chunk.strip()]

    chunks: list[str] = []
    for unit in units:
        if len(unit) <= max_chars:
            chunks.append(unit)
            continue

        current = ""
        for sentence in split_sentences(unit):
            if len(sentence) > max_chars:
                # Sentence boundaries cannot help any further: hard-wrap it.
                if current:
                    chunks.append(current)
                    current = ""
                chunks.extend(_wrap_words(sentence, max_chars))
                continue

            candidate = sentence if not current else f"{current} {sentence}"
            if len(candidate) <= max_chars:
                current = candidate
            else:
                # `current` is non-empty here because `sentence` alone fits.
                chunks.append(current)
                current = sentence
        if current:
            chunks.append(current)

    return chunks
67+
68+
69+
def build_url(base_url: str, text: str, speaker: str | None, language: str | None) -> str:
70+
params = {"text": text}
71+
if speaker:
72+
params["speaker_id"] = speaker
73+
if language:
74+
params["language_id"] = language
75+
query = urllib.parse.urlencode(params)
76+
return f"{base_url.rstrip('/')}/api/tts?{query}"
77+
78+
79+
def synthesize_chunk(base_url: str, text: str, speaker: str | None, language: str | None) -> Path:
    """Request speech for *text* and store the response body in a temp wav file.

    The URL comes from ``build_url``; the request times out after 300
    seconds. The file is created with ``delete=False``, so the caller owns it
    and is responsible for removing it.

    Returns:
        Path of the newly written ``coqui-read-*.wav`` file.

    Raises:
        Whatever ``urllib.request.urlopen`` or the file write raises. If the
        write fails, the partially written file is removed before re-raising.
    """
    request = urllib.request.Request(build_url(base_url, text, speaker, language))
    with urllib.request.urlopen(request, timeout=300) as response:
        wav_data = response.read()

    temp_file = tempfile.NamedTemporaryFile(prefix="coqui-read-", suffix=".wav", delete=False)
    wav_path = Path(temp_file.name)
    try:
        # The context manager closes the handle even when the write fails.
        with temp_file:
            temp_file.write(wav_data)
    except BaseException:
        # Do not leave a truncated wav behind that nobody will clean up.
        wav_path.unlink(missing_ok=True)
        raise
    return wav_path
88+
89+
90+
def play_file(path: Path, player: str) -> None:
    """Play *path* by running *player* and waiting for it to finish.

    ``ffplay`` is started with ``-nodisp -autoexit -loglevel warning``; any
    other player is invoked with the file path as its only argument. A
    non-zero exit status raises ``subprocess.CalledProcessError``.
    """
    ffplay_flags = ["-nodisp", "-autoexit", "-loglevel", "warning"]
    extra_flags = ffplay_flags if player == "ffplay" else []
    subprocess.run([player, *extra_flags, str(path)], check=True)
96+
97+
98+
def read_input(inputs: list[str]) -> str:
    """Return the text to speak.

    With no arguments, stdin is read. A single argument that names an
    existing non-directory path is read as a text file. Anything else is
    treated as literal text, with multiple arguments joined by spaces.
    """
    if not inputs:
        return sys.stdin.read()

    if len(inputs) == 1:
        candidate = Path(inputs[0])
        try:
            # Directories cannot be read as text, so they count as plain text.
            readable = candidate.exists() and not candidate.is_dir()
        except (OSError, ValueError):
            # A long sentence passed as one argument is not a valid file name
            # (e.g. ENAMETOOLONG); it is text, not a path.
            readable = False
        if readable:
            return candidate.read_text()

    return " ".join(inputs)
104+
105+
106+
def main() -> int:
    """Parse the command line, then synthesize and play the text chunk by chunk.

    The text comes from ``--stdin-file`` when given, otherwise from
    ``read_input``. Each chunk is synthesized, its wav path is printed to
    stdout, and it is played unless ``--player none`` was requested.

    Generated wav files are removed right after playback. They are kept when
    ``--keep`` is given, and also when ``--player none`` is used, because in
    that mode the printed paths are the only result of the run.

    Returns:
        0 on success, 1 when there is no text or synthesis/playback fails,
        130 when interrupted from the keyboard.
    """
    parser = argparse.ArgumentParser(description="Read text incrementally through the local Coqui TTS service.")
    parser.add_argument("--stdin-file", default=None, help=argparse.SUPPRESS)
    parser.add_argument("inputs", nargs="*", help="Text to speak, or a single text-file path. Reads stdin when omitted.")
    parser.add_argument("--host", default=DEFAULT_HOST, help=f"Coqui server base URL. Default: {DEFAULT_HOST}")
    parser.add_argument("--speaker", default=None, help="Optional speaker_id value.")
    parser.add_argument("--language", default=None, help="Optional language_id value.")
    parser.add_argument(
        "--chunk-mode",
        choices=["paragraphs", "sentences"],
        default="paragraphs",
        help="Chunking strategy before synthesis.",
    )
    parser.add_argument("--max-chars", type=int, default=700, help="Maximum characters per synthesized chunk.")
    parser.add_argument(
        "--player",
        default="ffplay",
        help="Playback command. Use 'none' to only synthesize and print wav paths.",
    )
    parser.add_argument(
        "--keep",
        action="store_true",
        help="Keep generated wav files on disk instead of deleting them after playback.",
    )
    args = parser.parse_args()
    if args.max_chars < 1:
        parser.error("--max-chars must be a positive integer")

    if args.stdin_file:
        text = Path(args.stdin_file).read_text()
    else:
        text = read_input(args.inputs)
    chunks = split_text(text, args.chunk_mode, args.max_chars)
    if not chunks:
        print("No text to synthesize.", file=sys.stderr)
        return 1

    playback = args.player != "none"
    # Without playback the printed paths are the output; deleting the files
    # would hand the caller dangling paths.
    keep_files = args.keep or not playback

    try:
        for index, chunk in enumerate(chunks, start=1):
            print(f"[{index}/{len(chunks)}] Synthesizing {len(chunk)} chars...", file=sys.stderr)
            wav_path = synthesize_chunk(args.host, chunk, args.speaker, args.language)
            try:
                print(wav_path)
                if playback:
                    play_file(wav_path, args.player)
            finally:
                # Remove each wav as soon as it has been played so long texts
                # do not pile up files in the temp directory.
                if not keep_files:
                    wav_path.unlink(missing_ok=True)
    except subprocess.CalledProcessError as exc:
        print(f"coqui-read: player {args.player!r} exited with status {exc.returncode}", file=sys.stderr)
        return 1
    except OSError as exc:
        # Covers urllib's URLError/HTTPError, socket timeouts and a missing
        # player executable, all of which are OSError subclasses.
        print(f"coqui-read: {exc}", file=sys.stderr)
        return 1
    except KeyboardInterrupt:
        # Exit quietly with the conventional interrupt status instead of a
        # traceback.
        return 130

    return 0
156+
157+
158+
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())
160+
PY
161+
162+
# Run the generated helper, remember its exit status, then remove the
# temporary files before handing that status back to the caller.
  python3 "$script_file" "$@"
  local exit_code=$?

  rm -f "$script_file"
  # stdin_file is only non-empty when stdin was spooled to a temp file.
  [[ -z "$stdin_file" ]] || rm -f "$stdin_file"

  return "$exit_code"
}

# Invoke the function with the script's own arguments.
coqui-read "$@"

0 commit comments

Comments
 (0)