|
| 1 | +#!/usr/bin/env bash |
| 2 | + |
# Run the Python program embedded in the heredoc below, forwarding all
# arguments to it. When called with no arguments and stdin is not a terminal,
# stdin is first saved to a temp file and passed via the --stdin-file option.
function coqui-read {
  local script_file stdin_file
  # Temp file that receives the embedded Python program.
  script_file="$(mktemp)"
  # Stays empty unless stdin is captured below; checked again during cleanup.
  stdin_file=""

  # No arguments and stdin is not a terminal: copy stdin to a temp file and
  # replace the (empty) argument list with "--stdin-file <path>".
  if [[ "$#" -eq 0 && ! -t 0 ]]; then
    stdin_file="$(mktemp)"
    cat > "$stdin_file"
    set -- --stdin-file "$stdin_file"
  fi

  # The quoted delimiter ('PY') disables shell expansion inside the heredoc,
  # so the Python source is written to the temp file verbatim.
  cat > "$script_file" <<'PY'
| 15 | +import argparse |
| 16 | +import re |
| 17 | +import subprocess |
| 18 | +import sys |
| 19 | +import tempfile |
| 20 | +import urllib.parse |
| 21 | +import urllib.request |
| 22 | +from pathlib import Path |
| 23 | +
|
| 24 | +
|
| 25 | +DEFAULT_HOST = "http://[::1]:11115" |
| 26 | +
|
| 27 | +
|
def split_sentences(text: str) -> list[str]:
    """Split *text* after ``.``, ``!`` or ``?`` when followed by whitespace.

    Returns the non-empty, stripped sentences in their original order.
    """
    parts = re.split(r"(?<=[.!?])\s+", text.strip())
    return [part.strip() for part in parts if part.strip()]


def _wrap_words(text: str, max_chars: int) -> list[str]:
    """Greedily pack whitespace-separated words into pieces of <= max_chars.

    Runs of whitespace inside *text* collapse to single spaces. A single word
    longer than *max_chars* is sliced into fixed-size fragments, since there
    is no better boundary to cut at.
    """
    pieces: list[str] = []
    current = ""
    for word in text.split():
        # Slice off full-width fragments until the remainder fits in a piece.
        while len(word) > max_chars:
            if current:
                pieces.append(current)
                current = ""
            pieces.append(word[:max_chars])
            word = word[max_chars:]

        candidate = word if not current else f"{current} {word}"
        if len(candidate) <= max_chars:
            current = candidate
        else:
            # `current` is non-empty here: a lone word always fits.
            pieces.append(current)
            current = word
    if current:
        pieces.append(current)
    return pieces


def split_text(text: str, mode: str, max_chars: int) -> list[str]:
    """Split *text* into chunks of at most *max_chars* characters.

    Line endings are normalized to ``\\n`` first. With ``mode == "sentences"``
    the text is cut into sentences; with any other mode it is cut into
    paragraphs separated by blank lines. A unit that exceeds *max_chars* is
    re-packed sentence by sentence, and a sentence that is still too long is
    wrapped at word boundaries so the limit is always honoured.

    Args:
        text: Raw input text.
        mode: ``"sentences"`` or anything else for paragraph splitting.
        max_chars: Upper bound on the length of each returned chunk.

    Returns:
        The chunks in reading order; empty when *text* is blank.

    Raises:
        ValueError: If *max_chars* is less than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    normalized = re.sub(r"\r\n?", "\n", text).strip()
    if not normalized:
        return []

    if mode == "sentences":
        units = split_sentences(normalized)
    else:
        units = [chunk.strip() for chunk in re.split(r"\n\s*\n+", normalized) if chunk.strip()]

    chunks: list[str] = []
    for unit in units:
        if len(unit) <= max_chars:
            chunks.append(unit)
            continue

        current = ""
        for sentence in split_sentences(unit):
            if len(sentence) > max_chars:
                # Flush what has been packed so far to keep reading order,
                # then fall back to word-level wrapping for this sentence.
                if current:
                    chunks.append(current)
                    current = ""
                chunks.extend(_wrap_words(sentence, max_chars))
                continue

            candidate = sentence if not current else f"{current} {sentence}"
            if len(candidate) <= max_chars:
                current = candidate
            else:
                # `current` is non-empty here: a lone fitting sentence always
                # takes the branch above.
                chunks.append(current)
                current = sentence
        if current:
            chunks.append(current)

    return chunks
| 67 | +
|
| 68 | +
|
| 69 | +def build_url(base_url: str, text: str, speaker: str | None, language: str | None) -> str: |
| 70 | + params = {"text": text} |
| 71 | + if speaker: |
| 72 | + params["speaker_id"] = speaker |
| 73 | + if language: |
| 74 | + params["language_id"] = language |
| 75 | + query = urllib.parse.urlencode(params) |
| 76 | + return f"{base_url.rstrip('/')}/api/tts?{query}" |
| 77 | +
|
| 78 | +
|
def synthesize_chunk(base_url: str, text: str, speaker: str | None, language: str | None) -> Path:
    """Fetch synthesized audio for *text* and store it in a temporary wav file.

    Issues a request to the URL produced by ``build_url`` (300 second timeout)
    and writes the response body to a ``coqui-read-*.wav`` temp file.

    Returns:
        Path of the written file. The caller owns it and must delete it.

    Raises:
        Whatever ``urllib.request.urlopen`` or the file write raises. If the
        write fails, the partially written temp file is removed first.
    """
    request = urllib.request.Request(build_url(base_url, text, speaker, language))
    with urllib.request.urlopen(request, timeout=300) as response:
        wav_data = response.read()

    # delete=False: the file has to outlive this function so it can be played.
    temp_file = tempfile.NamedTemporaryFile(prefix="coqui-read-", suffix=".wav", delete=False)
    wav_path = Path(temp_file.name)
    try:
        # The context manager closes the handle even when write() fails.
        with temp_file:
            temp_file.write(wav_data)
    except BaseException:
        # Nobody else knows this path yet, so clean it up here before re-raising.
        wav_path.unlink(missing_ok=True)
        raise
    return wav_path
| 88 | +
|
| 89 | +
|
def play_file(path: Path, player: str) -> None:
    """Run *player* on *path* and wait for it to finish.

    ``ffplay`` gets extra flags (no display window, exit at end of file,
    warning-level logging); any other player is invoked with just the path.
    A non-zero exit status raises ``subprocess.CalledProcessError``.
    """
    ffplay_flags = ["-nodisp", "-autoexit", "-loglevel", "warning"] if player == "ffplay" else []
    subprocess.run([player, *ffplay_flags, str(path)], check=True)
| 96 | +
|
| 97 | +
|
def read_input(inputs: list[str]) -> str:
    """Return the text to speak for the given positional arguments.

    * No arguments: read all of stdin.
    * Exactly one argument naming an existing path: return that file's text.
    * Otherwise: join the arguments with single spaces.

    Args:
        inputs: Positional command-line arguments.

    Returns:
        The text to synthesize.
    """
    if not inputs:
        return sys.stdin.read()

    if len(inputs) == 1:
        candidate = Path(inputs[0])
        try:
            is_existing_path = candidate.exists()
        except (OSError, ValueError):
            # Literal text can be an invalid path (e.g. longer than the OS
            # file-name limit); that means "not a file", not a fatal error.
            is_existing_path = False
        if is_existing_path:
            return candidate.read_text()

    return " ".join(inputs)
| 104 | +
|
| 105 | +
|
def main() -> int:
    """Parse arguments, then synthesize and optionally play each text chunk.

    The text comes from ``--stdin-file`` when given, otherwise from
    ``read_input``. It is chunked with ``split_text``; each chunk is
    synthesized with ``synthesize_chunk``, its wav path is printed to stdout,
    and it is played with ``play_file`` unless ``--player none`` was chosen.

    Generated files are deleted on exit unless ``--keep`` is set or playback
    is disabled, because with ``--player none`` the printed paths are the only
    output and must remain valid.

    Returns:
        0 on success; 1 when there is no text or synthesis/playback fails.
    """
    parser = argparse.ArgumentParser(description="Read text incrementally through the local Coqui TTS service.")
    parser.add_argument("--stdin-file", default=None, help=argparse.SUPPRESS)
    parser.add_argument("inputs", nargs="*", help="Text to speak, or a single text-file path. Reads stdin when omitted.")
    parser.add_argument("--host", default=DEFAULT_HOST, help=f"Coqui server base URL. Default: {DEFAULT_HOST}")
    parser.add_argument("--speaker", default=None, help="Optional speaker_id value.")
    parser.add_argument("--language", default=None, help="Optional language_id value.")
    parser.add_argument(
        "--chunk-mode",
        choices=["paragraphs", "sentences"],
        default="paragraphs",
        help="Chunking strategy before synthesis.",
    )
    parser.add_argument("--max-chars", type=int, default=700, help="Maximum characters per synthesized chunk.")
    parser.add_argument(
        "--player",
        default="ffplay",
        help="Playback command. Use 'none' to only synthesize and print wav paths (implies --keep).",
    )
    parser.add_argument(
        "--keep",
        action="store_true",
        help="Keep generated wav files on disk instead of deleting them after playback.",
    )
    args = parser.parse_args()

    # A zero or negative limit cannot be satisfied by any chunk.
    if args.max_chars < 1:
        parser.error("--max-chars must be a positive integer")

    if args.stdin_file:
        text = Path(args.stdin_file).read_text()
    else:
        text = read_input(args.inputs)
    chunks = split_text(text, args.chunk_mode, args.max_chars)
    if not chunks:
        print("No text to synthesize.", file=sys.stderr)
        return 1

    playback_enabled = args.player != "none"
    # Without playback the printed paths are the product; deleting the files
    # on exit would leave the caller with dangling paths.
    keep_files = args.keep or not playback_enabled

    created_files: list[Path] = []
    try:
        for index, chunk in enumerate(chunks, start=1):
            print(f"[{index}/{len(chunks)}] Synthesizing {len(chunk)} chars...", file=sys.stderr)
            wav_path = synthesize_chunk(args.host, chunk, args.speaker, args.language)
            created_files.append(wav_path)
            print(wav_path)
            if playback_enabled:
                play_file(wav_path, args.player)
    except (OSError, subprocess.CalledProcessError) as exc:
        # OSError covers urllib's URLError/HTTPError, timeouts and a missing
        # player binary; CalledProcessError is a player exiting non-zero.
        print(f"coqui-read: {exc}", file=sys.stderr)
        return 1
    finally:
        if not keep_files:
            for wav_path in created_files:
                wav_path.unlink(missing_ok=True)

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
| 160 | +PY |
| 161 | + |
  # Run the generated script with the (possibly rewritten) argument list.
  python3 "$script_file" "$@"
  # Save the interpreter's exit status before the cleanup commands reset $?.
  local exit_code=$?
  rm -f "$script_file"
  # The stdin capture file exists only when stdin was redirected earlier.
  if [[ -n "$stdin_file" ]]; then
    rm -f "$stdin_file"
  fi
  # Propagate the Python program's exit status to the caller.
  return "$exit_code"
}

# Invoke the function with the script's own command-line arguments.
coqui-read "$@"
0 commit comments