fix: auto-download outputs for music/speech/image and fix README examples

RyanLee-Dev · RyanLee-Dev · commit 38cdd8d24594 · 2026-04-08T11:05:18.000-07:00
- music/speech: always save to file (if --out is not given, auto-generate a
  timestamped filename: music_<timestamp>.<ext> or speech_<timestamp>.mp3)
- image: auto-download to current dir when --out-dir not specified
- Fix broken README examples: music now requires --lyrics or --instrumental
  (the CLI now fails with a usage error instead of only warning when lyrics
  are missing); replace invalid voice Boyan_new_hailuo with
  English_magnetic_voiced_man
- Add test coverage for new music API contract (lyrics required)

Made-with: Cursor
diff --git a/README.md b/README.md
@@ -47,7 +47,7 @@ mmx text chat --message "What is MiniMax?"
 mmx image "A cat in a spacesuit"
 mmx speech synthesize --text "Hello!" --out hello.mp3
 mmx video generate --prompt "Ocean waves at sunset"
-mmx music "Upbeat pop"
+mmx music generate --prompt "Upbeat pop" --lyrics "[verse] La da dee, sunny day"
 mmx search "MiniMax AI latest news"
 mmx vision photo.jpg
 mmx quota
@@ -87,16 +87,17 @@ mmx video download --file-id 176844028768320 --out video.mp4
 ```bash
 mmx speech synthesize --text "Hello!" --out hello.mp3
 mmx speech synthesize --text "Stream me" --stream | mpv -
-mmx speech synthesize --text "Hi" --voice Boyan_new_hailuo --speed 1.2
+mmx speech synthesize --text "Hi" --voice English_magnetic_voiced_man --speed 1.2
 echo "Breaking news" | mmx speech synthesize --text-file - --out news.mp3
 mmx speech voices
 ```
 
 ### `mmx music`
 
 ```bash
-mmx music "Upbeat pop"
+mmx music generate --prompt "Upbeat pop" --lyrics "[verse] La da dee, sunny day"
 mmx music generate --prompt "Jazz" --lyrics "La la la" --out song.mp3
+mmx music generate --prompt "Cinematic orchestral" --instrumental --out bgm.mp3
 ```
 
 ### `mmx vision`
diff --git a/README_CN.md b/README_CN.md
@@ -47,7 +47,7 @@ mmx text chat --message "你好，MiniMax！"
 mmx image "一只穿宇航服的猫"
 mmx speech synthesize --text "你好！" --out hello.mp3
 mmx video generate --prompt "海浪拍打礁石"
-mmx music "欢快的流行乐"
+mmx music generate --prompt "欢快的流行乐" --lyrics "[主歌] 啦啦啦，阳光照"
 mmx search "MiniMax AI 最新动态"
 mmx vision photo.jpg
 mmx quota
@@ -87,16 +87,17 @@ mmx video download --file-id 176844028768320 --out video.mp4
 ```bash
 mmx speech synthesize --text "你好！" --out hello.mp3
 mmx speech synthesize --text "流式输出" --stream | mpv -
-mmx speech synthesize --text "Hi" --voice Boyan_new_hailuo --speed 1.2
+mmx speech synthesize --text "Hi" --voice English_magnetic_voiced_man --speed 1.2
 echo "头条新闻" | mmx speech synthesize --text-file - --out news.mp3
 mmx speech voices
 ```
 
 ### `mmx music`
 
 ```bash
-mmx music "欢快的流行乐"
+mmx music generate --prompt "欢快的流行乐" --lyrics "[主歌] 啦啦啦，阳光照"
 mmx music generate --prompt "爵士风" --lyrics "啦啦啦" --out song.mp3
+mmx music generate --prompt "史诗管弦乐" --instrumental --out bgm.mp3
 ```
 
 ### `mmx vision`
diff --git a/src/commands/image/generate.ts b/src/commands/image/generate.ts
@@ -107,44 +107,28 @@ export default defineCommand({
       process.stderr.write('[Model: image-01]\n');
     }
 
-    // Download if --out-dir specified
-    if (flags.outDir) {
-      const outDir = flags.outDir as string;
-      if (!existsSync(outDir)) mkdirSync(outDir, { recursive: true });
-
-      const prefix = (flags.outPrefix as string) || 'image';
-      const saved: string[] = [];
-
-      for (let i = 0; i < imageUrls.length; i++) {
-        const filename = `${prefix}_${String(i + 1).padStart(3, '0')}.jpg`;
-        const destPath = join(outDir, filename);
-        await downloadFile(imageUrls[i]!, destPath, { quiet: config.quiet });
-        saved.push(destPath);
-      }
+    const outDir = (flags.outDir as string | undefined) ?? '.';
+    if (!existsSync(outDir)) mkdirSync(outDir, { recursive: true });
 
-      if (config.quiet) {
-        console.log(saved.join('\n'));
-      } else {
-        console.log(formatOutput({
-          id: response.data.task_id,
-          saved,
-          success_count: response.data.success_count,
-          failed_count: response.data.failed_count,
-        }, format));
-      }
-      return;
+    const prefix = (flags.outPrefix as string) || 'image';
+    const saved: string[] = [];
+
+    for (let i = 0; i < imageUrls.length; i++) {
+      const filename = `${prefix}_${String(i + 1).padStart(3, '0')}.jpg`;
+      const destPath = join(outDir, filename);
+      await downloadFile(imageUrls[i]!, destPath, { quiet: config.quiet });
+      saved.push(destPath);
     }
 
     if (config.quiet) {
-      console.log(imageUrls.join('\n'));
-      return;
+      console.log(saved.join('\n'));
+    } else {
+      console.log(formatOutput({
+        id: response.data.task_id,
+        saved,
+        success_count: response.data.success_count,
+        failed_count: response.data.failed_count,
+      }, format));
     }
-
-    console.log(formatOutput({
-      id: response.data.task_id,
-      images: imageUrls,
-      success_count: response.data.success_count,
-      failed_count: response.data.failed_count,
-    }, format));
   },
 });
diff --git a/src/commands/music/generate.ts b/src/commands/music/generate.ts
@@ -13,7 +13,7 @@ import type { MusicRequest, MusicResponse } from '../../types/api';
 export default defineCommand({
   name: 'music generate',
   description: 'Generate a song (music-2.5)',
-  usage: 'mmx music generate --prompt <text> [--lyrics <text>] [--out <path>] [flags]',
+  usage: 'mmx music generate --prompt <text> (--lyrics <text> | --instrumental) [--out <path>] [flags]',
   options: [
     { flag: '--prompt <text>', description: 'Music style description (can be detailed — see examples)' },
     { flag: '--lyrics <text>', description: 'Song lyrics with structure tags. Use "无歌词" for instrumental music. Cannot be used with --instrumental.' },
@@ -100,21 +100,27 @@ export default defineCommand({
       throw new CLIError(
         'At least one of --prompt or --lyrics is required.',
         ExitCode.USAGE,
-        'mmx music generate --prompt <text> [--lyrics <text>]',
+        'mmx music generate --prompt <text> --lyrics <text>',
       );
     }
 
-    if (!lyrics) {
-      process.stderr.write('Warning: No lyrics provided. Use --lyrics or --lyrics-file to include lyrics.\n');
+    if (!lyrics?.trim()) {
+      throw new CLIError(
+        'The API requires lyrics. Add --lyrics or --lyrics-file, or use --instrumental (or --lyrics "无歌词") for instrumental output.',
+        ExitCode.USAGE,
+        'mmx music generate --prompt <text> --lyrics <text>',
+      );
     }
 
     if (structuredParts.length > 0) {
       const structured = structuredParts.join('. ');
       prompt = prompt ? `${prompt}. ${structured}` : structured;
     }
 
-    const outPath = flags.out as string | undefined;
-    const outFormat = outPath ? 'hex' : 'url';
+    const ts = new Date().toISOString().slice(0, 19).replace(/[T:]/g, '-');
+    const ext = (flags.format as string) || 'mp3';
+    const outPath = (flags.out as string | undefined) ?? `music_${ts}.${ext}`;
+    const outFormat = 'hex';
     const format = detectOutputFormat(config.output);
 
     const body: MusicRequest = {
diff --git a/src/commands/speech/synthesize.ts b/src/commands/speech/synthesize.ts
@@ -55,8 +55,9 @@ export default defineCommand({
 
     const model = (flags.model as string) || 'speech-2.8-hd';
     const voice = (flags.voice as string) || 'English_expressive_narrator';
-    const outPath = flags.out as string | undefined;
-    const outFormat = outPath ? 'hex' : 'url';
+    const ts = new Date().toISOString().slice(0, 19).replace(/[T:]/g, '-');
+    const outPath = (flags.out as string | undefined) ?? `speech_${ts}.mp3`;
+    const outFormat = 'hex';
     const format = detectOutputFormat(config.output);
 
     const body: SpeechRequest = {
diff --git a/test/commands/music/generate.test.ts b/test/commands/music/generate.test.ts
@@ -38,6 +38,12 @@ describe('music generate command', () => {
     ).rejects.toThrow('At least one of --prompt or --lyrics is required');
   });
 
+  it('requires lyrics when only prompt is given (API contract)', async () => {
+    await expect(
+      generateCommand.execute(baseConfig, { ...baseFlags, prompt: 'Upbeat pop' }),
+    ).rejects.toThrow('The API requires lyrics');
+  });
+
   it('structured flags are appended to prompt (dry-run)', async () => {
     // Use dryRun=true so no real API call is made.
     let resolved = false;
@@ -48,6 +54,7 @@ describe('music generate command', () => {
           ...baseFlags,
           dryRun: true,
           prompt: 'Indie folk',
+          lyrics: '[verse] placeholder',
           vocals: 'warm male and bright female duet',
           genre: 'folk',
           mood: 'warm',
@@ -57,7 +64,7 @@ describe('music generate command', () => {
         },
       );
       resolved = true;
-    } catch (_) {
+    } catch {
       // dryRun may resolve or reject depending on output routing; either is fine
       resolved = true;
     }
@@ -116,7 +123,7 @@ describe('music generate command', () => {
         { ...baseFlags, dryRun: true, prompt: 'Folk', lyrics: '无歌词' },
       );
       resolved = true;
-    } catch (_) {
+    } catch {
       resolved = true;
     }
     expect(resolved).toBe(true);
@@ -130,7 +137,7 @@ describe('music generate command', () => {
         { ...baseFlags, dryRun: true, prompt: 'Folk', lyrics: 'no lyrics' },
       );
       resolved = true;
-    } catch (_) {
+    } catch {
       resolved = true;
     }
     expect(resolved).toBe(true);