Merge pull request #15 from DaleStudy/perf/10-reduce-subrequests

soobing · web-flow · commit a430e96440e8 · 2026-04-08T22:37:33.000+09:00
perf: GraphQL 배치 조회 + OpenAI 배치 분석으로 subrequest 절감
diff --git a/handlers/learning-status.js b/handlers/learning-status.js
@@ -10,7 +10,7 @@ import {
   fetchCohortUserSolutions,
   fetchPRSubmissions,
 } from "../utils/learningData.js";
-import { generateApproachAnalysis } from "../utils/openai.js";
+import { generateBatchApproachAnalysis } from "../utils/openai.js";
 import {
   formatLearningStatusComment,
   upsertLearningStatusComment,
@@ -105,9 +105,10 @@ export async function postLearningStatus(
     `[learningStatus] PR #${prNumber}: analyzing ${submissions.length} submission(s) for ${username}`
   );
 
-  // 4. 제출 파일별 AI 분석
+  // 4. 제출 파일 코드 다운로드 (N회 fetch)
   const submissionResults = [];
-  const totalUsage = { prompt_tokens: 0, completion_tokens: 0 };
+  const batchItems = [];
+  const batchIndices = [];
 
   for (const submission of submissions) {
     const problemInfo = categories[submission.problemName];
@@ -126,7 +127,6 @@ export async function postLearningStatus(
     }
 
     try {
-      // 파일 원본 내용 가져오기
       const rawResponse = await fetch(submission.rawUrl);
       if (!rawResponse.ok) {
         throw new Error(`Failed to fetch raw file: ${rawResponse.status} ${rawResponse.statusText}`);
@@ -140,27 +140,18 @@ export async function postLearningStatus(
         );
       }
 
-      const analysis = await generateApproachAnalysis(
-        fileContent,
-        submission.problemName,
-        problemInfo,
-        openaiApiKey
-      );
-
-      if (analysis.usage) {
-        totalUsage.prompt_tokens += analysis.usage.prompt_tokens ?? 0;
-        totalUsage.completion_tokens += analysis.usage.completion_tokens ?? 0;
-      }
-
+      const idx = submissionResults.length;
       submissionResults.push({
         problemName: submission.problemName,
         difficulty: problemInfo.difficulty,
-        matches: analysis.matches,
-        explanation: analysis.explanation,
+        matches: null,
+        explanation: "",
       });
+      batchItems.push({ problemName: submission.problemName, fileContent, problemInfo });
+      batchIndices.push(idx);
     } catch (error) {
       console.error(
-        `[learningStatus] Failed to analyze "${submission.problemName}": ${error.message}`
+        `[learningStatus] Failed to fetch "${submission.problemName}": ${error.message}`
       );
       submissionResults.push({
         problemName: submission.problemName,
@@ -171,13 +162,33 @@ export async function postLearningStatus(
     }
   }
 
-  const hasUsage = totalUsage.prompt_tokens > 0 || totalUsage.completion_tokens > 0;
+  // 5. AI 일괄 분석 (1회 OpenAI 호출로 모든 제출 파일 분석)
+  let totalUsage = null;
+  if (batchItems.length > 0) {
+    try {
+      console.log(
+        `[learningStatus] PR #${prNumber}: batch analyzing ${batchItems.length} file(s) via OpenAI`
+      );
+      const { results: batchResults, usage } = await generateBatchApproachAnalysis(batchItems, openaiApiKey);
+      totalUsage = usage;
+      for (let i = 0; i < batchResults.length; i++) {
+        submissionResults[batchIndices[i]].matches = batchResults[i].matches;
+        submissionResults[batchIndices[i]].explanation = batchResults[i].explanation;
+      }
+    } catch (error) {
+      console.error(
+        `[learningStatus] Batch analysis failed: ${error.message}`
+      );
+    }
+  }
+
+  const hasUsage = totalUsage != null;
 
-  // 5. 카테고리별 진행도 계산
+  // 6. 카테고리별 진행도 계산
   const totalProblems = Object.keys(categories).length;
   const categoryProgress = buildCategoryProgress(categories, solvedProblems);
 
-  // 6. 댓글 본문 포맷
+  // 7. 댓글 본문 포맷
   const commentBody = formatLearningStatusComment(
     username,
     submissionResults,
@@ -186,7 +197,7 @@ export async function postLearningStatus(
     categoryProgress
   );
 
-  // 7. 댓글 생성 또는 업데이트
+  // 8. 댓글 생성 또는 업데이트
   await upsertLearningStatusComment(
     repoOwner,
     repoName,
@@ -196,7 +207,7 @@ export async function postLearningStatus(
     hasUsage ? totalUsage : null
   );
 
-  // 8. 결과 반환
+  // 9. 결과 반환
   const matchedCount = submissionResults.filter((r) => r.matches === true).length;
   return { analyzed: submissionResults.length, matched: matchedCount };
 }
diff --git a/utils/learningData.js b/utils/learningData.js
@@ -80,16 +80,21 @@ async function fetchActiveCohortProjectId(repoOwner, repoName, appToken) {
 }
 
 /**
- * 기수 프로젝트에서 해당 유저가 머지한 PR 번호 목록을 반환한다.
- * 프로젝트 아이템을 페이지네이션하며 author.login으로 필터링한다.
+ * 기수 프로젝트의 아이템을 페이지네이션하며 해당 유저가 머지한 PR의
+ * 파일 경로를 GraphQL로 한 번에 조회하여 풀이한 문제 이름 목록을 반환한다.
+ *
+ * PR별 REST 호출 없이 GraphQL 응답에 files를 포함시켜 subrequest를 절약한다.
  *
  * @param {string} projectId
  * @param {string} username
  * @param {string} appToken
- * @returns {Promise<number[]>}
+ * @returns {Promise<string[]>}
  */
-async function fetchUserMergedPRsInProject(projectId, username, appToken) {
-  const prNumbers = [];
+async function fetchCohortSolvedFromProject(projectId, username, appToken) {
+  const usernamePattern = new RegExp(
+    `^([^/]+)/${escapeRegExp(username)}\\.[^/]+$`
+  );
+  const problemNames = new Set();
   let cursor = null;
 
   while (true) {
@@ -103,9 +108,11 @@ async function fetchUserMergedPRsInProject(projectId, username, appToken) {
               nodes {
                 content {
                   ... on PullRequest {
-                    number
                     state
                     author { login }
+                    files(first: 100) {
+                      nodes { path }
+                    }
                   }
                 }
               }
@@ -124,15 +131,20 @@ async function fetchUserMergedPRsInProject(projectId, username, appToken) {
         pr?.state === "MERGED" &&
         pr?.author?.login?.toLowerCase() === username.toLowerCase()
       ) {
-        prNumbers.push(pr.number);
+        for (const file of pr.files?.nodes || []) {
+          const match = file.path.match(usernamePattern);
+          if (match) {
+            problemNames.add(match[1]);
+          }
+        }
       }
     }
 
     if (!pageInfo.hasNextPage) break;
     cursor = pageInfo.endCursor;
   }
 
-  return prNumbers;
+  return Array.from(problemNames);
 }
 
 /**
@@ -165,31 +177,17 @@ export async function fetchCohortUserSolutions(
     return fetchUserSolutions(repoOwner, repoName, username, appToken);
   }
 
-  const prNumbers = await fetchUserMergedPRsInProject(
+  const problems = await fetchCohortSolvedFromProject(
     projectId,
     username,
     appToken
   );
 
   console.log(
-    `[fetchCohortUserSolutions] ${username} has ${prNumbers.length} merged PRs in current cohort`
+    `[fetchCohortUserSolutions] ${username} solved ${problems.length} problems in current cohort`
   );
 
-  const problemNames = new Set();
-  for (const prNumber of prNumbers) {
-    const submissions = await fetchPRSubmissions(
-      repoOwner,
-      repoName,
-      prNumber,
-      username,
-      appToken
-    );
-    for (const { problemName } of submissions) {
-      problemNames.add(problemName);
-    }
-  }
-
-  return Array.from(problemNames);
+  return problems;
 }
 
 /**
diff --git a/utils/openai.js b/utils/openai.js
@@ -258,6 +258,119 @@ ${truncatedContent}
   };
 }
 
+/**
+ * Analyze, in a single API call, whether each of several solution files matches its intended approach.
+ * Files are processed as one batch instead of one call per file, to reduce the number of subrequests.
+ * In the batch path, throws if the HTTP response is not OK, the message content is empty, or it is not JSON with a "results" array.
+ * @param {Array<{problemName: string, fileContent: string, problemInfo: object}>} items - files to analyze; results come back in this order
+ * @param {string} apiKey - OpenAI API key
+ * @returns {Promise<{results: Array<{matches: boolean, explanation: string}>, usage: object|null}>}
+ */
+export async function generateBatchApproachAnalysis(items, apiKey) {
+  if (items.length === 0) return { results: [], usage: null }; // nothing to analyze, so no API call is made
+
+  // A single item is delegated to the existing per-file function
+  if (items.length === 1) {
+    const { fileContent, problemName, problemInfo } = items[0];
+    const result = await generateApproachAnalysis(fileContent, problemName, problemInfo, apiKey);
+    return {
+      results: [{ matches: result.matches, explanation: result.explanation }],
+      usage: result.usage ?? null,
+    };
+  }
+
+  const systemPrompt = `You are an algorithm analysis expert. You will receive multiple problems. For each one, determine if the submitted code matches the intended approach.
+
+Respond with a JSON object containing a "results" array with exactly ${items.length} entries, in the same order as the input:
+{
+  "results": [
+    { "matches": true, "explanation": "한국어 1문장, 80자 이내" },
+    ...
+  ]
+}
+
+Rules:
+- matches=true if the core data structure or algorithm matches the intended approach
+- matches=false if brute force was used when an optimized approach was intended
+- Keep each explanation to 1 sentence in Korean, 80 characters or fewer
+- You MUST return exactly ${items.length} results`;
+
+  const MAX_BATCH_FILE_SIZE = 5000; // per-file cap (string length) on the source text embedded in the prompt
+
+  const problemSections = items.map(({ problemName, fileContent, problemInfo }, i) => {
+    const truncated = fileContent.slice(0, MAX_BATCH_FILE_SIZE);
+    return `## 문제 ${i + 1}: ${problemName}
+- 난이도: ${problemInfo.difficulty}
+- 카테고리: ${(problemInfo.categories || []).join(", ")}
+- 의도된 접근법: ${problemInfo.intended_approach}
+
+\`\`\`
+${truncated}
+\`\`\``;
+  });
+
+  const userPrompt = problemSections.join("\n\n") +
+    `\n\n위 ${items.length}개 코드가 각각 의도된 접근법과 일치하는지 분석해주세요.`;
+
+  const response = await fetch("https://api.openai.com/v1/chat/completions", {
+    method: "POST",
+    headers: {
+      Authorization: `Bearer ${apiKey}`,
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      model: "gpt-4.1-nano",
+      messages: [
+        { role: "system", content: systemPrompt },
+        { role: "user", content: userPrompt },
+      ],
+      response_format: { type: "json_object" },
+      max_tokens: 200 * items.length, // completion budget scales with the number of files in the batch
+      temperature: 0.2,
+    }),
+  });
+
+  if (!response.ok) { // failed request: include the response body text in the thrown error
+    const error = await response.text();
+    throw new Error(`OpenAI batch API error: ${error}`);
+  }
+
+  const data = await response.json();
+  const content = data.choices[0]?.message?.content;
+
+  if (!content) {
+    throw new Error("Empty response from OpenAI batch analysis");
+  }
+
+  let parsed;
+  try {
+    parsed = JSON.parse(content);
+  } catch {
+    throw new Error(`OpenAI returned invalid JSON: ${content.slice(0, 200)}`);
+  }
+
+  const rawResults = parsed.results;
+  if (!Array.isArray(rawResults)) {
+    throw new Error(`OpenAI did not return a results array`);
+  }
+
+  if (rawResults.length !== items.length) { // a count mismatch is only logged; missing entries get defaults below
+    console.warn(
+      `[generateBatchApproachAnalysis] Expected ${items.length} results, got ${rawResults.length}`
+    );
+  }
+
+  const results = items.map((_, i) => { // normalize to exactly one entry per input item, by position
+    const r = rawResults[i];
+    return {
+      matches: r?.matches === true, // anything other than a literal true (including a missing entry) becomes false
+      explanation: typeof r?.explanation === "string" ? r.explanation : "",
+    };
+  });
+
+  return { results, usage: data.usage ?? null };
+}
+
 /**
  * 솔루션의 시간/공간 복잡도 분석.
  * 사용자가 코드 어딘가에 자유 포맷으로 남긴 TC/SC 주석을 함께 추출하여