GameBoy Emulator 1
Game Boy emulator core and tooling
Loading...
Searching...
No Matches
groq_push_summary.py
1#!/usr/bin/env python3
2
3import json
4import os
5import re
6import subprocess
7import urllib.request
8from pathlib import Path
9
10
# Character budgets applied (via trim) to the git output embedded in the prompt.
MAX_DIFF_FILES_CHARS = 1_600
MAX_DIFF_PATCH_CHARS = 3_200

# Maximum number of commit subject lines listed before the rest are collapsed
# into an "... and N more commit(s)" line.
MAX_COMMITS = 20

# Model used when none is requested and the model catalogue cannot be consulted.
DEFAULT_MODEL = "openai/gpt-oss-20b"

# Candidate models in preference order; choose_model returns the first one
# that the API reports as available.
MODEL_PRIORITY = [
    DEFAULT_MODEL,
    "openai/gpt-oss-120b",
    "groq/compound",
    "llama-3.3-70b-versatile",
    "qwen/qwen3-32b",
    "meta-llama/llama-4-scout-17b-16e-instruct",
    "llama-3.1-8b-instant",
]
24
25
def read_text(path: Path, default: str = "") -> str:
    """Return the UTF-8 text stored at ``path``.

    Undecodable bytes are replaced rather than raising.

    Args:
        path: File to read.
        default: Value returned when the file does not exist.

    Returns:
        The decoded file contents, or ``default`` if ``path`` is missing.
    """
    try:
        with path.open("r", encoding="utf-8", errors="replace") as handle:
            return handle.read()
    except FileNotFoundError:
        return default
31
32
def run_git(args: list[str]) -> str:
    """Run ``git`` with ``args`` and return its stripped standard output.

    This is a best-effort helper: any failure yields an empty string so the
    caller can fall back to another data source.

    Args:
        args: Arguments passed to ``git`` (without the leading ``"git"``).

    Returns:
        The command's stdout with surrounding whitespace removed, or ``""``
        when git exits non-zero or cannot be launched at all.
    """
    try:
        proc = subprocess.run(
            ["git", *args],
            check=False,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except OSError:
        # git is missing or not executable; report it like any other git
        # failure instead of crashing the whole summary step.
        return ""
    if proc.returncode != 0:
        return ""
    return proc.stdout.strip()
45
46
def trim(text: str, limit: int) -> str:
    """Shorten ``text`` so that it never exceeds ``limit`` characters.

    Args:
        text: Text to shorten.
        limit: Maximum length of the returned string.

    Returns:
        ``text`` unchanged when it already fits. Otherwise the first
        ``limit - 20`` characters followed by a ``...[truncated]`` marker
        line. When ``limit`` is below the 20-character reservation there is
        no room for the marker, so the text is hard-cut to ``limit``
        characters (empty for non-positive limits).
    """
    if len(text) <= limit:
        return text

    reserved = 20  # room set aside for the truncation marker
    if limit < reserved:
        # A negative slice index would cut from the END of the text and
        # return far more than ``limit`` characters, so hard-cut instead.
        return text[: max(limit, 0)]
    return text[: limit - reserved] + "\n...[truncated]"
51
52
def parse_changed_paths(changed_files: str) -> list[str]:
    """Extract the path from each line of ``git ... --name-status`` output.

    For tab-separated lines the last field is used, so a rename line such as
    ``R100<TAB>old<TAB>new`` yields ``new``. Lines without tabs fall back to
    their last whitespace-separated token. Blank lines are skipped.

    Args:
        changed_files: Multi-line name-status text.

    Returns:
        The extracted paths, in input order.
    """

    def last_field(entry: str) -> str:
        # ``entry`` is already stripped and non-empty here.
        if "\t" in entry:
            return entry.rsplit("\t", 1)[-1].strip()
        return entry.split()[-1]

    stripped_lines = (line.strip() for line in changed_files.splitlines())
    fields = (last_field(entry) for entry in stripped_lines if entry)
    return [field for field in fields if field]
67
68
def classify_path(path: str) -> str:
    """Map a repository path to a coarse change category.

    Matching is case-insensitive and ignores surrounding whitespace. Rules
    are evaluated in order and the first match wins.

    Args:
        path: Repository-relative path.

    Returns:
        One of ``"core"``, ``"tests"``, ``"workflow"``, ``"scripts"``,
        ``"docs"``, ``"dist"`` or ``"other"``.
    """
    normalized = path.strip().lower()

    # Order matters: the "docs" rule also matches by suffix, so it must be
    # tested before "dist" (e.g. "dist/doxygenconfig" is classified as docs).
    rules = (
        ("core", lambda p: p.startswith(("src/", "include/"))),
        ("tests", lambda p: p.startswith("tests/")),
        ("workflow", lambda p: p.startswith(".github/workflows/")),
        ("scripts", lambda p: p.startswith("utility_scripts/")),
        (
            "docs",
            lambda p: p.startswith(("documentation/", "docs/"))
            or p.endswith("doxygenconfig"),
        ),
        ("dist", lambda p: p.startswith("dist/")),
    )
    for category, matches in rules:
        if matches(normalized):
            return category
    return "other"
84
85
def infer_primary_focus(changed_paths: list[str]) -> str:
    """Pick the category that best describes a set of changed paths.

    Each path adds its category's weight to that category's score, and the
    highest-scoring category wins. Ties go to the category listed first in
    the weight table.

    Args:
        changed_paths: Paths touched by the push.

    Returns:
        The winning category name, or ``"unknown"`` when there are no paths.
    """
    if not changed_paths:
        return "unknown"

    weights = {
        "core": 5,
        "tests": 4,
        "scripts": 3,
        "workflow": 2,
        "docs": 1,
        "dist": 1,
        "other": 1,
    }
    totals = dict.fromkeys(weights, 0)
    for category in map(classify_path, changed_paths):
        totals[category] += weights[category]

    return max(totals, key=totals.get)
104
105
def summarize_focus_label(focus: str) -> str:
    """Translate a focus category into a human-readable phrase.

    Args:
        focus: Category name such as ``"core"`` or ``"tests"``.

    Returns:
        The descriptive phrase for ``focus``; unrecognised categories map to
        ``"repository updates"``.
    """
    labels = dict(
        core="core emulator logic",
        tests="test coverage and verification",
        scripts="developer/automation scripts",
        workflow="CI workflow behavior",
        docs="documentation",
        dist="prebuilt/reference assets",
        other="project configuration",
        unknown="repository updates",
    )
    try:
        return labels[focus]
    except KeyError:
        return "repository updates"
118
119
def build_fallback_summary(commits_text: str, changed_files: str, changed_paths: list[str]) -> str:
    """Compose a deterministic summary that needs no model call.

    Args:
        commits_text: Pre-formatted commit bullet lines (may be empty).
        changed_files: Raw name-status text; accepted for signature
            compatibility but not used by this function.
        changed_paths: Parsed paths; the first eight are listed.

    Returns:
        A five-section plain-text summary (Push Summary, Key Changes,
        Risks / Follow-ups, Intent, Changed Files).
    """
    label = summarize_focus_label(infer_primary_focus(changed_paths))

    bullets = [f"- `{p}`" for p in changed_paths[:8]]
    files_section = "\n".join(bullets) if bullets else "- No changed files detected"
    key_changes = (
        commits_text
        if commits_text
        else "- No commit messages were detected in the push payload."
    )

    sections = (
        ("Push Summary", f"This push primarily updates {label}."),
        ("Key Changes", key_changes),
        (
            "Risks / Follow-ups",
            "- Review changed files and test impact for this push.\n"
            "- Verify CI output if behavior changes are expected.",
        ),
        ("Intent", f"Likely intent: improve {label} while keeping the branch synchronized."),
        ("Changed Files (Top)", files_section),
    )
    # Each section is "heading\nbody"; sections are separated by one blank line.
    return "\n\n".join(f"{heading}\n{body}" for heading, body in sections)
143
144
def fetch_available_models(api_key: str):
    """Query the Groq models endpoint and return the advertised model ids.

    Args:
        api_key: Bearer token sent in the ``Authorization`` header.

    Returns:
        A list of the non-empty ``id`` values found under the response's
        ``data`` key.

    Raises:
        Whatever ``urllib`` or ``json`` raise on network, HTTP or decoding
        failures; nothing is caught here.
    """
    request = urllib.request.Request(
        "https://api.groq.com/openai/v1/models",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        method="GET",
    )
    with urllib.request.urlopen(request, timeout=15) as response:
        body = response.read()

    payload = json.loads(body.decode("utf-8", errors="replace"))
    model_ids = []
    for entry in payload.get("data", []):
        model_id = entry.get("id", "")
        if model_id:
            model_ids.append(model_id)
    return model_ids
158
159
def choose_model(api_key: str, requested_model: str) -> str:
    """Decide which model id to use for the summary request.

    Preference order: the requested model if the API lists it, then the
    first entry of ``MODEL_PRIORITY`` that the API lists. If the catalogue
    cannot be fetched or nothing matches, the requested model (or
    ``DEFAULT_MODEL`` when none was requested) is returned.

    Args:
        api_key: Key used to query the model catalogue.
        requested_model: Caller-preferred model id; may be empty.

    Returns:
        The chosen model id.
    """
    fallback = requested_model or DEFAULT_MODEL

    try:
        available = fetch_available_models(api_key)
    except Exception:
        # Catalogue lookup is best-effort; any failure keeps the fallback.
        return fallback

    if requested_model and requested_model in available:
        return requested_model

    return next(
        (candidate for candidate in MODEL_PRIORITY if candidate in available),
        fallback,
    )
174
175
def call_model(api_key: str, model: str, prompt: str) -> str:
    """Send ``prompt`` to ``model`` through Groq's OpenAI-compatible endpoint.

    Args:
        api_key: Groq API key.
        model: Model id to invoke.
        prompt: Full prompt text.

    Returns:
        The response's ``output_text`` with surrounding whitespace removed,
        or ``""`` when the response carries no text.
    """
    # Imported at call time so the module can be loaded (and the fallback
    # path used) even when the ``openai`` package is not installed.
    import openai

    client = openai.OpenAI(
        api_key=api_key,
        base_url="https://api.groq.com/openai/v1",
    )
    response = client.responses.create(input=prompt, model=model)
    text = response.output_text
    return text.strip() if text else ""
184
185
def build_prompt(
    repo: str,
    ref_name: str,
    commits_text: str,
    changed_files: str,
    diff_patch: str,
    focus: str,
    strict_retry: bool,
) -> str:
    """Assemble the summarisation prompt sent to the model.

    Args:
        repo: Repository name shown in the prompt.
        ref_name: Branch name shown in the prompt.
        commits_text: Pre-formatted commit bullet lines.
        changed_files: Name-status listing of changed files.
        diff_patch: Excerpt of the unified diff.
        focus: Focus category; rendered via ``summarize_focus_label``.
        strict_retry: When true, an extra instruction is inserted telling
            the model to stay within the changed files and diff.

    Returns:
        The prompt text with leading and trailing whitespace stripped.
    """
    strict_clause = (
        "IMPORTANT RETRY MODE: Your previous draft referenced areas not grounded in changed files. "
        "In this retry, keep claims tightly scoped to changed files and diff."
        if strict_retry
        else ""
    )
    focus_label = summarize_focus_label(focus)

    prompt_lines = [
        f"You are an engineering release assistant for repository `{repo}` on branch `{ref_name}`.",
        "",
        "Task: produce a concise push summary grounded in the provided commit messages and git diff context.",
        "",
        "Grounding rules (strict-with-light-inference):",
        "1. You may infer high-level intent, but every concrete claim must be supported by changed files or diff.",
        "2. Do NOT mention components/files that are not present in Changed files summary or Diff excerpt.",
        "3. If `.github/workflows/` is absent from changed files, do not discuss CI/workflow changes.",
        f"4. Prioritize the likely primary focus of this push: `{focus_label}`.",
        "5. If context is insufficient for specifics, say so briefly instead of guessing.",
        "",
        "Output format:",
        "- Use Markdown without code fences.",
        "- 4 sections exactly, with these headings in order:",
        " - Push Summary",
        " - Key Changes",
        " - Risks / Follow-ups",
        " - Intent",
        "- Keep total length around 90-170 words.",
        "",
        # Empty outside retry mode, which leaves an extra blank line here.
        strict_clause,
        "",
        "Commit messages:",
        f"{commits_text}",
        "",
        "Changed files summary:",
        f"{changed_files}",
        "",
        "Diff excerpt:",
        f"{diff_patch}",
    ]
    return "\n".join(prompt_lines).strip()
234
235
def has_ungrounded_workflow_reference(summary_text: str, changed_paths: list[str]) -> bool:
    """Detect CI/workflow talk in a summary whose push touched no workflows.

    Args:
        summary_text: Generated summary to inspect.
        changed_paths: Paths changed by the push.

    Returns:
        ``False`` whenever any changed path lies under
        ``.github/workflows/`` (workflow mentions are then grounded).
        Otherwise ``True`` if the summary, compared case-insensitively,
        contains one of the suspicious tokens or names a ``deploy-*``
        YAML file.
    """
    has_workflow_changes = any(
        p.lower().startswith(".github/workflows/") for p in changed_paths
    )
    if has_workflow_changes:
        return False

    lower = summary_text.lower()
    suspicious_tokens = (
        "workflow",
        "github actions",
        "discord notification",
        "static.yml",
        "test-init-response-protocol.yml",
        "deploy-*",
        "pages deployment",
    )
    if any(token in lower for token in suspicious_tokens):
        return True

    # "deploy-*" above is glob-shaped but is only ever matched as a literal
    # substring, so concrete names such as "deploy-pages.yml" slipped
    # through. Match the file-name family the glob describes.
    return re.search(r"\bdeploy-[\w.-]*\.ya?ml\b", lower) is not None
252
253
def _load_push_event(event_path_value: str) -> dict:
    """Load the push event payload, or return ``{}`` if it is unusable."""
    # An empty value must be rejected explicitly: Path("") is the current
    # directory, which "exists" but cannot be read as a file.
    if not event_path_value:
        return {}
    event_path = Path(event_path_value)
    if not event_path.is_file():
        return {}
    try:
        event = json.loads(read_text(event_path, "{}") or "{}")
    except (OSError, ValueError):
        # Unreadable or malformed payload: continue with git-only context
        # instead of aborting the summary step.
        return {}
    return event if isinstance(event, dict) else {}


def _format_commit_lines(commits: list) -> str:
    """Render up to ``MAX_COMMITS`` commits as ``- <sha7> <subject> (<author>)`` bullets."""
    lines = []
    for commit in commits[:MAX_COMMITS]:
        cid = (commit.get("id") or "")[:7]
        # splitlines() on an empty message yields [], so guard the index.
        message_lines = (commit.get("message") or "").splitlines()
        message = message_lines[0].strip() if message_lines else ""
        author = (commit.get("author") or {}).get("name", "unknown")
        lines.append(f"- {cid} {message} ({author})")

    if len(commits) > MAX_COMMITS:
        lines.append(f"- ... and {len(commits) - MAX_COMMITS} more commit(s)")
    return "\n".join(lines)


def _collect_changes(before: str, after: str) -> tuple[str, str]:
    """Return untrimmed ``(name_status, patch)`` text for the push."""
    changed_files = ""
    diff_patch = ""

    # An all-zero "before" SHA has no diff base, so the range diff is skipped.
    if before and after and before != "0" * 40:
        changed_files = run_git(["diff", "--name-status", before, after])
        diff_patch = run_git(["diff", "--unified=1", "--no-color", before, after])

    # Fall back to the HEAD commit when the range diff produced nothing.
    if not changed_files:
        changed_files = run_git(["show", "--name-status", "--pretty=format:", "HEAD"])
    if not diff_patch:
        diff_patch = run_git(["show", "--unified=1", "--no-color", "--pretty=format:", "HEAD"])
    return changed_files, diff_patch


def _summarize_with_model(
    api_key: str,
    model: str,
    changed_paths: list[str],
    **prompt_fields: str,
) -> str:
    """Ask the model for a summary, retrying once in strict mode if needed."""
    summary = call_model(api_key, model, build_prompt(strict_retry=False, **prompt_fields))
    if has_ungrounded_workflow_reference(summary, changed_paths):
        summary = call_model(api_key, model, build_prompt(strict_retry=True, **prompt_fields))
    return summary


def main():
    """Generate the push summary and write it to disk.

    Reads ``EVENT_PATH``, ``REPO``, ``REF_NAME``, ``GROQ_API_KEY``,
    ``GROQ_MODEL`` and ``GITHUB_STEP_SUMMARY`` from the environment, gathers
    commit and diff context, and asks the model for a summary when an API
    key is present. The deterministic fallback summary is used when no key
    is set, the model call fails, or the model output still refers to
    workflows that were not changed.

    Writes ``ai_push_summary.txt`` and ``ai_push_summary.json`` in the
    current directory and appends to the step-summary file when configured.
    """
    repo = os.environ.get("REPO", "unknown")
    ref_name = os.environ.get("REF_NAME", "unknown")
    groq_api_key = os.environ.get("GROQ_API_KEY", "")
    requested_model = os.environ.get("GROQ_MODEL", "")
    step_summary_path = os.environ.get("GITHUB_STEP_SUMMARY", "")

    event = _load_push_event(os.environ.get("EVENT_PATH", ""))
    commits = event.get("commits") or []
    before = event.get("before") or ""
    after = event.get("after") or ""

    commits_text = _format_commit_lines(commits)
    full_changed_files, full_diff_patch = _collect_changes(before, after)

    # Parse paths from the untrimmed listing: parsing the trimmed text would
    # record the "...[truncated]" marker and a clipped path as real paths.
    changed_paths = parse_changed_paths(full_changed_files)
    focus = infer_primary_focus(changed_paths)

    changed_files = trim(full_changed_files, MAX_DIFF_FILES_CHARS)
    diff_patch = trim(full_diff_patch, MAX_DIFF_PATCH_CHARS)

    # NOTE(review): this id is reported as "Model used" even when the
    # fallback summary is emitted; kept as-is for output compatibility.
    selected_model = requested_model or DEFAULT_MODEL
    summary_text = ""
    failure_note = ""

    if groq_api_key:
        try:
            selected_model = choose_model(groq_api_key, requested_model)
            summary_text = _summarize_with_model(
                groq_api_key,
                selected_model,
                changed_paths,
                repo=repo,
                ref_name=ref_name,
                commits_text=commits_text,
                changed_files=changed_files,
                diff_patch=diff_patch,
                focus=focus,
            )
        except Exception as exc:
            # Best-effort boundary: discard any partial result and record
            # the failure so the fallback below is actually produced.
            summary_text = ""
            failure_note = (
                f"AI summarization failed ({type(exc).__name__}); using fallback summary."
            )

    if (not summary_text) or has_ungrounded_workflow_reference(summary_text, changed_paths):
        summary_text = build_fallback_summary(commits_text, changed_files, changed_paths)
        if failure_note:
            summary_text = f"{failure_note}\n\n{summary_text}"

    summary_with_meta = f"_Model used: `{selected_model}`_\n\n{summary_text}".strip()

    Path("ai_push_summary.txt").write_text(summary_with_meta + "\n", encoding="utf-8")
    Path("ai_push_summary.json").write_text(
        json.dumps(
            {
                "summary": summary_text,
                "model": selected_model,
                "focus": focus,
                "changed_paths": changed_paths,
            },
            ensure_ascii=False,
            indent=2,
        ),
        encoding="utf-8",
    )

    if step_summary_path:
        with open(step_summary_path, "a", encoding="utf-8") as fh:
            fh.write("\n## AI Push Summary\n\n")
            fh.write(summary_with_meta)
            fh.write("\n")
359
360
# Run the summary generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()