GameBoy Emulator 1
Game Boy emulator core and tooling
Loading...
Searching...
No Matches
opcode_lookup.py
1#!/usr/bin/env python3
2
3import argparse
4import curses
5import html
6import json
7import re
8import shutil
9import subprocess
10import sys
11import tempfile
12import textwrap
13from datetime import datetime, timezone
14from pathlib import Path
15
16
# Repository root: the directory one level above the one holding this script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Rendered opcode table (input) and the JSON lookup index derived from it.
DIST_DIR = PROJECT_ROOT.joinpath("dist")
INDEX_HTML = DIST_DIR.joinpath("index.html")
INDEX_JSON = DIST_DIR.joinpath("opcode_lookup_index.json")

# Headers scanned for opcode handler declarations, and the source directories
# scanned for DUMMY(...) markers.
OPCODES_HEADER = PROJECT_ROOT.joinpath("include", "opcodes.hpp")
CB_OPCODES_HEADER = PROJECT_ROOT.joinpath("include", "cb_opcodes.hpp")
BASE_OPCODE_SRC_DIR = PROJECT_ROOT.joinpath(
    "src", "core", "cpu", "instructions", "opcodes"
)
CB_OPCODE_SRC_DIR = PROJECT_ROOT.joinpath(
    "src", "core", "cpu", "instructions", "cb_opcodes"
)
28
# Matches one handler declaration line of the form
#     int op_<name>(<args>);  // 0x<hex>
# Group 1 is the handler name, group 2 the hex opcode from the trailing comment.
# The parentheses must be escaped with a plain backslash: an invisible
# zero-width joiner between "\" and "(" turns the escape into "match a literal
# joiner character", which never occurs in the headers.
OP_DECL_PATTERN = re.compile(
    r"^\s*int\s+(op_[A-Za-z0-9_]+)\s*\([^;]*\)\s*;\s*//\s*(0x[0-9A-Fa-f]+)\s*$"
)
# Matches a DUMMY(op_<name>) marker and captures the handler name.
DUMMY_PATTERN = re.compile(r"DUMMY\(\s*(op_[A-Za-z0-9_]+)\s*\)")
33
34
def find_chromium_binary() -> str:
    """Return the path of the first Chromium-family executable found on PATH.

    The candidate names are probed in order with ``shutil.which``.

    Raises:
        RuntimeError: If none of the candidate names resolves to an executable.
    """
    candidates = (
        "chromium",
        "chromium-browser",
        "google-chrome",
        "google-chrome-stable",
    )
    # map/filter are lazy, so probing stops at the first hit.
    located = next(filter(None, map(shutil.which, candidates)), None)
    if located is None:
        raise RuntimeError(
            "No Chromium binary found. Install chromium/chromium-browser."
        )
    return located
46
47
def run_chromium_dump(helper_file: Path) -> str:
    """Load *helper_file* in headless Chromium and return the dumped DOM.

    Args:
        helper_file: Local HTML file to open.

    Returns:
        Chromium's standard output, i.e. the serialized DOM produced by
        ``--dump-dom``.

    Raises:
        RuntimeError: If no Chromium binary is available or the process exits
            with a non-zero status (stderr, or stdout if stderr is empty, is
            included in the message).
    """
    binary = find_chromium_binary()
    # as_uri() percent-encodes characters such as spaces, '#', '?' and '%'
    # that would otherwise be misread as URL syntax, and emits the correct
    # form for drive-letter paths. It requires an absolute path, hence resolve().
    page_url = helper_file.resolve().as_uri()
    cmd = [
        binary,
        "--headless",
        "--disable-gpu",
        "--allow-file-access-from-files",
        "--virtual-time-budget=20000",
        "--dump-dom",
        page_url,
    ]
    proc = subprocess.run(cmd, capture_output=True, text=True, check=False)
    if proc.returncode != 0:
        raise RuntimeError(
            f"Chromium failed: {proc.stderr.strip() or proc.stdout.strip()}"
        )
    return proc.stdout
65
66
def parse_opcode_declarations(header_path: Path) -> list[tuple[str, str]]:
    """Extract ``(handler_name, opcode)`` pairs from an opcode header file.

    Only lines matching ``OP_DECL_PATTERN`` contribute; every other line is
    ignored. The opcode text is upper-cased as a whole (``0x1f`` -> ``0X1F``).

    Args:
        header_path: UTF-8 encoded header file to scan.

    Returns:
        The pairs in the order the declarations appear in the file.
    """
    header_lines = header_path.read_text(encoding="utf-8").splitlines()
    matches = (OP_DECL_PATTERN.match(text) for text in header_lines)
    return [(found.group(1), found.group(2).upper()) for found in matches if found]
74
75
def collect_dummy_symbols(source_dir: Path) -> set[str]:
    """Collect every handler name wrapped in ``DUMMY(...)`` under *source_dir*.

    Only ``*.cpp`` files directly inside the directory are read (no recursion),
    in sorted order, decoded as UTF-8.

    Returns:
        The set of symbol names captured by ``DUMMY_PATTERN``.
    """
    return {
        symbol
        for cpp_file in sorted(source_dir.glob("*.cpp"))
        for symbol in DUMMY_PATTERN.findall(cpp_file.read_text(encoding="utf-8"))
    }
83
84
def get_implemented_opcode_keys() -> set[str]:
    """Return the upper-cased opcode keys of handlers that are not DUMMY stubs.

    A handler counts as implemented when it is declared in the base or CB
    opcode header and its name is absent from the ``DUMMY(...)`` markers found
    in the matching source directory.

    NOTE(review): base and CB keys land in one shared set. That only keeps
    them apart if the CB header's comments carry the full ``0xCBnn`` value
    rather than just ``0xnn`` - confirm against cb_opcodes.hpp.
    """
    # Both headers are parsed before any source directory is scanned.
    declared = [
        parse_opcode_declarations(header)
        for header in (OPCODES_HEADER, CB_OPCODES_HEADER)
    ]
    stubbed = [
        collect_dummy_symbols(directory)
        for directory in (BASE_OPCODE_SRC_DIR, CB_OPCODE_SRC_DIR)
    ]
    return {
        opcode.upper()
        for declarations, dummies in zip(declared, stubbed)
        for name, opcode in declarations
        if name not in dummies
    }
100
101
def annotate_implementation_status(entries: list[dict]) -> None:
    """Set ``entry["implemented"]`` on every entry, mutating them in place.

    The flag is True when the entry's upper-cased ``opcode`` value is among
    the keys returned by ``get_implemented_opcode_keys``.
    """
    implemented_keys = get_implemented_opcode_keys()
    for entry in entries:
        lookup_key = str(entry.get("opcode", "")).upper()
        entry.update(implemented=lookup_key in implemented_keys)
107
108
def scrape_rows_from_dist() -> list[dict]:
    """Scrape raw opcode rows from the rendered ``dist/index.html`` page.

    A temporary helper page is written that embeds ``INDEX_HTML`` in an
    iframe, waits (up to 120 polls of 50 ms) for buttons whose ``aria-label``
    starts with ``Opcode:``, clicks each one, and records the heading, list
    items and description shown in ``#details-sidebar``. The rows are
    serialized as JSON into ``<pre id="out">``, which is then recovered from
    the DOM dumped by headless Chromium.

    Returns:
        A list of row dicts with the keys ``aria``, ``title``, ``fields`` and
        ``description``.

    Raises:
        FileNotFoundError: If ``INDEX_HTML`` does not exist.
        RuntimeError: If Chromium fails, the output element cannot be found in
            the dumped DOM, or the payload is not a JSON list.
    """
    if not INDEX_HTML.exists():
        raise FileNotFoundError(f"Missing {INDEX_HTML}")

    # Embed the target as a JSON-quoted, percent-encoded file URI so that a
    # path containing quotes, spaces, '#' or '?' can neither break the JS
    # string literal nor be misparsed as URL syntax.
    iframe_src = json.dumps(INDEX_HTML.resolve().as_uri())

    with tempfile.TemporaryDirectory(prefix="gb_opcode_lookup_") as td:
        temp_dir = Path(td)
        helper = temp_dir / "dump.html"

        # Doubled braces are literal braces in the f-string; "\\s" reaches the
        # page as the JS regex escape "\s".
        helper.write_text(
            f"""<!doctype html><html><body><pre id=\"out\">[]</pre><script>
(async function() {{
  const out = document.getElementById('out');
  const iframe = document.createElement('iframe');
  iframe.src = {iframe_src};
  iframe.style.width = '1400px';
  iframe.style.height = '1000px';
  document.body.appendChild(iframe);

  await new Promise((resolve) => iframe.addEventListener('load', resolve, {{ once: true }}));
  const doc = iframe.contentWindow.document;

  function sleep(ms) {{ return new Promise((resolve) => setTimeout(resolve, ms)); }}

  for (let i = 0; i < 120; i += 1) {{
    if (doc.querySelectorAll('button[aria-label^="Opcode:"]').length > 0) break;
    await sleep(50);
  }}

  const buttons = Array.from(doc.querySelectorAll('button[aria-label^="Opcode:"]'));
  const rows = [];

  for (const btn of buttons) {{
    btn.click();
    await sleep(12);
    const sidebar = doc.querySelector('#details-sidebar');
    const title = sidebar?.querySelector('h2')?.textContent?.trim() || '';
    const fields = Array.from(sidebar?.querySelectorAll('li') || []).map((x) => x.textContent.trim());
    const desc = sidebar?.querySelector('section[aria-label="description"]')?.innerText?.replace(/\\s+/g, ' ').trim() || '';
    const aria = btn.getAttribute('aria-label') || '';
    rows.push({{ aria, title, fields, description: desc }});
  }}

  out.textContent = JSON.stringify(rows);
  document.title = 'done';
}})();
</script></body></html>""",
            encoding="utf-8",
        )

        dumped = run_chromium_dump(helper)

    match = re.search(r"<pre id=\"out\">([\s\S]*?)</pre>", dumped)
    if not match:
        raise RuntimeError("Could not extract opcode data from rendered DOM")

    # The dumped DOM is HTML, so the JSON text arrives entity-escaped.
    raw = html.unescape(match.group(1))
    rows = json.loads(raw)
    if not isinstance(rows, list):
        raise RuntimeError("Unexpected scrape payload format")
    return rows
169
170
def normalize_spaces(text: str) -> str:
    """Collapse each run of whitespace in *text* to one space and trim the ends."""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
173
174
def parse_fields(fields: list[str]) -> tuple[str, int, str, dict]:
    """Decode the ``"Label: value"`` strings scraped from the details sidebar.

    Recognized labels are ``Opcode``, ``Number of Bytes``, ``Number of Cycles``
    and ``Flags``; anything else is ignored. When a label occurs more than
    once, the last occurrence wins.

    Returns:
        ``(opcode, bytes_count, cycles, flags)``. Missing values default to
        ``""``, ``-1``, ``""`` and a dict mapping ``Z``/``N``/``H``/``CY`` to
        ``"-"``. ``bytes_count`` is also ``-1`` when the value is not an
        integer. Flag values are taken positionally from the space-separated
        text and padded with ``"-"`` up to four items.
    """
    flag_names = ("Z", "N", "H", "CY")
    opcode = ""
    bytes_count = -1
    cycles = ""
    flags = dict.fromkeys(flag_names, "-")

    for item in fields:
        label, separator, remainder = item.partition(":")
        if not separator:
            # No colon at all, so it cannot be one of the known fields.
            continue
        if label == "Opcode":
            opcode = normalize_spaces(remainder)
        elif label == "Number of Bytes":
            try:
                bytes_count = int(normalize_spaces(remainder))
            except ValueError:
                bytes_count = -1
        elif label == "Number of Cycles":
            cycles = normalize_spaces(remainder)
        elif label == "Flags":
            values = normalize_spaces(remainder).split(" ")
            values += ["-"] * max(0, len(flag_names) - len(values))
            # zip() stops after the four flag names; surplus values are dropped.
            flags = dict(zip(flag_names, values))

    return opcode, bytes_count, cycles, flags
205
206
def canonical_opcode(opcode_text: str) -> str:
    """Return *opcode_text* as ``0x`` followed by its upper-cased digits.

    Spaces are removed and a single leading ``0x``/``0X`` prefix, if present,
    is replaced by a lower-case ``0x``.
    """
    digits = opcode_text.upper().replace(" ", "").removeprefix("0X")
    return "0x" + digits
212
213
def rows_to_entries(rows: list[dict]) -> list[dict]:
    """Convert scraped rows into de-duplicated, opcode-sorted index entries.

    Rows that are not dicts, or whose fields yield no opcode, are skipped. A
    leading ``"Description "`` on the description text is removed
    (case-insensitively). If an opcode occurs more than once, the last row
    wins.

    Returns:
        Entry dicts with the keys ``opcode``, ``prefix`` (``"cb"`` or
        ``"base"``), ``mnemonic``, ``bytes``, ``cycles``, ``flags`` and
        ``description``, sorted by the numeric value of the opcode.

    Raises:
        ValueError: If an opcode is not valid hexadecimal after normalization.
    """
    by_opcode: dict[str, dict] = {}

    for row in rows:
        if not isinstance(row, dict):
            continue

        fields = row.get("fields", [])
        if not isinstance(fields, list):
            fields = []

        opcode_text, bytes_count, cycles, flags = parse_fields(fields)
        if not opcode_text:
            continue

        opcode = canonical_opcode(opcode_text)
        mnemonic = normalize_spaces(str(row.get("title", "")))
        description = normalize_spaces(str(row.get("description", "")))
        if description.lower().startswith("description "):
            description = description[len("Description ") :].strip()

        # Only opcodes with digits after the CB byte belong to the CB table;
        # a bare 0xCB is the prefix byte itself and lives in the base table.
        hex_digits = opcode[2:].upper()
        is_cb = len(hex_digits) > 2 and hex_digits.startswith("CB")

        # Assigning to an existing key keeps its first-seen position while
        # replacing the value, so a later duplicate row wins.
        by_opcode[opcode.upper()] = {
            "opcode": opcode,
            "prefix": "cb" if is_cb else "base",
            "mnemonic": mnemonic,
            "bytes": bytes_count,
            "cycles": cycles,
            "flags": flags,
            "description": description,
        }

    return sorted(by_opcode.values(), key=lambda entry: int(entry["opcode"][2:], 16))
256
257
def build_index() -> dict:
    """Scrape ``dist/index.html`` and write the JSON lookup index.

    Returns:
        The payload written to ``INDEX_JSON``: ``generated_at`` (UTC ISO
        timestamp), ``source`` (path of ``INDEX_HTML`` relative to the project
        root), ``count`` and ``entries``.

    Raises:
        RuntimeError: If the scrape produced no entries. An existing index is
            left untouched in that case, so a page that failed to render
            cannot replace a good index with an empty one.
        FileNotFoundError: If ``INDEX_HTML`` is missing (raised by the scraper).
    """
    rows = scrape_rows_from_dist()
    entries = rows_to_entries(rows)
    if not entries:
        raise RuntimeError(
            f"No opcode entries could be scraped from {INDEX_HTML}; "
            "the existing index was left unchanged"
        )

    payload = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "source": str(INDEX_HTML.relative_to(PROJECT_ROOT)),
        "count": len(entries),
        "entries": entries,
    }

    INDEX_JSON.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return payload
271
272
273def parse_query_as_opcode(query: str) -> str | None:
274 q = query.strip().upper()
275 q = re.sub(r"\s+", " ", q)
276
277 for pattern in (
278 r"^0XCB([0-9A-F]{2})$",
279 r"^CB\s*([0-9A-F]{2})$",
280 r"^CB[:\- ]([0-9A-F]{2})$",
281 ):
282 m = re.match(pattern, q)
283 if m:
284 return f"0xCB{m.group(1)}"
285
286 m4 = re.match(r"^0X([0-9A-F]{4})$", q)
287 if m4:
288 return f"0x{m4.group(1)}"
289
290 m2 = re.match(r"^(?:0X)?([0-9A-F]{2})$", q)
291 if m2:
292 return f"0x{m2.group(1)}"
293
294 return None
295
296
def load_index() -> dict:
    """Read and minimally validate the JSON lookup index.

    Returns:
        The decoded index, guaranteed to be a dict with an ``entries`` key.

    Raises:
        FileNotFoundError: If ``INDEX_JSON`` does not exist yet.
        RuntimeError: If the decoded document is not a dict or has no
            ``entries`` key.
    """
    if INDEX_JSON.exists():
        data = json.loads(INDEX_JSON.read_text(encoding="utf-8"))
        if isinstance(data, dict) and "entries" in data:
            return data
        raise RuntimeError(f"Invalid index format: {INDEX_JSON}")

    raise FileNotFoundError(
        f"Missing {INDEX_JSON}. Run: python3 utility_scripts/opcode_lookup.py --build"
    )
307
308
def find_entries(index: dict, query: str) -> list[dict]:
    """Look up index entries by opcode or by mnemonic.

    If *query* parses as an opcode, only entries with exactly that opcode are
    returned. Otherwise the whitespace-normalized, lower-cased query is
    compared with the mnemonics: exact matches are preferred, and substring
    matches are used only when there is no exact match.

    Returns:
        The matching entries in index order; empty when ``entries`` is not a
        list or the query is blank.
    """
    entries = index.get("entries", [])
    if not isinstance(entries, list):
        return []

    def text_of(entry: dict, key: str) -> str:
        return str(entry.get(key, ""))

    opcode = parse_query_as_opcode(query)
    if opcode:
        wanted = opcode.upper()
        return [e for e in entries if text_of(e, "opcode").upper() == wanted]

    needle = normalize_spaces(query).lower()
    if not needle:
        return []

    exact = [e for e in entries if text_of(e, "mnemonic").lower() == needle]
    return exact or [e for e in entries if needle in text_of(e, "mnemonic").lower()]
328
329
def print_entry(entry: dict) -> None:
    """Print one index entry to stdout as labelled lines.

    Missing keys are shown as empty values; missing flags are shown as ``-``.
    """
    flags = entry.get("flags", {})
    flag_text = " ".join(flags.get(name, "-") for name in ("Z", "N", "H", "CY"))

    report = [
        f"Opcode : {entry.get('opcode', '')}",
        f"Mnemonic : {entry.get('mnemonic', '')}",
        f"Prefix : {entry.get('prefix', '')}",
        f"Bytes : {entry.get('bytes', '')}",
        f"Cycles : {entry.get('cycles', '')}",
        f"Flags (ZNHC): {flag_text}",
        "Description :",
        f" {entry.get('description', '')}",
    ]
    print("\n".join(report))
345
346
def build_search_text(entry: dict) -> str:
    """Return the lower-cased text that free-text filtering searches in.

    The opcode, mnemonic, prefix, cycles, bytes and description values are
    stringified (missing keys become empty strings) and joined with spaces.
    """
    searchable_keys = (
        "opcode",
        "mnemonic",
        "prefix",
        "cycles",
        "bytes",
        "description",
    )
    return " ".join(str(entry.get(key, "")) for key in searchable_keys).lower()
357
358
def filter_entries(entries: list[dict], query: str) -> list[dict]:
    """Filter *entries* for the interactive search box.

    A blank query returns *entries* itself, unfiltered. If the query parses as
    an opcode and at least one entry has exactly that opcode, only those
    entries are returned. Otherwise every space-separated token of the query
    must occur in the entry's search text (see ``build_search_text``).
    """
    needle = normalize_spaces(query).lower()
    if not needle:
        return entries

    opcode = parse_query_as_opcode(needle)
    if opcode:
        wanted = opcode.upper()
        exact = [e for e in entries if str(e.get("opcode", "")).upper() == wanted]
        if exact:
            return exact

    # The needle is already normalized, so split() yields the same non-empty
    # tokens as splitting on single spaces.
    tokens = needle.split()

    def matches(entry: dict) -> bool:
        haystack = build_search_text(entry)
        return all(token in haystack for token in tokens)

    return [entry for entry in entries if matches(entry)]
378
379
def wrap_lines(text: str, width: int) -> list[str]:
    """Wrap *text* to *width* columns, keeping its existing line breaks.

    Each input line is wrapped separately with whitespace preserved
    (``replace_whitespace=False``, ``drop_whitespace=False``). Empty input
    lines, and empty or falsy *text*, yield an empty string, so the result is
    never an empty list. For ``width <= 1`` the text is returned unwrapped as
    a one-element list.
    """
    if width <= 1:
        return [text]

    wrapped: list[str] = []
    for paragraph in (text or "").splitlines() or [""]:
        pieces = textwrap.wrap(
            paragraph, width=width, replace_whitespace=False, drop_whitespace=False
        )
        # textwrap returns [] for an empty line; keep it as a blank line.
        wrapped.extend(pieces or [""])
    return wrapped
393
394
def init_tui_colors() -> dict[str, int]:
    """Build the curses attribute palette used by the TUI.

    Returns:
        A mapping from role name (``title``, ``label``, ``muted``,
        ``selected``, ``divider``, ``value``, ``good``, ``warn``) to a curses
        attribute. Without color support every role is ``0`` except
        ``selected``, which is reverse video.
    """
    roles = ("title", "label", "muted", "selected", "divider", "value", "good", "warn")
    palette = dict.fromkeys(roles, 0)
    palette["selected"] = curses.A_REVERSE

    if not curses.has_colors():
        return palette

    curses.start_color()
    try:
        curses.use_default_colors()
    except curses.error:
        bg = curses.COLOR_BLACK
    else:
        # -1 selects the terminal's default background.
        bg = -1

    pair_colors = {
        1: (curses.COLOR_CYAN, bg),
        2: (curses.COLOR_WHITE, bg),
        3: (curses.COLOR_BLUE, bg),
        4: (curses.COLOR_BLACK, curses.COLOR_CYAN),
        5: (curses.COLOR_GREEN, bg),
        6: (curses.COLOR_YELLOW, bg),
    }
    for pair_number, (foreground, background) in pair_colors.items():
        curses.init_pair(pair_number, foreground, background)

    bold = curses.A_BOLD
    palette["title"] = curses.color_pair(1) | bold
    palette["label"] = curses.color_pair(2) | bold
    palette["muted"] = curses.color_pair(3)
    palette["selected"] = curses.color_pair(4) | bold
    palette["divider"] = curses.color_pair(3)
    palette["value"] = curses.color_pair(2)
    palette["good"] = curses.color_pair(5) | bold
    palette["warn"] = curses.color_pair(6) | bold
    return palette
437
438
def _safe_addstr(win, y: int, x: int, text: str, attr: int = 0) -> None:
    """Draw *text* at (y, x) clipped to the window, never raising on overflow."""
    height, width = win.getmaxyx()
    if not (0 <= y < height and 0 <= x < width):
        return
    try:
        win.addnstr(y, x, text, width - x, attr)
    except curses.error:
        # Deliberately ignored: curses reports an error after writing into the
        # bottom-right cell, and a clipped label must not take down the UI.
        pass


def _draw_result_list(
    stdscr,
    visible: list[dict],
    scroll: int,
    selected: int,
    list_width: int,
    palette: dict[str, int],
) -> None:
    """Draw the visible slice of the result list in the left pane."""
    for idx, entry in enumerate(visible):
        implemented = entry.get("implemented", False)
        marker = "I" if implemented else " "
        label = f"[{marker}] {entry.get('opcode', ''):<8} {entry.get('mnemonic', '')}"
        label = label[: list_width - 1].ljust(list_width - 1)
        if scroll + idx == selected:
            attr = palette["selected"]
        elif implemented:
            attr = palette["good"]
        else:
            attr = palette["value"]
        _safe_addstr(stdscr, 3 + idx, 0, label, attr)


def _draw_detail_pane(
    stdscr,
    current: dict,
    detail_x: int,
    detail_width: int,
    height: int,
    palette: dict[str, int],
) -> None:
    """Draw the field table and wrapped description of the selected entry."""
    flags = current.get("flags", {})
    flag_line = (
        f"Z={flags.get('Z', '-')} "
        f"N={flags.get('N', '-')} "
        f"H={flags.get('H', '-')} "
        f"CY={flags.get('CY', '-')}"
    )
    detail_rows: list[tuple[str, str]] = [
        ("Opcode", str(current.get("opcode", ""))),
        ("Mnemonic", str(current.get("mnemonic", ""))),
        ("Implemented", "Yes" if current.get("implemented", False) else "No"),
        ("Prefix", str(current.get("prefix", ""))),
        ("Bytes", str(current.get("bytes", ""))),
        ("Cycles", str(current.get("cycles", ""))),
        ("Flags", flag_line),
    ]

    # Size the label column from the longest label. A fixed width shorter than
    # "Implemented" lets the value overwrite the end of that label.
    label_width = max(len(label) for label, _ in detail_rows)
    value_x = detail_x + label_width + 1
    value_room = max(1, detail_width - label_width - 2)
    bottom = height - 1

    y = 3
    for label, value in detail_rows:
        if y >= bottom:
            break
        _safe_addstr(stdscr, y, detail_x, label, palette["label"])
        if label == "Implemented":
            value_attr = palette["good"] if value == "Yes" else palette["warn"]
        elif label in ("Opcode", "Cycles", "Bytes"):
            value_attr = palette["good"]
        else:
            value_attr = palette["value"]
        _safe_addstr(stdscr, y, value_x, value[:value_room], value_attr)
        y += 1

    if y < bottom:
        y += 1  # blank spacer line before the description
    if y < bottom:
        _safe_addstr(stdscr, y, detail_x, "Description", palette["label"])
        y += 1

    for line in wrap_lines(str(current.get("description", "")), detail_width - 1):
        if y >= bottom:
            break
        _safe_addstr(stdscr, y, detail_x, line[: detail_width - 1], palette["value"])
        y += 1


def draw_tui(stdscr, entries: list[dict]) -> int:
    """Run the interactive opcode browser until the user quits.

    The screen shows a search line, a scrolling result list on the left and
    the details of the selected entry on the right. The list is re-filtered
    with ``filter_entries`` on every redraw.

    Keys: Up/``k`` and Down/``j`` move the selection, PgUp/PgDn move it by
    almost a page, Backspace deletes the last query character, ``q``/``Q``/Esc
    quit. Any other printable ASCII character is appended to the query.
    Because command keys are checked first, ``j``, ``k``, ``q`` and ``Q``
    cannot be typed into the query.

    Args:
        stdscr: The curses window to draw on (as supplied by ``curses.wrapper``).
        entries: Index entries to browse.

    Returns:
        ``0`` when the user quits.
    """
    try:
        curses.curs_set(0)
    except curses.error:
        # Some terminals cannot hide the cursor; that is purely cosmetic.
        pass
    stdscr.keypad(True)
    palette = init_tui_colors()

    query = ""
    selected = 0
    scroll = 0

    while True:
        height, width = stdscr.getmaxyx()
        if width < 70 or height < 16:
            # Clipped drawing keeps this notice safe even in a window too
            # small to hold the notice itself.
            stdscr.erase()
            _safe_addstr(
                stdscr,
                0,
                0,
                "Terminal too small. Resize to at least 70x16.",
                palette["warn"],
            )
            _safe_addstr(stdscr, 1, 0, "Press q to quit.", palette["muted"])
            stdscr.refresh()
            ch = stdscr.getch()
            if ch in (ord("q"), ord("Q"), 27):
                return 0
            continue

        filtered = filter_entries(entries, query)
        selected = max(0, min(selected, len(filtered) - 1)) if filtered else 0

        list_width = max(30, min(44, width // 2 - 2))
        detail_x = list_width + 2
        detail_width = width - detail_x - 1
        list_rows = height - 5

        # Keep the selected row inside the visible window.
        if selected < scroll:
            scroll = selected
        if selected >= scroll + list_rows:
            scroll = selected - list_rows + 1
        scroll = max(scroll, 0)

        stdscr.erase()
        _safe_addstr(stdscr, 0, 0, "GB Opcode Lookup Interface", palette["title"])
        _safe_addstr(stdscr, 1, 0, "Search:", palette["label"])
        _safe_addstr(stdscr, 1, 8, f"{query}", palette["value"])
        _safe_addstr(stdscr, 2, 0, "Matches:", palette["label"])
        _safe_addstr(stdscr, 2, 9, f"{len(filtered)}/{len(entries)}", palette["good"])
        _safe_addstr(stdscr, 2, 22, "[I] implemented", palette["good"])
        _safe_addstr(
            stdscr,
            2,
            detail_x,
            "Keys: Up/Down PgUp/PgDn Backspace q",
            palette["muted"],
        )

        for row in range(3, height - 2):
            stdscr.addch(row, list_width + 1, "|", palette["divider"])

        _draw_result_list(
            stdscr,
            filtered[scroll : scroll + list_rows],
            scroll,
            selected,
            list_width,
            palette,
        )

        if filtered:
            _draw_detail_pane(
                stdscr, filtered[selected], detail_x, detail_width, height, palette
            )
        else:
            _safe_addstr(stdscr, 3, detail_x, "No matches.", palette["warn"])

        stdscr.refresh()
        ch = stdscr.getch()

        if ch in (ord("q"), ord("Q"), 27):
            return 0
        if ch in (curses.KEY_UP, ord("k")):
            if filtered:
                selected = max(0, selected - 1)
            continue
        if ch in (curses.KEY_DOWN, ord("j")):
            if filtered:
                selected = min(len(filtered) - 1, selected + 1)
            continue
        if ch == curses.KEY_NPAGE:
            if filtered:
                selected = min(len(filtered) - 1, selected + max(1, list_rows - 2))
            continue
        if ch == curses.KEY_PPAGE:
            if filtered:
                selected = max(0, selected - max(1, list_rows - 2))
            continue
        if ch in (curses.KEY_BACKSPACE, 127, 8):
            if query:
                query = query[:-1]
                selected = 0
                scroll = 0
            continue
        if 32 <= ch <= 126:
            query += chr(ch)
            selected = 0
            scroll = 0
            continue
599
600
def run_tui(index: dict) -> int:
    """Annotate the index entries and launch the curses browser.

    Returns:
        The exit status of ``draw_tui``, or ``1`` (after printing a message to
        stderr) when ``index["entries"]`` is not a list.
    """
    entries = index.get("entries", [])
    if isinstance(entries, list):
        annotate_implementation_status(entries)
        # curses.wrapper passes extra positional arguments on to the callback.
        return curses.wrapper(draw_tui, entries)

    print("Invalid index data", file=sys.stderr)
    return 1
608
609
def main() -> int:
    """Command-line entry point.

    ``--build`` regenerates the index first and stops there when no query was
    given. Without a query, or with ``--tui``, the interactive browser is
    started; otherwise the query is looked up and printed as text or, with
    ``--json``, as a JSON list. Without ``--all`` only the first match is
    shown.

    Returns:
        ``0`` on success, ``1`` when building or loading the index fails or
        the query has no results.
    """
    parser = argparse.ArgumentParser(
        description="Lookup Game Boy opcodes from dist/index.html metadata"
    )
    parser.add_argument(
        "query", nargs="?", help="opcode (e.g. 0x04, CB 11) or mnemonic"
    )
    parser.add_argument(
        "--build", action="store_true", help="build dist/opcode_lookup_index.json"
    )
    parser.add_argument(
        "--json", action="store_true", help="output JSON for query result"
    )
    parser.add_argument("--all", action="store_true", help="show all mnemonic matches")
    parser.add_argument(
        "--tui", action="store_true", help="launch interactive terminal UI"
    )
    args = parser.parse_args()

    if args.build:
        # Report build failures (missing dist page, no Chromium, bad scrape
        # payload) the same way as load failures below: one line on stderr and
        # exit status 1 instead of a traceback.
        try:
            payload = build_index()
        except (OSError, RuntimeError, ValueError) as exc:
            print(str(exc), file=sys.stderr)
            return 1
        print(
            f"Built {INDEX_JSON.relative_to(PROJECT_ROOT)} with {payload.get('count', 0)} entries"
        )
        if not args.query:
            return 0

    try:
        index = load_index()
    except Exception as exc:
        print(str(exc), file=sys.stderr)
        return 1

    if args.tui or not args.query:
        return run_tui(index)

    results = find_entries(index, args.query)
    if not results:
        print(f"No results for: {args.query}")
        return 1

    if args.json:
        # Always emit a list, trimmed to the first match unless --all is given.
        payload = results if (args.all or len(results) == 1) else [results[0]]
        print(json.dumps(payload, indent=2))
        return 0

    if len(results) > 1 and not args.all:
        print(
            f"Found {len(results)} matches, showing best match. Use --all to list all."
        )
        print()
        print_entry(results[0])
        return 0

    if len(results) > 1:
        for idx, entry in enumerate(results, start=1):
            print(f"[{idx}] {entry.get('opcode', '')} {entry.get('mnemonic', '')}")
        return 0

    print_entry(results[0])
    return 0
671
672
if __name__ == "__main__":
    # Exit with main()'s status; Ctrl-C is reported on stderr and mapped to 130.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        exit_status = 130
    raise SystemExit(exit_status)