#!/usr/bin/env python3
"""Extract Machine Gods of the Noxian Expanse rules PDF to plain text."""

import re
import sys
from pathlib import Path

import fitz

RULES_PDF = Path("rules/Machine Gods of the Noxian Expanse - Core Rules BETA 3.pdf")
OUTPUT_TXT = Path("rules/rules_full.txt")

# Separator placed between consecutive page sections in the combined output.
_PAGE_SEPARATOR = "\n\n"


def _extract_pages(pdf_path: Path) -> list:
    """Return one string per page of *pdf_path*, each with a page banner.

    Every entry has the form ``"===== PAGE <n> =====\\n<page text>"`` where
    ``<n>`` is the 1-based page number.
    """
    doc = fitz.open(str(pdf_path))
    try:
        sections = []
        for number, page in enumerate(doc, start=1):
            body = page.get_text("text")
            sections.append(f"===== PAGE {number} =====\n{body}")
        return sections
    finally:
        # Release the document even if a page fails to extract.
        doc.close()


def extract_pdf_text(pdf_path: Path) -> str:
    """Return the text of every page in *pdf_path* as a single string.

    Each page is preceded by a ``===== PAGE <n> =====`` banner line and pages
    are separated by a blank line.
    """
    return _PAGE_SEPARATOR.join(_extract_pages(pdf_path))


def main():
    """Extract ``RULES_PDF`` into ``OUTPUT_TXT`` and print a short summary.

    Exits with status 1 (after printing to stderr) when the PDF is missing.
    """
    if not RULES_PDF.exists():
        print(f"Error: {RULES_PDF} not found", file=sys.stderr)
        sys.exit(1)

    print(f"Extracting {RULES_PDF}...")
    pages = _extract_pages(RULES_PDF)
    text = _PAGE_SEPARATOR.join(pages)
    OUTPUT_TXT.write_text(text, encoding="utf-8")

    print(f"Done — {len(text)} chars written to {OUTPUT_TXT}")
    # Count the extracted pages directly: searching the output for the banner
    # would over-count if the PDF text itself contains "===== PAGE".
    print(f"Total pages extracted: {len(pages)}")


if __name__ == "__main__":
    main()