Files
fvtt-machine-gods-noxian-ex…/tools/extract_rules.py
T

37 lines
983 B
Python

#!/usr/bin/env python3
"""Extract Machine Gods of the Noxian Expanse rules PDF to plain text."""
import fitz
import sys
import re
from pathlib import Path
# Source PDF to read. This is a relative path, so it is resolved against the
# current working directory of the process running the script.
RULES_PDF = Path("rules/Machine Gods of the Noxian Expanse - Core Rules BETA 3.pdf")
# Destination for the extracted plain text (also a relative path).
OUTPUT_TXT = Path("rules/rules_full.txt")
def extract_pdf_text(pdf_path: Path) -> str:
    """Return the plain text of every page in *pdf_path* as one string.

    Each page's text is preceded by a ``===== PAGE n =====`` banner line,
    where ``n`` is the 1-based page number, and consecutive pages are
    joined with two newlines.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF (``fitz``).

    Returns:
        The banner-prefixed text of all pages, joined into a single string.
    """
    # Use the document as a context manager so it is closed even when
    # extracting a page raises, instead of only on the success path.
    with fitz.open(str(pdf_path)) as doc:
        pages = [
            f"===== PAGE {number} =====\n{page.get_text('text')}"
            for number, page in enumerate(doc, start=1)
        ]
    return "\n\n".join(pages)
def main():
    """Extract RULES_PDF to OUTPUT_TXT and print a short summary.

    Prints an error to stderr and exits with status 1 when RULES_PDF does
    not exist. Otherwise the extracted text is written as UTF-8 and the
    character count and number of page banners are reported on stdout.
    """
    source_missing = not RULES_PDF.exists()
    if source_missing:
        print(f"Error: {RULES_PDF} not found", file=sys.stderr)
        sys.exit(1)

    print(f"Extracting {RULES_PDF}...")
    extracted = extract_pdf_text(RULES_PDF)
    OUTPUT_TXT.write_text(extracted, encoding="utf-8")

    # Pages are counted by the banner prefix that precedes each page's text.
    char_total = len(extracted)
    page_total = extracted.count('===== PAGE')
    print(f"Done — {char_total} chars written to {OUTPUT_TXT}")
    print(f"Total pages extracted: {page_total}")
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()