Portraits et corrections sur valeurs des PNJ
Some checks failed
Release Creation / build (release) Failing after 1m24s
Some checks failed
Release Creation / build (release) Failing after 1m24s
This commit is contained in:
154
extract_pretires.py
Normal file
154
extract_pretires.py
Normal file
@@ -0,0 +1,154 @@
|
||||
import html
import json
import re
import sys
import unicodedata

import fitz
|
||||
|
||||
# Location of the PDF to analyse. The built-in default is an absolute path
# under /home/morr; a different file can be given as the first command-line
# argument so the script also runs on other machines.
pdf_path = (
    sys.argv[1]
    if len(sys.argv) > 1
    else '/home/morr/work/uberwald/fvtt-celestopol/__regles/Célestopol 1922 Fiches de prêts à jouer v1_cdjdr.pdf'
)
# Opened at module level; the reporting loop at the end of the script reads
# pages from this handle.
doc = fitz.open(pdf_path)
|
||||
|
||||
def is_green(color, tol=0.008, target=(0.131, 0.284, 0.160)):
    """Return True when *color* is within *tol* of the reference green.

    Args:
        color: Sequence of colour components (for example the ``fill`` value
            of a drawing), or ``None``/empty when there is no colour.
        tol: Maximum absolute difference allowed on each component
            (strict comparison).
        target: Reference colour components; defaults to the green this
            script looks for.

    Returns:
        ``True`` if every component of *target* is matched by the
        corresponding component of *color*, ``False`` otherwise (including
        when *color* is missing or has fewer components than *target*).
    """
    # A colour with too few components cannot match; bail out instead of
    # raising IndexError.
    if not color or len(color) < len(target):
        return False
    # zip() stops at len(target), so extra trailing components are ignored.
    return all(abs(c - t) < tol for c, t in zip(color, target))
|
||||
|
||||
# Skill labels looked up on a stats page; get_skill_values() reports one
# entry per label, in this order.
# NOTE(review): the four rows of four presumably follow the four domains
# handled by get_resistances() — confirm against the character sheet.
SKILL_LIST = [
    'ARTIFICE', 'ATTRACTION', 'COERCITION', 'FAVEUR',
    'ÉCHAUFFOURÉE', 'EFFACEMENT', 'MOBILITÉ', 'PROUESSE',
    'APPRÉCIATION', 'ARTS', 'INSPIRATION', 'TRAQUE',
    'INSTRUCTION', 'MERV.TECH.', 'RAISONNEMENT', 'TRAITEMENT',
]
|
||||
|
||||
def norm(s):
    """Return *s* folded to an accent-free, dot-free, upper-case key.

    The result is used to compare labels regardless of case, accents or
    abbreviation dots: surrounding whitespace is stripped, the text is
    upper-cased, the ligature ``Œ`` becomes ``OE``, every ``.`` is removed
    and all diacritics are dropped.

    Diacritics are removed through Unicode decomposition rather than a fixed
    list of replacements, so decomposed input (a base letter followed by a
    combining accent) and accented letters outside the original list are
    handled too.

    Args:
        s: Text to normalise.

    Returns:
        The normalised string.
    """
    folded = s.strip().upper().replace('Œ', 'OE').replace('.', '')
    # NFD splits each accented letter into base letter + combining mark(s);
    # dropping the combining marks leaves the bare letter.
    decomposed = unicodedata.normalize('NFD', folded)
    bare = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Recompose whatever NFD split without a combining mark being involved.
    return unicodedata.normalize('NFC', bare)
|
||||
|
||||
def get_skill_values(page):
    """Count the green circles drawn on the row of each skill label.

    Each entry of ``page.get_text("words")`` is read as
    ``(x0, y0, x1, y1, text, ...)``. A word whose normalised text equals a
    normalised SKILL_LIST entry, and whose left edge is below x = 430, fixes
    the vertical centre of that skill's row. Drawings whose fill passes
    is_green() and whose bounding box is between 5.5 and 8.5 wide are
    treated as filled circles; a circle belongs to a skill when its vertical
    centre is less than 7 away from the label's vertical centre.

    Args:
        page: Page object providing ``get_text("words")`` and
            ``get_drawings()``.

    Returns:
        dict mapping every SKILL_LIST entry to its circle count; a skill
        whose label was not found on the page gets 0.
    """
    words = page.get_text("words")

    # Normalised label -> canonical skill name, built once instead of
    # re-normalising every skill for every word on the page.
    label_to_skill = {norm(sk): sk for sk in SKILL_LIST}

    skill_y = {}
    for w in words:
        # NOTE(review): 430 is presumably the right edge of the label
        # column — confirm against the sheet layout.
        if w[0] >= 430:
            continue
        sk = label_to_skill.get(norm(w[4]))
        if sk is not None:
            skill_y[sk] = (w[1] + w[3]) / 2

    # MERV.TECH. special case: any word containing "MERV" sets the row, and
    # this second pass takes precedence over the exact match above.
    for w in words:
        if 'MERV' in w[4].upper() and w[0] < 430:
            skill_y['MERV.TECH.'] = (w[1] + w[3]) / 2

    # Vertical centres of the green drawings whose width looks like a circle.
    # Only the bounding-box width is checked, not the height.
    circle_ys = []
    for d in page.get_drawings():
        if not is_green(d.get('fill')):
            continue
        rect = d['rect']
        if 5.5 < rect.x1 - rect.x0 < 8.5:
            circle_ys.append((rect.y0 + rect.y1) / 2)

    skills = {}
    for sk in SKILL_LIST:
        sy = skill_y.get(sk)
        if sy is None:
            skills[sk] = 0
        else:
            skills[sk] = sum(1 for cy in circle_ys if abs(cy - sy) < 7)
    return skills
|
||||
|
||||
def get_resistances(page):
    """Read the number printed next to each domain label.

    Each entry of ``page.get_text("words")`` is read as
    ``(x0, y0, x1, y1, text, ...)``. A word whose normalised text is AME,
    CORPS, COEUR or ESPRIT and whose left edge lies strictly between
    x = 300 and x = 500 fixes the y position of that domain; when a label
    occurs several times, the occurrence with the largest y wins. The value
    of a domain is the first decimal word, in extraction order, whose left
    edge is beyond x = 480 and whose top is less than 35 away from the
    label's top.

    Args:
        page: Page object providing ``get_text("words")``.

    Returns:
        dict with a subset of the keys ``'ame'``, ``'corps'``, ``'coeur'``,
        ``'esprit'`` mapped to ints; a domain is omitted when its label or a
        matching number was not found.
    """
    words = page.get_text("words")

    labels = {'AME': 'ame', 'CORPS': 'corps', 'COEUR': 'coeur', 'ESPRIT': 'esprit'}
    domain_y = {}
    # Ascending y, so a later (lower) occurrence overwrites an earlier one.
    for w in sorted(words, key=lambda x: x[1]):
        dom = labels.get(norm(w[4]))
        # NOTE(review): 300..500 is presumably the x range of the label
        # column — confirm against the sheet layout.
        if dom is not None and 300 < w[0] < 500:
            domain_y[dom] = w[1]

    # Numeric candidates, filtered once. isdecimal() (not isdigit()) is used
    # because int() rejects digit-like characters such as superscripts.
    numbers = [
        (w[1], int(w[4].strip()))
        for w in words
        if w[0] > 480 and w[4].strip().isdecimal()
    ]

    res = {}
    for dom, dy in domain_y.items():
        # NOTE(review): this keeps the first candidate within 35 of the
        # label, not the nearest one — confirm that is intended.
        for y, value in numbers:
            if abs(y - dy) < 35:
                res[dom] = value
                break
    return res
|
||||
|
||||
def get_anomalie_name(stats_page):
    """Guess the anomaly name from the plain text of a stats page.

    The page text is split into stripped, non-empty lines. Every line that
    contains "ANOMALIE" (case-insensitive) or the substring "NV" opens a
    window on the nine lines that follow it; the first line in that window
    that is longer than one character, starts with an upper-case letter, is
    not one of the header words and does not start with one of the known
    rule-text prefixes is returned.

    The original author notes that the name sits in the bottom-right of the
    page; this function relies on text order only, not on coordinates.

    Args:
        stats_page: Page object providing ``get_text("text")``.

    Returns:
        The candidate line, or ``"?"`` when none is found.
    """
    text = stats_page.get_text("text")
    lines = [stripped for stripped in (raw.strip() for raw in text.split('\n')) if stripped]

    # Header words that may follow the trigger line but are not a name.
    skip_words = {'ANOMALIE', 'NV', 'RÉSISTANCE', 'RESISTANCE'}
    # Beginnings of rule-text lines; a tuple so str.startswith() can test
    # them all in one call.
    skip_starts = ('pour ', 'lors ', 'gagner ', 'trouver ', 'éviter ', 'sortir ', 'obtenir ',
                   'lors d', 'Vider ', 'Gain ', 'en pui', 'pour ne', 'pour ré')

    for i, line in enumerate(lines):
        if 'ANOMALIE' not in line.upper() and 'NV' not in line:
            continue
        # Look at the (up to) nine lines following the trigger line.
        for candidate in lines[i + 1:i + 10]:
            if candidate in skip_words or candidate.startswith(skip_starts):
                continue
            if len(candidate) > 1 and candidate[0].isupper():
                return candidate
    return "?"
|
||||
|
||||
def get_anomalie_niveau(stats_page):
    """Return the first number found in the lower-right area of the page.

    Each entry of ``stats_page.get_text("words")`` is read as
    ``(x0, y0, x1, y1, text, ...)``. Words are scanned top-to-bottom, then
    left-to-right, and the first decimal word whose left edge is beyond
    x = 480 and whose top is beyond y = 650 is returned as an int.

    Args:
        stats_page: Page object providing ``get_text("words")``.

    Returns:
        The number as an int, or ``None`` when no such word exists.
    """
    ordered = sorted(stats_page.get_text("words"), key=lambda x: (x[1], x[0]))
    # isdecimal() (not isdigit()) is used because int() rejects digit-like
    # characters such as superscripts.
    # NOTE(review): 480 / 650 presumably delimit the anomaly box on the
    # sheet — confirm against the layout.
    return next(
        (
            int(w[4].strip())
            for w in ordered
            if w[0] > 480 and w[1] > 650 and w[4].strip().isdecimal()
        ),
        None,
    )
|
||||
|
||||
def get_char_base_info(stats_page):
    """Extract the character name from the large bold text of a stats page.

    Walks every span of ``stats_page.get_text("dict")`` and keeps the text of
    spans that are larger than size 11, use a font whose name contains
    "Bold", have more than three characters once stripped, and whose origin
    y lies strictly between 150 and 250. When several spans qualify, the
    last one encountered wins.

    Args:
        stats_page: Page object providing ``get_text("dict")``.

    Returns:
        ``{'name': <text>}``, with ``None`` as the value when no span
        qualifies.
    """
    blocks = stats_page.get_text("dict")["blocks"]
    # Blocks without a "lines" entry contribute no spans.
    spans = (
        span
        for block in blocks
        for line in block.get("lines", [])
        for span in line.get("spans", [])
    )

    name = None
    for span in spans:
        if span.get("size", 0) <= 11 or 'Bold' not in span.get("font", ""):
            continue
        y = span["origin"][1]
        text = span["text"].strip()
        # NOTE(review): 150..250 is presumably the vertical band of the name
        # banner — confirm against the sheet layout.
        if len(text) > 3 and 150 < y < 250:
            name = text
    return {'name': name}
|
||||
|
||||
def get_raw_text(page):
    """Return the page content as one line of plain text.

    The page is rendered with ``page.get_text("html")``; every tag is
    replaced by a space, character references are decoded, and runs of
    whitespace are collapsed to single spaces.

    Named references such as ``&amp;`` or ``&lt;`` are decoded as well as
    numeric ones, so they no longer leak into the result.

    Args:
        page: Page object providing ``get_text("html")``.

    Returns:
        The cleaned text, without leading or trailing whitespace.
    """
    markup = page.get_text("html")
    # Tags are stripped before decoding so that an escaped "<" in the text is
    # never mistaken for the start of a tag.
    without_tags = re.sub(r'<[^>]+>', ' ', markup)
    decoded = html.unescape(without_tags)
    return re.sub(r'\s+', ' ', decoded).strip()
|
||||
|
||||
def parse_aspects_page(asp_page):
    """Return the non-empty text lines of a page, stripped of whitespace.

    Args:
        asp_page: Page object providing ``get_text("text")``.

    Returns:
        list of str, in page text order; lines that are empty or contain
        only whitespace are dropped.
    """
    text = asp_page.get_text("text")
    stripped_lines = (raw.strip() for raw in text.split('\n'))
    return [line for line in stripped_lines if line]
|
||||
|
||||
# One entry per character: zero-based page indices into the PDF, as
# (name_idx, anom_desc_idx, stats_idx, aspects_idx).
CHARACTERS = [
    (0, 1, 2, 3),
    (4, 5, 6, 7),
    (8, 9, 10, 11),
    (12, 13, 14, 15),
    (16, 17, 18, 19),
    (20, 21, 22, 23),
    (24, 25, 26, 27),
    (28, 29, 30, 31),
]
|
||||
|
||||
# Print, for every character, everything extracted from its pages so the
# values can be checked by eye. The document is closed even when one of the
# extraction steps raises.
try:
    # The name page index is listed in CHARACTERS but not read here.
    for _n_idx, a_idx, s_idx, asp_idx in CHARACTERS:
        stats_page = doc[s_idx]
        skills = get_skill_values(stats_page)
        res = get_resistances(stats_page)
        anom_name = get_anomalie_name(stats_page)
        anom_nv = get_anomalie_niveau(stats_page)
        char_info = get_char_base_info(stats_page)
        anom_desc = get_raw_text(doc[a_idx])
        asp_lines = parse_aspects_page(doc[asp_idx])

        print(f"\n{'='*70}")
        # The 'name' key is always present (possibly None), so a .get()
        # default would never apply; fall back explicitly instead.
        print(f"NAME: {char_info.get('name') or '?'}")
        print(f"SKILLS: {json.dumps(skills, ensure_ascii=False)}")
        print(f"RESISTANCES: {res}")
        print(f"ANOMALIE: {anom_name} nv{anom_nv}")
        print(f"ANOM DESC (first 300 chars): {anom_desc[:300]}")
        print("ASPECTS LINES:")
        # Only the first 60 lines of the aspects page are listed.
        for i, line in enumerate(asp_lines[:60]):
            print(f" {i:2d}: {line}")
finally:
    doc.close()

print("\nDONE")
|
||||
Reference in New Issue
Block a user