- Fixed processors.py to use correct pypdf API - Fixed app.js escaping issues - PDF generation now working with real Laufzettel files 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
101 lines
3.1 KiB
Python
101 lines
3.1 KiB
Python
from docx import Document
|
|
from docx.oxml.ns import qn
|
|
from pypdf import PdfReader, PdfWriter
|
|
import zipfile
|
|
import xml.etree.ElementTree as ET
|
|
|
|
|
|
def parse_laufzettel(docx_path):
    """Extract claim data from the content controls of a Laufzettel .docx.

    The main document part (``word/document.xml``) is read straight from the
    archive. Every structured document tag (``w:sdt``) that carries its own
    alias is inspected; the text of all ``w:t`` elements inside the tag is
    joined with single spaces and stripped.

    Args:
        docx_path: Path or file-like object accepted by ``zipfile.ZipFile``.

    Returns:
        dict: One entry per recognised alias found in the document
        (``Schadennummer``, ``Versicherungsnehmer``, ``Versicherer``,
        ``Schadenort``, ``Schadenart``). If a ``Straße`` and/or ``Ort`` alias
        is present, ``Adresse`` holds the non-empty parts joined as
        ``"<street>, <city>"``. When an alias occurs more than once, the last
        occurrence wins.

    Raises:
        zipfile.BadZipFile: If ``docx_path`` is not a valid zip archive.
        KeyError: If the archive has no ``word/document.xml`` member.
        xml.etree.ElementTree.ParseError: If the document part is malformed.
    """
    w_ns = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
    ns = {'w': w_ns}
    # ElementTree exposes namespaced attributes as "{uri}local".
    val_attr = '{' + w_ns + '}val'
    # Aliases that are copied into the result under the same key.
    direct_fields = (
        'Schadennummer',
        'Versicherungsnehmer',
        'Versicherer',
        'Schadenort',
        'Schadenart',
    )

    with zipfile.ZipFile(docx_path, 'r') as z:
        xml_content = z.read('word/document.xml')
    root = ET.fromstring(xml_content)

    data = {}
    street = None
    city = None

    for sdt in root.findall('.//w:sdt', ns):
        # Look only at this tag's own properties. A descendant search would
        # attribute the alias of a nested content control to its wrapper and
        # process the same alias twice.
        alias = sdt.find('w:sdtPr/w:alias', ns)
        if alias is None:
            continue

        alias_val = alias.get(val_attr)
        texts = sdt.findall('.//w:t', ns)
        content = ' '.join(t.text for t in texts if t.text).strip()

        if alias_val in direct_fields:
            data[alias_val] = content
        elif alias_val == 'Straße':
            street = content
        elif alias_val == 'Ort':
            city = content

    # Street and city are combined after the scan so the result does not
    # depend on the order of the two controls in the document.
    if street is not None or city is not None:
        data['Adresse'] = ', '.join(
            part for part in (street, city) if part
        )

    return data
|
|
|
|
|
|
def fill_pdf(template_path, output_path, data):
    """Fill the form fields of a PDF template and write the result.

    The template is copied into a new writer, the given values are applied
    to the form fields on every page, and the document is saved.

    Args:
        template_path: Path of the fillable PDF template.
        output_path: Path the filled PDF is written to.
        data: Mapping of form field name to the value to set.

    Returns:
        The ``output_path`` that was written.
    """
    reader = PdfReader(template_path)
    writer = PdfWriter()
    writer.append(reader)

    # Form widgets live in each page's /Annots array, so every page has to
    # be visited; updating only the first page leaves fields on later pages
    # of a multi-page form empty. Pages without annotations are skipped.
    for page in writer.pages:
        if '/Annots' in page:
            writer.update_page_form_field_values(page, data)

    with open(output_path, 'wb') as f:
        writer.write(f)
    return output_path
|
|
|
|
|
|
def analyze_pdf(pdf_path):
    """Read the form fields of a PDF and group them by field type.

    Args:
        pdf_path: Path of the PDF to inspect.

    Returns:
        dict: A mapping with three keys:

        * ``textfields``: field name -> current value as string (``''`` if
          the field has no value).
        * ``dropdowns``: field name -> ``{'selected': str, 'options': list}``.
          For options stored as a list (pair), the second entry is used.
        * ``checkboxes``: field name -> ``True`` if the field has a value
          other than ``/Off``.

        All three sub-dicts are empty when the PDF has no form fields.
    """
    reader = PdfReader(pdf_path)
    result = {'textfields': {}, 'dropdowns': {}, 'checkboxes': {}}

    fields = reader.get_fields()
    if not fields:
        return result

    for name, field in fields.items():
        field_type = field.get('/FT', '')
        value = field.get('/V', '')

        if field_type == '/Tx':
            result['textfields'][name] = str(value) if value else ''
        elif field_type == '/Ch':
            opts = field.get('/Opt', [])
            options = [
                str(opt[1]) if isinstance(opt, list) else str(opt)
                for opt in opts
            ]
            result['dropdowns'][name] = {
                'selected': str(value) if value else '',
                'options': options,
            }
        elif field_type == '/Btn':
            # A button field without /V has never been switched on, so a
            # missing or empty value counts as unchecked, just like an
            # explicit /Off state.
            result['checkboxes'][name] = str(value) not in ('', '/Off')

    return result
|
|
|
|
|
|
def generate_vorbericht(pdf_data, template_path, output_path):
    """Fill the ``xx`` placeholders of a Vorbericht template and save it.

    For every body paragraph that contains the placeholder ``xx``, the first
    key (case-insensitive) from ``Schadennummer``, ``Versicherer``,
    ``Versicherungsnehmer``, ``Adresse`` that occurs in the paragraph text
    decides which value replaces the first ``xx``. At most one replacement is
    made per paragraph.

    Args:
        pdf_data: Mapping with a ``textfields`` dict; missing entries leave
            the placeholder as ``xx``.
        template_path: Path of the .docx template.
        output_path: Path the generated document is saved to.

    Returns:
        The ``output_path`` that was written.
    """
    doc = Document(template_path)
    tf = pdf_data.get('textfields', {})

    replacements = {
        'Schadennummer': tf.get('Schadennummer', 'xx'),
        'Versicherer': tf.get('Versicherer', 'xx'),
        'Versicherungsnehmer': tf.get('Versicherungsnehmer', 'xx'),
        'Adresse': tf.get('Adresse', 'xx'),
    }

    for para in doc.paragraphs:
        text = para.text
        if 'xx' not in text:
            continue
        lowered = text.lower()

        for key, value in replacements.items():
            if key.lower() not in lowered:
                continue

            # Replace inside the run that holds the placeholder so run-level
            # formatting (e.g. a bold label) survives. Assigning para.text
            # rebuilds the paragraph as a single unformatted run, so it is
            # used only when the placeholder is not inside a single run.
            for run in para.runs:
                if 'xx' in run.text:
                    run.text = run.text.replace('xx', value, 1)
                    break
            else:
                para.text = text.replace('xx', value, 1)
            break

    doc.save(output_path)
    return output_path
|