"""Helpers that move case data between a .docx form, a PDF form and a .docx report."""

import xml.etree.ElementTree as ET
import zipfile

from docx import Document
from docx.oxml.ns import qn
from pypdf import PdfReader, PdfWriter

# Namespace map for ElementTree lookups in word/document.xml.
_W_NS = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}

# Fully qualified WordprocessingML tag names used while collecting text.
_W_P = qn('w:p')
_W_T = qn('w:t')
# Elements that separate words but carry no text of their own.
_W_SPACERS = frozenset({qn('w:tab'), qn('w:br'), qn('w:cr')})

# Content-control aliases that are copied to the result under the same key.
_DIRECT_FIELDS = frozenset({
    'Schadennummer',
    'Versicherungsnehmer',
    'Versicherer',
    'Schadenort',
    'Schadenart',
})

# Marker in the report template that gets replaced by a field value.
_PLACEHOLDER = 'xx'


def _sdt_text(sdt):
    """Return the text of one content control (``w:sdt`` element).

    Runs inside a paragraph are concatenated without a separator, because
    Word may split a single word across several runs; joining them with a
    space would corrupt the value. Paragraphs are separated by one space,
    and tabs / line breaks count as a space.
    """
    chunks = [[]]
    for node in sdt.iter():
        if node.tag == _W_P:
            # A new paragraph starts a new chunk.
            chunks.append([])
        elif node.tag == _W_T:
            chunks[-1].append(node.text or '')
        elif node.tag in _W_SPACERS:
            chunks[-1].append(' ')
    joined = (''.join(pieces).strip() for pieces in chunks)
    return ' '.join(chunk for chunk in joined if chunk)


def parse_laufzettel(docx_path):
    """Extract content-control values from a .docx file.

    Reads ``word/document.xml`` from the archive and inspects every
    ``w:sdt`` element that carries a ``w:alias``.

    Args:
        docx_path: Path (or file-like object) accepted by ``zipfile.ZipFile``.

    Returns:
        dict mapping field names to their text. The aliases in
        ``_DIRECT_FIELDS`` are stored under their own name; ``'Straße'`` and
        ``'Ort'`` are combined into ``'Adresse'`` as ``"<street>, <city>"``.
        ``'Adresse'`` is only present when at least one of the two controls
        exists in the document.

    Raises:
        KeyError: If the archive has no ``word/document.xml`` member.
    """
    with zipfile.ZipFile(docx_path, 'r') as archive:
        root = ET.fromstring(archive.read('word/document.xml'))

    val_attr = qn('w:val')
    data = {}
    street = None
    city = None

    for sdt in root.findall('.//w:sdt', _W_NS):
        alias = sdt.find('.//w:alias', _W_NS)
        if alias is None:
            continue
        alias_val = alias.get(val_attr)
        if alias_val in _DIRECT_FIELDS:
            data[alias_val] = _sdt_text(sdt)
        elif alias_val == 'Straße':
            street = _sdt_text(sdt)
        elif alias_val == 'Ort':
            city = _sdt_text(sdt)

    # Build the address only after the whole document was scanned, so the
    # result does not depend on whether 'Ort' or 'Straße' comes first, and
    # an empty part does not leave a dangling ', '.
    if street is not None or city is not None:
        data['Adresse'] = ', '.join(part for part in (street, city) if part)
    return data


def fill_pdf(template_path, output_path, data):
    """Fill the form fields of a PDF template and write the result.

    Args:
        template_path: PDF template to read.
        output_path: Destination file; opened in binary write mode.
        data: Mapping of field name to value, passed to
            ``PdfWriter.update_page_form_field_values``.

    Returns:
        ``output_path``, unchanged.
    """
    reader = PdfReader(template_path)
    writer = PdfWriter()
    writer.append(reader)

    # The first page is always processed (as before), so pypdf's own
    # diagnostics for templates without a form are unchanged. Later pages
    # are processed too when they carry annotations; otherwise fields on
    # page 2+ of a multi-page template would silently stay empty.
    pages = list(writer.pages)
    targets = pages[:1] + [page for page in pages[1:] if '/Annots' in page]
    for page in targets:
        writer.update_page_form_field_values(page, data)

    with open(output_path, 'wb') as f:
        writer.write(f)
    return output_path


def analyze_pdf(pdf_path):
    """Collect the form fields of a PDF, grouped by field type.

    Args:
        pdf_path: PDF file to inspect.

    Returns:
        dict with three keys:
            ``'textfields'``: name -> value as ``str`` (``''`` when empty),
                for fields whose ``/FT`` is ``/Tx``.
            ``'dropdowns'``: name -> ``{'selected': str, 'options': [str]}``,
                for ``/Ch`` fields.
            ``'checkboxes'``: name -> ``bool``, for ``/Btn`` fields.
        All three are empty when the PDF has no fields.
    """
    result = {'textfields': {}, 'dropdowns': {}, 'checkboxes': {}}
    fields = PdfReader(pdf_path).get_fields()
    if not fields:
        return result

    for name, field in fields.items():
        field_type = field.get('/FT', '')
        value = field.get('/V', '')
        if field_type == '/Tx':
            result['textfields'][name] = str(value) if value else ''
        elif field_type == '/Ch':
            # An /Opt entry is either a single value or a list whose second
            # item is used (presumably the display text; verify against the
            # PDF form in use).
            options = [
                str(opt[1]) if isinstance(opt, list) else str(opt)
                for opt in field.get('/Opt', [])
            ]
            result['dropdowns'][name] = {
                'selected': str(value) if value else '',
                'options': options,
            }
        elif field_type == '/Btn':
            # A button without any /V has never been ticked; only a present
            # value other than '/Off' counts as checked.
            result['checkboxes'][name] = bool(value) and value != '/Off'
    return result


def _replace_placeholder(para, value):
    """Replace the first placeholder in *para* with *value*.

    The replacement is done inside the run that contains the placeholder so
    the formatting of the other runs survives. Assigning ``para.text``
    replaces the paragraph content with a single run and drops run-level
    formatting, so it is only used as a fallback when no single run holds
    the placeholder (e.g. it is split across runs).
    """
    for run in para.runs:
        if _PLACEHOLDER in run.text:
            run.text = run.text.replace(_PLACEHOLDER, value, 1)
            return
    para.text = para.text.replace(_PLACEHOLDER, value, 1)


def generate_vorbericht(pdf_data, template_path, output_path):
    """Fill the ``xx`` placeholders of a .docx template from PDF field data.

    For every body paragraph that contains ``xx`` and (case-insensitively)
    one of the keys ``Schadennummer``, ``Versicherer``,
    ``Versicherungsnehmer`` or ``Adresse``, the first ``xx`` is replaced by
    the matching text-field value. Only the first matching key is applied
    per paragraph.

    Args:
        pdf_data: dict as returned by ``analyze_pdf``; only
            ``pdf_data['textfields']`` is read.
        template_path: .docx template to open.
        output_path: Where the filled document is saved.

    Returns:
        ``output_path``, unchanged.
    """
    doc = Document(template_path)
    textfields = pdf_data.get('textfields', {})

    # A missing *or empty* value keeps the placeholder visible instead of
    # silently deleting it from the report.
    replacements = {
        key: textfields.get(key) or _PLACEHOLDER
        for key in ('Schadennummer', 'Versicherer', 'Versicherungsnehmer', 'Adresse')
    }

    for para in doc.paragraphs:
        text = para.text
        if _PLACEHOLDER not in text:
            continue
        lowered = text.lower()
        for key, value in replacements.items():
            if key.lower() in lowered:
                # Nothing to do when there is no real value; skipping also
                # avoids rewriting (and reformatting) the paragraph for a
                # no-op replacement.
                if value != _PLACEHOLDER:
                    _replace_placeholder(para, value)
                break

    doc.save(output_path)
    return output_path