""" Schadenprotokoll Processors - PDF/DOCX Verarbeitung """ from docx import Document from docx.oxml.ns import qn from pypdf import PdfReader, PdfWriter import zipfile import xml.etree.ElementTree as ET def parse_laufzettel(docx_path: str) -> dict: """Extrahiert Schadensdaten aus dem Laufzettel.docx""" data = {} with zipfile.ZipFile(docx_path, "r") as z: xml_content = z.read("word/document.xml") root = ET.fromstring(xml_content) ns = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"} for sdt in root.findall(".//w:sdt", ns): alias = sdt.find(".//w:alias", ns) if alias is not None: alias_val = alias.get(qn("w:val")) texts = sdt.findall(".//w:t", ns) content = " ".join([t.text for t in texts if t.text]).strip() field_map = { "Schadennummer": "Schadennummer", "Versicherungsnehmer": "Versicherungsnehmer:", "Versicherer": "Versicherer:", "Datum": "Datum:", "Schadenort": "Schadenort:", "Schadenart": "Schadenart:", "e-Mail": "e-Mail VN:", } if alias_val in field_map: data[field_map[alias_val]] = content elif alias_val == "Straße": data["Adresse:"] = content elif alias_val == "Ort": if "Adresse:" in data: data["Adresse:"] += ", " + content else: data["Adresse:"] = content return data def fill_pdf(template_path: str, output_path: str, data: dict) -> str: """Befuellt das PDF-Template mit den Schadensdaten""" reader = PdfReader(template_path) writer = PdfWriter() for page in reader.pages: writer.add_page(page) if "/Annots" in page: writer.update_page_form_field_values( writer.pages[-1], data, auto_regenerate=False ) with open(output_path, "wb") as f: writer.write(f) return output_path def analyze_pdf(pdf_path: str) -> dict: """Liest alle Formularfelder aus dem ausgefuellten PDF""" reader = PdfReader(pdf_path) result = {"textfields": {}, "dropdowns": {}, "checkboxes": {}} for page_num, page in enumerate(reader.pages): if "/Annots" not in page: continue for annot in page["/Annots"]: obj = annot.get_object() field_type = obj.get("/FT", "") field_name = obj.get("/T", f"Feld_{page_num}") if field_type == "/Tx": value = obj.get("/V", "") result["textfields"][field_name] = str(value) if value else "" elif field_type == "/Ch": value = obj.get("/V", "") options = [] if "/Opt" in obj: for opt in obj["/Opt"]: if isinstance(opt, list): options.append(str(opt[1]) if len(opt) > 1 else str(opt[0])) else: options.append(str(opt)) result["dropdowns"][field_name] = { "selected": str(value) if value else "", "options": options } elif field_type == "/Btn": value = obj.get("/V", "/Off") result["checkboxes"][field_name] = value != "/Off" return result def generate_vorbericht(pdf_data: dict, template_path: str, output_path: str) -> str: """Generiert Vorbericht.docx aus PDF-Daten""" doc = Document(template_path) tf = pdf_data.get("textfields", {}) replacements = { "Schadennummer": tf.get("Schadennummer", "xx"), "Versicherer": tf.get("Versicherer:", "xx"), "Versicherungsnehmer": tf.get("Versicherungsnehmer:", "xx"), "Adresse": tf.get("Adresse:", "xx"), } # Dropdown-Antworten fuer Sachverhalt sammeln dropdowns = pdf_data.get("dropdowns", {}) sachverhalt = [] for key in sorted(dropdowns.keys()): val = dropdowns[key].get("selected", "") if val and val not in ["Was ist beschädigt?", "Schadenursache?", ""]: sachverhalt.append(val) for para in doc.paragraphs: text = para.text for key, value in replacements.items(): if key.lower() in text.lower() and "xx" in text: para.text = text.replace("xx", value, 1) break doc.save(output_path) return output_path