"""Tests for CRM CSV parser: synthetic and real-file tests.""" import pathlib from datetime import date import pytest from app.services.csv_parser import ParsedCase, parse_csv, parse_hauptkontakt from app.utils.fallgruppe_map import VALID_FALLGRUPPEN DATA_DIR = pathlib.Path("/home/frontend/dak_c2s/data") # ── parse_hauptkontakt tests ───────────────────────────────────────── class TestParseHauptkontakt: def test_full_contact(self): """All four fields present and valid.""" result = parse_hauptkontakt("Tonn | Regina | 28.04.1960 | D410126355") assert result["nachname"] == "Tonn" assert result["vorname"] == "Regina" assert result["geburtsdatum"] == date(1960, 4, 28) assert result["kvnr"] == "D410126355" def test_missing_kvnr(self): """KVNR field empty (trailing pipe).""" result = parse_hauptkontakt("Daum | Luana | 05.02.2016 |") assert result["nachname"] == "Daum" assert result["vorname"] == "Luana" assert result["geburtsdatum"] == date(2016, 2, 5) assert result["kvnr"] is None def test_bad_date(self): """Invalid date like 29.08.0196 -- geburtsdatum should be None.""" result = parse_hauptkontakt("Krölls | Peter | 29.08.0196 | S361390622") assert result["nachname"] == "Krölls" assert result["vorname"] == "Peter" assert result["geburtsdatum"] is None # bad date => None assert result["kvnr"] == "S361390622" def test_missing_geburtsdatum(self): """Geburtsdatum field empty (spaces only).""" result = parse_hauptkontakt("Schaumann | Janina | |") assert result["nachname"] == "Schaumann" assert result["vorname"] == "Janina" assert result["geburtsdatum"] is None assert result["kvnr"] is None def test_missing_geburtsdatum_with_kvnr(self): """Geburtsdatum empty but KVNR present.""" result = parse_hauptkontakt("Schuber | Fritz | | C352208902") assert result["nachname"] == "Schuber" assert result["vorname"] == "Fritz" assert result["geburtsdatum"] is None assert result["kvnr"] == "C352208902" def test_missing_vorname(self): """Vorname field empty.""" result = parse_hauptkontakt("Wuffy | | |") assert result["nachname"] == "Wuffy" assert result["vorname"] is None assert result["geburtsdatum"] is None assert result["kvnr"] is None def test_whitespace_handling(self): """Extra whitespace around pipes is stripped.""" result = parse_hauptkontakt(" Tonn | Regina | 28.04.1960 | D410126355 ") assert result["nachname"] == "Tonn" assert result["vorname"] == "Regina" assert result["kvnr"] == "D410126355" def test_hyphenated_name(self): """Hyphenated names are preserved.""" result = parse_hauptkontakt( "Hähle-Jakelski | Rüdiger | 14.10.1941 | X304698107" ) assert result["nachname"] == "Hähle-Jakelski" assert result["vorname"] == "Rüdiger" # ── parse_csv synthetic tests ───────────────────────────────────────── def _make_csv_bytes(*rows: str, bom: bool = True) -> bytes: """Helper: build CSV bytes from header + data rows.""" header = '"Hauptkontakt","Name","Thema","Erstellungsdatum","Modul"' lines = [header] + list(rows) text = "\n".join(lines) prefix = b"\xef\xbb\xbf" if bom else b"" return prefix + text.encode("utf-8") class TestParseCSVSynthetic: def test_basic_single_row(self): """Parse a minimal CSV with one valid row.""" row = ( '"Tonn | Regina | 28.04.1960 | D410126355",' '"103486",' '"Zweitmeinung für Regina Tonn",' '"02.02.26, 08:50",' '"Zweitmeinung Kardiologie"' ) cases = parse_csv(_make_csv_bytes(row)) assert len(cases) == 1 c = cases[0] assert c.nachname == "Tonn" assert c.vorname == "Regina" assert c.geburtsdatum == date(1960, 4, 28) assert c.kvnr == "D410126355" assert c.thema == "Zweitmeinung für Regina Tonn" assert c.fallgruppe == "kardio" assert c.datum == date(2026, 2, 2) assert c.jahr == 2026 assert c.kw == 6 assert c.crm_ticket_id == "103486" def test_multiple_fallgruppen(self): """Parse CSV with three different Fallgruppen.""" rows = [ ( '"A | B | 01.01.1990 | X123456789",' '"100",' '"Thema 1",' '"02.02.26, 09:00",' '"Zweitmeinung Onkologie"' ), ( '"C | D | 15.06.1985 | Y987654321",' '"101",' '"Thema 2",' '"03.02.26, 10:00",' '"Zweitmeinung Gallenblase"' ), ( '"E | F | 20.12.1970 |",' '"102",' '"Thema 3",' '"04.02.26, 11:00",' '"Zweitmeinung Schilddrüse"' ), ] cases = parse_csv(_make_csv_bytes(*rows)) assert len(cases) == 3 assert cases[0].fallgruppe == "onko" assert cases[1].fallgruppe == "galle" assert cases[2].fallgruppe == "sd" assert cases[2].kvnr is None def test_empty_modul_skipped(self): """Rows with empty Modul field are skipped (spam/junk entries).""" rows = [ ( '"Tonn | Regina | 28.04.1960 | D410126355",' '"103486",' '"Thema",' '"02.02.26, 08:50",' '"Zweitmeinung Onkologie"' ), ( '"Wuffy | | |",' '"103767",' '"Spam",' '"17.02.26, 17:16",' '""' ), ] cases = parse_csv(_make_csv_bytes(*rows)) assert len(cases) == 1 assert cases[0].nachname == "Tonn" def test_bad_geburtsdatum_still_parses(self): """Row with bad Geburtsdatum year is parsed, geburtsdatum=None.""" row = ( '"Krölls | Peter | 29.08.0196 | S361390622",' '"103514",' '"Zweitmeinung für Peter Krölls",' '"04.02.26, 11:06",' '"Zweitmeinung Onkologie"' ) cases = parse_csv(_make_csv_bytes(row)) assert len(cases) == 1 assert cases[0].nachname == "Krölls" assert cases[0].geburtsdatum is None assert cases[0].kvnr == "S361390622" assert cases[0].fallgruppe == "onko" def test_without_bom(self): """CSV without BOM is also parseable.""" row = ( '"A | B | 01.01.1990 | X123456789",' '"100",' '"Thema",' '"02.02.26, 09:00",' '"Zweitmeinung Onkologie"' ) cases = parse_csv(_make_csv_bytes(row, bom=False)) assert len(cases) == 1 def test_empty_csv_returns_empty(self): """CSV with only header returns empty list.""" cases = parse_csv(_make_csv_bytes()) assert cases == [] def test_crm_ticket_id_from_name_column(self): """The 'Name' column contains the CRM ticket ID.""" row = ( '"A | B | 01.01.1990 |",' '"999888",' '"Thema",' '"10.02.26, 10:00",' '"Zweitmeinung Intensiv"' ) cases = parse_csv(_make_csv_bytes(row)) assert cases[0].crm_ticket_id == "999888" assert cases[0].fallgruppe == "intensiv" # ── Real CSV file tests ──────────────────────────────────────────────── class TestParseRealCSV: @pytest.mark.skipif( not (DATA_DIR / "2026-02-06-0406.csv").exists(), reason="Real CSV file not available", ) def test_parse_real_csv_feb06(self): """Parse the 2026-02-06-0406.csv real export.""" content = (DATA_DIR / "2026-02-06-0406.csv").read_bytes() cases = parse_csv(content, filename="2026-02-06-0406.csv") # Should have parsed rows (16 data rows, minus 0 with empty Modul) assert len(cases) > 0 assert len(cases) == 16 # Every case must have a valid fallgruppe for c in cases: assert c.fallgruppe in VALID_FALLGRUPPEN, ( f"Invalid fallgruppe '{c.fallgruppe}' for {c.nachname}" ) assert c.nachname # nachname is always present assert c.datum is not None assert c.jahr >= 2026 assert 1 <= c.kw <= 53 assert c.crm_ticket_id is not None # Spot-check first row first = cases[0] assert first.nachname == "Tonn" assert first.vorname == "Regina" assert first.kvnr == "D410126355" assert first.fallgruppe == "kardio" assert first.datum == date(2026, 2, 2) assert first.kw == 6 # Spot-check edge case: missing KVNR daum = [c for c in cases if c.nachname == "Daum"][0] assert daum.kvnr is None assert daum.fallgruppe == "intensiv" # Spot-check edge case: bad Geburtsdatum krolls = [c for c in cases if c.nachname == "Krölls"][0] assert krolls.geburtsdatum is None assert krolls.kvnr == "S361390622" # Spot-check edge case: missing Geburtsdatum schaumann = [c for c in cases if c.nachname == "Schaumann"][0] assert schaumann.geburtsdatum is None assert schaumann.kvnr is None @pytest.mark.skipif( not (DATA_DIR / "2026-02-17-1041.csv").exists(), reason="Real CSV file not available", ) def test_parse_real_csv_with_spam_rows(self): """Parse file that contains rows with empty Modul (spam entries).""" content = (DATA_DIR / "2026-02-17-1041.csv").read_bytes() cases = parse_csv(content, filename="2026-02-17-1041.csv") # File has 6 data rows, 2 with empty Modul => 4 valid cases assert len(cases) == 4 # Spam entries (Wuffy, Apotheke) should NOT appear names = [c.nachname for c in cases] assert "Wuffy" not in names assert "Apotheke" not in names # All valid cases have a proper fallgruppe for c in cases: assert c.fallgruppe in VALID_FALLGRUPPEN @pytest.mark.skipif( not (DATA_DIR / "2026-02-17-0553.csv").exists(), reason="Real CSV file not available", ) def test_parse_real_csv_feb17(self): """Parse the 2026-02-17-0553.csv real export.""" content = (DATA_DIR / "2026-02-17-0553.csv").read_bytes() cases = parse_csv(content, filename="2026-02-17-0553.csv") assert len(cases) > 0 # Spot-check: Fritz Schuber has empty Geburtsdatum but has KVNR schuber = [c for c in cases if c.nachname == "Schuber"] if schuber: assert schuber[0].geburtsdatum is None assert schuber[0].kvnr == "C352208902" @pytest.mark.skipif( not (DATA_DIR / "2026-02-23-0902.csv").exists(), reason="Real CSV file not available", ) def test_parse_real_csv_feb23(self): """Parse the 2026-02-23-0902.csv real export.""" content = (DATA_DIR / "2026-02-23-0902.csv").read_bytes() cases = parse_csv(content, filename="2026-02-23-0902.csv") assert len(cases) > 0 for c in cases: assert c.fallgruppe in VALID_FALLGRUPPEN assert c.nachname