feat: use KVNR instead of Nachname in fall_id generation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
CCS Admin 2026-02-26 17:04:48 +00:00
parent d33fc7d242
commit a436580b03
2 changed files with 68 additions and 27 deletions

View file

@ -1,13 +1,15 @@
"""Import service for DAK Zweitmeinungs-Portal. """Import service for DAK Zweitmeinungs-Portal.
Handles: Handles:
- fall_id generation: YYYY-KW02d-fallgruppe-Nachname - fall_id generation: YYYY-KW02d-fallgruppe-KVNR (or random suffix)
- Duplicate detection: by fall_id or (nachname, fallgruppe, datum, vorname, geburtsdatum) - Duplicate detection: by fall_id or (nachname, fallgruppe, datum, vorname, geburtsdatum)
- Preview/confirm flow: preview_import() checks for duplicates, confirm_import() inserts - Preview/confirm flow: preview_import() checks for duplicates, confirm_import() inserts
- Import logging: writes ImportLog entry on each confirmed import - Import logging: writes ImportLog entry on each confirmed import
""" """
import logging import logging
import random
import string
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
@ -19,15 +21,24 @@ from app.services.csv_parser import ParsedCase
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def generate_random_suffix(length: int = 6) -> str:
    """Return a random string of uppercase ASCII letters and digits.

    Args:
        length: Number of characters to draw (6 by default).

    Returns:
        A string of ``length`` characters, each drawn independently
        (with replacement) from A-Z and 0-9.
    """
    alphabet = string.ascii_uppercase + string.digits
    drawn = random.choices(alphabet, k=length)
    return "".join(drawn)
def generate_fall_id(parsed: ParsedCase) -> str:
    """Generate the fall_id for a parsed case: YYYY-KW02d-fallgruppe-KVNR.

    The KVNR is used as the identifying suffix, with surrounding
    whitespace removed. If the KVNR is missing, empty, or consists only
    of whitespace, a random suffix from generate_random_suffix() is used
    instead; in that case the id differs between calls for the same case.

    Args:
        parsed: Case providing ``jahr``, ``kw``, ``fallgruppe`` and ``kvnr``.

    Returns:
        The fall_id string; the calendar week is zero-padded to two digits.

    Examples:
        - 2026-06-onko-A123456789
        - 2026-12-kardio-X7K9M2 (random fallback)
    """
    # A blank KVNR cell would otherwise count as "present" and leave the
    # id with a whitespace suffix, so normalise before testing truthiness.
    kvnr = (parsed.kvnr or "").strip()
    suffix = kvnr or generate_random_suffix()
    return f"{parsed.jahr}-{parsed.kw:02d}-{parsed.fallgruppe}-{suffix}"
def check_duplicate(db: Session, parsed: ParsedCase) -> bool: def check_duplicate(db: Session, parsed: ParsedCase) -> bool:

View file

@ -16,6 +16,7 @@ from app.services.import_service import (
check_duplicate, check_duplicate,
confirm_import, confirm_import,
generate_fall_id, generate_fall_id,
generate_random_suffix,
preview_import, preview_import,
) )
@ -55,10 +56,10 @@ def _make_parsed_case(
class TestGenerateFallId: class TestGenerateFallId:
def test_format(self):
    """A case carrying a KVNR yields an id shaped YYYY-KW-fallgruppe-KVNR."""
    case = _make_parsed_case(
        nachname="Tonn",
        kvnr="D410126355",
        fallgruppe="onko",
        jahr=2026,
        kw=6,
    )
    assert generate_fall_id(case) == "2026-06-onko-D410126355"
def test_kw_padding_single_digit(self): def test_kw_padding_single_digit(self):
"""KW < 10 is zero-padded to 2 digits.""" """KW < 10 is zero-padded to 2 digits."""
@ -79,31 +80,40 @@ class TestGenerateFallId:
assert "-01-" in result assert "-01-" in result
def test_different_cases_produce_different_ids(self):
    """Cases that differ in KVNR or fallgruppe get distinct fall_ids."""
    cases = [
        _make_parsed_case(kvnr="A111111111", fallgruppe="onko"),
        _make_parsed_case(kvnr="B222222222", fallgruppe="intensiv"),
        # Same KVNR as the first case, but a different fallgruppe.
        _make_parsed_case(kvnr="A111111111", fallgruppe="kardio"),
    ]
    distinct_ids = {generate_fall_id(case) for case in cases}
    assert len(distinct_ids) == 3
def test_same_patient_same_week_same_fallgruppe(self):
    """Identical KVNR, week and fallgruppe map to one and the same fall_id."""
    first = _make_parsed_case(kvnr="A111111111", fallgruppe="onko", kw=8)
    second = _make_parsed_case(kvnr="A111111111", fallgruppe="onko", kw=8)
    first_id = generate_fall_id(first)
    second_id = generate_fall_id(second)
    assert first_id == second_id
def test_random_suffix_when_no_kvnr(self):
    """Without a KVNR the id ends in a 6-char uppercase alphanumeric suffix."""
    case = _make_parsed_case(kvnr=None, fallgruppe="onko", jahr=2026, kw=6)
    parts = generate_fall_id(case).split("-")
    # Year, zero-padded week and fallgruppe come first, in that order.
    assert parts[:3] == ["2026", "06", "onko"]
    suffix = parts[3]
    assert len(suffix) == 6
    assert suffix.isalnum()
    assert suffix.upper() == suffix
def test_random_suffix_when_empty_kvnr(self):
    """An empty-string KVNR also triggers the random-suffix fallback."""
    case = _make_parsed_case(kvnr="", fallgruppe="onko", jahr=2026, kw=6)
    fall_id = generate_fall_id(case)
    # The suffix is the fourth dash-separated component of the id.
    suffix = fall_id.split("-")[3]
    assert len(suffix) == 6
    assert suffix.isalnum()
def test_all_fallgruppen(self): def test_all_fallgruppen(self):
"""fall_id works for all valid fallgruppen.""" """fall_id works for all valid fallgruppen."""
@ -120,6 +130,26 @@ class TestGenerateFallId:
assert result.startswith("2027-") assert result.startswith("2027-")
# ── generate_random_suffix tests ───────────────────────────────────────
class TestGenerateRandomSuffix:
    """Checks the length, alphabet and variability of generate_random_suffix()."""

    def test_length(self):
        """A call without arguments yields six characters."""
        suffix = generate_random_suffix()
        assert len(suffix) == 6

    def test_charset(self):
        """Only uppercase letters and digits."""
        import re

        samples = [generate_random_suffix() for _ in range(50)]
        for sample in samples:
            assert re.match(r'^[A-Z0-9]{6}$', sample)

    def test_uniqueness(self):
        """Different calls produce different suffixes."""
        seen = set()
        for _ in range(20):
            seen.add(generate_random_suffix())
        # Allows for a few collisions among the 20 draws.
        assert len(seen) >= 15
# ── ImportRow schema tests ────────────────────────────────────────────── # ── ImportRow schema tests ──────────────────────────────────────────────
@ -149,10 +179,10 @@ class TestImportRowSchema:
fallgruppe="kardio", fallgruppe="kardio",
datum=date(2026, 2, 2), datum=date(2026, 2, 2),
is_duplicate=True, is_duplicate=True,
fall_id="2026-06-kardio-Tonn", fall_id="2026-06-kardio-D410126355",
) )
assert row.is_duplicate is True assert row.is_duplicate is True
assert row.fall_id == "2026-06-kardio-Tonn" assert row.fall_id == "2026-06-kardio-D410126355"
# ── ImportPreview schema tests ────────────────────────────────────────── # ── ImportPreview schema tests ──────────────────────────────────────────