feat: utility functions — fallgruppe mapping, KW calc, ICD/KVNR validation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
CCS Admin 2026-02-24 07:41:09 +00:00
parent 4649f7a082
commit e0ca8c31c3
4 changed files with 245 additions and 0 deletions

View file

@ -0,0 +1,42 @@
"""Mapping of CRM 'Modul' strings to internal fallgruppe codes."""
# Exact CRM 'Modul' label -> internal fallgruppe code.
MODUL_TO_FALLGRUPPE: dict[str, str] = {
    "Zweitmeinung Onkologie": "onko",
    "Zweitmeinung Kardiologie": "kardio",
    "Zweitmeinung Intensiv": "intensiv",
    "Zweitmeinung Gallenblase": "galle",
    "Zweitmeinung Schilddrüse": "sd",
}

# The set of fallgruppe codes, derived from the mapping's values so the two
# tables cannot drift apart.
VALID_FALLGRUPPEN = set(MODUL_TO_FALLGRUPPE.values())
def map_modul_to_fallgruppe(modul: str) -> str:
    """Translate a CRM 'Modul' label into its fallgruppe code.

    Surrounding whitespace is ignored. The label is first looked up verbatim
    in ``MODUL_TO_FALLGRUPPE``. If that fails and the label contains
    "begutachtung" (case-insensitive), the code is derived from the first
    keyword found in the lower-cased label.

    Raises:
        ValueError: if neither the exact lookup nor the keyword search
            yields a fallgruppe.
    """
    cleaned = modul.strip()

    exact = MODUL_TO_FALLGRUPPE.get(cleaned)
    if exact is not None:
        return exact

    lowered = cleaned.lower()
    if "begutachtung" in lowered:
        # Insertion order is the search order: the first keyword contained
        # in the label decides the result.
        gruppe_by_keyword = {
            "onko": "onko",
            "krebs": "onko",
            "tumor": "onko",
            "kardio": "kardio",
            "herz": "kardio",
            "intensiv": "intensiv",
            "galle": "galle",
            "schilddrüse": "sd",
            "schilddruese": "sd",
        }
        derived = next(
            (
                gruppe
                for keyword, gruppe in gruppe_by_keyword.items()
                if keyword in lowered
            ),
            None,
        )
        if derived is not None:
            return derived

    raise ValueError(f"Cannot map module to fallgruppe: '{cleaned}'")

View file

@ -0,0 +1,50 @@
"""Calendar week and date utilities."""
from datetime import date
def date_to_kw(d: date) -> int:
    """Return the ISO calendar week number that *d* falls in."""
    _iso_year, iso_week, _iso_weekday = d.isocalendar()
    return iso_week
def date_to_jahr(d: date) -> int:
    """Return the ISO calendar year of *d*.

    Near a year boundary this can differ from ``d.year``, because the ISO
    year follows the ISO week the date belongs to.
    """
    iso_year, _iso_week, _iso_weekday = d.isocalendar()
    return iso_year
def parse_german_date(s: str) -> date:
    """Parse a German-style date string into a ``date``.

    Accepted formats are ``DD.MM.YY`` and ``DD.MM.YYYY``, optionally followed
    by a comma and a time (``'02.02.26, 08:50'``); the time part is
    discarded. Leading and trailing whitespace is ignored.

    Year handling:
    - A year written with one or two digits is expanded with a pivot:
      00-49 -> 2000-2049, 50-99 -> 1950-1999.
    - A year written with three or more digits is taken literally, so
      '29.08.0196' and '29.08.0096' are both rejected rather than being
      reinterpreted as a short year.
    - The resulting year must lie within 1900..2100.

    Raises:
        ValueError: if the string is empty, does not consist of three
            dot-separated unsigned numbers, the year is out of range, or
            day/month do not form a real calendar date.
    """
    s = s.strip()
    if not s:
        raise ValueError("Empty date string")

    # Drop a trailing time part, e.g. "02.02.26, 08:50" -> "02.02.26".
    date_part = s.partition(",")[0].strip()

    parts = [part.strip() for part in date_part.split(".")]
    # isdecimal() rejects empty components, signs and underscores, all of
    # which int() would either accept or report with an unrelated message.
    if len(parts) != 3 or not all(part.isdecimal() for part in parts):
        raise ValueError(f"Invalid date format: '{date_part}'")

    day_str, month_str, year_str = parts
    day, month, year = int(day_str), int(month_str), int(year_str)

    # Apply the pivot based on how the year was written, not on its numeric
    # value; otherwise "0096" would silently turn into 1996.
    if len(year_str) <= 2:
        year += 2000 if year < 50 else 1900

    # Reject obviously wrong years.
    if year < 1900 or year > 2100:
        raise ValueError(f"Year out of range: {year}")

    # date() itself raises ValueError for impossible day/month combinations.
    return date(year, month, day)

View file

@ -0,0 +1,41 @@
"""Data validation functions for ICD codes and KVNR."""
import re
# ICD code shape: one letter, two digits, optionally "." plus one or two digits.
# re.ASCII limits \d to 0-9 (for str patterns it otherwise matches any Unicode
# decimal digit), and \Z anchors at the real end of the string ("$" would also
# match just before a trailing newline).
ICD_PATTERN = re.compile(r"^[A-Z]\d{2}(\.\d{1,2})?\Z", re.ASCII)
# KVNR shape: one letter followed by exactly nine digits.
KVNR_PATTERN = re.compile(r"^[A-Z]\d{9}\Z", re.ASCII)
def validate_icd(code: str) -> str:
    """Check a single ICD code and return it in normalized form.

    The input is stripped of surrounding whitespace and upper-cased before
    being matched against ``ICD_PATTERN``.

    Raises:
        ValueError: if the code is empty after stripping or does not match
            the expected format.
    """
    normalized = code.strip().upper()
    if normalized == "":
        raise ValueError("Empty ICD code")
    if ICD_PATTERN.match(normalized) is None:
        raise ValueError(f"Invalid ICD code format: '{normalized}'")
    return normalized
def split_icd_codes(raw: str) -> list[str]:
    """Break a comma- and/or semicolon-separated string into single codes.

    Whitespace around each entry is removed and empty entries are dropped.
    The codes themselves are not validated. An empty or blank input yields
    an empty list.
    """
    if not (raw and raw.strip()):
        return []
    # Treat ";" like "," so a single split covers both separators.
    pieces = raw.replace(";", ",").split(",")
    trimmed = (piece.strip() for piece in pieces)
    return [entry for entry in trimmed if entry]
def normalize_icd_hauptgruppe(code: str) -> str:
    """Return the hauptgruppe of an ICD code, e.g. 'C50.1' -> 'C50'.

    The code is stripped and upper-cased; the result is its first three
    characters, or the whole string if it is shorter than that.
    """
    normalized = code.strip().upper()
    # Slicing already returns the full string when it has fewer than 3 chars.
    return normalized[:3]
def validate_kvnr(kvnr: str) -> str:
    """Check a KVNR (one letter followed by nine digits) and normalize it.

    The input is stripped of surrounding whitespace and upper-cased before
    being matched against ``KVNR_PATTERN``; the normalized value is returned.

    Raises:
        ValueError: if the value is empty after stripping or does not match
            the expected format.
    """
    normalized = kvnr.strip().upper()
    if normalized == "":
        raise ValueError("Empty KVNR")
    if KVNR_PATTERN.match(normalized) is None:
        raise ValueError(f"Invalid KVNR format: '{normalized}'")
    return normalized

112
backend/tests/test_utils.py Normal file
View file

@ -0,0 +1,112 @@
"""Tests for utility functions: fallgruppe mapping, KW/date utils, validators."""
from datetime import date
import pytest
from app.utils.fallgruppe_map import map_modul_to_fallgruppe
from app.utils.kw_utils import date_to_jahr, date_to_kw, parse_german_date
from app.utils.validators import (
normalize_icd_hauptgruppe,
split_icd_codes,
validate_icd,
validate_kvnr,
)
# ── Fallgruppe mapping ─────────────────────────────────────────────
class TestFallgruppeMapping:
    """Checks for map_modul_to_fallgruppe."""

    def test_map_exact_matches(self):
        # Every known CRM label must resolve to its fallgruppe code.
        expected_by_modul = {
            "Zweitmeinung Onkologie": "onko",
            "Zweitmeinung Kardiologie": "kardio",
            "Zweitmeinung Intensiv": "intensiv",
            "Zweitmeinung Gallenblase": "galle",
            "Zweitmeinung Schilddrüse": "sd",
        }
        for modul, fallgruppe in expected_by_modul.items():
            assert map_modul_to_fallgruppe(modul) == fallgruppe

    def test_map_with_whitespace(self):
        padded = " Zweitmeinung Onkologie "
        assert map_modul_to_fallgruppe(padded) == "onko"

    def test_map_begutachtung_onko(self):
        result = map_modul_to_fallgruppe("Begutachtung Onkologie")
        assert result == "onko"

    def test_map_begutachtung_herz(self):
        result = map_modul_to_fallgruppe("Begutachtung Herz")
        assert result == "kardio"

    def test_map_unknown_raises(self):
        unknown = "Unknown Module"
        with pytest.raises(ValueError, match="Cannot map module"):
            map_modul_to_fallgruppe(unknown)
# ── KW / Date utils ────────────────────────────────────────────────
class TestKWUtils:
    """Checks for the ISO week/year helpers."""

    def test_date_to_kw(self):
        # 2026-02-24 is a Tuesday in KW 9
        tuesday = date(2026, 2, 24)
        assert date_to_kw(tuesday) == 9

    def test_date_to_jahr_boundary(self):
        # 2025-12-31 is a Wednesday that already belongs to ISO week 1 of 2026
        last_day_of_2025 = date(2025, 12, 31)
        week = date_to_kw(last_day_of_2025)
        year = date_to_jahr(last_day_of_2025)
        assert week == 1
        assert year == 2026
class TestParseGermanDate:
    """Checks for parse_german_date: supported formats and rejected input."""

    def test_parse_german_date_ddmmyy(self):
        parsed = parse_german_date("02.02.26")
        assert parsed == date(2026, 2, 2)

    def test_parse_german_date_ddmmyyyy(self):
        parsed = parse_german_date("28.04.1960")
        assert parsed == date(1960, 4, 28)

    def test_parse_german_date_with_time(self):
        # Only the date part before the comma is used.
        parsed = parse_german_date("02.02.26, 08:50")
        assert parsed == date(2026, 2, 2)

    def test_parse_german_date_bad_year(self):
        bad_year = "29.08.0196"
        with pytest.raises(ValueError, match="Year out of range"):
            parse_german_date(bad_year)

    def test_parse_german_date_empty(self):
        empty = ""
        with pytest.raises(ValueError, match="Empty date string"):
            parse_german_date(empty)
# ── Validators ──────────────────────────────────────────────────────
class TestValidateICD:
    """Checks for validate_icd: normalization and rejection."""

    def test_validate_icd_valid(self):
        cases = (("C50.1", "C50.1"), ("c50", "C50"))
        for raw, normalized in cases:
            assert validate_icd(raw) == normalized

    def test_validate_icd_invalid(self):
        # (input, expected error message fragment)
        failing = (
            ("XYZ", "Invalid ICD code format"),
            ("", "Empty ICD code"),
        )
        for raw, message in failing:
            with pytest.raises(ValueError, match=message):
                validate_icd(raw)
class TestSplitICDCodes:
    """Checks for split_icd_codes with each supported separator."""

    def test_split_icd_codes_comma(self):
        codes = split_icd_codes("C50.1, C79.5")
        assert codes == ["C50.1", "C79.5"]

    def test_split_icd_codes_semicolon(self):
        codes = split_icd_codes("C50.1;C79.5")
        assert codes == ["C50.1", "C79.5"]

    def test_split_icd_codes_empty(self):
        codes = split_icd_codes("")
        assert codes == []
class TestNormalizeHauptgruppe:
    """Checks for normalize_icd_hauptgruppe."""

    def test_normalize_hauptgruppe(self):
        hauptgruppe = normalize_icd_hauptgruppe("C50.1")
        assert hauptgruppe == "C50"
class TestValidateKVNR:
    """Checks for validate_kvnr: accepted and rejected formats."""

    def test_validate_kvnr_valid(self):
        kvnr = "D410126355"
        assert validate_kvnr(kvnr) == kvnr

    def test_validate_kvnr_invalid(self):
        # Missing the leading letter.
        digits_only = "123456789"
        with pytest.raises(ValueError, match="Invalid KVNR format"):
            validate_kvnr(digits_only)