From d33fc7d2424cdd14cb0f42455421eeb75ca37644 Mon Sep 17 00:00:00 2001 From: CCS Admin Date: Thu, 26 Feb 2026 17:01:09 +0000 Subject: [PATCH] docs: add implementation plan for fall-id anonymization Co-Authored-By: Claude Opus 4.6 --- ...6-fall-id-anonymisierung-implementation.md | 463 ++++++++++++++++++ 1 file changed, 463 insertions(+) create mode 100644 docs/plans/2026-02-26-fall-id-anonymisierung-implementation.md diff --git a/docs/plans/2026-02-26-fall-id-anonymisierung-implementation.md b/docs/plans/2026-02-26-fall-id-anonymisierung-implementation.md new file mode 100644 index 0000000..378119c --- /dev/null +++ b/docs/plans/2026-02-26-fall-id-anonymisierung-implementation.md @@ -0,0 +1,463 @@ +# Fall-ID Anonymisierung — Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Replace patient last names in fall_ids with KVNR (or 6-char random fallback), retroactively migrate all existing fall_ids, and auto-update fall_id when KVNR is later entered. + +**Architecture:** Modify `generate_fall_id()` to use KVNR instead of Nachname. Add a helper `generate_random_suffix()` for cases without KVNR. Write an Alembic migration that rebuilds all existing fall_ids. Extend the `PUT /cases/{case_id}/kvnr` endpoint to update the fall_id when the current suffix is a random sequence (not a KVNR). 
+ +**Tech Stack:** Python 3.12, FastAPI, SQLAlchemy 2.0, Alembic, Pydantic v2, pytest + +--- + +### Task 1: Update `generate_fall_id()` and add `generate_random_suffix()` + +**Files:** +- Modify: `backend/app/services/import_service.py:22-30` +- Test: `backend/tests/test_import.py` + +**Step 1: Write failing tests for new fall_id format** + +Add these tests to `backend/tests/test_import.py` in `TestGenerateFallId`: + +```python +def test_uses_kvnr_when_available(self): + """fall_id uses KVNR instead of Nachname when KVNR is present.""" + pc = _make_parsed_case(nachname="Tonn", kvnr="A123456789", fallgruppe="onko", jahr=2026, kw=6) + result = generate_fall_id(pc) + assert result == "2026-06-onko-A123456789" + assert "Tonn" not in result + +def test_random_suffix_when_no_kvnr(self): + """fall_id uses 6-char random suffix when KVNR is missing.""" + pc = _make_parsed_case(nachname="Tonn", kvnr=None, fallgruppe="onko", jahr=2026, kw=6) + result = generate_fall_id(pc) + # Format: YYYY-KW-fallgruppe-XXXXXX (6 alphanumeric uppercase+digits) + parts = result.split("-") + assert parts[0] == "2026" + assert parts[1] == "06" + assert parts[2] == "onko" + suffix = parts[3] + assert len(suffix) == 6 + assert suffix.isalnum() + assert suffix == suffix.upper() # Only uppercase + digits + assert "Tonn" not in result + +def test_random_suffix_when_empty_kvnr(self): + """fall_id uses random suffix when KVNR is empty string.""" + pc = _make_parsed_case(nachname="Tonn", kvnr="", fallgruppe="onko", jahr=2026, kw=6) + result = generate_fall_id(pc) + parts = result.split("-") + suffix = parts[3] + assert len(suffix) == 6 + assert suffix.isalnum() + +def test_random_suffixes_are_unique(self): + """Different calls produce different random suffixes.""" + pc = _make_parsed_case(kvnr=None) + ids = {generate_fall_id(pc) for _ in range(20)} + assert len(ids) >= 15 # Allow small collision chance +``` + +Also add a new test for `generate_random_suffix`: + +```python +from 
app.services.import_service import generate_random_suffix + +class TestGenerateRandomSuffix: + def test_length(self): + assert len(generate_random_suffix()) == 6 + + def test_charset(self): + """Only uppercase letters and digits.""" + import re + for _ in range(50): + s = generate_random_suffix() + assert re.match(r'^[A-Z0-9]{6}$', s) +``` + +**Step 2: Run tests to verify they fail** + +Run: `cd /home/frontend/dak_c2s/backend && python -m pytest tests/test_import.py -v -k "kvnr or random_suffix"` +Expected: FAIL — tests reference new behavior/function not yet implemented. + +**Step 3: Implement `generate_random_suffix()` and update `generate_fall_id()`** + +In `backend/app/services/import_service.py`, replace the existing `generate_fall_id` function and add `generate_random_suffix`: + +```python +import random +import string + +def generate_random_suffix(length: int = 6) -> str: + """Generate a random alphanumeric suffix (uppercase + digits).""" + charset = string.ascii_uppercase + string.digits + return "".join(random.choices(charset, k=length)) + + +def generate_fall_id(parsed: ParsedCase) -> str: + """Generate unique fall_id: YYYY-KW02d-fallgruppe-KVNR. + + Uses KVNR as identifier. Falls back to 6-char random suffix if + KVNR is missing or empty. + + Examples: + - 2026-06-onko-A123456789 + - 2026-12-kardio-X7K9M2 (random fallback) + """ + suffix = parsed.kvnr if parsed.kvnr else generate_random_suffix() + return f"{parsed.jahr}-{parsed.kw:02d}-{parsed.fallgruppe}-{suffix}" +``` + +**Step 4: Update existing tests that expect Nachname in fall_id** + +Update `test_format` and other tests in `TestGenerateFallId` that assert Nachname-based fall_ids. Since the default `_make_parsed_case` has `kvnr="D410126355"`, the fall_id will now use that. 
Specifically update: + +- `test_format`: expect `"2026-06-onko-D410126355"` (not `"2026-06-onko-Tonn"`) +- `test_different_cases_produce_different_ids`: change to use different KVNRs +- `test_same_patient_same_week_same_fallgruppe`: same KVNR → same fall_id +- `test_umlauts_preserved`: pass kvnr=None, check random suffix format instead +- `test_hyphenated_name`: pass kvnr=None, check random suffix format instead +- `test_all_fallgruppen`: remains valid (checks `-{fg}-` pattern) +- `test_year_boundary`: remains valid (checks year prefix) + +In `TestImportRowSchema.test_full_row` update `fall_id="2026-06-kardio-D410126355"`. + +In `TestPreviewImportMocked` tests that check `fall_id is not None`: still valid. + +**Step 5: Run all tests to verify they pass** + +Run: `cd /home/frontend/dak_c2s/backend && python -m pytest tests/test_import.py -v` +Expected: ALL PASS + +**Step 6: Update module docstring** + +Change the docstring at top of `import_service.py` from: +``` +- fall_id generation: YYYY-KW02d-fallgruppe-Nachname +``` +to: +``` +- fall_id generation: YYYY-KW02d-fallgruppe-KVNR (or 6-char random suffix) +``` + +**Step 7: Commit** + +```bash +git add backend/app/services/import_service.py backend/tests/test_import.py +git commit -m "feat: use KVNR instead of Nachname in fall_id generation" +``` + +--- + +### Task 2: Update `check_duplicate()` for new fall_id format + +**Files:** +- Modify: `backend/app/services/import_service.py:33-59` +- Test: `backend/tests/test_import.py` + +**Step 1: Analyze impact** + +`check_duplicate()` currently checks: +1. Exact fall_id match +2. Personal data match (nachname + fallgruppe + datum + optional vorname/geburtsdatum) + +With the new fall_id format: +- Criterion 1 still works if same KVNR → same fall_id (deterministic). +- For random-suffix cases, fall_id match won't catch duplicates → criterion 2 is essential. +- Criterion 2 (personal data match) stays as-is — it doesn't depend on fall_id format. 
+ +**No code change needed** for `check_duplicate()` — it already has both detection paths. The personal data match (criterion 2) handles the random-suffix case correctly. + +**Step 2: Write a verification test** + +Add to `TestCheckDuplicateMocked`: + +```python +def test_duplicate_detected_by_personal_data_when_kvnr_missing(self): + """Duplicate detected by personal data even when fall_id uses random suffix.""" + db = MagicMock() + query = MagicMock() + db.query.return_value = query + query.filter.return_value = query + # First .first() (fall_id check) → no match, second .first() (personal data) → match + query.first.side_effect = [None, MagicMock()] + pc = _make_parsed_case(kvnr=None) + assert check_duplicate(db, pc) is True +``` + +**Step 3: Run test** + +Run: `cd /home/frontend/dak_c2s/backend && python -m pytest tests/test_import.py::TestCheckDuplicateMocked -v` +Expected: ALL PASS + +**Step 4: Commit** + +```bash +git add backend/tests/test_import.py +git commit -m "test: verify duplicate detection works with new fall_id format" +``` + +--- + +### Task 3: Retroactive migration of existing fall_ids + +**Files:** +- Create: `backend/alembic/versions/006_anonymize_fall_ids.py` + +**Step 1: Write the Alembic migration** + +This migration: +1. Reads all cases with their fall_id and kvnr +2. For each case, regenerates the fall_id as `YYYY-KW-fallgruppe-KVNR` (or random suffix) +3. Updates in bulk + +```python +"""Anonymize fall_ids: replace Nachname with KVNR or random suffix. 
+ +Revision ID: 006_anonymize_fall_ids +Revises: 005_add_disclosure_requests +""" + +import random +import string + +from alembic import op +import sqlalchemy as sa + +revision = "006_anonymize_fall_ids" +down_revision = "005_add_disclosure_requests" +branch_labels = None +depends_on = None + + +def _random_suffix(length=6): + charset = string.ascii_uppercase + string.digits + return "".join(random.choices(charset, k=length)) + + +def upgrade(): + conn = op.get_bind() + cases = conn.execute( + sa.text("SELECT id, fall_id, kvnr, jahr, kw, fallgruppe FROM cases") + ).fetchall() + + for case in cases: + case_id, old_fall_id, kvnr, jahr, kw, fallgruppe = case + suffix = kvnr if kvnr else _random_suffix() + new_fall_id = f"{jahr}-{kw:02d}-{fallgruppe}-{suffix}" + conn.execute( + sa.text("UPDATE cases SET fall_id = :new_id WHERE id = :case_id"), + {"new_id": new_fall_id, "case_id": case_id}, + ) + + +def downgrade(): + # Cannot restore original Nachname-based fall_ids — data is lost + pass +``` + +**Step 2: Check that the migration file compiles (syntax check only, nothing is executed against a database)** + +Run: `cd /home/frontend/dak_c2s/backend && python -m py_compile alembic/versions/006_anonymize_fall_ids.py && echo 'syntax ok'` + +The actual migration will be run on production during deploy (Task 5). + +**Step 3: Commit** + +```bash +git add backend/alembic/versions/006_anonymize_fall_ids.py +git commit -m "feat: add migration to anonymize existing fall_ids" +``` + +--- + +### Task 4: Auto-update fall_id when KVNR is entered + +**Files:** +- Modify: `backend/app/api/cases.py:417-452` (set_case_kvnr endpoint) +- Test: manual verification after deploy + +**Step 1: Add helper to detect random-suffix fall_ids** + +In `backend/app/services/import_service.py`, add: + +```python +import re + +# KVNR format: letter followed by 9 digits (e.g. A123456789) +KVNR_PATTERN = re.compile(r'^[A-Z]\d{9}$') + +def has_random_suffix(fall_id: str) -> bool: + """Check if a fall_id ends with a random suffix (not a KVNR). 
+ + Random suffix: exactly 6 alphanumeric chars (uppercase + digits). + KVNR: letter + 9 digits (10 chars total). + Nachname: any other string (legacy format, also treated as non-KVNR). + """ + if not fall_id: + return False + parts = fall_id.rsplit("-", 1) + if len(parts) < 2: + return False + suffix = parts[1] + # If it matches KVNR pattern, it's NOT a random suffix + if KVNR_PATTERN.match(suffix): + return False + # Otherwise it's either a random suffix or a legacy Nachname — either way, should be updated + return True +``` + +**Step 2: Write tests for `has_random_suffix()`** + +Add to `backend/tests/test_import.py`: + +```python +from app.services.import_service import has_random_suffix + +class TestHasRandomSuffix: + def test_kvnr_suffix(self): + """Fall_id with KVNR is NOT random.""" + assert has_random_suffix("2026-06-onko-A123456789") is False + + def test_random_suffix(self): + """Fall_id with 6-char random suffix IS random.""" + assert has_random_suffix("2026-06-onko-X7K9M2") is True + + def test_legacy_nachname_suffix(self): + """Fall_id with legacy Nachname suffix IS treated as non-KVNR.""" + assert has_random_suffix("2020-32-onko-Bartl-Zimmermann") is True + assert has_random_suffix("2026-06-kardio-Tonn") is True + + def test_empty_fall_id(self): + assert has_random_suffix("") is False + assert has_random_suffix(None) is False +``` + +**Step 3: Run tests** + +Run: `cd /home/frontend/dak_c2s/backend && python -m pytest tests/test_import.py::TestHasRandomSuffix -v` +Expected: ALL PASS + +**Step 4: Modify `set_case_kvnr` endpoint** + +In `backend/app/api/cases.py`, update the endpoint to also update the fall_id: + +```python +from app.services.import_service import has_random_suffix + +@router.put("/{case_id}/kvnr", response_model=CaseResponse) +def set_case_kvnr( + case_id: int, + payload: dict, + request: Request, + db: Session = Depends(get_db), + user: User = Depends(get_current_user), +): + """Update the KVNR for a case. 
Accessible to both admin and dak_mitarbeiter. + + If the current fall_id has a random suffix (no KVNR), it is automatically + updated to use the new KVNR. + """ + case = db.query(Case).filter(Case.id == case_id).first() + if not case: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Case not found", + ) + + old_kvnr = case.kvnr + old_fall_id = case.fall_id + new_kvnr = payload.get("kvnr") + case.kvnr = new_kvnr + + # Auto-update fall_id if it currently uses a random suffix or legacy Nachname + if new_kvnr and case.fall_id and has_random_suffix(case.fall_id): + prefix = case.fall_id.rsplit("-", 1)[0] + case.fall_id = f"{prefix}-{new_kvnr}" + + case.updated_by = user.id + db.commit() + db.refresh(case) + + log_action( + db, + user_id=user.id, + action="kvnr_updated", + entity_type="case", + entity_id=case.id, + old_values={"kvnr": old_kvnr, "fall_id": old_fall_id}, + new_values={"kvnr": new_kvnr, "fall_id": case.fall_id}, + ip_address=request.client.host if request.client else None, + user_agent=request.headers.get("user-agent"), + ) + + return case +``` + +Note: Do NOT implement the two `rsplit("-", 1)` lines shown in the snippet above. They only work for the standard format `YYYY-KW-fallgruppe-suffix`. For legacy Nachnames with hyphens like `2020-32-onko-Bartl-Zimmermann`, rsplit with maxsplit=1 gives `("2020-32-onko-Bartl", "Zimmermann")`, so the rebuilt fall_id would be `2020-32-onko-Bartl-<KVNR>` and still leak part of the surname. Better approach: reconstruct the fall_id from the case fields. + +**Corrected implementation** — replace the `prefix = ...` and `case.fall_id = f"{prefix}-{new_kvnr}"` lines in the snippet above with a fall_id rebuilt from the case fields instead of string parsing: + +```python + if new_kvnr and case.fall_id and has_random_suffix(case.fall_id): + case.fall_id = f"{case.jahr}-{case.kw:02d}-{case.fallgruppe}-{new_kvnr}" +``` + +This is more robust because it uses the actual database fields. 
+ +**Step 5: Commit** + +```bash +git add backend/app/services/import_service.py backend/app/api/cases.py backend/tests/test_import.py +git commit -m "feat: auto-update fall_id when KVNR is entered on a case" +``` + +--- + +### Task 5: Build, deploy, and run migration + +**Files:** +- Frontend: `frontend/` (build) +- Production DB: run Alembic migration + +**Step 1: Build frontend** + +Run: `cd /home/frontend/dak_c2s/frontend && pnpm build` +Expected: Build succeeds (no frontend code changes needed for this feature) + +**Step 2: Run all backend tests** + +Run: `cd /home/frontend/dak_c2s/backend && python -m pytest tests/ -v` +Expected: ALL PASS + +**Step 3: Commit all, push develop, merge to main** + +```bash +cd /home/frontend/dak_c2s +git push origin develop +git checkout main && git pull origin main && git merge develop && git push origin main +git checkout develop +``` + +**Step 4: Deploy to Hetzner 1** + +```bash +# Pull latest code +ssh hetzner1 "cd /opt/dak-portal && git pull origin main" + +# Run migration (updates all ~2900 fall_ids; IRREVERSIBLE — downgrade() is a no-op, so take a database backup before running this) +ssh hetzner1 "cd /opt/dak-portal/backend && source /opt/dak-portal/venv/bin/activate && alembic upgrade head" + +# Build and deploy frontend +ssh hetzner1 "cd /opt/dak-portal/frontend && pnpm install && pnpm build && cp -r dist/* /var/www/vhosts/complexcaresolutions.de/dak.complexcaresolutions.de/dist/" + +# Restart backend +ssh hetzner1 "systemctl restart dak-backend" +``` + +**Step 5: Verify on production** + +1. Check fall_ids no longer contain patient names (enter the DB password at the interactive prompt — never store it in this repository; the previously committed password must be rotated): `ssh -t hetzner1 "mysql -u dak_c2s_admin -p dak_c2s -e \"SELECT fall_id FROM cases LIMIT 20\""` +2. Import a new CSV → verify new fall_ids use KVNR format +3. Enter KVNR on a case with random suffix → verify fall_id updates automatically