dak.c2s/backend/scripts/import_historical.py
CCS Admin f4afea7f85 feat: historical Excel import (Abrechnung_DAK.xlsx)
Add service and standalone script to import all cases from the master
Excel workbook into the database. Handles 5 year-sheets (2020-2022,
2023, 2024, 2025, 2026) with dynamic column mapping, fallgruppe
normalization, boolean/date parsing, phone number formatting, and
duplicate detection. Supports dry-run mode and per-sheet import.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 07:58:04 +00:00

190 lines
6.1 KiB
Python

"""One-time script: Import all cases from Abrechnung_DAK.xlsx into DB.
Usage:
cd /home/frontend/dak_c2s/backend
source venv/bin/activate
python -m scripts.import_historical [path_to_xlsx]
Default path: ../data/Abrechnung_DAK.xlsx
Options:
--dry-run Parse and validate without writing to DB
--sheet NAME Import only the named sheet (can repeat)
--verbose Show per-row errors in output
"""
import argparse
import logging
import os
import sys
# Ensure the backend package is importable
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.database import SessionLocal
from app.services.excel_import import (
YEAR_SHEETS,
import_abrechnung_sheet,
import_full_abrechnung,
)
def _default_year_for_sheet(sheet_name: str) -> "int | None":
    """Return the year encoded in *sheet_name*, or ``None`` if it is not a plain integer.

    Sheets named after a single year (e.g. ``"2023"``) yield that year.
    Range sheets such as ``"2020-2022"`` are not valid integers and yield
    ``None``, which is passed on unchanged as ``default_year``.
    """
    try:
        return int(sheet_name)
    except ValueError:
        return None


def _import_sheets(db, filepath: str, sheet_names, *, verbose: bool, warn_missing: bool) -> None:
    """Import the named sheets of the workbook at *filepath* one by one.

    Each sheet found in the workbook is handed to ``import_abrechnung_sheet``
    and its result is printed via ``_print_result``.  Sheets that do not
    exist in the workbook are reported and skipped.  This helper neither
    commits nor rolls back; the caller decides what happens to the session.

    Args:
        db: Database session passed through to ``import_abrechnung_sheet``.
        filepath: Path to the Excel workbook.
        sheet_names: Iterable of sheet names to import, in order.
        verbose: Forwarded to ``_print_result`` to show per-row errors.
        warn_missing: If true, prefix the "not found" message with
            ``WARNING:`` (used when the user explicitly asked for the sheet).
    """
    # Imported lazily, as in the original script, so openpyxl is only
    # required on the code paths that open the workbook directly.
    from openpyxl import load_workbook

    wb = load_workbook(filepath, read_only=True, data_only=True)
    try:
        for sheet_name in sheet_names:
            if sheet_name not in wb.sheetnames:
                if warn_missing:
                    print(f" WARNING: Sheet '{sheet_name}' not found, skipping")
                else:
                    print(f" Sheet '{sheet_name}' not found, skipping")
                continue
            result = import_abrechnung_sheet(
                db=db,
                ws=wb[sheet_name],
                sheet_name=sheet_name,
                default_year=_default_year_for_sheet(sheet_name),
            )
            _print_result(sheet_name, result, verbose)
    finally:
        # Always release the workbook's file handle, even if a sheet fails.
        wb.close()


def main() -> None:
    """Parse command-line arguments and run the historical import.

    Exits with status 1 if the workbook path is not an existing file or if
    the import raises; in the latter case the session is rolled back first.
    With ``--dry-run`` all changes are rolled back after the sheets have
    been processed.
    """
    # Three levels up from this file: scripts/ -> backend/ -> dak_c2s/.
    repo_root = os.path.dirname(
        os.path.dirname(
            os.path.dirname(os.path.abspath(__file__))
        )
    )

    parser = argparse.ArgumentParser(
        description="Import historical cases from Abrechnung_DAK.xlsx"
    )
    parser.add_argument(
        "filepath",
        nargs="?",
        default=os.path.join(repo_root, "data", "Abrechnung_DAK.xlsx"),
        help="Path to the Excel file (default: ../../data/Abrechnung_DAK.xlsx)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Parse and validate without writing to DB",
    )
    parser.add_argument(
        "--sheet",
        action="append",
        dest="sheets",
        help="Import only specific sheet(s); can be repeated",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show per-row errors in output",
    )
    args = parser.parse_args()

    # Configure logging
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)-8s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    filepath = args.filepath
    # isfile (not exists): a directory must be rejected here instead of
    # failing later inside the workbook loader.
    if not os.path.isfile(filepath):
        print(f"ERROR: File not found: {filepath}", file=sys.stderr)
        sys.exit(1)

    print(f"Importing from: {filepath}")
    if args.dry_run:
        print("*** DRY RUN -- no changes will be committed ***")
    print()

    db = SessionLocal()
    try:
        if args.sheets or args.dry_run:
            # Sheet-by-sheet path: used for explicit --sheet selections and
            # for dry runs over all year sheets, so this script controls
            # the commit/rollback itself.
            explicit = bool(args.sheets)
            _import_sheets(
                db,
                filepath,
                args.sheets if explicit else YEAR_SHEETS,
                verbose=args.verbose,
                warn_missing=explicit,
            )
            if args.dry_run:
                db.rollback()
                print("\nDry run complete -- rolled back all changes.")
            else:
                db.commit()
                print("\nImport committed to database.")
        else:
            # NOTE(review): no explicit commit on this path -- presumably
            # import_full_abrechnung commits on its own; confirm, otherwise
            # the "committed" message below is misleading.
            result = import_full_abrechnung(db, filepath)
            print("Import results:")
            for sheet_name, stats in result.items():
                _print_result(sheet_name, stats, args.verbose)
            print("\nImport committed to database.")
    except Exception as e:
        # Top-level boundary: undo partial work, report, and signal failure.
        db.rollback()
        print(f"\nERROR: Import failed: {e}", file=sys.stderr)
        logging.exception("Import failed")
        sys.exit(1)
    finally:
        db.close()
def _print_result(sheet_name: str, result: dict, verbose: bool) -> None:
    """Write a one-line summary of a sheet's import result to stdout.

    Args:
        sheet_name: Name of the sheet the result belongs to.
        result: Mapping with the keys ``"imported"`` and ``"skipped"``
            (integers) and ``"errors"`` (a sized, iterable collection).
        verbose: If true, each entry of ``result["errors"]`` is printed on
            its own line below the summary.
    """
    n_imported = result["imported"]
    n_skipped = result["skipped"]
    problems = result["errors"]

    n_problems = len(problems)
    status = f"{n_problems} errors" if n_problems != 0 else "OK"
    print(f" {sheet_name:12s}: {n_imported:4d} imported, {n_skipped:4d} skipped [{status}]")

    # Per-row details are only shown on request and only if there are any.
    if not verbose or not problems:
        return
    for problem in problems:
        print(f" - {problem}")
# Script entry point: run the import only when this module is executed
# directly (e.g. ``python -m scripts.import_historical``), not when imported.
if __name__ == "__main__":
    main()