mirror of
https://github.com/complexcaresolutions/dak.c2s.git
synced 2026-03-17 23:03:41 +00:00
Add service and standalone script to import all cases from the master Excel workbook into the database. Handles 5 year-sheets (2020-2022, 2023, 2024, 2025, 2026) with dynamic column mapping, fallgruppe normalization, boolean/date parsing, phone number formatting, and duplicate detection. Supports dry-run mode and per-sheet import. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
190 lines
6.1 KiB
Python
190 lines
6.1 KiB
Python
"""One-time script: Import all cases from Abrechnung_DAK.xlsx into DB.
|
|
|
|
Usage:
|
|
cd /home/frontend/dak_c2s/backend
|
|
source venv/bin/activate
|
|
python -m scripts.import_historical [path_to_xlsx]
|
|
|
|
Default path: ../data/Abrechnung_DAK.xlsx (relative to the backend/ directory, i.e. the data/ folder next to backend/)
|
|
|
|
Options:
|
|
--dry-run Parse and validate without writing to DB
|
|
--sheet NAME Import only the named sheet (can repeat)
|
|
--verbose, -v Show per-row errors in output and enable DEBUG logging
|
|
"""
|
|
|
|
import argparse
|
|
import logging
|
|
import os
|
|
import sys
|
|
|
|
# Ensure the backend package is importable
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from app.database import SessionLocal
|
|
from app.services.excel_import import (
|
|
YEAR_SHEETS,
|
|
import_abrechnung_sheet,
|
|
import_full_abrechnung,
|
|
)
|
|
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the historical import script.

    The default workbook path is resolved from this file's location, so it
    does not depend on the current working directory.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))  # dak_c2s/backend/scripts/
    project_root = os.path.dirname(os.path.dirname(script_dir))  # dak_c2s/

    parser = argparse.ArgumentParser(
        description="Import historical cases from Abrechnung_DAK.xlsx"
    )
    parser.add_argument(
        "filepath",
        nargs="?",
        default=os.path.join(project_root, "data", "Abrechnung_DAK.xlsx"),
        help="Path to the Excel file (default: ../../data/Abrechnung_DAK.xlsx)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Parse and validate without writing to DB",
    )
    parser.add_argument(
        "--sheet",
        action="append",
        dest="sheets",
        help="Import only specific sheet(s); can be repeated",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show per-row errors in output",
    )
    return parser


def _default_year_for(sheet_name: str) -> "int | None":
    """Return the year encoded in a sheet name, or None if it is not a plain integer.

    Multi-year sheets such as "2020-2022" are not valid integers and therefore
    yield None, exactly like any other non-numeric sheet name.
    """
    try:
        return int(sheet_name)
    except ValueError:
        return None


def _import_sheets(db, filepath, sheet_names, verbose, missing_label=""):
    """Import the named sheets of the workbook at *filepath* through *db*.

    Each sheet that exists is passed to ``import_abrechnung_sheet`` and its
    result is printed via ``_print_result``. Sheets that are missing from the
    workbook are reported and skipped. This helper never commits or rolls
    back; the caller decides what happens to the session afterwards.

    Args:
        db: Open database session handed through to ``import_abrechnung_sheet``.
        filepath: Path of the Excel workbook to read.
        sheet_names: Iterable of sheet names to import, in order.
        verbose: Forwarded to ``_print_result`` to list per-row errors.
        missing_label: Text inserted before "Sheet '...' not found" (used to
            mark explicitly requested sheets with "WARNING: ").
    """
    # Imported lazily, as in the original script, so openpyxl is only needed
    # on the code paths that open the workbook directly.
    from openpyxl import load_workbook

    wb = load_workbook(filepath, read_only=True, data_only=True)
    try:
        for sheet_name in sheet_names:
            if sheet_name not in wb.sheetnames:
                print(f" {missing_label}Sheet '{sheet_name}' not found, skipping")
                continue

            result = import_abrechnung_sheet(
                db=db,
                ws=wb[sheet_name],
                sheet_name=sheet_name,
                default_year=_default_year_for(sheet_name),
            )
            _print_result(sheet_name, result, verbose)
    finally:
        # Always release the workbook's file handle, even if a sheet fails.
        wb.close()


def main() -> None:
    """Run the historical import from the command line.

    Parses the arguments, configures logging and imports either the sheets
    given with ``--sheet`` or all year sheets. With ``--dry-run`` every change
    is rolled back at the end.

    Raises:
        SystemExit: With exit code 1 if the workbook does not exist or the
            import fails with an exception.
    """
    args = _build_parser().parse_args()

    # --verbose also switches the log level to DEBUG.
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s %(levelname)-8s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    filepath = args.filepath
    if not os.path.exists(filepath):
        # Fatal errors go to stderr so they are not mixed into the report on stdout.
        print(f"ERROR: File not found: {filepath}", file=sys.stderr)
        sys.exit(1)

    print(f"Importing from: {filepath}")
    if args.dry_run:
        print("*** DRY RUN -- no changes will be committed ***")
    print()

    db = SessionLocal()
    try:
        if args.sheets or args.dry_run:
            # Sheet-by-sheet import: either the explicitly requested sheets,
            # or (dry run without --sheet) every known year sheet.
            _import_sheets(
                db,
                filepath,
                args.sheets or YEAR_SHEETS,
                args.verbose,
                missing_label="WARNING: " if args.sheets else "",
            )
            if args.dry_run:
                db.rollback()
                print("\nDry run complete -- rolled back all changes.")
            else:
                db.commit()
                print("\nImport committed to database.")
        else:
            # NOTE(review): there is no explicit db.commit() on this path;
            # presumably import_full_abrechnung commits itself -- confirm.
            result = import_full_abrechnung(db, filepath)
            print("Import results:")
            for sheet_name, stats in result.items():
                _print_result(sheet_name, stats, args.verbose)
            print("\nImport committed to database.")
    except Exception as e:
        # Top-level boundary: undo partial work, report, and exit non-zero.
        db.rollback()
        print(f"\nERROR: Import failed: {e}", file=sys.stderr)
        logging.exception("Import failed")
        sys.exit(1)
    finally:
        db.close()
|
|
|
|
|
|
def _print_result(sheet_name: str, result: dict, verbose: bool) -> None:
    """Write a one-line summary of a sheet's import result to stdout.

    Args:
        sheet_name: Name of the sheet the result belongs to.
        result: Mapping with the keys "imported" and "skipped" (integers)
            and "errors" (a sequence of error messages).
        verbose: If true, each error is listed on its own line below the
            summary.
    """
    imported_count = result["imported"]
    skipped_count = result["skipped"]
    errors = result["errors"]

    n_errors = len(errors)
    if n_errors == 0:
        status = "OK"
    else:
        status = f"{n_errors} errors"

    summary = (
        f" {sheet_name:12s}: {imported_count:4d} imported, "
        f"{skipped_count:4d} skipped [{status}]"
    )
    print(summary)

    if not verbose:
        return

    # An empty error list simply prints nothing.
    for message in errors:
        print(f" - {message}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the import only when executed as a script
    # (e.g. ``python -m scripts.import_historical``), not when imported.
    main()
|