NOTE(review): this patch was rebuilt from a copy whose newlines had been
collapsed. The indentation and line breaks of the unchanged context lines
(hunks 2 and 3 of planning_import.py) are a best guess; check them against the
target file, or apply with `git apply --ignore-whitespace`.

diff --git a/app/routers/planning_import.py b/app/routers/planning_import.py
index 9057827..6f21ab3 100644
--- a/app/routers/planning_import.py
+++ b/app/routers/planning_import.py
@@ -11,6 +11,7 @@ Le module pré-patching et le patching by-step seront branchés en étape 2/3.
 import io
 import json
 import re
+import unicodedata
 from datetime import date, datetime, time
 from fastapi import APIRouter, Request, Depends, UploadFile, File, Form, Query
 from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
@@ -37,6 +38,111 @@ def _detect_pct_required(rec: dict) -> bool:
             return True
     return False
 
+
+# ────────────────────────────────────────────────────────────────────────
+# Case / accent / spelling normalisation of the taxonomy fields
+# (environment, domain), so that 'Production' / 'production' / 'PROD' etc.
+# do not coexist in the database.
+# ────────────────────────────────────────────────────────────────────────
+
+ENV_CANONICAL = {
+    # Production
+    "production": "Production",
+    "prod": "Production",
+    "prd": "Production",
+    # Pre-production
+    "pré-prod": "Pré-Prod",
+    "pre-prod": "Pré-Prod",
+    "preprod": "Pré-Prod",
+    "pre prod": "Pré-Prod",
+    "pré prod": "Pré-Prod",
+    "pre-production": "Pré-Prod",
+    "pré-production": "Pré-Prod",
+    "preproduction": "Pré-Prod",
+    "préproduction": "Pré-Prod",
+    # Acceptance testing ("recette")
+    "recette": "Recette",
+    "rec": "Recette",
+    "recettes": "Recette",
+    # Test (with or without a number)
+    "test": "Test",
+    "tests": "Test",
+    "test 1": "Test 1",
+    "test1": "Test 1",
+    "test_1": "Test 1",
+    "test 2": "Test 2",
+    "test2": "Test 2",
+    "test_2": "Test 2",
+    # Development
+    "développement": "Développement",
+    "developpement": "Développement",
+    "dev": "Développement",
+    "develop": "Développement",
+    # Qualification
+    "qualif": "Qualif",
+    "qualification": "Qualif",
+}
+
+DOMAIN_CANONICAL = {
+    "flux libre": "Flux Libre",
+    "flux-libre": "Flux Libre",
+    "fluxlibre": "Flux Libre",
+    "péage": "Péage",
+    "peage": "Péage",
+    "infrastructure": "Infrastructure",
+    "infra": "Infrastructure",
+    "dmz": "DMZ",
+    "lan": "LAN",
+    "trafic": "Trafic",
+    "traffic": "Trafic",
+    "gestion": "Gestion",
+    "bi": "BI",
+    "emv": "EMV",
+}
+
+
+def _strip_accents_lower(s: str) -> str:
+    """Return *s* lowercased with its combining marks (accents) removed."""
+    nfkd = unicodedata.normalize("NFKD", s)
+    return "".join(c for c in nfkd if not unicodedata.combining(c)).lower()
+
+
+def _canonicalize(v, mapping: dict):
+    """Return the canonical spelling of *v* according to *mapping*.
+
+    The value is stringified and stripped, and whitespace runs are collapsed
+    to a single space. Two lookups are then tried in order: the lowercased
+    text, then the lowercased text with accents removed. *mapping* keys are
+    expected to be lowercase and single-spaced.
+
+    Returns None when *v* is None or blank, the mapped value on a match,
+    and the stripped input unchanged when no canonical form is known.
+    """
+    if v is None:
+        return None
+    s = str(v).strip()
+    if not s:
+        # Blank cell: report "no value" rather than an empty string.
+        return None
+    lowered = re.sub(r"\s+", " ", s).lower()
+    if lowered in mapping:
+        return mapping[lowered]
+    # Accent-insensitive pass; collapse again because NFKD can yield spaces.
+    unaccented = re.sub(r"\s+", " ", _strip_accents_lower(s))
+    if unaccented in mapping:
+        return mapping[unaccented]
+    return s  # no known canonical form: keep the stripped value
+
+
+def _canonicalize_env(v):
+    """Canonicalize an environment label using ENV_CANONICAL."""
+    return _canonicalize(v, ENV_CANONICAL)
+
+
+def _canonicalize_domain(v):
+    """Canonicalize a domain label using DOMAIN_CANONICAL."""
+    return _canonicalize(v, DOMAIN_CANONICAL)
+
 # Colonnes attendues dans les feuilles Sxx (ordre = priorité, on matche par regex/lower)
 # Le fichier 2026 a 12 variantes d'en-têtes selon la semaine
 # (ancien format S02-S06, nouveau format DTS S07+)
@@ -554,8 +660,8 @@ async def import_upload(request: Request, db=Depends(get_db),
                     "imp": import_id, "sn": sheet_name, "wn": week_num,
                     "ri": rec["row_index"], "an": asset_str,
                     "it": str(rec.get("intervenant")) if rec.get("intervenant") else None,
-                    "en": str(rec.get("environnement")) if rec.get("environnement") else None,
-                    "do": str(rec.get("domaine")) if rec.get("domaine") else None,
+                    "en": _canonicalize_env(rec.get("environnement")) if rec.get("environnement") else None,
+                    "do": _canonicalize_domain(rec.get("domaine")) if rec.get("domaine") else None,
                     "os": str(rec.get("os")) if rec.get("os") else None,
                     "ov": str(rec.get("os_version")) if rec.get("os_version") else None,
                     "ap": str(rec.get("application_name")) if rec.get("application_name") else None,
diff --git a/backfill_canonicalize_env_domain_20260507.sql b/backfill_canonicalize_env_domain_20260507.sql
new file mode 100644
index 0000000..c127f0e
--- /dev/null
+++ b/backfill_canonicalize_env_domain_20260507.sql
@@ -0,0 +1,90 @@
+-- One-shot: normalise existing env/domain values to their canonical spelling
+-- (see ENV_CANONICAL / DOMAIN_CANONICAL in app/routers/planning_import.py).
+-- Run once after deploying the code that canonicalises these fields at import.
+-- Idempotent: each UPDATE skips rows that already hold the canonical value.
+--
+-- Values are compared after collapsing whitespace runs, trimming and
+-- lowercasing: rows imported before that change were stored without strip(),
+-- so padded variants such as ' Prod ' may be present.
+
+-- ─── Environments ────────────────────────────────────────────
+
+UPDATE patch_planning_import_rows SET environnement = 'Production'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g')))
+       IN ('production','prod','prd')
+   AND environnement != 'Production';
+
+UPDATE patch_planning_import_rows SET environnement = 'Pré-Prod'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g')))
+       IN ('pré-prod','pre-prod','preprod','pre prod','pré prod',
+           'pre-production','pré-production','preproduction','préproduction')
+   AND environnement != 'Pré-Prod';
+
+UPDATE patch_planning_import_rows SET environnement = 'Recette'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g')))
+       IN ('recette','rec','recettes')
+   AND environnement != 'Recette';
+
+UPDATE patch_planning_import_rows SET environnement = 'Test'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g')))
+       IN ('test','tests')
+   AND environnement != 'Test';
+
+UPDATE patch_planning_import_rows SET environnement = 'Test 1'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g')))
+       IN ('test 1','test1','test_1')
+   AND environnement != 'Test 1';
+
+UPDATE patch_planning_import_rows SET environnement = 'Test 2'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g')))
+       IN ('test 2','test2','test_2')
+   AND environnement != 'Test 2';
+
+UPDATE patch_planning_import_rows SET environnement = 'Développement'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g')))
+       IN ('développement','developpement','dev','develop')
+   AND environnement != 'Développement';
+
+UPDATE patch_planning_import_rows SET environnement = 'Qualif'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g')))
+       IN ('qualif','qualification')
+   AND environnement != 'Qualif';
+
+-- ─── Domains ─────────────────────────────────────────────────
+
+UPDATE patch_planning_import_rows SET domaine = 'Flux Libre'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')))
+       IN ('flux libre','flux-libre','fluxlibre')
+   AND domaine != 'Flux Libre';
+
+UPDATE patch_planning_import_rows SET domaine = 'Péage'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')))
+       IN ('peage','péage','peagé','pèage')
+   AND domaine != 'Péage';
+
+UPDATE patch_planning_import_rows SET domaine = 'Infrastructure'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')))
+       IN ('infrastructure','infra')
+   AND domaine != 'Infrastructure';
+
+UPDATE patch_planning_import_rows SET domaine = 'Trafic'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')))
+       IN ('trafic','traffic')
+   AND domaine != 'Trafic';
+
+-- Acronyms: the canonical form is the trimmed value in upper case.
+UPDATE patch_planning_import_rows
+   SET domaine = UPPER(BTRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')))
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')))
+       IN ('dmz','lan','bi','emv')
+   AND domaine != UPPER(BTRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')));
+
+UPDATE patch_planning_import_rows SET domaine = 'Gestion'
+ WHERE LOWER(BTRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')))
+       IN ('gestion')
+   AND domaine != 'Gestion';
+
+-- ─── Verification ────────────────────────────────────────────
+
+-- SELECT environnement, COUNT(*) FROM patch_planning_import_rows GROUP BY 1 ORDER BY 1;
+-- SELECT domaine, COUNT(*) FROM patch_planning_import_rows GROUP BY 1 ORDER BY 1;