feat(planning_import): canonicalisation env + domaine a l'import (Production/production/PROD -> Production)

- Tables ENV_CANONICAL et DOMAIN_CANONICAL: lookup case+accent insensible
- _canonicalize_env / _canonicalize_domain : retournent la forme canonique connue,
  sinon valeur d'origine inchangee
- Applique dans l'INSERT row au moment de l'import
- Lookup en 3 passes:
  1. lowercase exact
  2. lowercase + accents stripped
  3. lowercase + accents + espaces normalises (ex 'Flux  Libre' -> 'flux libre')

Backfill SQL one-shot pour les rows existantes (backfill_canonicalize_env_domain_20260507.sql):
- env: Production/Pré-Prod/Recette/Test/Test 1/Test 2/Développement/Qualif
- domaine: Flux Libre/Péage/Infrastructure/Trafic/DMZ/LAN/BI/EMV/Gestion
- Idempotent
This commit is contained in:
Pierre & Lumière 2026-05-07 19:48:21 +02:00
parent 2b57ca3247
commit e448d8885b
2 changed files with 171 additions and 2 deletions

View File

@ -11,6 +11,7 @@ Le module pré-patching et le patching by-step seront branchés en étape 2/3.
import io import io
import json import json
import re import re
import unicodedata
from datetime import date, datetime, time from datetime import date, datetime, time
from fastapi import APIRouter, Request, Depends, UploadFile, File, Form, Query from fastapi import APIRouter, Request, Depends, UploadFile, File, Form, Query
from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
@ -37,6 +38,104 @@ def _detect_pct_required(rec: dict) -> bool:
return True return True
return False return False
# ────────────────────────────────────────────────────────────────────────
# Normalisation casse / accent / variantes des champs taxonomiques
# (env, domaine) — pour éviter d'avoir 'Production' / 'production' / 'PROD' etc.
# en parallèle dans la BDD.
# ────────────────────────────────────────────────────────────────────────
# Lookup table: lowercase spelling -> canonical environment label.
# Each canonical label is written once, next to every spelling that must
# collapse onto it; the comprehension flattens the groups into a flat dict.
ENV_CANONICAL = {
    alias: canonical
    for canonical, aliases in (
        ("Production", ("production", "prod", "prd")),
        (
            "Pré-Prod",
            (
                "pré-prod",
                "pre-prod",
                "preprod",
                "pre prod",
                "pré prod",
                "pre-production",
                "pré-production",
                "preproduction",
                "préproduction",
            ),
        ),
        ("Recette", ("recette", "rec", "recettes")),
        # Test, with or without a number
        ("Test", ("test", "tests")),
        ("Test 1", ("test 1", "test1", "test_1")),
        ("Test 2", ("test 2", "test2", "test_2")),
        (
            "Développement",
            ("développement", "developpement", "dev", "develop"),
        ),
        ("Qualif", ("qualif", "qualification")),
    )
    for alias in aliases
}
# Lookup table: lowercase spelling -> canonical domain label.
# Each canonical label is written once, next to every spelling that must
# collapse onto it, so a key cannot be listed twice by accident.
DOMAIN_CANONICAL = {
    alias: canonical
    for canonical, aliases in (
        # Runs of whitespace ("flux   libre") need no entry of their own:
        # _canonicalize collapses them before its last lookup.
        ("Flux Libre", ("flux libre", "flux-libre", "fluxlibre")),
        ("Péage", ("péage", "peage")),
        ("Infrastructure", ("infrastructure", "infra")),
        ("DMZ", ("dmz",)),
        ("LAN", ("lan",)),
        ("Trafic", ("trafic", "traffic")),
        ("Gestion", ("gestion",)),
        ("BI", ("bi",)),
        ("EMV", ("emv",)),
    )
    for alias in aliases
}
def _strip_accents_lower(s: str) -> str:
nfkd = unicodedata.normalize("NFKD", s)
return "".join(c for c in nfkd if not unicodedata.combining(c)).lower()
def _canonicalize(v, mapping: dict):
"""Renvoie la forme canonique si v matche (case + accents insensitive),
sinon renvoie v inchangé (en strip).
Lookup en 2 passes : 1) lowercase exact, 2) sans accents."""
if v is None:
return None
s = str(v).strip()
if not s:
return s
low = s.lower()
if low in mapping:
return mapping[low]
no_acc = _strip_accents_lower(s)
if no_acc in mapping:
return mapping[no_acc]
# Aussi : compaction des espaces multiples (ex: "Flux Libre" -> "flux libre")
no_acc_compact = re.sub(r"\s+", " ", no_acc)
if no_acc_compact in mapping:
return mapping[no_acc_compact]
return s # pas de canonique connue, on garde tel quel
def _canonicalize_env(v):
    """Return the canonical environment label for *v*, looked up in ENV_CANONICAL."""
    return _canonicalize(v, mapping=ENV_CANONICAL)
def _canonicalize_domain(v):
    """Return the canonical domain label for *v*, looked up in DOMAIN_CANONICAL."""
    return _canonicalize(v, mapping=DOMAIN_CANONICAL)
# Colonnes attendues dans les feuilles Sxx (ordre = priorité, on matche par regex/lower) # Colonnes attendues dans les feuilles Sxx (ordre = priorité, on matche par regex/lower)
# Le fichier 2026 a 12 variantes d'en-têtes selon la semaine # Le fichier 2026 a 12 variantes d'en-têtes selon la semaine
# (ancien format S02-S06, nouveau format DTS S07+) # (ancien format S02-S06, nouveau format DTS S07+)
@ -554,8 +653,8 @@ async def import_upload(request: Request, db=Depends(get_db),
"imp": import_id, "sn": sheet_name, "wn": week_num, "ri": rec["row_index"], "imp": import_id, "sn": sheet_name, "wn": week_num, "ri": rec["row_index"],
"an": asset_str, "an": asset_str,
"it": str(rec.get("intervenant")) if rec.get("intervenant") else None, "it": str(rec.get("intervenant")) if rec.get("intervenant") else None,
"en": str(rec.get("environnement")) if rec.get("environnement") else None, "en": _canonicalize_env(rec.get("environnement")) if rec.get("environnement") else None,
"do": str(rec.get("domaine")) if rec.get("domaine") else None, "do": _canonicalize_domain(rec.get("domaine")) if rec.get("domaine") else None,
"os": str(rec.get("os")) if rec.get("os") else None, "os": str(rec.get("os")) if rec.get("os") else None,
"ov": str(rec.get("os_version")) if rec.get("os_version") else None, "ov": str(rec.get("os_version")) if rec.get("os_version") else None,
"ap": str(rec.get("application_name")) if rec.get("application_name") else None, "ap": str(rec.get("application_name")) if rec.get("application_name") else None,

View File

@ -0,0 +1,70 @@
-- One-shot: normalise existing env/domaine values to their canonical forms
-- (see the ENV_CANONICAL / DOMAIN_CANONICAL tables used by _canonicalize_env /
-- _canonicalize_domain in planning_import.py).
-- Run once after pulling the code that adds canonicalisation at import time.
-- Idempotent: it can be replayed without side effects.
-- ─── Environments ────────────────────────────────────────────
-- Every statement excludes rows that already hold the canonical value, so
-- replaying the script rewrites nothing.
UPDATE patch_planning_import_rows SET environnement = 'Production'
WHERE LOWER(environnement) IN ('production','prod','prd')
AND environnement != 'Production';
-- Whitespace runs are collapsed to a single space before matching.
UPDATE patch_planning_import_rows SET environnement = 'Pré-Prod'
WHERE LOWER(REGEXP_REPLACE(environnement, '\s+', ' ', 'g')) IN
('pré-prod','pre-prod','preprod','pre prod','pré prod',
'pre-production','pré-production','preproduction','préproduction')
AND environnement != 'Pré-Prod';
UPDATE patch_planning_import_rows SET environnement = 'Recette'
WHERE LOWER(environnement) IN ('recette','rec','recettes')
AND environnement != 'Recette';
UPDATE patch_planning_import_rows SET environnement = 'Test'
WHERE LOWER(environnement) IN ('test','tests')
AND environnement != 'Test';
-- Underscores become spaces, then whitespace runs collapse to one space, so
-- 'test_1', 'TEST  1' and 'test1' all match. (The previous inner
-- REPLACE(..., ' ', ' ') replaced a space with a space and did nothing.)
UPDATE patch_planning_import_rows SET environnement = 'Test 1'
WHERE LOWER(REGEXP_REPLACE(REPLACE(environnement, '_', ' '), '\s+', ' ', 'g')) IN ('test 1','test1')
AND environnement != 'Test 1';
UPDATE patch_planning_import_rows SET environnement = 'Test 2'
WHERE LOWER(REGEXP_REPLACE(REPLACE(environnement, '_', ' '), '\s+', ' ', 'g')) IN ('test 2','test2')
AND environnement != 'Test 2';
UPDATE patch_planning_import_rows SET environnement = 'Développement'
WHERE LOWER(environnement) IN ('développement','developpement','dev','develop')
AND environnement != 'Développement';
UPDATE patch_planning_import_rows SET environnement = 'Qualif'
WHERE LOWER(environnement) IN ('qualif','qualification')
AND environnement != 'Qualif';
-- ─── Domains ─────────────────────────────────────────────────
-- Every statement excludes rows that already hold the canonical value, so
-- replaying the script rewrites nothing.
-- Whitespace runs are collapsed to a single space before matching.
UPDATE patch_planning_import_rows SET domaine = 'Flux Libre'
WHERE LOWER(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')) IN ('flux libre','flux-libre','fluxlibre')
AND domaine != 'Flux Libre';
-- Also catches spellings with a misplaced accent ('peagé', 'pèage').
UPDATE patch_planning_import_rows SET domaine = 'Péage'
WHERE LOWER(domaine) IN ('peage','péage','peagé','pèage')
AND domaine != 'Péage';
UPDATE patch_planning_import_rows SET domaine = 'Infrastructure'
WHERE LOWER(domaine) IN ('infrastructure','infra')
AND domaine != 'Infrastructure';
UPDATE patch_planning_import_rows SET domaine = 'Trafic'
WHERE LOWER(domaine) IN ('trafic','traffic')
AND domaine != 'Trafic';
-- Acronyms: the canonical form is the upper-cased value itself.
UPDATE patch_planning_import_rows SET domaine = UPPER(domaine)
WHERE LOWER(domaine) IN ('dmz','lan','bi','emv')
AND domaine != UPPER(domaine);
UPDATE patch_planning_import_rows SET domaine = 'Gestion'
WHERE LOWER(domaine) = 'gestion' AND domaine != 'Gestion';
-- ─── Vérification ───────────────────────────────────────────
-- SELECT environnement, COUNT(*) FROM patch_planning_import_rows GROUP BY 1 ORDER BY 1;
-- SELECT domaine, COUNT(*) FROM patch_planning_import_rows GROUP BY 1 ORDER BY 1;