feat(planning_import): canonicalisation env + domaine a l'import (Production/production/PROD -> Production)

- Tables ENV_CANONICAL et DOMAIN_CANONICAL: lookup case+accent insensible
- _canonicalize_env / _canonicalize_domain : retournent la forme canonique connue,
  sinon valeur d'origine inchangee
- Applique dans l'INSERT row au moment de l'import
- Lookup en 3 passes:
  1. lowercase exact
  2. lowercase + accents stripped
  3. lowercase + accents + espaces normalises (ex 'Flux  Libre' -> 'flux libre')

Backfill SQL one-shot pour les rows existantes (backfill_canonicalize_env_domain_20260507.sql):
- env: Production/Pré-Prod/Recette/Test/Test 1/Test 2/Développement/Qualif
- domaine: Flux Libre/Péage/Infrastructure/Trafic/DMZ/LAN/BI/EMV/Gestion
- Idempotent
This commit is contained in:
Pierre & Lumière 2026-05-07 19:48:21 +02:00
parent 2b57ca3247
commit e448d8885b
2 changed files with 171 additions and 2 deletions

View File

@ -11,6 +11,7 @@ Le module pré-patching et le patching by-step seront branchés en étape 2/3.
import io
import json
import re
import unicodedata
from datetime import date, datetime, time
from fastapi import APIRouter, Request, Depends, UploadFile, File, Form, Query
from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
@ -37,6 +38,104 @@ def _detect_pct_required(rec: dict) -> bool:
return True
return False
# ────────────────────────────────────────────────────────────────────────
# Normalisation casse / accent / variantes des champs taxonomiques
# (env, domaine) — pour éviter d'avoir 'Production' / 'production' / 'PROD' etc.
# en parallèle dans la BDD.
# ────────────────────────────────────────────────────────────────────────
# Lower-cased alias -> canonical environment label.
# Aliases are declared per canonical label so each accepted spelling sits next
# to its target; the comprehension flattens the groups into one flat dict.
ENV_CANONICAL = {
    alias: label
    for label, aliases in (
        ("Production", ("production", "prod", "prd")),
        (
            "Pré-Prod",
            (
                "pré-prod",
                "pre-prod",
                "preprod",
                "pre prod",
                "pré prod",
                "pre-production",
                "pré-production",
                "preproduction",
                "préproduction",
            ),
        ),
        ("Recette", ("recette", "rec", "recettes")),
        # Test, with or without a number
        ("Test", ("test", "tests")),
        ("Test 1", ("test 1", "test1", "test_1")),
        ("Test 2", ("test 2", "test2", "test_2")),
        ("Développement", ("développement", "developpement", "dev", "develop")),
        ("Qualif", ("qualif", "qualification")),
    )
    for alias in aliases
}
# Lower-cased alias -> canonical domain label.
# Each key is listed once: a repeated key in a dict literal is silently
# overwritten. Variants that differ only by runs of whitespace need no entry
# of their own, since _canonicalize collapses whitespace before its last lookup.
DOMAIN_CANONICAL = {
    # Flux Libre
    "flux libre": "Flux Libre",
    "flux-libre": "Flux Libre",
    "fluxlibre": "Flux Libre",
    # Péage
    "péage": "Péage",
    "peage": "Péage",
    # Infrastructure
    "infrastructure": "Infrastructure",
    "infra": "Infrastructure",
    # Acronyms are stored upper-case
    "dmz": "DMZ",
    "lan": "LAN",
    # Trafic (the English spelling is accepted as an alias)
    "trafic": "Trafic",
    "traffic": "Trafic",
    "gestion": "Gestion",
    "bi": "BI",
    "emv": "EMV",
}
def _strip_accents_lower(s: str) -> str:
nfkd = unicodedata.normalize("NFKD", s)
return "".join(c for c in nfkd if not unicodedata.combining(c)).lower()
def _canonicalize(v, mapping: dict):
    """Return the canonical form of *v* according to *mapping*.

    Matching ignores case, accents and runs of whitespace. Lookups are tried
    in order, first hit wins:

    1. lower-cased value;
    2. lower-cased value with accents stripped;
    3. same as 2 with whitespace runs collapsed to a single space;
    4. same as 3, compared against the mapping keys folded the same way, so
       a mapping that only lists an accented key still matches unaccented
       input.

    Args:
        v: Raw value; converted with ``str()`` and stripped. May be ``None``.
        mapping: Lower-cased alias -> canonical label.

    Returns:
        ``None`` when *v* is ``None``; the canonical label when a lookup
        succeeds; otherwise the stripped string form of *v* (which is ``""``
        for blank input).
    """
    if v is None:
        return None
    s = str(v).strip()
    if not s:
        return s

    no_acc = _strip_accents_lower(s)
    folded = re.sub(r"\s+", " ", no_acc)
    for candidate in (s.lower(), no_acc, folded):
        if candidate in mapping:
            return mapping[candidate]

    # Last resort: fold the mapping keys too, so accent-insensitivity does not
    # depend on the table listing an unaccented twin for every accented key.
    for key, canonical in mapping.items():
        if isinstance(key, str) and re.sub(r"\s+", " ", _strip_accents_lower(key)) == folded:
            return canonical

    return s  # no known canonical form: keep the (stripped) value as-is
def _canonicalize_env(v):
    """Canonicalise an environment label through the ``ENV_CANONICAL`` table.

    Thin wrapper: returns whatever ``_canonicalize`` returns for *v*.
    """
    return _canonicalize(v, mapping=ENV_CANONICAL)
def _canonicalize_domain(v):
    """Canonicalise a domain label through the ``DOMAIN_CANONICAL`` table.

    Thin wrapper: returns whatever ``_canonicalize`` returns for *v*.
    """
    return _canonicalize(v, mapping=DOMAIN_CANONICAL)
# Colonnes attendues dans les feuilles Sxx (ordre = priorité, on matche par regex/lower)
# Le fichier 2026 a 12 variantes d'en-têtes selon la semaine
# (ancien format S02-S06, nouveau format DTS S07+)
@ -554,8 +653,8 @@ async def import_upload(request: Request, db=Depends(get_db),
"imp": import_id, "sn": sheet_name, "wn": week_num, "ri": rec["row_index"],
"an": asset_str,
"it": str(rec.get("intervenant")) if rec.get("intervenant") else None,
"en": str(rec.get("environnement")) if rec.get("environnement") else None,
"do": str(rec.get("domaine")) if rec.get("domaine") else None,
"en": _canonicalize_env(rec.get("environnement")) if rec.get("environnement") else None,
"do": _canonicalize_domain(rec.get("domaine")) if rec.get("domaine") else None,
"os": str(rec.get("os")) if rec.get("os") else None,
"ov": str(rec.get("os_version")) if rec.get("os_version") else None,
"ap": str(rec.get("application_name")) if rec.get("application_name") else None,

View File

@ -0,0 +1,70 @@
-- One-shot backfill: rewrites existing environnement / domaine values to their
-- canonical spellings (same targets as the ENV_CANONICAL / DOMAIN_CANONICAL
-- tables used by _canonicalize_env / _canonicalize_domain in planning_import.py).
-- Run once after pulling the code that canonicalises values at import time.
-- Idempotent: every UPDATE skips rows that already hold the canonical value,
-- so the script can be replayed without side effects.
-- NOTE(review): unlike the Python import path, these statements neither trim
-- surrounding whitespace nor strip accents; only the spellings listed below
-- are rewritten. Confirm that is sufficient for the existing data.

-- ─── Environments ────────────────────────────────────────────
UPDATE patch_planning_import_rows SET environnement = 'Production'
 WHERE LOWER(environnement) IN ('production','prod','prd')
   AND environnement != 'Production';

-- Whitespace runs are collapsed before comparison ('Pre  Prod' -> 'pre prod').
UPDATE patch_planning_import_rows SET environnement = 'Pré-Prod'
 WHERE LOWER(REGEXP_REPLACE(environnement, '\s+', ' ', 'g')) IN
       ('pré-prod','pre-prod','preprod','pre prod','pré prod',
        'pre-production','pré-production','preproduction','préproduction')
   AND environnement != 'Pré-Prod';

UPDATE patch_planning_import_rows SET environnement = 'Recette'
 WHERE LOWER(environnement) IN ('recette','rec','recettes')
   AND environnement != 'Recette';

UPDATE patch_planning_import_rows SET environnement = 'Test'
 WHERE LOWER(environnement) IN ('test','tests')
   AND environnement != 'Test';

-- Underscores become spaces, then whitespace runs are collapsed, so that
-- 'test_1', 'Test  1' and 'TEST1' all resolve to 'Test 1'.
-- (The previous REPLACE(..., ' ', ' ') replaced a space with a space: a no-op.)
UPDATE patch_planning_import_rows SET environnement = 'Test 1'
 WHERE LOWER(REGEXP_REPLACE(REPLACE(environnement, '_', ' '), '\s+', ' ', 'g'))
       IN ('test 1','test1')
   AND environnement != 'Test 1';

UPDATE patch_planning_import_rows SET environnement = 'Test 2'
 WHERE LOWER(REGEXP_REPLACE(REPLACE(environnement, '_', ' '), '\s+', ' ', 'g'))
       IN ('test 2','test2')
   AND environnement != 'Test 2';

UPDATE patch_planning_import_rows SET environnement = 'Développement'
 WHERE LOWER(environnement) IN ('développement','developpement','dev','develop')
   AND environnement != 'Développement';

UPDATE patch_planning_import_rows SET environnement = 'Qualif'
 WHERE LOWER(environnement) IN ('qualif','qualification')
   AND environnement != 'Qualif';

-- ─── Domains ─────────────────────────────────────────────────
UPDATE patch_planning_import_rows SET domaine = 'Flux Libre'
 WHERE LOWER(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')) IN ('flux libre','flux-libre','fluxlibre')
   AND domaine != 'Flux Libre';

-- Accent variants are listed explicitly because no accent stripping is applied here.
UPDATE patch_planning_import_rows SET domaine = 'Péage'
 WHERE LOWER(domaine) IN ('peage','péage','peagé','pèage')
   AND domaine != 'Péage';

UPDATE patch_planning_import_rows SET domaine = 'Infrastructure'
 WHERE LOWER(domaine) IN ('infrastructure','infra')
   AND domaine != 'Infrastructure';

UPDATE patch_planning_import_rows SET domaine = 'Trafic'
 WHERE LOWER(domaine) IN ('trafic','traffic')
   AND domaine != 'Trafic';

-- Acronyms: the canonical form is simply the upper-cased value.
UPDATE patch_planning_import_rows SET domaine = UPPER(domaine)
 WHERE LOWER(domaine) IN ('dmz','lan','bi','emv')
   AND domaine != UPPER(domaine);

UPDATE patch_planning_import_rows SET domaine = 'Gestion'
 WHERE LOWER(domaine) = 'gestion' AND domaine != 'Gestion';

-- ─── Verification ────────────────────────────────────────────
-- SELECT environnement, COUNT(*) FROM patch_planning_import_rows GROUP BY 1 ORDER BY 1;
-- SELECT domaine, COUNT(*) FROM patch_planning_import_rows GROUP BY 1 ORDER BY 1;