feat(planning_import): canonicalisation env + domaine a l'import (Production/production/PROD -> Production)
- Tables ENV_CANONICAL et DOMAIN_CANONICAL: lookup case+accent insensible - _canonicalize_env / _canonicalize_domain : retournent la forme canonique connue, sinon valeur d'origine inchangee - Applique dans l'INSERT row au moment de l'import - Lookup en 3 passes: 1. lowercase exact 2. lowercase + accents stripped 3. lowercase + accents + espaces normalises (ex 'Flux Libre' -> 'flux libre') Backfill SQL one-shot pour les rows existantes (backfill_canonicalize_env_domain_20260507.sql): - env: Production/Pré-Prod/Recette/Test/Test 1/Test 2/Développement/Qualif - domaine: Flux Libre/Péage/Infrastructure/Trafic/DMZ/LAN/BI/EMV/Gestion - Idempotent
This commit is contained in:
parent
2b57ca3247
commit
e448d8885b
@ -11,6 +11,7 @@ Le module pré-patching et le patching by-step seront branchés en étape 2/3.
|
|||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import unicodedata
|
||||||
from datetime import date, datetime, time
|
from datetime import date, datetime, time
|
||||||
from fastapi import APIRouter, Request, Depends, UploadFile, File, Form, Query
|
from fastapi import APIRouter, Request, Depends, UploadFile, File, Form, Query
|
||||||
from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
|
from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
|
||||||
@ -37,6 +38,104 @@ def _detect_pct_required(rec: dict) -> bool:
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────────
|
||||||
|
# Normalisation casse / accent / variantes des champs taxonomiques
|
||||||
|
# (env, domaine) — pour éviter d'avoir 'Production' / 'production' / 'PROD' etc.
|
||||||
|
# en parallèle dans la BDD.
|
||||||
|
# ────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Accepted lower-cased spellings, grouped by the canonical environment label
# they map to. Kept as (canonical, aliases) pairs so each family reads as a unit.
_ENV_ALIASES = (
    ("Production", ("production", "prod", "prd")),
    (
        "Pré-Prod",
        (
            "pré-prod",
            "pre-prod",
            "preprod",
            "pre prod",
            "pré prod",
            "pre-production",
            "pré-production",
            "preproduction",
            "préproduction",
        ),
    ),
    ("Recette", ("recette", "rec", "recettes")),
    # Test, with or without a number
    ("Test", ("test", "tests")),
    ("Test 1", ("test 1", "test1", "test_1")),
    ("Test 2", ("test 2", "test2", "test_2")),
    ("Développement", ("développement", "developpement", "dev", "develop")),
    ("Qualif", ("qualif", "qualification")),
)

# Flat lookup table: lower-cased alias -> canonical environment label.
ENV_CANONICAL = {
    alias: canonical
    for canonical, aliases in _ENV_ALIASES
    for alias in aliases
}
|
||||||
|
|
||||||
|
# Lookup table: lower-cased alias -> canonical domain label.
# Each key appears exactly once: a repeated key in a dict literal is silently
# overwritten by the last occurrence, so a duplicate entry would be dead code.
DOMAIN_CANONICAL = {
    # Flux Libre
    "flux libre": "Flux Libre",
    "flux-libre": "Flux Libre",
    "fluxlibre": "Flux Libre",
    # Péage
    "péage": "Péage",
    "peage": "Péage",
    # Infrastructure
    "infrastructure": "Infrastructure",
    "infra": "Infrastructure",
    # Network zones (acronyms stay upper-case)
    "dmz": "DMZ",
    "lan": "LAN",
    # Trafic (the English spelling is accepted as an alias)
    "trafic": "Trafic",
    "traffic": "Trafic",
    # Others
    "gestion": "Gestion",
    "bi": "BI",
    "emv": "EMV",
}
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_accents_lower(s: str) -> str:
|
||||||
|
nfkd = unicodedata.normalize("NFKD", s)
|
||||||
|
return "".join(c for c in nfkd if not unicodedata.combining(c)).lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _canonicalize(v, mapping: dict):
|
||||||
|
"""Renvoie la forme canonique si v matche (case + accents insensitive),
|
||||||
|
sinon renvoie v inchangé (en strip).
|
||||||
|
Lookup en 2 passes : 1) lowercase exact, 2) sans accents."""
|
||||||
|
if v is None:
|
||||||
|
return None
|
||||||
|
s = str(v).strip()
|
||||||
|
if not s:
|
||||||
|
return s
|
||||||
|
low = s.lower()
|
||||||
|
if low in mapping:
|
||||||
|
return mapping[low]
|
||||||
|
no_acc = _strip_accents_lower(s)
|
||||||
|
if no_acc in mapping:
|
||||||
|
return mapping[no_acc]
|
||||||
|
# Aussi : compaction des espaces multiples (ex: "Flux Libre" -> "flux libre")
|
||||||
|
no_acc_compact = re.sub(r"\s+", " ", no_acc)
|
||||||
|
if no_acc_compact in mapping:
|
||||||
|
return mapping[no_acc_compact]
|
||||||
|
return s # pas de canonique connue, on garde tel quel
|
||||||
|
|
||||||
|
|
||||||
|
def _canonicalize_env(v):
    """Return the canonical environment label for *v* via ENV_CANONICAL.

    Delegates to ``_canonicalize``; see it for the matching rules and the
    handling of ``None`` and unknown values.
    """
    canonical = _canonicalize(v, ENV_CANONICAL)
    return canonical
|
||||||
|
|
||||||
|
|
||||||
|
def _canonicalize_domain(v):
    """Return the canonical domain label for *v* via DOMAIN_CANONICAL.

    Delegates to ``_canonicalize``; see it for the matching rules and the
    handling of ``None`` and unknown values.
    """
    canonical = _canonicalize(v, DOMAIN_CANONICAL)
    return canonical
|
||||||
|
|
||||||
# Colonnes attendues dans les feuilles Sxx (ordre = priorité, on matche par regex/lower)
|
# Colonnes attendues dans les feuilles Sxx (ordre = priorité, on matche par regex/lower)
|
||||||
# Le fichier 2026 a 12 variantes d'en-têtes selon la semaine
|
# Le fichier 2026 a 12 variantes d'en-têtes selon la semaine
|
||||||
# (ancien format S02-S06, nouveau format DTS S07+)
|
# (ancien format S02-S06, nouveau format DTS S07+)
|
||||||
@ -554,8 +653,8 @@ async def import_upload(request: Request, db=Depends(get_db),
|
|||||||
"imp": import_id, "sn": sheet_name, "wn": week_num, "ri": rec["row_index"],
|
"imp": import_id, "sn": sheet_name, "wn": week_num, "ri": rec["row_index"],
|
||||||
"an": asset_str,
|
"an": asset_str,
|
||||||
"it": str(rec.get("intervenant")) if rec.get("intervenant") else None,
|
"it": str(rec.get("intervenant")) if rec.get("intervenant") else None,
|
||||||
"en": str(rec.get("environnement")) if rec.get("environnement") else None,
|
"en": _canonicalize_env(rec.get("environnement")) if rec.get("environnement") else None,
|
||||||
"do": str(rec.get("domaine")) if rec.get("domaine") else None,
|
"do": _canonicalize_domain(rec.get("domaine")) if rec.get("domaine") else None,
|
||||||
"os": str(rec.get("os")) if rec.get("os") else None,
|
"os": str(rec.get("os")) if rec.get("os") else None,
|
||||||
"ov": str(rec.get("os_version")) if rec.get("os_version") else None,
|
"ov": str(rec.get("os_version")) if rec.get("os_version") else None,
|
||||||
"ap": str(rec.get("application_name")) if rec.get("application_name") else None,
|
"ap": str(rec.get("application_name")) if rec.get("application_name") else None,
|
||||||
|
|||||||
70
backfill_canonicalize_env_domain_20260507.sql
Normal file
70
backfill_canonicalize_env_domain_20260507.sql
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
-- One-shot backfill: rewrite existing environnement / domaine values to their
-- canonical forms (see _canonicalize_env / _canonicalize_domain in
-- planning_import.py).
-- Run once after deploying the code that canonicalises values at import time.
-- Idempotent: every UPDATE excludes rows that already hold the canonical
-- value, so replaying the script changes nothing.
--
-- Every comparison is made on
--     LOWER(TRIM(REGEXP_REPLACE(col, '\s+', ' ', 'g')))
-- i.e. whitespace runs are collapsed to one space and surrounding blanks are
-- removed first, as the Python import code does (strip + whitespace
-- compaction). Without this, values such as 'Production ' or 'Test  1'
-- would be left untouched.
-- Accent variants are NOT folded here; they are listed explicitly in the
-- IN (...) lists.

-- ─── Environments ────────────────────────────────────────────

UPDATE patch_planning_import_rows SET environnement = 'Production'
WHERE LOWER(TRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g'))) IN
      ('production','prod','prd')
  AND environnement != 'Production';

UPDATE patch_planning_import_rows SET environnement = 'Pré-Prod'
WHERE LOWER(TRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g'))) IN
      ('pré-prod','pre-prod','preprod','pre prod','pré prod',
       'pre-production','pré-production','preproduction','préproduction')
  AND environnement != 'Pré-Prod';

UPDATE patch_planning_import_rows SET environnement = 'Recette'
WHERE LOWER(TRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g'))) IN
      ('recette','rec','recettes')
  AND environnement != 'Recette';

UPDATE patch_planning_import_rows SET environnement = 'Test'
WHERE LOWER(TRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g'))) IN
      ('test','tests')
  AND environnement != 'Test';

-- 'test_1' / 'test_2' are listed explicitly (same aliases as ENV_CANONICAL)
-- instead of rewriting '_' to ' ' on the fly.
UPDATE patch_planning_import_rows SET environnement = 'Test 1'
WHERE LOWER(TRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g'))) IN
      ('test 1','test1','test_1')
  AND environnement != 'Test 1';

UPDATE patch_planning_import_rows SET environnement = 'Test 2'
WHERE LOWER(TRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g'))) IN
      ('test 2','test2','test_2')
  AND environnement != 'Test 2';

UPDATE patch_planning_import_rows SET environnement = 'Développement'
WHERE LOWER(TRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g'))) IN
      ('développement','developpement','dev','develop')
  AND environnement != 'Développement';

UPDATE patch_planning_import_rows SET environnement = 'Qualif'
WHERE LOWER(TRIM(REGEXP_REPLACE(environnement, '\s+', ' ', 'g'))) IN
      ('qualif','qualification')
  AND environnement != 'Qualif';

-- ─── Domains ─────────────────────────────────────────────────

UPDATE patch_planning_import_rows SET domaine = 'Flux Libre'
WHERE LOWER(TRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g'))) IN
      ('flux libre','flux-libre','fluxlibre')
  AND domaine != 'Flux Libre';

UPDATE patch_planning_import_rows SET domaine = 'Péage'
WHERE LOWER(TRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g'))) IN
      ('peage','péage','peagé','pèage')
  AND domaine != 'Péage';

UPDATE patch_planning_import_rows SET domaine = 'Infrastructure'
WHERE LOWER(TRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g'))) IN
      ('infrastructure','infra')
  AND domaine != 'Infrastructure';

UPDATE patch_planning_import_rows SET domaine = 'Trafic'
WHERE LOWER(TRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g'))) IN
      ('trafic','traffic')
  AND domaine != 'Trafic';

-- Acronyms: the canonical form is simply the upper-cased, trimmed value.
UPDATE patch_planning_import_rows
SET domaine = UPPER(TRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')))
WHERE LOWER(TRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g'))) IN
      ('dmz','lan','bi','emv')
  AND domaine != UPPER(TRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g')));

UPDATE patch_planning_import_rows SET domaine = 'Gestion'
WHERE LOWER(TRIM(REGEXP_REPLACE(domaine, '\s+', ' ', 'g'))) = 'gestion'
  AND domaine != 'Gestion';

-- ─── Verification (run manually) ─────────────────────────────

-- SELECT environnement, COUNT(*) FROM patch_planning_import_rows GROUP BY 1 ORDER BY 1;
-- SELECT domaine, COUNT(*) FROM patch_planning_import_rows GROUP BY 1 ORDER BY 1;
|
||||||
Loading…
Reference in New Issue
Block a user