Add fill_domaine_from_weekly: extrait servers.domaine depuis col D des sheets S02..S16

2026-04-15 00:07:55 +02:00 · 2026-04-15 00:07:55 +02:00 · 1cc8d42e4a
commit 1cc8d42e4a
parent ca4f779e48
1 changed files with 125 additions and 0 deletions
--- a/tools/fill_domaine_from_weekly.py
+++ b/tools/fill_domaine_from_weekly.py
@ -0,0 +1,125 @@
+"""Remplit servers.domaine depuis la colonne Domaine (col D, index 3) des sheets hebdo.
+
+Agrege les occurrences sur S02..S16 (configurable), retient le domaine majoritaire
+par hostname, puis UPDATE servers.domaine (uniquement si vide sauf --overwrite).
+
+Usage:
+    python tools/fill_domaine_from_weekly.py <xlsx> [--from 2] [--to 16] [--dry-run] [--overwrite]
+"""
+import os
+import re
+import argparse
+import unicodedata
+from collections import defaultdict
+from sqlalchemy import create_engine, text
+
+try:
+    import openpyxl
+except ImportError:
+    print("[ERR] pip install openpyxl")
+    raise
+
+DATABASE_URL = os.getenv("DATABASE_URL_DEMO") or os.getenv("DATABASE_URL") \
+    or "postgresql://patchcenter:PatchCenter2026!@localhost:5432/patchcenter_demo"
+
+
+def clean(v):
+    if v is None:
+        return None
+    s = str(v).replace("\xa0", " ").strip()
+    return s or None
+
+
+def norm_key(s):
+    """lowercase sans accent pour dedup 'Flux Libre' vs 'flux libre'."""
+    if not s:
+        return ""
+    nfkd = unicodedata.normalize("NFKD", s.strip())
+    return "".join(c for c in nfkd if not unicodedata.combining(c)).lower()
+
+
+# Canonical mapping (meme logique que cleanup_referentiel)
+CANONICAL = {
+    "flux libre": "Flux Libre",
+    "peage": "Péage",
+    "bi": "BI",
+    "infrastructure": "Infrastructure",
+    "gestion": "Gestion",
+    "emv": "EMV",
+    "trafic": "Trafic",
+    "dmz": "DMZ",
+}
+
+
+def canonicalize(name):
+    k = norm_key(name).rstrip("s")  # Peages -> peage
+    return CANONICAL.get(k, name)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("xlsx_path")
+    parser.add_argument("--from", dest="s_from", type=int, default=2)
+    parser.add_argument("--to", dest="s_to", type=int, default=16)
+    parser.add_argument("--dry-run", action="store_true")
+    parser.add_argument("--overwrite", action="store_true")
+    args = parser.parse_args()
+
+    engine = create_engine(DATABASE_URL)
+    conn = engine.connect().execution_options(isolation_level="AUTOCOMMIT")
+
+    wb = openpyxl.load_workbook(args.xlsx_path, data_only=True)
+
+    # Agrege : hostname -> { domaine_canonique: count }
+    scores = defaultdict(lambda: defaultdict(int))
+
+    sheets_done = 0
+    for wk in range(args.s_from, args.s_to + 1):
+        sname = f"S{wk:02d}"
+        if sname not in wb.sheetnames:
+            continue
+        ws = wb[sname]
+        sheets_done += 1
+        added = 0
+        for row in ws.iter_rows(min_row=2, values_only=True):
+            host = clean(row[0]) if len(row) > 0 else None
+            dom = clean(row[3]) if len(row) > 3 else None
+            if not host or not dom:
+                continue
+            host = host.split(".")[0].lower()
+            if not any(c.isalpha() for c in host):
+                continue
+            scores[host][canonicalize(dom)] += 1
+            added += 1
+        print(f"  [{sname}] +{added}")
+    print(f"[INFO] {sheets_done} sheets lues, {len(scores)} hostnames uniques")
+
+    # Update servers
+    stats = {"updated": 0, "unchanged": 0, "no_server": 0}
+    for host, doms in scores.items():
+        best, _ = max(doms.items(), key=lambda x: x[1])
+        srv = conn.execute(text("SELECT id, domaine FROM servers WHERE hostname=:h"),
+                           {"h": host}).fetchone()
+        if not srv:
+            stats["no_server"] += 1
+            continue
+        if srv.domaine and not args.overwrite:
+            stats["unchanged"] += 1
+            continue
+        if srv.domaine == best:
+            stats["unchanged"] += 1
+            continue
+        if args.dry_run:
+            print(f"  DRY: {host:25s} {srv.domaine or 'NULL'} -> {best}")
+        else:
+            conn.execute(text("UPDATE servers SET domaine=:d WHERE id=:sid"),
+                         {"d": best, "sid": srv.id})
+        stats["updated"] += 1
+
+    conn.close()
+    print(f"\n[DONE] Maj: {stats['updated']} | Inchanges: {stats['unchanged']} "
+          f"| Hors base: {stats['no_server']}")
+
+
+if __name__ == "__main__":
+    main()