"""Remplit servers.domaine depuis la colonne Domaine (col D, index 3) des sheets hebdo. Agrege les occurrences sur S02..S16 (configurable), retient le domaine majoritaire par hostname, puis UPDATE servers.domaine (uniquement si vide sauf --overwrite). Usage: python tools/fill_domaine_from_weekly.py [--from 2] [--to 16] [--dry-run] [--overwrite] """ import os import re import argparse import unicodedata from collections import defaultdict from sqlalchemy import create_engine, text try: import openpyxl except ImportError: print("[ERR] pip install openpyxl") raise DATABASE_URL = os.getenv("DATABASE_URL_DEMO") or os.getenv("DATABASE_URL") \ or "postgresql://patchcenter:PatchCenter2026!@localhost:5432/patchcenter_demo" def clean(v): if v is None: return None s = str(v).replace("\xa0", " ").strip() return s or None def norm_key(s): """lowercase sans accent pour dedup 'Flux Libre' vs 'flux libre'.""" if not s: return "" nfkd = unicodedata.normalize("NFKD", s.strip()) return "".join(c for c in nfkd if not unicodedata.combining(c)).lower() # Canonical mapping (meme logique que cleanup_referentiel) CANONICAL = { "flux libre": "Flux Libre", "peage": "Péage", "bi": "BI", "infrastructure": "Infrastructure", "gestion": "Gestion", "emv": "EMV", "trafic": "Trafic", "dmz": "DMZ", } def canonicalize(name): k = norm_key(name).rstrip("s") # Peages -> peage return CANONICAL.get(k, name) def main(): parser = argparse.ArgumentParser() parser.add_argument("xlsx_path") parser.add_argument("--from", dest="s_from", type=int, default=2) parser.add_argument("--to", dest="s_to", type=int, default=16) parser.add_argument("--dry-run", action="store_true") parser.add_argument("--overwrite", action="store_true") args = parser.parse_args() engine = create_engine(DATABASE_URL) conn = engine.connect().execution_options(isolation_level="AUTOCOMMIT") wb = openpyxl.load_workbook(args.xlsx_path, data_only=True) # Agrege : hostname -> { domaine_canonique: count } scores = defaultdict(lambda: defaultdict(int)) sheets_done = 0 for wk in range(args.s_from, args.s_to + 1): sname = f"S{wk:02d}" if sname not in wb.sheetnames: continue ws = wb[sname] sheets_done += 1 added = 0 for row in ws.iter_rows(min_row=2, values_only=True): host = clean(row[0]) if len(row) > 0 else None dom = clean(row[3]) if len(row) > 3 else None if not host or not dom: continue host = host.split(".")[0].lower() if not any(c.isalpha() for c in host): continue scores[host][canonicalize(dom)] += 1 added += 1 print(f" [{sname}] +{added}") print(f"[INFO] {sheets_done} sheets lues, {len(scores)} hostnames uniques") # Update servers stats = {"updated": 0, "unchanged": 0, "no_server": 0} for host, doms in scores.items(): best, _ = max(doms.items(), key=lambda x: x[1]) srv = conn.execute(text("SELECT id, domaine FROM servers WHERE hostname=:h"), {"h": host}).fetchone() if not srv: stats["no_server"] += 1 continue if srv.domaine and not args.overwrite: stats["unchanged"] += 1 continue if srv.domaine == best: stats["unchanged"] += 1 continue if args.dry_run: print(f" DRY: {host:25s} {srv.domaine or 'NULL'} -> {best}") else: conn.execute(text("UPDATE servers SET domaine=:d WHERE id=:sid"), {"d": best, "sid": srv.id}) stats["updated"] += 1 conn.close() print(f"\n[DONE] Maj: {stats['updated']} | Inchanges: {stats['unchanged']} " f"| Hors base: {stats['no_server']}") if __name__ == "__main__": main()