Add fill_domaine_from_weekly: extrait servers.domaine depuis col D des sheets S02..S16

This commit is contained in:
Pierre & Lumière 2026-04-15 00:07:55 +02:00
parent ca4f779e48
commit 1cc8d42e4a

View File

@ -0,0 +1,125 @@
"""Remplit servers.domaine depuis la colonne Domaine (col D, index 3) des sheets hebdo.
Agrege les occurrences sur S02..S16 (configurable), retient le domaine majoritaire
par hostname, puis UPDATE servers.domaine (uniquement si vide sauf --overwrite).
Usage:
python tools/fill_domaine_from_weekly.py <xlsx> [--from 2] [--to 16] [--dry-run] [--overwrite]
"""
import os
import re
import argparse
import unicodedata
from collections import defaultdict
from sqlalchemy import create_engine, text
try:
import openpyxl
except ImportError:
print("[ERR] pip install openpyxl")
raise
DATABASE_URL = os.getenv("DATABASE_URL_DEMO") or os.getenv("DATABASE_URL") \
or "postgresql://patchcenter:PatchCenter2026!@localhost:5432/patchcenter_demo"
def clean(v):
if v is None:
return None
s = str(v).replace("\xa0", " ").strip()
return s or None
def norm_key(s):
"""lowercase sans accent pour dedup 'Flux Libre' vs 'flux libre'."""
if not s:
return ""
nfkd = unicodedata.normalize("NFKD", s.strip())
return "".join(c for c in nfkd if not unicodedata.combining(c)).lower()
# Canonical mapping (meme logique que cleanup_referentiel)
CANONICAL = {
"flux libre": "Flux Libre",
"peage": "Péage",
"bi": "BI",
"infrastructure": "Infrastructure",
"gestion": "Gestion",
"emv": "EMV",
"trafic": "Trafic",
"dmz": "DMZ",
}
def canonicalize(name):
k = norm_key(name).rstrip("s") # Peages -> peage
return CANONICAL.get(k, name)
def main():
parser = argparse.ArgumentParser()
parser.add_argument("xlsx_path")
parser.add_argument("--from", dest="s_from", type=int, default=2)
parser.add_argument("--to", dest="s_to", type=int, default=16)
parser.add_argument("--dry-run", action="store_true")
parser.add_argument("--overwrite", action="store_true")
args = parser.parse_args()
engine = create_engine(DATABASE_URL)
conn = engine.connect().execution_options(isolation_level="AUTOCOMMIT")
wb = openpyxl.load_workbook(args.xlsx_path, data_only=True)
# Agrege : hostname -> { domaine_canonique: count }
scores = defaultdict(lambda: defaultdict(int))
sheets_done = 0
for wk in range(args.s_from, args.s_to + 1):
sname = f"S{wk:02d}"
if sname not in wb.sheetnames:
continue
ws = wb[sname]
sheets_done += 1
added = 0
for row in ws.iter_rows(min_row=2, values_only=True):
host = clean(row[0]) if len(row) > 0 else None
dom = clean(row[3]) if len(row) > 3 else None
if not host or not dom:
continue
host = host.split(".")[0].lower()
if not any(c.isalpha() for c in host):
continue
scores[host][canonicalize(dom)] += 1
added += 1
print(f" [{sname}] +{added}")
print(f"[INFO] {sheets_done} sheets lues, {len(scores)} hostnames uniques")
# Update servers
stats = {"updated": 0, "unchanged": 0, "no_server": 0}
for host, doms in scores.items():
best, _ = max(doms.items(), key=lambda x: x[1])
srv = conn.execute(text("SELECT id, domaine FROM servers WHERE hostname=:h"),
{"h": host}).fetchone()
if not srv:
stats["no_server"] += 1
continue
if srv.domaine and not args.overwrite:
stats["unchanged"] += 1
continue
if srv.domaine == best:
stats["unchanged"] += 1
continue
if args.dry_run:
print(f" DRY: {host:25s} {srv.domaine or 'NULL'} -> {best}")
else:
conn.execute(text("UPDATE servers SET domaine=:d WHERE id=:sid"),
{"d": best, "sid": srv.id})
stats["updated"] += 1
conn.close()
print(f"\n[DONE] Maj: {stats['updated']} | Inchanges: {stats['unchanged']} "
f"| Hors base: {stats['no_server']}")
if __name__ == "__main__":
main()