126 lines
3.9 KiB
Python
126 lines
3.9 KiB
Python
"""Remplit servers.domaine depuis la colonne Domaine (col D, index 3) des sheets hebdo.
|
|
|
|
Agrege les occurrences sur S02..S16 (configurable), retient le domaine majoritaire
|
|
par hostname, puis UPDATE servers.domaine (uniquement si vide sauf --overwrite).
|
|
|
|
Usage:
|
|
python tools/fill_domaine_from_weekly.py <xlsx> [--from 2] [--to 16] [--dry-run] [--overwrite]
|
|
"""
|
|
import os
|
|
import re
|
|
import argparse
|
|
import unicodedata
|
|
from collections import defaultdict
|
|
from sqlalchemy import create_engine, text
|
|
|
|
try:
|
|
import openpyxl
|
|
except ImportError:
|
|
print("[ERR] pip install openpyxl")
|
|
raise
|
|
|
|
DATABASE_URL = os.getenv("DATABASE_URL_DEMO") or os.getenv("DATABASE_URL") \
|
|
or "postgresql://patchcenter:PatchCenter2026!@localhost:5432/patchcenter_demo"
|
|
|
|
|
|
def clean(v):
|
|
if v is None:
|
|
return None
|
|
s = str(v).replace("\xa0", " ").strip()
|
|
return s or None
|
|
|
|
|
|
def norm_key(s):
|
|
"""lowercase sans accent pour dedup 'Flux Libre' vs 'flux libre'."""
|
|
if not s:
|
|
return ""
|
|
nfkd = unicodedata.normalize("NFKD", s.strip())
|
|
return "".join(c for c in nfkd if not unicodedata.combining(c)).lower()
|
|
|
|
|
|
# Canonical mapping (meme logique que cleanup_referentiel)
|
|
CANONICAL = {
|
|
"flux libre": "Flux Libre",
|
|
"peage": "Péage",
|
|
"bi": "BI",
|
|
"infrastructure": "Infrastructure",
|
|
"gestion": "Gestion",
|
|
"emv": "EMV",
|
|
"trafic": "Trafic",
|
|
"dmz": "DMZ",
|
|
}
|
|
|
|
|
|
def canonicalize(name):
|
|
k = norm_key(name).rstrip("s") # Peages -> peage
|
|
return CANONICAL.get(k, name)
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("xlsx_path")
|
|
parser.add_argument("--from", dest="s_from", type=int, default=2)
|
|
parser.add_argument("--to", dest="s_to", type=int, default=16)
|
|
parser.add_argument("--dry-run", action="store_true")
|
|
parser.add_argument("--overwrite", action="store_true")
|
|
args = parser.parse_args()
|
|
|
|
engine = create_engine(DATABASE_URL)
|
|
conn = engine.connect().execution_options(isolation_level="AUTOCOMMIT")
|
|
|
|
wb = openpyxl.load_workbook(args.xlsx_path, data_only=True)
|
|
|
|
# Agrege : hostname -> { domaine_canonique: count }
|
|
scores = defaultdict(lambda: defaultdict(int))
|
|
|
|
sheets_done = 0
|
|
for wk in range(args.s_from, args.s_to + 1):
|
|
sname = f"S{wk:02d}"
|
|
if sname not in wb.sheetnames:
|
|
continue
|
|
ws = wb[sname]
|
|
sheets_done += 1
|
|
added = 0
|
|
for row in ws.iter_rows(min_row=2, values_only=True):
|
|
host = clean(row[0]) if len(row) > 0 else None
|
|
dom = clean(row[3]) if len(row) > 3 else None
|
|
if not host or not dom:
|
|
continue
|
|
host = host.split(".")[0].lower()
|
|
if not any(c.isalpha() for c in host):
|
|
continue
|
|
scores[host][canonicalize(dom)] += 1
|
|
added += 1
|
|
print(f" [{sname}] +{added}")
|
|
print(f"[INFO] {sheets_done} sheets lues, {len(scores)} hostnames uniques")
|
|
|
|
# Update servers
|
|
stats = {"updated": 0, "unchanged": 0, "no_server": 0}
|
|
for host, doms in scores.items():
|
|
best, _ = max(doms.items(), key=lambda x: x[1])
|
|
srv = conn.execute(text("SELECT id, domaine FROM servers WHERE hostname=:h"),
|
|
{"h": host}).fetchone()
|
|
if not srv:
|
|
stats["no_server"] += 1
|
|
continue
|
|
if srv.domaine and not args.overwrite:
|
|
stats["unchanged"] += 1
|
|
continue
|
|
if srv.domaine == best:
|
|
stats["unchanged"] += 1
|
|
continue
|
|
if args.dry_run:
|
|
print(f" DRY: {host:25s} {srv.domaine or 'NULL'} -> {best}")
|
|
else:
|
|
conn.execute(text("UPDATE servers SET domaine=:d WHERE id=:sid"),
|
|
{"d": best, "sid": srv.id})
|
|
stats["updated"] += 1
|
|
|
|
conn.close()
|
|
print(f"\n[DONE] Maj: {stats['updated']} | Inchanges: {stats['unchanged']} "
|
|
f"| Hors base: {stats['no_server']}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|