Add fill_domaine_from_weekly: extrait servers.domaine depuis col D des sheets S02..S16
This commit is contained in:
parent
ca4f779e48
commit
1cc8d42e4a
125
tools/fill_domaine_from_weekly.py
Normal file
125
tools/fill_domaine_from_weekly.py
Normal file
@ -0,0 +1,125 @@
|
||||
"""Remplit servers.domaine depuis la colonne Domaine (col D, index 3) des sheets hebdo.
|
||||
|
||||
Agrege les occurrences sur S02..S16 (configurable), retient le domaine majoritaire
|
||||
par hostname, puis UPDATE servers.domaine (uniquement si vide sauf --overwrite).
|
||||
|
||||
Usage:
|
||||
python tools/fill_domaine_from_weekly.py <xlsx> [--from 2] [--to 16] [--dry-run] [--overwrite]
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import argparse
|
||||
import unicodedata
|
||||
from collections import defaultdict
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
try:
|
||||
import openpyxl
|
||||
except ImportError:
|
||||
print("[ERR] pip install openpyxl")
|
||||
raise
|
||||
|
||||
# Database connection URL, resolved once at import time.
# Precedence: DATABASE_URL_DEMO, then DATABASE_URL, then the local default below
# (an unset or empty variable falls through to the next candidate).
# NOTE(review): the fallback URL embeds a password in source control -- consider
# requiring the environment variable instead of shipping a default credential.
DATABASE_URL = os.getenv("DATABASE_URL_DEMO") or os.getenv("DATABASE_URL") \
    or "postgresql://patchcenter:PatchCenter2026!@localhost:5432/patchcenter_demo"
|
||||
|
||||
|
||||
def clean(v):
    """Return the trimmed text form of a cell value, or None when it is empty.

    The value is converted with ``str()``, non-breaking spaces (U+00A0) are
    replaced by regular spaces, and surrounding whitespace is stripped.
    ``None`` input and values that end up blank both yield ``None``.
    """
    if v is not None:
        stripped = str(v).replace("\xa0", " ").strip()
        if stripped:
            return stripped
    return None
|
||||
|
||||
|
||||
def norm_key(s):
    """Build a lowercase, accent-free comparison key for *s*.

    Used to treat spellings such as 'Flux Libre' and 'flux libre' as the same
    value. Surrounding whitespace is stripped, the text is NFKD-decomposed,
    combining marks are dropped, and the result is lowercased. Falsy input
    (``None`` or an empty string) yields an empty string.
    """
    if not s:
        return ""
    decomposed = unicodedata.normalize("NFKD", s.strip())
    # After NFKD, accents are separate combining characters; keep base chars only.
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    return "".join(base_chars).lower()
|
||||
|
||||
|
||||
# Canonical display name for each normalized domain key.
# Keys are in the form canonicalize() looks up: lowercase, accent-free, and
# without trailing "s". Per the original note, this follows the same logic as
# cleanup_referentiel (not visible in this file).
CANONICAL = {
    "flux libre": "Flux Libre",
    "peage": "Péage",
    "bi": "BI",
    "infrastructure": "Infrastructure",
    "gestion": "Gestion",
    "emv": "EMV",
    "trafic": "Trafic",
    "dmz": "DMZ",
}
|
||||
|
||||
|
||||
def canonicalize(name):
    """Map a raw domain label to its canonical spelling.

    The label is normalized with ``norm_key`` and any trailing "s" characters
    are removed (so a plural such as "Peages" matches the "peage" entry). If
    the resulting key is in ``CANONICAL`` the canonical name is returned;
    otherwise *name* is returned unchanged.
    """
    lookup_key = norm_key(name).rstrip("s")  # e.g. "Peages" -> "peage"
    if lookup_key in CANONICAL:
        return CANONICAL[lookup_key]
    return name
|
||||
|
||||
|
||||
def _collect_scores(wb, s_from, s_to):
    """Count canonical domain occurrences per hostname across weekly sheets.

    Sheets are named ``S<NN>`` (two digits) for every week number in
    ``s_from..s_to`` inclusive; missing sheets are skipped. The first row of
    each sheet is skipped as a header. The hostname is read from column A
    (index 0) and the domain from column D (index 3).

    Returns a ``(scores, sheets_done)`` tuple where ``scores`` maps
    ``hostname -> {canonical_domain: count}`` and ``sheets_done`` is the
    number of sheets actually read.
    """
    scores = defaultdict(lambda: defaultdict(int))
    sheets_done = 0
    for wk in range(s_from, s_to + 1):
        sname = f"S{wk:02d}"
        if sname not in wb.sheetnames:
            continue
        ws = wb[sname]
        sheets_done += 1
        added = 0
        for row in ws.iter_rows(min_row=2, values_only=True):
            # Rows can be shorter than expected; guard every index access.
            host = clean(row[0]) if len(row) > 0 else None
            dom = clean(row[3]) if len(row) > 3 else None
            if not host or not dom:
                continue
            # Keep only the short, lowercase host name (drop any domain suffix).
            host = host.split(".")[0].lower()
            # Skip values without any letter (e.g. purely numeric cells).
            if not any(c.isalpha() for c in host):
                continue
            scores[host][canonicalize(dom)] += 1
            added += 1
        print(f" [{sname}] +{added}")
    return scores, sheets_done


def _apply_updates(conn, scores, overwrite, dry_run):
    """Write the majority domain of each hostname into ``servers.domaine``.

    For every hostname the domain with the highest count is selected (on a
    tie, the one seen first wins). Servers that are not in the table, that
    already have a domain (unless *overwrite* is set), or whose domain already
    equals the selected one are left alone. With *dry_run* the planned change
    is printed instead of executed; it is still counted under ``updated`` so
    the summary reflects what a real run would change.

    Returns the counters dict with keys ``updated``, ``unchanged`` and
    ``no_server``.
    """
    stats = {"updated": 0, "unchanged": 0, "no_server": 0}
    select_stmt = text("SELECT id, domaine FROM servers WHERE hostname=:h")
    update_stmt = text("UPDATE servers SET domaine=:d WHERE id=:sid")
    for host, doms in scores.items():
        best, _ = max(doms.items(), key=lambda x: x[1])
        srv = conn.execute(select_stmt, {"h": host}).fetchone()
        if not srv:
            stats["no_server"] += 1
            continue
        if srv.domaine and not overwrite:
            stats["unchanged"] += 1
            continue
        if srv.domaine == best:
            stats["unchanged"] += 1
            continue
        if dry_run:
            print(f" DRY: {host:25s} {srv.domaine or 'NULL'} -> {best}")
        else:
            conn.execute(update_stmt, {"d": best, "sid": srv.id})
        stats["updated"] += 1
    return stats


def main():
    """Command-line entry point: fill ``servers.domaine`` from weekly sheets.

    Arguments:
        xlsx_path     path of the workbook to read
        --from / --to first and last week numbers to scan (defaults 2 and 16)
        --dry-run     print the planned changes without updating the database
        --overwrite   also replace domains that are already set
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("xlsx_path")
    parser.add_argument("--from", dest="s_from", type=int, default=2)
    parser.add_argument("--to", dest="s_to", type=int, default=16)
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--overwrite", action="store_true")
    args = parser.parse_args()

    # Read the spreadsheet first so that a bad path or unreadable workbook
    # fails before any database connection is opened.
    wb = openpyxl.load_workbook(args.xlsx_path, data_only=True)
    scores, sheets_done = _collect_scores(wb, args.s_from, args.s_to)
    print(f"[INFO] {sheets_done} sheets lues, {len(scores)} hostnames uniques")

    engine = create_engine(DATABASE_URL)
    # The context manager guarantees the connection is closed even when a
    # query raises; AUTOCOMMIT keeps the original one-commit-per-statement mode.
    with engine.connect().execution_options(isolation_level="AUTOCOMMIT") as conn:
        stats = _apply_updates(conn, scores, overwrite=args.overwrite,
                               dry_run=args.dry_run)

    print(f"\n[DONE] Maj: {stats['updated']} | Inchanges: {stats['unchanged']} "
          f"| Hors base: {stats['no_server']}")
|
||||
|
||||
|
||||
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue
Block a user