"""Pre-patching service: checks executed before a patch run is launched.

Extensible architecture: the `CHECKS` dict maps `name -> callable`.
Every check takes `(ctx)` and returns a dict:
    {"name": str, "label": str, "status": "ok"|"warn"|"ko",
     "message": str, "details": str}

`ctx` is a dict built by `run_all_checks`:
    {
        "hostname":   str,             # short name, e.g. 'vpdsiawik1'
        "target":     str|None,        # value returned by _resolve (None if DNS KO)
        "client":     SSHClient|None,  # open paramiko client, or None
        "row":        dict,            # planning row (additional context)
        "ssh_error":  str|None,        # "<ExcType>: <msg>" if _connect raised
        "ssh_method": Any|None,        # 2nd item of _connect's tuple, if any
    }

Checks are independent: a check can run even if another one failed.
`run_all_checks(hostname, row)` orchestrates the sequence and computes a
global verdict.
"""

from __future__ import annotations

import logging
import re
import shlex
import socket
import time
from typing import Callable, Dict, List, Any

from .realtime_audit_service import _resolve, _connect, PARAMIKO_OK

log = logging.getLogger("patchcenter.prepatch")

# Per-command SSH timeout, passed as `timeout=` to paramiko's exec_command
EXEC_TIMEOUT = 15

# Free disk space thresholds (in MB)
DISK_MIN_ROOT_MB = 1500    # 1.5 GB on /
DISK_MIN_VARLOG_MB = 1000  # 1 GB on /var/log

# SANEF Satellites per network zone
SATELLITE_LAN = "vpdsiasat2.sanef.groupe"
SATELLITE_DMZ = "vpdsiasat1.sanef.groupe"

# HTTP codes that prove the Satellite answered (even if access is denied)
_SATELLITE_HTTP_OK = ("200", "301", "302", "403")

# Line prefixes of `yum repolist` output that are not repository entries
_REPOLIST_NOISE = ("repo id", "loaded plugins", "updating subscription", "this system")

# Maximum number of `yum repolist` output lines kept for analysis/details
_REPOLIST_MAX_LINES = 50

# A system UUID in `subscription-manager identity` output (locale-independent)
_UUID_RE = re.compile(
    r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
    re.IGNORECASE,
)


def _result(name: str, label: str, status: str, message: str,
            details: str = "") -> Dict[str, Any]:
    """Build a check result dict with the keys every check must return."""
    return {
        "name": name,
        "label": label,
        "status": status,
        "message": message,
        "details": details,
    }


def _pick_satellites(row: Dict[str, Any]) -> List[str]:
    """Return the ordered list of Satellites to probe.

    Priority:
      1) `row["satellite_url"]` when set. If it is one of the two known
         Satellites, the other one is appended as fallback; any other value
         is returned alone (no fallback).
      2) DMZ first when `row["domaine"]` contains 'DMZ' (case-insensitive).
      3) LAN first by default.

    The returned values are interpolated as the host part of
    `https://<sat>/pub/` by `check_satellite`, so the override is expected to
    be a bare hostname.
    """
    forced = (row.get("satellite_url") or "").strip()
    if forced == SATELLITE_LAN:
        return [SATELLITE_LAN, SATELLITE_DMZ]
    if forced == SATELLITE_DMZ:
        return [SATELLITE_DMZ, SATELLITE_LAN]
    if forced:
        # Unknown override: trust it, and do not fall back to a known one.
        return [forced]

    domaine = str(row.get("domaine") or "").upper()
    if "DMZ" in domaine:
        return [SATELLITE_DMZ, SATELLITE_LAN]
    return [SATELLITE_LAN, SATELLITE_DMZ]


def _exec(client, cmd: str) -> Dict[str, Any]:
    """Run `cmd` over SSH and return `{"rc", "stdout", "stderr"}`.

    stdout/stderr are decoded as UTF-8 (invalid bytes replaced) and stripped.
    Never raises: any exception is reported as rc=-1 with the error text in
    "stderr".
    """
    try:
        _stdin, stdout, stderr = client.exec_command(cmd, timeout=EXEC_TIMEOUT)
        out = stdout.read().decode("utf-8", "replace").strip()
        err = stderr.read().decode("utf-8", "replace").strip()
        rc = stdout.channel.recv_exit_status()
    except Exception as e:
        return {"rc": -1, "stdout": "", "stderr": f"exec error: {e}"}
    return {"rc": rc, "stdout": out, "stderr": err}


# ────────────────────────────────────────────────────────────────────────
# Individual checks
# ────────────────────────────────────────────────────────────────────────

def check_dns(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Report the DNS resolution of the hostname.

    - ok   : `ctx["target"]` is set (resolution already succeeded upstream).
    - warn : no target, but `socket.gethostbyname` still yields an IP.
    - ko   : the hostname cannot be resolved at all.
    """
    hostname = ctx["hostname"]
    target = ctx.get("target")
    label = "Résolution DNS"
    if target:
        return _result("dns", label, "ok", f"{hostname} → {target}")

    # _resolve failed upstream: retry a plain lookup to at least get an IP.
    try:
        ip = socket.gethostbyname(hostname)
    except Exception as e:
        return _result("dns", label, "ko",
                       "Impossible de résoudre le hostname", str(e))
    return _result(
        "dns", label, "warn",
        f"{hostname} → {ip} (FQDN non confirmé)",
        "Aucun FQDN en base et aucun suffixe SANEF ne répond sur :22.",
    )


def _ssh_failure_message(err: str, target: str) -> str:
    """Map a raw SSH error text to an actionable, user-facing message.

    Matching is done on the lower-cased error text; the first matching
    category wins, so the order of the tests matters (e.g. the KEX test must
    come before the generic "host key" test).
    """
    low = err.lower()
    if "no route to host" in low or "network is unreachable" in low:
        return f"Réseau injoignable ({target}) — vérifier routage/firewall"
    if "timed out" in low:
        return f"Timeout connexion vers {target} — port SSH 22 filtré ou hôte down"
    if "connection refused" in low:
        return f"Port 22 refusé sur {target} — sshd arrêté ou bloqué"
    if "no matching" in low and (
            "kex" in low or "key exchange" in low or "host key" in low):
        return f"Algos KEX incompatibles avec {target} — durcissement SSH"
    if "host key" in low or "hostkey" in low:
        return f"Host key inconnue/changée pour {target} — known_hosts ?"
    if "permission denied" in low or "authentication failed" in low:
        return f"Authentification refusée par {target} — vérifier user/clé/password"
    if "no authentication methods" in low:
        return f"Aucune méthode d'auth acceptée par {target}"
    if "name or service not known" in low or "could not resolve" in low:
        return f"DNS échoué côté SSH ({target})"
    if err:
        return f"Échec SSH vers {target}"
    return f"Échec connexion SSH vers {target} (raison inconnue)"


def check_ssh(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Report whether an SSH session is open.

    The connection itself is attempted by `run_all_checks`; this check only
    inspects `ctx["client"]`, and on failure classifies `ctx["ssh_error"]`
    into an actionable message.
    """
    label = "Connexion SSH"
    if ctx.get("client") is not None:
        return _result("ssh", label, "ok", f"Connecté à {ctx.get('target')}")
    if not PARAMIKO_OK:
        return _result("ssh", label, "ko",
                       "paramiko non disponible côté serveur PatchCenter")
    if not ctx.get("target"):
        return _result("ssh", label, "ko", "Pas de cible (DNS KO en amont)")

    err = (ctx.get("ssh_error") or "").strip()
    target = ctx.get("target") or "?"
    details = (err or "Pas d'exception capturée") + \
        "\nVérifier ssh_method/clé/PSMP/mot de passe dans Settings (section SSH)."
    return _result("ssh", label, "ko",
                   _ssh_failure_message(err, target), details)


def _disk_avail_mb(client, path: str) -> int | None:
    """Return the free space in MB on the filesystem holding `path`.

    Runs `df -BM --output=avail` through `sudo -n`, drops the header line and
    parses the first "<digits>M" token. Returns None when nothing parsable
    comes back (command failure, sudo refusal, SSH error).
    """
    # shlex.quote leaves plain paths such as "/" or "/var/log" untouched.
    r = _exec(
        client,
        f"sudo -n df -BM --output=avail {shlex.quote(path)} 2>&1 | tail -n +2",
    )
    m = re.search(r"(\d+)\s*M", (r["stdout"] or "").strip())
    return int(m.group(1)) if m else None


def check_disk(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Check the available disk space.

    Requirements:
      - /         >= DISK_MIN_ROOT_MB
      - /var/log  >= DISK_MIN_VARLOG_MB

    KO when a threshold is not met or a measurement is impossible (the server
    is then not eligible for a snapshot); KO as well when there is no SSH
    client.
    """
    label = (f"Espace disque (/ ≥ {DISK_MIN_ROOT_MB}M, "
             f"/var/log ≥ {DISK_MIN_VARLOG_MB}M)")
    client = ctx.get("client")
    if client is None:
        return _result("disk", label, "ko", "SSH KO en amont")

    issues: List[str] = []
    parts: List[str] = []
    detail_lines: List[str] = []
    for path, minimum in (("/", DISK_MIN_ROOT_MB),
                          ("/var/log", DISK_MIN_VARLOG_MB)):
        avail = _disk_avail_mb(client, path)
        shown = avail if avail is not None else "N/A"
        detail_lines.append(f"$ sudo df -BM --output=avail {path} → {shown}M")
        if avail is None:
            issues.append(f"{path} : mesure impossible")
            continue
        parts.append(f"{path} {avail}M")
        if avail < minimum:
            issues.append(f"{path} {avail}M < min {minimum}M")

    details = "\n".join(detail_lines)
    if issues:
        return _result("disk", label, "ko", " · ".join(issues), details)
    return _result("disk", label, "ok",
                   " · ".join(parts) + " (au-dessus seuils)", details)


def check_satellite(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Check Satellite reachability, registration and repository access.

    Steps (every remote command goes through `sudo -n`):
      1. Reachability: probe each Satellite from `_pick_satellites` with curl
         until one answers with an accepted HTTP code.
      2. Registration: `subscription-manager identity` must succeed and print
         a UUID.
      3. Repositories: `yum repolist enabled --quiet` must succeed and print
         something.

    Status: ok when all three pass; ko when no Satellite is reachable or the
    repolist step fails; warn when only the registration step fails.
    """
    client = ctx.get("client")
    satellites = _pick_satellites(ctx.get("row") or {})
    preferred = satellites[0]
    label = f"Satellite (préféré: {preferred})"
    if client is None:
        return _result("satellite", label, "ko", "SSH KO en amont")

    # 1) Reachability: stop at the first Satellite that answers.
    sat_reached = None
    sat_attempts: List[str] = []
    for sat in satellites:
        # The host may come from the database: quote it before it reaches a
        # root shell. Plain hostnames are left byte-identical by shlex.quote.
        url = shlex.quote(f"https://{sat}/pub/")
        probe = _exec(
            client,
            "sudo -n curl -k -s -o /dev/null -w '%{http_code}' "
            f"--max-time 5 {url} 2>&1",
        )
        http_code = (probe["stdout"] or "").strip()
        sat_attempts.append(f"{sat} → HTTP {http_code or 'N/A'}")
        if http_code in _SATELLITE_HTTP_OK:
            sat_reached = sat
            break
    sat_reachable = sat_reached is not None

    # 2) Registration (locale-independent: we only look for a UUID).
    r1 = _exec(client, "sudo -n subscription-manager identity 2>&1")
    sub_ok = r1["rc"] == 0 and bool(_UUID_RE.search(r1["stdout"]))

    # 3) Repository list. No `| head` here: in a pipeline the exit status is
    #    the one of the last command, which would hide a yum failure. The
    #    output is truncated locally instead.
    r2 = _exec(client, "sudo -n yum repolist enabled --quiet 2>&1")
    repo_out = "\n".join(r2["stdout"].splitlines()[:_REPOLIST_MAX_LINES])
    repolist_ok = r2["rc"] == 0 and repo_out.strip() != ""

    details = (
        "$ curl tests :\n " + "\n ".join(sat_attempts) + "\n"
        f"$ sudo subscription-manager identity →\n{r1['stdout']}\n{r1['stderr']}\n"
        f"---\n"
        f"$ sudo yum repolist enabled --quiet (head -50) →\n{repo_out}\n{r2['stderr']}"
    )[:2500]

    if sat_reachable and sub_ok and repolist_ok:
        # Approximate repo count: non-empty lines that are not known headers.
        nb = sum(
            1 for ln in repo_out.splitlines()
            if ln and not ln.lower().startswith(_REPOLIST_NOISE)
        )
        msg = f"{sat_reached} joignable · système enregistré · ~{nb} repo(s)"
        if sat_reached != preferred:
            msg += f" — fallback depuis {preferred}"
        return _result("satellite", label, "ok", msg, details)

    issues: List[str] = []
    if not sat_reachable:
        tried = ", ".join(satellites)
        issues.append(f"Aucun Satellite joignable (testés: {tried})")
    if not sub_ok:
        issues.append("subscription-manager identity KO")
    if not repolist_ok:
        issues.append("yum repolist vide / KO")
    status = "ko" if (not sat_reachable or not repolist_ok) else "warn"
    return _result("satellite", label, status, " · ".join(issues), details)


# ────────────────────────────────────────────────────────────────────────
# Extensible registry
# ────────────────────────────────────────────────────────────────────────

# Checks run in insertion order.
CHECKS: Dict[str, Callable[[Dict[str, Any]], Dict[str, Any]]] = {
    "dns": check_dns,
    "ssh": check_ssh,
    "disk": check_disk,
    "satellite": check_satellite,
}


def register_check(name: str, fn: Callable) -> None:
    """Register an additional check (or replace the one already named `name`)."""
    CHECKS[name] = fn


# ────────────────────────────────────────────────────────────────────────
# Orchestration
# ────────────────────────────────────────────────────────────────────────

def run_all_checks(hostname: str, row: Dict[str, Any] | None = None,
                   only: List[str] | None = None) -> Dict[str, Any]:
    """Run the check sequence for one server.

    Args:
        hostname: short name, e.g. 'vpdsiawik1'.
        row: optional dict of planning data (additional context).
        only: names of the checks to run; None or empty runs all of them.

    Returns:
        {
          "hostname": str,
          "target": str|None,
          "checks": [check_result, ...],
          "overall": "ok" | "warn" | "ko",
          "duration_ms": int,
        }

    A check that raises is reported as a "ko" result instead of aborting the
    run. The SSH client, if one was opened, is always closed before returning.
    """
    t0 = time.monotonic()  # monotonic: immune to wall-clock adjustments
    only_set = set(only) if only else None

    target = _resolve(hostname)
    client = None
    ssh_error = None
    ssh_method = None
    if target and PARAMIKO_OK:
        try:
            connected = _connect(target, hostname)
        except Exception as e:
            log.warning("_connect raised on %s: %s", hostname, e)
            ssh_error = f"{type(e).__name__}: {e}"
        else:
            # _connect may return a bare client or a (client, method) tuple.
            if isinstance(connected, tuple):
                client = connected[0] if connected else None
                ssh_method = connected[1] if len(connected) > 1 else None
            else:
                client = connected

    ctx = {
        "hostname": hostname,
        "target": target,
        "client": client,
        "row": row or {},
        "ssh_error": ssh_error,
        "ssh_method": ssh_method,
    }

    results = []
    try:
        for name, fn in CHECKS.items():
            if only_set is not None and name not in only_set:
                continue
            try:
                r = fn(ctx)
            except Exception as e:
                r = _result(name, name, "ko", f"Exception: {e}")
            results.append(r)
    finally:
        if client is not None:
            try:
                client.close()
            except Exception:
                # Best effort: a failing close must not mask the results.
                log.debug("closing SSH client for %s failed", hostname,
                          exc_info=True)

    # Global verdict: ok if every check is ok; ko as soon as one is ko;
    # warn otherwise.
    statuses = [r["status"] for r in results]
    if all(s == "ok" for s in statuses):
        overall = "ok"
    elif any(s == "ko" for s in statuses):
        overall = "ko"
    else:
        overall = "warn"

    return {
        "hostname": hostname,
        "target": target,
        "checks": results,
        "overall": overall,
        "duration_ms": int((time.monotonic() - t0) * 1000),
    }