- run_all_checks: capture l'exception levee par _connect dans ctx['ssh_error'] - check_ssh: utilise l'erreur reelle pour produire un message classifie: * 'No route to host' -> reseau injoignable * 'Connection timed out' -> port 22 filtre/host down * 'Connection refused' -> sshd arrete/bloque * 'no matching kex/key exchange' -> algos incompatibles * 'host key' -> known_hosts probleme * 'permission denied' / 'authentication failed' -> auth refusee * 'no authentication methods' -> aucune methode acceptee * 'name or service not known' -> DNS KO cote SSH - details inclut le message d'exception complet pour debug
395 lines
15 KiB
Python
395 lines
15 KiB
Python
"""Service pré-patching : vérifications avant lancement du patch.
|
|
|
|
Architecture extensible : un dict `CHECKS` mappe `name -> callable`.
|
|
Chaque check prend `(ctx)` et renvoie un dict :
|
|
{"name": str, "label": str, "status": "ok"|"warn"|"ko", "message": str, "details": str}
|
|
|
|
`ctx` est un dict :
|
|
{
|
|
"hostname": str, # nom court ex 'vpdsiawik1'
|
|
"target": str|None, # FQDN résolu (None si DNS KO)
|
|
"client": SSHClient|None, # paramiko ouvert ou None
|
|
"row": dict, # ligne du planning Excel (pour contexte additionnel)
|
|
}
|
|
|
|
Les checks sont indépendants : un check peut tourner même si un autre a échoué.
|
|
La fonction `run_all_checks(hostname, row)` orchestre l'enchaînement et
|
|
calcule un verdict global.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
import socket
|
|
import time
|
|
from typing import Callable, Dict, List, Any
|
|
|
|
from .realtime_audit_service import _resolve, _connect, PARAMIKO_OK
|
|
|
|
# Module-level logger used by the pre-patching checks.
log = logging.getLogger("patchcenter.prepatch")

# Timeout applied to each SSH command (passed as `timeout=` to exec_command;
# presumably seconds — TODO confirm against the SSH client in use).
EXEC_TIMEOUT = 15

# Minimum free disk space thresholds (in MB)
DISK_MIN_ROOT_MB = 1500 # 1.5 GB on /
DISK_MIN_VARLOG_MB = 1000 # 1 GB on /var/log

# SANEF Satellite servers, one per network zone
SATELLITE_LAN = "vpdsiasat2.sanef.groupe"
SATELLITE_DMZ = "vpdsiasat1.sanef.groupe"
|
|
|
|
|
|
def _pick_satellites(row: Dict[str, Any]) -> List[str]:
|
|
"""Renvoie la liste ordonnée des Satellites à tester.
|
|
Priorité : 1) servers.satellite_url renseigné en BDD,
|
|
2) DMZ d'abord si la colonne Domaine contient 'DMZ',
|
|
3) LAN d'abord par défaut.
|
|
L'autre satellite est toujours ajouté en fallback."""
|
|
forced = (row.get("satellite_url") or "").strip()
|
|
domaine = str(row.get("domaine") or "").upper()
|
|
if forced:
|
|
# Override : on commence par celui en BDD, fallback sur l'autre
|
|
other = SATELLITE_DMZ if forced == SATELLITE_LAN else SATELLITE_LAN
|
|
return [forced] if forced not in (SATELLITE_LAN, SATELLITE_DMZ) else [forced, other]
|
|
if "DMZ" in domaine:
|
|
return [SATELLITE_DMZ, SATELLITE_LAN]
|
|
return [SATELLITE_LAN, SATELLITE_DMZ]
|
|
|
|
|
|
def _exec(client, cmd: str) -> Dict[str, Any]:
    """Run ``cmd`` through the SSH client and report its outcome.

    Returns a dict ``{"rc", "stdout", "stderr"}`` where both streams are
    decoded as UTF-8 (undecodable bytes replaced) and stripped. Any exception
    raised while executing or reading is turned into ``rc == -1`` with the
    error text placed in ``stderr``; nothing is propagated to the caller.
    """
    try:
        _stdin, out_stream, err_stream = client.exec_command(
            cmd, timeout=EXEC_TIMEOUT
        )
        # Streams are drained (stdout first) before asking for the exit code.
        captured = {
            "stdout": out_stream.read().decode("utf-8", "replace").strip(),
            "stderr": err_stream.read().decode("utf-8", "replace").strip(),
        }
        exit_code = out_stream.channel.recv_exit_status()
    except Exception as exc:
        return {"rc": -1, "stdout": "", "stderr": f"exec error: {exc}"}
    return {"rc": exit_code, **captured}
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
# Checks individuels
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
|
|
def check_dns(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Report whether the server name could be resolved.

    Outcomes:
      * ``ok``   – ``ctx["target"]`` is set (resolution already succeeded
        upstream).
      * ``warn`` – no target, but ``socket.gethostbyname`` returns an address
        for the raw hostname.
      * ``ko``   – the hostname is blank or cannot be resolved.

    Args:
        ctx: check context; reads ``hostname`` and ``target``.

    Returns:
        A check result dict (``name``, ``label``, ``status``, ``message``,
        ``details``).
    """

    def _result(status: str, message: str, details: str = "") -> Dict[str, Any]:
        return {
            "name": "dns",
            "label": "Résolution DNS",
            "status": status,
            "message": message,
            "details": details,
        }

    hostname = ctx.get("hostname") or ""
    target = ctx.get("target")
    if target:
        return _result("ok", f"{hostname} → {target}")

    # A blank name must not be handed to the resolver: depending on the
    # platform it can "resolve" to a wildcard address and look like a success.
    if not str(hostname).strip():
        return _result("ko", "Impossible de résoudre le hostname", "Hostname vide")

    # Upstream resolution failed: retry directly to at least obtain an IP.
    try:
        ip = socket.gethostbyname(hostname)
    except (OSError, ValueError) as e:
        # OSError covers socket.gaierror/herror; ValueError covers names that
        # cannot be encoded (UnicodeError) or contain invalid characters.
        return _result("ko", "Impossible de résoudre le hostname", str(e))
    return _result(
        "warn",
        f"{hostname} → {ip} (FQDN non confirmé)",
        "Aucun FQDN en base et aucun suffixe SANEF ne répond sur :22.",
    )
|
|
|
|
|
|
def _ssh_failure_message(err: str, target: str) -> str:
    """Map a raw SSH error string to a short, actionable user message."""
    low = err.lower()
    if "no route to host" in low or "network is unreachable" in low:
        return f"Réseau injoignable ({target}) — vérifier routage/firewall"
    if "timed out" in low:  # also covers "connection timed out"
        return f"Timeout connexion vers {target} — port SSH 22 filtré ou hôte down"
    if "connection refused" in low:
        return f"Port 22 refusé sur {target} — sshd arrêté ou bloqué"
    # Algorithm negotiation failures: OpenSSH says "no matching ... found",
    # paramiko says "Incompatible ssh peer (no acceptable ...)". This must be
    # tested before the host-key branch, which would otherwise swallow
    # "no acceptable host key".
    negotiation_failed = "no matching" in low or "no acceptable" in low
    if negotiation_failed and any(
        word in low for word in ("kex", "key exchange", "host key", "cipher", "mac")
    ):
        return f"Algos KEX incompatibles avec {target} — durcissement SSH"
    if "host key" in low or "hostkey" in low or "known_hosts" in low:
        return f"Host key inconnue/changée pour {target} — known_hosts ?"
    if "permission denied" in low or "authentication failed" in low:
        return f"Authentification refusée par {target} — vérifier user/clé/password"
    if "no authentication methods" in low:
        return f"Aucune méthode d'auth acceptée par {target}"
    if "name or service not known" in low or "could not resolve" in low:
        return f"DNS échoué côté SSH ({target})"
    if err:
        return f"Échec SSH vers {target}"
    return f"Échec connexion SSH vers {target} (raison inconnue)"


def check_ssh(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Report the state of the SSH session opened by the orchestrator.

    The connection itself is not attempted here: the check only inspects
    ``ctx["client"]`` and, on failure, classifies ``ctx["ssh_error"]`` into an
    actionable message. The raw error text is always kept in ``details``.

    Args:
        ctx: check context; reads ``client``, ``target`` and ``ssh_error``.

    Returns:
        A check result dict with status ``ok`` when a client is present,
        ``ko`` otherwise.
    """
    base = {"name": "ssh", "label": "Connexion SSH"}
    if ctx.get("client") is not None:
        return {
            **base,
            "status": "ok",
            "message": f"Connecté à {ctx.get('target')}",
            "details": "",
        }
    if not PARAMIKO_OK:
        return {
            **base,
            "status": "ko",
            "message": "paramiko non disponible côté serveur PatchCenter",
            "details": "",
        }
    if not ctx.get("target"):
        return {
            **base,
            "status": "ko",
            "message": "Pas de cible (DNS KO en amont)",
            "details": "",
        }

    err = (ctx.get("ssh_error") or "").strip()
    target = ctx.get("target") or "?"
    details = (
        (err or "Pas d'exception capturée")
        + "\nVérifier ssh_method/clé/PSMP/mot de passe dans Settings (section SSH)."
    )
    return {
        **base,
        "status": "ko",
        "message": _ssh_failure_message(err, target),
        "details": details,
    }
|
|
|
|
|
|
def _disk_avail_mb(client, path: str):
    """Return the free space, in MB, of the filesystem holding ``path``.

    Runs ``df -BM --output=avail`` remotely (through ``sudo -n``) and parses
    the first ``<digits>M`` value found in the output. Returns ``None`` when
    no such value can be found.
    """
    result = _exec(client, f"sudo -n df -BM --output=avail {path} 2>&1 | tail -n +2")
    text = (result["stdout"] or "").strip()
    found = re.search(r"(\d+)\s*M", text)
    return int(found.group(1)) if found else None
|
|
|
|
|
|
def check_disk(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Check that enough disk space is available before patching.

    Thresholds:
      * ``/``        >= ``DISK_MIN_ROOT_MB``
      * ``/var/log`` >= ``DISK_MIN_VARLOG_MB``

    The result is ``ko`` when there is no SSH client, when a measure could not
    be taken, or when a filesystem is below its threshold; ``ok`` otherwise.

    Args:
        ctx: check context; reads ``client``.

    Returns:
        A check result dict; ``details`` lists one line per measured path.
    """
    label = f"Espace disque (/ ≥ {DISK_MIN_ROOT_MB}M, /var/log ≥ {DISK_MIN_VARLOG_MB}M)"
    client = ctx.get("client")
    if client is None:
        return {
            "name": "disk",
            "label": label,
            "status": "ko",
            "message": "SSH KO en amont",
            "details": "",
        }

    thresholds = (("/", DISK_MIN_ROOT_MB), ("/var/log", DISK_MIN_VARLOG_MB))
    issues = []
    parts = []
    detail_lines = []
    for mount, minimum in thresholds:
        avail = _disk_avail_mb(client, mount)
        if avail is None:
            issues.append(f"{mount} : mesure impossible")
            shown = "N/A"  # no unit suffix: there is no value to qualify
        else:
            parts.append(f"{mount} {avail}M")
            shown = f"{avail}M"
            if avail < minimum:
                issues.append(f"{mount} {avail}M < min {minimum}M")
        detail_lines.append(f"$ sudo df -BM --output=avail {mount} → {shown}")
    details = "\n".join(detail_lines)

    if issues:
        return {
            "name": "disk", "label": label, "status": "ko",
            "message": " · ".join(issues),
            "details": details,
        }
    return {
        "name": "disk", "label": label, "status": "ok",
        "message": " · ".join(parts) + " (au-dessus seuils)",
        "details": details,
    }
|
|
|
|
|
|
def check_satellite(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Check that the server can reach and use a Satellite.

    Three remote probes are run, all through ``sudo -n``:
      1. reachability of a Satellite over HTTPS (candidates are tried in the
         order given by ``_pick_satellites`` until one answers);
      2. registration of the system (``subscription-manager identity`` must
         succeed and print a UUID);
      3. repository access (``yum repolist enabled --quiet`` must succeed and
         print something).

    Status is ``ok`` when all three pass, ``warn`` when only the registration
    probe fails, ``ko`` otherwise (including when there is no SSH client).

    Args:
        ctx: check context; reads ``client`` and ``row``.

    Returns:
        A check result dict; ``details`` holds the raw probe outputs,
        truncated to 2500 characters.
    """
    # Local import so the block does not depend on the file's import header.
    import shlex

    client = ctx.get("client")
    satellites = _pick_satellites(ctx.get("row") or {})
    preferred = satellites[0]
    label = f"Satellite (préféré: {preferred})"
    if client is None:
        return {
            "name": "satellite", "label": label, "status": "ko",
            "message": "SSH KO en amont", "details": "",
        }

    # 1) Reachability: try each Satellite until the first one that answers.
    sat_reached = None
    sat_attempts = []
    for sat in satellites:
        # The host may come from the database: quote it so it cannot break
        # out of the remote shell command (which runs under sudo).
        url = shlex.quote(f"https://{sat}/pub/")
        r0 = _exec(client,
                   "sudo -n curl -k -s -o /dev/null -w '%{http_code}' "
                   f"--max-time 5 {url} 2>&1")
        http_code = (r0["stdout"] or "").strip()
        sat_attempts.append(f"{sat} → HTTP {http_code or 'N/A'}")
        if http_code in ("200", "301", "302", "403"):
            sat_reached = sat
            break
    sat_reachable = sat_reached is not None

    # 2) subscription-manager identity (locale-independent: look for a UUID).
    r1 = _exec(client, "sudo -n subscription-manager identity 2>&1")
    sub_ok = (r1["rc"] == 0 and bool(re.search(
        r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
        r1["stdout"], re.IGNORECASE)))

    # 3) yum repolist. The output is truncated here rather than with a remote
    # "| head -50": a pipeline reports the exit status of its last command, so
    # a failing yum (whose error text lands on stdout via 2>&1) looked OK.
    r2 = _exec(client, "sudo -n yum repolist enabled --quiet 2>&1")
    repo_lines = (r2["stdout"] or "").splitlines()[:50]
    repo_out = "\n".join(repo_lines)
    repolist_ok = (r2["rc"] == 0 and repo_out.strip() != "")

    details = (
        "$ curl tests :\n " + "\n ".join(sat_attempts) + "\n"
        f"$ sudo subscription-manager identity →\n{r1['stdout']}\n{r1['stderr']}\n"
        f"---\n"
        f"$ sudo yum repolist enabled --quiet (head -50) →\n{repo_out}\n{r2['stderr']}"
    )[:2500]

    if sat_reachable and sub_ok and repolist_ok:
        # Approximate repo count: every non-empty line that is not a known
        # header/banner line.
        banners = ("repo id", "loaded plugins", "updating subscription",
                   "this system")
        nb = sum(1 for ln in repo_lines
                 if ln and not ln.lower().startswith(banners))
        msg = f"{sat_reached} joignable · système enregistré · ~{nb} repo(s)"
        if sat_reached != preferred:
            msg += f" — fallback depuis {preferred}"
        return {"name": "satellite", "label": label, "status": "ok",
                "message": msg, "details": details}

    issues = []
    if not sat_reachable:
        tried = ", ".join(satellites)
        issues.append(f"Aucun Satellite joignable (testés: {tried})")
    if not sub_ok:
        issues.append("subscription-manager identity KO")
    if not repolist_ok:
        issues.append("yum repolist vide / KO")
    status = "ko" if (not sat_reachable or not repolist_ok) else "warn"
    return {"name": "satellite", "label": label, "status": status,
            "message": " · ".join(issues), "details": details}
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
# Registre extensible
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
|
|
# Registry of available checks: check name -> callable taking the context
# dict and returning a check result dict. run_all_checks iterates this
# mapping, so checks run in insertion order.
CHECKS: Dict[str, Callable[[Dict[str, Any]], Dict[str, Any]]] = {
    "dns": check_dns,
    "ssh": check_ssh,
    "disk": check_disk,
    "satellite": check_satellite,
}
|
|
|
|
|
|
def register_check(name: str, fn: Callable[[Dict[str, Any]], Dict[str, Any]]) -> None:
    """Register an additional check under ``name`` (extension point).

    The callable is stored in ``CHECKS``; an existing entry with the same
    name is replaced.
    """
    CHECKS[name] = fn
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
# Orchestration
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
|
|
def run_all_checks(hostname: str, row: Dict[str, Any] | None = None,
                   only: List[str] | None = None) -> Dict[str, Any]:
    """Run the pre-patching checks for one server and compute a verdict.

    The hostname is resolved, an SSH session is opened when possible, then
    every registered check (or only those named in ``only``) is called with a
    shared context dict. A check that raises, or returns something that is
    not a result dict with a valid status, is recorded as ``ko`` instead of
    aborting the run. The SSH session is always closed before returning.

    Args:
        hostname: short server name, e.g. ``'vpdsiawik1'``.
        row: optional planning row giving extra context to the checks.
        only: names of the checks to run; all checks when empty or ``None``.

    Returns:
        A dict with:
          * ``hostname``: the name received;
          * ``target``: the resolved target, or ``None``;
          * ``checks``: list of check result dicts, in execution order;
          * ``overall``: ``"ko"`` if any check is ko, else ``"warn"`` if any
            check is warn or if no check was executed, else ``"ok"``;
          * ``duration_ms``: elapsed wall-clock time in milliseconds.
    """
    t0 = time.time()
    only_set = set(only) if only else None
    target = _resolve(hostname)
    client = None
    ssh_error = None
    ssh_method = None
    if target and PARAMIKO_OK:
        try:
            connected = _connect(target, hostname)
            # _connect may return either a client or a (client, method) tuple.
            if isinstance(connected, tuple):
                ssh_method = connected[1] if len(connected) > 1 else None
                connected = connected[0] if connected else None
            client = connected
        except Exception as e:
            log.warning("_connect raised on %s: %s", hostname, e)
            ssh_error = f"{type(e).__name__}: {e}"
            client = None

    ctx = {"hostname": hostname, "target": target, "client": client,
           "row": row or {}, "ssh_error": ssh_error, "ssh_method": ssh_method}
    results = []
    try:
        for name, fn in CHECKS.items():
            if only_set is not None and name not in only_set:
                continue
            try:
                r = fn(ctx)
            except Exception as e:
                r = {"name": name, "label": name, "status": "ko",
                     "message": f"Exception: {e}", "details": ""}
            # Guard against extension checks returning a malformed result:
            # the verdict below relies on a valid "status".
            if not isinstance(r, dict) or r.get("status") not in ("ok", "warn", "ko"):
                r = {"name": name, "label": name, "status": "ko",
                     "message": "Résultat de check invalide",
                     "details": repr(r)[:500]}
            results.append(r)
    finally:
        if client is not None:
            try:
                client.close()
            except Exception as e:
                # Best effort: a failing close must not hide the results.
                log.debug("closing SSH client for %s failed: %s", hostname, e)

    statuses = [r["status"] for r in results]
    if not statuses:
        # Nothing was verified (e.g. `only` matched no registered check):
        # do not report a clean bill of health.
        log.warning("no check executed for %s (only=%r)", hostname, only)
        overall = "warn"
    elif "ko" in statuses:
        overall = "ko"
    elif "warn" in statuses:
        overall = "warn"
    else:
        overall = "ok"

    return {
        "hostname": hostname,
        "target": target,
        "checks": results,
        "overall": overall,
        "duration_ms": int((time.time() - t0) * 1000),
    }
|