patchcenter/app/services/prepatch_check_service.py

351 lines
13 KiB
Python

"""Service pré-patching : vérifications avant lancement du patch.
Architecture extensible : un dict `CHECKS` mappe `name -> callable`.
Chaque check prend `(ctx)` et renvoie un dict :
{"name": str, "label": str, "status": "ok"|"warn"|"ko", "message": str, "details": str}
`ctx` est un dict :
{
"hostname": str, # nom court ex 'vpdsiawik1'
"target": str|None, # FQDN résolu (None si DNS KO)
"client": SSHClient|None, # paramiko ouvert ou None
"row": dict, # ligne du planning Excel (pour contexte additionnel)
}
Les checks sont indépendants : un check peut tourner même si un autre a échoué.
La fonction `run_all_checks(hostname, row)` orchestre l'enchaînement et
calcule un verdict global.
"""
from __future__ import annotations
import logging
import re
import socket
import time
from typing import Callable, Dict, List, Any
from .realtime_audit_service import _resolve, _connect, PARAMIKO_OK
# Module-level logger for the pre-patching checks.
log = logging.getLogger("patchcenter.prepatch")
# Timeout applied to each SSH command (passed as `timeout` to exec_command in _exec)
EXEC_TIMEOUT = 15
# Minimum free disk space thresholds (in MB)
DISK_MIN_ROOT_MB = 1500 # 1.5 GB on /
DISK_MIN_VARLOG_MB = 1000 # 1 GB on /var/log
# SANEF Satellite servers, one per network zone
SATELLITE_LAN = "vpdsiasat2.sanef.groupe"
SATELLITE_DMZ = "vpdsiasat1.sanef.groupe"
def _pick_satellite(row: Dict[str, Any]) -> str:
    """Return the Satellite hostname matching the server's network zone.

    The ``domaine`` entry of ``row`` is converted to an upper-case string;
    when it contains ``DMZ`` the DMZ Satellite is returned. In every other
    case (including a missing or empty value) the LAN Satellite is returned.
    """
    zone = row.get("domaine") or ""
    return SATELLITE_DMZ if "DMZ" in str(zone).upper() else SATELLITE_LAN
def _exec(client, cmd: str) -> Dict[str, Any]:
"""Exécute une commande SSH et renvoie {rc, stdout, stderr}."""
try:
stdin, stdout, stderr = client.exec_command(cmd, timeout=EXEC_TIMEOUT)
out = stdout.read().decode("utf-8", "replace").strip()
err = stderr.read().decode("utf-8", "replace").strip()
rc = stdout.channel.recv_exit_status()
return {"rc": rc, "stdout": out, "stderr": err}
except Exception as e:
return {"rc": -1, "stdout": "", "stderr": f"exec error: {e}"}
# ────────────────────────────────────────────────────────────────────────
# Checks individuels
# ────────────────────────────────────────────────────────────────────────
def check_dns(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Report whether the server hostname could be resolved.

    Args:
        ctx: check context; reads ``ctx["hostname"]`` (required) and
            ``ctx.get("target")``.

    Returns:
        A check-result dict named ``dns`` whose status is:

        * ``ok``   - ``target`` is set in the context;
        * ``warn`` - no target, but ``socket.gethostbyname(hostname)``
          returned an IP address;
        * ``ko``   - that fallback lookup raised (error text in ``details``).
    """
    hostname = ctx["hostname"]
    target = ctx.get("target")

    def _result(status: str, message: str, details: str = "") -> Dict[str, Any]:
        # Single place where the result dict is shaped.
        return {
            "name": "dns",
            "label": "Résolution DNS",
            "status": status,
            "message": message,
            "details": details,
        }

    if target:
        # The separator was missing: hostname and target were glued together.
        return _result("ok", f"{hostname} → {target}")

    # No target in the context: retry a plain lookup to at least get an IP.
    try:
        ip = socket.gethostbyname(hostname)
    except Exception as e:
        # Any failure of the lookup is reported as a KO result, never raised.
        return _result("ko", "Impossible de résoudre le hostname", str(e))
    return _result(
        "warn",
        f"{hostname} → {ip} (FQDN non confirmé)",
        "Aucun FQDN en base et aucun suffixe SANEF ne répond sur :22.",
    )
def check_ssh(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Report whether an SSH session is available in the check context.

    This check does not open a connection itself; it only inspects
    ``ctx["client"]`` and, when it is ``None``, explains why: paramiko
    unavailable, no target, or a failed connection attempt.

    Returns:
        A check-result dict named ``ssh`` with status ``ok`` or ``ko``.
    """
    if ctx.get("client") is not None:
        verdict, text, hint = "ok", f"Connecté à {ctx.get('target')}", ""
    elif not PARAMIKO_OK:
        verdict, text, hint = "ko", "paramiko non disponible côté serveur PatchCenter", ""
    elif not ctx.get("target"):
        verdict, text, hint = "ko", "Pas de cible (DNS KO en amont)", ""
    else:
        verdict = "ko"
        text = "Échec connexion SSH"
        hint = "Vérifier ssh_method/clé/PSMP/mot de passe dans Settings."
    return {
        "name": "ssh",
        "label": "Connexion SSH",
        "status": verdict,
        "message": text,
        "details": hint,
    }
def _disk_avail_mb(client, path: str):
    """Return the available space, in MB, reported by ``df`` for ``path``.

    Runs ``sudo -n df -BM --output=avail`` over SSH (header line removed
    with ``tail``) and extracts the first ``<digits>M`` token of the output.

    Returns:
        The value as an ``int``, or ``None`` when no such token is found.
    """
    result = _exec(client, f"sudo -n df -BM --output=avail {path} 2>&1 | tail -n +2")
    found = re.search(r"(\d+)\s*M", (result["stdout"] or "").strip())
    return int(found.group(1)) if found else None
def check_disk(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Check the available disk space on ``/`` and ``/var/log``.

    Each mount point is measured with ``_disk_avail_mb`` and compared with
    its minimum (``DISK_MIN_ROOT_MB`` and ``DISK_MIN_VARLOG_MB``).

    Args:
        ctx: check context; reads ``ctx.get("client")`` (open SSH client or
            ``None``).

    Returns:
        A check-result dict named ``disk``. Status is ``ko`` when there is no
        SSH client, when a measurement is impossible, or when a value is
        below its threshold; ``ok`` otherwise.
    """
    label = f"Espace disque (/ ≥ {DISK_MIN_ROOT_MB}M, /var/log ≥ {DISK_MIN_VARLOG_MB}M)"
    client = ctx.get("client")
    if client is None:
        return {
            "name": "disk",
            "label": label,
            "status": "ko",
            "message": "SSH KO en amont",
            "details": "",
        }

    # (mount point, minimum free MB), processed in this order.
    thresholds = (("/", DISK_MIN_ROOT_MB), ("/var/log", DISK_MIN_VARLOG_MB))
    issues = []
    parts = []
    detail_lines = []
    for path, minimum in thresholds:
        avail = _disk_avail_mb(client, path)
        if avail is None:
            # No unit after the placeholder (used to render as "N/AM").
            shown = "N/A"
            issues.append(f"{path} : mesure impossible")
        else:
            shown = f"{avail}M"
            parts.append(f"{path} {avail}M")
            if avail < minimum:
                issues.append(f"{path} {avail}M < min {minimum}M")
        detail_lines.append(f"$ sudo df -BM --output=avail {path} → {shown}")
    details = "\n".join(detail_lines)

    if issues:
        return {
            "name": "disk", "label": label, "status": "ko",
            "message": " · ".join(issues),
            "details": details,
        }
    return {
        "name": "disk", "label": label, "status": "ok",
        "message": " · ".join(parts) + " (au-dessus seuils)",
        "details": details,
    }
def check_satellite(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Check that the server can reach and use its Satellite.

    Three probes are run over SSH, all through ``sudo -n`` (non-interactive):

    1. reachability of the Satellite chosen by ``_pick_satellite``
       (``curl`` on ``https://{sat}/pub/``; HTTP 200/301/302/403 accepted);
    2. registration (``subscription-manager identity`` exits 0 and prints
       a UUID);
    3. repository access (``yum repolist enabled --quiet`` exits 0 and
       prints something).

    Args:
        ctx: check context; reads ``ctx.get("client")`` and ``ctx.get("row")``.

    Returns:
        A check-result dict named ``satellite``. Status is ``ok`` when the
        three probes pass, ``ko`` when there is no SSH client, the Satellite
        is unreachable or the repolist probe fails, and ``warn`` when only
        the registration probe fails.
    """
    client = ctx.get("client")
    sat = _pick_satellite(ctx.get("row") or {})
    label = f"Satellite ({sat})"
    if client is None:
        return {
            "name": "satellite",
            "label": label,
            "status": "ko",
            "message": "SSH KO en amont",
            "details": "",
        }

    # 1) Network reachability of the Satellite: GET on /pub/, body discarded,
    #    only the HTTP status code is printed.
    r0 = _exec(client,
               f"sudo -n curl -k -s -o /dev/null -w '%{{http_code}}' "
               f"--max-time 5 https://{sat}/pub/ 2>&1")
    http_code = (r0["stdout"] or "").strip()
    sat_reachable = http_code in ("200", "301", "302", "403")

    # 2) subscription-manager identity.
    #    Locale-independent: look for a UUID in the output rather than a label.
    r1 = _exec(client, "sudo -n subscription-manager identity 2>&1")
    sub_ok = (r1["rc"] == 0 and bool(re.search(
        r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
        r1["stdout"], re.IGNORECASE)))

    # 3) yum repolist enabled --quiet.
    #    The command is NOT piped into `head`: a pipeline's exit status is the
    #    one of its last command, so `| head -50` always reported rc == 0 and a
    #    sudo/yum error message (merged into stdout by 2>&1) made the probe
    #    pass. The output is truncated to 50 lines here instead.
    r2 = _exec(client, "sudo -n yum repolist enabled --quiet 2>&1")
    repo_lines = r2["stdout"].splitlines()[:50]
    repo_out = "\n".join(repo_lines)
    repolist_ok = (r2["rc"] == 0 and repo_out.strip() != "")

    details = (
        f"$ curl https://{sat}/pub/ → http_code={http_code or 'N/A'}\n"
        f"$ sudo subscription-manager identity →\n{r1['stdout']}\n{r1['stderr']}\n"
        f"---\n"
        f"$ sudo yum repolist enabled --quiet (head -50) →\n{repo_out}\n{r2['stderr']}"
    )[:2500]

    if sat_reachable and sub_ok and repolist_ok:
        # Approximate repo count: non-empty lines that are not known headers
        # or plugin banners.
        banners = ("repo id", "loaded plugins", "updating subscription", "this system")
        nb = sum(1 for ln in repo_lines
                 if ln and not ln.lower().startswith(banners))
        return {
            "name": "satellite",
            "label": label,
            "status": "ok",
            "message": f"{sat} joignable · système enregistré · ~{nb} repo(s) actifs",
            "details": details,
        }

    # Build a short summary of what failed.
    issues = []
    if not sat_reachable:
        issues.append(f"Satellite {sat} injoignable (http={http_code or 'N/A'})")
    if not sub_ok:
        issues.append("subscription-manager identity KO")
    if not repolist_ok:
        issues.append("yum repolist vide / KO")
    # Only a registration problem alone is downgraded to a warning.
    status = "ko" if (not sat_reachable or not repolist_ok) else "warn"
    return {
        "name": "satellite",
        "label": label,
        "status": status,
        "message": " · ".join(issues),
        "details": details,
    }
# ────────────────────────────────────────────────────────────────────────
# Registre extensible
# ────────────────────────────────────────────────────────────────────────
# Registry of available checks: maps a check name to the callable that
# takes the context dict and returns a check-result dict.
# run_all_checks iterates over this dict, so insertion order is execution order.
CHECKS: Dict[str, Callable[[Dict[str, Any]], Dict[str, Any]]] = {
    "dns": check_dns,
    "ssh": check_ssh,
    "disk": check_disk,
    "satellite": check_satellite,
}
def register_check(name: str, fn: Callable):
    """Add ``fn`` to the ``CHECKS`` registry under ``name``.

    An existing entry with the same name is replaced.
    """
    CHECKS.update({name: fn})
# ────────────────────────────────────────────────────────────────────────
# Orchestration
# ────────────────────────────────────────────────────────────────────────
def run_all_checks(hostname: str, row: Dict[str, Any] | None = None,
                   only: List[str] | None = None) -> Dict[str, Any]:
    """Run the registered checks for one server and compute a global verdict.

    The hostname is resolved with ``_resolve``; when a target is found and
    paramiko is available, an SSH session is opened with ``_connect`` and
    shared by all checks through the context dict. The session is always
    closed before returning.

    Args:
        hostname: short host name, e.g. ``'vpdsiawik1'``.
        row: optional dict of planning data, passed to checks as ``ctx["row"]``
            (an empty dict is used when omitted).
        only: names of the checks to run; ``None`` or an empty list runs all
            registered checks.

    Returns:
        {
            "hostname": str,
            "target": str|None,
            "checks": [check_result, ...],
            "overall": "ok" | "warn" | "ko",
            "duration_ms": int,
        }
        ``overall`` is ``ok`` when every check is ``ok``, ``ko`` when at least
        one check is ``ko``, and ``warn`` otherwise.
    """
    # Monotonic clock: the elapsed time cannot be skewed by wall-clock changes.
    t0 = time.monotonic()
    only_set = set(only) if only else None
    target = _resolve(hostname)
    client = None
    if target and PARAMIKO_OK:
        try:
            client = _connect(target, hostname)
        except Exception as e:
            log.warning("_connect raised on %s: %s", hostname, e)
            client = None
    ctx = {"hostname": hostname, "target": target, "client": client, "row": row or {}}
    results = []
    try:
        for name, fn in CHECKS.items():
            if only_set is not None and name not in only_set:
                continue
            try:
                r = fn(ctx)
                # A malformed result would otherwise crash the verdict
                # computation below, outside of this per-check guard.
                if not isinstance(r, dict) or "status" not in r:
                    raise TypeError(f"invalid check result: {r!r}")
            except Exception as e:
                # Checks are independent: a failing check becomes a KO result
                # and does not prevent the next ones from running.
                r = {"name": name, "label": name, "status": "ko",
                     "message": f"Exception: {e}", "details": ""}
            results.append(r)
    finally:
        # Release the SSH session whatever happened above (best effort).
        if client is not None:
            try:
                client.close()
            except Exception as e:
                log.debug("closing SSH client for %s failed: %s", hostname, e)
    # Global verdict: ok if all OK; ko if at least one ko; warn otherwise.
    # NOTE(review): when no check ran (e.g. `only` matches no registered
    # name) the verdict is "ok", as before - confirm this is intended.
    statuses = [r["status"] for r in results]
    if all(s == "ok" for s in statuses):
        overall = "ok"
    elif any(s == "ko" for s in statuses):
        overall = "ko"
    else:
        overall = "warn"
    return {
        "hostname": hostname,
        "target": target,
        "checks": results,
        "overall": overall,
        "duration_ms": int((time.monotonic() - t0) * 1000),
    }