Probleme: vrexpbtex1 est sur .sanef.groupe (exception a la convention vr*=.sanef-rec.fr). Le code tentait UN seul target via _resolve (TCP port 22 only) et echouait sans fallback SSH. Logique de resolution revisee: 1. Convention en 1er: suffixe le plus probable selon prefixe (vr/lr/sr=>.sanef-rec.fr, vp/lp/sp=>.sanef.groupe, etc.) 2. servers.fqdn (BDD) en 2e position si renseigne et plausible (commence par hostname.) 3. Autres suffixes du referentiel ensuite Implementation: - _candidate_targets(hostname) : nouvelle fonction qui retourne la liste ordonnee des FQDN candidats a essayer SSH - _connect(target, hostname, errors=...) : accepte une list errors optionnelle ou les exceptions de chaque tentative (PSMP/cle/password) sont append en clair pour diagnostic. Retro-compatible (errors=None par defaut) - _connect_via_psmp(target, errors=...) : meme pattern - _fqdn_is_consistent : assoupli, ne verifie plus la convention SANEF (qui rejetait les exceptions legitimes), juste que le FQDN commence par hostname. - run_all_checks: itere _candidate_targets et essaie _connect sur chaque, accumule les erreurs de tous les candidats, retourne le 1er qui aboutit UI: - check_dns: si target trouve mais client KO, status=warn et liste des candidats tentes - check_ssh: classification erreur (no route/timeout/refused/permission denied/etc.) + liste des candidats tentes + suggestion pour FQDN exception
425 lines
17 KiB
Python
425 lines
17 KiB
Python
"""Service pré-patching : vérifications avant lancement du patch.
|
|
|
|
Architecture extensible : un dict `CHECKS` mappe `name -> callable`.
|
|
Chaque check prend `(ctx)` et renvoie un dict :
|
|
{"name": str, "label": str, "status": "ok"|"warn"|"ko", "message": str, "details": str}
|
|
|
|
`ctx` est un dict :
    {
        "hostname": str,           # nom court ex 'vpdsiawik1'
        "target": str|None,        # FQDN résolu (None si DNS KO)
        "client": SSHClient|None,  # paramiko ouvert ou None
        "row": dict,               # ligne du planning Excel (pour contexte additionnel)
    }
|
|
|
|
Les checks sont indépendants : un check peut tourner même si un autre a échoué.
|
|
La fonction `run_all_checks(hostname, row)` orchestre l'enchaînement et
|
|
calcule un verdict global.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
import socket
|
|
import time
|
|
from typing import Callable, Dict, List, Any
|
|
|
|
from .realtime_audit_service import _resolve, _connect, _candidate_targets, PARAMIKO_OK
|
|
|
|
# Module logger for the pre-patching service.
log = logging.getLogger("patchcenter.prepatch")

# Per-command timeout, handed to `exec_command(timeout=...)` by `_exec`.
EXEC_TIMEOUT = 15

# Minimum free disk space thresholds, expressed in MB.
DISK_MIN_ROOT_MB = 1500    # 1.5 GB on /
DISK_MIN_VARLOG_MB = 1000  # 1 GB on /var/log

# SANEF Satellite hosts, one per network zone.
SATELLITE_LAN = "vpdsiasat2.sanef.groupe"
SATELLITE_DMZ = "vpdsiasat1.sanef.groupe"
|
|
|
|
|
|
def _pick_satellites(row: Dict[str, Any]) -> List[str]:
    """Return the ordered list of Satellite hosts to probe.

    Priority:
      1. ``row["satellite_url"]`` when set. If it designates one of the two
         known Satellites, that one comes first and the other is the fallback.
         If it is any other host, it is returned alone (no fallback).
      2. DMZ first, LAN as fallback, when ``row["domaine"]`` contains 'DMZ'.
      3. LAN first, DMZ as fallback, otherwise.

    Args:
        row: planning/DB row; only the ``satellite_url`` and ``domaine`` keys
            are read, both optional.

    Returns:
        A non-empty list of bare host names, most preferred first.
    """
    # Coerce to str (like `domaine` below) so a non-string cell cannot crash.
    raw = str(row.get("satellite_url") or "").strip()
    # The consumer builds "https://<host>/pub/", so reduce a URL-form value
    # ("https://host/path") to its bare host part. Bare hosts pass unchanged.
    _, scheme_sep, after_scheme = raw.partition("://")
    forced = (after_scheme if scheme_sep else raw).split("/", 1)[0].strip()

    if forced:
        # Host names are case-insensitive: match the known Satellites loosely
        # and hand back the canonical constant.
        wanted = forced.lower()
        if wanted == SATELLITE_LAN.lower():
            return [SATELLITE_LAN, SATELLITE_DMZ]
        if wanted == SATELLITE_DMZ.lower():
            return [SATELLITE_DMZ, SATELLITE_LAN]
        # Custom Satellite: used alone, as before.
        return [forced]

    domaine = str(row.get("domaine") or "").upper()
    if "DMZ" in domaine:
        return [SATELLITE_DMZ, SATELLITE_LAN]
    return [SATELLITE_LAN, SATELLITE_DMZ]
|
|
|
|
|
|
def _exec(client, cmd: str) -> Dict[str, Any]:
|
|
"""Exécute une commande SSH et renvoie {rc, stdout, stderr}."""
|
|
try:
|
|
stdin, stdout, stderr = client.exec_command(cmd, timeout=EXEC_TIMEOUT)
|
|
out = stdout.read().decode("utf-8", "replace").strip()
|
|
err = stderr.read().decode("utf-8", "replace").strip()
|
|
rc = stdout.channel.recv_exit_status()
|
|
return {"rc": rc, "stdout": out, "stderr": err}
|
|
except Exception as e:
|
|
return {"rc": -1, "stdout": "", "stderr": f"exec error: {e}"}
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
# Checks individuels
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
|
|
def check_dns(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Report how the short hostname was resolved to a target.

    Status:
      - ``ok``   when a target is set and an SSH client is open;
      - ``warn`` when a target is set and candidates were tried but no client
        is open, or when only a direct ``gethostbyname`` lookup succeeds;
      - ``ko``   when that direct lookup fails as well.
    """
    def _result(status: str, message: str, details: str) -> Dict[str, Any]:
        return {
            "name": "dns",
            "label": "Résolution DNS",
            "status": status,
            "message": message,
            "details": details,
        }

    hostname = ctx["hostname"]
    target = ctx.get("target")
    connected = ctx.get("client") is not None
    attempted = ctx.get("ssh_tried") or []

    if target and connected:
        # Only list the candidates when more than one had to be tried.
        extra = ""
        if len(attempted) > 1:
            extra = f"Candidats essayés : {', '.join(attempted)}"
        return _result("ok", f"{hostname} → {target}", extra)

    if target and attempted:
        return _result(
            "warn",
            f"{hostname} : {len(attempted)} FQDN tentés, SSH KO partout",
            "Candidats : " + ", ".join(attempted),
        )

    # No usable target: fall back to a plain lookup to at least get an IP.
    try:
        ip = socket.gethostbyname(hostname)
    except Exception as exc:
        return _result("ko", "Impossible de résoudre le hostname", str(exc))
    return _result(
        "warn",
        f"{hostname} → {ip} (FQDN non confirmé)",
        "Aucun FQDN en base et aucun suffixe SANEF ne répond sur :22.",
    )
|
|
|
|
|
|
def check_ssh(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Report the state of the SSH session already attempted by the caller.

    Returns ``ok`` when ``ctx["client"]`` is set. Otherwise returns ``ko``
    with a message derived, in order, from: paramiko being unavailable, the
    absence of a target, or the first matching pattern found in the
    accumulated ``ctx["ssh_error"]`` text. The candidates listed in
    ``ctx["ssh_tried"]`` are appended to the message.
    """
    def _report(status: str, message: str, details: str = "") -> Dict[str, Any]:
        return {
            "name": "ssh",
            "label": "Connexion SSH",
            "status": status,
            "message": message,
            "details": details,
        }

    if ctx.get("client") is not None:
        return _report("ok", f"Connecté à {ctx.get('target')}")
    if not PARAMIKO_OK:
        return _report("ko", "paramiko non disponible côté serveur PatchCenter")
    if not ctx.get("target"):
        return _report("ko", "Pas de cible (DNS KO en amont)")

    raw_error = (ctx.get("ssh_error") or "").strip()
    attempted = ctx.get("ssh_tried") or []
    haystack = raw_error.lower()

    def _seen(*needles: str) -> bool:
        return any(needle in haystack for needle in needles)

    # Ordered rules: the first one that matches wins.
    diagnoses = (
        (_seen("no route to host", "network is unreachable"),
         "Réseau injoignable — vérifier routage/firewall"),
        (_seen("connection timed out", "timed out"),
         "Timeout connexion — port SSH 22 filtré ou hôte down"),
        (_seen("connection refused"),
         "Port 22 refusé — sshd arrêté ou bloqué"),
        ((_seen("no matching") and _seen("kex", "key exchange"))
         or _seen("host key", "hostkey"),
         "Incompatibilité crypto SSH ou host key — algos KEX/host_keys"),
        (_seen("permission denied", "authentication failed"),
         "Authentification refusée — vérifier user/clé/password"),
        (_seen("no authentication methods"),
         "Aucune méthode d'auth acceptée"),
        (_seen("name or service not known", "could not resolve"),
         "DNS échoué — aucun FQDN candidat ne résoud"),
    )
    headline = next((text for hit, text in diagnoses if hit), None)
    if headline is None:
        if raw_error:
            headline = "Échec SSH sur tous les candidats DNS"
        else:
            headline = "Échec connexion SSH (raison inconnue)"

    if attempted:
        headline += f" — {len(attempted)} candidat(s) tenté(s) : {', '.join(attempted)}"

    hints = (
        "\n\n→ Vérifier ssh_method/clé/PSMP/mot de passe dans Settings (section SSH)."
        "\n→ Si l'hôte est sur un FQDN exception (ex: vr* sur .sanef.groupe), renseigner servers.fqdn explicitement."
    )
    return _report("ko", headline, (raw_error or "Pas d'exception capturée") + hints)
|
|
|
|
|
|
def _disk_avail_mb(client, path: str):
    """Return the free space in MB reported by `df` for `path`, or None.

    Runs ``sudo -n df -BM --output=avail`` remotely and extracts the first
    ``<digits>M`` token from the output; None means no such token was found.
    """
    outcome = _exec(client, f"sudo -n df -BM --output=avail {path} 2>&1 | tail -n +2")
    found = re.search(r"(\d+)\s*M", (outcome["stdout"] or "").strip())
    return int(found.group(1)) if found else None
|
|
|
|
|
|
def check_disk(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Check free disk space against the module thresholds.

    Requirements: ``/`` must have at least ``DISK_MIN_ROOT_MB`` MB free and
    ``/var/log`` at least ``DISK_MIN_VARLOG_MB`` MB.

    Returns a check result whose status is:
      - ``ko`` when there is no SSH client, when a measurement could not be
        taken, or when a filesystem is below its threshold;
      - ``ok`` otherwise.
    """
    # Built once and shared by every return path.
    label = f"Espace disque (/ ≥ {DISK_MIN_ROOT_MB}M, /var/log ≥ {DISK_MIN_VARLOG_MB}M)"

    client = ctx.get("client")
    if client is None:
        return {
            "name": "disk",
            "label": label,
            "status": "ko",
            "message": "SSH KO en amont",
            "details": "",
        }

    issues = []
    parts = []
    detail_lines = []
    for mount, minimum in (("/", DISK_MIN_ROOT_MB), ("/var/log", DISK_MIN_VARLOG_MB)):
        avail = _disk_avail_mb(client, mount)
        if avail is None:
            issues.append(f"{mount} : mesure impossible")
            # No unit suffix here: the previous rendering produced "N/AM".
            shown = "N/A"
        else:
            parts.append(f"{mount} {avail}M")
            shown = f"{avail}M"
            if avail < minimum:
                issues.append(f"{mount} {avail}M < min {minimum}M")
        detail_lines.append(f"$ sudo df -BM --output=avail {mount} → {shown}")
    details = "\n".join(detail_lines)

    if issues:
        return {
            "name": "disk", "label": label, "status": "ko",
            "message": " · ".join(issues),
            "details": details,
        }
    return {
        "name": "disk", "label": label, "status": "ok",
        "message": " · ".join(parts) + " (au-dessus seuils)",
        "details": details,
    }
|
|
|
|
|
|
def check_satellite(ctx: Dict[str, Any]) -> Dict[str, Any]:
    """Check Satellite reachability, registration and repository access.

    Steps, all run remotely with ``sudo -n``:
      1. probe each Satellite from `_pick_satellites` with curl until one
         answers with HTTP 200/301/302/403;
      2. ``subscription-manager identity`` must succeed and print a UUID;
      3. ``yum repolist enabled --quiet`` must succeed with non-empty output.

    Returns a check result whose status is:
      - ``ok``   when the three steps pass;
      - ``ko``   when there is no SSH client, no Satellite is reachable, or
        the repolist step fails;
      - ``warn`` when only the subscription step fails.
    """
    import shlex  # local import: only needed to quote the probed URL

    client = ctx.get("client")
    satellites = _pick_satellites(ctx.get("row") or {})
    preferred = satellites[0]
    label = f"Satellite (préféré: {preferred})"
    if client is None:
        return {
            "name": "satellite", "label": label, "status": "ko",
            "message": "SSH KO en amont", "details": "",
        }

    # 1) Reachability: stop at the first Satellite that answers.
    sat_reached = None
    sat_attempts = []
    for sat in satellites:
        # The host may come from the database: quote it before it reaches the
        # remote shell. Plain host names are left unchanged by shlex.quote.
        probe_url = shlex.quote(f"https://{sat}/pub/")
        r0 = _exec(client,
                   "sudo -n curl -k -s -o /dev/null -w '%{http_code}' "
                   f"--max-time 5 {probe_url} 2>&1")
        http_code = (r0["stdout"] or "").strip()
        sat_attempts.append(f"{sat} → HTTP {http_code or 'N/A'}")
        if http_code in ("200", "301", "302", "403"):
            sat_reached = sat
            break
    sat_reachable = sat_reached is not None

    # 2) Registration: detected through the UUID, independent of the locale.
    r1 = _exec(client, "sudo -n subscription-manager identity 2>&1")
    sub_ok = (r1["rc"] == 0 and bool(re.search(
        r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
        r1["stdout"], re.IGNORECASE)))

    # 3) Repositories. The output is truncated to 50 lines here rather than
    # with a remote `| head -50`: a pipeline reports head's exit status, which
    # made the rc test below always pass, even when sudo or yum failed.
    r2 = _exec(client, "sudo -n yum repolist enabled --quiet 2>&1")
    repo_lines = r2["stdout"].splitlines()[:50]
    repo_out = "\n".join(repo_lines)
    repolist_ok = (r2["rc"] == 0 and repo_out.strip() != "")

    details = (
        "$ curl tests :\n " + "\n ".join(sat_attempts) + "\n"
        f"$ sudo subscription-manager identity →\n{r1['stdout']}\n{r1['stderr']}\n"
        f"---\n"
        f"$ sudo yum repolist enabled --quiet (head -50) →\n{repo_out}\n{r2['stderr']}"
    )[:2500]

    if sat_reachable and sub_ok and repolist_ok:
        # Approximate repo count: skip known header/banner lines.
        banners = ("repo id", "loaded plugins", "updating subscription", "this system")
        nb = sum(1 for ln in repo_lines
                 if ln and not ln.lower().startswith(banners))
        msg = f"{sat_reached} joignable · système enregistré · ~{nb} repo(s)"
        if sat_reached != preferred:
            msg += f" — fallback depuis {preferred}"
        return {"name": "satellite", "label": label, "status": "ok",
                "message": msg, "details": details}

    issues = []
    if not sat_reachable:
        tried = ", ".join(satellites)
        issues.append(f"Aucun Satellite joignable (testés: {tried})")
    if not sub_ok:
        issues.append("subscription-manager identity KO")
    if not repolist_ok:
        issues.append("yum repolist vide / KO")
    status = "ko" if (not sat_reachable or not repolist_ok) else "warn"
    return {"name": "satellite", "label": label, "status": status,
            "message": " · ".join(issues), "details": details}
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
# Registre extensible
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
|
|
# Registry of checks: name -> callable taking the ctx dict and returning a
# result dict. run_all_checks iterates it in insertion order.
CHECKS: Dict[str, Callable[[Dict[str, Any]], Dict[str, Any]]] = {
    "dns": check_dns,
    "ssh": check_ssh,
    "disk": check_disk,
    "satellite": check_satellite,
}
|
|
|
|
|
|
def register_check(name: str, fn: Callable) -> None:
    """Add `fn` to the CHECKS registry under `name`, replacing any existing entry."""
    CHECKS.update({name: fn})
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
# Orchestration
|
|
# ────────────────────────────────────────────────────────────────────────
|
|
|
|
def run_all_checks(hostname: str, row: Dict[str, Any] | None = None,
                   only: List[str] | None = None) -> Dict[str, Any]:
    """Run the pre-patching checks for one server and compute a global verdict.

    Each FQDN returned by `_candidate_targets` is tried in order with
    `_connect`; the first one that yields a client is kept. The checks in
    `CHECKS` then run against the shared context, and the SSH client is
    always closed before returning.

    Args:
        hostname: short host name, e.g. 'vpdsiawik1'.
        row: optional planning row giving extra context to the checks.
        only: names of the checks to run; None or empty runs all of them.

    Returns:
        {
            "hostname": str,
            "target": str|None,   # connected FQDN; when every candidate
                                  # failed, the first one tried; None when
                                  # nothing was tried
            "checks": [check_result, ...],
            "overall": "ok" | "warn" | "ko",
            "duration_ms": int,
        }
        ``overall`` is "ok" when every executed check is ok (including when
        no check was selected), "ko" when at least one is ko, else "warn".
    """
    # Monotonic clock: the duration cannot be skewed by wall-clock changes.
    started = time.monotonic()
    only_set = set(only) if only else None
    client = None
    target = None
    all_errors: List[str] = []  # one entry per failed candidate
    tried: List[str] = []       # every FQDN attempted, in order

    if PARAMIKO_OK:
        for cand in _candidate_targets(hostname):
            tried.append(cand)
            errs: List[str] = []
            try:
                c = _connect(cand, hostname, errors=errs)
            except Exception as e:
                errs.append(f"{type(e).__name__}: {e}")
                c = None
            if c is not None:
                client = c
                target = cand
                break
            # This candidate failed: keep its reasons and move to the next.
            all_errors.append(f"{cand}: " + " | ".join(errs) if errs else f"{cand}: échec sans détail")

    # Nothing connected: expose the first candidate tried, for display only.
    if target is None and tried:
        target = tried[0]

    ssh_error = "\n".join(all_errors) if all_errors else None

    ctx = {"hostname": hostname, "target": target, "client": client,
           "row": row or {}, "ssh_error": ssh_error,
           "ssh_tried": tried}
    results = []
    try:
        # Snapshot the registry so a check that registers another check
        # cannot break the iteration.
        for name, fn in list(CHECKS.items()):
            if only_set is not None and name not in only_set:
                continue
            try:
                r = fn(ctx)
            except Exception as e:
                # A failing check must not prevent the others from running.
                r = {"name": name, "label": name, "status": "ko",
                     "message": f"Exception: {e}", "details": ""}
            results.append(r)
    finally:
        # Release the SSH session whatever happened above.
        if client is not None:
            try:
                client.close()
            except Exception:
                log.debug("SSH close failed for %s", target, exc_info=True)

    statuses = [r["status"] for r in results]
    if all(s == "ok" for s in statuses):
        overall = "ok"
    elif any(s == "ko" for s in statuses):
        overall = "ko"
    else:
        overall = "warn"

    return {
        "hostname": hostname,
        "target": target,
        "checks": results,
        "overall": overall,
        "duration_ms": int((time.monotonic() - started) * 1000),
    }
|