feat(patching/iexec B3.6): bouton 3e Reboot manuel (double confirmation, jamais auto) + 3f Wait reconnexion (poll TCP/22 + SSH uptime, timeout 10min) - shutdown -r +1 avec audit log

This commit is contained in:
Pierre & Lumière 2026-05-05 12:06:50 +02:00
parent 19d88f2d53
commit ff95424e03
3 changed files with 208 additions and 9 deletions

View File

@ -948,6 +948,57 @@ async def iexec_yum_update(request: Request, row_id: int, db=Depends(get_db)):
return JSONResponse(result) return JSONResponse(result)
@router.post("/patching/iexec/reboot/{row_id}")
async def iexec_reboot(request: Request, row_id: int, db=Depends(get_db)):
"""Step 3e — lance reboot avec délai +1 minute.
NE JAMAIS appeler en automatique : doit toujours venir d'un clic
utilisateur explicite (double confirmation côté UI)."""
user = get_current_user(request)
if not user:
return JSONResponse({"ok": False, "detail": "Non authentifié"}, status_code=401)
perms = get_user_perms(db, user)
row, err = _common_iexec_row_check(row_id, db, user, perms)
if err:
return err
hostname = (row.hostname or row.asset_name).strip()
from ..services.patch_run_service import reboot_host
result = reboot_host(hostname)
try:
db.execute(text("""
INSERT INTO patch_planning_row_log (row_id, action, details, performed_by)
VALUES (:rid, 'reboot_initiated', :de, :uid)
"""), {"rid": row_id,
"de": json.dumps(result, ensure_ascii=False),
"uid": user.get("uid")})
db.commit()
except Exception as e:
print(f"[iexec_reboot] audit log failed: {e}")
result["row_id"] = row_id
return JSONResponse(result)
@router.get("/patching/iexec/reboot-status/{row_id}")
async def iexec_reboot_status(request: Request, row_id: int, db=Depends(get_db)):
"""Step 3e — poll l'état du serveur après reboot (TCP/22 + SSH 'uptime').
Appelé en boucle côté frontend (toutes les 10s)."""
user = get_current_user(request)
if not user:
return JSONResponse({"ok": False, "detail": "Non authentifié"}, status_code=401)
perms = get_user_perms(db, user)
row, err = _common_iexec_row_check(row_id, db, user, perms)
if err:
return err
hostname = (row.hostname or row.asset_name).strip()
from ..services.patch_run_service import reboot_status
result = reboot_status(hostname)
result["row_id"] = row_id
return JSONResponse(result)
@router.post("/patching/import/{import_id}/delete") @router.post("/patching/import/{import_id}/delete")
async def import_delete(request: Request, import_id: int, db=Depends(get_db)): async def import_delete(request: Request, import_id: int, db=Depends(get_db)):
user = get_current_user(request) user = get_current_user(request)

View File

@ -8,6 +8,8 @@
import base64 import base64
import logging import logging
import re import re
import socket
from datetime import datetime
from typing import Dict, Any, List from typing import Dict, Any, List
from .realtime_audit_service import _resolve, _connect, PARAMIKO_OK from .realtime_audit_service import _resolve, _connect, PARAMIKO_OK
@ -400,3 +402,69 @@ def post_patch_compare(hostname: str) -> Dict[str, Any]:
"stdout": r["stdout"][-3000:], "stdout": r["stdout"][-3000:],
"stderr": r["stderr"][:500] if r["stderr"] else "", "stderr": r["stderr"][:500] if r["stderr"] else "",
} }
# ─── B3.6 — Reboot manuel sur clic + polling reconnexion ──────────────────
# IMPORTANT : reboot_host() n'est JAMAIS lancé automatiquement.
# Toujours déclenché par un clic utilisateur explicite avec double confirmation
# côté frontend.
def reboot_host(hostname: str) -> Dict[str, Any]:
"""Lance reboot avec délai +1 min (laisse le temps à SSH de retourner).
Appelée uniquement après confirmation explicite côté UI."""
client, target, err = _open_ssh(hostname)
if err:
return {"ok": False, "detail": err, "target": target}
try:
cmd = "sudo -n shutdown -r +1 'PatchCenter post-patch reboot' 2>&1"
r = _exec(client, cmd, timeout=15)
finally:
try:
client.close()
except Exception:
pass
ok = (r["rc"] == 0)
return {
"ok": ok,
"rc": r["rc"],
"cmd": cmd,
"target": target,
"started_at": datetime.now().isoformat(timespec="seconds"),
"stdout": r["stdout"][:500],
"stderr": r["stderr"][:500] if r["stderr"] else "",
}
def reboot_status(hostname: str) -> Dict[str, Any]:
"""Vérifie si le serveur est revenu : TCP/22 puis SSH 'uptime'.
À appeler en boucle (toutes les 10s côté frontend)."""
target = _resolve(hostname)
if not target:
return {"reachable": False, "tcp22": False, "ssh": False,
"detail": "DNS résolution impossible", "target": None}
# 1. TCP/22
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(3)
sock.connect((target, 22))
sock.close()
except Exception as e:
return {"reachable": False, "tcp22": False, "ssh": False,
"target": target, "detail": str(e)[:200]}
# 2. SSH minimal — uptime
if not PARAMIKO_OK:
return {"reachable": True, "tcp22": True, "ssh": False, "target": target,
"detail": "paramiko absent côté serveur PatchCenter"}
client = _connect(target, hostname)
if not client:
return {"reachable": True, "tcp22": True, "ssh": False, "target": target,
"detail": "TCP/22 OK mais SSH KO (probablement encore en boot)"}
try:
r = _exec(client, "uptime 2>&1", timeout=10)
finally:
try:
client.close()
except Exception:
pass
return {"reachable": True, "tcp22": True, "ssh": True, "target": target,
"uptime": (r.get("stdout") or "").strip()[:300]}

View File

@ -53,6 +53,7 @@
<th class="text-left p-1">Dry-run</th> <th class="text-left p-1">Dry-run</th>
<th class="text-left p-1">Pre-capt.</th> <th class="text-left p-1">Pre-capt.</th>
<th class="text-left p-1">Patch</th> <th class="text-left p-1">Patch</th>
<th class="text-left p-1">Reconnex.</th>
<th class="text-left p-1">Post-cmp.</th> <th class="text-left p-1">Post-cmp.</th>
</tr> </tr>
</thead> </thead>
@ -74,10 +75,11 @@
<td class="p-1 cell-dry text-gray-500">·</td> <td class="p-1 cell-dry text-gray-500">·</td>
<td class="p-1 cell-pre text-gray-500">·</td> <td class="p-1 cell-pre text-gray-500">·</td>
<td class="p-1 cell-patch text-gray-500">·</td> <td class="p-1 cell-patch text-gray-500">·</td>
<td class="p-1 cell-recon text-gray-500">·</td>
<td class="p-1 cell-post text-gray-500">·</td> <td class="p-1 cell-post text-gray-500">·</td>
</tr> </tr>
{% else %} {% else %}
<tr><td colspan="16" class="p-2 text-gray-500">Aucune ligne éligible.</td></tr> <tr><td colspan="17" class="p-2 text-gray-500">Aucune ligne éligible.</td></tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
@ -116,8 +118,14 @@
<button id="btn-step3" class="btn-sm bg-cyber-blue/20 text-cyber-blue px-4 py-2 text-xs" disabled title="yum update -y : applique réellement les patchs"> <button id="btn-step3" class="btn-sm bg-cyber-blue/20 text-cyber-blue px-4 py-2 text-xs" disabled title="yum update -y : applique réellement les patchs">
→ 3c Patcher → 3c Patcher
</button> </button>
<button id="btn-post" class="btn-sm bg-cyber-blue/20 text-cyber-blue px-4 py-2 text-xs" disabled title="Compare services/ports avant/après patch (à lancer après reboot)"> <button id="btn-reboot" class="btn-sm bg-cyber-red/20 text-cyber-red px-4 py-2 text-xs" disabled title="shutdown -r +1 sur les serveurs patchés (double confirmation)">
→ 3d Post-cmp. → 3e Reboot
</button>
<button id="btn-recon" class="btn-sm bg-cyber-yellow/20 text-cyber-yellow px-4 py-2 text-xs" disabled title="Polle TCP/22 + SSH jusqu'à reconnexion">
→ 3f Wait reconn.
</button>
<button id="btn-post" class="btn-sm bg-cyber-blue/20 text-cyber-blue px-4 py-2 text-xs" disabled title="Compare services/ports avant/après patch (à lancer après reconnexion)">
→ 3g Post-cmp.
</button> </button>
</div> </div>
</div> </div>
@ -129,6 +137,8 @@
const btnDryrun = document.getElementById('btn-dryrun'); const btnDryrun = document.getElementById('btn-dryrun');
const btnPre = document.getElementById('btn-pre'); const btnPre = document.getElementById('btn-pre');
const btnStep3 = document.getElementById('btn-step3'); const btnStep3 = document.getElementById('btn-step3');
const btnReboot = document.getElementById('btn-reboot');
const btnRecon = document.getElementById('btn-recon');
const btnPost = document.getElementById('btn-post'); const btnPost = document.getElementById('btn-post');
const tbody = document.getElementById('check-tbody'); const tbody = document.getElementById('check-tbody');
const summary = document.getElementById('run-summary'); const summary = document.getElementById('run-summary');
@ -323,12 +333,15 @@
const snapOk = trs.filter(tr => tr._snapData && tr._snapData.ok); const snapOk = trs.filter(tr => tr._snapData && tr._snapData.ok);
const dryOk = trs.filter(tr => tr._dryData && tr._dryData.ok); const dryOk = trs.filter(tr => tr._dryData && tr._dryData.ok);
const preOk = trs.filter(tr => tr._preData && tr._preData.ok); const preOk = trs.filter(tr => tr._preData && tr._preData.ok);
const patchOk= trs.filter(tr => tr._patchData && tr._patchData.ok); const patchOk = trs.filter(tr => tr._patchData && tr._patchData.ok);
const recOk = trs.filter(tr => tr._reconData && tr._reconData.ok);
btnStep2.disabled = (ckOk.length === 0); btnStep2.disabled = (ckOk.length === 0);
btnDryrun.disabled = (snapOk.length === 0); btnDryrun.disabled = (snapOk.length === 0);
btnPre.disabled = (dryOk.length === 0); btnPre.disabled = (dryOk.length === 0);
btnStep3.disabled = (preOk.length === 0); btnStep3.disabled = (preOk.length === 0);
btnPost.disabled = (patchOk.length === 0); btnReboot.disabled = (patchOk.length === 0);
btnRecon.disabled = (patchOk.length === 0);
btnPost.disabled = (recOk.length === 0 && patchOk.length === 0);
} }
btnStep2.addEventListener('click', async () => { btnStep2.addEventListener('click', async () => {
@ -416,6 +429,73 @@
refreshStepButtons(); refreshStepButtons();
}); });
btnReboot.addEventListener('click', async () => {
const trs = Array.from(tbody.querySelectorAll('tr[data-row-id]'));
const targets = trs.filter(tr => tr._patchData && tr._patchData.ok);
if (!targets.length) { alert('Aucun serveur avec patch OK'); return; }
if (!confirm('⚠ REBOOT ⚠\n\nDéclencher `shutdown -r +1` sur ' + targets.length + ' serveur(s) ?\n(le reboot effectif a lieu dans 1 minute)')) return;
if (!confirm('Vraiment ? Liste des hôtes :\n' + targets.map(tr => tr.querySelector('td:nth-child(3)').textContent.trim()).join('\n') + '\n\nConfirmer le reboot ?')) return;
btnReboot.disabled = true;
let okCount = 0, koCount = 0;
for (const tr of targets) {
const cell = tr.querySelector('.cell-recon');
cell.innerHTML = '<span class="text-cyber-yellow">… reboot demandé</span>';
try {
const r = await fetch('/patching/iexec/reboot/' + tr.dataset.rowId, {method:'POST'});
const j = await r.json();
tr._rebootData = j;
if (j.ok) {
okCount++;
cell.innerHTML = '<span class="text-cyber-yellow">⏳ reboot dans 1min · ' + escapeHTML(j.started_at||'') + '</span>';
} else {
koCount++;
cell.innerHTML = '<span class="text-cyber-red" title="' + escapeHTML(j.detail||j.stderr||'') + '">✗ ' + escapeHTML((j.detail||'KO').slice(0,80)) + '</span>';
}
} catch(e) {
koCount++;
cell.innerHTML = '<span class="text-cyber-red">✗ erreur</span>';
}
}
summary.innerHTML += ' · Reboot : ✓ ' + okCount + ' / ✗ ' + koCount;
});
btnRecon.addEventListener('click', async () => {
const trs = Array.from(tbody.querySelectorAll('tr[data-row-id]'));
const targets = trs.filter(tr => tr._patchData && tr._patchData.ok);
if (!targets.length) { alert('Aucun serveur avec patch OK'); return; }
if (!confirm('Attendre la reconnexion (TCP/22 + SSH) sur ' + targets.length + ' serveur(s) ?\nPoll toutes les 10s, timeout 10 min par serveur.')) return;
btnRecon.disabled = true;
const startTs = Date.now();
// Pour chaque target, polling indépendant
await Promise.all(targets.map(async (tr) => {
const cell = tr.querySelector('.cell-recon');
const t0 = Date.now();
const TIMEOUT_MS = 10 * 60 * 1000; // 10 min
const POLL_MS = 10 * 1000;
cell.innerHTML = '<span class="text-cyber-yellow">⏳ poll TCP/22…</span>';
while (Date.now() - t0 < TIMEOUT_MS) {
await new Promise(r => setTimeout(r, POLL_MS));
try {
const resp = await fetch('/patching/iexec/reboot-status/' + tr.dataset.rowId);
const j = await resp.json();
if (j.tcp22 && j.ssh) {
const dur = Math.round((Date.now() - t0) / 1000);
tr._reconData = {ok: true, downtime_s: dur, uptime: j.uptime};
cell.innerHTML = '<span class="text-cyber-green">✓ revenu en ' + dur + 's</span>'
+ '<br><span class="text-[10px] text-gray-400" title="' + escapeHTML(j.uptime||'') + '">' + escapeHTML((j.uptime||'').slice(0,60)) + '</span>';
return;
}
cell.innerHTML = '<span class="text-cyber-yellow">⏳ '
+ (j.tcp22 ? 'TCP/22 OK · SSH KO' : 'pas joignable')
+ ' · ' + Math.round((Date.now()-t0)/1000) + 's</span>';
} catch(e) { /* ignore, retry */ }
}
tr._reconData = {ok: false};
cell.innerHTML = '<span class="text-cyber-red">✗ timeout 10 min</span>';
}));
refreshStepButtons();
});
btnPost.addEventListener('click', async () => { btnPost.addEventListener('click', async () => {
const trs = Array.from(tbody.querySelectorAll('tr[data-row-id]')); const trs = Array.from(tbody.querySelectorAll('tr[data-row-id]'));
const targets = trs.filter(tr => tr._patchData && tr._patchData.ok); const targets = trs.filter(tr => tr._patchData && tr._patchData.ok);