#!/bin/bash
# Watchdog - checks if the agent is alive and responsive
# Run via cron every 10 minutes:
# */10 * * * * /path/to/your/agent/watchdog.sh

# Absolute path of the directory containing this script, so the watchdog
# behaves the same regardless of the caller's working directory.
# CDPATH is cleared because a CDPATH hit makes `cd` print the directory it
# chose, which would corrupt the captured value. If the directory cannot be
# resolved we stop here rather than continue with paths rooted at "/".
WORKING_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)" || {
    printf 'watchdog: cannot resolve script directory from %s\n' "$0" >&2
    exit 1
}

# Node executable used to launch the agent. A NODE_BIN already present in the
# environment wins (handy under cron, whose PATH is usually minimal);
# otherwise node is looked up on PATH with the `command -v` builtin instead of
# the external, non-standard `which`. Left empty when node cannot be found.
NODE_BIN="${NODE_BIN:-$(command -v node)}"

# Heartbeat marker whose modification time is inspected further down.
HEARTBEAT="$WORKING_DIR/.heartbeat"
# File that log() appends to.
LOGFILE="$WORKING_DIR/watchdog.log"

# Append one timestamped entry to the watchdog log.
# Globals:   LOGFILE (read) - file the entry is appended to
# Arguments: $1 - message text
# Outputs:   a line of the form "[YYYY-MM-DD HH:MM:SS] <message>" in $LOGFILE
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$1" >> "$LOGFILE"
}

# Check if agent process is running.
# pgrep -f matches against the full command line; the dot is escaped so only a
# literal "agent.mjs" matches. If several processes match, the first PID
# printed is the one used for the rest of the script.
AGENT_PID=$(pgrep -f 'node.*agent\.mjs' | head -n 1)

if [[ -z "$AGENT_PID" ]]; then
    log "ALERT: No agent process found. Starting fresh instance."

    # Without a usable node binary the launch below would fail in the
    # background while we still logged a successful start.
    if ! command -v "$NODE_BIN" >/dev/null 2>&1; then
        log "ERROR: node binary not found (NODE_BIN='$NODE_BIN'). Cannot start agent."
        exit 1
    fi

    # agent.mjs is given as a relative path, so a failed cd would launch node
    # against the wrong directory.
    if ! cd "$WORKING_DIR"; then
        log "ERROR: Cannot change directory to $WORKING_DIR. Cannot start agent."
        exit 1
    fi

    # nohup + background: the agent keeps running after this script exits.
    nohup "$NODE_BIN" agent.mjs >> "$WORKING_DIR/agent.log" 2>&1 &

    log "Started new agent instance (PID: $!)"
    exit 0
fi

# The agent process exists, so its heartbeat is examined next. When the
# heartbeat file is absent there is no timestamp to evaluate: create the file
# now and leave the staleness check to the next scheduled run.
[[ -f "$HEARTBEAT" ]] || {
    log "WARNING: No heartbeat file found. Creating one. Will check again next run."
    touch "$HEARTBEAT"
    exit 0
}

# Check heartbeat age.
# Read the file's mtime as epoch seconds by probing the stat flavour instead
# of trusting `uname`: GNU `stat -c %Y` is tried first, BSD/macOS `stat -f %m`
# second. This also covers GNU coreutils installed on macOS and the other
# BSDs. The order matters: GNU stat interprets `-f` as "file system status"
# and would print unrelated data.
HEARTBEAT_MOD=$(stat -c %Y "$HEARTBEAT" 2>/dev/null) \
    || HEARTBEAT_MOD=$(stat -f %m "$HEARTBEAT" 2>/dev/null)

# An empty or non-numeric value (e.g. the file vanished between the existence
# check and stat) would break the arithmetic below and fall through to the
# "OK" branch, so stop here instead of reporting a false healthy state.
if ! [[ "$HEARTBEAT_MOD" =~ ^[0-9]+$ ]]; then
    log "ERROR: Could not read modification time of $HEARTBEAT. Skipping this run."
    exit 1
fi

NOW=$(date +%s)
HEARTBEAT_AGE=$(( NOW - HEARTBEAT_MOD ))
MAX_AGE=600  # 10 minutes

if (( HEARTBEAT_AGE > MAX_AGE )); then
    log "ALERT: Heartbeat is ${HEARTBEAT_AGE}s old (max ${MAX_AGE}s). Agent appears frozen."
    log "Killing stale agent process: $AGENT_PID"

    # Ask politely first (SIGTERM) and give the process a moment to exit.
    # Errors are silenced on purpose: the process may already be gone.
    kill "$AGENT_PID" 2>/dev/null
    sleep 3

    # Escalate to SIGKILL only if the process is still there; `kill -0` sends
    # no signal and merely tests whether the PID can be signalled.
    if kill -0 "$AGENT_PID" 2>/dev/null; then
        kill -9 "$AGENT_PID" 2>/dev/null
        sleep 2
    fi

    # Without a usable node binary the launch below would fail in the
    # background while we still logged a successful restart.
    if ! command -v "$NODE_BIN" >/dev/null 2>&1; then
        log "ERROR: node binary not found (NODE_BIN='$NODE_BIN'). Cannot restart agent."
        exit 1
    fi

    # agent.mjs is given as a relative path, so a failed cd would launch node
    # against the wrong directory.
    if ! cd "$WORKING_DIR"; then
        log "ERROR: Cannot change directory to $WORKING_DIR. Cannot restart agent."
        exit 1
    fi

    # nohup + background: the agent keeps running after this script exits.
    nohup "$NODE_BIN" agent.mjs >> "$WORKING_DIR/agent.log" 2>&1 &

    log "Started fresh agent instance (PID: $!)"
else
    log "OK: Heartbeat is ${HEARTBEAT_AGE}s old. Agent is alive."
fi
