#!/usr/bin/env python3
"""
Gateway Start Handler - Sends recovery notification when Gateway starts.
Notifies only on each operation's origin channel.
"""

import sys
import json
import time
import traceback
from datetime import datetime, timezone
from pathlib import Path
from collections import defaultdict

# Make modules that live next to this script (notify, state_manager) importable.
sys.path.insert(0, str(Path(__file__).parent))

# Locations and names used by the handler, hoisted out of the function bodies.
_LOG_FILE = "/tmp/gateway_start_handler.log"
_MARKER_FILE = Path("/tmp/gateway_was_stopped")
_STATE_DIR = Path("/home/ubuntu/.openclaw/workspace/state")
_TELEGRAM_MENTION = "Julian"
_DEFAULT_MENTION = "taro83"


def log(msg, level="INFO"):
    """Write a timestamped line to stdout and append it to the handler log file.

    Logging is strictly best-effort: a closed stdout or an unwritable log file
    must never stop the handler, so I/O and encoding errors are ignored.

    Args:
        msg: Text to record.
        level: Severity tag placed in the line (e.g. "INFO", "ERROR").
    """
    timestamp = datetime.now(timezone.utc).isoformat()
    line = f"[{timestamp}] [{level}] {msg}\n"

    try:
        # `line` already ends with a newline; end="" avoids a blank line
        # after every entry on stdout.
        print(line, end="", flush=True)
    except (OSError, ValueError):
        pass  # stdout closed/broken or not encodable - keep going

    try:
        with open(_LOG_FILE, "a", encoding="utf-8") as f:
            f.write(line)
    except (OSError, ValueError):
        pass  # log file not writable - keep going


def _parse_utc(value):
    """Parse an ISO-8601 string into a timezone-aware datetime.

    A trailing "Z" is accepted on every Python version. A timestamp without
    an offset is treated as UTC, matching how this script writes timestamps.
    NOTE(review): assumes other writers of these timestamps also use UTC.

    Raises:
        AttributeError, TypeError, ValueError: if *value* is not a valid
            ISO-8601 string.
    """
    if value.endswith("Z"):
        value = value[:-1] + "+00:00"
    parsed = datetime.fromisoformat(value)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def calculate_downtime(stopped_at: str) -> str:
    """Return a short human-readable duration between *stopped_at* and now.

    Args:
        stopped_at: ISO-8601 timestamp of when the gateway stopped.

    Returns:
        "<h>h <m>m", "<m>m <s>s" or "<s>s" depending on magnitude, or
        "unknown" when the timestamp cannot be parsed. A timestamp in the
        future (clock skew) yields "0s" rather than a wrapped-around value.
    """
    try:
        stopped = _parse_utc(stopped_at)
    except (AttributeError, TypeError, ValueError):
        return "unknown"

    elapsed = datetime.now(timezone.utc) - stopped
    total_seconds = max(0, int(elapsed.total_seconds()))
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    if hours > 0:
        return f"{hours}h {minutes}m"
    if minutes > 0:
        return f"{minutes}m {seconds}s"
    return f"{seconds}s"


def _read_downtime(marker_present):
    """Return ``(was_restart, downtime)`` derived from the stop marker file."""
    if not marker_present:
        log("Fresh start (no marker found)")
        return False, "N/A (fresh boot)"

    try:
        with open(_MARKER_FILE, encoding="utf-8") as f:
            marker_data = json.load(f)
        if not isinstance(marker_data, dict):
            raise ValueError("marker content is not a JSON object")
        stopped_at = marker_data.get(
            "stopped_at", datetime.now(timezone.utc).isoformat()
        )
    except (OSError, ValueError) as e:
        log(f"Failed to read marker: {e}", "ERROR")
        return False, "unknown"

    downtime = calculate_downtime(stopped_at)
    log(f"Detected restart, downtime: {downtime}")
    return True, downtime


def _count_recently_resumed(timeout_seconds):
    """Count state files marked 'resuming' whose resume is newer than the timeout."""
    now = datetime.now(timezone.utc)
    count = 0
    for filepath in _STATE_DIR.glob("operation-*.json"):
        try:
            with open(filepath, encoding="utf-8") as f:
                data = json.load(f)
            if data.get("status") != "resuming" or data.get("user_cancelled", False):
                continue
            resumed_at_str = data.get("metadata", {}).get("resumed_at")
            if not resumed_at_str:
                continue
            elapsed = (now - _parse_utc(resumed_at_str)).total_seconds()
        except (OSError, ValueError, TypeError, AttributeError) as e:
            # The count is informational only; skip the file but leave a trace.
            log(f"Skipping unreadable state file {filepath}: {e}", "WARN")
            continue
        if elapsed < timeout_seconds:
            count += 1
    return count


def _group_by_channel(operations):
    """Group operations by their 'channel'; operations without one are left out."""
    by_channel = defaultdict(list)
    missing = 0
    for op in operations:
        ch = op.get('channel')
        if ch:
            by_channel[ch].append(op)
        else:
            missing += 1
    if missing:
        log(f"{missing} operation(s) have no origin channel and cannot be notified", "WARN")
    return by_channel


def _mention_for(channel, op):
    """Return the user to mention for *op* on *channel*."""
    if channel.startswith("telegram:"):
        return _TELEGRAM_MENTION
    return op.get('notify_user', _DEFAULT_MENTION)


def _build_start_message(ops, was_restart, downtime):
    """Build the 'Gateway STARTED' summary listing the operations of one channel."""
    parts = ["🟢 **Gateway STARTED**\n\n"]
    if was_restart:
        parts.append(f"⏱️ Downtime: {downtime}\n")
    else:
        parts.append("🚀 Fresh start after system boot\n")
    parts.append(f"📋 Resuming {len(ops)} operation(s):\n\n")

    for op in ops:
        # Tolerate an explicit null 'progress' as well as a missing one.
        progress = op.get('progress') or {}
        parts.append(
            f"**{op.get('agent_name', 'unknown')}**: "
            f"{op.get('description', 'No description')}\n"
        )
        parts.append(
            f"└ Step {progress.get('step', '?')}/{progress.get('of', '?')} "
            f"— {progress.get('current', 'Unknown')}\n\n"
        )
    return "".join(parts)


def main():
    """Notify each origin channel that the gateway started and resume its operations.

    Steps: import the project helpers, read the stop marker to work out the
    downtime, list interrupted operations, send one summary per channel, send
    one resume command per operation (marking its state as 'resuming'), and
    finally remove the stop marker.

    Returns:
        0 in the normal case, including when the helper modules cannot be
        imported (the handler is best-effort); 1 when the interrupted
        operations cannot be listed.
    """
    log("=== Gateway Start Handler Started ===")

    try:
        from notify import send_notification, send_resume_command
        log("notify module imported successfully")
    except Exception as e:
        log(f"Failed to import notify: {e}", "ERROR")
        return 0

    try:
        from state_manager import OperationState, RESUME_TIMEOUT_SECONDS
        log("state_manager imported successfully")
    except Exception as e:
        log(f"Failed to import state_manager: {e}", "ERROR")
        return 0

    # Check for restart marker
    marker_present = _MARKER_FILE.exists()
    was_restart, downtime = _read_downtime(marker_present)

    # Get interrupted operations
    try:
        interrupted = OperationState.list_interrupted()
    except Exception as e:
        # Record the failure in the handler log before giving up; otherwise
        # the traceback would only ever reach stderr.
        log(f"Failed to list interrupted operations: {e}\n{traceback.format_exc()}", "ERROR")
        return 1

    # Count operations skipped due to recent resume
    skipped_resuming = _count_recently_resumed(RESUME_TIMEOUT_SECONDS)

    log(f"Found {len(interrupted)} operation(s) to resume")
    if skipped_resuming > 0:
        log(f"Skipped {skipped_resuming} operation(s) - recently resumed (within {RESUME_TIMEOUT_SECONDS}s)")

    # Group by origin channel
    by_channel = _group_by_channel(interrupted)

    # Send notifications per channel
    for channel, ops in by_channel.items():
        try:
            message = _build_start_message(ops, was_restart, downtime)
            send_notification(channel, message, mention=_mention_for(channel, ops[0]))
            log(f"Sent start notification to {channel} for {len(ops)} operation(s)")
        except Exception as e:
            log(f"Failed to send to {channel}: {e}", "ERROR")

    # Send resume commands per channel
    for channel, ops in by_channel.items():
        time.sleep(1)  # brief pause before each channel's resume commands
        for op in ops:
            op_id = op.get('operation_id', 'unknown')
            try:
                resume_msg = (
                    f"🔄 **Resuming**: {op.get('description', 'Operation')}\n\n"
                    f"{op.get('resume_prompt', 'Continue work')}\n\n"
                    f"[State ID: `{op_id}`]"
                )
                send_resume_command(channel, resume_msg, _mention_for(channel, op))
                log(f"Sent resume for {op_id} to {channel}")
            except Exception as e:
                log(f"Failed to resume {op_id}: {e}", "ERROR")
                continue

            # Update state. Kept separate from sending so that a persistence
            # failure is not misreported as a failed resume.
            try:
                state = OperationState.load(op.get('operation_id'))
                if state:
                    state.status = 'resuming'
                    state.metadata['resumed_at'] = datetime.now(timezone.utc).isoformat()
                    state.metadata['downtime'] = downtime if was_restart else None
                    state.save()
            except Exception as e:
                log(f"Failed to update state for {op_id}: {e}", "ERROR")

    # Clean up marker. An unreadable marker is removed too, so that it does
    # not trigger the same read error on every subsequent start.
    if marker_present:
        try:
            _MARKER_FILE.unlink(missing_ok=True)
            log("Stop marker cleaned up")
        except OSError as e:
            log(f"Failed to clean up marker: {e}", "ERROR")

    log("=== Gateway Start Handler Complete ===")
    return 0


if __name__ == "__main__":
    sys.exit(main())