Diagnostics Messages

HORUS provides message types for system monitoring, health checks, error reporting, and general diagnostics.

Heartbeat

Periodic signal indicating a node is alive and operational.

use horus::prelude::*; // Provides diagnostics::Heartbeat;

// New heartbeat for the "MotorController" node
let mut motor_hb = Heartbeat::new("MotorController", 1);

// Refresh once per heartbeat cycle, passing the node's uptime in seconds
let uptime_secs = 120.5;
motor_hb.update(uptime_secs);

// Report the current heartbeat state
println!("Node: {}", motor_hb.name());
println!("Sequence: {}", motor_hb.sequence);
println!("Uptime: {:.1}s", motor_hb.uptime);
println!("Alive: {}", motor_hb.alive);

Fields:

| Field | Type | Description |
|-------|------|-------------|
| node_name | [u8; 32] | Node name (null-terminated) |
| node_id | u32 | Node identifier |
| sequence | u64 | Heartbeat sequence number |
| alive | u8 | Node is responding (0 = dead, 1 = alive) |
| uptime | f64 | Time since startup (seconds) |
| timestamp_ns | u64 | Nanoseconds since epoch |

DiagnosticStatus

General-purpose status reporting.

use horus::prelude::*; // Provides DiagnosticStatus, StatusLevel

// Build one status message per severity level
let ok_status = DiagnosticStatus::ok("System initialized successfully");

let low_battery =
    DiagnosticStatus::warn(1001, "Battery level low").with_component("PowerManager");

let sensor_timeout =
    DiagnosticStatus::error(2001, "Sensor communication timeout").with_component("SensorHub");

let driver_fault = DiagnosticStatus::fatal(9001, "Motor driver fault - emergency stop")
    .with_component("MotorController");

// Print the level, reporting component and message of the error status
println!(
    "[{:?}] {}: {}",
    sensor_timeout.level,
    sensor_timeout.component_str(),
    sensor_timeout.message_str()
);

StatusLevel values:

| Level | Value | Description |
|-------|-------|-------------|
| Ok | 0 | Everything is OK |
| Warn | 1 | Warning condition |
| Error | 2 | Error (recoverable) |
| Fatal | 3 | Fatal error (system should stop) |

Fields:

| Field | Type | Description |
|-------|------|-------------|
| level | u8 | Severity level (use StatusLevel as u8 to set) |
| code | u32 | Component-specific error code |
| message | [u8; 128] | Human-readable message |
| component | [u8; 32] | Reporting component name |
| timestamp_ns | u64 | Nanoseconds since epoch |

EmergencyStop

Critical safety message to immediately stop all robot motion.

use horus::prelude::*; // Provides diagnostics::EmergencyStop;

// Engage the stop with a reason, then tag the source that triggered it
let obstacle_stop =
    EmergencyStop::engage("Obstacle detected in safety zone").with_source("SafetyController");

println!("E-STOP engaged: {}", obstacle_stop.engaged);
println!("Reason: {}", obstacle_stop.reason_str());

// Build the message that releases the emergency stop
let release_msg = EmergencyStop::release();

// Mark a stop as auto-resettable (auto_reset: 0 = no, 1 = yes)
let mut soft_limit_stop = EmergencyStop::engage("Soft limit exceeded");
soft_limit_stop.auto_reset = 1;

Fields:

| Field | Type | Description |
|-------|------|-------------|
| engaged | u8 | Emergency stop is active (0 = off, 1 = on) |
| reason | [u8; 64] | Stop reason |
| source | [u8; 32] | Triggering source |
| auto_reset | u8 | Can auto-reset after clearing (0 = no, 1 = yes) |
| timestamp_ns | u64 | Nanoseconds since epoch |

ResourceUsage

System resource utilization.

use horus::prelude::*; // Provides diagnostics::ResourceUsage;

// Start from a fresh sample and fill in the measured values
let mut stats = ResourceUsage::new();
stats.cpu_percent = 45.5;
stats.memory_bytes = 512 * 1024 * 1024;  // 512MB
stats.memory_percent = 25.0;
stats.temperature = 65.5;
stats.thread_count = 12;

// Print a warning line whenever a threshold check reports true
let warn_if = |tripped: bool, message: &str| {
    if tripped {
        println!("{}", message);
    }
};

warn_if(stats.is_cpu_high(80.0), "Warning: High CPU usage");
warn_if(stats.is_memory_high(90.0), "Warning: High memory usage");
warn_if(stats.is_temperature_high(80.0), "Warning: High temperature");

// One-line summary of the sample
println!(
    "CPU: {:.1}%, Memory: {:.1}%, Temp: {:.1}C",
    stats.cpu_percent, stats.memory_percent, stats.temperature
);

Fields:

| Field | Type | Description |
|-------|------|-------------|
| cpu_percent | f32 | CPU usage (0-100) |
| memory_bytes | u64 | Memory usage in bytes |
| memory_percent | f32 | Memory usage (0-100) |
| disk_bytes | u64 | Disk usage in bytes |
| disk_percent | f32 | Disk usage (0-100) |
| network_tx_bytes | u64 | Network bytes sent |
| network_rx_bytes | u64 | Network bytes received |
| temperature | f32 | System temperature (Celsius) |
| thread_count | u32 | Active thread count |
| timestamp_ns | u64 | Nanoseconds since epoch |

DiagnosticValue

Key-value pair for diagnostic reports.

use horus::prelude::*; // Provides diagnostics::DiagnosticValue;

// One constructor per value type; each takes a key and a value
let firmware_version = DiagnosticValue::string("firmware_version", "1.2.3");
let error_count = DiagnosticValue::int("error_count", 42);
let temperature = DiagnosticValue::float("temperature", 65.5);
let calibrated = DiagnosticValue::bool("calibrated", true);

Value Type Constants:

| Constant | Value | Description |
|----------|-------|-------------|
| TYPE_STRING | 0 | String value |
| TYPE_INT | 1 | Integer value |
| TYPE_FLOAT | 2 | Float value |
| TYPE_BOOL | 3 | Boolean value |

Fields:

| Field | Type | Description |
|-------|------|-------------|
| key | [u8; 32] | Key name |
| value | [u8; 64] | Value as string |
| value_type | u8 | Value type hint |

DiagnosticReport

Diagnostic report with multiple key-value pairs (up to 16).

use horus::prelude::*; // Provides diagnostics::{DiagnosticReport, StatusLevel};

// Start a report for the "MotorController" component
let mut motor_report = DiagnosticReport::new("MotorController");

// Attach typed key-value entries; each call is fallible, so `?` propagates any failure
motor_report.add_string("firmware", "2.1.0")?;
motor_report.add_int("tick_count", 15000)?;
motor_report.add_float("voltage", 24.5)?;
motor_report.add_bool("calibrated", true)?;

// Record the overall status level of the report
motor_report.set_level(StatusLevel::Ok);

println!("Report has {} values at level {}", motor_report.value_count, motor_report.level);

Fields:

| Field | Type | Description |
|-------|------|-------------|
| component | [u8; 32] | Component name |
| values | [DiagnosticValue; 16] | Diagnostic values |
| value_count | u8 | Number of valid values |
| level | u8 | Overall status level (use StatusLevel as u8 to set) |
| timestamp_ns | u64 | Nanoseconds since epoch |

NodeState

Node execution state enumeration.

use horus_library::messages::diagnostics::NodeState;
// Note: The prelude's NodeState is the core scheduler version.
// For the POD message version, import from diagnostics directly.

// Pick a state and print its string form
let current = NodeState::Running;
let label = current.as_str();
println!("State: {}", label);  // "Running"

NodeState values:

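| State | Value | Description |
|-------|-------|-------------|
| Idle | 0 | Created but not started |
| Initializing | 1 | Running initialization |
| Running | 2 | Active and executing |
| Paused | 3 | Temporarily suspended |
| Stopped | 4 | Cleanly shut down |
| Error | 5 | Error/crashed state |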

HealthStatus

Node operational health status.

use horus::prelude::*; // Provides diagnostics::HealthStatus;

// Look up the display name and the monitor color for a status
let status = HealthStatus::Healthy;
let (label, color) = (status.as_str(), status.color());
println!("Health: {} ({})", label, color);

// Colors used for monitor display, per status:
//   Healthy  -> "green"
//   Warning  -> "yellow"
//   Error    -> "orange"
//   Critical -> "red"
//   Unknown  -> "gray"

HealthStatus values:

| Status | Value | Description |
|--------|-------|-------------|
| Healthy | 0 | Operating normally |
| Warning | 1 | Degraded performance |
| Error | 2 | Errors but running |
| Critical | 3 | Fatal errors |
| Unknown | 4 | No heartbeat received |

NodeHeartbeat

Node status heartbeat with health information (written to shared memory).

use horus::prelude::*; // Provides NodeHeartbeat, HealthStatus
use horus_library::messages::diagnostics::NodeState; // POD version (distinct from core NodeState)

// Build a heartbeat for a running, healthy node and fill in its counters
let mut node_hb = NodeHeartbeat::new(NodeState::Running, HealthStatus::Healthy);
node_hb.tick_count = 15000;
node_hb.target_rate = 100;
node_hb.actual_rate = 98;
node_hb.error_count = 0;

// Refresh the heartbeat timestamp
node_hb.update_timestamp();

// Freshness check: was the heartbeat updated within the last 5 seconds?
if node_hb.is_fresh(5) {
    println!("Node is alive");
}

// Serialize for file writing
let encoded = node_hb.to_bytes();

// Deserialize from file; `from_bytes` returns an `Option`, so only `Some` is handled
if let Some(decoded) = NodeHeartbeat::from_bytes(&encoded) {
    println!("Tick rate: {}/{} Hz", decoded.actual_rate, decoded.target_rate);
}

Fields:

| Field | Type | Description |
|-------|------|-------------|
| state | u8 | Execution state (use NodeState as u8 to set) |
| health | u8 | Health status (use HealthStatus as u8 to set) |
| tick_count | u64 | Total tick count |
| target_rate | u32 | Target tick rate |
| actual_rate | u32 | Measured tick rate |
| error_count | u32 | Error count |
| last_tick_timestamp | u64 | Last tick time (unix epoch seconds) |
| heartbeat_timestamp | u64 | Heartbeat time (unix epoch seconds) |

SafetyStatus

Safety system status.

use horus::prelude::*; // Provides diagnostics::SafetyStatus;

// SafetyStatus::new() sets good defaults (enabled=1, watchdog=1, limits=1, comms=1),
// so override individual fields only if needed:
let mut status = SafetyStatus::new();
status.estop_engaged = 0;

// Gate operation on the overall safety check
if status.is_safe() {
    println!("System is safe to operate");
} else {
    println!("Safety interlock active - fault code: {}", status.fault_code);
}

// Set a fault condition, then report the current safety mode
status.set_fault(1001);
let mode_label = match status.mode {
    SafetyStatus::MODE_NORMAL => "Normal",
    SafetyStatus::MODE_REDUCED => "Reduced",
    SafetyStatus::MODE_SAFE_STOP => "Safe Stop",
    _ => "Unknown",
};
println!("Mode: {}", mode_label);

// Clear faults
status.clear_faults();

Mode Constants:

| Constant | Value | Description |
|----------|-------|-------------|
| MODE_NORMAL | 0 | Normal operation |
| MODE_REDUCED | 1 | Reduced speed/power |
| MODE_SAFE_STOP | 2 | Safe stop engaged |

Fields:

| Field | Type | Description |
|-------|------|-------------|
| enabled | u8 | Safety system active (0 = off, 1 = on) |
| estop_engaged | u8 | Emergency stop engaged (0 = no, 1 = yes) |
| watchdog_ok | u8 | Watchdog timer OK (0 = fault, 1 = ok) |
| limits_ok | u8 | All limits within bounds (0 = fault, 1 = ok) |
| comms_ok | u8 | Communication healthy (0 = fault, 1 = ok) |
| mode | u8 | Safety mode |
| fault_code | u32 | Fault code (0 = none) |
| timestamp_ns | u64 | Nanoseconds since epoch |

Diagnostics Node Example

use horus::prelude::*;

/// Example node that watches the safety status and publishes diagnostics,
/// resource usage and emergency-stop messages from its `tick` method.
struct DiagnosticsNode {
    /// Topic on which `tick` sends `DiagnosticStatus` messages (error, warning and OK).
    status_pub: Topic<DiagnosticStatus>,
    /// Topic on which `tick` sends a `ResourceUsage` sample every 100 ticks.
    resource_pub: Topic<ResourceUsage>,
    /// Topic from which `tick` receives `SafetyStatus` messages.
    safety_sub: Topic<SafetyStatus>,
    /// Topic on which `tick` sends an `EmergencyStop` when the safety status is not safe.
    estop_pub: Topic<EmergencyStop>,
    /// Number of `tick` calls so far; incremented at the start of every tick.
    tick_count: u64,
    /// Reference instant used to compute the uptime reported in the periodic OK status.
    start_time: std::time::Instant,
}

impl Node for DiagnosticsNode {
    fn name(&self) -> &str { "Diagnostics" }

    fn tick(&mut self) {
        self.tick_count += 1;

        // Check safety status
        if let Some(safety) = self.safety_sub.recv() {
            if !safety.is_safe() {
                // Trigger emergency stop
                let estop = EmergencyStop::engage(&format!(
                    "Safety fault code: {}", safety.fault_code
                )).with_source("DiagnosticsNode");
                self.estop_pub.send(estop);

                // Send error status
                let status = DiagnosticStatus::error(safety.fault_code, "Safety system fault")
                    .with_component("SafetyMonitor");
                self.status_pub.send(status);
            }
        }

        // Periodic resource reporting (every 100 ticks)
        if self.tick_count % 100 == 0 {
            let mut usage = ResourceUsage::new();
            // ... populate with actual system metrics ...

            // Check thresholds
            if usage.is_cpu_high(90.0) {
                let status = DiagnosticStatus::warn(1001, "CPU usage above 90%")
                    .with_component("ResourceMonitor");
                self.status_pub.send(status);
            }

            self.resource_pub.send(usage);
        }

        // Periodic OK status (every 1000 ticks)
        if self.tick_count % 1000 == 0 {
            let uptime = self.start_time.elapsed().as_secs_f64();
            let status = DiagnosticStatus::ok(&format!("System healthy, uptime: {:.0}s", uptime))
                .with_component("DiagnosticsNode");
            self.status_pub.send(status);
        }
    }
}

See Also