Skip to content

Commit

Permalink
Added code to report time for the llp, mlp, & hlp (nasa#269)
Browse files Browse the repository at this point in the history
The system monitor now checks for the imu aug and guest science manager
heartbeats. On receiving one of these heartbeats, it compares the time
in the heartbeat to its current time. The difference, heartbeat
timestamp, and current time are then published. The system monitor
also logs the timestamps from the imu aug and guest science manager
on receiving the first heartbeat from these nodes.
  • Loading branch information
kbrowne15 authored Aug 30, 2021
1 parent 8de2e2c commit 27d723d
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 40 deletions.
2 changes: 1 addition & 1 deletion astrobee/config/management/data_bagger.config
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ default_topics = {{topic="gnc/ctl/traj", downlink="immediate", frequency=-1},
{topic="mob/state", downlink="immediate", frequency=-1},
{topic="command", downlink="immediate", frequency=-1},
{topic="mgt/ack", downlink="immediate", frequency=-1},
{topic="mgt/sys_monitor/time_diff", downlink="immediate", frequency=-1},
{topic="mgt/sys_monitor/time_sync", downlink="immediate", frequency=-1},
{topic="gs/gs_manager/ack", downlink="immediate", frequency=-1},
{topic="gs/gs_manager/config", downlink="immediate", frequency=-1},
{topic="gs/gs_manager/state", downlink="immediate", frequency=-1},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,11 @@
# Header with timestamp
std_msgs/Header header

# Time diff in seconds between llp and mlp
float32 time_diff_sec
# Processor that the heartbeat came from
string remote_processor

# Current time on the mlp
time mlp_time

# Time in the incoming heartbeat
time remote_time
5 changes: 3 additions & 2 deletions management/sys_monitor/include/sys_monitor/sys_monitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include <ff_msgs/FaultConfig.h>
#include <ff_msgs/FaultInfo.h>
#include <ff_msgs/FaultState.h>
#include <ff_msgs/TimeDiffStamped.h>
#include <ff_msgs/TimeSyncStamped.h>
#include <ff_msgs/UnloadLoadNodelet.h>
#include <ff_util/ff_faults.h>
#include <ff_util/ff_names.h>
Expand Down Expand Up @@ -195,7 +195,7 @@ class SysMonitor : public ff_util::FreeFlyerNodelet {
ros::NodeHandle nh_;
ros::Publisher pub_cmd_, pub_heartbeat_;
ros::Publisher pub_fault_config_, pub_fault_state_;
ros::Publisher pub_time_diff_;
ros::Publisher pub_time_sync_;
ros::Timer reload_params_timer_, startup_timer_, reload_nodelet_timer_;
ros::Timer heartbeat_timer_;
ros::ServiceServer unload_load_nodelet_service_;
Expand All @@ -213,6 +213,7 @@ class SysMonitor : public ff_util::FreeFlyerNodelet {
config_reader::ConfigReader config_params_;

bool time_diff_fault_triggered_;
bool log_time_llp_, log_time_hlp_;
int pub_queue_size_, sub_queue_size_;
int num_current_blocking_fault_;
unsigned int startup_time_, reload_nodelet_timeout_, heartbeat_pub_rate_;
Expand Down
101 changes: 67 additions & 34 deletions management/sys_monitor/src/sys_monitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ SysMonitor::SysMonitor() :
ff_util::FreeFlyerNodelet(NODE_SYS_MONITOR, false),
time_diff_node_("imu_aug"),
time_diff_fault_triggered_(false),
log_time_llp_(true),
log_time_hlp_(true),
pub_queue_size_(10),
sub_queue_size_(100),
time_drift_thres_sec_(0.25) {
Expand Down Expand Up @@ -163,35 +165,33 @@ void SysMonitor::HeartbeatCallback(ff_msgs::HeartbeatConstPtr const& hb) {
uint i = 0, j = 0, tmp_id;
bool fault_found = true;

// Check to see if node heartbeat is set up in watchdogs
if (watch_dogs_.count(hb->node) > 0) {
WatchdogPtr wd = watch_dogs_.at(hb->node);
wd->ResetTimer();
// Check if the manager in the heartbeat matches the manager in the config.
// If not, replace the manager with what is in the heartbeat since that is
// more accurate.
if (wd->nodelet_manager() != hb->nodelet_manager) {
wd->nodelet_manager(hb->nodelet_manager);
}

// Check to see if node restarted publishing its heartbeat
if (wd->hb_fault_occurring()) {
wd->hb_fault_occurring(false);
RemoveFault(wd->fault_id());
}
// Report time if the heartbeat came from the imu aug or guest science manager
// node.
if (hb->node == time_diff_node_ || hb->node == "guest_science_manager") {
ff_msgs::TimeSyncStamped time_sync_msg;
ros::Time time_now = ros::Time::now();

// Check time drift, use time in imu_aug heartbeat
// Check time drift from llp, use time in imu_aug heartbeat
if (hb->node == time_diff_node_) {
float time_diff_sec = (ros::Time::now() - hb->header.stamp).toSec();
PublishTimeDiff(time_diff_sec);
// Check if time difference is great than threshold. If it is, trigger
// fault
time_sync_msg.remote_processor = "LLP";

// Output time difference to the ros log for the first imu aug heartbeat
if (log_time_llp_) {
NODELET_INFO_STREAM("LLP time is " << hb->header.stamp <<
" and the MLP time is " << time_now << ".");
log_time_llp_ = false;
}

// Check for time drift. If time difference is great than the threshold,
// trigger fault
// This fault only applies to clock skew between the MLP and the LLP since
// this skew causes navigation failures.
float time_diff_sec = (time_now - hb->header.stamp).toSec();
if (abs(time_diff_sec) > time_drift_thres_sec_) {
if (!time_diff_fault_triggered_) {
std::string key = ff_util::fault_keys[ff_util::TIME_DIFF_TOO_HIGH];
unsigned int id = faults_[key];
AddFault(id, ("Time diff is: " +
std::to_string(abs(time_diff_sec))));
AddFault(id, ("Time diff is: " + std::to_string(abs(time_diff_sec))));
PublishFaultResponse(id);
time_diff_fault_triggered_ = true;
}
Expand All @@ -203,6 +203,38 @@ void SysMonitor::HeartbeatCallback(ff_msgs::HeartbeatConstPtr const& hb) {
time_diff_fault_triggered_ = false;
}
}
} else {
time_sync_msg.remote_processor = "HLP";
// Output time difference to the ros log for the first guest science
// manager heartbeat
if (log_time_hlp_) {
NODELET_INFO_STREAM("HLP time is " << hb->header.stamp <<
" and the MLP time is "<< time_now << ".");
log_time_hlp_ = false;
}
}

time_sync_msg.header.stamp = ros::Time::now();
time_sync_msg.mlp_time = time_now;
time_sync_msg.remote_time = hb->header.stamp;
pub_time_sync_.publish(time_sync_msg);
}

// Check to see if node heartbeat is set up in watchdogs
if (watch_dogs_.count(hb->node) > 0) {
WatchdogPtr wd = watch_dogs_.at(hb->node);
wd->ResetTimer();
// Check if the manager in the heartbeat matches the manager in the config.
// If not, replace the manager with what is in the heartbeat since that is
// more accurate.
if (wd->nodelet_manager() != hb->nodelet_manager) {
wd->nodelet_manager(hb->nodelet_manager);
}

// Check to see if node restarted publishing its heartbeat
if (wd->hb_fault_occurring()) {
wd->hb_fault_occurring(false);
RemoveFault(wd->fault_id());
}

// Get last heartbeat for fault comparison
Expand All @@ -215,10 +247,10 @@ void SysMonitor::HeartbeatCallback(ff_msgs::HeartbeatConstPtr const& hb) {
for (i = 0; i < previous_hb->faults.size(); i++) {
RemoveFault(previous_hb->faults[i].id);
}
// Set previous hb to null so that we don't compare the faults with the
// last time the nodelet was run
previous_hb = NULL;
}
// Set previous hb to null so that we don't compare the faults with the last
// time the nodelet was run

// Check to see if this is the first heartbeat from the node
if (!previous_hb) {
Expand Down Expand Up @@ -372,8 +404,8 @@ void SysMonitor::Initialize(ros::NodeHandle *nh) {
pub_queue_size_,
true);

pub_time_diff_ = nh_.advertise<ff_msgs::TimeDiffStamped>(
TOPIC_MANAGEMENT_SYS_MONITOR_TIME_DIFF,
pub_time_sync_ = nh_.advertise<ff_msgs::TimeSyncStamped>(
TOPIC_MANAGEMENT_SYS_MONITOR_TIME_SYNC,
pub_queue_size_,
false);

Expand All @@ -394,6 +426,14 @@ void SysMonitor::Initialize(ros::NodeHandle *nh) {
num_current_blocking_fault_ = 0;

OutputFaultTables();

// Add guest science manager to the list of unwatched heartbeats. The guest
// science manager heartbeat is only used to report the time on the HLP.
// It doesn't make sense to monitor the guest science heartbeat and trigger a
// fault if it is missing because the guest science manager doesn't start up
// with the flight software, isn't run all the time, and cannot be un/loaded
// like the nodes that run on the MLP and LLP.
unwatched_heartbeats_.push_back("guest_science_manager");
}

// Function used for debugging purposes only
Expand Down Expand Up @@ -517,13 +557,6 @@ void SysMonitor::PublishHeartbeat(bool initialization_fault) {
pub_heartbeat_.publish(heartbeat_);
}

void SysMonitor::PublishTimeDiff(float time_diff_sec) {
ff_msgs::TimeDiffStamped time_diff_msg;
time_diff_msg.header.stamp = ros::Time::now();
time_diff_msg.time_diff_sec = time_diff_sec;
pub_time_diff_.publish(time_diff_msg);
}

void SysMonitor::StartupTimerCallback(ros::TimerEvent const& te) {
ff_msgs::UnloadLoadNodelet::Request load_req;
int load_result;
Expand Down
2 changes: 1 addition & 1 deletion shared/ff_util/include/ff_util/ff_names.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@
#define TOPIC_MANAGEMENT_SYS_MONITOR_CONFIG "mgt/sys_monitor/config"
#define TOPIC_MANAGEMENT_SYS_MONITOR_STATE "mgt/sys_monitor/state"
#define TOPIC_MANAGEMENT_SYS_MONITOR_HEARTBEAT "mgt/sys_monitor/heartbeat"
#define TOPIC_MANAGEMENT_SYS_MONITOR_TIME_DIFF "mgt/sys_monitor/time_diff"
#define TOPIC_MANAGEMENT_SYS_MONITOR_TIME_SYNC "mgt/sys_monitor/time_sync"
#define TOPIC_MANAGEMENT_DATA_BAGGER_STATE "mgt/data_bagger/state"
#define TOPIC_MANAGEMENT_DATA_BAGGER_TOPICS "mgt/data_bagger/topics"
#define TOPIC_MANAGEMENT_CAMERA_STATE "mgt/camera_state"
Expand Down

0 comments on commit 27d723d

Please sign in to comment.