Skip to content

Commit

Permalink
implemented availability calculation with Monitoring::Availability
Browse files Browse the repository at this point in the history
  • Loading branch information
sni committed Jan 23, 2010
1 parent 4f13765 commit 20bbc2b
Show file tree
Hide file tree
Showing 12 changed files with 101 additions and 90 deletions.
2 changes: 1 addition & 1 deletion Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ requires 'Moose';
requires 'MooseX::MethodAttributes::Inheritable';
requires 'MooseX::Role::WithOverloading';
requires 'Monitoring::Livestatus' => '0.34';
requires 'Monitoring::Availability' => '0.04';
requires 'Monitoring::Availability' => '0.06';
requires 'HTTP::Body';
requires 'LWP';
requires 'Config::Any';
Expand Down
27 changes: 18 additions & 9 deletions docs/development/TODO
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ config:
- add missing values

avail:
- missing completely
- remove duplicates from timeperiods/groups list
- CSV output
- trends graph

trends:
- missing completely
Expand All @@ -31,14 +31,23 @@ histogram:

livestatus:
- StatsSum: ( needed by tac and extinfo 4 )
- Contacts: Service Notification Options, Host Notification Options, Retention Options, Service Notification Commands, Host Notification Commands
- Hosts: Freshness Threshold, Contact Groups, Notification Options, Event Handler, Stalking Options, Flap Detection Options, Enable Failure Prediction, Retention Options
- Services: Parallelize, Volatile, Check Freshness, Freshness Threshold, Contact Groups, Notification Options, Stalking Options, Flap Detection Options, Enable Failure Prediction, Failure Prediction Options, Retention Options
- Contacts: Service Notification Options, Host Notification Options,
Retention Options, Service Notification Commands,
Host Notification Commands
- Hosts: Freshness Threshold, Contact Groups, Notification Options,
Event Handler, Stalking Options, Flap Detection Options,
Enable Failure Prediction, Retention Options
- Services: Parallelize, Volatile, Check Freshness, Freshness Threshold,
Contact Groups, Notification Options, Stalking Options,
Flap Detection Options, Enable Failure Prediction,
Failure Prediction Options, Retention Options
- Timeperiods:
Monday - Sunday, Exceptions
- Tables:
- Host Dependencies
- Host Escalations
- Service Dependencies
- Service Escalations
Host Dependencies
Host Escalations
Service Dependencies
Service Escalations

=========================================================
after 1.0
Expand Down
73 changes: 63 additions & 10 deletions lib/Thruk/Controller/avail.pm
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package Thruk::Controller::avail;
use strict;
use warnings;
use Data::Dumper;
use Monitoring::Availability;
use parent 'Catalyst::Controller';

=head1 NAME
Expand Down Expand Up @@ -35,7 +36,7 @@ sub index :Path :Args(0) :MyAction('AddDefaults') {
# lookup parameters
my $report_type = $c->{'request'}->{'parameters'}->{'report_type'} || '';
my $get_date_parts = $c->{'request'}->{'parameters'}->{'get_date_parts'};
my $timeperiod = $c->{'request'}->{'parameters'}->{'timeperiod'} || '';
my $timeperiod = $c->{'request'}->{'parameters'}->{'timeperiod'};
my $host = $c->{'request'}->{'parameters'}->{'host'} || '';
my $hostgroup = $c->{'request'}->{'parameters'}->{'hostgroup'} || '';
my $service = $c->{'request'}->{'parameters'}->{'service'} || '';
Expand Down Expand Up @@ -197,6 +198,7 @@ sub _create_report {
$c->stash->{template} = 'avail_report_hosts.tt';
}
else {
$c->log->debug("unknown report type");
return;
}

Expand All @@ -217,6 +219,7 @@ sub _create_report {
my $t1 = $c->{'request'}->{'parameters'}->{'t1'};
my $t2 = $c->{'request'}->{'parameters'}->{'t2'};

$timeperiod = 'last24hours' if(!defined $timeperiod and !defined $t1 and !defined $t2);
my($start,$end) = Thruk::Utils::get_start_end_for_timeperiod($timeperiod,$smon,$sday,$syear,$shour,$smin,$ssec,$emon,$eday,$eyear,$ehour,$emin,$esec,$t1,$t2);

$c->log->debug("start: ".$start." - ".(scalar localtime($start)));
Expand Down Expand Up @@ -248,6 +251,24 @@ sub _create_report {
$backtrack = 4 unless defined $backtrack;
$backtrack = 4 if $backtrack < 0;

$assumeinitialstates = 'no' unless $assumeinitialstates eq 'yes';
$assumestateretention = 'no' unless $assumestateretention eq 'yes';
$assumestatesduringnotrunning = 'no' unless $assumestatesduringnotrunning eq 'yes';
$includesoftstates = 'no' unless $includesoftstates eq 'yes';

$initialassumedhoststate = 0 unless $initialassumedhoststate == 0 # Unspecified
or $initialassumedhoststate == -1 # Current State
or $initialassumedhoststate == 3 # Host Up
or $initialassumedhoststate == 4 # Host Down
or $initialassumedhoststate == 5; # Host Unreachable

$initialassumedservicestate = 0 unless $initialassumedservicestate == 0 # Unspecified
or $initialassumedservicestate == -1 # Current State
or $initialassumedservicestate == 6 # Service Ok
or $initialassumedservicestate == 8 # Service Warning
or $initialassumedservicestate == 7 # Service Unknown
or $initialassumedservicestate == 9; # Service Critical

$c->stash->{rpttimeperiod} = $rpttimeperiod;
$c->stash->{assumeinitialstates} = $assumeinitialstates;
$c->stash->{assumestateretention} = $assumestateretention;
Expand Down Expand Up @@ -458,31 +479,27 @@ sub _create_report {
$logs = $c->{'live'}->selectall_arrayref($log_query, { Slice => 1} );
$c->stats->profile(end => "avail.pm fetchlogs");
$logs = Thruk::Utils::sort($c, $logs, 'time', 'ASC');
$c->stash->{'logs'} = $logs;
#$Data::Dumper::Indent = 1;
#open(FH, '>', '/tmp/logs.txt') or die("cannot open logs.txt: $!");
#print FH Dumper($logs);
##print FH Dumper($logs);
#for my $line (@{$logs}) {
# print FH '['.$line->{'time'}.'] '.$line->{'type'};
# print FH ': '.$line->{'options'} if(defined $line->{'options'} and $line->{'options'} ne '');
# print FH "\n";
#}
#close(FH);
use Monitoring::Availability;
$c->stats->profile(begin => "calculate availability");
my $ma = Monitoring::Availability->new(
'rpttimeperiod' => $rpttimeperiod,
'assumeinitialstates' => $assumeinitialstates,
'assumestateretention' => $assumestateretention,
'assumestatesduringnotrunning' => $assumestatesduringnotrunning,
'includesoftstates' => $includesoftstates,
'initialassumedhoststate' => $initialassumedhoststate,
'initialassumedservicestate' => $initialassumedservicestate,
'initialassumedhoststate' => $self->_initialassumedhoststate_to_state($initialassumedhoststate),
'initialassumedservicestate' => $self->_initialassumedservicestate_to_state($initialassumedservicestate),
'backtrack' => $backtrack,
#'verbose' => 1,
#'logger' => $c->log,
# 'verbose' => 1,
# 'logger' => $c->log,
);
$c->stash->{avail_data} = $ma->calculate(
'start' => $start,
Expand All @@ -494,13 +511,49 @@ sub _create_report {
#$c->log->info(Dumper($c->stash->{avail_data}));
$c->stats->profile(end => "calculate availability");
if($full_log_entries) {
$c->stash->{'logs'} = $ma->get_full_logs();
#$c->log->debug("got full logs: ".Dumper($c->stash->{'logs'}));
}
elsif($show_log_entries) {
$c->stash->{'logs'} = $ma->get_condensed_logs();
#$c->log->debug("got condensed logs: ".Dumper($c->stash->{'logs'}));
}
$c->stats->profile(end => "got logs");
# finished
$c->stash->{time_token} = time() - $start_time;
$c->stats->profile(end => "_create_report()");
return 1;
}
##########################################################
sub _initialassumedhoststate_to_state {
    my $self                    = shift;
    my $initialassumedhoststate = shift;

    # Translate the numeric "initial assumed host state" code taken from the
    # request into the state name handed to Monitoring::Availability->new().
    #
    # Parameters:
    #   $initialassumedhoststate - numeric code (0, -1, 3, 4 or 5)
    # Returns:
    #   one of 'unspecified', 'current', 'up', 'down', 'unreachable'

    # A missing value is treated like code 0 so the numeric comparisons
    # below do not emit "uninitialized value" warnings.
    return 'unspecified' unless defined $initialassumedhoststate;

    return 'unspecified' if $initialassumedhoststate ==  0; # Unspecified
    return 'current'     if $initialassumedhoststate == -1; # Current State
    return 'up'          if $initialassumedhoststate ==  3; # Host Up
    return 'down'        if $initialassumedhoststate ==  4; # Host Down
    return 'unreachable' if $initialassumedhoststate ==  5; # Host Unreachable

    # Explicit fallback: without it the sub would return the (false) result
    # of the last comparison instead of a valid state name.
    return 'unspecified';
}
##########################################################
sub _initialassumedservicestate_to_state {
    my $self                       = shift;
    my $initialassumedservicestate = shift;

    # Translate the numeric "initial assumed service state" code taken from
    # the request into the state name handed to
    # Monitoring::Availability->new().
    #
    # Parameters:
    #   $initialassumedservicestate - numeric code (0, -1, 6, 7, 8 or 9)
    # Returns:
    #   one of 'unspecified', 'current', 'ok', 'warning', 'unknown',
    #   'critical'

    # A missing value is treated like code 0 so the numeric comparisons
    # below do not emit "uninitialized value" warnings.
    return 'unspecified' unless defined $initialassumedservicestate;

    return 'unspecified' if $initialassumedservicestate ==  0; # Unspecified
    return 'current'     if $initialassumedservicestate == -1; # Current State
    return 'ok'          if $initialassumedservicestate ==  6; # Service Ok
    return 'warning'     if $initialassumedservicestate ==  8; # Service Warning
    return 'unknown'     if $initialassumedservicestate ==  7; # Service Unknown
    return 'critical'    if $initialassumedservicestate ==  9; # Service Critical

    # Explicit fallback: without it the sub would return the (false) result
    # of the last comparison instead of a valid state name.
    return 'unspecified';
}
=head1 AUTHOR
Sven Nierlein, 2009, <nierlein@cpan.org>
Expand Down
2 changes: 1 addition & 1 deletion lib/Thruk/Utils.pm
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ sub get_start_end_for_timeperiod {
$start = Mktime($year-1,1,1, 0,0,0);
$end = Mktime($year,1,1, 0,0,0);
}
elsif($timeperiod eq 'custom') {
elsif($timeperiod eq 'custom' or $timeperiod eq 'current') {
$start = $t1;
$end = $t2;
if(!defined $start) {
Expand Down
5 changes: 3 additions & 2 deletions templates/avail_report_form_box.tt
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
[% IF servicegroup %]<input type='hidden' name='servicegroup' value='[% servicegroup %]'>[% END %]
<input type='hidden' name='t1' value='[% start %]'>
<input type='hidden' name='t2' value='[% end %]'>
<input type='hidden' name='show_log_entries' value='[% show_log_entries %]'>
[% IF show_log_entries %]<input type='hidden' name='show_log_entries' value='[% show_log_entries %]'>[% END %]
[% IF full_log_entries %]<input type='hidden' name='full_log_entries' value='[% full_log_entries %]'>[% END %]
<input type='hidden' name='assumeinitialstates' value='[% assumeinitialstates %]'>
<input type='hidden' name='assumestateretention' value='[% assumestateretention %]'>
<input type='hidden' name='assumestatesduringnotrunning' value='[% assumestatesduringnotrunning %]'>
<input type='hidden' name='includesoftstates' value='[% includesoftstates %]'>
<input type='hidden' name='report_type' value='[% report_type %]'>
[% IF report_type %]<input type='hidden' name='report_type' value='[% report_type %]'>[% END %]
<table border="0" class='optBox'>
<tr>
[% UNLESS service %]
Expand Down
6 changes: 3 additions & 3 deletions templates/avail_report_hostgroup.tt
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
<br>
<br>

[% FOR groupname IN groups.keys.sort %]
[% SET hostgroupname = groupname %]
[% SET hosts = g.hosts %]
[% FOR groupname IN groups.keys.sort %]
[% SET hostgroupname = groupname %]
[% SET hosts = groups.$groupname.hosts %]
[% PROCESS avail_report_hosts_part.tt %]
[% END %]

Expand Down
65 changes: 6 additions & 59 deletions templates/avail_report_log_entries.tt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[% IF show_log_entries or full_log_entries %]
<div align="center" class='dataTitle'>[% IF service %]Service[% ELSE %]Host[% END %] Log Entries:</div><br>
<div align="center" class='infoMessage'>
<a href='avail.cgi?t1=[% start %]&amp;t2=[% end %][% IF host %]&amp;host=[% host %][% END %][% IF service %]&amp;service=[% service %][% END %]&amp;backtrack=[% backtrack %]&amp;assumestateretention=[% assumestateretention %]&amp;assumeinitialstates=[% assumeinitialstates %]&amp;assumestatesduringnotrunning=[% assumestatesduringnotrunning %]&amp;initialassumedhoststate=[% initialassumedhoststate %]&amp;initialassumedservicestate=[% initialassumedservicestate %]&amp;[% IF show_log_entries %]full_log_entries[% END %]&amp;[% IF full_log_entries %]show_log_entries[% END %]&amp;showscheduleddowntime=[% showscheduleddowntime %]'>[ View [% IF full_log_entries %]condensed[% ELSE %]full[% END %] log entries ]</a>
<a href='avail.cgi?t1=[% start %]&amp;t2=[% end %][% IF host %]&amp;host=[% host %][% END %][% IF service %]&amp;service=[% service %][% END %]&amp;backtrack=[% backtrack %]&amp;assumestateretention=[% assumestateretention %]&amp;assumeinitialstates=[% assumeinitialstates %]&amp;assumestatesduringnotrunning=[% assumestatesduringnotrunning %]&amp;initialassumedhoststate=[% initialassumedhoststate %]&amp;initialassumedservicestate=[% initialassumedservicestate %]&amp;[% IF full_log_entries %]show_log_entries[% ELSE %]full_log_entries[% END %]&amp;showscheduleddowntime=[% showscheduleddowntime %]'>[ View [% IF full_log_entries %]condensed[% ELSE %]full[% END %] log entries ]</a>
</div>
<div align="center">
<table border="1" cellspacing="0" cellpadding="3" class='logEntries'>
Expand All @@ -15,67 +15,14 @@
<th class='logEntries'>Event/State Information</th>
</tr>
[% FOR log IN logs %]


[%# filter some logs out #%]
[%# logentry outside our timedefinition #%]
[% IF log.time > end or log.time < start %][% NEXT %][% END %]

[% IF host and !service %]
[%# filter out anything besides the host on hostmode #%]
[% IF log.class != 2 and log.host_name != host %][% NEXT %][% END %]
[% IF log.class != 2 and log.service_description != '' %][% NEXT %][% END %]
[% END %]


[% SET class = 'logEntriesEven' IF loop.odd %]
[% SET class = 'logEntriesOdd' IF loop.even %]
[% SET message = log.type %]
[% SET plugin_output = log.plugin_output %]
[% IF log.type == 'CURRENT SERVICE STATE' or log.type == 'INITIAL SERVICE STATE' or log.type == 'SERVICE ALERT' %]
[% IF log.state == 0 %][% SET mclass = "OK" message = 'SERVICE OK' %]
[% ELSIF log.state == 1 %][% SET mclass = "WARNING" message = 'SERVICE WARNING' %]
[% ELSIF log.state == 2 %][% SET mclass = "UNKNOWN" message = 'SERVICE UNKNOWN' %]
[% ELSIF log.state == 3 %][% SET mclass = "CRITICAL" message = 'SERVICE CRITICAL' %]
[% END %]

[% IF log.options.search(';HARD;') %][% SET message = message _ ' (HARD)' %][% ELSE %][% SET message = message _ ' (SOFT)' %][% END %]
[% ELSIF log.type == 'CURRENT HOST STATE' or log.type == 'INITIAL HOST STATE' or log.type == 'HOST ALERT' %]
[% IF log.state == 0 %][% SET mclass = "UP" message = 'HOST UP' %]
[% ELSIF log.state == 1 %][% SET mclass = "DOWN" message = 'HOST DOWN' %]
[% ELSIF log.state == 2 %][% SET mclass = "UNREACHABLE" message = 'HOST UNREACHABLE' %]
[% END %]

[% IF log.options.search(';HARD;') %][% SET message = message _ ' (HARD)' %][% ELSE %][% SET message = message _ ' (SOFT)' %][% END %]
[% ELSIF log.type == 'HOST DOWNTIME ALERT' %]
[% IF log.options.search(';STARTED;') %][% SET mclass = "INDETERMINATE" message = 'HOST DOWNTIME START' plugin_output = 'Start of scheduled downtime' %][% END %]
[% IF log.options.search(';STOPPED;') %][% SET mclass = "INDETERMINATE" message = 'HOST DOWNTIME STOP' plugin_output = 'End of scheduled downtime' %][% END %]
[% ELSIF log.type == 'SERVICE DOWNTIME ALERT' %]
[% IF log.options.search(';STARTED;') %][% SET mclass = "INDETERMINATE" message = 'SERVICE DOWNTIME START' plugin_output = 'Start of scheduled downtime' %][% END %]
[% IF log.options.search(';STOPPED;') %][% SET mclass = "INDETERMINATE" message = 'SERVICE DOWNTIME STOP' plugin_output = 'End of scheduled downtime' %][% END %]
[% ELSIF log.type.search(' starting\.\.\.') or log.type.search(' restarting\.\.\.') %]
[% SET mclass = "INDETERMINATE" message = 'PROGRAM (RE)START' plugin_output = 'Program start' %]
[% ELSIF log.type.search(' shutting down\.\.\.') %]
[% SET mclass = "INDETERMINATE" message = 'PROGRAM END' plugin_output = 'Normal program termination' %]
[% ELSIF log.type.search('Bailing out') %]
[% SET mclass = "INDETERMINATE" message = 'PROGRAM END' plugin_output = 'Abnormal program termination' %]
[% ELSE %]
[%# SET message = 'SKIPPED!! ' _ message #%]
[%# SET plugin_output = 'SKIPPED!! ' _ plugin_output #%]
[% NEXT %]
[% END %]

[%# no process restarts in condensed mode #%]
[% IF show_log_entries and log.class == 2 %]
[% NEXT %]
[% END %]

<tr class='[% class %]'>
<td class='[% class %]'>[% date.format(log.time, '%Y-%m-%d %H:%M:%S') %]</td>
<td class='[% class %]'>2010-01-09 14:11:33</td>
<td class='[% class %]'>0d 14h 11m 33s</td>
<td class='logEntries[% mclass %]'>[% message %]</td>
<td class='[% class %]'>[% plugin_output %]</td>
<td class='[% class %]'>[% date.format(log.start, '%Y-%m-%d %H:%M:%S') %]</td>
<td class='[% class %]'>[% date.format(log.end, '%Y-%m-%d %H:%M:%S') %]</td>
<td class='[% class %]'>[% log.duration %]</td>
<td class='logEntries[% log.class %]'>[% log.type %]</td>
<td class='[% class %]'>[% log.plugin_output %]</td>
</tr>
[% END %]
</table>
Expand Down
2 changes: 1 addition & 1 deletion templates/avail_report_services_part.tt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<th class='data'>Host</th>
[% END %]
<th class='data'>Service</th>
<th class='data'>% Time O </th>
<th class='data'>% Time Ok</th>
<th class='data'>% Time Warning</th>
<th class='data'>% Time Unknown</th>
<th class='data'>% Time Critical</th>
Expand Down
2 changes: 1 addition & 1 deletion templates/extinfo_type_1.tt
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@
[% ELSIF host.state == 2 %]
<div class='hostUNREACHABLE'>&nbsp;&nbsp;UNREACHABLE&nbsp;&nbsp;</div>
[% END %]
&nbsp;(for [% duration(0 - host.last_state_change) %])
&nbsp;(for [% duration(date.now - host.last_state_change) %])
[% IF host.acknowledged %]
&nbsp;(Has been acknowledged)
[% END %]
Expand Down
2 changes: 1 addition & 1 deletion templates/extinfo_type_2.tt
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
[% ELSIF service.state == 3 %]
<div class='serviceUNKNOWN'>&nbsp;&nbsp;UNKNOWN&nbsp;&nbsp;</div>
[% END %]
&nbsp;(for [% duration(0 - service.last_state_change) %])
&nbsp;(for [% duration(date.now - service.last_state_change) %])
[% IF service.acknowledged %]
&nbsp;(Has been acknowledged)
[% END %]
Expand Down
3 changes: 2 additions & 1 deletion templates/outages.tt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[% USE date %]
[% PROCESS _header.tt %]

<table border="0" width="100%">
Expand Down Expand Up @@ -39,7 +40,7 @@
N/A
[% END %]
</td>
<td class='[% class %]'>[% duration(0 - host.last_state_change) %]</td>
<td class='[% class %]'>[% duration(date.now - host.last_state_change) %]</td>
<td class='[% class %]'>[% host.affected_hosts %]</td>
<td class='[% class %]'>[% host.affected_services %]</td>
<td class='[% class %]'>
Expand Down
Loading

0 comments on commit 20bbc2b

Please sign in to comment.