Dear list,
I've noticed that the "resource limit succeeded" report doesn't contain any
information about the event that triggered the limit alert. This makes it
difficult to figure out which limits succeeded when there are several alerts
at once, e.g. when monitoring both CPU usage and load, where one tends to go
up whenever the other does.
Below I'm including a patch against the current CVS version which adds more
detailed reporting for the "resource limit succeeded" case.
Lars
--------------------------------------------------------------------------------
Index: validate.c
===================================================================
RCS file: /sources/monit/monit/validate.c,v
retrieving revision 1.188
diff -u -r1.188 validate.c
--- validate.c 21 Jun 2008 15:39:27 -0000 1.188
+++ validate.c 5 Oct 2008 12:53:50 -0000
@@ -754,7 +754,6 @@
*/
static void check_process_resources(Service_T s, Resource_T r) {
- int okay= TRUE;
char report[STRLEN]={0};
ASSERT(s && r);
@@ -769,9 +768,9 @@
"cpu usage of %.1f%% matches resource limit [cpu usage%s%.1f%
%]",
s->inf->cpu_percent/10.0, operatorshortnames[r->operator],
r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' cpu usage check succeeded [current cpu usage=%.1f%
%]\n",
+ snprintf(report, STRLEN,
+ "'%s' cpu usage check succeeded [current cpu usage=%.1f%%]
\n",
s->name, s->inf->cpu_percent/10.0);
}
break;
@@ -784,9 +783,9 @@
"total cpu usage of %.1f%% matches resource limit [cpu usage
%s%.1f%%]",
s->inf->total_cpu_percent/10.0, operatorshortnames[r-
>operator],
r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' total cpu usage check succeeded [current cpu
usage=%.1f%%]\n",
+ snprintf(report, STRLEN,
+ "'%s' total cpu usage check succeeded [current cpu usage=%.
1f%%]\n",
s->name, s->inf->total_cpu_percent/10.0);
}
break;
@@ -797,9 +796,9 @@
"cpu user usage of %.1f%% matches resource limit [cpu user
usage%s%.1f%%]",
systeminfo.total_cpu_user_percent/10.0, operatorshortnames[r-
>operator],
r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' cpu user usage check succeeded [current cpu user
usage=%.1f%%]\n",
+ snprintf(report, STRLEN,
+ "'%s' cpu user usage check succeeded [current cpu user
usage=%.1f%%]\n",
s->name, systeminfo.total_cpu_user_percent/10.0);
}
break;
@@ -810,9 +809,9 @@
"cpu system usage of %.1f%% matches resource limit [cpu
system usage%s%.1f%%]",
systeminfo.total_cpu_syst_percent/10.0, operatorshortnames[r-
>operator],
r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' cpu system usage check succeeded [current cpu
system usage=%.1f%%]\n",
+ snprintf(report, STRLEN,
+ "'%s' cpu system usage check succeeded [current cpu system
usage=%.1f%%]\n",
s->name, systeminfo.total_cpu_syst_percent/10.0);
}
break;
@@ -823,9 +822,9 @@
"cpu wait usage of %.1f%% matches resource limit [cpu wait
usage%s%.1f%%]",
systeminfo.total_cpu_wait_percent/10.0, operatorshortnames[r-
>operator],
r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' cpu wait usage check succeeded [current cpu wait
usage=%.1f%%]\n",
+ snprintf(report, STRLEN,
+ "'%s' cpu wait usage check succeeded [current cpu wait
usage=%.1f%%]\n",
s->name, systeminfo.total_cpu_wait_percent/10.0);
}
break;
@@ -837,9 +836,9 @@
"mem usage of %.1f%% matches resource limit [mem usage%s%.
1f%%]",
systeminfo.total_mem_percent/10.0, operatorshortnames[r-
>operator],
r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' mem usage check succeeded [current mem usage=%.
1f%%]\n",
+ snprintf(report, STRLEN,
+ "'%s' mem usage check succeeded [current mem usage=%.1f%%]
\n",
s->name, systeminfo.total_mem_percent/10.0);
}
} else {
@@ -848,9 +847,9 @@
"mem usage of %.1f%% matches resource limit [mem usage%s%.
1f%%]",
s->inf->mem_percent/10.0, operatorshortnames[r->operator],
r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' mem usage check succeeded [current mem usage=%.
1f%%]\n",
+ snprintf(report, STRLEN,
+ "'%s' mem usage check succeeded [current mem usage=%.1f%%]
\n",
s->name, s->inf->mem_percent/10.0);
}
}
@@ -862,9 +861,9 @@
snprintf(report, STRLEN,
"mem amount of %ldkB matches resource limit [mem amount%s
%ldkB]",
systeminfo.total_mem_kbyte, operatorshortnames[r-
>operator], r->limit);
- okay= FALSE;
} else {
- DEBUG("'%s' mem amount check succeeded [current mem amount=
%ldkB]\n",
+ snprintf(report, STRLEN,
+ "'%s' mem amount check succeeded [current mem amount=
%ldkB]\n",
s->name, systeminfo.total_mem_kbyte);
}
} else {
@@ -872,9 +871,9 @@
snprintf(report, STRLEN,
"mem amount of %ldkB matches resource limit [mem amount%s
%ldkB]",
s->inf->mem_kbyte, operatorshortnames[r->operator], r-
>limit);
- okay= FALSE;
} else {
- DEBUG("'%s' mem amount check succeeded [current mem amount=
%ldkB]\n",
+ snprintf(report, STRLEN,
+ "'%s' mem amount check succeeded [current mem amount=
%ldkB]\n",
s->name, s->inf->mem_kbyte);
}
}
@@ -888,9 +887,9 @@
"[loadavg(1min)%s%.1f]",
systeminfo.loadavg[0], operatorshortnames[r->operator],
r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' loadavg(1min) check succeeded [current
loadavg(1min)=%.1f]\n",
+ snprintf(report, STRLEN,
+ "'%s' loadavg(1min) check succeeded [current loadavg(1min)=
%.1f]\n",
s->name, systeminfo.loadavg[0]);
}
break;
@@ -903,9 +902,9 @@
"[loadavg(5min)%s%.1f]",
systeminfo.loadavg[1], operatorshortnames[r->operator],
r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' loadavg(5min) check succeeded [current
loadavg(5min)=%.1f]\n",
+ snprintf(report, STRLEN,
+ "'%s' loadavg(5min) check succeeded [current loadavg(5min)=
%.1f]\n",
s->name, systeminfo.loadavg[1]);
}
break;
@@ -918,9 +917,9 @@
"[loadavg(15min)%s%.1f]",
systeminfo.loadavg[2], operatorshortnames[r->operator],
r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' loadavg(15min) check succeeded [current
loadavg(15min)=%.1f]\n",
+ snprintf(report, STRLEN,
+ "'%s' loadavg(15min) check succeeded [current
loadavg(15min)=%.1f]\n",
s->name, systeminfo.loadavg[2]);
}
break;
@@ -930,9 +929,9 @@
snprintf(report, STRLEN,
"children of %i matches resource limit [children%s%ld]",
s->inf->children, operatorshortnames[r->operator], r->limit);
- okay= FALSE;
} else {
- DEBUG("'%s' children check succeeded [current children=%i]\n",
+ snprintf(report, STRLEN,
+ "'%s' children check succeeded [current children=%i]\n",
s->name, s->inf->children);
}
break;
@@ -944,9 +943,9 @@
" [total mem amount%s%ldkB]",
s->inf->total_mem_kbyte, operatorshortnames[r->operator],
r->limit);
- okay= FALSE;
} else {
- DEBUG("'%s' total mem amount check succeeded "
+ snprintf(report, STRLEN,
+ "'%s' total mem amount check succeeded "
"[current total mem amount=%ldkB]\n", s->name, s->inf-
>total_mem_kbyte);
}
break;
@@ -958,9 +957,9 @@
" [total mem amount%s%.1f%%]",
(float)s->inf->total_mem_percent/10.0,
operatorshortnames[r->operator], (float)r->limit/10.0);
- okay= FALSE;
} else {
- DEBUG("'%s' total mem amount check succeeded "
+ snprintf(report, STRLEN,
+ "'%s' total mem amount check succeeded "
"[current total mem amount=%.1f%%]\n", s->name,
s->inf->total_mem_percent/10.0);
}
@@ -972,12 +971,7 @@
return;
}
- if(! okay) {
- Event_post(s, EVENT_RESOURCE, STATE_FAILED, r->action, "%s",
report);
- } else {
- Event_post(s, EVENT_RESOURCE, STATE_SUCCEEDED, r->action,
- "resource succeeded");
- }
+ Event_post(s, EVENT_RESOURCE, STATE_FAILED, r->action, "%s",
report);
}
_______________________________________________
monit-dev mailing list
address@hidden
http://lists.nongnu.org/mailman/listinfo/monit-dev