EVOLUTION-MANAGER

Edit File: check_cpu

#!/usr/bin/env python
##############################################################################
# Copyright (c) Members of the EGEE Collaboration. 2011.
# See http://www.eu-egee.org/partners/ for details on the copyright
# holders.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# NAME :        check_cpu
#
# DESCRIPTION : Checks the CPU activity
#
# AUTHORS :     Alejandro.Alvarez.Ayllon@cern.ch
#
##############################################################################

import commands
import os
import time
from lcgdmcommon import *

class cpu:
  """
  Stores some information for the CPU

  An instance holds one sample of the CPU time counters named in FIELDS.
  Every attribute assigned on an instance is converted to an integer, and
  subtracting one sample from another gives the per-field difference.
  """

  # Fields names, in the order used for thresholds and for the result of
  # a subtraction
  FIELDS  = ["user", "nice", "system", "idle", "iowait", "irq", "softirq"]
  NFIELDS = len(FIELDS)

  def __setattr__(self, key, value):
    """
    All attributes are integers

    @param key   The attribute name
    @param value Anything accepted by int(); the converted value is stored
    @return The value as it was passed (ignored by a plain assignment)
    """
    # Written through __dict__ so the assignment does not re-enter
    # __setattr__
    self.__dict__[key] = int(value)
    return value

  def __sub__(self, other):
    """
    Subtracts every field, and returns a list with the result

    @param other The sample to subtract. Both samples need every field of
                 FIELDS already assigned, otherwise KeyError is raised
    @return A list with self.<field> - other.<field>, following FIELDS order
    """
    return [self.__dict__[f] - other.__dict__[f] for f in self.FIELDS]

class check_cpu:
  "Checks the CPU activity"
  # NOTE(review): the docstring above is left untouched on purpose; it is
  # presumably displayed by the common runner in lcgdmcommon - confirm before
  # rewording it.
  #
  # The probe reads the first line of PROC_STAT twice, `interval` seconds
  # apart, and reports which share of the elapsed CPU time was spent in each
  # of the states listed in cpu.FIELDS.
  __version__     = "0.0.1"
  __nagios_id__   = "DM-CPU"

  # Constants
  PROC_STAT = "/proc/stat"

  # Defaults
  # One threshold per entry of cpu.FIELDS, in the same order
  DEFAULT_WARNING     = "60,60,60,100,80,60,60"
  DEFAULT_CRITICAL    = "70,70,70,100,90,70,70"
  # Seconds between the two samples
  DEFAULT_INTERVAL    = 5

  # Specific parameters, where key = short, value = long (i.e. {"h":"help", "C:":"command="})
  # getopt format. The long version will be the one passed even when the short is specified
  __additional_opts__ = {"w:": "warning=",
                         "c:": "critical=",
                         "i:": "interval="}

  # Specific usage information
  __usage__ = """
\t-w, --warning\tSets the warning values. Default: %s
\t-c, --critical\tSets the critical values. Default: %s
\t-i, --interval\tMeasure interval. Default: %d

The warning and critical value order is %s. The value will be considered percentage over the total. There is no need to use the %% symbol. The result identify the percentage of time the CPU has spent performing the following operation:

\tuser:\t\tnormal processes executing in user mode
\tnice:\t\tniced processes executing in user mode
\tsystem:\t\tprocesses executing in kernel mode
\tidle:\t\ttwiddling thumbs
\tiowait:\t\twaiting for I/O to complete
\tirq:\t\tservicing interrupts
\tsoftirq:\tservicing softirqs

Description of work executed by the probe:

\t1. Get informations about cpu activity using command /proc/stat
\t2. wait 5 second
\t3. Get new informations about cpu activity
\t4. Considering these two values, compute the percentage the cpu spent in each mode
\t5. Return values to nagios
\t\t Warning alert is triggered if a cpu state reach is corresponding threshold (in term of percentage of total usage)
\t\t Critical alert is triggered if a cpu state reach is corresponding threshold (in term of percentage of total usage) 
""" % (DEFAULT_WARNING, DEFAULT_CRITICAL, DEFAULT_INTERVAL, ",".join(cpu.FIELDS))

  # Methods
  def __init__(self, opt = None, args = None):
    """
    Constructor

    @param opt  Contains a dictionary with the long option name as the key, and the argument as value
    @param args Contains the arguments not associated with any option (not used by this probe)
    """
    # None instead of {} / [] as defaults, so no mutable object is shared
    # between calls
    if opt is None:
      opt = {}

    # Parameters
    opt_warning  = opt.get("warning",  self.DEFAULT_WARNING)
    opt_critical = opt.get("critical", self.DEFAULT_CRITICAL)

    # get_tuple and real_value come from lcgdmcommon; presumably they split
    # the comma separated string into cpu.NFIELDS items and resolve each
    # item against a total of 100 - TODO confirm
    opt_warning  = get_tuple(opt_warning,  cpu.NFIELDS)
    opt_critical = get_tuple(opt_critical, cpu.NFIELDS)

    # self.warning[i] / self.critical[i] apply to cpu.FIELDS[i]
    self.warning  = []
    self.critical = []
    for i in range(0, cpu.NFIELDS):
      self.warning.append(real_value(opt_warning[i],  100))
      self.critical.append(real_value(opt_critical[i], 100))

    # Seconds to wait between the two samples
    if "interval" in opt:
      self.interval = int(opt["interval"])
    else:
      self.interval = self.DEFAULT_INTERVAL

  def get_status(self):
    """
    Takes one sample of the CPU counters

    @return A cpu instance filled from the first line of PROC_STAT
    """
    # cat comes from lcgdmcommon; presumably it returns the lines of the
    # file, the first one being the aggregated "cpu" line - TODO confirm
    stat_line = cat(self.PROC_STAT)[0]
    c = cpu()
    # Column 0 is the label, the next seven columns follow cpu.FIELDS order
    (c.user, c.nice, c.system, c.idle, c.iowait, c.irq, c.softirq) = stat_line.split()[1:8]
    return c

  def main(self):
    """
    Test code itself. May raise exceptions.

    Samples the counters twice, self.interval seconds apart, turns the
    difference of every field into a percentage of the total difference and
    compares each percentage with its warning and critical threshold.

    @return A tuple (exit code, message, performance)
    @raise ZeroDivisionError If the counters did not change between the samples
    """
    debug("Using %s as source" % self.PROC_STAT)

    initial = self.get_status()
    time.sleep(self.interval)
    final   = self.get_status()

    diff  = final - initial
    total = sum(diff)
    if total == 0:
      # Same exception type the division below would raise, but with a
      # message that explains what happened
      raise ZeroDivisionError("No CPU time elapsed between the two samples of %s" % self.PROC_STAT)

    status = EX_OK
    prefix = ""
    for i in range(0, cpu.NFIELDS):
      diff[i] = (diff[i] * 100.0) / total

      # Critical is tested before warning: a value above the critical
      # threshold is normally above the warning one too, and must not be
      # reported as a plain warning
      if diff[i] > self.critical[i]:
        level = EX_CRITICAL
      elif diff[i] > self.warning[i]:
        level = EX_WARNING
      else:
        continue

      # Keep the first offender, unless a later field is more severe
      if status == EX_OK or (level == EX_CRITICAL and status != EX_CRITICAL):
        status = level
        prefix = "%s is using %.2f%% of the CPU time " % (cpu.FIELDS[i], diff[i])

    # Message string: every field in extended verbosity, only idle otherwise
    if verbosity(V_EXTENDED):
      indexes = range(0, cpu.NFIELDS)
    else:
      indexes = [cpu.FIELDS.index("idle")]
    # Lead with the offending field (empty when everything is fine) so the
    # reason of the alert is visible
    msg = prefix + " ".join([ "%s=%.2f%%" % (cpu.FIELDS[i].capitalize(), diff[i]) for i in indexes ])

    # Performance data: value;warning;critical for every field
    performance = " ".join([ "%s=%.2f%%;%d;%d" % (cpu.FIELDS[i], diff[i], self.warning[i], self.critical[i]) for i in range(0, cpu.NFIELDS) ])

    return (status, msg, performance)

# Script entry point: when executed (not imported), pass the probe class
# to run(), which is expected to come from lcgdmcommon
if __name__ == "__main__":
  run(check_cpu)