quick cleanup of perflogserver.py

Blake J. Harnden 2018-10-12 09:38:14 -07:00
parent b37eb2fe74
commit 5c7f9b7dc4


@@ -2,17 +2,27 @@
 #
 # (c)2011-2012 the Boeing Company
 #
-# perfmon.py - CORE server and node performace metrics logger and alarmer
-# server metrics: loadave1, 5, 15, mem, used cpu% of total, cpu1, cpu2, ..., cpun
-# node metrics: throughput, mem, cpu total, usr, sys, wait
-#
-import os, sys, time, re, optparse, signal, commands, pdb
+"""
+perflogserver.py - CORE server and node performace metrics logger and alarmer
+server metrics: loadave1, 5, 15, mem, used cpu% of total, cpu1, cpu2, ..., cpun
+node metrics: throughput, mem, cpu total, usr, sys, wait
+"""
+import commands
+import optparse
+import os
+import pdb
+import signal
+import sys
+import time
 
+
 def readfile(fname):
     lines = []
     try:
         f = open(fname, "r")
-    except:
+    except IOError:
         if options.timestamp == True:
             print str(time.time()),
             print "ERROR: failed to open file %s\n" % fname
@@ -21,6 +31,7 @@ def readfile(fname):
     f.close()
     return lines
 
+
 def numcpus():
     lines = readfile("/proc/stat")
     n = 0
@@ -30,10 +41,13 @@ def numcpus():
             n += 1
     return n
 
+
 def handler(signum, frame):
-    print "stop timestamp:", str(time.time()) + ", cyclecount=", cyclecount, ", caught signal", signum
+    print "stop timestamp:", str(
+        time.time()) + ", cyclecount=", cyclecount, ", caught signal", signum
     sys.exit(0)
 
+
 class ServerMetrics(object):
     def __init__(self):
         self.smetrics = {"serverloadavg1": 0.0,
@@ -43,22 +57,20 @@ class ServerMetrics(object):
                          "serverusedcputime": 0.0,
                          "processorusedcputime": []}
 
-    # set values from val = (nump, ldavg1, ldavg5, adavg15, mem, cpu, p1cpu, p2cpu...)
     def setvalues(self, val):
+        """
+        Set values from val = (nump, ldavg1, ldavg5, adavg15, mem, cpu, p1cpu, p2cpu...).
+        """
         self.smetrics["serverloadavg1"] = val[0]
         self.smetrics["serverloadavg5"] = val[1]
         self.smetrics["serverloadavg15"] = val[2]
         self.smetrics["serverusedmemory"] = val[4]
         self.smetrics["serverusedcputime"] = val[5]
-        #print self.smetrics.keys(), self.smetrics.values()
         pcpu = []
         for ind in range(5, len(val)):
             pcpu.append(val[ind])
-        # print "[" + ",".join(map(lambda(x):str(round(x, 2)), pcpu)) +"]"
         self.smetrics["processorusedcputime"] = pcpu
-        #print self.smetrics.keys(), self.smetrics.values()
 
     def setvalue(self, key, val):
         self.smetrics[key] = val
@@ -72,18 +84,17 @@ class ServerMetrics(object):
     def tocsv(self):
         rv = "Server"
         for k in self.smetrics:
-            # print k, self.smetrics[k]
             if isinstance(self.smetrics[k], float):
                 rv += ", %.2f" % self.smetrics[k]
             else:
                 if isinstance(self.smetrics[k], list):
-                    rv += ", [" + \
-                        ", ".join(map(lambda(x):str(round(x, 2)), self.smetrics[k])) \
-                        + "]"
+                    values = ", ".join(str(round(x, 2)) for x in self.smetrics[k])
+                    rv += ", [%s]" % values
                 else:
                     rv += ", " + str(self.smetrics[k])
         return rv
 
+
 def readserverthresholds(filename):
     if filename is None:
         return
@@ -91,91 +102,108 @@ def readserverthresholds(filename):
     lines = readfile(filename)
     for l in lines:
         mval = l.strip().split('=')
-        #print "read line %s" % mval
         if len(mval) > 1:
             thekey = mval[0].strip()
             theval = mval[1].strip()
             if thekey in serverthresholds.getkeys():
                 serverthresholds.setvalue(thekey, float(theval))
-                # print thekey," = %.2f" % float(theval)
 
+
 def checkserverthreshold(metricval):
-    # print out an alarm if a ServerMetrics value crosses threshold
+    """
+    Print out an alarm if a ServerMetrics value crosses threshold.
+    """
     for key in serverthresholds.getkeys():
-        # print "checking threshold of key = ", key
         if key == "processorusedcputime":
             pcpus = metricval.getvalue(key)
-            # print key, pcpus, serverthresholds[key]
             for ind in range(0, len(pcpus)):
-                # print ind, pcpus[ind]
                 if pcpus[ind] > serverthresholds.getvalue(key):
                     alarm = ["server", os.uname()[1], str(ind) + key,
                              "%.2f" % pcpus[ind], ">", serverthresholds.getvalue(key)]
                     if options.timestamp:
                         print str(time.time()) + ",",
-                    print ", ".join(map(lambda(x):str(x), alarm))
+                    print ", ".join(str(x) for x in alarm)
         else:
             if metricval.getvalue(key) > serverthresholds.getvalue(key):
                 alarm = ["server", os.uname()[1], key,
                          "%.2f" % metricval.getvalue(key), ">", serverthresholds.getvalue(key)]
                 if options.timestamp:
                     print str(time.time()) + ",",
-                print ", ".join(map(lambda(x):str(x), alarm))
+                print ", ".join(str(x) for x in alarm)
 
+
 def collectservercputimes():
-    # return cpu times in ticks of this server total and each processor 3*(1+#cpu) columns
-    # (user+nice, sys, idle) from each /proc/stat cpu lines assume columns are:
-    # cpu# user nice sys idle iowait irq softirq steal guest (man 5 proc)
+    """
+    Return cpu times in ticks of this server total and each processor 3*(1+#cpu) columns
+    (user+nice, sys, idle) from each /proc/stat cpu lines assume columns are:
+    cpu# user nice sys idle iowait irq softirq steal guest (man 5 proc)
+    """
     rval = {}
     lines = readfile("/proc/stat")
     for i in range(ncpus + 1):
         items = lines[i].split()
-        (user, nice, sys, idle) = map(lambda(x): int(x), items[1:5])
+        user, nice, sys, idle = [int(x) for x in items[1:5]]
         rval[i] = [user+nice, sys, idle]
     return rval
 
+
 def csvservercputimes(cputimes):
-    # return a csv string of this server total and each processor's cpu times
-    # (usr, sys, idle) in ticks
+    """
+    Return a csv string of this server total and each processor's cpu times
+    (usr, sys, idle) in ticks.
+    """
     rval = ''
     for i in range(len(cputimes)):
-        rval += ', '.join(map(lambda(x):str(x), cputimes[i]))
+        rval += ", ".join(str(x) for x in cputimes[i])
     return rval
 
+
 def calcservercputimes(cputimea, cputimeb):
-    # return cpu used/total % of this server total and each processor (1+#cpu columns)
+    """
+    Return cpu used/total % of this server total and each processor (1+#cpu columns).
+    """
     p = {}
     for n in range(ncpus + 1):
-        # print cputimeb[n]
        p[n] = []
        for i in range(len(cputimea[n])):
            p[n].append(cputimeb[n][i] - cputimea[n][i])
-        # print p[n]
-        total = sum(p[n]) # cpu times total delta
-        # print total
+        # cpu times total delta
+        total = sum(p[n])
        if total == 0:
            p[n] = 0.0
        else:
            p[n] = 100 - ((100.0 * p[n][-1]) / total)
     return p
 
+
 def collectservermems():
-    # return memory (total, free) in KB from proc/meminfo
+    """
+    Return memory (total, free) in KB from proc/meminfo.
+    """
     lines = readfile("/proc/meminfo")
-    mem = map(lambda(x):x.split(), lines[0:2])
-    return map(lambda(x):int(x), zip(*mem)[1])
+    mem = [x.split() for x in lines[0:2]]
+    return [int(x) for x in zip(*mem)[1]]
 
+
 def csvservermems(mems):
-    # return a csv string of this server memory (total, free)
-    return ", ".join(map(lambda x: str(x), mems))
+    """
+    Return a csv string of this server memory (total, free).
+    """
+    return ", ".join(str(x) for x in mems)
 
+
 def calcserverusedmem(mems):
-    # return int(100*(MemTotal-MemFree)/MemTotal) from /proc/meminfo
+    """
+    Return int(100*(MemTotal-MemFree)/MemTotal) from /proc/meminfo.
+    """
     return 100 * (mems[0] - mems[1]) / mems[0]
 
+
 def collectservermetrics(cputimes, mems, thresholdcheck):
-    # return ServerMetrics object with a dictionary of
-    # loadavg1,loadavg5,loadavg15, usedmem%, usedcpu% for total, cpu1, cpu2, ...
+    """
+    Return ServerMetrics object with a dictionary of
+    loadavg1,loadavg5,loadavg15, usedmem%, usedcpu% for total, cpu1, cpu2, ...
+    """
     metricval = []
     ldavgs = os.getloadavg()
     for v in ldavgs:
@@ -184,30 +212,35 @@ def collectservermetrics(cputimes, mems, thresholdcheck):
     for i in range(ncpus + 1):
         metricval.append(cputimes[i])
-        # print cputimes[i]
     srvmetrics = ServerMetrics()
     srvmetrics.setvalues(metricval)
-    # print srvmetrics.tocsv()
     if thresholdcheck:
         checkserverthreshold(srvmetrics)
     return srvmetrics
 
+
 def csvservermetrics(srvmetrics):
-    # return a csv string of ServerMetrics.tocsv()
-    # loadavg1,loadavg5,loadavg15, usedmem%, usedcpu% for total, cpu1, cpu2, ...
+    """
+    Return a csv string of ServerMetrics.tocsv()
+    loadavg1,loadavg5,loadavg15, usedmem%, usedcpu% for total, cpu1, cpu2, ...
+    """
     rv = ""
     if options.timestamp:
         rv = str(time.time()) + ", "
     rv += srvmetrics.tocsv()
     return rv
 
+
 def csvserverbaseline():
-    # return a csv string of raw server metrics data: memfree, memtotal, cpuused, cpusystem, cpuidle
+    """
+    Return a csv string of raw server metrics data: memfree, memtotal, cpuused, cpusystem, cpuidle.
+    """
     return "memory (total, free) = " + csvservermems(collectservermems()) + "\ncputime (used, sys, idl) = " + csvservercputimes(collectservercputimes())
 
+
 class NodeMetrics(object):
     def __init__(self):
         self.nmetrics = {"nodethroughput": 0.0,
@@ -217,8 +250,10 @@ class NodeMetrics(object):
                          "nodesystemcpu": 0.0,
                          "nodewaitcpu": 0.0}
 
-    # set values from val = (throughput, mem, tcpu, ucpu, scpu, wcpu):
     def setvalues(self, val):
+        """
+        Set values from val = (throughput, mem, tcpu, ucpu, scpu, wcpu).
+        """
         self.nmetrics["nodethroughput"] = val[0]
         self.nmetrics["nodeusedmemory"] = val[1]
         self.nmetrics["nodetotalcpu"] = val[2]
@@ -236,7 +271,7 @@ class NodeMetrics(object):
         return self.nmetrics.keys()
 
     def tocsv(self):
-        return ", ".join(map(lambda(x):str(x), self.nmetrics.values()))
+        return ", ".join(str(x) for x in self.nmetrics.values())
 
 
 class LogSession(object):
@@ -254,14 +289,14 @@ class LogSession(object):
         self.nodemetricsC = {}
 
     def getpids(self):
-        # return dict of all CORE session pids in a dict using node name as the keys
-        # parent pid (vnoded) is the first value
+        """
+        Return dict of all CORE session pids in a dict using node name as the keys
+        parent pid (vnoded) is the first value.
+        """
         self.pids = {}
-        nodes = commands.getstatusoutput("ls /tmp/pycore.%s/*pid" % options.session)
+        nodes = commands.getstatusoutput(
+            "ls /tmp/pycore.%s/*pid" % options.session)
         if nodes[0] != 0:
-            # if options.timestamp == True:
-            # print str(time.time()),
-            # print "CORE session %s has not created nodes" % options.session
             return
         nodes = nodes[1].split('\n')
@@ -274,7 +309,6 @@
 
         # build self.pids dict with key=nodename and val="ppid,pid,cmd"
         for nname in self.pids:
-            # print nname, self.pids[nname]
             if self.pids[nname] == "":
                 if options.timestamp == True:
                     print str(time.time()),
@@ -289,7 +323,6 @@
                 if ppid == val[0]:
                     childprocs.append([val[1], val[2]])
             self.pids[nname] = childprocs
-            # print nname, self.pids[nname]
         return self.pids
 
     def printsesspids(self):
@@ -298,68 +331,69 @@ class LogSession(object):
         for pp in self.pids:
             if self.pids[pp] != []:
                 for ap in range(len(self.pids[pp]) - 1):
-                    print ", " + self.pids[pp][ap][0], # ap pid
-                    print ", " + self.pids[pp][ap][1], # ap cmd
-                    procmetrics = map(lambda(x):str(x),self.pids[pp][ap][-1])
+                    # ap pid
+                    print ", " + self.pids[pp][ap][0],
+                    # ap cmd
+                    print ", " + self.pids[pp][ap][1],
+                    procmetrics = [str(x) for x in self.pids[pp][ap][-1]]
                     print ", " + ", ".join(procmetrics),
-                nodemetrics = map(lambda(x):str(x), self.pids[pp][-1])
+                nodemetrics = [str(x) for x in self.pids[pp][-1]]
                 print ", " + ", ".join(nodemetrics)
 
     def getprocessmetrics(self, pid):
-        # return [cpu#, vsize(kb), ttime, utime, stime, wtime]
-        # from a /proc/pid/stat (a single line file) assume columns are:
-        # pid(0) comm(1) state ppid pgrp sess tty_nr tpgid flags
-        # minflt cmiflt majflt cmajflt # utime(12) stime cutime cstime
-        # priority nice num_threads itrealvalue starttime vsize(22) rss rsslim
-        # startcode endcode startstack kstkesp signal blocked sigignore sigcatch
-        # wchan nswap cnswap exit_signal processor(38) rt_priority
-        # policy ioblock guest_time cguest_time (man 5 proc)
-        #rval = ProcessMetrics()
-        #rval.__init__()
+        """
+        Return [cpu#, vsize(kb), ttime, utime, stime, wtime]
+        from a /proc/pid/stat (a single line file) assume columns are:
+        pid(0) comm(1) state ppid pgrp sess tty_nr tpgid flags
+        minflt cmiflt majflt cmajflt # utime(12) stime cutime cstime
+        priority nice num_threads itrealvalue starttime vsize(22) rss rsslim
+        startcode endcode startstack kstkesp signal blocked sigignore sigcatch
+        wchan nswap cnswap exit_signal processor(38) rt_priority
+        policy ioblock guest_time cguest_time (man 5 proc)
+        """
         rval = {}
         lines = readfile("/proc/" + pid + "/stat")
         if lines == []:
             return rval
         items = lines[0].split()
-        (utime, stime, cutime, cstime) = map(lambda(x):int(x), items[13:17])
-        # print ">???", utime, stime, cutime, cstime
+        utime, stime, cutime, cstime = [int(x) for x in items[13:17]]
         rval = (items[38],  # last run processor
                 int(items[22])/1000,  # process virtual mem in kb
                 utime + stime + cutime + cstime,  # totoal time
                 utime,  # user time
                 stime,  # system time
                 cutime + cstime)  # wait time
-        # print "pid --- processmetrics", rval
         return rval
 
     def getnodethroughput(self, pid):
-        # return node throughput of total receive and transmit packets in kb
+        """
+        Return node throughput of total receive and transmit packets in kb.
+        """
         lines = readfile("/proc/" + pid + "/net/dev")
         if lines == []:
             return -0.00
-        ifs = map(lambda(x): x.split(), lines[2:])
+        ifs = [x.split() for x in lines[2:]]
         ifm = zip(*ifs)
-        rv = sum(map(lambda(x):int(x), ifm[1])) # received bytes
-        tr = sum(map(lambda(x):int(x), ifm[9])) # transmited bytes
-        #print 'node thruput :', rv, tr, (rv + tr)/1000
+        rv = sum(int(x) for x in ifm[1])  # received bytes
+        tr = sum(int(x) for x in ifm[9])  # transmited bytes
        return (rv + tr)/1000
 
     def getnodemetrics(self, mindex):
-        # return NodeMetrics with indexed by nodename, values are rows of
-        # [ [ppid, vnoded, [cpu#, vmem(kb), ttime, utime, stime, wtime]],
-        # [cpid, cmd, [cpu#, vmem(kb), ttime, utime, stime, wtime]], ... ,
-        # [thrput, vmem(kb), ttime, utime, stime, wtime]]
+        """
+        Return NodeMetrics with indexed by nodename, values are rows of
+        [ [ppid, vnoded, [cpu#, vmem(kb), ttime, utime, stime, wtime]],
+        [cpid, cmd, [cpu#, vmem(kb), ttime, utime, stime, wtime]], ... ,
+        [thrput, vmem(kb), ttime, utime, stime, wtime]]
+        """
         if mindex == 'a':
             metricref = self.nodemetricsA
         else:
             metricref = self.nodemetricsB
         self.getpids()
-        # print " inside getnodemetrics()", self.pids
         if self.pids == {}:
             return {}
         for nod in self.pids:
             nmetric = NodeMetrics()
             nmetric.__init__()
@@ -375,18 +409,18 @@ class LogSession(object):
                 nodeapps[ap] = procm
                 self.pids[nod][ap].append(nodeapps[ap])
             processm = zip(*nodeapps.values())  # get overall node metrics
-            # print processm
             if len(processm) > 0:
-                # if nod == 'n6':
-                # print nod, self.getnodethroughput(self.pids[nod][0][0])
                 nmetric.setvalues((self.getnodethroughput(self.pids[nod][0][0]),
-                                   sum(map(lambda(x):int(x), processm[1])), # vsize(kb)
-                                   sum(map(lambda(x):int(x), processm[2])), # ttime
-                                   sum(map(lambda(x):int(x), processm[3])), # utime
-                                   sum(map(lambda(x):int(x), processm[4])), # stime
-                                   sum(map(lambda(x):int(x), processm[5])))) # wtime
+                                   # vsize(kb)
+                                   sum(int(x) for x in processm[1]),
+                                   # ttime
+                                   sum(int(x) for x in processm[2]),
+                                   # utime
+                                   sum(int(x) for x in processm[3]),
+                                   # stime
+                                   sum(int(x) for x in processm[4]),
+                                   sum(int(x) for x in processm[5])))  # wtime
                 metricref[nod] = nmetric
-                # print nod, self.pids[nod][0][0], metricref[nod].tocsv()
         return metricref
 
     def setnodemetricsC(self, key, val):
@@ -412,30 +446,27 @@ class LogSession(object):
         lines = readfile(filename)
         for l in lines:
             mval = l.strip().split('=')
-            # print "read line %s" % mval
             if len(mval) > 1:
                 thekey = mval[0].strip()
                 theval = mval[1].strip()
                 if thekey in self.nodethresholds.getkeys():
                     self.nodethresholds.setvalue(thekey, float(theval))
-                    #print thekey," = %.2f" % float(theval)
 
     def checknodethresholds(self, nname):
-        # print "check node thresholds", nname
         calcm = self.nodemetricsC[nname]
         for keyname in self.nodethresholds.getkeys():
-            # print "HIII", keyname, calcm.getvalue(keyname), self.nodethresholds.getvalue(keyname)
             if float(calcm.getvalue(keyname)) > float(self.nodethresholds.getvalue(keyname)):
-                # print calculatednodem.getvalue(m)
-                alarm = ["node", nname + "/" + self.pids[nname][0][0], keyname,\
+                alarm = ["node", nname + "/" + self.pids[nname][0][0], keyname,
                          calcm.getvalue(keyname), ">", self.nodethresholds.getvalue(keyname)]
                 if options.timestamp:
                     print str(time.time()) + ",",
-                print ", ".join(map(lambda(x):str(x), alarm))
+                print ", ".join(str(x) for x in alarm)
 
     def calcnodemetrics(self, cputimea, cputimeb, mems):
-        # return a dict of nodemetrics indexed by node name
-        # nodemetrics[nodename][-1] = node/host%
+        """
+        Return a dict of nodemetrics indexed by node name
+        nodemetrics[nodename][-1] = node/host%.
+        """
         p = []
         for i in range(len(cputimeb[0])):
             p.append(cputimeb[0][i] - cputimea[0][i])
@@ -464,32 +495,25 @@ class LogSession(object):
                 print "Warning: nodes %s is not fully instanciated" % nod
             else:
                 # calc throughput kbps
-                #print "node b : ", nodesb[nod].tocsv()
-                #print "node a : ", nodesa[nod].tocsv()
-                #if nod == 'n6':
-                #print nodesb[nod].getvalue("nodethroughput"), nodesa[nod].getvalue("nodethroughput")
-                calcm.setvalue("nodethroughput", "%.2f" % (8 * (nodesb[nod].getvalue("nodethroughput") \
+                calcm.setvalue("nodethroughput", "%.2f" % (8 * (nodesb[nod].getvalue("nodethroughput")
                                                                 - nodesa[nod].getvalue("nodethroughput")) / options.interval))
                 # calc mem node used / host used
-                calcm.setvalue("nodeusedmemory", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodeusedmemory") / hostusedmem)))
+                calcm.setvalue("nodeusedmemory", "%.2f" % (
+                    100.0 * (nodesb[nod].getvalue("nodeusedmemory") / hostusedmem)))
                 # calc total cpu time node / host
-                calcm.setvalue("nodetotalcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodetotalcpu")\
+                calcm.setvalue("nodetotalcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodetotalcpu")
                                                                   - nodesa[nod].getvalue("nodetotalcpu")) / hostusedcpu))
                 # calc user cpu time node / host
-                calcm.setvalue("nodeusercpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodeusercpu")\
+                calcm.setvalue("nodeusercpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodeusercpu")
                                                                  - nodesa[nod].getvalue("nodeusercpu")) / hostusedcpu))
                 # calc system cpu time node / host
-                calcm.setvalue("nodesystemcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodesystemcpu")\
+                calcm.setvalue("nodesystemcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodesystemcpu")
                                                                    - nodesa[nod].getvalue("nodesystemcpu")) / hostusedcpu))
                 # calc waitcpu time node / host
-                calcm.setvalue("nodewaitcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodewaitcpu")\
+                calcm.setvalue("nodewaitcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodewaitcpu")
                                                                  - nodesa[nod].getvalue("nodewaitcpu")) / hostusedcpu))
-                #print nod, calcm.tocsv()
-                #print '=========================='
                 logsession.nodemetricsC[nod] = calcm
-                # logsession.printnodemetrics('c')
                 if options.alarm is not None:
                     logsession.checknodethresholds(nod)
@@ -500,11 +524,12 @@ class LogSession(object):
         return nodesb
 
+
 def main():
     usagestr = "%prog [-h] [options] [args]\n\nLog server and optional CORE session metrics to stdout."
     parser = optparse.OptionParser(usage=usagestr)
     parser.set_defaults(interval=2, timestamp=False,
-                        configfile = "/etc/core/perflogserver.conf",\
+                        configfile="/etc/core/perflogserver.conf",
                         alarm=True, session=None)
     parser.add_option("-i", "--interval", dest="interval", type=int,
                       help="seconds to wait between samples; default=%s" %
@@ -528,8 +553,7 @@ def main():
     global logsession
     global cyclecount
 
-    (options, args) = parser.parse_args()
-    # print options
+    options, _args = parser.parse_args()
 
     signal.signal(signal.SIGINT, handler)
     signal.signal(signal.SIGTERM, handler)
@@ -548,35 +572,28 @@ def main():
     if options.session is not None:
         logsession = LogSession()
-        # print logsession
 
     # mark host log baseline
-    print "server: ", ", ".join(map(lambda(x):str(x), os.uname())), ",", ncpus, "CPU cores"
+    print "server: ", ", ".join(str(x) for x in os.uname()), ",", ncpus, "CPU cores"
     print "start timestamp:", time.time(), ", baseline data: "
     print csvserverbaseline()
-    print "server metrics: ", ", ".join(map(lambda(x):str(x), serverthresholds.getkeys()))
+    print "server metrics: ", ", ".join(str(x) for x in serverthresholds.getkeys())
     if options.session is not None:
-        print "node metrics: nodename, ", ", ".join(map(lambda(x):str(x), logsession.nodethresholds.getkeys()))
+        print "node metrics: nodename, ", ", ".join(str(x) for x in logsession.nodethresholds.getkeys())
 
     cyclecount = 0
     while True:
         cputimea = collectservercputimes()
-        if options.session is not None:
-            nodesa = logsession.getnodemetrics('a')
-            # print "nodes a:", nodesa
         time.sleep(options.interval)
         cputimeb = collectservercputimes()
         mems = collectservermems()
 
         calccputime = calcservercputimes(cputimea, cputimeb)
-        m = csvservermetrics(collectservermetrics(calccputime, mems, options.alarm))
+        m = csvservermetrics(collectservermetrics(
+            calccputime, mems, options.alarm))
         print m
         if options.session is not None:
             nodesb = logsession.getnodemetrics('b')
-            # print "nodes b:", nodesb
             if nodesb != {}:
                 logsession.calcnodemetrics(cputimea, cputimeb, mems)
                 logsession.printnodemetrics('c')
@@ -584,5 +601,6 @@ def main():
         sys.stdout.flush()
         cyclecount = cyclecount + 1
 
+
 if __name__ == "__main__":
     main()
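
Most of the change above is one mechanical pattern: the Python 2-only map(lambda(x): ...) calls, which rely on tuple-parameter lambdas that Python 3 removed, become list comprehensions and generator expressions. A minimal standalone sketch of the before/after pattern, with illustrative values rather than code from the commit:

# Before (Python 2 only - a tuple-parameter lambda is a SyntaxError in Python 3):
#     procmetrics = map(lambda(x): str(x), values)
#     total = sum(map(lambda(x): int(x), values))
# After (equivalent, and valid in both Python 2 and 3):
values = ["1", "2", "3"]
procmetrics = [str(x) for x in values]   # list comprehension replaces map()
total = sum(int(x) for x in values)      # generator expression feeds sum() directly
print(procmetrics)
print(total)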
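
For reference, the used-CPU percentage the script logs comes from two /proc/stat samples: each cpu row is reduced to (user+nice, system, idle) ticks, and used% = 100 - 100 * idle_delta / total_delta over the sampling interval (collectservercputimes() and calcservercputimes() above). Below is a minimal, Linux-only sketch of the same calculation for the aggregate cpu row; cpu_ticks() and usedcpupercent() are illustrative names, and the script itself also walks the per-processor cpuN rows via readfile():

import time


def cpu_ticks():
    # first line of /proc/stat is the aggregate "cpu" row:
    # cpu user nice system idle iowait irq softirq steal guest (man 5 proc)
    with open("/proc/stat") as f:
        items = f.readline().split()
    user, nice, system, idle = [int(x) for x in items[1:5]]
    return [user + nice, system, idle]


def usedcpupercent(interval=2):
    a = cpu_ticks()
    time.sleep(interval)
    b = cpu_ticks()
    delta = [b[i] - a[i] for i in range(len(a))]
    total = sum(delta)
    if total == 0:
        return 0.0
    # used% of total = 100% minus the idle share of the delta
    return 100 - (100.0 * delta[-1]) / total


if __name__ == "__main__":
    print("%.2f" % usedcpupercent())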
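
Per-process figures come from /proc/&lt;pid&gt;/stat: getprocessmetrics() above sums utime, stime, cutime and cstime (fields 14-17) for total ticks, reads vsize (field 23) for memory, and field 39 for the last processor the task ran on. A standalone sketch under the same assumptions as the original (whitespace-split fields, so a comm value containing spaces would break the indexing); process_metrics() is an illustrative name, not part of the commit:

import os


def process_metrics(pid):
    # /proc/<pid>/stat is a single line; see man 5 proc for the field layout
    with open("/proc/%s/stat" % pid) as f:
        items = f.readline().split()
    utime, stime, cutime, cstime = [int(x) for x in items[13:17]]
    return (items[38],                        # last processor the task ran on
            int(items[22]) // 1000,           # virtual memory size, roughly in kB
            utime + stime + cutime + cstime,  # total cpu ticks
            utime,                            # user ticks
            stime,                            # system ticks
            cutime + cstime)                  # waited-for children ticks


# example: metrics for this process itself
print(process_metrics(os.getpid()))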