quick cleanup of perflogserver.py
parent b37eb2fe74
commit 5c7f9b7dc4
1 changed file with 359 additions and 341 deletions
@@ -2,25 +2,36 @@
 #
 # (c)2011-2012 the Boeing Company
 #
-# perfmon.py - CORE server and node performace metrics logger and alarmer
-# server metrics: loadave1, 5, 15, mem, used cpu% of total, cpu1, cpu2, ..., cpun
-# node metrics: throughput, mem, cpu total, usr, sys, wait
-#
-import os, sys, time, re, optparse, signal, commands, pdb
-
+"""
+perflogserver.py - CORE server and node performace metrics logger and alarmer
+server metrics: loadave1, 5, 15, mem, used cpu% of total, cpu1, cpu2, ..., cpun
+node metrics: throughput, mem, cpu total, usr, sys, wait
+"""
+
+import commands
+import optparse
+import os
+import pdb
+import signal
+import sys
+import time
+
+
 def readfile(fname):
-    lines=[]
+    lines = []
     try:
         f = open(fname, "r")
-    except:
+    except IOError:
         if options.timestamp == True:
-            print str(time.time()),
-        print "ERROR: failed to open file %s\n" % fname
-    else :
-        lines = f.readlines()
-        f.close()
+            print str(time.time()),
+        print "ERROR: failed to open file %s\n" % fname
+    else:
+        lines = f.readlines()
+        f.close()
     return lines
 
+
 def numcpus():
     lines = readfile("/proc/stat")
     n = 0
@@ -30,35 +41,36 @@ def numcpus():
             n += 1
     return n
 
 
 def handler(signum, frame):
-    print "stop timestamp:", str(time.time()) + ", cyclecount=", cyclecount, ", caught signal", signum
+    print "stop timestamp:", str(
+        time.time()) + ", cyclecount=", cyclecount, ", caught signal", signum
    sys.exit(0)
 
 
 class ServerMetrics(object):
     def __init__(self):
-        self.smetrics = { "serverloadavg1" : 0.0,
-                          "serverloadavg5" : 0.0,
-                          "serverloadavg15" : 0.0,
-                          "serverusedmemory" : 0.0,
-                          "serverusedcputime" : 0.0,
-                          "processorusedcputime" : [] }
+        self.smetrics = {"serverloadavg1": 0.0,
+                         "serverloadavg5": 0.0,
+                         "serverloadavg15": 0.0,
+                         "serverusedmemory": 0.0,
+                         "serverusedcputime": 0.0,
+                         "processorusedcputime": []}
 
-    # set values from val = (nump, ldavg1, ldavg5, adavg15, mem, cpu, p1cpu, p2cpu...)
     def setvalues(self, val):
+        """
+        Set values from val = (nump, ldavg1, ldavg5, adavg15, mem, cpu, p1cpu, p2cpu...).
+        """
         self.smetrics["serverloadavg1"] = val[0]
         self.smetrics["serverloadavg5"] = val[1]
         self.smetrics["serverloadavg15"] = val[2]
         self.smetrics["serverusedmemory"] = val[4]
         self.smetrics["serverusedcputime"] = val[5]
-        #print self.smetrics.keys(), self.smetrics.values()
 
-        pcpu = []
-        for ind in range(5,len(val)):
-            pcpu.append(val[ind])
-        # print "[" + ",".join(map(lambda(x):str(round(x, 2)), pcpu)) +"]"
-
+        pcpu = []
+        for ind in range(5, len(val)):
+            pcpu.append(val[ind])
         self.smetrics["processorusedcputime"] = pcpu
-        #print self.smetrics.keys(), self.smetrics.values()
 
     def setvalue(self, key, val):
         self.smetrics[key] = val
@@ -70,457 +82,470 @@ class ServerMetrics(object):
         return self.smetrics.keys()
 
     def tocsv(self):
-        rv = "Server"
+        rv = "Server"
         for k in self.smetrics:
-            # print k, self.smetrics[k]
             if isinstance(self.smetrics[k], float):
                 rv += ", %.2f" % self.smetrics[k]
-            else:
+            else:
                 if isinstance(self.smetrics[k], list):
-                    rv += ", [" + \
-                        ", ".join(map(lambda(x):str(round(x, 2)), self.smetrics[k])) \
-                        + "]"
-                else:
-                    rv += ", " + str(self.smetrics[k])
-        return rv
+                    values = ", ".join(str(round(x, 2)) for x in self.smetrics[k])
+                    rv += ", [%s]" % values
+                else:
+                    rv += ", " + str(self.smetrics[k])
+        return rv
 
 
 def readserverthresholds(filename):
     if filename is None:
-        return
+        return
 
     lines = readfile(filename)
     for l in lines:
-        mval = l.strip().split('=')
-        #print "read line %s" % mval
-        if len(mval) > 1 :
-            thekey = mval[0].strip()
-            theval = mval[1].strip()
-            if thekey in serverthresholds.getkeys():
-                serverthresholds.setvalue(thekey, float(theval))
-                # print thekey," = %.2f" % float(theval)
-
-def checkserverthreshold(metricval):
-    # print out an alarm if a ServerMetrics value crosses threshold
+        mval = l.strip().split('=')
+        if len(mval) > 1:
+            thekey = mval[0].strip()
+            theval = mval[1].strip()
+            if thekey in serverthresholds.getkeys():
+                serverthresholds.setvalue(thekey, float(theval))
+
+
+def checkserverthreshold(metricval):
+    """
+    Print out an alarm if a ServerMetrics value crosses threshold.
+    """
     for key in serverthresholds.getkeys():
-        # print "checking threshold of key = ", key
-        if key == "processorusedcputime":
-            pcpus = metricval.getvalue(key)
-            # print key, pcpus, serverthresholds[key]
-            for ind in range(0, len(pcpus)):
-                # print ind, pcpus[ind]
-                if pcpus[ind] > serverthresholds.getvalue(key):
+        if key == "processorusedcputime":
+            pcpus = metricval.getvalue(key)
+            for ind in range(0, len(pcpus)):
+                if pcpus[ind] > serverthresholds.getvalue(key):
                     alarm = ["server", os.uname()[1], str(ind) + key,
-                        "%.2f" % pcpus[ind], ">", serverthresholds.getvalue(key)]
+                             "%.2f" % pcpus[ind], ">", serverthresholds.getvalue(key)]
                     if options.timestamp:
-                        print str(time.time()) + ",",
-                    print ", ".join(map(lambda(x):str(x), alarm))
-        else:
-            if metricval.getvalue(key) > serverthresholds.getvalue(key):
+                        print str(time.time()) + ",",
+                    print ", ".join(str(x) for x in alarm)
+        else:
+            if metricval.getvalue(key) > serverthresholds.getvalue(key):
                 alarm = ["server", os.uname()[1], key,
-                    "%.2f" % metricval.getvalue(key), ">", serverthresholds.getvalue(key)]
+                         "%.2f" % metricval.getvalue(key), ">", serverthresholds.getvalue(key)]
                 if options.timestamp:
-                    print str(time.time()) + ",",
-                print ", ".join(map(lambda(x):str(x), alarm))
+                    print str(time.time()) + ",",
+                print ", ".join(str(x) for x in alarm)
 
 
 def collectservercputimes():
-    # return cpu times in ticks of this server total and each processor 3*(1+#cpu) columns
-    # (user+nice, sys, idle) from each /proc/stat cpu lines assume columns are:
-    # cpu# user nice sys idle iowait irq softirq steal guest (man 5 proc)
+    """
+    Return cpu times in ticks of this server total and each processor 3*(1+#cpu) columns
+    (user+nice, sys, idle) from each /proc/stat cpu lines assume columns are:
+    cpu# user nice sys idle iowait irq softirq steal guest (man 5 proc)
+    """
     rval = {}
     lines = readfile("/proc/stat")
     for i in range(ncpus + 1):
         items = lines[i].split()
-        (user, nice, sys, idle) = map(lambda(x): int(x), items[1:5])
-        rval[i] = [user+nice, sys, idle]
-    return rval
-
-def csvservercputimes(cputimes):
-    # return a csv string of this server total and each processor's cpu times
-    # (usr, sys, idle) in ticks
-    rval = ''
-    for i in range(len(cputimes)):
-        rval += ', '.join(map(lambda(x):str(x), cputimes[i]))
+        user, nice, sys, idle = [int(x) for x in items[1:5]]
+        rval[i] = [user+nice, sys, idle]
+    return rval
+
+
+def csvservercputimes(cputimes):
+    """
+    Return a csv string of this server total and each processor's cpu times
+    (usr, sys, idle) in ticks.
+    """
+    rval = ''
+    for i in range(len(cputimes)):
+        rval += ", ".join(str(x) for x in cputimes[i])
     return rval
 
 
 def calcservercputimes(cputimea, cputimeb):
-    # return cpu used/total % of this server total and each processor (1+#cpu columns)
+    """
+    Return cpu used/total % of this server total and each processor (1+#cpu columns).
+    """
     p = {}
     for n in range(ncpus + 1):
-        # print cputimeb[n]
         p[n] = []
         for i in range(len(cputimea[n])):
             p[n].append(cputimeb[n][i] - cputimea[n][i])
-        # print p[n]
-        total = sum(p[n]) # cpu times total delta
-        # print total
+        # cpu times total delta
+        total = sum(p[n])
         if total == 0:
             p[n] = 0.0
         else:
             p[n] = 100 - ((100.0 * p[n][-1]) / total)
     return p
 
-def collectservermems():
-    # return memory (total, free) in KB from proc/meminfo
+
+def collectservermems():
+    """
+    Return memory (total, free) in KB from proc/meminfo.
+    """
     lines = readfile("/proc/meminfo")
-    mem = map(lambda(x):x.split(), lines[0:2])
-    return map(lambda(x):int(x), zip(*mem)[1])
+    mem = [x.split() for x in lines[0:2]]
+    return [int(x) for x in zip(*mem)[1]]
 
-def csvservermems(mems):
-    # return a csv string of this server memory (total, free)
-    return ", ".join(map(lambda x: str(x), mems))
-
-def calcserverusedmem(mems):
-    # return int(100*(MemTotal-MemFree)/MemTotal) from /proc/meminfo
+def csvservermems(mems):
+    """
+    Return a csv string of this server memory (total, free).
+    """
+    return ", ".join(str(x) for x in mems)
+
+
+def calcserverusedmem(mems):
+    """
+    Return int(100*(MemTotal-MemFree)/MemTotal) from /proc/meminfo.
+    """
     return 100 * (mems[0] - mems[1]) / mems[0]
 
 
 def collectservermetrics(cputimes, mems, thresholdcheck):
-    # return ServerMetrics object with a dictionary of
-    # loadavg1,loadavg5,loadavg15, usedmem%, usedcpu% for total, cpu1, cpu2, ...
+    """
+    Return ServerMetrics object with a dictionary of
+    loadavg1,loadavg5,loadavg15, usedmem%, usedcpu% for total, cpu1, cpu2, ...
+    """
     metricval = []
-    ldavgs=os.getloadavg()
+    ldavgs = os.getloadavg()
     for v in ldavgs:
         metricval.append(v)
     metricval.append(calcserverusedmem(mems))
 
     for i in range(ncpus + 1):
-        metricval.append(cputimes[i])
-        # print cputimes[i]
+        metricval.append(cputimes[i])
 
     srvmetrics = ServerMetrics()
     srvmetrics.setvalues(metricval)
-    # print srvmetrics.tocsv()
 
     if thresholdcheck:
-        checkserverthreshold(srvmetrics)
+        checkserverthreshold(srvmetrics)
 
     return srvmetrics
 
 
 def csvservermetrics(srvmetrics):
-    # return a csv string of ServerMetrics.tocsv()
-    # loadavg1,loadavg5,loadavg15, usedmem%, usedcpu% for total, cpu1, cpu2, ...
+    """
+    Return a csv string of ServerMetrics.tocsv()
+    loadavg1,loadavg5,loadavg15, usedmem%, usedcpu% for total, cpu1, cpu2, ...
+    """
     rv = ""
     if options.timestamp:
-        rv = str(time.time()) + ", "
+        rv = str(time.time()) + ", "
     rv += srvmetrics.tocsv()
     return rv
 
 
 def csvserverbaseline():
-    # return a csv string of raw server metrics data: memfree, memtotal, cpuused, cpusystem, cpuidle
+    """
+    Return a csv string of raw server metrics data: memfree, memtotal, cpuused, cpusystem, cpuidle.
+    """
     return "memory (total, free) = " + csvservermems(collectservermems()) + "\ncputime (used, sys, idl) = " + csvservercputimes(collectservercputimes())
 
 
 class NodeMetrics(object):
     def __init__(self):
-        self.nmetrics = {"nodethroughput" : 0.0,
-                         "nodeusedmemory" : 0.0,
-                         "nodetotalcpu" : 0.0,
-                         "nodeusercpu" : 0.0,
-                         "nodesystemcpu" : 0.0,
-                         "nodewaitcpu" : 0.0}
+        self.nmetrics = {"nodethroughput": 0.0,
+                         "nodeusedmemory": 0.0,
+                         "nodetotalcpu": 0.0,
+                         "nodeusercpu": 0.0,
+                         "nodesystemcpu": 0.0,
+                         "nodewaitcpu": 0.0}
 
-    # set values from val = (throughput, mem, tcpu, ucpu, scpu, wcpu):
     def setvalues(self, val):
-        self.nmetrics["nodethroughput"] = val[0]
-        self.nmetrics["nodeusedmemory"] = val[1]
-        self.nmetrics["nodetotalcpu"] = val[2]
-        self.nmetrics["nodeusercpu"] = val[3]
-        self.nmetrics["nodesystemcpu"] = val[4]
-        self.nmetrics["nodewaitcpu"] = val[5]
+        """
+        Set values from val = (throughput, mem, tcpu, ucpu, scpu, wcpu).
+        """
+        self.nmetrics["nodethroughput"] = val[0]
+        self.nmetrics["nodeusedmemory"] = val[1]
+        self.nmetrics["nodetotalcpu"] = val[2]
+        self.nmetrics["nodeusercpu"] = val[3]
+        self.nmetrics["nodesystemcpu"] = val[4]
+        self.nmetrics["nodewaitcpu"] = val[5]
 
     def setvalue(self, key, val):
-        self.nmetrics[key] = val
+        self.nmetrics[key] = val
 
     def getvalue(self, key):
-        return self.nmetrics[key]
+        return self.nmetrics[key]
 
     def getkeys(self):
-        return self.nmetrics.keys()
+        return self.nmetrics.keys()
 
     def tocsv(self):
-        return ", ".join(map(lambda(x):str(x), self.nmetrics.values()))
+        return ", ".join(str(x) for x in self.nmetrics.values())
 
 
 class LogSession(object):
     def __init__(self):
         self.nodethresholds = NodeMetrics()
-        # set node threshold default values:
-        # nodethroughput=20.0, nodeusedmemory=15.0, nodetotalcpu=90.0,
-        # nodeusercpu=30.0, nodewaitcpu=50.0, nodesystemcpu=20.0}
-        self.nodethresholds.setvalues([20.0, 15.0, 90.0, 30.0, 50.0, 20.0])
-        if options.configfile is not None:
-            self.readnodethresholds(options.configfile)
+        # set node threshold default values:
+        # nodethroughput=20.0, nodeusedmemory=15.0, nodetotalcpu=90.0,
+        # nodeusercpu=30.0, nodewaitcpu=50.0, nodesystemcpu=20.0}
+        self.nodethresholds.setvalues([20.0, 15.0, 90.0, 30.0, 50.0, 20.0])
+        if options.configfile is not None:
+            self.readnodethresholds(options.configfile)
         self.pids = {}
         self.nodemetricsA = {}
         self.nodemetricsB = {}
         self.nodemetricsC = {}
 
-    def getpids(self):
-        # return dict of all CORE session pids in a dict using node name as the keys
-        # parent pid (vnoded) is the first value
+    def getpids(self):
+        """
+        Return dict of all CORE session pids in a dict using node name as the keys
+        parent pid (vnoded) is the first value.
+        """
         self.pids = {}
-        nodes = commands.getstatusoutput("ls /tmp/pycore.%s/*pid" % options.session)
+        nodes = commands.getstatusoutput(
+            "ls /tmp/pycore.%s/*pid" % options.session)
         if nodes[0] != 0:
-            # if options.timestamp == True:
-            # print str(time.time()),
-            # print "CORE session %s has not created nodes" % options.session
-            return
-
+            return
+
         nodes = nodes[1].split('\n')
         for nod in nodes:
-            nodename = nod.split('/')[-1].strip(".pid")
-            self.pids[nodename] = commands.getoutput("cat %s" % nod)
-
+            nodename = nod.split('/')[-1].strip(".pid")
+            self.pids[nodename] = commands.getoutput("cat %s" % nod)
+
         # do not expect failure of this command
         procs = commands.getoutput('ps -eo ppid,pid,comm').split('\n')
 
         # build self.pids dict with key=nodename and val="ppid,pid,cmd"
         for nname in self.pids:
-            # print nname, self.pids[nname]
             if self.pids[nname] == "":
-                if options.timestamp == True:
-                    print str(time.time()),
-                print "ERROR: null vnoded pid of node: %s" % nname
-            else:
-                childprocs = []
-                ppid = self.pids[nname]
+                if options.timestamp == True:
+                    print str(time.time()),
+                print "ERROR: null vnoded pid of node: %s" % nname
+            else:
+                childprocs = []
+                ppid = self.pids[nname]
                 for proc in procs:
-                    val=proc.split()
-                    if ppid == val[1]:
-                        childprocs.append([val[1], val[2]] )
-                    if ppid == val[0]:
-                        childprocs.append([val[1], val[2]])
+                    val = proc.split()
+                    if ppid == val[1]:
+                        childprocs.append([val[1], val[2]])
+                    if ppid == val[0]:
+                        childprocs.append([val[1], val[2]])
                 self.pids[nname] = childprocs
-        # print nname, self.pids[nname]
         return self.pids
 
     def printsesspids(self):
         if self.pids == {}:
             return {}
         for pp in self.pids:
-            if self.pids[pp] != []:
+            if self.pids[pp] != []:
                 for ap in range(len(self.pids[pp]) - 1):
-                    print ", " + self.pids[pp][ap][0], # ap pid
-                    print ", " + self.pids[pp][ap][1], # ap cmd
-                    procmetrics = map(lambda(x):str(x),self.pids[pp][ap][-1])
-                    print ", " + ", ".join(procmetrics),
-                    nodemetrics = map(lambda(x):str(x), self.pids[pp][-1])
-                    print ", " + ", ".join(nodemetrics)
+                    # ap pid
+                    print ", " + self.pids[pp][ap][0],
+                    # ap cmd
+                    print ", " + self.pids[pp][ap][1],
+                    procmetrics = [str(x) for x in self.pids[pp][ap][-1]]
+                    print ", " + ", ".join(procmetrics),
+                    nodemetrics = [str(x) for x in self.pids[pp][-1]]
+                    print ", " + ", ".join(nodemetrics)
 
     def getprocessmetrics(self, pid):
-        # return [cpu#, vsize(kb), ttime, utime, stime, wtime]
-        # from a /proc/pid/stat (a single line file) assume columns are:
-        # pid(0) comm(1) state ppid pgrp sess tty_nr tpgid flags
-        # minflt cmiflt majflt cmajflt # utime(12) stime cutime cstime
-        # priority nice num_threads itrealvalue starttime vsize(22) rss rsslim
-        # startcode endcode startstack kstkesp signal blocked sigignore sigcatch
-        # wchan nswap cnswap exit_signal processor(38) rt_priority
-        # policy ioblock guest_time cguest_time (man 5 proc)
-        #rval = ProcessMetrics()
-        #rval.__init__()
+        """
+        Return [cpu#, vsize(kb), ttime, utime, stime, wtime]
+        from a /proc/pid/stat (a single line file) assume columns are:
+        pid(0) comm(1) state ppid pgrp sess tty_nr tpgid flags
+        minflt cmiflt majflt cmajflt # utime(12) stime cutime cstime
+        priority nice num_threads itrealvalue starttime vsize(22) rss rsslim
+        startcode endcode startstack kstkesp signal blocked sigignore sigcatch
+        wchan nswap cnswap exit_signal processor(38) rt_priority
+        policy ioblock guest_time cguest_time (man 5 proc)
+        """
         rval = {}
         lines = readfile("/proc/" + pid + "/stat")
         if lines == []:
-            return rval
+            return rval
         items = lines[0].split()
-        (utime, stime, cutime, cstime) = map(lambda(x):int(x), items[13:17])
-        # print ">???", utime, stime, cutime, cstime
+        utime, stime, cutime, cstime = [int(x) for x in items[13:17]]
         rval = (items[38], # last run processor
-                int(items[22])/1000, # process virtual mem in kb
-                utime + stime + cutime + cstime,# totoal time
+                int(items[22])/1000, # process virtual mem in kb
+                utime + stime + cutime + cstime, # totoal time
                 utime, # user time
                 stime, # system time
                 cutime + cstime) # wait time
-        # print "pid --- processmetrics", rval
         return rval
 
     def getnodethroughput(self, pid):
-        # return node throughput of total receive and transmit packets in kb
+        """
+        Return node throughput of total receive and transmit packets in kb.
+        """
         lines = readfile("/proc/" + pid + "/net/dev")
         if lines == []:
-            return -0.00
-        ifs = map(lambda(x): x.split(), lines[2:])
+            return -0.00
+        ifs = [x.split() for x in lines[2:]]
         ifm = zip(*ifs)
-        rv = sum(map(lambda(x):int(x), ifm[1])) # received bytes
-        tr = sum(map(lambda(x):int(x), ifm[9])) # transmited bytes
-        #print 'node thruput :', rv, tr, (rv + tr)/1000
+        rv = sum(int(x) for x in ifm[1]) # received bytes
+        tr = sum(int(x) for x in ifm[9]) # transmited bytes
        return (rv + tr)/1000
 
     def getnodemetrics(self, mindex):
-        # return NodeMetrics with indexed by nodename, values are rows of
-        # [ [ppid, vnoded, [cpu#, vmem(kb), ttime, utime, stime, wtime]],
-        # [cpid, cmd, [cpu#, vmem(kb), ttime, utime, stime, wtime]], ... ,
-        # [thrput, vmem(kb), ttime, utime, stime, wtime]]
-        if mindex == 'a':
-            metricref = self.nodemetricsA
-        else:
-            metricref = self.nodemetricsB
+        """
+        Return NodeMetrics with indexed by nodename, values are rows of
+        [ [ppid, vnoded, [cpu#, vmem(kb), ttime, utime, stime, wtime]],
+        [cpid, cmd, [cpu#, vmem(kb), ttime, utime, stime, wtime]], ... ,
+        [thrput, vmem(kb), ttime, utime, stime, wtime]]
+        """
+        if mindex == 'a':
+            metricref = self.nodemetricsA
+        else:
+            metricref = self.nodemetricsB
 
         self.getpids()
-        # print " inside getnodemetrics()", self.pids
         if self.pids == {}:
-            return {}
-
+            return {}
+
         for nod in self.pids:
             nmetric = NodeMetrics()
             nmetric.__init__()
             nodeapps = {}
-            for ap in range(len(self.pids[nod])): # get each process metrics
-                procm = self.getprocessmetrics(self.pids[nod][ap][0])
-                if procm == []:
-                    if options.timestamp == True:
-                        print str(time.time()),
-                    print "WARNING: transient process", self.pids[nod][ap][1], \
-                        "/", self.pids[nod][ap][0], "on node %s" % nod
-                else:
-                    nodeapps[ap] = procm
-                    self.pids[nod][ap].append(nodeapps[ap])
-            processm = zip(*nodeapps.values()) # get overall node metrics
-            # print processm
-            if len(processm) > 0:
-                # if nod == 'n6':
-                # print nod, self.getnodethroughput(self.pids[nod][0][0])
-                nmetric.setvalues(( self.getnodethroughput(self.pids[nod][0][0]),
-                    sum(map(lambda(x):int(x), processm[1])), # vsize(kb)
-                    sum(map(lambda(x):int(x), processm[2])), # ttime
-                    sum(map(lambda(x):int(x), processm[3])), # utime
-                    sum(map(lambda(x):int(x), processm[4])), # stime
-                    sum(map(lambda(x):int(x), processm[5])))) # wtime
+            for ap in range(len(self.pids[nod])): # get each process metrics
+                procm = self.getprocessmetrics(self.pids[nod][ap][0])
+                if procm == []:
+                    if options.timestamp == True:
+                        print str(time.time()),
+                    print "WARNING: transient process", self.pids[nod][ap][1], \
+                        "/", self.pids[nod][ap][0], "on node %s" % nod
+                else:
+                    nodeapps[ap] = procm
+                    self.pids[nod][ap].append(nodeapps[ap])
+            processm = zip(*nodeapps.values()) # get overall node metrics
+            if len(processm) > 0:
+                nmetric.setvalues((self.getnodethroughput(self.pids[nod][0][0]),
+                                   # vsize(kb)
+                                   sum(int(x) for x in processm[1]),
+                                   # ttime
+                                   sum(int(x) for x in processm[2]),
+                                   # utime
+                                   sum(int(x) for x in processm[3]),
+                                   # stime
+                                   sum(int(x) for x in processm[4]),
+                                   sum(int(x) for x in processm[5]))) # wtime
             metricref[nod] = nmetric
-            # print nod, self.pids[nod][0][0], metricref[nod].tocsv()
         return metricref
 
-    def setnodemetricsC(self, key, val):
-        self.nodemetricsC[key] = val
+    def setnodemetricsC(self, key, val):
+        self.nodemetricsC[key] = val
 
-    def printnodemetrics(self, mindex):
+    def printnodemetrics(self, mindex):
         if mindex == 'c':
-            mm = self.nodemetricsC
-        else:
-            if mindex == 'a':
-                mm = self.nodemetricsA
-            else:
-                mm = self.nodemetricsB
+            mm = self.nodemetricsC
+        else:
+            if mindex == 'a':
+                mm = self.nodemetricsA
+            else:
+                mm = self.nodemetricsB
 
-        for k in self.nodemetricsC:
+        for k in self.nodemetricsC:
             if options.timestamp:
                 print str(time.time()) + ",",
-            print k, ",", mm[k].tocsv()
+            print k, ",", mm[k].tocsv()
 
-    def readnodethresholds(self, filename):
-        if filename is None:
-            return
-        lines = readfile(filename)
+    def readnodethresholds(self, filename):
+        if filename is None:
+            return
+        lines = readfile(filename)
         for l in lines:
-            mval = l.strip().split('=')
-            # print "read line %s" % mval
-            if len(mval) > 1 :
-                thekey = mval[0].strip()
-                theval = mval[1].strip()
-                if thekey in self.nodethresholds.getkeys():
-                    self.nodethresholds.setvalue(thekey, float(theval))
-                    #print thekey," = %.2f" % float(theval)
-
+            mval = l.strip().split('=')
+            if len(mval) > 1:
+                thekey = mval[0].strip()
+                theval = mval[1].strip()
+                if thekey in self.nodethresholds.getkeys():
+                    self.nodethresholds.setvalue(thekey, float(theval))
 
     def checknodethresholds(self, nname):
-        # print "check node thresholds", nname
-        calcm = self.nodemetricsC[nname]
+        calcm = self.nodemetricsC[nname]
         for keyname in self.nodethresholds.getkeys():
-            # print "HIII", keyname, calcm.getvalue(keyname), self.nodethresholds.getvalue(keyname)
-            if float(calcm.getvalue(keyname)) > float(self.nodethresholds.getvalue(keyname)):
-                # print calculatednodem.getvalue(m)
-                alarm = ["node", nname + "/" + self.pids[nname][0][0], keyname,\
-                    calcm.getvalue(keyname), ">", self.nodethresholds.getvalue(keyname)]
+            if float(calcm.getvalue(keyname)) > float(self.nodethresholds.getvalue(keyname)):
+                alarm = ["node", nname + "/" + self.pids[nname][0][0], keyname,
+                         calcm.getvalue(keyname), ">", self.nodethresholds.getvalue(keyname)]
                 if options.timestamp:
                     print str(time.time()) + ",",
-                print ", ".join(map(lambda(x):str(x), alarm))
+                print ", ".join(str(x) for x in alarm)
 
     def calcnodemetrics(self, cputimea, cputimeb, mems):
-        # return a dict of nodemetrics indexed by node name
-        # nodemetrics[nodename][-1] = node/host%
+        """
+        Return a dict of nodemetrics indexed by node name
+        nodemetrics[nodename][-1] = node/host%.
+        """
         p = []
         for i in range(len(cputimeb[0])):
             p.append(cputimeb[0][i] - cputimea[0][i])
         hostusedcpu = p[0] + p[1]
         hostusedmem = mems[0] - mems[1]
         if hostusedcpu == 0:
-            print "WARNING: host used cpu = 0, ", p[0], p[1]
-            hostusedcpu = 1
+            print "WARNING: host used cpu = 0, ", p[0], p[1]
+            hostusedcpu = 1
         if hostusedmem == 0:
-            print "WARNING: host used mem = 0, ", mems[0], mems[1]
-            hostusedmem = 1
+            print "WARNING: host used mem = 0, ", mems[0], mems[1]
+            hostusedmem = 1
 
-        nodesa = self.nodemetricsA
-        nodesb = self.nodemetricsB
+        nodesa = self.nodemetricsA
+        nodesb = self.nodemetricsB
         for nod in nodesb:
-            calcm = self.nodemetricsC
+            calcm = self.nodemetricsC
             calcm = NodeMetrics()
-            calcm.__init__()
-            if (nod in nodesa):
-                try:
-                    if (nodesb[nod] == []) | (nodesa[nod] == []) | \
-                        ( False == isinstance(nodesb[nod], NodeMetrics)) | \
-                        ( False == isinstance(nodesa[nod], NodeMetrics)):
-                        if options.timestamp == True:
-                            print str(time.time()),
-                        print "Warning: nodes %s is not fully instanciated" % nod
-                    else:
-                        # calc throughput kbps
-                        #print "node b : ", nodesb[nod].tocsv()
-                        #print "node a : ", nodesa[nod].tocsv()
-                        #if nod == 'n6':
-                        #print nodesb[nod].getvalue("nodethroughput"), nodesa[nod].getvalue("nodethroughput")
-                        calcm.setvalue("nodethroughput", "%.2f" % (8 * (nodesb[nod].getvalue("nodethroughput") \
-                            - nodesa[nod].getvalue("nodethroughput")) / options.interval))
-                        # calc mem node used / host used
-                        calcm.setvalue("nodeusedmemory", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodeusedmemory") / hostusedmem)))
-
-                        # calc total cpu time node / host
-                        calcm.setvalue("nodetotalcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodetotalcpu")\
-                            - nodesa[nod].getvalue("nodetotalcpu")) /hostusedcpu))
-                        # calc user cpu time node / host
-                        calcm.setvalue("nodeusercpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodeusercpu")\
-                            - nodesa[nod].getvalue("nodeusercpu")) /hostusedcpu))
-                        # calc system cpu time node / host
-                        calcm.setvalue("nodesystemcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodesystemcpu")\
-                            - nodesa[nod].getvalue("nodesystemcpu")) /hostusedcpu))
-                        # calc waitcpu time node / host
-                        calcm.setvalue("nodewaitcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodewaitcpu")\
-                            - nodesa[nod].getvalue("nodewaitcpu")) /hostusedcpu))
+            calcm.__init__()
+            if (nod in nodesa):
+                try:
+                    if (nodesb[nod] == []) | (nodesa[nod] == []) | \
+                            (False == isinstance(nodesb[nod], NodeMetrics)) | \
+                            (False == isinstance(nodesa[nod], NodeMetrics)):
+                        if options.timestamp == True:
+                            print str(time.time()),
+                        print "Warning: nodes %s is not fully instanciated" % nod
+                    else:
+                        # calc throughput kbps
+                        calcm.setvalue("nodethroughput", "%.2f" % (8 * (nodesb[nod].getvalue("nodethroughput")
+                                                                        - nodesa[nod].getvalue("nodethroughput")) / options.interval))
+                        # calc mem node used / host used
+                        calcm.setvalue("nodeusedmemory", "%.2f" % (
+                            100.0 * (nodesb[nod].getvalue("nodeusedmemory") / hostusedmem)))
 
-                        #print nod, calcm.tocsv()
-                        #print '=========================='
-                        logsession.nodemetricsC[nod] = calcm
-                        # logsession.printnodemetrics('c')
+                        # calc total cpu time node / host
+                        calcm.setvalue("nodetotalcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodetotalcpu")
+                                                                          - nodesa[nod].getvalue("nodetotalcpu")) / hostusedcpu))
+                        # calc user cpu time node / host
+                        calcm.setvalue("nodeusercpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodeusercpu")
+                                                                         - nodesa[nod].getvalue("nodeusercpu")) / hostusedcpu))
+                        # calc system cpu time node / host
+                        calcm.setvalue("nodesystemcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodesystemcpu")
+                                                                           - nodesa[nod].getvalue("nodesystemcpu")) / hostusedcpu))
+                        # calc waitcpu time node / host
+                        calcm.setvalue("nodewaitcpu", "%.2f" % (100.0 * (nodesb[nod].getvalue("nodewaitcpu")
+                                                                         - nodesa[nod].getvalue("nodewaitcpu")) / hostusedcpu))
+                        logsession.nodemetricsC[nod] = calcm
 
                     if options.alarm is not None:
-                        logsession.checknodethresholds(nod)
-                except IndexError:
-                    pass
-            else:
-                print "Warning: transient node %s " % nod
+                        logsession.checknodethresholds(nod)
+                except IndexError:
+                    pass
+            else:
+                print "Warning: transient node %s " % nod
 
         return nodesb
 
 
 def main():
     usagestr = "%prog [-h] [options] [args]\n\nLog server and optional CORE session metrics to stdout."
-    parser = optparse.OptionParser(usage = usagestr)
-    parser.set_defaults(interval=2, timestamp=False,
-        configfile = "/etc/core/perflogserver.conf",\
-        alarm = True, session = None)
-    parser.add_option("-i", "--interval", dest = "interval", type = int,
-        help = "seconds to wait between samples; default=%s" %
+    parser = optparse.OptionParser(usage=usagestr)
+    parser.set_defaults(interval=2, timestamp=False,
+                        configfile="/etc/core/perflogserver.conf",
+                        alarm=True, session=None)
+    parser.add_option("-i", "--interval", dest="interval", type=int,
+                      help="seconds to wait between samples; default=%s" %
                       parser.defaults["interval"])
-    parser.add_option("-t", "--timestamp", action = "store_true",
-        dest = "timestamp",
-        help = "include timestamp on each line")
-    parser.add_option("-c", "--configfile", dest = "configfile",
-        type = "string",
-        help = "read threshold values from the specified file;\
+    parser.add_option("-t", "--timestamp", action="store_true",
+                      dest="timestamp",
+                      help="include timestamp on each line")
+    parser.add_option("-c", "--configfile", dest="configfile",
+                      type="string",
+                      help="read threshold values from the specified file;\
                       default=%s" % parser.defaults["configfile"])
-    parser.add_option("-a", "--alarm", action = "store_true",
-        dest = "alarm",
-        help = "generate alarms based threshold check on each cycle")
-    parser.add_option("-s", "--session", dest = "session", type = int,
-        help = "CORE session id; default=%s" %
+    parser.add_option("-a", "--alarm", action="store_true",
+                      dest="alarm",
+                      help="generate alarms based threshold check on each cycle")
+    parser.add_option("-s", "--session", dest="session", type=int,
+                      help="CORE session id; default=%s" %
                       parser.defaults["session"])
     global options
     global ncpus
@@ -528,8 +553,7 @@ def main():
     global logsession
     global cyclecount
 
-    (options, args) = parser.parse_args()
-    # print options
+    options, _args = parser.parse_args()
 
     signal.signal(signal.SIGINT, handler)
     signal.signal(signal.SIGTERM, handler)
@@ -538,51 +562,45 @@ def main():
 
     # server threshold dictionary - a ServerMetrics instant with default values
     serverthresholds = ServerMetrics()
-    # set to server threshold default values: serverloadavg1=3.5,
-    # serverloadavg5=3.5, serverloadavg15=3.5, serverusedmemory=80.0,
+    # set to server threshold default values: serverloadavg1=3.5,
+    # serverloadavg5=3.5, serverloadavg15=3.5, serverusedmemory=80.0,
     # serverusedcputime=80.0, processorusedcputime=90.0
     serverthresholds.setvalues([3.5, 3.5, 3.5, 80.0, 80.0, 90.0])
     if options.alarm is True:
         # read server threshold values from configuration file
-        readserverthresholds(options.configfile)
-
+        readserverthresholds(options.configfile)
+
     if options.session is not None:
         logsession = LogSession()
-        # print logsession
 
     # mark host log baseline
-    print "server: ", ", ".join(map(lambda(x):str(x), os.uname())), ",", ncpus, "CPU cores"
+    print "server: ", ", ".join(str(x) for x in os.uname()), ",", ncpus, "CPU cores"
     print "start timestamp:", time.time(), ", baseline data: "
     print csvserverbaseline()
-    print "server metrics: ", ", ".join(map(lambda(x):str(x), serverthresholds.getkeys()))
+    print "server metrics: ", ", ".join(str(x) for x in serverthresholds.getkeys())
     if options.session is not None:
-        print "node metrics: nodename, ", ", ".join(map(lambda(x):str(x), logsession.nodethresholds.getkeys()))
+        print "node metrics: nodename, ", ", ".join(str(x) for x in logsession.nodethresholds.getkeys())
 
     cyclecount = 0
     while True:
         cputimea = collectservercputimes()
         if options.session is not None:
             nodesa = logsession.getnodemetrics('a')
-            # print "nodes a:", nodesa
 
         time.sleep(options.interval)
 
         cputimeb = collectservercputimes()
-        mems = collectservermems()
-
+        mems = collectservermems()
         calccputime = calcservercputimes(cputimea, cputimeb)
-        m = csvservermetrics(collectservermetrics(calccputime, mems, options.alarm))
-        print m
+        m = csvservermetrics(collectservermetrics(
+            calccputime, mems, options.alarm))
+        print m
 
         if options.session is not None:
-            nodesb = logsession.getnodemetrics('b')
-            # print "nodes b:", nodesb
-            if nodesb != {}:
-                logsession.calcnodemetrics(cputimea, cputimeb, mems)
-                logsession.printnodemetrics('c')
+            nodesb = logsession.getnodemetrics('b')
+            if nodesb != {}:
+                logsession.calcnodemetrics(cputimea, cputimeb, mems)
+                logsession.printnodemetrics('c')
 
-        sys.stdout.flush()
+        sys.stdout.flush()
         cyclecount = cyclecount + 1
 
 
 if __name__ == "__main__":
     main()
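Most of the cleanup above is mechanical: the comment headers become docstrings, the combined import is split into one import per line, keyword arguments lose the spaces around "=", and Python 2 map(lambda(x): ...) calls are rewritten as list comprehensions or generator expressions. A minimal before/after sketch of that last idiom (the variable names here are illustrative, not taken from the file):

    vals = [1.234, 5.678]
    # old style removed by the commit (Python 2 tuple-parameter lambda)
    csv_old = ", ".join(map(lambda(x): str(round(x, 2)), vals))
    # new style added by the commit
    csv_new = ", ".join(str(round(x, 2)) for x in vals)
    assert csv_old == csv_new  # both yield "1.23, 5.68"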
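For reference, readserverthresholds() and LogSession.readnodethresholds() in the file above parse a plain key=value file (default path /etc/core/perflogserver.conf) and only accept keys already present in the ServerMetrics or NodeMetrics dictionaries. A hypothetical fragment of such a file, using keys and the default values that appear in the code:

    serverloadavg1 = 3.5
    serverusedmemory = 80.0
    processorusedcputime = 90.0
    nodethroughput = 20.0
    nodetotalcpu = 90.0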