import sys


class NVVP(object):
    """
    This class gets kernel information from the SQL (nvvp) database.

    The database handle passed to the constructor must provide
    ``select(cmd)``, returning a list of rows indexable by column name,
    and ``execute(cmd)`` for statements that return no rows.
    """

    # Names of the tables queried by this class.
    driverT = "CUPTI_ACTIVITY_KIND_DRIVER"
    runtimeT = "CUPTI_ACTIVITY_KIND_RUNTIME"
    kernelT = "CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL"
    markerT = "CUPTI_ACTIVITY_KIND_MARKER"
    stringT = "StringTable"

    def __init__(self, db):
        """
        Store the database handle and reset the marker bookkeeping.
        """
        self.db = db
        # Id of the last encapsulating marker seen by getMarkerInfo().
        # It is the lower bound of the next "markers in between" query.
        self.markerId = 0

    def getProfileStart(self):
        """
        Get the profile start time.

        Returns the smallest start value found across the driver, runtime,
        kernel and marker tables. Asserts if all of them are empty.
        """
        profStart = sys.maxsize
        for table in [self.driverT, self.runtimeT, self.kernelT, self.markerT]:
            # The marker table stores its time in "timestamp", the others in "start".
            colname = "timestamp" if table == self.markerT else "start"
            cmd = "select {} from {} ORDER BY {} ASC LIMIT 1".format(colname, table, colname)
            result = self.db.select(cmd)
            assert (len(result) <= 1)
            if (len(result) == 1):
                assert (colname in result[0])
                t = result[0][colname]
                if (t < profStart):
                    profStart = t
        assert (profStart < sys.maxsize)
        return profStart

    def getString(self, id_):
        """
        Get the string associated with an id.

        Asserts that exactly one row of the string table matches id_.
        """
        cmd = "select value from {} where _id_ = {}".format(self.stringT, id_)
        result = self.db.select(cmd)
        assert (len(result) == 1)
        return result[0]['value']

    def createMarkerTable(self):
        """
        Create a temporary table and index it to speed up repeated SQL queries.
        The table is an INNER JOIN of CUPTI_ACTIVITY_KIND_MARKER with itself:
        rows with flags = 2 supply startTime and rows with flags = 4 (same id)
        supply endTime.
        """
        cmd = 'CREATE TEMPORARY TABLE marker AS SELECT \
                    a._id_ as id, \
                    a.timestamp AS startTime, \
                    b.timestamp AS endTime, \
                    HEX(a.objectId) AS objectId, \
                    a.name AS name \
                    FROM {} AS a INNER JOIN {} AS b ON \
                    a.id = b.id and \
                    a.flags = 2 and b.flags = 4'.format(self.markerT, self.markerT)
        self.db.execute(cmd)

        self.db.execute('CREATE INDEX start_index ON marker (startTime)')
        self.db.execute('CREATE INDEX end_index ON marker (endTime)')
        self.db.execute('CREATE INDEX id_index ON marker (id)')

    def getCPUInfo(self, corrId):
        """
        Given the correlation id, get CPU start, end, thread id, process id.
        The information can be in the runtime table or the driver table.

        Returns the list [start, end, pid, tid]. Asserts that exactly one
        matching row exists and that end > start.
        """
        query = "select start,end,processId,threadId from {} where correlationId={}"

        # First look in the runtime table
        result = self.db.select(query.format(self.runtimeT, corrId))
        assert (len(result) <= 1)

        if (len(result) == 0):
            # Look in the driver table
            result = self.db.select(query.format(self.driverT, corrId))

        assert (len(result) == 1)
        info = result[0]
        start = info['start']
        end = info['end']
        pid = info['processId']
        tid = info['threadId']
        tid = tid & 0xffffffff  # convert to unsigned
        assert (end > start)
        return [start, end, pid, tid]

    def getKernelInfo(self):
        """
        Get GPU kernel info.

        Returns every row of the kernel table with the name, correlation id,
        start/end, device, stream, grid and block columns.
        """
        cmd = "select name,correlationId,start,end,deviceId,streamId,gridX,gridY,gridZ,blockX,blockY,blockZ from {}".format(self.kernelT)
        result = self.db.select(cmd)
        return result

    def getMarkerInfo(self, objId, startTime, endTime):
        """
        This function first finds all NVTX markers encapsulating
        a runtime / driver kernel launch.
        It then splits the markers into many lists.
            layerMarkers : User added NVTX markers
            traceMarkers : Call trace markers (inserted by pyprof)
            reprMarkers  : Markers containing the extra_repr() of a module (inserted by pyprof)
            pyprofMarkers: Markers containing args and kwargs (tensor shape, datatype etc.)
            seqMarkers   : Markers containing PyTorch internal sequence markers (inserted by PyTorch)
            altSeqMarkers: Sequence markers found between the previous kernel's
                           last encapsulating marker and this kernel's.
                           Collected for fprop kernels only.
            otherMarkers : Markers not in either of the above categories.

        We extract seqId from the seq and altSeq markers. The seqId is used in bprop.
        We also extract information from the layerMarkers.

        Side effects: updates self.markerId and deletes rows of the temporary
        marker table for objId whose endTime is before startTime.

        Returns the tuple
            (layerMarkers, filtered trace, reprMarkers, pyprofMarkers,
             seqMarkers, otherMarkers, altSeqMarkers,
             seq ids, alt seq ids, layer names)
        """

        layerMarkers = []
        traceMarkers = []
        reprMarkers = []
        pyprofMarkers = []
        seqMarkers = []
        otherMarkers = []
        altSeqMarkers = []
        bprop = False

        # Helper functions

        def delete(objId, sTime):
            """
            Delete rows from the temporary SQL table which are no longer required.
            This speeds up future queries.
            """
            margin = 0
            cmd = 'DELETE FROM marker WHERE objectId = "{}" AND endTime < {}'.format(objId, sTime - margin)
            self.db.execute(cmd)

        def getLayerName(mlist):
            """
            Get layer names from layer marker list.
            """
            assert isinstance(mlist, list)
            layers = []
            for m in mlist:
                assert ("layer:" in m)
                layers.append(m.split(":")[1])
            return layers

        def getSeqId(mlist):
            """
            Get sequence ids from seq / alt seq marker list.
            Returns the sorted list of unique ids.
            """
            assert isinstance(mlist, list)
            ids = []
            for m in mlist:
                assert (", seq = " in m)
                ids.append(int(m.split("=")[1]))
            # Remove duplicates
            return sorted(set(ids))

        def seqcompare(elem):
            """
            Sorting function for sequence markers.
            """
            assert (", seq = " in elem)
            # Sort by sequence id and then the string. The key is a string,
            # so ids compare lexicographically; equal ids still end up adjacent,
            # which is what prune() relies on.
            l = elem.split(" = ")
            return l[1] + l[0]

        def prune(mlist):
            """
            Remove markers with the same seqId and if the strings are similar.
            This function works on a sorted sequence.
            """
            assert isinstance(mlist, list)
            assert (len(mlist))
            kept = mlist[0:1]
            for pm, m in zip(mlist, mlist[1:]):
                # Split on the last comma so a comma inside the marker name
                # does not break the two-way unpacking.
                name, seq = m.rsplit(",", 1)
                pname, pseq = pm.rsplit(",", 1)
                similar = (name in pname) or (pname in name)
                if (seq == pseq) and similar:
                    continue
                kept.append(m)
            return kept

        def filterTrace(mlist):
            """
            Filter trace markers to remove certain file names.
            """
            assert isinstance(mlist, list)
            if len(mlist) == 0:
                return mlist
            # The last stack trace will be a super set.
            # NOTE(security): eval() executes the marker text read from the
            # profile database. Only parse databases from a trusted source;
            # if the marker is always a plain literal, ast.literal_eval would
            # be a safer parser - confirm before switching.
            trace = eval(mlist[-1])
            trace = trace['traceMarker']
            assert isinstance(trace, list)
            excluded = (
                "/torch/nn/modules/",
                "/torch/nn/functional.py",
                "/torch/tensor.py",
                "/torch/autograd/__init__.py",
                "/torch/_jit_internal.py",
                "/pyprof/nvtx/nvmarker.py",
                "/apex/optimizers/",
                "/torch/_utils.py",
                "/torch/optim/",
            )
            return [x for x in trace if not any(e in x for e in excluded)]

        # Find all encapsulating markers
        cmd = 'SELECT id,name from marker where \
                objectId = "{}" and \
                startTime < {} and \
                endTime > {} \
                ORDER BY startTime ASC'.format(objId, startTime, endTime)
        result = self.db.select(cmd)

        # Bin markers into different lists
        for r in result:
            m = self.getString(r['name'])

            # Hack: If its a known gradient checkpointing marker, ignore it.
            if m.find("CheckpointFunctionBackward") >= 0:
                continue

            if ("_backward, seq =" in m) or ("Backward, seq =" in m) or ("Backward0, seq =" in m):
                bprop = True

            if ("mod" in m) and ("op" in m) and ("args" in m) and ("type" in m):
                pyprofMarkers.append(m)
            elif ("layer:" in m):
                layerMarkers.append(m)
            elif ("traceMarker" in m):
                traceMarkers.append(m)
            elif ("strRepr" in m):
                reprMarkers.append(m)
            elif (", seq = " in m):
                seqMarkers.append(m)
            else:
                otherMarkers.append(m)

        # Remove duplicates, sort and prune seqMarkers
        if (len(seqMarkers)):
            seqMarkers = list(set(seqMarkers))
            seqMarkers.sort(key=seqcompare)
            seqMarkers = prune(seqMarkers)

        # Remove duplicates from otherMarkers
        otherMarkers = list(set(otherMarkers))

        # Get markers with seq id (inserted by PyTorch) from the previous kernel to the present kernel
        # Only for fprop kernels
        if (len(result) and not bprop):
            loId = self.markerId
            hiId = result[-1]['id']
            self.markerId = hiId

            # Get markers between loId and hiId
            cmd = 'SELECT id,name from marker where objectId = "{}" and id > {} and id < {} ORDER BY startTime ASC'.format(objId, loId, hiId)
            result1 = self.db.select(cmd)

            for r in result1:
                m = self.getString(r['name'])
                # Get only markers with seq id. The pattern must be the same
                # ", seq = " that seqcompare() and getSeqId() assert on.
                if (", seq = " in m):
                    altSeqMarkers.append(m)

        # Remove duplicates, sort and prune altSeqMarkers
        if (len(altSeqMarkers)):
            altSeqMarkers = list(set(altSeqMarkers))
            altSeqMarkers.sort(key=seqcompare)
            altSeqMarkers = prune(altSeqMarkers)

        delete(objId, startTime)

        return layerMarkers, filterTrace(traceMarkers), reprMarkers, pyprofMarkers, seqMarkers, otherMarkers, altSeqMarkers, getSeqId(seqMarkers), getSeqId(altSeqMarkers), getLayerName(layerMarkers)