from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase


class Conv(OperatorLayerBase):
    """
    Byte and flop model for a `torch.nn.functional` conv1d / conv2d call.

    Symbols used throughout the class:

    # N      = batch size
    # C,H,W  = input channels, height, width
    # K,P,Q  = output channels, height, width
    # R,S    = filter height, width
    # ph,pw  = padding along height, width
    # U,V    = stride along height, width
    # dh,dw  = dilation along height, width
    # g      = groups
    """

    #todo: refine winograd and FFT
    # Kernel-name fragments for auxiliary kernels; bytes_flops() reports
    # 0 bytes / 0 flops for any kernel whose name contains one of them.
    convAuxList = ["nchwToNhwc", "nhwcToNchw", "OffsetsKernel",]
    winoAuxList = ["generateWinogradTilesKernel", "winogradWgradData", "winogradWgradOutput", "winogradWgradDelta"]
    fftAuxList = ["compute_gemm_pointers", "flip_filter", "fft2d_r2c_", "fft2d_c2r_", "fft1d_r2c", "fft1d_c2r"]
    miscAuxList = ["scaleTensor_kernel",]

    # Kernel-name fragments for kernels that are charged the full
    # convolution cost by bytes_flops().
    convList = ["_s884cudnn_", "_s1688cudnn_", "_scudnn_", "2d_grouped_direct_kernel", "cudnn::detail::implicit_convolve_sgemm", "cudnn::detail::dgrad2d_alg1_1", "cudnn::detail::wgrad_alg0_engine", "cudnn::detail::dgrad_engine", "dgrad_1x1_stride_2x2", "spatialDepthwiseConvolutionUpdateOutput"]
    winoList = ["winograd3x3Kernel", "_sgemm_"]
    fftList = ["fermiPlusCgemmLDS128_batched", "_gcgemm_",]
    miscList = []

    @staticmethod
    def _conv_pick_arg(args, pos, name, default):
        """
        Resolve one optional convolution argument from the recorded call.

        The argument is taken from position `pos` when that entry exists and
        was passed positionally (its 'name' is the empty string); otherwise
        the first entry whose 'name' equals `name` is used; otherwise a
        synthetic int entry carrying `default` is returned.
        """
        if (len(args) > pos) and (args[pos]['name'] == ""):
            return args[pos]
        found = next((a for a in args if a['name'] == name), None)
        if found is not None:
            return found
        return {'name': name, 'type': 'int', 'value': default}

    @staticmethod
    def _conv_out_size(size, pad, kernel, dilation, stride):
        """Return the output extent along one spatial axis, truncated to int."""
        return int(1 + (size + 2*pad - (((kernel-1)*dilation)+1))/stride)

    def _conv_store_dims(self, N, C, H, W, K, P, Q, R, S, ph, pw, U, V, dh, dw, g, dtype):
        """Store the resolved convolution geometry on the instance."""
        self.N = N
        self.C = C
        self.H = H
        self.W = W
        self.K = K
        self.P = P
        self.Q = Q
        self.R = R
        self.S = S
        self.ph = ph
        self.pw = pw
        self.U = U
        self.V = V
        self.dh = dh
        self.dw = dw
        self.g = g
        self.type = dtype

    def __init__(self, d):
        """
        Parse the recorded call marker of a conv1d / conv2d invocation.

        `d` must expose `argMarker` (a sequence whose first element is the
        string form of a dict with keys 'mod', 'op' and 'args'), `dir`,
        `name` and `sub`. Each entry of 'args' is a dict with 'name' and
        'type', plus 'shape'/'dtype' for tensors or 'value' for scalars and
        tuples.

        Raises AssertionError when the marker does not describe a supported
        convolution (wrong module/op, bad argument count, non-tensor input or
        weight, mismatched shapes or dtypes, unsupported groups).
        """
        # NOTE(review): eval() executes whatever the marker string contains.
        # Only safe while markers come from a trusted profiling run; consider
        # ast.literal_eval if the marker is always a plain literal -- confirm.
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        self.dir = d.dir
        self.name = d.name
        self.sub = d.sub

        assert (mod == "torch.nn.functional")
        assert (op in ["conv1d", "conv2d"])
        length = len(args)
        assert (length >= 2) and (length <= 7)
        i, w = args[0], args[1]
        assert (i['type'] == "tensor")
        assert (w['type'] == "tensor")

        #ignore bias (positional slot 2)
        stride = Conv._conv_pick_arg(args, 3, 'stride', 1)
        padding = Conv._conv_pick_arg(args, 4, 'padding', 0)
        dilation = Conv._conv_pick_arg(args, 5, 'dilation', 1)
        groups = Conv._conv_pick_arg(args, 6, 'groups', 1)

        if op == "conv1d":
            assert (len(i['shape']) == 3)
            assert (len(w['shape']) == 3)
            assert (i['dtype'] == w['dtype'])
            N, C1, W = i['shape']
            K, C2, S = w['shape']
            assert (C1 == C2)

            # Non-scalar values are indexed; only their first element is used.
            p = padding['value'] if Utility.isscalar(padding['type']) else padding['value'][0]
            s = stride['value'] if Utility.isscalar(stride['type']) else stride['value'][0]
            dil = dilation['value'] if Utility.isscalar(dilation['type']) else dilation['value'][0]
            g = groups['value']
            assert (g == 1)

            # A 1-D convolution is modelled as 2-D with unit height.
            H = 1
            R = 1
            P = Conv._conv_out_size(H, 0, R, 1, 1)
            Q = Conv._conv_out_size(W, p, S, dil, s)
            if (H == 1):
                assert (P == 1)
            if (W == 1):
                assert (Q == 1)

            self._conv_store_dims(N, C1, H, W, K, P, Q, R, S, 0, p, 1, s, 1, dil, g, i['dtype'])

        elif op == "conv2d":
            assert (len(i['shape']) == 4)
            assert (len(w['shape']) == 4)
            assert (i['dtype'] == w['dtype'])
            N, C1, H, W = i['shape']
            K, C2, R, S = w['shape']

            if Utility.isscalar(padding['type']):
                ph = pw = padding['value']
            else:
                assert (padding['type'] == "tuple")
                ph, pw = padding['value']

            if Utility.isscalar(stride['type']):
                sh = sw = stride['value']
            else:
                assert (stride['type'] == "tuple")
                sh, sw = stride['value']

            if Utility.isscalar(dilation['type']):
                dh = dw = dilation['value']
            else:
                assert (dilation['type'] == "tuple")
                dh, dw = dilation['value']

            g = groups['value']
            assert (g >= 1)
            # The weight tensor holds C/g input channels per group.
            assert (C1 == C2*g)

            P = Conv._conv_out_size(H, ph, R, dh, sh)
            Q = Conv._conv_out_size(W, pw, S, dw, sw)
            if (H == 1):
                assert (P == 1)
            if (W == 1):
                assert (Q == 1)

            self._conv_store_dims(N, C1, H, W, K, P, Q, R, S, ph, pw, sh, sw, dh, dw, g, i['dtype'])

        else:
            assert False

    def params(self):
        """Return the convolution geometry and dtype as an OrderedDict."""
        p = OrderedDict([
            ('N', self.N), ('C', self.C), ('H', self.H), ('W', self.W),
            ('K', self.K), ('P', self.P), ('Q', self.Q),
            ('R', self.R), ('S', self.S),
            ('ph', self.ph), ('pw', self.pw),
            ('U', self.U), ('V', self.V),
            ('dh', self.dh), ('dw', self.dw),
            ('g', self.g), ('type', self.type),
        ])
        return p

    def conv_bytes_flops(self, N, C, H, W, K, P, Q, R, S, g, t):
        """
        Return `(bytes, flops)` for one convolution with the given geometry.

        Flops count two operations per multiply-accumulate; bytes count the
        input, weight and output elements once each, scaled by the element
        size `Utility.typeToBytes(t)`.
        """
        f = 2*N*K*P*Q*C*R*S/g  #for fprop
        elems = N*C*H*W + K*C*R*S/g + N*K*P*Q
        b = elems * Utility.typeToBytes(t)
        return b, f

    def bytes_flops(self):
        """
        Return `(bytes, flops)` attributed to the kernel named `self.name`.

        Auxiliary kernels and unrecognised kernels yield `(0, 0)`;
        convolution kernels yield the convolution cost (per group when one
        kernel is launched per group); a kernel whose name contains
        "calc_bias_diff" is costed as a bias-gradient reduction.
        """
        N, C, H, W, K, P, Q, R, S, _ph, _pw, _U, _V, _dh, _dw, g, t = self.params().values()

        aux_names = Conv.convAuxList+Conv.winoAuxList+Conv.fftAuxList+Conv.miscAuxList
        conv_names = Conv.convList+Conv.winoList+Conv.fftList+Conv.miscList

        if any(x in self.name for x in aux_names):
            nbytes, nflops = [0, 0]
        elif any(x in self.name for x in conv_names):
            # One kernel covers the whole convolution when there is a single
            # group, for the grouped direct kernel, and for the depthwise
            # (separable) kernel.
            whole_conv = (
                g == 1
                or "2d_grouped_direct_kernel" in self.name
                or "spatialDepthwiseConvolutionUpdateOutput" in self.name
            )
            if whole_conv:
                nbytes, nflops = self.conv_bytes_flops(N, C, H, W, K, P, Q, R, S, g, t)
            else:
                #a kernel per group is called
                nbytes, nflops = self.conv_bytes_flops(N, C/g, H, W, K/g, P, Q, R, S, 1, t)
        elif ("calc_bias_diff" in self.name):  #bias gradient
            elems = N*K*P*Q
            nflops = elems
            nbytes = 2 * elems * Utility.typeToBytes(t)
            #params = OrderedDict([('N',N), ('K',K), ('P',P), ('Q',Q), ('type', t)])
        else:
            nbytes, nflops = [0, 0]

        return nbytes, nflops

    def bytes(self):
        """Return the byte count from bytes_flops()."""
        b, _ = self.bytes_flops()
        return b

    def flops(self):
        """Return the flop count from bytes_flops()."""
        _, f = self.bytes_flops()
        return f

    def tc(self):
        """
        Return 1 if the kernel name contains "884cudnn" or "1688cudnn",
        otherwise "-". (Presumably a tensor-core indicator -- confirm.)
        """
        for s in ["884cudnn", "1688cudnn"]:
            if s in self.name:
                return 1
        return "-"

    def op(self):
        """Return the operator name taken from the marker."""
        return self.op_

    def mod(self):
        """Return the module name taken from the marker."""
        return self.mod_