def enable(self, func):
    """Mark ``func`` so that its cumulative time is extracted from the profile.

    Used as the ``@tp.enable`` decorator. The wrapped function behaves exactly
    like ``func``; after each successful call its descriptor
    ``(file, first_line, name)`` is added to ``self.enabled`` if it is not
    already there. ``profile`` later matches these descriptors against the
    keys of the cProfile statistics.

    Args:
        func: The function whose cumulative timing should be reported.

    Returns:
        A wrapper with the same signature and metadata as ``func``.
    """
    # The descriptor never changes, so it is resolved once (on the first
    # call) instead of on every call: ``inspect.getsourcelines`` is costly and
    # would otherwise run inside the profiled region and distort the timings.
    func_desc = None

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        nonlocal func_desc
        # Registration happens only after the call returns; a call that
        # raises propagates its exception and registers nothing.
        result = func(*args, **kwargs)
        if func_desc is None:
            func_desc = (
                inspect.getfile(func),
                inspect.getsourcelines(func)[1],
                func.__name__,
            )
        if func_desc not in self.enabled:
            self.enabled.append(func_desc)
        return result

    return wrapper
def profile(self, func):
    """Run ``func`` under cProfile and record timings of the selected functions.

    Used as the ``@tp.profile`` decorator, ideally on the outermost function
    of the workflow. Rather than keeping every cProfile row, only the rows
    for functions selected with ``@tp.enable`` are reported, plus a few
    low-level functions of interest (decompression, lock waits).

    On every call of the wrapped function:

    * the full ``pstats.Stats`` object is appended to ``self.full_outputs``;
    * one row per matched function (``func_file``, ``func_ln``, ``func_name``,
      ``func_time``) is appended to the ``self.report_df`` DataFrame, where
      ``func_time`` is the cumulative time reported by cProfile.

    Args:
        func: The function to run under the profiler.

    Returns:
        A wrapper with the same signature and metadata as ``func`` that
        returns whatever ``func`` returns.
    """
    # In addition to functions included by @tp.enable, we want to measure
    # timing of some low-level functions such as decompression. The actual
    # function names are long, so an alias is used for nicer looking output.
    extra_funcs = {
        "decompress": "<method 'decompress' of '_lzma.LZMADecompressor' objects>",
        "wait": "<method 'acquire' of '_thread.lock' objects>",
    }
    alias_by_name = {name: alias for alias, name in extra_funcs.items()}

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        self.profiler.enable()
        try:
            result = func(*args, **kwargs)
        finally:
            # Always stop profiling, even when ``func`` raises; otherwise the
            # profiler would keep running after the exception propagates.
            self.profiler.disable()

        # Stats are saved in the following format for each function call:
        # (file, line_num, func_name): (ncalls, pcalls, tottime, cumtime, parent_func)
        stats = pstats.Stats(self.profiler)
        self.full_outputs.append(stats)

        # Register the extra low-level functions found in this profile.
        # The membership check keeps repeated profiled calls from adding the
        # same key (and hence duplicate report rows) again.
        for key in stats.stats:
            if key[2] in alias_by_name and key not in self.enabled:
                self.enabled.append(key)

        rows = []
        for func_desc in self.enabled:
            # Stats keys are unique, so a direct lookup finds the (at most
            # one) entry for this function; functions absent from the profile
            # contribute no row.
            entry = stats.stats.get(func_desc)
            if entry is None:
                continue
            func_file, func_ln, func_name = func_desc
            # remove global path
            # TODO: make this relative to base dir of the package
            func_file_short = "/".join(func_file.split("/")[-3:])
            # Edit name to display in output dataframe
            display_name = "time:" + alias_by_name.get(func_name, func_name)
            rows.append({
                'func_file': func_file_short,
                'func_ln': func_ln,
                'func_name': display_name,
                'func_time': entry[3],  # cumulative time
            })

        # One concat for all rows instead of one per enabled function.
        if self.enabled:
            self.report_df = pd.concat(
                [self.report_df, pd.DataFrame(rows)]
            ).reset_index(drop=True)
        return result

    return wrapper