class CSMParser:
    """
    Parses cytosim trajectory files generated using `report`.

    Every frame is returned as an extended DataFrame object (CSMFrame);
    a whole file is returned as a CSMSimulation holding those frames.
    """

    # Frame time line, e.g. "% time 12.500", "% time 5" or "% time 1.5e-3".
    _TIME_PATTERN = re.compile(
        r'% time\s+(?P<time>[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?)'
    )

    # Reports that carry one extra comment line between "% report" and the
    # column header line.
    _REPORTS_WITH_COMMENT = frozenset(
        {'fiber', 'fiber:confine_force', 'fiber:position'}
    )

    # Reports that have no column header line at all.
    _REPORTS_WITHOUT_HEADERS = frozenset({'fiber:distribution'})

    # Reports that this parser refuses to read.
    _REPORTS_UNSUPPORTED = frozenset({'fiber:speckle', 'bead:singles'})

    def __init__(self):
        """Create a parser; it holds no state."""
        pass

    def parse_simFile(self, filename):
        """
        Parse a cytosim trajectory file generated using `report`.

        The lines between each ``% frame`` line and the following ``% end``
        line are handed to :meth:`parse_frame`, and the resulting frame is
        added to the simulation.

        Parameters
        ----------
        filename : str or path-like
            Path of the report file to read.

        Returns
        -------
        CSMSimulation
            Simulation containing one frame per ``% frame`` section.

        Warns
        -----
        UserWarning
            If the file ends before the ``% end`` line of a frame. The
            partial frame is still parsed and added.
        """
        sim = CSMSimulation()
        with open(filename, 'r') as file_object:
            for line in file_object:
                if not line.startswith('% frame'):
                    continue
                frame_lines = []
                # Keep consuming the same file iterator; it simply runs out
                # at EOF, so a truncated file can no longer hang the parser.
                for frame_line in file_object:
                    if frame_line.startswith('% end'):
                        break
                    frame_lines.append(frame_line)
                else:
                    warnings.warn(
                        "Reached end of file before '% end'. "
                        "The last frame may be incomplete.",
                        UserWarning,
                    )
                frame_data = io.StringIO(''.join(frame_lines))
                sim.add_frame(self.parse_frame(frame_data))
        return sim

    @staticmethod
    def _coerce_value(token):
        """Return `token` as int if possible, else float, else unchanged."""
        if '.' not in token and 'e' not in token and 'E' not in token:
            try:
                return int(token)
            except ValueError:
                # Not an integer; "nan" / "inf" are still valid floats.
                pass
        try:
            return float(token)
        except ValueError:
            return token

    def parse_frame(self, frame_data):
        """
        Parse the lines of a single frame of a cytosim trajectory file.

        Expected layout of the frame text::

            % time 1.000
            % report bead:position
            % class posX posY posZ ...
            <data lines>

        For some reports one extra comment line sits between the
        ``% report`` line and the column header line; some reports have no
        header line at all.

        Parameters
        ----------
        frame_data : iterable of str
            Lines of one frame, e.g. an ``io.StringIO``.

        Returns
        -------
        CSMFrame
            The data rows of the frame, with a ``time`` attribute set to the
            frame time (0 if no time line was found). Column names are set
            only when a header line was found and its length matches the
            number of data columns.

        Raises
        ------
        NotImplementedError
            If the report type is one of the unsupported ones.

        Warns
        -----
        UserWarning
            If no report keyword or no data row was found.
        """
        rows = []
        frame_time = 0
        report_keyword = None
        column_headers = []
        has_headers = True
        header_resolved = False
        # Countdown to the header line: 1 means "the current line is the
        # header", 2 means "skip the current line, the next is the header".
        lines_until_header = 0

        for line in frame_data:
            if match := CSMParser._TIME_PATTERN.search(line):
                frame_time = float(match.group('time'))

            if has_headers and not header_resolved:
                if lines_until_header == 1:
                    # Only a comment line can be a header. Anything else is
                    # treated as data further down and the frame keeps the
                    # default column labels.
                    if line.startswith('%'):
                        column_headers = line.split()
                        if '%' in column_headers:
                            column_headers.remove('%')
                    header_resolved = True
                    lines_until_header = 0
                elif lines_until_header == 2:
                    # Extra comment line below "% report": skip it entirely.
                    lines_until_header = 1
                    continue

            if line.startswith('% report'):
                tokens = line.split()
                if len(tokens) < 3:
                    # Keyword missing; the warning after the loop reports it.
                    continue
                report_keyword = tokens[2]
                if report_keyword in CSMParser._REPORTS_UNSUPPORTED:
                    raise NotImplementedError(
                        f"Report type {report_keyword} is not supported yet."
                    )
                if report_keyword in CSMParser._REPORTS_WITH_COMMENT:
                    lines_until_header = 2
                else:
                    lines_until_header = 1
                if report_keyword in CSMParser._REPORTS_WITHOUT_HEADERS:
                    has_headers = False
            elif not line.startswith('%') and not line.isspace():
                # Data line: convert numeric tokens, keep the rest as text.
                rows.append(
                    [CSMParser._coerce_value(token) for token in line.split()]
                )

        if report_keyword is None:
            warnings.warn(
                "Could not find report keyword in frame data. The file may be malformed.",
                UserWarning,
            )
        if not rows:
            warnings.warn(
                "No data found in frame. The file may be empty or malformed.",
                UserWarning,
            )

        table = pd.DataFrame(rows)
        if column_headers and len(table.columns) == len(column_headers):
            table.columns = column_headers
        frame = CSMFrame(table)
        frame.time = frame_time
        return frame
class CYMParser:
    """
    Deprecated

    A class for parsing the configuration file (configuration.cym).

    Returns a dictionary that can be used in a CSMSimulation.
    """

    @staticmethod
    def parse_config(config_file):
        """
        Parse a configuration file using regular expressions.

        Two kinds of entries are collected:

        * single values (``motor_count``, ``cell_radius``,
          ``crosslinker_count``, ``filament_count``), stored as floats;
        * object blocks (``plus_motor``, ``binder``, ``filament``), stored as
          a dictionary of the ``name = value`` lines that follow the object
          line, up to the closing ``}``. Values are converted to float when
          possible and kept as strings otherwise. Attributes found in
          several places for the same object are merged.

        Parameters
        ----------
        config_file : str or path-like
            Path of the configuration file.

        Returns
        -------
        dict
            Parameters found in the file; entries that never matched are
            absent.
        """
        param_dictionary = {}

        # Values captured by these patterns are added directly to the
        # returned dictionary under the same key as the group name.
        value_patterns = {
            'motor_count': re.compile(r'new (?P<motor_count>\d+) couple motor'),
            # Accept a fractional radius instead of truncating it.
            'cell_radius': re.compile(r' +geometry = circle (?P<cell_radius>\d+(?:\.\d+)?)'),
            'crosslinker_count': re.compile(r'new (?P<crosslinker_count>\d+) couple crosslinker'),
            'filament_count': re.compile(r'new (?P<filament_count>\d+) fiber filament'),
        }

        # Lines that open a block of attributes belonging to an object.
        object_patterns = {
            'plus_motor': re.compile(r'set hand plus_motor'),
            'binder': re.compile(r'set hand binder'),
            'filament': re.compile(r'(set fiber filament|new \d+ fiber filament)'),
        }

        # The object attributes end at the closing brace, indented or not.
        end_pattern = re.compile(r'\s*}')
        attribute_pattern = re.compile(r' +(?P<attr_name>\S+) = (?P<attr_val>\S+)')

        with open(config_file, 'r') as file:
            for line in file:
                # Check if the line provides one of the single values.
                for key, rx in value_patterns.items():
                    if match := rx.match(line):
                        param_dictionary[key] = float(match.group(key))
                        break

                # Collect the attributes of an object, if one starts here.
                for obj, rx in object_patterns.items():
                    if not rx.match(line):
                        continue
                    obj_dict = {}
                    # Keep consuming the same file iterator; it runs out at
                    # EOF, so an unterminated block cannot loop forever.
                    for attr_line in file:
                        if end_pattern.match(attr_line):
                            break
                        if match := attribute_pattern.match(attr_line):
                            attr_name = match.group('attr_name')
                            attr_val = match.group('attr_val')
                            try:
                                # Convert to float if possible.
                                obj_dict[attr_name] = float(attr_val)
                            except ValueError:
                                obj_dict[attr_name] = attr_val
                    # Merge, because several places may provide attributes
                    # for the same object.
                    param_dictionary.setdefault(obj, {}).update(obj_dict)
                    break
        return param_dictionary