Mercurial > repos > george-weingart > lefse
comparison format_input.py @ 2:a31c10fe09c8 draft default tip
Fixed bug due to numerical approximation after normalization affecting root-level clades (e.g. "Bacteria" or "Archaea")
| author | george-weingart |
|---|---|
| date | Tue, 07 Jul 2015 13:52:29 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:db64b6287cd6 | 2:a31c10fe09c8 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import sys,os,argparse,pickle,re,numpy | |
| 4 | |
| 5 | |
| 6 | |
| 7 | |
| 8 #*************************************************************************************************************** | |
| 9 #* Log of change * | |
| 10 #* January 16, 2014 - George Weingart - george.weingart@gmail.com * | |
| 11 #* * | |
| 12 #* biom Support * | |
| 13 #* Modified the program to enable it to accept biom files as input * | |
| 14 #* * | |
| 15 #* Added two optional input parameters: * | |
| 16 #* 1. biom_c is the name of the biom metadata to be used as class * | |
| 17 #* 2. biom_s is the name of the biom metadata to be used as subclass * | |
| 18 #* class and subclass are used in the same context as the original * | |
| 19 #* parameters class and subclass * | |
| 20 #* These parameters are totally optional, the default is the program * | |
| 21 #* chooses as class the first metadata received from the conversion * | |
| 22 #* of the biom file into a sequential (pcl) file as generated by * | |
| 23 #* breadcrumbs, and similarly, the second metadata is selected as * | |
| 24 #* subclass. * | |
| 25 #* The syntax or logic for the original non-biom case was NOT changed. * | |
| 26 #* * | |
| 27 #* <******************* IMPORTANT NOTE *************************> * | |
| 28 #* The biom case requires breadcrumbs and therefore there is a * | |
| 29 #* a conditional import of the breadcrumbs modules * | |
| 30 #* If the User uses a biom input and breadcrumbs is not detected, * | |
| 31 #* the run is abnormally ended * | |
| 32 #* breadcrumbs itself needs a biom environment, so if the immport * | |
| 33 #* of biom in breadcrumbs fails, the run is also abnormally | |
| 34 #* ended (Only if the input file was biom) * | |
| 35 #* * | |
| 36 #* USAGE EXAMPLES * | |
| 37 #* -------------- * | |
| 38 #* Case #1: Using a sequential file as input (Old version - did not change * | |
| 39 #* ./format_input.py hmp_aerobiosis_small.txt hmp_aerobiosis_small.in -c 1 -s 2 -u 3 -o 1000000 * | |
| 40 #* Case #2: Using a biom file as input * | |
| 41 #* ./format_input.py hmp_aerobiosis_small.biom hmp_aerobiosis_small.in -o 1000000 * | |
| 42 #* Case #3: Using a biom file as input and override the class and subclass * | |
| 43 #* ./format_input.py lefse.biom hmp_aerobiosis_small.in -biom_c oxygen_availability -biom_s body_site -o 1000000 | |
| 44 #* * | |
| 45 #*************************************************************************************************************** | |
| 46 | |
| 47 def read_input_file(inp_file, CommonArea): | |
| 48 | |
| 49 if inp_file.endswith('.biom'): #* If the file format is biom: | |
| 50 CommonArea = biom_processing(inp_file) #* Process in biom format | |
| 51 return CommonArea #* And return the CommonArea | |
| 52 | |
| 53 with open(inp_file) as inp: | |
| 54 CommonArea['ReturnedData'] = [[v.strip() for v in line.strip().split("\t")] for line in inp.readlines()] | |
| 55 return CommonArea | |
| 56 | |
| 57 def transpose(data): | |
| 58 return zip(*data) | |
| 59 | |
| 60 def read_params(args): | |
| 61 parser = argparse.ArgumentParser(description='LEfSe formatting modules') | |
| 62 parser.add_argument('input_file', metavar='INPUT_FILE', type=str, help="the input file, feature hierarchical level can be specified with | or . and those symbols must not be present for other reasons in the input file.") | |
| 63 parser.add_argument('output_file', metavar='OUTPUT_FILE', type=str, | |
| 64 help="the output file containing the data for LEfSe") | |
| 65 parser.add_argument('--output_table', type=str, required=False, default="", | |
| 66 help="the formatted table in txt format") | |
| 67 parser.add_argument('-f',dest="feats_dir", choices=["c","r"], type=str, default="r", | |
| 68 help="set whether the features are on rows (default) or on columns") | |
| 69 parser.add_argument('-c',dest="class", metavar="[1..n_feats]", type=int, default=1, | |
| 70 help="set which feature use as class (default 1)") | |
| 71 parser.add_argument('-s',dest="subclass", metavar="[1..n_feats]", type=int, default=None, | |
| 72 help="set which feature use as subclass (default -1 meaning no subclass)") | |
| 73 parser.add_argument('-o',dest="norm_v", metavar="float", type=float, default=-1.0, | |
| 74 help="set the normalization value (default -1.0 meaning no normalization)") | |
| 75 parser.add_argument('-u',dest="subject", metavar="[1..n_feats]", type=int, default=None, | |
| 76 help="set which feature use as subject (default -1 meaning no subject)") | |
| 77 parser.add_argument('-m',dest="missing_p", choices=["f","s"], type=str, default="d", | |
| 78 help="set the policy to adopt with missin values: f removes the features with missing values, s removes samples with missing values (default f)") | |
| 79 parser.add_argument('-n',dest="subcl_min_card", metavar="int", type=int, default=10, | |
| 80 help="set the minimum cardinality of each subclass (subclasses with low cardinalities will be grouped together, if the cardinality is still low, no pairwise comparison will be performed with them)") | |
| 81 | |
| 82 parser.add_argument('-biom_c',dest="biom_class", type=str, | |
| 83 help="For biom input files: Set which feature use as class ") | |
| 84 parser.add_argument('-biom_s',dest="biom_subclass", type=str, | |
| 85 help="For biom input files: set which feature use as subclass ") | |
| 86 | |
| 87 args = parser.parse_args() | |
| 88 | |
| 89 return vars(args) | |
| 90 | |
| 91 def remove_missing(data,roc): | |
| 92 if roc == "c": data = transpose(data) | |
| 93 max_len = max([len(r) for r in data]) | |
| 94 to_rem = [] | |
| 95 for i,r in enumerate(data): | |
| 96 if len([v for v in r if not( v == "" or v.isspace())]) < max_len: to_rem.append(i) | |
| 97 if len(to_rem): | |
| 98 for i in to_rem.reverse(): | |
| 99 data.pop(i) | |
| 100 if roc == "c": return transpose(data) | |
| 101 return data | |
| 102 | |
| 103 | |
| 104 def sort_by_cl(data,n,c,s,u): | |
| 105 def sort_lines1(a,b): | |
| 106 return int(a[c] > b[c])*2-1 | |
| 107 def sort_lines2u(a,b): | |
| 108 if a[c] != b[c]: return int(a[c] > b[c])*2-1 | |
| 109 return int(a[u] > b[u])*2-1 | |
| 110 def sort_lines2s(a,b): | |
| 111 if a[c] != b[c]: return int(a[c] > b[c])*2-1 | |
| 112 return int(a[s] > b[s])*2-1 | |
| 113 def sort_lines3(a,b): | |
| 114 if a[c] != b[c]: return int(a[c] > b[c])*2-1 | |
| 115 if a[s] != b[s]: return int(a[s] > b[s])*2-1 | |
| 116 return int(a[u] > b[u])*2-1 | |
| 117 if n == 3: data.sort(sort_lines3) | |
| 118 if n == 2: | |
| 119 if s is None: | |
| 120 data.sort(sort_lines2u) | |
| 121 else: | |
| 122 data.sort(sort_lines2s) | |
| 123 if n == 1: data.sort(sort_lines1) | |
| 124 return data | |
| 125 | |
| 126 def group_small_subclasses(cls,min_subcl): | |
| 127 last = "" | |
| 128 n = 0 | |
| 129 repl = [] | |
| 130 dd = [list(cls['class']),list(cls['subclass'])] | |
| 131 for d in dd: | |
| 132 if d[1] != last: | |
| 133 if n < min_subcl and last != "": | |
| 134 repl.append(d[1]) | |
| 135 last = d[1] | |
| 136 n = 1 | |
| 137 for i,d in enumerate(dd): | |
| 138 if d[1] in repl: dd[i][1] = "other" | |
| 139 dd[i][1] = str(dd[i][0])+"_"+str(dd[i][1]) | |
| 140 cls['class'] = dd[0] | |
| 141 cls['subclass'] = dd[1] | |
| 142 return cls | |
| 143 | |
| 144 def get_class_slices(data): | |
| 145 previous_class = data[0][0] | |
| 146 previous_subclass = data[0][1] | |
| 147 subclass_slices = [] | |
| 148 class_slices = [] | |
| 149 last_cl = 0 | |
| 150 last_subcl = 0 | |
| 151 class_hierarchy = [] | |
| 152 subcls = [] | |
| 153 for i,d in enumerate(data): | |
| 154 if d[1] != previous_subclass: | |
| 155 subclass_slices.append((previous_subclass,(last_subcl,i))) | |
| 156 last_subcl = i | |
| 157 subcls.append(previous_subclass) | |
| 158 if d[0] != previous_class: | |
| 159 class_slices.append((previous_class,(last_cl,i))) | |
| 160 class_hierarchy.append((previous_class,subcls)) | |
| 161 subcls = [] | |
| 162 last_cl = i | |
| 163 previous_subclass = d[1] | |
| 164 previous_class = d[0] | |
| 165 subclass_slices.append((previous_subclass,(last_subcl,i+1))) | |
| 166 subcls.append(previous_subclass) | |
| 167 class_slices.append((previous_class,(last_cl,i+1))) | |
| 168 class_hierarchy.append((previous_class,subcls)) | |
| 169 return dict(class_slices), dict(subclass_slices), dict(class_hierarchy) | |
| 170 | |
| 171 def numerical_values(feats,norm): | |
| 172 mm = [] | |
| 173 for k,v in feats.items(): | |
| 174 feats[k] = [float(val) for val in v] | |
| 175 if norm < 0.0: return feats | |
| 176 tr = zip(*(feats.values())) | |
| 177 mul = [] | |
| 178 fk = feats.keys() | |
| 179 hie = True if sum([k.count(".") for k in fk]) > len(fk) else False | |
| 180 for i in range(len(feats.values()[0])): | |
| 181 if hie: mul.append(sum([t for j,t in enumerate(tr[i]) if fk[j].count(".") < 1 ])) | |
| 182 else: mul.append(sum(tr[i])) | |
| 183 if hie and sum(mul) == 0: | |
| 184 mul = [] | |
| 185 for i in range(len(feats.values()[0])): | |
| 186 mul.append(sum(tr[i])) | |
| 187 for i,m in enumerate(mul): | |
| 188 if m == 0: mul[i] = 0.0 | |
| 189 else: mul[i] = float(norm) / m | |
| 190 for k,v in feats.items(): | |
| 191 feats[k] = [val*mul[i] for i,val in enumerate(v)] | |
| 192 if numpy.mean(feats[k]) and (numpy.std(feats[k])/numpy.mean(feats[k])) < 1e-10: | |
| 193 feats[k] = [ float(round(kv*1e6)/1e6) for kv in feats[k]] | |
| 194 return feats | |
| 195 | |
| 196 def add_missing_levels2(ff): | |
| 197 | |
| 198 if sum( [f.count(".") for f in ff] ) < 1: return ff | |
| 199 | |
| 200 dn = {} | |
| 201 | |
| 202 added = True | |
| 203 while added: | |
| 204 added = False | |
| 205 for f in ff: | |
| 206 lev = f.count(".") | |
| 207 if lev == 0: continue | |
| 208 if lev not in dn: dn[lev] = [f] | |
| 209 else: dn[lev].append(f) | |
| 210 for fn in sorted(dn,reverse=True): | |
| 211 for f in dn[fn]: | |
| 212 fc = ".".join(f.split('.')[:-1]) | |
| 213 if fc not in ff: | |
| 214 ab_all = [ff[fg] for fg in ff if (fg.count(".") == 0 and fg == fc) or (fg.count(".") > 0 and fc == ".".join(fg.split('.')[:-1]))] | |
| 215 ab =[] | |
| 216 for l in [f for f in zip(*ab_all)]: | |
| 217 ab.append(sum([float(ll) for ll in l])) | |
| 218 ff[fc] = ab | |
| 219 added = True | |
| 220 if added: | |
| 221 break | |
| 222 | |
| 223 return ff | |
| 224 | |
| 225 | |
| 226 def add_missing_levels(ff): | |
| 227 if sum( [f.count(".") for f in ff] ) < 1: return ff | |
| 228 | |
| 229 clades2leaves = {} | |
| 230 for f in ff: | |
| 231 fs = f.split(".") | |
| 232 if len(fs) < 2: | |
| 233 continue | |
| 234 for l in range(len(fs)): | |
| 235 n = ".".join( fs[:l] ) | |
| 236 if n in clades2leaves: | |
| 237 clades2leaves[n].append( f ) | |
| 238 else: | |
| 239 clades2leaves[n] = [f] | |
| 240 for k,v in clades2leaves.items(): | |
| 241 if k and k not in ff: | |
| 242 ff[k] = [sum(a) for a in zip(*[[float(fn) for fn in ff[vv]] for vv in v])] | |
| 243 return ff | |
| 244 | |
| 245 | |
| 246 | |
| 247 def modify_feature_names(fn): | |
| 248 ret = fn | |
| 249 | |
| 250 for v in [' ',r'\$',r'\@',r'#',r'%',r'\^',r'\&',r'\*',r'\"',r'\'']: | |
| 251 ret = [re.sub(v,"",f) for f in ret] | |
| 252 for v in ["/",r'\(',r'\)',r'-',r'\+',r'=',r'{',r'}',r'\[',r'\]', | |
| 253 r',',r'\.',r';',r':',r'\?',r'\<',r'\>',r'\.',r'\,']: | |
| 254 ret = [re.sub(v,"_",f) for f in ret] | |
| 255 for v in ["\|"]: | |
| 256 ret = [re.sub(v,".",f) for f in ret] | |
| 257 | |
| 258 ret2 = [] | |
| 259 for r in ret: | |
| 260 if r[0] in ['0','1','2','3','4','5','6','7','8','9']: | |
| 261 ret2.append("f_"+r) | |
| 262 else: ret2.append(r) | |
| 263 | |
| 264 return ret2 | |
| 265 | |
| 266 | |
| 267 def rename_same_subcl(cl,subcl): | |
| 268 toc = [] | |
| 269 for sc in set(subcl): | |
| 270 if len(set([cl[i] for i in range(len(subcl)) if sc == subcl[i]])) > 1: | |
| 271 toc.append(sc) | |
| 272 new_subcl = [] | |
| 273 for i,sc in enumerate(subcl): | |
| 274 if sc in toc: new_subcl.append(cl[i]+"_"+sc) | |
| 275 else: new_subcl.append(sc) | |
| 276 return new_subcl | |
| 277 | |
| 278 | |
| 279 #************************************************************************************* | |
| 280 #* Modifications by George Weingart, Jan 15, 2014 * | |
| 281 #* If the input file is biom: * | |
| 282 #* a. Load an AbundanceTable (Using breadcrumbs) * | |
| 283 #* b. Create a sequential file from the AbundanceTable (de-facto - pcl) * | |
| 284 #* c. Use that file as input to the rest of the program * | |
| 285 #* d. Calculate the c,s,and u parameters, either from the values the User entered * | |
| 286 #* from the meta data values in the biom file or set up defaults * | |
| 287 #* <<<------------- I M P O R T A N T N O T E ------------------->> * | |
| 288 #* breadcrumbs src directory must be included in the PYTHONPATH * | |
| 289 #* <<<------------- I M P O R T A N T N O T E ------------------->> * | |
| 290 #************************************************************************************* | |
| 291 def biom_processing(inp_file): | |
| 292 CommonArea = dict() #* Set up a dictionary to return | |
| 293 CommonArea['abndData'] = AbundanceTable.funcMakeFromFile(inp_file, #* Create AbundanceTable from input biom file | |
| 294 cDelimiter = None, | |
| 295 sMetadataID = None, | |
| 296 sLastMetadataRow = None, | |
| 297 sLastMetadata = None, | |
| 298 strFormat = None) | |
| 299 | |
| 300 #**************************************************************** | |
| 301 #* Building the data element here * | |
| 302 #**************************************************************** | |
| 303 ResolvedData = list() #This is the Resolved data that will be returned | |
| 304 IDMetadataName = CommonArea['abndData'].funcGetIDMetadataName() #* ID Metadataname | |
| 305 IDMetadata = [CommonArea['abndData'].funcGetIDMetadataName()] #* The first Row | |
| 306 for IDMetadataEntry in CommonArea['abndData'].funcGetMetadataCopy()[IDMetadataName]: #* Loop on all the metadata values | |
| 307 IDMetadata.append(IDMetadataEntry) | |
| 308 ResolvedData.append(IDMetadata) #Add the IDMetadata with all its values to the resolved area | |
| 309 for key, value in CommonArea['abndData'].funcGetMetadataCopy().iteritems(): | |
| 310 if key != IDMetadataName: | |
| 311 MetadataEntry = list() #* Set it up | |
| 312 MetadataEntry.append(key) #* And post it to the area | |
| 313 for x in value: | |
| 314 MetadataEntry.append(x) #* Append the metadata value name | |
| 315 ResolvedData.append(MetadataEntry) | |
| 316 for AbundanceDataEntry in CommonArea['abndData'].funcGetAbundanceCopy(): #* The Abundance Data | |
| 317 lstAbundanceDataEntry = list(AbundanceDataEntry) #Convert tuple to list | |
| 318 ResolvedData.append(lstAbundanceDataEntry) #Append the list to the metadata list | |
| 319 CommonArea['ReturnedData'] = ResolvedData #Post the results | |
| 320 return CommonArea | |
| 321 | |
| 322 | |
| 323 #******************************************************************************* | |
| 324 #* Check the params and override in the case of biom * | |
| 325 #******************************************************************************* | |
| 326 def check_params_for_biom_case(params, CommonArea): | |
| 327 CommonArea['MetadataNames'] = list() #Metadata names | |
| 328 params['original_class'] = params['class'] #Save the original class | |
| 329 params['original_subclass'] = params['subclass'] #Save the original subclass | |
| 330 params['original_subject'] = params['subject'] #Save the original subclass | |
| 331 | |
| 332 | |
| 333 TotalMetadataEntriesAndIDInBiomFile = len(CommonArea['abndData'].funcGetMetadataCopy()) # The number of metadata entries | |
| 334 for i in range(0,TotalMetadataEntriesAndIDInBiomFile): #* Populate the meta data names table | |
| 335 CommonArea['MetadataNames'].append(CommonArea['ReturnedData'][i][0]) #Add the metadata name | |
| 336 | |
| 337 | |
| 338 #**************************************************** | |
| 339 #* Setting the params here * | |
| 340 #**************************************************** | |
| 341 | |
| 342 if TotalMetadataEntriesAndIDInBiomFile > 0: #If there is at least one entry - has to be the subject | |
| 343 params['subject'] = 1 | |
| 344 if TotalMetadataEntriesAndIDInBiomFile == 2: #If there are 2 - The first is the subject and the second has to be the metadata, and that is the class | |
| 345 params['class'] = 2 | |
| 346 if TotalMetadataEntriesAndIDInBiomFile == 3: #If there are 3: Set up default that the second entry is the class and the third is the subclass | |
| 347 params['class'] = 2 | |
| 348 params['subclass'] = 3 | |
| 349 FlagError = False #Set up error flag | |
| 350 | |
| 351 if not params['biom_class'] is None and not params['biom_subclass'] is None: #Check if the User passed a valid class and subclass | |
| 352 if params['biom_class'] in CommonArea['MetadataNames']: | |
| 353 params['class'] = CommonArea['MetadataNames'].index(params['biom_class']) +1 #* Set up the index for that metadata | |
| 354 else: | |
| 355 FlagError = True | |
| 356 if params['biom_subclass'] in CommonArea['MetadataNames']: | |
| 357 params['subclass'] = CommonArea['MetadataNames'].index(params['biom_subclass']) +1 #* Set up the index for that metadata | |
| 358 else: | |
| 359 FlagError = True | |
| 360 if FlagError == True: #* If the User passed an invalid class | |
| 361 print "**Invalid biom class or subclass passed - Using defaults: First metadata=class, Second Metadata=subclass\n" | |
| 362 params['class'] = 2 | |
| 363 params['subclass'] = 3 | |
| 364 return params | |
| 365 | |
| 366 | |
| 367 | |
| 368 if __name__ == '__main__': | |
| 369 CommonArea = dict() #Build a Common Area to pass variables in the biom case | |
| 370 params = read_params(sys.argv) | |
| 371 | |
| 372 #************************************************************* | |
| 373 #* Conditionally import breadcrumbs if file is a biom file * | |
| 374 #* If it is and no breadcrumbs found - abnormally exit * | |
| 375 #************************************************************* | |
| 376 if params['input_file'].endswith('.biom'): | |
| 377 try: | |
| 378 from lefsebiom.ConstantsBreadCrumbs import * | |
| 379 from lefsebiom.AbundanceTable import * | |
| 380 except ImportError: | |
| 381 sys.stderr.write("************************************************************************************************************ \n") | |
| 382 sys.stderr.write("* Error: Breadcrumbs libraries not detected - required to process biom files - run abnormally terminated * \n") | |
| 383 sys.stderr.write("************************************************************************************************************ \n") | |
| 384 exit(1) | |
| 385 | |
| 386 | |
| 387 if type(params['subclass']) is int and int(params['subclass']) < 1: | |
| 388 params['subclass'] = None | |
| 389 if type(params['subject']) is int and int(params['subject']) < 1: | |
| 390 params['subject'] = None | |
| 391 | |
| 392 | |
| 393 CommonArea = read_input_file(sys.argv[1], CommonArea) #Pass The CommonArea to the Read | |
| 394 data = CommonArea['ReturnedData'] #Select the data | |
| 395 | |
| 396 if sys.argv[1].endswith('biom'): #* Check if biom: | |
| 397 params = check_params_for_biom_case(params, CommonArea) #Check the params for the biom case | |
| 398 | |
| 399 if params['feats_dir'] == "c": | |
| 400 data = transpose(data) | |
| 401 | |
| 402 ncl = 1 | |
| 403 if not params['subclass'] is None: ncl += 1 | |
| 404 if not params['subject'] is None: ncl += 1 | |
| 405 | |
| 406 first_line = zip(*data)[0] | |
| 407 | |
| 408 first_line = modify_feature_names(list(first_line)) | |
| 409 | |
| 410 data = zip( first_line, | |
| 411 *sort_by_cl(zip(*data)[1:], | |
| 412 ncl, | |
| 413 params['class']-1, | |
| 414 params['subclass']-1 if not params['subclass'] is None else None, | |
| 415 params['subject']-1 if not params['subject'] is None else None)) | |
| 416 # data.insert(0,first_line) | |
| 417 # data = remove_missing(data,params['missing_p']) | |
| 418 cls = {} | |
| 419 | |
| 420 cls_i = [('class',params['class']-1)] | |
| 421 if params['subclass'] > 0: cls_i.append(('subclass',params['subclass']-1)) | |
| 422 if params['subject'] > 0: cls_i.append(('subject',params['subject']-1)) | |
| 423 cls_i.sort(lambda x, y: -cmp(x[1],y[1])) | |
| 424 for v in cls_i: cls[v[0]] = data.pop(v[1])[1:] | |
| 425 if not params['subclass'] > 0: cls['subclass'] = [str(cl)+"_subcl" for cl in cls['class']] | |
| 426 | |
| 427 cls['subclass'] = rename_same_subcl(cls['class'],cls['subclass']) | |
| 428 # if 'subclass' in cls.keys(): cls = group_small_subclasses(cls,params['subcl_min_card']) | |
| 429 class_sl,subclass_sl,class_hierarchy = get_class_slices(zip(*cls.values())) | |
| 430 | |
| 431 feats = dict([(d[0],d[1:]) for d in data]) | |
| 432 | |
| 433 feats = add_missing_levels(feats) | |
| 434 | |
| 435 feats = numerical_values(feats,params['norm_v']) | |
| 436 out = {} | |
| 437 out['feats'] = feats | |
| 438 out['norm'] = params['norm_v'] | |
| 439 out['cls'] = cls | |
| 440 out['class_sl'] = class_sl | |
| 441 out['subclass_sl'] = subclass_sl | |
| 442 out['class_hierarchy'] = class_hierarchy | |
| 443 | |
| 444 if params['output_table']: | |
| 445 with open( params['output_table'], "w") as outf: | |
| 446 if 'class' in cls: outf.write( "\t".join(list(["class"])+list(cls['class'])) + "\n" ) | |
| 447 if 'subclass' in cls: outf.write( "\t".join(list(["subclass"])+list(cls['subclass'])) + "\n" ) | |
| 448 if 'subject' in cls: outf.write( "\t".join(list(["subject"])+list(cls['subject'])) + "\n" ) | |
| 449 for k,v in out['feats'].items(): outf.write( "\t".join([k]+[str(vv) for vv in v]) + "\n" ) | |
| 450 | |
| 451 with open(params['output_file'], 'wb') as back_file: | |
| 452 pickle.dump(out,back_file) | |
| 453 |
