proteore_id_converter: id_converter.py comparison

comparison id_converter.py @ 18:5252bbcfbdd7 draft

planemo upload commit dc6d9d8f1808c4c6bcc42ac6861a8b811e4cee58-dirty

author	proteore
date	Fri, 10 May 2019 10:38:46 -0400
parents	1e45ea50f145
children	9d758344d36e

comparison

equal deleted inserted replaced

-:1e45ea50f145
+:5252bbcfbdd7
-import sys, os, argparse, re, csv
+import sys, os, argparse, re, csv, itertools
 def get_args() :
 parser = argparse.ArgumentParser()
 parser.add_argument("-d", "--ref_file", help="path to reference file: <species>_id_mapping.tsv", required=True)
 parser.add_argument("--input_type", help="type of input (list of id or filename)", required=True)
 ids_list= list(set(ids_list))
 return new_file, ids_list
def output_one_id_one_line(line,convert_ids,target_ids):
    """Expand one input line into one output row per combination of converted ids.

    line        -- list of cells already present on the input line
    convert_ids -- one entry per target id type, in the same order as
                   target_ids; each entry is a collection of converted ids,
                   or a plain string (the mapping step stores "" when the
                   input id is unknown)
    target_ids  -- list of the requested target id type names

    Returns a list of rows; each row is `line` followed by one converted id
    per target id type. Id types listed in ids_not_processed are not
    expanded: all their ids stay on a single row, joined with ";".

    An entry that is empty or is a plain string counts as a single value, so
    an id without any mapping still yields exactly one row (with blank
    cells) instead of being dropped from the output. convert_ids itself is
    never modified.
    """
    #ids with multiple ids per line in output file
    ids_not_processed = ["GI","PDB","GO","PIR","MIM","UniGene","BioGrid","STRING"]
    #columns of target_ids whose ids must stay together on one line
    joined_columns = set(target_ids.index(id_type) for id_type in ids_not_processed if id_type in target_ids)

    #build a normalised copy: the caller's list must not be altered
    columns = []
    for index, values in enumerate(convert_ids):
        if isinstance(values, str):
            values = [values]       #a bare string is one id, not a sequence of characters
        else:
            values = list(values)
        if not values:
            values = [""]           #an empty column would make the product below empty
        if index in joined_columns:
            values = [";".join(values)]
        columns.append(values)

    #all possibilities between lists of ids, each appended to the rest of the line
    return [line+list(ids) for ids in itertools.product(*columns)]
 #return the column number in int format
 def nb_col_to_int(nb_col):
 try :
 nb_col = int(nb_col.replace("c", "")) - 1
 return nb_col
 result_dict = {}
 for id in ids :
 for target_id in id_out :
 if id in ids_dictionary :
-res = ";".join(ids_dictionary[id][target_id])
+res = ids_dictionary[id][target_id]
 else :
 res=""
 if id in result_dict :
 result_dict[id].append(res)
 else :
 ids_dictionary[id][ids_dictionary_index[other_id_type]] |= set(line[other_id_type].replace(" ","").split(";"))
 if len(ids_dictionary[id][ids_dictionary_index[other_id_type]]) > 1 and '' in ids_dictionary[id][ids_dictionary_index[other_id_type]] :
 ids_dictionary[id][ids_dictionary_index[other_id_type]].remove('')
+print ("dictionary created")
 #Get file and/or ids from input
 if args.input_type == "list" :
 ids = get_input_ids_from_string(args.input)
 elif args.input_type == "file" :
 input_file, ids = get_input_ids_from_file(args.input,args.column_number,header)
+print ("starting mapping")
 #Mapping ids
 result_dict = map_to_dictionary(ids,ids_dictionary,args.id_type,target_ids)
+print ("mapping done")
+print ("creating output file")
 #creating output file
-if header :
-output_file=[input_file[0]+target_ids]
-input_file = input_file[1:]
-else :
-output_file=[[args.id_type]+target_ids]
-if args.input_type=="file" :
-for line in input_file :
-output_file.append(line+result_dict[line[args.column_number]])
-elif args.input_type=="list" :
-for id in ids :
-output_file.append([id]+result_dict[id])
-#convert blank to NA
-output_file = blank_to_NA(output_file)
-#write output file
 with open(args.output,"w") as output :
 writer = csv.writer(output,delimiter="\t")
-writer.writerows(output_file)
+#writer.writerows(output_file)
+#write header
+if header :
+writer.writerow(input_file[0]+target_ids)
+input_file = input_file[1:]
+else :
+writer.writerow([args.id_type]+target_ids)
+#write lines
+if args.input_type=="file" :
+for line in input_file :
+tmp = output_one_id_one_line(line,result_dict[line[args.column_number]],target_ids)
+tmp = blank_to_NA(tmp)
+for row in tmp :
+writer.writerow(row)
+elif args.input_type=="list" :
+for id in ids :
+tmp = output_one_id_one_line([id],result_dict[id],target_ids)
+tmp = blank_to_NA(tmp)
+for row in tmp :
+writer.writerow(row)
+print ("output file created")
 if __name__ == "__main__":
 main()

Mercurial > repos > proteore > proteore_id_converter

comparison id_converter.py @ 18:5252bbcfbdd7 draft