annotate get_unique_srm.py @ 0:a2b06836de90 draft

planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
author proteore
date Fri, 12 Jul 2019 07:49:45 -0400
parents
children b526dba9dc40
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
1 import argparse, csv, re
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
2
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def get_args(argv=None):
    """Parse the command-line options of the tool.

    Args:
        argv: optional list of argument strings. When None (the default),
            argparse reads the arguments from sys.argv[1:], which is the
            historical behavior of this function.

    Returns:
        The argparse.Namespace holding input_type, input, header,
        column_number, features, ref_file and output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_type", help="type of input (list of id or filename)", required=True)
    parser.add_argument("-i", "--input", help="list of IDs (text or filename)", required=True)
    parser.add_argument("--header", help="true/false if your file contains a header")
    # The help text used to be a copy of the --input one; the value is a column
    # designator that nb_col_to_int() turns into a 0-based index.
    parser.add_argument("-c", "--column_number", help="column of the input file containing the IDs (e.g. c1)")
    parser.add_argument("-f", "--features", help="Protein features to return from SRM Atlas", required=True)
    parser.add_argument("-d", "--ref_file", help="path to reference file", required=True)
    parser.add_argument("-o", "--output", help="output filename", required=True)
    return parser.parse_args(argv)
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
15
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
16 #return the column number in int format
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def nb_col_to_int(nb_col):
    """Convert a column designator such as "c3" (or "3") to a 0-based index.

    Args:
        nb_col: column designator string; every "c" character is removed
            before the integer conversion.

    Returns:
        The 0-based column index as an int.

    Raises:
        SystemExit: when nb_col is not a string or does not contain a valid
            integer once the "c" characters are removed.
    """
    # Imported here because the module only imports argparse, csv and re;
    # without it the error path below failed with a NameError.
    import sys

    try:
        return int(nb_col.replace("c", "")) - 1
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: nb_col is None or not a string;
        # ValueError: the remaining text is not an integer.
        sys.exit("Please specify the column where you would like to apply the filter with valid format")
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
23
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
24 #replace all blank cells with NA
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def blank_to_NA(csv_file):
    """Return a copy of the rows where blank cells are replaced by "NA".

    A cell counts as blank when it equals "", " " or "NaN".

    Args:
        csv_file: iterable of rows, each row being an iterable of cells.

    Returns:
        A new list of rows (lists); the input rows are not modified.
    """
    blank_values = ("", " ", "NaN")
    return [
        ["NA" if value in blank_values else value for value in row]
        for row in csv_file
    ]
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
32
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
33 #convert string to boolean
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def str2bool(v):
    """Interpret a textual flag as a boolean (case-insensitive).

    Args:
        v: string such as "yes"/"true"/"t"/"y"/"1" or
            "no"/"false"/"f"/"n"/"0".

    Returns:
        True or False according to the recognized value.

    Raises:
        argparse.ArgumentTypeError: when the value is not recognized.
    """
    truthy = ('yes', 'true', 't', 'y', '1')
    falsy = ('no', 'false', 'f', 'n', '0')

    lowered = v.lower()
    if lowered in truthy:
        return True
    if lowered in falsy:
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
41
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
42 #return list of (unique) ids from string
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def get_input_ids_from_string(input):
    """Return the unique ids found in a whitespace-separated string.

    Every "_SNP" and "d_" substring is removed, carriage returns are dropped,
    and the text is split on runs of whitespace.

    Args:
        input: text containing ids separated by spaces, tabs or newlines.

    Returns:
        List of unique, non-empty ids in order of first appearance. The
        original implementation de-duplicated through a set, which made the
        order (and therefore the order of the output rows) vary between runs.
    """
    cleaned = input.replace("_SNP", "").replace("d_", "").replace("\r", "")

    ids_list = []
    seen = set()
    for token in re.split(r'\s+', cleaned):
        # Leading/trailing whitespace produces empty tokens; skip them.
        if token == "" or token in seen:
            continue
        seen.add(token)
        ids_list.append(token)

    return ids_list
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
49
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
50 #return input_file and list of unique ids from input file path
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def get_input_ids_from_file(input, nb_col, header):
    """Read a tab-separated file and return its rows and its unique ids.

    Args:
        input: path of the tab-separated input file.
        nb_col: 0-based index of the column holding the ids.
        header: True when the first row of the file is a header.

    Returns:
        Tuple (input_file, ids_list) as produced by one_id_one_line(), with
        the empty id removed from ids_list.
    """
    with open(input, "r") as handle:
        rows = [row for row in csv.reader(handle, delimiter='\t')]

    expanded_rows, unique_ids = one_id_one_line(rows, nb_col, header)

    # The ids are already unique, so at most one empty entry can be present.
    try:
        unique_ids.remove("")
    except ValueError:
        pass

    return expanded_rows, unique_ids
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
59
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
60 #function to check if an id is a UniProt accession number: return True or False
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def check_uniprot (id):
    """Tell whether an id starts with a UniProt accession number.

    The pattern is only anchored at the beginning of the string (re.match),
    so trailing characters after a valid accession are accepted.

    Args:
        id: identifier string to test.

    Returns:
        True when the beginning of id matches the accession pattern,
        False otherwise.
    """
    accession_regex = "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
    return re.match(accession_regex, id) is not None
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
67
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
68 #return input file by adding lines when there are more than one id per line
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def one_id_one_line(input_file, nb_col, header):
    """Expand rows holding several ';'-separated ids into one row per id.

    Whitespace is removed from the id cell of every processed row and an
    empty id cell becomes 'NA' (the row is updated in place). Rows that are
    empty or contain only empty cells are dropped.

    Args:
        input_file: list of rows (lists of strings); when header is true the
            first row is kept as-is at the top of the result.
        nb_col: 0-based index of the column holding the ids.
        header: True when input_file[0] is a header row.

    Returns:
        Tuple (new_file, ids_list). new_file is the expanded list of rows,
        whose id cells keep any "_SNP"/"d_" marker. ids_list holds the unique
        ids with "_SNP" and "d_" removed, in order of first appearance; the
        original implementation de-duplicated through a set, which made this
        order vary between runs.
    """
    if header:
        new_file = [input_file[0]]
        rows = input_file[1:]
    else:
        new_file = []
        rows = input_file

    ids_list = []
    seen = set()

    for line in rows:
        # Skip blank rows and rows made only of empty cells.
        if not line or set(line) == {''}:
            continue

        line[nb_col] = re.sub(r"\s+", "", line[nb_col])
        if line[nb_col] == "":
            line[nb_col] = 'NA'

        if ";" in line[nb_col]:
            cell_ids = line[nb_col].split(";")
            for cell_id in cell_ids:
                new_file.append(line[:nb_col] + [cell_id] + line[nb_col + 1:])
        else:
            cell_ids = [line[nb_col]]
            new_file.append(line)

        for cell_id in cell_ids:
            normalized = cell_id.replace("_SNP", "").replace("d_", "")
            if normalized not in seen:
                seen.add(normalized)
                ids_list.append(normalized)

    return new_file, ids_list
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
95
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def create_srm_atlas_dictionary(features,srm_atlas_csv):
    """Index the reference table by id, keeping only the requested features.

    The first row of srm_atlas_csv is skipped. The id is read from the tenth
    column (index 9) of each row, with "_SNP" and "d_" removed.

    Args:
        features: list of feature names among "PeptideSeq", "SSRT",
            "Length", "type", "PA_AccNum" and "MW".
        srm_atlas_csv: list of rows (lists of strings) of the reference file.

    Returns:
        Dict mapping each id to the list of its feature rows, one per
        reference row, each holding the requested features in the given order.
    """
    column_of = {"PeptideSeq" : 0, "SSRT" : 1 , "Length" : 2 , "type": 3 , "PA_AccNum" : 4, "MW" : 5 }
    wanted_columns = [column_of[name] for name in features]

    atlas = {}
    for row in srm_atlas_csv[1:]:
        accession = row[9].replace("_SNP","").replace("d_","")
        selected = [row[column] for column in wanted_columns]
        atlas.setdefault(accession, []).append(selected)
    return atlas
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
108
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def retrieve_srm_features(srm_atlas,ids):
    """Look up every id in the reference dictionary.

    Args:
        srm_atlas: dict mapping an id to its list of feature rows.
        ids: iterable of ids to look up.

    Returns:
        Dict mapping each id to its entry in srm_atlas, or to the empty
        string when the id is not a key of srm_atlas.
    """
    return {accession: srm_atlas.get(accession, "") for accession in ids}
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
119
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def create_header(input_file,ncol,features):
    """Build column names for an input file that has no header row.

    Columns are named "col1", "col2", ... after the width of the first row;
    the id column is named "Uniprot-AC" and the feature names are appended.

    Args:
        input_file: list of rows; only the length of the first row is used.
        ncol: 0-based index of the id column.
        features: list of feature names to append.

    Returns:
        The list of column names.
    """
    width = len(input_file[0])
    names = ["col" + str(position) for position in range(1, width + 1)]
    names[ncol] = "Uniprot-AC"
    return names + features
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
126
a2b06836de90 planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff changeset
def main():
    """Annotate the input ids with the requested features of the reference file.

    Reads the command-line options, loads the tab-separated reference file,
    collects the ids from either a text list or a column of a tab-separated
    file, and writes one tab-separated output row per matching reference
    entry. Ids without any entry in the reference produce no output row.
    """
    # Imported here because the module only imports argparse, csv and re.
    import sys

    # Get args from command line
    args = get_args()
    features = args.features.split(",")
    header = False
    if args.input_type == "file":
        column_number = nb_col_to_int(args.column_number)
        header = str2bool(args.header)

    # Get reference file
    with open(args.ref_file, "r") as csv_file:
        srm_atlas_csv = list(csv.reader(csv_file, delimiter='\t'))

    # Create the reference dictionary
    srm_atlas = create_srm_atlas_dictionary(features, srm_atlas_csv)

    # Get file and/or ids from input
    if args.input_type == "list":
        ids = get_input_ids_from_string(args.input)
    elif args.input_type == "file":
        input_file, ids = get_input_ids_from_file(args.input, column_number, header)
    else:
        # Previously fell through and failed later with a NameError on `ids`.
        sys.exit("Unknown input type '%s', expected 'list' or 'file'" % args.input_type)

    # Check Uniprot-AC
    if not any(check_uniprot(accession) for accession in ids):
        print("No Uniprot-AC found, please check your input")
        # Same exit status (0) as the former exit() call, without relying on
        # the helper injected by the site module.
        sys.exit()

    # Retrieve features
    result_dict = retrieve_srm_features(srm_atlas, ids)

    # Write output
    with open(args.output, "w") as output:
        writer = csv.writer(output, delimiter="\t")

        # Write header
        if header:
            writer.writerow(input_file[0] + features)
            input_file = input_file[1:]
        elif args.input_type == "file":
            # create_header() already returns the row; wrapping it in another
            # list made writerow() emit a single cell holding the list's repr.
            writer.writerow(create_header(input_file, column_number, features))
        else:
            writer.writerow(["Uniprot-AC"] + features)

        # Write lines, skipping a row identical to the one just written
        previous_line = ""
        if args.input_type == "file":
            for line in input_file:
                # result_dict is keyed by ids without "_SNP"/"d_" (and without
                # the empty id), whereas the file rows keep the raw cell, so
                # normalize before the lookup instead of raising KeyError.
                key = line[column_number].replace("_SNP", "").replace("d_", "")
                for res in result_dict.get(key, ""):
                    output_line = ["NA" if cell == "" or cell == " " or cell == "NaN" else cell for cell in line + res]
                    if previous_line != output_line:
                        writer.writerow(output_line)
                        previous_line = output_line
        elif args.input_type == "list":
            for accession in ids:
                for res in result_dict[accession]:
                    line = ["NA" if cell == "" or cell == " " or cell == "NaN" else cell for cell in [accession] + res]
                    if previous_line != line:
                        writer.writerow(line)
                        previous_line = line


if __name__ == "__main__":
    main()