Mercurial > repos > proteore > proteore_get_unique_peptide_srm_method
annotate get_unique_srm.py @ 0:a2b06836de90 draft
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
author | proteore |
---|---|
date | Fri, 12 Jul 2019 07:49:45 -0400 |
parents | |
children | b526dba9dc40 |
rev | line source |
---|---|
0
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
import argparse
import csv
import re
import sys
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
2 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def get_args():
    """Parse the command-line options of the script.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, exposing
        ``input_type``, ``input``, ``header``, ``column_number``,
        ``features``, ``ref_file`` and ``output``. ``header`` and
        ``column_number`` are optional and are ``None`` when omitted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_type", help="type of input (list of id or filename)", required=True)
    parser.add_argument("-i", "--input", help="list of IDs (text or filename)", required=True)
    parser.add_argument("--header", help="true/false if your file contains a header")
    # The help text used to be a copy of the --input one; describe the option itself.
    parser.add_argument("-c", "--column_number", help="column of the input file containing the IDs (e.g. c1)")
    parser.add_argument("-f", "--features", help="Protein features to return from SRM Atlas", required=True)
    parser.add_argument("-d", "--ref_file", help="path to reference file", required=True)
    parser.add_argument("-o", "--output", help="output filename", required=True)
    return parser.parse_args()
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
15 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
16 #return the column number in int format |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def nb_col_to_int(nb_col):
    """Convert a column designator such as ``"c3"`` into a 0-based index.

    Every ``"c"`` character is removed from ``nb_col`` and the remainder is
    parsed as an integer, from which 1 is subtracted.

    Args:
        nb_col: column designator as a string (e.g. ``"c1"`` or ``"1"``).

    Returns:
        The 0-based column index as an ``int``.

    Raises:
        SystemExit: with an explanatory message when ``nb_col`` is not a
            string or does not contain a valid integer.
    """
    try:
        return int(nb_col.replace("c", "")) - 1
    except (AttributeError, ValueError):
        # AttributeError: nb_col is not a string (e.g. None when -c is omitted).
        # ValueError: the remaining text is not an integer.
        sys.exit("Please specify the column where you would like to apply the filter with valid format")
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
23 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
24 #replace all blank cells with NA |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def blank_to_NA(csv_file):
    """Return a copy of the rows with blank cells replaced by ``"NA"``.

    A cell counts as blank when it equals ``""``, ``" "`` or ``"NaN"``.

    Args:
        csv_file: iterable of rows, each row being an iterable of cells.

    Returns:
        A new list of lists; the input rows are left untouched.
    """
    blank_markers = ("", " ", "NaN")
    return [
        ["NA" if cell in blank_markers else cell for cell in row]
        for row in csv_file
    ]
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
32 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
33 #convert string to boolean |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def str2bool(v):
    """Interpret a textual flag as a boolean (case-insensitive).

    Args:
        v: string such as ``"true"``, ``"No"`` or ``"1"``.

    Returns:
        ``True`` for yes/true/t/y/1 and ``False`` for no/false/f/n/0.

    Raises:
        argparse.ArgumentTypeError: for any other text.
    """
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
41 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
42 #return list of (unique) ids from string |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def get_input_ids_from_string(input):
    """Extract the unique IDs contained in a whitespace-separated string.

    The substrings ``"_SNP"`` and ``"d_"`` and carriage returns are removed,
    newlines and tabs become spaces, and the text is then split on runs of
    whitespace.

    Args:
        input: raw text holding the IDs.

    Returns:
        A list of the distinct non-empty IDs, in no particular order.
    """
    cleaned = input
    for old, new in (("_SNP", ""), ("d_", ""), ("\r", ""), ("\n", " "), ("\t", " ")):
        cleaned = cleaned.replace(old, new)

    unique_ids = set(re.split(r'\s+', cleaned))
    # Leading/trailing whitespace makes the split yield an empty token.
    unique_ids.discard("")
    return list(unique_ids)
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
49 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
50 #return input_file and list of unique ids from input file path |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def get_input_ids_from_file(input, nb_col, header):
    """Read a tab-separated file and collect the IDs of one of its columns.

    The rows are handed to ``one_id_one_line`` together with ``nb_col`` and
    ``header``; the empty string is then dropped from the returned IDs.

    Args:
        input: path of the tab-separated input file.
        nb_col: 0-based index of the column holding the IDs.
        header: whether the first row of the file is a header.

    Returns:
        A ``(rows, ids)`` tuple as produced by ``one_id_one_line``, with
        ``""`` removed from ``ids``.
    """
    with open(input, "r") as handle:
        rows = [row for row in csv.reader(handle, delimiter='\t')]

    expanded_rows, unique_ids = one_id_one_line(rows, nb_col, header)
    try:
        unique_ids.remove("")
    except ValueError:
        # No empty ID in the list: nothing to drop.
        pass

    return expanded_rows, unique_ids
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
59 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
60 #function to check if an id is a UniProt accession number: return True or False |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def check_uniprot (id):
    """Tell whether ``id`` starts with a UniProt-accession-shaped token.

    The pattern is applied with ``re.match``, so it is anchored at the
    start of the string only; trailing characters are not checked.

    Args:
        id: candidate identifier (string).

    Returns:
        ``True`` when the pattern matches at the beginning of ``id``,
        ``False`` otherwise.
    """
    accession_regex = "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
    return re.match(accession_regex, id) is not None
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
67 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
68 #return input file by adding lines when there are more than one id per line |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def one_id_one_line(input_file, nb_col, header):
    """Expand rows holding several ``;``-separated IDs into one row per ID.

    For every non-empty row, all whitespace is stripped from the ID cell,
    which is then split on ``";"``. One output row is produced per
    non-empty fragment, with the other cells copied unchanged. A cell
    without any usable ID yields a single row whose ID is ``"NA"``.

    Args:
        input_file: list of rows (lists of strings). It is not modified.
        nb_col: 0-based index of the column holding the IDs.
        header: when true, the first row is treated as a header and copied
            as is to the output.

    Returns:
        A ``(rows, ids)`` tuple. ``rows`` is the expanded list of rows
        (header first when requested). ``ids`` is the list of distinct IDs
        with ``"_SNP"`` and ``"d_"`` removed, in no particular order; the
        IDs inside ``rows`` keep their original spelling.

    Raises:
        IndexError: if a non-empty row has no column ``nb_col``.
    """
    # Slicing instead of indexing keeps an empty file from raising when a
    # header is requested.
    new_file = input_file[:1] if header else []
    data_rows = input_file[1:] if header else input_file
    ids_list = []

    for line in data_rows:
        # Skip rows that are empty or made only of empty cells.
        if not line or set(line) == {''}:
            continue
        cell = re.sub(r"\s+", "", line[nb_col])
        # "A;" or "A;;B" would otherwise create rows with an empty ID that
        # cannot be looked up later; a cell with no ID at all becomes "NA".
        fragments = [fragment for fragment in cell.split(";") if fragment] or ["NA"]
        for fragment in fragments:
            # Build a new row so that the caller's data is left untouched.
            new_file.append(line[:nb_col] + [fragment] + line[nb_col + 1:])
            ids_list.append(fragment)

    ids_list = list(set(e.replace("_SNP", "").replace("d_", "") for e in ids_list))

    return new_file, ids_list
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
95 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def create_srm_atlas_dictionary(features, srm_atlas_csv):
    """Index the reference table by ID, keeping only the requested features.

    The first row of ``srm_atlas_csv`` is skipped. For each remaining row
    the ID is read from column index 9, with ``"_SNP"`` and ``"d_"``
    removed.

    Args:
        features: list of feature names among ``PeptideSeq``, ``SSRT``,
            ``Length``, ``type``, ``PA_AccNum`` and ``MW``.
        srm_atlas_csv: list of rows (lists of strings) of the reference
            file.

    Returns:
        A dict mapping each ID to the list of its feature lists, one per
        matching row, in file order.

    Raises:
        KeyError: if a requested feature name is unknown.
    """
    column_of = {"PeptideSeq" : 0, "SSRT" : 1 , "Length" : 2 , "type": 3 , "PA_AccNum" : 4, "MW" : 5 }
    wanted_columns = [column_of[name] for name in features]

    atlas = {}
    for row in srm_atlas_csv[1:]:
        accession = row[9].replace("_SNP", "").replace("d_", "")
        atlas.setdefault(accession, []).append([row[col] for col in wanted_columns])
    return atlas
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
108 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def retrieve_srm_features(srm_atlas, ids):
    """Look up each ID in the reference dictionary.

    Args:
        srm_atlas: dict mapping an ID to its list of feature lists.
        ids: iterable of IDs to look up.

    Returns:
        A dict with one entry per ID: the value stored in ``srm_atlas``
        when the ID is known, the empty string otherwise.
    """
    return {accession: srm_atlas.get(accession, "") for accession in ids}
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
119 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def create_header(input_file, ncol, features):
    """Build column names for a file that has no header row.

    Columns are named ``col1``, ``col2``, ... after the width of the first
    row; the ID column is renamed ``Uniprot-AC`` and the feature names are
    appended.

    Args:
        input_file: list of rows; only the length of the first row is used.
        ncol: index of the ID column.
        features: list of feature names to append.

    Returns:
        The list of column names.
    """
    width = len(input_file[0])
    names = ["col{}".format(position) for position in range(1, width + 1)]
    names[ncol] = "Uniprot-AC"
    return names + features
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
126 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
def _blank_cells_to_na(cells):
    """Return ``cells`` with ``""``, ``" "`` and ``"NaN"`` replaced by ``"NA"``."""
    return ["NA" if cell in ("", " ", "NaN") else cell for cell in cells]


def main():
    """Run the script: read the IDs, look them up and write the result.

    Steps:
        1. Parse the command line.
        2. Load the tab-separated reference file and index it by ID.
        3. Collect the IDs from the ``--input`` text (``list`` mode) or from
           a column of the ``--input`` file (``file`` mode).
        4. Stop with a message when no ID looks like a UniProt accession.
        5. Write a tab-separated output: a header row, then one row per
           (input row or ID, reference entry) pair. IDs without any
           reference entry produce no row, and a row identical to the one
           written just before it is skipped.

    Raises:
        SystemExit: when ``--input_type`` is neither ``list`` nor ``file``,
            or when no UniProt accession is found in the input.
    """
    #Get args from command line
    args = get_args()
    if args.input_type not in ("list", "file"):
        # Without this guard the IDs would never be defined further down.
        sys.exit("Unknown input type '{}': expected 'list' or 'file'".format(args.input_type))
    from_file = args.input_type == "file"
    features = args.features.split(",")
    header = False
    if from_file:
        column_number = nb_col_to_int(args.column_number)
        header = str2bool(args.header)

    #Get reference file (Human SRM Atlas)
    with open(args.ref_file, "r") as csv_file:
        srm_atlas_csv = list(csv.reader(csv_file, delimiter='\t'))

    #Create srm Atlas dictionary
    srm_atlas = create_srm_atlas_dictionary(features, srm_atlas_csv)

    #Get file and/or ids from input
    if from_file:
        input_file, ids = get_input_ids_from_file(args.input, column_number, header)
    else:
        ids = get_input_ids_from_string(args.input)

    #Check Uniprot-AC
    if not any(check_uniprot(accession) for accession in ids):
        print("No Uniprot-AC found, please check your input")
        # sys.exit() instead of the interactive-only exit() helper; the
        # exit status stays 0 as before.
        sys.exit()

    #retrieve features
    result_dict = retrieve_srm_features(srm_atlas, ids)

    #write output
    with open(args.output, "w") as output:
        writer = csv.writer(output, delimiter="\t")

        #write header
        if header:
            writer.writerow(input_file[0] + features)
            input_file = input_file[1:]
        elif from_file:
            # create_header() already returns the list of column names;
            # wrapping it in another list wrote its repr into a single cell.
            writer.writerow(create_header(input_file, column_number, features))
        else:
            writer.writerow(["Uniprot-AC"] + features)

        #write lines
        previous_line = ""
        if from_file:
            for line in input_file:
                # result_dict is keyed by IDs without "_SNP"/"d_", whereas
                # the rows keep the original spelling: normalize before the
                # lookup and tolerate IDs absent from the dictionary.
                key = line[column_number].replace("_SNP", "").replace("d_", "")
                for res in result_dict.get(key, ""):
                    output_line = _blank_cells_to_na(line + res)
                    if previous_line != output_line:
                        writer.writerow(output_line)
                        previous_line = output_line
        else:
            for accession in ids:
                for res in result_dict[accession]:
                    output_line = _blank_cells_to_na([accession] + res)
                    if previous_line != output_line:
                        writer.writerow(output_line)
                        previous_line = output_line
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
189 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
190 |
a2b06836de90
planemo upload commit f9de6f4e3302c41e64c39d639bee780e5eafd84d-dirty
proteore
parents:
diff
changeset
|
# Run the tool only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()