0
|
1 import requests
|
|
2 import json
|
|
3 import time
|
|
4 import urllib
|
|
5 import sys
|
|
6 import csv
|
|
7 import re
|
|
8
|
|
# Run configuration, read positionally from the command line:
#   1 input VCF            2 CRAVAT analyses       3 genome build
#   4 PSM report ('None' disables peptide matching)
#   5 combined output      6 gene-level result     7 non-coding result
#   8 input-errors result
try:
    input_filename = sys.argv[1]
    input_select_bar = sys.argv[2]
    GRCh_build = sys.argv[3]
    psm_filename = sys.argv[4]
    output_filename = sys.argv[5]
    file_3 = sys.argv[6]
    file_4 = sys.argv[7]
    file_5 = sys.argv[8]
except IndexError:
    # Fewer than eight arguments were supplied: fall back to the built-in
    # development defaults.  Every setting is replaced, including any that
    # were read successfully before the missing one.  Only IndexError is
    # caught so that unrelated failures (e.g. KeyboardInterrupt) surface.
    input_filename = 'input/[tgriffin_cguerrer_20160726_MCF7_RNAseq_01_S13_R1_001.vcf].vcf'
    input_select_bar = 'VEST'
    GRCh_build = 'GRCh38'
    output_filename = 'combined_variants.tsv'
    psm_filename = 'input/[ERLIC_MCF7_110kb_R123-CustomProDB_RNA-Seq_cRAP_DB.psm-report].tabular'
    # One timestamp for all three paths so they share the same HH:MM prefix.
    default_stamp = time.strftime("%H:%M")
    file_3 = 'output/' + default_stamp + '_Z_Gene_Level_Analysis.tsv'
    file_4 = 'output/' + default_stamp + '_Z_Variant_Non-coding.Result.tsv'
    file_5 = 'output/' + default_stamp + '_Z_Input_Errors.Result.tsv'
|
|
28
|
|
29
|
|
30 #in_file = open('input_call.txt', "r")
|
|
31 #out_file = open('output_call.txt', "w")
|
|
32
|
|
# Kept from the original script; nothing in the visible code reads this flag.
write_header = True

# Value sent as the 'hg19' form field of the CRAVAT submission:
# 'on' only when the input uses the GRCh37 build, 'off' for anything else.
GRCh37hg19 = 'on' if GRCh_build == 'GRCh37' else 'off'
|
|
38
|
|
# Submit the input file to the CRAVAT staging service
# (http://staging.cravat.us/CRAVAT/rest/service/submit).
# The upload handle lives in a with-block so it is closed as soon as the
# request has been sent, and it is opened in binary mode as requests
# recommends for multipart file uploads.
with open(input_filename, 'rb') as upload_handle:
    submit = requests.post(
        'http://staging.cravat.us/CRAVAT/rest/service/submit',
        files={'inputfile': upload_handle},
        data={
            'email': 'znylund@insilico.us.com',
            'analyses': input_select_bar,
            'hg19': GRCh37hg19,
        },
    )

# Decode the JSON reply once and keep the two fields the script uses:
# the job id (needed for polling and for the result URLs) and the
# submission status.
submit_reply = json.loads(submit.text)
jobid = submit_reply['jobid']
submitted = submit_reply['status']
|
|
48 #out_file.write('\t' + submitted)
|
|
49
|
|
# NOTE(review): neither of these names is referenced again in the visible
# part of the script, and the handle opened here is never closed.
input_file = open(input_filename)
# Pattern for text that contains at least one '#' character.
is_comment_line = re.compile(r".*#+.*")
|
|
52
|
|
53
|
|
# Poll the CRAVAT status endpoint every two seconds until the job reports
# 'Success'.
# NOTE(review): there is no timeout and no handling of a failed job -- if the
# service never answers 'Success' this loop does not terminate.
while True:
    check = requests.get(
        'http://staging.cravat.us/CRAVAT/rest/service/status',
        params={'jobid': jobid},
    )
    # Decode the reply once instead of re-parsing it for every field.
    job_state = json.loads(check.text)
    status = job_state['status']
    resultfileurl = job_state['resultfileurl']
    if status == 'Success':
        break
    time.sleep(2)
|
|
65
|
|
66 #out_file.write('\n')
|
|
67
|
|
# Local names for the two result tables that the script merges afterwards;
# each is prefixed with the current wall-clock time (HH:MM).
file_1 = time.strftime("%H:%M") + '_Z_Variant_Result.tsv'
file_2 = time.strftime("%H:%M") + '_Z_Additional_Details.tsv'

# Download every result table of the finished job, one after another.
results_url = "http://staging.cravat.us/CRAVAT/results/" + jobid + "/"
for remote_name, local_path in (
    ("Variant.Result.tsv", file_1),
    ("Variant_Additional_Details.Result.tsv", file_2),
    ("Gene_Level_Analysis.Result.tsv", file_3),
    ("Variant_Non-coding.Result.tsv", file_4),
    ("Input_Errors.Result.tsv", file_5),
):
    urllib.urlretrieve(results_url + remote_name, local_path)
|
|
79
|
|
# Header cells of the combined output; filled in when the header row is met.
headers = []
# NOTE(review): never read or modified in the visible part of the script.
duplicates = []

# Index of the Variant Additional Details column whose value is looked up in
# the peptide map (presumably the protein sequence change -- TODO confirm).
PROTEIN_CHANGE_COLUMN = 12
# Variant Result columns before this index repeat the Additional Details
# columns, so only the columns from this index on are appended.
FIRST_NEW_RESULT_COLUMN = 11
# Variant Result column that is left out of the output (2nd VEST p-value).
SKIPPED_RESULT_COLUMN = 49


def _load_peptide_map(psm_path):
    """Map protein sequence changes named in a PSM report to their peptides.

    The first line of the report is treated as a header and skipped.  For
    every other row, the cell at index 1 is searched for tokens of the form
    letter-digits-letter (e.g. ``A123T``) and the cell at index 2 is recorded
    as a peptide for each token found.  Rows with fewer than three cells are
    ignored.

    Returns a dict whose values are the peptides of one change joined with
    ';', in order of first appearance and without repeats.
    """
    change_pattern = re.compile('[A-Z][0-9]+[A-Z]')
    peptide_map = {}
    # The with-block closes the report even if parsing fails part-way.
    with open(psm_path) as psm_handle:
        psm_reader = csv.reader(psm_handle, delimiter='\t')
        # Skip the header line; the default keeps an empty file from raising.
        next(psm_reader, None)
        for psm_row in psm_reader:
            if len(psm_row) < 3:
                continue
            pep_seq = psm_row[2]
            for change in change_pattern.findall(psm_row[1]):
                if change not in peptide_map:
                    peptide_map[change] = pep_seq
                elif pep_seq not in peptide_map[change].split(';'):
                    peptide_map[change] += ';' + pep_seq
    return peptide_map


# The literal string 'None' as PSM file name switches peptide matching off.
use_psm = psm_filename != 'None'
# Read the PSM report before the output file is opened, so a bad report path
# fails without truncating an existing output file.
peptide_map = _load_peptide_map(psm_filename) if use_psm else {}

# Merge the Variant Result table (file_1) and the Variant Additional Details
# table (file_2) row by row into the combined output.  'wb' is the mode the
# Python 2 csv module expects for the file it writes to.
with open(file_1) as tsvin_1, open(file_2) as tsvin_2, open(output_filename, 'wb') as out_handle:
    tsvreader_1 = csv.reader(tsvin_1, delimiter='\t')
    tsvreader_2 = csv.reader(tsvin_2, delimiter='\t')
    tsvout = csv.writer(out_handle, delimiter='\t')

    print('Checkpoint 3')
    for row in tsvreader_2:
        # The two tables are read in lockstep: row_2 is the Variant Result
        # row at the same position as the Additional Details row.
        row_2 = next(tsvreader_1)

        if not row or row[0].startswith('#'):
            # Blank lines and '#' comment lines are copied through unchanged.
            tsvout.writerow(row)
        elif row[0] == 'Input line':
            # Header row: all Additional Details headers first, then every
            # Variant Result header that is not already present.
            headers.extend(row)
            for value in row_2:
                if value not in headers:
                    headers.append(value)
            if use_psm:
                headers.insert(1, 'Peptide')
            tsvout.writerow(headers)
        else:
            # Data row.
            if use_psm:
                # Peptide cell goes in at position 1; it stays empty when
                # the row is too short or the change has no peptides.
                if len(row) > PROTEIN_CHANGE_COLUMN:
                    change = row[PROTEIN_CHANGE_COLUMN]
                else:
                    change = None
                row.insert(1, peptide_map.get(change, ''))

            cells = list(row)
            cells.extend(
                value
                for index, value in enumerate(
                    row_2[FIRST_NEW_RESULT_COLUMN:], FIRST_NEW_RESULT_COLUMN)
                if index != SKIPPED_RESULT_COLUMN
            )

            print(cells)
            tsvout.writerow(cells)
|
|
166
|
|
167
|
|
168
|
|
169
|
|
170
|
|
171
|
|
172
|
|
173
|
|
174
|
|
175
|
|
176
|
|
177
|
|
178
|
|
179
|
|
180
|
|
181
|
|
182
|
|
183
|
|
184
|
|
185
|
|
186
|
|
187
|
|
188
|
|
189
|
|
190
|
|
191
|
|
192
|
|
193
|
|
194
|
|
195
|
|
196 #a = 'col1\tcol2\tcol3'
|
|
197 #header_list = a.split('\t')
|
|
198
|
|
199 #loop through the two results, when you first hit header you print out the headers in tabular form
|
|
200 #Print out each header only once
|
|
201 #Combine both headers into one output file
|
|
202 #loop through the rest of the data and assign each value to its assigned header
|
|
203 #combine this all into one output file
|
|
204
|
|
205
|
|
206
|
|
207
|
|
208
|