comparison combine_output.py @ 21:19d8fd10248e

* Added interface to METEXP data store, including tool to fire queries in batch mode * Improved quantification output files of MsClust, among other things sorting the mass list based on intensity (last two columns of quantification files) * Added Molecular Mass calculation method
author pieter.lukasse@wur.nl
date Wed, 05 Mar 2014 17:20:11 +0100
parents 9d5f4f5f764b
children
comparison
equal deleted inserted replaced
20:24fb75fedee0 21:19d8fd10248e
153 ''' 153 '''
154 Writes tab-separated data to file 154 Writes tab-separated data to file
155 @param data: dictionary containing merged dataset 155 @param data: dictionary containing merged dataset
156 @param out_csv: output csv file 156 @param out_csv: output csv file
157 ''' 157 '''
158 header = ['Centrotype', 158 # Columns we don't repeat:
159 header_part1 = ['Centrotype',
159 'cent.Factor', 160 'cent.Factor',
160 'scan nr.', 161 'scan nr.',
161 'R.T. (umin)', 162 'R.T. (umin)',
162 'nr. Peaks', 163 'nr. Peaks',
163 'R.T.', 164 'R.T.']
165 # These are the headers/columns we repeat in case of
166 # combining hits in one line (see alternative_headers method below):
167 header_part2 = [
164 'Name', 168 'Name',
165 'FORMULA', 169 'FORMULA',
166 'Library', 170 'Library',
167 'CAS', 171 'CAS',
168 'Forward', 172 'Forward',
188 outfile_multi_handle = open(out_csv_multi, 'wb') 192 outfile_multi_handle = open(out_csv_multi, 'wb')
189 output_single_handle = csv.writer(outfile_single_handle, delimiter="\t") 193 output_single_handle = csv.writer(outfile_single_handle, delimiter="\t")
190 output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t") 194 output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t")
191 195
192 # Write headers 196 # Write headers
193 output_single_handle.writerow(header) 197 output_single_handle.writerow(header_part1 + header_part2)
194 output_multi_handle.writerow(header * nhits) 198 output_multi_handle.writerow(header_part1 + header_part2 + alternative_headers(header_part2, nhits-1))
195 # Combine all hits for each centrotype into one line 199 # Combine all hits for each centrotype into one line
196 line = [] 200 line = []
197 for centrotype_idx in xrange(len(data)): 201 for centrotype_idx in xrange(len(data)):
202 i = 0
198 for hit in data[centrotype_idx]: 203 for hit in data[centrotype_idx]:
199 line.extend(hit) 204 if i==0:
205 line.extend(hit)
206 else:
207 line.extend(hit[6:])
208 i = i+1
209 # small validation (if error, it is a programming error):
210 if i > nhits:
211 raise Exception('Error: more hits that expected for centrotype_idx ' + centrotype_idx)
200 output_multi_handle.writerow(line) 212 output_multi_handle.writerow(line)
201 line = [] 213 line = []
202 214
203 # Write one line for each centrotype 215 # Write one line for each centrotype
204 for centrotype_idx in xrange(len(data)): 216 for centrotype_idx in xrange(len(data)):
205 for hit in data[centrotype_idx]: 217 for hit in data[centrotype_idx]:
206 output_single_handle.writerow(hit) 218 output_single_handle.writerow(hit)
207 219
def alternative_headers(header_part2, nr_alternative_hits):
    '''
    Builds the column headers for the alternative hits of a combined line.

    Every name in header_part2 is repeated once per alternative hit and
    prefixed with the string 'ALT#_', where # is the 1-based number of the
    alternative.
    @param header_part2: list of header names (strings) repeated for each hit
    @param nr_alternative_hits: number of alternative hits to create headers
                                for; zero or a negative number gives an
                                empty list
    @return: flat list of prefixed header names, grouped per alternative
    '''
    # range() instead of xrange(): xrange only exists on Python 2, while
    # range() behaves the same here on both Python 2 and Python 3.
    return ["ALT" + str(alt_nr) + "_" + header_name
            for alt_nr in range(1, nr_alternative_hits + 1)
            for header_name in header_part2]
208 231
209 def main(): 232 def main():
210 ''' 233 '''
211 Combine Output main function 234 Combine Output main function
212 It will merge the result files from "RankFilter" and "Lookup RI for CAS numbers" 235 It will merge the result files from "RankFilter" and "Lookup RI for CAS numbers"