annotate vkmz.py @ 6:35b984684450 draft

planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
author eslerm
date Tue, 10 Jul 2018 17:58:35 -0400
parents 04079c34452a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
1 import argparse
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
2 import csv
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
3 import math
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
4 import re
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
5
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
# Command-line interface: one sub-command per supported input format
# ('tsv' or 'xcms'), each of which also accepts the shared options below.
parser = argparse.ArgumentParser()
inputSubparser = parser.add_subparsers(help='Select input type:', dest='input-type')
# Python 3 treats sub-commands as optional unless told otherwise; without one
# the namespace lacks every option read below. Setting the attribute (rather
# than passing a keyword) also works on interpreters whose add_subparsers()
# does not accept `required`.
inputSubparser.required = True
parse_tsv = inputSubparser.add_parser('tsv', help='Use tabular data as input.')
parse_tsv.add_argument('--input', '-i', required=True, help='Path to tabular file. Must include columns: sample ID, mz, polarity, intensity, & retention time.')
parse_xcms = inputSubparser.add_parser('xcms', help='Use XCMS data as input.')
parse_xcms.add_argument('--data-matrix', '-xd', required=True, nargs='?', type=str, help='Path to XCMS data matrix file.')
parse_xcms.add_argument('--sample-metadata', '-xs', required=True, nargs='?', type=str, help='Path to XCMS sample metadata file.')
parse_xcms.add_argument('--variable-metadata', '-xv', required=True, nargs='?', type=str, help='Path to XCMS variable metadata file.')
# Options shared by both sub-commands. A dedicated loop variable is used so
# the subparsers action bound to `inputSubparser` is not overwritten.
for subparser in [parse_tsv, parse_xcms]:
    subparser.add_argument('--output', '-o', nargs='?', type=str, required=True, help='Specify output file path.')
    subparser.add_argument('--error', '-e', nargs='?', type=float, required=True, help='Mass error of mass spectrometer in parts-per-million.')
    subparser.add_argument('--database', '-db', nargs='?', default='databases/bmrb-light.tsv', help='Select database of known formula masses.')
    subparser.add_argument('--directory','-dir', nargs='?', default='', type=str, help='Define path of tool directory. Assumes relative path if unset.')
    subparser.add_argument('--polarity', '-p', choices=['positive','negative'], help='Force polarity mode to positive or negative. Overrides variables in input file.')
    subparser.add_argument('--neutral', '-n', action='store_true', help='Set neutral flag if masses in input data are neutral. No mass adjustment will be made.')
    subparser.add_argument('--unique', '-u', action='store_true', help='Set flag to remove features with multiple predictions.')
args = parser.parse_args()

# store input constants
# 'input-type' is not a valid identifier, so it has to be read with getattr().
INPUT_TYPE = getattr(args, "input-type")
# None unless --polarity was given on the command line.
POLARITY = getattr(args, "polarity")
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
27
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
def polaritySanitizer(sample_polarity):
    """Normalize a polarity label to 'positive' or 'negative'.

    Accepted spellings are 'positive', 'pos', '+' and 'negative', 'neg', '-',
    compared case-insensitively and ignoring surrounding whitespace.

    Args:
        sample_polarity: polarity label as read from an input file.

    Returns:
        The string 'positive' or 'negative'.

    Raises:
        ValueError: if the label is not one of the accepted spellings. The
            explanation is printed and also attached to the exception.
    """
    label = sample_polarity.strip().lower()
    if label in {'positive', 'pos', '+'}:
        return 'positive'
    if label in {'negative', 'neg', '-'}:
        return 'negative'
    message = 'A sample has an unknown polarity type: %s. Polarity in the XCMS sample metadata should be set to "negative" or "positive".' % sample_polarity
    # Still printed so existing console output is unchanged; the exception now
    # carries the same text for callers that report or log it.
    print(message)
    raise ValueError(message)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
37
6
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
# read input
# Each element of vkInput is one feature. The XCMS branch stores
# [sample, polarity, mz, rt, intensity]; the TSV branch stores the first five
# columns of each row in file order (presumably the same layout -- confirm).
vkInput = []
if INPUT_TYPE == "tsv":
    tsvFile = getattr(args, "input")
    try:
        with open(tsvFile, 'r') as f:
            # Skip the header line. The default keeps an empty file from
            # raising an uncaught StopIteration.
            next(f, None)
            tsvData = csv.reader(f, delimiter='\t')
            # NOTE(review): a forced --polarity is not applied in this branch;
            # confirm whether that is handled elsewhere.
            for row in tsvData:
                vkInput.append([row[0],polaritySanitizer(row[1]),float(row[2]),float(row[3]),float(row[4])])
    except ValueError:
        print('The %s data file could not be read.' % tsvFile)
else: # INPUT_TYPE == "xcms"
    # Sample metadata: maps each sample name to its polarity.
    xcmsSampleMetadataFile = getattr(args, "sample_metadata")
    try:
        polarity = {}
        with open(xcmsSampleMetadataFile, 'r') as f:
            xcmsSampleMetadata = csv.reader(f, delimiter='\t')
            next(xcmsSampleMetadata, None) # skip header
            for row in xcmsSampleMetadata:
                sample = row[0]
                # A forced --polarity wins; otherwise the third column of the
                # row is sanitized and used.
                polarity[sample] = POLARITY if POLARITY else polaritySanitizer(row[2])
    except ValueError:
        print('The %s data file could not be read. Check that polarity is set to "negative" or "positive"' % xcmsSampleMetadataFile)
    # Variable metadata: maps each variable name (first column) to the values
    # in its "mz" and "rt" columns, which are located through the header row.
    xcmsVariableMetadataFile = getattr(args, "variable_metadata")
    try:
        mz = {}
        rt = {}
        variable_index = {}
        mz_index = int()
        rt_index = int()
        with open(xcmsVariableMetadataFile, 'r') as f:
            xcmsVariableMetadata = csv.reader(f, delimiter='\t')
            header = next(xcmsVariableMetadata, None)
            if header is not None:
                variable_index = {column: index for index, column in enumerate(header)}
                mz_index = variable_index["mz"]
                rt_index = variable_index["rt"]
                for row in xcmsVariableMetadata:
                    mz[row[0]] = float(row[mz_index])
                    rt[row[0]] = float(row[rt_index])
    except ValueError:
        print('The %s data file could not be read.' % xcmsVariableMetadataFile)
    # Data matrix: the header row holds sample names, every other row holds a
    # variable name followed by one intensity per sample column.
    xcmsDataMatrixFile = getattr(args, "data_matrix")
    try:
        with open(xcmsDataMatrixFile, 'r') as f:
            xcmsDataMatrix = csv.reader(f, delimiter='\t')
            sample_id = next(xcmsDataMatrix, [])
            for row in xcmsDataMatrix:
                for i, intensity in enumerate(row):
                    if i == 0:
                        continue # first column is the variable name
                    if intensity not in {'NA', '#DIV/0!', '0'}:
                        variable = row[0]
                        sample = sample_id[i]
                        # XCMS data may include empty columns
                        if sample != "":
                            vkInput.append([sample, polarity[sample], mz[variable], rt[variable], float(intensity)])
    except ValueError:
        print('The %s data file could not be read.' % xcmsDataMatrixFile)
6
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
113
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
# store||generate remaining constants
OUTPUT = getattr(args, "output")
MASS_ERROR = getattr(args, "error")
UNIQUE = getattr(args, "unique")
NEUTRAL = getattr(args, "neutral")
DATABASE = getattr(args, "database")
DIRECTORY = getattr(args, "directory")
# MASS and FORMULA are parallel lists filled from the database file: MASS[i]
# is the mass (kept as the string read from the file) of FORMULA[i].
MASS = []
FORMULA = []
try:
    with open(DIRECTORY+DATABASE, 'r') as tsv:
        for row in tsv:
            fields = row.split()
            if not fields:
                # A blank line (e.g. a trailing newline) would otherwise fail
                # the two-value unpack below and abort loading.
                continue
            mass, formula = fields
            MASS.append(mass)
            FORMULA.append(formula)
except ValueError:
    print('The %s database could not be loaded.' % DATABASE)
# Index of the last database entry; -1 when nothing was loaded.
MAX_MASS_INDEX = len(MASS)-1
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
132
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
133 # adjust charged mass to a neutral mass
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
def adjust(mass, polarity):
    """Return the neutral mass for a charged mass.

    One proton mass is subtracted when polarity is 'positive'; any other
    polarity value is handled as negative and one proton mass is added.
    """
    proton = 1.007276  # value to adjust by
    if polarity != 'positive':
        # everything that is not 'positive' is treated as negative
        return mass + proton
    return mass - proton
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
142
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
143 # binary search to match a neutral mass to known mass within error
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
144 def predict(mass, uncertainty, left, right):
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
145 mid = ((right - left) / 2) + left
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
146 if left <= mid <= right and mid <= MAX_MASS_INDEX:
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
147 delta = float(MASS[mid]) - mass
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
148 if uncertainty >= abs(delta):
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
149 return mid
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
150 elif uncertainty < delta:
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
151 return predict(mass, uncertainty, left, mid-1)
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
152 else:
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
153 return predict(mass, uncertainty, mid+1, right)
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
154 return -1
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
155
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
156 # find and rank predictions which are adjacent to the index of an initial prediction
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
157 def predictNeighbors(mass, uncertainty, prediction):
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
158 i = 0
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
159 neighbors = [[MASS[prediction],FORMULA[prediction],(float(MASS[prediction])-mass)],]
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
160 while prediction+i+1 <= MAX_MASS_INDEX:
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
161 neighbor = prediction+i+1
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
162 delta = float(MASS[neighbor])-mass
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
163 if uncertainty >= abs(delta):
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
164 neighbors.append([MASS[neighbor],FORMULA[neighbor],delta])
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
165 i += 1
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
166 else:
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
167 break
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
168 i = 0
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
169 while prediction+i-1 >= 0:
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
170 neighbor = prediction+i-1
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
171 delta = float(MASS[neighbor])-mass
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
172 if uncertainty >= abs(delta):
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
173 neighbors.append([MASS[neighbor],FORMULA[neighbor],(float(MASS[neighbor])-mass)])
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
174 i -= 1
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
175 else:
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
176 break
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
177 neighbors = sorted(neighbors, key = (lambda delta: abs(delta[2])))
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
178 return neighbors
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
179
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
# predict formulas by the mass of a feature
def featurePrediction(feature):
    """Annotate a feature with formula predictions and element ratios.

    `feature` is a list in which feature[2] is read as the neutral mass (when
    NEUTRAL is set) or as the m/z that adjust() converts using the polarity in
    feature[1].  The search tolerance is MASS_ERROR parts per million of the
    mass.

    On success the same list is extended in place with the ranked predictions
    (feature[5]) and the H:C, O:C and N:C ratios of the best prediction
    (feature[6], [7], [8]), and is returned.

    Returns None, leaving `feature` unmodified, when
      * predict() finds no formula within tolerance,
      * UNIQUE is set and more than one formula is within tolerance, or
      * the best formula contains no carbon, so the ratios are undefined.
    """
    if NEUTRAL:
        mass = feature[2]
    else:
        mass = adjust(feature[2], feature[1])  # mz & polarity
    uncertainty = mass * MASS_ERROR / 1e6
    prediction = predict(mass, uncertainty, 0, MAX_MASS_INDEX)
    if prediction == -1:  # nothing within tolerance: feature is forgotten
        return None
    predictions = predictNeighbors(mass, uncertainty, prediction)
    if UNIQUE and len(predictions) > 1:
        return None
    formula = predictions[0][1]  # formula of prediction with lowest abs(delta)
    counts = {'C': 0, 'H': 0, 'O': 0, 'N': 0}  # other elements are easy to add
    # each match is (element symbol, digits or ''); an empty count means one atom
    for element, count in re.findall('([A-Z][a-z]?)([0-9]*)', formula):
        if element in counts:
            # accumulate so an element written more than once is fully counted
            counts[element] += int(count) if count else 1
    carbon = float(counts['C'])
    if carbon == 0:
        # ratios to carbon are undefined; drop the feature instead of
        # raising ZeroDivisionError and aborting the whole run
        return None
    hc = counts['H'] / carbon
    oc = counts['O'] / carbon
    nc = counts['N'] / carbon
    # mutate only once every value is known, so rejected features stay untouched
    feature.append(predictions)  # feature[5]
    feature += [hc, oc, nc]  # feature[6], [7], [8]
    return feature
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
211
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
# write output file
def write(vkData):
    """Write the annotated features as a tabular file and as an HTML page.

    `vkData` is an iterable of features; the first nine items of each feature
    are written in the column order sample_id, polarity, mz, rt, intensity,
    predictions, hc, oc, nc.

    Two files are produced:
      * OUTPUT + '.tsv'  - a header line followed by one tab-separated line
        per feature.
      * OUTPUT + '.html' - a copy of DIRECTORY + 'd3.html' in which every line
        starting with 'var data' is replaced by the feature records.

    Failures to open, read or write a file are reported with print() and are
    not re-raised.  The HTML file is only attempted after the tabular file
    was written successfully.
    """
    def quote(text):
        # escape backslashes and single quotes so the value stays one
        # single-quoted JavaScript string literal
        return "'" + text.replace('\\', '\\\\').replace("'", "\\'") + "'"

    tsvPath = OUTPUT + '.tsv'
    records = []
    try:
        # write tabular file and generate json for html output
        with open(tsvPath, 'w') as f:
            f.write('sample_id\tpolarity\tmz\trt\tintensity\tpredictions\thc\toc\tnc\n')
            for feature in vkData:
                fields = [str(feature[column]) for column in range(9)]
                f.write('\t'.join(fields) + '\n')
                records.append(
                    '{sample_id:%s, polarity:%s, mz:%s, rt:%s, intensity:%s, '
                    'predictions:%s, hc:%s, oc:%s, nc:%s}'
                    % ((quote(fields[0]), quote(fields[1])) + tuple(fields[2:]))
                )
    except (IOError, OSError, ValueError):
        # open()/write() failures surface as IOError/OSError, not ValueError
        print('"%s" could not be saved.' % tsvPath)
        return

    # write html
    dataLine = 'var data = [' + ','.join(records) + ']'
    dataPattern = re.compile('^var data.*$', flags=re.M)
    templatePath = DIRECTORY + 'd3.html'
    htmlPath = OUTPUT + '.html'
    try:
        with open(templatePath, 'r') as template, open(htmlPath, 'w') as f:
            for line in template:
                # a callable replacement is inserted verbatim; a plain string
                # would have its backslashes interpreted as regex escapes
                f.write(dataPattern.sub(lambda match: dataLine, line))
    except (IOError, OSError, ValueError):
        print('"%s" could not be read or "%s" could not be written' % (templatePath, htmlPath))
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
233
6
35b984684450 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents: 5
diff changeset
# main
# featurePrediction returns None for features it rejects; those are dropped.
# sort by intensity so D3 draws largest symbols first
vkData = sorted(
    (feature for feature in map(featurePrediction, vkInput) if feature is not None),
    key=lambda feature: feature[4],
    reverse=True
)
write(vkData)