Mercurial > repos > eslerm > vkmz
annotate vkmz.py @ 6:35b984684450 draft
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
author | eslerm |
---|---|
date | Tue, 10 Jul 2018 17:58:35 -0400 |
parents | 04079c34452a |
children |
rev | line source |
---|---|
6
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
1 import argparse |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
2 import csv |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
3 import math |
0
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
4 import re |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
5 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
# Command-line interface: one sub-command per supported input format
# ('tsv' or 'xcms'), each accepting the shared options added below.
parser = argparse.ArgumentParser()
inputSubparser = parser.add_subparsers(help='Select input type:', dest='input-type')
# Python 3 made sub-commands optional by default. Without this, running
# with no sub-command leaves 'input-type' as None and the script falls
# through to the XCMS branch, failing with an AttributeError instead of a
# usage message. Setting the attribute works on Python 2 and 3 alike.
inputSubparser.required = True

parse_tsv = inputSubparser.add_parser('tsv', help='Use tabular data as input.')
parse_tsv.add_argument('--input', '-i', required=True, help='Path to tabular file. Must include columns: sample ID, mz, polarity, intensity, & retention time.')

parse_xcms = inputSubparser.add_parser('xcms', help='Use XCMS data as input.')
parse_xcms.add_argument('--data-matrix', '-xd', required=True, nargs='?', type=str, help='Path to XCMS data matrix file.')
parse_xcms.add_argument('--sample-metadata', '-xs', required=True, nargs='?', type=str, help='Path to XCMS sample metadata file.')
parse_xcms.add_argument('--variable-metadata', '-xv', required=True, nargs='?', type=str, help='Path to XCMS variable metadata file.')

# Options shared by both sub-commands. A dedicated loop name is used so the
# sub-parser action held in inputSubparser is not overwritten.
for subparser in (parse_tsv, parse_xcms):
    subparser.add_argument('--output', '-o', nargs='?', type=str, required=True, help='Specify output file path.')
    subparser.add_argument('--error', '-e', nargs='?', type=float, required=True, help='Mass error of mass spectrometer in parts-per-million.')
    subparser.add_argument('--database', '-db', nargs='?', default='databases/bmrb-light.tsv', help='Select database of known formula masses.')
    subparser.add_argument('--directory','-dir', nargs='?', default='', type=str, help='Define path of tool directory. Assumes relative path if unset.')
    subparser.add_argument('--polarity', '-p', choices=['positive','negative'], help='Force polarity mode to positive or negative. Overrides variables in input file.')
    subparser.add_argument('--neutral', '-n', action='store_true', help='Set neutral flag if masses in input data are neutral. No mass adjustmnet will be made.')
    subparser.add_argument('--unique', '-u', action='store_true', help='Set flag to remove features with multiple predictions.')
args = parser.parse_args()

# store input constants
# 'input-type' contains a hyphen, so it can only be read with getattr().
INPUT_TYPE = getattr(args, "input-type")
# None unless --polarity was given on the command line.
POLARITY = getattr(args, "polarity")
0
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
27 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
def polaritySanitizer(sample_polarity):
    """Normalize a polarity label to 'positive' or 'negative'.

    Matching is case-insensitive and ignores surrounding whitespace.
    Accepted spellings are 'positive', 'pos', '+' and 'negative', 'neg', '-'.

    Raises:
        ValueError: if the label is not one of the accepted spellings. The
            explanation is printed (as before) and also attached to the
            exception so it is not lost when the caller handles the error.
    """
    normalized = sample_polarity.strip().lower()
    if normalized in {'positive', 'pos', '+'}:
        return 'positive'
    if normalized in {'negative', 'neg', '-'}:
        return 'negative'
    message = 'A sample has an unknown polarity type: %s. Polarity in the XCMS sample metadata should be set to "negative" or "positive".' % sample_polarity
    print(message)
    raise ValueError(message)
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
37 |
6
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
# read input
# vkInput collects one five-element list per feature. The XCMS branch stores
# [sample, polarity, mz, rt, intensity]; the TSV branch stores columns 0-4 of
# each row in file order, with column 1 passed through polaritySanitizer.
vkInput = []
if INPUT_TYPE == "tsv":
    tsvFile = args.input
    # NOTE(review): the --polarity override is only applied in the XCMS
    # branch below; TSV rows always use the polarity found in the file.
    # Confirm whether that is intended.
    try:
        with open(tsvFile, 'r') as handle:
            next(handle)  # skip header line
            for record in csv.reader(handle, delimiter='\t'):
                vkInput.append([
                    record[0],
                    polaritySanitizer(record[1]),
                    float(record[2]),
                    float(record[3]),
                    float(record[4]),
                ])
    except ValueError:
        print('The %s data file could not be read.' % tsvFile)
else:  # INPUT_TYPE == "xcms"
    # Sample metadata: map each sample name (column 0) to its polarity,
    # taken from column 2 unless forced on the command line.
    xcmsSampleMetadataFile = args.sample_metadata
    try:
        polarity = {}
        with open(xcmsSampleMetadataFile, 'r') as handle:
            sample_rows = csv.reader(handle, delimiter='\t')
            next(sample_rows, None)  # skip header
            for record in sample_rows:
                sample = record[0]
                polarity[sample] = POLARITY if POLARITY else polaritySanitizer(record[2])
    except ValueError:
        print('The %s data file could not be read. Check that polarity is set to "negative" or "positive"' % xcmsSampleMetadataFile)

    # Variable metadata: map each variable name (column 0) to its mz and rt,
    # locating those columns by the names found in the header row.
    xcmsVariableMetadataFile = args.variable_metadata
    try:
        mz = {}
        rt = {}
        variable_index = {}
        mz_index = 0
        rt_index = 0
        with open(xcmsVariableMetadataFile, 'r') as handle:
            variable_rows = csv.reader(handle, delimiter='\t')
            header = next(variable_rows, None)
            if header is not None:
                for position, column in enumerate(header):
                    variable_index[column] = position
                mz_index = variable_index["mz"]
                rt_index = variable_index["rt"]
                for record in variable_rows:
                    mz[record[0]] = float(record[mz_index])
                    rt[record[0]] = float(record[rt_index])
    except ValueError:
        print('The %s data file could not be read.' % xcmsVariableMetadataFile)

    # Data matrix: the first row holds sample names; every later row holds a
    # variable name followed by one intensity per sample column.
    xcmsDataMatrixFile = args.data_matrix
    try:
        with open(xcmsDataMatrixFile, 'r') as handle:
            matrix_rows = csv.reader(handle, delimiter='\t')
            for row_number, record in enumerate(matrix_rows):
                if row_number == 0:
                    sample_id = record
                    continue
                # Column 0 is the variable name, so intensities start at 1.
                for column in range(1, len(record)):
                    intensity = record[column]
                    if intensity in {'NA', '#DIV/0!', '0'}:
                        continue
                    variable = record[0]
                    sample = sample_id[column]
                    # XCMS data may include empty columns
                    if sample != "":
                        vkInput.append([sample, polarity[sample], mz[variable], rt[variable], float(intensity)])
    except ValueError:
        print('The %s data file could not be read.' % xcmsDataMatrixFile)
6
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
113 |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
# store||generate remaining constants
OUTPUT = args.output
MASS_ERROR = args.error
UNIQUE = args.unique
NEUTRAL = args.neutral
DATABASE = args.database
DIRECTORY = args.directory

# Parallel lists filled from the database file: MASS[k] is the mass (kept
# as the string read from the file) belonging to FORMULA[k].
MASS = []
FORMULA = []
try:
    with open(DIRECTORY + DATABASE, 'r') as database_file:
        for line in database_file:
            # Unpacking raises ValueError unless the line has exactly two
            # whitespace-separated fields; loading stops at that line.
            mass, formula = line.split()
            MASS.append(mass)
            FORMULA.append(formula)
except ValueError:
    print('The %s database could not be loaded.' % DATABASE)
# Highest valid index into MASS/FORMULA (-1 when nothing was loaded).
MAX_MASS_INDEX = len(MASS) - 1
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
132 |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
133 # adjust charged mass to a neutral mass |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
def adjust(mass, polarity):
    """Shift a charged mass by one proton mass to obtain a neutral mass.

    The proton mass is subtracted when polarity is 'positive' and added for
    any other value (callers are expected to pass the sanitized string
    'negative' in that case).
    """
    proton = 1.007276  # value to adjust by
    if polarity == 'positive':
        return mass - proton
    # sanitized to negative
    return mass + proton
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
142 |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
143 # binary search to match a neutral mass to known mass within error |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
def predict(mass, uncertainty, left, right):
    """Binary-search MASS for an entry within `uncertainty` of `mass`.

    Searches the inclusive index range [left, right] of the module-level
    MASS list; the search direction assumes MASS is in ascending order.

    Returns:
        The index of a matching entry (the first one the search lands on,
        not necessarily the closest), or -1 when no entry in the range lies
        within the uncertainty.
    """
    while True:
        # Floor division keeps the midpoint an integer on Python 3, where
        # "/" would yield a float and make MASS[mid] raise TypeError.
        mid = left + (right - left) // 2
        if not (left <= mid <= right and mid <= MAX_MASS_INDEX):
            # Range exhausted (left has passed right) or out of bounds.
            return -1
        delta = float(MASS[mid]) - mass
        if uncertainty >= abs(delta):
            return mid
        if uncertainty < delta:
            # Candidate is too heavy: continue in the lower half.
            right = mid - 1
        else:
            # Candidate is too light: continue in the upper half.
            left = mid + 1
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
155 |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
156 # find and rank predictions which are adjacent to the index of an initial prediction |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
def predictNeighbors(mass, uncertainty, prediction):
    """Collect the table entries around `prediction` that also match `mass`.

    Starting from the index `prediction`, the MASS table is walked first
    towards higher indices and then towards lower indices; each walk stops at
    the first entry whose mass differs from `mass` by more than `uncertainty`.

    Returns:
        A list of [MASS entry, FORMULA entry, delta] rows (delta being the
        table mass minus `mass`), ordered by ascending abs(delta). The row
        for `prediction` itself is always included.
    """
    def row(index):
        # one result row for the table entry at `index`
        return [MASS[index], FORMULA[index], float(MASS[index]) - mass]

    ranked = [row(prediction)]

    # entries above the initial prediction
    index = prediction + 1
    while index <= MAX_MASS_INDEX:
        candidate = row(index)
        if not abs(candidate[2]) <= uncertainty:
            break
        ranked.append(candidate)
        index += 1

    # entries below the initial prediction
    index = prediction - 1
    while index >= 0:
        candidate = row(index)
        if not abs(candidate[2]) <= uncertainty:
            break
        ranked.append(candidate)
        index -= 1

    # best match (smallest absolute mass difference) first
    ranked.sort(key=lambda candidate: abs(candidate[2]))
    return ranked
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
179 |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
180 # predict formulas by the mass of a feature |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
def featurePrediction(feature):
    """Predict formulas for one feature and append the results to it.

    `feature` is a list; feature[2] is used as the mass when NEUTRAL is set,
    otherwise feature[2] (mz) and feature[1] (polarity) are passed to
    adjust() to obtain the mass. The accepted mass window is
    mass * MASS_ERROR / 1e6 (presumably MASS_ERROR is in parts per million;
    confirm against the argument parser).

    On success the feature gains four trailing items: the ranked prediction
    list from predictNeighbors(), followed by the H:C, O:C and N:C ratios of
    the best-ranked formula. The same list object is returned.

    Returns None (feature is dropped by the caller) when:
      * no table mass lies within the window,
      * UNIQUE is set and more than one formula matches, or
      * the best-ranked formula contains no carbon, since the element ratios
        are then undefined (this used to raise ZeroDivisionError).
    """
    if NEUTRAL:
        mass = feature[2]
    else:
        mass = adjust(feature[2], feature[1])  # mz & polarity
    uncertainty = mass * MASS_ERROR / 1e6

    prediction = predict(mass, uncertainty, 0, MAX_MASS_INDEX)
    if prediction == -1:
        return None  # nothing within tolerance: the feature is forgotten

    predictions = predictNeighbors(mass, uncertainty, prediction)
    if UNIQUE and len(predictions) > 1:
        return None

    formula = predictions[0][1]  # formula of prediction with lowest abs(delta)
    # split into element symbols and counts, e.g. "C6H12O6" -> C, 6, H, 12, O, 6
    tokens = re.findall(r'[A-Z][a-z]?|[0-9]+', formula)
    counts = {'C': 0.0, 'H': 0.0, 'O': 0.0, 'N': 0.0}  # other elements are easy to add
    for position, token in enumerate(tokens):
        if token not in counts:
            continue
        hasCount = position + 1 < len(tokens) and not tokens[position + 1].isalpha()
        # a symbol with no number after it occurs exactly once
        counts[token] = float(tokens[position + 1]) if hasCount else 1.0

    carbon = counts['C']
    if carbon == 0:
        # ratios relative to carbon cannot be computed; leave feature untouched
        return None

    feature.append(predictions)  # feature[5]
    feature += [counts['H'] / carbon,  # feature[6]: H:C
                counts['O'] / carbon,  # feature[7]: O:C
                counts['N'] / carbon]  # feature[8]: N:C
    return feature
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
211 |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
212 # write output file |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
def write(vkData):
    """Write the predicted features as a TSV file and as an HTML page.

    Two files are produced from the module-level settings:
      * OUTPUT + '.tsv'  - one header row, then one tab-separated row per
        feature (items 0..8 of each feature).
      * OUTPUT + '.html' - a copy of DIRECTORY + 'd3.html' in which any line
        starting with "var data" is replaced by a "var data = [...]" line
        holding one JavaScript object literal per feature.

    feature[0] and feature[1] must be strings; the remaining items are
    converted with str().

    NOTE(review): as before, only ValueError is handled. A missing template
    or unwritable output raises IOError/OSError from open(), which
    propagates to the caller - confirm that this is intended.
    NOTE(review): values are interpolated into the JavaScript literal without
    escaping, so a quote inside sample_id or polarity would break the page.
    """
    tsvPath = OUTPUT + '.tsv'
    templatePath = DIRECTORY + 'd3.html'
    htmlPath = OUTPUT + '.html'
    recordFormat = ("{sample_id:'%s', polarity:'%s', mz:%s, rt:%s, "
                    "intensity:%s, predictions:%s, hc:%s, oc:%s, nc:%s}")
    records = []
    try:
        # write tabular file and generate records for html output
        with open(tsvPath, 'w') as f:
            f.write("sample_id\tpolarity\tmz\trt\tintensity\tpredictions\thc\toc\tnc\n")
            for feature in vkData:
                rest = [str(feature[k]) for k in range(2, 9)]
                f.write(feature[0] + '\t' + feature[1] + '\t' + '\t'.join(rest) + '\n')
                records.append(recordFormat % tuple([str(feature[0]), str(feature[1])] + rest))
        dataLine = 'var data = [' + ','.join(records) + ']'
        # write html
        try:
            with open(templatePath, 'r') as template, open(htmlPath, 'w') as f:
                for line in template:
                    # A callable replacement is inserted literally; a plain
                    # replacement string would have backslashes in the data
                    # interpreted as regex escapes.
                    f.write(re.sub('^var data.*$', lambda match: dataLine, line, flags=re.M))
        except ValueError:
            print('"%s" could not be read or "%s" could not be written' % (templatePath, htmlPath))
    except ValueError:
        print('"%s" could not be saved.' % tsvPath)
0
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
233 |
6
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
234 # main |
35b984684450
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5ef8d2b36eb35ff5aad5d5e9b78c38405fc95c1a
eslerm
parents:
5
diff
changeset
|
# predict every input feature; featurePrediction returns None for features to drop
vkData = [predicted for predicted in map(featurePrediction, vkInput) if predicted is not None]
# largest intensity first, so D3 draws the largest symbols before the smaller ones
vkData = sorted(vkData, key=lambda predicted: predicted[4], reverse=True)
write(vkData)