annotate vkmz.py @ 5:04079c34452a draft

planemo upload for repository https://github.com/HegemanLab/VKMZ commit 3e37abe007f126480b9b748ad5322755445ac95b-dirty
author eslerm
date Thu, 31 May 2018 12:06:20 -0400
parents b02af8eb8e6e
children 35b984684450
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
1 '''
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
2 based on the BMRB compound database which can be found at:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
3 http://www.bmrb.wisc.edu/ftp/pub/bmrb/relational_tables/metabolomics/Chem_comp.csv
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
4 '''
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
5
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
6 import re
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
7 import argparse
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
8 import multiprocessing
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
9 from multiprocessing import Pool
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
10 import csv
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
11
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
12 import numpy as np
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
13 import math
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
14 import pandas as pd
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
15 from plotly import __version__
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
16 import plotly.offline as py
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
17 import plotly.graph_objs as go
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
18
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
# Command line interface: one sub-command per input type.
parser = argparse.ArgumentParser()
inputSubparser = parser.add_subparsers(help='Select input type:', dest='input-type')

# 'tsv': a single tabular input file
parse_tsv = inputSubparser.add_parser('tsv', help='Use tabular data as input.')
parse_tsv.add_argument('--input', '-i', required=True, help='Path to tabular file. Must include columns: sample ID, mz, polarity, intensity, & retention time.')
parse_tsv.add_argument('--no-plot', '-np', action='store_true', help='Disable plot generation.')

# 'xcms': the three XCMS output tables
parse_xcms = inputSubparser.add_parser('xcms', help='Use XCMS data as input.')
parse_xcms.add_argument('--data-matrix', '-xd', required=True, nargs='?', type=str, help='Path to XCMS dataMatrix file.')
parse_xcms.add_argument('--sample-metadata', '-xs', required=True, nargs='?', type=str, help='Path to XCMS sampleMetadata file.')
parse_xcms.add_argument('--variable-metadata', '-xv', required=True, nargs='?', type=str, help='Path to XCMS variableMetadata file.')
parse_xcms.add_argument('--no-plot', '-n', action='store_true', help='Disable plot generation.')

# 'plot': re-plot a file previously generated by this tool
parse_plot = inputSubparser.add_parser('plot', help='Only plot data.')
parse_plot.add_argument('--input', '-i', required=True, nargs='?', type=str, help='Path to VKMZ generated tabular file.')

# Options shared by the 'tsv' and 'xcms' sub-commands.  A dedicated loop
# variable is used so that the sub-parser factory bound to `inputSubparser`
# above is not overwritten while iterating.
for sharedParser in (parse_tsv, parse_xcms):
    sharedParser.add_argument('--output', '-o', nargs='?', type=str, required=True, help='Specify output file path.')
    sharedParser.add_argument('--error', '-e', nargs='?', type=float, required=True, help='Mass error of mass spectrometer in PPM')
    sharedParser.add_argument('--database', '-d', nargs='?', default='databases/bmrb-light.tsv', help='Select database.')
    sharedParser.add_argument('--directory', nargs='?', default='', type=str, help='Define directory of tool.')
    sharedParser.add_argument('--polarity', '-p', choices=['positive','negative'], help='Force polarity mode. Ignore variables in input file.')
    sharedParser.add_argument('--no-adjustment', '-na', action='store_true', help='Use flag to turn off polarity based mass adjustment. This flag should always be used if reprocessing data generated by VKMZ.')
    sharedParser.add_argument('--unique', '-u', action='store_true', help='Set flag to only output features which have a single match.')
    sharedParser.add_argument('--multiprocessing', '-m', action='store_true', help='Set flag to turn on multiprocessing.')
    sharedParser.add_argument('--plottype', '-t', nargs='?', default='scatter-2d', choices=['scatter-2d', 'scatter-3d'], help='Select plot type.')
    sharedParser.add_argument('--size', '-s', nargs='?', default=5, type=int, help='Set maximum size of plot symbols.')
    # The accepted values are 0 and 1, so the help text must describe
    # algorithms 0 and 1.
    sharedParser.add_argument('--size-algorithm', '-a', nargs='?', default=0, type=int, choices=[0,1], help="Symbol size algorithm selector. Algorithm 0 sets all symbols to the maximum size. Algorithm 1 determines a feature's symbol size by its log intensity.")
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
# Parse the command line and expose the selected options as module globals.
args = parser.parse_args()

# The destination name contains a hyphen, so it cannot be read with dot
# notation.
vkInputType = getattr(args, "input-type")

# NOTE(review): the 'plot' sub-command only defines --input, so every
# attribute read below raises AttributeError in that mode -- confirm whether
# plot mode is expected to work with this revision.
vkError = args.error
vkPolarity = args.polarity
vkUnique = args.unique
vkMultiprocessing = args.multiprocessing
vkNoAdjustment = args.no_adjustment
vkDatabaseFile = args.database
vkDirectory = args.directory

# Load the database into two parallel lists.  Each row after the header must
# split into exactly two whitespace separated fields (mass, formula); the
# mass is stored as the string read from the file.
vkMass = []
vkFormula = []
try:
    with open(vkDirectory + vkDatabaseFile, 'r') as databaseHandle:
        next(databaseHandle)  # discard the header row
        for databaseLine in databaseHandle:
            knownMass, knownFormula = databaseLine.split()
            vkMass.append(knownMass)
            vkFormula.append(knownFormula)
except ValueError:
    # a row did not split into exactly two fields; rows read before it stay
    # in the lists
    print('The %s database could not be loaded.' % vkDatabaseFile)
# highest valid index into vkMass (-1 when nothing was loaded)
vkMaxIndex = len(vkMass) - 1

# output and plotting options
vkOutput = args.output
vkPlotType = args.plottype
vkSize = args.size
vkSizeAlgo = args.size_algorithm
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
81
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
82 # control predictions
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
def forecaster(vkInput):
    """Run featurePrediction on every feature and keep the non-None results.

    When the module-level ``vkMultiprocessing`` flag is set, the work is
    handed to ``multiprocessing.Pool.map``.  If the pool cannot be created or
    the parallel run raises, the error is printed and the features are
    processed serially instead, so the function never reaches the filtering
    step with an undefined result list.

    :param vkInput: iterable of features accepted by ``featurePrediction``.
    :returns: list of the values returned by ``featurePrediction`` that are
        not ``None``.
    """
    # Materialise once so the serial fallback still sees every feature even
    # if a one-shot iterator was passed in.
    features = list(vkInput)
    vkOutputList = None
    if vkMultiprocessing:
        pool = None
        try:
            pool = Pool()
            vkOutputList = pool.map(featurePrediction, features)
        except Exception as e:
            print("Error during multirpocessing: "+str(e))
        finally:
            # Pool() itself may have failed, in which case there is nothing
            # to shut down.
            if pool is not None:
                pool.close()
                pool.join()
    if vkOutputList is None:
        # multiprocessing disabled, or the parallel attempt failed
        vkOutputList = map(featurePrediction, features)
    return [x for x in vkOutputList if x is not None]
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
97
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
98 # predict feature formulas and creates output list
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
def featurePrediction(feature):
    """Predict a formula for one feature and append its elemental ratios.

    ``feature`` is a mutable list; index 1 is passed to ``adjust`` as the
    polarity and index 2 as the m/z value.  The (optionally adjusted) mass is
    looked up with ``predict``; on a match the neighbouring matches are
    collected with ``predictNeighbors`` and the first of them is treated as
    the closest prediction.

    On success the list is extended in place with the predictions, the third
    field of the closest prediction, and the H:C, O:C and N:C ratios of the
    closest formula, and the same list is returned.

    ``None`` is returned when
    * no database mass lies within the mass error,
    * ``vkUnique`` is set and more than one prediction was found, or
    * the closest formula contains no carbon, because the ratios are then
      undefined (this previously raised ZeroDivisionError).
    """
    if vkNoAdjustment:
        mass = feature[2]
    else:
        mass = adjust(feature[2], feature[1])  # mz & polarity
    uncertainty = mass * vkError / 1e6
    prediction = predict(mass, uncertainty, 0, vkMaxIndex)
    if prediction == -1:
        return None

    feature[2] = mass
    predictions = predictNeighbors(mass, uncertainty, prediction)
    if vkUnique and len(predictions) > 1:
        return None

    predictionClosest = predictions[0]
    formula = predictionClosest[1]

    # Split the formula into element symbols and digit runs.  An element that
    # is directly followed by a digit run gets that run (kept as a string, as
    # before) as its count; otherwise its count is 1.  Elements other than
    # C, H, O and N are ignored.
    formulaList = re.findall(r'[A-Z][a-z]?|[0-9]+', formula)
    formulaDictionary = {'C': 0, 'H': 0, 'O': 0, 'N': 0}
    for token, following in zip(formulaList, formulaList[1:] + ['']):
        if token in formulaDictionary:
            formulaDictionary[token] = following if following.isdigit() else 1

    carbon = float(formulaDictionary['C'])
    if carbon == 0:
        # ratios relative to carbon cannot be computed for this formula
        return None

    feature.append(predictions)
    predictionClosest.append(formulaDictionary)
    hc = float(formulaDictionary['H']) / carbon
    oc = float(formulaDictionary['O']) / carbon
    nc = float(formulaDictionary['N']) / carbon
    # Same value the original read through feature[5][0][2] for five-column
    # rows, without depending on the length of the incoming row.
    predictionClosestDelta = predictionClosest[2]
    feature += [predictionClosestDelta, hc, oc, nc]
    return feature
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
133
1
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
134 # adjust charged mass to a neutral mass
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
def adjust(mass, polarity):
    """Convert an observed charged mass to the corresponding neutral mass.

    A positive-mode ion carries one extra proton, so the proton mass is
    subtracted; a negative-mode ion has lost one proton, so the proton mass
    is added back.  (The signs were previously the wrong way round, which
    moved the value away from the neutral mass by two proton masses.)

    :param mass: observed mass of the charged feature.
    :param polarity: ``'positive'`` or ``'negative'``; any other value
        leaves the mass unchanged.
    :returns: the adjusted mass.
    """
    # value to adjust by
    proton = 1.007276
    if polarity == 'positive':
        return mass - proton
    if polarity == 'negative':
        return mass + proton
    return mass
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
143
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
144 # Binary search to match observed mass to known mass within error
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
145 # https://en.wikipedia.org/wiki/Binary_search_algorithm
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
def predict(mass, uncertainty, left, right):
    """Binary-search ``vkMass`` for an entry within ``uncertainty`` of ``mass``.

    The search assumes ``vkMass`` is ordered by increasing mass, since it
    moves left when the probed entry is too heavy and right otherwise.

    :param mass: mass to look up.
    :param uncertainty: largest accepted absolute difference between a
        database mass and ``mass``.
    :param left: lowest index to consider (inclusive).
    :param right: highest index to consider (inclusive).
    :returns: index of the first probed entry that lies within the
        uncertainty, or ``-1`` when there is none.
    """
    while left <= right:
        # Floor division keeps the index an integer on Python 3 as well;
        # true division would produce a float that cannot index a list.
        mid = left + (right - left) // 2
        if mid > vkMaxIndex:
            # probe lies outside the loaded database
            break
        delta = float(vkMass[mid]) - mass
        if uncertainty >= abs(delta):
            return mid
        if uncertainty < delta:
            # database entry is too heavy: continue in the lower half
            right = mid - 1
        else:
            # database entry is too light: continue in the upper half
            left = mid + 1
    return -1
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
157
1
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
def plotData(vkData):
    """Append a plot symbol size to every row of ``vkData``.

    Each row is a list whose element at index 4 is read as the feature
    intensity. The rows are modified in place and the same list is returned.

    When the module-level ``vkSizeAlgo`` is 0 every row receives the uniform
    size ``vkSize``. Otherwise sizes are scaled logarithmically so that the
    row with the highest intensity receives ``vkSize``:
    ``size = vkSize * log(intensity + 1) / log(max_intensity + 1)``.

    Parameters:
        vkData: list of row lists; row[4] must be a number.

    Returns:
        ``vkData`` itself, with one extra trailing element per row.
    """
    if vkSizeAlgo == 0:
        for row in vkData:
            row.append(vkSize)
        return vkData

    # Seed with 0.0 so an empty data set or non-positive intensities yield 0.0.
    max_intensity = max([0.0] + [row[4] for row in vkData])
    scale = math.log(max_intensity + 1)
    if scale == 0:
        # No positive intensity at all: log scaling would divide by zero.
        # Every row is then "the maximum", so give each the full size.
        for row in vkData:
            row.append(vkSize)
        return vkData

    alpha = vkSize / scale
    for row in vkData:
        row.append(alpha * math.log(row[4] + 1))
    return vkData
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
173
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
# find and sort known masses within error limit of observed mass
def predictNeighbors(mass, uncertainty, prediction):
    """Collect the known masses around ``prediction`` that match ``mass``.

    Starting from index ``prediction`` in the module-level ``vkMass`` /
    ``vkFormula`` sequences, the scan walks upward (to ``vkMaxIndex``) and
    then downward (to index 0), stopping in each direction at the first
    entry whose mass differs from ``mass`` by more than ``uncertainty``.

    Parameters:
        mass: observed mass.
        uncertainty: maximum allowed absolute difference for a neighbor.
        prediction: index of an entry already accepted as a match; it is
            always included in the result without being re-checked.

    Returns:
        A list of ``[known_mass, formula, delta]`` lists, where ``delta`` is
        ``float(known_mass) - mass``, sorted by ascending ``abs(delta)``.
        Ties keep scan order: the prediction, then higher indices, then
        lower indices.
    """
    def entry(index):
        # One result row for the known mass stored at ``index``.
        return [vkMass[index], vkFormula[index], float(vkMass[index]) - mass]

    neighbors = [entry(prediction)]
    scan_orders = (
        range(prediction + 1, vkMaxIndex + 1),  # heavier neighbors
        range(prediction - 1, -1, -1),          # lighter neighbors
    )
    for indices in scan_orders:
        for index in indices:
            candidate = entry(index)
            if uncertainty >= abs(candidate[2]):
                neighbors.append(candidate)
            else:
                # First entry out of tolerance ends the scan in this direction.
                break
    # list.sort is stable, so equal |delta| entries keep their scan order.
    neighbors.sort(key=lambda neighbor: abs(neighbor[2]))
    return neighbors
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
197
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
# write output file
def saveForcast(vkOutputList):
    """Write the forecast rows to ``<vkOutput>.tsv`` as tab-separated text.

    The output path is the module-level ``vkOutput`` with ``.tsv`` appended.
    A header line is written first, then one line per feature using its
    first eleven elements. Elements 0 and 1 must already be strings; the
    remaining ones are converted with ``str``.

    Parameters:
        vkOutputList: iterable of indexable rows with at least 11 elements.

    Returns:
        None. If the file cannot be opened or written, a message naming the
        path is printed instead of raising.
    """
    path = vkOutput + '.tsv'
    header = "sample_id\tpolarity\tmz\tretention_time\tintensity\tpredictions\tdelta\tH:C\tO:C\tN:C\tsymbol_size"
    try:
        with open(path, 'w') as f:
            f.write(header + '\n')
            for feature in vkOutputList:
                fields = [feature[0], feature[1]]
                fields.extend(str(feature[k]) for k in range(2, 11))
                # NOTE(review): each data line ends with an extra tab that the
                # header lacks; kept so the file format stays byte-identical.
                f.write('\t'.join(fields) + '\t' + '\n')
    except (IOError, OSError, ValueError):
        # I/O failures raise IOError/OSError; ValueError is still handled as
        # before. Report the path that was actually being written.
        print('"%s" could not be saved.' % path)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
207
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
def plotRatios(vkData):
    """Build a van Krevelen scatter plot and save it as ``<vkOutput>.html``.

    Each row of ``vkData`` is read positionally as: sampleID, polarity, mz,
    rt, intensity, predictions, delta, hc, oc, nc, symbol_size. One scatter
    trace is created per distinct sampleID (x = oc, y = hc, marker color =
    rt, marker size = symbol_size), together with a menu button that shows
    only that sample's trace and sets the plot title to the sample ID.

    Parameters:
        vkData: list of 11-element rows as described above. ``predictions``
            must be a non-empty sequence whose first item is indexable as
            (mz, formula, delta); it supplies the hover text.

    Returns:
        None. The figure is written through ``py.plot`` to the module-level
        ``vkOutput`` path with ``.html`` appended.
        NOTE(review): ``go`` and ``py`` are presumably plotly.graph_objs and
        plotly.offline -- confirm against the file's imports.
    """
    def column_max(index):
        # Running maximum that starts at 0, so axis and color ranges never
        # drop below zero and an empty data set yields 0.
        return max([0] + [row[index] for row in vkData])

    max_rt = column_max(3)
    max_hc = column_max(7)
    max_oc = column_max(8)

    labels = ['sampleID', 'polarity', 'mz', 'rt', 'intensity', 'predictions', 'delta', 'hc', 'oc', 'nc', 'symbol_size']
    df = pd.DataFrame.from_records(vkData, columns=labels)
    sampleIDs = df.sampleID.unique()
    data = []
    menus = []
    for i, sampleID in enumerate(sampleIDs):
        dfSample = df.loc[df['sampleID'] == sampleID]
        size = dfSample.symbol_size
        trace = go.Scatter(
            x = dfSample.oc,
            y = dfSample.hc,
            # Hover text describes the first prediction of each feature.
            text = dfSample.predictions.apply(lambda x: "Prediction: "+str(x[0][1])+"<br>mz: " +str(x[0][0])+"<br>Delta: "+str(x[0][2])),
            line = dict(width = 0.5),
            mode = 'markers',
            marker = dict(
                size = size,
                sizemode = "area",
                color = dfSample.rt,
                colorscale = 'Viridis',
                cmin = 0,
                cmax = max_rt,
                colorbar=dict(title='Retention Time (s)'),
                line = dict(width = 0.5),
                showscale = True
            ),
            opacity = 0.8
        )
        data.append(trace)
        # Visibility mask: only the trace of the current sample is shown.
        vision = [j == i for j in range(len(sampleIDs))]
        menu = dict(
            method = 'update',
            label = sampleID,
            args = [{'visible': vision}, {'title': sampleID}]
        )
        menus.append(menu)
    updatemenus = [
        dict(
            active = -1,
            buttons = menus
        )
    ]
    layout = go.Layout(
        title = "Van Krevelen Diagram",
        showlegend = False,
        xaxis = dict(
            title = 'Oxygen to Carbon Ratio',
            zeroline = False,
            gridcolor = 'rgb(183,183,183)',
            showline = True,
            range = [0, max_oc]
        ),
        yaxis = dict(
            title = 'Hydrogen to Carbon Ratio',
            zeroline = False,
            gridcolor = 'rgb(183,183,183)',
            showline = True,
            range = [0, max_hc]
        ),
        margin = dict(r=0, b=100, l=100, t=100),
        updatemenus = updatemenus
    )
    fig = go.Figure(data=data, layout=layout)
    py.plot(fig, auto_open=False, show_link=False, filename=vkOutput+'.html')
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
294
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
def polaritySanitizer(sample_polarity):
    """Normalize a polarity label to ``'positive'`` or ``'negative'``.

    Matching ignores case and surrounding whitespace. Accepted spellings are
    'positive', 'pos', '+' and 'negative', 'neg', '-'.

    Parameters:
        sample_polarity: polarity string as read from the input data.

    Returns:
        ``'positive'`` or ``'negative'``.

    Raises:
        ValueError: if the label is not one of the accepted spellings. The
            explanatory message is also printed before raising.
    """
    normalized = sample_polarity.strip().lower()
    if normalized in {'positive', 'pos', '+'}:
        return 'positive'
    if normalized in {'negative', 'neg', '-'}:
        return 'negative'
    message = 'A sample has an unknown polarity type: %s. Polarity in the XCMS sample metadata should be set to "negative" or "positive".' % sample_polarity
    # Still printed because callers that catch ValueError only report a
    # generic read failure; the exception now carries the same text.
    print(message)
    raise ValueError(message)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
304
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
305 # main
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
306 if vkInputType == "tsv":
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
307 vkInput = []
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
308 tsvFile = getattr(args, "input")
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
309 try:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
310 with open(tsvFile, 'r') as f:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
311 next(f) # skip header line
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
312 tsvData = csv.reader(f, delimiter='\t')
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
313 for row in tsvData:
1
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
314 vkInput.append([row[0],polaritySanitizer(row[1]),float(row[2]),float(row[3]),float(row[4])])
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
315 except ValueError:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
316 print('The %s data file could not be read.' % tsvFile)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
317 vkData = forecaster(vkInput)
1
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
318 vkData = plotData(vkData)
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
319 saveForcast(vkData)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
320 plotRatios(vkData)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
321 elif vkInputType == "xcms":
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
322 vkInput = []
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
323 xcmsSampleMetadataFile = getattr(args, "sample_metadata")
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
324 try:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
325 polarity = {}
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
326 with open(xcmsSampleMetadataFile, 'r') as f:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
327 xcmsSampleMetadata = csv.reader(f, delimiter='\t')
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
328 next(xcmsSampleMetadata, None) # skip header
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
329 for row in xcmsSampleMetadata:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
330 sample = row[0]
1
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
331 if vkPolarity:
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
332 polarity[sample] = vkPolarity
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
333 else:
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
334 sample_polarity = polaritySanitizer(row[2])
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
335 polarity[sample] = sample_polarity
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
336 except ValueError:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
337 print('The %s data file could not be read. Check that polarity is set to "negative" or "positive"' % xcmsSampleMetadataFile)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
338 xcmsVariableMetadataFile = getattr(args, "variable_metadata")
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
339 try:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
340 mz = {}
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
341 rt = {}
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
342 variable_index = {}
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
343 mz_index = int()
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
344 rt_index = int()
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
345 with open(xcmsVariableMetadataFile, 'r') as f:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
346 xcmsVariableMetadata = csv.reader(f, delimiter='\t')
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
347 i = 0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
348 for row in xcmsVariableMetadata:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
349 if i != 0:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
350 mz[row[0]] = float(row[mz_index])
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
351 rt[row[0]] = float(row[rt_index])
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
352 else:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
353 for column in row:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
354 variable_index[column] = i
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
355 i += 1
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
356 mz_index = variable_index["mz"]
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
357 rt_index = variable_index["rt"]
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
358 except ValueError:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
359 print('The %s data file could not be read.' % xcmsVariableMetadataFile)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
360 xcmsDataMatrixFile = getattr(args, "data_matrix")
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
361 try:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
362 with open(xcmsDataMatrixFile, 'r') as f:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
363 xcmsDataMatrix = csv.reader(f, delimiter='\t')
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
364 first_row = True
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
365 for row in xcmsDataMatrix:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
366 if first_row:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
367 sample_id = row
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
368 first_row = False
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
369 else:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
370 i = 0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
371 while(i < len(row)):
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
372 if i == 0:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
373 i+=1
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
374 else:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
375 intensity = row[i]
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
376 if intensity not in {'NA', '#DIV/0!', '0'}:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
377 variable = row[0]
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
378 sample = sample_id[i]
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
379 # XCMS data may include empty columns
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
380 if sample != "":
1
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
381 vkInput.append([sample, polarity[sample], mz[variable], rt[variable], float(intensity)])
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
382 i+=1
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
383 except ValueError:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
384 print('The %s data file could not be read.' % xcmsDataMatrixFile)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
385 vkData = forecaster(vkInput)
1
b02af8eb8e6e planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
eslerm
parents: 0
diff changeset
386 vkData = plotData(vkData)
0
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
387 saveForcast(vkData)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
388 plotRatios(vkData)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
389 else:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
390 vkData = []
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
391 tsvPlotvFile = getattr(args, "input")
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
392 try:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
393 with open(tsvPlotFile, 'r') as f:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
394 next(f) # skip header line
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
395 plotData = csv.reader(f, delimiter='\t')
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
396 for row in plotData:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
397 vkData.append([row[0],row[1],float(row[2]),float(row[3]),float(row[4]),list(row[4]),float(row[5]),float(row[6]),float(row[7]),float(row[8])])
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
398 except ValueError:
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
399 print('The %s data file could not be read.' % tsvFile)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
400 plotRatios(vkData)
0b8ddf650752 planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff changeset
401