Mercurial > repos > eslerm > vkmz
annotate vkmz.py @ 0:0b8ddf650752 draft
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
author | eslerm |
---|---|
date | Wed, 02 May 2018 18:31:06 -0400 |
parents | |
children | b02af8eb8e6e |
rev | line source |
---|---|
0
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
1 ''' |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
2 based on the BMRB compound database which can be found at: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
3 http://www.bmrb.wisc.edu/ftp/pub/bmrb/relational_tables/metabolomics/Chem_comp.csv |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
4 ''' |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
5 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
6 import re |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
7 import argparse |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
8 import multiprocessing |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
9 from multiprocessing import Pool |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
10 import csv |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
11 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
12 import numpy as np |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
13 import math |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
14 import pandas as pd |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
15 from plotly import __version__ |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
16 import plotly.offline as py |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
17 import plotly.graph_objs as go |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
18 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
19 parser = argparse.ArgumentParser() |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
20 inputSubparser = parser.add_subparsers(help='Select input type:', dest='input-type') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
21 parse_tsv = inputSubparser.add_parser('tsv', help='Use tabular data as input.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
22 parse_tsv.add_argument('--input', '-i', required=True, help='Path to tabular file. Must include columns: sample ID, mz, polarity, intensity, & retention time.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
23 parse_tsv.add_argument('--no-plot', '-np', action='store_true', help='Disable plot generation.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
24 parse_xcms = inputSubparser.add_parser('xcms', help='Use XCMS data as input.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
25 parse_xcms.add_argument('--data-matrix', '-xd', required=True, nargs='?', type=str, help='Path to XCMS dataMatrix file.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
26 parse_xcms.add_argument('--sample-metadata', '-xs', required=True, nargs='?', type=str, help='Path to XCMS sampleMetadata file.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
27 parse_xcms.add_argument('--variable-metadata', '-xv', required=True, nargs='?', type=str, help='Path to XCMS variableMetadata file.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
28 parse_xcms.add_argument('--no-plot', '-n', action='store_true', help='Disable plot generation.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
29 parse_plot = inputSubparser.add_parser('plot', help='Only plot data.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
30 parse_plot.add_argument('--input', '-i', required=True, nargs='?', type=str, help='Path to VKMZ generated tabular file.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
31 for inputSubparser in [parse_tsv, parse_xcms]: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
32 inputSubparser.add_argument('--output', '-o', nargs='?', type=str, required=True, help='Specify output file path.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
33 inputSubparser.add_argument('--error', '-e', nargs='?', type=float, required=True, help='Mass error of mass spectrometer in PPM') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
34 inputSubparser.add_argument('--database', '-d', nargs='?', default='databases/bmrb-light.tsv', help='Select database.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
35 inputSubparser.add_argument('--directory', nargs='?', default='', type=str, help='Define directory of tool.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
36 inputSubparser.add_argument('--no-adjustment', '-na', action='store_true', help='Use flag to turn off polarity based mass adjustment. This flag should always be used if reprocessing data generated by VKMZ.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
37 inputSubparser.add_argument('--multiprocessing', '-m', action='store_true', help='Use flag to turn on multiprocessing.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
38 inputSubparser.add_argument('--plottype', '-p', nargs='?', default='scatter-2d', choices=['scatter-2d', 'scatter-3d'], help='Select plot type.') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
39 inputSubparser.add_argument('--size', '-s', nargs='?', default=5, type=int, help='Set size of of dots. size+2*log(size*peak/(highest_peak/lowest_peak') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
40 inputSubparser.add_argument('--size-algorithm', '-a', nargs='?', default=0, type=int, choices=[0,1,2],help='Size algorithm selector. Algo 0: size, Algo 1: size+2*log(size*peak/(highest_peak/lowest_peak, Algo 2: size+2*size*peak/(highest_peak-lowest_peak)') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
41 args = parser.parse_args() |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
42 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
43 vkInputType = getattr(args, "input-type") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
44 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
45 # read inputs, arguments and define globals |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
46 vkError = getattr(args, "error") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
47 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
48 vkMultiprocessing = getattr(args, "multiprocessing") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
49 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
50 vkNoAdjustment = getattr(args, "no_adjustment") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
51 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
52 vkDatabaseFile = getattr(args, "database") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
53 vkDirectory = getattr(args, "directory") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
54 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
55 vkMass = [] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
56 vkFormula = [] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
57 try: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
58 with open(vkDirectory+vkDatabaseFile, 'r') as tsv: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
59 next(tsv) # skip first row |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
60 for row in tsv: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
61 mass, formula = row.split() |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
62 vkMass.append(mass) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
63 vkFormula.append(formula) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
64 except ValueError: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
65 print('The %s database could not be loaded.' % vkDatabaseFile) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
66 vkMaxIndex = len(vkMass)-1 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
67 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
68 vkOutput = getattr(args, "output") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
69 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
70 vkPlotType = getattr(args, 'plottype') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
71 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
72 vkSize = getattr(args, 'size') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
73 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
74 vkSizeAlgo = getattr(args, 'size_algorithm') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
75 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
76 # control predictions |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
77 def forecaster(vkInput): |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
78 if vkMultiprocessing: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
79 try: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
80 pool = Pool() |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
81 vkOutputList = pool.map(featurePrediction, vkInput) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
82 except Exception as e: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
83 print("Error during multirpocessing: "+str(e)) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
84 finally: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
85 pool.close() |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
86 pool.join() |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
87 else: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
88 vkOutputList = map(featurePrediction, vkInput) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
89 vkOutputList = [x for x in vkOutputList if x is not None] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
90 return(vkOutputList) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
91 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
92 # predicts feature formulas and creates the output list |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
93 def featurePrediction(feature): |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
94 if vkNoAdjustment: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
95 mass = feature[2] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
96 else: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
97 mass = adjust(feature[2], feature[1]) # mz & polarity |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
98 uncertainty = mass * vkError / 1e6 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
99 prediction = predict(mass, uncertainty, 0, vkMaxIndex) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
100 if prediction != -1: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
101 feature[2] = mass |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
102 predictions = predictNeighbors(mass, uncertainty, prediction) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
103 feature[5] = predictions |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
104 predictionClosest = predictions[0] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
105 formula = predictionClosest[1] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
106 formulaList = re.findall('[A-Z][a-z]?|[0-9]+', formula) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
107 formulaDictionary = {'C':0,'H':0,'O':0,'N':0} |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
108 i = 0; |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
109 while i < len(formulaList): |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
110 if formulaList[i] in formulaDictionary: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
111 # if there is only one of this element |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
112 if i+1 == len(formulaList) or formulaList[i+1].isalpha(): |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
113 formulaDictionary[formulaList[i]] = 1 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
114 else: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
115 formulaDictionary[formulaList[i]] = formulaList[i+1] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
116 i+=1 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
117 i+=1 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
118 predictionClosest.append(formulaDictionary) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
119 hc = float(formulaDictionary['H'])/float(formulaDictionary['C']) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
120 oc = float(formulaDictionary['O'])/float(formulaDictionary['C']) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
121 nc = float(formulaDictionary['N'])/float(formulaDictionary['C']) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
122 predictionClosestDelta = feature[5][0][2] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
123 feature += [predictionClosestDelta, hc, oc, nc] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
124 return(feature) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
125 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
126 # adjust observed mass to a neutral mass |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
127 def adjust(mass, polarity): |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
128 # value to adjust by |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
129 proton = 1.007276 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
130 if polarity == 'positive': |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
131 mass += proton |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
132 elif polarity == 'negative': |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
133 mass -= proton |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
134 return mass |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
135 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
136 # Binary search to match observed mass to known mass within error |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
137 # https://en.wikipedia.org/wiki/Binary_search_algorithm |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
138 def predict(mass, uncertainty, left, right): |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
139 mid = ((right - left) / 2) + left |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
140 if left <= mid <= right and mid <= vkMaxIndex: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
141 delta = float(vkMass[mid]) - mass |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
142 if uncertainty >= abs(delta): |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
143 return mid |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
144 elif uncertainty < delta: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
145 return predict(mass, uncertainty, left, mid-1) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
146 else: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
147 return predict(mass, uncertainty, mid+1, right) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
148 return -1 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
149 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
150 # find and sort known masses within error limit of observed mass |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
def predictNeighbors(mass, uncertainty, prediction):
    """Collect the known masses around ``prediction`` that lie within ``uncertainty`` of ``mass``.

    Reads the module-level ``vkMass``, ``vkFormula`` and ``vkMaxIndex``.

    Args:
        mass: observed mass to compare against.
        uncertainty: largest accepted absolute difference between a known
            mass and ``mass``.
        prediction: index into ``vkMass``/``vkFormula`` of an entry already
            accepted as a match; it is always included in the result.

    Returns:
        A list of ``[vkMass[i], vkFormula[i], delta]`` entries, where
        ``delta = float(vkMass[i]) - mass``, ordered by ascending
        ``abs(delta)``.
    """
    matches = [[vkMass[prediction], vkFormula[prediction], float(vkMass[prediction]) - mass]]

    # Walk towards higher indices until the first entry outside the window.
    # NOTE(review): stopping at the first miss presumably relies on vkMass
    # being sorted by mass -- confirm where vkMass is built.
    upper = prediction + 1
    while upper <= vkMaxIndex:
        offset = float(vkMass[upper]) - mass
        if not (uncertainty >= abs(offset)):
            break
        matches.append([vkMass[upper], vkFormula[upper], offset])
        upper += 1

    # Same walk towards lower indices, stopping at index 0.
    lower = prediction - 1
    while lower >= 0:
        offset = float(vkMass[lower]) - mass
        if not (uncertainty >= abs(offset)):
            break
        matches.append([vkMass[lower], vkFormula[lower], offset])
        lower -= 1

    # list.sort is stable, so ties keep the order: prediction, upper, lower.
    matches.sort(key=lambda entry: abs(entry[2]))
    return matches
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
173 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
174 # write output file |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
def saveForcast(vkOutputList):
    """Write the forecast rows to ``vkOutput + '.tsv'`` as tab-separated text.

    The destination is built from the module-level ``vkOutput`` name. A
    header line is written first, followed by one line per entry of
    ``vkOutputList`` built from its fields 0 through 9.

    Args:
        vkOutputList: iterable of indexable rows; fields 0 and 1 must be
            strings, fields 2-9 are converted with ``str``.

    A ``ValueError`` raised while writing is reported on stdout and not
    propagated; any other exception is left to the caller.
    """
    path = vkOutput + '.tsv'
    header = "sample_id\tpolarity\tmz\tretention_time\tintensity\tpredictions\tdelta\tH:C\tO:C\tN:C"
    try:
        with open(path, 'w') as f:
            f.write(header + '\n')
            for feature in vkOutputList:
                fields = [feature[0], feature[1]]
                fields.extend(str(feature[column]) for column in range(2, 10))
                # The trailing tab before the newline is kept on purpose so
                # the emitted bytes match what this tool has always written.
                f.write('\t'.join(fields) + '\t' + '\n')
    except ValueError:
        # The message previously referenced an undefined name, which turned
        # the report itself into a NameError.
        print('"%s" could not be saved.' % path)
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
183 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
def plotRatios(vkData):
    """Plot the forecast rows as an interactive Van Krevelen diagram.

    One scatter trace is built per distinct sample ID, with the O:C ratio on
    the x axis and the H:C ratio on the y axis. Markers are coloured by
    retention time. A button per sample is added that shows only that
    sample's trace and sets the title to the sample ID. The figure is
    written to ``vkOutput + '.html'`` through ``py.plot``.

    Reads the module-level ``vkSizeAlgo``, ``vkSize`` and ``vkOutput``, and
    the ``pd``, ``go`` and ``py`` modules imported by the file
    (presumably pandas, plotly.graph_objs and plotly.offline -- confirm
    against the import block).

    Args:
        vkData: sequence of 10-field rows in the order
            sampleID, polarity, mz, rt, intensity, predictions, delta,
            hc, oc, nc. ``predictions`` is indexed as ``x[0][0..2]`` for the
            hover text, so each row needs at least one prediction.
    """
    # Global maxima fix the colour scale and axis ranges across all samples.
    max_rt = 0
    max_intensity = 0.0
    max_hc = 0
    max_oc = 0
    for row in vkData:
        max_rt = max(max_rt, row[3])
        max_intensity = max(max_intensity, float(row[4]))
        max_hc = max(max_hc, row[7])
        max_oc = max(max_oc, row[8])

    labels = ['sampleID', 'polarity', 'mz', 'rt', 'intensity', 'predictions', 'delta', 'hc', 'oc', 'nc']
    df = pd.DataFrame.from_records(vkData, columns=labels)
    sampleIDs = df.sampleID.unique()

    # With no positive intensity the scaled size formula would divide by
    # zero; every marker then gets the base size, which is also what the
    # formula yields for a zero intensity.
    use_fixed_size = vkSizeAlgo == 0 or not max_intensity

    data = []
    menus = []
    for position, sampleID in enumerate(sampleIDs):
        dfSample = df.loc[df['sampleID'] == sampleID]
        if use_fixed_size:
            size = dfSample.intensity.apply(lambda x: vkSize)
        else:
            size = dfSample.intensity.apply(lambda x: vkSize+4*vkSize*float(x)/max_intensity)
        trace = go.Scatter(
            x = dfSample.oc,
            y = dfSample.hc,
            # Hover text describes the first (best-ranked) prediction only.
            text = dfSample.predictions.apply(lambda x: "Prediction: "+str(x[0][1])+"<br>mz: " +str(x[0][0])+"<br>Delta: "+str(x[0][2])),
            line = dict(width = 0.5),
            mode = 'markers',
            marker = dict(
                size = size,
                color = dfSample.rt,
                colorscale = 'Viridis',
                cmin = 0,
                cmax = max_rt,
                colorbar=dict(title='Retention Time (s)'),
                line = dict(width = 0.5),
                showscale = True
            ),
            opacity = 0.8
        )
        data.append(trace)
        # Visibility mask for this sample's button: only its own trace is on.
        vision = [other == position for other in range(len(sampleIDs))]
        menus.append(dict(
            method = 'update',
            label = sampleID,
            args = [{'visible': vision}, {'title': sampleID}]
        ))

    updatemenus = [
        dict(
            active = -1,
            buttons = menus
        )
    ]
    layout = go.Layout(
        title = "Van Krevelen Diagram",
        showlegend = False,
        xaxis = dict(
            title = 'Oxygen to Carbon Ratio',
            zeroline = False,
            gridcolor = 'rgb(183,183,183)',
            showline = True,
            range = [0, max_oc]
        ),
        yaxis = dict(
            title = 'Hydrogen to Carbon Ratio',
            zeroline = False,
            gridcolor = 'rgb(183,183,183)',
            showline = True,
            range = [0, max_hc]
        ),
        margin = dict(r=0, b=100, l=100, t=100),
        updatemenus = updatemenus
    )
    fig = go.Figure(data=data, layout=layout)
    py.plot(fig, auto_open=False, show_link=False, filename=vkOutput+'.html')
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
279 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
def polaritySanitizer(sample_polarity):
    """Normalize a polarity label to ``'positive'`` or ``'negative'``.

    Matching is case-insensitive and ignores surrounding whitespace.
    ``'positive'``, ``'pos'`` and ``'+'`` map to ``'positive'``;
    ``'negative'``, ``'neg'`` and ``'-'`` map to ``'negative'``.

    Args:
        sample_polarity: polarity label as a string.

    Returns:
        ``'positive'`` or ``'negative'``.

    Raises:
        ValueError: if the label is not one of the accepted spellings. The
            explanation is printed and also carried by the exception.
    """
    label = sample_polarity.strip().lower()
    if label in {'positive', 'pos', '+'}:
        return 'positive'
    if label in {'negative', 'neg', '-'}:
        return 'negative'
    message = 'A sample has an unknown polarity type: %s. Polarity in the XCMS sample metadata should be set to "negative" or "positive".' % sample_polarity
    # Still printed because callers catch ValueError and print their own,
    # more generic message.
    print(message)
    raise ValueError(message)
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
289 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
290 # main |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
291 if vkInputType == "tsv": |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
292 vkInput = [] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
293 tsvFile = getattr(args, "input") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
294 try: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
295 with open(tsvFile, 'r') as f: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
296 next(f) # skip header line |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
297 tsvData = csv.reader(f, delimiter='\t') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
298 for row in tsvData: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
299 vkInput.append([row[0],polaritySanitizer(row[1]),float(row[2]),float(row[3]),float(row[4]),[]]) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
300 except ValueError: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
301 print('The %s data file could not be read.' % tsvFile) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
302 vkData = forecaster(vkInput) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
303 saveForcast(vkData) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
304 plotRatios(vkData) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
305 elif vkInputType == "xcms": |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
306 vkInput = [] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
307 xcmsSampleMetadataFile = getattr(args, "sample_metadata") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
308 try: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
309 polarity = {} |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
310 with open(xcmsSampleMetadataFile, 'r') as f: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
311 xcmsSampleMetadata = csv.reader(f, delimiter='\t') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
312 next(xcmsSampleMetadata, None) # skip header |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
313 for row in xcmsSampleMetadata: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
314 sample = row[0] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
315 sample_polarity = polaritySanitizer(row[2]) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
316 polarity[sample] = sample_polarity |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
317 except ValueError: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
318 print('The %s data file could not be read. Check that polarity is set to "negative" or "positive"' % xcmsSampleMetadataFile) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
319 xcmsVariableMetadataFile = getattr(args, "variable_metadata") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
320 try: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
321 mz = {} |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
322 rt = {} |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
323 variable_index = {} |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
324 mz_index = int() |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
325 rt_index = int() |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
326 with open(xcmsVariableMetadataFile, 'r') as f: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
327 xcmsVariableMetadata = csv.reader(f, delimiter='\t') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
328 i = 0 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
329 for row in xcmsVariableMetadata: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
330 if i != 0: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
331 mz[row[0]] = float(row[mz_index]) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
332 rt[row[0]] = float(row[rt_index]) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
333 else: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
334 for column in row: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
335 variable_index[column] = i |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
336 i += 1 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
337 mz_index = variable_index["mz"] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
338 rt_index = variable_index["rt"] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
339 except ValueError: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
340 print('The %s data file could not be read.' % xcmsVariableMetadataFile) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
341 xcmsDataMatrixFile = getattr(args, "data_matrix") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
342 try: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
343 with open(xcmsDataMatrixFile, 'r') as f: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
344 xcmsDataMatrix = csv.reader(f, delimiter='\t') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
345 first_row = True |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
346 for row in xcmsDataMatrix: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
347 if first_row: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
348 sample_id = row |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
349 first_row = False |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
350 else: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
351 i = 0 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
352 while(i < len(row)): |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
353 if i == 0: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
354 i+=1 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
355 else: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
356 intensity = row[i] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
357 if intensity not in {'NA', '#DIV/0!', '0'}: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
358 variable = row[0] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
359 sample = sample_id[i] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
360 # XCMS data may include empty columns |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
361 if sample != "": |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
362 vkInput.append([sample, polarity[sample], mz[variable], rt[variable], float(intensity), []]) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
363 i+=1 |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
364 except ValueError: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
365 print('The %s data file could not be read.' % xcmsDataMatrixFile) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
366 vkData = forecaster(vkInput) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
367 saveForcast(vkData) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
368 plotRatios(vkData) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
369 else: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
370 vkData = [] |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
371 tsvPlotvFile = getattr(args, "input") |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
372 try: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
373 with open(tsvPlotvFile, 'r') as f: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
374 next(f) # skip header line |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
375 plotData = csv.reader(f, delimiter='\t') |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
376 for row in plotData: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
377 vkData.append([row[0],row[1],float(row[2]),float(row[3]),float(row[4]),list(row[4]),float(row[5]),float(row[6]),float(row[7]),float(row[8])]) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
378 except ValueError: |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
379 print('The %s data file could not be read.' % tsvPlotvFile) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
380 plotRatios(vkData) |
0b8ddf650752
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 7c299d22bdce251ce599cd34df76919d297a7007-dirty
eslerm
parents:
diff
changeset
|
381 |