Mercurial > repos > eslerm > vkmz
diff vkmz.py @ 1:b02af8eb8e6e draft
planemo upload for repository https://github.com/HegemanLab/VKMZ commit 5e7a43415df3902b44b7623cb2c6ffb8845751ac
author | eslerm |
---|---|
date | Wed, 30 May 2018 13:17:32 -0400 |
parents | 0b8ddf650752 |
children | 04079c34452a |
line wrap: on
line diff
--- a/vkmz.py Wed May 02 18:31:06 2018 -0400 +++ b/vkmz.py Wed May 30 13:17:32 2018 -0400 @@ -33,11 +33,12 @@ inputSubparser.add_argument('--error', '-e', nargs='?', type=float, required=True, help='Mass error of mass spectrometer in PPM') inputSubparser.add_argument('--database', '-d', nargs='?', default='databases/bmrb-light.tsv', help='Select database.') inputSubparser.add_argument('--directory', nargs='?', default='', type=str, help='Define directory of tool.') + inputSubparser.add_argument('--polarity', '-p', choices=['positive','negative'], help='Force polarity mode. Ignore variables in input file.') inputSubparser.add_argument('--no-adjustment', '-na', action='store_true', help='Use flag to turn off polarity based mass adjustment. This flag should always be used if reprocessing data generated by VKMZ.') inputSubparser.add_argument('--multiprocessing', '-m', action='store_true', help='Use flag to turn on multiprocessing.') - inputSubparser.add_argument('--plottype', '-p', nargs='?', default='scatter-2d', choices=['scatter-2d', 'scatter-3d'], help='Select plot type.') - inputSubparser.add_argument('--size', '-s', nargs='?', default=5, type=int, help='Set size of of dots. size+2*log(size*peak/(highest_peak/lowest_peak') - inputSubparser.add_argument('--size-algorithm', '-a', nargs='?', default=0, type=int, choices=[0,1,2],help='Size algorithm selector. Algo 0: size, Algo 1: size+2*log(size*peak/(highest_peak/lowest_peak, Algo 2: size+2*size*peak/(highest_peak-lowest_peak)') + inputSubparser.add_argument('--plottype', '-t', nargs='?', default='scatter-2d', choices=['scatter-2d', 'scatter-3d'], help='Select plot type.') + inputSubparser.add_argument('--size', '-s', nargs='?', default=5, type=int, help='Set maxium size of plot symbols.') + inputSubparser.add_argument('--size-algorithm', '-a', nargs='?', default=0, type=int, choices=[0,1], help="Symbol size algorithm selector. Algorithm 0 sets all symbols to the maxium size. Algorithm 2 determines a features symbol size by it's log intensity.") args = parser.parse_args() vkInputType = getattr(args, "input-type") @@ -45,6 +46,8 @@ # read inputs, arguments and define globals vkError = getattr(args, "error") +vkPolarity = getattr(args, "polarity") + vkMultiprocessing = getattr(args, "multiprocessing") vkNoAdjustment = getattr(args, "no_adjustment") @@ -100,7 +103,7 @@ if prediction != -1: feature[2] = mass predictions = predictNeighbors(mass, uncertainty, prediction) - feature[5] = predictions + feature.append(predictions) # feature[5] predictionClosest = predictions[0] formula = predictionClosest[1] formulaList = re.findall('[A-Z][a-z]?|[0-9]+', formula) @@ -123,7 +126,7 @@ feature += [predictionClosestDelta, hc, oc, nc] return(feature) -# adjust observed mass to a neutral mass +# adjust charged mass to a neutral mass def adjust(mass, polarity): # value to adjust by proton = 1.007276 @@ -147,6 +150,22 @@ return predict(mass, uncertainty, mid+1, right) return -1 +def plotData(vkData): + if vkSizeAlgo == 0: + for row in vkData: + row.append(vkSize) + else: + max_intensity = 0.0 + for row in vkData: + intensity = row[4] + if intensity > max_intensity: + max_intensity = intensity + alpha = vkSize/math.log(max_intensity+1) + for row in vkData: + intensity = row[4] + row.append(alpha*math.log(intensity+1)) + return vkData + # find and sort known masses within error limit of observed mass def predictNeighbors(mass, uncertainty, prediction): i = 0 @@ -173,36 +192,29 @@ # write output file def saveForcast(vkOutputList): - try: + try: with open(vkOutput+'.tsv', 'w') as f: - f.writelines(str("sample_id\tpolarity\tmz\tretention_time\tintensity\tpredictions\tdelta\tH:C\tO:C\tN:C") + '\n') + f.writelines(str("sample_id\tpolarity\tmz\tretention_time\tintensity\tpredictions\tdelta\tH:C\tO:C\tN:C\tsymbol_size") + '\n') for feature in vkOutputList: - f.writelines(feature[0]+'\t'+feature[1]+'\t'+str(feature[2])+'\t'+str(feature[3])+'\t'+str(feature[4])+'\t'+str(feature[5])+'\t'+str(feature[6])+'\t'+str(feature[7])+'\t'+str(feature[8])+'\t'+str(feature[9])+'\t'+'\n') + f.writelines(feature[0]+'\t'+feature[1]+'\t'+str(feature[2])+'\t'+str(feature[3])+'\t'+str(feature[4])+'\t'+str(feature[5])+'\t'+str(feature[6])+'\t'+str(feature[7])+'\t'+str(feature[8])+'\t'+str(feature[9])+'\t'+str(feature[10])+'\t'+'\n') except ValueError: print('"%s" could not be saved.' % filename) def plotRatios(vkData): max_rt = 0 - min_intensity = 10.0**10 - max_intensity = 0.0 max_hc = 0 max_oc = 0 max_nc = 0 for row in vkData: if row[3] > max_rt: max_rt = row[3] - intensity = float(row[4]) - if intensity < min_intensity: - min_intensity = intensity - if intensity > max_intensity: - max_intensity = intensity if row[7] > max_hc: max_hc = row[7] if row[8] > max_oc: max_oc = row[8] if row[9] > max_nc: max_nc = row[9] - labels = ['sampleID', 'polarity', 'mz', 'rt', 'intensity', 'predictions', 'delta', 'hc', 'oc', 'nc'] + labels = ['sampleID', 'polarity', 'mz', 'rt', 'intensity', 'predictions', 'delta', 'hc', 'oc', 'nc', 'symbol_size'] df = pd.DataFrame.from_records(vkData, columns=labels) sampleIDs = df.sampleID.unique() data = [] @@ -210,10 +222,7 @@ i = 0 for sampleID in sampleIDs: dfSample = df.loc[df['sampleID'] == sampleID] - if vkSizeAlgo == 0: - size = dfSample.intensity.apply(lambda x: vkSize) - else: - size = dfSample.intensity.apply(lambda x: vkSize+4*vkSize*float(x)/max_intensity) + size = dfSample.symbol_size trace = go.Scatter( x = dfSample.oc, y = dfSample.hc, @@ -222,6 +231,7 @@ mode = 'markers', marker = dict( size = size, + sizemode = "area", color = dfSample.rt, colorscale = 'Viridis', cmin = 0, @@ -296,10 +306,11 @@ next(f) # skip hearder line tsvData = csv.reader(f, delimiter='\t') for row in tsvData: - vkInput.append([row[0],polaritySanitizer(row[1]),float(row[2]),float(row[3]),float(row[4]),[]]) + vkInput.append([row[0],polaritySanitizer(row[1]),float(row[2]),float(row[3]),float(row[4])]) except ValueError: print('The %s data file could not be read.' % tsvFile) vkData = forecaster(vkInput) + vkData = plotData(vkData) saveForcast(vkData) plotRatios(vkData) elif vkInputType == "xcms": @@ -312,8 +323,11 @@ next(xcmsSampleMetadata, None) # skip header for row in xcmsSampleMetadata: sample = row[0] - sample_polarity = polaritySanitizer(row[2]) - polarity[sample] = sample_polarity + if vkPolarity: + polarity[sample] = vkPolarity + else: + sample_polarity = polaritySanitizer(row[2]) + polarity[sample] = sample_polarity except ValueError: print('The %s data file could not be read. Check that polarity is set to "negative" or "positive"' % xcmsSampleMetadataFile) xcmsVariableMetadataFile = getattr(args, "variable_metadata") @@ -359,11 +373,12 @@ sample = sample_id[i] # XCMS data may include empty columns if sample != "": - vkInput.append([sample, polarity[sample], mz[variable], rt[variable], float(intensity), []]) + vkInput.append([sample, polarity[sample], mz[variable], rt[variable], float(intensity)]) i+=1 except ValueError: print('The %s data file could not be read.' % xcmsDataMatrixFile) vkData = forecaster(vkInput) + vkData = plotData(vkData) saveForcast(vkData) plotRatios(vkData) else: