# HG changeset patch # User linda.bakker@wur.nl # Date 1426867804 -3600 # Node ID 346ff9ad8c7abe05934b6c5ea2fe4236227b5ab7 # Parent 5a753524e525afbeaedadf618d654b5baf454ef3 fix for rankfilter, removed pfd read functional diff -r 5a753524e525 -r 346ff9ad8c7a GCMS/combine_output.py --- a/GCMS/combine_output.py Thu Mar 19 15:04:56 2015 +0100 +++ b/GCMS/combine_output.py Fri Mar 20 17:10:04 2015 +0100 @@ -81,7 +81,6 @@ # The ID in the RankFilter output contains the following 5 fields: rf_id = rankfilter['ID'].split('-') try: - name, formula = _remove_formula(rankfilter['Name']) hit = [rf_id[0], # Centrotype rf_id[1], # cent.Factor rf_id[2], # scan nr @@ -89,8 +88,8 @@ rf_id[4], # nr. Peaks # Appending other fields rankfilter['R.T.'], - name, - caslookup['FORMULA'] if not formula else formula, + rankfilter['Name'], + rankfilter['Formula'], rankfilter['Library'].strip(), rankfilter['CAS'].strip(), rankfilter['Forward'], @@ -120,18 +119,7 @@ return hit -def _remove_formula(name): - ''' - The RankFilter Name field often contains the Formula as well, this function removes it from the Name - @param name: complete name of the compound from the RankFilter output - ''' - name = name.split() - poss_formula = name[-1] - match = re.match("^(([A-Z][a-z]{0,2})(\d*))+$", poss_formula) - if match: - return ' '.join(name[:-1]), poss_formula - else: - return ' '.join(name), False + def _get_default_caslookup(): diff -r 5a753524e525 -r 346ff9ad8c7a GCMS/combine_output.xml --- a/GCMS/combine_output.xml Thu Mar 19 15:04:56 2015 +0100 +++ b/GCMS/combine_output.xml Fri Mar 20 17:10:04 2015 +0100 @@ -17,6 +17,10 @@ Performs a combination of output files from the 'RankFilter' and 'Lookup RI for CAS' tools into two tab-separated files. +Merges data from both input dictionaries based on the Centrotype field. +In the 'RIQC-RankFilter output' the centrotype is found in the 'ID' field (first part before the "-"). 
In the 'RIQC-Lookup RI for CAS output' +the centrotype is found in the 'Centrotype' field. + The files produced are contain either all hits for a compound on a single line (Single) or on separate lines (Multi). diff -r 5a753524e525 -r 346ff9ad8c7a metaMS/README.txt --- a/metaMS/README.txt Thu Mar 19 15:04:56 2015 +0100 +++ b/metaMS/README.txt Fri Mar 20 17:10:04 2015 +0100 @@ -1,70 +1,74 @@ Wrappers for: -- the metaMS R package by Ron Wehrens. -- the xcms package. +- the metaMS R package by Ron Wehrens (https://github.com/rwehrens/metaMS.git) +- the xcms package (https://xcmsonline.scripps.edu/, http://www.bioconductor.org/packages/release/bioc/html/xcms.html) +- the CAMERA tool ( http://www.bioconductor.org/packages/release/bioc/html/CAMERA.html) Wrappers written by Pieter Lukasse. -Installation (when updating: close all vignettes [i.e. pdfs/manuals] !) +=======Installation (when updating: close all vignettes [i.e. pdfs/manuals] !)============ -In R: +In R execute: source("http://bioconductor.org/biocLite.R") biocLite("metaMS") biocLite("multtest") -#biocLite("R2HTML") + # for "multi-threading" (actually starts multiple R processes for parallel processing): install.packages("snow") install.packages("Cairo") +>> Running the wrappers: go to directory and execute: + ======Run metaMS_cmd_pick_and_group.r with:================= -E:\workspace\PRIMS-metabolomics\python-tools\tools\metaMS>Rscript metaMS_cmd_pick_and_group.r test/extdata.zip test/example_settings.txt test/out/peakTable.txt test/out/xsAnnotatePrep.rdata positive test/out/html_peaks.html test/out +Rscript metaMS_cmd_pick_and_group.r test/extdata.zip test/example_settings.txt test/out/peakTable.txt test/out/xsAnnotatePrep.rdata positive test/out/html_peaks.html test/out ======Run metaMS_cmd_annotate.r with:================= -E:\workspace\PRIMS-metabolomics\python-tools\tools\metaMS>Rscript metaMS_cmd_annotate.r test/LCDBtest.RData test/out/xsAnnotatePrep.rdata test/example_settings.txt 
test/out/annotationTable.txt "0" test/out/html_annot.html test/out +Rscript metaMS_cmd_annotate.r test/LCDBtest.RData test/out/xsAnnotatePrep.rdata test/example_settings.txt test/out/annotationTable.txt "0" test/out/html_annot.html test/out ======Run xcms_differential_analysis.r with:================= -E:\workspace\PRIMS-metabolomics\python-tools\tools\metaMS>Rscript xcms_differential_analysis.r test/out/xsAnnotatePrep.rdata "CLASS1" "CLASS2" 10 test/out2/outtable.tsv test/out2/html/html.html test/out2/html +Rscript xcms_differential_analysis.r test/out/xsAnnotatePrep.rdata "CLASS1" "CLASS2" 10 test/out2/outtable.tsv test/out2/html/html.html test/out2/html ======Run xcms_get_eic.r with:================= -E:\workspace\PRIMS-metabolomics\python-tools\tools\metaMS>Rscript xcms_get_alignment_eic.r test/out/xsAnnotatePrep.rdata 10 300 3 STDmix_GC_01,STDmix_GC_02 test/out3/html/html.html test/out3/html +Rscript xcms_get_alignment_eic.r test/out/xsAnnotatePrep.rdata 10 300 3 STDmix_GC_01,STDmix_GC_02 test/out3/html/html.html test/out3/html OR -E:\workspace\PRIMS-metabolomics\python-tools\tools\metaMS>Rscript xcms_get_mass_eic.r test/out/xsAnnotatePrep.rdata 10 3000 -1 -1 "77.98,231.96" 5 STDmix_GC_01,STDmix_GC_02 Yes raw test/out4/html/html.html test/out4/html +Rscript xcms_get_mass_eic.r test/out/xsAnnotatePrep.rdata 10 3000 -1 -1 "77.98,231.96" 5 STDmix_GC_01,STDmix_GC_02 Yes raw test/out4/html/html.html test/out4/html !!!!!!!!!!!!!!!!Troubleshooting:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -NetCDF is required. If the following is found in the installation.log +(1) NetCDF is required. If the following is found in the installation.log : -In file included from rnetCDF.c:2:0: -rnetCDF.h:1:20: fatal error: netcdf.h: No such file or directory -compilation terminated. + In file included from rnetCDF.c:2:0: + rnetCDF.h:1:20: fatal error: netcdf.h: No such file or directory + compilation terminated. 
then metaMS will not have been installed and running the tool will result in error: - + + Possible solution: -> Install the -dev of those packages to get the headers that are - required to compile the package. In this case, you need libnetcdf-dev, udunits-bin and libudunits2-dev - (from http://stackoverflow.com/questions/11319698/how-to-install-r-packages-rnetcdf-and-ncdf-on-ubuntu) + > Install the -dev of those packages to get the headers that are + required to compile the package. In this case, you need libnetcdf-dev, udunits-bin and libudunits2-dev + (from http://stackoverflow.com/questions/11319698/how-to-install-r-packages-rnetcdf-and-ncdf-on-ubuntu) So ->>sudo apt-get install libnetcdf-dev, udunits-bin and libudunits2-dev + >>sudo apt-get install libnetcdf-dev udunits-bin libudunits2-dev -Cairo / no X11 mode is required. +(2) Cairo / "no X11" (headless) mode is required. Possible solution: -> install of cairo (http://www.cairographics.org/) and/or set CAIRO_CFLAGS/LIBS correspondingly. + > install of cairo (http://www.cairographics.org/) and/or set CAIRO_CFLAGS/LIBS correspondingly. 
So ->>sudo apt-get install libcairo2-dev + >>sudo apt-get install libcairo2-dev diff -r 5a753524e525 -r 346ff9ad8c7a rankfilter_GCMS/rankfilter.py --- a/rankfilter_GCMS/rankfilter.py Thu Mar 19 15:04:56 2015 +0100 +++ b/rankfilter_GCMS/rankfilter.py Fri Mar 20 17:10:04 2015 +0100 @@ -25,9 +25,8 @@ from numpy import array, linalg, ones from numpy.polynomial import polynomial import math -import pdfread import sys - +import csv def calibrate(standards): ''' @@ -269,6 +268,19 @@ i = i + 1 +def read_tabular(in_csv): + ''' + Parses a tab-separated file returning a dictionary with named columns + @param in_csv: input filename to be parsed + ''' + data = list(csv.reader(open(in_csv, 'rU'), delimiter='\t')) + header = data.pop(0) + # Create dictionary with column name as key + output = {} + for index in xrange(len(header)): + output[header[index]] = [row[index] for row in data] + return output + #---------End-------------- def main(): #Ranking and filtering procedure @@ -381,18 +393,15 @@ if InputData['analysis_type'] == 'NIST': #HitList_missed - a variable of type dictionary containing the hits with the symbol ";" #in the name - if not NDIS_is_tabular: - print "Warning; NDIS is not tabular format, reading PDF..\n" - [HitList, HitList_missed] = pdfread.getPDF(InputData['sample']) - else: - HitList = pdfread.read_tabular(InputData['sample']) + # HITLIST = the NIST results file given here as input: + HitList = read_tabular(InputData['sample']) #Convert RT to RI if InputData['model'] == 'linear': HitList = convert_rt(HitList, coeff) if InputData['model'] == 'poly': - print "Executing convert_rt_poly().." - HitList = convert_rt_poly(HitList, poly_cal) + print "Executing convert_rt_poly().." 
+ HitList = convert_rt_poly(HitList, poly_cal) #------Read the library data with the predicted RI------ try: @@ -415,16 +424,11 @@ #------Print the ranked and filtered hits------ #Specify which data to be printed if InputData['analysis_type'] == 'AMDIS': - keys_to_print = ['R.T.', 'CAS', 'Name', 'Rank', 'RIexp', 'RIsvr', '%rel.err', 'Weighted', 'Reverse', 'Synonyms'] + keys_to_print = ['R.T.', 'CAS', 'Name', 'Formula','Rank', 'RIexp', 'RIsvr', '%rel.err', 'Weighted', 'Reverse', 'Synonyms'] else: - keys_to_print = ['ID', 'R.T.', 'Name', 'CAS', 'Rank', 'RIexp', 'RIsvr', '%rel.err', 'Forward', 'Reverse', 'Synonyms', 'Library'] + keys_to_print = ['ID', 'R.T.', 'Name', 'Formula', 'CAS', 'Rank', 'RIexp', 'RIsvr', '%rel.err', 'Forward', 'Reverse', 'Synonyms', 'Library'] - #skip this error output from reading a pdftotext file when file is tabular - if InputData['analysis_type'] == 'NIST' and not NDIS_is_tabular: - out_missed_pdf = open(output_files['missed_parse_pdf'], 'w') - for x, y in zip(HitList_missed['Missed Compounds'], HitList_missed['RT missed Compounds']): - out_missed_pdf.write('%s\n' % '\t'.join([y, x])) - out_missed_pdf.close() + print_to_file(HitList, output_files, keys_to_print, print_subsets)