# HG changeset patch # User eschen42 # Date 1509285905 14400 # Node ID 948bac69394713e149a8a8d1b4e887ae3da8f0ec planemo upload for repository https://github.com/HegemanLab/w4mjoinpn_galaxy_wrapper/tree/master commit cedf2e01903099ef5f1bbe624afe4c2845d6bf23 diff -r 000000000000 -r 948bac693947 LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Sun Oct 29 10:05:05 2017 -0400 @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Hegeman Lab + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff -r 000000000000 -r 948bac693947 README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Sun Oct 29 10:05:05 2017 -0400 @@ -0,0 +1,5 @@ +This tool joins two sets of post-XCMS post-CAMERA Workflow4Metabolomics datasets +(i.e., sampleMetadata, variableMetadata, dataMatrix), +one gathered in negative ionization-mode and the other in positive ionization-mode. + +Please see https://github.com/HegemanLab/w4mjoinpn_galaxy_wrapper for details. 
diff -r 000000000000 -r 948bac693947 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Sun Oct 29 10:05:05 2017 -0400 @@ -0,0 +1,33 @@ +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1038289.svg)](https://doi.org/10.5281/zenodo.1038289) + +# w4mjoinpn_galaxy_wrapper + +This tool joins two sets of MS1 datasets for **exactly** the same set of samples, where one was gathered in positive +ionization-mode and the other in negative ionization-mode, for reasons set forth below. + +Workflow4Metabolomics (W4M, Giacomoni *et al.*, 2014, http://dx.doi.org/10.1093/bioinformatics/btu813; http://workflow4metabolomics.org; +https://github.com/workflow4metabolomics) provides a suite of Galaxy tools for processing and analyzing metabolomics data. + +W4M uses the XCMS package (Smith *et al.*, 2006 http://dx.doi.org/10.1021/ac051437y) to extract features and align +their retention times among multiple samples. + +After peak extraction and alignment, W4M uses the CAMERA package (Kuhl *et al.*, 2012, http://dx.doi.org/10.1021/ac202450g) +"to postprocess XCMS feature lists and to collect all features related to a compound into a compound spectrum." + +Both of these steps are done using data collected in a single ionization mode (i.e., only negative or only positive) +because it would not make sense to attempt to use CAMERA otherwise. + +However, multivariate analysis in general, and particularly the "False Discovery Rate" adjustment in hypothesis testing, +would both benefit from having all variables (features), negative and positive, combined for one analysis. It is also +cumbersome to be forced to do an analysis twice, once for each ionization mode. 
#!/bin/bash
#
# w4mjoinpn.sh - join positive and negative ionization-mode XCMS datasets
# (W4M dataMatrix, sampleMetadata and variableMetadata) for a common set of
# samples.
#
# Usage (keyword/value pairs in any order; all nine values are required):
#   w4mjoinpn.sh dmneg FILE dmpos FILE dmout FILE \
#                smneg FILE smpos FILE smout FILE \
#                vmneg FILE vmpos FILE vmout FILE
#
# The Galaxy wrapper (w4mjoinpn.xml, "Join positive and negative
# ionization-mode W4M datasets for the same samples"; requirements: coreutils,
# sed) first runs `which cut sed head paste cat cp bash test 1>&2` and then
#   $__tool_directory__/w4mjoinpn.sh dmneg $dmneg dmpos $dmpos dmout $dmout
#     smneg $smneg smpos $smpos smout $smout
#     vmneg $vmneg vmpos $vmpos vmout $vmout
# Citation for the tool: doi:10.5281/zenodo.1038289
#
# This tool will fail (exit status 1):
#   * when the samples are not listed in exactly the same order in the
#     negative-mode dataMatrix and the positive-mode dataMatrix
#   * when the samples are not listed in exactly the same order in the
#     negative-mode sampleMetadata and the positive-mode sampleMetadata
#   * when an argument is unexpected or missing, or an input cannot be read
# Otherwise
#   * the two dataMatrix files are concatenated, and the names of features
#     identified from positive ionization-mode data are prefixed with "P";
#     negative, with "N".
#   * the two variableMetadata files are concatenated, and the names of
#     features are prefixed in the same way.
#   * if sampleMetadata has a polarity column, its value is set to "posneg"
#     in the output.
#   * Technically, the sampleMetadata file in the output is derived from the
#     negative ionization-mode sampleMetadata.
#
# All messages, including error messages, are written to stdout.

#######################################
# Concatenate a negative-mode and a positive-mode table into one file.
# Arguments: $1 - negative-mode table
#            $2 - positive-mode table
#            $3 - output file
# Outputs:   writes to $3 the header line of $1, then the data rows of $1
#            prefixed with "N", then the data rows of $2 prefixed with "P"
# Returns:   non-zero when a read or the write fails
#######################################
join_tables() {
  local neg=$1 pos=$2 out=$3
  {
    head -n 1 -- "$neg" \
      && sed -e '1d' -e 's/^/N/' -- "$neg" \
      && sed -e '1d' -e 's/^/P/' -- "$pos"
  } > "$out"
}

#######################################
# Locate the first header column named exactly "polarity".
# The header is split on TAB only, so that the column number agrees with the
# field numbering used by `cut -f` (whose default delimiter is TAB) even when
# a column name contains spaces.
# Arguments: $1 - sampleMetadata file
# Outputs:   "<polarity column, 0 if absent> <number of columns>" on stdout
# Returns:   non-zero when the header cannot be read
#######################################
find_polarity_column() {
  local tab=$'\t'
  local header rest field
  local count=0 polarity=0
  header=$(head -n 1 -- "$1") || return 1
  if [[ -n "$header" ]]; then
    rest=$header
    while true; do
      count=$((count + 1))
      field=${rest%%"$tab"*}
      if [[ "$field" == polarity ]] && ((polarity == 0)); then
        polarity=$count
      fi
      # Stop after the last field; empty fields are still counted.
      [[ "$rest" == *"$tab"* ]] || break
      rest=${rest#*"$tab"}
    done
  fi
  printf '%s %s\n' "$polarity" "$count"
}

#######################################
# Copy sampleMetadata to the output, replacing every data-row value of the
# polarity column with "posneg" (the header row is left untouched).
# A polarity column number of 0 (absent) or 1 (the first column) results in a
# plain copy.
# Arguments: $1 - source sampleMetadata file
#            $2 - output file
#            $3 - polarity column number
#            $4 - number of columns in the header
# Returns:   non-zero when paste or cp fails
#######################################
write_sample_metadata() {
  local src=$1 dst=$2 polarity=$3 count=$4
  if ((polarity > 1)); then
    if ((polarity < count)); then
      # polarity is not the last column: keep the columns on both sides
      paste \
        <(cut -f "1-$((polarity - 1))" -- "$src") \
        <(cut -f "$polarity" -- "$src" | sed -e '2,$ s/.*/posneg/') \
        <(cut -f "$((polarity + 1))-" -- "$src") \
        > "$dst"
    else
      # polarity is the last column: nothing to append after it
      paste \
        <(cut -f "1-$((polarity - 1))" -- "$src") \
        <(cut -f "$polarity" -- "$src" | sed -e '2,$ s/.*/posneg/') \
        > "$dst"
    fi
  else
    cp -- "$src" "$dst"
  fi
}

#######################################
# Entry point: parse and validate the arguments, check that both modes list
# the same samples in the same order, then write the three joined files.
# Arguments: keyword/value pairs as described in the file header
# Outputs:   progress and error messages on stdout
# Returns:   0 on success, 1 on any error
#######################################
main() {
  local dmpos='' dmneg='' dmout=''
  local smpos='' smneg='' smout=''
  local vmpos='' vmneg='' vmout=''
  local -a unexpected=() parsed=()
  local name input info polarity column_count

  # Parse arguments
  while (($# > 0)); do
    case "$1" in
      dmpos | dmneg | dmout | smpos | smneg | smout | vmpos | vmneg | vmout)
        # The keyword names the local variable that receives the value.
        printf -v "$1" '%s' "${2-}"
        shift # past argument
        if (($# > 0)); then
          shift # past value
        fi
        ;;
      *) # unknown option
        unexpected+=("$1")
        shift
        ;;
    esac
  done
  if ((${#unexpected[@]} > 0)); then
    echo "unexpected argument ${unexpected[0]}"
    echo "arguments supplied: ${unexpected[*]}"
    return 1
  fi

  # Validate that we got the expected args
  for name in dmpos dmneg dmout smpos smneg smout vmpos vmneg vmout; do
    if [[ -n "${!name}" ]]; then
      parsed+=("${!name}")
    fi
  done
  if ((${#parsed[@]} != 9)); then
    echo "expecting nine arguments"
    echo "parsed arguments: ${parsed[*]}"
    return 1
  fi

  # Show them what we got
  echo "dataMatrix positive_mode ${dmpos}"
  echo "dataMatrix negative_mode ${dmneg}"
  echo "dataMatrix joined_modes ${dmout}"
  echo "sampleMetadata positive_mode ${smpos}"
  echo "sampleMetadata negative_mode ${smneg}"
  echo "sampleMetadata joined_modes ${smout}"
  echo "variableMetadata positive_mode ${vmpos}"
  echo "variableMetadata negative_mode ${vmneg}"
  echo "variableMetadata joined_modes ${vmout}"

  # Refuse to continue with unreadable inputs; otherwise the comparisons
  # below would compare empty strings and empty outputs would be produced.
  for input in "$dmpos" "$dmneg" "$smpos" "$smneg" "$vmpos" "$vmneg"; do
    if [[ ! -r "$input" ]]; then
      echo "cannot read input file ${input}"
      return 1
    fi
  done

  # Check that sample names are the same, in the same order, for the
  # dataMatrix in both datasets (i.e. the header lines are identical)
  if [[ "$(head -n 1 -- "$dmpos")" != "$(head -n 1 -- "$dmneg")" ]]; then
    echo "sample names in dataMatrix files differ"
    return 1
  fi
  # Check that sample names are the same, in the same order, for the
  # sampleMetadata in both datasets (i.e. the first columns are identical)
  if [[ "$(cut -f 1 -- "$smpos")" != "$(cut -f 1 -- "$smneg")" ]]; then
    echo "sample names in sampleMetadata files differ"
    return 1
  fi

  # Concatenate variableMetadata datasets to respective output file
  join_tables "$vmneg" "$vmpos" "$vmout" || return 1

  # Concatenate dataMatrix datasets to respective output file
  join_tables "$dmneg" "$dmpos" "$dmout" || return 1

  # Find the ordinal number of the first column named "polarity" of the
  # negative ionization-mode sampleMetadata file, if any
  info=$(find_polarity_column "$smneg") || return 1
  polarity=${info% *}
  column_count=${info#* }
  echo "Polarity is in column ${polarity} of ${smneg}"
  echo "There are ${column_count} columns in ${smneg}"

  # Copy sampleMetadata from negative ionization-mode to output file,
  # replacing polarity if possible
  write_sample_metadata "$smneg" "$smout" "$polarity" "$column_count" \
    || return 1

  return 0
}

# The exit status of the script is the return status of main.
main "$@"
+ 10.1093/bioinformatics/btu813 + + 10.1021/ac202450g + + 10.1021/ac051437y + +