proteomics_moff: moff.xml comparison

comparison moff.xml @ 0:b4098353ee73 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/moFF commit bc0fad49e3ba73fa5b5b326e940adf9e11854d94

author	galaxyp
date	Fri, 05 Jan 2018 12:47:36 -0500
parents
children	8f0e76ad46ef

comparison

equal deleted inserted replaced

--1:000000000000
+:b4098353ee73
+<tool id="proteomics_moff" name="moFF" version="@VERSION@">
+<description>extracts MS1 intensities from spectrum files</description>
+<macros>
+<token name="@VERSION@">1.2</token>
+<!-- xml macros, used for shared Galaxy parameter inputs -->
+<xml name="ident_input_macro" token_allow_multiple="true" token_input_type="data">
+    <!-- Identification-file inputs shared by the tasks of this tool.
+         The expanding element may override the @ALLOW_MULTIPLE@ and @INPUT_TYPE@ tokens,
+         which are substituted into the "multiple" and "type" attributes below. -->
+    <conditional name="ident_input">
+        <param name="input_type_selector" type="select" label="Choose the format for the identification file:">
+            <option value="ps">Peptide Shaker PSM report (standard, not extended)</option>
+            <option value="generic">Another tabular identification file</option>
+        </param>
+        <!-- PeptideShaker report: only the file itself is needed -->
+        <when value="ps">
+            <param name="ident_input_file" type="@INPUT_TYPE@" format="tabular" label="Peptide Shaker PSM report" multiple="@ALLOW_MULTIPLE@"/>
+        </when>
+        <!-- Generic table: the user additionally maps seven columns of the file -->
+        <when value="generic">
+            <param name="ident_input_file" type="@INPUT_TYPE@" format="tabular" label="A general tabular format" multiple="@ALLOW_MULTIPLE@" help="Must have specific columns; see below to select these columns from your file. The file should have at most one header line. "/>
+            <param name="remove_header" type="boolean" value="false" label="Remove the header line?" help="This is necessary if the file has a line with column headers"/>
+            <!-- Column pickers; each one refers back to ident_input_file -->
+            <param name="peptide" type="data_column" data_ref="ident_input_file" label="Column with peptide-spectrum-match sequence"/>
+            <param name="prot" type="data_column" data_ref="ident_input_file" label="Column with protein ID"/>
+            <param name="mod_peptide" type="data_column" data_ref="ident_input_file" label="Column with peptide-spectrum-match sequence that contains possible modifications"/>
+            <param name="rt" type="data_column" data_ref="ident_input_file" label="Column with PSM retention time (in second)"/>
+            <param name="mz" type="data_column" data_ref="ident_input_file" label="Column with m/z (mass over charge)"/>
+            <param name="mass" type="data_column" data_ref="ident_input_file" label="Column with mass of the peptide"/>
+            <param name="charge" type="data_column" data_ref="ident_input_file" label="Column with charge of ionized peptide"/>
+        </when>
+    </conditional>
+</xml>
+<xml name="raw_input_macro" token_allow_multiple="true" token_input_type="data">
+    <!-- Spectrum-file inputs. Both cases expose the same "inputraw" parameter and differ
+         only in the accepted datatype (raw or mzml). @INPUT_TYPE@ and @ALLOW_MULTIPLE@
+         can be overridden by the expanding element. -->
+    <conditional name="msms_input">
+        <param name="input_type_selector" type="select" label="Choose the format for the MS/MS file">
+            <option value="raw">Thermo RAW file</option>
+            <option value="mzml">mzML</option>
+        </param>
+        <when value="raw">
+            <param argument="--inputraw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@" format="raw" label="RAW file(s)"/>
+        </when>
+        <when value="mzml">
+            <param argument="--inputraw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@" format="mzml" label="mzML file(s)"/>
+        </when>
+    </conditional>
+</xml>
+<!-- tokens (code snippets used in <command>) -->
+<token name="@IDENT_INPUT_ARG_MULTIPLE@"><![CDATA[
+## this is where the ident input gets passed to moff/moff_all/moff_mbr
+## Collection variant: emits the flag once, followed by one quoted path per collection
+## element. Each path is the element's display name inside ./ident_inputs.
+--inputtsv
+#for $key in $task.ident_input.ident_input_file.keys():
+'./ident_inputs/${task.ident_input.ident_input_file[$key].display_name}'
+#end for
+]]></token>
+<token name="@IDENT_INPUT_ARG_SINGLE@"><![CDATA[
+## this is where the ident input gets passed to moff/moff_all/moff_mbr
+## Single-dataset variant: the path is the dataset's display name inside ./ident_inputs.
+--inputtsv './ident_inputs/${task.ident_input.ident_input_file.display_name}'
+]]></token>
+<token name="@WRANGLE_IDENT_INPUT_SINGLE@"><![CDATA[
+## Stage the single identification file in ./ident_inputs under its display name.
+mkdir ./ident_inputs &&
+#if $task.ident_input.input_type_selector == "ps":
+## PeptideShaker PSM report: used as-is, so a softlink to the history item is enough
+ln -s '$task.ident_input.ident_input_file' './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
+#else
+## Generic tabular file: build the reformatted table directly in ./ident_inputs.
+## The input dataset is only read; it is never edited or overwritten.
+## optionally skip the first line (done inside awk via the "skip" variable)
+#if $task.ident_input.remove_header:
+#set $header_lines = 1
+#else
+#set $header_lines = 0
+#end if
+## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge"
+echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
+## copy the seven selected columns, in the order of the header row above
+awk 'BEGIN{OFS="\t"; FS="\t"} NR > skip {print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' skip="${header_lines}" pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '$task.ident_input.ident_input_file' >> './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
+#end if
+]]></token>
+<token name="@WRANGLE_IDENT_INPUT_MULTIPLE@"><![CDATA[
+## Stage every identification file of the collection in ./ident_inputs under its display name.
+mkdir ./ident_inputs &&
+#if $task.ident_input.input_type_selector == "ps":
+## PeptideShaker PSM reports: used as-is, so softlinks to the history items are enough
+#for $key in $task.ident_input.ident_input_file.keys():
+ln -s '${task.ident_input.ident_input_file[$key]}' './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' &&
+#end for
+#else
+## Generic tabular files: build each reformatted table directly in ./ident_inputs.
+## The input datasets are only read; they are never edited or overwritten.
+## optionally skip the first line (done inside awk via the "skip" variable)
+#if $task.ident_input.remove_header:
+#set $header_lines = 1
+#else
+#set $header_lines = 0
+#end if
+#for $key in $task.ident_input.ident_input_file.keys():
+## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge"
+echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' &&
+## copy the seven selected columns of the current collection element
+awk 'BEGIN{OFS="\t"; FS="\t"} NR > skip {print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' skip="${header_lines}" pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '${task.ident_input.ident_input_file[$key]}' >> './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' &&
+#end for
+#end if
+]]></token>
+<token name="@RAW_INPUT_ARG_SINGLE@"><![CDATA[
+## Single spectrum file: the path is the dataset's display name inside ./raws.
+--inputraw './raws/$task.msms_input.inputraw.display_name'
+]]></token>
+<token name="@RAW_INPUT_ARG_MULTIPLE@"><![CDATA[
+## Collection of spectrum files: the flag is emitted once, followed by one quoted
+## path per collection element (its display name inside ./raws).
+--inputraw
+#for $key in $task.msms_input.inputraw.keys():
+'./raws/$task.msms_input.inputraw[$key].display_name'
+#end for
+]]></token>
+<token name="@WRANGLE_RAW_INPUT_SINGLE@"><![CDATA[
+## Stage the single spectrum file in ./raws.
+mkdir ./raws &&
+## for files, need to softlink the display name to the history item
+ln -s '$task.msms_input.inputraw' './raws/$task.msms_input.inputraw.display_name' &&
+]]></token>
+<token name="@WRANGLE_RAW_INPUT_MULTIPLE@"><![CDATA[
+## Stage every spectrum file of the collection in ./raws.
+mkdir ./raws &&
+## for files, need to softlink the display name to the history item
+#for $key in $task.msms_input.inputraw.keys():
+ln -s '$task.msms_input.inputraw[$key]' './raws/$task.msms_input.inputraw[$key].display_name' &&
+#end for
+]]></token>
+</macros>
+<requirements>
+<!-- the package version is taken from the @VERSION@ token, the same token used for the tool version -->
+<requirement type="package" version="@VERSION@">moff</requirement>
+</requirements>
+<command detect_errors="aggressive"><![CDATA[
+## Every task collects its results in ./out
+mkdir ./out &&
+#if $task.task_selector == "moff":
+## Apex intensity on a single identification/spectrum pair.
+## NOTE(review): the moff task also defines rt_p_match in the inputs section, but it is
+## not passed to moff.py here - confirm whether that is intended.
+@WRANGLE_IDENT_INPUT_SINGLE@
+@WRANGLE_RAW_INPUT_SINGLE@
+moff.py
+@IDENT_INPUT_ARG_SINGLE@
+@RAW_INPUT_ARG_SINGLE@
+--tol $task.tol
+--rt_w $task.rt_w
+--rt_p $task.rt_p
+--output_folder ./out
+#if $task.peptide_summary:
+--peptide_summary 1
+#end if
+&&
+## move the fixed-name results onto the Galaxy output datasets
+#if $task.peptide_summary:
+mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' &&
+#end if
+mv ./out/*moff_result.txt '$output_table'
+&&
+mv ./out/*.log '$output_logs'
+#else if $task.task_selector == "mbr":
+## Match between runs only; results are moved from ./ident_inputs/mbr_output into ./out
+@WRANGLE_IDENT_INPUT_MULTIPLE@
+moff_mbr.py
+--inputF ./ident_inputs
+## ext is a free-text parameter, so it is quoted before reaching the shell
+--ext '$task.ext'
+&&
+mv ./ident_inputs/mbr_output/* ./out
+#else:
+## moff_all (mbr followed by apex)
+@WRANGLE_IDENT_INPUT_MULTIPLE@
+@WRANGLE_RAW_INPUT_MULTIPLE@
+moff_all.py
+@IDENT_INPUT_ARG_MULTIPLE@
+@RAW_INPUT_ARG_MULTIPLE@
+--tol $task.tol
+--rt_w $task.rt_w
+--rt_p $task.rt_p
+--rt_p_match $task.rt_p_match
+--output_folder ./out
+--ext txt
+#if $task.peptide_summary:
+--peptide_summary 1
+#end if
+&&
+#if $task.peptide_summary:
+mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' &&
+#end if
+## no-op that terminates the trailing "&&" whether or not the summary was moved
+echo -ne
+#end if
+]]></command>
+<inputs>
+    <!-- The selected module decides which inputs and numeric options are shown -->
+    <conditional name="task">
+        <param name="task_selector" type="select" label="Choose which module to run">
+            <option value="moff" selected="true">Apex intensity</option>
+            <option value="mbr">Match between runs</option>
+            <option value="all">All (match-between-runs followed by quantitation)</option>
+        </param>
+        <!-- single identification file plus single spectrum file -->
+        <when value="moff">
+            <expand macro="ident_input_macro" allow_multiple="false"/>
+            <expand macro="raw_input_macro" allow_multiple="false"/>
+            <param argument="--tol" type="float" value="10" label="Tolerance parameter" help="Specify the tolerance parameter in ppm." />
+            <param argument="--rt_w" type="float" value="3.0" label="Retention time window" help="Specify rt window for xic in minutes." />
+            <param argument="--rt_p" type="float" value="1" label="Time window for the peak" help="Specify the time windows for the peak in minutes." />
+            <param argument="--rt_p_match" type="float" value="1.5" label="Time window for the matched peak" help="Specify the time windows for the matched peak in minutes." />
+            <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/>
+        </when>
+        <!-- identification files only, supplied as a dataset collection -->
+        <when value="mbr">
+            <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/>
+            <param argument="--ext" type="text" value="tab" label="Provide the extension used in the display file name (without the period)"/>
+        </when>
+        <!-- identification and spectrum files, both supplied as dataset collections -->
+        <when value="all">
+            <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/>
+            <expand macro="raw_input_macro" allow_multiple="false" input_type="data_collection"/>
+            <param argument="--tol" type="float" value="10" label="Tolerance parameter" help="Specify the tolerance parameter in ppm." />
+            <param argument="--rt_w" type="float" value="3.0" label="Retention time window" help="Specify rt window for xic in minutes." />
+            <param argument="--rt_p" type="float" value="1" label="Time window for the peak" help="Specify the time windows for the peak in minutes." />
+            <param argument="--rt_p_match" type="float" value="1.2" label="Time window for the matched peak" help="Specify the time windows for the matched peak in minutes." />
+            <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/>
+        </when>
+    </conditional>
+</inputs>
+<outputs>
+    <!-- single-file (moff) task: one quantification table and one log -->
+    <data format="tabular" name="output_table" label="${tool.name} quantification: ${on_string}">
+        <filter>task['task_selector']=='moff'</filter>
+    </data>
+    <data format="txt" name="output_logs" label="${tool.name} log: ${on_string}">
+        <filter>task['task_selector']=='moff'</filter>
+    </data>
+    <!-- multi-file (mbr / all) tasks: results are discovered in ./out by file extension -->
+    <collection name="ident_output" type="list" label="${tool.name} quantification: ${on_string}">
+        <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter>
+        <!--discover datasets method -->
+        <discover_datasets pattern="(?P&lt;designation&gt;.*)\.txt" directory="out" format="tabular"/>
+    </collection>
+    <collection name="log_output" type="list" label="${tool.name} logs: ${on_string}">
+        <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter>
+        <discover_datasets pattern="(?P&lt;designation&gt;.*)\.log" directory="out" format="txt"/>
+    </collection>
+    <data format="tabular" name="output_peptide_summary" label="${tool.name} peptide summary: ${on_string}">
+        <!-- The mbr task has no peptide_summary parameter and its command never writes this
+             file, so it is excluded before the parameter is looked up. -->
+        <filter>task['task_selector'] != 'mbr' and task['peptide_summary']</filter>
+    </data>
+</outputs>
+<tests>
+<!-- moff_all: two identification/spectrum pairs supplied as list collections -->
+<test>
+    <param name="task_selector" value="all"/>
+    <param name="input_type_selector" value="ps"/>
+    <param name="ident_input_file">
+        <collection type="list">
+            <element name="mbr_test1" value="input/mbr_test1.tabular"/>
+            <element name="mbr_test2" value="input/mbr_test2.tabular"/>
+        </collection>
+    </param>
+    <param name="inputraw">
+        <collection type="list">
+            <element name="mbr_test1" value="input/mbr_test1.mzml"/>
+            <element name="mbr_test2" value="input/mbr_test2.mzml"/>
+        </collection>
+    </param>
+    <param name="peptide_summary" value="true"/>
+    <!-- the summary must contain one intensity column per input run -->
+    <output name="output_peptide_summary" ftype="tabular">
+        <assert_contents>
+            <has_text text="sumIntensity_mbr_test1"/>
+            <has_text text="sumIntensity_mbr_test2"/>
+        </assert_contents>
+    </output>
+    <output_collection name="ident_output" type="list">
+        <element name="mbr_test1_match_moff_result" value="output1/mbr_test1_match_moff_result.txt"/>
+        <element name="mbr_test2_match_moff_result" value="output1/mbr_test2_match_moff_result.txt"/>
+    </output_collection>
+    <!-- logs are only spot-checked for a single known line each -->
+    <output_collection name="log_output" type="list">
+        <element name="mbr_test1_match__moff">
+            <assert_contents>
+                <has_line line="peptide at line 200 -->  MZ: 783.4200 RT: 134.6997 matched (yes=1/no=0): 0"/>
+            </assert_contents>
+        </element>
+        <element name="mbr_test2_match__moff">
+            <assert_contents>
+                <has_line line="peptide at line 132 -->  MZ: 767.8700 RT: 98.1975 matched (yes=1/no=0): 0"/>
+            </assert_contents>
+        </element>
+    </output_collection>
+</test>
+<!-- moff alone: one PeptideShaker report plus one mzML file -->
+<test>
+    <param name="task_selector" value="moff"/>
+    <!-- Both conditionals have a selector called input_type_selector, so each one is
+         addressed by its full path to make the intended case explicit. -->
+    <param name="task|ident_input|input_type_selector" value="ps"/>
+    <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/>
+    <param name="task|msms_input|input_type_selector" value="mzml"/>
+    <param name="inputraw" value="input/test.mzml" ftype="mzml"/>
+    <param name="peptide_summary" value="true"/>
+    <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/>
+    <output name="output_logs">
+        <assert_contents>
+            <has_line line="peptide at line 294 -->  MZ: 677.3300 RT: 60.6078"/>
+        </assert_contents>
+    </output>
+</test>
+<!-- generic tabular input: same data as the moff test, with the columns mapped by hand -->
+<test>
+    <param name="task_selector" value="moff"/>
+    <!-- Both conditionals have a selector called input_type_selector, so each one is
+         addressed by its full path to make the intended case explicit. -->
+    <param name="task|ident_input|input_type_selector" value="generic"/>
+    <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/>
+    <param name="remove_header" value="true"/>
+    <param name="task|msms_input|input_type_selector" value="mzml"/>
+    <param name="inputraw" value="input/test.mzml" ftype="mzml"/>
+    <!-- column numbers of the seven required fields in input/test.tabular -->
+    <param name="peptide" value="3"/>
+    <param name="prot" value="2"/>
+    <param name="mod_peptide" value="7"/>
+    <param name="rt" value="13"/>
+    <param name="mz" value="14"/>
+    <param name="mass" value="17"/>
+    <param name="charge" value="15"/>
+    <param name="peptide_summary" value="true"/>
+    <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/>
+    <output name="output_logs">
+        <assert_contents>
+            <has_line line="peptide at line 294 -->  MZ: 677.3300 RT: 60.6078"/>
+        </assert_contents>
+    </output>
+</test>
+<!-- match between runs only: identification files in, matched tables out -->
+<test>
+    <param name="task_selector" value="mbr"/>
+    <param name="input_type_selector" value="ps"/>
+    <param name="ident_input_file">
+        <collection type="list">
+            <element name="mbr_test1" value="input/mbr_test1.tabular"/>
+            <element name="mbr_test2" value="input/mbr_test2.tabular"/>
+        </collection>
+    </param>
+    <param name="ext" value="tabular"/>
+    <!-- exactly one matched table is expected per input file -->
+    <output_collection name="ident_output" type="list" count="2">
+        <element name="mbr_test1_match" file="input/mbr_output/mbr_test1_match.txt"/>
+        <element name="mbr_test2_match" file="input/mbr_output/mbr_test2_match.txt"/>
+    </output_collection>
+</test>
+</tests>
+<help>
+<![CDATA[
+**Description**
+moFF (a Modest Feature Finder) is an OS independent tool designed to extract
+apex MS1 intensity using a set of identified MS2 peptides.
+It currently uses a Go library to directly extract data from Thermo Raw spectrum files,
+eliminating the need for conversions from other formats.
+Moreover, moFF also allows one to work directly with mzML files.
+**Usage**
+*Modules:*
+1. Apex Intensity: this is used for a single pair of files, one identification and one spectrum file.
+2. Match between runs (MBR): for multiple identification files, share MS2 identified peptides between runs and predict the retention time.
+3. All (match between runs followed by apex intensity): this is used for more than one pair of identification and spectrum files.
+If both match between runs and apex intensity are desired, it is best to run them both at once (i.e., run the 'All' module).
+The MBR module is mainly useful for observing the intermediate steps of the algorithm - its outputs are not able to be used as inputs in moFF or in other tools.
+*Inputs:*
+- Identification file: this can either be a generic tabular file or the standard PSM report from PeptideShaker.
+If it is a generic tabular file, please select the columns corresponding to the required information.
+- MS/MS file: this can either be a Thermo raw file or an mzML file.
+A given pair of files must have the *exact* same display name, not including the extension;
+e.g. ``example1.tabular`` and ``example1.mzml``.
+If the display names are different, simply change them in the history menu.
+For multiple files (the MBR or All modules), the identification and spectrum files must be provided as dataset collections.
+This allows for usage of the output dataset collections in workflows.
+*Parameters:*
+All the parameters related to the time windows (``rt_w``, ``rt_p``, ``rt_p_match``) are
+half of the entire time window in which the apex peak is searched or the XIC is retrieved.
+For correct rt windows, we suggest you set the ``rt_p`` value equal to or slightly greater than the
+dynamic exclusion duration set in your machine. We also suggest that you always set
+``rt_p_match`` slightly bigger than the value used for ``rt_p``.
+*Outputs:*
+When used in the single file mode ("Apex intensity" module), the outputs are 2 (or 3) files: a log file, a quantitation file,
+and (optionally) a peptide summary, with intensities aggregated across peptides. When used in the multiple file mode ("All"),
+the outputs are a dataset collection of log files (one per identification file), a dataset collection of quantification files, and (optionally) a peptide summary.
+If used with a generic tabular format, the only columns in the output file are the 7 columns selected while using moFF plus the columns that moFF adds. Other columns are discarded.
+**More Information**
+See the moFF Github site at https://github.com/compomics/moFF,
+and the publication at https://dx.doi.org/10.1038/nmeth.4075
+]]>
+</help>
+<citations>
+<!-- DOI of the moFF publication that is also linked from the help text -->
+<citation type="doi">10.1038/nmeth.4075</citation>
+</citations>
+</tool>

Mercurial > repos > galaxyp > proteomics_moff

comparison moff.xml @ 0:b4098353ee73 draft