comparison pathway_pipeline.xml @ 0:6077821e8ae7 draft

planemo upload for repository https://github.com/picrust/picrust2 commit 972784d909912af20cd213fc56830fee79d83ca6
author iuc
date Sat, 04 Mar 2023 20:27:27 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6077821e8ae7
1 <tool id="picrust2_pathway_pipeline" name="PICRUSt2 Pathway abundance inference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description></description>
<!-- Shared macros are imported from macros.xml; the bio_tool and requirements macros expanded below are presumably defined there (not visible in this file). -->
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="bio_tool"/>
7 <expand macro="requirements"/>
<!-- Command whose output Galaxy records as the version of the wrapped program. -->
8 <version_command>pathway_pipeline.py -v</version_command>
9 <command detect_errors="exit_code"><![CDATA[
## Runs pathway_pipeline.py into pathways_out/ (plus intermediate/ when intermediate_check is set),
## then decompresses every .gz result and renames every .tsv to .tabular.
## The VAR_ACCESS_FOO and PATHWAY_PIPELINE_PARAMS tokens are expanded from macros; their content is not visible here.
10 @VAR_ACCESS_FOO@
11 #if $intermediate_check
12 mkdir intermediate &&
13 #end if
14
15 pathway_pipeline.py
16 --input '$input'
17 @PATHWAY_PIPELINE_PARAMS@
18 #if $intermediate_check
19 --intermediate 'intermediate/pathways/'
20 #end if
21 --out_dir 'pathways_out'
22 --processes "\${GALAXY_SLOTS:-1}"
23
24 ## gunzip all results and rename all tsv files to tabular to simplify discovery
25 #set find_paths="pathways_out"
26 #if $intermediate_check
27 #set find_paths+=" intermediate"
28 #end if
29 &&
30 find $find_paths -name "*.gz" -exec gunzip {} \;
31 &&
## the matched path is handed to sh as a positional argument instead of being pasted into the
## script text, so names containing spaces or shell metacharacters are renamed safely
32 find $find_paths -name "*.tsv" -exec sh -c 'mv "\$1" "\${1%.tsv}.tabular"' sh {} \;
33 &&
34 ## otherwise the `;` on the last line is swallowed
35 true
36 ]]></command>
37 <inputs>
38 <param argument="--input" type="data" format="tabular" label="Input table with gene family abundances" help="Unstratified or stratified output of PICRUSt2 Metagenome prediction tool"/>
39 <expand macro="pathway_pipeline_params" mapargument="--map"/>
<!-- intermediate_check is a wrapper-only switch: the command section translates it into the intermediate directory option, so it is declared by name rather than advertised as a command line argument of the same spelling. -->
40 <param name="intermediate_check" type="boolean" truevalue="intermediate_check" falsevalue="" checked="false" label="Keep intermediate files" help="Intermediate output files will be deleted by default"/>
41 </inputs>
42 <outputs>
<!-- The output elements themselves come from the pathways_output macro (defined outside this file). The tokens passed here supply the filter expressions: one on the intermediate_check input, one on the coverage input, and one requiring a non-empty per_sequence_contrib in the strat_output conditional. Which outputs each filter guards is decided inside the macro. -->
43 <expand macro="pathways_output" from_work_dir="./">
44 <token name="intermediate_filter">
45 <filter>intermediate_check</filter>
46 </token>
47 <token name="coverage_filter">
48 <filter>coverage</filter>
49 </token>
50 <token name="per_sequence_filter">
51 <filter>strat_output['per_sequence_contrib'] != ''</filter>
52 </token>
53 </expand>
54 </outputs>
55 <tests>
56 <test expect_num_outputs="1">
<!-- Minimal run: skip_minpath and no_gap_fill enabled, no per-sequence contribution, intermediates not kept; only the main pathway table is expected. -->
57 <param name="input" value="pred_metagenome_unstrat.tsv.gz" ftype="tabular"/>
58 <param value="true" name="skip_minpath"/>
59 <param value="true" name="no_gap_fill"/>
60 <param value="" name="no_regroup"/>
61 <conditional name="strat_output">
62 <param value="" name="per_sequence_contrib"/>
63 </conditional>
64 <param value="false" name="intermediate_check"/>
65 <output ftype="tabular" name="pathways_output">
66 <assert_contents>
67 <has_text text="pathway"/>
68 <has_n_lines n="205"/>
69 </assert_contents>
70 </output>
71 <assert_command>
72 <!-- default regrouping must stay enabled and no custom regroup map may be passed -->
73 <has_text negate="true" text="--no_regroup"/>
74 <has_text negate="true" text="--regroup_map"/>
75 </assert_command>
76 </test>
77 <test expect_num_outputs="6">
<!-- Full run: per-sequence contributions, coverage and kept intermediates. intermediate_check is set exactly once (to true); a second, contradictory false entry was removed because this test expects the intermediate collection. -->
78 <param name="input" ftype="tabular" value="pred_metagenome_unstrat.tsv.gz"/>
79 <param name="skip_minpath" value="true"/>
80 <param name="no_gap_fill" value="true"/>
81 <param name="no_regroup" value=""/>
83 <conditional name="strat_output">
84 <param name="per_sequence_contrib" value="--per_sequence_contrib"/>
85 <param name="wide_table" value="false"/>
86 <param name="per_sequence_abun" ftype="tabular" value="seqtab_norm.tsv.gz"/>
87 <param name="per_sequence_function" ftype="tabular" value="per_seq_func.tsv.gz"/>
88 </conditional>
89 <param name="coverage" value="true"/>
90 <param name="intermediate_check" value="true"/>
91 <output name="pathways_output" ftype="tabular">
92 <assert_contents>
93 <has_text text="pathway"/>
94 <has_n_lines n="205"/>
95 </assert_contents>
96 </output>
97 <output name="path_cov_unstrat" ftype="tabular">
98 <assert_contents>
99 <has_text text="100CHE6KO"/>
100 <has_n_lines n="92"/>
101 </assert_contents>
102 </output>
103 <output name="path_abun_unstrat_per_seq" ftype="tabular">
104 <assert_contents>
105 <has_text text="101CHE6WT"/>
106 <has_n_lines n="3"/>
107 </assert_contents>
108 </output>
109 <output name="path_abun_predictions" ftype="tabular">
110 <assert_contents>
111 <has_text text="PWY-5837"/>
112 <has_n_lines n="3"/>
113 </assert_contents>
114 </output>
115 <output name="path_abun_contrib" ftype="tabular">
116 <assert_contents>
117 <has_text text="taxon"/>
118 <has_n_lines n="7"/>
119 </assert_contents>
120 </output>
121 <output_collection name="pathways_intermediate_output" type="list" count="2">
122 <element name="parsed_mapfile" ftype="tabular">
123 <assert_contents>
124 <has_text text="METHYLENETHFDEHYDROG"/>
125 <has_n_lines n="575"/>
126 </assert_contents>
127 </element>
128 <element name="regrouped_infile" ftype="tabular">
129 <assert_contents>
130 <has_text text="100CHE6KO"/>
131 <has_n_lines n="1790"/>
132 </assert_contents>
133 </element>
134 </output_collection>
135 </test>
136 </tests>
137 <help><![CDATA[
138 @HELP_HEADER@
139
140 Infer pathway abundances
141 ========================
142 Infer the presence and abundances of pathways based on gene family abundances in a sample. By default, this script expects a table of E.C. number abundances (as output by PICRUSt2). However, alternative reaction to pathways mapping files can also be specified. By default, E.C. numbers are first regrouped to MetaCyc reactions, which are then linked to MetaCyc pathways through the default database.
143
144 Pathway abundances are calculated using the same approach as HUMAnN2 based on the abundances of gene families that can be linked to reactions within pathways (E.C. numbers regrouped to MetaCyc reactions by default). By default, pathways will first be identified as present or not with MinPath.
145
146 Either a structured or unstructured pathway mapfile can be input (the mapfile is structured by default), which will identify which set of pathways are likely present based on the presence of requisite gene families.
147
148 Note
149 ====
150 Stratified output will only be output if a stratified metagenome is input (or if --per_sequence_contrib is set). Please note that by default stratified abundances are based on how much predicted genomes (e.g. sequences) contribute to the community-wide abundance, not the abundance of the pathway based on the predicted genes in that genome alone. In other words, a predicted genome might be contributing greatly to the community-wide pathway abundance simply because one required gene for that pathway is at extremely high abundance in that genome even though no other required genes for that pathway are present. In contrast, the --per_sequence_contrib option should be used to get the predicted abundance and coverage of each pathway based on the predicted gene families within each genome. Note that using the --per_sequence_contrib option can greatly increase runtime.
151
152 Input
153 =====
154 Input tabular table of gene family abundances (either the unstratified or stratified output of metagenome_pipeline.py).
155
156 Output
157 ======
158 Pathway abundance.
159
160 ]]></help>
161 <citations>
<!-- DOI of the publication users are asked to cite for this tool. -->
162 <citation type="doi">10.1038/s41587-020-0548-6</citation>
163 </citations>
164 </tool>