changeset 2:a92a632c4d9b draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit f1c6f4fe1e572ace84cf9106bc253603f55aac55"
author iuc
date Mon, 14 Jun 2021 12:48:10 +0000
parents b89b0765695d
children ff8f55893e7d
files formatoutput.py macros.xml metaphlan.xml test-data/no_taxon_input.fasta
diffstat 4 files changed, 174 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/formatoutput.py	Mon May 17 20:10:24 2021 +0000
+++ b/formatoutput.py	Mon Jun 14 12:48:10 2021 +0000
@@ -57,7 +57,9 @@
                 # skip headers
                 if line.startswith("#"):
                     continue
-
+                # skip UNKNOWN lines in Predicted taxon relative abundances
+                if "UNKNOWN" in line:
+                    continue
                 # split lines
                 split_line = line[:-1].split('\t')
                 taxo_n = split_line[0].split('|')
--- a/macros.xml	Mon May 17 20:10:24 2021 +0000
+++ b/macros.xml	Mon Jun 14 12:48:10 2021 +0000
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">3.0.8</token>
+    <token name="@TOOL_VERSION@">3.0.9</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">20.01</token>
     <xml name="edam_ontology">
@@ -21,7 +21,7 @@
     </xml>
     <xml name="citations">
         <citations>
-            <citation type="doi">1101/2020.11.19.388223</citation>
+            <citation type="doi">10.7554/eLife.65088</citation>
         </citations>
     </xml>
 </macros>
--- a/metaphlan.xml	Mon May 17 20:10:24 2021 +0000
+++ b/metaphlan.xml	Mon Jun 14 12:48:10 2021 +0000
@@ -195,8 +195,7 @@
     --metaphlan_output '$output_file'
     --outdir 'split_levels'
     $out.legacy_output
-&&
-ls split_levels
+
 #end if
 
 #if $out.krona_output
@@ -211,26 +210,26 @@
         <section name="inputs" title="Inputs" expanded="true">
             <conditional name="in">
                 <param name="selector" type="select" label="Input(s)">
-                    <option value="raw" selected="true">Fasta/FastQ file(s) with metagenomic reads</option>
+                    <option value="raw" selected="true">Fasta/FastQ file(s) with microbiota reads</option>
                     <option value="sam">Externally BowTie2-mapped SAM file</option>
-                    <option value="bowtie2out">Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run</option>
+                    <option value="bowtie2out">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option>
                 </param>
                 <when value="raw">
                     <conditional name="raw_in">
-                        <param name="selector" type="select" label="Fasta/FastQ file(s) with metagenomic reads">
+                        <param name="selector" type="select" label="Fasta/FastQ file(s) with microbiota reads">
                             <option value="single" selected="true">One single-end file</option>
                             <option value="multiple">Multiple single-end files</option>
                             <option value="paired">Paired-end files</option>
                         </param>
                         <when value="single">
-                            <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with metagenomic reads"/>
+                            <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with microbiota reads"/>
                         </when>
                         <when value="multiple">
-                            <param name="in" type="data" format="@FILE_FORMATS@" multiple="true" label="Single-end Fasta/FastQ files with metagenomic reads"/>
+                            <param name="in" type="data" format="@FILE_FORMATS@" multiple="true" label="Single-end Fasta/FastQ files with microbiota reads"/>
                         </when>
                         <when value="paired">
-                            <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with metagenomic reads"/>
-                            <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with metagenomic reads"/>
+                            <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with microbiota reads"/>
+                            <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with microbiota reads"/>
                         </when>
                     </conditional>
                     <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/>
@@ -245,10 +244,10 @@
                     </section>
                 </when>
                 <when value="sam">
-                    <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map metagenom reads"/>
+                    <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map microbiota reads"/>
                 </when>
                 <when value="bowtie2out">
-                    <param name="in" type="data" format="tabular" label="Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run" 
+                    <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" 
                         help="File needs to be generated with MetaPhlAn versions >3.0"/>
                 </when>
             </conditional>
@@ -273,12 +272,12 @@
         <section name="analysis" title="Analysis" expanded="true">
             <conditional name="analysis_type">
                 <param argument="-t" type="select" label="Type of analysis to perform">
-                    <option value="rel_ab" selected="true">rel_ab: Profiling a metagenomes in terms of relative abundances</option>
-                    <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a metagenomes in terms of relative abundances and estimate the number of reads comming from each clade</option>
+                    <option value="rel_ab" selected="true">rel_ab: Profiling a microbiota in terms of relative abundances</option>
+                    <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a microbiota in terms of relative abundances and estimating the number of reads coming from each clade</option>
                     <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option>
                     <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option>
                     <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option>
-                    <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by metagenome size if number of reads is specified)</option>
+                    <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by microbiota size if number of reads is specified)</option>
                     <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option>
                     <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option>
                 </param>
@@ -296,8 +295,8 @@
                     <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/>
                 </when>
                 <when value="marker_ab_table">
-                    <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original metagenome" 
-                        help="It is used for normalizing the length-normalized counts with the metagenome size as well. No normalization applied if the value is not specified"/>
+                    <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" 
+                        help="It is used for normalizing the length-normalized counts with the microbiota size as well. No normalization applied if the value is not specified"/>
                 </when>
                 <when value="marker_counts"/>
                 <when value="marker_pres_table">
@@ -369,6 +368,152 @@
                     <conditional name="raw_in">
                         <!-- Single GZ file -->
                         <param name="selector" value="single"/>
+                        <param name="in" value="no_taxon_input.fasta"/>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <!-- Cached db -->
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <conditional name="analysis_type">
+                    <param name="t" value="rel_ab"/>
+                    <conditional name="tax_lev">
+                        <param name="tax_lev" value="a"/>
+                        <param name="split_levels" value="true"/>
+                    </conditional>
+                </conditional>
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unknown_estimation" value="false"/>
+                <param name="krona_output" value="true"/>
+            </section>
+            <output name="output_file" ftype="tabular">
+                <assert_contents>
+                    <has_text text="UNKNOWN"/>
+                </assert_contents>
+            </output>
+            <output name="bowtie2out" ftype="tabular">
+                <assert_contents>
+                    <not_has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
+                    <has_n_lines n="2"/>
+                </assert_contents>
+            </output>
+            <output name="sam_output_file" ftype="sam">
+                <assert_contents>
+                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1">
+                <assert_contents>
+                    <not_has_text text="k__Bacteria"/>
+                    <not_has_text text="p__Actinobacteria"/>
+                </assert_contents>
+            </output>
+            <output_collection name="levels" type="list" >
+                <element name="all" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="class"/>
+                        <has_n_columns n="17"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="kingdom" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="kingdom_id"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="phylum" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="phylum_id"/>
+                        <not_has_text text="kingdom_id"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="class" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="class_id"/>
+                        <not_has_text text="phylum_id"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="order" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="order_id"/>
+                        <not_has_text text="class_id"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="family" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="family_id"/>
+                        <not_has_text text="order"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="genus" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="genus_id"/>
+                        <not_has_text text="family"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="species" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="species_id"/>
+                        <not_has_text text="genus"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+                <element name="strains" ftype="tabular">
+                    <assert_contents>
+                        <has_text text="strains_id"/>
+                        <not_has_text text="species_id"/>
+                        <has_n_columns n="3"/>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="krona_output_file" ftype="tabular">
+                <assert_contents>
+                    <not_has_text text="k__Bacteria"/>
+                    <has_n_lines n="0"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="6">
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <!-- Single GZ file -->
+                        <param name="selector" value="single"/>
                         <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/>
                     </conditional>
                     <param name="read_min_len" value="70"/>
@@ -916,13 +1061,13 @@
 - species-level resolution for bacteria, archaea, eukaryotes and viruses;
 - strain identification and tracking
 - orders of magnitude speedups compared to existing methods.
-- metagenomic strain-level population genomics
+- microbiota strain-level population genomics
 
 MetaPhlAn clade-abundance estimation
 ------------------------------------
 
 The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species and 
-strains in particular cases) present in the metagenome obtained from a microbiome sample and their 
+strains in particular cases) present in the microbiota obtained from a microbiome sample and their 
 relative abundance.
 
 Marker level analysis
@@ -944,7 +1089,7 @@
 
 - one or several sequence files in Fasta, FastQ (compressed or not)
 - a BowTie2 produced SAM file
-- an intermediary mapping file of the metagenome generated by a previous MetaPhlAn run
+- an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run
 
 It also needs the reference database, which can be locally installed or customized using the dedicated tools.
 
@@ -965,4 +1110,4 @@
 
     ]]></help>
     <expand macro="citations"/>
-</tool>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/no_taxon_input.fasta	Mon Jun 14 12:48:10 2021 +0000
@@ -0,0 +1,4 @@
+> seq1
+ATTAGGGATTTTAGGGGGGGAGATTTAGAGAGAGAGAGAGAGAAGAAGAGAAGAAGAAGAAGAAAAAGGGGGAAGAGAGA
+> seq2
+ATTAGGGATTTTAGGGGGGGAGATTTAGAGAGAGAGAGAGAGAAGAAGAGAAGAAGAAGAAGAAAAAGGGGGAAGAGAGA