Mercurial > repos > iuc > krakentools_extract_kraken_reads

diff extract_kraken_reads.xml @ 0:519e0835abd7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/krakentools commit 92c58a65005708fbd56b27b86969c01a1258ddb9
author: iuc
date: Wed, 15 Mar 2023 18:18:27 +0000
children: f329328da134
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_kraken_reads.xml	Wed Mar 15 18:18:27 2023 +0000
@@ -0,0 +1,251 @@
+<tool id="krakentools_extract_kraken_reads" name="Krakentools: Extract Kraken Reads By ID" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>
+        Extract reads that were classified by the Kraken family at specified taxonomic IDs
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <requirement type="package" version="1.12">gzip</requirement>
+    </expand>
+    <command detect_errors="exit_code">
+        <![CDATA[
+
+        ##set input
+        #if $library.type == 'paired':
+            #set input_1 = $library.input_1
+            #set input_2 = $library.input_2
+        #else if $library.type == 'paired_collection'
+            #set input_1 = $library.input_1.forward
+            #set input_2 = $library.input_1.reverse
+        #else
+            #set input_1 = $library.input_1
+        #end if
+
+        ## name output according to --fastq_output param
+        ## tests fails if file does not have correct ending
+        #if str($fastq_output) == '':
+            #set temp_output_1 = 'output_1.fasta'
+            #set temp_output_2 = 'output_2.fasta'
+        #else:
+            #set temp_output_1 = 'output_1.fastq'
+            #set temp_output_2 = 'output_2.fastq'
+        #end if
+
+        ## do not quote $taxid
+        extract_kraken_reads.py
+        -k '$results'
+        -s '$input_1'
+        -o '$temp_output_1'
+        --taxid $taxid
+        --max '$max'
+        $include_parents
+        $include_children
+        $exclude
+        $fastq_output
+
+        #if str( $library.type ) != "single":
+        -s2 '$input_2'
+        -o2 '$temp_output_2'
+        #end if
+        #if $include_parents or $include_children:
+        --report '$report'
+        #end if
+
+        ##compress output
+        && gzip -cvf '$temp_output_1' > output_1.gz
+
+        #if str( $library.type ) != "single":
+        && gzip -cvf '$temp_output_2' > output_2.gz
+        #end if
+
+        ]]>
+    </command>
+    <inputs>
+        <!-- Reads -->
+        <conditional name="library">
+            <param name="type" type="select" label="Single or paired reads?">
+                <option value="single">
+                    Single
+                </option>
+                <option value="paired">
+                    Paired
+                </option>
+                <option value="paired_collection">
+                    Paired Collection
+                </option>
+            </param>
+            <when value="single">
+                <param name="input_1" format="fastq,fasta" type="data" label="FASTQ/A file" help="FASTQ or FASTQ input reads (may be gzipped)" />
+            </when>
+            <when value="paired">
+                <param name="input_1" format="fastq,fasta" type="data" label="FASTQ/A forward file" help="FASTQ or FASTQ input reads (may be gzipped)" />
+                <param name="input_2" format="fastq,fasta" type="data" label="FASTQ/A reverse file" help="FASTQ or FASTQ input reads (for paired reads, may be gzipped)" />
+            </when>
+            <when value="paired_collection">
+                <param name="input_1" format="fastq,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="FASTQ or FASTA read pair collection (may be gzipped)" />
+            </when>
+        </conditional>
+        <param name="results" argument="-k" format="tabular" type="data" label="Results" help="Results (classification) file from Kraken/KrakenUniq/Kraken2" />
+        <param argument="--report" format="tabular" type="data" label="Report" optional="true" help="Report file from Kraken/KrakenUniq/Kraken2" />
+        <param  argument="--taxid" type="text" value="" label="Taxonomic ID(s) to match" help="Space-delimited list of taxonomic IDs for which to extract matching reads">
+            <validator type="regex" message="Enter a space-separated list of numeric tax IDs">^\d+[\d ]*$</validator>
+        </param>
+        <param argument="--max" type="integer" value="100000000" min="1" label="Maximum reads to save" help="Maximum number of reads to save for each ID" />
+        <param argument="--exclude" type="boolean" value="False" truevalue="--exclude" falsevalue="" label="Invert output" help="Instead of finding reads that match given taxonomic IDs, find all reads that DO NOT match given IDs" />
+        <param argument="--fastq_output" type="boolean" value="False" truevalue="--fastq-output" falsevalue="" label="Output as FASTQ" help="Write output as FASTQ instead of the default FASTA" />
+        <param argument="--include_parents" type="boolean" value="False" truevalue="--include-parents" falsevalue="" label="Include parents" help="Include reads classified at parent levels of the specified tax IDs" />
+        <param argument="--include_children" type="boolean" value="False" truevalue="--include-children" falsevalue="" label="Include children" help="Include reads classified more specifically than the specified tax IDs" />
+    </inputs>
+    <outputs>
+        <data name="output_1" format="fasta.gz" from_work_dir="output_1.gz" metadata_source="input_1" label="${tool.name} on ${on_string}: forward reads">
+            <filter>
+                (library['type'] == 'single' or library['type'] == 'paired')
+            </filter>
+            <change_format>
+                <when input="fastq_output" value="--fastq-output" format="fastq.gz" />
+            </change_format>
+        </data>
+        <data name="output_2" format="fasta.gz" from_work_dir="output_2.gz" metadata_source="input_2" label="${tool.name} on ${on_string}: reverse reads">
+            <filter>
+                (library['type'] == 'paired')
+            </filter>
+            <change_format>
+                <when input="fastq_output" value="--fastq-output" format="fastq.gz" />
+            </change_format>
+        </data>
+        <collection type="paired" name="output_collection" label="${tool.name} on ${on_string}: collection">
+            <filter>(library['type'] == 'paired_collection')</filter>
+            <data name="forward" format="fasta.gz" metadata_source="input_1" from_work_dir="output_1.gz" >
+            <change_format>
+                <when input="fastq_output" value="--fastq-output" format="fastq.gz" />
+            </change_format>
+            </data>
+            <data name="reverse" format="fasta.gz" metadata_source="input_2" from_work_dir="output_2.gz" >
+            <change_format>
+                <when input="fastq_output" value="--fastq-output" format="fastq.gz" />
+            </change_format>
+            </data>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- test Kraken2 input, single input -->
+        <test expect_num_outputs="1">
+            <param name="input_1" value="extract_kraken_reads/R1.fq.gz" ftype="fastqsanger" />
+            <param name="library|type" value="single" />
+            <param name="results" value="extract_kraken_reads/kraken2.results" ftype="tabular" />
+            <param name="taxid" value="11176" />
+            <output name="output_1" file="extract_kraken_reads/out1.k2.11176.fa" decompress="true" ftype="fasta.gz"  />
+        </test>
+        <!-- test paired input -->
+        <test expect_num_outputs="2">
+            <param name="input_1" value="extract_kraken_reads/R1.fq.gz" ftype="fastqsanger" />
+            <param name="input_2" value="extract_kraken_reads/R2.fq.gz" ftype="fastqsanger" />
+            <param name="library|type" value="paired" />
+            <param name="results" value="extract_kraken_reads/kraken2.results" ftype="tabular" />
+            <param name="taxid" value="11176" />
+            <output name="output_1" file="extract_kraken_reads/out1.k2.11176.fa" decompress="true" ftype="fasta.gz" />
+            <output name="output_2" file="extract_kraken_reads/out2.k2.11176.fa" decompress="true" ftype="fasta.gz" />
+        </test>
+        <!-- test paired collection input -->
+        <test expect_num_outputs="3">
+            <param name="input_1">
+                <collection type="paired">
+                    <element name="forward" value="extract_kraken_reads/R1.fq.gz" ftype="fastqsanger" />
+                    <element name="reverse" value="extract_kraken_reads/R2.fq.gz" ftype="fastqsanger" />
+                </collection>
+            </param>
+            <param name="library|type" value="paired_collection" />
+            <param name="results" value="extract_kraken_reads/kraken2.results" ftype="tabular" />
+            <param name="taxid" value="11176" />
+            <output_collection name="output_collection" type="paired">
+                <element name="forward" file="extract_kraken_reads/out1.k2.11176.fa" decompress="true" ftype="fasta.gz" />
+                <element name="reverse" file="extract_kraken_reads/out2.k2.11176.fa" decompress="true" ftype="fasta.gz" />
+            </output_collection>
+        </test>
+        <!-- test Kraken1 input, include children -->
+        <test expect_num_outputs="1">
+            <param name="input_1" value="extract_kraken_reads/R1.fq.gz" ftype="fastqsanger" />
+            <param name="library|type" value="single" />
+            <param name="results" value="extract_kraken_reads/kraken1.results" ftype="tabular" />
+            <param name="report" value="extract_kraken_reads/kraken1.report" ftype="tabular" />
+            <param name="taxid" value="11176" />
+            <param name="include_children" value="True" />
+            <output name="output_1" file="extract_kraken_reads/out1.k1.11176.children.fa" decompress="true" ftype="fasta.gz" />
+        </test>
+        <!-- test exclude -->
+        <test expect_num_outputs="1">
+            <param name="input_1" value="extract_kraken_reads/R1.fq.gz" ftype="fastqsanger" />
+            <param name="library|type" value="single" />
+            <param name="results" value="extract_kraken_reads/kraken1.results" ftype="tabular" />
+            <param name="report" value="extract_kraken_reads/kraken1.report" ftype="tabular" />
+            <param name="taxid" value="10386" />
+            <param name="include_children" value="True" />
+            <param name="exclude" value="True" />
+            <output name="output_1" file="extract_kraken_reads/out1.k1.e10386.children.fa" decompress="true" ftype="fasta.gz" />
+        </test>
+        <!-- test max -->
+        <test expect_num_outputs="1">
+            <param name="input_1" value="extract_kraken_reads/R1.fq.gz" ftype="fastqsanger" />
+            <param name="library|type" value="single" />
+            <param name="results" value="extract_kraken_reads/kraken2.results" ftype="tabular" />
+            <param name="taxid" value="11176" />
+            <param name="max" value="2" />
+            <output name="output_1" file="extract_kraken_reads/out1.k2.11176.max2.fa" decompress="true" ftype="fasta.gz" />
+        </test>
+        <!-- test include parents -->
+        <test expect_num_outputs="1">
+            <param name="input_1" value="extract_kraken_reads/R1.fq.gz" ftype="fastqsanger" />
+            <param name="library|type" value="single" />
+            <param name="results" value="extract_kraken_reads/kraken2.results" ftype="tabular" />
+            <param name="taxid" value="11176" />
+            <param name="include_parents" value="True" />
+            <param name="report" value="extract_kraken_reads/kraken2.report" ftype="tabular" />
+            <output name="output_1" file="extract_kraken_reads/out1.k2.11176.parents.fa" decompress="true" ftype="fasta.gz" />
+        </test>
+        <!-- test multiple tax IDs -->
+        <test expect_num_outputs="1">
+            <param name="input_1" value="extract_kraken_reads/R1.fq.gz" ftype="fastqsanger" />
+            <param name="library|type" value="single" />
+            <param name="results" value="extract_kraken_reads/kraken2.results" ftype="tabular" />
+            <param name="taxid" value="10386 11176" />
+            <param name="exclude" value="True" />
+            <param name="include_parents" value="True" />
+            <param name="report" value="extract_kraken_reads/kraken2.report" ftype="tabular" />
+            <output name="output_1" file="extract_kraken_reads/out1.k2.exclude_both.fa" decompress="true" ftype="fasta.gz" />
+        </test>
+        <!-- test multiple tax IDs -->
+        <test expect_failure="true">
+            <param name="input_1" value="extract_kraken_reads/R1.fq.gz" ftype="fastqsanger" />
+            <param name="library|type" value="single" />
+            <param name="results" value="extract_kraken_reads/kraken2.results" ftype="tabular" />
+            <param name="taxid" value="10386 f5" />
+        </test>
+        <!-- test FASTQ output -->
+        <test expect_num_outputs="1">
+            <param name="input_1" value="extract_kraken_reads/R1.fq.gz" ftype="fastqsanger" />
+            <param name="library|type" value="single" />
+            <param name="results" value="extract_kraken_reads/kraken2.results" ftype="tabular" />
+            <param name="taxid" value="11176" />
+            <param name="fastq_output" value="true" />
+            <output name="output_1" file="extract_kraken_reads/out1.k2.11176.fq" decompress="true" ftype="fastq.gz" />
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+**What it does**
+-------------------
+After running Kraken, Kraken2, or KrakenUniq, users may use the
+`extract_kraken_reads.py` program to extract the FASTA or FASTQ reads
+classified as a specific taxonomy ID. For example, this program can be used to
+extract all bacterial reads or only reads assigned to Escherichia coli. Users
+must provide (at minimum) the original sequence file(s), at least one taxonomy
+ID, and the Kraken output file.
+        ]]>
+    </help>
+    <expand macro="citations" />
+    <creator>
+        <person givenName="Jeremy" familyName="Volkening" url="https://github.com/jvolkening" />
+        <person givenName="Paul" familyName="Zierep" email="zierep@informatik.uni-freiburg.de" />
+    </creator>
+</tool>
author	iuc
date	Wed, 15 Mar 2023 18:18:27 +0000
parents
children	f329328da134