Mercurial > repos > jvolkening > krakentools
diff extract_kraken_reads.xml @ 0:d491c23394f9 draft default tip
"planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/krakentools"
author | jvolkening |
---|---|
date | Thu, 30 Sep 2021 17:54:31 +0000 |
parents | |
children |
line wrap: on
line diff
<!--
    Galaxy tool wrapper around KrakenTools' extract_kraken_reads.py.
    Reconstructed from the initial-commit diff of extract_kraken_reads.xml
    (changeset 0:d491c23394f9, Thu Sep 30 17:54:31 2021 +0000) with the diff
    markers removed. Requirements, stdio handling, citations and the
    @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens are supplied by macros.xml.
-->
<tool id="krakentools_extract_kraken_reads" name="Extract Kraken Reads By ID" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="17.09">
    <description>Extract reads that were classified by the Kraken family at specified taxonomic IDs</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <version_command>echo -n @TOOL_VERSION@</version_command>

    <command detect_errors="exit_code"><![CDATA[

## Resolve the read file(s) from whichever branch of the "library"
## conditional was selected. input_2 is only defined for paired layouts.
#if $library.type == 'paired':
    #set input_1 = $library.input_1
    #set input_2 = $library.input_2
#else if $library.type == 'paired_collection':
    #set input_1 = $library.input_1.forward
    #set input_2 = $library.input_1.reverse
#else
    #set input_1 = $library.input_1
#end if

## do not quote $taxid: it is a space-delimited list and every ID must reach
## the script as its own argument. The regex validator on the parameter only
## admits digits and spaces, so leaving it unquoted is safe.
extract_kraken_reads.py

    -k '$results'
    -s '$input_1'
    -o '$output_1'
    --taxid $taxid
    --max '$max'
    $include_parents
    $include_children
    $exclude
    $fastq_output
## Second mate file and second output only exist for paired layouts.
#if str( $library.type ) != "single":
    -s2 '$input_2'
    -o2 '$output_2'
#end if
## The report is only passed when parent/child expansion is requested.
## NOTE(review): 'report' is optional in the form; if it is left unset while
## one of these flags is enabled the placeholder presumably renders as 'None'.
## Consider validating that combination.
#if $include_parents or $include_children:
    --report '$report'
#end if

    ]]></command>
    <inputs>

        <!-- Reads -->
        <conditional name="library">
            <param name="type" type="select" label="Single or paired reads?">
                <option value="single">Single</option>
                <option value="paired">Paired</option>
                <option value="paired_collection">Paired Collection</option>
            </param>

            <when value="single">
                <param name="input_1" format="fastq,fastqsanger,fasta" type="data" label="FASTQ/A file" help="FASTQ or FASTA input reads" />
            </when>

            <when value="paired">
                <param name="input_1" format="fastq,fastqsanger,fasta" type="data" label="FASTQ/A forward file" help="FASTQ or FASTA input reads" />
                <param name="input_2" format="fastq,fastqsanger,fasta" type="data" label="FASTQ/A reverse file" help="FASTQ or FASTA input reads" />
            </when>

            <when value="paired_collection">
                <param name="input_1" format="fastq,fastqsanger,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="FASTQ or FASTA read pair collection" />
            </when>

        </conditional>

        <!-- Kraken classification output and (optional) report -->
        <param name="results" argument="-k" format="tabular" type="data" label="Results" help="Results (classification) file from Kraken/KrakenUniq/Kraken2" />
        <param name="report" argument="--report" format="tabular" type="data" label="Report" optional="True" help="Report file from Kraken/KrakenUniq/Kraken2" />

        <!-- Selection criteria; taxid is inserted unquoted into the command, so the validator must stay strict -->
        <param name="taxid" argument="--taxid" type="text" value="" label="Taxonomic ID(s) to match" help="Space-delimited list of taxonomic IDs for which to extract matching reads">
            <validator type="regex" message="Enter a space-separated list of numeric tax IDs">^\d+[\d ]*$</validator>
        </param>
        <param name="max" argument="--max" type="integer" value="100000000" min="1" label="Maximum reads to save" help="Maximum number of reads to save for each ID" />

        <!-- Flags: each renders as its truevalue when checked and as an empty string otherwise -->
        <param name="exclude" argument="--exclude" type="boolean" checked="false" truevalue="--exclude" falsevalue="" label="Invert output" help="Instead of finding reads that match given taxonomic IDs, find all reads that DO NOT match given IDs" />
        <param name="fastq_output" argument="--fastq-output" type="boolean" checked="false" truevalue="--fastq-output" falsevalue="" label="Output as FASTQ" help="Write output as FASTQ instead of the default FASTA" />
        <param name="include_parents" argument="--include-parents" type="boolean" checked="false" truevalue="--include-parents" falsevalue="" label="Include parents" help="Include reads classified at parent levels of the specified tax IDs" />
        <param name="include_children" argument="--include-children" type="boolean" checked="false" truevalue="--include-children" falsevalue="" label="Include children" help="Include reads classified more specifically than the specified tax IDs" />

    </inputs>

    <!--
        NOTE(review): the change_format rules below match on "True", while
        fastq_output declares a custom truevalue. Confirm that Galaxy compares
        against "True" here and not against the truevalue; otherwise FASTQ
        output would keep the fasta datatype. The FASTQ test only compares
        file content, so it would not catch this.
    -->
    <outputs>
        <data name="output_1" format="fasta" metadata_source="input_1" label="${tool.name} on ${on_string}: forward reads">
            <change_format>
                <when input="fastq_output" value="True" format="fastqsanger" />
            </change_format>
        </data>
        <!-- The reverse-read output is only created for paired layouts -->
        <data name="output_2" format="fasta" metadata_source="input_2" label="${tool.name} on ${on_string}: reverse reads" >
            <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection')</filter>
            <change_format>
                <when input="fastq_output" value="True" format="fastqsanger" />
            </change_format>
        </data>
    </outputs>

    <tests>
        <!-- test Kraken2 input, single input -->
        <test>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="library|type" value="single"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <output name="output_1" file="out1.k2.11176.fa"/>
        </test>
        <!-- test paired input -->
        <test>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="input_2" value="R2.fq.gz" ftype="fastqsanger"/>
            <param name="library|type" value="paired"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <output name="output_1" file="out1.k2.11176.fa"/>
            <output name="output_2" file="out2.k2.11176.fa"/>
        </test>
        <!-- test paired collection input -->
        <test>
            <param name="input_1">
                <collection type="paired">
                    <element name="forward" value="R1.fq.gz" ftype="fastqsanger"/>
                    <element name="reverse" value="R2.fq.gz" ftype="fastqsanger"/>
                </collection>
            </param>
            <param name="library|type" value="paired_collection"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <output name="output_1" file="out1.k2.11176.fa"/>
            <output name="output_2" file="out2.k2.11176.fa"/>
        </test>
        <!-- test Kraken1 input, include children -->
        <test>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="library|type" value="single"/>
            <param name="results" value="kraken1.results" ftype="tabular"/>
            <param name="report" value="kraken1.report" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <param name="include_children" value="True"/>
            <output name="output_1" file="out1.k1.11176.children.fa"/>
        </test>
        <!-- test exclude -->
        <test>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="library|type" value="single"/>
            <param name="results" value="kraken1.results" ftype="tabular"/>
            <param name="report" value="kraken1.report" ftype="tabular"/>
            <param name="taxid" value="10386"/>
            <param name="include_children" value="True"/>
            <param name="exclude" value="True"/>
            <output name="output_1" file="out1.k1.e10386.children.fa"/>
        </test>
        <!-- test max -->
        <test>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="library|type" value="single"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <param name="max" value="2"/>
            <output name="output_1" file="out1.k2.11176.max2.fa"/>
        </test>
        <!-- test include parents -->
        <test>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="library|type" value="single"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <param name="include_parents" value="True"/>
            <param name="report" value="kraken2.report" ftype="tabular"/>
            <output name="output_1" file="out1.k2.11176.parents.fa"/>
        </test>
        <!-- test multiple tax IDs -->
        <test>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="library|type" value="single"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="10386 11176"/>
            <param name="exclude" value="True"/>
            <param name="include_parents" value="True"/>
            <param name="report" value="kraken2.report" ftype="tabular"/>
            <output name="output_1" file="out1.k2.exclude_both.fa"/>
        </test>
        <!-- test invalid tax ID: the regex validator must reject non-numeric input -->
        <test expect_failure="True">
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="library|type" value="single"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="10386 f5"/>
        </test>
        <!-- test FASTQ output -->
        <test>
            <param name="input_1" value="R1.fq.gz" ftype="fastqsanger"/>
            <param name="library|type" value="single"/>
            <param name="results" value="kraken2.results" ftype="tabular"/>
            <param name="taxid" value="11176"/>
            <param name="fastq_output" value="True"/>
            <output name="output_1" file="out1.k2.11176.fq"/>
        </test>

    </tests>

    <help><![CDATA[

.. class:: infomark

**What it does**

-------------------

After running Kraken, Kraken2, or KrakenUniq, users may use the
``extract_kraken_reads.py`` program to extract the FASTA or FASTQ reads
classified as a specific taxonomy ID. For example, this program can be used to
extract all bacterial reads or only reads assigned to Escherichia coli. Users
must provide (at minimum) the original sequence file(s), at least one taxonomy
ID, and the Kraken output file.

-------------------

**Command-line arguments**

-------------------

The following command-line usage corresponds with the Galaxy wrapper
parameters::

    usage: extract_kraken_reads.py [-h] -k KRAKEN_FILE -s SEQ_FILE1
                                   [-s2 SEQ_FILE2] -t TAXID [TAXID ...] -o
                                   OUTPUT_FILE [-o2 OUTPUT_FILE2] [--append]
                                   [--noappend] [--max MAX_READS] [-r REPORT_FILE]
                                   [--include-parents] [--include-children]
                                   [--exclude] [--fastq-output]

    optional arguments:
      -h, --help            show this help message and exit
      -k KRAKEN_FILE        Kraken output file to parse
      -s SEQ_FILE1, -s1 SEQ_FILE1, -1 SEQ_FILE1, -U SEQ_FILE1
                            FASTA/FASTQ File containing the raw sequence letters.
      -s2 SEQ_FILE2, -2 SEQ_FILE2
                            2nd FASTA/FASTQ File containing the raw sequence
                            letters (paired).
      -t TAXID [TAXID ...], --taxid TAXID [TAXID ...]
                            Taxonomy ID[s] of reads to extract (space-delimited)
      -o OUTPUT_FILE, --output OUTPUT_FILE
                            Output FASTA/Q file containing the reads and sample
                            IDs
      -o2 OUTPUT_FILE2, --output2 OUTPUT_FILE2
                            Output FASTA/Q file containing the second pair of reads
                            [required for paired input]
      --max MAX_READS       Maximum number of reads to save [default: 100,000,000]
      -r REPORT_FILE, --report REPORT_FILE
                            Kraken report file. [required only if --include-
                            parents/children is specified]
      --include-parents     Include reads classified at parent levels of the
                            specified taxids
      --include-children    Include reads classified more specifically than the
                            specified taxids
      --exclude             Instead of finding reads matching specified taxids,
                            finds all reads NOT matching specified taxids
      --fastq-output        Print output FASTQ reads [requires input FASTQ,
                            default: output is FASTA]

--------------------

**More Information**

--------------------

Author: Jennifer Lu

See the `online documentation`_

.. _`online documentation`: https://ccb.jhu.edu/software/krakentools/index.shtml?t=extractreads

--------------------

**Galaxy Wrapper Development**

--------------------

Author: Jeremy Volkening

    ]]></help>

    <expand macro="citations" />

</tool>