diff kallisto_quant.xml @ 0:59a4c97b85d6 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kallisto/ commit 3b54163c4f7daff76fcc589c4a9057bb03904380
author iuc
date Sat, 05 Aug 2017 04:02:28 -0400
parents
children 7a9158bb6f98
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kallisto_quant.xml	Sat Aug 05 04:02:28 2017 -0400
@@ -0,0 +1,197 @@
+<?xml version="1.0"?>
+<tool id="kallisto_quant" name="Kallisto quant" version="@VERSION@.0">
+    <description>- quantify abundances of RNA-Seq transcripts</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code">
+        <![CDATA[
+        ## Resolve the kallisto index: build one in the working directory when the
+        ## reference comes from the history, otherwise take the path from the
+        ## selected data table entry.
+        #if $reference_genome.reference_genome_source == "history":
+            ln -s '$reference_genome.reference' reference.fa &&
+            kallisto index reference.fa -i reference.kallisto &&
+            #set index_path = 'reference.kallisto'
+        #else:
+            #set index_path = $reference_genome.index.fields.path
+        #end if
+        kallisto quant -i '$index_path'
+            $bias --bootstrap-samples $bootstrap_samples --seed $seed $fusion $pseudobam
+            ## --threads is only passed when --pseudobam is not requested.
+            #if not $pseudobam:
+                --threads \${GALAXY_SLOTS:-1}
+            #end if
+            -o .
+            #if str($single_paired.single_paired_selector) == 'single':
+                --single
+                ## Galaxy renders a multiple-dataset param as a comma-separated list;
+                ## re-join it so every path ends up individually single-quoted.
+                #set $single_reads = "' '".join(str($single_paired.reads).split(','))
+                --fragment-length $single_paired.fragment_length
+                --sd $single_paired.sd
+                '$single_reads'
+            #else:
+                #if str($single_paired.collection.collection_selector) == 'datasets':
+                    #set $forward_reads = str($single_paired.collection.forward).split(',')
+                    #set $reverse_reads = str($single_paired.collection.reverse).split(',')
+                #else:
+                    #set $forward_reads = [str($read.forward) for $read in $single_paired.collection.reads]
+                    #set $reverse_reads = [str($read.reverse) for $read in $single_paired.collection.reads]
+                #end if
+                ## Interleave the mates (forward1 reverse1 forward2 reverse2 ...).
+                ## NOTE(review): zip() stops at the shorter list, so unmatched extra
+                ## forward or reverse files are dropped without an error.
+                #set $multiplexed_reads = []
+                #for $read_pair in zip($forward_reads, $reverse_reads):
+                    ## #silent evaluates the call without rendering its return value
+                    ## into the command line.
+                    #silent $multiplexed_reads.extend($read_pair)
+                #end for
+                #set $reads = "' '".join($multiplexed_reads)
+                '$reads'
+            #end if
+            ## With --pseudobam the output of kallisto quant is piped into samtools sort,
+            ## which writes the BAM dataset.
+            #if $pseudobam:
+                | samtools sort -O bam -@ \${GALAXY_SLOTS:-1} -o '$pseudobam_output' -
+            #end if
+            && cat run_info.json
+        ]]>
+    </command>
+    <inputs>
+        <!-- Index source: an entry of the kallisto_indexes data table ("indexed")
+             or a FASTA dataset from the history ("history"). -->
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference genome for quantification">
+                <option value="indexed" selected="true">Use a built-in genome</option>
+                <option value="history">Use a genome from history</option>
+            </param>
+            <when value="indexed">
+                <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy administrator">
+                    <options from_data_table="kallisto_indexes">
+                        <filter type="sort_by" column="2" />
+                    </options>
+                    <!-- The validator belongs to the param element, not to options. -->
+                    <validator type="no_options" message="No genomes are available for the selected input dataset" />
+                </param>
+            </when>
+            <when value="history">
+                <param name="reference" type="data" format="fasta" label="FASTA reference genome" />
+            </when>
+        </conditional>
+        <!-- Read layout: single-end reads need fragment length and sd; paired reads
+             come either as two dataset lists or as a list:paired collection. -->
+        <conditional name="single_paired">
+            <param name="single_paired_selector" type="select" label="Single-end or paired reads">
+                <option value="single" selected="true">Single-end</option>
+                <option value="paired">Paired</option>
+            </param>
+            <when value="single">
+                <param name="reads" type="data" format="fastq" multiple="True" label="Reads in FASTQ format" />
+                <param name="fragment_length" argument="--fragment-length" type="integer" value="200" label="Average fragment length" help="Illumina typically produces reads of 180-200bp" />
+                <param argument="--sd" type="integer" value="20" label="Estimated standard deviation of fragment length" />
+            </when>
+            <when value="paired">
+                <conditional name="collection">
+                    <param name="collection_selector" type="select" label="Collection or individual datasets">
+                        <option value="datasets" selected="true">Individual files</option>
+                        <option value="collection">Pair or list of pairs</option>
+                    </param>
+                    <when value="datasets">
+                        <param name="forward" type="data" format="fastq" multiple="True" label="Forward reads" />
+                        <param name="reverse" type="data" format="fastq" multiple="True" label="Reverse reads" />
+                    </when>
+                    <when value="collection">
+                        <param name="reads" type="data_collection" format="fastq" collection_type="list:paired" label="Collection of reads" />
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+        <!-- Options passed straight through to kallisto quant. -->
+        <param argument="--bias" type="boolean" truevalue="--bias" falsevalue="" label="Perform sequence based bias correction" />
+        <param name="bootstrap_samples" argument="--bootstrap-samples" type="integer" value="0" label="Number of bootstrap samples" help="default: 0" />
+        <param argument="--seed" type="integer" value="42" label="Seed for the bootstrap sampling" help="default: 42" />
+        <param argument="--fusion" type="boolean" truevalue="--fusion" falsevalue="" label="Search for fusions" help="for Pizzly" />
+        <param argument="--pseudobam" type="boolean" truevalue="--pseudobam" falsevalue="" label="Output pseudoalignments in BAM format" />
+    </inputs>
+    <outputs>
+        <!-- Abundance files are collected from the job working directory. -->
+        <data name="abundance_h5"
+              format="h5"
+              from_work_dir="abundance.h5"
+              label="${tool.name} on ${on_string}: Abundances (HDF5)" />
+        <data name="abundance_tab"
+              format="tabular"
+              from_work_dir="abundance.tsv"
+              label="${tool.name} on ${on_string}: Abundances (tabular)" />
+        <!-- Only created when the pseudobam option is enabled. -->
+        <data name="pseudobam_output"
+              format="bam"
+              label="${tool.name} on ${on_string}: Pseudoalignments">
+            <filter>pseudobam</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Paired-end reads given as individual datasets, reference FASTA from the history. -->
+        <test>
+            <param name="reference_genome_source" value="history" />
+            <param name="reference" ftype="fasta" value="mm10_chrM.fa" />
+            <param name="single_paired_selector" value="paired" />
+            <param name="collection_selector" value="datasets" />
+            <param name="forward" ftype="fastq" value="mm10_chrM-1.f.fq,mm10_chrM-2.f.fq,mm10_chrM-3.f.fq,mm10_chrM-4.f.fq,mm10_chrM-5.f.fq" />
+            <param name="reverse" ftype="fastq" value="mm10_chrM-1.r.fq,mm10_chrM-2.r.fq,mm10_chrM-3.r.fq,mm10_chrM-4.r.fq,mm10_chrM-5.r.fq" />
+            <output name="abundance_tab" file="kallisto_quant_out1.tab" ftype="tabular" />
+        </test>
+        <!-- Paired-end reads given as a list:paired collection of five pairs, reference FASTA from the history. -->
+        <test>
+            <param name="reference_genome_source" value="history" />
+            <param name="reference" ftype="fasta" value="mm10_chrM.fa" />
+            <param name="single_paired_selector" value="paired" />
+            <param name="collection_selector" value="collection" />
+            <param name="reads">
+                <collection type="list:paired">
+                    <element name="p1">
+                        <collection type="paired">
+                            <element name="forward" value="mm10_chrM-1.f.fq" />
+                            <element name="reverse" value="mm10_chrM-1.r.fq" />
+                        </collection>
+                    </element>
+                    <element name="p2">
+                        <collection type="paired">
+                            <element name="forward" value="mm10_chrM-2.f.fq" />
+                            <element name="reverse" value="mm10_chrM-2.r.fq" />
+                        </collection>
+                    </element>
+                    <element name="p3">
+                        <collection type="paired">
+                            <element name="forward" value="mm10_chrM-3.f.fq" />
+                            <element name="reverse" value="mm10_chrM-3.r.fq" />
+                        </collection>
+                    </element>
+                    <element name="p4">
+                        <collection type="paired">
+                            <element name="forward" value="mm10_chrM-4.f.fq" />
+                            <element name="reverse" value="mm10_chrM-4.r.fq" />
+                        </collection>
+                    </element>
+                    <element name="p5">
+                        <collection type="paired">
+                            <element name="forward" value="mm10_chrM-5.f.fq" />
+                            <element name="reverse" value="mm10_chrM-5.r.fq" />
+                        </collection>
+                    </element>
+                </collection>
+            </param>
+            <output name="abundance_tab" file="kallisto_quant_out2.tab" ftype="tabular" />
+        </test>
+        <!-- Single-end reads, reference FASTA from the history. No collection_selector
+             is set: that parameter only exists under the "paired" branch. -->
+        <test>
+            <param name="reference_genome_source" value="history" />
+            <param name="reference" ftype="fasta" value="mm10_chrM.fa" />
+            <param name="single_paired_selector" value="single" />
+            <param name="reads" ftype="fastq" value="mm10_chrM-1.f.fq,mm10_chrM-2.f.fq,mm10_chrM-3.f.fq,mm10_chrM-4.f.fq,mm10_chrM-5.f.fq" />
+            <output name="abundance_tab" file="kallisto_quant_out3.tab" ftype="tabular" />
+        </test>
+        <!-- Paired-end reads with pseudobam enabled, reference FASTA from the history; checks the abundance table and the BAM output. -->
+        <test>
+            <param name="reference_genome_source" value="history" />
+            <param name="reference" ftype="fasta" value="felCat8_chrM.fa" />
+            <param name="single_paired_selector" value="paired" />
+            <param name="collection_selector" value="datasets" />
+            <param name="pseudobam" value="true" />
+            <param name="forward" ftype="fastq" value="felCat8_chrM_F.fq" />
+            <param name="reverse" ftype="fastq" value="felCat8_chrM_R.fq" />
+            <output name="abundance_tab" file="kallisto_quant_out4.tab" ftype="tabular" />
+            <output name="pseudobam_output" file="kallisto_quant_out4.bam" ftype="bam" />
+        </test>
+        <!-- Paired-end reads with pseudobam enabled against a built-in index.
+             "indexed" is the selector value declared by the reference_genome
+             conditional; no explicit value is given for the index select. -->
+        <test>
+            <param name="reference_genome_source" value="indexed" />
+            <param name="single_paired_selector" value="paired" />
+            <param name="collection_selector" value="datasets" />
+            <param name="pseudobam" value="true" />
+            <param name="forward" ftype="fastq" dbkey="sacCer2" value="sacCer2_chrM_F.fq" />
+            <param name="reverse" ftype="fastq" dbkey="sacCer2" value="sacCer2_chrM_R.fq" />
+            <output name="abundance_tab" file="kallisto_quant_out5.tab" ftype="tabular" />
+            <output name="pseudobam_output" file="kallisto_quant_out5.bam" ftype="bam" />
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+kallisto is a program for quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. It is based on the novel idea of pseudoalignment for rapidly determining the compatibility of reads with targets, without the need for alignment. On benchmarks with standard RNA-Seq data, kallisto can quantify 30 million human reads in less than 3 minutes on a Mac desktop computer using only the read sequences and a transcriptome index that itself takes less than 10 minutes to build. Pseudoalignment of reads preserves the key information needed for quantification, and kallisto is therefore not only fast, but also as accurate as existing quantification tools. In fact, because the pseudoalignment procedure is robust to errors in the reads, in many benchmarks kallisto significantly outperforms existing tools.
+        ]]>
+    </help>
+    <expand macro="citations" />
+</tool>