diff stringtie.xml @ 10:c84d44519b2e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stringtie commit b78c073ab258852730fc9af1cd4862d571459103
author iuc
date Tue, 04 Apr 2017 12:58:27 -0400
parents e3f369973054
children 6e45b443ef1f
line wrap: on
line diff
--- a/stringtie.xml	Tue Mar 21 03:07:22 2017 -0400
+++ b/stringtie.xml	Tue Apr 04 12:58:27 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="stringtie" name="StringTie" version="1.2.3">
+<tool id="stringtie" name="StringTie" version="1.3.3"> <!-- version raised from 1.2.3 by this change; NOTE(review): presumably mirrors the StringTie release required via macros.xml, confirm -->
     <description>transcript assembly and quantification</description>
     <macros>
         <import>macros.xml</import>
@@ -8,28 +8,36 @@
     <expand macro="version_command" />
     <command>
     <![CDATA[
+        mkdir -p ./special_de_output/sample1/ &&
+        #if str($guide.use_guide) == 'yes':
+            ln -s '$guide.guide_gff' ./special_de_output/sample1/guide.gtf &&
+        #end if
+        ## SAM input is piped through samtools sort; BAM input is handed to stringtie directly.
         #if $input_bam.metadata.ftype == 'sam':
-            samtools sort -@ \${GALAXY_SLOTS:-1} "$input_bam" | stringtie
+            samtools sort -@ \${GALAXY_SLOTS:-1} '$input_bam' | stringtie
         #else
-            stringtie "$input_bam"
+            stringtie '$input_bam'
         #end if
+        ## Options shared by both branches. -b is emitted only when an extra-output type is selected: the select value is compared, not the enclosing conditional.
         -o "$output_gtf"
         -p "\${GALAXY_SLOTS:-1}"
         #if str($guide.use_guide) == 'yes':
-            -C "$coverage" -G "$guide.guide_gff" $guide.input_estimation
-            #if $guide.output_ballgown:
-                -b .
+            -C '$coverage'
+            -G '$guide.guide_gff'
+            $guide.input_estimation
+            #if str($guide.special_outputs.special_outputs_select) != 'no':
+                -b ./special_de_output/sample1/
             #end if
         #end if
         #if str($option_set.options) == 'advanced':
-            -l "$option_set.name_prefix"
-            -f "$option_set.fraction"
-            -m "$option_set.min_tlen"
-            -a "$option_set.min_anchor_len"
-            -j "$option_set.min_anchor_cov"
-            -c "$option_set.min_bundle_cov"
-            -g "$option_set.bdist"
-            -M "$option_set.bundle_fraction" $option_set.sensitive $option_set.disable_trimming $option_set.multi_mapping
+            -l '$option_set.name_prefix'
+            -f '$option_set.fraction'
+            -m '$option_set.min_tlen'
+            -a '$option_set.min_anchor_len'
+            -j '$option_set.min_anchor_cov'
+            -c '$option_set.min_bundle_cov'
+            -g '$option_set.bdist'
+            -M '$option_set.bundle_fraction' $option_set.sensitive $option_set.disable_trimming $option_set.multi_mapping
             #if $option_set.abundance_estimation:
                 -A "$gene_abundance_estimation"
             #end if
@@ -37,20 +45,63 @@
                 -x "$option_set.omit_sequences"
             #end if
         #end if
+        ## DESeq2/edgeR option: run prepDE.py on the sample directory, then replace commas with tabs in the files it writes.
+        #if str($guide.use_guide) == 'yes':
+            #if str($guide.special_outputs.special_outputs_select) == 'deseq2':
+                &&
+                prepDE.py
+                    -i ./special_de_output/
+                    -g gene_cout_matrix.tsv
+                    -t transcripts_count_matrix.tsv
+                    -l $guide.special_outputs.read_length
+                    #if str($option_set.options) == 'advanced':
+                        -s '$option_set.name_prefix'
+                    #end if
+                    #if $guide.special_outputs.clustering:
+                        -c
+                        --legend ./legend.tsv
+
+                        &&
+                        sed -i.bak 's/,/\t/g' ./legend.tsv
+
+                    #end if
+                &&
+                sed -i.bak 's/,/\t/g' transcripts_count_matrix.tsv
+                &&
+                sed -i.bak 's/,/\t/g' gene_cout_matrix.tsv
+            #end if
+        #end if
+
     ]]>
     </command>
     <inputs>
         <param format="sam,bam" label="Mapped reads to assemble transcripts from" name="input_bam" type="data" />
         <conditional name="guide">
             <param label="Use GFF file to guide assembly" name="use_guide" type="select">
-                <option value="yes">Use GFF</option>
-                <option selected="True" value="no">Do not use GFF</option>
+                <option value="yes">Use GFF/GTF</option>
+                <option selected="True" value="no">Do not use GFF/GTF</option>
             </param>
             <when value="no" />
             <when value="yes">
-                <param argument="-G" format="gtf,gff3" help="" label="Reference annotation to use for guiding the assembly process" name="guide_gff" type="data" />
-                <param argument="-e" falsevalue="" help="" label="Perform abundance estimation only of input transcripts" name="input_estimation" truevalue="-e" type="boolean" />
-                <param argument="-b" falsevalue="" help="" label="Output additional files for use in Ballgown" name="output_ballgown" truevalue="-b" type="boolean" />
+                <param argument="-G" format="gtf,gff3" name="guide_gff" type="data"
+                       help="" label="Reference annotation to use for guiding the assembly process" />
+                <param argument="-e" name="input_estimation" truevalue="-e" type="boolean" falsevalue=""
+                       help="" label="Perform abundance estimation only of input transcripts" />
+                <conditional name="special_outputs"> <!-- selects which downstream tool the extra output files are prepared for -->
+                    <param label="Output additional files for use in..." name="special_outputs_select" type="select">
+                        <option value="ballgown">Ballgown</option>
+                        <option selected="True" value="deseq2">DESeq2/EdgeR</option>
+                        <option value="no">No additional output</option>
+                    </param>
+                    <when value="ballgown" />
+                    <when value="deseq2">
+                        <param label="Average read length" name="read_length" type="integer" value="75" help="" />
+                        <param label="Whether to cluster genes that overlap with different gene IDs"
+                               name="clustering"
+                               truevalue="--cluster"
+                               type="boolean" help="ignoring ones with geneID pattern" falsevalue="" /> <!-- the command only tests this value for truth and then emits -c plus a legend file; the truevalue string itself is not inserted -->
+                    </when>
+                </conditional>
             </when>
         </conditional>
         <conditional name="option_set">
@@ -60,19 +111,24 @@
             </param>
             <when value="default" />
             <when value="advanced">
-                <param argument="-t" falsevalue="" help="" label="Disable trimming of predicted transcripts based on coverage" name="disable_trimming" truevalue="-t" type="boolean" />
-                <param argument="-S" falsevalue="" help="" label="Increase sensitivity" name="sensitive" truevalue="-S" type="boolean" />
-                <param argument="-l" help="" label="Name prefix for output transcripts" name="name_prefix" type="text" value="STRG" />
-                <param argument="-f" help="" label="Minimum isoform fraction" max="1.0" min="0.0" name="fraction" type="float" value="0.15" />
-                <param argument="-m" help="" label="Minimum assembled transcript length" name="min_tlen" type="integer" value="200" />
-                <param argument="-a" help="" label="Minimum anchor length for junctions" name="min_anchor_len" type="integer" value="10" />
-                <param argument="-j" help="" label="Minimum junction coverage" name="min_anchor_cov" type="integer" value="1" />
-                <param argument="-c" help="" label="Minimum bundle reads per bp coverage to consider for assembly" name="min_bundle_cov" type="integer" value="2" />
-                <param argument="-g" help="" label="Gap between read mappings triggering a new bundle" name="bdist" type="integer" value="50" />
-                <param argument="-M" help="" label="Fraction of bundle allowed to be covered by multi-hit reads" name="bundle_fraction" type="float" value="0.95" />
-                <param argument="-x" help="e.g. chrM,chrX" label="Do not assemble any transcripts on these reference sequence(s)" name="omit_sequences" type="text" value="" />
-                <param argument="-A" falsevalue="" help="" label="Additional gene abundance estimation output file" name="abundance_estimation" truevalue="-A" type="boolean" />
-                <param argument="-u" falsevalue="" help="" label="Disable multi-mapping correction" name="multi_mapping" truevalue="-u" type="boolean" />
+                <param argument="-t" falsevalue="" name="disable_trimming" truevalue="-t" type="boolean"
+                    label="Disable trimming of predicted transcripts based on coverage" />
+                <param argument="-S" falsevalue="" 
+                    label="Increase sensitivity" name="sensitive" truevalue="-S" type="boolean" />
+                <param argument="-l" label="Name prefix for output transcripts" name="name_prefix" type="text" value="STRG" /> <!-- also passed to prepDE.py as -s when the DESeq2/EdgeR output is selected -->
+                <param argument="-f" label="Minimum isoform fraction" max="1.0" min="0.0" name="fraction" type="float" value="0.15" />
+                <param argument="-m" label="Minimum assembled transcript length" name="min_tlen" type="integer" value="200" />
+                <param argument="-a" label="Minimum anchor length for junctions" name="min_anchor_len" type="integer" value="10" />
+                <param argument="-j" label="Minimum junction coverage" name="min_anchor_cov" type="integer" value="1" />
+                <param argument="-c" label="Minimum bundle reads per bp coverage to consider for assembly" name="min_bundle_cov" type="integer" value="2" />
+                <param argument="-g" label="Gap between read mappings triggering a new bundle" name="bdist" type="integer" value="50" />
+                <param argument="-M" label="Fraction of bundle allowed to be covered by multi-hit reads" name="bundle_fraction" type="float" value="0.95" />
+                <param argument="-x" name="omit_sequences" type="text" value="" 
+                    help="e.g. chrM,chrX" label="Do not assemble any transcripts on these reference sequence(s)" />
+                <param argument="-A" falsevalue="" name="abundance_estimation" truevalue="-A" type="boolean"
+                    label="Additional gene abundance estimation output file" /> <!-- the command tests this value for truth and writes -A followed by the gene_abundance_estimation output path -->
+                <param argument="-u" falsevalue="" truevalue="-u" type="boolean"
+                    label="Disable multi-mapping correction" name="multi_mapping" />
             </when>
         </conditional>
     </inputs>
@@ -84,20 +140,38 @@
         <data format="gff3" label="${tool.name} on ${on_string}: Coverage" name="coverage">
             <filter>guide['use_guide'] == 'yes'</filter>
         </data>
-        <data format="tabular" from_work_dir="e_data.ctab" label="${tool.name} on ${on_string}: exon-level expression measurements" name="exon_expression">
-            <filter>guide['use_guide'] == 'yes' and guide['output_ballgown']</filter>
+        <data format="tabular" from_work_dir="special_de_output/sample1/e_data.ctab"
+            label="${tool.name} on ${on_string}: exon-level expression measurements" name="exon_expression"> <!-- this and the four following .ctab outputs are collected from the directory passed to stringtie -b -->
+            <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'ballgown'</filter>
+        </data>
+        <data format="tabular" from_work_dir="special_de_output/sample1/i_data.ctab"
+            label="${tool.name} on ${on_string}: intron-level expression measurements" name="intron_expression">
+            <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'ballgown'</filter>
         </data>
-        <data format="tabular" from_work_dir="i_data.ctab" label="${tool.name} on ${on_string}: intron-level expression measurements" name="intron_expression">
-            <filter>guide['use_guide'] == 'yes' and guide['output_ballgown']</filter>
+        <data format="tabular" from_work_dir="special_de_output/sample1/t_data.ctab"
+            label="${tool.name} on ${on_string}: transcript-level expression measurements" name="transcript_expression">
+            <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'ballgown'</filter>
+        </data>
+        <data format="tabular" from_work_dir="special_de_output/sample1/e2t.ctab"
+            label="${tool.name} on ${on_string}: exon to transcript mapping" name="exon_transcript_mapping">
+            <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'ballgown'</filter>
         </data>
-        <data format="tabular" from_work_dir="t_data.ctab" label="${tool.name} on ${on_string}: transcript-level expression measurements" name="transcript_expression">
-            <filter>guide['use_guide'] == 'yes' and guide['output_ballgown']</filter>
+        <data format="tabular" from_work_dir="special_de_output/sample1/i2t.ctab"
+            label="${tool.name} on ${on_string}: intron to transcript mapping" name="intron_transcript_mapping">
+            <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'ballgown'</filter>
         </data>
-        <data format="tabular" from_work_dir="e2t.ctab" label="${tool.name} on ${on_string}: exon to transcript mapping" name="exon_transcript_mapping">
-            <filter>guide['use_guide'] == 'yes' and guide['output_ballgown']</filter>
+        <!-- outputs of the DESeq2/EdgeR option: the matrices written by prepDE.py, plus its legend when clustering is enabled -->
+        <data format="tabular" from_work_dir="gene_cout_matrix.tsv"
+            label="${tool.name} on ${on_string}: Gene counts" name="gene_counts"> <!-- NOTE(review): "cout" is spelled the same way in the prepDE.py -g argument, so the names match; rename both together if corrected -->
+            <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'deseq2'</filter>
         </data>
-        <data format="tabular" from_work_dir="i2t.ctab" label="${tool.name} on ${on_string}: intron to transcript mapping" name="intron_transcript_mapping">
-            <filter>guide['use_guide'] == 'yes' and guide['output_ballgown']</filter>
+        <data format="tabular" from_work_dir="transcripts_count_matrix.tsv"
+            label="${tool.name} on ${on_string}: Transcript counts" name="transcript_counts">
+            <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'deseq2'</filter>
+        </data>
+        <data format="tabular" from_work_dir="legend.tsv"
+            label="${tool.name} on ${on_string}: legend" name="legend">
+            <filter>guide['use_guide'] == 'yes' and guide['special_outputs']['special_outputs_select'] == 'deseq2' and guide['special_outputs']['clustering'] is True</filter>
         </data>
     </outputs>
     <tests>
@@ -117,6 +191,7 @@
         <test>
             <param ftype="bam" name="input_bam" value="stringtie_in1.bam" />
             <param name="use_guide" value="yes" />
+            <param name="special_outputs_select" value="no" /> <!-- overrides the deseq2 default so no Ballgown or count-matrix outputs are created; only output_gtf is compared -->
             <param name="guide_gff" value="stringtie_in.gtf" />
             <param name="options" value="default" />
             <output file="stringtie_out3.gtf" ftype="gtf" lines_diff="2" name="output_gtf" />
@@ -124,6 +199,7 @@
         <test>
             <param ftype="bam" name="input_bam" value="stringtie_in1.bam" />
             <param name="use_guide" value="yes" />
+            <param name="special_outputs_select" value="no" /> <!-- overrides the deseq2 default so no Ballgown or count-matrix outputs are created -->
             <param name="guide_gff" value="stringtie_in.gtf" />
             <param name="options" value="advanced" />
             <param name="fraction" value="0.17" />
@@ -132,35 +208,50 @@
         <test>
             <param ftype="bam" name="input_bam" value="stringtie_in1.bam" />
             <param name="use_guide" value="yes" />
-            <param name="output_ballgown" value="yes" />
+            <param name="special_outputs_select" value="ballgown" /> <!-- Ballgown mode: the five .ctab outputs below are compared -->
             <param name="guide_gff" value="stringtie_in.gtf" />
             <param name="options" value="default" />
-            <output file="ballgown/e_data.ctab" ftype="tabular" name="exon_expression" />
-            <output file="ballgown/i_data.ctab" ftype="tabular" name="intron_expression" />
-            <output file="ballgown/t_data.ctab" ftype="tabular" name="transcript_expression" />
-            <output file="ballgown/e2t.ctab" ftype="tabular" name="exon_transcript_mapping" />
-            <output file="ballgown/i2t.ctab" ftype="tabular" name="intron_transcript_mapping" />
+            <output file="./ballgown/e_data.ctab" ftype="tabular" name="exon_expression" />
+            <output file="./ballgown/i_data.ctab" ftype="tabular" name="intron_expression" />
+            <output file="./ballgown/t_data.ctab" ftype="tabular" name="transcript_expression" />
+            <output file="./ballgown/e2t.ctab" ftype="tabular" name="exon_transcript_mapping" />
+            <output file="./ballgown/i2t.ctab" ftype="tabular" name="intron_transcript_mapping" />
             <output file="stringtie_out5.gtf" ftype="gtf" lines_diff="2" name="output_gtf" />
             <output file="stringtie_out_coverage.gtf" ftype="gff3" name="coverage" />
         </test>
         <test>
             <param ftype="bam" name="input_bam" value="stringtie_in1.bam" />
             <param name="use_guide" value="yes" />
+            <param name="special_outputs_select" value="deseq2" /> <!-- DESeq2/EdgeR mode: the prepDE.py count matrices are compared -->
+            <param name="input_estimation" value="True" />
+            <param name="guide_gff" value="stringtie_in.gtf" />
+            <param name="options" value="default" />
+            <param name="clustering" value="True" /> <!-- clustering on, so the legend output is produced and compared as well -->
+            <output file="./deseq2/gene_counts.tsv" ftype="tabular" lines_diff="2" name="gene_counts" />
+            <output file="./deseq2/transcript_counts.tsv" ftype="tabular" name="transcript_counts" />
+            <output file="./deseq2/legend.tsv" ftype="tabular" name="legend" />
+            <output file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" name="output_gtf" />
+            <output file="stringtie_out_coverage.gtf" ftype="gff3" name="coverage" />
+        </test>
+        <test>
+            <param ftype="bam" name="input_bam" value="stringtie_in1.bam" />
+            <param name="use_guide" value="yes" /> <!-- NOTE(review): special_outputs_select is not set in this test, so its default (deseq2) applies; confirm this is intended -->
             <param name="guide_gff" value="stringtie_in.gtf" />
             <param name="options" value="advanced" />
             <param name="fraction" value="0.17" />
             <param name="abundance_estimation" value="True" />
             <output file="stringtie_out4.gtf" ftype="gtf" lines_diff="2" name="output_gtf" />
-            <output file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" name="gene_abundance_estimation" />
+            <output file="stringtie_out7.gtf" ftype="gtf" lines_diff="2" name="gene_abundance_estimation" />
         </test>
         <test>
             <param ftype="bam" name="input_bam" value="stringtie_in1.bam" />
             <param name="use_guide" value="yes" />
+            <param name="special_outputs_select" value="no" /> <!-- overrides the deseq2 default so no Ballgown or count-matrix outputs are created -->
             <param name="guide_gff" value="stringtie_in.gtf" />
             <param name="options" value="advanced" />
             <param name="fraction" value="0.15" />
             <param name="c" value="test_chromosome" />
-            <output file="stringtie_out7.gtf" ftype="gtf" lines_diff="2" name="output_gtf" />
+            <output file="stringtie_out8.gtf" ftype="gtf" lines_diff="2" name="output_gtf" /> <!-- NOTE(review): the "c" param above matches no input name visible in this diff; omit_sequences may have been intended, confirm -->
         </test>
     </tests>
     <help>