Repository 'stringtie'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/stringtie

Changeset 13:a305d75e13f2 (2018-04-12)
Previous changeset 12:76d290331481 (2017-11-09) Next changeset 14:eafd5dc95228 (2018-05-04)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stringtie commit e811a7887db870f4f94f620f52bce656c8d5ba23
modified:
stringtie.xml
added:
test-data/gene_counts_deseq2.tsv
test-data/gene_counts_edger.tsv
test-data/legend.tsv
test-data/transcript_counts_deseq2.tsv
test-data/transcript_counts_edger.tsv
removed:
test-data/deseq2/gene_counts.tsv
test-data/deseq2/legend.tsv
test-data/deseq2/transcript_counts.tsv
b
diff -r 76d290331481 -r a305d75e13f2 stringtie.xml
--- a/stringtie.xml Thu Nov 09 11:17:32 2017 -0500
+++ b/stringtie.xml Thu Apr 12 17:30:07 2018 -0400
[
b'@@ -1,4 +1,4 @@\n-<tool id="stringtie" name="StringTie" version="1.3.3.1">\n+<tool id="stringtie" name="StringTie" version="1.3.3.2">\n     <description>transcript assembly and quantification</description>\n     <macros>\n         <import>macros.xml</import>\n@@ -7,6 +7,7 @@\n     <expand macro="stdio" />\n     <expand macro="version_command" />\n     <command><![CDATA[\n+#import re\n mkdir -p ./special_de_output/sample1/ &&\n \n ## Get Guide GTF/GFF if selected\n@@ -62,40 +63,49 @@\n \n #if str($guide.use_guide) == \'yes\':\n     #if $guide.special_outputs.special_outputs_select == \'deseq2\':\n+        #set escaped_element_identifier = re.sub(\'[^\\w\\-]\', \'_\', str($input_bam.element_identifier))\n         &&\n         ln -s \'$output_gtf\' ./special_de_output/sample1/output.gtf\n         &&\n+        TAB=\\$(printf \'\\t\')\n+        &&\n+        CR=\\$(printf \'\\r\')\n+        &&\n         prepDE.py\n-            -i ./special_de_output/\n-            -g \'$gene_counts\'\n-            -t \'$transcript_counts\'\n-            -l $guide.special_outputs.read_length\n-            #if $guide.special_outputs.string:\n-                -s \'$guide.special_outputs.string\'\n+        -i ./special_de_output/\n+        -g gene_counts.csv\n+        -t transcript_counts.csv\n+        -l $guide.special_outputs.read_length\n+        #if $guide.special_outputs.string:\n+            -s \'$guide.special_outputs.string\'\n+        #end if\n+        #if $guide.special_outputs.clustering:\n+            -c\n+            #if $guide.special_outputs.key:\n+                -k \'$guide.special_outputs.key\'\n             #end if\n-            #if $guide.special_outputs.clustering:\n-                -c\n-                #if $guide.special_outputs.key:\n-                    -k \'$guide.special_outputs.key\'\n-                #end if\n-                --legend \'$legend\'\n-                > /dev/null\n-                &&\n-                sed -i.bak \'s/,/\\t/g\' \'$legend\'\n-                &&\n-                sed -i.bak \'s/\\r//g\' \'$legend\'\n-            #end if\n+            --legend \'$legend\'\n+            > /dev/null\n+            &&\n+            sed -i.bak -e "s/,/\\${TAB}/g" -e "s/\\${CR}//g" \'$legend\'\n+        #else\n+            > /dev/null\n+        #end if\n \n-        > /dev/null\n-\n-        &&\n-        sed -i.bak \'s/,/\\t/g\' \'$transcript_counts\'\n+        ## Replace commas with tabs\n         &&\n-        sed -i.bak \'s/\\r//g\' \'$transcript_counts\'\n+        sed -i.bak -e "s/,/\\${TAB}/g" -e "s/\\${CR}//g" gene_counts.csv transcript_counts.csv\n+        #if $guide.special_outputs.keep_header:\n+            &&\n+            head -n 1 gene_counts.csv | sed -e \'s/sample1/$escaped_element_identifier/\' > \'$gene_counts\'\n+            &&\n+            head -n 1 transcript_counts.csv | sed -e \'s/sample1/$escaped_element_identifier/\' > \'$transcript_counts\'\n+        #end if\n+        ## Sort count files on the first column\n         &&\n-        sed -i.bak \'s/,/\\t/g\' \'$gene_counts\'\n+        tail -n +2 gene_counts.csv | sort -t"\\${TAB}" -k1 >> \'$gene_counts\'\n         &&\n-        sed -i.bak \'s/\\r//g\' \'$gene_counts\'\n+        tail -n +2 transcript_counts.csv | sort -t"\\${TAB}" -k1 >> \'$transcript_counts\'\n     #end if\n #end if\n     ]]></command>\n@@ -141,7 +151,7 @@\n                     <when value="ballgown" />\n                     <when value="deseq2">\n                         <param name="read_length" argument="--length" type="integer" min="0" value="75" label="Specify the average read length" help="Default: 75" />\n-                        <param name="clustering" argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" checked="False" label="Cluster overlapping genes" help="Choose whether to cluster genes with different gene IDs that overlap. Transcripts containing the geneID prefix will be ignored. Default: No" />\n+                        <param name="clustering" argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" checked="false" label="Cluster overlapping genes" help="Choose whether to cluster genes with different gene IDs that ov'..b'aram>\n+                        <param name="keep_header" type="boolean" checked="true" label="Output header line?" help="Keep the header line for edgeR, remove it for DESeq2" />\n                     </when>\n                     <when value="no" />\n                 </conditional>\n-                 <param name="coverage_file" argument="-C" type="boolean" truevalue="-C" falsevalue="" checked="False" label="Output coverage file?" help="If StringTie is run with this option (requires -G), it returns a file with all the transcripts in the reference annotation that are fully covered, end to end, by reads. The output format is a GTF file as described below. Each line of the GTF is corresponds to a gene or transcript in the reference annotation. Default: No"/>\n+                <param name="coverage_file" argument="-C" type="boolean" truevalue="-C" falsevalue="" checked="False" label="Output coverage file?" help="If StringTie is run with this option (requires -G), it returns a file with all the transcripts in the reference annotation that are fully covered, end to end, by reads. The output format is a GTF file as described below. Each line of the GTF is corresponds to a gene or transcript in the reference annotation. Default: No"/>\n             </when>\n         </conditional>\n         <section name="adv" title="Advanced Options">\n@@ -260,7 +271,7 @@\n             <output name="output_gtf" file="stringtie_out5.gtf" ftype="gtf" lines_diff="2" />\n             <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" />\n         </test>\n-        <!--Ensure output for DESeq2/edgeR works -->\n+        <!--Ensure output for edgeR works -->\n         <test expect_num_outputs="5">\n             <param name="input_bam" ftype="bam" value="stringtie_in1.bam" />\n             <param name="use_guide" value="yes" />\n@@ -270,9 +281,26 @@\n             <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" />\n             <param name="coverage_file" value="True" />\n             <param name="clustering" value="True" />\n-            <output name="gene_counts" file="./deseq2/gene_counts.tsv" ftype="tabular" />\n-            <output name="transcript_counts" file="./deseq2/transcript_counts.tsv" ftype="tabular" />\n-            <output name="legend" file="./deseq2/legend.tsv" ftype="tabular" />\n+            <output name="gene_counts" file="gene_counts_edger.tsv" ftype="tabular" />\n+            <output name="transcript_counts" file="transcript_counts_edger.tsv" ftype="tabular" />\n+            <output name="legend" file="legend.tsv" ftype="tabular" />\n+            <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" />\n+            <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" />\n+        </test>\n+        <!--Ensure output for DESeq2 works -->\n+        <test expect_num_outputs="5">\n+            <param name="input_bam" ftype="bam" value="stringtie_in1.bam" />\n+            <param name="use_guide" value="yes" />\n+            <param name="special_outputs_select" value="deseq2" />\n+            <param name="keep_header" value="False" />\n+            <param name="input_estimation" value="True" />\n+            <param name="guide_gff_select" value="history" />\n+            <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" />\n+            <param name="coverage_file" value="True" />\n+            <param name="clustering" value="True" />\n+            <output name="gene_counts" file="gene_counts_deseq2.tsv" ftype="tabular" />\n+            <output name="transcript_counts" file="transcript_counts_deseq2.tsv" ftype="tabular" />\n+            <output name="legend" file="legend.tsv" ftype="tabular" />\n             <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" />\n             <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" />\n         </test>\n@@ -474,4 +502,4 @@\n \n     ]]></help>\n     <expand macro="citations" />\n-</tool>\n\\ No newline at end of file\n+</tool>\n'
b
diff -r 76d290331481 -r a305d75e13f2 test-data/deseq2/gene_counts.tsv
--- a/test-data/deseq2/gene_counts.tsv Thu Nov 09 11:17:32 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-gene_id sample1
-CUFF.1 182
b
diff -r 76d290331481 -r a305d75e13f2 test-data/deseq2/transcript_counts.tsv
--- a/test-data/deseq2/transcript_counts.tsv Thu Nov 09 11:17:32 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-transcript_id sample1
-CUFF.1.1 182
b
diff -r 76d290331481 -r a305d75e13f2 test-data/gene_counts_deseq2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_counts_deseq2.tsv Thu Apr 12 17:30:07 2018 -0400
b
@@ -0,0 +1,1 @@
+CUFF.1 182
b
diff -r 76d290331481 -r a305d75e13f2 test-data/gene_counts_edger.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_counts_edger.tsv Thu Apr 12 17:30:07 2018 -0400
b
@@ -0,0 +1,2 @@
+gene_id stringtie_in1_bam
+CUFF.1 182
b
diff -r 76d290331481 -r a305d75e13f2 test-data/transcript_counts_deseq2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcript_counts_deseq2.tsv Thu Apr 12 17:30:07 2018 -0400
b
@@ -0,0 +1,1 @@
+CUFF.1.1 182
b
diff -r 76d290331481 -r a305d75e13f2 test-data/transcript_counts_edger.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcript_counts_edger.tsv Thu Apr 12 17:30:07 2018 -0400
b
@@ -0,0 +1,2 @@
+transcript_id stringtie_in1_bam
+CUFF.1.1 182