Mercurial > repos > iuc > stringtie
changeset 13:a305d75e13f2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stringtie commit e811a7887db870f4f94f620f52bce656c8d5ba23
author | iuc |
---|---|
date | Thu, 12 Apr 2018 17:30:07 -0400 |
parents | 76d290331481 |
children | eafd5dc95228 |
files | stringtie.xml test-data/deseq2/gene_counts.tsv test-data/deseq2/legend.tsv test-data/deseq2/transcript_counts.tsv test-data/gene_counts_deseq2.tsv test-data/gene_counts_edger.tsv test-data/legend.tsv test-data/transcript_counts_deseq2.tsv test-data/transcript_counts_edger.tsv |
diffstat | 7 files changed, 67 insertions(+), 37 deletions(-) [+] |
line wrap: on
line diff
--- a/stringtie.xml Thu Nov 09 11:17:32 2017 -0500 +++ b/stringtie.xml Thu Apr 12 17:30:07 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="stringtie" name="StringTie" version="1.3.3.1"> +<tool id="stringtie" name="StringTie" version="1.3.3.2"> <description>transcript assembly and quantification</description> <macros> <import>macros.xml</import> @@ -7,6 +7,7 @@ <expand macro="stdio" /> <expand macro="version_command" /> <command><![CDATA[ +#import re mkdir -p ./special_de_output/sample1/ && ## Get Guide GTF/GFF if selected @@ -62,40 +63,49 @@ #if str($guide.use_guide) == 'yes': #if $guide.special_outputs.special_outputs_select == 'deseq2': + #set escaped_element_identifier = re.sub('[^\w\-]', '_', str($input_bam.element_identifier)) && ln -s '$output_gtf' ./special_de_output/sample1/output.gtf && + TAB=\$(printf '\t') + && + CR=\$(printf '\r') + && prepDE.py - -i ./special_de_output/ - -g '$gene_counts' - -t '$transcript_counts' - -l $guide.special_outputs.read_length - #if $guide.special_outputs.string: - -s '$guide.special_outputs.string' + -i ./special_de_output/ + -g gene_counts.csv + -t transcript_counts.csv + -l $guide.special_outputs.read_length + #if $guide.special_outputs.string: + -s '$guide.special_outputs.string' + #end if + #if $guide.special_outputs.clustering: + -c + #if $guide.special_outputs.key: + -k '$guide.special_outputs.key' #end if - #if $guide.special_outputs.clustering: - -c - #if $guide.special_outputs.key: - -k '$guide.special_outputs.key' - #end if - --legend '$legend' - > /dev/null - && - sed -i.bak 's/,/\t/g' '$legend' - && - sed -i.bak 's/\r//g' '$legend' - #end if + --legend '$legend' + > /dev/null + && + sed -i.bak -e "s/,/\${TAB}/g" -e "s/\${CR}//g" '$legend' + #else + > /dev/null + #end if - > /dev/null - - && - sed -i.bak 's/,/\t/g' '$transcript_counts' + ## Replace commas with tabs && - sed -i.bak 's/\r//g' '$transcript_counts' + sed -i.bak -e "s/,/\${TAB}/g" -e "s/\${CR}//g" gene_counts.csv transcript_counts.csv + #if 
$guide.special_outputs.keep_header: + && + head -n 1 gene_counts.csv | sed -e 's/sample1/$escaped_element_identifier/' > '$gene_counts' + && + head -n 1 transcript_counts.csv | sed -e 's/sample1/$escaped_element_identifier/' > '$transcript_counts' + #end if + ## Sort count files on the first column && - sed -i.bak 's/,/\t/g' '$gene_counts' + tail -n +2 gene_counts.csv | sort -t"\${TAB}" -k1 >> '$gene_counts' && - sed -i.bak 's/\r//g' '$gene_counts' + tail -n +2 transcript_counts.csv | sort -t"\${TAB}" -k1 >> '$transcript_counts' #end if #end if ]]></command> @@ -141,7 +151,7 @@ <when value="ballgown" /> <when value="deseq2"> <param name="read_length" argument="--length" type="integer" min="0" value="75" label="Specify the average read length" help="Default: 75" /> - <param name="clustering" argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" checked="False" label="Cluster overlapping genes" help="Choose whether to cluster genes with different gene IDs that overlap. Transcripts containing the geneID prefix will be ignored. Default: No" /> + <param name="clustering" argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" checked="false" label="Cluster overlapping genes" help="Choose whether to cluster genes with different gene IDs that overlap. Transcripts containing the geneID prefix will be ignored. Default: No" /> <param argument="--string" type="text" label="Prefix used for transcripts" help="If a different prefix was used for geneIDs assigned by StringTie than the default, specify it here. Only letters and numbers will be retained in this field. Default: MSTRG" > <sanitizer> <valid initial="string.letters,string.digits"></valid> @@ -152,10 +162,11 @@ <valid initial="string.letters,string.digits"></valid> </sanitizer> </param> + <param name="keep_header" type="boolean" checked="true" label="Output header line?" 
help="Keep the header line for edgeR, remove it for DESeq2" /> </when> <when value="no" /> </conditional> - <param name="coverage_file" argument="-C" type="boolean" truevalue="-C" falsevalue="" checked="False" label="Output coverage file?" help="If StringTie is run with this option (requires -G), it returns a file with all the transcripts in the reference annotation that are fully covered, end to end, by reads. The output format is a GTF file as described below. Each line of the GTF corresponds to a gene or transcript in the reference annotation. Default: No"/> + <param name="coverage_file" argument="-C" type="boolean" truevalue="-C" falsevalue="" checked="False" label="Output coverage file?" help="If StringTie is run with this option (requires -G), it returns a file with all the transcripts in the reference annotation that are fully covered, end to end, by reads. The output format is a GTF file as described below. Each line of the GTF corresponds to a gene or transcript in the reference annotation. 
Default: No"/> </when> </conditional> <section name="adv" title="Advanced Options"> @@ -260,7 +271,7 @@ <output name="output_gtf" file="stringtie_out5.gtf" ftype="gtf" lines_diff="2" /> <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" /> </test> - <!--Ensure output for DESeq2/edgeR works --> + <!--Ensure output for edgeR works --> <test expect_num_outputs="5"> <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> <param name="use_guide" value="yes" /> @@ -270,9 +281,26 @@ <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" /> <param name="coverage_file" value="True" /> <param name="clustering" value="True" /> - <output name="gene_counts" file="./deseq2/gene_counts.tsv" ftype="tabular" /> - <output name="transcript_counts" file="./deseq2/transcript_counts.tsv" ftype="tabular" /> - <output name="legend" file="./deseq2/legend.tsv" ftype="tabular" /> + <output name="gene_counts" file="gene_counts_edger.tsv" ftype="tabular" /> + <output name="transcript_counts" file="transcript_counts_edger.tsv" ftype="tabular" /> + <output name="legend" file="legend.tsv" ftype="tabular" /> + <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" /> + <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" /> + </test> + <!--Ensure output for DESeq2 works --> + <test expect_num_outputs="5"> + <param name="input_bam" ftype="bam" value="stringtie_in1.bam" /> + <param name="use_guide" value="yes" /> + <param name="special_outputs_select" value="deseq2" /> + <param name="keep_header" value="False" /> + <param name="input_estimation" value="True" /> + <param name="guide_gff_select" value="history" /> + <param name="ref_hist" ftype="gtf" value="stringtie_in.gtf" /> + <param name="coverage_file" value="True" /> + <param name="clustering" value="True" /> + <output name="gene_counts" file="gene_counts_deseq2.tsv" ftype="tabular" /> + <output name="transcript_counts" file="transcript_counts_deseq2.tsv" ftype="tabular" 
/> + <output name="legend" file="legend.tsv" ftype="tabular" /> <output name="output_gtf" file="stringtie_out6.gtf" ftype="gtf" lines_diff="2" /> <output name="coverage" file="stringtie_out_coverage.gtf" ftype="gtf" /> </test> @@ -474,4 +502,4 @@ ]]></help> <expand macro="citations" /> -</tool> \ No newline at end of file +</tool>
--- a/test-data/deseq2/gene_counts.tsv Thu Nov 09 11:17:32 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -gene_id sample1 -CUFF.1 182
--- a/test-data/deseq2/transcript_counts.tsv Thu Nov 09 11:17:32 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -transcript_id sample1 -CUFF.1.1 182
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gene_counts_deseq2.tsv Thu Apr 12 17:30:07 2018 -0400 @@ -0,0 +1,1 @@ +CUFF.1 182
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gene_counts_edger.tsv Thu Apr 12 17:30:07 2018 -0400 @@ -0,0 +1,2 @@ +gene_id stringtie_in1_bam +CUFF.1 182