comparison htseq-count.xml @ 10:5d969cb56112

Version 0.3 - paired-end sorting is now built-in (uses Picard tools)
author lparsons
date Fri, 07 Dec 2012 14:35:44 -0500
parents 971e20519fb8
children f320093f1e8e
comparison
equal deleted inserted replaced
9:971e20519fb8 10:5d969cb56112
1 <tool id="htseq_count" name="htseq-count" version="0.2.1"> 1 <tool id="htseq_count" name="htseq-count" version="0.3">
2 <description> - Count aligned reads in a BAM file that overlap features in a GFF file</description> 2 <description> - Count aligned reads in a BAM file that overlap features in a GFF file</description>
3 <version_command>htseq-count -h | grep version | sed 's/^\(.*\)*\(version .*\)\./\2/'</version_command> 3 <version_command>htseq-count -h | grep version | sed 's/^\(.*\)*\(version .*\)\./\2/'</version_command>
4 <requirements> 4 <requirements>
5 <requirement type="package" version="1.6.2">numpy</requirement> 5 <requirement type="package" version="1.6.2">numpy</requirement>
6 <requirement type="package" version="0.5.3p9">htseq</requirement> 6 <requirement type="package" version="0.5.3p9">htseq</requirement>
7 <requirement type="package" version="0.1.18">samtools</requirement> 7 <requirement type="package" version="0.1.18">samtools</requirement>
8 <requirement type="package" version="1.56.0">picard</requirement>
8 </requirements> 9 </requirements>
9 <command> 10 <command>
10 ##set up input files 11 ##set up input files
11 #set $reference_fasta_filename = "localref.fa" 12 #set $reference_fasta_filename = "localref.fa"
12 #if $samout_conditional.samout: 13 #if $samout_conditional.samout:
15 samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for htseq-count" &gt;&amp;2 &amp;&amp; 16 samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for htseq-count" &gt;&amp;2 &amp;&amp;
16 #else: 17 #else:
17 #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path ) 18 #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path )
18 #end if 19 #end if
19 #end if 20 #end if
20 21 #if str($singlepaired) == "paired":
21 #if $samfile.extension == "bam": 22 ln -s $samfile local_input.sam &amp;&amp;
22 samtools view $samfile | 23 java -Xmx2G -jar "\$JAVA_JAR_PATH/SortSam.jar" VALIDATION_STRINGENCY=LENIENT SORT_ORDER=queryname O=prepared_input.sam I=local_input.sam TMP_DIR="${__new_file_path__}"
24 || echo "Error running Picard MergeSamFiles" &gt;&amp;2 &amp;&amp;
25 #else:
26 #if $samfile.extension == "bam":
27 samtools view $samfile |
28 #else
29 ln -s $samfile prepared_input.sam &amp;&amp;
30 #end if
23 #end if 31 #end if
24 htseq-count 32 htseq-count
25 --mode=$mode 33 --mode=$mode
26 --stranded=$stranded 34 --stranded=$stranded
27 --minaqual=$minaqual 35 --minaqual=$minaqual
28 --type=$featuretype 36 --type=$featuretype
29 --idattr=$idattr 37 --idattr=$idattr
30 #if $samout_conditional.samout: 38 #if $samout_conditional.samout:
31 --samout=$__new_file_path__/${samoutfile.id}_tmp 39 --samout=$__new_file_path__/${samoutfile.id}_tmp
32 #end if 40 #end if
33 #if $samfile.extension == "bam": 41 #if str($singlepaired) == "paired":
34 - 42 prepared_input.sam
35 #else 43 #else:
36 $samfile 44 #if $samfile.extension == "bam":
37 #end if 45 -
46 #else:
47 prepared_input.sam
48 #end if
49 #end if
38 $gfffile 50 $gfffile
39 | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&amp;2"; else print $0}' &gt; $counts 2&gt;$othercounts 51 | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&amp;2"; else print $0}' &gt; $counts 2&gt;$othercounts
40 #if $samout_conditional.samout: 52 #if $samout_conditional.samout:
41 &amp;&amp; samtools view -Su -t ${reference_fasta_filename}.fai $__new_file_path__/${samoutfile.id}_tmp | samtools sort -o - sorted > $samoutfile 53 &amp;&amp; samtools view -Su -t ${reference_fasta_filename}.fai $__new_file_path__/${samoutfile.id}_tmp | samtools sort -o - sorted > $samoutfile
42 #end if</command> 54 #end if</command>
43 <inputs> 55 <inputs>
44 <param format="sam, bam" name="samfile" type="data" label="Aligned SAM/BAM File"> 56 <param format="sam, bam" name="samfile" type="data" label="Aligned SAM/BAM File"/>
45 <help>Paired-End data MUST be sorted by QUERY NAME, use "NGS: Picard - Paired Read Mate Fixer" to sort by QUERY NAME and output to SAM (not BAM) before using this tool on paired data.</help> 57 <param name="singlepaired" type="select" label="Is this library mate-paired?">
58 <help>Paired libraries will be sorted by read name prior to counting.</help>
59 <option value="single" selected="true">single-end</option>
60 <option value="paired">paired-end</option>
46 </param> 61 </param>
47 <param format="gff" name="gfffile" type="data" label="GFF File"/> 62 <param format="gff" name="gfffile" type="data" label="GFF File"/>
48 <param name="mode" type="select" label="Mode"> 63 <param name="mode" type="select" label="Mode">
49 <help>Mode to handle reads overlapping more than one feature.</help> 64 <help>Mode to handle reads overlapping more than one feature.</help>
50 <option value="union" selected="true">Union</option> 65 <option value="union" selected="true">Union</option>
91 </when> 106 </when>
92 </conditional> 107 </conditional>
93 </inputs> 108 </inputs>
94 109
95 <outputs> 110 <outputs>
96 <data format="tabular" name="counts" label="${tool.name} on ${on_string}"/> 111 <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}"/>
97 <data format="tabular" name="othercounts" label="${tool.name} on ${on_string} (no feature)"/> 112 <data format="tabular" name="othercounts" metadata_source="samfile" label="${tool.name} on ${on_string} (no feature)"/>
98 <data format="bam" name="samoutfile" label="${tool.name} on ${on_string} (BAM)"> 113 <data format="bam" name="samoutfile" metadata_source="samfile" label="${tool.name} on ${on_string} (BAM)">
99 <filter>samout_conditional['samout']</filter> 114 <filter>samout_conditional['samout']</filter>
100 </data> 115 </data>
101 </outputs> 116 </outputs>
102 117
103 <stdio> 118 <stdio>
105 <regex match="htseq-count: command not found" source="stderr" level="fatal" description="The HTSeq python package is not properly installed, contact Galaxy administrators" /> 120 <regex match="htseq-count: command not found" source="stderr" level="fatal" description="The HTSeq python package is not properly installed, contact Galaxy administrators" />
106 <regex match="samtools: command not found" source="stderr" level="fatal" description="The samtools package is not properly installed, contact Galaxy administrators" /> 121 <regex match="samtools: command not found" source="stderr" level="fatal" description="The samtools package is not properly installed, contact Galaxy administrators" />
107 <regex match="Error: Feature (.+) does not contain a '(.+)' attribute" source="both" level="fatal" description="Error parsing the GFF file, at least one feature of the specified 'Feature type' does not have a value for the specified 'ID Attribute'" /> 122 <regex match="Error: Feature (.+) does not contain a '(.+)' attribute" source="both" level="fatal" description="Error parsing the GFF file, at least one feature of the specified 'Feature type' does not have a value for the specified 'ID Attribute'" />
108 <regex match="Error occured in line (\d+) of file" source="stderr" level="fatal" description="Unknown error parsing the GFF file" /> 123 <regex match="Error occured in line (\d+) of file" source="stderr" level="fatal" description="Unknown error parsing the GFF file" />
109 <regex match="Error" source="stderr" level="fatal" description="Unknown error occured" /> 124 <regex match="Error" source="stderr" level="fatal" description="Unknown error occured" />
125 <regex match="Warning: Read (.+) claims to have an aligned mate which could not be found. \(Is the SAM file properly sorted\?\)" source="stderr" level="warning" description="PAIRED DATA MISSING OR NOT PROPERLY SORTED. Try reruning and selecting the paired-end option. See stderr output of this dataset for more information." />
110 </stdio> 126 </stdio>
111 127
112 <tests> 128 <tests>
113 <test> 129 <test>
114 <param name="samfile" value="htseq-test.sam" /> 130 <param name="samfile" value="htseq-test.sam" />
121 <param name="samfile" value="htseq-test.bam" /> 137 <param name="samfile" value="htseq-test.bam" />
122 <param name="gfffile" value="htseq-test.gff" /> 138 <param name="gfffile" value="htseq-test.gff" />
123 <param name="samout" value="False" /> 139 <param name="samout" value="False" />
124 <output name="counts" file="htseq-test_counts.tsv" /> 140 <output name="counts" file="htseq-test_counts.tsv" />
125 <output name="othercounts" file="htseq-test_othercounts.tsv" /> 141 <output name="othercounts" file="htseq-test_othercounts.tsv" />
142 </test>
143 <test>
144 <param name="samfile" value="htseq-test-paired.bam" />
145 <param name="singlepaired" value="paired" />
146 <param name="gfffile" value="htseq-test.gff" />
147 <param name="samout" value="False" />
148 <output name="counts" file="htseq-test-paired_counts.tsv" />
149 <output name="othercounts" file="htseq-test-paired_othercounts.tsv" />
126 </test> 150 </test>
127 <!-- Seems to be an issue setting the $reference_fasta_filename variable during test 151 <!-- Seems to be an issue setting the $reference_fasta_filename variable during test
128 <test> 152 <test>
129 <param name="samfile" value="htseq-test.sam" /> 153 <param name="samfile" value="htseq-test.sam" />
130 <param name="gfffile" value="htseq-test.gff" /> 154 <param name="gfffile" value="htseq-test.gff" />