annotate rsem_calculate_expression.xml @ 2:5949673f9e3e

rename test data RSEM_ref_reference.rsem_ref
author Jim Johnson <jj@umn.edu>
date Fri, 07 Feb 2014 07:34:29 -0600
parents ca988deacfd1
children 59459de65740
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
1 <tool id="rsem_calculate_expression" name="RSEM calculate expression" version="1.1.17">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
2 <description>RNA-Seq by Expectation-Maximization</description>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
3 <requirements>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
4 <requirement type="package" version="1.1.17">rsem</requirement>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
5 <requirement type="package" version="0.1.19">samtools</requirement>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
6 <requirement type="package" version="1.0.0">bowtie</requirement>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
7 </requirements>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
8 <command>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
9 rsem-calculate-expression
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
10 ## --tag string
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
11 #if $seedlength:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
12 --seed-length $seedlength
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
13 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
14 --forward-prob $forward_prob
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
15 #if $rsem_options.fullparams == 'fullset':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
16 ## Fragment info
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
17 #if $rsem_options.fragment_length_mean:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
18 --fragment-length-mean $rsem_options.fragment_length_mean
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
19 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
20 #if $rsem_options.fragment_length_min:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
21 --fragment-length-min $rsem_options.fragment_length_min
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
22 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
23 #if $rsem_options.fragment_length_sd:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
24 --fragment-length-sd $rsem_options.fragment_length_sd
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
25 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
26 #if $rsem_options.fragment_length_max:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
27 --fragment-length-max $rsem_options.fragment_length_max
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
28 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
29 ## RSPD
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
30 #if $rsem_options.rspd.estimate == 'yes':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
31 --estimate-rspd
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
32 #if $rsem_options.rspd.num_rspd_bins:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
33 --num-rspd-bins $rsem_options.rspd.num_rspd_bins
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
34 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
35 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
36 ## Calculate 95% credibility intervals and posterior mean estimates.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
37 #if $rsem_options.useci.ci == 'yes':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
38 --calc-ci
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
39 #if $rsem_options.useci.cimem:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
40 --ci-memory $rsem_options.useci.cimem
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
41 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
42 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
43 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
44 ## --num-threads $GALAXY_SLOTS
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
45 #if $input.format != 'bam' and $input.bowtie_options.fullparams == 'fullset':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
46 ## Bowtie params
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
47 #if $bowtie_options.bowtie_e:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
48 --bowtie-e $bowtie_options.bowtie_e
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
49 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
50 #if $bowtie_options.bowtie_m:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
51 --bowtie-m $bowtie_options.bowtie_m
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
52 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
53 #if $bowtie_options.bowtie_n:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
54 --bowtie-n $bowtie_options.bowtie_n
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
55 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
56 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
57 ## Outputs
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
58 #if $rsem_outputs.result_bams == 'none':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
59 --no-bam-output
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
60 #else
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
61 #if $rsem_outputs.result_bams == 'both':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
62 --output-genome-bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
63 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
64 $rsem_outputs.sampling_for_bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
65 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
66 ## Input data
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
67 #if $input.format=="fastq"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
68 $input.fastq_select
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
69 #if $input.fastq.matepair=="single":
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
70 $input.fastq.singlefastq
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
71 #elif $input.fastq.matepair=="paired":
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
72 --paired-end
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
73 $input.fastq.fastq1
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
74 $input.fastq.fastq2
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
75 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
76 #elif $input.format=="fasta"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
77 --no-qualities
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
78 #if $input.fasta.matepair=="single":
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
79 $input.fasta.singlefasta
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
80 #elif $input.fasta.matepair=="paired":
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
81 --paired-end
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
82 $input.fasta.fasta1
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
83 $input.fasta.fasta2
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
84 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
85 #elif $input.format=="sam"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
86 #if $input.matepair=="paired":
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
87 --paired-end
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
88 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
89 #if $input.rsem_sam._extension == 'sam':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
90 --sam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
91 #elif $input.rsem_sam._extension == 'bam':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
92 --bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
93 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
94 $input.rsem_sam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
95 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
96 ## RSEM reference
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
97 #if $reference.refSrc == 'history':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
98 ${reference.rsem_ref.extra_files_path}/${reference.rsem_ref.metadata.reference_name}
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
99 #elif $reference.refSrc == 'cached':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
100 ${reference.index.fields.path}
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
101 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
102 ## sample_name: use a hard coded name so we can pull out galaxy outputs
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
103 rsem_output
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
104 ## direct output into logfile
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
105 > $log
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
106 </command>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
107 <macros>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
108 <macro name="rsem_options">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
109 <param name="seedlength" type="integer" value="25" optional="true" label="Seed length used by the read aligner" help="Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads. RSEM uses this value for Bowtie's seed length parameter. The minimum value is 25. (Default:25)">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
110 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
111 <param name="forward_prob" type="select" label="Is the library strand specific?">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
112 <option value="0.5" selected="true">No</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
113 <option value="1">Yes, the reads (or first reads from paired-end libraries) are only in the forward orientation</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
114 <option value="0">Yes, the reads (or first reads from paired-end libraries) are only in the reverse orientation</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
115 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
116 <conditional name="rsem_options">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
117 <param name="fullparams" type="select" label="Additional RSEM options">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
118 <option value="default">Use RSEM Defaults</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
119 <option value="fullset">Set Additional RSEM Options</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
120 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
121 <when value="default"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
122 <when value="fullset">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
123 <param name="fragment_length_min" type="integer" value="1" optional="true" label="Minimum read/insert length." help=" This is also the value for the bowtie -I option">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
124 <validator type="in_range" message="0 or greater" min="0" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
125 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
126 <param name="fragment_length_max" type="integer" value="1000" optional="true" label="Maximum read/insert length." help=" This is also the value for the bowtie -X option">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
127 <validator type="in_range" message="0 or greater" min="0" max="1000000"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
128 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
129 <param name="fragment_length_mean" type="float" value="" optional="true" label="Fragment length mean (single-end data only)" help="The mean of the fragment length distribution, which is assumed to be a Gaussian. (Default: -1, which disables use of the fragment length distribution)">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
130 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
131 <param name="fragment_length_sd" type="float" value="" optional="true" label="The standard deviation of the fragment length distribution (single-end data only)" help="Default 0, which assumes that all fragments are of the same length, given by the rounded value of fragment length mean. ">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
132 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
133 <conditional name="rspd">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
134 <param name="estimate" type="select" label="Read Start Position Distribution (RSPD)"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
135 help="Set this option if you want to estimate the read start position distribution (RSPD) from data. Otherwise, RSEM will use a uniform RSPD.">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
136 <option value="no" selected="true">Use a uniform RSPD</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
137 <option value="yes">Estimate and correct for a non-uniform RSPD</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
138 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
139 <when value="no"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
140 <when value="yes">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
141 <param name="num_rspd_bins" type="integer" value="20" optional="true" label="Number of bins in the RSPD." help="Use of the default setting of 20 is recommended.">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
142 <validator type="in_range" message="" min="0" max="100"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
143 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
144 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
145 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
146 <conditional name="useci">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
147 <param name="ci" type="select" label="Calculate 95% Credibility Intervals">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
148 <option value="no" selected="true">no</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
149 <option value="yes">yes</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
150 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
151 <when value="no"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
152 <when value="yes">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
153 <param name="cimem" size="4" type="text" value="1024" label="Amount of memory in (MB) for computing CI" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
154 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
155 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
156 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
157 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
158 </macro>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
159 <macro name="bowtie_options">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
160 <conditional name="bowtie_options">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
161 <param name="fullparams" type="select" label="bowtie settings">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
162 <option value="default">use bowtie defaults</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
163 <option value="fullset">set bowtie options</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
164 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
165 <when value="default"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
166 <when value="fullset">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
167 <param name="bowtie_n" type="integer" value="2" optional="true" label="Bowtie mismatches" help="Bowtie parameter max # of mismatches in the seed. (Range: 0-3, Default: 2) ">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
168 <validator type="in_range" message="max # of mismatches in the seed between 0 and 3" min="0" max="3"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
169 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
170 <param name="bowtie_e" type="integer" value="99999999" label="Maximum sum of quality scores at mismatched positions in read alignments. This is also the value for the Bowtie -e option">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
171 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
172 <param name="bowtie_m" type="integer" value="200" label="Discard alignments for reads with number of alignments greater than">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
173 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
174 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
175 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
176 </macro>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
177 <macro name="sampling_for_bam">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
178 <param name="sampling_for_bam" type="boolean" truevalue="--sampling-for-bam" falsevalue="" checked="false" label="Use sampling for BAM">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
179 <help> When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior probabilities, one alignment is sampled according to the posterior probabilities. The sampling procedure includes the alignment to the "noise" transcript, which does not appear in the BAM file. Only the sampled alignment has a weight of 1. All other alignments have weight 0. If the "noise" transcript is sampled, all alignments appearing in the BAM file should have weight 0. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
180 </help>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
181 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
182 </macro>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
183 </macros>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
184
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
185 <inputs>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
186 <param name="sample" type="text" value="rsem_sample" label="Sample name" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
187 <conditional name="reference">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
188 <param name="refSrc" type="select" label="RSEM Reference Source">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
189 <option value="cached">Locally cached</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
190 <option value="history">From your history</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
191 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
192 <when value="cached">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
193 <param name="index" type="select" label="Select RSEM reference" help="Select from a list of pre-indexed references. If you don't see anything consult the wrapper's documentation on how to create or download a reference">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
194 <options from_data_table="rsem_indexes">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
195 <filter type="sort_by" column="2" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
196 <validator type="no_options" message="No indexes are available" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
197 </options>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
198 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
199 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
200 <when value="history">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
201 <param name="rsem_ref" type="data" format="rsem_ref" label="RSEM reference" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
202 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
203 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
204 <conditional name="input">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
205 <param name="format" type="select" label="RSEM Input file type">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
206 <option value="fastq">FASTQ</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
207 <option value="fasta">FASTA</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
208 <option value="sam">SAM/BAM</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
209 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
210 <when value="fastq">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
211 <param name="fastq_select" size="15" type="select" label="FASTQ type" >
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
212 <option value="--phred33-quals" selected="true">phred33 qualities (default for sanger)</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
213 <option value="--solexa-quals">solexa qualities</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
214 <option value="--phred64-quals">phred64 qualities</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
215 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
216 <conditional name="fastq">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
217 <param name="matepair" type="select" label="Library type">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
218 <option value="single">Single End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
219 <option value="paired">Paired End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
220 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
221 <when value="single">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
222 <param name="singlefastq" type="data" format="fastq" label="FASTQ file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
223 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
224 <when value="paired">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
225 <param name="fastq1" type="data" format="fastq" label="Read 1 fastq file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
226 <param name="fastq2" type="data" format="fastq" label="Read 2 fastq file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
227 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
228 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
229 <expand macro="bowtie_options"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
230 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
231 <when value="fasta">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
232 <conditional name="fasta">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
233 <param name="matepair" type="select" label="Library Type">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
234 <option value="single">Single End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
235 <option value="paired">Paired End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
236 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
237 <when value="single">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
238 <param name="singlefasta" type="data" format="fasta" label="fasta file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
239 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
240 <when value="paired">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
241 <param name="fasta1" type="data" format="fasta" label="Read 1 fasta file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
242 <param name="fasta2" type="data" format="fasta" label="Read 2 fasta file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
243 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
244 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
245 <expand macro="bowtie_options"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
246 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
247 <when value="sam">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
248 <!-- convert-sam-for-rsem /ref/mouse_125 input.sam -o input_for_rsem.sam -->
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
249 <param name="matepair" type="select" label="Library Type">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
250 <option value="single">Single End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
251 <option value="paired">Paired End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
252 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
253 <param name="rsem_sam" type="data" format="rsem_sam" label="RSEM formatted SAM file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
254 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
255 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
256 <expand macro="rsem_options"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
257 <conditional name="rsem_outputs">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
258 <param name="result_bams" type="select" label="Create bam results files"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
259 help="In addition to the transcript-coordinate-based BAM file output, also output a BAM file with the read alignments in genomic coordinates" >
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
260 <option value="none">No BAM results files</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
261 <option value="default" selected="true">Transcript BAM results file</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
262 <option value="both">Transcript and genome BAM results files</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
263 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
264 <when value="none"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
265 <when value="default">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
266 <expand macro="sampling_for_bam"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
267 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
268 <when value="both">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
269 <expand macro="sampling_for_bam"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
270 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
271 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
272 </inputs>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
273 <stdio>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
274 <exit_code range="1:" level="fatal" description="Error Running RSEM" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
275 </stdio>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
276 <outputs>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
277 <data format="tabular" name="gene_abundances" label="${sample}.gene_abundances" from_work_dir="rsem_output.genes.results"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
278 <data format="tabular" name="isoform_abundances" label="${sample}.isoform_abundances" from_work_dir="rsem_output.isoforms.results"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
279 <data format="bam" name="transcript_bam" label="${sample}.transcript.bam" from_work_dir="rsem_output.transcript.bam" >
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
280 <filter>rsem_outputs['result_bams'] != "none"</filter>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
281 </data>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
282 <data format="bam" name="transcript_sorted_bam" label="${sample}.transcript.sorted.bam" from_work_dir="rsem_output.transcript.sorted.bam" >
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
283 <filter>rsem_outputs['result_bams'] != "none"</filter>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
284 </data>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
285 <data format="bam" name="genome_bam" label="${sample}.genome.bam" from_work_dir="rsem_output.genome.bam">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
286 <filter>rsem_outputs['result_bams'] == "both"</filter>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
287 </data>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
288 <data format="bam" name="genome_sorted_bam" label="${sample}.genome.sorted.bam" from_work_dir="rsem_output.genome.sorted.bam">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
289 <filter>rsem_outputs['result_bams'] == "both"</filter>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
290 </data>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
291 <data format="txt" name="log" label="${sample}.rsem_log"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
292 </outputs>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
293 <tests>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
294 <test>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
295 <param name="sample" value="rsem_sample"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
296 <param name="refSrc" value="history"/>
2
5949673f9e3e rename test data RSEM_ref_reference.rsem_ref
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
297 <param name="rsem_ref" value="RSEM_ref_reference.rsem_ref" ftype="rsem_ref"/>
0
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
298 <param name="format" value="fastq"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
299 <param name="matepair" value="single"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
300 <param name="singlefastq" value="test.fastq" ftype="fastqsanger"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
301 <param name="result_bams" value="none"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
302 <output name="gene_abundances">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
303 <assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
304 <has_text text="ENST00000423562,ENST00000438504,ENST00000488147,ENST00000538476,ENST00000541675" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
305 </assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
306 </output>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
307 <output name="isoform_abundances">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
308 <assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
309 <has_text text="ENST00000332831" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
310 </assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
311 </output>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
312 <output name="log">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
313 <assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
314 <has_text text="Expression Results are written" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
315 </assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
316 </output>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
317 </test>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
318 </tests>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
319 <help>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
320
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
321
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
322 RSEM HOME PAGE - http://deweylab.biostat.wisc.edu/rsem/
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
323
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
324 NAME
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
325 rsem-calculate-expression
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
326
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
327 SYNOPSIS
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
328 rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
329 rsem-calculate-expression [options] --paired-end upstream_read_file/s downstream_read_file/s reference_name sample_name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
330 rsem-calculate-expression [options] --sam/--bam [--paired-end] input reference_name sample_name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
331
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
332 ARGUMENTS
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
333 upstream_read_file/s
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
334 Comma-separated list of files containing single-end reads or
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
335 upstream reads for paired-end data. By default, these files are
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
336 assumed to be in FASTQ format. If the --no-qualities option is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
337 specified, then FASTA format is expected.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
338
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
339 downstream_read_file/s
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
340 Comma-separated list of files containing downstream reads which are
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
341 paired with the upstream reads. By default, these files are assumed
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
342 to be in FASTQ format. If the --no-qualities option is specified,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
343 then FASTA format is expected.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
344
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
345 input
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
346 SAM/BAM formatted input file. If "-" is specified for the filename,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
347 SAM/BAM input is instead assumed to come from standard input. RSEM
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
348 requires all alignments of the same read to be grouped together. For
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
349 paired-end reads, RSEM also requires the two mates of any alignment
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
350 be adjacent. See the DESCRIPTION section for how to make the input file obey
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
351 RSEM's requirements.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
352
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
353 reference_name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
354 The name of the reference used. The user must have run
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
355 'rsem-prepare-reference' with this reference_name before running
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
356 this program.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
357
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
358 sample_name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
359 The name of the sample analyzed. All output files are prefixed by
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
360 this name (e.g., sample_name.genes.results)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
361
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
362 OPTIONS
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
363
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
364 --paired-end
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
365 Input reads are paired-end reads. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
366
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
367 --no-qualities
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
368 Input reads do not contain quality scores. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
369
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
370 --strand-specific
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
371 The RNA-Seq protocol used to generate the reads is strand specific,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
372 i.e., all (upstream) reads are derived from the forward strand. This
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
373 option is equivalent to --forward-prob=1.0. With this option set, if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
374 RSEM runs the Bowtie aligner, the '--norc' Bowtie option will be
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
375 used, which disables alignment to the reverse strand of transcripts.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
376 (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
377
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
378 --sam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
379 Input file is in SAM format. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
380
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
381 --bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
382 Input file is in BAM format. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
383
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
384 --sam-header-info [file]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
385 RSEM reads header information from input by default. If this option
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
386 is on, header information is read from the specified file. For the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
387 format of the file, please see SAM official website. (Default: "")
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
388
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
389 -p/--num-threads [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
390 Number of threads to use. Both Bowtie and expression estimation will
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
391 use this many threads. (Default: 1)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
392
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
393 --no-bam-output
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
394 Do not output any BAM file. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
395
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
396 --output-genome-bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
397 Generate a BAM file, 'sample_name.genome.bam', with alignments
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
398 mapped to genomic coordinates and annotated with their posterior
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
399 probabilities. In addition, RSEM will call samtools (included in
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
400 RSEM package) to sort and index the bam file.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
401 'sample_name.genome.sorted.bam' and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
402 'sample_name.genome.sorted.bam.bai' will be generated. (Default:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
403 off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
404
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
405 --sampling-for-bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
406 When RSEM generates a BAM file, instead of outputting all alignments
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
407 a read has with their posterior probabilities, one alignment is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
408 sampled and output according to the posterior probabilities. If
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
409 the sampling result is that the read comes from the "noise"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
410 transcript, nothing is output. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
411
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
412 --calc-ci
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
413 Calculate 95% credibility intervals and posterior mean estimates.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
414 (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
415
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
416 --seed-length [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
417 Seed length used by the read aligner. Providing the correct value is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
418 important for RSEM. If RSEM runs Bowtie, it uses this value for
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
419 Bowtie's seed length parameter. Any read with its or at least one of
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
420 its mates' (for paired-end reads) length less than this value will
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
421 be ignored. If poly(A) tails are not added to the references, the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
422 minimum allowed value is 5, otherwise, the minimum allowed value is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
423 25. Note that this script will only check that the value is greater than or equal to
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
424 5 and give a warning message if the value is less than 25 but greater than or equal to
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
425 5. (Default: 25)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
426
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
427 --tag [string]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
428 The name of the optional field used in the SAM input for identifying
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
429 a read with too many valid alignments. The field should have the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
430 format [tagName]:i:[value], where a [value] bigger than 0 indicates
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
431 a read with too many alignments. (Default: "")
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
432
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
433 --bowtie-path [path]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
434 The path to the bowtie executables. (Default: the path to the bowtie
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
435 executables is assumed to be in the user's PATH environment
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
436 variable)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
437
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
438 --bowtie-n [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
439 (Bowtie parameter) max # of mismatches in the seed. (Range: 0-3,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
440 Default: 2)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
441
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
442 --bowtie-e [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
443 (Bowtie parameter) max sum of mismatch quality scores across the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
444 alignment. (Default: 99999999)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
445
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
446 --bowtie-m [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
447 (Bowtie parameter) suppress all alignments for a read if more than [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
448 valid alignments exist. (Default: 200)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
449
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
450 --bowtie-chunkmbs [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
451 (Bowtie parameter) memory allocated for best first alignment
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
452 calculation (Default: 0 - use bowtie's default)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
453
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
454 --phred33-quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
455 Input quality scores are encoded as Phred+33. (Default: on)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
456
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
457 --phred64-quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
458 Input quality scores are encoded as Phred+64 (default for GA
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
459 Pipeline ver. 1.3 or later). (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
460
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
461 --solexa-quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
462 Input quality scores are solexa encoded (from GA Pipeline ver. less
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
463 than 1.3). (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
464
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
465 --forward-prob [double]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
466 Probability of generating a read from the forward strand of a
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
467 transcript. Set to 1 for a strand-specific protocol where all
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
468 (upstream) reads are derived from the forward strand, 0 for a
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
469 strand-specific protocol where all (upstream) reads are derived from
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
470 the reverse strand, or 0.5 for a non-strand-specific protocol.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
471 (Default: 0.5)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
472
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
473 --fragment-length-min [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
474 Minimum read/insert length allowed. This is also the value for the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
475 bowtie -I option. (Default: 1)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
476
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
477 --fragment-length-max [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
478 Maximum read/insert length allowed. This is also the value for the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
479 bowtie -X option. (Default: 1000)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
480
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
481 --fragment-length-mean [double]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
482 (single-end data only) The mean of the fragment length distribution,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
483 which is assumed to be a Gaussian. (Default: -1, which disables use
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
484 of the fragment length distribution)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
485
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
486 --fragment-length-sd [double]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
487 (single-end data only) The standard deviation of the fragment length
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
488 distribution, which is assumed to be a Gaussian. (Default: 0, which
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
489 assumes that all fragments are of the same length, given by the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
490 rounded value of --fragment-length-mean)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
491
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
492 --estimate-rspd
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
493 Set this option if you want to estimate the read start position
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
494 distribution (RSPD) from data. Otherwise, RSEM will use a uniform
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
495 RSPD. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
496
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
497 --num-rspd-bins [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
498 Number of bins in the RSPD. Only relevant when '--estimate-rspd' is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
499 specified. Use of the default setting is recommended. (Default: 20)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
500
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
501 --ci-memory [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
502 Maximum size (in memory, MB) of the auxiliary buffer used for
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
503 computing credibility intervals (CI). Set it larger for a faster CI
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
504 calculation. However, leaving 2 GB memory free for other usage is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
505 recommended. (Default: 1024)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
506
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
507 --keep-intermediate-files
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
508 Keep temporary files generated by RSEM. RSEM creates a temporary
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
509 directory, 'sample_name.temp', into which it puts all intermediate
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
510 output files. If this directory already exists, RSEM overwrites all
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
511 files generated by previous RSEM runs inside of it. By default,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
512 after RSEM finishes, the temporary directory is deleted. Set this
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
513 option to prevent the deletion of this directory and the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
514 intermediate files inside of it. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
515
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
516 --time
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
517 Output time consumed by each step of RSEM to 'sample_name.time'.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
518 (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
519
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
520 -q/--quiet
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
521 Suppress the output of logging information. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
522
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
523 -h/--help
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
524 Show help information.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
525
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
526 DESCRIPTION
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
527 In its default mode, this program aligns input reads against a reference
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
528 transcriptome with Bowtie and calculates expression values using the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
529 alignments. RSEM assumes the data are single-end reads with quality
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
530 scores, unless the '--paired-end' or '--no-qualities' options are
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
531 specified. Users may use an alternative aligner by specifying one of the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
532 --sam and --bam options, and providing an alignment file in the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
533 specified format. However, users should make sure that they align
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
534 against the indices generated by 'rsem-prepare-reference' and the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
535 alignment file satisfies the requirements mentioned in ARGUMENTS
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
536 section.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
537
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
538 One simple way to make the alignment file satisfy RSEM's requirements
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
539 (assuming the aligner used puts the mates of a paired-end read adjacent) is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
540 to use the 'convert-sam-for-rsem' script. This script only accepts SAM format
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
541 files as input. If a BAM format file is obtained, please use samtools to
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
542 convert it to a SAM file first. For example, if '/ref/mouse_125' is the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
543 'reference_name' and the SAM file is named 'input.sam', you can run the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
544 following command:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
545
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
546 convert-sam-for-rsem /ref/mouse_125 input.sam -o input_for_rsem.sam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
547
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
548 For details, please refer to 'convert-sam-for-rsem's documentation page.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
549
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
550 The SAM/BAM format RSEM uses is v1.4. However, it is compatible with old
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
551 SAM/BAM format. Note that RSEM cannot recognize 0x100 in the FLAG field.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
552 In addition, RSEM requires that SEQ and QUAL are not '*'.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
553
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
554 The user must run 'rsem-prepare-reference' with the appropriate
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
555 reference before using this program.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
556
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
557 For single-end data, it is strongly recommended that the user provide
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
558 the fragment length distribution parameters (--fragment-length-mean and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
559 --fragment-length-sd). For paired-end data, RSEM will automatically
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
560 learn a fragment length distribution from the data.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
561
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
562 Please note that some of the default values for the Bowtie parameters
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
563 are not the same as those defined for Bowtie itself.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
564
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
565 The temporary directory and all intermediate files will be removed when
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
566 RSEM finishes unless '--keep-intermediate-files' is specified.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
567
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
568 With the '--calc-ci' option, 95% credibility intervals and posterior
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
569 mean estimates will be calculated in addition to maximum likelihood
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
570 estimates.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
571
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
572 OUTPUT
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
573 sample_name.genes.results
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
574 File containing gene level expression estimates. The format of each
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
575 line in this file is:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
576
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
577 gene_id expected_counts tau_value [pmc_value tau_pme_value
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
578 tau_ci_lower_bound tau_ci_upper_bound] transcript_id_list
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
579
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
580 Fields are separated by the tab character. Fields within "[]" are
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
581 only presented if '--calc-ci' is set. pme stands for posterior mean
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
582 estimation. pmc stands for posterior mean counts. ci_lower_bound(l)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
583 means the lower bound of the credibility intervals,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
584 ci_upper_bound(u) means the upper bound of the credibility
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
585 intervals. So the credibility interval is [l, u].
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
586 'transcript_id_list' is a comma-separated list of transcript_ids
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
587 belonging to the gene. If no gene information is provided, this file
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
588 has the same content as 'sample_name.isoforms.results'.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
589
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
590 sample_name.isoforms.results
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
591 File containing isoform level expression values. The format of each
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
592 line in this file is:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
593
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
594 transcript_id expected_counts tau_value [pmc_value tau_pme_value
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
595 tau_ci_lower_bound tau_ci_upper_bound] gene_id
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
596
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
597 Fields are separated by the tab character. 'gene_id' is the gene_id
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
598 of the gene which this transcript belongs to. If no gene information
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
599 is provided, 'gene_id' and 'transcript_id' are the same.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
600
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
601 sample_name.transcript.bam, sample_name.transcript.sorted.bam and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
602 sample_name.transcript.sorted.bam.bai
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
603 Only generated when --no-bam-output is not specified.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
604
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
605 'sample_name.transcript.bam' is a BAM-formatted file of read
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
606 alignments in transcript coordinates. The MAPQ field of each
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
607 alignment is set to min(100, floor(-10 * log10(1.0 - w) + 0.5)),
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
608 where w is the posterior probability of that alignment being the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
609 true mapping of a read. In addition, RSEM pads a new tag ZW:f:value,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
610 where value is a single-precision floating-point number representing the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
611 posterior probability.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
612
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
613 'sample_name.transcript.sorted.bam' and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
614 'sample_name.transcript.sorted.bam.bai' are the sorted BAM file and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
615 indices generated by samtools (included in RSEM package).
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
616
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
617 sample_name.genome.bam, sample_name.genome.sorted.bam and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
618 sample_name.genome.sorted.bam.bai
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
619 Only generated when --no-bam-output is not specified and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
620 --output-genome-bam is specified.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
621
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
622 'sample_name.genome.bam' is a BAM-formatted file of read alignments
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
623 in genomic coordinates. Alignments of reads that have identical
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
624 genomic coordinates (i.e., alignments to different isoforms that
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
625 share the same genomic region) are collapsed into one alignment. The
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
626 MAPQ field of each alignment is set to min(100, floor(-10 *
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
627 log10(1.0 - w) + 0.5)), where w is the posterior probability of that
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
628 alignment being the true mapping of a read. In addition, RSEM pads a
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
629 new tag ZW:f:value, where value is a single-precision floating-point
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
630 number representing the posterior probability. If an alignment is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
631 spliced, a XS:A:value tag is also added, where value is either '+'
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
632 or '-' indicating the strand of the transcript it aligns to.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
633
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
634 'sample_name.genome.sorted.bam' and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
635 'sample_name.genome.sorted.bam.bai' are the sorted BAM file and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
636 indices generated by samtools (included in RSEM package).
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
637
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
638 sample_name.sam.gz
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
639 Only generated when the input files are raw reads instead of SAM/BAM
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
640 format files
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
641
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
642 It is the gzipped SAM output produced by the bowtie aligner.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
643
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
644 sample_name.time
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
645 Only generated when --time is specified.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
646
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
647 It contains time (in seconds) consumed by aligning reads, estimating
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
648 expression levels and calculating credibility intervals.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
649
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
650 sample_name.stat
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
651 This is a folder instead of a file. All model related statistics are
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
652 stored in this folder. Use 'rsem-plot-model' to generate plots
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
653 using this folder.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
654
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
655 EXAMPLES
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
656 Assume the path to the bowtie executables is in the user's PATH
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
657 environment variable. Reference files are under '/ref' with name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
658 'mouse_125'.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
659
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
660 1) '/data/mmliver.fq', single-end reads with quality scores. Quality
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
661 scores are encoded as for 'GA pipeline version >= 1.3'. We want to use 8
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
662 threads and generate a genome BAM file:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
663
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
664 rsem-calculate-expression --phred64-quals \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
665 -p 8 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
666 --output-genome-bam \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
667 /data/mmliver.fq \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
668 /ref/mouse_125 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
669 mmliver_single_quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
670
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
671 2) '/data/mmliver_1.fq' and '/data/mmliver_2.fq', paired-end reads with
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
672 quality scores. Quality scores are in SANGER format. We want to use 8
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
673 threads and not generate a genome BAM file:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
674
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
675 rsem-calculate-expression -p 8 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
676 --paired-end \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
677 /data/mmliver_1.fq \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
678 /data/mmliver_2.fq \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
679 /ref/mouse_125 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
680 mmliver_paired_end_quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
681
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
682 3) '/data/mmliver.fa', single-end reads without quality scores. We want
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
683 to use 8 threads:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
684
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
685 rsem-calculate-expression -p 8 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
686 --no-qualities \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
687 /data/mmliver.fa \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
688 /ref/mouse_125 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
689 mmliver_single_without_quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
690
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
691 4) Data are the same as 1). We want to take a fragment length
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
692 distribution into consideration. We set the fragment length mean to 150
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
693 and the standard deviation to 35. In addition to a BAM file, we also
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
694 want to generate credibility intervals. We allow RSEM to use 1GB of
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
695 memory for CI calculation:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
696
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
697 rsem-calculate-expression --bowtie-path /sw/bowtie \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
698 --phred64-quals \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
699 --fragment-length-mean 150.0 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
700 --fragment-length-sd 35.0 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
701 -p 8 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
702 --output-genome-bam \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
703 --calc-ci \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
704 --ci-memory 1024 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
705 /data/mmliver.fq \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
706 /ref/mouse_125 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
707 mmliver_single_quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
708
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
709 5) '/data/mmliver_paired_end_quals.bam', paired-end reads with quality
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
710 scores. We want to use 8 threads:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
711
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
712 rsem-calculate-expression --paired-end \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
713 --bam \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
714 -p 8 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
715 /data/mmliver_paired_end_quals.bam \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
716 /ref/mouse_125 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
717 mmliver_paired_end_quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
718 </help>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
719 </tool>