comparison cuffdiff_wrapper.xml @ 7:8da0eade1f59 draft

Uploaded
author devteam
date Fri, 19 Dec 2014 11:57:43 -0500
parents 604fa75232a2
children 88fcac97c2a0
comparison
equal deleted inserted replaced
6:604fa75232a2 7:8da0eade1f59
1 <tool id="cuffdiff" name="Cuffdiff" version="0.0.7"> 1 <tool id="cuffdiff" name="Cuffdiff" version="@VERSION@.0">
2 <!-- Wrapper supports Cuffdiff versions 2.1.0-2.1.1 -->
3 <description>find significant changes in transcript expression, splicing, and promoter use</description> 2 <description>find significant changes in transcript expression, splicing, and promoter use</description>
4 <requirements> 3 <expand macro="requirements" />
5 <requirement type="package" version="2.1.1">cufflinks</requirement> 4 <expand macro="stdio" />
6 </requirements> 5 <macros>
6 <import>cuff_macros.xml</import>
7 </macros>
7 <version_command>cuffdiff 2>&amp;1 | head -n 1</version_command> 8 <version_command>cuffdiff 2>&amp;1 | head -n 1</version_command>
8 <command> 9 <command>
9 cuffdiff 10 cuffdiff
10 --no-update-check 11 --no-update-check
11 --FDR=$fdr 12 --FDR=$fdr
12 --num-threads="\${GALAXY_SLOTS:-4}" 13 --num-threads="\${GALAXY_SLOTS:-4}"
13 --min-alignment-count=$min_alignment_count 14 --min-alignment-count=$min_alignment_count
14 --library-norm-method=$library_norm_method 15 --library-norm-method=$library_norm_method
15 --dispersion-method=$dispersion_method 16 --dispersion-method=$dispersion_method
16 17
17 ## Set advanced data parameters? 18 ## Set advanced SE data parameters?
18 #if $additional.sAdditional == "Yes": 19 #if $additional.sAdditional == "Yes":
19 -m $additional.frag_mean_len 20 -m $additional.frag_mean_len
20 -s $additional.frag_len_std_dev 21 -s $additional.frag_len_std_dev
21 #end if 22 #end if
22 23
23 ## Multi-read correct? 24 ## Multi-read correct?
24 #if str($multiread_correct) == "Yes": 25 #if $multiread_correct :
25 -u 26 -u
26 #end if 27 #end if
27 28
28 ## Bias correction? 29 ## Bias correction?
29 #if $bias_correction.do_bias_correction == "Yes": 30 #if $bias_correction.do_bias_correction == "Yes":
30 -b 31 -b
31 #if $bias_correction.seq_source.index_source == "history": 32 #if $bias_correction.seq_source.index_source == "history":
32 ## Custom genome from history. 33 ## Custom genome from history.
33 $bias_correction.seq_source.ref_file 34 $bias_correction.seq_source.ref_file
34 #else: 35 #else:
35 ## Built-in genome. 36 ## Built-in genome.
36 ${__get_data_table_entry__('fasta_indexes', 'value', $gtf_input.dbkey, 'path')} 37 "${ bias_correction.seq_source.index.fields.path }"
37 #end if 38 #end if
38 #end if 39 #end if
39 40
40 #set labels = '\'' + '\',\''.join( [ str( $condition.name ) for $condition in $conditions ] ) + '\'' 41 @CONDITION_LABELS@
41 --labels $labels 42
42 43 $length_correction
44
45 ## Set advanced parameters for cufflinks
46 #if $advanced_settings.sAdvanced == "Yes":
47 #if str($advanced_settings.library_type) != 'auto':
48 --library-type=$advanced_settings.library_type
49 #end if
50 #if $advanced_settings.mask_file:
51 --mask-file=$advanced_settings.mask_file
52 #end if
53 #if $advanced_settings.time_series:
54 --time-series
55 #end if
56 --max-mle-iterations=$advanced_settings.max_mle_iterations
57 $advanced_settings.hits_norm
58 --max-bundle-frags=$advanced_settings.max_bundle_frags
59 --num-frag-count-draws=$advanced_settings.num_frag_count_draws
60 --num-frag-assign-draws=$advanced_settings.num_frag_assign_draws
61 --min-reps-for-js-test=$advanced_settings.min_reps_for_js_test
62 #end if
43 ## Inputs. 63 ## Inputs.
44 $gtf_input 64 $gtf_input
45 #for $condition in $conditions: 65
46 #set samples = ','.join( [ str( $sample.sample ) for $sample in $condition.samples ] ) 66 @CONDITION_SAMPLES@
47 $samples
48 #end for
49 </command> 67 </command>
50 <inputs> 68 <inputs>
51 <param format="gtf,gff3" name="gtf_input" type="data" label="Transcripts" help="A transcript GFF3 or GTF file produced by cufflinks, cuffcompare, or other source."/> 69 <param format="gtf,gff3" name="gtf_input" type="data" label="Transcripts" help="A transcript GFF3 or GTF file produced by cufflinks, cuffcompare, or other source."/>
52 70 <expand macro="condition_inputs" />
53 <repeat name="conditions" title="Condition" min="2">
54 <param name="name" title="Condition name" type="text" label="Name"/>
55 <repeat name="samples" title="Replicate" min="1">
56 <param name="sample" label="Add replicate" type="data" format="sam,bam"/>
57 </repeat>
58 </repeat>
59
60 <param name="library_norm_method" type="select" label="Library normalization method"> 71 <param name="library_norm_method" type="select" label="Library normalization method">
61 <option value="geometric" selected="True">geometric</option> 72 <option value="geometric" selected="True">geometric</option>
62 <option value="classic-fpkm">classic-fpkm</option> 73 <option value="classic-fpkm">classic-fpkm</option>
63 <option value="quartile">quartile</option> 74 <option value="quartile">quartile</option>
64 </param> 75 </param>
65 76
66 <param name="dispersion_method" type="select" label="Dispersion estimation method" help="If using only one sample per condition, you must use 'blind.'"> 77 <param name="dispersion_method" type="select" label="Dispersion estimation method" help="If using only one sample per condition, you must use 'blind.'">
67 <option value="pooled" selected="True">pooled</option> 78 <option value="pooled" selected="True">pooled</option>
68 <option value="per-condition">per-condition</option> 79 <option value="per-condition">per-condition</option>
69 <option value="blind">blind</option> 80 <option value="blind">blind</option>
81 <option value="poisson">poisson</option>
70 </param> 82 </param>
71 83
72 <param name="fdr" type="float" value="0.05" label="False Discovery Rate" help="The allowed false discovery rate."/> 84 <param name="fdr" type="float" value="0.05" label="False Discovery Rate" help="The allowed false discovery rate."/>
73 85 <param name="min_alignment_count" type="integer" value="10" label="Min Alignment Count"
74 <param name="min_alignment_count" type="integer" value="10" label="Min Alignment Count" help="The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples."/> 86 help="The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples."/>
75 87 <param name="multiread_correct" type="boolean" label="Use multi-read correct"
76 <param name="multiread_correct" type="select" label="Use multi-read correct" help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome."> 88 help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome." />
77 <option value="No" selected="true">No</option>
78 <option value="Yes">Yes</option>
79 </param>
80 89
81 <conditional name="bias_correction"> 90 <conditional name="bias_correction">
82 <param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates."> 91 <param name="do_bias_correction" type="select" label="Perform Bias Correction"
92 help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates.">
83 <option value="No">No</option> 93 <option value="No">No</option>
84 <option value="Yes">Yes</option> 94 <option value="Yes">Yes</option>
85 </param> 95 </param>
86 <when value="Yes"> 96 <when value="Yes">
87 <conditional name="seq_source"> 97 <conditional name="seq_source">
107 117
108 <param name="include_read_group_files" type="select" label="Include Read Group Datasets" help="Read group datasets provide information on replicates."> 118 <param name="include_read_group_files" type="select" label="Include Read Group Datasets" help="Read group datasets provide information on replicates.">
109 <option value="No" selected="true">No</option> 119 <option value="No" selected="true">No</option>
110 <option value="Yes">Yes</option> 120 <option value="Yes">Yes</option>
111 </param> 121 </param>
122
123 <param name="include_count_files" type="select" label="Include Count Based output files"
124 help="Cuffdiff estimates the number of fragments that originated from each transcript, primary transcript, and gene in each sample. Primary transcript and gene counts are computed by summing the counts of transcripts in each primary transcript group or gene group.">
125 <option value="No" selected="true">No</option>
126 <option value="Yes">Yes</option>
127 </param>
128
129 <param name="length_correction" type="select" label="apply length correction" help="mode of length normalization to transcript fpkm.">
130 <option value="" selected="true">cufflinks effective length correction</option>
131 <option value="--no-effective-length-correction">standard length correction</option>
132 <option value="--no-length-correction">no length correction at all (use raw counts)</option>
133 </param>
112 134
113 <conditional name="additional"> 135 <conditional name="additional">
114 <param name="sAdditional" type="select" label="Set Additional Parameters? (not recommended for paired-end reads)"> 136 <param name="sAdditional" type="select" label="Set Additional Parameters for single end reads? (not recommended for paired-end reads)">
115 <option value="No">No</option> 137 <option value="No" selected="True">No</option>
116 <option value="Yes">Yes</option> 138 <option value="Yes">Yes</option>
117 </param> 139 </param>
118 <when value="No"></when> 140 <when value="No"></when>
119 <when value="Yes"> 141 <when value="Yes">
120 <param name="frag_mean_len" type="integer" value="200" label="Average Fragment Length"/> 142 <param name="frag_mean_len" type="integer" value="200" label="Average Fragment Length"/>
121 <param name="frag_len_std_dev" type="integer" value="80" label="Fragment Length Standard Deviation"/> 143 <param name="frag_len_std_dev" type="integer" value="80" label="Fragment Length Standard Deviation"/>
122 </when> 144 </when>
123 </conditional> 145 </conditional>
146
147 <conditional name="advanced_settings">
148 <param name="sAdvanced" type="select" label="Set Advanced Cuffdiff parameters? ">
149 <option value="No" selected="True">No</option>
150 <option value="Yes">Yes</option>
151 </param>
152 <when value="No"></when>
153 <when value="Yes">
154 <param type="select" name="library_type" label="Library prep used for input reads" help="">
155 <option value="auto" selected="True">Auto Detect</option>
156 <option value="ff-firststrand">ff-firststrand</option>
157 <option value="ff-secondstrand">ff-secondstrand</option>
158 <option value="ff-unstranded">ff-unstranded</option>
159 <option value="fr-firststrand">fr-firststrand</option>
160 <option value="fr-secondstrand">fr-secondstrand</option>
161 <option value="fr-unstranded" >fr-unstranded</option>
162 <option value="transfrags">transfrags</option>
163 </param>
164 <param name="mask_file" type="data" format="gtf,gff3" label="Mask File" help="Ignore all alignment within transcripts in this file" optional="True" />
165 <param name="time_series" type="boolean" label="Perform Time Series analysis"
166 help="Instructs Cuffdiff to analyze the provided samples as a time series, rather than testing for differences between all pairs of samples. Samples should be provided in increasing time order at the command line (e.g first time point SAM, second timepoint SAM, etc.)" />
167 <param name="max_mle_iterations" value="5000" type="integer" label="Max MLE iterations" help="Maximum iterations allowed for Maximal Likelyhood Estimation calculations" />
168 <param name="hits_norm" type="select" label="Hits included in normalization" help="All Hits: With this option, Cufflinks counts all fragments, including those not compatible with any reference transcript, towards the number of mapped fragments used in the FPKM denominator. Compatible Hits: With this option, Cufflinks counts only those fragments compatible with some reference transcript towards the number of mapped fragments used in the FPKM denominator. Using this mode is generally recommended in Cuffdiff to reduce certain types of bias caused by differential amounts of ribosomal reads which can create the impression of falsely differentially expressed genes. It is active by default." >
169 <option value="--compatible-hits-norm" selected="True">Compatible Hits</option>
170 <option value="--total-hits-norm">All Hits</option>
171 </param>
172 <param name="max_bundle_frags" type="integer" value="500000" label="Maximum number of fragments per locus"
173 help="Sets the maximum number of fragments a locus may have before being skipped. Skipped loci are listed in skipped.gtf. Default: 500,000" />
174 <param name="num_frag_count_draws" type="integer" value="100" label="Number of fragment generation samples"
175 help="Cuffdiff will make this many draws from each transcript's predicted negative binomial random numbder generator. Each draw is a number of fragments that will be probabilistically assigned to the transcripts in the transcriptome. Used to estimate the variance-covariance matrix on assigned fragment counts. Default: 100."/>
176 <param name="num_frag_assign_draws" type="integer" value="50" label="Number of fragment assignment samples per generation" help="For each fragment drawn from a transcript, Cuffdiff will assign it this many times (probabilistically), thus estimating the assignment uncertainty for each transcript. Used to estimate the variance-covariance matrix on assigned fragment counts. Default: 50."/>
177 <param name="min_reps_for_js_test" type="integer" value="3" label="Minimal Replicates for isoform shift testing" help="Cuffdiff won't test genes for differential regulation unless the conditions in question have at least this many replicates. Default: 3." />
178 </when>
179 </conditional>
124 </inputs> 180 </inputs>
125
126 <stdio>
127 <regex match="Error" source="both" level="fatal" description="Error"/>
128 <regex match=".*" source="both" level="log" description="tool progress"/>
129 </stdio>
130
131 <outputs> 181 <outputs>
132 <!-- Optional read group datasets. --> 182 <!-- Optional read group datasets. -->
133 <data format="tabular" name="isoforms_read_group" label="${tool.name} on ${on_string}: isoforms read group tracking" from_work_dir="isoforms.read_group_tracking" > 183 <data format="tabular" name="isoforms_read_group" label="${tool.name} on ${on_string}: isoforms read group tracking" from_work_dir="isoforms.read_group_tracking" >
134 <filter>(params['include_read_group_files'] == 'Yes'</filter> 184 <filter>(include_read_group_files == 'Yes')</filter>
135 </data> 185 </data>
136 <data format="tabular" name="genes_read_group" label="${tool.name} on ${on_string}: genes read group tracking" from_work_dir="genes.read_group_tracking" > 186 <data format="tabular" name="genes_read_group" label="${tool.name} on ${on_string}: genes read group tracking" from_work_dir="genes.read_group_tracking" >
137 <filter>(params['include_read_group_files'] == 'Yes'</filter> 187 <filter>(include_read_group_files == 'Yes')</filter>
138 </data> 188 </data>
139 <data format="tabular" name="cds_read_group" label="${tool.name} on ${on_string}: CDs read group tracking" from_work_dir="cds.read_group_tracking" > 189 <data format="tabular" name="cds_read_group" label="${tool.name} on ${on_string}: CDs read group tracking" from_work_dir="cds.read_group_tracking" >
140 <filter>(params['include_read_group_files'] == 'Yes'</filter> 190 <filter>(include_read_group_files == 'Yes')</filter>
141 </data> 191 </data>
142 <data format="tabular" name="tss_groups_read_group" label="${tool.name} on ${on_string}: TSS groups read group tracking" from_work_dir="tss_groups.read_group_tracking" > 192 <data format="tabular" name="tss_groups_read_group" label="${tool.name} on ${on_string}: TSS groups read group tracking" from_work_dir="tss_groups.read_group_tracking" >
143 <filter>(params['include_read_group_files'] == 'Yes'</filter> 193 <filter>(include_read_group_files == 'Yes')</filter>
144 </data> 194 </data>
145 195 <data format="text" name="read_group_info" label="${tool.name} on ${on_string}: read group info" from_work_dir="read_groups.info" >
196 <filter>(include_read_group_files == 'Yes')</filter>
197 </data>
198 <data format="text" name="run_info" label="${tool.name} on ${on_string}: run info" from_work_dir="run.info" >
199 <filter>(include_read_group_files == 'Yes')</filter>
200 </data>
146 <!-- Standard datasets. --> 201 <!-- Standard datasets. -->
147 <data format="tabular" name="splicing_diff" label="${tool.name} on ${on_string}: splicing differential expression testing" from_work_dir="splicing.diff" /> 202 <data format="tabular" name="splicing_diff" label="${tool.name} on ${on_string}: splicing differential expression testing" from_work_dir="splicing.diff" />
148 <data format="tabular" name="promoters_diff" label="${tool.name} on ${on_string}: promoters differential expression testing" from_work_dir="promoters.diff" /> 203 <data format="tabular" name="promoters_diff" label="${tool.name} on ${on_string}: promoters differential expression testing" from_work_dir="promoters.diff" />
149 <data format="tabular" name="cds_diff" label="${tool.name} on ${on_string}: CDS overloading diffential expression testing" from_work_dir="cds.diff" /> 204 <data format="tabular" name="cds_diff" label="${tool.name} on ${on_string}: CDS overloading diffential expression testing" from_work_dir="cds.diff" />
150 <data format="tabular" name="cds_exp_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM differential expression testing" from_work_dir="cds_exp.diff" /> 205 <data format="tabular" name="cds_exp_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM differential expression testing" from_work_dir="cds_exp.diff" />
151 <data format="tabular" name="cds_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM tracking" from_work_dir="cds.fpkm_tracking" /> 206 <data format="tabular" name="cds_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM tracking" from_work_dir="cds.fpkm_tracking" />
207 <data format="tabular" name="cds_count_tracking" label="${tool.name} on ${on_string}: CDS count tracking" from_work_dir="cds.count_tracking" >
208 <filter>(include_count_files == 'Yes')</filter>
209 </data>
210
152 <data format="tabular" name="tss_groups_exp" label="${tool.name} on ${on_string}: TSS groups differential expression testing" from_work_dir="tss_group_exp.diff" /> 211 <data format="tabular" name="tss_groups_exp" label="${tool.name} on ${on_string}: TSS groups differential expression testing" from_work_dir="tss_group_exp.diff" />
153 <data format="tabular" name="tss_groups_fpkm_tracking" label="${tool.name} on ${on_string}: TSS groups FPKM tracking" from_work_dir="tss_groups.fpkm_tracking" /> 212 <data format="tabular" name="tss_groups_fpkm_tracking" label="${tool.name} on ${on_string}: TSS groups FPKM tracking" from_work_dir="tss_groups.fpkm_tracking" />
213 <data format="tabular" name="tss_groups_count_tracking" label="${tool.name} on ${on_string}: TSS count FPKM tracking" from_work_dir="tss_groups.count_tracking" >
214 <filter>(include_count_files == 'Yes')</filter>
215 </data>
216
154 <data format="tabular" name="genes_exp" label="${tool.name} on ${on_string}: gene differential expression testing" from_work_dir="gene_exp.diff" /> 217 <data format="tabular" name="genes_exp" label="${tool.name} on ${on_string}: gene differential expression testing" from_work_dir="gene_exp.diff" />
155 <data format="tabular" name="genes_fpkm_tracking" label="${tool.name} on ${on_string}: gene FPKM tracking" from_work_dir="genes.fpkm_tracking" /> 218 <data format="tabular" name="genes_fpkm_tracking" label="${tool.name} on ${on_string}: gene FPKM tracking" from_work_dir="genes.fpkm_tracking" />
219 <data format="tabular" name="genes_count_tracking" label="${tool.name} on ${on_string}: gene count tracking" from_work_dir="genes.count_tracking" >
220 <filter>(include_count_files == 'Yes')</filter>
221 </data>
222
156 <data format="tabular" name="isoforms_exp" label="${tool.name} on ${on_string}: transcript differential expression testing" from_work_dir="isoform_exp.diff" /> 223 <data format="tabular" name="isoforms_exp" label="${tool.name} on ${on_string}: transcript differential expression testing" from_work_dir="isoform_exp.diff" />
157 <data format="tabular" name="isoforms_fpkm_tracking" label="${tool.name} on ${on_string}: transcript FPKM tracking" from_work_dir="isoforms.fpkm_tracking" /> 224 <data format="tabular" name="isoforms_fpkm_tracking" label="${tool.name} on ${on_string}: transcript FPKM tracking" from_work_dir="isoforms.fpkm_tracking" />
225 <data format="tabular" name="isoforms_count_tracking" label="${tool.name} on ${on_string}: transcript count tracking" from_work_dir="isoforms.count_tracking" >
226 <filter>(include_count_files == 'Yes')</filter>
227 </data>
158 </outputs> 228 </outputs>
159 229
160 <tests> 230 <tests>
161 <test> 231 <test>
162 <!-- 232 <!--
163 cuffdiff cuffcompare_out5.gtf cuffdiff_in1.sam cuffdiff_in2.sam 233 cuffdiff cuffcompare_out5.gtf cuffdiff_in1.sam cuffdiff_in2.sam
164 --> 234 -->
165 <!--
166 NOTE: as of version 0.0.6 of the wrapper, tests cannot be run because multiple inputs to a repeat
167 element are not supported.
168 <param name="gtf_input" value="cuffcompare_out5.gtf" ftype="gtf" /> 235 <param name="gtf_input" value="cuffcompare_out5.gtf" ftype="gtf" />
169 <param name="do_groups" value="No" /> 236 <conditional name="in_type">
170 <param name="aligned_reads1" value="cuffdiff_in1.sam" ftype="sam" /> 237 <param name="set_in_type" value="BAM" />
171 <param name="aligned_reads2" value="cuffdiff_in2.sam" ftype="sam" /> 238 <repeat name="conditions">
239 <param name="name" value="q1" />
240 <repeat name="samples">
241 <param name="sample" value="cuffdiff_in1.sam" ftype="sam" />
242 </repeat>
243 </repeat>
244 <repeat name="conditions">
245 <param name="name" value="q2" />
246 <repeat name="samples">
247 <param name="sample" value="cuffdiff_in2.sam" ftype="sam" />
248 </repeat>
249 </repeat>
250 </conditional>
172 <param name="fdr" value="0.05" /> 251 <param name="fdr" value="0.05" />
173 <param name="min_alignment_count" value="0" /> 252 <param name="min_alignment_count" value="0" />
174 <param name="do_bias_correction" value="No" /> 253 <param name="do_bias_correction" value="No" />
175 <param name="do_normalization" value="No" /> 254 <param name="do_normalization" value="No" />
176 <param name="multiread_correct" value="No"/> 255 <param name="multiread_correct" value="No"/>
184 <output name="tss_groups_fpkm_tracking" file="cuffdiff_out7.txt"/> 263 <output name="tss_groups_fpkm_tracking" file="cuffdiff_out7.txt"/>
185 <output name="genes_exp" file="cuffdiff_out2.txt" lines_diff="200"/> 264 <output name="genes_exp" file="cuffdiff_out2.txt" lines_diff="200"/>
186 <output name="genes_fpkm_tracking" file="cuffdiff_out6.txt" lines_diff="200"/> 265 <output name="genes_fpkm_tracking" file="cuffdiff_out6.txt" lines_diff="200"/>
187 <output name="isoforms_exp" file="cuffdiff_out1.txt" lines_diff="200"/> 266 <output name="isoforms_exp" file="cuffdiff_out1.txt" lines_diff="200"/>
188 <output name="isoforms_fpkm_tracking" file="cuffdiff_out5.txt" lines_diff="200"/> 267 <output name="isoforms_fpkm_tracking" file="cuffdiff_out5.txt" lines_diff="200"/>
189 -->
190 </test> 268 </test>
191 </tests> 269 </tests>
192
193 <help> 270 <help>
194 **Cuffdiff Overview** 271 **Cuffdiff Overview**
195 272
196 Cuffdiff is part of Cufflinks_. Cuffdiff finds significant changes in transcript expression, splicing, and promoter use. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621 273 Cuffdiff is part of Cufflinks_. Cuffdiff finds significant changes in transcript expression, splicing, and promoter use. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
197 274
198 .. _Cufflinks: http://cufflinks.cbcb.umd.edu/ 275 .. _Cufflinks: http://cole-trapnell-lab.github.io/cufflinks/
199 276
200 ------ 277 ------
201 278
202 **Know what you are doing** 279 **Know what you are doing**
203 280
204 .. class:: warningmark 281 .. class:: warningmark
205 282
206 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. 283 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
207 284
208 .. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffdiff 285 .. __: http://cole-trapnell-lab.github.io/cufflinks/cuffdiff/
209 286
210 ------ 287 ------
211 288
212 **Input format** 289 **Input format**
213 290
217 294
218 **Outputs** 295 **Outputs**
219 296
220 Cuffdiff produces many output files: 297 Cuffdiff produces many output files:
221 298
222 1. Transcript FPKM expression tracking. 299 1. Transcript FPKM (+count) expression tracking.
223 2. Gene FPKM expression tracking; tracks the summed FPKM of transcripts sharing each gene_id 300 2. Gene FPKM (+count) expression tracking; tracks the summed FPKM of transcripts sharing each gene_id
224 3. Primary transcript FPKM tracking; tracks the summed FPKM of transcripts sharing each tss_id 301 3. Primary transcript FPKM (+count) tracking; tracks the summed FPKM of transcripts sharing each tss_id
225 4. Coding sequence FPKM tracking; tracks the summed FPKM of transcripts sharing each p_id, independent of tss_id 302 4. Coding sequence FPKM (+count) tracking; tracks the summed FPKM of transcripts sharing each p_id, independent of tss_id
226 5. Transcript differential FPKM. 303 5. Transcript differential FPKM.
227 6. Gene differential FPKM. Tests differences in the summed FPKM of transcripts sharing each gene_id 304 6. Gene differential FPKM. Tests differences in the summed FPKM of transcripts sharing each gene_id
228 7. Primary transcript differential FPKM. Tests differences in the summed FPKM of transcripts sharing each tss_id 305 7. Primary transcript differential FPKM. Tests differences in the summed FPKM of transcripts sharing each tss_id
229 8. Coding sequence differential FPKM. Tests differences in the summed FPKM of transcripts sharing each p_id independent of tss_id 306 8. Coding sequence differential FPKM. Tests differences in the summed FPKM of transcripts sharing each p_id independent of tss_id
230 9. Differential splicing tests: this tab delimited file lists, for each primary transcript, the amount of overloading detected among its isoforms, i.e. how much differential splicing exists between isoforms processed from a single primary transcript. Only primary transcripts from which two or more isoforms are spliced are listed in this file. 307 9. Differential splicing tests: this tab delimited file lists, for each primary transcript, the amount of overloading detected among its isoforms, i.e. how much differential splicing exists between isoforms processed from a single primary transcript. Only primary transcripts from which two or more isoforms are spliced are listed in this file.
241 318
242 **Cuffdiff parameter list** 319 **Cuffdiff parameter list**
243 320
244 This is a list of implemented Cuffdiff options:: 321 This is a list of implemented Cuffdiff options::
245 322
246 -m INT Average fragement length; default 200 323 -m INT Average fragment length (SE reads); default 200
247 -s INT Fragment length standard deviation; default 80 324 -s INT Fragment length standard deviation (SE reads); default 80
248 -c INT The minimum number of alignments in a locus needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not significant, and the locus' observed changes don't contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads). 325 -c INT The minimum number of alignments in a locus needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not significant, and the locus' observed changes don't contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).
249 --FDR FLOAT The allowed false discovery rate. The default is 0.05. 326 --FDR FLOAT The allowed false discovery rate. The default is 0.05.
250 --num-importance-samples INT Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000
251 --max-mle-iterations INT Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000 327 --max-mle-iterations INT Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000
252 -N With this option, Cufflinks excludes the contribution of the top 25 percent most highly expressed genes from the number of mapped fragments used in the FPKM denominator. This can improve robustness of differential expression calls for less abundant genes and transcripts. 328 --library-norm-method Library Normalization method : Geometric (default), classic-fpkm, quartile
253 329 --dispersion-method Dispersion estimation method : Pooled (default), per-condition, blind, poisson
330 -u Multi read correction tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome.
331 -b ref.fasta bias correction. Bias detection and correction can significantly improve accuracy of transcript abundance estimates.
332 --no-effective-length-correction Use standard length correction
333 --no-length-correction Disable all length correction.
334 --library-type ff-firststrand,ff-secondstrand,ff-unstranded,fr-firststrand,fr-secondstrand,fr-unstranded,transfrags
335 --mask-file (gff3/gtf) Ignore all alignment within transcripts in this file
336 --time-series Treat provided sam files as time series
337 --compatible-hits-norm With this option, Cufflinks counts only those fragments compatible with some reference transcript towards the number of mapped fragments used in the FPKM denominator. Using this mode is generally recommended in Cuffdiff to reduce certain types of bias caused by differential amounts of ribosomal reads which can create the impression of falsely differentially expressed genes.
338 --total-hits-norm With this option, Cufflinks counts all fragments, including those not compatible with any reference transcript, towards the number of mapped fragments used in the FPKM denominator
339 --max-bundle-frags Sets the maximum number of fragments a locus may have before being skipped. Skipped loci are listed in skipped.gtf.
340 --num-frag-count-draws Cuffdiff will make this many draws from each transcript's predicted negative binomial random number generator. Each draw is a number of fragments that will be probabilistically assigned to the transcripts in the transcriptome. Used to estimate the variance-covariance matrix on assigned fragment counts.
341 --num-frag-assign-draws For each fragment drawn from a transcript, Cuffdiff will assign it this many times (probabilistically), thus estimating the assignment uncertainty for each transcript. Used to estimate the variance-covariance matrix on assigned fragment counts.
342 --min-reps-for-js-test Cuffdiff won't test genes for differential regulation unless the conditions in question have at least this many replicates.
254 </help> 343 </help>
344 <citations>
345 <citation type="doi">10.1038/nbt.1621</citation>
346 </citations>
255 </tool> 347 </tool>