annotate cufflinks_wrapper.xml @ 0:b50aacc8ae49

Uploaded tool tarball.
author devteam
date Tue, 01 Oct 2013 12:55:37 -0400
parents
children da11bfc10e81
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
1 <tool id="cufflinks" name="Cufflinks" version="0.0.6">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
2 <!-- Wrapper supports Cufflinks versions v1.3.0 and newer -->
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
3 <description>transcript assembly and FPKM (RPKM) estimates for RNA-Seq data</description>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
4 <requirements>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
5 <requirement type="package" version="2.1.1">cufflinks</requirement>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
6 </requirements>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
7 <version_command>cufflinks 2>&amp;1 | head -n 1</version_command>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
8 <command interpreter="python">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
9 cufflinks_wrapper.py
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
10 --input=$input
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
11 --assembled-isoforms-output=$assembled_isoforms
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
12 --num-threads="4"
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
13 -I $max_intron_len
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
14 -F $min_isoform_fraction
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
15 -j $pre_mrna_fraction
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
16 $effective_length_correction
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
17
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
18 ## Include reference annotation?
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
19 #if $reference_annotation.use_ref == "Use reference annotation":
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
20 -G $reference_annotation.reference_annotation_file
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
21 #end if
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
22 #if $reference_annotation.use_ref == "Use reference annotation guide":
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
23 -g $reference_annotation.reference_annotation_guide_file
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
24 #end if
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
25
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
26 ## Normalization?
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
27 #if str($do_normalization) == "Yes":
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
28 -N
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
29 #end if
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
30
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
31 ## Bias correction?
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
32 #if $bias_correction.do_bias_correction == "Yes":
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
33 -b
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
34 #if $bias_correction.seq_source.index_source == "history":
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
35 --ref_file=$bias_correction.seq_source.ref_file
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
36 #else:
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
37 --ref_file="None"
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
38 #end if
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
39 --dbkey=${input.metadata.dbkey}
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
40 --index_dir=${GALAXY_DATA_INDEX_DIR}
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
41 #end if
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
42
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
43 ## Multi-read correct?
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
44 #if str($multiread_correct) == "Yes":
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
45 -u
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
46 #end if
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
47
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
48 ## Include global model if available.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
49 #if $global_model:
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
50 --global_model=$global_model
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
51 #end if
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
52 </command>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
53 <inputs>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
54 <param format="sam,bam" name="input" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
55 <param name="max_intron_len" type="integer" value="300000" min="1" max="600000" label="Max Intron Length" help=""/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
56 <param name="min_isoform_fraction" type="float" value="0.10" min="0" max="1" label="Min Isoform Fraction" help=""/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
57 <param name="pre_mrna_fraction" type="float" value="0.15" min="0" max="1" label="Pre MRNA Fraction" help=""/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
58 <param name="do_normalization" type="select" label="Perform quartile normalization" help="Removes top 25% of genes from FPKM denominator to improve accuracy of differential expression calls for low abundance transcripts.">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
59 <option value="No" selected="true">No</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
60 <option value="Yes">Yes</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
61 </param>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
62 <conditional name="reference_annotation">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
63 <param name="use_ref" type="select" label="Use Reference Annotation">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
64 <option value="No" selected="true">No</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
65 <option value="Use reference annotation">Use reference annotation</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
66 <option value="Use reference annotation guide">Use reference annotation as guide</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
67 </param>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
68 <when value="No"></when>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
69 <when value="Use reference annotation">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
70 <param format="gff3,gtf" name="reference_annotation_file" type="data" label="Reference Annotation" help="Gene annotation dataset in GTF or GFF3 format."/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
71 </when>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
72 <when value="Use reference annotation guide">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
73 <param format="gff3,gtf" name="reference_annotation_guide_file" type="data" label="Reference Annotation" help="Gene annotation dataset in GTF or GFF3 format."/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
74 </when>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
75 </conditional>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
76 <conditional name="bias_correction">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
77 <param name="do_bias_correction" type="select" label="Perform Bias Correction" help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates.">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
78 <option value="No" selected="true">No</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
79 <option value="Yes">Yes</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
80 </param>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
81 <when value="Yes">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
82 <conditional name="seq_source">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
83 <param name="index_source" type="select" label="Reference sequence data">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
84 <option value="cached" selected="true">Locally cached</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
85 <option value="history">History</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
86 </param>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
87 <when value="cached"></when>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
88 <when value="history">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
89 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
90 </when>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
91 </conditional>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
92 </when>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
93 <when value="No"></when>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
94 </conditional>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
95
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
96 <param name="multiread_correct" type="select" label="Use multi-read correct" help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome.">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
97 <option value="No" selected="true">No</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
98 <option value="Yes">Yes</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
99 </param>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
100
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
101 <param name="effective_length_correction" type="select" label="Use effective length correction" help="Cufflinks will not employ its 'effective' length normalization to transcript FPKM.">
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
102 <option value="" selected="true">Yes</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
103 <option value="--no-effective-length-correction">No</option>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
104 </param>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
105
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
106 <param name="global_model" type="hidden_data" label="Global model (for use in Trackster)" optional="True"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
107 </inputs>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
108
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
109 <outputs>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
110 <data format="tabular" name="genes_expression" label="${tool.name} on ${on_string}: gene expression" from_work_dir="genes.fpkm_tracking"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
111 <data format="tabular" name="transcripts_expression" label="${tool.name} on ${on_string}: transcript expression" from_work_dir="isoforms.fpkm_tracking"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
112 <data format="gtf" name="assembled_isoforms" label="${tool.name} on ${on_string}: assembled transcripts"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
113 <data format="txt" name="total_map_mass" label="${tool.name} on ${on_string}: total map mass" hidden="true" from_work_dir="global_model.txt"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
114 </outputs>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
115
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
116 <trackster_conf>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
117 <action type="set_param" name="global_model" output_name="total_map_mass"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
118 </trackster_conf>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
119
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
120 <tests>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
121 <!--
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
122 Simple test that uses test data included with cufflinks.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
123 -->
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
124 <test>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
125 <param name="input" value="cufflinks_in.bam"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
126 <param name="max_intron_len" value="300000"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
127 <param name="min_isoform_fraction" value="0.05"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
128 <param name="pre_mrna_fraction" value="0.05"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
129 <param name="use_ref" value="No"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
130 <param name="do_normalization" value="No" />
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
131 <param name="do_bias_correction" value="No"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
132 <param name="multiread_correct" value="No"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
133 <param name="effective_length_correction" value="Yes"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
134 <output name="genes_expression" format="tabular" lines_diff="2" file="cufflinks_out3.fpkm_tracking"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
135 <output name="transcripts_expression" format="tabular" lines_diff="2" file="cufflinks_out2.fpkm_tracking"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
136 <output name="assembled_isoforms" file="cufflinks_out1.gtf"/>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
137 <output name="total_map_mass" file="cufflinks_out4.txt"/> <!-- name must match the hidden total_map_mass data output (written from global_model.txt) -->
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
138 </test>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
139 </tests>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
140
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
141 <help>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
142 **Cufflinks Overview**
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
143
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
144 Cufflinks_ assembles transcripts, estimates their abundances, and tests for differential expression and regulation in RNA-Seq samples. It accepts aligned RNA-Seq reads and assembles the alignments into a parsimonious set of transcripts. Cufflinks then estimates the relative abundances of these transcripts based on how many reads support each one. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
145
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
146 .. _Cufflinks: http://cufflinks.cbcb.umd.edu/
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
147
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
148 ------
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
149
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
150 **Know what you are doing**
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
151
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
152 .. class:: warningmark
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
153
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
154 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
155
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
156 .. __: http://cufflinks.cbcb.umd.edu/manual.html
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
157
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
158 ------
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
159
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
160 **Input formats**
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
161
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
162 Cufflinks takes a text file of SAM alignments as input. The RNA-Seq read mapper TopHat produces output in this format, and is recommended for use with Cufflinks. However Cufflinks will accept SAM alignments generated by any read mapper. Here's an example of an alignment Cufflinks will accept::
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
163
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
164 s6.25mer.txt-913508 16 chr1 4482736 255 14M431N11M * 0 0 \
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
165 CAAGATGCTAGGCAAGTCTTGGAAG IIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:-
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
166
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
167 Note the use of the custom tag XS. This attribute, which must have a value of "+" or "-", indicates which strand the RNA that produced this read came from. While this tag can be applied to any alignment, including unspliced ones, it must be present for all spliced alignment records (those with an 'N' operation in the CIGAR string).
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
168 The SAM file supplied to Cufflinks must be sorted by reference position. If you aligned your reads with TopHat, your alignments will be properly sorted already. If you used another tool, you may want to make sure they are properly sorted as follows::
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
169
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
170 sort -k 3,3 -k 4,4n hits.sam > hits.sam.sorted
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
171
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
172 NOTE: Cufflinks currently only supports SAM alignments with the CIGAR match ('M') and reference skip ('N') operations. Support for the other operations, such as insertions, deletions, and clipping, will be added in the future.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
173
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
174 ------
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
175
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
176 **Outputs**
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
177
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
178 Cufflinks produces three output files:
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
179
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
180 Transcripts and Genes:
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
181
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
182 This GTF file contains Cufflinks' assembled isoforms. The first 8 columns are standard GTF, and the last column contains attributes, some of which are also standardized (e.g. gene_id, transcript_id). There is one GTF record per row, and each record represents either a transcript or an exon within a transcript. The columns are defined as follows::
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
183
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
184 Column number Column name Example Description
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
185 -----------------------------------------------------
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
186 1 seqname chrX Chromosome or contig name
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
187 2 source Cufflinks The name of the program that generated this file (always 'Cufflinks')
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
188 3 feature exon The type of record (always either "transcript" or "exon").
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
189 4 start 77696957 The leftmost coordinate of this record (where 0 is the leftmost possible coordinate)
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
190 5 end 77712009 The rightmost coordinate of this record, inclusive.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
191 6 score 1000 The most abundant isoform for each gene is assigned a score of 1000. Minor isoforms are scored by the ratio (minor FPKM/major FPKM)
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
192 7 strand + Cufflinks' guess for which strand the isoform came from. Always one of '+', '-', or '.'
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
193 8 frame . Cufflinks does not predict where the start and stop codons (if any) are located within each transcript, so this field is not used.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
194 9 attributes See below
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
195
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
196 Each GTF record is decorated with the following attributes::
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
197
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
198 Attribute Example Description
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
199 -----------------------------------------
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
200 gene_id CUFF.1 Cufflinks gene id
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
201 transcript_id CUFF.1.1 Cufflinks transcript id
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
202 FPKM 101.267 Isoform-level relative abundance in Fragments Per Kilobase of exon model per Million mapped fragments
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
203 frac 0.7647 Reserved. Please ignore, as this attribute may be deprecated in the future
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
204 conf_lo 0.07 Lower bound of the 95% confidence interval of the abundance of this isoform, as a fraction of the isoform abundance. That is, lower bound = FPKM * (1.0 - conf_lo)
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
205 conf_hi 0.1102 Upper bound of the 95% confidence interval of the abundance of this isoform, as a fraction of the isoform abundance. That is, upper bound = FPKM * (1.0 + conf_hi)
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
206 cov 100.765 Estimate for the absolute depth of read coverage across the whole transcript
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
207
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
208
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
209 Transcripts only:
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
210 This file is simply a tab delimited file containing one row per transcript and with columns containing the attributes above. There are a few additional attributes not in the table above, but these are reserved for debugging, and may change or disappear in the future.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
211
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
212 Genes only:
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
213 This file contains gene-level coordinates and expression values.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
214
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
215 -------
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
216
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
217 **Cufflinks settings**
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
218
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
219 All of the options have a default value. You can change any of them. Most of the options in Cufflinks have been implemented here.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
220
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
221 ------
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
222
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
223 **Cufflinks parameter list**
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
224
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
225 This is a list of implemented Cufflinks options::
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
226
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
227 -m INT This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments selected at 300bp, where each end is 50bp, you should set -m to be 200. The default is 45bp.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
228 -s INT The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
229 -I INT The maximum intron length. Cufflinks will not report transcripts with introns longer than this, and will ignore SAM alignments with REF_SKIP CIGAR operations longer than this. The default is 300,000.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
230 -F After calculating isoform abundance for a gene, Cufflinks filters out transcripts that it believes are very low abundance, because isoforms expressed at extremely low levels often cannot reliably be assembled, and may even be artifacts of incompletely spliced precursors of processed transcripts. This parameter is also used to filter out introns that have far fewer spliced alignments supporting them. The default is 0.10, or 10% of the most abundant isoform (the major isoform) of the gene.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
231 -j Some RNA-Seq protocols produce a significant amount of reads that originate from incompletely spliced transcripts, and these reads can confound the assembly of fully spliced mRNAs. Cufflinks uses this parameter to filter out alignments that lie within the intronic intervals implied by the spliced alignments. The minimum depth of coverage in the intronic region covered by the alignment is divided by the number of spliced reads, and if the result is lower than this parameter value, the intronic alignments are ignored. The default is 15%.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
232 -G Tells Cufflinks to use the supplied reference annotation to estimate isoform expression. It will not assemble novel transcripts, and the program will ignore alignments not structurally compatible with any reference transcript.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
233 -N With this option, Cufflinks excludes the contribution of the top 25 percent most highly expressed genes from the number of mapped fragments used in the FPKM denominator. This can improve robustness of differential expression calls for less abundant genes and transcripts.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
234 </help>
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
235 </tool>