annotate csem.xml @ 7:32dd04a63316

Uploaded
author dongjun
date Mon, 12 Sep 2011 10:31:48 -0400
parents d58769ed6a44
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
1 <tool id="csem" name="CSEM: Multi-read Allocation for ChIP-seq" version="1.0.0">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
2
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
3 <description></description>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
4
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
5 <parallelism method="basic"></parallelism>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
6
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
7 <requirements>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
8 <requirement type="binary">csem</requirement>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
9 <requirement type="package">bowtie</requirement>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
10 </requirements>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
11
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
12 <command interpreter="perl">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
13 csem_wrapper.pl
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
14 ## Input file name
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
15 $InputParams.Input
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
16 ## Input file format (FASTA or FASTQ)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
17 $InputParams.InfileFormat
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
18 ## Output file name
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
19 $out_csem
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
20 ## Output file format
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
21 $OutfileFormat
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
22 ## Reference genome index for Bowtie
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
23 $index.fields.path$index
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
24 ## Generate pseudo-tags?
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
25 $pseudoTag
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
26 ## Bowtie settings (Max num of mismatches, Max num of aligned positions)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
27 #if $bowtieParams.bSettingsType == "preSet"
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
28 2
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
29 99
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
30 #else
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
31 $bowtieParams.Mismatch
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
32 $bowtieParams.SuppressAlign
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
33 #end if
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
34 ## CSEM settings (window size, number of iterations)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
35 #if $csemParams.cSettingsType == "preSet"
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
36 101
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
37 200
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
38 #else
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
39 $csemParams.windowSize
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
40 $csemParams.nIteration
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
41 #end if
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
42 ## Number of cores to use
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
43 8
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
44 </command>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
45
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
46 <inputs>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
47 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed - contact Galaxy team.">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
48 <options from_data_table="bowtie_indexes">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
49 <filter type="sort_by" column="2" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
50 <validator type="no_options" message="No indexes are available" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
51 </options>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
52 </param>
7
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
53 <conditional name="InputParams">
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
54 <param name="InfileFormat" type="select" label="Select file format to process" help="Bowtie accepts FASTA or FASTQ file formats.">
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
55 <option value="fasta">FASTA</option>
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
56 <option value="fastq">FASTQ</option>
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
57 </param>
5
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
58 <when value="fasta">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
59 <param name="Input" type="data" format="fasta" label="FASTA file"/>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
60 </when>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
61 <when value="fastq">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
62 <param name="Input" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="FASTQ file"/>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
63 </when>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
64 </conditional> <!-- InputParams -->
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
65 <param name="OutfileFormat" type="select" label="Select file format to export" help="Multi-read allocator can export results into BED or GFF file formats, or as a table.">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
66 <option value="bed">BED</option>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
67 <option value="gff">GFF</option>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
68 <option value="table">table</option>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
69 </param>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
70 <param name="pseudoTag" type="select" label="Generate pseudo-tags?" help="See section 'Pseudo-tags' in the help below for more details.">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
71 <option value="N">NO</option>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
72 <option value="Y">YES</option>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
73 </param>
7
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
74 <conditional name="bowtieParams">
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
75 <param name="bSettingsType" type="select" label="Bowtie settings to use" help="For most mapping applications, use the 'Commonly used' settings. If you want full control, use 'Full parameter list'.">
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
76 <option value="preSet">Commonly used</option>
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
77 <option value="full">Full parameter list</option>
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
78 </param>
5
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
79 <when value="preSet" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
80 <when value="full">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
81 <param name="Mismatch" type="integer" value="2" label="Maximum number of mismatches permitted (-v)" help="May be 0, 1, 2, or 3." />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
82 <param name="SuppressAlign" type="integer" value="99" label="Suppress all alignments for a read if more than n reportable alignments exist (-m)" help="99 is appropriate for most cases. Use -1 for no limit." />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
83 </when> <!-- full -->
7
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
84 </conditional> <!-- bowtieParams -->
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
85 <conditional name="csemParams">
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
86 <param name="cSettingsType" type="select" label="CSEM settings to use" help="For most multi-read allocation applications, use the 'Commonly used' settings. If you want full control, use 'Full parameter list'.">
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
87 <option value="preSet">Commonly used</option>
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
88 <option value="full">Full parameter list</option>
32dd04a63316 Uploaded
dongjun
parents: 6
diff changeset
89 </param>
5
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
90 <when value="preSet" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
91 <when value="full">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
92 <param name="windowSize" type="integer" value="101" label="Window size for the multi-read allocator" help="Set window size to some odd number close to the half of average fragment length." />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
93 <param name="nIteration" type="integer" value="200" label="Number of iterations for the multi-read allocator" help="200 is appropriate for most cases." />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
94 </when> <!-- full -->
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
95 </conditional> <!-- csemParams -->
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
96 </inputs>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
97
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
98 <outputs>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
99 <data format="tabular" name="out_csem">
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
100 <change_format>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
101 <when input="OutfileFormat" value="bed" format="bed" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
102 <when input="OutfileFormat" value="gff" format="gff" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
103 </change_format>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
104 </data>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
105 </outputs>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
106
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
107 <tests>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
108 <test>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
109 <param name="index" value="eschColi_K12" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
110 <param name="InfileFormat" value="fasta" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
111 <param name="Input" ftype="fasta" value="csem_test1_in.fa" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
112 <param name="OutfileFormat" value="bed" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
113 <param name="pseudoTag" value="N" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
114 <param name="bSettingsType" value="preSet" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
115 <param name="cSettingsType" value="preSet" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
116 <output name="out_csem" ftype="bed" file="csem_test1_out_original_sorted.bed" sort="True" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
117 </test>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
118 <test>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
119 <param name="index" value="eschColi_K12" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
120 <param name="InfileFormat" value="fastq" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
121 <param name="Input" ftype="fastq" value="csem_test1_in.fq" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
122 <param name="OutfileFormat" value="bed" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
123 <param name="pseudoTag" value="Y" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
124 <param name="bSettingsType" value="preSet" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
125 <param name="cSettingsType" value="preSet" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
126 <output name="out_csem" ftype="bed" file="csem_test1_out_pseudo_sorted.bed" sort="True" />
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
127 </test>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
128 </tests>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
129
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
130 <help>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
131
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
132 **What it does**
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
133
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
134 CSEM (ChIP-Seq multi-read allocation using the E-M algorithm) is a multi-read allocation algorithm. *Multi-reads* are the reads that map to multiple locations on the reference genome. The most common analysis of ChIP-seq data relies on using only reads that map uniquely to the relevant reference genome (*uni-reads*). This can lead to the omission of up to 30% of alignable reads. Chung et al. (2011) illustrated that incorporation of multi-reads significantly increases sequencing depths, leads to detection of novel peaks that are not otherwise identifiable with uni-reads, and improves detection of peaks in low-mappability regions. The computational and experimental results established that multi-reads can be of critical importance for studying DNA-protein interactions in highly repetitive regions of genomes with ChIP-seq experiments. Output from CSEM can be used with other peak callers such as MOSAiCS and MACS to identify peaks that are in both high- and low-mappability regions of genomes.
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
135
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
136 Please cite: Chung D, Kuan PF, Li B, SanalKumar R, Liang K, Bresnick E, Dewey C, and Keles S (2011),
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
137 "Discovering transcription factor binding sites in highly repetitive regions of genomes
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
138 with multi-read analysis of ChIP-Seq data," PLoS Computational Biology, 7(7): e1002111.
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
139
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
140 ------
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
141
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
142 **Input formats**
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
143
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
144 CSEM accepts short reads aligned using bowtie as input. Bowtie accepts single-end reads, in FASTA or FASTQ format, as input. Quality scores of reads are ignored.
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
145
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
146 ------
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
147
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
148 **Pseudo-tags**
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
149
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
150 For each read in the alignment file, CSEM estimates the fraction of the read allocated to each of its alignments. This fraction reflects the degree of confidence in each particular alignment. Currently, only the peak caller MOSAiCS can accept fractions of reads as input. However, you can incorporate multi-reads into ChIP-seq analysis with your favorite peak caller by utilizing this pseudo-tag functionality. Pseudo-tags are generated by assigning each multi-read to the location it maps to with the largest weight and filtering out multi-reads with weights less than 0.5. Although summarizing CSEM output as pseudo-tags decreases the number of utilized multi-reads, it still leads to a significant increase in the sequencing depth compared to using uni-reads alone and facilitates identification of peaks in repetitive regions.
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
151
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
152 ------
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
153
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
154 **Outputs**
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
155
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
156 Currently, results from CSEM can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single alignment. The lines of the output file are ordered such that all of the unique read alignments appear first. If pseudo-tags are generated, *FRAC* equals 1 for all reads if the output is a table, and *score* is set to 1000 for all reads in the BED and GFF formats.
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
157
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
158 If the output is a table, it has the following columns::
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
159
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
160 Column Description
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
161 -------- --------------------------------------------------------
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
162 1 RID ID of a read
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
163 2 CID Chromosome of the alignment
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
164 3 DIR Strand of the alignment (+ or -)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
165 4 POS Left-most position of the aligned read (the first base in a chromosome is numbered 1)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
166 5 FRAC Fraction of the read allocated to the alignment (which is 1 for uni-reads)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
167
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
168 If the output is in BED format, it has the following columns::
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
169
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
170 Column Description
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
171 ------------ --------------------------------------------------------
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
172 1 chrom Chromosome of the alignment
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
173 2 chromStart Start position of the aligned read (the first base in a chromosome is numbered 0)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
174 3 chromEnd End position of the aligned read (the first base in a chromosome is numbered 0)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
175 4 name ID of a read
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
176 5 score 1000 * fraction of the read allocated to the alignment (which is 1000 for uni-reads)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
177 6 strand Strand of the alignment (+ or -)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
178
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
179 If the output is in GFF format, it has the following columns::
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
180
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
181 Column Description
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
182 --------- --------------------------------------------------------
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
183 1 seqname Chromosome of the alignment
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
184 2 source Always "CSEM"
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
185 3 feature ID of a read
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
186 4 start Start position of the aligned read (the first base in a chromosome is numbered 1)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
187 5 end End position of the aligned read (the first base in a chromosome is numbered 1)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
188 6 score 1000 * fraction of the read allocated to the alignment (which is 1000 for uni-reads)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
189 7 strand Strand of the alignment (+ or -)
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
190 8 frame Always "."
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
191 9 group Always "."
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
192
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
193
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
194 </help>
bcea928ce5ef Uploaded
dongjun
parents:
diff changeset
195 </tool>