annotate csem.xml @ 9:5fd51ab70dad

Uploaded
author dongjun
date Thu, 03 Nov 2011 21:16:36 -0400
parents
children b0290425de13
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
1 <tool id="csem" name="CSEM: Multi-read Allocation for ChIP-seq" version="1.0.0">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
2
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
3 <description></description>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
4
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
5 <parallelism method="basic"></parallelism>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
6
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
7 <requirements>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
8 <requirement type="binary">csem</requirement>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
9 <requirement type="package">bowtie</requirement>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
10 </requirements>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
11
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
12 <command interpreter="perl">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
13 csem_wrapper.pl
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
14 ## Input file name
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
15 $InputParams.Input
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
16 ## Input file format (FASTA or FASTQ)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
17 $InputParams.InfileFormat
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
18 ## Output file name
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
19 $out_csem
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
20 ## Output file format
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
21 $OutfileFormat
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
22 ## Reference genome index for Bowtie
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
23 $index.fields.path
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
24 ## Generate pseudo-tags?
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
25 $pseudoTag
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
26 ## Bowtie settings (Max num of mismatches, Max num of aligned positions)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
27 #if $bowtieParams.bSettingsType == "preSet"
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
28 2
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
29 99
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
30 #else
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
31 $bowtieParams.Mismatch
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
32 $bowtieParams.SuppressAlign
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
33 #end if
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
34 ## CSEM settings (window size, number of iterations)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
35 #if $csemParams.cSettingsType == "preSet"
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
36 201
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
37 200
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
38 #else
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
39 $csemParams.windowSize
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
40 $csemParams.nIteration
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
41 #end if
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
42 ## Number of cores to use
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
43 8
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
44 </command>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
45
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
46 <inputs>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
47 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed - contact Galaxy team.">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
48 <options from_data_table="bowtie_indexes">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
49 <filter type="sort_by" column="2" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
50 <validator type="no_options" message="No indexes are available" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
51 </options>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
52 </param>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
53 <conditional name="InputParams">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
54 <param name="InfileFormat" type="select" label="Select file format to process" help="Bowtie accepts FASTA or FASTQ file formats.">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
55 <option value="fasta">FASTA</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
56 <option value="fastq">FASTQ</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
57 </param>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
58 <when value="fasta">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
59 <param name="Input" type="data" format="fasta" label="FASTA file"/>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
60 </when>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
61 <when value="fastq">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
62 <param name="Input" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="FASTQ file"/>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
63 </when>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
64 </conditional> <!-- InputParams -->
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
65 <param name="OutfileFormat" type="select" label="Select file format to export" help="Multi-read allocator can export results into BED or GFF file formats, or as a table.">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
66 <option value="bed">BED</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
67 <option value="gff">GFF</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
68 <option value="table">table</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
69 </param>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
70 <param name="pseudoTag" type="select" label="Generate pseudo-tags?" help="See section 'Pseudo-tags' in the help below for more details.">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
71 <option value="N">NO</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
72 <option value="Y">YES</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
73 </param>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
74 <conditional name="bowtieParams">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
75 <param name="bSettingsType" type="select" label="Bowtie settings to use" help="For most mapping applications, use the 'Commonly used' settings. If you want full control, use 'Full parameter list'.">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
76 <option value="preSet">Commonly used</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
77 <option value="full">Full parameter list</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
78 </param>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
79 <when value="preSet" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
80 <when value="full">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
81 <param name="Mismatch" type="integer" value="2" label="Maximum number of mismatches permitted (-v)" help="May be 0, 1, 2, or 3." />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
82 <param name="SuppressAlign" type="integer" value="99" label="Suppress all alignments for a read if more than n reportable alignments exist (-m)" help="99 is appropriate for most cases. Use -1 for no limit." />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
83 </when> <!-- full -->
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
84 </conditional> <!-- bowtieParams -->
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
85 <conditional name="csemParams">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
86 <param name="cSettingsType" type="select" label="CSEM settings to use" help="For most multi-read allocation applications, use the 'Commonly used' settings. If you want full control, use 'Full parameter list'.">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
87 <option value="preSet">Commonly used</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
88 <option value="full">Full parameter list</option>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
89 </param>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
90 <when value="preSet" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
91 <when value="full">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
92 <param name="windowSize" type="integer" value="201" label="Window size for the multi-read allocator" help="Set window size to some odd number close to the average fragment length." />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
93 <param name="nIteration" type="integer" value="200" label="Number of iterations for the multi-read allocator" help="200 is appropriate for most cases." />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
94 </when> <!-- full -->
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
95 </conditional> <!-- csemParams -->
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
96 </inputs>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
97
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
98 <outputs>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
99 <data format="tabular" name="out_csem">
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
100 <change_format>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
101 <when input="OutfileFormat" value="bed" format="bed" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
102 <when input="OutfileFormat" value="gff" format="gff" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
103 </change_format>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
104 </data>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
105 </outputs>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
106
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
107 <tests>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
108 <test>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
109 <param name="index" value="eschColi_K12" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
110 <param name="InfileFormat" value="fasta" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
111 <param name="Input" ftype="fasta" value="csem_test1_in.fa" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
112 <param name="OutfileFormat" value="bed" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
113 <param name="pseudoTag" value="N" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
114 <param name="bSettingsType" value="preSet" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
115 <param name="cSettingsType" value="preSet" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
116 <output name="out_csem" ftype="bed" file="csem_test1_out_original_sorted.bed" sort="True" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
117 </test>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
118 <test>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
119 <param name="index" value="eschColi_K12" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
120 <param name="InfileFormat" value="fastq" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
121 <param name="Input" ftype="fastq" value="csem_test1_in.fq" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
122 <param name="OutfileFormat" value="bed" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
123 <param name="pseudoTag" value="Y" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
124 <param name="bSettingsType" value="preSet" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
125 <param name="cSettingsType" value="preSet" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
126 <output name="out_csem" ftype="bed" file="csem_test1_out_pseudo_sorted.bed" sort="True" />
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
127 </test>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
128 </tests>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
129
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
130 <help>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
131
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
132 **What it does**
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
133
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
134 CSEM (ChIP-Seq multi-read allocation using the E-M algorithm) is a multi-read allocation algorithm. *Multi-reads* are the reads that map to multiple locations on the reference genome. The most common analysis of ChIP-seq data relies on using only reads that map uniquely to the relevant reference genome (*uni-reads*). This can lead to the omission of up to 30% of alignable reads. Chung et al. (2011) illustrated that incorporation of multi-reads significantly increases sequencing depths, leads to detection of novel peaks that are not otherwise identifiable with uni-reads, and improves detection of peaks in low mappable regions. The computational and experimental results established that multi-reads can be of critical importance for studying DNA-protein interactions in highly repetitive regions of genomes with ChIP-seq experiments. Output from CSEM can be used with other peak callers such as MOSAiCS and MACS to identify peaks that are in both high and low mappable regions of genomes.
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
135
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
136 Please cite: Chung D, Kuan PF, Li B, SanalKumar R, Liang K, Bresnick E, Dewey C, and Keles S (2011),
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
137 "Discovering transcription factor binding sites in highly repetitive regions of genomes
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
138 with multi-read analysis of ChIP-Seq data," PLoS Computational Biology, 7(7): e1002111.
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
139
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
140 ------
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
141
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
142 **Input formats**
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
143
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
144 CSEM accepts short reads aligned using bowtie as input. Bowtie accepts single-end reads, in FASTA or FASTQ format, as input. Quality scores of reads are ignored.
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
145
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
146 ------
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
147
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
148 **Pseudo-tags**
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
149
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
150 For each read in the alignment file, CSEM estimates the fraction of the read allocated to each of its alignments. This fraction reflects the degree of confidence in each particular alignment. Currently, only the peak caller MOSAiCS can accept fractions of reads as input. However, you can incorporate multi-reads into ChIP-seq analysis with your favorite peak-caller by utilizing this pseudo-tag functionality. Pseudo-tags are generated by assigning each multi-read to the location it maps to with the largest weight and filtering out multi-reads with weights less than 0.5. Although summarizing CSEM output as pseudo-tags decreases the number of utilized multi-reads, it still leads to a significant increase in the sequencing depth compared to using uni-reads alone and facilitates identification of peaks in repetitive regions.
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
151
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
152 ------
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
153
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
154 **Outputs**
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
155
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
156 Currently, results from CSEM can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single alignment. The lines of the output file are ordered such that all of the unique read alignments appear first. If pseudo-tags are generated, *FRAC* equals 1 for all reads if the output is a table and *score* is set to 1000 for all the reads in the BED and GFF formats.
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
157
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
158 If the output is a table, it has the following columns::
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
159
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
160 Column Description
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
161 -------- --------------------------------------------------------
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
162 1 RID ID of a read
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
163 2 CID Chromosome of the alignment
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
164 3 DIR Strand of the alignment (+ or -)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
165 4 POS Left-most position of the aligned read (the first base in a chromosome is numbered 1)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
166 5 FRAC Fraction of the read allocated to the alignment (which is 1 for uni-reads)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
167
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
168 If the output is in BED format, it has the following columns::
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
169
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
170 Column Description
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
171 ------------ --------------------------------------------------------
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
172 1 chrom Chromosome of the alignment
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
173 2 chromStart Start position of the aligned read (the first base in a chromosome is numbered 0)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
174 3 chromEnd End position of the aligned read (the first base in a chromosome is numbered 0)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
175 4 name ID of a read
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
176 5 score 1000 * fraction of the read allocated to the alignment (which is 1000 for uni-reads)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
177 6 strand Strand of the alignment (+ or -)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
178
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
179 If the output is in GFF format, it has the following columns::
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
180
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
181 Column Description
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
182 --------- --------------------------------------------------------
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
183 1 seqname Chromosome of the alignment
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
184 2 source Always "CSEM"
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
185 3 feature ID of a read
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
186 4 start Start position of the aligned read (the first base in a chromosome is numbered 1)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
187 5 end End position of the aligned read (the first base in a chromosome is numbered 1)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
188 6 score 1000 * fraction of the read allocated to the alignment (which is 1000 for uni-reads)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
189 7 strand Strand of the alignment (+ or -)
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
190 8 frame Always "."
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
191 9 group Always "."
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
192
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
193
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
194 </help>
5fd51ab70dad Uploaded
dongjun
parents:
diff changeset
195 </tool>