annotate mtls_analyze/mtls_analyze.xml @ 4:b465306d00ba draft default tip

Uploaded
author kmace
date Mon, 23 Jul 2012 13:00:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
b465306d00ba Uploaded
kmace
parents:
diff changeset
1 <tool name="Chip-Cluster: Cluster ChIP-seq peaks and create a heatmap" id="chip-cluster">
b465306d00ba Uploaded
kmace
parents:
diff changeset
2 <description>
b465306d00ba Uploaded
kmace
parents:
diff changeset
3 Merge multiple ChIP-seq experiments, aligning their peaks to MTLs (Multi
b465306d00ba Uploaded
kmace
parents:
diff changeset
4 Transcription Factor Loci(us)) and optionally incorporate expression
b465306d00ba Uploaded
kmace
parents:
diff changeset
5 </description>
b465306d00ba Uploaded
kmace
parents:
diff changeset
6 <command interpreter="command">/bin/bash $shscript </command>
b465306d00ba Uploaded
kmace
parents:
diff changeset
7 <inputs>
b465306d00ba Uploaded
kmace
parents:
diff changeset
8 <param name="chipInputFormat" type="select" display="radio" label="ChIP Input Format">
b465306d00ba Uploaded
kmace
parents:
diff changeset
9 <option name="macs" value="MACS">MACS</option>
b465306d00ba Uploaded
kmace
parents:
diff changeset
10 <option name="bed" value="BED">BED</option>
b465306d00ba Uploaded
kmace
parents:
diff changeset
11 </param>
b465306d00ba Uploaded
kmace
parents:
diff changeset
12 <param name="mtlType" type="select" display="radio" label="Cluster by: ">
b465306d00ba Uploaded
kmace
parents:
diff changeset
13 <option name="summit" value="summit">Summit</option>
b465306d00ba Uploaded
kmace
parents:
diff changeset
14 <option name="interval" value="interval">Interval</option>
b465306d00ba Uploaded
kmace
parents:
diff changeset
15 </param>
b465306d00ba Uploaded
kmace
parents:
diff changeset
16 <param name="summitDistance" type="text" label="Summit Distance (BP) - Summit only" value="100">
b465306d00ba Uploaded
kmace
parents:
diff changeset
17 </param>
b465306d00ba Uploaded
kmace
parents:
diff changeset
18 <param name="numberBins" type="text" label="Number of Bins" value="30">
b465306d00ba Uploaded
kmace
parents:
diff changeset
19 </param>
b465306d00ba Uploaded
kmace
parents:
diff changeset
20 <repeat name="chip_tracks" title="MACS/BED Files">
b465306d00ba Uploaded
kmace
parents:
diff changeset
21 <param name="file" type="data" format="tabular" label="Dataset"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
22 <param name="name" type="text" label="Dataset Name"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
23 </repeat>
b465306d00ba Uploaded
kmace
parents:
diff changeset
24 <param name="map_rna" type="boolean" truevalue="yes" falsevalue="no" label="Incorporate RNA?"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
25 <param name="includeTargetless" checked="true" type="boolean" truevalue="yes" falsevalue="no" label="Include Targetless MTLs?"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
26 <param name="reference_file" type="data" format="tabular" label="Reference Genome File"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
27
b465306d00ba Uploaded
kmace
parents:
diff changeset
28 <param name="normalize_rna" type="boolean" truevalue="yes" falsevalue="no" label="Normalize Expression?"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
29 <param name="use_mean" type="boolean" truevalue="yes" falsevalue="no" label="Use mean expression across exp. to normalize?"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
30 <param name="rnaInputFormat" type="select" display="radio" label="RNA Input Format">
b465306d00ba Uploaded
kmace
parents:
diff changeset
31 <option name="cufflinks" value="cufflinks">Cufflinks</option>
b465306d00ba Uploaded
kmace
parents:
diff changeset
32 <option name="bed" value="bed">BED</option>
b465306d00ba Uploaded
kmace
parents:
diff changeset
33 </param>
b465306d00ba Uploaded
kmace
parents:
diff changeset
34 <param name="numClusters" type="text" label="Number of Clusters (kmeans)" value="8">
b465306d00ba Uploaded
kmace
parents:
diff changeset
35 </param>
b465306d00ba Uploaded
kmace
parents:
diff changeset
36 <param name="trgtDistance" type="text" label="Transcript threshold distance" value="5000">
b465306d00ba Uploaded
kmace
parents:
diff changeset
37 </param>
b465306d00ba Uploaded
kmace
parents:
diff changeset
38 <repeat name="rna_tracks" title="Cufflinks/BED Files">
b465306d00ba Uploaded
kmace
parents:
diff changeset
39 <param name="file" type="data" format="tabular" label="Dataset"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
40 <param name="name" type="text" label="Dataset Name"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
41 <param name="norm" type="data" label="Normalization Dataset"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
42 </repeat>
b465306d00ba Uploaded
kmace
parents:
diff changeset
43 </inputs>
b465306d00ba Uploaded
kmace
parents:
diff changeset
44 <outputs>
b465306d00ba Uploaded
kmace
parents:
diff changeset
45 <data format="xls" name="cluster_assignments" label="Cluster Assignments"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
46 <data format="xls" name="mtls" label="MTLS File"/>
b465306d00ba Uploaded
kmace
parents:
diff changeset
47 <data format="txt" name="log" label="Log file" />
b465306d00ba Uploaded
kmace
parents:
diff changeset
48 <data format="bmp" name="heatmap_image" label="Heatmap Image" />
b465306d00ba Uploaded
kmace
parents:
diff changeset
49 <!-- <data format="png" name="heatmap_image" label="Heatmap Image" >-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
50 <!-- <filter>imageFormat=="png"</filter>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
51 <!-- </data>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
52 <!-- <data format="pdf" name="heatmap_image" label="Heatmap Image" >-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
53 <!-- <filter>imageFormat=="pdf"</filter>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
54 <!-- </data>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
55
b465306d00ba Uploaded
kmace
parents:
diff changeset
56 </outputs>
b465306d00ba Uploaded
kmace
parents:
diff changeset
57 <configfiles>
b465306d00ba Uploaded
kmace
parents:
diff changeset
58 <configfile name="shscript">
b465306d00ba Uploaded
kmace
parents:
diff changeset
59 <!-- This is the script that runs (Cheetah/bash code)-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
60 #!/bin/bash
b465306d00ba Uploaded
kmace
parents:
diff changeset
61
b465306d00ba Uploaded
kmace
parents:
diff changeset
62 #import os
b465306d00ba Uploaded
kmace
parents:
diff changeset
63 #set $path = $os.path.abspath($__app__.config.tool_path)
b465306d00ba Uploaded
kmace
parents:
diff changeset
64
b465306d00ba Uploaded
kmace
parents:
diff changeset
65
b465306d00ba Uploaded
kmace
parents:
diff changeset
66 ## Set symbols so that they are not incorrectly interpreted:
b465306d00ba Uploaded
kmace
parents:
diff changeset
67 #set $dollar = chr(36)
b465306d00ba Uploaded
kmace
parents:
diff changeset
68 #set $gt = chr(62)
b465306d00ba Uploaded
kmace
parents:
diff changeset
69 #set $lt = chr(60)
b465306d00ba Uploaded
kmace
parents:
diff changeset
70 #set $ad = chr(38)
b465306d00ba Uploaded
kmace
parents:
diff changeset
71 #set $bs = chr(92)
b465306d00ba Uploaded
kmace
parents:
diff changeset
72
b465306d00ba Uploaded
kmace
parents:
diff changeset
73 echo $map_rna ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
74 echo "This is the Bash log file: " ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
75 ###############################################################################
b465306d00ba Uploaded
kmace
parents:
diff changeset
76 ## Convert the gtf file to a file that Aviv's script can handle
b465306d00ba Uploaded
kmace
parents:
diff changeset
77 #if str($map_rna)=='yes'
b465306d00ba Uploaded
kmace
parents:
diff changeset
78 echo "Converting gtf file" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
79 Rscript $path/visualization/gtfToMapFriendlyAnnotation.R $reference_file ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
80 echo "done converting gtf file" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
81 #end if
b465306d00ba Uploaded
kmace
parents:
diff changeset
82 ###############################################################################
b465306d00ba Uploaded
kmace
parents:
diff changeset
83 ## Get ChIP data in correctly formatted strings and annotate if necessary.
b465306d00ba Uploaded
kmace
parents:
diff changeset
84 #set $sep = '::'
b465306d00ba Uploaded
kmace
parents:
diff changeset
85 #for $i, $chip in enumerate( $chip_tracks )
b465306d00ba Uploaded
kmace
parents:
diff changeset
86 #if $i==0
b465306d00ba Uploaded
kmace
parents:
diff changeset
87 echo "Chip Files:" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
88 echo "The first file label is: ${chip.name}" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
89 echo "The first file path is: ${chip.file}" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
90 chip_labels=${chip.name}
b465306d00ba Uploaded
kmace
parents:
diff changeset
91 chip_paths=${chip.file}
b465306d00ba Uploaded
kmace
parents:
diff changeset
92 #else
b465306d00ba Uploaded
kmace
parents:
diff changeset
93 echo "The next file label is: ${chip.name}" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
94 echo "The next file path is: ${chip.file}" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
95 chip_labels=${dollar}chip_labels${sep}${chip.name}
b465306d00ba Uploaded
kmace
parents:
diff changeset
96 chip_paths=${dollar}chip_paths${sep}${chip.file}
b465306d00ba Uploaded
kmace
parents:
diff changeset
97 #end if
b465306d00ba Uploaded
kmace
parents:
diff changeset
98 #end for
b465306d00ba Uploaded
kmace
parents:
diff changeset
99
b465306d00ba Uploaded
kmace
parents:
diff changeset
100 echo chip paths are - ${dollar}chip_paths ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
101 echo chip labels are - ${dollar}chip_labels ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
102
b465306d00ba Uploaded
kmace
parents:
diff changeset
103 ###############################################################################
b465306d00ba Uploaded
kmace
parents:
diff changeset
104 ## Cluster peaks
b465306d00ba Uploaded
kmace
parents:
diff changeset
105
b465306d00ba Uploaded
kmace
parents:
diff changeset
106 Rscript $path/visualization/cluster_peaks.R \
b465306d00ba Uploaded
kmace
parents:
diff changeset
107 --input_files ${dollar}chip_paths \
b465306d00ba Uploaded
kmace
parents:
diff changeset
108 --input_type $chipInputFormat \
b465306d00ba Uploaded
kmace
parents:
diff changeset
109 --path_output ./ \
b465306d00ba Uploaded
kmace
parents:
diff changeset
110 --expt_names ${dollar}chip_labels \
b465306d00ba Uploaded
kmace
parents:
diff changeset
111 --dist_summits $summitDistance \
b465306d00ba Uploaded
kmace
parents:
diff changeset
112 --mtl_type $mtlType ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
113
b465306d00ba Uploaded
kmace
parents:
diff changeset
114 ###############################################################################
b465306d00ba Uploaded
kmace
parents:
diff changeset
115 ## Annotate mtls.xls if necessary
b465306d00ba Uploaded
kmace
parents:
diff changeset
116 #if str($map_rna)=="yes"
b465306d00ba Uploaded
kmace
parents:
diff changeset
117 echo "annotating mtls.xls..." ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
118 Rscript $path/visualization/annotate_mtls.R mtls.xls gene_annotation.txt $trgtDistance ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
119 #end if
b465306d00ba Uploaded
kmace
parents:
diff changeset
120 ###############################################################################
b465306d00ba Uploaded
kmace
parents:
diff changeset
121 ## If rna is specified, then get RNA data in correctly formatted strings:
b465306d00ba Uploaded
kmace
parents:
diff changeset
122 #if str($map_rna)=='yes'
b465306d00ba Uploaded
kmace
parents:
diff changeset
123 #set $sep = '::'
b465306d00ba Uploaded
kmace
parents:
diff changeset
124 #for $i, $rna in enumerate( $rna_tracks )
b465306d00ba Uploaded
kmace
parents:
diff changeset
125 #if $i==0
b465306d00ba Uploaded
kmace
parents:
diff changeset
126 echo "The first file label is: ${rna.name}" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
127 echo "The first file path is: ${rna.file}" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
128 rna_labels=${rna.name}
b465306d00ba Uploaded
kmace
parents:
diff changeset
129 rna_paths=${rna.file}
b465306d00ba Uploaded
kmace
parents:
diff changeset
130 rna_norm_paths=${rna.norm}
b465306d00ba Uploaded
kmace
parents:
diff changeset
131 #else
b465306d00ba Uploaded
kmace
parents:
diff changeset
132 echo "The next file label is: ${rna.name}" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
133 echo "The next file path is: ${rna.file}" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
134 rna_labels=${dollar}rna_labels${sep}${rna.name}
b465306d00ba Uploaded
kmace
parents:
diff changeset
135 rna_paths=${dollar}rna_paths${sep}${rna.file}
b465306d00ba Uploaded
kmace
parents:
diff changeset
136 rna_norm_paths=${dollar}rna_norm_paths${sep}${rna.norm}
b465306d00ba Uploaded
kmace
parents:
diff changeset
137 #end if
b465306d00ba Uploaded
kmace
parents:
diff changeset
138 #end for
b465306d00ba Uploaded
kmace
parents:
diff changeset
139 echo rna paths are - ${dollar}rna_paths ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
140 echo rna labels are - ${dollar}rna_labels ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
141 echo rna norm files are - ${dollar}rna_norm_paths ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
142 #end if
b465306d00ba Uploaded
kmace
parents:
diff changeset
143 ###############################################################################
b465306d00ba Uploaded
kmace
parents:
diff changeset
144
b465306d00ba Uploaded
kmace
parents:
diff changeset
145 #if str($normalize_rna)=='no'
b465306d00ba Uploaded
kmace
parents:
diff changeset
146 echo "Normalization by file is set to no" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
147 rna_norm_paths=no
b465306d00ba Uploaded
kmace
parents:
diff changeset
148 #end if
b465306d00ba Uploaded
kmace
parents:
diff changeset
149
b465306d00ba Uploaded
kmace
parents:
diff changeset
150 #if str($use_mean)=='yes'
b465306d00ba Uploaded
kmace
parents:
diff changeset
151 echo "Normalization of expression will be done by mean" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
152 rna_norm_paths=mean
b465306d00ba Uploaded
kmace
parents:
diff changeset
153 #end if
b465306d00ba Uploaded
kmace
parents:
diff changeset
154
b465306d00ba Uploaded
kmace
parents:
diff changeset
155 #if str($map_rna)=='no'
b465306d00ba Uploaded
kmace
parents:
diff changeset
156 mtls_file=mtls.xls
b465306d00ba Uploaded
kmace
parents:
diff changeset
157 rna_paths=none
b465306d00ba Uploaded
kmace
parents:
diff changeset
158 rna_labels=none
b465306d00ba Uploaded
kmace
parents:
diff changeset
159 #else
b465306d00ba Uploaded
kmace
parents:
diff changeset
160 mtls_file=annotated_mtls.xls
b465306d00ba Uploaded
kmace
parents:
diff changeset
161 #end if
b465306d00ba Uploaded
kmace
parents:
diff changeset
162
b465306d00ba Uploaded
kmace
parents:
diff changeset
163 echo "
b465306d00ba Uploaded
kmace
parents:
diff changeset
164 Rscript $path/visualization/heatmap.R --mtls_file ./${dollar}mtls_file \
b465306d00ba Uploaded
kmace
parents:
diff changeset
165 --cluster_file ./cluster \
b465306d00ba Uploaded
kmace
parents:
diff changeset
166 --chip_experiment_order ${dollar}chip_labels \
b465306d00ba Uploaded
kmace
parents:
diff changeset
167 --heatmap_file ./heatmap \
b465306d00ba Uploaded
kmace
parents:
diff changeset
168 --heatmap_type bmp \
b465306d00ba Uploaded
kmace
parents:
diff changeset
169 --n_clusters $numClusters \
b465306d00ba Uploaded
kmace
parents:
diff changeset
170 --filter_percentage 100 \
b465306d00ba Uploaded
kmace
parents:
diff changeset
171 --expression_file ${dollar}rna_paths \
b465306d00ba Uploaded
kmace
parents:
diff changeset
172 --expression_name ${dollar}rna_labels \
b465306d00ba Uploaded
kmace
parents:
diff changeset
173 --normalization_file ${dollar}rna_norm_paths \
b465306d00ba Uploaded
kmace
parents:
diff changeset
174 ${ad}${gt}${gt} $log" ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
175
b465306d00ba Uploaded
kmace
parents:
diff changeset
176 Rscript $path/visualization/heatmap.R --mtls_file ./${dollar}mtls_file \
b465306d00ba Uploaded
kmace
parents:
diff changeset
177 --cluster_file ./cluster \
b465306d00ba Uploaded
kmace
parents:
diff changeset
178 --chip_experiment_order ${dollar}chip_labels \
b465306d00ba Uploaded
kmace
parents:
diff changeset
179 --heatmap_file ./heatmap \
b465306d00ba Uploaded
kmace
parents:
diff changeset
180 --heatmap_type bmp \
b465306d00ba Uploaded
kmace
parents:
diff changeset
181 --n_clusters $numClusters \
b465306d00ba Uploaded
kmace
parents:
diff changeset
182 --filter_percentage 100 \
b465306d00ba Uploaded
kmace
parents:
diff changeset
183 --number_bins $numberBins \
b465306d00ba Uploaded
kmace
parents:
diff changeset
184 --include_targetless $includeTargetless \
b465306d00ba Uploaded
kmace
parents:
diff changeset
185 --expression_file ${dollar}rna_paths \
b465306d00ba Uploaded
kmace
parents:
diff changeset
186 --expression_name ${dollar}rna_labels \
b465306d00ba Uploaded
kmace
parents:
diff changeset
187 --normalization_file ${dollar}rna_norm_paths \
b465306d00ba Uploaded
kmace
parents:
diff changeset
188 ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
189
b465306d00ba Uploaded
kmace
parents:
diff changeset
190 ls ${ad}${gt}${gt} $log
b465306d00ba Uploaded
kmace
parents:
diff changeset
191
b465306d00ba Uploaded
kmace
parents:
diff changeset
192
b465306d00ba Uploaded
kmace
parents:
diff changeset
193
b465306d00ba Uploaded
kmace
parents:
diff changeset
194
b465306d00ba Uploaded
kmace
parents:
diff changeset
195 ##################################################################
b465306d00ba Uploaded
kmace
parents:
diff changeset
196 #if str($map_rna)=='yes'
b465306d00ba Uploaded
kmace
parents:
diff changeset
197 mv ./annotated_mtls.xls $mtls
b465306d00ba Uploaded
kmace
parents:
diff changeset
198 #else
b465306d00ba Uploaded
kmace
parents:
diff changeset
199 mv ./mtls.xls $mtls
b465306d00ba Uploaded
kmace
parents:
diff changeset
200 #end if
b465306d00ba Uploaded
kmace
parents:
diff changeset
201 mv ./heatmap.* $heatmap_image
b465306d00ba Uploaded
kmace
parents:
diff changeset
202 mv ./cluster.tsv $cluster_assignments
b465306d00ba Uploaded
kmace
parents:
diff changeset
203
b465306d00ba Uploaded
kmace
parents:
diff changeset
204 </configfile>
b465306d00ba Uploaded
kmace
parents:
diff changeset
205 </configfiles>
b465306d00ba Uploaded
kmace
parents:
diff changeset
206 <!--<tests>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
207 <!-- <test maxseconds="3600" name="GCA_1">-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
208 <!-- <param name="bfile" value="bedfile.bed" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
209 <!-- <param name="span" value="3000" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
210 <!-- <param name="genome" value="hg18" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
211 <!-- <output name="output" file="gca_1/gca_1.xls" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
212 <!-- <output name="output" file="gca_1/gca_1.log" lines_diff = "200" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
213 <!-- </test>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
214 <!-- <test maxseconds="3600" name="GCA_2">-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
215 <!-- <param name="bfile" value="bedfile.bed" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
216 <!-- <param name="span" value="100" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
217 <!-- <param name="genome" value="hg18" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
218 <!-- <output name="output" file="gca_2/gca_2.xls" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
219 <!-- <output name="output" file="gca_2/gca_2.log" lines_diff = "200" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
220 <!-- </test>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
221 <!-- <test maxseconds="3600" name="GCA_3">-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
222 <!-- <param name="bfile" value="bedfile.bed" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
223 <!-- <param name="span" value="500" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
224 <!-- <param name="genome" value="hg18" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
225 <!-- <output name="output" file="gca_3/gca_3.xls" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
226 <!-- <output name="output" file="gca_3/gca_3.log" lines_diff = "200" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
227 <!-- </test>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
228 <!-- <test maxseconds="3600" name="GCA_4">-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
229 <!-- <param name="bfile" value="bedfile.bed" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
230 <!-- <param name="span" value="1000" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
231 <!-- <param name="genome" value="hg18" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
232 <!-- <output name="output" file="gca_4/gca_4.xls" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
233 <!-- <output name="output" file="gca_4/gca_4.log" lines_diff = "200" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
234 <!-- </test>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
235 <!-- <test maxseconds="3600" name="GCA_5">-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
236 <!-- <param name="bfile" value="bedfile.bed" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
237 <!-- <param name="span" value="10000" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
238 <!-- <param name="genome" value="hg18" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
239 <!-- <output name="output" file="gca_5/gca_5.xls" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
240 <!-- <output name="output" file="gca_5/gca_5.log" lines_diff = "200" />-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
241 <!-- </test>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
242 <!--</tests>-->
b465306d00ba Uploaded
kmace
parents:
diff changeset
243 <help>
b465306d00ba Uploaded
kmace
parents:
diff changeset
244 This tool will merge peaks from multiple ChIP-seq experiments, creating MTLs for
b465306d00ba Uploaded
kmace
parents:
diff changeset
245 each overlapping region. It will then cluster each MTL based on the score of
b465306d00ba Uploaded
kmace
parents:
diff changeset
246 each peak within each MTL (using K-means clustering, with k set by user). A
b465306d00ba Uploaded
kmace
parents:
diff changeset
247 heatmap is then generated from the resulting cluster along with the MTLs
b465306d00ba Uploaded
kmace
parents:
diff changeset
248 generated. This module is written in R and will be made available on GitHub
b465306d00ba Uploaded
kmace
parents:
diff changeset
249 and bioconductor. This work was done by Kieran Mace and Aviv Madar.
b465306d00ba Uploaded
kmace
parents:
diff changeset
250
b465306d00ba Uploaded
kmace
parents:
diff changeset
251 **NEED IMPROVEMENT**
b465306d00ba Uploaded
kmace
parents:
diff changeset
252
b465306d00ba Uploaded
kmace
parents:
diff changeset
253 -----
b465306d00ba Uploaded
kmace
parents:
diff changeset
254
b465306d00ba Uploaded
kmace
parents:
diff changeset
255 **Parameters**
b465306d00ba Uploaded
kmace
parents:
diff changeset
256
b465306d00ba Uploaded
kmace
parents:
diff changeset
257 - **Input files** contains either MACS or BED files to be merged. At least two files must be provided.
b465306d00ba Uploaded
kmace
parents:
diff changeset
258 - **Experiment names** contains the name given to each track.
b465306d00ba Uploaded
kmace
parents:
diff changeset
259 - **Summit distance** is the cutoff distance (in BP) between summits for peaks to be included in the same MTL. This option is only used with the summit MTL type below
b465306d00ba Uploaded
kmace
parents:
diff changeset
260 - **Input Format** Either BED or MACS file format; all files must be of one type. Defaults to MACS
b465306d00ba Uploaded
kmace
parents:
diff changeset
261 - **MTL Type** Either interval or summit (defaults to summit).
b465306d00ba Uploaded
kmace
parents:
diff changeset
262 - **Number clusters** the value of k for kmeans clustering.
b465306d00ba Uploaded
kmace
parents:
diff changeset
263 - **Filter top MTLS** The top percentage of MTLs to keep for image and cluster (based on the union of mean, non-zero mean, and variance of the scores).
b465306d00ba Uploaded
kmace
parents:
diff changeset
264 -----
b465306d00ba Uploaded
kmace
parents:
diff changeset
265
b465306d00ba Uploaded
kmace
parents:
diff changeset
266 **Output**
b465306d00ba Uploaded
kmace
parents:
diff changeset
267
b465306d00ba Uploaded
kmace
parents:
diff changeset
268 - **XLS file** is the tab-delimited file containing the MTL data.
b465306d00ba Uploaded
kmace
parents:
diff changeset
269 - **BMP file** is the heatmap image generated after clustering the MTL data.
b465306d00ba Uploaded
kmace
parents:
diff changeset
270
b465306d00ba Uploaded
kmace
parents:
diff changeset
271 -----
b465306d00ba Uploaded
kmace
parents:
diff changeset
272
b465306d00ba Uploaded
kmace
parents:
diff changeset
273 **script parameter list of Chip-Cluster**
b465306d00ba Uploaded
kmace
parents:
diff changeset
274
b465306d00ba Uploaded
kmace
parents:
diff changeset
275 Options:
b465306d00ba Uploaded
kmace
parents:
diff changeset
276 DESCRIPTION:
b465306d00ba Uploaded
kmace
parents:
diff changeset
277 cluster_peaks.R takes MACS/.bed tab-delimited files as input and produces one tab-delimited file (named mtls.xls) where
b465306d00ba Uploaded
kmace
parents:
diff changeset
278 each row corresponds to a Multi TF Loci (MTL) in which peaks from different experiments (input MACS/.bed files)
b465306d00ba Uploaded
kmace
parents:
diff changeset
279 fall within a certain distance between summits from each other.
b465306d00ba Uploaded
kmace
parents:
diff changeset
280
b465306d00ba Uploaded
kmace
parents:
diff changeset
281 INPUT:
b465306d00ba Uploaded
kmace
parents:
diff changeset
282 1.path_input=path to MACS/bed files '::' delim [path_input=f1::f2::f3::...::fk]
b465306d00ba Uploaded
kmace
parents:
diff changeset
283 2.path_output=path to save generated MTL cluster file (where to save mtls.xls)
b465306d00ba Uploaded
kmace
parents:
diff changeset
284 3.expt_names=user specified names for MACS files '::' delim [expt_names=n1::n2::n3::...::nk]
b465306d00ba Uploaded
kmace
parents:
diff changeset
285 4.dist.summits=maximum distance between summits belonging to the same MTL (defaults to 100)
b465306d00ba Uploaded
kmace
parents:
diff changeset
286 5.input_type=the type of input file used (MACS or .bed; defaults to MACS)
b465306d00ba Uploaded
kmace
parents:
diff changeset
287 6.mtl_type=interval or summit (defaults to summit)
b465306d00ba Uploaded
kmace
parents:
diff changeset
288
b465306d00ba Uploaded
kmace
parents:
diff changeset
289 EXAMPLE RUN:
b465306d00ba Uploaded
kmace
parents:
diff changeset
290 cluster_peaks.R
b465306d00ba Uploaded
kmace
parents:
diff changeset
291 --input_macs_files input/SL2870_SL2871_peaks.xls::input/SL2872_SL2876_peaks.xls::input/SL3032_SL2871_peaks.xls::input/SL3037_SL3036_peaks.xls::input/SL3315_SL3319_peaks.xls
b465306d00ba Uploaded
kmace
parents:
diff changeset
292 --input_type MACS
b465306d00ba Uploaded
kmace
parents:
diff changeset
293 --path_output results/
b465306d00ba Uploaded
kmace
parents:
diff changeset
294 --expt_names RORC_Th17::IRF4_Th17::MAF_Th17::BATF_Th17::STAT3_Th17
b465306d00ba Uploaded
kmace
parents:
diff changeset
295 --dist_summits 100
b465306d00ba Uploaded
kmace
parents:
diff changeset
296 --mtl_type summit
b465306d00ba Uploaded
kmace
parents:
diff changeset
297
b465306d00ba Uploaded
kmace
parents:
diff changeset
298 DESCRIPTION:
b465306d00ba Uploaded
kmace
parents:
diff changeset
299 heatmap.R takes a ...
b465306d00ba Uploaded
kmace
parents:
diff changeset
300
b465306d00ba Uploaded
kmace
parents:
diff changeset
301 INPUT:
b465306d00ba Uploaded
kmace
parents:
diff changeset
302 1.--mtls_file path to mtls file.
b465306d00ba Uploaded
kmace
parents:
diff changeset
303
b465306d00ba Uploaded
kmace
parents:
diff changeset
304 2.--cluster_file the destination path for the cluster file.
b465306d00ba Uploaded
kmace
parents:
diff changeset
305
b465306d00ba Uploaded
kmace
parents:
diff changeset
306 3.--heatmap_file the destination path for heatmap image (no extension).
b465306d00ba Uploaded
kmace
parents:
diff changeset
307
b465306d00ba Uploaded
kmace
parents:
diff changeset
308 4.--heatmap_type choice of image type, currently support png and pdf.
b465306d00ba Uploaded
kmace
parents:
diff changeset
309
b465306d00ba Uploaded
kmace
parents:
diff changeset
310 5.--n_clusters number of clusters in the heatmap
b465306d00ba Uploaded
kmace
parents:
diff changeset
311
b465306d00ba Uploaded
kmace
parents:
diff changeset
312 6.--filter_percentage percentage of mtls that will be analysed, e.g. if
b465306d00ba Uploaded
kmace
parents:
diff changeset
313 we make filter_percentage 30, we will take the union of the top mtls in
b465306d00ba Uploaded
kmace
parents:
diff changeset
314 mean, non-zero mean and variance.
b465306d00ba Uploaded
kmace
parents:
diff changeset
315
b465306d00ba Uploaded
kmace
parents:
diff changeset
316
b465306d00ba Uploaded
kmace
parents:
diff changeset
317 EXAMPLE RUN:
b465306d00ba Uploaded
kmace
parents:
diff changeset
318 Rscript heatmap.R
b465306d00ba Uploaded
kmace
parents:
diff changeset
319 --mtls_file mtls.xls
b465306d00ba Uploaded
kmace
parents:
diff changeset
320 --cluster_file output/cluster
b465306d00ba Uploaded
kmace
parents:
diff changeset
321 --heatmap_file output/heatmap
b465306d00ba Uploaded
kmace
parents:
diff changeset
322 --heatmap_type png
b465306d00ba Uploaded
kmace
parents:
diff changeset
323 --n_clusters 13
b465306d00ba Uploaded
kmace
parents:
diff changeset
324 --filter_percentage 60
b465306d00ba Uploaded
kmace
parents:
diff changeset
325
b465306d00ba Uploaded
kmace
parents:
diff changeset
326 Please cite us if you used this script:
b465306d00ba Uploaded
kmace
parents:
diff changeset
327 The transcription factor network regulating Th17 lineage specification and function.
b465306d00ba Uploaded
kmace
parents:
diff changeset
328 Maria Ciofani, Aviv Madar, Carolina Galan, Kieran Mace, Agarwal, Kim Newberry, Richard M. Myers,
b465306d00ba Uploaded
kmace
parents:
diff changeset
329 Richard Bonneau and Dan R. Littman et al. (in preparation)
b465306d00ba Uploaded
kmace
parents:
diff changeset
330
b465306d00ba Uploaded
kmace
parents:
diff changeset
331 </help>
b465306d00ba Uploaded
kmace
parents:
diff changeset
332
b465306d00ba Uploaded
kmace
parents:
diff changeset
333 </tool>