comparison mytools/align2multiple.xml @ 9:87eb5c5ddfe9

Uploaded
author xuebing
date Fri, 09 Mar 2012 20:01:43 -0500
parents f0dc65e7f6c0
children
comparison
equal deleted inserted replaced
8:361ec1c0479d 9:87eb5c5ddfe9
1 <tool id="align2multiple" name="align-to-multiple">
2 <description>features</description>
3 <command>cat $script_file | R --vanilla --slave > $logfile </command>
4 <inputs>
5 <param name="query" type="data" format="interval" label="Query intervals" help= "keep it small (less than 1,000,000 lines)"/>
6 <param name="label" type="text" value="" size="30" label="Data Label"/>
7 <param name="windowsize" size="10" type="integer" value="5000" label="radius of the window" help="Creates new intervals extending w bp on each side of the original center. Set to 0 to keep the input interval size unchanged."/>
8 <param name="nbins" size="10" type="integer" value="20" label="Number of bins dividing the window"/>
9 <param name="sort" label="Sort intervals" help="Sort by the center of the first input, then the second input, then third..." type="boolean" truevalue="sort" falsevalue="none" checked="True"/>
10 <repeat name="series" title="input file">
11 <param name="label" type="text" value="" size="30" label="Data Label"/>
12 <param name="input" type="data" format="interval" label="Dataset"/>
13 </repeat>
14 </inputs>
15
16 <configfiles>
17 <configfile name="script_file">
18 ## Log the job start, silence R warnings, then run alignr.py once for the query and once per series entry. NOTE(review): the alignr.py path below is hard-coded to one user's checkout - confirm before deploying elsewhere
19 cat('\n[',date(),'] Start running job\n')
20 options(warn=-1) ## a negative warn level makes R ignore all warnings
21 windowsize = as.integer("$windowsize") ## not referenced again in the visible script; the commands use the template value directly
22 labels = '$label' ## labels[1] is the query label; series labels are appended in the loop below
23 ## align query to itself; path and label arguments are double-quoted so values containing spaces stay single shell arguments
24 cmd = 'python /Users/xuebing/galaxy-dist/tools/mytools/alignr.py -a "$query" -b "$query" -o "$label-$label" --profile-only -q -w $windowsize -n $nbins'
25 cat('\n[',date(),'] ',cmd,'\n')
26 system(cmd)
27 ## align other sets to query: one run per repeat entry, with the output named from the query label, a dash, and the series label (the same name is read back later in the script)
28 #for $i,$s in enumerate( $series )
29 labels = c(labels,'$s.label.value')
30 cmd = 'python /Users/xuebing/galaxy-dist/tools/mytools/alignr.py -a "$s.input.file_name" -b "$query" -o "$label-$s.label.value" --profile-only -q -w $windowsize -n $nbins'
31 cat('\n[',date(),'] ',cmd,'\n')
32 system(cmd)
33 #end for
34 cat('\n[',date(),'] Read output\n')
35 ## read output of query2query: column 1 supplies the interval ids, columns 3 onward are kept as the numeric profile matrix
36 print(paste(labels[1],labels[1],sep='-'))
37 x = read.table(paste(labels[1],labels[1],sep='-'))
38 ids = as.character(x[,1])
39 nfeat = nrow(x)
40 x = as.matrix(x[,3:ncol(x)])
41 nbin = ncol(x)
42
43 ## a table mapping id to position (row index in the query matrix)
44 ind = list()
45 for (i in seq_len(nfeat)){ ## seq_len gives zero iterations for an empty table, unlike 1:0
46 ind[[ids[i]]] = i
47 }
48 ## read other output files and append each as nbin extra columns; seq_along(labels)[-1] is empty when no series was added, whereas 2:length(labels) would count down through 2 and 1 and try to read a file that was never written
49 for (i in seq_along(labels)[-1]){
50 print(paste(labels[1],labels[i],sep='-'))
51 x0 = read.table(paste(labels[1],labels[i],sep='-'))
52 ids0 = as.character(x0[,1])
53 x0 = as.matrix(x0[,3:ncol(x0)])
54 x1 = matrix(0,nfeat,nbin) ## query rows with no matching id in this file stay all zero
55 for (j in seq_len(nrow(x0))){
56 #cat(j,'\t',ids0[j],'\t',ind[[ids0[j]]],'\n')
57 x1[ind[[ids0[j]]],] = x0[j,]
58 }
59 x = cbind(x,x1)
60 }
61 ## reorder rows when sorting is enabled: passes run from the last series back to the first, and each pass moves rows whose value in column i*nbin-nbin/2 is not positive ahead of the rest; ties keep their current order, so earlier series take precedence
62 if ("${sort}" == "sort"){
63 cat('\n[',date(),'] Sort intervals\n')
64 for (i in rev(seq_along(labels)[-1])){ ## empty when only the query is present; rev(2:1) would index past the last column
65 x = x[order(x[,i*nbin-nbin/2]>0),,drop=FALSE] ## drop=FALSE keeps x a matrix when it has a single row
66 }
67 }
68 png("${out_file1}")
69 ##par(mfrow=c(2,length(labels)),mar=c(1,1,4,1))
70 layout(matrix(seq(2*length(labels)),nrow=2,byrow=T),heights=c(1,5)) ## two rows of panels, one column per label: summary curves on top, heatmaps five times taller below
71 cat('\n[',date(),'] Plot summary\n')
72 par(mar=c(0,0,4,0)+0.1)
73 for (i in seq_along(labels)){
74 plot(colSums(x[,((i-1)*nbin+1):(i*nbin),drop=FALSE]),type='l',axes=F,main=labels[i]) ## drop=FALSE keeps the slice a matrix for colSums when x has one row or nbin is 1
75 }
76 cat('\n[',date(),'] Plot heatmap\n')
77 par(mar=c(0,0,0,0)+0.1)
78 for (i in seq_along(labels)){
79 image(-t(log2(1+x[,((i-1)*nbin+1):(i*nbin),drop=FALSE])),axes=F) ## same slice as the summary panel, log2(1+x) scaled and negated; drop=FALSE keeps the transpose oriented as bins by rows
80 }
81 dev.off()
82 cat('\n[',date(),'] Finished\n')
83
84 </configfile>
85 </configfiles>
86
87 <outputs>
88 <data format="txt" name="logfile" label="${tool.name} on ${on_string}: (log)" />
89 <data format="png" name="out_file1" label="${tool.name} on ${on_string}: (plot)" />
90 </outputs>
91
92 <help>
93 .. class:: infomark
94
95 This tool allows you to check the co-localization pattern of multiple interval sets. All interval sets are aligned to the center of the intervals in the query interval set.
96
97 Each row represents a window of certain size around the center of one interval in the query set, such as ChIP peaks. Each heatmap shows the position of other features in the SAME window (the same rows in each heatmap represent the same interval/genomic position).
98
99
100 The example below shows that, of all Fox2 peaks, half are within 1kb of a TSS. Of the half outside TSS regions, about one half have H3K4me1, two thirds of which are further depleted of H3K4me3.
101
102 -----
103
104 **Example**
105
106 .. image:: ./static/images/align2multiple.png
107
108 </help>
109 </tool>