comparison tools/new_operations/join.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 <tool id="gops_join_1" name="Join">
2 <description>the intervals of two datasets side-by-side</description>
<!-- Runs gops_join.py (python interpreter) on the two input datasets and the output path. The -1 and -2 arguments each carry one input's chrom, start, end and strand column metadata as a comma-separated list; -m receives the "min" parameter and -f receives the "fill" parameter. -->
3 <command interpreter="python">gops_join.py $input1 $input2 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} -m $min -f $fill</command>
<!-- Form parameters: two interval-format datasets (input1, input2), an integer minimum overlap "min" (default value 1, help text "(bp)"), and the "fill" select that chooses which records are returned. -->
4 <inputs>
5 <param format="interval" name="input1" type="data" help="First dataset">
6 <label>Join</label>
7 </param>
8 <param format="interval" name="input2" type="data" help="Second dataset">
9 <label>with</label>
10 </param>
11 <param name="min" size="4" type="integer" value="1" help="(bp)">
12 <label>with min overlap</label>
13 </param>
14 <param name="fill" type="select" label="Return">
15 <option value="none">Only records that are joined (INNER JOIN)</option>
<!-- NOTE(review): value "right" is labelled "first dataset" and value "left" is labelled "second dataset". Presumably the value names the side that gets filled with ".", not the dataset that is kept; confirm against gops_join.py before changing either. -->
16 <option value="right">All records of first dataset (fill null with ".")</option>
17 <option value="left">All records of second dataset (fill null with ".")</option>
18 <option value="both">All records of both datasets (fill nulls with ".")</option>
19 </param>
20 </inputs>
<!-- One interval-format output named "output"; metadata_source points at input1 (presumably the output's column metadata is taken from the first dataset; confirm in the Galaxy tool schema). -->
21 <outputs>
22 <data format="interval" name="output" metadata_source="input1" />
23 </outputs>
<!-- References an external Python file, operation_filter.py; its contents are not part of this listing. -->
24 <code file="operation_filter.py"/>
<!-- Six test cases, all using 1.bed as input1 and 2.bed as input2. The first four cover each "fill" value (none, right, left, both) at min=1; the last two repeat fill=none at min=500 and fill=both at min=100. Each test names a gops-join-*.dat file for the "output" dataset. -->
25 <tests>
<!-- fill=none, min=1 -->
26 <test>
27 <param name="input1" value="1.bed" />
28 <param name="input2" value="2.bed" />
29 <param name="min" value="1" />
30 <param name="fill" value="none" />
31 <output name="output" file="gops-join-none.dat" />
32 </test>
<!-- fill=right, min=1 -->
33 <test>
34 <param name="input1" value="1.bed" />
35 <param name="input2" value="2.bed" />
36 <param name="min" value="1" />
37 <param name="fill" value="right" />
38 <output name="output" file="gops-join-right.dat" />
39 </test>
<!-- fill=left, min=1 -->
40 <test>
41 <param name="input1" value="1.bed" />
42 <param name="input2" value="2.bed" />
43 <param name="min" value="1" />
44 <param name="fill" value="left" />
45 <output name="output" file="gops-join-left.dat" />
46 </test>
<!-- fill=both, min=1 -->
47 <test>
48 <param name="input1" value="1.bed" />
49 <param name="input2" value="2.bed" />
50 <param name="min" value="1" />
51 <param name="fill" value="both" />
52 <output name="output" file="gops-join-both.dat" />
53 </test>
<!-- fill=none with a larger minimum overlap (min=500) -->
54 <test>
55 <param name="input1" value="1.bed" />
56 <param name="input2" value="2.bed" />
57 <param name="min" value="500" />
58 <param name="fill" value="none" />
59 <output name="output" file="gops-join-none-500.dat" />
60 </test>
<!-- fill=both with a larger minimum overlap (min=100) -->
61 <test>
62 <param name="input1" value="1.bed" />
63 <param name="input2" value="2.bed" />
64 <param name="min" value="100" />
65 <param name="fill" value="both" />
66 <output name="output" file="gops-join-both-100.dat" />
67 </test>
68 </tests>
69 <help>
70
71 .. class:: infomark
72
73 **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
74
75 -----
76
77 **Screencasts!**
78
79 See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
80
81 .. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
82
83 -----
84
85 **Syntax**
86
87 - **with min overlap** specifies the minimum overlap (in bp) between intervals that allows them to be joined.
88 - **Return only records that are joined** returns only the records of the first dataset that join to a record in the second dataset. This is analogous to an INNER JOIN.
89 - **Return all records of first dataset (fill null with &quot;.&quot;)** returns all intervals of the first dataset, and any intervals that do not join an interval from the second dataset are filled in with a period(.). This is analogous to a LEFT JOIN.
90 - **Return all records of second dataset (fill null with &quot;.&quot;)** returns all intervals of the second dataset, and any intervals that do not join an interval from the first dataset are filled in with a period(.). This is analogous to a RIGHT JOIN. **Note that this may produce an invalid interval file, since a period(.) is not a valid chrom, start, end or strand.**
91 - **Return all records of both datasets (fill nulls with &quot;.&quot;)** returns all records from both datasets, filling the missing side (left or right) of any record that has no join partner with periods. This is analogous to a FULL OUTER JOIN. **Note that this may produce an invalid interval file, since a period(.) is not a valid chrom, start, end or strand.**
92
93 -----
94
95 **Example**
96
97 If **First dataset** is::
98
99 chr1 10 100 Query1.1
100 chr1 500 1000 Query1.2
101 chr1 1100 1250 Query1.3
102
103 and **Second dataset** is::
104
105 chr1 20 80 Query2.1
106 chr1 2000 2204 Query2.2
107 chr1 2500 3000 Query2.3
108
109
110 The four return options will generate:
111
112
113 - **Return only records that are joined**::
114
115 chr1 10 100 Query1.1 chr1 20 80 Query2.1
116
117 - **Return all records of first dataset**::
118
119 chr1 10 100 Query1.1 chr1 20 80 Query2.1
120 chr1 500 1000 Query1.2 . . . .
121 chr1 1100 1250 Query1.3 . . . .
122
123 - **Return all records of second dataset**::
124
125 chr1 10 100 Query1.1 chr1 20 80 Query2.1
126 . . . . chr1 2000 2204 Query2.2
127 . . . . chr1 2500 3000 Query2.3
128
129 - **Return all records of both datasets**::
130
131 chr1 10 100 Query1.1 chr1 20 80 Query2.1
132 chr1 500 1000 Query1.2 . . . .
133 chr1 1100 1250 Query1.3 . . . .
134 . . . . chr1 2000 2204 Query2.2
135 . . . . chr1 2500 3000 Query2.3
136
137
138 </help>
139 </tool>