annotate tools/filters/joiner.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="join1" name="Join two Datasets" version="2.0.2">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>side by side on a specified field</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 <command interpreter="python">join.py $input1 $input2 $field1 $field2 $out_file1 $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file=$fill_options_file</command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 <param format="tabular" name="input1" type="data" label="Join"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 <param name="field1" label="using column" type="data_column" data_ref="input1" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 <param format="tabular" name="input2" type="data" label="with" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 <param name="field2" label="and column" type="data_column" data_ref="input2" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 <param name="unmatched" type="select" label="Keep lines of first input that do not join with second input">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 <option value="-u">Yes</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 <option value="" selected="true">No</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 <param name="partial" type="select" label="Keep lines of first input that are incomplete">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 <option value="-p">Yes</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 <option value="" selected="true">No</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 <conditional name="fill_empty_columns">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 <param name="fill_empty_columns_switch" type="select" label="Fill empty columns">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 <option value="no_fill" selected="True">No</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 <option value="fill_empty">Yes</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 <when value="no_fill">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 <!-- do nothing -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 <when value="fill_empty">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 <param type="select" name="fill_columns_by" label="Only fill unjoined rows">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 <option value="fill_unjoined_only" selected="True">Yes</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 <option value="fill_all">No</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 <conditional name="do_fill_empty_columns">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 <param name="column_fill_type" type="select" label="Fill Columns by">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 <option value="single_fill_value" selected="True">Single fill value</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 <option value="fill_value_by_column">Values by column</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 <when value="single_fill_value">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 <param type="text" name="fill_value" label="Fill value" value="."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 <when value="fill_value_by_column">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 <repeat name="column_fill1" title="Fill Column for Input 1">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 <param name="column_number1" label="Column" type="data_column" data_ref="input1" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 <param type="text" name="fill_value1" value="."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 </repeat>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 <repeat name="column_fill2" title="Fill Column for Input 2">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 <param name="column_number2" label="Column" type="data_column" data_ref="input2" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 <param type="text" name="fill_value2" value="."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 </repeat>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 <configfiles>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 <configfile name="fill_options_file">&lt;%
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 import simplejson
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 %&gt;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 #set $__fill_options = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 #if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 #set $__fill_options['fill_unjoined_only'] = $fill_empty_columns['fill_columns_by'].value == 'fill_unjoined_only'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 #else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 #set $__start_fill = ""
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 #set $__fill_options['file2_columns'] = [ __start_fill for i in range( int( $input2.metadata.columns ) ) ]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 #for column_fill1 in $fill_empty_columns['do_fill_empty_columns']['column_fill1']:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 #set $__fill_options['file1_columns'][ int( column_fill1['column_number1'].value ) - 1 ] = column_fill1['fill_value1'].value
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 #end for
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 #for column_fill2 in $fill_empty_columns['do_fill_empty_columns']['column_fill2']:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 #set $__fill_options['file2_columns'][ int( column_fill2['column_number2'].value ) - 1 ] = column_fill2['fill_value2'].value
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 #end for
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 ${simplejson.dumps( __fill_options )}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 </configfile>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 </configfiles>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 <data format="input" name="out_file1" metadata_source="input1" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 <param name="input1" value="1.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 <param name="input2" value="2.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 <param name="field1" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 <param name="field2" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 <param name="unmatched" value=""/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 <param name="partial" value=""/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 <param name="fill_empty_columns_switch" value="no_fill"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 <output name="out_file1" file="joiner_out1.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 <param name="input1" value="1.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 <param name="input2" value="2.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 <param name="field1" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 <param name="field2" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 <param name="unmatched" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 <param name="partial" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 <param name="fill_empty_columns_switch" value="no_fill"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 <output name="out_file1" file="joiner_out2.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 <param name="input1" value="1.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 <param name="input2" value="2.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 <param name="field1" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 <param name="field2" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 <param name="unmatched" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 <param name="partial" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 <param name="fill_empty_columns_switch" value="fill_empty"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 <param name="fill_columns_by" value="fill_all"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 <param name="column_fill_type" value="single_fill_value"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 <param name="fill_value" value="~"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 <output name="out_file1" file="joiner_out3.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 <param name="input1" value="1.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 <param name="input2" value="2.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 <param name="field1" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 <param name="field2" value="2"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 <param name="unmatched" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 <param name="partial" value="Yes"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 <param name="fill_empty_columns_switch" value="fill_empty"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 <param name="fill_columns_by" value="fill_all"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 <param name="column_fill_type" value="fill_value_by_column"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 <param name="column_number1" value="6"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 <param name="fill_value1" value="+"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 <param name="column_number2" value="1"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 <param name="fill_value2" value="NoChrom"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 <output name="out_file1" file="joiner_out4.bed"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 **This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 **TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 **Syntax**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 This tool joins lines of two datasets on a common field. An empty string ("") is not a valid identifier.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 You may choose to include lines of your first input that do not join with your second input.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 - Columns are referenced with a **number**. For example, **3** refers to the 3rd column of a tab-delimited file.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 **Example**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 Dataset1::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 chr1 10 20 geneA
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 chr1 50 80 geneB
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 chr5 10 40 geneL
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 Dataset2::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 geneA tumor-suppressor
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 geneB Foxp2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 geneC Gnas1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 geneE INK4a
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168 Joining the 4th column of Dataset1 with the 1st column of Dataset2 will yield::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 chr1 10 20 geneA geneA tumor-suppressor
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 chr1 50 80 geneB geneB Foxp2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 Joining the 4th column of Dataset1 with the 1st column of Dataset2, while keeping all lines from Dataset1, will yield::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175 chr1 10 20 geneA geneA tumor-suppressor
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 chr1 50 80 geneB geneB Foxp2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 chr5 10 40 geneL
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 </tool>