comparison tools/filters/joiner.xml @ 0:9071e359b9a3

author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 <tool id="join1" name="Join two Datasets" version="2.0.2">
2 <description>side by side on a specified field</description>
<!-- Command line: runs the join script under the Python interpreter. Positional arguments are the two input datasets, the join column of each, and the output dataset, followed by the values of the "unmatched" and "partial" selects (a flag or an empty string) and fixed index depth and buffer settings. The path of the generated fill_options_file config file is passed last. NOTE(review): the script name was missing, so Python would have been handed the first dataset as the script to run; join.py is the name used by the upstream Galaxy joiner tool. Confirm it is installed next to this XML file. -->
3 <command interpreter="python">join.py $input1 $input2 $field1 $field2 $out_file1 $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file=$fill_options_file</command>
<!-- Tool parameters: the two tabular datasets to join, the key column of each, two selects controlling which lines of the first input are kept, and optional settings for filling empty columns. -->
4 <inputs>
5 <param format="tabular" name="input1" type="data" label="Join"/>
6 <param name="field1" label="using column" type="data_column" data_ref="input1" />
7 <param format="tabular" name="input2" type="data" label="with" />
8 <param name="field2" label="and column" type="data_column" data_ref="input2" />
<!-- The value of each of the next two selects is either a flag ("-u" or "-p") when Yes is chosen, or an empty string when No is chosen (the default). -->
9 <param name="unmatched" type="select" label="Keep lines of first input that do not join with second input">
10 <option value="-u">Yes</option>
11 <option value="" selected="true">No</option>
12 </param>
13 <param name="partial" type="select" label="Keep lines of first input that are incomplete">
14 <option value="-p">Yes</option>
15 <option value="" selected="true">No</option>
16 </param>
<!-- Fill settings: the no_fill case (default) defines no extra parameters; the fill_empty case defines the parameters nested below. -->
17 <conditional name="fill_empty_columns">
18 <param name="fill_empty_columns_switch" type="select" label="Fill empty columns">
19 <option value="no_fill" selected="True">No</option>
20 <option value="fill_empty">Yes</option>
21 </param>
22 <when value="no_fill">
23 <!-- do nothing -->
24 </when>
25 <when value="fill_empty">
26 <param type="select" name="fill_columns_by" label="Only fill unjoined rows">
27 <option value="fill_unjoined_only" selected="True">Yes</option>
28 <option value="fill_all">No</option>
29 </param>
<!-- Choose between one fill value for every column (single_fill_value, default ".") and per-column values given through two repeat groups, one for each input (fill_value_by_column). -->
30 <conditional name="do_fill_empty_columns">
31 <param name="column_fill_type" type="select" label="Fill Columns by">
32 <option value="single_fill_value" selected="True">Single fill value</option>
33 <option value="fill_value_by_column">Values by column</option>
34 </param>
35 <when value="single_fill_value">
36 <param type="text" name="fill_value" label="Fill value" value="."/>
37 </when>
38 <when value="fill_value_by_column">
39 <repeat name="column_fill1" title="Fill Column for Input 1">
40 <param name="column_number1" label="Column" type="data_column" data_ref="input1" />
41 <param type="text" name="fill_value1" value="."/>
42 </repeat>
43 <repeat name="column_fill2" title="Fill Column for Input 2">
44 <param name="column_number2" label="Column" type="data_column" data_ref="input2" />
45 <param type="text" name="fill_value2" value="."/>
46 </repeat>
47 </when>
48 </conditional>
49 </when>
50 </conditional>
51 </inputs>
<!-- Generated config file "fill_options_file": the template below builds a dictionary of fill options and writes it out with simplejson.dumps. The dictionary stays empty unless fill_empty_columns_switch is fill_empty. Otherwise it holds fill_unjoined_only (true when fill_columns_by is fill_unjoined_only) plus file1_columns and file2_columns, one entry per column of each input according to its metadata. Every entry starts as the single fill value, or as an empty string in by-column mode, where each repeat entry then overwrites the slot at index (column number minus 1) with its own fill value. This comment sits outside the configfile element on purpose so that the template text is unchanged. -->
52 <configfiles>
53 <configfile name="fill_options_file">&lt;%
54 import simplejson
55 %&gt;
56 #set $__fill_options = {}
57 #if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty':
58 #set $__fill_options['fill_unjoined_only'] = $fill_empty_columns['fill_columns_by'].value == 'fill_unjoined_only'
59 #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value':
60 #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value
61 #else:
62 #set $__start_fill = ""
63 #end if
64 #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ]
65 #set $__fill_options['file2_columns'] = [ __start_fill for i in range( int( $input2.metadata.columns ) ) ]
66 #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column':
67 #for column_fill1 in $fill_empty_columns['do_fill_empty_columns']['column_fill1']:
68 #set $__fill_options['file1_columns'][ int( column_fill1['column_number1'].value ) - 1 ] = column_fill1['fill_value1'].value
69 #end for
70 #for column_fill2 in $fill_empty_columns['do_fill_empty_columns']['column_fill2']:
71 #set $__fill_options['file2_columns'][ int( column_fill2['column_number2'].value ) - 1 ] = column_fill2['fill_value2'].value
72 #end for
73 #end if
74 #end if
75 ${simplejson.dumps( __fill_options )}
76 </configfile>
77 </configfiles>
<!-- Single output dataset, out_file1. Its metadata is taken from input1 (metadata_source). NOTE(review): format="input" presumably means the output takes the datatype of the input; confirm against the Galaxy tool schema. -->
78 <outputs>
79 <data format="input" name="out_file1" metadata_source="input1" />
80 </outputs>
<!-- Functional tests. All four join 1.bed with 2.bed on column 2 of each file and differ only in the keep and fill settings. -->
81 <tests>
<!-- Test 1: unmatched and partial left empty (No), no filling; expected output joiner_out1.bed. -->
82 <test>
83 <param name="input1" value="1.bed"/>
84 <param name="input2" value="2.bed"/>
85 <param name="field1" value="2"/>
86 <param name="field2" value="2"/>
87 <param name="unmatched" value=""/>
88 <param name="partial" value=""/>
89 <param name="fill_empty_columns_switch" value="no_fill"/>
90 <output name="out_file1" file="joiner_out1.bed"/>
91 </test>
<!-- Test 2: unmatched and partial both set to Yes (given here by option label rather than option value), no filling; expected output joiner_out2.bed. -->
92 <test>
93 <param name="input1" value="1.bed"/>
94 <param name="input2" value="2.bed"/>
95 <param name="field1" value="2"/>
96 <param name="field2" value="2"/>
97 <param name="unmatched" value="Yes"/>
98 <param name="partial" value="Yes"/>
99 <param name="fill_empty_columns_switch" value="no_fill"/>
100 <output name="out_file1" file="joiner_out2.bed"/>
101 </test>
<!-- Test 3: same keep settings as test 2, with filling enabled for all rows (fill_all) using the single fill value "~"; expected output joiner_out3.bed. -->
102 <test>
103 <param name="input1" value="1.bed"/>
104 <param name="input2" value="2.bed"/>
105 <param name="field1" value="2"/>
106 <param name="field2" value="2"/>
107 <param name="unmatched" value="Yes"/>
108 <param name="partial" value="Yes"/>
109 <param name="fill_empty_columns_switch" value="fill_empty"/>
110 <param name="fill_columns_by" value="fill_all"/>
111 <param name="column_fill_type" value="single_fill_value"/>
112 <param name="fill_value" value="~"/>
113 <output name="out_file1" file="joiner_out3.bed"/>
114 </test>
<!-- Test 4: same keep settings as test 2, with per-column filling for all rows: column 6 of input 1 filled with "+", column 1 of input 2 filled with "NoChrom"; expected output joiner_out4.bed. -->
115 <test>
116 <param name="input1" value="1.bed"/>
117 <param name="input2" value="2.bed"/>
118 <param name="field1" value="2"/>
119 <param name="field2" value="2"/>
120 <param name="unmatched" value="Yes"/>
121 <param name="partial" value="Yes"/>
122 <param name="fill_empty_columns_switch" value="fill_empty"/>
123 <param name="fill_columns_by" value="fill_all"/>
124 <param name="column_fill_type" value="fill_value_by_column"/>
125 <param name="column_number1" value="6"/>
126 <param name="fill_value1" value="+"/>
127 <param name="column_number2" value="1"/>
128 <param name="fill_value2" value="NoChrom"/>
129 <output name="out_file1" file="joiner_out4.bed"/>
130 </test>
131 </tests>
132 <help>
134 .. class:: warningmark
136 **This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool.
138 .. class:: infomark
140 **TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
142 -----
144 **Syntax**
146 This tool joins lines of two datasets on a common field. An empty string ("") is not a valid identifier.
147 You may choose to include lines of your first input that do not join with your second input.
149 - Columns are referenced with a **number**. For example, **3** refers to the 3rd column of a tab-delimited file.
151 -----
153 **Example**
155 Dataset1::
157 chr1 10 20 geneA
158 chr1 50 80 geneB
159 chr5 10 40 geneL
161 Dataset2::
163 geneA tumor-suppressor
164 geneB Foxp2
165 geneC Gnas1
166 geneE INK4a
168 Joining the 4th column of Dataset1 with the 1st column of Dataset2 will yield::
170 chr1 10 20 geneA geneA tumor-suppressor
171 chr1 50 80 geneB geneB Foxp2
173 Joining the 4th column of Dataset1 with the 1st column of Dataset2, while keeping all lines from Dataset1, will yield::
175 chr1 10 20 geneA geneA tumor-suppressor
176 chr1 50 80 geneB geneB Foxp2
177 chr5 10 40 geneL
179 </help>
180 </tool>