comparison join_files_on_column_fuzzy.xml @ 1:8750c3125ec5 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit b1763d10a1c39bc6651be891a993989c5a5617ff
author bgruening
date Fri, 01 Dec 2017 16:26:59 -0500
parents 64469e7ecf9f
children f2068690addc
comparison
equal deleted inserted replaced
0:64469e7ecf9f 1:8750c3125ec5
1 <tool id="join_files_on_column_fuzzy" name="Join two files" version="1.0.0"> 1 <tool id="join_files_on_column_fuzzy" name="Join two files" version="1.0.0">
2 <description> 2 <description>
3 on column allowing a small difference 3 on column allowing a small difference
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="3.6">python</requirement> 6 <requirement type="package" version="3.6">python</requirement>
7 </requirements> 7 </requirements>
8 <command> 8 <command>
9 <![CDATA[ 9 <![CDATA[
10 python '$__tool_directory__/join_files_on_column_fuzzy.py' 10 python '$__tool_directory__/join_files_on_column_fuzzy.py'
11 --f1 '$f1' 11 --f1 '$f1'
13 --c1 $c1 13 --c1 $c1
14 --c2 $c2 14 --c2 $c2
15 --outfile '$merged_file' 15 --outfile '$merged_file'
16 $header 16 $header
17 $add_distance 17 $add_distance
18 #if $merge_mode.merge_mode_select == 'closest': 18 #if $merge_mode_select == 'closest':
19 --closest 19 --closest
20 #else: 20 #end if:
21 --distance $merge_mode.distance 21 --distance $distance
22 --unit $merge_mode.units 22 --unit $units
23 #end if 23
24 ]]> 24 ]]>
25 </command> 25 </command>
26 <inputs> 26 <inputs>
27 <param argument="--f1" type="data" optional="true" format="tabular" label="1st file" 27 <param argument="--f1" type="data" optional="true" format="tabular" label="1st file"
28 help=""/> 28 help=""/>
29 <param argument="--c1" type="data_column" data_ref="f1" label="Column to use from 1st file" help="The file needs to be sorted by this column, ascending."/> 29 <param argument="--c1" type="data_column" data_ref="f1" label="Column to use from 1st file" help="The file needs to be sorted by this column, ascending."/>
30 <param argument="--f2" type="data" optional="true" format="tabular" label="2nd file" 30 <param argument="--f2" type="data" optional="true" format="tabular" label="2nd file"
31 help=""/> 31 help=""/>
32 <param argument="--c2" type="data_column" data_ref="f2" label="Column to use from 2nd file" help="The file needs to be sorted by this column, ascending."/> 32 <param argument="--c2" type="data_column" data_ref="f2" label="Column to use from 2nd file" help="The file needs to be sorted by this column, ascending."/>
33 33
34 <param argument="--header" type="boolean" checked="false" truevalue="--header" falsevalue="" label="Does the input files contain a header line" /> 34 <param argument="--header" type="boolean" checked="false" truevalue="--header" falsevalue="" label="Does the input files contain a header line" />
35 <param argument="--add_distance" type="boolean" checked="false" truevalue="--add_distance" falsevalue="" label="Add an addional column with the calculated distance." /> 35 <param argument="--add_distance" type="boolean" checked="false" truevalue="--add_distance" falsevalue="" label="Add an addional column with the calculated distance." />
36 36
37 <conditional name="merge_mode"> 37 <param name="merge_mode_select" type="select" label="Choose the mode of merging.">
38 <param name="merge_mode_select" type="select" label="Choose the mode of merging."> 38 <option value="closest" selected="True">Best match (in case of multiple best matches, only the first one is reported)</option>
39 <option value="closest" selected="True">Best match (in case of multiple best matches, only the first one is reported)</option> 39 <option value="distance">Matching with a defined distance</option>
40 <option value="distance">Matching with a defined distance</option> 40 </param>
41 </param> 41 <param name="units" display="radio" type="select" value="ppm_value" label="Choose the metrics of your distance"
42 <when value="closest"/> 42 help="ppm is useful for very small differences">
43 <when value="distance"> 43 <option value="absolute" selected="True">Absolute distance</option>
44 <param name="units" display="radio" type="select" value="ppm_value" label="Choose the metrics of your distance" 44 <option value="ppm" >Distance in ppm</option>
45 help="ppm is useful for very small differences"> 45 </param>
46 <option value="absolute" selected="True">Absolute distance</option> 46 <param name="distance" value="0.2" type="float" label="Allowed distance between the two values that will trigger a merge" help=""/>
47 <option value="ppm" >Distance in ppm</option>
48 </param>
49 <param name="distance" value="0.2" type="float" label="Allowed distance between the two values that will trigger a merge" help=""/>
50 </when>
51 </conditional>
52 </inputs> 47 </inputs>
53 <outputs> 48 <outputs>
54 <data name="merged_file" format="tabular" /> 49 <data name="merged_file" format="tabular" />
55 </outputs> 50 </outputs>
56 51
80 <param name="f1" value="file1_header.tab" ftype="tabular"/> 75 <param name="f1" value="file1_header.tab" ftype="tabular"/>
81 <param name="f2" value="file2_header.tab" ftype="tabular"/> 76 <param name="f2" value="file2_header.tab" ftype="tabular"/>
82 <param name="c1" value="1"/> 77 <param name="c1" value="1"/>
83 <param name="c2" value="1"/> 78 <param name="c2" value="1"/>
84 <param name="header" value="true"/> 79 <param name="header" value="true"/>
85 <param name="closest" value="true"/> 80 <param name="merge_mode_select" value="closest"/>
86 <output name="merged_file" file="header_closest_result3.tab" /> 81 <output name="merged_file" file="header_closest_result3.tab" />
87 </test> 82 </test>
88 <test> 83 <test>
89 <param name="f1" value="file1_ppm.tab" ftype="tabular"/> 84 <param name="f1" value="file1_ppm.tab" ftype="tabular"/>
90 <param name="f2" value="file2_ppm.tab" ftype="tabular"/> 85 <param name="f2" value="file2_ppm.tab" ftype="tabular"/>
121 </tests> 116 </tests>
122 <help> 117 <help>
123 <![CDATA[ 118 <![CDATA[
124 119
125 Join two files on a common column. It is possible to provide an allowed difference between both values (currently only numbers) 120 Join two files on a common column. It is possible to provide an allowed difference between both values (currently only numbers)
126 as the absolute difference or as PPM. 121 as the absolute difference or as PPM.
127 122
128 Two modes are available: 123 Two modes are available:
129 124
130 1. In the **best match** mode, only the rows with the most similar (or identical) values are merged. In case of multiple best matches, only the first one is reported. 125 1. In the **best match** mode, only the rows with the most similar (or identical) values are merged. In case of multiple best matches, only the first one is reported.
131 126
132 1. The **Matching with a defined distance** option will offer you the possibility 127 2. The **Matching with a defined distance** option will offer you the possibility
133 to provide a distance between the two values of the columns. If the calculated distance is smaller than or equal to the given distance, the columns will be joined. You can specify the allowed distance as an absolute distance or as PPM. 128 to provide a distance between the two values of the columns. If the calculated distance is smaller than or equal to the given distance, the columns will be joined. You can specify the allowed distance as an absolute distance or as PPM.
134 129
135 130
136 ]]> 131 ]]>
137 </help> 132 </help>
138 <citations> 133 <citations>
139 </citations> 134 </citations>
140 </tool> 135 </tool>
141