comparison cgatools/tools/cgatools_1.5/join.xml @ 0:182426b32995 draft default tip

Uploaded
author completegenomics
date Mon, 18 Jun 2012 20:15:00 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:182426b32995
1 <tool id="cg_join" name="join(beta) 1.5" version="1.0.0">
2 <!--
3 This tool creates a GUI for the join function of cgatools from Complete Genomics, Inc.
4 written 6-18-2012 by bcrain@completegenomics.com
5 -->
6
7 <description>two tsv files based on equal fields or overlapping regions.</description> <!--description shown in the toolbar; it continues the tool name ("join ... two tsv files ...")-->
8
9 <requirements>
10 <requirement type="binary">cgatools</requirement> <!--the command section below invokes the cgatools executable-->
11 </requirements>
12
13 <command> <!--print the first line of cgatools' own output, then run the join; dataset paths and user-typed text values are double-quoted so the shell neither glob-expands (e.g. the default A.*,B.*) nor word-splits them; $dump stays unquoted because its empty value must add no argument-->
14 cgatools | head -1;
15 cgatools join --beta
16 --input "$inputA"
17 --input "$inputB"
18 --output "$output"
19 --output-mode $outmode
20 $dump
21 --select "$col"
22 #for $m in $matches <!--one exact-match column pair per repeat entry-->
23 --match "${m.match}"
24 #end for
25 #if $range_overlap.range == 'yes'
26 #for $o in $range_overlap.overlaps <!--one overlap range specification per repeat entry-->
27 --overlap "${o.overlap}"
28 #end for
29 --overlap-mode $range_overlap.overlapmode
30 --overlap-fraction-A $range_overlap.fractionA
31 --boundary-uncertainty-A $range_overlap.boundaryA
32 --overlap-fraction-B $range_overlap.fractionB
33 --boundary-uncertainty-B $range_overlap.boundaryB
34 #end if
35 </command>
36
37 <outputs>
38 <data format="tabular" name="output" /> <!--tabular dataset handed to cgatools join through --output-->
39 </outputs>
40
41 <inputs>
42 <!--form field to select input file A (first --input argument); the second validator looks up the dataset's dbkey in column 0 of cg_crr_files.loc and shows the message below on failure; the first presumably rejects datasets with no build set - TODO confirm-->
43 <param name="inputA" type="data" format="tabular" label="Select input file A ">
44 <validator type="unspecified_build" />
45 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
46 metadata_name="dbkey" metadata_column="0"
47 message="cgatools is not currently available for this build."/>
48 </param>
49
50 <!--form field to select input file B (second --input argument); the second validator looks up the dataset's dbkey in column 0 of cg_crr_files.loc and shows the message below on failure; the first presumably rejects datasets with no build set - TODO confirm-->
51 <param name="inputB" type="data" format="tabular" label="Select input file B ">
52 <validator type="unspecified_build" />
53 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
54 metadata_name="dbkey" metadata_column="0"
55 message="cgatools is not currently available for this build."/>
56 </param>
57
58 <!--form field to specify columns to print; the text is passed to cgatools join as the --select value-->
59 <param name="col" type="text" value="A.*,B.*" size="40" label="Specify columns for output" help="The default value A.*,B.* prints all columns from both files, other selections enter in the format A.col_name1,A.col_name3,B.col_name1" />
60
61 <!--form field to select output-mode; the chosen option value is passed to cgatools join as the --output-mode value-->
62 <param name="outmode" type="select" label="Select output mode">
63 <option value="full" selected="true">full (1 line for each match of records in A and B)</option>
64 <option value="compact">compact (1 line for each record in A, joining multiple records in B by semicolon)</option>
65 <option value="compact-pct">compact-pct (same as compact, annotated with % overlap)</option>
66 </param>
67
68 <!--form field to choose whether unmatched records of A are printed; the option value is inserted into the command line as-is, so the empty value adds no flag-->
69 <param name="dump" type="select" label="Select records to print">
70 <option value="--always-dump" selected="true">print all records of A even if not matched in B</option>
71 <option value="">print only records of A that are matched in B</option>
72 </param>
73
74 <!--repeatable form field to specify columns to match; the command emits one --match argument per entry-->
75 <repeat name="matches" title="Exact match column">
76 <param name="match" type="text" size="40" label="Enter column:column" help="Enter column_from_A:column_from_B, e.g. chromosome:chromosome"/>
77 </repeat>
78
79 <conditional name="range_overlap">
80 <param name="range" type="select" label="Do you want to match columns by overlapping range?">
81 <option value="no">no</option>
82 <option value="yes">yes</option>
83 </param>
84 <when value="no" /> <!--no extra fields and no overlap arguments when range matching is off-->
85 <when value="yes">
86 <!--repeatable form field to specify columns to overlap; the command emits one --overlap argument per entry-->
87 <repeat name="overlaps" title="Range column">
88 <param name="overlap" type="text" size="40" label="Enter column&#91;,column&#93;:column&#91;,column&#93;" help="Enter range_start_from_A&#91;,range_stop_from_A&#93;:range_start_from_B&#91;,range_stop_from_B&#93;, e.g. begin,end:begin,end (overlapping range of positions) or begin,end:position"/>
89 </repeat>
90
91 <!--form field to select overlap-mode; option texts follow the --overlap-mode reference in the help section (strict: A.begin < B.end and B.begin < A.end)-->
92 <param name="overlapmode" type="select" label="Select overlap mode">
93 <option value="strict" selected="true">strict (overlap if A.begin&lt;B.end and B.begin&lt;A.end)</option>
94 <option value="allow-abutting-points">allow-abutting-points (overlap if A.begin&lt;B.end and B.begin&lt;A.end, or if A.begin&lt;=B.end and B.begin&lt;=A.end and either A or B has zero length.)</option>
95 </param>
96
97 <!--form fields for the overlap filtering options; the two fractions are real numbers between 0 and 1, so they are float fields rather than integer fields-->
98 <param name="fractionA" type="float" value="0" min="0" max="1" label="Minimum fraction of A region overlap " />
99 <param name="boundaryA" type="integer" value="0" label="Boundary uncertainty for A for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-A * (A-range-length - boundary-uncertainty-A)"/>
100
101 <param name="fractionB" type="float" value="0" min="0" max="1" label="Minimum fraction of B region overlap " />
102 <param name="boundaryB" type="integer" value="0" label="Boundary uncertainty for B for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-B * (B-range-length - boundary-uncertainty-B)"/>
103 </when>
104 </conditional>
105 </inputs>
106
107 <help>
108
109 **What it does**
110
111 This tool joins two tab-delimited files based on equal fields or overlapping regions.
112
113 **cgatools 1.5.0 Documentation**
114
115 Userguide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf
116
117 Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf
118
119 **Command line reference**::
120
121 COMMAND NAME
122 join - Joins two tab-delimited files based on equal fields or overlapping regions.
123
124 DESCRIPTION
125 Joins two tab-delimited files based on equal fields or overlapping regions.
126 By default, an output record is produced for each match found between file
127 A and file B, but output format can be controlled by the --output-mode
128 parameter.
129
130 OPTIONS
131 -h [ --help ]
132 Print this help message.
133
134 --beta
135 This is a beta command. To run this command, you must pass the --beta
136 flag.
137
138 --input arg
139 File name to use as input (may be passed in as arguments at the end of
140 the command, or omitted for stdin). There must be exactly two input
141 files to join. If only one file is specified by name, file A is taken
142 to be stdin and file B is the named file. File B is read fully into
143 memory, and file A is streamed. File A's columns appear first in the
144 output.
145
146 --output arg (=STDOUT)
147 The output file name (may be omitted for stdout).
148
149 --match arg
150 A match specification, which is a column from A and a column from B
151 separated by a colon.
152
153 --overlap arg
154 Overlap specification. An overlap specification consists of a range
155 definition for files A and B, separated by a colon. A range definition
156 may be two columns, in which case they are interpreted as the beginning
157 and end of the range. Or it may be one column, in which case the range
158 is defined as the 1-base range starting at the given value. The records
159 from the two files must overlap in order to be considered for output.
160 Two ranges are considered to overlap if the overlap is at least one
161 base long, or if one of the ranges is length 0 and the ranges overlap
162 or abut. For example, "begin,end:offset" will match wherever end-begin
163 &gt; 0, begin&lt;offset+1, and end&gt;offset, or wherever end-begin = 0,
164 begin&lt;=offset+1, and end&gt;=offset.
165
166
167 -m [ --output-mode ] arg (=full)
168 Output mode, one of the following:
169 full Print an output record for each match found between
170 file A and file B.
171 compact Print at most one record for each record of file A,
172 joining the file B values by a semicolon and
173 suppressing repeated B values and empty B values.
174 compact-pct Same as compact, but for each distinct B value,
175 annotate with the percentage of the A record that is
176 overlapped by B records with that B value. Percentage
177 is rounded up to nearest integer.
178
179 --overlap-mode arg (=strict)
180 Overlap mode, one of the following:
181 strict Range A and B overlap if A.begin &lt; B.end and
182 B.begin &lt; A.end.
183 allow-abutting-points Range A and B overlap if they meet the strict
184 requirements, or if A.begin &lt;= B.end and
185 B.begin &lt;= A.end and either A or B has zero
186 length.
187
188 --select arg (=A.*,B.*)
189 Set of fields to select for output.
190
191 -a [ --always-dump ]
192 Dump every record of A, even if there are no matches with file B.
193
194 --overlap-fraction-A arg (=0)
195 Minimum fraction of A region overlap for filtering output.
196
197 --boundary-uncertainty-A arg (=0)
198 Boundary uncertainty for overlap filtering. Specifically, records
199 failing the following predicate are filtered away: overlap &gt;=
200 overlap-fraction-A * ( A-range-length - boundary-uncertainty-A )
201
202 --overlap-fraction-B arg (=0)
203 Minimum fraction of B region overlap for filtering output.
204
205 --boundary-uncertainty-B arg (=0)
206 Boundary uncertainty for overlap filtering. Specifically, records
207 failing the following predicate are filtered away: overlap &gt;=
208 overlap-fraction-B * ( B-range-length - boundary-uncertainty-B )
209
210 SUPPORTED FORMAT_VERSION
211 Any
212 </help>
213 </tool>