Mercurial > repos > completegenomics > cg_cgatools_linux
diff cgatools/tools/cgatools_1.5/join.xml @ 0:182426b32995 draft default tip
Uploaded
author | completegenomics |
---|---|
date | Mon, 18 Jun 2012 20:15:00 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cgatools/tools/cgatools_1.5/join.xml Mon Jun 18 20:15:00 2012 -0400 @@ -0,0 +1,213 @@ +<tool id="cg_join" name="join(beta) 1.5" version="1.0.0"> +<!-- +This tool creates a GUI for the join function of cgatools from Complete Genomics, Inc. +written 6-18-2012 by bcrain@completegenomics.com +--> + + <description>two tsv files based on equal fields or overlapping regions.</description> <!--adds description in toolbar--> + + <requirements> + <requirement type="binary">cgatools</requirement> + </requirements> + + <command> <!--run executable--> + cgatools | head -1; + cgatools join --beta + --input $inputA + --input $inputB + --output $output + --output-mode $outmode + $dump + --select $col + #for $m in $matches <!--get all matched columns--> + --match ${m.match} + #end for + #if $range_overlap.range == 'yes' + #for $o in $range_overlap.overlaps <!--get all matched columns--> + --overlap ${o.overlap} + #end for + --overlap-mode $range_overlap.overlapmode + --overlap-fraction-A $range_overlap.fractionA + --boundary-uncertainty-A $range_overlap.boundaryA + --overlap-fraction-B $range_overlap.fractionB + --boundary-uncertainty-B $range_overlap.boundaryB + #end if + </command> + + <outputs> + <data format="tabular" name="output" /> + </outputs> + + <inputs> + <!--form field to select input file A--> + <param name="inputA" type="data" format="tabular" label="Select input file A "> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="0" + message="cgatools is not currently available for this build."/> + </param> + + <!--form field to select input file B--> + <param name="inputB" type="data" format="tabular" label="Select input file B "> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" + metadata_name="dbkey" metadata_column="0" + message="cgatools is not currently available for this build."/> + </param> + + <!--form field to specify columns to print--> + <param name="col" type="text" value="A.*,B.*" size="40" label="Specify columns for output" help="The default value A.*,B.* prints all columns from both files, other selections enter in the format A.col_name1,A.col_name3,B.col_name1" /> + + <!--form field to select output-mode--> + <param name="outmode" type="select" label="Select output mode"> + <option value="full" selected="true">full (1 line for each match of records in A and B)</option> + <option value="compact">compact (1 line for each record in A, joining multiple records in B by semicolon)</option> + <option value="compact-pct">compact-pct (same as compact, annotated with % overlap)</option> + </param> + + <!--form field to select columns to match--> + <param name="dump" type="select" label="Select records to print"> + <option value="--always-dump" selected="true">print all records of A even if not matched in B</option> + <option value="">print only records of A that are matched in B</option> + </param> + + <!--form field to specify columns to match--> + <repeat name="matches" title="Exact match column"> + <param name="match" type="text" size="40" label="Enter column:column" help="Enter column_from_A:column_from_B, e.g. chromosome:chromosome"/> + </repeat> + + <conditional name="range_overlap"> + <param name="range" type="select" label="Do you want to match columns by overlapping range?"> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + + <when value="yes"> + <!--form field to specify columns to overlap--> + <repeat name="overlaps" title="Range column"> + <param name="overlap" type="text" size="40" label="Enter column[,column]:column[,column]" help="Enter range_start_from_A[,range_stop_from_A]:range_start_from_B[,range_stop_from_B], e.g. begin,end:begin,end (overlapping range of positions) or begin,end:position"/> + </repeat> + + <!--form field to select overlap-mode--> + <param name="overlapmode" type="select" label="Select overlap mode"> + <option value="strict" selected="true">strict (overlap if A.begin<B.end and B.begin>A.end)</option> + <option value="allow-abutting-points">allow-abutting-points (overlap if A.begin<B.end and B.begin>A.end, or if A.begin<=B.end and B.begin<=A.end and either A or B has zero length.)</option> + </param> + + <!--form fields to overlap options--> + <param name="fractionA" type="integer" value="0" label="Minimum fraction of A region overlap " /> + <param name="boundaryA" type="integer" value="0" label="Boundary uncertainty for A for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-A * (A-range-length - boundary-uncertainty-A)"/> + + <param name="fractionB" type="integer" value="0" label="Minimum fraction of B region overlap " /> + <param name="boundaryB" type="integer" value="0" label="Boundary uncertainty for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-B * (B-range-length - boundary-uncertainty-B)"/> + </when> + </conditional> + </inputs> + + <help> + +**What it does** + +This tool joins two tab-delimited files based on equal fields or overlapping regions. + +**cgatools 1.5.0 Documentation** + +Userguide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf + +Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf + +**Command line reference**:: + + COMMAND NAME + join - Joins two tab-delimited files based on equal fields or overlapping regions. + + DESCRIPTION + Joins two tab-delimited files based on equal fields or overlapping regions. + By default, an output record is produced for each match found between file + A and file B, but output format can be controlled by the --output-mode + parameter. + + OPTIONS + -h [ --help ] + Print this help message. + + --beta + This is a beta command. To run this command, you must pass the --beta + flag. + + --input arg + File name to use as input (may be passed in as arguments at the end of + the command), or omitted for stdin). There must be exactly two input + files to join. If only one file is specified by name, file A is taken + to be stdin and file B is the named file. File B is read fully into + memory, and file A is streamed. File A's columns appear first in the + output. + + --output arg (=STDOUT) + The output file name (may be omitted for stdout). + + --match arg + A match specification, which is a column from A and a column from B + separated by a colon. + + --overlap arg + Overlap specification. An overlap specification consists of a range + definition for files A and B, separated by a colon. A range definition + may be two columns, in which case they are interpreted as the beginning + and end of the range. Or it may be one column, in which case the range + is defined as the 1-base range starting at the given value. The records + from the two files must overlap in order to be considered for output. + Two ranges are considered to overlap if the overlap is at least one + base long, or if one of the ranges is length 0 and the ranges overlap + or abut. For example, "begin,end:offset" will match wherever end-begin + > 0, begin<offset+1, and end>offset, or wherever end-begin = 0, + begin<=offset+1, and end>=offset. + + + -m [ --output-mode ] arg (=full) + Output mode, one of the following: + full Print an output record for each match found between + file A and file B. + compact Print at most one record for each record of file A, + joining the file B values by a semicolon and + suppressing repeated B values and empty B values. + compact-pct Same as compact, but for each distinct B value, + annotate with the percentage of the A record that is + overlapped by B records with that B value. Percentage + is rounded up to nearest integer. + + --overlap-mode arg (=strict) + Overlap mode, one of the following: + strict Range A and B overlap if A.begin < B.end and + B.begin < A.end. + allow-abutting-points Range A and B overlap they meet the strict + requirements, or if A.begin <= B.end and + B.begin <= A.end and either A or B has zero + length. + + --select arg (=A.*,B.*) + Set of fields to select for output. + + -a [ --always-dump ] + Dump every record of A, even if there are no matches with file B. + + --overlap-fraction-A arg (=0) + Minimum fraction of A region overlap for filtering output. + + --boundary-uncertainty-A arg (=0) + Boundary uncertainty for overlap filtering. Specifically, records + failing the following predicate are filtered away: overlap >= + overlap-fraction-A * ( A-range-length - boundary-uncertainty-A ) + + --overlap-fraction-B arg (=0) + Minimum fraction of B region overlap for filtering output. + + --boundary-uncertainty-B arg (=0) + Boundary uncertainty for overlap filtering. Specifically, records + failing the following predicate are filtered away: overlap >= + overlap-fraction-B * ( B-range-length - boundary-uncertainty-B ) + + SUPPORTED FORMAT_VERSION + Any + </help> +</tool>