annotate multi_join_left/multi_join_serial.xml @ 4:46c880ae6db2 draft

Uploaded
author mir-bioinf
date Wed, 15 Apr 2015 17:51:39 -0400
parents
children 1de2a8f041b3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
1 <tool id="Multi_Join_serial" name="Join multiple" version="0.0.1" force_history_refresh="True">
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
2 <description>tab delimited files serially</description>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
3 <!-- cms commenting out to troubleshoot -->
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
4 <command interpreter="perl">
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
5 #for $j, $s in enumerate( $Files )
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
6 #silent $j
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
7 #end for
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
8
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
9 #for $i, $s in enumerate( $Files )
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
10 run-multi_join_serial.pl --join_file $s.joinMe --join_col $s.joinCol --iteration $i --totalfiles $j --with_header $headerYes --resultsfile $Joined_all
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
11 ##print "loop iteration $i.\n";
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
12 ;
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
13 #end for
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
14 </command>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
15 <inputs>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
16 <repeat name="Files" title="Join file">
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
17 <param name="joinMe" type="data" checked="yes" format="tabular" label="Join" />
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
18 <param name="joinCol" label="using column" type="data_column" data_ref="joinMe" />
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
19 </repeat>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
20 <param name="headerYes" type="select" label="Treat first line as header?" help="If header starts with #, it will NOT be read, so this field should be set to no. Otherwise it can be set to yes if first line is header for ALL FILES.">
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
21 <option value="yes" selected="true">Yes</option>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
22 <option value="no">No</option>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
23 </param>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
24 </inputs>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
25 <outputs>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
26 <data format="tabular" name="Joined_all" label="Multi-Join result"/>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
27 </outputs>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
28 <tests>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
29 <test>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
30 <param name="Files_0|joinMe" value="multi_join_serial_in1.tab" ftype="tabular"/>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
31 <param name="Files_0|joinCol" value="1"/>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
32 <param name="Files_1|joinMe" value="multi_join_serial_in2.tab" ftype="tabular"/>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
33 <param name="Files_1|joinCol" value="1"/>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
34 <param name="Files_2|joinMe" value="multi_join_serial_in3.tab" ftype="tabular"/>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
35 <param name="Files_2|joinCol" value="2"/>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
36 <param name="headerYes" value="yes"/>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
37 <output name="Joined_all" value="multi_join_serial_out.tab" ftype="tabular"/>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
38 </test>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
39 </tests>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
40 <help>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
41
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
42 This tool performs a left-outer join on multiple (at least two) files using a perl script that Ron wrote (thanks, Ron!). The resulting joined file will have the same number of rows as the first file chosen and subsequent files' matches will be shown if present. Rows in the first file without matches in the other files will have empty cells. If none of the input files have a header present, a simple column number header will be added to the output file to denote the start of each set of matches (from each file, start denoted by "C1").
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
43
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
44
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
45 .. class:: warningmark
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
46
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
47 This tool may fail because the system runs out of memory, depending on the number and size of the input files and the number of matching lines. The higher all of these are, the more likely the tool is to fail. A red output dataset saying "Job killed" typically means the system ran into an out-of-memory error and the job was killed as a result. This issue has not yet been addressed.
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
48
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
49
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
50 **Steps:**
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
51 1. Click "Add new Join file" for each tab-delimited file you'd like to add, then choose the file and the column you want to join on.
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
52 2. After adding all files to join, select whether the headers should all be preserved (this should be Yes if all input datasets have headers).
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
53 3. Click Execute.
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
54
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
55 -----
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
56
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
57 **Example**
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
58
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
59 Dataset1::
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
60
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
61 chr1 10 20 geneA
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
62 chr1 50 80 geneB
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
63 chr5 10 40 geneL
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
64
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
65 Dataset2::
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
66
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
67 geneA tumor-suppressor
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
68 geneB Foxp2
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
69 geneC Gnas1
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
70 geneE INK4a
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
71
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
72 Joining the 4th column of Dataset1 with the 1st column of Dataset2, no header, will yield::
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
73
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
74 C1 C2 C3 C4 C1 C2
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
75 chr1 10 20 geneA geneA tumor-suppressor
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
76 chr1 50 80 geneB geneB Foxp2
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
77 chr5 10 40 geneL
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
78
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
79
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
80 </help>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
81
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
82
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
83 </tool>
46c880ae6db2 Uploaded
mir-bioinf
parents:
diff changeset
84