annotate specify.xml @ 32:03c22b722882

remove BeautifulSoup dependency
author Richard Burhans <burhans@bx.psu.edu>
date Fri, 20 Sep 2013 13:54:23 -0400
parents 8997f2ca8c7a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
1 <tool id="gd_specify" name="Specify Individuals" version="1.1.0">
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
2 <description>: Define a collection of individuals from a gd_snp or gd_genotype dataset</description>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
3
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
4 <command interpreter="python">
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
5 #import json
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
6 #import base64
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
7 #import zlib
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
8 #set $ind_names = $input.dataset.metadata.individual_names
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
9 #set $ind_colms = $input.dataset.metadata.individual_columns
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
10 #set $ind_dict = dict(zip($ind_names, $ind_colms))
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
11 #set $ind_json = json.dumps($ind_dict, separators=(',',':'))
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
12 #set $ind_comp = zlib.compress($ind_json, 9)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
13 #set $ind_arg = base64.b64encode($ind_comp)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
14 #set $cb_string = str($individuals).strip()
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
15 #if $cb_string != 'None'
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
16 #set $cb_dict = dict.fromkeys($cb_string.split('\t'))
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
17 #for $cb_name in $cb_dict:
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
18 #set $cb_idx = $input.dataset.metadata.individual_names.index($cb_name)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
19 #set $cb_dict[$cb_name] = str($input.dataset.metadata.individual_columns[$cb_idx])
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
20 #end for
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
21 #else
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
22 #set $cb_dict = dict()
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
23 #end if
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
24 #set $cb_json = json.dumps($cb_dict, separators=(',',':'))
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
25 #set $cb_comp = zlib.compress($cb_json, 9)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
26 #set $cb_arg = base64.b64encode($cb_comp)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
27 #set $str_string = str($string).strip()
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
28 #set $str_comp = zlib.compress($str_string, 9)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
29 #set $str_arg = base64.b64encode($str_comp)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
30 specify.py '$input' '$output' '$ind_arg' '$cb_arg' '$str_arg'
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
31 </command>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
32
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
33 <inputs>
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
34 <param name="input" type="data" format="gd_snp,gd_genotype" label="SNP or Genotype dataset"/>
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
35 <param name="individuals" type="select" display="checkboxes" multiple="true" separator="&#9;" label="Individuals to include">
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
36 <options>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
37 <filter type="data_meta" ref="input" key="individual_names" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
38 </options>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
39 </param>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
40 <param name="outname" type="text" size="20" label="Label for this collection">
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
41 <validator type="empty_field" message="You must enter a label."/>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
42 <!-- used to be "Individuals from ${input.hid}" -->
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
43 </param>
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
44 <param name="string" type="text" area="true" size="5x40" label="Individuals to include (pasted names)">
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
45 <sanitizer>
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
46 <valid initial="string.printable"/>
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
47 </sanitizer>
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
48 </param>
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
49 </inputs>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
50
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
51 <outputs>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
52 <data name="output" format="gd_indivs" label="${outname}" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
53 </outputs>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
54
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
55 <tests>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
56 <test>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
57 <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
58 <param name="individuals" value="PB1,PB2" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
59 <output name="output" file="test_in/a.gd_indivs" />
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
60 </test>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
61 </tests>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
62
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
63 <help>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
64
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
65 **Dataset formats**
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
66
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
67 The input dataset is in gd_snp_ or gd_genotype_ format;
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
68 the output is in gd_indivs_ format. (`Dataset missing?`_)
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
69
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
70 .. _gd_snp: ./static/formatHelp.html#gd_snp
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
71 .. _gd_genotype: ./static/formatHelp.html#gd_genotype
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
72 .. _gd_indivs: ./static/formatHelp.html#gd_indivs
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
73 .. _Dataset missing?: ./static/formatHelp.html
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
74
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
75 -----
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
76
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
77 **What it does**
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
78
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
79 This tool makes a list of selected entities, i.e., the sets of four
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
80 columns representing individuals or groups from a gd_snp dataset, or
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
81 sets of single columns in a gd_genotype file. It does not copy the
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
82 data; it just records which entities should be considered as belonging
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
83 to some collection or population. The label you specify is used to
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
84 name the output dataset in your history. This list can then be used
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
85 to instruct other tools to work on just part of the original gd_snp or
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
86 gd_genotype dataset. The entities can be specified with the checklist
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
87 and/or by pasting their names (possibly with extraneous characters, as
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
88 in a portion of the Newick-format output of the Phylogenetic Tree tool)
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 13
diff changeset
89 into the box provided at the bottom of the page.
13
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
90
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
91 -----
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
92
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
93 **Example**
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
94
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
95 - input::
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
96
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
97 Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
98 Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
99 Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
100 etc.
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
101
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
102 - input metadata::
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
103
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
104 #{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc",
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
105 #"1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q",
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
106 #"pair","dist","prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
107 #"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
108
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
109 - output when individuals PB1, PB2, and PB3 are selected::
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
110
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
111 9 PB1
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
112 13 PB2
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
113 17 PB3
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
114
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
115 </help>
fdb4240fb565 Uploaded Miller Lab Devshed version a51c894f5bed
miller-lab
parents:
diff changeset
116 </tool>