Mercurial > repos > miller-lab > genome_diversity
comparison dpmix.xml @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 91e835060ad2 |
children | 4188853b940b |
comparison
equal
deleted
inserted
replaced
26:91e835060ad2 | 27:8997f2ca8c7a |
---|---|
1 <tool id="gd_dpmix" name="Admixture" version="1.1.0"> | 1 <tool id="gd_dpmix" name="Admixture" version="1.1.0"> |
2 <description>: Map genomic intervals resembling specified ancestral populations</description> | 2 <description>: Map genomic intervals resembling specified source populations</description> |
3 | 3 |
4 <command interpreter="python"> | 4 <command interpreter="python"> |
5 dpmix.py "$input" | 5 #import json |
6 #import base64 | |
7 #import zlib | |
8 #set $ind_names = $input.dataset.metadata.individual_names | |
9 #set $ind_colms = $input.dataset.metadata.individual_columns | |
10 #set $ind_dict = dict(zip($ind_names, $ind_colms)) | |
11 #set $ind_json = json.dumps($ind_dict, separators=(',',':')) | |
12 #set $ind_comp = zlib.compress($ind_json, 9) | |
13 #set $ind_arg = base64.b64encode($ind_comp) | |
14 dpmix.py '$input' | |
6 #if $input_type.choice == '0' | 15 #if $input_type.choice == '0' |
7 "gd_snp" "$input_type.data_source" | 16 'gd_snp' '$input_type.data_source' |
8 #else if $input_type.choice == '1' | 17 #else if $input_type.choice == '1' |
9 "gd_genotype" "1" | 18 'gd_genotype' '1' |
10 #end if | 19 #end if |
11 "$switch_penalty" "$ap1_input" "$ap2_input" "$p_input" "$output" "$output2" "$output2.files_path" "$input.dataset.metadata.dbkey" "$input.dataset.metadata.ref" "$GALAXY_DATA_INDEX_DIR" "gd.heterochromatic.loc" | 20 #if $third_pop.choice == '0' |
12 #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) | 21 #set $ap3_arg = '/dev/null' |
13 #set $arg = '%s:%s' % ($individual_col, $individual) | 22 #set $ap3_name_arg = '' |
14 "$arg" | 23 #else if $third_pop.choice == '1' |
15 #end for | 24 #set $ap3_arg = $third_pop.ap3_input |
25 #set $ap3_name_arg = $third_pop.ap3_input.name | |
26 #end if | |
27 #if $user_het.choice == '0' | |
28 #set $het_arg = 'use_installed' | |
29 #else if $user_het.choice == '1' | |
30 #set $het_arg = $user_het.het_file | |
31 #else if $user_het.choice == '2' | |
32 #set $het_arg = 'use_none' | |
33 #end if | |
34 '$switch_penalty' '$ap1_input' '$ap1_input.name' '$ap2_input' '$ap2_input.name' '$ap3_arg' '$ap3_name_arg' '$p_input' '$output' '$output2' '$output2.files_path' '$input.dataset.metadata.dbkey' '$input.dataset.metadata.ref' '$GALAXY_DATA_INDEX_DIR' 'gd.heterochromatic.loc' '$ind_arg' '$het_arg' '1' | |
16 </command> | 35 </command> |
17 | 36 |
18 <inputs> | 37 <inputs> |
19 <conditional name="input_type"> | 38 <conditional name="input_type"> |
20 <param name="choice" type="select" format="integer" label="Input format"> | 39 <param name="choice" type="select" format="integer" label="Input format"> |
36 <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> | 55 <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> |
37 </param> | 56 </param> |
38 </when> | 57 </when> |
39 </conditional> | 58 </conditional> |
40 | 59 |
41 <param name="ap1_input" type="data" format="gd_indivs" label="Ancestral population 1 individuals" /> | 60 <param name="ap1_input" type="data" format="gd_indivs" label="Source population 1 individuals" /> |
42 <param name="ap2_input" type="data" format="gd_indivs" label="Ancestral population 2 individuals" /> | 61 <param name="ap2_input" type="data" format="gd_indivs" label="Source population 2 individuals" /> |
62 | |
63 <conditional name="third_pop"> | |
64 <param name="choice" type="select" format="integer" label="Include third source population"> | |
65 <option value="0" selected="true">no</option> | |
66 <option value="1">yes</option> | |
67 </param> | |
68 <when value="0" /> | |
69 <when value="1"> | |
70 <param name="ap3_input" type="data" format="gd_indivs" label="Source population 3 individuals" /> | |
71 </when> | |
72 </conditional> | |
73 | |
43 <param name="p_input" type="data" format="gd_indivs" label="Potentially admixed individuals" /> | 74 <param name="p_input" type="data" format="gd_indivs" label="Potentially admixed individuals" /> |
44 | 75 |
45 <param name="switch_penalty" type="float" min="0" value="10" label="Genotype switch penalty" help="Note: Depends on the density of SNPs. For instance, with 50,000 SNPs in a vertebrate genome, 1.0 might be appropriate, with millions of SNPs, a value between 10 and 100 might be reasonable."/> | 76 <param name="switch_penalty" type="float" min="0" value="10" label="Genotype switch penalty" help="Note: Depends on the density of SNPs. For instance, with 50,000 SNPs in a vertebrate genome, 1.0 might be appropriate, with millions of SNPs, a value between 10 and 100 might be reasonable."/> |
77 | |
78 <conditional name="user_het"> | |
79 <param name="choice" type="select" format="integer" label="Heterochromatin info"> | |
80 <option value="0" selected="true">use installed</option> | |
81 <option value="1">use your own</option> | |
82 <option value="2">use none</option> | |
83 </param> | |
84 <when value="0" /> | |
85 <when value="1"> | |
86 <param name="het_file" type="data" format="txt" label="Heterochromatin dataset" /> | |
87 </when> | |
88 </conditional> | |
89 | |
90 <!-- | |
91 <param name="add_logs" type="select" format="integer" label="Probabilities"> | |
92 <option value="1" selected="true">add logs of probabilities</option> | |
93 <option value="0">add probabilities</option> | |
94 </param> | |
95 --> | |
96 | |
46 </inputs> | 97 </inputs> |
47 | 98 |
48 <outputs> | 99 <outputs> |
49 <data name="output" format="tabular" /> | 100 <data name="output" format="tabular" /> |
50 <data name="output2" format="html" /> | 101 <data name="output2" format="html" /> |
86 | 137 |
87 ----- | 138 ----- |
88 | 139 |
89 **What it does** | 140 **What it does** |
90 | 141 |
91 The user specifies two "ancestral" populations (i.e., sources for | 142 The user specifies two or three source populations (i.e., sources |
92 chromosomes) and a set of potentially admixed individuals, and chooses | 143 for chromosomes) and a set of potentially admixed individuals, and |
93 between the sequence coverage or the estimated genotypes to measure | 144 chooses between the sequence coverage or the estimated genotypes to |
94 the similarity of genomic intervals in admixed individuals to the two | 145 measure the similarity of genomic intervals in admixed individuals to |
95 classes of ancestral chromosomes. The user also picks a "genotype switch penalty", | 146 the two or three classes of source chromosomes. The user also specifies a |
96 typically between 10 and 100. For each potentially admixed individual, | 147 "switch penalty", controlling the strength of evidence needed to switch |
97 the program divides the genome into three "genotypes": (0) homozygous | 148 between source populations as the program scans along a chromosome. |
98 for the first ancestral population (i.e., both chromosomes from that | 149 Choice of an appropriate value depends on the number of SNPs and, to |
99 population), (1) heterozygous, or (2) homozygous for the second ancestral | 150 a lesser extent, on the time since the admixture events. With several |
100 population. Parts of a chromosome that are labeled as "heterochromatic" | 151 million SNPs genome-wide, reasonable values might fall between 10 |
101 are given the non-genotype "3". Smaller values of the switch penalty | 152 and 100. If there are 3 source populations, then for each potentially |
102 (corresponding to more ancient admixture events) generally lead to the | 153 admixed individual the program divides the genome into six "genotypes": |
103 reconstruction of more frequent changes between genotypes. | 154 |
155 1. homozygous for the first source population (i.e., both chromosomes from that population), | |
156 2. homozygous for the second source population, | |
157 3. homozygous for the third source population, | |
158 4. heterozygous for the first and second populations (i.e., one chromosome from each), | |
159 5. heterozygous for the first and third populations, or | |
160 6. heterozygous for the second and third populations. | |
161 | |
162 Parts of a reference chromosome that are labeled as "heterochromatic" | |
163 are given the "non-genotype" 0. With two source populations, only | |
164 "genotypes" 1, 2 and 3 are possible, where 3 now means heterozygous in | |
165 the two source populations. | |
104 | 166 |
105 There are two output datasets generated. A tabular dataset with chromosome, | 167 There are two output datasets generated. A tabular dataset with chromosome, |
106 start, stop, and pairs of columns containing the "genotypes" from above | 168 start, stop, and pairs of columns containing the "genotypes" from above |
107 and label from the admixed individual. The second dataset is a composite | 169 and label from the admixed individual. The second dataset is a composite |
108 dataset with general information from the run and a link to a pdf which | 170 dataset with general information from the run and a link to a pdf which |
109 graphically shows the ancestral population along each of the chromosomes. | 171 graphically shows the source population along each of the chromosomes. |
110 The second link is to a text file with summary information of the | 172 The second link is to a text file with summary information of the |
111 "genotypes" over the whole genome. | 173 "genotypes" over the whole genome. |
112 | |
113 </help> | 174 </help> |
114 </tool> | 175 </tool> |