Mercurial > repos > miller-lab > genome_diversity
annotate dpmix.py @ 26:91e835060ad2
Updates to Admixture, Aggregate Individuals, and Restore Attributes to support gd_genotype
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 03 Jun 2013 12:29:29 -0400 |
parents | 248b06e86022 |
children | 8997f2ca8c7a |
rev | line source |
---|---|
12 | 1 #!/usr/bin/env python |
2 | |
3 import errno | |
4 import sys | |
5 import os | |
6 import subprocess | |
7 from Population import Population | |
8 import gd_composite | |
9 from dpmix_plot import make_dpmix_plot | |
10 from LocationFile import LocationFile | |
11 | |
12 ################################################################################ | |
13 | |
14 def mkdir_p(path): | |
15 try: | |
16 os.makedirs(path) | |
17 except OSError, e: | |
18 if e.errno <> errno.EEXIST: | |
19 raise | |
20 | |
21 def run_program(prog, args, stdout_file=None, space_to_tab=False): | |
22 #print "args: ", ' '.join(args) | |
23 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
24 (stdoutdata, stderrdata) = p.communicate() | |
25 rc = p.returncode | |
26 | |
27 if stdout_file is not None: | |
28 with open(stdout_file, 'w') as ofh: | |
29 lines = stdoutdata.split('\n') | |
30 for line in lines: | |
31 line = line.strip() | |
32 if line: | |
33 if space_to_tab: | |
34 line = line.replace(' ', '\t') | |
35 print >> ofh, line | |
36 | |
37 if rc != 0: | |
38 print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) | |
39 print >> sys.stderr, stderrdata | |
40 sys.exit(1) | |
41 | |
42 ################################################################################ | |
43 | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
44 if len(sys.argv) < 16: |
12 | 45 print "usage" |
46 sys.exit(1) | |
47 | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
48 input, input_type, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:15] |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
49 individual_metadata = sys.argv[15:] |
12 | 50 |
51 chrom = 'all' | |
52 add_logs = '0' | |
53 | |
54 loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) | |
55 location_file = LocationFile(loc_path) | |
56 heterochrom_path = location_file.get_values_if_exists(dbkey) | |
57 if heterochrom_path is None: | |
58 heterochrom_path = '/dev/null' | |
59 | |
60 population_list = [] | |
61 | |
62 p_total = Population() | |
63 p_total.from_tag_list(individual_metadata) | |
64 | |
65 ap1 = Population(name='Ancestral population 1') | |
66 ap1.from_population_file(ap1_input) | |
67 population_list.append(ap1) | |
68 if not p_total.is_superset(ap1): | |
69 print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table' | |
70 sys.exit(1) | |
71 | |
72 ap2 = Population(name='Ancestral population 2') | |
73 ap2.from_population_file(ap2_input) | |
74 population_list.append(ap2) | |
75 if not p_total.is_superset(ap2): | |
76 print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table' | |
77 sys.exit(1) | |
78 | |
79 p = Population(name='Potentially admixed') | |
80 p.from_population_file(p_input) | |
81 population_list.append(p) | |
82 if not p_total.is_superset(p): | |
83 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | |
84 sys.exit(1) | |
85 | |
86 mkdir_p(output2_dir) | |
87 | |
88 ################################################################################ | |
89 # Create tabular file | |
90 ################################################################################ | |
91 | |
92 misc_file = os.path.join(output2_dir, 'misc.txt') | |
93 | |
94 prog = 'dpmix' | |
95 args = [ prog ] | |
96 args.append(input) | |
97 args.append(ref_column) | |
98 args.append(chrom) | |
99 args.append(data_source) | |
100 args.append(add_logs) | |
101 args.append(switch_penalty) | |
102 args.append(heterochrom_path) | |
103 args.append(misc_file) | |
104 | |
105 columns = ap1.column_list() | |
106 for column in columns: | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
107 if input_type == 'gd_genotype': |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
108 args.append('{0}:1:{1}'.format(int(column) - 2, ap1.individual_with_column(column).name)) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
109 else: |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
110 args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name)) |
12 | 111 |
112 columns = ap2.column_list() | |
113 for column in columns: | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
114 if input_type == 'gd_genotype': |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
115 args.append('{0}:2:{1}'.format(int(column) - 2, ap2.individual_with_column(column).name)) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
116 else: |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
117 args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name)) |
12 | 118 |
119 columns = p.column_list() | |
120 for column in columns: | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
121 if input_type == 'gd_genotype': |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
122 args.append('{0}:0:{1}'.format(int(column) - 2, p.individual_with_column(column).name)) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
123 else: |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
124 args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name)) |
12 | 125 |
126 run_program(None, args, stdout_file=output, space_to_tab=True) | |
127 | |
128 ################################################################################ | |
129 # Create pdf file | |
130 ################################################################################ | |
131 | |
132 pdf_file = os.path.join(output2_dir, 'dpmix.pdf') | |
133 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir) | |
134 | |
135 ################################################################################ | |
136 # Create html | |
137 ################################################################################ | |
138 | |
139 info_page = gd_composite.InfoPage() | |
140 info_page.set_title('dpmix Galaxy Composite Dataset') | |
141 | |
142 display_file = gd_composite.DisplayFile() | |
143 display_value = gd_composite.DisplayValue() | |
144 | |
145 out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file) | |
146 out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file) | |
147 | |
148 info_page.add_output_parameter(out_pdf) | |
149 info_page.add_output_parameter(out_misc) | |
150 | |
151 if data_source == '0': | |
152 data_source_value = 'sequence coverage' | |
153 elif data_source == '1': | |
154 data_source_value = 'estimated genotype' | |
155 | |
156 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) | |
157 in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value) | |
158 | |
159 info_page.add_input_parameter(in_data_source) | |
160 info_page.add_input_parameter(in_switch_penalty) | |
161 | |
162 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
163 | |
164 info_page.add_misc(misc_populations) | |
165 | |
166 with open(output2, 'w') as ofh: | |
167 print >> ofh, info_page.render() | |
168 | |
169 sys.exit(0) | |
170 | |
171 |