annotate dpmix.py @ 26:91e835060ad2

Updates to Admixture, Aggregate Individuals, and Restore Attributes to support gd_genotype
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 03 Jun 2013 12:29:29 -0400
parents 248b06e86022
children 8997f2ca8c7a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
2
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
3 import errno
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
4 import sys
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
5 import os
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
6 import subprocess
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
7 from Population import Population
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
8 import gd_composite
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
9 from dpmix_plot import make_dpmix_plot
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
10 from LocationFile import LocationFile
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
11
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
12 ################################################################################
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
13
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
def mkdir_p(path):
    """Create directory ``path`` together with any missing parent directories.

    Works like ``mkdir -p``: it is not an error if the directory is already
    there.

    Args:
        path: directory to create.

    Raises:
        OSError: if the directory cannot be created, including the case where
            ``path`` already exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # os.makedirs reports EEXIST for an existing regular file as well, so
        # only an existing *directory* is treated as success.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
20
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
def run_program(prog, args, stdout_file=None, space_to_tab=False):
    """Run an external command and optionally save a cleaned copy of its stdout.

    Args:
        prog: passed to ``subprocess.Popen`` as ``executable``; when None,
            Popen runs ``args[0]``.
        args: argument list for the command, including the program name.
        stdout_file: if not None, path of a file that receives the command's
            stdout with leading/trailing whitespace stripped from every line
            and empty lines dropped.
        space_to_tab: if true, every space remaining in a written line is
            replaced by a tab character.

    The output file is written before the return code is examined, so it is
    produced even when the command fails.  On a non-zero return code the
    command line and the captured stderr are reported on ``sys.stderr`` and
    the interpreter exits with status 1.
    """
    # universal_newlines=True makes both pipes text streams, so the captured
    # output is a str on Python 2 and on Python 3 alike.
    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         universal_newlines=True)
    (stdoutdata, stderrdata) = p.communicate()
    rc = p.returncode

    if stdout_file is not None:
        with open(stdout_file, 'w') as ofh:
            for line in stdoutdata.split('\n'):
                line = line.strip()
                if not line:
                    continue
                if space_to_tab:
                    line = line.replace(' ', '\t')
                ofh.write(line + '\n')

    if rc != 0:
        sys.stderr.write("FAILED: rc={0}: {1}\n".format(rc, ' '.join(args)))
        sys.stderr.write(stderrdata + '\n')
        sys.exit(1)
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
41
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
42 ################################################################################
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
43
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 12
diff changeset
# Command line: fourteen fixed positional arguments followed by at least one
# individual-metadata tag, hence the minimum argv length of 16.
if len(sys.argv) < 16:
    sys.stderr.write(
        'usage: {0} input input_type data_source switch_penalty ap1_input '
        'ap2_input p_input output output2 output2_dir dbkey ref_column '
        'galaxy_data_index_dir heterochromatin_loc_file '
        'individual_metadata [individual_metadata ...]\n'.format(
            os.path.basename(sys.argv[0])))
    sys.exit(1)

input, input_type, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:15]
individual_metadata = sys.argv[15:]

# Fixed values for the corresponding dpmix command-line arguments.
chrom = 'all'
add_logs = '0'

# Look up the heterochromatin file for this dbkey; when the lookup returns
# None, /dev/null is handed to dpmix instead.
loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file)
location_file = LocationFile(loc_path)
heterochrom_path = location_file.get_values_if_exists(dbkey)
if heterochrom_path is None:
    heterochrom_path = '/dev/null'


def _load_population(name, population_file, all_individuals, error_message):
    """Read a population file and require it to be covered by ``all_individuals``.

    Writes ``error_message`` to stderr and exits with status 1 when
    ``all_individuals.is_superset`` rejects the population that was read.
    """
    population = Population(name=name)
    population.from_population_file(population_file)
    if not all_individuals.is_superset(population):
        sys.stderr.write(error_message + '\n')
        sys.exit(1)
    return population


def _population_args(population, label, table_type):
    """Build the ``column:label:name`` dpmix arguments for one population.

    For a ``gd_genotype`` table the column number passed to dpmix is the
    population's column minus 2; otherwise the column is passed unchanged.
    The individual's name is always looked up with the original column.
    """
    result = []
    for column in population.column_list():
        name = population.individual_with_column(column).name
        if table_type == 'gd_genotype':
            result.append('{0}:{1}:{2}'.format(int(column) - 2, label, name))
        else:
            result.append('{0}:{1}:{2}'.format(column, label, name))
    return result


# Every individual listed in the table's metadata tags.
p_total = Population()
p_total.from_tag_list(individual_metadata)

ap1 = _load_population(
    'Ancestral population 1', ap1_input, p_total,
    'There is an individual in ancestral population 1 that is not in the SNP table')
ap2 = _load_population(
    'Ancestral population 2', ap2_input, p_total,
    'There is an individual in ancestral population 2 that is not in the SNP table')
p = _load_population(
    'Potentially admixed', p_input, p_total,
    'There is an individual in the population that is not in the SNP table')

population_list = [ap1, ap2, p]

mkdir_p(output2_dir)

################################################################################
# Create tabular file
################################################################################

misc_file = os.path.join(output2_dir, 'misc.txt')

prog = 'dpmix'
args = [
    prog,
    input,
    ref_column,
    chrom,
    data_source,
    add_logs,
    switch_penalty,
    heterochrom_path,
    misc_file,
]

# Individuals are tagged 1 and 2 for the two ancestral populations and 0 for
# the potentially admixed population.
args.extend(_population_args(ap1, '1', input_type))
args.extend(_population_args(ap2, '2', input_type))
args.extend(_population_args(p, '0', input_type))

run_program(None, args, stdout_file=output, space_to_tab=True)

################################################################################
# Create pdf file
################################################################################

pdf_file = os.path.join(output2_dir, 'dpmix.pdf')
make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir)

################################################################################
# Create html
################################################################################

info_page = gd_composite.InfoPage()
info_page.set_title('dpmix Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file)
out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file)

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_misc)

# Human-readable label for the data source.  An unrecognised code is shown
# as given instead of leaving data_source_value undefined (NameError).
data_source_labels = {
    '0': 'sequence coverage',
    '1': 'estimated genotype',
}
data_source_value = data_source_labels.get(data_source, data_source)

in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value)

info_page.add_input_parameter(in_data_source)
info_page.add_input_parameter(in_switch_penalty)

misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())

info_page.add_misc(misc_populations)

with open(output2, 'w') as ofh:
    ofh.write('{0}\n'.format(info_page.render()))

sys.exit(0)
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
170
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
171