comparison dpmix.py @ 12:4b6590dd7250

Uploaded
author miller-lab
date Wed, 12 Sep 2012 17:10:26 -0400
parents
children 248b06e86022
comparison
equal deleted inserted replaced
11:d4ec09e8079f 12:4b6590dd7250
1 #!/usr/bin/env python
2
3 import errno
4 import sys
5 import os
6 import subprocess
7 from Population import Population
8 import gd_composite
9 from dpmix_plot import make_dpmix_plot
10 from LocationFile import LocationFile
11
12 ################################################################################
13
14 def mkdir_p(path):
15 try:
16 os.makedirs(path)
17 except OSError, e:
18 if e.errno <> errno.EEXIST:
19 raise
20
21 def run_program(prog, args, stdout_file=None, space_to_tab=False):
22 #print "args: ", ' '.join(args)
23 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
24 (stdoutdata, stderrdata) = p.communicate()
25 rc = p.returncode
26
27 if stdout_file is not None:
28 with open(stdout_file, 'w') as ofh:
29 lines = stdoutdata.split('\n')
30 for line in lines:
31 line = line.strip()
32 if line:
33 if space_to_tab:
34 line = line.replace(' ', '\t')
35 print >> ofh, line
36
37 if rc != 0:
38 print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
39 print >> sys.stderr, stderrdata
40 sys.exit(1)
41
42 ################################################################################
43
44 if len(sys.argv) < 15:
45 print "usage"
46 sys.exit(1)
47
48 input, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:14]
49 individual_metadata = sys.argv[14:]
50
51 chrom = 'all'
52 add_logs = '0'
53
54 loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file)
55 location_file = LocationFile(loc_path)
56 heterochrom_path = location_file.get_values_if_exists(dbkey)
57 if heterochrom_path is None:
58 heterochrom_path = '/dev/null'
59
60 population_list = []
61
62 p_total = Population()
63 p_total.from_tag_list(individual_metadata)
64
65 ap1 = Population(name='Ancestral population 1')
66 ap1.from_population_file(ap1_input)
67 population_list.append(ap1)
68 if not p_total.is_superset(ap1):
69 print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table'
70 sys.exit(1)
71
72 ap2 = Population(name='Ancestral population 2')
73 ap2.from_population_file(ap2_input)
74 population_list.append(ap2)
75 if not p_total.is_superset(ap2):
76 print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table'
77 sys.exit(1)
78
79 p = Population(name='Potentially admixed')
80 p.from_population_file(p_input)
81 population_list.append(p)
82 if not p_total.is_superset(p):
83 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
84 sys.exit(1)
85
86 mkdir_p(output2_dir)
87
88 ################################################################################
89 # Create tabular file
90 ################################################################################
91
92 misc_file = os.path.join(output2_dir, 'misc.txt')
93
94 prog = 'dpmix'
95 args = [ prog ]
96 args.append(input)
97 args.append(ref_column)
98 args.append(chrom)
99 args.append(data_source)
100 args.append(add_logs)
101 args.append(switch_penalty)
102 args.append(heterochrom_path)
103 args.append(misc_file)
104
105 columns = ap1.column_list()
106 for column in columns:
107 args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name))
108
109 columns = ap2.column_list()
110 for column in columns:
111 args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name))
112
113 columns = p.column_list()
114 for column in columns:
115 args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name))
116
117 run_program(None, args, stdout_file=output, space_to_tab=True)
118
119 ################################################################################
120 # Create pdf file
121 ################################################################################
122
123 pdf_file = os.path.join(output2_dir, 'dpmix.pdf')
124 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir)
125
126 ################################################################################
127 # Create html
128 ################################################################################
129
130 info_page = gd_composite.InfoPage()
131 info_page.set_title('dpmix Galaxy Composite Dataset')
132
133 display_file = gd_composite.DisplayFile()
134 display_value = gd_composite.DisplayValue()
135
136 out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file)
137 out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file)
138
139 info_page.add_output_parameter(out_pdf)
140 info_page.add_output_parameter(out_misc)
141
142 if data_source == '0':
143 data_source_value = 'sequence coverage'
144 elif data_source == '1':
145 data_source_value = 'estimated genotype'
146
147 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
148 in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value)
149
150 info_page.add_input_parameter(in_data_source)
151 info_page.add_input_parameter(in_switch_penalty)
152
153 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
154
155 info_page.add_misc(misc_populations)
156
157 with open(output2, 'w') as ofh:
158 print >> ofh, info_page.render()
159
160 sys.exit(0)
161
162