comparison dpmix.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 248b06e86022
children a631c2f6d913
comparison
equal deleted inserted replaced
26:91e835060ad2 27:8997f2ca8c7a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import errno 3 import gd_util
4 import sys 4 import sys
5 import os 5 import os
6 import subprocess
7 from Population import Population 6 from Population import Population
8 import gd_composite 7 import gd_composite
9 from dpmix_plot import make_dpmix_plot 8 from dpmix_plot import make_dpmix_plot
10 from LocationFile import LocationFile 9 from LocationFile import LocationFile
11 10
12 ################################################################################ 11 ################################################################################
13 12
14 def mkdir_p(path): 13 if len(sys.argv) != 22:
15 try:
16 os.makedirs(path)
17 except OSError, e:
18 if e.errno <> errno.EEXIST:
19 raise
20
21 def run_program(prog, args, stdout_file=None, space_to_tab=False):
22 #print "args: ", ' '.join(args)
23 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
24 (stdoutdata, stderrdata) = p.communicate()
25 rc = p.returncode
26
27 if stdout_file is not None:
28 with open(stdout_file, 'w') as ofh:
29 lines = stdoutdata.split('\n')
30 for line in lines:
31 line = line.strip()
32 if line:
33 if space_to_tab:
34 line = line.replace(' ', '\t')
35 print >> ofh, line
36
37 if rc != 0:
38 print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
39 print >> sys.stderr, stderrdata
40 sys.exit(1)
41
42 ################################################################################
43
44 if len(sys.argv) < 16:
45 print "usage" 14 print "usage"
46 sys.exit(1) 15 sys.exit(1)
47 16
48 input, input_type, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:15] 17 input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:]
49 individual_metadata = sys.argv[15:] 18
19 if ap3_input == '/dev/null':
20 populations = 2
21 else:
22 populations = 3
50 23
51 chrom = 'all' 24 chrom = 'all'
52 add_logs = '0'
53 25
54 loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) 26 if het_arg == 'use_installed':
55 location_file = LocationFile(loc_path) 27 loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file)
56 heterochrom_path = location_file.get_values_if_exists(dbkey) 28 location_file = LocationFile(loc_path)
57 if heterochrom_path is None: 29 heterochrom_path = location_file.get_values_if_exists(dbkey)
30 if heterochrom_path is None:
31 heterochrom_path = '/dev/null'
32 elif het_arg == 'use_none':
58 heterochrom_path = '/dev/null' 33 heterochrom_path = '/dev/null'
34 else:
35 heterochrom_path = het_arg
59 36
60 population_list = [] 37 population_list = []
61 38
62 p_total = Population() 39 p_total = Population()
63 p_total.from_tag_list(individual_metadata) 40 p_total.from_wrapped_dict(ind_arg)
64 41
65 ap1 = Population(name='Ancestral population 1') 42 ap1 = Population(name='Ancestral population 1')
66 ap1.from_population_file(ap1_input) 43 ap1.from_population_file(ap1_input)
67 population_list.append(ap1) 44 population_list.append(ap1)
68 if not p_total.is_superset(ap1): 45 if not p_total.is_superset(ap1):
69 print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table' 46 gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table')
70 sys.exit(1)
71 47
72 ap2 = Population(name='Ancestral population 2') 48 ap2 = Population(name='Ancestral population 2')
73 ap2.from_population_file(ap2_input) 49 ap2.from_population_file(ap2_input)
74 population_list.append(ap2) 50 population_list.append(ap2)
75 if not p_total.is_superset(ap2): 51 if not p_total.is_superset(ap2):
76 print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table' 52 gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table')
77 sys.exit(1) 53
54 if populations == 3:
55 ap3 = Population(name='Ancestral population 3')
56 ap3.from_population_file(ap3_input)
57 population_list.append(ap3)
58 if not p_total.is_superset(ap3):
59 gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table')
78 60
79 p = Population(name='Potentially admixed') 61 p = Population(name='Potentially admixed')
80 p.from_population_file(p_input) 62 p.from_population_file(p_input)
81 population_list.append(p) 63 population_list.append(p)
82 if not p_total.is_superset(p): 64 if not p_total.is_superset(p):
83 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' 65 gd_util.die('There is an individual in the population that is not in the SNP table')
84 sys.exit(1)
85 66
86 mkdir_p(output2_dir) 67 gd_util.mkdir_p(output2_dir)
87 68
88 ################################################################################ 69 ################################################################################
89 # Create tabular file 70 # Create tabular file
90 ################################################################################ 71 ################################################################################
91 72
92 misc_file = os.path.join(output2_dir, 'misc.txt') 73 misc_file = os.path.join(output2_dir, 'summary.txt')
93 74
94 prog = 'dpmix' 75 prog = 'dpmix'
76
95 args = [ prog ] 77 args = [ prog ]
96 args.append(input) 78 args.append(input)
97 args.append(ref_column) 79 args.append(ref_column)
98 args.append(chrom) 80 args.append(chrom)
99 args.append(data_source) 81 args.append(data_source)
102 args.append(heterochrom_path) 84 args.append(heterochrom_path)
103 args.append(misc_file) 85 args.append(misc_file)
104 86
105 columns = ap1.column_list() 87 columns = ap1.column_list()
106 for column in columns: 88 for column in columns:
89 col = int(column)
90 name = ap1.individual_with_column(column).name
91 first_token = name.split()[0]
107 if input_type == 'gd_genotype': 92 if input_type == 'gd_genotype':
108 args.append('{0}:1:{1}'.format(int(column) - 2, ap1.individual_with_column(column).name)) 93 col -= 2
109 else: 94 args.append('{0}:1:{1}'.format(col, first_token))
110 args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name))
111 95
112 columns = ap2.column_list() 96 columns = ap2.column_list()
113 for column in columns: 97 for column in columns:
98 col = int(column)
99 name = ap2.individual_with_column(column).name
100 first_token = name.split()[0]
114 if input_type == 'gd_genotype': 101 if input_type == 'gd_genotype':
115 args.append('{0}:2:{1}'.format(int(column) - 2, ap2.individual_with_column(column).name)) 102 col -= 2
116 else: 103 args.append('{0}:2:{1}'.format(col, first_token))
117 args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name)) 104
105 if populations == 3:
106 columns = ap3.column_list()
107 for column in columns:
108 col = int(column)
109 name = ap3.individual_with_column(column).name
110 first_token = name.split()[0]
111 if input_type == 'gd_genotype':
112 col -= 2
113 args.append('{0}:3:{1}'.format(col, first_token))
118 114
119 columns = p.column_list() 115 columns = p.column_list()
120 for column in columns: 116 for column in columns:
117 col = int(column)
118 name = p.individual_with_column(column).name
119 first_token = name.split()[0]
121 if input_type == 'gd_genotype': 120 if input_type == 'gd_genotype':
122 args.append('{0}:0:{1}'.format(int(column) - 2, p.individual_with_column(column).name)) 121 col -= 2
123 else: 122 args.append('{0}:0:{1}'.format(col, first_token))
124 args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name))
125 123
126 run_program(None, args, stdout_file=output, space_to_tab=True) 124 with open(output, 'w') as fh:
125 gd_util.run_program(prog, args, stdout=fh)
127 126
128 ################################################################################ 127 ################################################################################
129 # Create pdf file 128 # Create pdf file
130 ################################################################################ 129 ################################################################################
131 130
132 pdf_file = os.path.join(output2_dir, 'dpmix.pdf') 131 if populations == 3:
133 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir) 132 state2name = {
133 0:'heterochromatin',
134 1:ap1_name,
135 2:ap2_name,
136 3:ap3_name
137 }
138 else:
139 state2name = {
140 0:'heterochromatin',
141 1:ap1_name,
142 2:ap2_name
143 }
144
145 pdf_file = os.path.join(output2_dir, 'picture.pdf')
146 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir, state2name=state2name, populations=populations)
134 147
135 ################################################################################ 148 ################################################################################
136 # Create html 149 # Create html
137 ################################################################################ 150 ################################################################################
138 151
140 info_page.set_title('dpmix Galaxy Composite Dataset') 153 info_page.set_title('dpmix Galaxy Composite Dataset')
141 154
142 display_file = gd_composite.DisplayFile() 155 display_file = gd_composite.DisplayFile()
143 display_value = gd_composite.DisplayValue() 156 display_value = gd_composite.DisplayValue()
144 157
145 out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file) 158 out_pdf = gd_composite.Parameter(name='picture.pdf', value='picture.pdf', display_type=display_file)
146 out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file) 159 out_misc = gd_composite.Parameter(name='summary.txt', value='summary.txt', display_type=display_file)
147 160
148 info_page.add_output_parameter(out_pdf) 161 info_page.add_output_parameter(out_pdf)
149 info_page.add_output_parameter(out_misc) 162 info_page.add_output_parameter(out_misc)
150 163
151 if data_source == '0': 164 if data_source == '0':
166 with open(output2, 'w') as ofh: 179 with open(output2, 'w') as ofh:
167 print >> ofh, info_page.render() 180 print >> ofh, info_page.render()
168 181
169 sys.exit(0) 182 sys.exit(0)
170 183
171