comparison coverage_distributions.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 2c498d40ecde
children
comparison
equal deleted inserted replaced
26:91e835060ad2 27:8997f2ca8c7a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import gd_util
3 import os 4 import os
4 import errno
5 import sys 5 import sys
6 import shutil
7 import subprocess
8 from Population import Population 6 from Population import Population
9 import gd_composite 7 import gd_composite
10 8
11 ################################################################################ 9 ################################################################################
12 10
13 def mkdir_p(path): 11 if len(sys.argv) < 7:
14 try: 12 gd_util.die('Usage')
15 os.makedirs(path)
16 except OSError, e:
17 if e.errno <> errno.EEXIST:
18 raise
19 13
20 ################################################################################ 14 input, data_source, output, extra_files_path, ind_arg = sys.argv[1:6]
21 15
22 if len(sys.argv) < 7:
23 print >> sys.stderr, "Usage"
24 sys.exit(1)
25
26 input, data_source, output, extra_files_path = sys.argv[1:5]
27
28 individual_metadata = []
29 population_info = [] 16 population_info = []
30 p1_input = None 17 p1_input = None
31 all_individuals = False 18 all_individuals = False
32 19
33 for arg in sys.argv[5:]: 20 for arg in sys.argv[6:]:
34 if arg == 'all_individuals': 21 if arg == 'all_individuals':
35 all_individuals = True 22 all_individuals = True
36 elif len(arg) > 12 and arg[:12] == 'individuals:': 23 elif len(arg) > 12 and arg[:12] == 'individuals:':
37 p1_input = arg[12:] 24 p1_input = arg[12:]
38 elif len(arg) > 11: 25 elif len(arg) > 11 and arg[:11] == 'population:':
39 if arg[:11] == 'population:': 26 file, name = arg[11:].split(':', 1)
40 file, name = arg[11:].split(':', 1) 27 population_info.append((file, name))
41 population_info.append((file, name))
42 elif arg[:11] == 'individual:':
43 individual_metadata.append(arg[11:])
44 28
45 p_total = Population() 29 p_total = Population()
46 p_total.from_tag_list(individual_metadata) 30 p_total.from_wrapped_dict(ind_arg)
47 31
48 ################################################################################ 32 ################################################################################
49 33
50 mkdir_p(extra_files_path) 34 gd_util.mkdir_p(extra_files_path)
51 35
52 ################################################################################ 36 ################################################################################
53 37
54 prog = 'coverage' 38 prog = 'coverage'
55 39
56 args = [] 40 args = [ prog ]
57 args.append(prog)
58 args.append(input) 41 args.append(input)
59 args.append(data_source) 42 args.append(data_source)
60 43
61 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt') 44 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt')
62 args.append(user_coverage_file) 45 args.append(user_coverage_file)
70 this_pop = Population() 53 this_pop = Population()
71 this_pop.from_population_file(p1_input) 54 this_pop.from_population_file(p1_input)
72 population_list.append(this_pop) 55 population_list.append(this_pop)
73 p1.from_population_file(p1_input) 56 p1.from_population_file(p1_input)
74 if not p_total.is_superset(p1): 57 if not p_total.is_superset(p1):
75 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' 58 gd_util.die('There is an individual in the population that is not in the SNP table')
76 sys.exit(1)
77 tags = p1.tag_list() 59 tags = p1.tag_list()
78 else: 60 else:
79 tags = [] 61 tags = []
80 for population_file, population_name in population_info: 62 for population_file, population_name in population_info:
81 population = Population() 63 population = Population()
82 this_pop = Population() 64 this_pop = Population()
83 this_pop.from_population_file(population_file) 65 this_pop.from_population_file(population_file)
84 population_list.append(this_pop) 66 population_list.append(this_pop)
85 population.from_population_file(population_file) 67 population.from_population_file(population_file)
86 if not p_total.is_superset(population): 68 if not p_total.is_superset(population):
87 print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name) 69 gd_util.die('There is an individual in the {} population that is not in the SNP table'.format(population_name))
88 sys.exit(1)
89 columns = population.column_list() 70 columns = population.column_list()
90 for column in columns: 71 for column in columns:
91 tags.append('{0}:{1}'.format(column, population_name)) 72 tags.append('{0}:{1}'.format(column, population_name))
92 73
93 for tag in tags: 74 for tag in tags:
94 args.append(tag) 75 args.append(tag)
95 76
96 ## text output 77 ## text output
97 coverage_file = 'coverage.txt' 78 coverage_file = 'coverage.txt'
98 fh = open(coverage_file, 'w') 79 with open(coverage_file, 'w') as fh:
99 #print "args:", ' '.join(args) 80 gd_util.run_program(prog, args, stdout=fh)
100 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
101 rc = p.wait()
102 fh.close()
103 81
104 ## graphical output 82 ## graphical output
105 fh = open(coverage_file)
106 coverage2_file = 'coverage2.txt' 83 coverage2_file = 'coverage2.txt'
107 ofh = open(coverage2_file, 'w') 84 with open(coverage_file) as fh, open(coverage2_file, 'w') as ofh:
108 85 for line in fh:
109 for line in fh: 86 line = line.rstrip('\r\n')
110 line = line.rstrip('\r\n') 87 elems = line.split('\t')
111 elems = line.split('\t') 88 name = elems.pop(0)
112 name = elems.pop(0) 89 values = [ elems[0] ]
113 values = [ elems[0] ] 90 for idx in range(1, len(elems)):
114 for idx in range(1, len(elems)): 91 val = str(float(elems[idx]) - float(elems[idx-1]))
115 val = str(float(elems[idx]) - float(elems[idx-1])) 92 values.append(val)
116 values.append(val) 93 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
117 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
118
119 fh.close()
120 ofh.close()
121 94
122 ################################################################################ 95 ################################################################################
123 96
124 prog = 'R' 97 prog = 'Rscript'
125 98
126 args = [] 99 args = [ prog ]
127 args.append(prog)
128 args.append('--vanilla')
129 args.append('--quiet')
130 100
131 _realpath = os.path.realpath(__file__) 101 _realpath = os.path.realpath(__file__)
132 _script_dir = os.path.dirname(_realpath) 102 _script_dir = os.path.dirname(_realpath)
133 r_script_file = os.path.join(_script_dir, 'coverage_plot.r') 103 r_script_file = os.path.join(_script_dir, 'coverage_plot.r')
134 104 args.append(r_script_file)
135 ifh = open(r_script_file)
136 ofh = open('/dev/null', 'w')
137 #print "args:", ' '.join(args)
138 p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
139 rc = p.wait()
140 ifh.close()
141 ofh.close()
142 105
143 pdf_file = os.path.join(extra_files_path, 'coverage.pdf') 106 pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
144 shutil.copy2('coverage.pdf', pdf_file) 107 args.append(pdf_file)
145 os.remove('coverage.pdf') 108
146 os.remove(coverage2_file) 109 gd_util.run_program(prog, args)
147 110
148 ################################################################################ 111 ################################################################################
149 112
150 info_page = gd_composite.InfoPage() 113 info_page = gd_composite.InfoPage()
151 info_page.set_title('Coverage distributions Galaxy Composite Dataset') 114 info_page.set_title('Coverage distributions Galaxy Composite Dataset')
156 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file) 119 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file)
157 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file) 120 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file)
158 121
159 info_page.add_output_parameter(out_pdf) 122 info_page.add_output_parameter(out_pdf)
160 info_page.add_output_parameter(out_txt) 123 info_page.add_output_parameter(out_txt)
161
162 124
163 if data_source == '0': 125 if data_source == '0':
164 data_source_value = 'sequence coverage' 126 data_source_value = 'sequence coverage'
165 elif data_source == '1': 127 elif data_source == '1':
166 data_source_value = 'estimated genotype' 128 data_source_value = 'estimated genotype'
174 info_page.add_misc(misc_populations) 136 info_page.add_misc(misc_populations)
175 else: 137 else:
176 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) 138 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
177 info_page.add_misc(misc_individuals) 139 info_page.add_misc(misc_individuals)
178 140
179
180
181
182 with open (output, 'w') as ofh: 141 with open (output, 'w') as ofh:
183 print >> ofh, info_page.render() 142 print >> ofh, info_page.render()
184 143
185
186 sys.exit(0) 144 sys.exit(0)
187 145