annotate coverage_distributions.py @ 0:2c498d40ecde

Uploaded
author miller-lab
date Mon, 09 Apr 2012 12:03:06 -0400
parents
children 8997f2ca8c7a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
3 import os
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4 import errno
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
5 import sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
6 import shutil
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
7 import subprocess
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8 from Population import Population
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
9 import gd_composite
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
10
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
11 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
12
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
13 def mkdir_p(path):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
14 try:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
15 os.makedirs(path)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
16 except OSError, e:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
17 if e.errno <> errno.EEXIST:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
18 raise
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
19
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
20 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
21
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
22 if len(sys.argv) < 7:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
23 print >> sys.stderr, "Usage"
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
24 sys.exit(1)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
25
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
26 input, data_source, output, extra_files_path = sys.argv[1:5]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
27
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
28 individual_metadata = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
29 population_info = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
30 p1_input = None
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
31 all_individuals = False
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
32
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
33 for arg in sys.argv[5:]:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
34 if arg == 'all_individuals':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
35 all_individuals = True
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
36 elif len(arg) > 12 and arg[:12] == 'individuals:':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
37 p1_input = arg[12:]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
38 elif len(arg) > 11:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
39 if arg[:11] == 'population:':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
40 file, name = arg[11:].split(':', 1)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
41 population_info.append((file, name))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
42 elif arg[:11] == 'individual:':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
43 individual_metadata.append(arg[11:])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
44
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
45 p_total = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
46 p_total.from_tag_list(individual_metadata)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
47
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
48 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
49
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
50 mkdir_p(extra_files_path)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
51
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
52 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
53
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
54 prog = 'coverage'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
55
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
56 args = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
57 args.append(prog)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
58 args.append(input)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
59 args.append(data_source)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
60
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
61 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
62 args.append(user_coverage_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
63
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
64 population_list = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
65
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
66 if all_individuals:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
67 tags = p_total.tag_list()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
68 elif p1_input is not None:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
69 p1 = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
70 this_pop = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
71 this_pop.from_population_file(p1_input)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
72 population_list.append(this_pop)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
73 p1.from_population_file(p1_input)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
74 if not p_total.is_superset(p1):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
75 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
76 sys.exit(1)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
77 tags = p1.tag_list()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
78 else:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
79 tags = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
80 for population_file, population_name in population_info:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
81 population = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
82 this_pop = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
83 this_pop.from_population_file(population_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
84 population_list.append(this_pop)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
85 population.from_population_file(population_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
86 if not p_total.is_superset(population):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
87 print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
88 sys.exit(1)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
89 columns = population.column_list()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
90 for column in columns:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
91 tags.append('{0}:{1}'.format(column, population_name))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
92
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
93 for tag in tags:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
94 args.append(tag)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
95
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
96 ## text output
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
97 coverage_file = 'coverage.txt'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
98 fh = open(coverage_file, 'w')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
99 #print "args:", ' '.join(args)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
100 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
101 rc = p.wait()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
102 fh.close()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
103
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
104 ## graphical output
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
105 fh = open(coverage_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
106 coverage2_file = 'coverage2.txt'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
107 ofh = open(coverage2_file, 'w')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
108
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
109 for line in fh:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
110 line = line.rstrip('\r\n')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
111 elems = line.split('\t')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
112 name = elems.pop(0)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
113 values = [ elems[0] ]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
114 for idx in range(1, len(elems)):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
115 val = str(float(elems[idx]) - float(elems[idx-1]))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
116 values.append(val)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
117 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
118
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
119 fh.close()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
120 ofh.close()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
121
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
122 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
123
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
124 prog = 'R'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
125
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
126 args = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
127 args.append(prog)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
128 args.append('--vanilla')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
129 args.append('--quiet')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
130
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
131 _realpath = os.path.realpath(__file__)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
132 _script_dir = os.path.dirname(_realpath)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
133 r_script_file = os.path.join(_script_dir, 'coverage_plot.r')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
134
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
135 ifh = open(r_script_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
136 ofh = open('/dev/null', 'w')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
137 #print "args:", ' '.join(args)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
138 p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
139 rc = p.wait()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
140 ifh.close()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
141 ofh.close()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
142
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
143 pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
144 shutil.copy2('coverage.pdf', pdf_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
145 os.remove('coverage.pdf')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
146 os.remove(coverage2_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
147
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
148 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
149
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
150 info_page = gd_composite.InfoPage()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
151 info_page.set_title('Coverage distributions Galaxy Composite Dataset')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
152
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
153 display_file = gd_composite.DisplayFile()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
154 display_value = gd_composite.DisplayValue()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
155
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
156 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
157 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
158
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
159 info_page.add_output_parameter(out_pdf)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
160 info_page.add_output_parameter(out_txt)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
161
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
162
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
163 if data_source == '0':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
164 data_source_value = 'sequence coverage'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
165 elif data_source == '1':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
166 data_source_value = 'estimated genotype'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
167
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
168 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
169
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
170 info_page.add_input_parameter(in_data_source)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
171
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
172 if population_list:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
173 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
174 info_page.add_misc(misc_populations)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
175 else:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
176 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
177 info_page.add_misc(misc_individuals)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
178
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
179
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
180
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
181
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
182 with open (output, 'w') as ofh:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
183 print >> ofh, info_page.render()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
184
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
185
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
186 sys.exit(0)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
187