annotate coverage_distributions.py @ 30:4188853b940b

Update to Miller Lab devshed revision eb4e61d024db
author Richard Burhans <burhans@bx.psu.edu>
date Fri, 26 Jul 2013 12:51:13 -0400
parents 8997f2ca8c7a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
3 import gd_util
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4 import os
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
5 import sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
6 from Population import Population
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
7 import gd_composite
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
9 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
10
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
11 if len(sys.argv) < 7:
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
12 gd_util.die('Usage')
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
13
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
14 input, data_source, output, extra_files_path, ind_arg = sys.argv[1:6]
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
15
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
16 population_info = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
17 p1_input = None
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
18 all_individuals = False
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
19
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
20 for arg in sys.argv[6:]:
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
21 if arg == 'all_individuals':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
22 all_individuals = True
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
23 elif len(arg) > 12 and arg[:12] == 'individuals:':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
24 p1_input = arg[12:]
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
25 elif len(arg) > 11 and arg[:11] == 'population:':
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
26 file, name = arg[11:].split(':', 1)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
27 population_info.append((file, name))
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
28
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
29 p_total = Population()
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
30 p_total.from_wrapped_dict(ind_arg)
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
31
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
32 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
33
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
34 gd_util.mkdir_p(extra_files_path)
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
35
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
36 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
37
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
38 prog = 'coverage'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
39
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
40 args = [ prog ]
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
41 args.append(input)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
42 args.append(data_source)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
43
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
44 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
45 args.append(user_coverage_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
46
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
47 population_list = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
48
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
49 if all_individuals:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
50 tags = p_total.tag_list()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
51 elif p1_input is not None:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
52 p1 = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
53 this_pop = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
54 this_pop.from_population_file(p1_input)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
55 population_list.append(this_pop)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
56 p1.from_population_file(p1_input)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
57 if not p_total.is_superset(p1):
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
58 gd_util.die('There is an individual in the population that is not in the SNP table')
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
59 tags = p1.tag_list()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
60 else:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
61 tags = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
62 for population_file, population_name in population_info:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
63 population = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
64 this_pop = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
65 this_pop.from_population_file(population_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
66 population_list.append(this_pop)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
67 population.from_population_file(population_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
68 if not p_total.is_superset(population):
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
69 gd_util.die('There is an individual in the {} population that is not in the SNP table'.format(population_name))
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
70 columns = population.column_list()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
71 for column in columns:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
72 tags.append('{0}:{1}'.format(column, population_name))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
73
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
74 for tag in tags:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
75 args.append(tag)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
76
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
77 ## text output
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
78 coverage_file = 'coverage.txt'
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
79 with open(coverage_file, 'w') as fh:
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
80 gd_util.run_program(prog, args, stdout=fh)
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
81
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
82 ## graphical output
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
83 coverage2_file = 'coverage2.txt'
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
84 with open(coverage_file) as fh, open(coverage2_file, 'w') as ofh:
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
85 for line in fh:
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
86 line = line.rstrip('\r\n')
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
87 elems = line.split('\t')
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
88 name = elems.pop(0)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
89 values = [ elems[0] ]
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
90 for idx in range(1, len(elems)):
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
91 val = str(float(elems[idx]) - float(elems[idx-1]))
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
92 values.append(val)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
93 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
94
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
95 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
96
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
97 prog = 'Rscript'
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
98
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
99 args = [ prog ]
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
100
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
101 _realpath = os.path.realpath(__file__)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
102 _script_dir = os.path.dirname(_realpath)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
103 r_script_file = os.path.join(_script_dir, 'coverage_plot.r')
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
104 args.append(r_script_file)
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
105
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
106 pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
107 args.append(pdf_file)
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
108
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
109 gd_util.run_program(prog, args)
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
110
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
111 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
112
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
113 info_page = gd_composite.InfoPage()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
114 info_page.set_title('Coverage distributions Galaxy Composite Dataset')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
115
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
116 display_file = gd_composite.DisplayFile()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
117 display_value = gd_composite.DisplayValue()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
118
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
119 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
120 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
121
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
122 info_page.add_output_parameter(out_pdf)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
123 info_page.add_output_parameter(out_txt)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
124
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
125 if data_source == '0':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
126 data_source_value = 'sequence coverage'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
127 elif data_source == '1':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
128 data_source_value = 'estimated genotype'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
129
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
130 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
131
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
132 info_page.add_input_parameter(in_data_source)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
133
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
134 if population_list:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
135 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
136 info_page.add_misc(misc_populations)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
137 else:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
138 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
139 info_page.add_misc(misc_individuals)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
140
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
141 with open (output, 'w') as ofh:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
142 print >> ofh, info_page.render()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
143
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
144 sys.exit(0)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
145