Mercurial > repos > miller-lab > genome_diversity
annotate coverage_distributions.py @ 30:4188853b940b
Update to Miller Lab devshed revision eb4e61d024db
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 26 Jul 2013 12:51:13 -0400 |
parents | 8997f2ca8c7a |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
3 import gd_util |
0 | 4 import os |
5 import sys | |
6 from Population import Population | |
7 import gd_composite | |
8 | |
9 ################################################################################ | |
10 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
11 if len(sys.argv) < 7: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
12 gd_util.die('Usage') |
0 | 13 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
14 input, data_source, output, extra_files_path, ind_arg = sys.argv[1:6] |
0 | 15 |
16 population_info = [] | |
17 p1_input = None | |
18 all_individuals = False | |
19 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
20 for arg in sys.argv[6:]: |
0 | 21 if arg == 'all_individuals': |
22 all_individuals = True | |
23 elif len(arg) > 12 and arg[:12] == 'individuals:': | |
24 p1_input = arg[12:] | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
25 elif len(arg) > 11 and arg[:11] == 'population:': |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
26 file, name = arg[11:].split(':', 1) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
27 population_info.append((file, name)) |
0 | 28 |
29 p_total = Population() | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
30 p_total.from_wrapped_dict(ind_arg) |
0 | 31 |
32 ################################################################################ | |
33 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
34 gd_util.mkdir_p(extra_files_path) |
0 | 35 |
36 ################################################################################ | |
37 | |
38 prog = 'coverage' | |
39 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
40 args = [ prog ] |
0 | 41 args.append(input) |
42 args.append(data_source) | |
43 | |
44 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt') | |
45 args.append(user_coverage_file) | |
46 | |
47 population_list = [] | |
48 | |
49 if all_individuals: | |
50 tags = p_total.tag_list() | |
51 elif p1_input is not None: | |
52 p1 = Population() | |
53 this_pop = Population() | |
54 this_pop.from_population_file(p1_input) | |
55 population_list.append(this_pop) | |
56 p1.from_population_file(p1_input) | |
57 if not p_total.is_superset(p1): | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
58 gd_util.die('There is an individual in the population that is not in the SNP table') |
0 | 59 tags = p1.tag_list() |
60 else: | |
61 tags = [] | |
62 for population_file, population_name in population_info: | |
63 population = Population() | |
64 this_pop = Population() | |
65 this_pop.from_population_file(population_file) | |
66 population_list.append(this_pop) | |
67 population.from_population_file(population_file) | |
68 if not p_total.is_superset(population): | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
69 gd_util.die('There is an individual in the {} population that is not in the SNP table'.format(population_name)) |
0 | 70 columns = population.column_list() |
71 for column in columns: | |
72 tags.append('{0}:{1}'.format(column, population_name)) | |
73 | |
74 for tag in tags: | |
75 args.append(tag) | |
76 | |
77 ## text output | |
78 coverage_file = 'coverage.txt' | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
79 with open(coverage_file, 'w') as fh: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
80 gd_util.run_program(prog, args, stdout=fh) |
0 | 81 |
82 ## graphical output | |
83 coverage2_file = 'coverage2.txt' | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
84 with open(coverage_file) as fh, open(coverage2_file, 'w') as ofh: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
85 for line in fh: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
86 line = line.rstrip('\r\n') |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
87 elems = line.split('\t') |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
88 name = elems.pop(0) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
89 values = [ elems[0] ] |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
90 for idx in range(1, len(elems)): |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
91 val = str(float(elems[idx]) - float(elems[idx-1])) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
92 values.append(val) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
93 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values)) |
0 | 94 |
95 ################################################################################ | |
96 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
97 prog = 'Rscript' |
0 | 98 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
99 args = [ prog ] |
0 | 100 |
101 _realpath = os.path.realpath(__file__) | |
102 _script_dir = os.path.dirname(_realpath) | |
103 r_script_file = os.path.join(_script_dir, 'coverage_plot.r') | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
104 args.append(r_script_file) |
0 | 105 |
106 pdf_file = os.path.join(extra_files_path, 'coverage.pdf') | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
107 args.append(pdf_file) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
108 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
109 gd_util.run_program(prog, args) |
0 | 110 |
111 ################################################################################ | |
112 | |
113 info_page = gd_composite.InfoPage() | |
114 info_page.set_title('Coverage distributions Galaxy Composite Dataset') | |
115 | |
116 display_file = gd_composite.DisplayFile() | |
117 display_value = gd_composite.DisplayValue() | |
118 | |
119 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file) | |
120 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file) | |
121 | |
122 info_page.add_output_parameter(out_pdf) | |
123 info_page.add_output_parameter(out_txt) | |
124 | |
125 if data_source == '0': | |
126 data_source_value = 'sequence coverage' | |
127 elif data_source == '1': | |
128 data_source_value = 'estimated genotype' | |
129 | |
130 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) | |
131 | |
132 info_page.add_input_parameter(in_data_source) | |
133 | |
134 if population_list: | |
135 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
136 info_page.add_misc(misc_populations) | |
137 else: | |
138 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) | |
139 info_page.add_misc(misc_individuals) | |
140 | |
141 with open (output, 'w') as ofh: | |
142 print >> ofh, info_page.render() | |
143 | |
144 sys.exit(0) | |
145 |