annotate prepare_population_structure.py @ 0:2c498d40ecde

Uploaded
author miller-lab
date Mon, 09 Apr 2012 12:03:06 -0400
parents
children 248b06e86022
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
3 import errno
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4 import os
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
5 import shutil
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
6 import subprocess
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
7 import sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8 from Population import Population
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
9 import gd_composite
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
10
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
11 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
12
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
13 def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
14 info_page = gd_composite.InfoPage()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
15 info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
16
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
17 display_file = gd_composite.DisplayFile()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
18 display_value = gd_composite.DisplayValue()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
19
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
20 out_ped = gd_composite.Parameter(name='admix.ped', value='admix.ped', display_type=display_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
21 out_map = gd_composite.Parameter(name='admix.map', value='admix.map', display_type=display_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
22 out_use = gd_composite.Parameter(description=using_info, display_type=display_value)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
23
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
24 info_page.add_output_parameter(out_ped)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
25 info_page.add_output_parameter(out_map)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
26 info_page.add_output_parameter(out_use)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
27
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
28 in_min_reads = gd_composite.Parameter(description='Minimum reads covering a SNP, per individual', value=min_reads, display_type=display_value)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
29 in_min_qual = gd_composite.Parameter(description='Minimum quality value, per individual', value=min_qual, display_type=display_value)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
30 in_min_spacing = gd_composite.Parameter(description='Minimum spacing between SNPs on the same scaffold', value=min_spacing, display_type=display_value)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
31
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
32 info_page.add_input_parameter(in_min_reads)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
33 info_page.add_input_parameter(in_min_qual)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
34 info_page.add_input_parameter(in_min_spacing)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
35
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
36 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
37 info_page.add_misc(misc_populations)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
38
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
39 with open(filename, 'w') as ofh:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
40 print >> ofh, info_page.render()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
41
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
42 def mkdir_p(path):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
43 try:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
44 os.makedirs(path)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
45 except OSError, e:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
46 if e.errno <> errno.EEXIST:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
47 raise
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
48
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
49 def die(message, exit=True):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
50 print >> sys.stderr, message
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
51 if exit:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
52 sys.exit(1)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
53
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
54 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
55
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
56 if len(sys.argv) < 9:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
57 die("Usage")
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
58
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
59 # parse command line
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
60 input_snp_filename, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:7]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
61 args = sys.argv[7:]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
62
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
63 individual_metadata = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
64 population_files = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
65 population_names = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
66 all_individuals = False
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
67
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
68 for arg in args:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
69 if arg == 'all_individuals':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
70 all_individuals = True
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
71 elif len(arg) > 11:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
72 tag = arg[:11]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
73 value = arg[11:]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
74 if tag == 'individual:':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
75 individual_metadata.append(value)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
76 elif tag == 'population:':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
77 filename, name = value.split(':', 1)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
78 population_files.append(filename)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
79 population_names.append(name)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
80
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
81 p_total = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
82 p_total.from_tag_list(individual_metadata)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
83
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
84 individual_population = {}
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
85
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
86 population_list = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
87
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
88 if all_individuals:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
89 p1 = p_total
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
90 p1.name = 'All Individuals'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
91 population_list.append(p1)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
92 else:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
93 p1 = Population()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
94 for idx in range(len(population_files)):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
95 population_file = population_files[idx]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
96 population_name = population_names[idx]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
97 this_pop = Population(population_name)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
98 this_pop.from_population_file(population_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
99 population_list.append(this_pop)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
100 p1.from_population_file(population_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
101 tags = p1.tag_list()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
102 for tag in tags:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
103 if tag not in individual_population:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
104 individual_population[tag] = population_name
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
105
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
106 if not p_total.is_superset(p1):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
107 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
108 sys.exit(1)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
109
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
110 # run tool
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
111 prog = 'admix_prep'
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
112
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
113 args = []
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
114 args.append(prog)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
115 args.append(input_snp_filename)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
116 args.append(min_reads)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
117 args.append(min_qual)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
118 args.append(min_spacing)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
119
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
120 tags = p1.tag_list()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
121 for tag in tags:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
122 args.append(tag)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
123
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
124 #print "args:", ' '.join(args)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
125 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
126 (stdoutdata, stderrdata) = p.communicate()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
127 rc = p.returncode
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
128
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
129 if rc != 0:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
130 die('admix_prep failed: rc={0}'.format(rc))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
131
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
132 using_info = stdoutdata.rstrip('\r\n')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
133 mkdir_p(output_files_path)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
134 output_ped_filename = os.path.join(output_files_path, 'admix.ped')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
135 output_map_filename = os.path.join(output_files_path, 'admix.map')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
136 shutil.copy2('admix.ped', output_ped_filename)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
137 shutil.copy2('admix.map', output_map_filename)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
138 do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
139
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
140 os.unlink('admix.ped')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
141 os.unlink('admix.map')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
142
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
143 sys.exit(0)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
144