comparison prepare_population_structure.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 248b06e86022
children
comparison
equal deleted inserted replaced
26:91e835060ad2 27:8997f2ca8c7a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import errno 3 import gd_util
4 import os 4 import os
5 import shutil 5 import shutil
6 import subprocess
7 import sys 6 import sys
8 from Population import Population 7 from Population import Population
9 import gd_composite 8 import gd_composite
10 9
11 ################################################################################ 10 ################################################################################
12 11
13 def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list): 12 def do_import(filename, files_path, min_reads, min_qual, min_spacing, using_info, population_list):
14 info_page = gd_composite.InfoPage() 13 info_page = gd_composite.InfoPage()
15 info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset') 14 info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset')
16 15
17 display_file = gd_composite.DisplayFile() 16 display_file = gd_composite.DisplayFile()
18 display_value = gd_composite.DisplayValue() 17 display_value = gd_composite.DisplayValue()
37 info_page.add_misc(misc_populations) 36 info_page.add_misc(misc_populations)
38 37
39 with open(filename, 'w') as ofh: 38 with open(filename, 'w') as ofh:
40 print >> ofh, info_page.render() 39 print >> ofh, info_page.render()
41 40
42 def mkdir_p(path):
43 try:
44 os.makedirs(path)
45 except OSError, e:
46 if e.errno <> errno.EEXIST:
47 raise
48
49 def die(message, exit=True):
50 print >> sys.stderr, message
51 if exit:
52 sys.exit(1)
53
54 ################################################################################ 41 ################################################################################
55 42
56 if len(sys.argv) < 10: 43 if len(sys.argv) < 10:
57 die("Usage") 44 gd_util.die('Usage')
58 45
59 # parse command line 46 # parse command line
60 input_snp_filename, input_type, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:8] 47 input_snp_filename, input_type, min_reads, min_qual, min_spacing, output_filename, output_files_path, ind_arg = sys.argv[1:9]
61 args = sys.argv[8:] 48 args = sys.argv[9:]
62 49
63 individual_metadata = []
64 population_files = [] 50 population_files = []
65 population_names = []
66 all_individuals = False 51 all_individuals = False
67 52
68 for arg in args: 53 for arg in args:
69 if arg == 'all_individuals': 54 if arg == 'all_individuals':
70 all_individuals = True 55 all_individuals = True
71 elif len(arg) > 11: 56 elif len(arg) > 11 and arg[:11] == 'population:':
72 tag = arg[:11] 57 file, name = arg[11:].split(':', 1)
73 value = arg[11:] 58 population_files.append((file, name))
74 if tag == 'individual:':
75 individual_metadata.append(value)
76 elif tag == 'population:':
77 filename, name = value.split(':', 1)
78 population_files.append(filename)
79 population_names.append(name)
80 59
81 p_total = Population() 60 p_total = Population()
82 p_total.from_tag_list(individual_metadata) 61 p_total.from_wrapped_dict(ind_arg)
83 62
84 individual_population = {} 63 individual_population = {}
85
86 population_list = [] 64 population_list = []
87 65
88 if all_individuals: 66 if all_individuals:
89 p1 = p_total 67 p1 = p_total
90 p1.name = 'All Individuals' 68 p1.name = 'All Individuals'
91 population_list.append(p1) 69 population_list.append(p1)
92 else: 70 else:
93 p1 = Population() 71 p1 = Population()
94 for idx in range(len(population_files)): 72 for file, name in population_files:
95 population_file = population_files[idx] 73 this_pop = Population(name)
96 population_name = population_names[idx] 74 this_pop.from_population_file(file)
97 this_pop = Population(population_name)
98 this_pop.from_population_file(population_file)
99 population_list.append(this_pop) 75 population_list.append(this_pop)
100 p1.from_population_file(population_file) 76
101 tags = p1.tag_list() 77 for tag in this_pop.tag_list():
102 for tag in tags:
103 if tag not in individual_population: 78 if tag not in individual_population:
104 individual_population[tag] = population_name 79 individual_population[tag] = name
80
81 # add individuals from this file to p1
82 p1.from_population_file(file)
83
105 84
106 if not p_total.is_superset(p1): 85 if not p_total.is_superset(p1):
107 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' 86 gd_util.die('There is an individual in the population that is not in the SNP table')
108 sys.exit(1)
109 87
110 # run tool 88 ################################################################################
89
111 prog = 'admix_prep' 90 prog = 'admix_prep'
112 91
113 args = [] 92 args = [ prog ]
114 args.append(prog)
115 args.append(input_snp_filename) 93 args.append(input_snp_filename)
116 args.append(min_reads) 94 args.append(min_reads)
117 args.append(min_qual) 95 args.append(min_qual)
118 args.append(min_spacing) 96 args.append(min_spacing)
119 97
120 tags = p1.tag_list() 98 for tag in p1.tag_list():
121 for tag in tags:
122 if input_type == 'gd_genotype': 99 if input_type == 'gd_genotype':
123 column, name = tag.split(':') 100 column, name = tag.split(':', 1)
124 tag = '{0}:{1}'.format(int(column) - 2, name) 101 tag = '{0}:{1}'.format(int(column) - 2, name)
125 args.append(tag) 102 args.append(tag)
126 103
127 #print "args:", ' '.join(args) 104 stdoutdata, stderrdata = gd_util.run_program(prog, args)
128 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr) 105 using_info = stdoutdata.rstrip('\r\n')
129 (stdoutdata, stderrdata) = p.communicate()
130 rc = p.returncode
131 106
132 if rc != 0: 107 ################################################################################
133 die('admix_prep failed: rc={0}'.format(rc))
134 108
135 using_info = stdoutdata.rstrip('\r\n') 109 gd_util.mkdir_p(output_files_path)
136 mkdir_p(output_files_path) 110
137 output_ped_filename = os.path.join(output_files_path, 'admix.ped') 111 output_ped_filename = os.path.join(output_files_path, 'admix.ped')
138 output_map_filename = os.path.join(output_files_path, 'admix.map') 112 output_map_filename = os.path.join(output_files_path, 'admix.map')
139 shutil.copy2('admix.ped', output_ped_filename) 113 shutil.copy2('admix.ped', output_ped_filename)
140 shutil.copy2('admix.map', output_map_filename) 114 shutil.copy2('admix.map', output_map_filename)
141 do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list)
142 115
143 os.unlink('admix.ped') 116 do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, using_info, population_list)
144 os.unlink('admix.map')
145
146 sys.exit(0) 117 sys.exit(0)
147 118