0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import errno
|
|
4 import os
|
|
5 import shutil
|
|
6 import subprocess
|
|
7 import sys
|
|
8 from Population import Population
|
|
9 import gd_composite
|
|
10
|
|
11 ################################################################################
|
|
12
|
|
13 def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list):
|
|
14 info_page = gd_composite.InfoPage()
|
|
15 info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset')
|
|
16
|
|
17 display_file = gd_composite.DisplayFile()
|
|
18 display_value = gd_composite.DisplayValue()
|
|
19
|
|
20 out_ped = gd_composite.Parameter(name='admix.ped', value='admix.ped', display_type=display_file)
|
|
21 out_map = gd_composite.Parameter(name='admix.map', value='admix.map', display_type=display_file)
|
|
22 out_use = gd_composite.Parameter(description=using_info, display_type=display_value)
|
|
23
|
|
24 info_page.add_output_parameter(out_ped)
|
|
25 info_page.add_output_parameter(out_map)
|
|
26 info_page.add_output_parameter(out_use)
|
|
27
|
|
28 in_min_reads = gd_composite.Parameter(description='Minimum reads covering a SNP, per individual', value=min_reads, display_type=display_value)
|
|
29 in_min_qual = gd_composite.Parameter(description='Minimum quality value, per individual', value=min_qual, display_type=display_value)
|
|
30 in_min_spacing = gd_composite.Parameter(description='Minimum spacing between SNPs on the same scaffold', value=min_spacing, display_type=display_value)
|
|
31
|
|
32 info_page.add_input_parameter(in_min_reads)
|
|
33 info_page.add_input_parameter(in_min_qual)
|
|
34 info_page.add_input_parameter(in_min_spacing)
|
|
35
|
|
36 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
|
|
37 info_page.add_misc(misc_populations)
|
|
38
|
|
39 with open(filename, 'w') as ofh:
|
|
40 print >> ofh, info_page.render()
|
|
41
|
|
42 def mkdir_p(path):
|
|
43 try:
|
|
44 os.makedirs(path)
|
|
45 except OSError, e:
|
|
46 if e.errno <> errno.EEXIST:
|
|
47 raise
|
|
48
|
|
49 def die(message, exit=True):
|
|
50 print >> sys.stderr, message
|
|
51 if exit:
|
|
52 sys.exit(1)
|
|
53
|
|
54 ################################################################################
|
|
55
|
|
56 if len(sys.argv) < 9:
|
|
57 die("Usage")
|
|
58
|
|
59 # parse command line
|
|
60 input_snp_filename, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:7]
|
|
61 args = sys.argv[7:]
|
|
62
|
|
63 individual_metadata = []
|
|
64 population_files = []
|
|
65 population_names = []
|
|
66 all_individuals = False
|
|
67
|
|
68 for arg in args:
|
|
69 if arg == 'all_individuals':
|
|
70 all_individuals = True
|
|
71 elif len(arg) > 11:
|
|
72 tag = arg[:11]
|
|
73 value = arg[11:]
|
|
74 if tag == 'individual:':
|
|
75 individual_metadata.append(value)
|
|
76 elif tag == 'population:':
|
|
77 filename, name = value.split(':', 1)
|
|
78 population_files.append(filename)
|
|
79 population_names.append(name)
|
|
80
|
|
81 p_total = Population()
|
|
82 p_total.from_tag_list(individual_metadata)
|
|
83
|
|
84 individual_population = {}
|
|
85
|
|
86 population_list = []
|
|
87
|
|
88 if all_individuals:
|
|
89 p1 = p_total
|
|
90 p1.name = 'All Individuals'
|
|
91 population_list.append(p1)
|
|
92 else:
|
|
93 p1 = Population()
|
|
94 for idx in range(len(population_files)):
|
|
95 population_file = population_files[idx]
|
|
96 population_name = population_names[idx]
|
|
97 this_pop = Population(population_name)
|
|
98 this_pop.from_population_file(population_file)
|
|
99 population_list.append(this_pop)
|
|
100 p1.from_population_file(population_file)
|
|
101 tags = p1.tag_list()
|
|
102 for tag in tags:
|
|
103 if tag not in individual_population:
|
|
104 individual_population[tag] = population_name
|
|
105
|
|
106 if not p_total.is_superset(p1):
|
|
107 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
|
|
108 sys.exit(1)
|
|
109
|
|
110 # run tool
|
|
111 prog = 'admix_prep'
|
|
112
|
|
113 args = []
|
|
114 args.append(prog)
|
|
115 args.append(input_snp_filename)
|
|
116 args.append(min_reads)
|
|
117 args.append(min_qual)
|
|
118 args.append(min_spacing)
|
|
119
|
|
120 tags = p1.tag_list()
|
|
121 for tag in tags:
|
|
122 args.append(tag)
|
|
123
|
|
124 #print "args:", ' '.join(args)
|
|
125 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr)
|
|
126 (stdoutdata, stderrdata) = p.communicate()
|
|
127 rc = p.returncode
|
|
128
|
|
129 if rc != 0:
|
|
130 die('admix_prep failed: rc={0}'.format(rc))
|
|
131
|
|
132 using_info = stdoutdata.rstrip('\r\n')
|
|
133 mkdir_p(output_files_path)
|
|
134 output_ped_filename = os.path.join(output_files_path, 'admix.ped')
|
|
135 output_map_filename = os.path.join(output_files_path, 'admix.map')
|
|
136 shutil.copy2('admix.ped', output_ped_filename)
|
|
137 shutil.copy2('admix.map', output_map_filename)
|
|
138 do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list)
|
|
139
|
|
140 os.unlink('admix.ped')
|
|
141 os.unlink('admix.map')
|
|
142
|
|
143 sys.exit(0)
|
|
144
|