annotate population_structure.py @ 7:e29f4d801bb0

change wsf -> snp; wpf -> sap
author Richard Burhans <burhans@bx.psu.edu>
date Wed, 18 Apr 2012 11:12:21 -0400
parents 2c498d40ecde
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
3 import errno
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4 import os
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
5 import shutil
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
6 import subprocess
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
7 import sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8 from BeautifulSoup import BeautifulSoup
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
9 import gd_composite
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
10
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
11 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
12
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def run_admixture(ped_file, populations):
    """Run the external ``admixture`` program on a PED file.

    The command executed is ``admixture <ped_file> <populations>``.  Its
    standard output is discarded and its standard error is forwarded to
    this process's ``sys.stderr``.

    Args:
        ped_file: Path of the PED file passed as the first argument.
        populations: Value passed as the second argument.  It is placed on
            the argument list unchanged, so it must already be a string.

    Returns:
        The exit status reported by the ``admixture`` process.

    Raises:
        OSError: If the ``admixture`` executable cannot be started.
    """
    # Pass the caller's ped_file rather than the module-level
    # input_ped_file, so the function honours its own argument.
    args = ['admixture', ped_file, populations]

    # The context manager closes the null device even when Popen raises.
    with open(os.devnull, 'w') as null_fh:
        proc = subprocess.Popen(args, bufsize=-1, stdin=None,
                                stdout=null_fh, stderr=sys.stderr)
        return proc.wait()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
26
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def run_r(input_file, output_file, populations):
    """Run ``population_structure.r`` through the ``R`` interpreter.

    The R script is looked up in the directory that contains this file
    (after resolving symlinks) and is fed to ``R --vanilla --quiet`` on
    standard input.  ``input_file``, ``output_file`` and ``populations`` are
    appended after ``--args``.  R's standard output is discarded; its
    standard error is inherited from this process.

    Args:
        input_file: First value passed to the script after ``--args``.
        output_file: Second value passed to the script after ``--args``.
        populations: Third value passed after ``--args``; must be a string.

    Returns:
        The exit status reported by the ``R`` process.

    Raises:
        IOError: If ``population_structure.r`` cannot be opened.
        OSError: If the ``R`` executable cannot be started.
    """
    args = ['R', '--vanilla', '--quiet', '--args',
            input_file, output_file, populations]

    script_dir = os.path.dirname(os.path.realpath(__file__))
    r_script_file = os.path.join(script_dir, 'population_structure.r')

    # Nested context managers guarantee both handles are closed even when
    # the second open() or Popen() raises.
    with open(r_script_file) as script_fh:
        with open(os.devnull, 'w') as null_fh:
            proc = subprocess.Popen(args, bufsize=-1, stdin=script_fh,
                                    stdout=null_fh, stderr=None)
            return proc.wait()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
49
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def mkdir_p(path):
    """Create ``path`` and any missing parent directories.

    Succeeds silently when ``path`` already exists as a directory.

    Args:
        path: Directory path to create.

    Raises:
        OSError: If the directory cannot be created, including the case
            where ``path`` already exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # EEXIST is only harmless when the existing entry is a directory;
        # an existing regular file must still be reported to the caller.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
56
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def get_populations(input):
    """Extract the populations list from an HTML file.

    Parses ``input`` with BeautifulSoup, locates the ``<div id="gd_misc">``
    element and returns its first ``<ul>`` rendered as text, prefixed with a
    ``Populations`` heading line.

    Args:
        input: Path of the HTML file to read.

    Returns:
        The string ``'Populations\\n'`` followed by the first ``<ul>`` found
        inside the ``gd_misc`` div.

    Raises:
        IOError: If ``input`` cannot be opened.
        ValueError: If the file has no ``gd_misc`` div, or that div contains
            no ``<ul>`` element.
    """
    with open(input) as fh:
        soup = BeautifulSoup(fh)
        misc = soup.find('div', {'id': 'gd_misc'})

    # Fail with a clear message instead of an opaque TypeError/IndexError.
    if misc is None:
        raise ValueError("no <div id=\"gd_misc\"> element in %s" % input)

    # Calling a tag is BeautifulSoup shorthand for searching beneath it.
    ul_tags = misc('ul')
    if not ul_tags:
        raise ValueError("no <ul> inside the gd_misc div of %s" % input)

    return 'Populations\n{0}'.format(ul_tags[0])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
66
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
67 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
68
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Exactly five arguments are required after the script name.
if len(sys.argv) != 6:
    sys.stderr.write(
        "Usage: %s input_html input_ped output_file extra_files_path populations\n"
        % os.path.basename(sys.argv[0]))
    sys.exit(1)

input_html_file, input_ped_file, output_file, extra_files_path, populations = sys.argv[1:6]
populations_html = get_populations(input_html_file)

run_admixture(input_ped_file, populations)

# The result files are looked up in the current working directory under the
# PED file's base name (without a trailing ".ped") plus the population count.
ped_base = os.path.basename(input_ped_file)
if ped_base.endswith('.ped'):
    ped_base = ped_base[:-4]

p_file = '%s.%s.P' % (ped_base, populations)
q_file = '%s.%s.Q' % (ped_base, populations)

# run_admixture's exit status is not inspected here, so report a missing
# result file explicitly rather than failing later with a copy traceback.
if not os.path.exists(q_file):
    sys.stderr.write("admixture did not produce expected output file: %s\n" % q_file)
    sys.exit(1)

# Keep the .Q file as numeric.txt in the extra-files directory, then remove
# both admixture outputs from the working directory.
mkdir_p(extra_files_path)
numeric_output_file = os.path.join(extra_files_path, 'numeric.txt')
shutil.copy2(q_file, numeric_output_file)
os.remove(p_file)
os.remove(q_file)

# Hand the numeric output to the R script, which is given graphical.pdf as
# its output path.
graphical_output_file = os.path.join(extra_files_path, 'graphical.pdf')
run_r(numeric_output_file, graphical_output_file, populations)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
93
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
94 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
95
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Build the summary page for the composite dataset and write it out.
summary_page = gd_composite.InfoPage()
summary_page.set_title('Population structure Galaxy Composite Dataset')

as_file = gd_composite.DisplayFile()
as_value = gd_composite.DisplayValue()

# Outputs: both files are listed under their own names.
for produced_name in ('graphical.pdf', 'numeric.txt'):
    summary_page.add_output_parameter(
        gd_composite.Parameter(name=produced_name, value=produced_name,
                               display_type=as_file))

# Inputs: the population count given on the command line.
summary_page.add_input_parameter(
    gd_composite.Parameter(description='Number of populations',
                           value=populations, display_type=as_value))

# Miscellaneous: the populations list taken from the input HTML.
summary_page.add_misc(
    gd_composite.Parameter(description=populations_html,
                           display_type=as_value))

# Write the rendered page followed by a newline.
with open(output_file, 'w') as page_fh:
    page_fh.write('%s\n' % summary_page.render())

sys.exit(0)