diff population_structure.py @ 17:a3af29edcce2

Uploaded Miller Lab Devshed version a51c894f5bed
author miller-lab
date Fri, 28 Sep 2012 11:57:18 -0400
parents 2c498d40ecde
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/population_structure.py	Fri Sep 28 11:57:18 2012 -0400
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+
+import errno
+import os
+import shutil
+import subprocess
+import sys
+from BeautifulSoup import BeautifulSoup
+import gd_composite
+
+################################################################################
+
+def run_admixture(ped_file, populations):
+    prog = 'admixture'
+
+    args = []
+    args.append(prog)
+    args.append(input_ped_file)
+    args.append(populations)
+
+    #print "args:", ' '.join(args)
+    ofh = open('/dev/null', 'w')
+    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=sys.stderr)
+    rc = p.wait()
+    ofh.close()
+
+def run_r(input_file, output_file, populations):
+    prog = 'R'
+
+    args = []
+    args.append(prog)
+    args.append('--vanilla')
+    args.append('--quiet')
+    args.append('--args')
+    args.append(input_file)
+    args.append(output_file)
+    args.append(populations)
+
+    _realpath = os.path.realpath(__file__)
+    _script_dir = os.path.dirname(_realpath)
+    r_script_file = os.path.join(_script_dir, 'population_structure.r')
+
+    ifh = open(r_script_file)
+    ofh = open('/dev/null', 'w')
+    p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
+    rc = p.wait()
+    ifh.close()
+    ofh.close()
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+def get_populations(input):
+    pops = []
+    pop_names = {}
+
+    with open(input) as fh:
+        soup = BeautifulSoup(fh)
+        misc = soup.find('div', {'id': 'gd_misc'})
+
+        return 'Populations\n{0}'.format(misc('ul')[0])
+
+################################################################################
+
+if len(sys.argv) != 6:
+    print >> sys.stderr, "Usage"
+    sys.exit(1)
+
+input_html_file, input_ped_file, output_file, extra_files_path, populations = sys.argv[1:6]
+populations_html = get_populations(input_html_file)
+
+run_admixture(input_ped_file, populations)
+
+ped_base = os.path.basename(input_ped_file)
+if ped_base.endswith('.ped'):
+    ped_base = ped_base[:-4]
+
+p_file = '%s.%s.P' % (ped_base, populations)
+q_file = '%s.%s.Q' % (ped_base, populations)
+
+mkdir_p(extra_files_path)
+numeric_output_file = os.path.join(extra_files_path, 'numeric.txt')
+shutil.copy2(q_file, numeric_output_file)
+os.remove(p_file)
+os.remove(q_file)
+
+graphical_output_file = os.path.join(extra_files_path, 'graphical.pdf')
+run_r(numeric_output_file, graphical_output_file, populations)
+
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('Population structure Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='graphical.pdf', value='graphical.pdf', display_type=display_file)
+out_txt = gd_composite.Parameter(name='numeric.txt', value='numeric.txt', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_txt)
+
+in_pops = gd_composite.Parameter(description='Number of populations', value=populations, display_type=display_value)
+
+info_page.add_input_parameter(in_pops)
+
+misc_pops = gd_composite.Parameter(description=populations_html, display_type=display_value)
+
+info_page.add_misc(misc_pops)
+
+
+with open (output_file, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+
+sys.exit(0)