diff prepare_population_structure.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 248b06e86022
children
line wrap: on
line diff
--- a/prepare_population_structure.py	Mon Jun 03 12:29:29 2013 -0400
+++ b/prepare_population_structure.py	Mon Jul 15 10:47:35 2013 -0400
@@ -1,16 +1,15 @@
 #!/usr/bin/env python
 
-import errno
+import gd_util
 import os
 import shutil
-import subprocess
 import sys
 from Population import Population
 import gd_composite
 
 ################################################################################
 
-def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list):
+def do_import(filename, files_path, min_reads, min_qual, min_spacing, using_info, population_list):
     info_page = gd_composite.InfoPage()
     info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset')
 
@@ -39,50 +38,29 @@
     with open(filename, 'w') as ofh:
         print >> ofh, info_page.render()
 
-def mkdir_p(path):
-    try:
-        os.makedirs(path)
-    except OSError, e:
-        if e.errno <> errno.EEXIST:
-            raise
-
-def die(message, exit=True):
-    print >> sys.stderr, message
-    if exit:
-        sys.exit(1)
-
 ################################################################################
 
 if len(sys.argv) < 10:
-    die("Usage")
+    gd_util.die('Usage')
 
 # parse command line
-input_snp_filename, input_type, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:8]
-args = sys.argv[8:]
+input_snp_filename, input_type, min_reads, min_qual, min_spacing, output_filename, output_files_path, ind_arg = sys.argv[1:9]
+args = sys.argv[9:]
 
-individual_metadata = []
 population_files = []
-population_names = []
 all_individuals = False
 
 for arg in args:
     if arg == 'all_individuals':
         all_individuals = True
-    elif len(arg) > 11:
-        tag = arg[:11]
-        value = arg[11:]
-        if tag == 'individual:':
-            individual_metadata.append(value)
-        elif tag == 'population:':
-            filename, name = value.split(':', 1)
-            population_files.append(filename)
-            population_names.append(name)
+    elif len(arg) > 11 and arg[:11] == 'population:':
+        file, name = arg[11:].split(':', 1)
+        population_files.append((file, name))
 
 p_total = Population()
-p_total.from_tag_list(individual_metadata)
+p_total.from_wrapped_dict(ind_arg)
 
 individual_population = {}
-
 population_list = []
 
 if all_individuals:
@@ -91,57 +69,50 @@
     population_list.append(p1)
 else:
     p1 = Population()
-    for idx in range(len(population_files)):
-        population_file = population_files[idx]
-        population_name = population_names[idx]
-        this_pop = Population(population_name)
-        this_pop.from_population_file(population_file)
+    for file, name in population_files:
+        this_pop = Population(name)
+        this_pop.from_population_file(file)
         population_list.append(this_pop)
-        p1.from_population_file(population_file)
-        tags = p1.tag_list()
-        for tag in tags:
+
+        for tag in this_pop.tag_list():
             if tag not in individual_population:
-                individual_population[tag] = population_name
+                individual_population[tag] = name
+
+        # add individuals from this file to p1
+        p1.from_population_file(file)
+
 
 if not p_total.is_superset(p1):
-    print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
-    sys.exit(1)
+    gd_util.die('There is an individual in the population that is not in the SNP table')
 
-# run tool
+################################################################################
+
 prog = 'admix_prep'
 
-args = []
-args.append(prog)
+args = [ prog ]
 args.append(input_snp_filename)
 args.append(min_reads)
 args.append(min_qual)
 args.append(min_spacing)
 
-tags = p1.tag_list()
-for tag in tags:
+for tag in p1.tag_list():
     if input_type == 'gd_genotype':
-        column, name = tag.split(':')
+        column, name = tag.split(':', 1)
         tag = '{0}:{1}'.format(int(column) - 2, name)
     args.append(tag)
 
-#print "args:", ' '.join(args)
-p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr)
-(stdoutdata, stderrdata) = p.communicate()
-rc = p.returncode
+stdoutdata, stderrdata = gd_util.run_program(prog, args)
+using_info = stdoutdata.rstrip('\r\n')
 
-if rc != 0:
-    die('admix_prep failed: rc={0}'.format(rc))
+################################################################################
 
-using_info = stdoutdata.rstrip('\r\n')
-mkdir_p(output_files_path)
+gd_util.mkdir_p(output_files_path)
+
 output_ped_filename = os.path.join(output_files_path, 'admix.ped')
 output_map_filename = os.path.join(output_files_path, 'admix.map')
 shutil.copy2('admix.ped', output_ped_filename)
 shutil.copy2('admix.map', output_map_filename)
-do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list)
 
-os.unlink('admix.ped')
-os.unlink('admix.map')
-
+do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, using_info, population_list)
 sys.exit(0)