diff dpmix.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 248b06e86022
children a631c2f6d913
line wrap: on
line diff
--- a/dpmix.py	Mon Jun 03 12:29:29 2013 -0400
+++ b/dpmix.py	Mon Jul 15 10:47:35 2013 -0400
@@ -1,9 +1,8 @@
 #!/usr/bin/env python
 
-import errno
+import gd_util
 import sys
 import os
-import subprocess
 from Population import Population
 import gd_composite
 from dpmix_plot import make_dpmix_plot
@@ -11,87 +10,70 @@
 
 ################################################################################
 
-def mkdir_p(path):
-    try:
-        os.makedirs(path)
-    except OSError, e:
-        if e.errno <> errno.EEXIST:
-            raise
-
-def run_program(prog, args, stdout_file=None, space_to_tab=False):
-    #print "args: ", ' '.join(args)
-    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    (stdoutdata, stderrdata) = p.communicate()
-    rc = p.returncode
-
-    if stdout_file is not None:
-        with open(stdout_file, 'w') as ofh:
-            lines = stdoutdata.split('\n')
-            for line in lines:
-                line = line.strip()
-                if line:
-                    if space_to_tab:
-                        line = line.replace(' ', '\t')
-                    print >> ofh, line
-
-    if rc != 0:
-        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
-        print >> sys.stderr, stderrdata
-        sys.exit(1)
-
-################################################################################
-
-if len(sys.argv) < 16:
+if len(sys.argv) != 22:
     print "usage"
     sys.exit(1)
 
-input, input_type, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:15]
-individual_metadata = sys.argv[15:]
+input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:]
+
+if ap3_input == '/dev/null':
+    populations = 2
+else:
+    populations = 3
 
 chrom = 'all'
-add_logs = '0'
 
-loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file)
-location_file = LocationFile(loc_path)
-heterochrom_path = location_file.get_values_if_exists(dbkey)
-if heterochrom_path is None:
+if het_arg == 'use_installed':
+    loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file)
+    location_file = LocationFile(loc_path)
+    heterochrom_path = location_file.get_values_if_exists(dbkey)
+    if heterochrom_path is None:
+        heterochrom_path = '/dev/null'
+elif het_arg == 'use_none':
     heterochrom_path = '/dev/null'
+else:
+    heterochrom_path = het_arg
 
 population_list = []
 
 p_total = Population()
-p_total.from_tag_list(individual_metadata)
+p_total.from_wrapped_dict(ind_arg)
 
 ap1 = Population(name='Ancestral population 1')
 ap1.from_population_file(ap1_input)
 population_list.append(ap1)
 if not p_total.is_superset(ap1):
-    print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table'
-    sys.exit(1)
+    gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table')
 
 ap2 = Population(name='Ancestral population 2')
 ap2.from_population_file(ap2_input)
 population_list.append(ap2)
 if not p_total.is_superset(ap2):
-    print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table'
-    sys.exit(1)
+    gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table')
+
+if populations == 3:
+    ap3 = Population(name='Ancestral population 3')
+    ap3.from_population_file(ap3_input)
+    population_list.append(ap3)
+    if not p_total.is_superset(ap3):
+        gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table')
 
 p = Population(name='Potentially admixed')
 p.from_population_file(p_input)
 population_list.append(p)
 if not p_total.is_superset(p):
-    print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
-    sys.exit(1)
+    gd_util.die('There is an individual in the population that is not in the SNP table')
 
-mkdir_p(output2_dir)
+gd_util.mkdir_p(output2_dir)
 
 ################################################################################
 # Create tabular file
 ################################################################################
 
-misc_file = os.path.join(output2_dir, 'misc.txt')
+misc_file = os.path.join(output2_dir, 'summary.txt')
 
 prog = 'dpmix'
+
 args = [ prog ]
 args.append(input)
 args.append(ref_column)
@@ -104,33 +86,64 @@
 
 columns = ap1.column_list()
 for column in columns:
+    col = int(column)
+    name = ap1.individual_with_column(column).name
+    first_token = name.split()[0]
     if input_type == 'gd_genotype':
-        args.append('{0}:1:{1}'.format(int(column) - 2, ap1.individual_with_column(column).name))
-    else:
-        args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name))
+        col -= 2
+    args.append('{0}:1:{1}'.format(col, first_token))
 
 columns = ap2.column_list()
 for column in columns:
+    col = int(column)
+    name = ap2.individual_with_column(column).name
+    first_token = name.split()[0]
     if input_type == 'gd_genotype':
-        args.append('{0}:2:{1}'.format(int(column) - 2, ap2.individual_with_column(column).name))
-    else:
-        args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name))
+        col -= 2
+    args.append('{0}:2:{1}'.format(col, first_token))
+
+if populations == 3:
+    columns = ap3.column_list()
+    for column in columns:
+        col = int(column)
+        name = ap3.individual_with_column(column).name
+        first_token = name.split()[0]
+        if input_type == 'gd_genotype':
+            col -= 2
+        args.append('{0}:3:{1}'.format(col, first_token))
 
 columns = p.column_list()
 for column in columns:
+    col = int(column)
+    name = p.individual_with_column(column).name
+    first_token = name.split()[0]
     if input_type == 'gd_genotype':
-        args.append('{0}:0:{1}'.format(int(column) - 2, p.individual_with_column(column).name))
-    else:
-        args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name))
+        col -= 2
+    args.append('{0}:0:{1}'.format(col, first_token))
 
-run_program(None, args, stdout_file=output, space_to_tab=True)
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
 
 ################################################################################
 # Create pdf file
 ################################################################################
 
-pdf_file = os.path.join(output2_dir, 'dpmix.pdf')
-make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir)
+if populations == 3:
+    state2name = {
+        0:'heterochromatin',
+        1:ap1_name,
+        2:ap2_name,
+        3:ap3_name
+    }
+else:
+    state2name = {
+        0:'heterochromatin',
+        1:ap1_name,
+        2:ap2_name
+    }
+
+pdf_file = os.path.join(output2_dir, 'picture.pdf')
+make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir, state2name=state2name, populations=populations)
 
 ################################################################################
 # Create html
@@ -142,8 +155,8 @@
 display_file = gd_composite.DisplayFile()
 display_value = gd_composite.DisplayValue()
 
-out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file)
-out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file)
+out_pdf = gd_composite.Parameter(name='picture.pdf', value='picture.pdf', display_type=display_file)
+out_misc = gd_composite.Parameter(name='summary.txt', value='summary.txt', display_type=display_file)
 
 info_page.add_output_parameter(out_pdf)
 info_page.add_output_parameter(out_misc)
@@ -168,4 +181,3 @@
 
 sys.exit(0)
 
-