diff dpmix.py @ 31:a631c2f6d913

Update to Miller Lab devshed revision 3c4110ffacc3
author Richard Burhans <burhans@bx.psu.edu>
date Fri, 20 Sep 2013 13:25:27 -0400
parents 8997f2ca8c7a
children
line wrap: on
line diff
--- a/dpmix.py	Fri Jul 26 12:51:13 2013 -0400
+++ b/dpmix.py	Fri Sep 20 13:25:27 2013 -0400
@@ -8,6 +8,20 @@
 from dpmix_plot import make_dpmix_plot
 from LocationFile import LocationFile
 
+def load_and_check_pop(name, file, total_pop):
+    p = Population(name=name)
+    p.from_population_file(file)
+    if not total_pop.is_superset(p):
+        gd_util.die('There is an individual in {0} that is not in the SNP table'.format(name))
+    return p
+
+def append_pop_tags(the_list, p, input_type, number):
+    for tag in p.tag_list():
+        column, name = tag.split(':')
+        if input_type == 'gd_genotype':
+            column = int(column) - 2
+        the_list.append('{0}:{1}:{2}'.format(column, number, name))
+
 ################################################################################
 
 if len(sys.argv) != 22:
@@ -16,6 +30,11 @@
 
 input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:]
 
+if ap1_input == '/dev/null':
+    use_reference = True
+else:
+    use_reference = False
+
 if ap3_input == '/dev/null':
     populations = 2
 else:
@@ -39,30 +58,19 @@
 p_total = Population()
 p_total.from_wrapped_dict(ind_arg)
 
-ap1 = Population(name='Ancestral population 1')
-ap1.from_population_file(ap1_input)
-population_list.append(ap1)
-if not p_total.is_superset(ap1):
-    gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table')
+if not use_reference:
+    ap1 = load_and_check_pop('Ancestral population 1', ap1_input, p_total)
+    population_list.append(ap1)
 
-ap2 = Population(name='Ancestral population 2')
-ap2.from_population_file(ap2_input)
+ap2 = load_and_check_pop('Ancestral population 2', ap2_input, p_total)
 population_list.append(ap2)
-if not p_total.is_superset(ap2):
-    gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table')
 
 if populations == 3:
-    ap3 = Population(name='Ancestral population 3')
-    ap3.from_population_file(ap3_input)
+    ap3 = load_and_check_pop('Ancestral population 3', ap3_input, p_total)
     population_list.append(ap3)
-    if not p_total.is_superset(ap3):
-        gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table')
 
-p = Population(name='Potentially admixed')
-p.from_population_file(p_input)
+p = load_and_check_pop('Potentially admixed', p_input, p_total)
 population_list.append(p)
-if not p_total.is_superset(p):
-    gd_util.die('There is an individual in the population that is not in the SNP table')
 
 gd_util.mkdir_p(output2_dir)
 
@@ -84,42 +92,17 @@
 args.append(heterochrom_path)
 args.append(misc_file)
 
-columns = ap1.column_list()
-for column in columns:
-    col = int(column)
-    name = ap1.individual_with_column(column).name
-    first_token = name.split()[0]
-    if input_type == 'gd_genotype':
-        col -= 2
-    args.append('{0}:1:{1}'.format(col, first_token))
+if use_reference:
+    args.append('0:1:reference')
+else:
+    append_pop_tags(args, ap1, input_type, 1)
 
-columns = ap2.column_list()
-for column in columns:
-    col = int(column)
-    name = ap2.individual_with_column(column).name
-    first_token = name.split()[0]
-    if input_type == 'gd_genotype':
-        col -= 2
-    args.append('{0}:2:{1}'.format(col, first_token))
+append_pop_tags(args, ap2, input_type, 2)
 
 if populations == 3:
-    columns = ap3.column_list()
-    for column in columns:
-        col = int(column)
-        name = ap3.individual_with_column(column).name
-        first_token = name.split()[0]
-        if input_type == 'gd_genotype':
-            col -= 2
-        args.append('{0}:3:{1}'.format(col, first_token))
+    append_pop_tags(args, ap3, input_type, 3)
 
-columns = p.column_list()
-for column in columns:
-    col = int(column)
-    name = p.individual_with_column(column).name
-    first_token = name.split()[0]
-    if input_type == 'gd_genotype':
-        col -= 2
-    args.append('{0}:0:{1}'.format(col, first_token))
+append_pop_tags(args, p, input_type, 0)
 
 with open(output, 'w') as fh:
     gd_util.run_program(prog, args, stdout=fh)