comparison dpmix.py @ 31:a631c2f6d913

Update to Miller Lab devshed revision 3c4110ffacc3
author Richard Burhans <burhans@bx.psu.edu>
date Fri, 20 Sep 2013 13:25:27 -0400
parents 8997f2ca8c7a
children
comparison
equal deleted inserted replaced
30:4188853b940b 31:a631c2f6d913
6 from Population import Population 6 from Population import Population
7 import gd_composite 7 import gd_composite
8 from dpmix_plot import make_dpmix_plot 8 from dpmix_plot import make_dpmix_plot
9 from LocationFile import LocationFile 9 from LocationFile import LocationFile
10 10
11 def load_and_check_pop(name, file, total_pop):
12 p = Population(name=name)
13 p.from_population_file(file)
14 if not total_pop.is_superset(p):
15 gd_util.die('There is an individual in {0} that is not in the SNP table'.format(name))
16 return p
17
18 def append_pop_tags(the_list, p, input_type, number):
19 for tag in p.tag_list():
20 column, name = tag.split(':')
21 if input_type == 'gd_genotype':
22 column = int(column) - 2
23 the_list.append('{0}:{1}:{2}'.format(column, number, name))
24
11 ################################################################################ 25 ################################################################################
12 26
13 if len(sys.argv) != 22: 27 if len(sys.argv) != 22:
14 print "usage" 28 print "usage"
15 sys.exit(1) 29 sys.exit(1)
16 30
17 input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:] 31 input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:]
32
33 if ap1_input == '/dev/null':
34 use_reference = True
35 else:
36 use_reference = False
18 37
19 if ap3_input == '/dev/null': 38 if ap3_input == '/dev/null':
20 populations = 2 39 populations = 2
21 else: 40 else:
22 populations = 3 41 populations = 3
37 population_list = [] 56 population_list = []
38 57
39 p_total = Population() 58 p_total = Population()
40 p_total.from_wrapped_dict(ind_arg) 59 p_total.from_wrapped_dict(ind_arg)
41 60
42 ap1 = Population(name='Ancestral population 1') 61 if not use_reference:
43 ap1.from_population_file(ap1_input) 62 ap1 = load_and_check_pop('Ancestral population 1', ap1_input, p_total)
44 population_list.append(ap1) 63 population_list.append(ap1)
45 if not p_total.is_superset(ap1):
46 gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table')
47 64
48 ap2 = Population(name='Ancestral population 2') 65 ap2 = load_and_check_pop('Ancestral population 2', ap2_input, p_total)
49 ap2.from_population_file(ap2_input)
50 population_list.append(ap2) 66 population_list.append(ap2)
51 if not p_total.is_superset(ap2):
52 gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table')
53 67
54 if populations == 3: 68 if populations == 3:
55 ap3 = Population(name='Ancestral population 3') 69 ap3 = load_and_check_pop('Ancestral population 3', ap3_input, p_total)
56 ap3.from_population_file(ap3_input)
57 population_list.append(ap3) 70 population_list.append(ap3)
58 if not p_total.is_superset(ap3):
59 gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table')
60 71
61 p = Population(name='Potentially admixed') 72 p = load_and_check_pop('Potentially admixed', p_input, p_total)
62 p.from_population_file(p_input)
63 population_list.append(p) 73 population_list.append(p)
64 if not p_total.is_superset(p):
65 gd_util.die('There is an individual in the population that is not in the SNP table')
66 74
67 gd_util.mkdir_p(output2_dir) 75 gd_util.mkdir_p(output2_dir)
68 76
69 ################################################################################ 77 ################################################################################
70 # Create tabular file 78 # Create tabular file
82 args.append(add_logs) 90 args.append(add_logs)
83 args.append(switch_penalty) 91 args.append(switch_penalty)
84 args.append(heterochrom_path) 92 args.append(heterochrom_path)
85 args.append(misc_file) 93 args.append(misc_file)
86 94
87 columns = ap1.column_list() 95 if use_reference:
88 for column in columns: 96 args.append('0:1:reference')
89 col = int(column) 97 else:
90 name = ap1.individual_with_column(column).name 98 append_pop_tags(args, ap1, input_type, 1)
91 first_token = name.split()[0]
92 if input_type == 'gd_genotype':
93 col -= 2
94 args.append('{0}:1:{1}'.format(col, first_token))
95 99
96 columns = ap2.column_list() 100 append_pop_tags(args, ap2, input_type, 2)
97 for column in columns:
98 col = int(column)
99 name = ap2.individual_with_column(column).name
100 first_token = name.split()[0]
101 if input_type == 'gd_genotype':
102 col -= 2
103 args.append('{0}:2:{1}'.format(col, first_token))
104 101
105 if populations == 3: 102 if populations == 3:
106 columns = ap3.column_list() 103 append_pop_tags(args, ap3, input_type, 3)
107 for column in columns:
108 col = int(column)
109 name = ap3.individual_with_column(column).name
110 first_token = name.split()[0]
111 if input_type == 'gd_genotype':
112 col -= 2
113 args.append('{0}:3:{1}'.format(col, first_token))
114 104
115 columns = p.column_list() 105 append_pop_tags(args, p, input_type, 0)
116 for column in columns:
117 col = int(column)
118 name = p.individual_with_column(column).name
119 first_token = name.split()[0]
120 if input_type == 'gd_genotype':
121 col -= 2
122 args.append('{0}:0:{1}'.format(col, first_token))
123 106
124 with open(output, 'w') as fh: 107 with open(output, 'w') as fh:
125 gd_util.run_program(prog, args, stdout=fh) 108 gd_util.run_program(prog, args, stdout=fh)
126 109
127 ################################################################################ 110 ################################################################################