Mercurial > repos > miller-lab > genome_diversity
comparison coverage_distributions.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
| author | Richard Burhans <burhans@bx.psu.edu> |
|---|---|
| date | Mon, 15 Jul 2013 10:47:35 -0400 |
| parents | 2c498d40ecde |
| children |
comparison
equal
deleted
inserted
replaced
| 26:91e835060ad2 | 27:8997f2ca8c7a |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 | 2 |
| 3 import gd_util | |
| 3 import os | 4 import os |
| 4 import errno | |
| 5 import sys | 5 import sys |
| 6 import shutil | |
| 7 import subprocess | |
| 8 from Population import Population | 6 from Population import Population |
| 9 import gd_composite | 7 import gd_composite |
| 10 | 8 |
| 11 ################################################################################ | 9 ################################################################################ |
| 12 | 10 |
| 13 def mkdir_p(path): | 11 if len(sys.argv) < 7: |
| 14 try: | 12 gd_util.die('Usage') |
| 15 os.makedirs(path) | |
| 16 except OSError, e: | |
| 17 if e.errno <> errno.EEXIST: | |
| 18 raise | |
| 19 | 13 |
| 20 ################################################################################ | 14 input, data_source, output, extra_files_path, ind_arg = sys.argv[1:6] |
| 21 | 15 |
| 22 if len(sys.argv) < 7: | |
| 23 print >> sys.stderr, "Usage" | |
| 24 sys.exit(1) | |
| 25 | |
| 26 input, data_source, output, extra_files_path = sys.argv[1:5] | |
| 27 | |
| 28 individual_metadata = [] | |
| 29 population_info = [] | 16 population_info = [] |
| 30 p1_input = None | 17 p1_input = None |
| 31 all_individuals = False | 18 all_individuals = False |
| 32 | 19 |
| 33 for arg in sys.argv[5:]: | 20 for arg in sys.argv[6:]: |
| 34 if arg == 'all_individuals': | 21 if arg == 'all_individuals': |
| 35 all_individuals = True | 22 all_individuals = True |
| 36 elif len(arg) > 12 and arg[:12] == 'individuals:': | 23 elif len(arg) > 12 and arg[:12] == 'individuals:': |
| 37 p1_input = arg[12:] | 24 p1_input = arg[12:] |
| 38 elif len(arg) > 11: | 25 elif len(arg) > 11 and arg[:11] == 'population:': |
| 39 if arg[:11] == 'population:': | 26 file, name = arg[11:].split(':', 1) |
| 40 file, name = arg[11:].split(':', 1) | 27 population_info.append((file, name)) |
| 41 population_info.append((file, name)) | |
| 42 elif arg[:11] == 'individual:': | |
| 43 individual_metadata.append(arg[11:]) | |
| 44 | 28 |
| 45 p_total = Population() | 29 p_total = Population() |
| 46 p_total.from_tag_list(individual_metadata) | 30 p_total.from_wrapped_dict(ind_arg) |
| 47 | 31 |
| 48 ################################################################################ | 32 ################################################################################ |
| 49 | 33 |
| 50 mkdir_p(extra_files_path) | 34 gd_util.mkdir_p(extra_files_path) |
| 51 | 35 |
| 52 ################################################################################ | 36 ################################################################################ |
| 53 | 37 |
| 54 prog = 'coverage' | 38 prog = 'coverage' |
| 55 | 39 |
| 56 args = [] | 40 args = [ prog ] |
| 57 args.append(prog) | |
| 58 args.append(input) | 41 args.append(input) |
| 59 args.append(data_source) | 42 args.append(data_source) |
| 60 | 43 |
| 61 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt') | 44 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt') |
| 62 args.append(user_coverage_file) | 45 args.append(user_coverage_file) |
| 70 this_pop = Population() | 53 this_pop = Population() |
| 71 this_pop.from_population_file(p1_input) | 54 this_pop.from_population_file(p1_input) |
| 72 population_list.append(this_pop) | 55 population_list.append(this_pop) |
| 73 p1.from_population_file(p1_input) | 56 p1.from_population_file(p1_input) |
| 74 if not p_total.is_superset(p1): | 57 if not p_total.is_superset(p1): |
| 75 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | 58 gd_util.die('There is an individual in the population that is not in the SNP table') |
| 76 sys.exit(1) | |
| 77 tags = p1.tag_list() | 59 tags = p1.tag_list() |
| 78 else: | 60 else: |
| 79 tags = [] | 61 tags = [] |
| 80 for population_file, population_name in population_info: | 62 for population_file, population_name in population_info: |
| 81 population = Population() | 63 population = Population() |
| 82 this_pop = Population() | 64 this_pop = Population() |
| 83 this_pop.from_population_file(population_file) | 65 this_pop.from_population_file(population_file) |
| 84 population_list.append(this_pop) | 66 population_list.append(this_pop) |
| 85 population.from_population_file(population_file) | 67 population.from_population_file(population_file) |
| 86 if not p_total.is_superset(population): | 68 if not p_total.is_superset(population): |
| 87 print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name) | 69 gd_util.die('There is an individual in the {} population that is not in the SNP table'.format(population_name)) |
| 88 sys.exit(1) | |
| 89 columns = population.column_list() | 70 columns = population.column_list() |
| 90 for column in columns: | 71 for column in columns: |
| 91 tags.append('{0}:{1}'.format(column, population_name)) | 72 tags.append('{0}:{1}'.format(column, population_name)) |
| 92 | 73 |
| 93 for tag in tags: | 74 for tag in tags: |
| 94 args.append(tag) | 75 args.append(tag) |
| 95 | 76 |
| 96 ## text output | 77 ## text output |
| 97 coverage_file = 'coverage.txt' | 78 coverage_file = 'coverage.txt' |
| 98 fh = open(coverage_file, 'w') | 79 with open(coverage_file, 'w') as fh: |
| 99 #print "args:", ' '.join(args) | 80 gd_util.run_program(prog, args, stdout=fh) |
| 100 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr) | |
| 101 rc = p.wait() | |
| 102 fh.close() | |
| 103 | 81 |
| 104 ## graphical output | 82 ## graphical output |
| 105 fh = open(coverage_file) | |
| 106 coverage2_file = 'coverage2.txt' | 83 coverage2_file = 'coverage2.txt' |
| 107 ofh = open(coverage2_file, 'w') | 84 with open(coverage_file) as fh, open(coverage2_file, 'w') as ofh: |
| 108 | 85 for line in fh: |
| 109 for line in fh: | 86 line = line.rstrip('\r\n') |
| 110 line = line.rstrip('\r\n') | 87 elems = line.split('\t') |
| 111 elems = line.split('\t') | 88 name = elems.pop(0) |
| 112 name = elems.pop(0) | 89 values = [ elems[0] ] |
| 113 values = [ elems[0] ] | 90 for idx in range(1, len(elems)): |
| 114 for idx in range(1, len(elems)): | 91 val = str(float(elems[idx]) - float(elems[idx-1])) |
| 115 val = str(float(elems[idx]) - float(elems[idx-1])) | 92 values.append(val) |
| 116 values.append(val) | 93 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values)) |
| 117 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values)) | |
| 118 | |
| 119 fh.close() | |
| 120 ofh.close() | |
| 121 | 94 |
| 122 ################################################################################ | 95 ################################################################################ |
| 123 | 96 |
| 124 prog = 'R' | 97 prog = 'Rscript' |
| 125 | 98 |
| 126 args = [] | 99 args = [ prog ] |
| 127 args.append(prog) | |
| 128 args.append('--vanilla') | |
| 129 args.append('--quiet') | |
| 130 | 100 |
| 131 _realpath = os.path.realpath(__file__) | 101 _realpath = os.path.realpath(__file__) |
| 132 _script_dir = os.path.dirname(_realpath) | 102 _script_dir = os.path.dirname(_realpath) |
| 133 r_script_file = os.path.join(_script_dir, 'coverage_plot.r') | 103 r_script_file = os.path.join(_script_dir, 'coverage_plot.r') |
| 134 | 104 args.append(r_script_file) |
| 135 ifh = open(r_script_file) | |
| 136 ofh = open('/dev/null', 'w') | |
| 137 #print "args:", ' '.join(args) | |
| 138 p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None) | |
| 139 rc = p.wait() | |
| 140 ifh.close() | |
| 141 ofh.close() | |
| 142 | 105 |
| 143 pdf_file = os.path.join(extra_files_path, 'coverage.pdf') | 106 pdf_file = os.path.join(extra_files_path, 'coverage.pdf') |
| 144 shutil.copy2('coverage.pdf', pdf_file) | 107 args.append(pdf_file) |
| 145 os.remove('coverage.pdf') | 108 |
| 146 os.remove(coverage2_file) | 109 gd_util.run_program(prog, args) |
| 147 | 110 |
| 148 ################################################################################ | 111 ################################################################################ |
| 149 | 112 |
| 150 info_page = gd_composite.InfoPage() | 113 info_page = gd_composite.InfoPage() |
| 151 info_page.set_title('Coverage distributions Galaxy Composite Dataset') | 114 info_page.set_title('Coverage distributions Galaxy Composite Dataset') |
| 156 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file) | 119 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file) |
| 157 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file) | 120 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file) |
| 158 | 121 |
| 159 info_page.add_output_parameter(out_pdf) | 122 info_page.add_output_parameter(out_pdf) |
| 160 info_page.add_output_parameter(out_txt) | 123 info_page.add_output_parameter(out_txt) |
| 161 | |
| 162 | 124 |
| 163 if data_source == '0': | 125 if data_source == '0': |
| 164 data_source_value = 'sequence coverage' | 126 data_source_value = 'sequence coverage' |
| 165 elif data_source == '1': | 127 elif data_source == '1': |
| 166 data_source_value = 'estimated genotype' | 128 data_source_value = 'estimated genotype' |
| 174 info_page.add_misc(misc_populations) | 136 info_page.add_misc(misc_populations) |
| 175 else: | 137 else: |
| 176 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) | 138 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) |
| 177 info_page.add_misc(misc_individuals) | 139 info_page.add_misc(misc_individuals) |
| 178 | 140 |
| 179 | |
| 180 | |
| 181 | |
| 182 with open (output, 'w') as ofh: | 141 with open (output, 'w') as ofh: |
| 183 print >> ofh, info_page.render() | 142 print >> ofh, info_page.render() |
| 184 | 143 |
| 185 | |
| 186 sys.exit(0) | 144 sys.exit(0) |
| 187 | 145 |
