view dpmix.py @ 32:03c22b722882

remove BeautifulSoup dependency
author Richard Burhans <burhans@bx.psu.edu>
date Fri, 20 Sep 2013 13:54:23 -0400
parents a631c2f6d913
children
line wrap: on
line source

#!/usr/bin/env python

import gd_util
import sys
import os
from Population import Population
import gd_composite
from dpmix_plot import make_dpmix_plot
from LocationFile import LocationFile

def load_and_check_pop(name, file, total_pop):
    """Load a population and verify that ``total_pop`` covers it.

    name      -- label given to the new Population; also quoted in the
                 error message.
    file      -- passed straight to Population.from_population_file().
    total_pop -- Population whose is_superset() test the newly loaded
                 population must pass.

    Returns the loaded Population.  If the superset test fails,
    gd_util.die() is called with a message naming the population.
    """
    population = Population(name=name)
    population.from_population_file(file)

    if total_pop.is_superset(population):
        return population

    gd_util.die('There is an individual in {0} that is not in the SNP table'.format(name))
    return population

def append_pop_tags(the_list, p, input_type, number):
    """Append one ``column:number:name`` string to ``the_list`` per tag of ``p``.

    the_list   -- list that is extended in place.
    p          -- object whose tag_list() yields ``column:name`` strings.
    input_type -- when equal to 'gd_genotype', each column is converted to
                  an int and reduced by 2; otherwise the column text is
                  passed through unchanged.
    number     -- value placed in the middle field of every appended string.
    """
    shift_column = (input_type == 'gd_genotype')
    for entry in p.tag_list():
        col, label = entry.split(':')
        out_col = int(col) - 2 if shift_column else col
        the_list.append('{0}:{1}:{2}'.format(out_col, number, label))

################################################################################

# The script takes exactly 21 positional arguments (sys.argv[0] is the
# script itself).  On a mismatch, describe the expected arguments on stderr
# and exit with status 1.
if len(sys.argv) != 22:
    sys.stderr.write(
        'usage: {0} input input_type data_source switch_penalty '
        'ap1_input ap1_name ap2_input ap2_name ap3_input ap3_name '
        'p_input output output2 output2_dir dbkey ref_column '
        'galaxy_data_index_dir heterochromatin_loc_file ind_arg het_arg '
        'add_logs\n'.format(os.path.basename(sys.argv[0]))
    )
    sys.exit(1)

# Unpack the positional arguments in the order listed in the usage message.
(input, input_type, data_source, switch_penalty,
 ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name,
 p_input, output, output2, output2_dir, dbkey, ref_column,
 galaxy_data_index_dir, heterochromatin_loc_file,
 ind_arg, het_arg, add_logs) = sys.argv[1:]

# An ancestral-population input of '/dev/null' marks that population as not
# supplied: without ap1 the reference is used in its place, and without ap3
# only two ancestral populations take part.
use_reference = (ap1_input == '/dev/null')
populations = 2 if ap3_input == '/dev/null' else 3

chrom = 'all'

# Choose the heterochromatin path; '/dev/null' is used when there is none.
if het_arg == 'use_none':
    heterochrom_path = '/dev/null'
elif het_arg == 'use_installed':
    # Look the path up by dbkey in the installed location file.
    loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file)
    location_file = LocationFile(loc_path)
    heterochrom_path = location_file.get_values_if_exists(dbkey)
    if heterochrom_path is None:
        heterochrom_path = '/dev/null'
else:
    # Any other value is taken to be the path itself.
    heterochrom_path = het_arg

# Population that every population loaded below must be a subset of
# (load_and_check_pop enforces this).
p_total = Population()
p_total.from_wrapped_dict(ind_arg)

# Populations are collected in load order: ap1 (unless the reference is
# used instead), ap2, ap3 (three-population runs only), then the
# potentially admixed population.
population_list = []

if not use_reference:
    ap1 = load_and_check_pop('Ancestral population 1', ap1_input, p_total)
    population_list.append(ap1)

ap2 = load_and_check_pop('Ancestral population 2', ap2_input, p_total)
population_list.append(ap2)

if populations == 3:
    ap3 = load_and_check_pop('Ancestral population 3', ap3_input, p_total)
    population_list.append(ap3)

p = load_and_check_pop('Potentially admixed', p_input, p_total)
population_list.append(p)

# Make sure the directory that receives the extra output files exists.
gd_util.mkdir_p(output2_dir)

################################################################################
# Create tabular file
################################################################################

prog = 'dpmix'
misc_file = os.path.join(output2_dir, 'summary.txt')

# Fixed leading arguments, in the order they are handed to the program.
args = [
    prog,
    input,
    ref_column,
    chrom,
    data_source,
    add_logs,
    switch_penalty,
    heterochrom_path,
    misc_file,
]

# Population tags follow: ancestral population 1 (or the reference
# placeholder), ancestral population 2, optionally ancestral population 3,
# and finally the potentially admixed population, tagged with number 0.
if use_reference:
    args.append('0:1:reference')
else:
    append_pop_tags(args, ap1, input_type, 1)

append_pop_tags(args, ap2, input_type, 2)

if populations == 3:
    append_pop_tags(args, ap3, input_type, 3)

append_pop_tags(args, p, input_type, 0)

# The program's stdout is captured in the main output file.
with open(output, 'w') as tabular_fh:
    gd_util.run_program(prog, args, stdout=tabular_fh)

################################################################################
# Create pdf file
################################################################################

# Map state numbers to display names; state 3 exists only in
# three-population runs.
state2name = {
    0: 'heterochromatin',
    1: ap1_name,
    2: ap2_name,
}
if populations == 3:
    state2name[3] = ap3_name

pdf_file = os.path.join(output2_dir, 'picture.pdf')
make_dpmix_plot(
    dbkey,
    output,
    pdf_file,
    galaxy_data_index_dir,
    state2name=state2name,
    populations=populations,
)

################################################################################
# Create html
################################################################################

info_page = gd_composite.InfoPage()
info_page.set_title('dpmix Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

# Output files listed on the page; both were written into output2_dir.
out_pdf = gd_composite.Parameter(name='picture.pdf', value='picture.pdf', display_type=display_file)
out_misc = gd_composite.Parameter(name='summary.txt', value='summary.txt', display_type=display_file)

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_misc)

# Translate the data-source code into a readable label.  An unrecognized
# code is shown as-is; previously it left data_source_value unbound and the
# script died with a NameError after all the real work had been done.
data_source_labels = {
    '0': 'sequence coverage',
    '1': 'estimated genotype',
}
data_source_value = data_source_labels.get(data_source, data_source)

in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value)

info_page.add_input_parameter(in_data_source)
info_page.add_input_parameter(in_switch_penalty)

misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())

info_page.add_misc(misc_populations)

# Write the rendered page followed by a newline (what the former
# ``print >> ofh`` statement produced), in a form that parses on both
# Python 2 and Python 3.
with open(output2, 'w') as ofh:
    ofh.write(str(info_page.render()) + '\n')

sys.exit(0)