Mercurial > repos > miller-lab > genome_diversity
diff filter_gd_snp.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 95a05c1ef5d5 |
children |
line wrap: on
line diff
--- a/filter_gd_snp.py Mon Jun 03 12:29:29 2013 -0400 +++ b/filter_gd_snp.py Mon Jul 15 10:47:35 2013 -0400 @@ -1,25 +1,11 @@ #!/usr/bin/env python +import gd_util import sys -import subprocess from Population import Population ################################################################################ -def convert_non_negative_int(string_value): - try: - val = int(string_value) - except: - print >> sys.stderr, '"%s" is not an integer' % string_value - sys.exit(1) - - if val < 0: - print >> sys.stderr, '"%d" is negative' % val - sys.exit(1) - - return val - - def convert_percent(string_value): if string_value.endswith('%'): val = convert_non_negative_int(string_value[:-1]) @@ -32,51 +18,66 @@ return str(val) +def convert_non_negative_int(string_value): + try: + val = int(string_value) + except: + print >> sys.stderr, '"%s" is not an integer' % string_value + sys.exit(1) + + if val < 0: + print >> sys.stderr, '"%d" is negative' % val + sys.exit(1) + + return val + ################################################################################ -if len(sys.argv) < 9: - print >> sys.stderr, "Usage" - sys.exit(1) +if len(sys.argv) != 13: + gd_util.die('Usage') -input, p1_input, output, lo, hi, lo_ind, lo_ind_qual = sys.argv[1:8] -individual_metadata = sys.argv[8:] +input, output, ref_chrom_col, min_spacing, lo_genotypes, p1_input, input_type, lo_coverage, hi_coverage, low_ind_cov, low_quality, ind_arg = sys.argv[1:] p_total = Population() -p_total.from_tag_list(individual_metadata) +p_total.from_wrapped_dict(ind_arg) p1 = Population() p1.from_population_file(p1_input) if not p_total.is_superset(p1): - print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' - sys.exit(1) + gd_util.die('There is an individual in the population that is not in the SNP table') + +lo_coverage = convert_percent(lo_coverage) +hi_coverage = convert_percent(hi_coverage) -lo = convert_percent(lo) -hi = convert_percent(hi) +if input_type == 'gd_snp': + 
type_arg = 1 +elif input_type == 'gd_genotype': + type_arg = 0 +else: + gd_util.die('unknown input_type: {0}'.format(input_type)) ################################################################################ prog = 'filter_snps' -args = [] -args.append(prog) -args.append(input) -args.append(lo) -args.append(hi) -args.append(lo_ind) -args.append(lo_ind_qual) +args = [ prog ] +args.append(input) # file containing a Galaxy table +args.append(type_arg) # 1 for a gd_snp file, 0 for gd_genotype +args.append(lo_coverage) # lower bound on total coverage (< 0 means interpret as percentage) +args.append(hi_coverage) # upper bound on total coverage (< 0 means interpret as percentage) +args.append(low_ind_cov) # lower bound on individual coverage +args.append(low_quality) # lower bound on individual quality value +args.append(lo_genotypes) # lower bound on the number of defined genotypes +args.append(min_spacing) # lower bound on the spacing between SNPs +args.append(ref_chrom_col) # reference-chromosome column (base-1); ref position in next column columns = p1.column_list() - for column in sorted(columns): - args.append(column) + args.append(column) # the starting columns (base-1) for the chosen individuals -fh = open(output, 'w') - -#print "args:", ' '.join(args) -p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr) -rc = p.wait() -fh.close() +with open(output, 'w') as fh: + gd_util.run_program(prog, args, stdout=fh) sys.exit(0)