diff filter_gd_snp.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 95a05c1ef5d5
children
line wrap: on
line diff
--- a/filter_gd_snp.py	Mon Jun 03 12:29:29 2013 -0400
+++ b/filter_gd_snp.py	Mon Jul 15 10:47:35 2013 -0400
@@ -1,25 +1,11 @@
 #!/usr/bin/env python
 
+import gd_util
 import sys
-import subprocess
 from Population import Population
 
 ################################################################################
 
-def convert_non_negative_int(string_value):
-    try:
-        val = int(string_value)
-    except:
-        print >> sys.stderr, '"%s" is not an integer' % string_value
-        sys.exit(1)
-
-    if val < 0:
-        print >> sys.stderr, '"%d" is negative' % val
-        sys.exit(1)
-
-    return val
-    
-
 def convert_percent(string_value):
     if string_value.endswith('%'):
         val = convert_non_negative_int(string_value[:-1])
@@ -32,51 +18,66 @@
 
     return str(val)
 
+def convert_non_negative_int(string_value):
+    try:
+        val = int(string_value)
+    except:
+        print >> sys.stderr, '"%s" is not an integer' % string_value
+        sys.exit(1)
+
+    if val < 0:
+        print >> sys.stderr, '"%d" is negative' % val
+        sys.exit(1)
+
+    return val
+
 ################################################################################
 
-if len(sys.argv) < 9:
-    print >> sys.stderr, "Usage"
-    sys.exit(1)
+if len(sys.argv) != 13:
+    gd_util.die('Usage')
 
-input, p1_input, output, lo, hi, lo_ind, lo_ind_qual = sys.argv[1:8]
-individual_metadata = sys.argv[8:]
+input, output, ref_chrom_col, min_spacing, lo_genotypes, p1_input, input_type, lo_coverage, hi_coverage, low_ind_cov, low_quality, ind_arg = sys.argv[1:]
 
 p_total = Population()
-p_total.from_tag_list(individual_metadata)
+p_total.from_wrapped_dict(ind_arg)
 
 p1 = Population()
 p1.from_population_file(p1_input)
 
 if not p_total.is_superset(p1):
-    print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
-    sys.exit(1)
+    gd_util.die('There is an individual in the population that is not in the SNP table')
+
+lo_coverage = convert_percent(lo_coverage)
+hi_coverage = convert_percent(hi_coverage)
 
-lo = convert_percent(lo)
-hi = convert_percent(hi)
+if input_type == 'gd_snp':
+    type_arg = 1
+elif input_type == 'gd_genotype':
+    type_arg = 0
+else:
+    gd_util.die('unknown input_type: {0}'.format(input_type))
 
 ################################################################################
 
 prog = 'filter_snps'
 
-args = []
-args.append(prog)
-args.append(input)
-args.append(lo)
-args.append(hi)
-args.append(lo_ind)
-args.append(lo_ind_qual)
+args = [ prog ]
+args.append(input)          # file containing a Galaxy table
+args.append(type_arg)       # 1 for a gd_snp file, 0 for gd_genotype
+args.append(lo_coverage)    # lower bound on total coverage (< 0 means interpret as percentage)
+args.append(hi_coverage)    # upper bound on total coverage (< 0 means interpret as percentage)
+args.append(low_ind_cov)    # lower bound on individual coverage
+args.append(low_quality)    # lower bound on individual quality value
+args.append(lo_genotypes)   # lower bound on the number of defined genotypes
+args.append(min_spacing)    # lower bound on the spacing between SNPs
+args.append(ref_chrom_col)  # reference-chromosome column (base-1); ref position in next column
 
 columns = p1.column_list()
-
 for column in sorted(columns):
-    args.append(column)
+    args.append(column)     # the starting columns (base-1) for the chosen individuals
 
-fh = open(output, 'w')
-
-#print "args:", ' '.join(args)
-p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
-rc = p.wait()
-fh.close()
+with open(output, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
 
 sys.exit(0)