diff coverage_distributions.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 2c498d40ecde
children
line wrap: on
line diff
--- a/coverage_distributions.py	Mon Jun 03 12:29:29 2013 -0400
+++ b/coverage_distributions.py	Mon Jul 15 10:47:35 2013 -0400
@@ -1,60 +1,43 @@
 #!/usr/bin/env python
 
+import gd_util
 import os
-import errno
 import sys
-import shutil
-import subprocess
 from Population import Population
 import gd_composite
 
 ################################################################################
 
-def mkdir_p(path):
-    try:
-        os.makedirs(path)
-    except OSError, e:
-        if e.errno <> errno.EEXIST:
-            raise
+if len(sys.argv) < 7:
+    gd_util.die('Usage')
 
-################################################################################
+input, data_source, output, extra_files_path, ind_arg = sys.argv[1:6]
 
-if len(sys.argv) < 7:
-    print >> sys.stderr, "Usage"
-    sys.exit(1)
-
-input, data_source, output, extra_files_path = sys.argv[1:5]
-
-individual_metadata = []
 population_info = []
 p1_input = None
 all_individuals = False
 
-for arg in sys.argv[5:]:
+for arg in sys.argv[6:]:
     if arg == 'all_individuals':
         all_individuals = True
     elif len(arg) > 12 and arg[:12] == 'individuals:':
         p1_input = arg[12:]
-    elif len(arg) > 11:
-        if arg[:11] == 'population:':
-            file, name = arg[11:].split(':', 1)
-            population_info.append((file, name))
-        elif arg[:11] == 'individual:':
-            individual_metadata.append(arg[11:])
+    elif len(arg) > 11 and arg[:11] == 'population:':
+        file, name = arg[11:].split(':', 1)
+        population_info.append((file, name))
 
 p_total = Population()
-p_total.from_tag_list(individual_metadata)
+p_total.from_wrapped_dict(ind_arg)
 
 ################################################################################
 
-mkdir_p(extra_files_path)
+gd_util.mkdir_p(extra_files_path)
 
 ################################################################################
 
 prog = 'coverage'
 
-args = []
-args.append(prog)
+args = [ prog ]
 args.append(input)
 args.append(data_source)
 
@@ -72,8 +55,7 @@
     population_list.append(this_pop)
     p1.from_population_file(p1_input)
     if not p_total.is_superset(p1):
-        print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
-        sys.exit(1)
+        gd_util.die('There is an individual in the population that is not in the SNP table')
     tags = p1.tag_list()
 else:
     tags = []
@@ -84,8 +66,7 @@
         population_list.append(this_pop)
         population.from_population_file(population_file)
         if not p_total.is_superset(population):
-            print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name)
-            sys.exit(1)
+            gd_util.die('There is an individual in the {} population that is not in the SNP table'.format(population_name))
         columns = population.column_list()
         for column in columns:
             tags.append('{0}:{1}'.format(column, population_name))
@@ -95,55 +76,37 @@
 
 ## text output
 coverage_file = 'coverage.txt'
-fh = open(coverage_file, 'w')
-#print "args:", ' '.join(args)
-p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
-rc = p.wait()
-fh.close()
+with open(coverage_file, 'w') as fh:
+    gd_util.run_program(prog, args, stdout=fh)
 
 ## graphical output
-fh = open(coverage_file)
 coverage2_file = 'coverage2.txt'
-ofh = open(coverage2_file, 'w')
-
-for line in fh:
-    line = line.rstrip('\r\n')
-    elems = line.split('\t')
-    name = elems.pop(0)
-    values = [ elems[0] ]
-    for idx in range(1, len(elems)):
-        val = str(float(elems[idx]) - float(elems[idx-1]))
-        values.append(val)
-    print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
-
-fh.close()
-ofh.close()
+with open(coverage_file) as fh, open(coverage2_file, 'w') as ofh:
+    for line in fh:
+        line = line.rstrip('\r\n')
+        elems = line.split('\t')
+        name = elems.pop(0)
+        values = [ elems[0] ]
+        for idx in range(1, len(elems)):
+            val = str(float(elems[idx]) - float(elems[idx-1]))
+            values.append(val)
+        print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
 
 ################################################################################
 
-prog = 'R'
+prog = 'Rscript'
 
-args = []
-args.append(prog)
-args.append('--vanilla')
-args.append('--quiet')
+args = [ prog ]
 
 _realpath = os.path.realpath(__file__)
 _script_dir = os.path.dirname(_realpath)
 r_script_file = os.path.join(_script_dir, 'coverage_plot.r')
-
-ifh = open(r_script_file)
-ofh = open('/dev/null', 'w')
-#print "args:", ' '.join(args)
-p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
-rc = p.wait()
-ifh.close()
-ofh.close()
+args.append(r_script_file)
 
 pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
-shutil.copy2('coverage.pdf', pdf_file)
-os.remove('coverage.pdf')
-os.remove(coverage2_file)
+args.append(pdf_file)
+
+gd_util.run_program(prog, args)
 
 ################################################################################
 
@@ -159,7 +122,6 @@
 info_page.add_output_parameter(out_pdf)
 info_page.add_output_parameter(out_txt)
 
-
 if data_source == '0':
     data_source_value = 'sequence coverage'
 elif data_source == '1':
@@ -176,12 +138,8 @@
     misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
     info_page.add_misc(misc_individuals)
 
-
-
-
 with open (output, 'w') as ofh:
     print >> ofh, info_page.render()
 
-
 sys.exit(0)