diff tools/extract/liftOver_wrapper.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/extract/liftOver_wrapper.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+#Guruprasad Ananda
+"""
+Converts coordinates from one build/assembly to another using liftOver binary and mapping files downloaded from UCSC.
+"""
+
+import os, string, subprocess, sys
+import tempfile
+import re
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err(msg):
+    sys.stderr.write(msg)
+    sys.exit()
+
+def safe_bed_file(infile):
+    """Make a BED file with track and browser lines ready for liftOver.
+
+    liftOver will fail with track or browser lines. We can make it happy
+    by converting these to comments. See:
+
+    https://lists.soe.ucsc.edu/pipermail/genome/2007-May/013561.html
+    """
+    fix_pat = re.compile("^(track|browser)")
+    (fd, fname) = tempfile.mkstemp()
+    in_handle = open(infile)
+    out_handle = open(fname, "w")
+    for line in in_handle:
+        if fix_pat.match(line):
+            line = "#" + line
+        out_handle.write(line)
+    in_handle.close()
+    out_handle.close()
+    return fname
+    
+if len( sys.argv ) < 9:
+    stop_err( "USAGE: prog input out_file1 out_file2 input_dbkey output_dbkey infile_type minMatch multiple <minChainT> <minChainQ> <minSizeQ>" )
+
+infile = sys.argv[1]
+outfile1 = sys.argv[2]
+outfile2 = sys.argv[3]
+in_dbkey = sys.argv[4]
+mapfilepath = sys.argv[5]
+infile_type = sys.argv[6]
+gff_option = ""
+if infile_type == "gff":
+    gff_option = "-gff "
+minMatch = sys.argv[7]
+multiple = int(sys.argv[8])
+multiple_option = ""
+if multiple:
+    minChainT = sys.argv[9]
+    minChainQ = sys.argv[10]
+    minSizeQ = sys.argv[11]
+    multiple_option = " -multiple -minChainT=%s -minChainQ=%s -minSizeQ=%s " %(minChainT,minChainQ,minSizeQ)
+
+try:
+    assert float(minMatch)
+except:
+    minMatch = 0.1
+#ensure dbkey is set
+if in_dbkey == "?": 
+    stop_err( "Input dataset genome build unspecified, click the pencil icon in the history item to specify it." )
+
+if not os.path.isfile( mapfilepath ):
+    stop_err( "%s mapping is not currently available."  % ( mapfilepath.split('/')[-1].split('.')[0] ) )
+
+safe_infile = safe_bed_file(infile)
+cmd_line = "liftOver " + gff_option + "-minMatch=" + str(minMatch) + multiple_option + " "  + safe_infile + " " + mapfilepath + " " + outfile1 + " " + outfile2 + "  > /dev/null"
+
+try:
+    # have to nest try-except in try-finally to handle 2.4
+    try:
+        proc = subprocess.Popen( args=cmd_line, shell=True, stderr=subprocess.PIPE )
+        returncode = proc.wait()
+        stderr = proc.stderr.read()
+        if returncode != 0:
+            raise Exception, stderr
+    except Exception, e:
+        raise Exception, 'Exception caught attempting conversion: ' + str( e )
+finally:
+    os.remove(safe_infile)