Mercurial > repos > xuebing > sharplabtool
comparison tools/extract/liftOver_wrapper.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 #Guruprasad Ananda | |
3 """ | |
4 Converts coordinates from one build/assembly to another using liftOver binary and mapping files downloaded from UCSC. | |
5 """ | |
6 | |
7 import os, string, subprocess, sys | |
8 import tempfile | |
9 import re | |
10 | |
11 assert sys.version_info[:2] >= ( 2, 4 ) | |
12 | |
13 def stop_err(msg): | |
14 sys.stderr.write(msg) | |
15 sys.exit() | |
16 | |
17 def safe_bed_file(infile): | |
18 """Make a BED file with track and browser lines ready for liftOver. | |
19 | |
20 liftOver will fail with track or browser lines. We can make it happy | |
21 by converting these to comments. See: | |
22 | |
23 https://lists.soe.ucsc.edu/pipermail/genome/2007-May/013561.html | |
24 """ | |
25 fix_pat = re.compile("^(track|browser)") | |
26 (fd, fname) = tempfile.mkstemp() | |
27 in_handle = open(infile) | |
28 out_handle = open(fname, "w") | |
29 for line in in_handle: | |
30 if fix_pat.match(line): | |
31 line = "#" + line | |
32 out_handle.write(line) | |
33 in_handle.close() | |
34 out_handle.close() | |
35 return fname | |
36 | |
37 if len( sys.argv ) < 9: | |
38 stop_err( "USAGE: prog input out_file1 out_file2 input_dbkey output_dbkey infile_type minMatch multiple <minChainT> <minChainQ> <minSizeQ>" ) | |
39 | |
40 infile = sys.argv[1] | |
41 outfile1 = sys.argv[2] | |
42 outfile2 = sys.argv[3] | |
43 in_dbkey = sys.argv[4] | |
44 mapfilepath = sys.argv[5] | |
45 infile_type = sys.argv[6] | |
46 gff_option = "" | |
47 if infile_type == "gff": | |
48 gff_option = "-gff " | |
49 minMatch = sys.argv[7] | |
50 multiple = int(sys.argv[8]) | |
51 multiple_option = "" | |
52 if multiple: | |
53 minChainT = sys.argv[9] | |
54 minChainQ = sys.argv[10] | |
55 minSizeQ = sys.argv[11] | |
56 multiple_option = " -multiple -minChainT=%s -minChainQ=%s -minSizeQ=%s " %(minChainT,minChainQ,minSizeQ) | |
57 | |
58 try: | |
59 assert float(minMatch) | |
60 except: | |
61 minMatch = 0.1 | |
62 #ensure dbkey is set | |
63 if in_dbkey == "?": | |
64 stop_err( "Input dataset genome build unspecified, click the pencil icon in the history item to specify it." ) | |
65 | |
66 if not os.path.isfile( mapfilepath ): | |
67 stop_err( "%s mapping is not currently available." % ( mapfilepath.split('/')[-1].split('.')[0] ) ) | |
68 | |
69 safe_infile = safe_bed_file(infile) | |
70 cmd_line = "liftOver " + gff_option + "-minMatch=" + str(minMatch) + multiple_option + " " + safe_infile + " " + mapfilepath + " " + outfile1 + " " + outfile2 + " > /dev/null" | |
71 | |
72 try: | |
73 # have to nest try-except in try-finally to handle 2.4 | |
74 try: | |
75 proc = subprocess.Popen( args=cmd_line, shell=True, stderr=subprocess.PIPE ) | |
76 returncode = proc.wait() | |
77 stderr = proc.stderr.read() | |
78 if returncode != 0: | |
79 raise Exception, stderr | |
80 except Exception, e: | |
81 raise Exception, 'Exception caught attempting conversion: ' + str( e ) | |
82 finally: | |
83 os.remove(safe_infile) |