annotate commons/launcher/launchMreps.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 from commons.core.seq.BioseqDB import BioseqDB
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 from commons.core.parsing.MrepsToSet import MrepsToSet
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 import subprocess
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def help():
    """
    Give the list of the command-line options.

    The usage text is written on the standard output in a single call to
    sys.stdout.write(), so the function produces the same bytes whether it
    is run by a Python 2 or a Python 3 interpreter (the former 'print'
    statements were a syntax error under Python 3).
    """
    lines = [
        "",
        # two spaces after the colon: same output as the former
        # 'print "usage: ", sys.argv[0], ...' statement
        "usage:  %s [ options ]" % sys.argv[0],
        "options:",
        " -h: this help",
        " -i: name of the input file (format='fasta')",
        " -o: name of the output file (default=inFileName+'.Mreps.set')",
        " -f: error filter (default=1.0)",
        " -c: clean",
        " -v: verbosity level (default=0/1)",
        "",
    ]
    sys.stdout.write("\n".join(lines) + "\n")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _removeIfExists(fileName):
    """
    Delete the file 'fileName' when it is present, do nothing otherwise.
    """
    if os.path.exists(fileName):
        os.remove(fileName)


def _launch(cmd):
    """
    Run 'cmd' (a list of arguments) without going through a shell.

    Return the exit status of the program, or 127 when the program could
    not be started at all (e.g. executable not found).
    """
    try:
        return subprocess.call(cmd)
    except OSError as err:
        print("ERROR: can't execute '%s': %s" % (cmd[0], err))
        return 127


def main():
    """
    Launch Mreps.

    Parse the command line, write a temporary copy of the input fasta file
    after calling partialIUPAC() on each sequence, cut it with cutterDB,
    run mreps on the cut file and convert the XML output with MrepsToSet.

    The process exits with status 1 on a usage error or when cutterDB
    fails, and with status 0 when no temporary input file was written
    (i.e. no sequence contains any of A, T, G, C). An Exception is raised
    when mreps returns a non-zero status. The temporary files derived from
    the input file are removed whether the run succeeds or not.

    Return 0 on success.
    """
    inFileName = ""
    outFileName = ""
    errorFilter = 1.0
    clean = False
    verbose = 0

    try:
        opts = getopt.getopt(sys.argv[1:], "hi:o:f:cv:")[0]
    except getopt.GetoptError as err:
        print(str(err))
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFileName = a
        elif o == "-o":
            outFileName = a
        elif o == "-c":
            clean = True
        elif o in ("-f", "-v"):
            # report a malformed number as a usage error instead of
            # letting the ValueError end the script with a traceback
            try:
                if o == "-f":
                    errorFilter = float(a)
                else:
                    verbose = int(a)
            except ValueError:
                print("ERROR: option %s expects a number, got '%s'" % (o, a))
                help()
                sys.exit(1)

    if inFileName == "":
        print("ERROR: missing compulsory options")
        help()
        sys.exit(1)

    progName = os.path.basename(sys.argv[0])
    if verbose > 0:
        print("beginning of %s" % progName)
        sys.stdout.flush()

    # Mreps 2.5 doesn't fully support IUPAC nomenclature
    if verbose > 0:
        print("* check IUPAC symbols")
        sys.stdout.flush()
    tmpInFileName = "%s.tmp%i" % (inFileName, os.getpid())
    cutFileName = "%s_cut" % tmpInFileName
    nStretchFileName = "%s.Nstretch.map" % tmpInFileName
    _removeIfExists(tmpInFileName)
    bsDB = BioseqDB(inFileName)
    for bs in bsDB.db:
        if verbose > 0:
            print(bs.header)
            sys.stdout.flush()
        bs.partialIUPAC()
        if any(nt in bs.sequence for nt in ("A", "T", "G", "C")):
            # NOTE(review): the whole database is saved again for every
            # sequence that is not only Ns; presumably save() overwrites
            # the file -- confirm before hoisting it out of the loop.
            bsDB.save(tmpInFileName)
        elif verbose > 0:
            print("** Warning: only Ns")
            sys.stdout.flush()

    # nothing was saved: no sequence holds any of A, T, G, C
    if not os.path.exists(tmpInFileName):
        sys.exit(0)

    try:
        if verbose > 0:
            print("* remove N stretches")
            sys.stdout.flush()
        repetPath = os.environ.get("REPET_PATH")
        if repetPath is None:
            print("ERROR: environment variable REPET_PATH is not set")
            sys.exit(1)
        prg = os.path.join(repetPath, "bin", "cutterDB")
        # arguments are given as a list so that file names holding spaces
        # or shell metacharacters are passed unchanged to the program
        cmd = [prg, "-l", "200000", "-o", "0", "-w", "11", tmpInFileName]
        if verbose > 0:
            print(" ".join(cmd))
            sys.stdout.flush()
        log = _launch(cmd)
        if log != 0:
            print("ERROR: %s returned %i" % (prg, log))
            sys.exit(1)

        # launch Mreps on the input file
        MrepsOutFileName = "%s.Mreps.xml" % tmpInFileName
        cmd = ["mreps",
               "-res", "3",
               "-exp", "3.0",
               "-maxsize", "50",
               "-xmloutput", MrepsOutFileName,
               "-fasta", cutFileName]
        if _launch(cmd) != 0:
            raise Exception("ERROR when launching '%s'" % " ".join(cmd))

        if outFileName == "":
            outFileName = inFileName + ".Mreps.set"

        # parse Mreps results in xml format
        iMrepsToSet = MrepsToSet(inFileName, MrepsOutFileName, outFileName, errorFilter)
        iMrepsToSet.run()
        if clean:
            iMrepsToSet.clean()
    finally:
        # remove temporary input files, also when a step above failed
        for tmpFileName in (tmpInFileName, cutFileName, nStretchFileName):
            _removeIfExists(tmpFileName)

    if verbose > 0:
        print("%s finished successfully\n" % progName)
        sys.stdout.flush()

    return 0
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
137
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
138
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == '__main__':
    # hand the value returned by main() to the caller as the exit status
    sys.exit(main())