0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Input: DNA Fasta File
|
|
4 Output: Tabular
|
|
5 Return a tabular file with the predicted ORFs
|
|
6 Bjoern Gruening
|
|
7 """
|
|
8 import sys, os
|
|
9 import tempfile
|
|
10 import subprocess
|
|
11 import shutil
|
|
12 from glimmer2seq import glimmer2seq
|
|
13
|
|
def _run_tool(command, stdin=None, stdout=subprocess.PIPE):
    """Run an external tool and abort the script if it cannot run or fails.

    command -- argv list handed to subprocess.Popen (no shell involved)
    stdin   -- optional open file used as the tool's standard input
    stdout  -- open file receiving the tool's standard output; defaults to
               a pipe whose content is read and discarded

    stderr is always captured; it is only shown when the tool exits with a
    non-zero status, in which case the script terminates via sys.exit().
    """
    try:
        process = subprocess.Popen(command, stdin=stdin, stdout=stdout,
                                   stderr=subprocess.PIPE)
    except OSError as err:
        sys.exit("Could not start '%s': %s" % (command[0], err))

    stderr_data = process.communicate()[1]
    if process.returncode != 0:
        if not isinstance(stderr_data, str):
            # Python 3 hands back bytes; Python 2 already gives a str.
            stderr_data = stderr_data.decode("utf-8", "replace")
        sys.exit("'%s' failed with exit code %s:\n%s"
                 % (command[0], process.returncode, stderr_data))


def main():
    """Predict ORFs with glimmer3 using an ICM trained on the genome itself.

    Pipeline: ``long-orfs`` collects a training set of ORFs, ``extract``
    pulls their sequences out of the genome, ``build-icm`` builds the model
    from them and ``glimmer3`` is run with that model. The ``.predict``
    result is finally handed to ``glimmer2seq``.

    Command-line arguments (read from ``sys.argv``):
        1  genome sequence file
        2  destination for glimmer3's ``.predict`` file, or 'None' to skip
        3  destination for glimmer3's ``.detail`` file, or 'None' to skip
        4  value for glimmer3's ``-o`` option
        5  value for glimmer3's ``-g`` option
        6  value for glimmer3's ``-t`` option
        7  'true' to add ``-l`` to the glimmer3 call
        8  output file passed to ``glimmer2seq``

    Exits with an error message when too few arguments are given or when
    one of the external tools fails.
    """
    if len(sys.argv) < 9:
        sys.exit("usage: %s <genome> <predict_out|None> <detail_out|None> "
                 "<-o value> <-g value> <-t value> <true|false> <genes_out>"
                 % sys.argv[0])

    genome_seq_file = sys.argv[1]
    outfile_classic_glimmer = sys.argv[2]
    outfile_ext_path = sys.argv[3]
    outfile_genes = sys.argv[8]

    tag = 'glimmer_non_knowlegde_based_prediction'

    # Every option has to be its own argv element. Joined into one string
    # (e.g. "-o0 -g110 -t30 -l") they would be handed to glimmer3 as a
    # single argument instead of separate options.
    glimmer_options = [
        "-o%s" % sys.argv[4],
        "-g%s" % sys.argv[5],
        "-t%s" % sys.argv[6],
    ]
    if sys.argv[7] == "true":
        glimmer_options.append("-l")

    # A private working directory keeps concurrent runs from overwriting
    # each other's intermediate and result files, and makes cleanup trivial.
    workdir = tempfile.mkdtemp(prefix=tag + "_")
    try:
        longorfs_path = os.path.join(workdir, tag + ".longorfs")
        trainingset_path = os.path.join(workdir, tag + ".train")
        icm_path = os.path.join(workdir, tag + ".icm")
        result_prefix = os.path.join(workdir, tag)

        # 1. Find long, non-overlapping orfs to use as a training set
        with open(longorfs_path, "wb") as longorfs:
            _run_tool(["long-orfs", "-n", "-t", "1.15", genome_seq_file, "-"],
                      stdout=longorfs)

        # 2. Extract the training sequences from the genome file
        with open(trainingset_path, "wb") as trainingset:
            _run_tool(["extract", "-t", genome_seq_file, longorfs_path],
                      stdout=trainingset)

        # 3. Build the icm from the training sequences
        # the "-" parameter is used to redirect the output to stdout
        with open(trainingset_path, "rb") as trainingset:
            with open(icm_path, "wb") as icm:
                _run_tool(["build-icm", "-r", "-"],
                          stdin=trainingset, stdout=icm)

        # Run Glimmer3; it writes <result_prefix>.predict and .detail
        _run_tool(["glimmer3"] + glimmer_options
                  + [genome_seq_file, icm_path, result_prefix])

        predict_file = result_prefix + ".predict"
        detail_file = result_prefix + ".detail"

        # The caller passes the literal string 'None' for outputs it does
        # not want.
        if outfile_classic_glimmer.strip() != 'None':
            shutil.copyfile(predict_file, outfile_classic_glimmer)
        if outfile_ext_path.strip() != 'None':
            shutil.copyfile(detail_file, outfile_ext_path)

        glimmer2seq(predict_file, genome_seq_file, outfile_genes)
    finally:
        # Runs on success, on exceptions and on sys.exit() alike.
        shutil.rmtree(workdir, ignore_errors=True)
|
|
71
|
|
72
|
|
# Entry point: run the pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|