annotate glimmer_wo_icm.py @ 2:b1ad88bbc5fa draft default tip

Uploaded
author bgruening
date Mon, 12 Aug 2013 11:55:07 -0400
parents 841357e0acbf
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
841357e0acbf Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
841357e0acbf Uploaded
bgruening
parents:
diff changeset
2 """
841357e0acbf Uploaded
bgruening
parents:
diff changeset
3 Input: DNA Fasta File
841357e0acbf Uploaded
bgruening
parents:
diff changeset
4 Output: Tabular
841357e0acbf Uploaded
bgruening
parents:
diff changeset
5 Return Tabular File with predicted ORFs
841357e0acbf Uploaded
bgruening
parents:
diff changeset
6 Bjoern Gruening
841357e0acbf Uploaded
bgruening
parents:
diff changeset
7 """
841357e0acbf Uploaded
bgruening
parents:
diff changeset
8 import sys, os
841357e0acbf Uploaded
bgruening
parents:
diff changeset
9 import tempfile
841357e0acbf Uploaded
bgruening
parents:
diff changeset
10 import subprocess
841357e0acbf Uploaded
bgruening
parents:
diff changeset
11 import shutil
841357e0acbf Uploaded
bgruening
parents:
diff changeset
12 from glimmer2seq import glimmer2seq
841357e0acbf Uploaded
bgruening
parents:
diff changeset
13
841357e0acbf Uploaded
bgruening
parents:
diff changeset
def _run_step(argv, stdin=None, stdout=subprocess.PIPE):
    """Run one external tool and abort the script if it exits non-zero.

    argv   -- program name followed by its arguments, one list item each.
    stdin  -- optional open file object fed to the child process.
    stdout -- open file object receiving the child's stdout; by default the
              output is captured and discarded.

    Exits via sys.exit() with the tool's stderr when the return code is
    not 0, so a failed step is reported instead of silently producing
    empty or missing result files.
    """
    proc = subprocess.Popen(argv, stdin=stdin, stdout=stdout,
                            stderr=subprocess.PIPE)
    _, err = proc.communicate()
    if proc.returncode != 0:
        # communicate() returns bytes on Python 3 and str on Python 2.
        if not isinstance(err, str):
            err = err.decode("utf-8", "replace")
        sys.exit("%s failed with exit code %d:\n%s"
                 % (argv[0], proc.returncode, err))


def main():
    """Predict genes with Glimmer3 using a self-trained ICM.

    Command line arguments (sys.argv):
        1  genome sequence file
        2  destination for Glimmer's ".predict" file, or 'None' to skip it
        3  destination for Glimmer's ".detail" file, or 'None' to skip it
        4  value appended to glimmer3's -o option
        5  value appended to glimmer3's -g option
        6  value appended to glimmer3's -t option
        7  "true" to additionally pass -l to glimmer3
        8  output file handed to glimmer2seq()

    The external programs long-orfs, extract, build-icm and glimmer3 must
    be available on PATH.
    """
    genome_seq_file = sys.argv[1]
    outfile_classic_glimmer = sys.argv[2]
    outfile_ext_path = sys.argv[3]
    oufile_genes = sys.argv[8]

    tag = 'glimmer_non_knowlegde_based_prediction'

    # Each option has to be its own argv element: a single space-joined
    # string would reach glimmer3 as one argument.
    # Example result: ["-o0", "-g110", "-t30", "-l"]
    glimmeropts = ["-o%s" % sys.argv[4],
                   "-g%s" % sys.argv[5],
                   "-t%s" % sys.argv[6]]
    if sys.argv[7] == "true":
        glimmeropts.append("-l")

    # A private working directory keeps concurrent runs from overwriting
    # each other's results and lets us remove everything afterwards.
    workdir = tempfile.mkdtemp()
    try:
        result_prefix = os.path.join(workdir, tag)
        longorfs = os.path.join(workdir, tag + ".longorfs")
        trainingset = os.path.join(workdir, tag + ".train")
        icm = os.path.join(workdir, tag + ".icm")

        # 1. Find long, non-overlapping orfs to use as a training set
        with open(longorfs, "wb") as longorfs_out:
            _run_step(["long-orfs", "-n", "-t", "1.15",
                       genome_seq_file, "-"], stdout=longorfs_out)

        # 2. Extract the training sequences from the genome file
        with open(trainingset, "wb") as training_out:
            _run_step(["extract", "-t", genome_seq_file, longorfs],
                      stdout=training_out)

        # 3. Build the icm from the training sequences
        # the "-" parameter is used to redirect the output to stdout
        with open(trainingset, "rb") as training_in:
            with open(icm, "wb") as icm_out:
                _run_step(["build-icm", "-r", "-"],
                          stdin=training_in, stdout=icm_out)

        # Run Glimmer3; the results are read from <result_prefix>.predict
        # and <result_prefix>.detail below.
        _run_step(["glimmer3"] + glimmeropts +
                  [genome_seq_file, icm, result_prefix])

        predict_file = result_prefix + ".predict"
        if outfile_classic_glimmer.strip() != 'None':
            shutil.copyfile(predict_file, outfile_classic_glimmer)
        if outfile_ext_path.strip() != 'None':
            shutil.copyfile(result_prefix + ".detail", outfile_ext_path)

        glimmer2seq(predict_file, genome_seq_file, oufile_genes)
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
841357e0acbf Uploaded
bgruening
parents:
diff changeset
72
841357e0acbf Uploaded
bgruening
parents:
diff changeset
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()