annotate TEannot_lite.py @ 0:b126ea31824f draft default tip

1st Uploaded
author vmarcon
date Mon, 06 Feb 2017 13:37:49 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
1 #!/usr/bin/env python
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
2
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
3 import os
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
4 import sys
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
5 import time
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
6 import glob
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
7 import shutil
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
8 import ConfigParser
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
9 import re
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
10
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
# --- Environment validation and set-up -------------------------------------
# The script cannot work without the REPET installation path, the database
# credentials and the job manager name, so abort early with an explicit
# message when one of them is missing from the environment.
if "REPET_PATH" not in os.environ:
    print("ERROR: no environment variable REPET_PATH")
    sys.exit(1)

_REPET_DB_VARIABLES = ("REPET_DB", "REPET_HOST", "REPET_PORT", "REPET_USER", "REPET_PW")
if any(variable not in os.environ for variable in _REPET_DB_VARIABLES):
    print("ERROR: there is at least one environment database variable missing : REPET_DB, REPET_PORT, REPET_HOST, REPET_USER or REPET_PW")
    sys.exit(1)

if "REPET_JOB_MANAGER" not in os.environ:
    print("ERROR: no environment variable REPET_JOB_MANAGER")
    sys.exit(1)

# Put "$REPET_PATH/bin" first in PATH so the programs launched through
# os.system() can be found. The test compares whole PATH entries: a plain
# substring test would wrongly accept e.g. "<REPET_PATH>/bin_old".
_repetBinDir = "%s/bin" % os.environ["REPET_PATH"]
_currentPath = os.environ.get("PATH", "")
if _repetBinDir not in _currentPath.split(os.pathsep):
    if _currentPath:
        os.environ["PATH"] = "%s%s%s" % (_repetBinDir, os.pathsep, _currentPath)
    else:
        os.environ["PATH"] = _repetBinDir

# Make the REPET packages importable by this process (sys.path) and by the
# child processes it starts (PYTHONPATH).
sys.path.append(os.environ["REPET_PATH"])
if "PYTHONPATH" not in os.environ:
    os.environ["PYTHONPATH"] = os.environ["REPET_PATH"]
else:
    os.environ["PYTHONPATH"] = "%s%s%s" % (os.environ["REPET_PATH"], os.pathsep, os.environ["PYTHONPATH"])
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
32
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
33
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
34 from commons.core.LoggerFactory import LoggerFactory
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
35 from commons.core.checker.RepetException import RepetException
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
36 from commons.core.utils.FileUtils import FileUtils
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
37 from commons.core.utils.RepetOptionParser import RepetOptionParser
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
38 from commons.core.seq.FastaUtils import * #FastaUtils
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
39 from commons.core.sql.DbFactory import DbFactory
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
40
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
LOG_DEPTH = "TEannot.pipeline"


class TEannot_lite(object):
    """Light-weight driver for the REPET TEannot pipeline.

    The class writes a TEannot configuration file from the template found in
    ``$REPET_PATH/config/TEannot.cfg``, launches the successive ``TEannot.py``
    steps, then collects the results: one merged GFF3 file, masked FASTA
    files and, on request, a statistics file.
    """

    # Step-specific arguments of each "TEannot.py" call, in execution order.
    # Each entry is appended to "TEannot.py -P <project> -C <config> " and
    # receives the verbosity through its "%i" placeholder.
    _TEANNOT_STEPS = (
        "-S 1 -v %i",
        "-S 2 -a BLR -v %i",
        "-S 2 -a RM -v %i",
        "-S 2 -a CEN -v %i",
        "-S 2 -a BLR -r -v %i",
        "-S 2 -a RM -r -v %i",
        "-S 2 -a CEN -r -v %i",
        "-S 4 -s TRF -v %i",
        "-S 4 -s RMSSR -v %i",
        "-S 4 -s Mreps -v %i",
        "-S 5 -v %i",
        "-S 3 -c BLR+RM+CEN -v %i",
        "-S 7 -v %i",
        "-S 8 -v %i -o GFF3",
    )

    def __init__(self, configFileName = "", fastaFileName = "", libraryFileName = "", verbosity = 0):
        """Initialise the driver.

        @param configFileName: name of the TEannot configuration file
        @param fastaFileName: genome FASTA file (stored as an absolute path)
        @param libraryFileName: TE library FASTA file (stored as an absolute path)
        @param verbosity: verbosity level given to the logger and the tools
        """
        self._configFileName = configFileName
        self._fastaFileName = os.path.abspath(fastaFileName)
        self._libraryFileName = os.path.abspath(libraryFileName)
        # The project name is the start timestamp; it prefixes the project
        # directory, the configuration file and the database table names.
        self._projectName = time.strftime("%Y%m%d%H%M%S")
        self._outputGff = ""
        self._classif = ""
        self._statsFile = ""
        self._outputMasked = ""
        self._tmp_dir = os.environ.get("REPET_TMP_DIR", "")
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def setAttributesFromCommandLine(self):
        """Parse the command line and set the attributes accordingly."""
        description = "This script is a light version of TEannot. It writes configuration file and launches TEannot."
        epilog = "Example: TEannot_lite.py -i fastaFileName -l fastaLibraryFileName \n"
        version = "1.1"
        parser = RepetOptionParser(description = description, epilog = epilog, version = version)
        parser.add_option("-i", "--fasta", dest = "fastaFileName", action = "store", type = "string", help = "Input fasta file name ", default = "")
        parser.add_option("-l", "--lib", dest = "libraryFileName", action = "store", type = "string", help = "Input fasta library file name ", default = "")
        parser.add_option("-c", "--withClassif", dest = "withClassif", action = "store", type = "string", metavar = "CLASSIFFILE", help = "[optional] To add classification informations in GFF3 file, please put classif file from TEdenovo step. ", default = "")
        # NOTE: a "-t/--maskedThreshold" option (default 80) used to be declared here; it is disabled.
        parser.add_option("-s", "--stats", dest = "withStats", action = "store_true", help = " Get statistical file in output.", default = False)
        parser.add_option("-o", "--output", dest = "outputLabel", action = "store", type = "string", help = " [optional] Label for GFF3 output file", default = "")
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", metavar = "2", help = "Verbosity [optional] [default: 2]", default = 2)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    @staticmethod
    def _getFastaBaseName(fastaFileName):
        """Return the file name without its directory and last extension.

        Unlike the regular expression used previously, this works for any
        extension (e.g. 'dataset_1.dat'), for names without extension and for
        directories whose name contains a dot.
        """
        baseName = os.path.basename(fastaFileName)
        root = os.path.splitext(baseName)[0]
        return root or baseName

    def _setAttributesFromOptions(self, options):
        """Set the attributes from parsed options.

        Exits with status 1 when the genome or the library file is missing.
        When no output label is given, 'options.outputLabel' is set to the
        base name of the genome file (resolved in the current directory).
        """
        self.setConfigFileName("")
        if options.fastaFileName == "":
            print("ERROR : You have to enter an input fasta file")
            print("Example: TEannot_lite.py -i fastaFileName -l fastaLibraryFileName \n")
            print("More option : TEannot_lite.py --help ")
            sys.exit(1)
        self._fastaFileName = os.path.abspath(options.fastaFileName)

        if options.libraryFileName == "":
            print("ERROR : You have to enter an input library fasta file")
            print("Example: TEannot_lite.py -i fastaFileName -l fastaLibraryFileName \n")
            print("More option : TEannot_lite.py --help ")
            sys.exit(1)
        self._libraryFileName = os.path.abspath(options.libraryFileName)

        if options.outputLabel == "":
            options.outputLabel = os.path.abspath(self._getFastaBaseName(options.fastaFileName))

        # Output files are suffixed with the date part (YYYYMMDD) of the project name.
        labelWithDate = "%s-%s" % (options.outputLabel, self._projectName[:8])
        self._outputGff = os.path.abspath(labelWithDate + ".gff3")
        self._outputMasked = os.path.abspath(labelWithDate + ".mask")
        if options.withClassif != "":
            self._classif = os.path.abspath(options.withClassif)
        if options.withStats:
            self._statsFile = os.path.abspath(labelWithDate + "-TEstats.txt")
        self._verbosity = options.verbosity

    def setConfigFileName(self, configFileName):
        """Set the configuration file name; an empty name selects a default built from the project name."""
        self._configFileName = configFileName
        if not self._configFileName:
            self._configFileName = "TEannot_Galaxy_config_%s" % self._projectName

    def setAttributesFromConfigFile(self, configFileName):
        """Parse the configuration file.

        No value is read from it for now: the call only fails when the file
        cannot be opened or parsed.
        """
        config = ConfigParser.ConfigParser()
        with open(configFileName) as configFile:
            config.readfp(configFile)

    def _logAndRaise(self, errorMsg):
        """Log the error message, then raise it as a RepetException."""
        # NOTE(review): assumes the logger returned by LoggerFactory offers 'error()' - confirm.
        self._log.error(errorMsg)
        raise RepetException(errorMsg)

    @staticmethod
    def _substituteInLines(lLines, lSubstitutions):
        """Apply literal substitutions to every line and return the new lines.

        @param lLines: list of text lines
        @param lSubstitutions: list of (old, new, replaceAll) tuples, applied
            in order; when 'replaceAll' is false only the first occurrence of
            'old' in a line is replaced (like "sed 's|old|new|'"), otherwise
            all of them are (like "sed 's|old|new|g'")
        """
        lNewLines = []
        for line in lLines:
            for old, new, replaceAll in lSubstitutions:
                if replaceAll:
                    line = line.replace(old, new)
                else:
                    line = line.replace(old, new, 1)
            lNewLines.append(line)
        return lNewLines

    def _writeConfigFile(self):
        """Create the configuration file from the REPET template.

        The template placeholders are replaced by the database settings taken
        from the environment, the project name and directory, and the options
        specific to this light pipeline. The edition is done in Python rather
        than through 'sed' so that values holding characters special to the
        shell or to sed (e.g. in the password) are written verbatim and never
        appear on a command line.

        Raises RepetException when the configuration file already exists.
        """
        if FileUtils.isRessourceExists(self._configFileName):
            self._logAndRaise("Configuration file '%s' already exists. Won't be overwritten." % self._configFileName)

        shutil.copy("%s/config/TEannot.cfg" % os.environ.get("REPET_PATH"), self._configFileName)
        self.setAttributesFromConfigFile(self._configFileName)

        lSubstitutions = [
            ("repet_host: <your_MySQL_host>", "repet_host: %s" % os.environ["REPET_HOST"], False),
            ("repet_user: <your_MySQL_login>", "repet_user: %s" % os.environ["REPET_USER"], False),
            ("repet_pw: <your_MySQL_password>", "repet_pw: %s" % os.environ["REPET_PW"], False),
            ("repet_db: <your_MySQL_db>", "repet_db: %s" % os.environ["REPET_DB"], False),
            ("repet_port: 3306", "repet_port: %s" % os.environ["REPET_PORT"], False),
            ("repet_job_manager: SGE", "repet_job_manager: %s" % os.environ["REPET_JOB_MANAGER"], False),
            ("project_name: <your_project_name>", "project_name: %s" % self._projectName, False),
            ("project_dir: <absolute_path_to_your_project_directory>", "project_dir: %s" % os.getcwd(), False),
            ("do_join: yes", "do_join: no", False),
            ("add_SSRs: no", "add_SSRs: yes", False),
            ("gff3_compulsory_match_part: no", "gff3_compulsory_match_part: yes", False),
            ("BLR_sensitivity: 3", "BLR_sensitivity: 2", False),
            ("tmpDir:", "tmpDir: %s" % self._tmp_dir, True),
        ]
        if self._classif != "":
            lSubstitutions.append(("gff3_with_classif_info: no", "gff3_with_classif_info: yes", False))
            lSubstitutions.append(("classif_table_name: <name_of_TEs_table>", "classif_table_name: %s_consensus_classif" % self._projectName, False))

        with open(self._configFileName) as configFile:
            lLines = configFile.readlines()

        # Write to a temporary file then rename it, as "sed -i" does: it also
        # works when the copied template is read-only, and it keeps its mode.
        tmpFileName = "%s.tmp" % self._configFileName
        with open(tmpFileName, "w") as tmpFile:
            tmpFile.writelines(self._substituteInLines(lLines, lSubstitutions))
        shutil.copymode(self._configFileName, tmpFileName)
        os.rename(tmpFileName, self._configFileName)

    def _mergeOutputGff(self):
        """Merge the GFF3 files of '<project>_GFF3chr/' into the output GFF3 file.

        A single '##gff-version 3' header is written, every input line holding
        a '#' is skipped and the project name prefix is removed from
        '<project>_REPET_TEs'. Input files are read in sorted order so that
        the result is reproducible.
        """
        directory = "%s_GFF3chr/" % self._projectName
        prefixedSource = "%s_REPET_TEs" % self._projectName
        with open(self._outputGff, "w") as mergedFile:
            mergedFile.write("##gff-version 3\n")
            for gffFileName in sorted(glob.glob("%s*.gff3" % directory)):
                with open(gffFileName) as gffFile:
                    for line in gffFile:
                        if "#" in line:
                            continue
                        if not line.endswith("\n"):
                            # last line of a file without final newline
                            line += "\n"
                        mergedFile.write(line.replace(prefixedSource, "REPET_TEs"))

    def _getTEannotCmds(self):
        """Return the list of 'TEannot.py' command lines, in execution order."""
        parameters = (self._projectName, self._configFileName, self._verbosity)
        return [("TEannot.py -P %s -C %s " + step) % parameters for step in self._TEANNOT_STEPS]

    def _runCmdsOrExit(self, lCmds):
        """Run the commands in order; on the first failure, clean the database and exit with status 1."""
        for cmd in lCmds:
            returnValue = os.system(cmd)
            if returnValue != 0:
                print("ERROR: command '%s' returned %i" % (cmd, returnValue))
                self._cleanTables()
                sys.exit(1)

    def _launchTEannot(self):
        """Run all the TEannot steps, after loading the classification table when a classif file was given."""
        print("START time: %s" % time.strftime("%Y-%m-%d %H:%M:%S"))
        lCmds = self._getTEannotCmds()

        if self._classif != "":
            self._setClassifTable()

        self._runCmdsOrExit(lCmds)

        print("END time: %s" % time.strftime("%Y-%m-%d %H:%M:%S"))

    def _maskFasta(self):
        """Write the masked FASTA files.

        The TE annotations are exported then used to mask the genome; the SSR
        annotations are exported then used to mask the TE-masked file, which
        gives '<masked file>_SSRmask.fa'. The exported '_tmp.path' and
        '_tmp.set' files are left in place.
        """
        pathFile = self._outputMasked + "_tmp.path"
        setFile = self._outputMasked + "_tmp.set"
        lCmds = [
            "srptExportTable.py -i %s_chr_allTEs_nr_noSSR_path -C %s -o %s -v %s" % (self._projectName, self._configFileName, pathFile, self._verbosity),
            "MaskSeqFromCoord.py -i %s -m %s -f path -X -o %s -v %s" % (self._fastaFileName, pathFile, self._outputMasked, self._verbosity),
            "srptExportTable.py -i %s_chr_allSSRs_set -C %s -o %s -v %s " % (self._projectName, self._configFileName, setFile, self._verbosity),
            "MaskSeqFromCoord.py -i %s -m %s -f set -X -o %s_SSRmask.fa -v %s" % (self._outputMasked, setFile, self._outputMasked, self._verbosity),
        ]
        self._runCmdsOrExit(lCmds)

    def _createStatsFile(self):
        """Compute the annotation statistics and move them to the statistics file.

        This output is optional: a failing command is reported and ends this
        step, but it does not abort the whole run.
        """
        with open(self._fastaFileName) as fastaFile:
            fastaLength = FastaUtils.dbCumLength(fastaFile)
        lCmds = [
            "PostAnalyzeTELib.py -a 3 -g {0} -p {1}_chr_allTEs_nr_noSSR_path -s {1}_refTEs_seq".format(fastaLength, self._projectName),
            "mv %s_chr_allTEs_nr_noSSR_path.globalAnnotStatsPerTE.txt %s" % (self._projectName, self._statsFile),
        ]
        for cmd in lCmds:
            returnValue = os.system(cmd)
            if returnValue != 0:
                print("ERROR: command '%s' returned %i" % (cmd, returnValue))
                return

    def _setClassifTable(self):
        """Load the classif file into the '<project>_consensus_classif' table."""
        iDb = DbFactory.createInstance()
        try:
            iDb.createTable("%s_consensus_classif" % self._projectName, "classif", self._classif, True)
        finally:
            iDb.close()

    def _launchListAndDropTables(self):
        """Call 'ListAndDropTables.py' with the project name as '-d' argument."""
        cmd = "ListAndDropTables.py"
        cmd += " -C %s" % self._configFileName
        cmd += " -d '%s'" % self._projectName
        os.system(cmd)

    def _cleanJobsTable(self):
        """Delete from the 'jobs' table the rows whose groupid starts with the project name."""
        db = DbFactory.createInstance( configFileName = self._configFileName )
        try:
            sql_cmd = "DELETE FROM jobs WHERE groupid like '%s%%';" % self._projectName
            db.execute( sql_cmd )
        finally:
            db.close()

    def _cleanTables(self):
        """Remove what the project left in the database (its tables, then its jobs)."""
        self._launchListAndDropTables()
        self._cleanJobsTable()

    def run(self):
        """Run the whole pipeline inside a new directory named after the project."""
        # Create the project directory and work from there.
        os.mkdir(self._projectName)
        os.chdir(self._projectName)
        self._writeConfigFile()
        # Link the inputs under project-based names.
        # NOTE(review): presumably the names TEannot.py looks for - confirm.
        projectDir = os.getcwd()
        os.symlink(self._fastaFileName, "%s/%s.fa" % (projectDir, self._projectName))
        os.symlink(self._libraryFileName, "%s/%s_refTEs.fa" % (projectDir, self._projectName))
        self._launchTEannot()
        self._mergeOutputGff()
        self._maskFasta()
        if self._statsFile:
            self._createStatsFile()
        self._cleanTables()
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
241
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: read the settings from the command line, then run
    # the whole pipeline.
    pipeline = TEannot_lite()
    pipeline.setAttributesFromCommandLine()
    pipeline.run()