Mercurial > repos > vmarcon > repet_teannot
comparison TEannot_lite.py @ 0:b126ea31824f draft default tip
1st Uploaded
| author | vmarcon |
|---|---|
| date | Mon, 06 Feb 2017 13:37:49 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b126ea31824f |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import os | |
| 4 import sys | |
| 5 import time | |
| 6 import glob | |
| 7 import shutil | |
| 8 import ConfigParser | |
| 9 import re | |
| 10 | |
# --- Environment checks and set-up -------------------------------------------
# These statements run at import time: the script refuses to start unless the
# REPET installation path, the database settings and the job manager are all
# provided through environment variables.
if "REPET_PATH" not in os.environ:
    print("ERROR: no environment variable REPET_PATH")
    sys.exit(1)

_REQUIRED_DB_VARIABLES = ("REPET_DB", "REPET_HOST", "REPET_PORT", "REPET_USER", "REPET_PW")
if any(name not in os.environ for name in _REQUIRED_DB_VARIABLES):
    print("ERROR: there is at least one environment database variable missing : REPET_DB, REPET_PORT, REPET_HOST, REPET_USER or REPET_PW")
    sys.exit(1)

if "REPET_JOB_MANAGER" not in os.environ:
    print("ERROR: no environment variable REPET_JOB_MANAGER")
    sys.exit(1)

# Make the REPET programs reachable. Whole PATH entries are compared: a plain
# substring test would wrongly treat "<REPET_PATH>/bin2" or
# "<REPET_PATH>/bin/sub" as proof that "<REPET_PATH>/bin" is already listed.
_repetBinDir = "%s/bin" % os.environ["REPET_PATH"]
_currentPath = os.environ.get("PATH", "")
if _repetBinDir not in _currentPath.split(os.pathsep):
    if _currentPath:
        os.environ["PATH"] = "%s%s%s" % (_repetBinDir, os.pathsep, _currentPath)
    else:
        # No PATH at all: avoid a trailing separator, which would silently
        # add the current directory to the search path.
        os.environ["PATH"] = _repetBinDir

# Make the REPET Python packages importable, both here (sys.path) and in the
# child processes started later (PYTHONPATH).
sys.path.append(os.environ["REPET_PATH"])
if "PYTHONPATH" not in os.environ:
    os.environ["PYTHONPATH"] = os.environ["REPET_PATH"]
else:
    os.environ["PYTHONPATH"] = "%s:%s" % (os.environ["REPET_PATH"], os.environ["PYTHONPATH"])
| 32 | |
| 33 | |
| 34 from commons.core.LoggerFactory import LoggerFactory | |
| 35 from commons.core.checker.RepetException import RepetException | |
| 36 from commons.core.utils.FileUtils import FileUtils | |
| 37 from commons.core.utils.RepetOptionParser import RepetOptionParser | |
| 38 from commons.core.seq.FastaUtils import * #FastaUtils | |
| 39 from commons.core.sql.DbFactory import DbFactory | |
| 40 | |
| 41 LOG_DEPTH = "TEannot.pipeline" | |
| 42 | |
class TEannot_lite(object):
    """Light-weight driver for the REPET TEannot pipeline.

    The driver creates a time-stamped project directory, writes a TEannot
    configuration file filled in from the REPET_* environment variables,
    runs the TEannot steps on a genome fasta file with a TE library, merges
    the per-sequence GFF3 files into one, masks the genome and, on request,
    produces a statistics file. Database tables created for the project are
    dropped at the end (and on failure of a pipeline command).
    """

    def __init__(self, configFileName = "", fastaFileName = "", libraryFileName = "", verbosity = 0):
        """Initialise the driver.

        configFileName  -- name of the configuration file to write
        fastaFileName   -- genome fasta file (stored as an absolute path)
        libraryFileName -- TE library fasta file (stored as an absolute path)
        verbosity       -- verbosity level handed to the logger and commands
        """
        self._configFileName = configFileName
        self._fastaFileName = os.path.abspath(fastaFileName)
        self._libraryFileName = os.path.abspath(libraryFileName)
        # The project name is the start time; its first 8 characters
        # (YYYYMMDD) are reused as a date stamp in the output file names.
        self._projectName = time.strftime("%Y%m%d%H%M%S")
        self._outputGff = ""
        self._classif = ""
        self._statsFile = ""
        self._outputMasked = ""
        self._tmp_dir = os.environ.get("REPET_TMP_DIR", "")
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def setAttributesFromCommandLine(self):
        """Parse the command line and configure the driver from it."""
        description = "This script is a ligth version of TEannot. It writes configuration file and launches TEannot."
        epilog = "Example: TEannot_lite.py -i fastaFileName -l fastaLibraryFileName \n"
        version = "1.1"
        parser = RepetOptionParser(description = description, epilog = epilog, version = version)
        parser.add_option("-i", "--fasta", dest = "fastaFileName", action = "store", type = "string", help = "Input fasta file name ", default = "")
        parser.add_option("-l", "--lib", dest = "libraryFileName", action = "store", type = "string", help = "Input fasta library file name ", default = "")
        parser.add_option("-c", "--withClassif", dest = "withClassif", action = "store", type = "string", metavar = "CLASSIFFILE", help = "[optional] To add classification informations in GFF3 file, please put classif file from TEdenovo step. ", default = "")
        parser.add_option("-s", "--stats", dest = "withStats", action = "store_true", help = " Get statistical file in output.", default = False)
        parser.add_option("-o", "--output", dest = "outputLabel", action = "store", type = "string", help = " [optional] Label for GFF3 output file", default = "")
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", metavar = "2", help = "Verbosity [optional] [default: 2]", default = 2)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    @staticmethod
    def _getFastaBaseName(fastaFileName):
        """Return the file name of 'fastaFileName' without directory and extension."""
        return os.path.splitext(os.path.basename(fastaFileName))[0]

    def _setAttributesFromOptions(self, options):
        """Copy the parsed options into the instance and derive output names.

        Exits with status 1 (after printing a usage hint) when the genome
        fasta file or the library fasta file is missing.
        """
        self.setConfigFileName("")
        if options.fastaFileName == "":
            print("ERROR : You have to enter an input fasta file")
            print("Example: TEannot_lite.py -i fastaFileName -l fastaLibraryFileName \n")
            print("More option : TEannot_lite.py --help ")
            sys.exit(1)
        self._fastaFileName = os.path.abspath(options.fastaFileName)

        if options.libraryFileName == "":
            print("ERROR : You have to enter an input libary fasta file")
            print("Example: TEannot_lite.py -i fastaFileName -l fastaLibraryFileName \n")
            print("More option : TEannot_lite.py --help ")
            sys.exit(1)
        self._libraryFileName = os.path.abspath(options.libraryFileName)

        outputLabel = options.outputLabel
        if outputLabel == "":
            # Default label: the fasta file name, without directory and
            # extension, placed in the current working directory.
            outputLabel = os.path.abspath(self._getFastaBaseName(options.fastaFileName))

        dateStamp = self._projectName[:8]
        self._outputGff = os.path.abspath("%s-%s.gff3" % (outputLabel, dateStamp))
        if options.withClassif != "":
            self._classif = os.path.abspath(options.withClassif)
        self._outputMasked = os.path.abspath("%s-%s.mask" % (outputLabel, dateStamp))
        if options.withStats:
            self._statsFile = os.path.abspath("%s-%s-TEstats.txt" % (outputLabel, dateStamp))
        self._verbosity = options.verbosity

    def setConfigFileName(self, configFileName):
        """Set the configuration file name; an empty name selects a project-specific default."""
        self._configFileName = configFileName
        if not self._configFileName:
            self._configFileName = "TEannot_Galaxy_config_%s" % self._projectName

    def setAttributesFromConfigFile(self, configFileName):
        """Parse 'configFileName'; the parsed content is not kept.

        In effect this only checks that the file can be opened and parsed.
        """
        config = ConfigParser.ConfigParser()
        with open(configFileName) as configFile:
            config.readfp(configFile)

    def _logAndRaise(self, errorMsg):
        """Log 'errorMsg' and raise it as a RepetException."""
        # NOTE(review): assumes the logger built by LoggerFactory offers
        # error(), like a standard logging.Logger -- confirm.
        self._log.error(errorMsg)
        raise RepetException(errorMsg)

    def _customizeConfig(self, configText):
        """Return 'configText' with the TEannot template placeholders filled in.

        Values come from the REPET_* environment variables, the project name,
        the current working directory and the temporary directory. They are
        inserted literally, so characters such as '|', '&' or quotes in a
        password are preserved.
        """
        env = os.environ
        substitutions = [
            ("repet_host: <your_MySQL_host>", "repet_host: %s" % env["REPET_HOST"]),
            ("repet_user: <your_MySQL_login>", "repet_user: %s" % env["REPET_USER"]),
            ("repet_pw: <your_MySQL_password>", "repet_pw: %s" % env["REPET_PW"]),
            ("repet_db: <your_MySQL_db>", "repet_db: %s" % env["REPET_DB"]),
            ("repet_port: 3306", "repet_port: %s" % env["REPET_PORT"]),
            ("repet_job_manager: SGE", "repet_job_manager: %s" % env["REPET_JOB_MANAGER"]),
            ("project_name: <your_project_name>", "project_name: %s" % self._projectName),
            ("project_dir: <absolute_path_to_your_project_directory>", "project_dir: %s" % os.getcwd()),
            ("do_join: yes", "do_join: no"),
            ("add_SSRs: no", "add_SSRs: yes"),
            ("gff3_compulsory_match_part: no", "gff3_compulsory_match_part: yes"),
            ("BLR_sensitivity: 3", "BLR_sensitivity: 2"),
            ("tmpDir:", "tmpDir: %s" % self._tmp_dir),
        ]
        if self._classif != "":
            substitutions.append(("gff3_with_classif_info: no", "gff3_with_classif_info: yes"))
            substitutions.append(("classif_table_name: <name_of_TEs_table>", "classif_table_name: %s_consensus_classif" % self._projectName))

        # Applied one after the other, in the order listed above.
        for placeholder, value in substitutions:
            configText = configText.replace(placeholder, value)
        return configText

    def _writeConfigFile(self):
        """Create the project configuration file from REPET's TEannot.cfg template.

        Raises RepetException if the configuration file already exists.
        """
        if FileUtils.isRessourceExists(self._configFileName):
            self._logAndRaise("Configuration file '%s' already exists. Won't be overwritten." % self._configFileName)

        shutil.copy("%s/config/TEannot.cfg" % os.environ.get("REPET_PATH"), self._configFileName)
        self.setAttributesFromConfigFile(self._configFileName)

        with open(self._configFileName) as configFile:
            template = configFile.read()
        with open(self._configFileName, "w") as configFile:
            configFile.write(self._customizeConfig(template))

    def _mergeOutputGff(self):
        """Merge the per-sequence GFF3 files of the project into the output GFF3 file.

        Every line containing '#' is dropped, a single GFF3 header is written
        first, and the '<project>_REPET_TEs' source tag is shortened to
        'REPET_TEs'. Input files are read in sorted order so that the result
        is reproducible.
        """
        projectTag = "%s_REPET_TEs" % self._projectName
        gffFiles = sorted(glob.glob("%s_GFF3chr/*.gff3" % self._projectName))
        with open(self._outputGff, "w") as mergedFile:
            mergedFile.write("##gff-version 3\n")
            for gffFile in gffFiles:
                with open(gffFile) as singleFile:
                    for line in singleFile:
                        if "#" in line:
                            continue
                        if not line.endswith("\n"):
                            # Last line of a file without final newline: keep
                            # it from running into the next file's first line.
                            line += "\n"
                        mergedFile.write(line.replace(projectTag, "REPET_TEs"))

    @staticmethod
    def _callCommand(lArgs):
        """Run the program given as an argument list, without a shell.

        Returns the exit status, or 127 when the program cannot be started.
        """
        import subprocess
        try:
            return subprocess.call(lArgs)
        except OSError as error:
            print("ERROR: cannot start '%s': %s" % (lArgs[0], error))
            return 127

    def _runCommands(self, lCmds):
        """Run the commands in order; on the first failure drop the project tables and exit(1)."""
        for cmd in lCmds:
            returnValue = self._callCommand(cmd)
            if returnValue != 0:
                print("ERROR: command '%s' returned %i" % (" ".join(cmd), returnValue))
                self._cleanTables()
                sys.exit(1)

    def _launchTEannot(self):
        """Run the TEannot steps one after the other."""
        print("START time: %s" % time.strftime("%Y-%m-%d %H:%M:%S"))
        prefix = ["TEannot.py", "-P", self._projectName, "-C", self._configFileName]
        verbosity = ["-v", "%i" % self._verbosity]
        lStepArgs = [
            ["-S", "1"],
            ["-S", "2", "-a", "BLR"],
            ["-S", "2", "-a", "RM"],
            ["-S", "2", "-a", "CEN"],
            ["-S", "2", "-a", "BLR", "-r"],
            ["-S", "2", "-a", "RM", "-r"],
            ["-S", "2", "-a", "CEN", "-r"],
            ["-S", "4", "-s", "TRF"],
            ["-S", "4", "-s", "RMSSR"],
            ["-S", "4", "-s", "Mreps"],
            ["-S", "5"],
            ["-S", "3", "-c", "BLR+RM+CEN"],
            ["-S", "7"],
        ]
        lCmds = [prefix + stepArgs + verbosity for stepArgs in lStepArgs]
        lCmds.append(prefix + ["-S", "8"] + verbosity + ["-o", "GFF3"])

        if self._classif != "":
            self._setClassifTable()

        self._runCommands(lCmds)
        print("END time: %s" % time.strftime("%Y-%m-%d %H:%M:%S"))

    def _maskFasta(self):
        """Export the annotation tables and mask the genome with them.

        The TE annotations are exported and masked first (giving the '.mask'
        output); the SSR annotations are then exported and masked on top of
        that result (giving '<mask>_SSRmask.fa'). The intermediate
        '_tmp.path' and '_tmp.set' files are left in place.
        """
        pathFile = self._outputMasked + "_tmp.path"
        setFile = self._outputMasked + "_tmp.set"
        verbosity = "%s" % self._verbosity
        lCmds = [
            ["srptExportTable.py", "-i", "%s_chr_allTEs_nr_noSSR_path" % self._projectName, "-C", self._configFileName, "-o", pathFile, "-v", verbosity],
            ["MaskSeqFromCoord.py", "-i", self._fastaFileName, "-m", pathFile, "-f", "path", "-X", "-o", self._outputMasked, "-v", verbosity],
            ["srptExportTable.py", "-i", "%s_chr_allSSRs_set" % self._projectName, "-C", self._configFileName, "-o", setFile, "-v", verbosity],
            ["MaskSeqFromCoord.py", "-i", self._outputMasked, "-m", setFile, "-f", "set", "-X", "-o", "%s_SSRmask.fa" % self._outputMasked, "-v", verbosity],
        ]
        self._runCommands(lCmds)

    def _createStatsFile(self):
        """Run PostAnalyzeTELib.py and move its per-TE statistics to the stats file.

        A failure here is reported but not fatal, so that the final table
        clean-up still takes place.
        """
        with open(self._fastaFileName) as fastaFile:
            fastaLength = FastaUtils.dbCumLength(fastaFile)

        pathTable = "%s_chr_allTEs_nr_noSSR_path" % self._projectName
        self._callCommand(["PostAnalyzeTELib.py", "-a", "3", "-g", str(fastaLength), "-p", pathTable, "-s", "%s_refTEs_seq" % self._projectName])

        producedFile = "%s.globalAnnotStatsPerTE.txt" % pathTable
        if os.path.exists(producedFile):
            shutil.move(producedFile, self._statsFile)
        else:
            print("ERROR: statistics file '%s' was not produced" % producedFile)

    def _setClassifTable(self):
        """Load the classification file into the '<project>_consensus_classif' table."""
        iDb = DbFactory.createInstance()
        try:
            iDb.createTable("%s_consensus_classif" % self._projectName, "classif", self._classif, True)
        finally:
            iDb.close()

    def _launchListAndDropTables(self):
        """Call ListAndDropTables.py with the project name as its '-d' argument."""
        self._callCommand(["ListAndDropTables.py", "-C", self._configFileName, "-d", self._projectName])

    def _cleanJobsTable(self):
        """Delete the rows of the 'jobs' table whose groupid starts with the project name."""
        db = DbFactory.createInstance( configFileName = self._configFileName )
        try:
            # The project name is generated by this class (a time stamp).
            sql_cmd = "DELETE FROM jobs WHERE groupid like '%s%%';" % self._projectName
            db.execute( sql_cmd )
        finally:
            db.close()

    def _cleanTables(self):
        """Drop the project tables, then remove the project's job records."""
        self._launchListAndDropTables()
        self._cleanJobsTable()

    def run(self):
        """Run the whole workflow inside a new project directory."""
        # Create the project directory and work from inside it.
        os.mkdir(self._projectName)
        os.chdir(self._projectName)
        self._writeConfigFile()
        projectDir = os.getcwd()
        # Link the inputs into the project directory as '<project>.fa' and
        # '<project>_refTEs.fa'.
        os.symlink(self._fastaFileName, os.path.join(projectDir, "%s.fa" % self._projectName))
        os.symlink(self._libraryFileName, os.path.join(projectDir, "%s_refTEs.fa" % self._projectName))
        self._launchTEannot()
        self._mergeOutputGff()
        self._maskFasta()
        if self._statsFile:
            self._createStatsFile()
        self._cleanTables()
if __name__ == '__main__':
    # Script entry point: build the driver, configure it from the command
    # line, then run the whole workflow.
    pipeline = TEannot_lite()
    pipeline.setAttributesFromCommandLine()
    pipeline.run()
