annotate TEdenovo_lite.py @ 0:baea09e6722b draft default tip

1st Uploaded
author vmarcon
date Mon, 06 Feb 2017 13:31:53 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
1 #!/usr/bin/env python
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
2
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
3
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
4 import os
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
5 import sys
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
6 import time
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
7 import glob
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
8 import shutil
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
9 import ConfigParser
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
10 from commons.core.seq.FastaUtils import *
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
11 import operator
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
12 import re
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
13
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
14
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
15
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
# ---------------------------------------------------------------------------
# Environment sanity checks, executed as soon as the module is loaded.
# The script stops with exit status 1 when the REPET installation path, one
# of the database settings, or the job manager is not declared.
# ---------------------------------------------------------------------------
if "REPET_PATH" not in os.environ:
    print("ERROR: no environment variable REPET_PATH")
    sys.exit(1)

if any(name not in os.environ
       for name in ("REPET_DB", "REPET_HOST", "REPET_PORT", "REPET_USER", "REPET_PW")):
    print("ERROR: there is at least one environment database variable missing : REPET_DB, REPET_PORT, REPET_HOST, REPET_USER or REPET_PW")
    sys.exit(1)

if "REPET_JOB_MANAGER" not in os.environ:
    print("ERROR: no environment variable REPET_JOB_MANAGER")
    sys.exit(1)


# Put the REPET executables first on PATH unless they are already listed
# (plain substring test on the PATH value).
if "%s/bin" % os.environ["REPET_PATH"] not in os.environ["PATH"]:
    os.environ["PATH"] = ":".join(("%s/bin" % os.environ["REPET_PATH"], os.environ["PATH"]))

# Make the REPET python packages importable by this process (sys.path) and
# visible through PYTHONPATH in the environment.
sys.path.append(os.environ["REPET_PATH"])
if "PYTHONPATH" in os.environ:
    os.environ["PYTHONPATH"] = os.environ["REPET_PATH"] + ":" + os.environ["PYTHONPATH"]
else:
    os.environ["PYTHONPATH"] = os.environ["REPET_PATH"]
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
37
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
38 from commons.core.LoggerFactory import LoggerFactory
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
39 from commons.core.checker.RepetException import RepetException
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
40 from commons.core.utils.FileUtils import FileUtils
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
41 from commons.core.utils.RepetOptionParser import RepetOptionParser
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
42 from commons.core.seq.FastaUtils import FastaUtils
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
43 from commons.core.sql.DbFactory import DbFactory
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
44 from itertools import islice
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
45
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
46 LOG_DEPTH = "TEdenovo.pipeline"
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
47
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
48 class TEdenovo_lite(object):
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
49
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def __init__(self, configFileName = "", fastaFileName = "", verbosity = 0):
    """Initialise the pipeline wrapper (constructor of TEdenovo_lite).

    configFileName -- name of the TEdenovo configuration file (may be empty).
    fastaFileName  -- input fasta file; stored as an absolute path.
    verbosity      -- verbosity level handed to the logger.

    The optional banks are read from REPET_NUCL_BANK, REPET_PROT_BANK,
    REPET_HMM_PROFILES and REPET_RDNA_BANK. A variable that is set but points
    to a path that does not exist stops the program with exit status 1; an
    unset variable yields an empty string.
    """
    def bankFromEnvironment(variableName, bankLabel):
        # Return the path stored in `variableName`, "" when the variable is
        # not set, and exit when it is set to a path that does not exist.
        bankPath = os.environ.get(variableName)
        if bankPath is None:
            return ""
        if os.path.exists(bankPath):
            return bankPath
        # The message always names the variable that was actually checked
        # (the rDNA message used to point at REPET_PROT_BANK).
        print("ERROR : the %s bank configured doesn't exist. Please correct it in the %s variable" % (bankLabel, variableName))
        sys.exit(1)

    self._configFileName = configFileName
    self._fastaFileName = os.path.abspath(fastaFileName)
    # The project name is the creation timestamp (YYYYmmddHHMMSS).
    self._projectName = time.strftime("%Y%m%d%H%M%S")
    self._limitSeqSize = 200000000

    self._nucl_bank = bankFromEnvironment("REPET_NUCL_BANK", "nucleotides")
    self._prot_bank = bankFromEnvironment("REPET_PROT_BANK", "proteins")
    self._HMM_profiles = bankFromEnvironment("REPET_HMM_PROFILES", "hmm profiles")
    self._rdna_bank = bankFromEnvironment("REPET_RDNA_BANK", "rDNA")
    if self._nucl_bank == "" and self._prot_bank == "" and self._HMM_profiles == "" and self._rdna_bank == "":
        print("WARNING : No bank are configured ... To set banks please add REPET_NUCL_BANK, REPET_PROT_BANK, REPET_HMM_PROFILES and/or REPET_RDNA_BANK in your environment")

    self._tmp_dir = os.environ.get("REPET_TMP_DIR", "")
    self._outputFasta = ""
    self._classif = False
    self._outputClassif = ""
    self._outputStats = ""
    self._verbosity = verbosity
    self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
100
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def setAttributesFromCommandLine(self):
    """Read the command-line options and copy them onto the instance.

    Declares -i/--fasta, -c/--withClassif, -o/--output and -v/--verbosity on
    a RepetOptionParser, then hands the parsed options to
    _setAttributesFromOptions().
    """
    parser = RepetOptionParser(
        description = "This script is a ligth version of TEdenovo. It writes configuration file and launches TEdenovo.",
        epilog = "Example: TEdenovo_lite.py -i fastaFileName \n",
        version = "2.0")
    parser.add_option("-i", "--fasta",
                      dest = "fastaFileName", action = "store", type = "string",
                      help = "input fasta file name ", default = "")
    parser.add_option("-c", "--withClassif",
                      dest = "withClassif", action = "store_true",
                      help = " Get classification files in output.", default = False)
    parser.add_option("-o", "--output",
                      dest = "outputLabel", action = "store", type = "string",
                      help = "[optional] Prefix label for output file(s).", default = "")
    parser.add_option("-v", "--verbosity",
                      dest = "verbosity", action = "store", type = "int",
                      help = "Verbosity [optional] [default: 2]", default = 2)
    # Only the options object is used; positional arguments are ignored.
    parsedOptions = parser.parse_args()[0]
    self._setAttributesFromOptions(parsedOptions)
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
112
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def _setAttributesFromOptions(self, options):
    """Copy parsed command-line options onto the instance.

    options -- object exposing fastaFileName, outputLabel, verbosity and
               withClassif.

    Resets the configuration file name to its default, stores the absolute
    input path and derives the output file names from the output label and
    the first eight characters (the date) of the project name. Exits with
    status 1 when no input fasta file was given.
    """
    self.setConfigFileName("")
    if options.fastaFileName == "":
        print("ERROR : You have to enter an input fasta file")
        print("Example: TEdenovo_lite.py -i fastaFileName \n")
        print("More option : TEdenovo_lite.py --help ")
        # sys.exit rather than the interactive `exit` helper, which is only
        # installed by the site module.
        sys.exit(1)

    self._fastaFileName = os.path.abspath(options.fastaFileName)
    if options.outputLabel == "":
        # Default label: input file name without directory and without its
        # last extension. The former pattern '[fa|fasta|fsa|fas]' was a
        # character class, and crashed on names such as "genome.gb" or
        # "genome" where re.search() returned None.
        fileName = re.split(r'[\\/]', options.fastaFileName)[-1]
        baseName = os.path.splitext(fileName)[0] or fileName
        options.outputLabel = os.path.abspath(baseName)

    dateStamp = self._projectName[:8]
    self._outputFasta = os.path.abspath(options.outputLabel + "-%s-denovoLibTEs_filtered.fa" % dateStamp)
    self._outputStats = os.path.abspath(options.outputLabel + "-%s-classif_stats.txt" % dateStamp)
    self._verbosity = options.verbosity
    if options.withClassif:
        self._classif = True
        self._outputClassif = os.path.abspath(options.outputLabel + '-%s.classif' % dateStamp)
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
131
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def setConfigFileName(self, configFileName):
    """Store the configuration file name.

    An empty (falsy) name is replaced by the default
    "TEdenovo_Galaxy_config_<project name>".
    """
    if configFileName:
        self._configFileName = configFileName
    else:
        self._configFileName = "TEdenovo_Galaxy_config_%s" % self._projectName
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
136
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def setAttributesFromConfigFile(self, configFileName):
    """Open and parse the given configuration file.

    The parsed content is not stored anywhere: the call only checks that the
    file can be opened and read by ConfigParser. Errors raised by open() or
    by the parser propagate to the caller.
    """
    config = ConfigParser.ConfigParser()
    # Context manager so the handle is closed even when parsing fails (the
    # file used to be left open).
    with open(configFileName) as configHandle:
        # readfp() is the historical name; use read_file() when the parser
        # provides it.
        loadFromHandle = getattr(config, "read_file", None) or config.readfp
        loadFromHandle(configHandle)
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
140
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
141
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def _writeConfigFile(self):
    """Create the TEdenovo configuration file from the REPET template.

    Copies $REPET_PATH/config/TEdenovo.cfg to self._configFileName, then
    fills in the database, job manager, project and bank settings taken from
    the environment and from the instance. Reports through _logAndRaise()
    when the target file already exists.
    """
    if FileUtils.isRessourceExists(self._configFileName):
        # The file name is now interpolated; the message used to be passed
        # with a bare '%s'.
        self._logAndRaise("Configuration file '%s' already exists. Won't be overwritten." % self._configFileName)

    shutil.copy("%s/config/TEdenovo.cfg" % os.environ.get("REPET_PATH"), self._configFileName)
    self.setAttributesFromConfigFile(self._configFileName)

    # Each entry is (text to find, replacement, replace every occurrence on
    # the line). Entries are applied in order to every line, which mirrors
    # the former sequence of `sed -i 's|...|...|'` calls, except that values
    # are inserted literally: characters such as '|', '&', '\' or quotes in
    # a password or path no longer break or alter the substitution.
    substitutions = [
        ("repet_host: <your_MySQL_host>", "repet_host: %s" % os.environ["REPET_HOST"], False),
        ("repet_user: <your_MySQL_login>", "repet_user: %s" % os.environ["REPET_USER"], False),
        ("repet_pw: <your_MySQL_password>", "repet_pw: %s" % os.environ["REPET_PW"], False),
        ("repet_db: <your_MySQL_db>", "repet_db: %s" % os.environ["REPET_DB"], False),
        ("repet_port: 3306", "repet_port: %s" % os.environ["REPET_PORT"], False),
        ("repet_job_manager: SGE", "repet_job_manager: %s" % os.environ["REPET_JOB_MANAGER"], False),
        ("project_name: <your_project_name>", "project_name: %s" % self._projectName, False),
        ("project_dir: <absolute_path_to_your_project_directory>", "project_dir: %s" % os.getcwd(), False),
        ("tmpDir:", "tmpDir: %s" % self._tmp_dir, True),
    ]

    if self._nucl_bank != "" and self._nucl_bank is not None:
        substitutions.append(("TE_BLRn: no", "TE_BLRn: yes", False))
        substitutions.append(("TE_BLRtx: no", "TE_BLRtx: yes", False))
        substitutions.append(("TE_nucl_bank: <bank_of_TE_nucleotide_sequences_such_as_Repbase>",
                              "TE_nucl_bank: %s" % os.path.basename(self._nucl_bank), False))

    if self._prot_bank != "" and self._prot_bank is not None:
        substitutions.append(("TE_BLRx: no", "TE_BLRx: yes", False))
        substitutions.append(("TE_prot_bank: <bank_of_TE_amino-acid_sequences_such_as_Repbase>",
                              "TE_prot_bank: %s" % os.path.basename(self._prot_bank), False))

    if self._HMM_profiles != "" and self._HMM_profiles is not None:
        substitutions.append(("TE_HMMER: no", "TE_HMMER: yes", False))
        substitutions.append(("TE_HMM_profiles: <bank_of_HMM_profiles>",
                              "TE_HMM_profiles: %s" % os.path.basename(self._HMM_profiles), False))

    if self._rdna_bank != "" and self._rdna_bank is not None:
        substitutions.append(("rDNA_BLRn: no", "rDNA_BLRn: yes", False))
        substitutions.append(("rDNA_bank: <bank_of_rDNA_sequences_from_eukaryota>",
                              "rDNA_bank: %s" % os.path.basename(self._rdna_bank), False))

    substitutions.append(("filter_host_gene: no", "filter_host_gene: yes", False))

    with open(self._configFileName) as configHandle:
        configLines = configHandle.readlines()

    with open(self._configFileName, "w") as configHandle:
        for line in configLines:
            for placeholder, replacement, everyOccurrence in substitutions:
                if everyOccurrence:
                    line = line.replace(placeholder, replacement)
                else:
                    # Like sed without the 'g' flag: first match on the line.
                    line = line.replace(placeholder, replacement, 1)
            configHandle.write(line)
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
177
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
178
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def removeNstretches(self,maxNstretchesSize=11,minContigsize=10000):
    """Cut the input sequences at their N stretches and keep the long pieces.

    maxNstretchesSize -- threshold handed to
                         FastaUtils.getNstretchesRangesList() to locate the
                         N stretches of self._fastaFileName.
    minContigsize     -- a piece is kept only when it is strictly longer
                         than this size.

    Returns a BioseqDB holding first the pieces found between N stretches
    (in the order the stretches are reported), then the sequences that have
    no N stretch at all and are long enough.
    """
    if self._verbosity > 0:
        print("Removing Nstretches longer than %d pb and removing conting shorter than %d pb"%(maxNstretchesSize,minContigsize))

    Nstretches = FastaUtils.getNstretchesRangesList(self._fastaFileName, maxNstretchesSize)
    refBSDB = BioseqDB(self._fastaFileName)
    newBSDB = BioseqDB()

    def addIfLongEnough(bioseq, first, last):
        # Keep bioseq[first..last] when it is longer than minContigsize.
        # NOTE(review): subseq() coordinates look 1-based and inclusive - confirm.
        if last - first + 1 > minContigsize:
            newBSDB.add(bioseq.subseq(first, last))

    seqInNstretches = set()
    currentchrom = None
    refBS = None
    debut = 1
    for Nstretch in Nstretches:
        if Nstretch.seqname != currentchrom:
            if currentchrom is not None:
                # Tail of the previous sequence, after its last N stretch.
                addIfLongEnough(refBS, debut, refBSDB.getSeqLength(currentchrom))
            currentchrom = Nstretch.seqname
            refBS = refBSDB.fetch(currentchrom)
            seqInNstretches.add(currentchrom)
            debut = 1
        # Piece located before this N stretch. On a sequence change the end
        # coordinate used to be left at the previous sequence's length
        # ("fin==Nstretch.start" was a comparison, not an assignment).
        addIfLongEnough(refBS, debut, Nstretch.start - 1)
        debut = Nstretch.end + 1

    if currentchrom is not None:
        # Tail of the last sequence that contains N stretches.
        addIfLongEnough(refBS, debut, refBSDB.getSeqLength(currentchrom))

    # Sequences without any N stretch are kept whole when long enough.
    for refName in refBSDB.getHeaderList():
        if refName not in seqInNstretches:
            fin = refBSDB.getSeqLength(refName)
            if fin > minContigsize:
                newBSDB.add(refBSDB.fetch(refName).subseq(1, fin))

    if self._verbosity >= 2:
        print("%s contigs selected from %s scaffolds"%(newBSDB.getSize(),refBSDB.getSize()))
    #newBSDB.sortByLength(reverse=True)

    return newBSDB
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
250
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
251
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
252
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
253 #TODO refactoring about min size of genome for preprocess
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def selectContigs4givenSize(self,BSDB,limit=200000000):
    """Select the leading contigs of BSDB whose cumulated length approaches `limit`.

    BSDB  -- bank whose `db` attribute is iterated in order; each item
             exposes getLength().
    limit -- targeted cumulated length.

    Contigs are taken while the running total stays strictly below `limit`.
    The contig that reaches or crosses the limit is added as well when doing
    so ends closer to the limit than stopping before it. Returns a new
    BioseqDB holding the selection.
    """
    if self._verbosity > 0:
        print("Selecting contigs to reach %s pb "%limit)

    initialContigNb = BSDB.getSize()
    cumulatedSize = 0
    sizeBelowLimit = 0
    lselectedContigs = []
    # Contig that made the total reach the limit, if any. Tracking it
    # explicitly avoids reusing the loop variable after the loop, which was
    # undefined for an empty bank and duplicated the last contig when the
    # limit was never reached and `limit` was large.
    overflowSeq = None
    overflowSize = 0

    for seq in BSDB.db:
        cumulatedSize += seq.getLength()
        if cumulatedSize < limit:
            lselectedContigs.append(seq)
            sizeBelowLimit = cumulatedSize
        else:
            overflowSeq = seq
            overflowSize = cumulatedSize
            break

    if overflowSeq is not None and overflowSize - limit < limit - sizeBelowLimit:
        lselectedContigs.append(overflowSeq)

    if self._verbosity > 0:
        print("%s contigs selected to reach %s pb (%s contigs initially) "%(len(lselectedContigs),limit,initialContigNb))

    selectedContigsBSDB = BioseqDB()
    selectedContigsBSDB.setData(lselectedContigs)
    return selectedContigsBSDB
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
281
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def writeFastaInput(self,BSDB,outFileName=''):
    """Save BSDB to a fasta file and return the name of that file.

    When outFileName is empty, "<project name>.fastaExtract" is used.
    """
    verbose = self._verbosity > 0
    if verbose:
        print("Writing fasta file")

    targetName = outFileName if outFileName else self._projectName + ".fastaExtract"

    BSDB.save(targetName)
    if verbose:
        print('%d sequences saved.'%BSDB.getSize())

    return targetName
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
294
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def correctHeader(self,BSDB):
    """Replace unsupported characters of the sequence headers by '_'.

    Every character other than letters, digits, '_', ':', '.' and '-' is
    turned into an underscore. Headers are changed in place through
    BSDB.fetch(header).setHeader(...); the same BSDB object is returned.
    """
    if self._verbosity > 0:
        print("Correcting fasta headers")

    forbiddenChar = re.compile(r'[^a-zA-Z0-9_:\.\-]', re.IGNORECASE)
    correctedCount = 0
    for originalHeader in BSDB.getHeaderList():
        if forbiddenChar.search(originalHeader) is None:
            continue
        newHeader = forbiddenChar.sub('_', originalHeader)
        if self._verbosity > 2:
            print("Correct Header : '%s' replaced by '%s'"%(originalHeader,newHeader))
        BSDB.fetch(originalHeader).setHeader(newHeader)
        correctedCount += 1

    if self._verbosity > 0:
        print('%s sequence headers corrected'%correctedCount)
    return BSDB
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
313
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
314
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def _launchTEdenovo(self):
    """Run the TEdenovo.py steps one after another, then collect the results.

    Every step is run through the shell with os.system(). If a command
    returns a non-zero status, or if an expected result file cannot be found
    once all steps are done, an error is printed, self._cleanTables() is
    called and the process exits with status 1.

    On success the filtered consensus library is copied to
    self._outputFasta, the classification statistics to self._outputStats
    and, when self._classif is set, the classification file to
    self._outputClassif. self._renameTE() is called last.
    """
    print("START time: %s" % time.strftime("%Y-%m-%d %H:%M:%S"))

    # Value of "-S" followed by the step-specific options, in execution order.
    lStepOptions = [
        "1",
        "2 -s Blaster",
        "3 -s Blaster -c Grouper",
        "3 -s Blaster -c Recon",
        "3 -s Blaster -c Piler",
        "4 -s Blaster -c Grouper -m Map",
        "4 -s Blaster -c Recon -m Map",
        "4 -s Blaster -c Piler -m Map",
        "5 -s Blaster -c GrpRecPil -m Map",
        "6 -s Blaster -c GrpRecPil -m Map",
        "7 -s Blaster -c GrpRecPil -m Map",
    ]
    lCmds = [
        "TEdenovo.py -P %s -C %s -S %s -v %i"
        % (self._projectName, self._configFileName, stepOptions, self._verbosity)
        for stepOptions in lStepOptions
    ]

    for cmd in lCmds:
        returnValue = os.system(cmd)
        if returnValue != 0:
            print("ERROR: command '%s' returned %i" % (cmd, returnValue))
            self._cleanTables()
            sys.exit(1)

    print("END time: %s" % time.strftime("%Y-%m-%d %H:%M:%S"))

    # (glob pattern of the result file, destination) pairs, in copy order.
    lOutputs = [
        ("%s_Blaster_GrpRecPil_Map_TEclassif_Filtered/*_denovoLibTEs_filtered.fa" % self._projectName,
         self._outputFasta),
        ("%s_Blaster_GrpRecPil_Map_TEclassif_Filtered/*.classif_stats.txt" % self._projectName,
         self._outputStats),
    ]
    if self._classif:
        lOutputs.append(
            ("%s_Blaster_GrpRecPil_Map_TEclassif/classifConsensus/*_withoutRedundancy_negStrandReversed_WickerH.classif" % self._projectName,
             self._outputClassif))

    for pattern, destination in lOutputs:
        lMatches = glob.glob(pattern)
        if not lMatches:
            # glob.glob() returns an empty list when nothing matches; fail the
            # same way as a failed command instead of raising IndexError.
            print("ERROR: no output file matching '%s'" % pattern)
            self._cleanTables()
            sys.exit(1)
        shutil.copy(lMatches[0], destination)

    # NOTE(review): the indentation of the original listing was lost; the
    # rename is assumed to run whether or not self._classif is set, since
    # _renameTE() tests self._classif itself -- confirm.
    self._renameTE()
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
346
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def _renameTE(self):
    """Replace the project name by the genome name in the result files.

    The genome name is the part of self._outputFasta that precedes
    '-<8 digits>-denovoLibTEs_filtered.<fa|fasta|fsa|fas>'. On every line of
    self._outputFasta (and of self._outputClassif when self._classif is
    set), the first occurrence of self._projectName is replaced by that
    name. The project name is matched literally.

    If self._outputFasta does not follow the expected naming scheme, a
    warning is printed and the files are left untouched.
    """
    # The extensions must be an alternation: inside [...] the '|' is a
    # literal character and only a single character would be matched.
    match = re.search(
        r'([^\/\\]*)-\d{8}-denovoLibTEs_filtered\.(?:fasta|fas|fsa|fa)',
        self._outputFasta)
    if match is None:
        print("WARNING: cannot extract a name from '%s', TE names not replaced" % self._outputFasta)
        return
    name = match.group(1)

    def replaceFirstOccurrencePerLine(fileName):
        # Same effect as a substitution without the 'g' flag: at most one
        # replacement per line.
        with open(fileName, "r") as inHandler:
            lLines = inHandler.readlines()
        with open(fileName, "w") as outHandler:
            outHandler.writelines(
                [line.replace(self._projectName, name, 1) for line in lLines])

    replaceFirstOccurrencePerLine(self._outputFasta)
    if self._classif:
        replaceFirstOccurrencePerLine(self._outputClassif)
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
352
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def preprocessFastaFile(self):
    """Prepare the genome FASTA file and link it as '<project>.fa'.

    The cumulative sequence length of self._fastaFileName is computed with
    FastaUtils.dbCumLength(). When it reaches self._limitSeqSize, the
    sequences go through removeNstretches(), selectContigs4givenSize(),
    correctHeader() and writeFastaInput(), and the file returned by
    writeFastaInput() is used. Otherwise the input file is used as is.

    In both cases a symbolic link named '<self._projectName>.fa' pointing to
    the chosen file is created in the current working directory.
    """
    # The 'with' block closes the handle even if dbCumLength() raises.
    with open(self._fastaFileName, "r") as inFileHandler:
        cumulLength = FastaUtils.dbCumLength(inFileHandler)

    if cumulLength >= self._limitSeqSize:
        print("Preprocess lauched")
        allContigsBSDB = self.removeNstretches()
        selectedContigsBSDB = self.selectContigs4givenSize(allContigsBSDB)
        # The return value of correctHeader() is not used here.
        self.correctHeader(selectedContigsBSDB)
        fastaFile = self.writeFastaInput(selectedContigsBSDB)
        print("Preprocess finished")
    else:
        fastaFile = self._fastaFileName
        print("No preprocess : the genome size %s lower than %s Mbp" % (cumulLength, self._limitSeqSize/1000000))

    # NOTE(review): indentation was lost in the original listing; the link is
    # assumed to be created in both branches since both define fastaFile.
    os.symlink(fastaFile, "%s/%s.fa" % (os.getcwd(), self._projectName))
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
368
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def _launchListAndDropTables(self):
    """Run ListAndDropTables.py for the project through the shell.

    The command receives self._configFileName with '-C' and the quoted
    self._projectName with '-d'. A non-zero exit status is reported with a
    warning; no exception is raised, so cleanup callers can carry on.
    """
    cmd = "ListAndDropTables.py"
    cmd += " -C %s" % self._configFileName
    cmd += " -d '%s'" % self._projectName
    returnValue = os.system(cmd)
    if returnValue != 0:
        # Previously the status was dropped, hiding failed cleanups.
        print("WARNING: command '%s' returned %i" % (cmd, returnValue))
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
374
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def _cleanJobsTable(self):
    """Delete from the 'jobs' table the rows whose groupid starts with the project name.

    A database handle is created from self._configFileName with
    DbFactory.createInstance() and is closed even if the query fails.
    """
    db = DbFactory.createInstance( configFileName = self._configFileName )
    try:
        # NOTE(review): the project name is interpolated into the statement,
        # and in a LIKE pattern '_' and '%' are wildcards -- a name containing
        # them may match rows of other groups. Confirm names are restricted.
        sql_cmd = "DELETE FROM jobs WHERE groupid like '%s%%';" % self._projectName
        db.execute( sql_cmd )
    finally:
        db.close()
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
380
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def _cleanTables(self):
    """Run both cleanup steps: first the table drop, then the jobs purge."""
    for cleanupStep in (self._launchListAndDropTables, self._cleanJobsTable):
        cleanupStep()
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
384
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
385
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
def run(self):
    """Create the project directory, set it up and run the whole pipeline.

    Steps, in order: create and enter the directory named after the project,
    write the configuration file, preprocess the FASTA file, link every
    provided bank (nucleotide, protein, HMM profiles, rDNA) into the working
    directory, launch TEdenovo and finally clean the tables.
    """
    projectDir = self._projectName
    os.mkdir(projectDir)
    os.chdir(projectDir)

    self._writeConfigFile()
    self.preprocessFastaFile()

    # Optional banks: each one is linked under its base name when set.
    for attributeName in ("_nucl_bank", "_prot_bank", "_HMM_profiles", "_rdna_bank"):
        bankPath = getattr(self, attributeName)
        if bankPath in ("", None):
            continue
        linkName = "%s/%s" % (os.getcwd(), os.path.basename(bankPath))
        os.symlink(bankPath, linkName)

    self._launchTEdenovo()
    self._cleanTables()
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
402
baea09e6722b 1st Uploaded
vmarcon
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: build the pipeline object, configure it from the
    # command line, then run it.
    launcher = TEdenovo_lite()
    launcher.setAttributesFromCommandLine()
    launcher.run()