Mercurial > repos > yufei-luo > s_mart
comparison commons/pyRepetUnit/blastnForClassifierStep1/RepbaseBLRnForClassifierStep1.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
comparison
equal
deleted
inserted
replaced
30:5677346472b5 | 31:0ab839023fe4 |
---|---|
1 """ | |
2 Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table. | |
3 """ | |
4 | |
5 import os | |
6 import ConfigParser | |
7 from commons.core.utils.FileUtils import FileUtils | |
8 from commons.core.LoggerFactory import LoggerFactory | |
9 | |
# Prefix of the logger name; the class name is appended to it ("<LOG_DEPTH>.<ClassName>")
# when the logger is created in the constructor below.
LOG_DEPTH = "repet.tools"
11 | |
class RepbaseBLRnForClassifierStep1( object ):
    """
    Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table.

    @param inFileName: name of the input fasta file
    @type inFileName: string

    @param launch_1: generic command at the beginning of a specific command
    @type launch_1: string

    @param launch_2: generic command at the end of a specific command
    @type launch_2: string

    @param cDir: current directory (where to retrieve the result files)
    @type cDir: string

    @param tmpDir: temporary directory (where the job will run)
    @type tmpDir: string

    @param configFileName: configuration file name
    @type configFileName: string

    @param verbose: verbose(0/1/2)
    @type verbose: int

    @param pL: program launcher
    @type pL: programLauncher Instance

    @param project: project name
    @type project: string
    """

    def __init__(self, inFileName, launch_1, launch_2, cDir, tmpDir, configFileName, verbose, pL, project):
        """
        Constructor: store the parameters, parse the configuration file,
        read the bank name (option 'TE_nucl_bank' of section 'detect_features')
        and create the logger.
        """
        self._inFileName = inFileName
        self._launch_1 = launch_1
        self._launch_2 = launch_2
        self._cDir = cDir
        self._tmpDir = tmpDir
        self._verbose = verbose
        self._pL = pL
        self._project = project
        self._fileUtils = FileUtils()
        self._config = ConfigParser.ConfigParser()
        self._configFileName = configFileName
        # Close the configuration file as soon as it has been parsed
        # (the handle used to be left open).
        with open(self._configFileName) as configFile:
            self._config.readfp(configFile)
        self._bank = self._config.get("detect_features","TE_nucl_bank")
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbose)

    @staticmethod
    def _getProgramPath(name):
        """
        Return the path of program 'name' in the 'bin' directory of $REPET_PATH.

        @raise KeyError: if the environment variable REPET_PATH is not set
        """
        return os.environ["REPET_PATH"] + "/bin/" + name

    @staticmethod
    def _getMoveIfAbsentSnippet(fileName, destDir):
        """
        Return the two lines of Python source moving 'fileName' into 'destDir'
        unless 'destDir/fileName' already exists.
        """
        snippet = "if not os.path.exists( \"%s/%s\" ):\n" % (destDir, fileName)
        snippet += "\tos.system( \"mv %s %s\" )\n" % (fileName, destDir)
        return snippet

    @staticmethod
    def _getRemoveIfExistsSnippet(fileName):
        """
        Return the two lines of Python source removing 'fileName' if it exists.
        """
        snippet = "if os.path.exists( \"%s\" ):\n" % (fileName)
        snippet += "\tos.remove( \"%s\" )\n" % (fileName)
        return snippet

    def _isWuBlastRequired(self):
        """
        Return True if option 'wublast' of section 'detect_features' is 'yes'.
        """
        return self._config.get("detect_features","wublast") == "yes"

    def formatRepbase_ntIfNecessary( self ):
        """
        Format Repbase (make 'cut' files).

        Nothing is done if '<bank>_cut' already exists. Otherwise blaster is
        launched on the bank via the program launcher, and the
        '<bank>-blastn-*.param' files are removed afterwards.
        """
        if os.path.exists( "%s_cut" % ( self._bank ) ):
            return
        self._log.debug("prepare bank '%s'..." % ( self._bank ))
        prg = self._getProgramPath("blaster")
        cmd = prg
        cmd += " -s %s" % ( self._bank )
        cmd += " -n blastn"
        if self._isWuBlastRequired():
            cmd += " -W"
        cmd += " -r"
        cmd += " -P"
        self._pL.launch( prg, cmd )
        os.system( "rm -f %s-blastn-*.param" % ( self._bank ) )

    def createCmdToLaunch( self ):
        """
        Build the text of the job comparing the input file with the bank.

        The text is made of the blaster command (wrapped between 'launch_1'
        and 'launch_2'), Python statements saving the blaster '.param' file
        in 'cDir' and removing the other blaster files, the matcher command
        (wrapped the same way), and Python statements saving the matcher
        '.path' and '.param' files in 'cDir' and removing the other matcher
        files. When 'tmpDir' differs from 'cDir', a last statement removes
        the bank.

        @return: all the commands to run the job
        @rtype: string

        @raise KeyError: if the environment variable REPET_PATH is not set
        """
        # Common prefix of all the files generated for this input and this bank.
        prefix = "%s_BLRn_%s" % ( self._inFileName, self._bank )
        bankInCDir = "%s/%s" % ( self._cDir, self._bank )

        # blaster command
        cmd = self._launch_1 + self._getProgramPath("blaster")
        cmd += " -q %s" % ( self._inFileName )
        cmd += " -s %s" % ( bankInCDir )
        cmd += " -B %s" % ( prefix )
        cmd += " -n blastn"
        if self._isWuBlastRequired():
            cmd += " -W"
        cmd += " -r"
        cmd += " -v 1"
        cmd += self._launch_2

        # keep the blaster parameter file, drop the other blaster files
        cmd += self._getMoveIfAbsentSnippet( prefix + ".param", self._cDir )
        # the 'cut' files are removed with a shell wildcard, hence os.system
        cmd += "if os.path.exists( \"%s_cut\" ):\n" % ( self._inFileName )
        cmd += "\tos.system( \"rm -f %s_cut*\" )\n" % ( self._inFileName )
        cmd += self._getRemoveIfExistsSnippet( "%s.Nstretch.map" % ( self._inFileName ) )
        for suffix in ( ".raw", ".seq_treated" ):
            cmd += self._getRemoveIfExistsSnippet( prefix + suffix )

        # matcher command
        cmd += self._launch_1
        cmd += self._getProgramPath("matcher")
        cmd += " -m %s.align" % ( prefix )
        cmd += " -q %s" % ( self._inFileName )
        cmd += " -s %s" % ( bankInCDir )
        cmd += " -j"
        cmd += " -v 1"
        cmd += self._launch_2

        # keep the matcher 'path' and 'param' files, drop the other matcher files
        for suffix in ( ".align.clean_match.path", ".align.clean_match.param" ):
            cmd += self._getMoveIfAbsentSnippet( prefix + suffix, self._cDir )
        for suffix in ( ".align", ".align.clean_match.fa", ".align.clean_match.map", ".align.clean_match.tab" ):
            cmd += self._getRemoveIfExistsSnippet( prefix + suffix )

        if self._tmpDir != self._cDir:
            cmd += self._getRemoveIfExistsSnippet( self._bank )

        return cmd

    def collectRepbaseBLRn( self ):
        """
        Concatenate the outputs of blastn, adapt the ID and load the results into a table.
        """
        # Only the file name of the bank (without its directory) is used in the result file names.
        bank = os.path.basename( self._bank )
        self._concatPathFile(bank)
        self._adaptIDInPathFile(bank)
        self._loadPathFileInTable(bank)
        self._findAndRemoveUselessFiles(bank)

    def _concatPathFile(self, bank):
        """
        Concatenate the '../batch_*.fa_BLRn_<bank>.align.clean_match.path' files
        into '<project>_BLRn_<bank>.align.clean_match.path.tmp'.
        """
        FileUtils.catFilesByPattern("../batch_*.fa_BLRn_%s.align.clean_match.path" % bank,
                                    "%s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank))

    def _adaptIDInPathFile(self, bank):
        """
        Launch 'pathnum2id' on the concatenated '.path.tmp' file to produce the '.path' file.

        The program '$REPET_PATH/bin/pathnum2id' is used if it exists (with
        option '-v' set to verbose - 1), otherwise 'pathnum2id.py' is used
        (without option '-v').
        """
        pathFileName = "%s_BLRn_%s.align.clean_match.path" % (self._project, bank)
        prg = self._getProgramPath("pathnum2id")
        verbosityOption = ""
        if os.path.exists(prg):
            verbosityOption = " -v %i" % (self._verbose - 1)
        else:
            prg += ".py"
        cmd = prg
        cmd += " -i %s.tmp" % (pathFileName)
        cmd += " -o %s" % (pathFileName)
        cmd += verbosityOption
        self._pL.launch(prg, cmd)

    def _loadPathFileInTable(self, bank):
        """
        Launch 'srptCreateTable.py' to load the '.path' file into table '<project>_TE_BLRn_path'.
        """
        prg = self._getProgramPath("srptCreateTable.py")
        cmd = prg
        cmd += " -f %s_BLRn_%s.align.clean_match.path" % (self._project, bank)
        cmd += " -n %s_TE_BLRn_path" % (self._project)
        cmd += " -t path"
        cmd += " -c ../%s" % (self._configFileName)
        self._pL.launch(prg, cmd)

    def _findAndRemoveUselessFiles(self, bank):
        """
        Remove the 'batch_*.fa_BLRn_<bank>.*' files found under the parent
        directory, as well as the concatenated '.path.tmp' file.
        """
        prg = "find"
        cmd = prg
        # '\\;' is the explicit spelling of the backslash-semicolon ending the '-exec' clause
        cmd += " .. -name \"batch_*.fa_BLRn_%s.*\" -exec rm {} \\;" % (bank)
        self._pL.launch(prg, cmd)
        prg = "rm"
        cmd = prg
        cmd += " %s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank)
        self._pL.launch(prg, cmd)