6
|
1 #!/usr/bin/env perl
|
|
2 #
|
|
3 # hhblits.pl
|
|
4 # Creates HH-suite database files from A3M and HHM/HMMER-formatted files
|
|
5 # Usage: Usage: perl hhblitsdb.pl -o <db_name> [-ia3m <a3m_dir>] [-ihhm <hhm_dir>] [-ics <cs_dir>] [more_options]
|
|
6 #
|
|
7 # HHsuite version 2.0.16 (Sept 2012)
|
|
8 #
|
|
9 # Reference:
|
|
10 # Remmert M., Biegert A., Hauser A., and Soding J.
|
|
11 # HHblits: Lightning-fast iterative protein sequence searching by HMM-HMM alignment.
|
|
12 # Nat. Methods, epub Dec 25, doi: 10.1038/NMETH.1818 (2011).
|
|
13
|
|
14 # (C) Johannes Soeding, 2012
|
|
15
|
|
16 # This program is free software: you can redistribute it and/or modify
|
|
17 # it under the terms of the GNU General Public License as published by
|
|
18 # the Free Software Foundation, either version 3 of the License, or
|
|
19 # (at your option) any later version.
|
|
20
|
|
21 # This program is distributed in the hope that it will be useful,
|
|
22 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
23 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
24 # GNU General Public License for more details.
|
|
25
|
|
26 # You should have received a copy of the GNU General Public License
|
|
27 # along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
28
|
|
29 # We are very grateful for bug reports! Please contact us at soeding@genzentrum.lmu.de
|
|
30
|
|
31 use lib $ENV{"HHLIB"}."/scripts";
|
|
32 use HHPaths; # config file with path variables for nr, blast, psipred, pdb, dssp etc.
|
|
33 use strict;
|
|
34 #use File::Glob 'bsd_glob'; # splits patterns delimited by spaces into multiple patterns and applies them using OR
|
|
35
|
|
36 $|= 1; # Activate autoflushing on STDOUT
|
|
37
|
|
38 # Default values:
|
|
39 our $v=2; # verbose mode
|
|
40 my $a_if_append = ""; # do not append by default (default: create new db)
|
|
41 my $remove = 0; # do not remove by default (default: create new db)
|
|
42 my $hhmext = "hhm"; # default HHM-file extension
|
|
43 my $csext = "seq219"; # default HHM-file extension
|
|
44 my $cpu = 8;
|
|
45
|
|
46 # Variable declarations
|
|
47 my $line;
|
|
48 my $command;
|
|
49 my $a3mdir = ""; # name of input A3M directory
|
|
50 my $hhmdir = ""; # name of input HHM/HMM directory
|
|
51 my $csdir = ""; # name of input cs directory
|
|
52 my $a3mfile = ""; # name of packed ouput A3M file
|
|
53 my $hhmfile = ""; # name of packed ouput HHM file
|
|
54 my $csfile = ""; # name of cs sequence db file
|
|
55 my $dbname = ""; # output db name
|
|
56 my $logfile = "/dev/null"; # log file
|
|
57 my $file;
|
|
58 my $numcsfiles= 0;
|
|
59 my $num_chars = 0;
|
|
60 my $numa3mfiles=0;
|
|
61 my $numhhmfiles=0;
|
|
62 my $fileglob="";
|
|
63 my $help="
|
|
64 hhblitsdb.pl from HHsuite $VERSION
|
|
65 Builds HH-suite database from a3m formatted MSAs and/or from HMMs (-o).
|
|
66 MSAs and HMMs can also be added (-a) to or removed (-r) from an existing database.
|
|
67
|
|
68 Usage: hhblitsdb.pl -o|-a|-r <db_name> [-ia3m <a3m_dir>] [-ihhm <hhm_dir>] [-ics <cs_dir>]...
|
|
69
|
|
70 With option -o, the following HH-suite database files can be generated:
|
|
71 <db_name>.cs219 column-state sequences, one for each MSA/HMM (for prefilter)
|
|
72 <db_name>.cs219.sizes number of sequences and characters in <db_name>.cs219
|
|
73 <db_name>_a3m_db packed file containing A3M alignments read from <a3m_dir>
|
|
74 <db_name>_a3m_db.index index file for packed A3M file
|
|
75 <db_name>_a3m.db.index.sizes number of lines in <db_name>_a3m_db.index
|
|
76 <db_name>_hhm_db packed file containing HHM-formatted HMMs read from <hhm_dir>
|
|
77 <db_name>_hhm_db.index index file for packed HHM file
|
|
78 <db_name>_hhm_db.index.sizes number of lines in <db_name>_hhm_db.index
|
|
79
|
|
80 Options:
|
|
81 -o <db_name> create database with this name
|
|
82 -a <db_name> append files to database with this name
|
|
83 -r <db_name> remove files from database with this name
|
|
84 -ia3m <a3m_dir> input directory (or glob of directories) with A3M-formatted files
|
|
85 These files MUST have extension 'a3m'.
|
|
86 -ihhm <hhm_dir> input directory (or glob of directories) with HHM (or HMMER) files
|
|
87 These files MUST have extension 'hhm' (HHsuite) or 'hmm' (HMMER3).
|
|
88 -ics <cs_dir> input directory (or glob of directories) with column state sequences
|
|
89 -log <logfile> log file recording stderr stream of cstranslate and hhmake commands
|
|
90 -csext <ext> extension of column state sequences (default: $csext)
|
|
91 -hmm use HMMER-formatted files. These MUST have extension hmm
|
|
92 (WARNING! HMMER format results in decreased performance over HHM format)
|
|
93 -v [1-3] verbose mode (default: $v)
|
|
94 -cpu <int> number of threads to generate cs219 and hhm files (default = $cpu)
|
|
95 -f 'file_glob' string with list of glob expression of files to remove
|
|
96
|
|
97 Example 1: only -ia3m given; cs sequences and hhm files are generated from a3m files
|
|
98 perl hhblitsdb.pl -o databases/mydb -ia3m mydb/a3ms/
|
|
99
|
|
100 Example 2: only -ihhm given; cs sequences are generated from hhm files, but no a3m db file
|
|
101 perl hhblitsdb.pl -o databases/mydb -ihhm mydb/hhms/
|
|
102
|
|
103 Example 3: -ia3m and -ihhm given; cs sequences are generated from a3m files
|
|
104 perl hhblitsdb.pl -o databases/mydb -ia3m mydb/a3ms/ -ihhm mydb/hhms/
|
|
105
|
|
106 Example 4: -ics, -ia3m, and -ihhm given; all db files are created
|
|
107 perl hhblitsdb.pl -o databases/mydb -ia3m mydb/a3ms/ -ihhm mydb/hhms/ -ics mydb/cs/
|
|
108
|
|
109 Example 5: using glob expression to specify files (note the singe quotes)
|
|
110 perl hhblitsdb.pl -o databases/mydb -ihhm 'mydbs*/hhms/*.hhm'
|
|
111
|
|
112 Example 6: add files to database; cs sequences and hhm files are generated from a3m files
|
|
113 perl hhblitsdb.pl -a databases/mydb -ia3m 'mydbs/a3ms/g1a*.a3m'
|
|
114
|
|
115 Example 7: remove files from database
|
|
116 perl hhblitsdb.pl -r databases/mydb -f 'mydbs/a3ms/g1a*.* mydbs2/'
|
|
117 \n";
|
|
118
|
|
119
|
|
120 ###############################################################################################
|
|
121 # Processing command line input
|
|
122 ###############################################################################################
|
|
123
|
|
124 if (@ARGV<1) {die ($help);}
|
|
125
|
|
126 for (my $i=0; $i<@ARGV; $i++) {
|
|
127 if ($ARGV[$i] eq "-ics") {
|
|
128 if (++$i<@ARGV) {
|
|
129 $csdir=$ARGV[$i];
|
|
130 } else {
|
|
131 die ("$help\n\nERROR! Missing directory after -ics option!\n");
|
|
132 }
|
|
133 } elsif ($ARGV[$i] eq "-ia3m") {
|
|
134 if (++$i<@ARGV) {
|
|
135 $a3mdir=$ARGV[$i];
|
|
136 } else {
|
|
137 die ("$help\n\nERROR! Missing directory after -ia3m option!\n");
|
|
138 }
|
|
139 } elsif ($ARGV[$i] eq "-ihhm") {
|
|
140 if (++$i<@ARGV) {
|
|
141 $hhmdir=$ARGV[$i];
|
|
142 } else {
|
|
143 die ("$help\n\nERROR! Missing directory after -ihhm option!\n");
|
|
144 }
|
|
145 } elsif ($ARGV[$i] eq "-log") {
|
|
146 if (++$i<@ARGV) {
|
|
147 $logfile=$ARGV[$i];
|
|
148 unlink $logfile;
|
|
149 } else {
|
|
150 die ("$help\n\nERROR! Missing filename after -log option!\n");
|
|
151 }
|
|
152 } elsif ($ARGV[$i] eq "-csext") {
|
|
153 if (++$i<@ARGV) {
|
|
154 $csext=$ARGV[$i];
|
|
155 } else {
|
|
156 die ("$help\n\nERROR! Missing extension after -csext option!\n");
|
|
157 }
|
|
158 } elsif ($ARGV[$i] eq "-hmm") {
|
|
159 $hhmext="hmm";
|
|
160 print("\nWARNING! HMMER format results in decreased performance over HHM format. We recommend to generate hhm files directly from multiple sequence alignments using hmake.\n");
|
|
161 } elsif ($ARGV[$i] eq "-v") {
|
|
162 if (++$i<@ARGV) {
|
|
163 $v=$ARGV[$i];
|
|
164 } else {
|
|
165 $v = 2;
|
|
166 }
|
|
167 } elsif ($ARGV[$i] eq "-cpu") {
|
|
168 if (++$i<@ARGV) {
|
|
169 $cpu=$ARGV[$i];
|
|
170 }
|
|
171 } elsif ($ARGV[$i] eq "-f") {
|
|
172 if (++$i<@ARGV) {
|
|
173 $fileglob=$ARGV[$i];
|
|
174 } else {
|
|
175 die ("$help\n\nERROR! Missing expression after -f option!\n");
|
|
176 }
|
|
177 } elsif ($ARGV[$i] eq "-r") {
|
|
178 if (++$i<@ARGV) {
|
|
179 if ($dbname!="") {die("$help\n\nERROR! options -o and -r not compatible!\n");}
|
|
180 $dbname=$ARGV[$i];
|
|
181 $remove=1;
|
|
182 } else {
|
|
183 die ("$help\n\nERROR! Missing filename after -o option!\n");
|
|
184 }
|
|
185 } elsif ($ARGV[$i] eq "-a") {
|
|
186 if (++$i<@ARGV) {
|
|
187 if ($remove==1) {die("$help\n\nERROR! options -r and -a not compatible!\n");}
|
|
188 if ($dbname!="") {die("$help\n\nERROR! options -o and -a not compatible!\n");}
|
|
189 $dbname=$ARGV[$i];
|
|
190 $a_if_append="a";
|
|
191 } else {
|
|
192 die ("$help\n\nERROR! Missing filename after -o option!\n");
|
|
193 }
|
|
194 } elsif ($ARGV[$i] eq "-o") {
|
|
195 if (++$i<@ARGV) {
|
|
196 if ($remove==1) {die("$help\n\nERROR! options -r and -o not compatible!\n");}
|
|
197 if ($a_if_append) {die("$help\n\nERROR! options -a and -o not compatible!\n");}
|
|
198 $dbname=$ARGV[$i];
|
|
199 } else {
|
|
200 die ("$help\n\nERROR! Missing filename after -o option!\n");
|
|
201 }
|
|
202 } else {
|
|
203 if ($dbname="") {
|
|
204 $dbname=$ARGV[$i];
|
|
205 } else {
|
|
206 print "WARNING! Unknown option $ARGV[$i]!\n";
|
|
207 }
|
|
208 }
|
|
209 }
|
|
210
|
|
211 # Check input
|
|
212 if (!$dbname) {print($help); die("ERROR! Name of database is missing! Use -o <db_name>\n");}
|
|
213 $a3mdir=~s/\/$//;
|
|
214 $a3mfile = $dbname."_a3m_db";
|
|
215 $hhmfile = $dbname."_hhm_db";
|
|
216 $csfile = $dbname.".cs219";
|
|
217 if ($hhmdir) {$hhmdir=~s/\/$//;}
|
|
218
|
|
219 if ($a_if_append eq "" && $remove==0) {
|
|
220 unlink $csfile, $a3mfile, $a3mfile.".index", $hhmfile, $hhmfile.".index";
|
|
221 }
|
|
222
|
|
223 if ($a3mdir eq "" && $hhmdir eq "" && $csdir eq "" && $remove==0) {
|
|
224 print($help); print "ERROR! At least one input directory must be given!\n"; exit(1);
|
|
225 }
|
|
226
|
|
227 # If $csdir is simple directory instead of glob expression, turn it into glob expression
|
|
228 if ($csdir) {
|
|
229 if ($csdir !~ /\*/ && $csdir !~ /\?/ && $csdir !~ / /) {
|
|
230 $csdir .= "/*.".$csext;
|
|
231 }
|
|
232 }
|
|
233 if ($a3mdir) {
|
|
234 if ($a3mdir !~ /\*/ && $a3mdir !~ /\?/ && $a3mdir !~ / /) {
|
|
235 $a3mdir .= "/*.a3m";
|
|
236 }
|
|
237 }
|
|
238 if ($hhmdir) {
|
|
239 if ($hhmdir !~ /\*/ && $hhmdir !~ /\?/ && $hhmdir !~ / /) {
|
|
240 $hhmdir .= "/*.".$hhmext;
|
|
241 }
|
|
242 }
|
|
243
|
|
244
|
|
245 # If in append mode, initialize size counters with present sizes
|
|
246 if ($a_if_append || $remove==1) {
|
|
247 open (IN, "<$a3mfile.index.sizes") || die("Error: can't open $a3mfile.index.sizes: $!");
|
|
248 $line = <IN>;
|
|
249 close IN;
|
|
250 $line =~ /^(\S*)/;
|
|
251 $numa3mfiles = $1;
|
|
252
|
|
253 open (IN, "<$hhmfile.index.sizes") || die("Error: can't open $hhmfile.index.sizes: $!");
|
|
254 $line = <IN>;
|
|
255 close IN;
|
|
256 $line =~ /^(\S*)/;
|
|
257 $numhhmfiles = $1;
|
|
258
|
|
259 open (IN, "<$csfile.sizes") || die("Error: can't open $csfile.sizes: $!");
|
|
260 $line = <IN>;
|
|
261 close IN;
|
|
262 $line =~ /(\S*)\s+(\S*)/;
|
|
263 $numcsfiles = $1;
|
|
264 $num_chars = $2;
|
|
265
|
|
266 printf("Current number of a3m files in db: %i\n",$numa3mfiles);
|
|
267 printf("Current number of $hhmext files in db: %i\n",$numhhmfiles);
|
|
268 printf("Current number of $csext files in db: %i\n\n",$numcsfiles);
|
|
269
|
|
270 } else {
|
|
271 $numa3mfiles = 0;
|
|
272 $numhhmfiles = 0;
|
|
273 $numcsfiles = 0;
|
|
274 $num_chars = 0;
|
|
275 }
|
|
276
|
|
277 # Create tmp directory (plus path, if necessary)
|
|
278 my $tmpdir="/tmp/$ENV{USER}/$$"; # directory where all temporary files are written: /tmp/UID/PID
|
|
279 my $suffix=$tmpdir;
|
|
280 while ($suffix=~s/^\/[^\/]+//) {
|
|
281 $tmpdir=~/(.*)$suffix/;
|
|
282 if (!-d $1) {mkdir($1,0777);}
|
|
283 }
|
|
284 unlink glob("$tmpdir/*"); # clean up directory if it already exists
|
|
285 unlink $logfile;
|
|
286
|
|
287
|
|
288
|
|
289 ##############################################################################################
|
|
290 # Remove files?
|
|
291 ##############################################################################################
|
|
292 if ($remove==1) {
|
|
293
|
|
294 printf("Removing files from indices...\n");
|
|
295
|
|
296 # Read numbers of sequences and characters in csfile
|
|
297 open (IN, "<$csfile.sizes");
|
|
298 $line = <IN>;
|
|
299 close IN;
|
|
300 $line =~ /(\S*)\s+(\S*)/;
|
|
301 $numcsfiles = $1;
|
|
302 $num_chars = $2;
|
|
303
|
|
304 # Remove names from a3m and hhm index files
|
|
305 my $files = " ".join(" ", glob($fileglob));
|
|
306 $files =~ s/\S*\///g;
|
|
307 &HHPaths::System("ffindex_modify -su $dbname"."_a3m_db.index ".$files);
|
|
308 &HHPaths::System("ffindex_modify -su $dbname"."_hhm_db.index ".$files);
|
|
309
|
|
310 # Adjust number of files in $a3mfile.index.sizes
|
|
311 $numa3mfiles = 0;
|
|
312 open (IN, "<$a3mfile.index") || die("Error: can't open $a3mfile.index: $!");
|
|
313 while(<IN>) {$numa3mfiles++;}
|
|
314 close IN;
|
|
315 open (OUT, ">$a3mfile.index.sizes") || die("Error: can't open $a3mfile.index.sizes: $!");
|
|
316 printf(OUT "%i\n",$numa3mfiles);
|
|
317 close(OUT);
|
|
318
|
|
319 # Adjust number of files in $hhmfile.index.sizes
|
|
320 $numhhmfiles = 0;
|
|
321 open (IN, "<$hhmfile.index") || die("Error: can't open $hhmfile.index: $!");
|
|
322 while(<IN>) {$numhhmfiles++;}
|
|
323 close IN;
|
|
324 open (OUT, ">$hhmfile.index.sizes") || die("Error: can't open $hhmfile.index.sizes: $!");
|
|
325 printf(OUT "%i\n",$numa3mfiles);
|
|
326 close(OUT);
|
|
327
|
|
328
|
|
329 # Remove sequences of globbed files from cs file
|
|
330 my $skipseq=0;
|
|
331 $numcsfiles = 0;
|
|
332 $num_chars = 0;
|
|
333 open (IN, "<$csfile");
|
|
334 open (OUT, ">$csfile".".tmp");
|
|
335 foreach my $line (<IN>) {
|
|
336 if ($line =~ /^>(\w*)/) {
|
|
337 my $name = $1;
|
|
338 if ($files =~ / $name\./) { # found name in list of globbed file names?
|
|
339 $skipseq=1;
|
|
340 } else {
|
|
341 $skipseq=0;
|
|
342 printf(OUT "%s",$line);
|
|
343 $numcsfiles++;
|
|
344 }
|
|
345 } else {
|
|
346 if (!$skipseq) {
|
|
347 printf(OUT "%s",$line);
|
|
348 $num_chars += length($line);
|
|
349 }
|
|
350 }
|
|
351 }
|
|
352 close(OUT);
|
|
353 close(IN);
|
|
354 unlink($csfile);
|
|
355 &HHPaths::System("mv $csfile".".tmp ".$csfile);
|
|
356
|
|
357 # Adjust $csfile.sizes
|
|
358 open (OUT, ">$csfile.sizes");
|
|
359 print OUT "$numcsfiles $num_chars\n";
|
|
360 close OUT;
|
|
361
|
|
362 } else {
|
|
363
|
|
364 ##############################################################################################
|
|
365 # Generate new db or append to old
|
|
366 ##############################################################################################
|
|
367
|
|
368
|
|
369 ##############################################################################################
|
|
370 # Generate column-state database file
|
|
371 ##############################################################################################
|
|
372
|
|
373 # Generate column-state sequences in $tmpdir if no -ics directory given
|
|
374 if (!$csdir)
|
|
375 {
|
|
376 my $x = 0.3; # parameters for cstranslate
|
|
377 my $c = 4; # parameters for cstranslate
|
|
378
|
|
379 if ($a3mdir) {
|
|
380 print("Generating seq219 files in $tmpdir/ from a3m files $a3mdir\n\n");
|
|
381 $command = "$hhbin/cstranslate -i \$file -o $tmpdir/\$base.seq219 -D $context_lib -A $cs_lib -x $x -c $c 1>>$logfile 2>>$logfile";
|
|
382 &HHPaths::System("$hhscripts/multithread.pl '".$a3mdir."' '$command' -cpu $cpu");
|
|
383
|
|
384 } elsif ($hhmdir) {
|
|
385
|
|
386 if ($hhmext eq "hmm") {
|
|
387 print("\nGenerating prf profile files in $tmpdir/ from hmm files $hhmdir/\n\n");
|
|
388 $command = "$hhscripts/create_profile_from_hmmer.pl -i \$file -o $tmpdir/\$base.prf 1>/dev/null 2>>$logfile";
|
|
389 &HHPaths::System("$hhscripts/multithread.pl '".$hhmdir."' '$command' -cpu $cpu");
|
|
390 } else { # $hhmext eq "hhm"
|
|
391 print("\nGenerating prf profile files in $tmpdir/ from hhm files $hhmdir/\n\n");
|
|
392 $command = "$hhscripts/create_profile_from_hhm.pl -i \$file -o $tmpdir/\$base.prf 1>/dev/null 2>>$logfile";
|
|
393 &HHPaths::System("$hhscripts/multithread.pl '".$hhmdir."' '$command' -cpu $cpu");
|
|
394 }
|
|
395
|
|
396 print("\nGenerating seq219 files in $tmpdir/ from prf files in $tmpdir/\n\n");
|
|
397 if ($hhmext eq "hmm") {
|
|
398 $command = "$hhbin/cstranslate -i \$file -o \$name.seq219 -A $cs_lib 1>>$logfile 2>>$logfile";
|
|
399 } else { # $hhmext eq "hhm"
|
|
400 $command = "$hhbin/cstranslate -i \$file -o \$name.seq219 -A $cs_lib -D $context_lib -x $x -c $c 1>>$logfile 2>>$logfile";
|
|
401 }
|
|
402 &HHPaths::System("$hhscripts/multithread.pl '".$tmpdir."/*.prf' '$command' -cpu $cpu");
|
|
403 }
|
|
404
|
|
405 $csdir = $tmpdir."/*.$csext";
|
|
406 }
|
|
407
|
|
408
|
|
409 # Write columns state sequences into cs database file,
|
|
410 # replace names in cs sequences with filenames: ">name+description" => ">filename"
|
|
411 if ($a_if_append) {
|
|
412 open (OUT, ">>$csfile");
|
|
413 } else {
|
|
414 open (OUT, ">$csfile");
|
|
415 }
|
|
416 foreach my $seq219file (glob($csdir)) {
|
|
417 open (IN, "<$seq219file");
|
|
418 my @lines = <IN>;
|
|
419 close(IN);
|
|
420 $seq219file =~ s/.*?([^\/]*)\.$csext\s*/$1/ or die ("Error: $seq219file does not have the extension $csext!?\n");
|
|
421 foreach my $line (@lines) {
|
|
422 if ($line =~ /^>/) {
|
|
423 $line = ">".$seq219file."\n";
|
|
424 $numcsfiles++;
|
|
425 } else {
|
|
426 $num_chars += length($line);
|
|
427 }
|
|
428 printf(OUT "%s",$line);
|
|
429 }
|
|
430 }
|
|
431 close(OUT);
|
|
432
|
|
433 open (OUT, ">$csfile.sizes");
|
|
434 print OUT "$numcsfiles $num_chars\n";
|
|
435 close OUT;
|
|
436
|
|
437
|
|
438 ##############################################################################################
|
|
439 # Generate hhm files with hhmake from a3m files if no -ihhm directory given
|
|
440 ##############################################################################################
|
|
441
|
|
442 if (!$hhmdir)
|
|
443 {
|
|
444 if ($a3mdir) {
|
|
445 print("\nGenerating hhm files in $tmpdir/ from a3m files $a3mdir/\n\n");
|
|
446 $command = "hhmake -i \$file -o $tmpdir/\$base.hhm 1>/dev/null 2>>$logfile";
|
|
447 &HHPaths::System("$hhscripts/multithread.pl '".$a3mdir."' '$command' -cpu $cpu");
|
|
448 $hhmdir = $tmpdir."/*.$hhmext";;
|
|
449 $numhhmfiles += scalar(glob("$hhmdir"));
|
|
450 }
|
|
451 }
|
|
452
|
|
453
|
|
454 ##############################################################################################
|
|
455 # Generate packed A3M and HMM files and index files
|
|
456 ##############################################################################################
|
|
457
|
|
458 # Generate packed A3M file and index file?
|
|
459 if ($a3mdir ne "") {
|
|
460 print "Creating packed A3M database file $a3mfile ...\n";
|
|
461
|
|
462 open (OUT, ">$tmpdir/a3m.filelist");
|
|
463 my @files = glob("$a3mdir");
|
|
464 $numa3mfiles += scalar(@files);
|
|
465 foreach $file (@files) {
|
|
466 print OUT "$file\n";
|
|
467 }
|
|
468 close OUT;
|
|
469
|
|
470 # Build packed file (concatenated with '\0' as delimiters) and index file from files in file list
|
|
471 # The ffindex binaries are contained in <install_dir>/lib/ffindex/bin/
|
|
472 $command = "ffindex_build -".$a_if_append."s -f $tmpdir/a3m.filelist $a3mfile $a3mfile.index";
|
|
473 &HHPaths::System($command);
|
|
474
|
|
475 open (OUT, ">$a3mfile.index.sizes");
|
|
476 print OUT "$numa3mfiles\n";
|
|
477 close OUT;
|
|
478 }
|
|
479
|
|
480 # Generate packed HHMM file and index file?
|
|
481 if ($hhmdir ne "") {
|
|
482 print "Creating packed HHM database file $hhmfile ...\n";
|
|
483
|
|
484 open (OUT, ">$tmpdir/hhm.filelist");
|
|
485 my @files = glob("$hhmdir");
|
|
486 $numhhmfiles += scalar(@files);
|
|
487 foreach $file (@files) {
|
|
488 print OUT "$file\n";
|
|
489 }
|
|
490 close OUT;
|
|
491
|
|
492 # Build packed file (concatenated with '\0' as delimiters) and index file from files in file list
|
|
493 # The ffindex binaries are contained in <install_dir>/lib/ffindex/bin/
|
|
494 $command = "ffindex_build -".$a_if_append."s -f $tmpdir/hhm.filelist $hhmfile $hhmfile.index";
|
|
495 &HHPaths::System($command);
|
|
496
|
|
497 open (OUT, ">$hhmfile.index.sizes");
|
|
498 print OUT "$numhhmfiles\n";
|
|
499 close OUT;
|
|
500 }
|
|
501 } # end if $remove==0
|
|
502
|
|
503 print("\n");
|
|
504 printf("New number of a3m files in db: %i\n",$numa3mfiles);
|
|
505 printf("New number of $hhmext files in db: %i\n",$numhhmfiles);
|
|
506 printf("New number of $csext files in db: %i\n\n",$numcsfiles);
|
|
507
|
|
508 my $err=0;
|
|
509 if ($numa3mfiles && $numhhmfiles && $numa3mfiles != $numhhmfiles) {
|
|
510 print("**************************************************************************
|
|
511 WARNING: Number of a3m files not equal to number of $hhmext files\n"); $err=1;
|
|
512 }
|
|
513 if ($numcsfiles && $numhhmfiles && $numcsfiles != $numhhmfiles) {
|
|
514 print("**************************************************************************
|
|
515 WARNING: Number of $csext files not equal to number of $hhmext files\n"); $err=1;
|
|
516 }
|
|
517 if ($numcsfiles && $numa3mfiles && $numcsfiles != $numa3mfiles) {
|
|
518 print("**************************************************************************
|
|
519 WARNING: Number of $csext files not equal to number of a3m files\n"); $err=1;
|
|
520 }
|
|
521
|
|
522 if ($err==1) {print("**************************************************************************
|
|
523 $tmpdir will not be removed to check for missing files
|
|
524 **************************************************************************\n");}
|
|
525 elsif ($v<3) {
|
|
526 $command = "rm -rf $tmpdir";
|
|
527 # &System($command);
|
|
528 }
|
|
529 wait;
|
|
530 exit;
|
|
531
|