annotate external_tools/darwin/lib/hh/scripts/multithread.pl @ 6:2277dd59b9f9 draft

Uploaded
author hammock
date Wed, 01 Nov 2017 05:54:28 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
1 #!/usr/bin/env perl
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
2 #
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
3 # multithread.pl:
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
4 # Run a command with different file names as arguments on multiple threads in parallel
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
5 # Usage: multithread.pl '<fileglob>' '<command>' [-cpu <int>] [-v {0,1,2}]
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
6 #
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
7 #
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
8 # HHsuite version 2.0.16 (January 2013)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
9 #
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
10 # Reference:
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
11 # Remmert M., Biegert A., Hauser A., and Soding J.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
12 # HHblits: Lightning-fast iterative protein sequence searching by HMM-HMM alignment.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
13 # Nat. Methods, epub Dec 25, doi: 10.1038/NMETH.1818 (2011).
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
14
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
15 # (C) Johannes Soeding, 2012
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
16
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
17 # This program is free software: you can redistribute it and/or modify
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
18 # it under the terms of the GNU General Public License as published by
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
19 # the Free Software Foundation, either version 3 of the License, or
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
20 # (at your option) any later version.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
21
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
22 # This program is distributed in the hope that it will be useful,
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
25 # GNU General Public License for more details.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
26
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
27 # You should have received a copy of the GNU General Public License
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
28 # along with this program. If not, see <http://www.gnu.org/licenses/>.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
29
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
30 # We are very grateful for bug reports! Please contact us at soeding@genzentrum.lmu.de
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
31
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
32 use lib $ENV{"HHLIB"}."/scripts";
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
33 use HHPaths; # config file with path variables for nr, blast, psipred, pdb, dssp etc.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
34 use strict;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
35 use POSIX;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
36
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
# Variables (file-level: the signal handlers and System() below read several of them)
my $cpu=8;          # maximum number of child processes running at the same time
my $parent_pid=$$;  # main process id; children send USR1 to it when they are done
my $pid;            # process id of child
my %pid=();         # hash has all running PIDs as keys and the file name as data
my $children=0;     # number of child processes running
my $options="";     # command line arguments following <fileglob> and <command>
my $file;           # file currently being dispatched
my $ifile=0;        # 1-based index of $file within the glob result
my $v=1;            # verbosity: 0, 1 or 2
my $numerr=0;       # error counter (nothing in this script increments it at present)

if (scalar(@ARGV)<2) {
    die("
multithread.pl from HHsuite $VERSION
Run a command for many files in parallel using multiple threads
Usage: multithread.pl '<fileglob>' '<command>' [-cpu <int>] [-v {0,1,2}]

<command> can include symbol
\$file for the full filename, e.g. /tmp/hh/1c1g_A.a3m,
\$name for the filename without extension, e.g. /tmp/hh/1c1g_A, and
\$base for the filename without extension and path, e.g. 1c1g_A.

-cpu <int> number of threads to launch (default = $cpu)
-v {0,1,2} verbose mode (default = $v)

Example: multithread.pl '*.a3m' 'hhmake -i \$file 1>\$name.log 2>>error.log' -cpu 16
\n");
}

$|=1; # autoflush on


my @files=glob($ARGV[0]);
my $command=$ARGV[1];
$SIG{'CHLD'}='IGNORE';             # finished children are not reaped one by one (see perlipc)
$SIG{'USR1'}=\&ChildFinished;      # a child announces its completion with USR1
$SIG{'INT'} =\&KillAllProcesses;   # Ctrl-C: kill the children, then die

if (@ARGV>2) {
    $options.=join(" ",@ARGV[2..$#ARGV]);
}
# Set number of cpus to use
if ($options=~s/-cpu\s*(\d+)\s*//g) {$cpu=$1;}
if ($options=~s/-v\s*(\d+)\s*//g) {$v=$1;}

# With fewer than one slot the wait loop below could never end, because
# $children>=$cpu would hold even when no child is running.
if ($cpu<1) {
    print("WARNING: -cpu must be at least 1; using 1\n");
    $cpu=1;
}

# Warn if unknown options found
if ($options!~/^\s*$/) {$options=~s/^\s*(.*?)\s*$/$1/g; print("WARNING: unknown options '$options'\n");}

if ($v>=1) {print (scalar(@files)." files read in ...\n");}

foreach $file (@files) {
    $ifile++;

    # All cpus occupied? -> wait for a cpu to become free
    if ($children>=$cpu) {
        if ($v>=2) {print("\nParent $$ is sleeping (children=$children) ");}
        my $count=0;
        while ($children>=$cpu) {
            if ($count++>=10) {
                # Every 11th pass: recount the children from the PID table instead of
                # relying on the USR1-driven counter alone.
                $count=0;
                if ($v>=2) {print("\nProcesses running:");}
                $children=0;
                foreach $pid (keys(%pid)) {
                    if (! kill(0,$pid)) {  # kill(0,$pid) returns false if process is dead (finished)
                        if ($v>=2) {printf("\nPID %5.5s: %s is removed from process table",$pid,$pid{$pid});}
                        delete($pid{$pid}); # remove process from hash of PIDs
                    } else {
                        if ($v>=2) {printf("\nPID %5.5s: %s",$pid,$pid{$pid});}
                        $children++; # In case a USR1 signal was caught twice (??)
                    }
                }
                if ($v>=2) {print("\n");}
            } else {
                if ($v==1) {print(".");}
                select(undef, undef, undef, 0.1); # sleep 0.1 seconds
            }
        }
    }

    if ($pid=fork()) {
        # Main process
        $children++;
        $pid{$pid}="$file ($ifile)";

        # Short pause before dispatching the next file
        select(undef, undef, undef, 0.1); # sleep 0.1 seconds

    } elsif (defined $pid) {
        # Child process
        my $name; # filename without extension
        my $base; # basename without path
        # Strip the extension only if the last dot belongs to the final path component
        # and is not its first character; otherwise "./foo" or "/tmp/hh.d/foo" would be
        # cut at a dot inside the directory part.
        if ($file =~/(.*[^\/])\.[^.\/]*$/) {$name=$1;} else {$name=$file;}
        if ($name =~/.*\/(.*?)$/) {$base=$1;} else {$base=$name;}
        my $lcommand = $command; # per-child copy of the command template
        # NOTE: the names are inserted verbatim and the result is run through the shell;
        # file names containing blanks or shell metacharacters must be quoted in <command>.
        $lcommand=~s/\$file/$file/g;
        $lcommand=~s/\$name/$name/g;
        $lcommand=~s/\$base/$base/g;
        System($lcommand);
        if ($v>=2) {printf("\nProcess $$ for file %s (%i) finished.",$file,$ifile);}
        kill(USR1 => $parent_pid);  # tell the parent that a slot is free again
        exit;
    } else {
        die("\nError: fork returned undefined PID: $!\n");
    }
}

# Wait for all children to finish
while (wait() != -1) {}

if ($v>=1) {print ("\nAll processes should be finished now\n");}
if ($numerr>0) {print(STDERR "WARNING: $numerr commands returned with error code.\n");}
exit(0);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
151
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
152
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
# Signal handler for USR1, which a child sends to the main process right before
# it exits. Decrements the running-children counter so that the dispatch loop
# can start the next job.
# Declared without a prototype: it is only installed through \&ChildFinished
# and receives the signal name as its argument.
sub ChildFinished {
    $children--;
    $SIG{'USR1'}=\&ChildFinished;  # re-install the handler for the next USR1
    # Plain print: the message needs no printf conversions or extra arguments.
    if ($v>=2) {print("\nChildren counter reduced to children=$children");}
    return;
}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
159
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
# Signal handler for INT (Ctrl-C): kills every child recorded in %pid and then
# terminates the calling process with die().
#
# The kill is now performed regardless of the verbosity level; previously it was
# an argument of the printf that is only executed for $v>=2. The signal is sent
# as a positive KILL to the child itself: a negative signal number addresses the
# process group with that id, and nothing in this script makes a child a
# process-group leader.
sub KillAllProcesses
{
    foreach my $child (keys(%pid)) {
        my $nkilled = kill('KILL',$child);  # number of processes successfully signalled
        if ($v>=2) {printf("\nKill process $child: returned %i\n",$nkilled);}
    }
    die ("\nInterrupt: Killed main process $$\n");
}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
167
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
168 ################################################################################################
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
169 ### System command
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
170 ################################################################################################
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
# Run a shell command and report a failure on STDERR.
#
# Argument: the complete command line (passed to the shell by system()).
# Returns:  the raw status from system(): 0 on success, -1 if the command could
#           not be run, otherwise the wait status (exit code in the high byte,
#           terminating signal in the low 7 bits).
#
# The script sets $SIG{CHLD}='IGNORE'; with that disposition system() cannot
# collect the exit status of the command and yields -1 every time. The default
# disposition is therefore restored for the duration of this call only.
# A failure count is not kept here: where the script calls System it is inside a
# forked child, whose variables the main process does not see.
sub System {
    my ($cmd) = @_;
    if ($v>=2) {print("\n");}
    if ($v>=1) {print("\n".$cmd," ");}

    local $SIG{'CHLD'} = 'DEFAULT';
    my $status = system($cmd);
    if ($status == -1) {
        print(STDERR "\nERROR: command '$cmd' could not be run: $!\n");
    } elsif ($status & 127) {
        printf(STDERR "\nERROR: command '%s' was terminated by signal %d\n", $cmd, $status & 127);
    } elsif ($status != 0) {
        printf(STDERR "\nERROR: command '%s' returned error code %d\n", $cmd, $status >> 8);
    }
    return $status;
}