Mercurial > repos > hammock > hammock
comparison external_tools/darwin/lib/hh/scripts/multithread.pl @ 6:2277dd59b9f9 draft
Uploaded
author | hammock |
---|---|
date | Wed, 01 Nov 2017 05:54:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:b7652b7c97bd | 6:2277dd59b9f9 |
---|---|
1 #!/usr/bin/env perl | |
2 # | |
3 # multithread.pl: | |
4 # Run a command with different file names as arguments on multiple threads in parallel | |
5 # Usage: multithread.pl '<fileglob>' '<command>' [-cpu <int>] [-v {0,1,2}] | |
6 # | |
7 # | |
8 # HHsuite version 2.0.16 (January 2013) | |
9 # | |
10 # Reference: | |
11 # Remmert M., Biegert A., Hauser A., and Soding J. | |
12 # HHblits: Lightning-fast iterative protein sequence searching by HMM-HMM alignment. | |
13 # Nat. Methods, epub Dec 25, doi: 10.1038/NMETH.1818 (2011). | |
14 | |
15 # (C) Johannes Soeding, 2012 | |
16 | |
17 # This program is free software: you can redistribute it and/or modify | |
18 # it under the terms of the GNU General Public License as published by | |
19 # the Free Software Foundation, either version 3 of the License, or | |
20 # (at your option) any later version. | |
21 | |
22 # This program is distributed in the hope that it will be useful, | |
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 # GNU General Public License for more details. | |
26 | |
27 # You should have received a copy of the GNU General Public License | |
28 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
29 | |
30 # We are very grateful for bug reports! Please contact us at soeding@genzentrum.lmu.de | |
31 | |
32 use lib $ENV{"HHLIB"}."/scripts"; | |
33 use HHPaths; # config file with path variables for nr, blast, psipred, pdb, dssp etc. | |
34 use strict; | |
35 use POSIX; | |
36 | |
37 # Variables | |
38 my $cpu=8; # number of cpus to use | |
39 my $parent_pid=$$; # main process id | |
40 my $pid; # process id of child | |
41 my %pid=(); # hash has all running PIDs as keys and the file name as data | |
42 my $children=0; # number of child processes running | |
43 my $options=""; | |
44 my $file; | |
45 my $ifile=0; | |
46 my $v=1; | |
47 my $numerr=0; | |
48 | |
49 if (scalar(@ARGV)<2) { | |
50 die(" | |
51 multithread.pl from HHsuite $VERSION | |
52 Run a command for many files in parallel using multiple threads | |
53 Usage: multithread.pl '<fileglob>' '<command>' [-cpu <int>] [-v {0,1,2}] | |
54 | |
55 <command> can include symbol | |
56 \$file for the full filename, e.g. /tmp/hh/1c1g_A.a3m, | |
57 \$name for the filename without extension, e.g. /tmp/hh/1c1g_A, and | |
58 \$base for the filename without extension and path, e.g. 1c1g_A. | |
59 | |
60 -cpu <int> number of threads to launch (default = $cpu) | |
61 -v {0,1,2} verbose mode (default = $v) | |
62 | |
63 Example: multithread.pl '*.a3m' 'hhmake -i \$file 1>\$name.log 2>>error.log' -cpu 16 | |
64 \n"); | |
65 } | |
66 | |
67 $|=1; # autoflush on | |
68 | |
69 | |
70 my @files=glob($ARGV[0]); | |
71 my $command=$ARGV[1]; | |
72 $SIG{'CHLD'}='IGNORE'; | |
73 $SIG{'USR1'}=\&ChildFinished; | |
74 $SIG{'INT'} =\&KillAllProcesses; | |
75 | |
76 if (@ARGV>2) { | |
77 $options.=join(" ",@ARGV[2..$#ARGV]); | |
78 } | |
79 # Set number of cpus to use | |
80 if ($options=~s/-cpu\s*(\d+)\s*//g) {$cpu=$1;} | |
81 if ($options=~s/-v\s*(\d+)\s*//g) {$v=$1;} | |
82 | |
83 # Warn if unknown options found | |
84 if ($options!~/^\s*$/) {$options=~s/^\s*(.*?)\s*$/$1/g; print("WARNING: unknown options '$options'\n");} | |
85 | |
86 if ($v>=1) {print (scalar(@files)." files read in ...\n");} | |
87 | |
88 foreach $file (@files) { | |
89 $ifile++; | |
90 | |
91 # All cpus occupied? -> wait for a cpu to become free | |
92 if ($children>=$cpu) { | |
93 if ($v>=2) {print("\nParent $$ is sleeping (children=$children) ");} | |
94 my $count=0; | |
95 while ($children>=$cpu) { | |
96 if ($count++>=10) { | |
97 $count=0; | |
98 if ($v>=2) {print("\nProcesses running:");} | |
99 $children=0; | |
100 foreach $pid (keys(%pid)) { | |
101 if (! kill(0,$pid)) { # kill(0,$pid) returns false if process is dead (finished) | |
102 if ($v>=2) {printf("\nPID %5.5s: %s is removed from process table",$pid,$pid{$pid});} | |
103 delete($pid{$pid}); # remove process from hash of PIDs | |
104 } else { | |
105 if ($v>=2) {printf("\nPID %5.5s: %s",$pid,$pid{$pid});} | |
106 $children++; # In case a USR1 signal was caught twice (??) | |
107 } | |
108 } | |
109 if ($v>=2) {print("\n");} | |
110 } else { | |
111 if ($v==1) {print(".");} | |
112 select(undef, undef, undef, 0.1); # sleep 0.1 seconds | |
113 } | |
114 } | |
115 } | |
116 | |
117 if ($pid=fork()) { | |
118 # Main process | |
119 $children++; | |
120 $pid{$pid}="$file ($ifile)"; | |
121 | |
122 # Pause briefly before starting the next job; running processes are printed and defunct ones removed in the wait loop above | |
123 select(undef, undef, undef, 0.1); # sleep 0.1 seconds | |
124 | |
125 } elsif (defined $pid) { | |
126 # Child process | |
127 my $name; # filename without extension | |
128 my $base; # basename without path | |
129 if ($file =~/(.*)\..*?$/) {$name=$1;} else {$name=$file;} | |
130 if ($name =~/.*\/(.*?)$/) {$base=$1;} else {$base=$name;} | |
131 my $lcommand = $command; # per-child copy of the command template, so the placeholders can be substituted below | |
132 $lcommand=~s/\$file/$file/g; | |
133 $lcommand=~s/\$name/$name/g; | |
134 $lcommand=~s/\$base/$base/g; | |
135 &System("$lcommand"); | |
136 if ($v>=2) {printf("\nProcess $$ for file %s (%i) finished.",$file,$ifile);} | |
137 kill(USR1 => $parent_pid); | |
138 $SIG{'CHLD'}='IGNORE'; | |
139 exit; | |
140 } else { | |
141 die("\nError: fork returned undefined PID: $!\n"); | |
142 } | |
143 } | |
144 | |
145 # Wait for all children to finish | |
146 while (wait() != -1) {} | |
147 | |
148 if ($v>=1) {print ("\nAll processes should be finished now\n");} | |
149 if ($numerr>0) {print(STDERR "WARNING: $numerr commands returned with error code.\n");} | |
150 exit(0); | |
151 | |
152 | |
153 sub ChildFinished() { | |
154 $children--; | |
155 $SIG{'USR1'}=\&ChildFinished; | |
156 if ($v>=2) {printf("\nChildren counter reduced to children=$children",$file,$ifile);} | |
157 return; | |
158 } | |
159 | |
160 sub KillAllProcesses() | |
161 { | |
162 foreach $pid (keys(%pid)) { | |
163 if ($v>=2) {printf("\nKill process $pid: returned %i\n",kill(-9,$pid));} | |
164 } | |
165 die ("\nInterrupt: Killed main process $$\n"); | |
166 } | |
167 | |
168 ################################################################################################ | |
169 ### System command | |
170 ################################################################################################ | |
171 sub System { | |
172 if ($v>=2) {print("\n");} | |
173 if ($v>=1) {print("\n".$_[0]," ");} | |
174 if (system($_[0])) { | |
175 # Why is -1 always returned? Presumably because SIGCHLD is set to 'IGNORE', so the wait inside system() fails -- TODO confirm | |
176 # print(STDERR "\nERROR: command '$command' returned error code $?\n"); | |
177 # $numerr++; | |
178 }; | |
179 } |