diff external_tools/darwin/lib/hh/scripts/multithread.pl @ 6:2277dd59b9f9 draft

Uploaded
author hammock
date Wed, 01 Nov 2017 05:54:28 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/external_tools/darwin/lib/hh/scripts/multithread.pl	Wed Nov 01 05:54:28 2017 -0400
@@ -0,0 +1,179 @@
+#!/usr/bin/env perl
+#
+# multithread.pl: 
+# Run a command with different file names as arguments on multiple threads in parallel 
+# Usage:   multithread.pl <fileglob> '<command>' [-cpu <int>] 
+#
+#
+#     HHsuite version 2.0.16 (January 2013)
+#
+#     Reference: 
+#     Remmert M., Biegert A., Hauser A., and Soding J.
+#     HHblits: Lightning-fast iterative protein sequence searching by HMM-HMM alignment.
+#     Nat. Methods, epub Dec 25, doi: 10.1038/NMETH.1818 (2011).
+
+#     (C) Johannes Soeding, 2012
+
+#     This program is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+
+#     This program is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU General Public License for more details.
+
+#     You should have received a copy of the GNU General Public License
+#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#     We are very grateful for bug reports! Please contact us at soeding@genzentrum.lmu.de
+
+use lib $ENV{"HHLIB"}."/scripts";
+use HHPaths;   # config file with path variables for nr, blast, psipred, pdb, dssp etc.
+use strict;
+use POSIX;
+
+# Variables 
+my $cpu=8;         # number of cpus to use
+my $parent_pid=$$; # main process id
+my $pid;           # process id of child
+my %pid=();        # hash has all running PIDs as keys and the file name as data
+my $children=0;    # number of child processes running
+my $options="";
+my $file;
+my $ifile=0;
+my $v=1;
+my $numerr=0;
+
+if (scalar(@ARGV)<2) {
+    die("
+multithread.pl from HHsuite $VERSION  
+Run a command for many files in parallel using multiple threads
+Usage: multithread.pl '<fileglob>' '<command>' [-cpu <int>] [-v {0,1,2}]
+
+<command> can include symbol 
+   \$file for the full filename, e.g. /tmp/hh/1c1g_A.a3m, 
+   \$name for the filename without extension, e.g. /tmp/hh/1c1g_A, and 
+   \$base for the filename without extension and path, e.g. 1c1g_A.
+
+ -cpu <int>  number of threads to launch (default = $cpu)
+ -v {0,1,2}  verbose mode (default = $v)
+
+Example: multithread.pl '*.a3m' 'hhmake -i \$file 1>\$name.log 2>>error.log' -cpu 16
+\n"); 
+}
+
+$|=1; # autoflush on
+
+
+my @files=glob($ARGV[0]); 
+my $command=$ARGV[1]; 
+$SIG{'CHLD'}='IGNORE';
+$SIG{'USR1'}=\&ChildFinished;
+$SIG{'INT'} =\&KillAllProcesses;
+
+if (@ARGV>2) {
+    $options.=join(" ",@ARGV[2..$#ARGV]);
+}
+# Set number of cpus to use
+if ($options=~s/-cpu\s*(\d+)\s*//g) {$cpu=$1;}
+if ($options=~s/-v\s*(\d+)\s*//g) {$v=$1;}
+
+# Warn if unknown options found
+if ($options!~/^\s*$/) {$options=~s/^\s*(.*?)\s*$/$1/g; print("WARNING: unknown options '$options'\n");}
+
+if ($v>=1) {print (scalar(@files)." files read in ...\n");}
+
+foreach $file (@files) {   
+    $ifile++;
+
+    # All cpus occupied? -> wait for a cpu to become free
+    if ($children>=$cpu) {
+	if ($v>=2) {print("\nParent $$ is sleeping (children=$children) ");}
+	my $count=0;
+	while ($children>=$cpu) {
+	    if ($count++>=10) {
+		$count=0;
+		if ($v>=2) {print("\nProcesses running:");}
+		$children=0;
+		foreach $pid (keys(%pid)) { 
+		    if (! kill(0,$pid)) {    # kill($pid,0) returns false if process is dead (finished)
+			if ($v>=2) {printf("\nPID %5.5s: %s is removed from process table",$pid,$pid{$pid});}
+			delete($pid{$pid});  # remove process from hash of PIDs
+		    } else {
+			if ($v>=2) {printf("\nPID %5.5s: %s",$pid,$pid{$pid});}
+			$children++;   # In case a USR1 signal was caught twice (??)
+		    }
+		}
+		if ($v>=2) {print("\n");}
+	    } else {
+		if ($v==1) {print(".");}
+		select(undef, undef, undef, 0.1); # sleep 0.1 seconds
+	    }
+	} 
+    }
+    
+    if ($pid=fork()) {
+	# Main process
+	$children++;
+	$pid{$pid}="$file ($ifile)";
+
+	# Print out running processes and remove defunct ones
+	select(undef, undef, undef, 0.1); # sleep 0.1 seconds
+
+    } elsif (defined $pid) {
+	# Child process
+	my $name;   # filename without extension
+	my $base;   # basename without path
+	if ($file =~/(.*)\..*?$/) {$name=$1;} else {$name=$file;}  
+	if ($name =~/.*\/(.*?)$/) {$base=$1;} else {$base=$name;} 
+	my $lcommand = $command; # need local variable for thread
+	$lcommand=~s/\$file/$file/g;
+	$lcommand=~s/\$name/$name/g;
+	$lcommand=~s/\$base/$base/g;
+	&System("$lcommand");
+	if ($v>=2) {printf("\nProcess $$ for file %s (%i) finished.",$file,$ifile);}
+	kill(USR1 => $parent_pid);
+	$SIG{'CHLD'}='IGNORE';
+	exit;
+    } else {
+	die("\nError: fork returned undefined PID: $!\n");
+    }
+}
+
+# Wait for all children to finish
+while (wait() != -1) {}
+
+if ($v>=1) {print ("\nAll processes should be finished now\n");}
+if ($numerr>0) {print(STDERR "WARNING: $numerr commands returned with error code.\n");}
+exit(0);
+
+
+sub ChildFinished() {
+    $children--;
+    $SIG{'USR1'}=\&ChildFinished;
+    if ($v>=2) {printf("\nChildren counter reduced to children=$children",$file,$ifile);}
+    return;
+}
+
+sub KillAllProcesses() 
+{
+    foreach $pid (keys(%pid)) { 
+	if ($v>=2) {printf("\nKill process $pid: returned %i\n",kill(-9,$pid));}
+    }
+    die ("\nInterrupt: Killed main process $$\n");
+}
+
+################################################################################################
+### System command
+################################################################################################
+sub System {
+    if ($v>=2) {print("\n");} 
+    if ($v>=1) {print("\n".$_[0]," ");}
+    if (system($_[0])) {
+# Why is always -1 returned???
+#       print(STDERR "\nERROR: command '$command' returned error code $?\n");
+#       $numerr++;
+    };
+}