diff tools/rdock/bin/sdsplit @ 0:bc03dbb6eb37 draft

planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
author marpiech
date Mon, 29 Aug 2016 03:38:13 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/sdsplit	Mon Aug 29 03:38:13 2016 -0400
@@ -0,0 +1,72 @@
+#!/usr/bin/perl
+# Splits SD records into multiple files of equal no. of records
+use lib "$ENV{'RBT_ROOT'}/lib";
+
+use FileHandle;
+use SDRecord;
+
+# Defaults, overridden by -<RecSize> and -o<OutputRoot> respectively
+my $recSize = 1000;
+my $outRoot = "tmp";
+
+printHelpAndExit() if (scalar(@ARGV) == 0);#no arguments: show help
+
+#Parse command line arguments; anything not starting with '-' is a filename
+my @files;
+while (scalar(@ARGV)) {
+  my $arg = shift @ARGV;
+  printHelpAndExit() if ($arg eq '-h');
+  if (index($arg,'-o')==0) {
+    $outRoot = substr($arg,2);
+  }
+  elsif (index($arg,'-')==0) {
+    #zero or non-numeric sizes would later die with "Illegal modulus zero"
+    $recSize = substr($arg,1);
+    die "Invalid record size '$arg': expected -<positive integer>\n"
+      unless ($recSize =~ /^[1-9][0-9]*$/);
+  }
+  else {
+    push @files,$arg;#must be a filename
+  }
+}
+push @ARGV,@files;#put the filenames back in the arg list
+
+my $sdRec = SDRecord->new;
+my $nRec=0;
+my $nFile=0;
+my $sdfh;
+
+#read records; every $recSize-th record starts a new <outRoot><n>.sd file
+while ($sdRec->readRec('LINES'=>1)) {
+  #check if we need to start a new output file
+  if ($nRec % $recSize == 0) {
+    $nFile++;
+    my $outFile = $outRoot . $nFile . ".sd";
+    if (defined $sdfh) {
+      #close the previous file explicitly so a failed write is not lost
+      select(STDOUT);
+      $sdfh->close or die "Can't close output file: $!";
+    }
+    #explicit "w" mode: $outRoot is never parsed as an open() mode prefix
+    $sdfh = FileHandle->new($outFile, "w");
+    die "Can't open $outFile: $!" if (!defined $sdfh);
+    print STDOUT "Opening $outFile\n";
+    $sdfh->autoflush(1);
+    #writeRec() is given no handle; presumably it prints to the selected one
+    select($sdfh);
+  }
+  $nRec++;
+  $sdRec->writeRec();
+}
+select(STDOUT);#reselect STDOUT as default
+if (defined $sdfh) { $sdfh->close or die "Can't close output file: $!"; }
+
+#######################################################################
+sub printHelpAndExit {#prints the usage text to STDOUT, then exits (status 0)
+  print "\nSplits SD records into multiple files of equal size\n";
+  print "\nUsage:\tsdsplit [-<RecSize>] [-o<OutputRoot>] [sdFiles]\n\n";
+  print "\t-<RecSize>\trecord size to split into (default = 1000 records)\n";
+  print "\t-o<OutputRoot>\tRoot name for output files (default = tmp)\n";
+  print "\n\tIf SD file list not given, reads from standard input\n";
+  exit;
+}