view tools/rdock/bin/sdsplit @ 0:bc03dbb6eb37 draft

planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
author marpiech
date Mon, 29 Aug 2016 03:38:13 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl
# Splits SD records into multiple files of equal no. of records
use lib "$ENV{'RBT_ROOT'}/lib";

use FileHandle;
use SDRecord;

# Number of records written to each output file (override with -<RecSize>)
my $recSize = 1000;

# Root name for the output files (override with -o<OutputRoot>)
my $outRoot = "tmp";

# Print help if no command line arguments
printHelpAndExit() if (scalar(@ARGV) == 0);

# Parse command line arguments:
#   -h              print usage and exit
#   -o<OutputRoot>  set the output root name
#   -<RecSize>      set the number of records per output file
#   anything else   is collected as an input file name
my @files;
while (scalar(@ARGV)) {
  my $arg = shift @ARGV;
  printHelpAndExit() if ($arg eq '-h');
  if (index($arg,'-o')==0) {
    $outRoot = substr($arg,2);
  }
  elsif (index($arg,'-')==0) {
    my $size = substr($arg,1);
    # The record size is used as a modulus when splitting, so an empty,
    # non-numeric or zero value would abort with "Illegal modulus zero"
    # only once the first record is processed. Reject it here instead.
    if ($size !~ /\A[0-9]+\z/ || $size == 0) {
      die "sdsplit: invalid record size '$size' (expected a positive integer)\n";
    }
    $recSize = $size + 0;
  }
  else {
    push @files,$arg;#must be a filename
  }
}
# Put the filenames back in the arg list
# NOTE(review): presumably SDRecord::readRec reads its input via @ARGV/STDIN - confirm
push @ARGV,@files;

# Read SD records one at a time and distribute them over numbered output
# files named <OutputRoot><n>.sd, starting a new file every $recSize records.
my $sdRec = SDRecord->new;
my $nRec=0;   # number of records written so far
my $nFile=0;  # number of output files opened so far
my $sdfh;     # handle of the output file currently being filled

#read records
while ($sdRec->readRec('LINES'=>1)) {
  #check if we need to start a new output file
  if ($nRec % $recSize == 0) {
    if (defined $sdfh) {
      # Deselect the finished file before closing it, and report any
      # error at close rather than dropping it silently.
      select(STDOUT);
      $sdfh->close or die "Can't close output file $nFile: $!";
    }
    $nFile++;
    my $outFile = $outRoot . $nFile . ".sd";
    # Pass the mode separately so that characters in the user-supplied
    # output root are never interpreted as part of the open mode.
    $sdfh = FileHandle->new($outFile, 'w');
    if (!defined $sdfh) {
      die "Can't open $outFile: $!";
    }
    print STDOUT "Opening $outFile\n";
    $sdfh->autoflush(1);
    # NOTE(review): writeRec() is called without a handle, so it presumably
    # prints to the currently selected handle - confirm against SDRecord.
    select($sdfh);
  }
  $nRec++;
  $sdRec->writeRec();
}
select(STDOUT);#reselect STDOUT as default
if (defined $sdfh) {
  $sdfh->close or die "Can't close output file $nFile: $!";
  undef $sdfh;
}

#######################################################################
# Prints the usage message for sdsplit to the currently selected output
# handle and terminates the program. Takes no arguments; never returns.
sub printHelpAndExit {
  print "\nSplits SD records into multiple files of equal size\n";
  print "\nUsage:\tsdsplit [-<RecSize>] [-o<OutputRoot>] [sdFiles]\n\n";
  print "\t-<RecSize>\trecord size to split into (default = 1000 records)\n";
  print "\t-o<OutputRoot>\tRoot name for output files (default = tmp)\n";
  print "\n\tIf SD file list not given, reads from standard input\n";
  exit;
}