annotate galaxy-tools/tools/rdock/bin/sdsplit @ 0:4eb3f9cb2a51 draft

Uploaded
author dzesikah
date Fri, 26 Aug 2016 09:53:37 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
1 #!/usr/bin/perl
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
2 # Splits SD records into multiple files of equal no. of records
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
3 use lib "$ENV{'RBT_ROOT'}/lib";
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
4
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
5 use FileHandle;
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
6 use SDRecord;
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
7
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
# Number of records written to each output file (override with -<RecSize>).
my $recSize = 1000;

# Root name for output files (override with -o<OutputRoot>).
my $outRoot = "tmp";

# Print help if no command line arguments
printHelpAndExit() if (scalar(@ARGV) == 0);

# Parse command line arguments:
#   -h             print help and exit
#   -o<OutputRoot> set the output root name
#   -<RecSize>     set the number of records per output file
#   anything else  is collected as an input file name
my @files;
while (scalar(@ARGV)) {
  my $arg = shift @ARGV;
  printHelpAndExit() if ($arg eq '-h');
  if (index($arg, '-o') == 0) {
    $outRoot = substr($arg, 2);
  }
  elsif (index($arg, '-') == 0) {
    my $size = substr($arg, 1);
    # The record size is later used as the right-hand side of a modulus.
    # A zero, empty or non-numeric value (e.g. "-0", "-", or a mistyped
    # option) would abort there with "Illegal modulus zero", so reject it
    # here with a message that points at the offending argument.
    if ($size !~ /\A[0-9]+\z/ || $size == 0) {
      die "Invalid record size '$arg': expected -<RecSize> with a positive integer\n";
    }
    $recSize = $size;
  }
  else {
    push @files, $arg;    # must be a filename
  }
}
# Put the filenames back in the arg list so later reads can pick them up.
push @ARGV, @files;
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
33
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
# Split the incoming SD records into files named <outRoot><n>.sd, starting a
# new file every $recSize records.
my $sdRec   = SDRecord->new;
my $nRec    = 0;    # records read so far
my $nFile   = 0;    # output files opened so far
my $sdfh;           # handle of the output file currently being written
my $curFile;        # name of that file, kept for error messages

# Read records
while ($sdRec->readRec('LINES' => 1)) {
  # Check if we need to start a new output file
  if ($nRec % $recSize == 0) {
    # Close the previous file explicitly so that a failed write
    # (e.g. disk full) is reported instead of being silently dropped.
    if (defined $sdfh) {
      $sdfh->close or die "Error closing $curFile: $!";
    }
    $nFile++;
    $curFile = $outRoot . $nFile . ".sd";
    # Pass the mode separately so that characters in the user-supplied
    # output root can never be interpreted as part of the open mode.
    $sdfh = FileHandle->new($curFile, 'w');
    if (!defined $sdfh) {
      die "Can't open $curFile: $!";
    }
    print STDOUT "Opening $curFile\n";
    $sdfh->autoflush(1);
    # writeRec() is called without a handle below, so it presumably prints
    # to the currently selected default handle - confirm against SDRecord.
    select($sdfh);
  }
  $nRec++;
  $sdRec->writeRec();
}
if (defined $sdfh) {
  $sdfh->close or die "Error closing $curFile: $!";
}
select(STDOUT);    # reselect STDOUT as default
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
63
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
64 #######################################################################
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
# Print the usage summary to the default output handle and terminate the
# script. Takes no arguments and never returns; the bare exit ends the
# program with status 0.
sub printHelpAndExit {
  # The stray backslash before "Splits" (an unrecognized "\S" escape that
  # Perl passes through as "S", with a warning under -w) has been removed;
  # the emitted text is unchanged.
  print "Splits SD records into multiple files of equal size\n";
  print "\nUsage:\tsdsplit [-<RecSize>] [-o<OutputRoot>] [sdFiles]\n\n";
  print "\t-<RecSize>\trecord size to split into (default = 1000 records)\n";
  print "\t-o<OutputRoot>\tRoot name for output files (default = tmp)\n";
  print "\n\tIf SD file list not given, reads from standard input\n";
  exit;
}