Mercurial > repos > marpiech > norwich_tools
view tools/rdock/bin/sdsort @ 0:bc03dbb6eb37 draft
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
author | marpiech |
---|---|
date | Mon, 29 Aug 2016 03:38:13 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl
# Sorts SD records by given data field.
#
# Records are read with SDRecord::readRec, ordered with sortSD() and written
# back out with SDRecord::writeRec.

# Add $RBT_ROOT/lib to the module search path before loading SDRecord.
use lib "$ENV{'RBT_ROOT'}/lib";

use SDRecord;

# Sort settings; the command line options parsed below override them.
my $SDSORTKEY;                 # data field to sort on (-f<DataField>)
my $SDSORTASCEND = 1;          # 1 = ascending, 0 = descending (-r)
my $SDSORTTEXT   = 1;          # 1 = text sort, 0 = numeric sort (-n)
my $FASTFORMAT   = 0;          # 1 = sort each run of same-id records on its own (-s)
my $FASTKEY      = "_TITLE1";  # data field holding the compound id (-id<NameField>)

# No command line arguments at all: print the help text and stop.
printHelpAndExit() unless @ARGV;

# Parse the command line. The order of the tests matters because the
# -s, -id and -f options are recognised by prefix only.
my @files;
while (@ARGV) {
    my $arg = shift @ARGV;
    printHelpAndExit() if $arg eq '-h';

    if ($arg eq '-r') {
        $SDSORTASCEND = 0;              # descending sort requested
    }
    elsif ($arg eq '-n') {
        $SDSORTTEXT = 0;                # numeric sort requested
    }
    elsif (substr($arg, 0, 2) eq '-s') {
        $FASTFORMAT = 1;                # fast (per-compound) mode requested
    }
    elsif (substr($arg, 0, 3) eq '-id') {
        $FASTKEY = substr($arg, 3);     # text after "-id" names the id field
    }
    elsif (substr($arg, 0, 2) eq '-f') {
        $SDSORTKEY = substr($arg, 2);   # text after "-f" names the sort field
    }
    else {
        push @files, $arg;              # anything else must be a filename
    }
}

# @ARGV is empty at this point, so this puts just the filenames back in the
# argument list.
# NOTE(review): presumably SDRecord::readRec reads its input via <>, which
# would make it pick these files up (or standard input if none) - confirm.
@ARGV = @files;

# Read the records, sorting and writing them as we go in fast mode.
my $sdRec     = SDRecord->new;
my @pending;            # records read but not yet written
my $recCount  = 0;      # number of records read so far
my $prevId    = "";     # id of the previous record (fast mode only)
while ($sdRec->readRec('DATA' => 1, 'LINES' => 1)) {
    # Add the record number as a temporary data field.
    $recCount++;
    $sdRec->addData('_REC' => $recCount);

    if ($FASTFORMAT) {
        my $id = $sdRec->{'DATA'}->{$FASTKEY};
        my $idChanged = ($prevId ne "") && ($prevId ne $id);
        if ($idChanged) {
            # A new compound starts here: write out the previous compound's
            # records in sorted order and start a fresh list.
            for my $record (sort sortSD @pending) {
                $record->writeRec();
            }
            @pending = ();
        }
        $prevId = $id;
    }

    # $sdRec is reused for every read, so keep a copy of the current record.
    push @pending, $sdRec->copy('DATA' => 1, 'LINES' => 1);
}

# Write whatever is still held: every record in normal mode, or the last
# compound's records in fast mode.
for my $record (sort sortSD @pending) {
    $record->writeRec();
}

#######################################################
# Comparison function for sort(): orders two SD records ($a and $b) by the
# data field named in $SDSORTKEY.
# Uses a text (cmp) or numeric (<=>) comparison according to $SDSORTTEXT, and
# swaps the operands for a descending sort when $SDSORTASCEND is false.
sub sortSD {
    my ($first, $second) = $SDSORTASCEND ? ($a, $b) : ($b, $a);
    my $left  = $first->{'DATA'}->{$SDSORTKEY};
    my $right = $second->{'DATA'}->{$SDSORTKEY};
    return $SDSORTTEXT ? ($left cmp $right) : ($left <=> $right);
}
#######################################################################
# Prints the usage message to standard output and terminates the script.
# Takes no arguments and never returns: it ends with a bare exit.
# The usage synopsis lists every option described in the option table below
# it, including -s and -id.
sub printHelpAndExit {
    print "\nSorts SD records by given data field\n";
    print "\nUsage:\tsdsort [-n] [-r] [-f<DataField>] [-s] [-id<NameField>] [sdFiles]\n\n";
    print "\t-n\t\tnumeric sort (default is text sort)\n";
    print "\t-r\t\tdescending sort (default is ascending sort)\n";
    print "\t-f<DataField>\tspecifies sort field\n";
    print "\t-s\t\tfast mode. Sorts the records for each named compound independently (must be consecutive)\n";
    print "\t-id<NameField>\tspecifies compound name field (default = 1st title line)\n\n";
    print "Note:\t_REC (record #) is provided as a pseudo-data field\n";
    print "\n\tIf SD file list not given, reads from standard input\n";
    print "\tOutput is to standard output\n";
    print "\tFast mode can be safely used for partial sorting of huge SD files of raw docking hits\n";
    print "\twithout running into memory problems.\n\n";
    exit;
}