Mercurial > repos > marpiech > norwich_tools
comparison tools/rdock/bin/sdsort @ 0:bc03dbb6eb37 draft
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
| author | marpiech |
|---|---|
| date | Mon, 29 Aug 2016 03:38:13 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:bc03dbb6eb37 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 # Sorts SD records by given data field | |
| 3 use lib "$ENV{'RBT_ROOT'}/lib"; | |
| 4 | |
| 5 use SDRecord; | |
| 6 | |
# Sort configuration; read by sortSD when comparing records.
my $SDSORTKEY;            # name of the data field to sort on (-f<DataField>)
my $SDSORTASCEND = 1;     # 1 = ascending, 0 = descending (-r)
my $SDSORTTEXT   = 1;     # 1 = text sort, 0 = numeric sort (-n)

# Fast-mode configuration.
my $FASTFORMAT = 0;           # 1 = sort each run of same-id records separately (-s)
my $FASTKEY    = "_TITLE1";   # data field holding the compound id (-id<NameField>)

# Print help if no command line arguments
printHelpAndExit() if (!@ARGV);

# Parse command line arguments.
# Anything that is not a recognised option is collected as a filename.
# Options that carry a value (-id, -f) take it from the same argument,
# immediately after the option letters.
my @files;
while (@ARGV) {
    # Lexical, so the loop variable does not leak into the package namespace.
    my $arg = shift @ARGV;
    printHelpAndExit() if ($arg eq '-h');
    if ($arg eq '-r') {
        $SDSORTASCEND = 0;    # descending sort requested
    }
    elsif ($arg eq '-n') {
        $SDSORTTEXT = 0;      # numeric sort requested
    }
    elsif (index($arg, '-s') == 0) {
        $FASTFORMAT = 1;      # any argument starting with -s enables fast mode
    }
    elsif (index($arg, '-id') == 0) {
        $FASTKEY = substr($arg, 3);      # text after "-id" is the id field name
    }
    elsif (index($arg, '-f') == 0) {
        $SDSORTKEY = substr($arg, 2);    # text after "-f" is the sort key
    }
    else {
        push @files, $arg;    # must be a filename
    }
}
# Put the filenames back in the arg list.
# NOTE(review): presumably SDRecord's reader picks them up from @ARGV
# (e.g. via <>) - confirm in SDRecord.pm.
push @ARGV, @files;
| 42 | |
# Read records, buffering them until they can be sorted and written.
# Normal mode: every record is buffered and one sorted batch is written at
# the end. Fast mode: a batch is written each time the compound id changes,
# so only one compound's records are buffered at a time.
my $sdRec = SDRecord->new;
my @records;      # records waiting to be sorted and written
my $nRec = 0;     # running record counter, stored as the _REC pseudo-field

# Sort the buffered records with sortSD, write them out, empty the buffer.
my $flushRecords = sub {
    foreach my $rec (sort sortSD @records) {
        $rec->writeRec();
    }
    @records = ();
};

my $lastid = "";
while ($sdRec->readRec('DATA'=>1,'LINES'=>1)) {
    $sdRec->addData('_REC' => ++$nRec);    # add record# as temp data field
    if ($FASTFORMAT) {
        my $id = $sdRec->{'DATA'}->{$FASTKEY};
        $id = "" if (!defined $id);        # a missing id field counts as an empty id
        # Flush whenever the id differs from the buffered group's id.
        # Testing the buffer (rather than $lastid ne "") keeps a record with
        # an empty id from being merged into the neighbouring compound's group.
        $flushRecords->() if (@records && ($lastid ne $id));
        $lastid = $id;
    }
    push(@records, $sdRec->copy('DATA'=>1,'LINES'=>1));
}

# Write whatever is still buffered (everything, when not in fast mode).
$flushRecords->();
| 68 | |
#######################################################
# Comparator for Perl's sort(): orders two SD records ($a and $b) by the
# value each one stores under $SDSORTKEY in its DATA hash.
#   $SDSORTTEXT   true => string comparison (cmp), false => numeric (<=>)
#   $SDSORTASCEND true => ascending order,         false => descending
# Returns the usual -1 / 0 / 1 comparison result.
sub sortSD {
    # Swapping the operands is what reverses the sort direction.
    my ($first, $second) = $SDSORTASCEND ? ($a, $b) : ($b, $a);
    my $firstValue  = $first->{'DATA'}->{$SDSORTKEY};
    my $secondValue = $second->{'DATA'}->{$SDSORTKEY};
    return $SDSORTTEXT ? ($firstValue cmp $secondValue)
                       : ($firstValue <=> $secondValue);
}
| 82 | |
#######################################################################
# Prints the sdsort usage summary to standard output and then ends the
# script with exit (default exit status). Never returns to the caller.
# The synopsis lists every option the argument parser accepts.
sub printHelpAndExit {
    print "\nSorts SD records by given data field\n",
          "\nUsage:\tsdsort [-h] [-n] [-r] [-f<DataField>] [-s] [-id<NameField>] [sdFiles]\n\n",
          "\t-h\t\tprints this help text\n",
          "\t-n\t\tnumeric sort (default is text sort)\n",
          "\t-r\t\tdescending sort (default is ascending sort)\n",
          "\t-f<DataField>\tspecifies sort field\n",
          "\t-s\t\tfast mode. Sorts the records for each named compound independently (must be consecutive)\n",
          "\t-id<NameField>\tspecifies compound name field (default = 1st title line)\n\n",
          "Note:\t_REC (record #) is provided as a pseudo-data field\n",
          "\n\tIf SD file list not given, reads from standard input\n",
          "\tOutput is to standard output\n",
          "\tFast mode can be safely used for partial sorting of huge SD files of raw docking hits\n",
          "\twithout running into memory problems.\n\n";
    exit;
}
