comparison tools/rdock/bin/sdsort @ 0:bc03dbb6eb37 draft

planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
author marpiech
date Mon, 29 Aug 2016 03:38:13 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:bc03dbb6eb37
1 #!/usr/bin/perl
2 # Sorts SD records by given data field
3 use lib "$ENV{'RBT_ROOT'}/lib";
4
5 use SDRecord;
6
# Sort configuration; the defaults below are overridden by command line options.
my $SDSORTKEY;               # name of the data field to sort on (-f<DataField>)
my $SDSORTASCEND = 1;        # 1 = ascending, 0 = descending (-r)
my $SDSORTTEXT   = 1;        # 1 = text sort, 0 = numeric sort (-n)

# Fast mode configuration.
my $FASTFORMAT = 0;          # 1 = sort each run of same-named records separately (-s)
my $FASTKEY    = "_TITLE1";  # data field holding the compound name (-id<NameField>)

# Print help if no command line arguments
printHelpAndExit() if (scalar(@ARGV) == 0);

# Parse command line arguments.
# -s, -id and -f are matched by prefix, so the value is glued to the option
# (e.g. -fSCORE). Anything that is not recognised as an option is treated
# as an input file name.
my @files;
while (scalar(@ARGV)) {
    my $arg = shift @ARGV;   # lexical: was an undeclared package global
    printHelpAndExit() if ($arg eq '-h');
    if ($arg eq '-r') {
        $SDSORTASCEND = 0;            # descending sort requested
    }
    elsif ($arg eq '-n') {
        $SDSORTTEXT = 0;              # numeric sort requested
    }
    elsif (index($arg, '-s') == 0) {
        $FASTFORMAT = 1;              # any argument starting with -s enables fast mode
    }
    elsif (index($arg, '-id') == 0) {
        $FASTKEY = substr($arg, 3);   # text after "-id" is the name field
    }
    elsif (index($arg, '-f') == 0) {
        $SDSORTKEY = substr($arg, 2); # text after "-f" is the sort key
    }
    else {
        push @files, $arg;            # must be a filename
    }
}
# Put the filenames back in the arg list (presumably so that SDRecord reads
# them through @ARGV, falling back to standard input -- confirm in SDRecord).
push @ARGV, @files;
42
# Read all records, sort them, and write them out.
#
# In normal mode every record is held in memory and one sorted list is
# written at the end. In fast mode (-s) the records are flushed (sorted and
# written) each time the compound id in $FASTKEY changes, so only one run of
# consecutive same-named records is held at a time.
my $sdRec = SDRecord->new;
my @records;
my $nRec = 0;

# Compound id of the previous record. undef means "no record read yet";
# an empty string is a legitimate id and must not be mistaken for that,
# otherwise an unnamed group would be merged with the group following it.
my $lastid;
while ($sdRec->readRec('DATA'=>1,'LINES'=>1)) {
    $sdRec->addData('_REC' => ++$nRec);   # add record# as temp data field
    if ($FASTFORMAT) {
        my $id = $sdRec->{'DATA'}->{$FASTKEY};
        $id = "" unless defined $id;      # record lacks the name field
        if (defined($lastid) && ($lastid ne $id)) {
            # A new compound starts here: emit the finished group.
            writeSortedRecords(@records);
            @records = ();                # clear the list
        }
        $lastid = $id;
    }
    # Store a copy; $sdRec itself is reused for the next readRec call.
    push(@records, $sdRec->copy('DATA'=>1,'LINES'=>1));
}

# Write the remaining records (the whole input in normal mode, the last
# group in fast mode).
writeSortedRecords(@records);

# Sorts the given records with sortSD and writes each one via writeRec().
sub writeSortedRecords {
    my @batch = @_;
    foreach my $rec (sort sortSD @batch) {
        $rec->writeRec();
    }
}
68
#######################################################
# Comparison routine for Perl's sort(): orders the two SD records in
# $a and $b by the data field named in $SDSORTKEY.
#   $SDSORTTEXT   true => string comparison (cmp), false => numeric (<=>)
#   $SDSORTASCEND true => ascending order,   false => descending order
# Returns the usual negative / zero / positive comparison result.
sub sortSD {
    # A descending sort is the ascending comparison with the operands swapped.
    my ($first, $second) = $SDSORTASCEND ? ($a, $b) : ($b, $a);

    my $lhs = $first->{'DATA'}->{$SDSORTKEY};
    my $rhs = $second->{'DATA'}->{$SDSORTKEY};

    return $SDSORTTEXT ? ($lhs cmp $rhs) : ($lhs <=> $rhs);
}
82
#######################################################################
# Prints the usage text to standard output and terminates the script.
# Takes no arguments and does not return to the caller.
sub printHelpAndExit {
    print "\nSorts SD records by given data field\n";
    # The synopsis lists every option described below, including -s and -id.
    print "\nUsage:\tsdsort [-n] [-r] [-f<DataField>] [-s] [-id<NameField>] [sdFiles]\n\n";
    print "\t-n\t\tnumeric sort (default is text sort)\n";
    print "\t-r\t\tdescending sort (default is ascending sort)\n";
    print "\t-f<DataField>\tspecifies sort field\n";
    print "\t-s\t\tfast mode. Sorts the records for each named compound independently (must be consecutive)\n";
    print "\t-id<NameField>\tspecifies compound name field (default = 1st title line)\n\n";
    print "Note:\t_REC (record #) is provided as a pseudo-data field\n";
    print "\n\tIf SD file list not given, reads from standard input\n";
    print "\tOutput is to standard output\n";
    print "\tFast mode can be safely used for partial sorting of huge SD files of raw docking hits\n";
    print "\twithout running into memory problems.\n\n";
    exit;
}