annotate tools/rdock/bin/sdsort @ 0:bc03dbb6eb37 draft

planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
author marpiech
date Mon, 29 Aug 2016 03:38:13 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
1 #!/usr/bin/perl
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
2 # Sorts SD records by given data field
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
3 use lib "$ENV{'RBT_ROOT'}/lib";
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
4
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
5 use SDRecord;
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
6
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
# Sort settings. They are filled in from the command line below and read
# by sortSD() every time two records are compared.
my $SDSORTKEY;           # name of the data field to sort on (-f<DataField>)
my $SDSORTASCEND = 1;    # 1 = ascending, 0 = descending (-r)
my $SDSORTTEXT   = 1;    # 1 = text sort, 0 = numeric sort (-n)

# Fast-mode settings (see the -s and -id options in printHelpAndExit).
my $FASTFORMAT = 0;            # 1 = sort each run of same-named records on its own
my $FASTKEY    = "_TITLE1";    # data field that holds the compound name

# Print help if no command line arguments
printHelpAndExit() if (scalar(@ARGV) == 0);

# Parse command line arguments. Anything that is not recognised as an
# option is collected as an input file name.
my @files;
foreach my $arg (@ARGV) {
    printHelpAndExit() if ($arg eq '-h');

    if ($arg eq '-r') {
        $SDSORTASCEND = 0;    # descending sort requested
    }
    elsif ($arg eq '-n') {
        $SDSORTTEXT = 0;      # numeric sort requested
    }
    # The remaining options are matched by prefix, so the order of these
    # tests matters: any argument starting with "-s" selects fast mode.
    elsif (index($arg, '-s') == 0) {
        $FASTFORMAT = 1;
    }
    elsif (index($arg, '-id') == 0) {
        $FASTKEY = substr($arg, 3);      # text after "-id" is the name field
    }
    elsif (index($arg, '-f') == 0) {
        $SDSORTKEY = substr($arg, 2);    # text after "-f" is the sort key
    }
    else {
        push @files, $arg;               # must be a filename
    }
}

# Leave only the file names in the argument list.
# NOTE(review): presumably SDRecord::readRec reads through <>, which is why
# the names are handed back via @ARGV - confirm against SDRecord.pm.
@ARGV = @files;
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
42
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
# Read every record, tag it with its input position, and write the records
# back out in sorted order. In fast mode (-s) each run of consecutive
# records sharing the same $FASTKEY value is sorted and written as soon as
# the run ends, so only one run is held in memory at a time.
my $sdRec = SDRecord->new;
my @records;      # records waiting to be sorted and written
my $nRec = 0;     # running record number, exposed to sorting as _REC
my $lastid;       # name of the current run; undef until a record has been read

# Write the pending records in sorted order and empty the buffer.
my $writeSorted = sub {
    foreach my $rec (sort sortSD @records) {
        $rec->writeRec();
    }
    @records = ();
};

while ($sdRec->readRec('DATA' => 1, 'LINES' => 1)) {
    $sdRec->addData('_REC' => ++$nRec);    # add record# as temp data field
    if ($FASTFORMAT) {
        my $id = $sdRec->{'DATA'}->{$FASTKEY};
        $id = "" unless defined $id;       # a missing name field counts as an empty name
        # A change of name closes the current run. Testing defined($lastid)
        # rather than comparing against "" means a run whose name is empty
        # is still flushed when the next, differently named, record arrives.
        $writeSorted->() if (defined($lastid) && $lastid ne $id);
        $lastid = $id;
    }
    # $sdRec is reused by the next readRec call, so keep a copy of it.
    push(@records, $sdRec->copy('DATA' => 1, 'LINES' => 1));
}

# Write whatever is still buffered: the whole input in normal mode, or the
# final run in fast mode.
$writeSorted->();
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
68
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
#######################################################
# Comparison routine for Perl's sort(): orders two SD records by the
# data field named in $SDSORTKEY.
#
# Inputs (all taken from variables, none from @_):
#   $a, $b         the two records being compared, set by sort(); each is
#                  a hash reference whose 'DATA' entry maps field names
#                  to values
#   $SDSORTKEY     name of the field to compare
#   $SDSORTTEXT    true = string comparison, false = numeric comparison
#   $SDSORTASCEND  true = ascending, false = descending
#
# Returns a negative, zero or positive number, as sort() expects.
#
# A record that lacks the field compares as the empty string (text sort)
# or as 0 (numeric sort). Records whose sort values are equal are ordered
# by their _REC field, ascending, so that ties always come out in input
# order instead of depending on the stability of the sort implementation.
sub sortSD {
    my $field = defined $SDSORTKEY ? $SDSORTKEY : '';

    # Swapping the operands is all that is needed for a descending sort.
    my ($first, $second) = $SDSORTASCEND ? ($a, $b) : ($b, $a);
    my $x = $first->{'DATA'}->{$field};
    my $y = $second->{'DATA'}->{$field};

    my $order;
    if ($SDSORTTEXT) {
        $x = '' unless defined $x;
        $y = '' unless defined $y;
        $order = $x cmp $y;
    }
    else {
        $x = 0 unless defined $x;
        $y = 0 unless defined $y;
        $order = $x <=> $y;    # undef if either side is NaN; treated as a tie below
    }
    return $order if $order;

    # Tie: fall back to the record number, in input order for both directions.
    my $recA = $a->{'DATA'}->{'_REC'};
    my $recB = $b->{'DATA'}->{'_REC'};
    return 0 unless (defined $recA && defined $recB);
    return $recA <=> $recB;
}
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
82
bc03dbb6eb37 planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff changeset
#######################################################################
# Prints the usage message to standard output and terminates the script.
# Takes no arguments and never returns. The exit status is the default
# of a bare exit (0), including when the script was run with no arguments.
sub printHelpAndExit {
    print "\nSorts SD records by given data field\n",
          # The synopsis lists every option the parser accepts.
          "\nUsage:\tsdsort [-h] [-n] [-r] [-s] [-id<NameField>] [-f<DataField>] [sdFiles]\n\n",
          "\t-h\t\tprint this help and exit\n",
          "\t-n\t\tnumeric sort (default is text sort)\n",
          "\t-r\t\tdescending sort (default is ascending sort)\n",
          "\t-f<DataField>\tspecifies sort field\n",
          "\t-s\t\tfast mode. Sorts the records for each named compound independently (must be consecutive)\n",
          "\t-id<NameField>\tspecifies compound name field (default = 1st title line)\n\n",
          "Note:\t_REC (record #) is provided as a pseudo-data field\n",
          "\n\tIf SD file list not given, reads from standard input\n",
          "\tOutput is to standard output\n",
          "\tFast mode can be safely used for partial sorting of huge SD files of raw docking hits\n",
          "\twithout running into memory problems.\n\n";
    exit;
}