Mercurial > repos > marpiech > norwich_tools
annotate tools/rdock/bin/sdsort @ 0:bc03dbb6eb37 draft
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
author | marpiech |
---|---|
date | Mon, 29 Aug 2016 03:38:13 -0400 |
parents | |
children |
rev | line source |
---|---|
0
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
1 #!/usr/bin/perl |
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
2 # Sorts SD records by given data field |
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
3 use lib "$ENV{'RBT_ROOT'}/lib"; |
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
4 |
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
5 use SDRecord; |
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
6 |
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
# Sort configuration, filled in from the command line below and read by sortSD.
my $SDSORTKEY;           # name of the data field to sort on (-f<DataField>)
my $SDSORTASCEND = 1;    # 1 = ascending, 0 = descending (-r)
my $SDSORTTEXT   = 1;    # 1 = text sort, 0 = numeric sort (-n)

# Fast-mode configuration.
my $FASTFORMAT = 0;           # 1 = sort each run of same-named records on its own (-s)
my $FASTKEY    = "_TITLE1";   # data field holding the compound name (-id<NameField>)

# Print help if no command line arguments
printHelpAndExit() if (scalar(@ARGV) == 0);

# Parse command line arguments.
# -h, -r and -n must match exactly; -s, -id and -f are matched as prefixes
# (the value of -id / -f is whatever follows the option letters, with no
# separating space). Anything that is not a recognised option is collected
# as a filename.
my @files;
while (scalar(@ARGV)) {
    # Lexical, so the parser does not leak a package global named $arg.
    my $arg = shift @ARGV;
    printHelpAndExit() if ($arg eq '-h');
    if ($arg eq '-r') {
        $SDSORTASCEND = 0;    # descending sort requested
    }
    elsif ($arg eq '-n') {
        $SDSORTTEXT = 0;      # numeric sort requested
    }
    elsif (index($arg, '-s') == 0) {
        $FASTFORMAT = 1;      # fast mode requested
    }
    elsif (index($arg, '-id') == 0) {
        $FASTKEY = substr($arg, 3);      # compound name field
    }
    elsif (index($arg, '-f') == 0) {
        $SDSORTKEY = substr($arg, 2);    # sort key
    }
    else {
        push @files, $arg;    # must be a filename
    }
}
# Put the filenames back in the arg list (presumably SDRecord::readRec reads
# its input through @ARGV / standard input -- confirm against SDRecord.pm).
push @ARGV, @files;
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
42 |
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
# Read records.
# $sdRec is reused as the read buffer; a copy of every record is kept in
# @records until the group it belongs to has been sorted and written.
my $sdRec = SDRecord->new;
my @records;
my $nRec = 0;

# Name of the compound the buffered records belong to (fast mode only).
# Left undefined until the first record has been seen, so that an empty or
# missing name on the first compound is still recognised as a distinct group
# instead of being merged into the compound that follows it.
my $lastid;
while ($sdRec->readRec('DATA'=>1,'LINES'=>1)) {
    $sdRec->addData('_REC' => ++$nRec);    # add record# as temp data field
    if ($FASTFORMAT) {
        my $id = $sdRec->{'DATA'}->{$FASTKEY};
        $id = "" unless defined $id;       # missing name field compares as ""
        # Compound name changed: sort and write the finished group, then
        # start buffering the next one.
        if (defined($lastid) && ($lastid ne $id)) {
            foreach my $rec (sort sortSD @records) {
                $rec->writeRec();
            }
            @records = ();                 # clear the list
        }
        $lastid = $id;
    }
    push(@records, $sdRec->copy('DATA'=>1,'LINES'=>1));
}

# Write sorted records: the whole input in normal mode, or the last
# compound's group in fast mode.
foreach my $rec (sort sortSD @records) {
    $rec->writeRec();
}
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
68 |
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
#######################################################
# sort function to sort SD records by given field
# handles text/numeric and ascending/descending sort
#
# Used as the comparator in "sort sortSD @records", so the two records being
# compared arrive in the package variables $a and $b. Reads the file-level
# settings $SDSORTKEY (field name), $SDSORTTEXT (1 = cmp, 0 = <=>) and
# $SDSORTASCEND (1 = ascending, 0 = descending).
#
# A missing sort key or field value compares as "" in text mode and as 0 in
# numeric mode. Records whose keys compare equal are returned in record-number
# (_REC) order, so the output order does not depend on the stability of
# Perl's sort implementation.
#
# Returns a negative, zero or positive number, as sort expects.
sub sortSD {
    my $field = defined($SDSORTKEY) ? $SDSORTKEY : '';
    my $left  = $a->{'DATA'}->{$field};
    my $right = $b->{'DATA'}->{$field};

    my $order;
    if ($SDSORTTEXT) {
        $left  = '' unless defined $left;
        $right = '' unless defined $right;
        $order = ($left cmp $right);
    }
    else {
        $left  = 0 unless defined $left;
        $right = 0 unless defined $right;
        $order = ($left <=> $right);
        # <=> yields undef when either side is NaN; treat that as a tie.
        $order = 0 unless defined $order;
    }
    $order = -$order unless $SDSORTASCEND;
    return $order if $order;

    # Tie-break on input order (always ascending, for both sort directions).
    my $recA = $a->{'DATA'}->{'_REC'};
    my $recB = $b->{'DATA'}->{'_REC'};
    $recA = 0 unless defined $recA;
    $recB = 0 unless defined $recB;
    return $recA <=> $recB;
}
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
82 |
bc03dbb6eb37
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
marpiech
parents:
diff
changeset
|
#######################################################################
# Prints the usage message to standard output and terminates the program
# with exit status 0. Takes no arguments and never returns.
# The usage synopsis lists every option the command-line parser accepts,
# including -s and -id<NameField>.
sub printHelpAndExit {
    print
        "\nSorts SD records by given data field\n",
        "\nUsage:\tsdsort [-n] [-r] [-f<DataField>] [-s] [-id<NameField>] [sdFiles]\n\n",
        "\t-n\t\tnumeric sort (default is text sort)\n",
        "\t-r\t\tdescending sort (default is ascending sort)\n",
        "\t-f<DataField>\tspecifies sort field\n",
        "\t-s\t\tfast mode. Sorts the records for each named compound independently (must be consecutive)\n",
        "\t-id<NameField>\tspecifies compound name field (default = 1st title line)\n\n",
        "Note:\t_REC (record #) is provided as a pseudo-data field\n",
        "\n\tIf SD file list not given, reads from standard input\n",
        "\tOutput is to standard output\n",
        "\tFast mode can be safely used for partial sorting of huge SD files of raw docking hits\n",
        "\twithout running into memory problems.\n\n";
    exit 0;
}