Mercurial > repos > marpiech > norwich_tools
comparison tools/rdock/bin/sdsort @ 0:bc03dbb6eb37 draft
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
author | marpiech |
---|---|
date | Mon, 29 Aug 2016 03:38:13 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bc03dbb6eb37 |
---|---|
#!/usr/bin/perl
# Sorts SD records by given data field
#
# Records are read through SDRecord, tagged with their input position
# (pseudo-field _REC), sorted on the field named with -f and written back
# out through SDRecord::writeRec. In fast mode (-s) consecutive records
# sharing the same compound id are sorted and written group by group, so
# only one group is held in memory at a time.
#
# NOTE: "use warnings" is intentionally not enabled. sortSD compares the
# sort field of every record directly, so records lacking that field would
# emit one "uninitialized value" warning per comparison on stderr.
use strict;
use lib "$ENV{'RBT_ROOT'}/lib";

use SDRecord;

my $SDSORTKEY;           # sort key (name of the data field, set by -f)
my $SDSORTASCEND = 1;    # 1 = ascending, 0 = descending
my $SDSORTTEXT   = 1;    # 1 = text sort, 0 = numeric sort

my $FASTFORMAT = 0;            # 1 = fast mode: sort each compound separately
my $FASTKEY    = "_TITLE1";    # data field holding the compound id in fast mode

# Print help if no command line arguments
printHelpAndExit() if (scalar(@ARGV) == 0);

# Parse command line arguments. Anything that is not a recognised option
# is collected as a filename.
my @files;
while (scalar(@ARGV)) {
    my $arg = shift @ARGV;
    printHelpAndExit() if ($arg eq '-h');
    if ($arg eq '-r') {
        $SDSORTASCEND = 0;    # descending sort requested
    }
    elsif ($arg eq '-n') {
        $SDSORTTEXT = 0;      # numeric sort requested
    }
    elsif (index($arg, '-s') == 0) {
        $FASTFORMAT = 1;      # fast mode requested
    }
    elsif (index($arg, '-id') == 0) {
        $FASTKEY = substr($arg, 3);      # text after "-id" is the id field
    }
    elsif (index($arg, '-f') == 0) {
        $SDSORTKEY = substr($arg, 2);    # text after "-f" is the sort key
    }
    else {
        push @files, $arg;    # must be a filename
    }
}
# Put the filenames back in the arg list.
# NOTE(review): presumably SDRecord::readRec reads from ARGV/STDIN - confirm.
push @ARGV, @files;

# Read records
my $sdRec = SDRecord->new;
my @records;
my $nRec = 0;

my $lastid = "";
while ($sdRec->readRec('DATA' => 1, 'LINES' => 1)) {
    $sdRec->addData('_REC' => ++$nRec);    # add record# as temp data field
    if ($FASTFORMAT) {
        my $id = $sdRec->{'DATA'}->{$FASTKEY};
        $id = "" unless defined $id;
        # A change of id closes the current group. Test for pending records
        # rather than for a non-empty $lastid, so that a group whose id is
        # empty (or missing) is still flushed instead of being merged into
        # the following compound.
        if (@records && ($lastid ne $id)) {
            writeSortedRecords(\@records);
            @records = ();    # clear the list
        }
        $lastid = $id;
    }
    push(@records, $sdRec->copy('DATA' => 1, 'LINES' => 1));
}

# Write the remaining records (everything, when not in fast mode)
writeSortedRecords(\@records);

#######################################################
# Writes the records in the referenced array in sortSD order.
# Takes a single array reference; the array itself is left untouched.
sub writeSortedRecords {
    my ($recs) = @_;
    foreach my $rec (sort sortSD @{$recs}) {
        $rec->writeRec();
    }
    return;
}

#######################################################
# Comparator for sort(): orders two SD records ($a, $b) by the data field
# named in $SDSORTKEY.
#   $SDSORTTEXT   true  => string comparison (cmp), false => numeric (<=>)
#   $SDSORTASCEND true  => ascending order,   false => descending
# Returns the usual -1 / 0 / 1 comparison result.
sub sortSD {
    my $first  = $a->{'DATA'}->{$SDSORTKEY};
    my $second = $b->{'DATA'}->{$SDSORTKEY};

    # A descending sort is the same comparison with the operands swapped.
    ($first, $second) = ($second, $first) unless ($SDSORTASCEND);

    return $SDSORTTEXT ? ($first cmp $second) : ($first <=> $second);
}

#######################################################################
# Prints the usage message to standard output and terminates the script.
# Takes no arguments and never returns.
sub printHelpAndExit {
    my @help = (
        "\nSorts SD records by given data field\n",
        # The synopsis lists every option the script parses, including
        # -s and -id which are described further down.
        "\nUsage:\tsdsort [-n] [-r] [-s] [-id<NameField>] [-f<DataField>] [sdFiles]\n\n",
        "\t-n\t\tnumeric sort (default is text sort)\n",
        "\t-r\t\tdescending sort (default is ascending sort)\n",
        "\t-f<DataField>\tspecifies sort field\n",
        "\t-s\t\tfast mode. Sorts the records for each named compound independently (must be consecutive)\n",
        "\t-id<NameField>\tspecifies compound name field (default = 1st title line)\n\n",
        "Note:\t_REC (record #) is provided as a pseudo-data field\n",
        "\n\tIf SD file list not given, reads from standard input\n",
        "\tOutput is to standard output\n",
        "\tFast mode can be safely used for partial sorting of huge SD files of raw docking hits\n",
        "\twithout running into memory problems.\n\n",
    );
    print @help;
    exit;
}