annotate tools/rdock/bin/sdreport @ 3:279ba0732f87 draft default tip

planemo upload
author marpiech
date Mon, 29 Aug 2016 09:07:58 -0400
parents 30e2440b2173
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
1 #!/usr/bin/perl
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
2 # Produces text summaries of SD records *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
3 # *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
4 # Usage: sdreport [-h] [-l] [-t[<Fields>]] [-c[<Fields>]] [-s] [-id<DataField>] [-o] [-norm] [-sup] [-nh] [sdFiles] *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
5 # *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
6 # -l output data fields for each record as processed *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
7 # -t tabulate Rbt.Score.* fields for each record as processed *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
8 # -s summarise data fields for all records *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
9 # -id<DataField> use <DataField> as the record key (default _TITLE1); -s then summarises data fields for each unique value *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
10 # of <DataField> *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
11 # *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
12 # Note: If -l or -t are combined with -s, the listing/table is output *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
13 # within each ligand summary *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
14 # If sdFiles not given, reads from standard input *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
15 # Output is to standard output *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
16 # Default is equivalent to sdreport -l *
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
17 #*******************************************************************************
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
18 use lib "$ENV{'RBT_ROOT'}/lib";
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
19
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
20 use SDRecord;
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
21
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
# Default output columns. Each @...Fields list names the SD data fields to
# report and the parallel @...Headings list gives the column title printed
# for the field at the same position.

# Default field names and headings for rDock v4.00 scores
my @defaultFields = ('SCORE','SCORE.INTER','SCORE.INTRA','SCORE.RESTR','SCORE.INTER.VDW');
my @defaultHeadings = ('TOTAL','INTER','INTRA','RESTR','VDW');

# Default field names and headings for normalised scores (score / #ligand heavy atoms)
my @defaultNormFields = ('SCORE.norm','SCORE.INTER.norm','SCORE.INTRA.norm','SCORE.RESTR.norm','SCORE.heavy');
my @defaultNormHeadings = ('TOTALn','INTERn','INTRAn','RESTRn','#heavy');

# Default field names and headings for rDock v3.00 scores
my @defaultOldFields = ('Rbt.Score.Corrected','Rbt.Score.Inter','Rbt.Score.Intra','Rbt.Score.IntraMin','Rbt.Score.Restraint');
my @defaultOldHeadings = ('TOTAL','INTER','INTRA','INTRAMIN','RESTR');

# Option state, filled in by the command line parser below
my $listFormat = 0;#-l: list the data fields of each record
my $summaryFormat = 0;#-s: summarise records grouped by $summaryKey
my $tableFormat = 0;#-t: fixed-width table of the output fields
my $supplierFormat = 0;#-sup: supplier report
my $csvFormat = 0;#-c: comma-separated table of the output fields
my $summaryKey = "_TITLE1";#-id<DataField>: field used as the record key
my $oldFields = 0;#Default column set: 0 = v4.00, 1 = v3.00 (-o), 2 = normalised (-norm)
my $headings = 1;#DM 21 Nov 2000, If false (-nh), don't output headings
my @outputFields;#data fields requested with -t<Fields> / -c<Fields>
my @outputHeadings;#column titles, parallel to @outputFields
my @files;#arguments that matched no option; treated as input filenames

#Print help if no command line arguments
printHelpAndExit() if (scalar(@ARGV) == 0);

#Parse command line arguments
my $nArgs = scalar(@ARGV);

# Options are recognised by prefix, so the order of the tests matters:
# -sup must be tested before -s, otherwise it would be taken as -s.
while (scalar(@ARGV)) {
    my $arg = shift @ARGV;
    printHelpAndExit() if ($arg eq '-h');
    if (index($arg,'-l')==0) {
        $listFormat = 1;
    }
    elsif (index($arg,'-o')==0) {
        $oldFields = 1;
    }
    # 7 Feb 2005 (DM) Option to report normalised aggregate scores
    elsif (index($arg,'-norm')==0) {
        $oldFields = 2;
    }
    elsif (index($arg,'-sup')==0) {
        $supplierFormat = 1;
    }
    elsif (index($arg,'-s')==0) {
        #anything following -s is ignored; the summary key is set with -id
        $summaryFormat = 1;
    }
    elsif (index($arg,'-id')==0) {
        $summaryKey = substr($arg,3);
    }
    elsif (index($arg,'-nh')==0) {
        $headings = 0;
    }
    elsif (index($arg,'-t')==0) {
        $tableFormat = 1;
        #Only the fields named by THIS option are appended to both lists.
        #Appending all of @outputFields to the headings would repeat the
        #fields of any earlier -t/-c option and misalign the columns.
        my @requested = split(',',substr($arg,2));
        push @outputFields, @requested;
        push @outputHeadings, @requested;
    }
    elsif (index($arg,'-c')==0) {
        $csvFormat = 1;
        my @requested = split(',',substr($arg,2));
        push @outputFields, @requested;
        push @outputHeadings, @requested;
    }
    else {
        push @files,$arg;#must be a filename
    }
}
push @ARGV,@files;#put the filenames back in the arg list

#use -l if neither table format is specified
#An explicit -l is kept, so that -l can be combined with -t, -c or -sup
if (!$tableFormat && !$csvFormat && !$supplierFormat) {
    $listFormat = 1;
}

#If no output fields defined for -t or -c use the defaults (old or new)
if (scalar(@outputFields)==0) {
    if ($oldFields == 1) {
        @outputFields = @defaultOldFields;
        @outputHeadings = @defaultOldHeadings;
    }
    elsif ($oldFields == 2) {
        @outputFields = @defaultNormFields;
        @outputHeadings = @defaultNormHeadings;
    }
    else {
        @outputFields = @defaultFields;
        @outputHeadings = @defaultHeadings;
    }
}
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
110
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
my $sdRec = SDRecord->new;
my %summary;#hash of SDRecord lists, indexed by user-defined summary key
my %indexByName;#summary key value -> order of first appearance (1-based)
my %indexByNum;#order of first appearance -> summary key value
my $idx = 0;#number of unique summary key values seen so far
my $nRec = 0;#number of records read so far

#Column headings for tab and csv format
#DM 21 Nov 2000 - if $headings is false, then don't output the column headings
#The same precedence (table before csv) is used as for the rows written in
#the loop below, so that exactly one heading line matches the rows printed
if (!$summaryFormat && $headings) {
    if ($tableFormat) {
        tabHeadings($summaryKey,@outputHeadings);
    }
    elsif ($csvFormat) {
        csvHeadings($summaryKey,@outputHeadings);
    }
}

#read records
while ($sdRec->readRec('LINES'=>1,'DATA'=>1)) {
    $sdRec->addData('_REC' => ++$nRec);#add record# as temp data field
    if (!$summaryFormat) {
        #Per-record output, written as each record is processed
        if ($listFormat) {
            print "\n\nRECORD #$nRec\n";
            $sdRec->writeData();
        }
        if ($tableFormat) {
            tabScores([$sdRec],$summaryKey,@outputFields);
        }
        elsif ($csvFormat) {
            csvScores([$sdRec],$summaryKey,@outputFields);
        }
        elsif ($supplierFormat) {
            tabulateSuppliers([$sdRec],$summaryKey);
        }
    }
    else {
        #add record to summary, indexed by user field value
        #keep a separate index of unique values of user field values,
        #indexed by number in the order the values first appear
        #In this way we can maintain the sorted order of ligands
        #when we come to print out the summary
        my $summaryValue = $sdRec->{'DATA'}->{$summaryKey};
        #New data field value encountered
        if (!defined $indexByName{$summaryValue}) {
            $idx++;#incr the number of unique values
            #keep cross-referenced indexes (field value <-> number)
            $indexByName{$summaryValue} = $idx;
            $indexByNum{$idx} = $summaryValue;
        }
        #$sdRec is reused for the next record, so a copy is stored
        push @{$summary{$summaryValue}},$sdRec->copy('DATA'=>1);
    }
}
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
163
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
#Print summary if required
#One section is written per unique value of the summary key, in the order
#in which the values were first encountered while reading the records
if ($summaryFormat) {
    my $rule = "\n===============================================================\n";
    print $rule;
    print "SUMMARY BY $summaryKey\n";
    #numeric sort of the index numbers restores first-appearance order
    for my $num (sort {$a<=>$b} keys %indexByNum) {
        my $value = $indexByNum{$num};#look up corresponding data field value
        my $records = $summary{$value};#all records sharing that value
        print $rule;
        print "$summaryKey = $value (#$num)\n\n";
        writeSummary($records);
        if ($listFormat) {
            print "\nIndividual records:\n";
            for my $record (@{$records}) {
                print "\n";
                $record->writeData();
            }
        }
        if ($tableFormat) {
            print "\nScores:\n";
            tabHeadings($summaryKey,@outputHeadings);
            tabScores($records,$summaryKey,@outputFields);
        }
        if ($csvFormat) {
            print "\nScores:\n";
            csvHeadings($summaryKey,@outputHeadings);
            csvScores($records,$summaryKey,@outputFields);
        }
    }
}
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
192
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
##############################################################
# writes a summary to STDOUT for a list of SDRecords
# Input is a reference to an array of SDRecords
# (only the 'DATA' hash of each record is read)
#
# Fields whose values are identical in every record that has them are
# listed once under "Constant fields"; for all other fields the minimum,
# the maximum and the number of values are listed under "Variable fields".
sub writeSummary {
    my $recListRef = shift;

    #Extract the list of data values for each fieldname into a hash array
    #(key=fieldname, value=array ref)
    my %fields;
    for my $rec (@{$recListRef}) {
        while (my ($name,$value) = each %{$rec->{'DATA'}}) {
            push @{$fields{$name}},$value;
        }
    }

    #Look for constant fields and store separately
    #A field is constant if no value differs (as text) from the first one
    my %constFields;
    for my $name (keys %fields) {
        my @values = @{$fields{$name}};
        next if (grep { $_ ne $values[0] } @values);
        $constFields{$name} = $values[0];#store the field name and the constant value
        delete $fields{$name};#remove from (non-const) array
    }

    #Print constant fields
    print "\nConstant fields:\n\n";
    for my $name (sort keys %constFields) {
        printf "%-40s%s\n",$name,$constFields{$name};
    }
    #Print min and max value for non-const fields
    print "\nVariable fields:\n\n";
    for my $name (sort keys %fields) {
        my @values = @{$fields{$name}};
        #Numeric sort only if every value is numeric. Deciding from the
        #first value alone would order a mixed column numerically, with
        #all text values counted as zero, and report the wrong Min/Max
        if (grep { isNaN($_) } @values) {
            @values = sort @values;#text sort
        }
        else {
            @values = sort {$a <=> $b} @values;#numeric sort
        }
        my $nVal = scalar(@values);
        printf "%-40s",$name;
        print "Min = $values[0]\tMax = $values[$nVal-1]\t(N = $nVal)\n";
    }
}
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
241
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
##############################################################
# function isNaN equivalent to the C++, java, javascript isNaN
# From P Vaglio, ~intranet/lib/rbt_func.pl
#
# Input is a single scalar
# Returns 0 if the value is a decimal number (optional sign, digits with
# an optional decimal point, optional exponent), 1 otherwise.
# All white space in the value is ignored.
# An undefined value, an empty string and strings without any digit in the
# mantissa (".", "+", "e5") are not numbers.
sub isNaN {
    my ($text) = @_;
    return 1 unless (defined $text);
    $text =~ s/\s+//g; # strip white space
    # match +or- beginning of line 0 or 1 time
    # then any numeric 0 or more
    # then a decimal point
    # then any numeric 0 or more after decimal point
    # then possibly an e or E then + or - then any numeric at least once
    if ($text =~ /^[+-]?(\d*)(?:\.(\d*))?(?:[Ee][+-]?\d+)?$/) {
        # A capture group that matched nothing is still defined (it holds
        # the empty string), so the lengths are tested to make sure there
        # is at least one digit before or after the decimal point
        my $intDigits = $1;
        my $fracDigits = defined $2 ? $2 : '';
        return 0 if (length($intDigits) || length($fracDigits));
    }
    return 1;
}
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
259
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
##############################################################
# output corresponding headings for use with tabScores
# Input is the name of the key field, followed by the list of column titles
# Writes one line to STDOUT: "REC" left-justified in 10 characters, the key
# field name left-justified in 30 characters, then each title right-justified
# in 10 characters.
# NOTE(review): tabScores starts each row with "%03d\t" rather than a
# 10-character column, so heading and rows may not line up exactly - confirm
# before changing either format.
sub tabHeadings {
    my ($summaryKey,@fieldNames) = @_;
    printf("%-10s%-30s","REC",$summaryKey);
    for my $title (@fieldNames) {
        printf("%10s",$title);
    }
    print "\n";
}
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
271
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
##############################################################
# tab-delimited output of named data field values
# Input is a reference to an array of SDRecords, the name of the key field
# and the list of data field names to output
# Writes one line per record to STDOUT: the record number ('_REC' field,
# zero-padded to 3 digits), a tab, the key field value (padded/truncated to
# 30 characters), then one 10-character column per data field. Numeric
# values are written with 3 decimal places, anything else as text.
# A field that is missing from a record is left blank, so that it cannot be
# mistaken for a genuine score of 0.000.
sub tabScores {
    my ($recListRef,$summaryKey,@fieldNames) = @_;
    for my $rec (@{$recListRef}) {
        my $data = $rec->{'DATA'};
        printf("%03d\t%-30.30s",$data->{'_REC'},$data->{$summaryKey});
        for my $field (@fieldNames) {
            my $val = $data->{$field};
            if (!defined $val) {
                printf("%-10.12s",'');#missing field: blank column
            }
            elsif (isNaN($val)) {
                printf("%-10.12s",$val);
            }
            else {
                printf("%10.3f",$val);
            }
        }
        print "\n";
    }
}
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
292
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
##############################################################
# output corresponding headings for use with csvScores
# Input is the name of the key field, followed by the list of column titles
# Writes one comma-separated line to STDOUT: REC, the key field name, then
# the titles. A name containing a comma, a double quote or a line break is
# enclosed in double quotes (embedded quotes doubled), so that the heading
# always has one cell per column. Other names are written unchanged.
sub csvHeadings {
    my ($summaryKey,@fieldNames) = @_;
    #quote a cell only if it would otherwise break the CSV structure
    my $quote = sub {
        my $cell = defined $_[0] ? $_[0] : '';
        if ($cell =~ /[",\r\n]/) {
            $cell =~ s/"/""/g;
            $cell = '"' . $cell . '"';
        }
        return $cell;
    };
    my @cells = ('REC', map { $quote->($_) } ($summaryKey,@fieldNames));
    printf("%s\n",join(',',@cells));
}
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
304
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
##############################################################
# comma-delimited output of named data field values
# Input is a reference to an array of SDRecords, the name of the key field
# and the list of data field names to output
# Writes one line per record to STDOUT: the record number ('_REC' field),
# the key field value, then one cell per data field. Numeric values are
# written with 3 decimal places, anything else as text.
# Text containing a comma, a double quote or a line break is enclosed in
# double quotes (embedded quotes doubled); compound names often contain
# commas and would otherwise be split into several cells.
# A field that is missing from a record gives an empty cell, so that it
# cannot be mistaken for a genuine score of 0.000.
sub csvScores {
    my ($recListRef,$summaryKey,@fieldNames) = @_;
    #quote a cell only if it would otherwise break the CSV structure
    my $quote = sub {
        my $cell = defined $_[0] ? $_[0] : '';
        if ($cell =~ /[",\r\n]/) {
            $cell =~ s/"/""/g;
            $cell = '"' . $cell . '"';
        }
        return $cell;
    };
    for my $rec (@{$recListRef}) {
        my $data = $rec->{'DATA'};
        my @cells = (sprintf("%d",$data->{'_REC'}),$quote->($data->{$summaryKey}));
        for my $field (@fieldNames) {
            my $val = $data->{$field};
            if (!defined $val) {
                push @cells,'';#missing field: empty cell
            }
            elsif (isNaN($val)) {
                push @cells,$quote->($val);
            }
            else {
                push @cells,sprintf("%.3f",$val);
            }
        }
        printf("%s\n",join(',',@cells));
    }
}
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
325
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
326
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
##############################################################
# Standardised output of the Catalyst supplier field.
#
# Arguments:
#   $recListRef - reference to an array of SDRecords. Each record is read
#                 here as a hash with three entries:
#                   'LINES'   => array ref holding the text lines of the record
#                   'DATA'    => hash ref, data field name => value
#                   'DATAREF' => hash ref, data field name => index into LINES
#                                (the supplier text is read starting one line
#                                after that index)
#   $summaryKey - ligand identifier field to output as the name (def=Name)
#
# Output goes to the currently selected output handle. For each record a
# compound header (name, formula, mol.wt, CAS and MDL numbers) is printed,
# followed by the column headings and one row per price line
# (supplier, grade, price).
#
# Layout of the supplier lines, as parsed below:
#   - supplier info lines, terminated by a line equal to "."
#     (only the first info line is reported, as the supplier name)
#   - one or more grades, each consisting of
#       grade info lines, up to the first line starting with "_"
#       (only the first one is reported, as the grade/comment)
#       price lines, up to the first line starting with "."
#   - a long line of dots ($supplierEnd) closes the supplier
# The whole field ends at the first blank line (or the end of the record).
# NOTE(review): the line starting with "_" is itself emitted as the first
# price row, because the price loop starts on it - confirm this matches the
# Catalyst format.
#
# A record with no Supplier field produces the compound header and the column
# headings only. Running out of lines in the middle of a supplier prints
# "** INCOMPLETE RECORD **" and moves on to the next record.
sub tabulateSuppliers {
    my $recListRef = shift;
    my $summaryKey = shift || 'Name';

    #Line that marks the end of one supplier's block
    my $supplierEnd = "........................";

    #Label => data field pairs for the compound header, in output order
    my @headerFields = (
        ["Name:",    $summaryKey],
        ["Formula:", 'MolFormula'],
        ["Mol.wt:",  'MolWt'],
        ["CAS #:",   'CAS_num'],
        ["MDL #:",   'MDLNUMBER'],
    );

    foreach my $rec (@{$recListRef}) {
        my $linesRef = $rec->{'LINES'};
        my $suppRef  = $rec->{'DATAREF'}->{'Supplier'};

        #Output some useful compound info
        print "\n\n====================================================================================================\n";
        foreach my $headerField (@headerFields) {
            my ($label, $field) = @{$headerField};
            my $value = $rec->{'DATA'}->{$field};
            #Missing fields are shown as empty values
            printf("%-10.10s%s\n", $label, defined($value) ? $value : "");
        }

        #Get all the supplier record lines into a list
        #Record is terminated by blank line
        #A record without a Supplier field has no lines to collect; without
        #this guard the undefined index would silently turn into line 1 of
        #the record and header lines would be parsed as supplier data
        my @lines;
        if (defined($suppRef)) {
            my $suppBase = $suppRef + 1;
            for (my $i = $suppBase;
                 defined($$linesRef[$i]) && $$linesRef[$i] ne "";
                 $i++) {
                push @lines, $$linesRef[$i];
            }
        }
        my $nLines = scalar(@lines);

        #Column headings
        printf("\n%-20.20s%-40.40s%-40.40s\n",
               "Supplier",
               "Comment",
               "Price"
               );
        print "----------------------------------------------------------------------------------------------------\n";

        #Loop over each supplier
        #$iLine is shared by all the nested loops: each inner loop carries on
        #from wherever the previous one stopped. All loop conditions test the
        #bounds first so that no line beyond the end is ever compared.
        for (my $iLine = 0; $iLine < $nLines; $iLine++) {
            #collect supplier info lines
            my @supplierInfo = ();
            for (; $iLine < $nLines && $lines[$iLine] ne "."; $iLine++) {
                push @supplierInfo, $lines[$iLine];
            }
            #Check for incomplete record
            if ($iLine == $nLines) {
                print "** INCOMPLETE RECORD **\n";
                last;
            }
            my $supplier = @supplierInfo ? $supplierInfo[0] : "";

            #loop over each grade ($iLine++ steps over the "." separator)
            for ($iLine++;
                 $iLine < $nLines && $lines[$iLine] ne $supplierEnd;
                 $iLine++) {
                #collect grade info lines
                my @gradeInfo = ();
                for (; $iLine < $nLines && index($lines[$iLine], "_") != 0; $iLine++) {
                    push @gradeInfo, $lines[$iLine];
                }
                #Check for incomplete record
                if ($iLine == $nLines) {
                    print "** INCOMPLETE RECORD **\n";
                    last;
                }
                my $grade = @gradeInfo ? $gradeInfo[0] : "";

                #loop over each price info line
                for (; $iLine < $nLines && index($lines[$iLine], ".") != 0; $iLine++) {
                    #Collapse runs of whitespace to single spaces
                    my $price = join(" ", split(" ", $lines[$iLine]));
                    printf("%-20.20s%-40.39s%-40.40s\n",
                           $supplier,
                           $grade,
                           $price);
                }
                #Check for incomplete record
                if ($iLine == $nLines) {
                    print "** INCOMPLETE RECORD **\n";
                    last;
                }
                #End of this supplier: leave the terminator line to be
                #stepped over by the supplier loop
                last if $lines[$iLine] eq $supplierEnd;
            }
        }
    }
}
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
415
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
416
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
417 #######################################################################
30e2440b2173 planemo upload
marpiech
parents:
diff changeset
# Print the command-line usage summary and terminate the program.
# The text is written, in order, to the currently selected output handle;
# exit is then called without an explicit status. Takes no arguments and
# never returns to the caller.
sub printHelpAndExit {
    my @helpText = (
        #Purpose and synopsis
        "\nProduces text summaries of SD records\n",
        "\nUsage:\tsdreport [-l] [-t[<FieldName,FieldName...>]] [-c<FieldName,FieldName...>] [-id<IDField>] [-nh] [-o] [-s] [-sup] [sdFiles]\n",
        #Options
        "\n\t-l (list format) output all data fields for each record as processed\n",
        "\t-t (tab format) tabulate selected fields for each record as processed\n",
        "\t-c (csv format) comma delimited output of selected fields for each record as processed\n",
        "\t-s (summary format) output summary statistics for each unique value of ligand ID\n",
        "\t-sup (supplier format) tabulate supplier details (from Catalyst)\n",
        "\t-id<IDField> data field to use as ligand ID\n",
        "\t-nh don't output column headings in -t and -c formats\n",
        "\t-o use old (v3.00) score field names as default columns in -t and -c formats, else use v4.00 field names\n",
        "\t-norm use normalised score field names as default columns in -t and -c formats (normalised = score / #ligand heavy atoms)\n",
        #Notes on combining options and on defaults
        "\nNote:\tIf -l, -t or -c are combined with -s, the listing/table is output within each ligand summary\n",
        "\t-sup should not be combined with other options\n",
        "\tDefault field names for -t and -c are rDock score field names\n",
        "\tDefault ID field name is Name\n",
        #Input and output streams
        "\n\tIf sdFiles not given, reads from standard input\n",
        "\tOutput is to standard output\n\n",
    );
    #One print call per entry, exactly as many as there are text fragments
    foreach my $fragment (@helpText) {
        print $fragment;
    }
    exit;
}