annotate 2.4/lib/perl5/x86_64-linux-gnu-thread-multi/String/Approx.pm @ 18:1163c16cb3c0 draft

Uploaded
author plus91-technologies-pvt-ltd
date Mon, 02 Jun 2014 07:35:53 -0400
parents e3609c8714fb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
package String::Approx;

require v5.8.0;

# Assigned before `use strict` is in effect; the variable itself is
# declared through `use vars` further down.
$VERSION = '3.27';

use strict;
local $^W = 1;

use Carp;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;
require DynaLoader;

@ISA = qw(Exporter DynaLoader);

# Functions that callers may import on request.
@EXPORT_OK = qw(
    amatch asubstitute aindex aslice arindex
    adist adistr adistword adistrword
);

# Load the module's compiled component through DynaLoader.
__PACKAGE__->bootstrap($VERSION);

# Cache tunables; $CACHE_N_PURGE is derived from the other two by
# cache_n_purge().
my $CACHE_MAX   = 1000;    # high water mark
my $CACHE_PURGE = 0.75;    # purge this much of the least used
my $CACHE_N_PURGE;         # purge this many of the least used
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
26
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Recompute $CACHE_N_PURGE, the number of cache entries to drop per purge,
# as $CACHE_MAX * $CACHE_PURGE with a floor of 1.
# Returns the new value.
sub cache_n_purge () {
    my $count = $CACHE_MAX * $CACHE_PURGE;
    $CACHE_N_PURGE = $count < 1 ? 1 : $count;
    return $CACHE_N_PURGE;
}

# Derive the initial purge count from the default tunables.
cache_n_purge();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
34
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Get or set the cache high water mark.
#
# Without an argument: returns the current $CACHE_MAX.
# With an argument: stores it (negative values are clamped to 0),
# recomputes the purge count and returns that purge count.
sub cache_max (;$) {
    return $CACHE_MAX unless @_;

    my ($limit) = @_;
    $CACHE_MAX = $limit < 0 ? 0 : $limit;

    return cache_n_purge();
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
44
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Get or set the fraction of the cache that a purge removes.
#
# Without an argument: returns the current $CACHE_PURGE.
# With an argument: stores it clamped to the range 0..1, recomputes the
# purge count and returns that purge count.
sub cache_purge (;$) {
    return $CACHE_PURGE unless @_;

    my ($fraction) = @_;
    $CACHE_PURGE = $fraction < 0 ? 0
                 : $fraction > 1 ? 1
                 :                 $fraction;

    return cache_n_purge();
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
58
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Cache of matcher objects built by _simple(), keyed by pattern, and the
# number of times each pattern has been requested.
my %_simple;
my %_simple_usage_count;

# Purge the least used entries from the simple cache.
#
#   $P - pattern that must survive the purge (the one currently in use).
#
# At most int($CACHE_N_PURGE) entries are removed, least used first.
sub _cf_simple {
    my $P = shift;

    my @usage =
        sort { $_simple_usage_count{$a} <=> $_simple_usage_count{$b} }
        grep { $_ ne $P }
        keys %_simple_usage_count;

    # Make room, delete the least used entries.  Only ever shorten the
    # list: assigning to $#usage unconditionally would pad it with undef
    # when there are fewer candidates than the purge count, and those
    # undefs would then be used as hash keys in the deletes below.
    my $n_purge = int $CACHE_N_PURGE;
    $#usage = $n_purge - 1 if @usage > $n_purge;

    delete @_simple_usage_count{@usage};
    delete @_simple{@usage};
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
76
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Build a matcher object for pattern $P with default settings and record
# it in the simple cache when caching is enabled.
#
# Returns a one-element list holding the matcher.
#
# NOTE(review): a fresh object is constructed and returned on every call;
# the entry stored in %_simple is never read back inside this function.
# Confirm whether that is intentional before relying on the cache here.
sub _simple {
    my ($pattern) = @_;

    my $matcher = __PACKAGE__->new($pattern);

    return ($matcher) unless $CACHE_MAX;

    $_simple{$pattern} = $matcher unless exists $_simple{$pattern};
    $_simple_usage_count{$pattern}++;

    # Too many distinct patterns recorded: evict the least used ones.
    _cf_simple($pattern)
        if scalar(keys %_simple_usage_count) > $CACHE_MAX;

    return ($matcher);
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
94
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Parse approximate-matching modifier strings into a parameter hash.
#
#   $n     - pattern length, used to turn percentages into edit counts
#   @param - modifier strings; each may hold several modifiers in a row
#
# Recognised modifiers:
#   <digits>[%]            overall edit limit, stored under key 'k'
#   I|D|S<digits>[%]       per-type limit, stored under 'I', 'D' or 'S'
#   initial_position N, final_position N, position_range N
#   minimal_distance, i, g, ?
#
# Returns the parameter hash as a flat list.  On an unrecognised modifier
# a warning is issued and an empty list is returned.
sub _parse_param {
    use integer;

    my ($n, @param) = @_;
    my %param;

    foreach (@param) {
        while ($_ ne '') {
            s/^\s+//;

            # Nothing but whitespace was left: this string is finished.
            # Without this check trailing blanks would fall through to
            # the "unknown parameter" branch and discard everything.
            last if $_ eq '';

            # The type letter is captured on its own so that "I 2" is
            # stored under 'I' and not under 'I ' (letter plus blank).
            if (s/^(?:([IDS])\s*)?(\d+)(\s*%)?//) {
                my ($type, $number, $percent) = ($1, $2, $3);

                my $k = $number;
                if (defined $percent) {
                    # Percentage of the pattern length, rounded up.  Zero
                    # percent is exactly zero edits; computing it with
                    # the general formula would go negative for patterns
                    # of 100 characters or more.
                    $k = $number > 0
                       ? (($number - 1) * $n) / 100 + 1
                       : 0;
                }

                $param{ defined $type ? $type : 'k' } = $k;
            }
            elsif (s/^initial_position\W+(\d+)\b//) {
                $param{'initial_position'} = $1;
            }
            elsif (s/^final_position\W+(\d+)\b//) {
                $param{'final_position'} = $1;
            }
            elsif (s/^position_range\W+(\d+)\b//) {
                $param{'position_range'} = $1;
            }
            elsif (s/^minimal_distance\b//) {
                $param{'minimal_distance'} = 1;
            }
            elsif (s/^i//) {
                $param{'i'} = 1;
            }
            elsif (s/^g//) {
                $param{'g'} = 1;
            }
            elsif (s/^\?//) {
                $param{'?'} = 1;
            }
            else {
                warn "unknown parameter: '$_'\n";
                return;
            }
        }
    }

    return %param;
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
135
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Raw modifier string => canonical parameter key, and raw modifier string
# => parsed parameter hash (both filled in by _complex()).
my %_param_key;
my %_parsed_param;

# Cache of configured matcher objects and their usage counts, both keyed
# by pattern and then by canonical parameter key.
my %_complex;
my %_complex_usage_count;

# Purge the least used patterns from the complex cache.
#
#   $P - pattern that must survive the purge (the one currently in use).
#
# At most int($CACHE_N_PURGE) patterns are removed, least used first.
sub _cf_complex {
    my $P = shift;

    # Each value of %_complex_usage_count is a hash of per-parameter-key
    # counters, so a pattern's usage is the sum of those counters.
    # Comparing the values directly would compare reference addresses.
    my %total_usage;
    foreach my $pattern (grep { $_ ne $P } keys %_complex_usage_count) {
        my $total = 0;
        $total += $_ foreach values %{ $_complex_usage_count{$pattern} };
        $total_usage{$pattern} = $total;
    }

    my @usage =
        sort { $total_usage{$a} <=> $total_usage{$b} }
        keys %total_usage;

    # Make room, delete the least used entries.  Only ever shorten the
    # list so that no undef padding ends up as a hash key below.
    my $n_purge = int $CACHE_N_PURGE;
    $#usage = $n_purge - 1 if @usage > $n_purge;

    delete @_complex_usage_count{@usage};
    delete @_complex{@usage};
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
157
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Build (or fetch from the cache) a matcher for pattern $P configured by
# the given modifier strings.
#
#   $P    - the pattern
#   @args - modifier strings as understood by _parse_param()
#
# Returns a list: the matcher object followed by the parsed parameters
# as key/value pairs.
sub _complex {
    my ($P, @args) = @_;

    # _parse_param() expects the pattern length as its first argument.
    unshift @args, length $P;
    my $raw = "@args";

    # Parse each distinct modifier string only once.
    my %param;
    if (exists $_param_key{$raw}) {
        %param = %{ $_parsed_param{$raw} };
    }
    else {
        %param               = _parse_param(@args);
        $_parsed_param{$raw} = { %param };
        $_param_key{$raw}    = join(" ", %param);
    }
    my $key = $_param_key{$raw};

    my $complex;
    $complex = $_complex{$P}->{$key}
        if $CACHE_MAX && exists $_complex{$P}->{$key};

    unless (defined $complex) {
        $complex = exists $param{'k'}
                 ? new(__PACKAGE__, $P, $param{'k'})
                 : new(__PACKAGE__, $P);
        $_complex{$P}->{$key} = $complex if $CACHE_MAX;

        # A newly built matcher still has to be configured.  The setters
        # run in a fixed order; each one only when its parameter is set.
        my $apply = sub {
            while (my ($name, $setter) = splice(@_, 0, 2)) {
                $complex->$setter($param{$name}) if exists $param{$name};
            }
        };

        $complex->set_greedy unless exists $param{'?'};

        $apply->(
            'I' => 'set_insertions',
            'D' => 'set_deletions',
            'S' => 'set_substitutions',
        );

        $complex->set_caseignore_slice if exists $param{'i'};

        $apply->(
            'initial_position' => 'set_text_initial_position',
            'final_position'   => 'set_text_final_position',
            'position_range'   => 'set_text_position_range',
            'minimal_distance' => 'set_minimal_distance',
        );
    }

    if ($CACHE_MAX) {
        $_complex_usage_count{$P}->{$key}++;

        # If our cache overfloweth.
        _cf_complex($P)
            if scalar(keys %_complex_usage_count) > $CACHE_MAX;
    }

    return ( $complex, %param );
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
230
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Turn caching off by setting the cache high water mark to zero.
# Entries already stored are left in place.
# Returns whatever cache_max() returns for the new setting.
sub cache_disable {
    return cache_max(0);
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
234
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Empty both matcher caches completely.
#
# The caches are cleared directly instead of going through the purge
# helpers: those remove at most $CACHE_N_PURGE entries and always spare
# the pattern they are given, so they cannot guarantee an empty cache
# (for example after the high water mark has been lowered or set to 0).
#
# The cache tunables are left untouched.  Returns the current purge
# count, which is what the previous implementation happened to return.
sub cache_flush_all {
    %_simple              = ();
    %_simple_usage_count  = ();
    %_complex             = ();
    %_complex_usage_count = ();

    return cache_n_purge();
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
242
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Approximately match pattern $P against one or more strings.
#
#   amatch($P)                    match against $_
#   amatch($P, @texts)            match against each of @texts
#   amatch($P, [modifiers], ...)  as above, with modifier strings
#
# With @texts: returns the matching texts in list context, 1 or 0 in
# scalar context.
# Against $_: in list context returns $_ on a match and undef otherwise;
# in scalar context returns 1 on a match and the (false) result of the
# match otherwise.
# An empty pattern always yields 1.  When no texts are given and $_ is
# undefined, a warning is issued and nothing is returned.
sub amatch {
    my $P = shift;
    return 1 unless length $P;

    # Both builders return the matcher as their first element.
    my ($matcher) =
        (@_ && ref $_[0] eq 'ARRAY')
            ? _complex($P, @{ shift(@_) })
            : _simple($P);

    if (@_) {
        return grep { $matcher->match($_) } @_ if wantarray;

        foreach (@_) {
            return 1 if $matcher->match($_);
        }
        return 0;
    }

    unless (defined $_) {
        warn "amatch: \$_ is undefined: what are you matching?\n";
        return;
    }

    # Match once and reuse the result; a failed match used to be run a
    # second time just to produce the return value.
    my $matched = $matcher->match($_);

    if (wantarray) {
        return $matched ? $_ : undef;
    }
    return 1 if $matched;
    return $matched;
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
271
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Record one match found in $_ and build its replacement text.
#
#   $ri - array ref collecting match offsets
#   $rs - array ref collecting match lengths
#   $i  - offset of this match within $_
#   $s  - length of this match
#   $S  - replacement template; the two-character sequences $`, $& and $'
#         stand for the text before the match, the matched text and the
#         text after the match
#   $rn - array ref collecting the expanded replacement strings
sub _find_substitute {
    my ($ri, $rs, $i, $s, $S, $rn) = @_;

    push @{ $ri }, $i;
    push @{ $rs }, $s;

    my %expansion = (
        '`' => substr($_, 0, $i),
        '&' => substr($_, $i, $s),
        "'" => substr($_, $i + $s),
    );

    # Expand all placeholders in a single pass.  Doing it in separate
    # passes would re-scan text that was just inserted, so a literal
    # "$&" or "$'" inside the subject string would be expanded as well.
    my $new = $S;
    $new =~ s/\$([`&'])/$expansion{$1}/g;

    push @{ $rn }, $new;
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
289
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Apply a series of recorded replacements to a copy of $_ and append the
# resulting string to the result list.
#
#   $rn - array ref of replacement strings
#   $ri - array ref of match offsets in the original $_
#   $rs - array ref of match lengths
#   $rS - array ref that receives the substituted string
#
# Replacements are applied in list order; a running offset accounts for
# the length changes made by the earlier ones.
sub _do_substitute {
    my ($rn, $ri, $rs, $rS) = @_;

    my $text  = $_;
    my $drift = 0;

    for my $idx (0 .. $#{ $rn }) {
        my $replacement = $rn->[$idx];
        my $length      = $rs->[$idx];

        substr($text, $ri->[$idx] + $drift, $length, $replacement);
        $drift += length($replacement) - $length;
    }

    push @{ $rS }, $text;
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
303
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
304 sub asubstitute {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
305 my $P = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
306 my $S = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
307 my ($a, %p) =
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
308 (@_ && ref $_[0] eq 'ARRAY') ?
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
309 _complex($P, @{ shift(@_) }) : _simple($P);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
310
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
311 my ($i, $s, @i, @s, @n, @S);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
312
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
313 if (@_) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
314 if (exists $p{ g }) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
315 foreach (@_) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
316 @s = @i = @n = ();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
317 while (($i, $s) = $a->slice_next($_)) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
318 if (defined $i) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
319 _find_substitute(\@i, \@s, $i, $s, $S, \@n);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
320 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
321 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
322 _do_substitute(\@n, \@i, \@s, \@S) if @n;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
323 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
324 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
325 foreach (@_) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
326 @s = @i = @n = ();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
327 ($i, $s) = $a->slice($_);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
328 if (defined $i) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
329 _find_substitute(\@i, \@s, $i, $s, $S, \@n);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
330 _do_substitute(\@n, \@i, \@s, \@S);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
331 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
332 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
333 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
334 return @S;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
335 } elsif (defined $_) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
336 if (exists $p{ g }) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
337 while (($i, $s) = $a->slice_next($_)) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
338 if (defined $i) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
339 _find_substitute(\@i, \@s, $i, $s, $S, \@n);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
340 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
341 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
342 _do_substitute(\@n, \@i, \@s, \@S) if @n;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
343 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
344 ($i, $s) = $a->slice($_);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
345 if (defined $i) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
346 _find_substitute(\@i, \@s, $i, $s, $S, \@n);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
347 _do_substitute(\@n, \@i, \@s, \@S);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
348 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
349 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
350 return $_ = $n[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
351 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
352 warn "asubstitute: \$_ is undefined: what are you substituting?\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
353 return;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
354 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
355 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
356
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
357 sub aindex {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
358 my $P = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
359 return 0 unless length $P;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
360 my $a = ((@_ && ref $_[0] eq 'ARRAY') ?
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
361 _complex($P, @{ shift(@_) }) : _simple($P))[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
362
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
363 $a->set_greedy; # The *first* match, thank you.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
364
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
365 if (@_) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
366 if (wantarray) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
367 return map { $a->index($_) } @_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
368 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
369 return $a->index($_[0]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
370 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
371 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
372 return $a->index($_) if defined $_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
373
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
374 warn "aindex: \$_ is undefined: what are you indexing?\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
375 return;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
376 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
377
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
378 sub aslice {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
379 my $P = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
380 return (0, 0) unless length $P;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
381 my $a = ((@_ && ref $_[0] eq 'ARRAY') ?
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
382 _complex($P, @{ shift(@_) }) : _simple($P))[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
383
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
384 $a->set_greedy; # The *first* match, thank you.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
385
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
386 if (@_) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
387 return map { [ $a->slice($_) ] } @_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
388 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
389 return $a->slice($_) if defined $_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
390
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
391 warn "aslice: \$_ is undefined: what are you slicing?\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
392 return;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
393 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
394
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
395 sub _adist {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
396 my $s0 = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
397 my $s1 = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
398 my ($aslice) = aslice($s0, ['minimal_distance', @_], $s1);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
399 my ($index, $size, $distance) = @$aslice;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
400 my ($l0, $l1) = map { length } ($s0, $s1);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
401 return $l0 <= $l1 ? $distance : -$distance;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
402 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
403
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
404 sub adist {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
405 my $a0 = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
406 my $a1 = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
407 if (length($a0) == 0) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
408 return length($a1);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
409 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
410 if (length($a1) == 0) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
411 return length($a0);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
412 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
413 my @m = ref $_[0] eq 'ARRAY' ? @{shift()} : ();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
414 if (ref $a0 eq 'ARRAY') {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
415 if (ref $a1 eq 'ARRAY') {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
416 return [ map { adist($a0, $_, @m) } @{$a1} ];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
417 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
418 return [ map { _adist($_, $a1, @m) } @{$a0} ];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
419 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
420 } elsif (ref $a1 eq 'ARRAY') {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
421 return [ map { _adist($a0, $_, @m) } @{$a1} ];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
422 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
423 if (wantarray) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
424 return map { _adist($a0, $_, @m) } ($a1, @_);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
425 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
426 return _adist($a0, $a1, @m);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
427 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
428 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
429 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
430
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
431 sub adistr {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
432 my $a0 = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
433 my $a1 = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
434 my @m = ref $_[0] eq 'ARRAY' ? shift : ();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
435 if (ref $a0 eq 'ARRAY') {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
436 if (ref $a1 eq 'ARRAY') {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
437 my $l0 = length();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
438 return $l0 ? [ map { adist($a0, $_, @m) }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
439 @{$a1} ] :
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
440 [ ];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
441 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
442 return [ map { my $l0 = length();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
443 $l0 ? _adist($_, $a1, @m) / $l0 : undef
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
444 } @{$a0} ];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
445 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
446 } elsif (ref $a1 eq 'ARRAY') {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
447 my $l0 = length($a0);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
448 return [] unless $l0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
449 return [ map { _adist($a0, $_, @m) / $l0 } @{$a1} ];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
450 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
451 my $l0 = length($a0);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
452 if (wantarray) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
453 return map { $l0 ? _adist($a0, $_, @m) / $l0 : undef } ($a1, @_);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
454 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
455 return undef unless $l0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
456 return _adist($a0, $a1, @m) / $l0;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
457 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
458 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
459 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
460
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
461 sub adistword {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
462 return adist($_[0], $_[1], ['position_range=0']);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
463 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
464
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
465 sub adistrword {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
466 return adistr($_[0], $_[1], ['position_range=0']);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
467 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
468
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
469 sub arindex {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
470 my $P = shift;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
471 my $l = length $P;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
472 return 0 unless $l;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
473 my $R = reverse $P;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
474 my $a = ((@_ && ref $_[0] eq 'ARRAY') ?
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
475 _complex($R, @{ shift(@_) }) : _simple($R))[0];
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
476
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
477 $a->set_greedy; # The *first* match, thank you.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
478
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
479 if (@_) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
480 if (wantarray) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
481 return map {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
482 my $aindex = $a->index(scalar reverse());
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
483 $aindex == -1 ? $aindex : (length($_) - $aindex - $l);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
484 } @_;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
485 } else {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
486 my $aindex = $a->index(scalar reverse $_[0]);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
487 return $aindex == -1 ? $aindex : (length($_[0]) - $aindex - $l);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
488 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
489 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
490 if (defined $_) {
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
491 my $aindex = $a->index(scalar reverse());
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
492 return $aindex == -1 ? $aindex : (length($_) - $aindex - $l);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
493 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
494
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
495 warn "arindex: \$_ is undefined: what are you indexing?\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
496 return;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
497 }
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
498
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
499 1;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
500 __END__
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
501 =pod
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
502
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
503 =head1 NAME
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
504
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
505 String::Approx - Perl extension for approximate matching (fuzzy matching)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
506
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
507 =head1 SYNOPSIS
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
508
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
509 use String::Approx 'amatch';
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
510
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
511 print if amatch("foobar");
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
512
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
513 my @matches = amatch("xyzzy", @inputs);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
514
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
515 my @catches = amatch("plugh", ['2'], @inputs);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
516
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
517 =head1 DESCRIPTION
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
518
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
519 String::Approx lets you match and substitute strings approximately.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
520 With this you can emulate errors: typing errorrs, speling errors,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
521 closely related vocabularies (colour color), genetic mutations (GAG
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
522 ACT), abbreviations (McScot, MacScot).
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
523
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
524 NOTE: String::Approx suits the task of B<string matching>, not
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
525 B<string comparison>, and it works for B<strings>, not for B<text>.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
526
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
527 If you want to compare strings for similarity, you probably just want
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
528 the Levenshtein edit distance (explained below), the Text::Levenshtein
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
529 and Text::LevenshteinXS modules in CPAN. See also Text::WagnerFischer
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
530 and Text::PhraseDistance. (There are functions for this in String::Approx,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
531 e.g. adist(), but their results sometimes differ from the bare Levenshtein
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
532 et al.)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
533
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
534 If you want to compare things like text or source code, consisting of
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
535 B<words> or B<tokens> and B<phrases> and B<sentences>, or
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
536 B<expressions> and B<statements>, you should probably use some other
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
537 tool than String::Approx, like for example the standard UNIX diff(1)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
538 tool, or the Algorithm::Diff module from CPAN.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
539
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
540 The measure of B<approximateness> is the I<Levenshtein edit distance>.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
541 It is the total number of "edits": insertions,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
542
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
543 word world
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
544
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
545 deletions,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
546
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
547 monkey money
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
548
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
549 and substitutions
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
550
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
551 sun fun
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
552
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
553 required to transform a string to another string. For example, to
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
554 transform I<"lead"> into I<"gold">, you need three edits:
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
555
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
556 lead gead goad gold
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
557
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
558 The edit distance of "lead" and "gold" is therefore three, or 75%.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
559
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
560 B<String::Approx> uses the Levenshtein edit distance as its measure, but
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
561 String::Approx is not well-suited for comparing strings of different
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
562 length, in other words, if you want a "fuzzy eq", see above.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
563 String::Approx is more like regular expressions or index(), it finds
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
564 substrings that are close matches.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
565
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
566 =head1 MATCH
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
567
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
568 use String::Approx 'amatch';
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
569
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
570 $matched = amatch("pattern")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
571 $matched = amatch("pattern", [ modifiers ])
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
572
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
573 $any_matched = amatch("pattern", @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
574 $any_matched = amatch("pattern", [ modifiers ], @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
575
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
576 @match = amatch("pattern")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
577 @match = amatch("pattern", [ modifiers ])
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
578
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
579 @matches = amatch("pattern", @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
580 @matches = amatch("pattern", [ modifiers ], @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
581
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
582 Match B<pattern> approximately. In list context return the matched
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
583 B<@inputs>. If no inputs are given, match against the B<$_>. In scalar
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
584 context return true if I<any> of the inputs match, false if none match.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
585
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
586 Notice that the pattern is a string. Not a regular expression. None
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
587 of the regular expression notations (^, ., *, and so on) work. They
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
588 are characters just like the others. Note-on-note: some limited form
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
589 of I<"regular expressionism"> is planned for the future: for example
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
590 character classes ([abc]) and I<any-chars> (.). But that feature will
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
591 be turned on by a special I<modifier> (just a guess: "r"), so there
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
592 should be no backward compatibility problem.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
593
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
594 Notice also that matching is not symmetric. The inputs are matched
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
595 against the pattern, not the other way round. In other words: the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
596 pattern can be a substring, a submatch, of an input element. An input
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
597 element is always a superstring of the pattern.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
598
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
599 =head2 MODIFIERS
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
600
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
601 With the modifiers you can control the amount of approximateness and
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
602 certain other control variables. The modifiers are one or more
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
603 strings, for example B<"i">, within a string optionally separated by
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
604 whitespace. The modifiers are inside an anonymous array: the B<[ ]>
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
605 in the syntax are not notational, they really do mean B<[ ]>, for
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
606 example B<[ "i", "2" ]>. B<["2 i"]> would be identical.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
607
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
608 The implicit default approximateness is 10%, rounded up. In other
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
609 words: every tenth character in the pattern may be an error, an edit.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
610 You can explicitly set the maximum approximateness by supplying a
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
611 modifier like
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
612
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
613 number
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
614 number%
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
615
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
616 Examples: B<"3">, B<"15%">.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
617
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
618 Note that C<0%> is not rounded up, it is equal to C<0>.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
619
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
620 Using a similar syntax you can separately control the maximum number
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
621 of insertions, deletions, and substitutions by prefixing the numbers
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
622 with I, D, or S, like this:
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
623
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
624 Inumber
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
625 Inumber%
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
626 Dnumber
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
627 Dnumber%
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
628 Snumber
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
629 Snumber%
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
630
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
631 Examples: B<"I2">, B<"D20%">, B<"S0">.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
632
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
633 You can ignore case (B<"A"> becomes equal to B<"a"> and vice versa)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
634 by adding the B<"i"> modifier.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
635
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
636 For example
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
637
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
638 [ "i 25%", "S0" ]
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
639
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
640 means I<ignore case>, I<allow every fourth character to be "an edit">,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
641 but allow I<no substitutions>. (See L</NOTES> about disallowing
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
642 substitutions or insertions.)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
643
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
644 NOTE: setting C<I0 D0 S0> is not equivalent to using index().
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
645 If you want to use index(), use index().
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
646
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
647 =head1 SUBSTITUTE
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
648
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
649 use String::Approx 'asubstitute';
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
650
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
651 @substituted = asubstitute("pattern", "replacement")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
652 @substituted = asubstitute("pattern", "replacement", @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
653 @substituted = asubstitute("pattern", "replacement", [ modifiers ])
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
654 @substituted = asubstitute("pattern", "replacement",
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
655 [ modifiers ], @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
656
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
657 Substitute approximate B<pattern> with B<replacement> and return as a
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
658 list I<copies> of B<@inputs>, the substitutions having been made on the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
659 elements that did match the pattern. If no inputs are given,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
660 substitute in the B<$_>. The replacement can contain magic strings
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
661 B<$&>, B<$`>, B<$'> that stand for the matched string, the string
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
662 before it, and the string after it, respectively. All the other
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
663 arguments are as in C<amatch()>, plus one additional modifier, B<"g">
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
664 which means substitute globally (all the matches in an element and not
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
665 just the first one, as is the default).
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
666
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
667 See L</BAD NEWS> about the unfortunate stinginess of C<asubstitute()>.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
668
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
669 =head1 INDEX
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
670
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
671 use String::Approx 'aindex';
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
672
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
673 $index = aindex("pattern")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
674 @indices = aindex("pattern", @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
675 $index = aindex("pattern", [ modifiers ])
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
676 @indices = aindex("pattern", [ modifiers ], @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
677
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
678 Like C<amatch()> but returns the index/indices at which the pattern
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
679 matches approximately. In list context and if C<@inputs> are used,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
680 returns a list of indices, one index for each input element.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
681 If there's no approximate match, C<-1> is returned as the index.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
682
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
683 NOTE: if there is character repetition (e.g. "aa") either in
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
684 the pattern or in the text, the returned index might start
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
685 "too early". This is consistent with the goal of the module
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
686 of matching "as early as possible", just like regular expressions
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
687 (that there might be a "less approximate" match starting later is
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
688 somewhat irrelevant).
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
689
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
690 There's also backwards-scanning C<arindex()>.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
691
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
692 =head1 SLICE
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
693
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
694 use String::Approx 'aslice';
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
695
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
696 ($index, $size) = aslice("pattern")
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
697 ([$i0, $s0], ...) = aslice("pattern", @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
698 ($index, $size) = aslice("pattern", [ modifiers ])
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
699 ([$i0, $s0], ...) = aslice("pattern", [ modifiers ], @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
700
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
701 Like C<aindex()> but returns also the size (length) of the match.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
702 If the match fails, returns an empty list (when matching against C<$_>)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
703 or an empty anonymous list corresponding to the particular input.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
704
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
705 NOTE: size of the match will very probably be something you did not
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
706 expect (such as longer than the pattern, or a negative number). This
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
707 may or may not be fixed in future releases. Also the beginning of the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
708 match may vary from the expected as with aindex(), see above.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
709
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
710 If the modifier
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
711
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
712 "minimal_distance"
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
713
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
714 is used, the minimal possible edit distance is returned as the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
715 third element:
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
716
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
717 ($index, $size, $distance) = aslice("pattern", [ modifiers ])
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
718 ([$i0, $s0, $d0], ...) = aslice("pattern", [ modifiers ], @inputs)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
719
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
720 =head1 DISTANCE
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
721
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
722 use String::Approx 'adist';
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
723
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
724 $dist = adist("pattern", $input);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
725 @dist = adist("pattern", @input);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
726
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
727 Return the I<edit distance> or distances between the pattern and the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
728 input or inputs. Zero edit distance means exact match. (Remember
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
729 that the match can 'float' in the inputs, the match is a substring
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
730 match.) If the pattern is longer than the input or inputs, the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
731 returned distance or distances is or are negative.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
732
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
733 use String::Approx 'adistr';
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
734
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
735 $dist = adistr("pattern", $input);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
736 @dist = adistr("pattern", @inputs);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
737
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
738 Return the B<relative> I<edit distance> or distances between the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
739 pattern and the input or inputs. Zero relative edit distance means
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
740 exact match, one means completely different. (Remember that the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
741 match can 'float' in the inputs, the match is a substring match.) If
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
742 the pattern is longer than the input or inputs, the returned distance
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
743 or distances is or are negative.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
744
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
745 You can use adist() or adistr() to sort the inputs according to their
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
746 approximateness:
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
747
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
748 my %d;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
749 @d{@inputs} = map { abs } adistr("pattern", @inputs);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
750 my @d = sort { $d{$a} <=> $d{$b} } @inputs;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
751
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
752 Now C<@d> contains the inputs, the most like C<"pattern"> first.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
753
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
754 =head1 CONTROLLING THE CACHE
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
755
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
756 C<String::Approx> maintains a LU (least-used) cache that holds the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
757 'matching engines' for each instance of a I<pattern+modifiers>. The
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
758 cache is intended to help the case where you match a small set of
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
759 patterns against a large set of strings. However, the more engines you
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
760 cache the more you eat memory. If you have a lot of different
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
761 patterns or if you have a lot of memory to burn, you may want to
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
762 control the cache yourself. For example, allowing a larger cache
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
763 consumes more memory but probably runs a little bit faster since the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
764 cache fills (and needs flushing) less often.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
765
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
766 The cache has two parameters: I<max> and I<purge>. The first one
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
767 is the maximum size of the cache and the second one is the cache
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
768 flushing ratio: when the number of cache entries exceeds I<max>,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
769 I<max> times I<purge> cache entries are flushed. The default
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
770 values are 1000 and 0.75, respectively, which means that when
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
771 the 1001st entry would be cached, 750 least used entries will
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
772 be removed from the cache. To access the parameters you can
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
773 use the calls
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
774
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
775 $now_max = String::Approx::cache_max();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
776 String::Approx::cache_max($new_max);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
777
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
778 $now_purge = String::Approx::cache_purge();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
779 String::Approx::cache_purge($new_purge);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
780
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
781 $limit = String::Approx::cache_n_purge();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
782
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
783 To be honest, there are actually B<two> caches: the first one is used
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
784 for the patterns with no modifiers, the second one for the patterns
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
785 with pattern modifiers. Using the standard parameters you will
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
786 therefore actually cache up to 2000 entries. The above calls control
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
787 both caches for the same price.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
788
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
789 To disable caching completely use
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
790
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
791 String::Approx::cache_disable();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
792
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
793 Note that this doesn't flush any possibly existing cache entries,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
794 to do that use
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
795
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
796 String::Approx::cache_flush_all();
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
797
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
798 =head1 NOTES
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
799
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
800 Because matching is by I<substrings>, not by whole strings, insertions
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
801 and substitutions often produce very similar results: "abcde" matches
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
802 "axbcde" either by insertion B<or> substitution of "x".
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
803
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
804 The maximum edit distance is also the maximum number of edits.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
805 That is, the B<"I2"> in
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
806
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
807 amatch("abcd", ["I2"])
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
808
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
809 is useless because the maximum edit distance is (implicitly) 1.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
810 You may have meant to say
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
811
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
812 amatch("abcd", ["2D1S1"])
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
813
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
814 or something like that.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
815
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
816 If you want to simulate transposes
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
817
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
818 feet fete
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
819
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
820 you need to allow at least edit distance of two because in terms of
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
821 our edit primitives a transpose is first one deletion and then one
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
822 insertion.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
823
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
824 =head2 TEXT POSITION
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
825
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
826 The starting and ending positions of matching, substituting, indexing, or
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
827 slicing can be changed from the beginning and end of the input(s) to
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
828 some other positions by using either or both of the modifiers
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
829
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
830 "initial_position=24"
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
831 "final_position=42"
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
832
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
833 or both the modifiers
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
834
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
835 "initial_position=24"
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
836 "position_range=10"
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
837
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
838 By setting the B<"position_range"> to be zero you can limit
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
839 (anchor) the operation to happen only once (if a match is possible)
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
840 at the position.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
841
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
842 =head1 VERSION
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
843
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
844 Major release 3.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
845
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
846 =head1 CHANGES FROM VERSION 2
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
847
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
848 =head2 GOOD NEWS
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
849
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
850 =over 4
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
851
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
852 =item The version 3 is 2-3 times faster than version 2
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
853
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
854 =item No pattern length limitation
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
855
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
856 The algorithm is independent of the pattern length: its time
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
857 complexity is I<O(kn)>, where I<k> is the number of edits and I<n> the
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
858 length of the text (input). The preprocessing of the pattern will of
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
859 course take some I<O(m)> (I<m> being the pattern length) time, but
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
860 C<amatch()> and C<asubstitute()> cache the result of this
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
861 preprocessing so that it is done only once per pattern.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
862
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
863 =back
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
864
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
865 =head2 BAD NEWS
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
866
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
867 =over 4
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
868
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
869 =item You do need a C compiler to install the module
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
870
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
871 Perl's regular expressions are no longer used; instead a faster and more
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
872 scalable algorithm written in C is used.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
873
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
874 =item C<asubstitute()> is now always stingy
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
875
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
876 The string matched and substituted is now always stingy, as short
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
877 as possible. It used to be as long as possible. This is an unfortunate
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
878 change stemming from switching the matching algorithm. Example: with
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
879 edit distance of two and substituting for B<"word"> from B<"cork"> and
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
880 B<"wool"> previously did match B<"cork"> and B<"wool">. Now it does
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
881 match B<"or"> and B<"wo">. As little as possible, or, in other words,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
882 with as much approximateness, as many edits, as possible. Because
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
883 there is no I<need> to match the B<"c"> of B<"cork">, it is not matched.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
884
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
885 =item no more C<aregex()> because regular expressions are no more used
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
886
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
887 =item no more C<compat1> for String::Approx version 1 compatibility
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
888
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
889 =back
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
890
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
891 =head1 ACKNOWLEDGEMENTS
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
892
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
893 The following people have provided valuable test cases, documentation
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
894 clarifications, and other feedback:
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
895
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
896 Jared August, Arthur Bergman, Anirvan Chatterjee, Steve A. Chervitz,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
897 Aldo Calpini, David Curiel, Teun van den Dool, Alberto Fontaneda,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
898 Rob Fugina, Dmitrij Frishman, Lars Gregersen, Kevin Greiner,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
899 B. Elijah Griffin, Mike Hanafey, Mitch Helle, Ricky Houghton,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
900 'idallen', Helmut Jarausch, Damian Keefe, Ben Kennedy, Craig Kelley,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
901 Franz Kirsch, Dag Kristian, Mark Land, J. D. Laub, John P. Linderman,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
902 Tim Maher, Juha Muilu, Sergey Novoselov, Andy Oram, Ji Y Park,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
903 Eric Promislow, Nikolaus Rath, Stefan Ram, Slaven Rezic,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
904 Dag Kristian Rognlien, Stewart Russell, Slaven Rezic, Chris Rosin,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
905 Pasha Sadri, Ilya Sandler, Bob J.A. Schijvenaars, Ross Smith,
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
906 Frank Tobin, Greg Ward, Rich Williams, Rick Wise.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
907
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
908 The matching algorithm was developed by Udi Manber, Sun Wu, and Burra
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
909 Gopal in the Department of Computer Science, University of Arizona.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
910
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
911 =head1 AUTHOR
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
912
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
913 Jarkko Hietaniemi <jhi@iki.fi>
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
914
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
915 =head1 COPYRIGHT AND LICENSE
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
916
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
917 Copyright 2001-2013 by Jarkko Hietaniemi
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
918
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
919 This library is free software; you can redistribute it and/or modify
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
920 under either the terms of the Artistic License 2.0, or the GNU Library
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
921 General Public License, Version 2. See the files Artistic and LGPL
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
922 for more details.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
923
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
924 Furthermore: no warranties or obligations of any kind are given, and
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
925 the separate file F<COPYRIGHT> must be included intact in all copies
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
926 and derived materials.
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
927
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
928 =cut