casacore
Loading...
Searching...
No Matches
StringDistance.h
Go to the documentation of this file.
1//# StringDistance.h: Class to deal with Levensthein distance of strings
2//# Copyright (C) 2009
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: casa-feedback@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25
26#ifndef CASA_STRINGDISTANCE_H
27#define CASA_STRINGDISTANCE_H
28
29//# Includes
30#include <casacore/casa/aips.h>
31#include <casacore/casa/BasicSL/String.h>
32#include <casacore/casa/Arrays/Matrix.h>
33
34namespace casacore {
35
36// <summary>
37// Class to deal with Levensthein distance of strings.
38// </summary>
39
40// <synopsis>
41// The Levenshtein Distance is a metric telling how similar strings are.
42// It is also known as the Edit Distance.
43//
44// The distance tells how many operations (i.e., character substitutions,
45// insertions, and deletions are needed to transform one string into another.
46// <br>There are several extensions to the basic definition:
47// <ul>
48// <li> Treat a swap of adjacent characters as one operation.
49// <li> Give a weight to operations (e.g., insertions and deletions have
50// half the weight of the other operations).
51// <li> Take locality into account. For example, successive substitutions
52// weigh more than non-successive.
53// <li> Extend to wildcarded strings.
54// </ul>
55// This class optionally uses the swap extension. Furthermore one can
56// optionally ignore blanks. By default both options are used.
57//
58// The code is based on code written by Anders Sewerin Johansen.
59// Calculating the distance is an expensive O(N^2) operation, thus
60// should be used with care.
61//
62// The class is constructed with the source string to compare against.
63// Thereafter its <code>match</code> or <code>distance</code>
64// function can be used for each target string.
65// </synopsis>
66
68{
69public:
70 // Default constructor sets maxDistance to 0.
72
73 // Construct from the source string and maximum distance.
74 // If the maximum distance is negative, it defaults to 1+strlength/3.
75 // Note that maximum distance 0 means that the strings must match exactly.
77 Bool countSwaps=True, Bool ignoreBlanks=True,
78 Bool caseInsensitive=False);
79
80 // Get data members.
81 // <group>
82 const string& source() const
83 { return itsSource; }
85 { return itsMaxDistance; }
86 const Matrix<Int>& matrix() const
87 { return itsMatrix; }
88 // </group>
89
90 // Test if the given target string is within the maximum distance.
91 Bool match (const String& target) const;
92
93 // Calculate the distance from the string to the string given in the constructor.
94 // If the length of target exceeds source length + maxDistance,
95 // the difference in lengths is returned.
96 Int distance (const String& target) const;
97
98 // Calculate the distance between the two strings.
99 // This is slower than the <src>distance</src> member function, because
100 // it has to allocate the underlying Matrix for each invocation.
101 static Int distance (const String& source, const String& target,
102 Bool countSwaps=True);
103
104 // Remove blanks from the given string.
106
107private:
108 // Calculate the distance.
109 static Int doDistance (const String& source, const String& target,
110 Bool countSwaps, Matrix<Int>& matrix);
111
112
113private:
120};
121
122} //# end namespace
123
124#endif
StringDistance(const String &source, Int maxDistance=-1, Bool countSwaps=True, Bool ignoreBlanks=True, Bool caseInsensitive=False)
Construct from the source string and maximum distance.
static Int doDistance(const String &source, const String &target, Bool countSwaps, Matrix< Int > &matrix)
Calculate the distance.
const Matrix< Int > & matrix() const
static Int distance(const String &source, const String &target, Bool countSwaps=True)
Calculate the distance between the two strings.
static String removeBlanks(const String &source)
Remove blanks from the given string.
Int distance(const String &target) const
Calculate the distance from the string to the string given in the constructor.
StringDistance()
Default constructor sets maxDistance to 0.
Bool match(const String &target) const
Test if the given target string is within the maximum distance.
const string & source() const
Get data members.
String: the storage and methods of handling collections of characters.
Definition String.h:223
this file contains all the compiler specific defines
Definition mainpage.dox:28
const Bool False
Definition aipstype.h:42
int Int
Definition aipstype.h:48
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40
const Bool True
Definition aipstype.h:41