casacore
HashMap.h
Go to the documentation of this file.
1 //# HashMap.h: this defines HashMap, which is a hashed associative array
2 //# Copyright (C) 1995,1996,1999,2000,2001
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef CASA_HASHMAP_H
29 #define CASA_HASHMAP_H
30 
31 #ifndef AIPS_USE_DEPRECATED
32 #error "HashMap.h is deprecated; use -DBUILD_DEPRECATED=ON to use it"
33 #endif
34 
35 //# Includes
36 #include <casacore/casa/aips.h>
37 #include <casacore/casa/Containers/Block.h>
38 #include <casacore/casa/Containers/List.h>
39 #include <casacore/casa/Containers/OrderedPair.h>
40 #include <casacore/casa/Exceptions/Error.h>
41 
42 namespace casacore { //# NAMESPACE CASACORE - BEGIN
43 
44 //# Forward Declarations
45 template<class key,class val> class ConstHashMapIter;
48 
49 // <summary>
50 // Hash functions for standard types
51 // </summary>
52 //
53 // <synopsis>
54 // These are the declarations for the standard hash functions
55 // used by <linkto class=HashMap>HashMap</linkto>. In general, a function
56 // such as these is defined for each type that is to be used as
57 // a key in <linkto class=HashMap>HashMap</linkto>.
58 //
59 // These hash functions map the key type to any integer. This
60 // integer is then used by <linkto class=HashMap>HashMap</linkto> to
61 // select a bucket in the hash table.
62 // </synopsis>
63 //
64 // <group name=hashfunc>
65 uInt hashFunc(const String &);
66 uInt hashFunc(const float &);
67 uInt hashFunc(const double &);
68 uInt hashFunc(const int &);
69 uInt hashFunc(const unsigned &);
70 //</group>
71 
72 
73 // <summary>
74 // Specify the default values for HashMap keys
75 // </summary>
76 //
77 // <synopsis>
78 // These are the declarations for a set of functions which provide
79 // the default values for types which are used as keys in
80 // <linkto class=HashMap>HashMap</linkto>.
81 // </synopsis>
82 //
83 // <group name=defaulthashvalue>
84 const Int &defaultHashValue(const Int *);
85 const uInt &defaultHashValue(const uInt *);
86 const Long &defaultHashValue(const Long *);
87 const uLong &defaultHashValue(const uLong *);
88 const Float &defaultHashValue(const Float *);
89 const Double &defaultHashValue(const Double *);
91 // </group>
92 
93 // <summary>
94 // Hash function with state
95 // </summary>
96 // <use visibility=export>
97 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos="">
98 //
99 // <etymology>
100 // This is basically a way of specifying a hash function, but
101 // it is implemented as a class. Thus it is called HashClass,
102 // similar to "hash function".
103 // </etymology>
104 //
105 // <synopsis>
106 // This class is used to specify a hash function. Sometimes a hash
107 // function may require state, it may be useful to create a
108 // hierarchy of hash functions, or it may be useful to create a class
109 // which provides for hashing as well as other functionality. This
110 // class can be used as a base class for any of these purposed. This
111 // class is intended for parameterization of
112 // <linkto class=HashMap>HashMap</linkto>.
113 //
114 // The hash function maps the key type to any integer. This
115 // integer is then used by <linkto class=HashMap>HashMap</linkto> to
116 // select a bucket in the hash table.
117 // </synopsis>
118 //
119 // <example>
120 // If one wished to make a HashClass for integers, something like the
121 // following might be done:
122 // <srcblock>
123 // class IntHash : public HashClass<Int> {
124 // public:
125 // uInt hash(const Int &v) const { return (uInt) v; }
126 // uInt hash(const Int &v) { return (uInt) v; }
127 // HashClass<Int> *clone() const { return new IntHash; }
128 // IntHash() : HashClass<Int>() { }
129 // };
130 // </srcblock>
131 // </example>
132 //
133 // <motivation>
134 // There may be occasions when it is more convenient to use a class
135 // instead of a single function. This base class provides a starting
136 // point plus, and <src>HashMap<k,v></src> has the necessary hooks to
137 // make use of classes derived from this class.
138 // </motivation>
139 //
140 template<class key> class HashClass {
141 public:
142  //
143  // This function maps elements of <src>key</src> type to any integer.
144  // This integer is then used by <linkto class=HashMap>HashMap</linkto> to
145  // select a bucket in the hash table.
146  //
147  virtual uInt hash(const key &) = 0;
148 
149  //
150  // This function is used to make a <em>deep copy</em>. This means that
151  // the copy, which this function returns, contains all derived information.
152  //
153  virtual HashClass<key> *clone() const = 0;
154 
156  virtual ~HashClass();
157 };
158 
159 
160 // <summary>
161 // Associative Array with a hash table implementation
162 // </summary>
163 // <use visibility=export>
164 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos="">
165 //
166 // <prerequisite>
167 // <li> basic concepts behind hash tables
168 // </prerequisite>
169 //
170 // <etymology>
171 // This is an associative array, also known as a map, and it is implemented
172 // using a hash table, so it is called HashMap.
173 // </etymology>
174 //
175 // <synopsis>
176 // This class is an implementation of an associative array. This is a common
177 // data structure which associates a key of one type with a value of the same
178 // or different type. Essentially it is an (unordered) array which is indexed
179 // by an arbitrary type of index, e.g. strings.
180 //
181 // This class has two template type parameters. The first is the type of the
182 // key and the second is the type of the value. Thus the associative array
183 // is a mapping from the domain, any valid object of the key type, to the
184 // range, any valid object of the value type. This is a <em>complete</em>
185 // map which means that every element in the domain maps to one and only
186 // one element in the range. Those elements which have not been set by the
187 // user of this class map to a default value which is set at construction
188 // time.
189 //
190 // One of the important features of this class which must be understood
191 // is the load factor. This factor indicates the average number of items
192 // in the buckets of the hash table which are currently in use. The goal
193 // is to have the hash function greatly reduce the number of item which
194 // must be searched, i.e. to have a limited number of items in each bucket.
195 // The load factor determines this. Thus a load factor of 1000 or 0 is a
196 // poor choice. The default load factor is 4 which should generally be
197 // fine. The load factor is set with <src>setMaxLoad()</src> and retrieved
198 // with <src>maxLoad()</src>.
199 //
200 // For this class to be used,
201 // three things must be defined:
202 // <ul>
203 // <li> a specialization of the <src>hash()</src> templated function for
204 // the key type or a class derived from <src>HashClass<key></src>.
205 // Either of which can be used to implement the hash function for
206 // a particular type.
207 // <li> an equality operator ( '==' ) for the key
208 // <li> a default constructor or a specialization of
209 // <src>defaultHashValue()</src> for the key type
210 // </ul>
211 //
212 // The implementation of this hash map is derived from work on a proposed
213 // addition to the Standard Template Library by Javier Barreiro, Robert Fraley
214 // and <a href="http://www.cs.rpi.edu/~musser/">David R. Musser</a>. The information
215 // which is available includes:
216 // <ul>
217 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashrationale.ps.Z">
218 // rationale for hash map addition to the STL </a>
219 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashdoc.ps.Z">
220 // hash map addition proposal</a>
221 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashimp2.Z">
222 // preliminary implementation</a>
223 // </ul>
224 // each of these sources was utilized in the development of this set of classes,
225 // and in particular, the preliminary implementation was the source for the hashing
226 // algorithm used in this set of classes.
227 // </synopsis>
228 //
229 // <example>
230 // <srcblock>
231 // #include <casacore/casa/Containers/HashMap.h>
232 // #include <casacore/casa/BasicSL/String.h>
233 // #include <casacore/casa/iostream.h>
234 //
235 // main() {
236 // HashMap<String,Int> hash;
237 //
238 // hash("one") = 1; // sets the value of key "one" to "1"
239 // hash.define("two",2); // defines a mapping from key "two" to "2"
240 // hash("three") = 3;
241 // hash.define("four",4);
242 // hash("five") = 5;
243 // hash.define("six",6);
244 //
245 // HashMapIter<String,Int> iter(hash);
246 // for ( iter.toStart(); ! iter.atEnd(); iter++ )
247 // cout << iter.getVal() << ": " << iter.getKey() << endl;
248 //
249 // cout << endl << "Diagnostics" << endl <<
250 // "========================" << endl;
251 // cout << "number defined: " << hash.ndefined() << endl;
252 // cout << "buckets used: " << hash.usedBuckets() << endl;
253 // cout << "buckets available: " << hash.availableBuckets() << endl;
254 // cout << "buckets allocated: " << hash.allocBuckets() << endl;
255 // cout << "loading: " << hash.loading() << endl;
256 // cout << "size (in bytes): " << hash.totalSize() << endl;
257 //
258 // }
259 // </srcblock>
260 // </example>
261 //
262 // <motivation>
263 // There are a couple of reasons why this class was built:
264 // <ul>
265 // <li> use of a hash table can be more efficient
266 // <li> there are a couple of Map classes currently:
267 // <ul>
268 // <li> <linkto class=OrderedMap>OrderedMap</linkto>
269 // <li> <linkto class=SimpleOrderedMap>SimpleOrderedMap</linkto>
270 // </ul>
271 // <src>OrderedMap</src> is derived from a map base class,
272 // <linkto class=Map><src>Map</src></linkto> while
273 // <src>SimpleOrderedMap</src> is not. This collection of classes
274 // has resulted in confusion for the users. It is hoped that this
275 // class can in time replace these other "map" classes by
276 // satisfying the performance demands of
277 // <src>SimpleOrderedMap</src> while still meeting the constraints
278 // of the other map classes.
279 // </ul>
280 // </motivation>
281 //
282 // <templating arg=key>
283 // <li> equality operator (operator==)
284 // <li> function hashFunc() or HashClass derived class provided
285 // <li> default constructor or defaultHashValue() specialization provided or
286 // default value provided at time of construction
287 // </templating>
288 // <templating arg=val>
289 // <li> copy constructor
290 // </templating>
291 //
292 // <thrown>
293 // <li> AipsError
294 // </thrown>
295 //
296 // <todo asof="yyyy/mm/dd">
297 // <li> add this feature
298 // <li> fix this bug
299 // <li> start discussion of this possible extension
300 // </todo>
301 template<class key, class val> class HashMap {
302 friend class ConstHashMapIter<key,val>;
303 private:
305 public:
306  static float defaultMaxLoad() { return float(defaultMaxLoad_); }
307  static uInt defaultSize() { return uInt(defaultSize_); }
308 
309  // Signature of the hash functions
310  typedef uInt (*Func)(const key&);
311  //
312  // Copy constructor with copy semantics
313  //
314  HashMap(const HashMap &);
315 
316  //
317  // Default constructor (and variation) which allows for
318  // specifying:
319  // <ul>
320  // <li> a default value, <src>dflt</src>
321  // <li> the initial number of buckets, <src>size</src>
322  // <li> the hash function, <src>newfunc</src>
323  // <li> the maximum load factor, <src>maxlf</src>
324  // </ul>
325  //
326  // This is a pair because the hash function can either be
327  // a simple function or a class derived from
328  // <linkto class=HashClass><src>HashClass</src></linkto>.
329  // <group>
330  HashMap(const val &dflt = defaultHashValue((const val*)(0)),
331  uInt size = uInt(defaultSize_),
332  Func newfunc = hashFunc,
333  float maxlf = float(defaultMaxLoad_))
334  : total_(size),
335  used_(size),
336  filled_(0),
337  defs_(0),
338  maxLoad_(maxlf),
339  blk(size, static_cast<List<OrderedPair<key,val> >*>(0)),
340  func(newfunc),
341  hashClass(0),
342  dfltVal(dflt)
343  { }
344 
345  HashMap(const val &dflt, uInt size, const HashClass<key> &newfunc,
346  float maxlf = float(defaultMaxLoad_))
347  : total_(size),
348  used_(size),
349  filled_(0),
350  defs_(0),
351  maxLoad_(maxlf),
352  blk(size, static_cast<List<OrderedPair<key,val> >*>(0)),
353  func(0),
354  hashClass(newfunc.clone()),
355  dfltVal(dflt)
356  { }
357  // </group>
358 
359 
360  //
361  // Constructor which allows for specifying:
362  // <ul>
363  // <li> default value, <src>dflt</src>
364  // <li> hash function, <src>newfunc</src>
365  // <li> maximum load factor, <src>maxlf</src>
366  // </ul>
367  // This is provided because often the user will not be interested
368  // in specifying the initial number of buckets since the number is
369  // increased as needed to maintain the max load factor.
370  //
371  // This is a pair because the hash function can either be
372  // a simple function or a class derived from
373  // <linkto class=HashClass><src>HashClass</src></linkto>.
374  // <group>
375  HashMap(const val &dflt, Func newfunc, float maxlf = float(defaultMaxLoad_))
377  filled_(0), defs_(0), maxLoad_(maxlf),
378  blk(uInt(defaultSize()), static_cast<List<OrderedPair<key,val> >*>(0)),
379  func(newfunc), hashClass(0), dfltVal(dflt)
380  { }
381 
382  HashMap(const val &dflt, const HashClass<key> &newfunc,
383  float maxlf = float(defaultMaxLoad_))
385  filled_(0), defs_(0), maxLoad_(maxlf),
386  blk(uInt(defaultSize_), static_cast<List<OrderedPair<key,val> >*>(0)), func(0),
387  hashClass(newfunc.clone()), dfltVal(dflt)
388  { }
389  // </group>
390 
391  //
392  // This copies the right hand side of the assignment. Assignment is done
393  // with <em>copy semantics</em>. This means that all the state is copied.
394  //
396 
397  //
398  // Retrieve values from the map, possibly for later assignment.
399  // It is important to realize that for the <em>non-const</em> version
400  // accessing the key causes an entry to be created in the map if it
401  // didn't already exist. The "value" for this new entry is the
402  // default value. "isDefined()" should be used if this behavior is
403  // not desired.
404  // <group>
405  const val &operator() (const key &ky) const;
406  val &operator() (const key &ky);
407  // </group>
408 
409  //
410  // Define a complete mapping.
411  //
412  val &define(const key &k, const val &v);
413  //
414  // Remove a user defined mapping from <src>k</src> to a value.
415  // After this, the value which <src>k</src> maps to reverts to
416  // the default value.
417  //
418  void remove(const key &k);
419  //
420  // Check to see if a user defined mapping exists for
421  // <src>k</src>. This does <em>not</em> modify the map.
422  //
423  Bool isDefined(const key &k) const;
424  //
425  // Retrieve the default value.
426  // <group>
427  const val &defaultVal() const { return dfltVal; }
428  val &defaultVal() { return dfltVal; }
429  // </group>
430 
431  //
432  // Remove all user defined mapping.
433  //
434  void clear() { freeTable(); }
435  //
436  // Get or set the maximum load factor.
437  //
438  // <group>
439  float maxLoad() const { return maxLoad_; }
440  void setMaxLoad( float new_max ) { maxLoad_ = new_max; }
441  // </group>
442 
443  // Total number of buckets, i.e. the number the
444  // hashing mechanism thinks it has. This is the total
445  // number of buckets used for calculations in the hashing
446  // mechanism. This may be smaller than <src>allocBuckets()</src>
447  // because more underlying storage may be allocated than the
448  // hashing mechanism needs.
449  uInt totalBuckets() const { return total_; }
450  // Number of buckets available, i.e. those which
451  // the hashing mechanism allows itself to use. This
452  // may be smaller than <src>totalBuckets()</src> because
453  // the hashing mechanism can restrict itself to some subset
454  // of the buckets available.
455  uInt availableBuckets() const { return used_; }
456  // Number of buckets currently populated by a bucket list.
457  uInt usedBuckets() const { return filled_; }
458  // Number of buckets which have been allocated, i.e. the
459  // total number which have currently been allocated. This
460  // is the number of buckets created. It may be bigger than
461  // <src>totalBuckets()</src> because more memory can be
462  // allocated than the hashing mechanism needs.
463  uInt allocBuckets() const { return blk.nelements(); }
464 
465  // Number of mappings which have been defined by the user.
466  uInt ndefined() const { return defs_; }
467 
468  // Current hash table loading factor.
469  float loading() const { return ndefined() ? (float) ndefined() / (float) availableBuckets() : 0.0; }
470  // Have any mappings been defined by the user.
471  Bool empty() const { return ndefined() == 0 ? True : False; }
472  // This returns a Block which has the number of elements in each bucket
473  // of the table.
475  // Total size of this HashMap minus dynamically allocated memory for
476  // key or val.
477  uInt totalSize() const;
478 
479  //
480  // dtor
481  //
482  virtual ~HashMap();
483 
484  enum {HashMapVersion = 1};
485 
486 protected:
487  // Call the hash function.
488  uInt hash(const key &k) const {
489  uInt off = func ? (*func)(k) % totalBuckets() :
490  hashClass ? hashClass->hash(k) % totalBuckets() : 0;
491  return off >= availableBuckets() ? off - (totalBuckets() >> 1) : off;
492  }
493  //
494  // delete the contents of the hash table
495  //
496  void freeTable();
497  //
498  // enlarge the hash table. Returns the bucket which is being
499  // moved...
500  //
502  //
503  // populate bucket "to". Returns the bucket which is being
504  // moved...
505  //
507 
508 private:
509  // Total Slots
511  // Slots Being Used
513  // Slots with At Least One Value in the Bucket
515  // Number of Defined Mappings
517  // Maximum load
518  float maxLoad_;
522  val dfltVal;
523 };
524 
525 
526 } //# NAMESPACE CASACORE - END
527 
528 #ifndef CASACORE_NO_AUTO_TEMPLATES
529 #include <casacore/casa/Containers/HashMap.tcc>
530 #endif //# CASACORE_NO_AUTO_TEMPLATES
531 #endif
Hash function with state.
Definition: HashMap.h:140
virtual uInt hash(const key &)=0
This function maps elements of key type to any integer.
virtual HashClass< key > * clone() const =0
This function is used to make a deep copy.
Associative Array with a hash table implementation.
Definition: HashMap.h:301
val & defaultVal()
Definition: HashMap.h:428
uInt allocBuckets() const
Number of buckets which have been allocated, i.e.
Definition: HashMap.h:463
uInt used_
Slots Being Used.
Definition: HashMap.h:512
uInt hash(const key &k) const
Call the hash function.
Definition: HashMap.h:488
void setMaxLoad(float new_max)
Definition: HashMap.h:440
PtrBlock< List< OrderedPair< key, val > > * > blk
Definition: HashMap.h:519
void freeTable()
delete the contents of the hash table
const val & defaultVal() const
Retrieve the default value.
Definition: HashMap.h:427
uInt totalBuckets() const
Total number of buckets, i.e.
Definition: HashMap.h:449
uInt(* Func)(const key &)
Signature of the hash functions.
Definition: HashMap.h:310
HashMap(const val &dflt, Func newfunc, float maxlf=float(defaultMaxLoad_))
Constructor which allows for specifying:
Definition: HashMap.h:375
uInt defs_
Number of Defined Mappings.
Definition: HashMap.h:516
uInt availableBuckets() const
Number of buckets available, i.e.
Definition: HashMap.h:455
HashClass< key > * hashClass
Definition: HashMap.h:521
uInt enlarge()
enlarge the hash table.
static uInt defaultSize()
Definition: HashMap.h:307
HashMap(const val &dflt, const HashClass< key > &newfunc, float maxlf=float(defaultMaxLoad_))
Definition: HashMap.h:382
uInt totalSize() const
Total size of this HashMap minus dynamically allocated memory for key or val.
uInt total_
Total Slots.
Definition: HashMap.h:510
void remove(const key &k)
Remove a user defined mapping from k to a value.
static float defaultMaxLoad()
Definition: HashMap.h:306
Bool empty() const
Have any mappings been defined by the user.
Definition: HashMap.h:471
Block< uInt > distribution() const
This returns a Block which has the number of elements in each bucket of the table.
HashMap(const val &dflt, uInt size, const HashClass< key > &newfunc, float maxlf=float(defaultMaxLoad_))
Definition: HashMap.h:345
uInt ndefined() const
Number of mappings which have been defined by the user.
Definition: HashMap.h:466
Bool isDefined(const key &k) const
Check to see if a user defined mapping exists for k.
float maxLoad() const
Get or set the maximum load factor.
Definition: HashMap.h:439
HashMap< key, val > & operator=(const HashMap< key, val > &)
This copies the right hand side of the assignment.
float loading() const
Current hash table loading factor.
Definition: HashMap.h:469
uInt usedBuckets() const
Number of buckets currently populated by a bucket list.
Definition: HashMap.h:457
HashMap(const val &dflt=defaultHashValue((const val *)(0)), uInt size=uInt(defaultSize_), Func newfunc=hashFunc, float maxlf=float(defaultMaxLoad_))
Default constructor (and variation) which allows for specifying:
Definition: HashMap.h:330
HashMap(const HashMap &)
Copy constructor with copy semantics.
uInt populate(uInt to)
populate bucket "to".
const val & operator()(const key &ky) const
Retrieve values from the map, possibly for later assignment.
float maxLoad_
Maximum load.
Definition: HashMap.h:518
val & define(const key &k, const val &v)
Define a complete mapping.
uInt filled_
Slots with At Least One Value in the Bucket.
Definition: HashMap.h:514
virtual ~HashMap()
dtor
void clear()
Remove all user defined mapping.
Definition: HashMap.h:434
Doubly linked list.
Definition: List.h:187
A drop-in replacement for Block<T*>.
Definition: Block.h:814
String: the storage and methods of handling collections of characters.
Definition: String.h:225
this file contains all the compiler specific defines
Definition: mainpage.dox:28
const Bool False
Definition: aipstype.h:44
unsigned long uLong
Definition: aipstype.h:53
long Long
Definition: aipstype.h:52
void throw_invalid_hashmapiter_error()
uInt hashFunc(const ObjectID &)
unsigned int uInt
Definition: aipstype.h:51
float Float
Definition: aipstype.h:54
void throw_hashmapiter_init_error()
int Int
Definition: aipstype.h:50
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
const Bool True
Definition: aipstype.h:43
double Double
Definition: aipstype.h:55
long double lDouble
Definition: aipstype.h:56
const Double & defaultHashValue(const Double *)