casacore
Sort.h
Go to the documentation of this file.
1 //# Sort.h: Sort objects on one or more keys
2 //# Copyright (C) 1995,1996,1997,1998,1999,2000,2001
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //#
27 //# $Id$
28 
29 #ifndef CASA_SORT_H
30 #define CASA_SORT_H
31 
32 //# Includes
33 #include <casacore/casa/aips.h>
34 #include <casacore/casa/Arrays/ArrayFwd.h>
35 #include <casacore/casa/Containers/Block.h>
36 #include <casacore/casa/Utilities/ValType.h>
37 #include <casacore/casa/Utilities/Compare.h>
38 #include <casacore/casa/Utilities/CountedPtr.h>
39 
40 namespace casacore { //# NAMESPACE CASACORE - BEGIN
41 
42 // <summary> Define a Sort key </summary>
43 // <use visibility=local>
44 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
45 // </reviewed>
46 
47 // <synopsis>
48 // SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class.
49 // It holds the following information about a sort key:
50 // <ul>
51 // <li> Address of the data array containing the sort key;
52 // <li> A CountedPtr to a comparison object to be used
53 // (of a class derived from the abstract base class BaseCompare).
54 // <li> Increment for the next data point -- this lets you specify a
55 // stride for keys embedded in a struct;
56 // <li> Sort order -- ascending or descending;
57 // </ul>
58 // </synopsis>
59 
60 class SortKey
61 {
62 public:
63  friend class Sort;
64 
65  // Define a sort key in a given data array using the indicated
66  // comparison object, stride and sort order.
67  SortKey (const void* data, const CountedPtr<BaseCompare>&,
68  uInt increment, int order);
69 
70  // Copy constructor (copy semantics).
71  SortKey (const SortKey&);
72 
74 
75  // Assignment (copy semantics).
77 
78  // Try if GenSort can be used for this single key.
79  // If it succeeds, it returns the resulting number of elements.
80  // Otherwise it returns 0.
81  uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const;
82  uInt64 tryGenSort (Vector<uInt64>& indexVector, uInt64 nrrec, int opt) const;
83 
84  // Get the sort order.
85  int order() const
86  { return order_p; }
87 
88 protected:
89  // sort order; -1 = ascending, 1 = descending
90  int order_p;
91  // address of first data point
92  const void* data_p;
93  // increment for next data point
95  // comparison object; use CountedPtr for memory management
97  // comparison object; use raw pointer for performance
99 };
100 
101 
102 
103 // <summary> Sort on one or more keys, ascending and/or descending </summary>
104 // <use visibility=export>
105 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
106 // </reviewed>
107 
108 // <synopsis>
109 // <src>Sort</src> lets you sort data on one or more keys in a mix of
110 // <src>Sort::ascending</src> and <src>Sort::descending</src> order.
111 // Duplicates can be skipped by giving the option
112 // <src>Sort::NoDuplicates</src>. Only in this case the number of output
113 // elements can be different from the number of input elements.
114 // <br>The <src>unique</src> function offers another way of getting
115 // unique values.
116 // <p>
117 // Class <src>Sort</src> does not sort the data themselves, but
118 // returns an index to them. This gives more flexibility and
119 // allows the sort to be stable; but it is slower.
120 // <br>Very fast sorting of the data themselves can be done with the
121 // functions in class <linkto class=GenSort>GenSort</linkto>.
122 // If sorting on a single key with a standard data type is done,
123 // Sort will use GenSortIndirect to speed up the sort.
124 // <br>
125 // Four sort algorithms are provided:
126 // <DL>
127 // <DT> <src>Sort::ParSort</src>
128 // <DD> The parallel merge sort is the fastest if it can use multiple threads.
129 // For a single thread it has O(n*log(n)) behaviour, but is slower
130 // than quicksort.
131 // A drawback is that it needs an extra index array to do the merge.
132 // <DT> <src>Sort::InsSort</src>
133 // <DD> Insertion sort has O(n*n) behaviour, thus is very slow for large
134 // arrays. However, it is the fastest method for small arrays
135 // (< 50 elements) and for arrays already (almost) in the right order.
136 // <DT> <src>Sort::QuickSort</src>
137 // <DD> Care has been taken to solve the well-known quicksort problems
138 // like "array already in order" or "many equal elements". The
139 // behaviour is O(n*log(n)) in all the cases tested, even in
140 // degenerated cases where the SUN Solaris qsort algorithm is O(n*n).
141 // <DT> <src>Sort::HeapSort</src>
142 // <DD> Heapsort has O(n*log(n)) behaviour. Its speed is lower than
143 // that of QuickSort, so QuickSort is the default algorithm.
144 // </DL>
145 // The default is to use QuickSort for small arrays or if only a single
146 // thread can be used. Otherwise ParSort is the default.
147 //
148 // All sort algorithms are <em>stable</em>, which means that the original
149 // order is kept when keys are equal.
150 //
151 // The sort is a four step process:
152 // <ol>
153 // <li> Construct the <src>Sort</src> object.
154 // <li> Define the sort keys. The function <src>sortKey</src> must be
155 // called for each sort key (the most significant one first).
156 // The comparison object can be passed in directly, or a
157 // <linkto group="DataType.h#DataType">basic data type</linkto>
158 // can be given. In the latter case the appropriate ObjCompare
159 // comparison object will be created.
160 // <li> Sort the data. The function <src>sort</src> returns an index
161 // array, which is allocated when needed.
162 // <li> Destruct the <src>Sort</src> object (usually done automatically)
163 // and delete the index array.
164 // </ol>
165 // The data can be in a single array of structs, in separate arrays, or
166 // in a mix of those. Of course, all arrays must have the same length.
167 // The data can be passed to the <src>Sort</src> constructor and/or to the
168 // <src>sortKey</src> function. If passed to the <src>Sort</src> constructor,
169 // the offset of the data item in the data array must be given to
170 // <src>sortKey</src>.
171 // </synopsis>
172 
173 // <example>
174 // In the first example we sort the data contained in two "parallel"
175 // arrays, <src>idata</src> and <src>ddata</src>, both of length
176 // <src>nrdata</src>.
177 // <srcblock>
178 // Sort sort;
179 // sort.sortKey (idata, TpInt); // define 1st sort key
180 // sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key
181 // Vector<uInt> inx;
182 // sort.sort (inx, nrdata);
183 // for (uInt i=0; i<nrdata; i++) { // show sorted data
184 // cout << idata[inx[i]] << " " << ddata[inx[i]] << endl;
185 // }
186 // </srcblock>
187 // Now <src>nr</src> contains the nr of records (=<src>nrdata</src>)
188 // and <src>inx</src> an array of (sorted) indices.
189 //
190 // In the second example we sort the data stored in an array of structs
191 // on the double (ascending) and the string (descending). We can pass
192 // the data to the <src>Sort</src> constructor, and the offsets of the
193 // struct elements to the <src>sortKey</src> function.
194 // <srcblock>
195 // struct Ts {
196 // String as;
197 // double ad;
198 // }
199 // Vector<uInt> inx;
200 // Sort sort (tsarr, sizeof(Ts));
201 // sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble);
202 // sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString,
203 // Sort::Descending);
204 // sort.sort (inx, nrts);
205 // </srcblock>
206 // Note that the first argument in function <src>sortKey</src> gives
207 // the offset of the variable in the struct.
208 //
209 // Alternatively, and probably slightly easier, we could pass the data
210 // to the <src>sortKey</src> function and use an increment:
211 // <srcblock>
212 // struct Ts {
213 // String as;
214 // double ad;
215 // }
216 // Vector<uInt> inx;
217 // Sort sort;
218 // sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts));
219 // sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending);
220 // sort.sort (inx, nrts);
221 // </srcblock>
222 //
223 // Finally, we could provide a comparison object for the struct.
224 // <srcblock>
225 // struct Ts {
226 // String as;
227 // double ad;
228 // }
229 // class MyCompare: public BaseCompare {
230 // virtual int comp (const void* val1, const void* val2) const
231 // {
232 // const Ts& t1 = *(Ts*)val1;
233 // const Ts& t2 = *(Ts*)val2;
234 // if (t1.ad < t2.ad) return -1;
235 // if (t1.ad > t2.ad) return 1;
236 // if (t1.as < t2.as) return 1; // string must be descending
237 // if (t1.as > t2.as) return -1;
238 // return 0;
239 // }
240 // };
241 // Vector<uInt> inx;
242 // Sort sort;
243 // sort.sortKey (tsarr, compareTs, sizeof(Ts));
244 // sort.sort (inx, nrts);
245 // </srcblock>
246 
247 class Sort
248 {
249 public:
250  // Enumerate the sort options:
251  enum Option {DefaultSort=0, // ParSort, but QuickSort for small array
252  HeapSort=1, // use Heapsort algorithm
253  InsSort=2, // use insertion sort algorithm
254  QuickSort=4, // use Quicksort algorithm
255  ParSort=8, // use parallel merge sort algorithm
256  NoDuplicates=16}; // skip data with equal sort keys
257 
258  // Enumerate the sort order:
259  enum Order {Ascending=-1,
261 
262  // The default constructor can be used when the data is only passed
263  // in via function <src>sortKey</src>.
264  Sort();
265 
266  // Construct a Sort object for the given data array with elements
267  // of <src>elementSize</src> bytes. This data array will be used
268  // when an offset is given to the <src>sortKey</src> functions.
269  // You can still pass additional data arrays to the
270  // <src>sortKey</src> functions.
271  Sort (const void* data, uInt elementSize);
272 
273  // Copy constructor (copy semantics).
274  Sort (const Sort&);
275 
276  ~Sort();
277 
278  // Assignment (copy semantics).
279  Sort& operator= (const Sort&);
280 
281  // Define a sort key (the most significant key should be defined first).
282  // The key contains:
283  // <ul>
284  // <li> A pointer to the start of the data array. --- When structs are
285  // sorted on an element in the struct, the pointer must point to
286  // that element in the first struct.
287  // <li> A pointer to the comparison object to be used. --- The
288  // comparison object can be specified in two ways:
289  // <ul>
290  // <li> by giving a
291  // <linkto group="DataType.h#DataType">basic data type</linkto>,
292  // in which case the appropriate comparison object will be
293  // created automatically, or
294  // <li> by a CountedPtr of a comparison object.
295  // You may want to use the templated comparison classes
296  // <linkto class=ObjCompare>ObjCompare</linkto>(),
297  // but you are free to use any other class derived from BaseCompare
298  // that implements the <src>comp</src> function.
299  // </ul>
300  // <li> The increment from one data element to the next. --- When
301  // structs are sorted on an element in the struct, the increment
302  // should be the size of the struct. If the comparison object is
303  // automatically created using the data type specified, the default
304  // increment is the size of the data type.
305  // <li> The sort order. --- <src>Ascending</src> (default) or
306  // <src>Descending</src>;
307  // </ul>
308  //
309  // When the data array has been passed to the Sort constructor,
310  // the data pointer and the increment arguments can be replaced by a
311  // single argument: the offset of the key in each element of the array.
312  //
313  // <group>
314  void sortKey (const void* data, DataType, uInt increment = 0,
315  Order = Ascending);
316  void sortKey (const void* data, const CountedPtr<BaseCompare>&,
317  uInt increment, Order = Ascending);
318  void sortKey (uInt offset, DataType, Order = Ascending);
319  void sortKey (uInt offset, const CountedPtr<BaseCompare>&,
320  Order = Ascending);
321  // </group>
322 
323  // Sort the data array of <src>nrrec</src> records.
324  // The result is an array of indices giving the requested order.
325  // It returns the number of resulting records. The indices array
326  // is resized to that number.
327  // <br> By default it'll try if the faster GenSortIndirect can be used
328  // if a sort on a single key is used.
329  uInt sort (Vector<uInt>& indexVector, uInt nrrec,
330  int options = DefaultSort, Bool tryGenSort = True) const;
331  uInt64 sort (Vector<uInt64>& indexVector, uInt64 nrrec,
332  int options = DefaultSort, Bool tryGenSort = True) const;
333 
334  // Get all unique records in a sorted array. The array order is
335  // given in the indexVector (as possibly returned by the sort function).
336  // The default indexVector is 0..nrrec-1.
337  // The index of each first unique record is returned in the uniqueVector.
338  // They are indices in the supplied indexVector, so
339  // <src>data[indexVector(uniqueVector(i))]</src>
340  // is giving the i-th unique record.
341  // Note that the records indexed by <src>indexVector(uniqueVector(i))</src>
342  // till <src>indexVector(uniqueVector(i+1))</src> are all the same.
343  // <br>
344  // It returns the number of unique records. The unique array
345  // is resized to that number.
346  // The third version also gives back a vector with the keys that
347  // change in each sorting group. The size of changeKey is the same as
348  // uniqueVector, and for each unique sorting group indicates the index
349  // of the keyword that will change at the end of the group.
350  // <group>
351  uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const;
352  uInt unique (Vector<uInt>& uniqueVector,
353  const Vector<uInt>& indexVector) const;
354  uInt unique (Vector<uInt>& uniqueVector,
355  Vector<size_t>& changeKey,
356  const Vector<uInt>& indexVector) const;
357  uInt64 unique (Vector<uInt64>& uniqueVector, uInt64 nrrec) const;
358  uInt64 unique (Vector<uInt64>& uniqueVector,
359  const Vector<uInt64>& indexVector) const;
360  uInt64 unique (Vector<uInt64>& uniqueVector,
361  Vector<size_t>& changeKey,
362  const Vector<uInt64>& indexVector) const;
363  // </group>
364 
365 private:
366  template<typename T>
367  T doSort (Vector<T>& indexVector, T nrrec,
368  int options = DefaultSort, Bool tryGenSort = True) const;
369 
370  template <typename T>
371  T doUnique (Vector<T>& uniqueVector, T nrrec) const;
372  template <typename T>
373  T doUnique (Vector<T>& uniqueVector, const Vector<T>& indexVector) const;
374  template <typename T>
375  T doUnique (Vector<T>& uniqueVector, Vector<size_t>& changeKey,
376  const Vector<T>& indexVector) const;
377 
378  // Copy that Sort object to this.
379  void copy (const Sort& that);
380 
381  // Add a sort key giving a data type and stride or the sort key.
382  // <group>
383  void addKey (const void* data, DataType, uInt increment, int options);
384  void addKey (SortKey*);
385  // </group>
386 
387  // Do an insertion sort, optionally skipping duplicates.
388  // <group>
389  template<typename T>
390  T insSort (T nr, T* indices) const;
391  template<typename T>
392  T insSortNoDup (T nr, T* indices) const;
393  // </group>
394 
395  // Do a merge sort, if possible in parallel using OpenMP.
396  // Note that the env.var. OMP_NUM_TRHEADS sets the maximum nr of threads
397  // to use. It defaults to the number of cores.
398  template<typename T>
399  T parSort (int nthr, T nrrec, T* inx) const;
400  template<typename T>
401  void merge (T* inx, T* tmp, T size, T* index,
402  T nparts) const;
403 
404  // Do a quicksort, optionally skipping duplicates
405  // (qkSort is the actual quicksort function).
406  // <group>
407  template<typename T>
408  T quickSort (T nr, T* indices) const;
409  template<typename T>
410  T quickSortNoDup (T nr, T* indices) const;
411  template<typename T>
412  void qkSort (T nr, T* indices) const;
413  // </group>
414 
415  // Do a heapsort, optionally skipping duplicates.
416  // <group>
417  template<typename T>
418  T heapSort (T nr, T* indices) const;
419  template<typename T>
420  T heapSortNoDup (T nr, T* indices) const;
421  // </group>
422 
423  // Siftdown algorithm for heapsort.
424  template<typename T>
425  void siftDown (T low, T up, T* indices) const;
426 
427  // Compare 2 records based on the comparison functions
428  template<typename T>
429  int compare (T index1, T index2) const;
430 
431  // As compare() but it also gives back the index of the first comparison
432  // function that didn't match.
433  template<typename T>
434  int compareChangeIdx(T i1, T i2, size_t& idxComp) const;
435 
436  // Swap 2 indices.
437  template<typename T>
438  inline void swap (T index1, T index2, T* indices) const
439  {
440  T t = indices[index1];
441  indices[index1] = indices[index2];
442  indices[index2] = t;
443  }
444 
445  //# Data memebers
446  PtrBlock<SortKey*> keys_p; //# keys to sort on
447  size_t nrkey_p; //# #sort-keys
448  const void* data_p; //# pointer to data records
449  uInt size_p; //# size of data record
450  int order_p; //# -1=asc 0=mixed 1=desc
451 };
452 
453 
454 } //# NAMESPACE CASACORE - END
455 
456 #endif
abstract base class for comparing two objects
Definition: Compare.h:65
Referenced counted pointer for constant data.
Definition: CountedPtr.h:81
A drop-in replacement for Block<T*>.
Definition: Block.h:814
int order() const
Get the sort order.
Definition: Sort.h:85
uInt tryGenSort(Vector< uInt > &indexVector, uInt nrrec, int opt) const
Try if GenSort can be used for this single key.
BaseCompare * cmpObj_p
comparison object; use raw pointer for performance
Definition: Sort.h:98
SortKey(const void *data, const CountedPtr< BaseCompare > &, uInt increment, int order)
Define a sort key in a given data array using the indicated comparison object, stride and sort order.
CountedPtr< BaseCompare > ccmpObj_p
comparison object; use CountedPtr for memory management
Definition: Sort.h:96
uInt64 tryGenSort(Vector< uInt64 > &indexVector, uInt64 nrrec, int opt) const
SortKey(const SortKey &)
Copy constructor (copy semantics).
const void * data_p
address of first data point
Definition: Sort.h:92
int order_p
sort order; -1 = ascending, 1 = descending
Definition: Sort.h:90
uInt incr_p
increment for next data point
Definition: Sort.h:94
SortKey & operator=(const SortKey &)
Assignment (copy semantics).
Sort on one or more keys, ascending and/or descending.
Definition: Sort.h:248
uInt64 unique(Vector< uInt64 > &uniqueVector, const Vector< uInt64 > &indexVector) const
Option
Enumerate the sort options:
Definition: Sort.h:251
@ DefaultSort
Definition: Sort.h:251
@ NoDuplicates
Definition: Sort.h:256
T heapSortNoDup(T nr, T *indices) const
const void * data_p
Definition: Sort.h:448
void addKey(const void *data, DataType, uInt increment, int options)
Add a sort key giving a data type and stride or the sort key.
uInt64 sort(Vector< uInt64 > &indexVector, uInt64 nrrec, int options=DefaultSort, Bool tryGenSort=True) const
int compareChangeIdx(T i1, T i2, size_t &idxComp) const
As compare() but it also gives back the index of the first comparison function that didn't match.
T parSort(int nthr, T nrrec, T *inx) const
Do a merge sort, if possible in parallel using OpenMP.
void sortKey(uInt offset, const CountedPtr< BaseCompare > &, Order=Ascending)
uInt size_p
Definition: Sort.h:449
T doUnique(Vector< T > &uniqueVector, Vector< size_t > &changeKey, const Vector< T > &indexVector) const
size_t nrkey_p
Definition: Sort.h:447
void copy(const Sort &that)
Copy that Sort object to this.
uInt unique(Vector< uInt > &uniqueVector, uInt nrrec) const
Get all unique records in a sorted array.
T insSort(T nr, T *indices) const
Do an insertion sort, optionally skipping duplicates.
uInt unique(Vector< uInt > &uniqueVector, const Vector< uInt > &indexVector) const
T doSort(Vector< T > &indexVector, T nrrec, int options=DefaultSort, Bool tryGenSort=True) const
uInt unique(Vector< uInt > &uniqueVector, Vector< size_t > &changeKey, const Vector< uInt > &indexVector) const
T quickSort(T nr, T *indices) const
Do a quicksort, optionally skipping duplicates (qkSort is the actual quicksort function).
void merge(T *inx, T *tmp, T size, T *index, T nparts) const
int compare(T index1, T index2) const
Compare 2 records based on the comparison functions.
T heapSort(T nr, T *indices) const
Do a heapsort, optionally skipping duplicates.
void sortKey(uInt offset, DataType, Order=Ascending)
void sortKey(const void *data, DataType, uInt increment=0, Order=Ascending)
Define a sort key (the most significant key should be defined first).
uInt64 unique(Vector< uInt64 > &uniqueVector, uInt64 nrrec) const
Order
Enumerate the sort order:
Definition: Sort.h:259
@ Descending
Definition: Sort.h:260
T quickSortNoDup(T nr, T *indices) const
Sort & operator=(const Sort &)
Assignment (copy semantics).
void swap(T index1, T index2, T *indices) const
Swap 2 indices.
Definition: Sort.h:438
T doUnique(Vector< T > &uniqueVector, const Vector< T > &indexVector) const
Sort(const void *data, uInt elementSize)
Construct a Sort object for the given data array with elements of elementSize bytes.
T doUnique(Vector< T > &uniqueVector, T nrrec) const
uInt sort(Vector< uInt > &indexVector, uInt nrrec, int options=DefaultSort, Bool tryGenSort=True) const
Sort the data array of nrrec records.
uInt64 unique(Vector< uInt64 > &uniqueVector, Vector< size_t > &changeKey, const Vector< uInt64 > &indexVector) const
T insSortNoDup(T nr, T *indices) const
void addKey(SortKey *)
Sort()
The default constructor can be used when the data is only passed in via function sortKey.
PtrBlock< SortKey * > keys_p
Definition: Sort.h:446
Sort(const Sort &)
Copy constructor (copy semantics).
void qkSort(T nr, T *indices) const
void sortKey(const void *data, const CountedPtr< BaseCompare > &, uInt increment, Order=Ascending)
int order_p
Definition: Sort.h:450
void siftDown(T low, T up, T *indices) const
Siftdown algorithm for heapsort.
this file contains all the compiler specific defines
Definition: mainpage.dox:28
unsigned int uInt
Definition: aipstype.h:51
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
const Bool True
Definition: aipstype.h:43
unsigned long long uInt64
Definition: aipsxtype.h:39