casacore
Loading...
Searching...
No Matches
Sort.h
Go to the documentation of this file.
1//# Sort.h: Sort objects on one or more keys
2//# Copyright (C) 1995,1996,1997,1998,1999,2000,2001
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: casa-feedback@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25
26#ifndef CASA_SORT_H
27#define CASA_SORT_H
28
29//# Includes
30#include <casacore/casa/aips.h>
31#include <casacore/casa/Arrays/ArrayFwd.h>
32#include <casacore/casa/Containers/Block.h>
33#include <casacore/casa/Utilities/ValType.h>
34#include <casacore/casa/Utilities/Compare.h>
35#include <memory>
36
37namespace casacore { //# NAMESPACE CASACORE - BEGIN
38
39// <summary> Define a Sort key </summary>
40// <use visibility=local>
41// <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
42// </reviewed>
43
44// <synopsis>
45// SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class.
46// It holds the following information about a sort key:
47// <ul>
48// <li> Address of the data array containing the sort key;
49// <li> A std::shared_ptr to a comparison object to be used
50// (of a class derived from the abstract base class BaseCompare).
51// <li> Increment for the next data point -- this lets you specify a
52// stride for keys embedded in a struct;
53// <li> Sort order -- ascending or descending;
54// </ul>
55// </synopsis>
56
58{
59public:
60 friend class Sort;
61
62 // Define a sort key in a given data array using the indicated
63 // comparison object, stride and sort order.
64 SortKey (const void* data, const std::shared_ptr<BaseCompare>&,
65 uInt increment, int order);
66
67 // Copy constructor (copy semantics).
68 SortKey (const SortKey&);
69
71
72 // Assignment (copy semantics).
74
75 // Try if GenSort can be used for this single key.
76 // If it succeeds, it returns the resulting number of elements.
77 // Otherwise it returns 0.
78 uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const;
79 uInt64 tryGenSort (Vector<uInt64>& indexVector, uInt64 nrrec, int opt) const;
80
81 // Get the sort order.
82 int order() const
83 { return order_p; }
84
85protected:
86 // sort order; -1 = ascending, 1 = descending
88 // address of first data point
89 const void* data_p;
90 // increment for next data point
92 // comparison object; use std::shared_ptr for memory management
93 std::shared_ptr<BaseCompare> ccmpObj_p;
94 // comparison object; use raw pointer for performance
96};
97
98
99
100// <summary> Sort on one or more keys, ascending and/or descending </summary>
101// <use visibility=export>
102// <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
103// </reviewed>
104
105// <synopsis>
106// <src>Sort</src> lets you sort data on one or more keys in a mix of
107// <src>Sort::ascending</src> and <src>Sort::descending</src> order.
108// Duplicates can be skipped by giving the option
109// <src>Sort::NoDuplicates</src>. Only in this case the number of output
110// elements can be different from the number of input elements.
111// <br>The <src>unique</src> function offers another way of getting
112// unique values.
113// <p>
114// Class <src>Sort</src> does not sort the data themselves, but
115// returns an index to them. This gives more flexibility and
116// allows the sort to be stable; but it is slower.
117// <br>Very fast sorting of the data themselves can be done with the
118// functions in class <linkto class=GenSort>GenSort</linkto>.
119// If sorting on a single key with a standard data type is done,
120// Sort will use GenSortIndirect to speed up the sort.
121// <br>
122// Four sort algorithms are provided:
123// <DL>
124// <DT> <src>Sort::ParSort</src>
125// <DD> The parallel merge sort is the fastest if it can use multiple threads.
126// For a single thread it has O(n*log(n)) behaviour, but is slower
127// than quicksort.
128// A drawback is that it needs an extra index array to do the merge.
129// <DT> <src>Sort::InsSort</src>
130// <DD> Insertion sort has O(n*n) behaviour, thus is very slow for large
131// arrays. However, it is the fastest method for small arrays
132// (< 50 elements) and for arrays already (almost) in the right order.
133// <DT> <src>Sort::QuickSort</src>
134// <DD> Care has been taken to solve the well-known quicksort problems
135// like "array already in order" or "many equal elements". The
136// behaviour is O(n*log(n)) in all the cases tested, even in
137// degenerated cases where the SUN Solaris qsort algorithm is O(n*n).
138// <DT> <src>Sort::HeapSort</src>
139// <DD> Heapsort has O(n*log(n)) behaviour. Its speed is lower than
140// that of QuickSort, so QuickSort is the default algorithm.
141// </DL>
142// The default is to use QuickSort for small arrays or if only a single
143// thread can be used. Otherwise ParSort is the default.
144//
145// All sort algorithms are <em>stable</em>, which means that the original
146// order is kept when keys are equal.
147//
148// The sort is a four step process:
149// <ol>
150// <li> Construct the <src>Sort</src> object.
151// <li> Define the sort keys. The function <src>sortKey</src> must be
152// called for each sort key (the most significant one first).
153// The comparison object can be passed in directly, or a
154// <linkto group="DataType.h#DataType">basic data type</linkto>
155// can be given. In the latter case the appropriate ObjCompare
156// comparison object will be created.
157// <li> Sort the data. The function <src>sort</src> returns an index
158// array, which is allocated when needed.
159// <li> Destruct the <src>Sort</src> object (usually done automatically)
160// and delete the index array.
161// </ol>
162// The data can be in a single array of structs, in separate arrays, or
163// in a mix of those. Of course, all arrays must have the same length.
164// The data can be passed to the <src>Sort</src> constructor and/or to the
165// <src>sortKey</src> function. If passed to the <src>Sort</src> constructor,
166// the offset of the data item in the data array must be given to
167// <src>sortKey</src>.
168// </synopsis>
169
170// <example>
171// In the first example we sort the data contained in two "parallel"
172// arrays, <src>idata</src> and <src>ddata</src>, both of length
173// <src>nrdata</src>.
174// <srcblock>
175// Sort sort;
176// sort.sortKey (idata, TpInt); // define 1st sort key
177// sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key
178// Vector<uInt> inx;
179// sort.sort (inx, nrdata);
180// for (uInt i=0; i<nrdata; i++) { // show sorted data
181// cout << idata[inx[i]] << " " << ddata[inx[i]] << endl;
182// }
183// </srcblock>
184// Now <src>nr</src> contains the nr of records (=<src>nrdata</src>)
185// and <src>inx</src> an array of (sorted) indices.
186//
187// In the second example we sort the data stored in an array of structs
188// on the double (ascending) and the string (descending). We can pass
189// the data to the <src>Sort</src> constructor, and the offsets of the
190// struct elements to the <src>sortKey</src> function.
191// <srcblock>
192// struct Ts {
193// String as;
194// double ad;
195// }
196// Vector<uInt> inx;
197// Sort sort (tsarr, sizeof(Ts));
198// sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble);
199// sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString,
200// Sort::Descending);
201// sort.sort (inx, nrts);
202// </srcblock>
203// Note that the first argument in function <src>sortKey</src> gives
204// the offset of the variable in the struct.
205//
206// Alternatively, and probably slightly easier, we could pass the data
207// to the <src>sortKey</src> function and use an increment:
208// <srcblock>
209// struct Ts {
210// String as;
211// double ad;
212// }
213// Vector<uInt> inx;
214// Sort sort;
215// sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts));
216// sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending);
217// sort.sort (inx, nrts);
218// </srcblock>
219//
220// Finally, we could provide a comparison object for the struct.
221// <srcblock>
222// struct Ts {
223// String as;
224// double ad;
225// }
226// class MyCompare: public BaseCompare {
227// virtual int comp (const void* val1, const void* val2) const
228// {
229// const Ts& t1 = *(Ts*)val1;
230// const Ts& t2 = *(Ts*)val2;
231// if (t1.ad < t2.ad) return -1;
232// if (t1.ad > t2.ad) return 1;
233// if (t1.as < t2.as) return 1; // string must be descending
234// if (t1.as > t2.as) return -1;
235// return 0;
236// }
237// };
238// Vector<uInt> inx;
239// Sort sort;
240// sort.sortKey (tsarr, compareTs, sizeof(Ts));
241// sort.sort (inx, nrts);
242// </srcblock>
243
244class Sort
245{
246public:
247 // Enumerate the sort options:
248 enum Option {DefaultSort=0, // ParSort, but QuickSort for small array
249 HeapSort=1, // use Heapsort algorithm
250 InsSort=2, // use insertion sort algorithm
251 QuickSort=4, // use Quicksort algorithm
252 ParSort=8, // use parallel merge sort algorithm
253 NoDuplicates=16}; // skip data with equal sort keys
254
255 // Enumerate the sort order:
256 enum Order {Ascending=-1,
258
259 // The default constructor can be used when the data is only passed
260 // in via function <src>sortKey</src>.
262
263 // Construct a Sort object for the given data array with elements
264 // of <src>elementSize</src> bytes. This data array will be used
265 // when an offset is given to the <src>sortKey</src> functions.
266 // You can still pass additional data arrays to the
267 // <src>sortKey</src> functions.
268 Sort (const void* data, uInt elementSize);
269
270 // Copy constructor (copy semantics).
271 Sort (const Sort&);
272
274
275 // Assignment (copy semantics).
277
278 // Define a sort key (the most significant key should be defined first).
279 // The key contains:
280 // <ul>
281 // <li> A pointer to the start of the data array. --- When structs are
282 // sorted on an element in the struct, the pointer must point to
283 // that element in the first struct.
284 // <li> A pointer to the comparison object to be used. --- The
285 // comparison object can be specified in two ways:
286 // <ul>
287 // <li> by giving a
288 // <linkto group="DataType.h#DataType">basic data type</linkto>,
289 // in which case the appropriate comparison object will be
290 // created automatically, or
291 // <li> by a std::shared_ptr of a comparison object.
292 // You may want to use the templated comparison classes
293 // <linkto class=ObjCompare>ObjCompare</linkto>(),
294 // but you are free to use any other class derived from BaseCompare
295 // that implements the <src>comp</src> function.
296 // </ul>
297 // <li> The increment from one data element to the next. --- When
298 // structs are sorted on an element in the struct, the increment
299 // should be the size of the struct. If the comparison object is
300 // automatically created using the data type specified, the default
301 // increment is the size of the data type.
302 // <li> The sort order. --- <src>Ascending</src> (default) or
303 // <src>Descending</src>;
304 // </ul>
305 //
306 // When the data array has been passed to the Sort constructor,
307 // the data pointer and the increment arguments can be replaced by a
308 // single argument: the offset of the key in each element of the array.
309 //
310 // <group>
311 void sortKey (const void* data, DataType, uInt increment = 0,
312 Order = Ascending);
313 void sortKey (const void* data, const std::shared_ptr<BaseCompare>&,
314 uInt increment, Order = Ascending);
315 void sortKey (uInt offset, DataType, Order = Ascending);
316 void sortKey (uInt offset, const std::shared_ptr<BaseCompare>&,
317 Order = Ascending);
318 // </group>
319
320 // Sort the data array of <src>nrrec</src> records.
321 // The result is an array of indices giving the requested order.
322 // It returns the number of resulting records. The indices array
323 // is resized to that number.
324 // <br> By default it'll try if the faster GenSortIndirect can be used
325 // if a sort on a single key is used.
326 uInt sort (Vector<uInt>& indexVector, uInt nrrec,
327 int options = DefaultSort, Bool tryGenSort = True) const;
328 uInt64 sort (Vector<uInt64>& indexVector, uInt64 nrrec,
329 int options = DefaultSort, Bool tryGenSort = True) const;
330
331 // Get all unique records in a sorted array. The array order is
332 // given in the indexVector (as possibly returned by the sort function).
333 // The default indexVector is 0..nrrec-1.
334 // The index of each first unique record is returned in the uniqueVector.
335 // They are indices in the supplied indexVector, so
336 // <src>data[indexVector(uniqueVector(i))]</src>
337 // is giving the i-th unique record.
338 // Note that the records indexed by <src>indexVector(uniqueVector(i))</src>
339 // till <src>indexVector(uniqueVector(i+1))</src> are all the same.
340 // <br>
341 // It returns the number of unique records. The unique array
342 // is resized to that number.
343 // The third version also gives back a vector with the keys that
344 // change in each sorting group. The size of changeKey is the same as
345 // uniqueVector, and for each unique sorting group indicates the index
346 // of the keyword that will change at the end of the group.
347 // <group>
348 uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const;
349 uInt unique (Vector<uInt>& uniqueVector,
350 const Vector<uInt>& indexVector) const;
351 uInt unique (Vector<uInt>& uniqueVector,
352 Vector<size_t>& changeKey,
353 const Vector<uInt>& indexVector) const;
354 uInt64 unique (Vector<uInt64>& uniqueVector, uInt64 nrrec) const;
356 const Vector<uInt64>& indexVector) const;
358 Vector<size_t>& changeKey,
359 const Vector<uInt64>& indexVector) const;
360 // </group>
361
362private:
363 template<typename T>
364 T doSort (Vector<T>& indexVector, T nrrec,
365 int options = DefaultSort, Bool tryGenSort = True) const;
366
367 template <typename T>
368 T doUnique (Vector<T>& uniqueVector, T nrrec) const;
369 template <typename T>
370 T doUnique (Vector<T>& uniqueVector, const Vector<T>& indexVector) const;
371 template <typename T>
372 T doUnique (Vector<T>& uniqueVector, Vector<size_t>& changeKey,
373 const Vector<T>& indexVector) const;
374
375 // Copy that Sort object to this.
376 void copy (const Sort& that);
377
378 // Add a sort key giving a data type and stride or the sort key.
379 // <group>
380 void addKey (const void* data, DataType, uInt increment, int options);
382 // </group>
383
384 // Do an insertion sort, optionally skipping duplicates.
385 // <group>
386 template<typename T>
387 T insSort (T nr, T* indices) const;
388 template<typename T>
389 T insSortNoDup (T nr, T* indices) const;
390 // </group>
391
392 // Do a merge sort, if possible in parallel using OpenMP.
393 // Note that the env.var. OMP_NUM_TRHEADS sets the maximum nr of threads
394 // to use. It defaults to the number of cores.
395 template<typename T>
396 T parSort (int nthr, T nrrec, T* inx) const;
397 template<typename T>
398 void merge (T* inx, T* tmp, T size, T* index,
399 T nparts) const;
400
401 // Do a quicksort, optionally skipping duplicates
402 // (qkSort is the actual quicksort function).
403 // <group>
404 template<typename T>
405 T quickSort (T nr, T* indices) const;
406 template<typename T>
407 T quickSortNoDup (T nr, T* indices) const;
408 template<typename T>
409 void qkSort (T nr, T* indices) const;
410 // </group>
411
412 // Do a heapsort, optionally skipping duplicates.
413 // <group>
414 template<typename T>
415 T heapSort (T nr, T* indices) const;
416 template<typename T>
417 T heapSortNoDup (T nr, T* indices) const;
418 // </group>
419
420 // Siftdown algorithm for heapsort.
421 template<typename T>
422 void siftDown (T low, T up, T* indices) const;
423
424 // Compare 2 records based on the comparison functions
425 template<typename T>
426 int compare (T index1, T index2) const;
427
428 // As compare() but it also gives back the index of the first comparison
429 // function that didn't match.
430 template<typename T>
431 int compareChangeIdx(T i1, T i2, size_t& idxComp) const;
432
433 // Swap 2 indices.
434 template<typename T>
435 inline void swap (T index1, T index2, T* indices) const
436 {
437 T t = indices[index1];
438 indices[index1] = indices[index2];
439 indices[index2] = t;
440 }
441
442 //# Data memebers
443 PtrBlock<SortKey*> keys_p; //# keys to sort on
444 size_t nrkey_p; //# #sort-keys
445 const void* data_p; //# pointer to data records
446 uInt size_p; //# size of data record
447 int order_p; //# -1=asc 0=mixed 1=desc
448};
449
450
451} //# NAMESPACE CASACORE - END
452
453#endif
abstract base class for comparing two objects
Definition Compare.h:63
A drop-in replacement for Block<T*>.
Definition Block.h:812
int order() const
Get the sort order.
Definition Sort.h:82
uInt tryGenSort(Vector< uInt > &indexVector, uInt nrrec, int opt) const
Try if GenSort can be used for this single key.
BaseCompare * cmpObj_p
comparison object; use raw pointer for performance
Definition Sort.h:95
uInt64 tryGenSort(Vector< uInt64 > &indexVector, uInt64 nrrec, int opt) const
SortKey & operator=(const SortKey &)
Assignment (copy semantics).
SortKey(const SortKey &)
Copy constructor (copy semantics).
const void * data_p
address of first data point
Definition Sort.h:89
int order_p
sort order; -1 = ascending, 1 = descending
Definition Sort.h:87
std::shared_ptr< BaseCompare > ccmpObj_p
comparison object; use std::shared_ptr for memory management
Definition Sort.h:93
uInt incr_p
increment for next data point
Definition Sort.h:91
SortKey(const void *data, const std::shared_ptr< BaseCompare > &, uInt increment, int order)
Define a sort key in a given data array using the indicated comparison object, stride and sort order.
Sort on one or more keys, ascending and/or descending.
Definition Sort.h:245
uInt64 unique(Vector< uInt64 > &uniqueVector, const Vector< uInt64 > &indexVector) const
Option
Enumerate the sort options:
Definition Sort.h:248
T heapSortNoDup(T nr, T *indices) const
const void * data_p
Definition Sort.h:445
void addKey(const void *data, DataType, uInt increment, int options)
Add a sort key giving a data type and stride or the sort key.
uInt64 sort(Vector< uInt64 > &indexVector, uInt64 nrrec, int options=DefaultSort, Bool tryGenSort=True) const
int compareChangeIdx(T i1, T i2, size_t &idxComp) const
As compare() but it also gives back the index of the first comparison function that didn't match.
T parSort(int nthr, T nrrec, T *inx) const
Do a merge sort, if possible in parallel using OpenMP.
uInt size_p
Definition Sort.h:446
void sortKey(const void *data, const std::shared_ptr< BaseCompare > &, uInt increment, Order=Ascending)
T doUnique(Vector< T > &uniqueVector, Vector< size_t > &changeKey, const Vector< T > &indexVector) const
void sortKey(uInt offset, const std::shared_ptr< BaseCompare > &, Order=Ascending)
size_t nrkey_p
Definition Sort.h:444
void copy(const Sort &that)
Copy that Sort object to this.
uInt unique(Vector< uInt > &uniqueVector, uInt nrrec) const
Get all unique records in a sorted array.
T insSort(T nr, T *indices) const
Do an insertion sort, optionally skipping duplicates.
uInt unique(Vector< uInt > &uniqueVector, const Vector< uInt > &indexVector) const
T doSort(Vector< T > &indexVector, T nrrec, int options=DefaultSort, Bool tryGenSort=True) const
uInt unique(Vector< uInt > &uniqueVector, Vector< size_t > &changeKey, const Vector< uInt > &indexVector) const
T quickSort(T nr, T *indices) const
Do a quicksort, optionally skipping duplicates (qkSort is the actual quicksort function).
Sort & operator=(const Sort &)
Assignment (copy semantics).
void merge(T *inx, T *tmp, T size, T *index, T nparts) const
int compare(T index1, T index2) const
Compare 2 records based on the comparison functions.
T heapSort(T nr, T *indices) const
Do a heapsort, optionally skipping duplicates.
void sortKey(uInt offset, DataType, Order=Ascending)
void sortKey(const void *data, DataType, uInt increment=0, Order=Ascending)
Define a sort key (the most significant key should be defined first).
uInt64 unique(Vector< uInt64 > &uniqueVector, uInt64 nrrec) const
Order
Enumerate the sort order:
Definition Sort.h:256
T quickSortNoDup(T nr, T *indices) const
void swap(T index1, T index2, T *indices) const
Swap 2 indices.
Definition Sort.h:435
T doUnique(Vector< T > &uniqueVector, const Vector< T > &indexVector) const
Sort(const void *data, uInt elementSize)
Construct a Sort object for the given data array with elements of elementSize bytes.
T doUnique(Vector< T > &uniqueVector, T nrrec) const
uInt sort(Vector< uInt > &indexVector, uInt nrrec, int options=DefaultSort, Bool tryGenSort=True) const
Sort the data array of nrrec records.
uInt64 unique(Vector< uInt64 > &uniqueVector, Vector< size_t > &changeKey, const Vector< uInt64 > &indexVector) const
T insSortNoDup(T nr, T *indices) const
void addKey(SortKey *)
Sort()
The default constructor can be used when the data is only passed in via function sortKey.
PtrBlock< SortKey * > keys_p
Definition Sort.h:443
Sort(const Sort &)
Copy constructor (copy semantics).
void qkSort(T nr, T *indices) const
int order_p
Definition Sort.h:447
void siftDown(T low, T up, T *indices) const
Siftdown algorithm for heapsort.
this file contains all the compiler specific defines
Definition mainpage.dox:28
unsigned int uInt
Definition aipstype.h:49
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40
const Bool True
Definition aipstype.h:41
unsigned long long uInt64
Definition aipsxtype.h:37