casacore
Loading...
Searching...
No Matches
GenSort.h
Go to the documentation of this file.
1//# GenSort.h: General sort functions
2//# Copyright (C) 1993,1994,1995,1996,1997,1999
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: casa-feedback@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25
26#ifndef CASA_GENSORT_H
27#define CASA_GENSORT_H
28
29#include <casacore/casa/aips.h>
30#include <casacore/casa/Arrays/ArrayFwd.h>
31#include <casacore/casa/Utilities/Sort.h>
32
33namespace casacore { //# NAMESPACE CASACORE - BEGIN
34
35//# Forward declarations.
36template<class T> class Block;
37
38// <summary> General in-place sort functions </summary>
39// <use visibility=local>
40// <reviewed reviewer="Friso Olnon" date="1995/03/16" tests="tGenSort" demos="">
41
42// <synopsis>
43//
44// The static member functions of this templated class are highly optimized
45// sort functions. They do an in-place sort of an array of values. The
46// functions are templated, so they can in principle be used with any
47// data type. However, if used with non-builtin data types, their
48// class must provide certain functions (see <em>Template Type Argument
49// Requirements</em>).
50//
51// If it is impossible or too expensive to define these functions, the
52// <linkto class=Sort>Sort</linkto> class can be used instead. This sorts
53// indirectly using an index array. Instead of the functions mentioned
54// above it requires a comparison routine.
55//
56// The <src>GenSort</src> functions can sort:
57// <ul>
58// <li> C-arrays of values;
59// <li> <src>Array</src>s of values -- the array can have any shape
60// and the increment can be >1;
61// <li> <src>Block</src>s of values -- there is a special function to
62// sort less elements than the size of the <src>Block</src>.
63// </ul>
64//
65// The sort order can be specified in the order field:
66// <dl>
67// <dt> <src>Sort::Ascending</src> (default),
68// <dt> <src>Sort::Descending</src>.
69// </dl>
70//
71// Previously the sort algorithm to use could be given in the options field.
72// <dl>
73// <dt> <src>Sort::QuickSort</src> (default)
74// <dd> is the fastest. It is about 4-6 times faster
75// than the qsort function on the SUN. No worst case has been
76// found, even not for cases where qsort is terribly slow.
77// <dt> <src>Sort::HeapSort</src>
78// <dd> is about twice as slow as quicksort.
79// It has the advantage that the worst case is always o(n*log(n)),
80// while quicksort can have hypothetical inputs with o(n*n).
81// <dt> <src>Sort::InsSort</src>
82// <dd> is o(n*n) for random inputs. It is, however, the
83// only stable sort (i.e. equal values remain in the original order).
84// </dl>
85// However, these options are not used anymore because the sort now always
86// uses a merge sort that is equally fast for random input and much faster for
87// degenerated cases like an already ordered or reversely ordered array.
88// Furthermore, merge sort is always stable and will be parallelized if OpenMP
89// support is enabled giving a 6-fold speedup on 8 cores.
90// <br><src>Sort::NoDuplicates</src> in the options field indicates that
91// duplicate values will be removed (only the first occurrance is kept).
92// <br>The previous sort functionality is still available through the functions
93// quickSort, heapSort, and insSort.
94// <p>
95// All the sort functions return the number of values sorted as their
96// function value; when duplicate values have been removed, the number of
97// unique valuess will be returned.
98// <p>
99// The class also provides a function to find the k-th largest value
100// in an array of values. This uses a stripped-down version of quicksort
101// and is at least 6 times faster than a full quicksort.
102// </synopsis>
103
104// <templating arg=T>
105// <li> <src>operator=</src> to assign when swapping elements
106// <li> <src>operator<</src>, <src>operator></src> and
107// <src>operator==</src> to compare elements
108// <li> default constructor to allocate a temporary
109// </templating>
110
111template<class T> class GenSort
112{
113public:
114
115 // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
116 // The sort is done in place and is always stable (thus equal keys keep
117 // their original order). It returns the number of values, which
118 // can be different if a NoDuplicates sort is done.
119 // <br>Insertion sort is used for short arrays (<50 elements). Otherwise,
120 // a merge sort is used which will be parallelized if casacore is built
121 // with OpenMP support.
122 // <group>
124 int options = 0);
125
127 int options = 0);
128
130 int options = 0);
131 // <group>
132
133 // Find the k-th largest value.
134 // <br>Note: it does a partial quicksort, thus the data array gets changed.
135 static T kthLargest (T* data, uInt nr, uInt k);
136
137 // Sort C-array using quicksort.
139 int options = 0);
140 // Sort C-array using heapsort.
142 int options = 0);
143 // Sort C-array using insertion sort.
145 int options = 0);
146 // Sort C-array using parallel merge sort (using OpenMP).
147 // By default OpenMP determines the number of threads that can be used.
149 int options = 0, int nthread = 0);
150
151 // Swap 2 elements in array.
152 static inline void swap (T&, T&);
153
154 // Reverse the elements in <src>res</src> and put them into <src>data</src>.
155 // Care is taken if both pointers reference the same data.
156 static void reverse (T* data, const T* res, uInt nrrec);
157
158private:
159 // The<src>data</src> buffer is divided in <src>nparts</src> parts.
160 // In each part the values are in ascending order.
161 // The index tells the nr of elements in each part.
162 // Recursively each two subsequent parts are merged until only part is left
163 // (giving the sorted array). Alternately <src>data</src> and <src>tmp</src>
164 // are used for the merge result. The pointer containing the final result
165 // is returned.
166 // <br>If possible, merging the parts is done in parallel (using OpenMP).
167 static T* merge (T* data, T* tmp, uInt nrrec, uInt* index,
168 uInt nparts);
169
170 // Quicksort in ascending order.
171 static void quickSortAsc (T*, Int, Bool multiThread=False, Int rec_lim=128);
172
173 // Heapsort in ascending order.
174 static void heapSortAsc (T*, Int);
175 // Helper function for ascending heapsort.
176 static void heapAscSiftDown (Int, Int, T*);
177
178 // Insertion sort in ascending order.
179 static uInt insSortAsc (T*, Int, int option);
180 // Insertion sort in ascending order allowing duplicates.
181 // This is also used by quicksort for its last steps.
182 static uInt insSortAscDup (T*, Int);
183 // Insertion sort in ascending order allowing no duplicates.
184 // This is also used by the other sort algorithms to skip duplicates.
186};
187
188
189// <summary> General indirect sort functions </summary>
190// <use visibility=local>
191// <reviewed reviewer="" date="" tests="" demos="">
192
193// <synopsis>
194// This class is similar to <linkto class=GenSort>GenSort</linkto>.
195// The only difference is that the functions in this class sort
196// indirectly instead of in-place.
197// They return the result of the sort as an sorted vector of indices
198// This is slower, because an extra indirection is involved in each
199// comparison. However, this sort allows to sort const data.
200// Another advantage is that this sort is always stable (i.e. equal
201// values are kept in their original order).
202//
203// The class is templated on the type T of the sort key and the type
204// INX of the index vector. In principle INX can be any type, but
205// it should be a sufficiently large integer type (say uInt or uInt64).
206template<class T, class INX=uInt> class GenSortIndirect
207{
208public:
209
210 // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
211 // The resulting index vector gives the sorted indices.
212 static INX sort (Vector<INX>& indexVector, const T* data, INX nr,
214 int options = Sort::QuickSort);
215
216 // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
217 // The resulting index vector gives the sorted indices.
218 static INX sort (Vector<INX>& indexVector, const Array<T>& data,
220 int options = Sort::QuickSort);
221
222 // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
223 // The resulting index vector gives the sorted indices.
224 static INX sort (Vector<INX>& indexVector, const Block<T>& data, INX nr,
226 int options = Sort::QuickSort);
227
228 // Find the index of the k-th largest value.
229 static INX kthLargest (T* data, INX nr, INX k);
230
231 // Sort container using quicksort.
232 // The argument <src>inx</src> gives the index defining the order of the
233 // values in the data array. Its length must be at least <src>nr</src>
234 // and it must be filled with the index values of the data.
235 // Usually this is 0..nr, but it could contain a selection of the data.
236 static INX quickSort (INX* inx, const T* data,
237 INX nr, Sort::Order, int options);
238 // Sort container using heapsort.
239 static INX heapSort (INX* inx, const T* data,
240 INX nr, Sort::Order, int options);
241 // Sort container using insertion sort.
242 static INX insSort (INX* inx, const T* data,
243 INX nr, Sort::Order, int options);
244 // Sort container using parallel merge sort (using OpenMP).
245 // By default the maximum number of threads is used.
246 static INX parSort (INX* inx, const T* data,
247 INX nr, Sort::Order, int options, int nthreads=0);
248
249private:
250 // Swap 2 indices.
251 static inline void swapInx (INX& index1, INX& index2);
252
253 // The<src>data</src> buffer is divided in <src>nparts</src> parts.
254 // In each part the values are in ascending order.
255 // The index tells the nr of elements in each part.
256 // Recursively each two subsequent parts are merged until only part is left
257 // (giving the sorted array). Alternately <src>data</src> and <src>tmp</src>
258 // are used for the merge result. The pointer containing the final result
259 // is returned.
260 // <br>If possible, merging the parts is done in parallel (using OpenMP).
261 static INX* merge (const T* data, INX* inx, INX* tmp, INX nrrec,
262 INX* index, INX nparts);
263
264 // Check if 2 values are in ascending order.
265 // When equal, the order is correct if index1<index2.
266 static inline int isAscending (const T* data, INX index1, INX index2);
267
268
269 // Quicksort in ascending order.
270 static void quickSortAsc (INX* inx, const T*, INX nr,
271 Bool multiThread=False, Int rec_lim=128);
272
273 // Heapsort in ascending order.
274 static void heapSortAsc (INX* inx, const T*, INX nr);
275 // Helper function for ascending heapsort.
276 static void heapAscSiftDown (INX* inx, INX, INX, const T*);
277
278 // Insertion sort in ascending order.
279 static INX insSortAsc (INX* inx, const T*, INX nr, int option);
280 // Insertion sort in ascending order allowing duplicates.
281 // This is also used by quicksort for its last steps.
282 static INX insSortAscDup (INX* inx, const T*, INX nr);
283 // Insertion sort in ascending order allowing no duplicates.
284 // This is also used by the other sort algorithms to skip duplicates.
285 static INX insSortAscNoDup (INX* inx, const T*, INX nr);
286};
287
288
289// <summary> Global in-place sort functions </summary>
290
291// The following global functions are easier to use than the static
292// <linkto class=GenSort>GenSort</linkto> member functions.
293// They do an in-place sort of data, thus the data themselves are moved
294// ending up in the requested order.
295// <p>
296// The default sorting method is QuickSort, which is the fastest.
297// However, there are pathological cases where it can be slow.
298// HeapSort is about twice a slow, but its speed is guaranteed.
299// InsSort (insertion sort) can be very, very slow, but it is the only
300// stable sort method (i.e. equal values are kept in their original order).
301// However, <linkto name=genSortIndirect> indirect sorting methods </linkto>
302// are available to make QuickSort and HeapSort stable.
303// <p>
304// All sort methods have an option to skip duplicate values. This is the
305// only case where the returned number of values can be less than the
306// original number of values.
307
308// <group name=genSortInPlace>
309
310template<class T>
311inline
312uInt genSort (T* data, uInt nr,
313 Sort::Order order = Sort::Ascending, int options=0)
314 { return GenSort<T>::sort (data, nr, order, options); }
315
316template<class T>
317inline
319 Sort::Order order = Sort::Ascending, int options=0)
320 { return GenSort<T>::sort (data, order, options); }
321
322template<class T>
323inline
325 Sort::Order order = Sort::Ascending, int options=0)
326 { return GenSort<T>::sort (data, data.nelements(), order, options); }
327
328template<class T>
329inline
331 Sort::Order order = Sort::Ascending, int options=0)
332 { return GenSort<T>::sort (data, nr, order, options); }
333// </group>
334
335
336// <summary> Global indirect sort functions </summary>
337
338// The following global functions easier to use than the static
339// <linkto class=GenSortIndirect>GenSortIndirect</linkto> member functions.
340// They do an indirect sort of data, thus the data themselves are not moved.
341// Rather an index vector is returned giving the sorted data indices.
342// <p>
343// The sorting method used is merge sort, which is always stable.
344// It is the fastest, especially if it can use multiple threads.
345// <p>
346// Unlike the <linkto name=genSortInPlace> in-place sorting methods
347// </linkto>, all indirect sorting methods are stable (i.e. equal
348// values are left in their original order).
349// <p>
350// All sort methods have an option to skip duplicate values. This is the
351// only case where the returned number of values can be less than the
352// original number of values.
353
354// <group name=genSortIndirect>
355
356template<class T, class INX=uInt>
357inline
358uInt genSort (Vector<INX>& indexVector, const T* data, INX nr,
359 Sort::Order order = Sort::Ascending, int options=0)
360 { return GenSortIndirect<T,INX>::sort (indexVector, data, nr, order, options); }
361
362template<class T, class INX=uInt>
363inline
364uInt genSort (Vector<INX>& indexVector, const Array<T>& data,
365 Sort::Order order = Sort::Ascending, int options=0)
366 { return GenSortIndirect<T,INX>::sort (indexVector, data, order, options); }
367
368template<class T, class INX=uInt>
369inline
370uInt genSort (Vector<INX>& indexVector, const Block<T>& data,
371 Sort::Order order = Sort::Ascending, int options=0)
372 { return GenSortIndirect<T,INX>::sort (indexVector, data, data.nelements(),
373 order, options); }
374
375template<class T, class INX=uInt>
376inline
377uInt genSort (Vector<INX>& indexVector, const Block<T>& data, INX nr,
378 Sort::Order order = Sort::Ascending, int options=0)
379 { return GenSortIndirect<T,INX>::sort (indexVector, data, nr, order, options); }
380// </group>
381
382
383
384// Implement inline member functions.
385
386template<class T>
387inline void GenSort<T>::swap (T& l, T& r)
388{
389 T t = l;
390 l = r;
391 r = t;
392}
393
394template<class T, class INX>
395inline void GenSortIndirect<T,INX>::swapInx (INX& i, INX& j)
396{
397 INX t = i;
398 i = j;
399 j = t;
400}
401template<class T, class INX>
402inline int GenSortIndirect<T,INX>::isAscending (const T* data, INX i, INX j)
403{
404 return (data[i] > data[j] || (data[i] == data[j] && i > j));
405}
406
407
408
409} //# NAMESPACE CASACORE - END
410
411#ifndef CASACORE_NO_AUTO_TEMPLATES
412#include <casacore/casa/Utilities/GenSort.tcc>
413#endif //# CASACORE_NO_AUTO_TEMPLATES
414#endif
simple 1-D array
Definition Block.h:198
size_t nelements() const
The number of elements contained in this Block<T>.
Definition Block.h:609
General indirect sort functions.
Definition GenSort.h:207
static INX insSortAscNoDup(INX *inx, const T *, INX nr)
Insertion sort in ascending order allowing no duplicates.
static INX quickSort(INX *inx, const T *data, INX nr, Sort::Order, int options)
Sort container using quicksort.
static INX * merge(const T *data, INX *inx, INX *tmp, INX nrrec, INX *index, INX nparts)
Thedata buffer is divided in nparts parts.
static INX sort(Vector< INX > &indexVector, const Array< T > &data, Sort::Order=Sort::Ascending, int options=Sort::QuickSort)
Sort a C-array containing nr T-type objects.
static void heapSortAsc(INX *inx, const T *, INX nr)
Heapsort in ascending order.
static void swapInx(INX &index1, INX &index2)
Swap 2 indices.
Definition GenSort.h:395
static INX insSort(INX *inx, const T *data, INX nr, Sort::Order, int options)
Sort container using insertion sort.
static INX insSortAsc(INX *inx, const T *, INX nr, int option)
Insertion sort in ascending order.
static INX sort(Vector< INX > &indexVector, const Block< T > &data, INX nr, Sort::Order=Sort::Ascending, int options=Sort::QuickSort)
Sort a C-array containing nr T-type objects.
static INX heapSort(INX *inx, const T *data, INX nr, Sort::Order, int options)
Sort container using heapsort.
static void heapAscSiftDown(INX *inx, INX, INX, const T *)
Helper function for ascending heapsort.
static INX insSortAscDup(INX *inx, const T *, INX nr)
Insertion sort in ascending order allowing duplicates.
static INX parSort(INX *inx, const T *data, INX nr, Sort::Order, int options, int nthreads=0)
Sort container using parallel merge sort (using OpenMP).
static INX kthLargest(T *data, INX nr, INX k)
Find the index of the k-th largest value.
static int isAscending(const T *data, INX index1, INX index2)
Check if 2 values are in ascending order.
Definition GenSort.h:402
static void quickSortAsc(INX *inx, const T *, INX nr, Bool multiThread=False, Int rec_lim=128)
Quicksort in ascending order.
static INX sort(Vector< INX > &indexVector, const T *data, INX nr, Sort::Order=Sort::Ascending, int options=Sort::QuickSort)
Sort a C-array containing nr T-type objects.
static uInt insSortAscDup(T *, Int)
Insertion sort in ascending order allowing duplicates.
static uInt sort(T *, uInt nr, Sort::Order=Sort::Ascending, int options=0)
Sort a C-array containing nr T-type objects.
static uInt quickSort(T *, uInt nr, Sort::Order=Sort::Ascending, int options=0)
Sort C-array using quicksort.
static uInt sort(Block< T > &, uInt nr, Sort::Order=Sort::Ascending, int options=0)
static void reverse(T *data, const T *res, uInt nrrec)
Reverse the elements in res and put them into data.
static void heapAscSiftDown(Int, Int, T *)
Helper function for ascending heapsort.
static uInt sort(Array< T > &, Sort::Order=Sort::Ascending, int options=0)
static void swap(T &, T &)
Swap 2 elements in array.
Definition GenSort.h:387
static T * merge(T *data, T *tmp, uInt nrrec, uInt *index, uInt nparts)
Thedata buffer is divided in nparts parts.
static uInt insSort(T *, uInt nr, Sort::Order=Sort::Ascending, int options=0)
Sort C-array using insertion sort.
static uInt heapSort(T *, uInt nr, Sort::Order=Sort::Ascending, int options=0)
Sort C-array using heapsort.
static void quickSortAsc(T *, Int, Bool multiThread=False, Int rec_lim=128)
Quicksort in ascending order.
static uInt parSort(T *, uInt nr, Sort::Order=Sort::Ascending, int options=0, int nthread=0)
Sort C-array using parallel merge sort (using OpenMP).
static void heapSortAsc(T *, Int)
Heapsort in ascending order.
static uInt insSortAsc(T *, Int, int option)
Insertion sort in ascending order.
static uInt insSortAscNoDup(T *, Int)
Insertion sort in ascending order allowing no duplicates.
static T kthLargest(T *data, uInt nr, uInt k)
Find the k-th largest value.
Order
Enumerate the sort order:
Definition Sort.h:256
uInt genSort(T *data, uInt nr, Sort::Order order=Sort::Ascending, int options=0)
Global in-place sort functions The following global functions are easier to use than the static GenSo...
Definition GenSort.h:312
this file contains all the compiler specific defines
Definition mainpage.dox:28
const Bool False
Definition aipstype.h:42
unsigned int uInt
Definition aipstype.h:49
int Int
Definition aipstype.h:48
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40