casacore
GenSort.h
Go to the documentation of this file.
1 //# GenSort.h: General sort functions
2 //# Copyright (C) 1993,1994,1995,1996,1997,1999
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef CASA_GENSORT_H
29 #define CASA_GENSORT_H
30 
31 #include <casacore/casa/aips.h>
32 #include <casacore/casa/Arrays/ArrayFwd.h>
33 #include <casacore/casa/Utilities/Sort.h>
34 
35 namespace casacore { //# NAMESPACE CASACORE - BEGIN
36 
37 //# Forward declarations.
38 template<class T> class Block;
39 
40 // <summary> General in-place sort functions </summary>
41 // <use visibility=local>
42 // <reviewed reviewer="Friso Olnon" date="1995/03/16" tests="tGenSort" demos="">
43 
44 // <synopsis>
45 //
46 // The static member functions of this templated class are highly optimized
47 // sort functions. They do an in-place sort of an array of values. The
48 // functions are templated, so they can in principle be used with any
49 // data type. However, if used with non-builtin data types, their
50 // class must provide certain functions (see <em>Template Type Argument
51 // Requirements</em>).
52 //
53 // If it is impossible or too expensive to define these functions, the
54 // <linkto class=Sort>Sort</linkto> class can be used instead. This sorts
55 // indirectly using an index array. Instead of the functions mentioned
56 // above it requires a comparison routine.
57 //
58 // The <src>GenSort</src> functions can sort:
59 // <ul>
60 // <li> C-arrays of values;
61 // <li> <src>Array</src>s of values -- the array can have any shape
62 // and the increment can be >1;
63 // <li> <src>Block</src>s of values -- there is a special function to
64 // sort less elements than the size of the <src>Block</src>.
65 // </ul>
66 //
67 // The sort order can be specified in the order field:
68 // <dl>
69 // <dt> <src>Sort::Ascending</src> (default),
70 // <dt> <src>Sort::Descending</src>.
71 // </dl>
72 //
73 // Previously the sort algorithm to use could be given in the options field.
74 // <dl>
75 // <dt> <src>Sort::QuickSort</src> (default)
76 // <dd> is the fastest. It is about 4-6 times faster
77 // than the qsort function on the SUN. No worst case has been
78 // found, even not for cases where qsort is terribly slow.
79 // <dt> <src>Sort::HeapSort</src>
80 // <dd> is about twice as slow as quicksort.
81 // It has the advantage that the worst case is always o(n*log(n)),
82 // while quicksort can have hypothetical inputs with o(n*n).
83 // <dt> <src>Sort::InsSort</src>
84 // <dd> is o(n*n) for random inputs. It is, however, the
85 // only stable sort (i.e. equal values remain in the original order).
86 // </dl>
87 // However, these options are not used anymore because the sort now always
88 // uses a merge sort that is equally fast for random input and much faster for
89 // degenerated cases like an already ordered or reversely ordered array.
90 // Furthermore, merge sort is always stable and will be parallelized if OpenMP
91 // support is enabled giving a 6-fold speedup on 8 cores.
92 // <br><src>Sort::NoDuplicates</src> in the options field indicates that
93 // duplicate values will be removed (only the first occurrance is kept).
94 // <br>The previous sort functionality is still available through the functions
95 // quickSort, heapSort, and insSort.
96 // <p>
97 // All the sort functions return the number of values sorted as their
98 // function value; when duplicate values have been removed, the number of
99 // unique valuess will be returned.
100 // <p>
101 // The class also provides a function to find the k-th largest value
102 // in an array of values. This uses a stripped-down version of quicksort
103 // and is at least 6 times faster than a full quicksort.
104 // </synopsis>
105 
106 // <templating arg=T>
107 // <li> <src>operator=</src> to assign when swapping elements
108 // <li> <src>operator<</src>, <src>operator></src> and
109 // <src>operator==</src> to compare elements
110 // <li> default constructor to allocate a temporary
111 // </templating>
112 
113 template<class T> class GenSort
114 {
115 public:
116 
117  // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
118  // The sort is done in place and is always stable (thus equal keys keep
119  // their original order). It returns the number of values, which
120  // can be different if a NoDuplicates sort is done.
121  // <br>Insertion sort is used for short arrays (<50 elements). Otherwise,
122  // a merge sort is used which will be parallelized if casacore is built
123  // with OpenMP support.
124  // <group>
126  int options = 0);
127 
129  int options = 0);
130 
132  int options = 0);
133  // <group>
134 
135  // Find the k-th largest value.
136  // <br>Note: it does a partial quicksort, thus the data array gets changed.
137  static T kthLargest (T* data, uInt nr, uInt k);
138 
139  // Sort C-array using quicksort.
141  int options = 0);
142  // Sort C-array using heapsort.
144  int options = 0);
145  // Sort C-array using insertion sort.
147  int options = 0);
148  // Sort C-array using parallel merge sort (using OpenMP).
149  // By default OpenMP determines the number of threads that can be used.
151  int options = 0, int nthread = 0);
152 
153  // Swap 2 elements in array.
154  static inline void swap (T&, T&);
155 
156  // Reverse the elements in <src>res</src> and put them into <src>data</src>.
157  // Care is taken if both pointers reference the same data.
158  static void reverse (T* data, const T* res, uInt nrrec);
159 
160 private:
161  // The<src>data</src> buffer is divided in <src>nparts</src> parts.
162  // In each part the values are in ascending order.
163  // The index tells the nr of elements in each part.
164  // Recursively each two subsequent parts are merged until only part is left
165  // (giving the sorted array). Alternately <src>data</src> and <src>tmp</src>
166  // are used for the merge result. The pointer containing the final result
167  // is returned.
168  // <br>If possible, merging the parts is done in parallel (using OpenMP).
169  static T* merge (T* data, T* tmp, uInt nrrec, uInt* index,
170  uInt nparts);
171 
172  // Quicksort in ascending order.
173  static void quickSortAsc (T*, Int, Bool multiThread=False, Int rec_lim=128);
174 
175  // Heapsort in ascending order.
176  static void heapSortAsc (T*, Int);
177  // Helper function for ascending heapsort.
178  static void heapAscSiftDown (Int, Int, T*);
179 
180  // Insertion sort in ascending order.
181  static uInt insSortAsc (T*, Int, int option);
182  // Insertion sort in ascending order allowing duplicates.
183  // This is also used by quicksort for its last steps.
184  static uInt insSortAscDup (T*, Int);
185  // Insertion sort in ascending order allowing no duplicates.
186  // This is also used by the other sort algorithms to skip duplicates.
187  static uInt insSortAscNoDup (T*, Int);
188 };
189 
190 
191 // <summary> General indirect sort functions </summary>
192 // <use visibility=local>
193 // <reviewed reviewer="" date="" tests="" demos="">
194 
195 // <synopsis>
196 // This class is similar to <linkto class=GenSort>GenSort</linkto>.
197 // The only difference is that the functions in this class sort
198 // indirectly instead of in-place.
199 // They return the result of the sort as an sorted vector of indices
200 // This is slower, because an extra indirection is involved in each
201 // comparison. However, this sort allows to sort const data.
202 // Another advantage is that this sort is always stable (i.e. equal
203 // values are kept in their original order).
204 
205 template<class T, class INX=uInt> class GenSortIndirect
206 {
207 public:
208 
209  // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
210  // The resulting index vector gives the sorted indices.
211  static INX sort (Vector<INX>& indexVector, const T* data, INX nr,
213  int options = Sort::QuickSort);
214 
215  // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
216  // The resulting index vector gives the sorted indices.
217  static INX sort (Vector<INX>& indexVector, const Array<T>& data,
219  int options = Sort::QuickSort);
220 
221  // Sort a C-array containing <src>nr</src> <src>T</src>-type objects.
222  // The resulting index vector gives the sorted indices.
223  static INX sort (Vector<INX>& indexVector, const Block<T>& data, INX nr,
225  int options = Sort::QuickSort);
226 
227  // Find the index of the k-th largest value.
228  static INX kthLargest (T* data, INX nr, INX k);
229 
230  // Sort container using quicksort.
231  // The argument <src>inx</src> gives the index defining the order of the
232  // values in the data array. Its length must be at least <src>nr</src>
233  // and it must be filled with the index values of the data.
234  // Usually this is 0..nr, but it could contain a selection of the data.
235  static INX quickSort (INX* inx, const T* data,
236  INX nr, Sort::Order, int options);
237  // Sort container using heapsort.
238  static INX heapSort (INX* inx, const T* data,
239  INX nr, Sort::Order, int options);
240  // Sort container using insertion sort.
241  static INX insSort (INX* inx, const T* data,
242  INX nr, Sort::Order, int options);
243  // Sort container using parallel merge sort (using OpenMP).
244  // By default the maximum number of threads is used.
245  static INX parSort (INX* inx, const T* data,
246  INX nr, Sort::Order, int options, int nthreads=0);
247 
248 private:
249  // Swap 2 indices.
250  static inline void swapInx (INX& index1, INX& index2);
251 
252  // The<src>data</src> buffer is divided in <src>nparts</src> parts.
253  // In each part the values are in ascending order.
254  // The index tells the nr of elements in each part.
255  // Recursively each two subsequent parts are merged until only part is left
256  // (giving the sorted array). Alternately <src>data</src> and <src>tmp</src>
257  // are used for the merge result. The pointer containing the final result
258  // is returned.
259  // <br>If possible, merging the parts is done in parallel (using OpenMP).
260  static INX* merge (const T* data, INX* inx, INX* tmp, INX nrrec,
261  INX* index, INX nparts);
262 
263  // Check if 2 values are in ascending order.
264  // When equal, the order is correct if index1<index2.
265  static inline int isAscending (const T* data, INX index1, INX index2);
266 
267 
268  // Quicksort in ascending order.
269  static void quickSortAsc (INX* inx, const T*, INX nr,
270  Bool multiThread=False, Int rec_lim=128);
271 
272  // Heapsort in ascending order.
273  static void heapSortAsc (INX* inx, const T*, INX nr);
274  // Helper function for ascending heapsort.
275  static void heapAscSiftDown (INX* inx, INX, INX, const T*);
276 
277  // Insertion sort in ascending order.
278  static INX insSortAsc (INX* inx, const T*, INX nr, int option);
279  // Insertion sort in ascending order allowing duplicates.
280  // This is also used by quicksort for its last steps.
281  static INX insSortAscDup (INX* inx, const T*, INX nr);
282  // Insertion sort in ascending order allowing no duplicates.
283  // This is also used by the other sort algorithms to skip duplicates.
284  static INX insSortAscNoDup (INX* inx, const T*, INX nr);
285 };
286 
287 
288 // <summary> Global in-place sort functions </summary>
289 
290 // The following global functions are easier to use than the static
291 // <linkto class=GenSort>GenSort</linkto> member functions.
292 // They do an in-place sort of data, thus the data themselves are moved
293 // ending up in the requested order.
294 // <p>
295 // The default sorting method is QuickSort, which is the fastest.
296 // However, there are pathological cases where it can be slow.
297 // HeapSort is about twice a slow, but its speed is guaranteed.
298 // InsSort (insertion sort) can be very, very slow, but it is the only
299 // stable sort method (i.e. equal values are kept in their original order).
300 // However, <linkto name=genSortIndirect> indirect sorting methods </linkto>
301 // are available to make QuickSort and HeapSort stable.
302 // <p>
303 // All sort methods have an option to skip duplicate values. This is the
304 // only case where the returned number of values can be less than the
305 // original number of values.
306 
307 // <group name=genSortInPlace>
308 
309 template<class T>
310 inline
311 uInt genSort (T* data, uInt nr,
312  Sort::Order order = Sort::Ascending, int options=0)
313  { return GenSort<T>::sort (data, nr, order, options); }
314 
315 template<class T>
316 inline
318  Sort::Order order = Sort::Ascending, int options=0)
319  { return GenSort<T>::sort (data, order, options); }
320 
321 template<class T>
322 inline
324  Sort::Order order = Sort::Ascending, int options=0)
325  { return GenSort<T>::sort (data, data.nelements(), order, options); }
326 
327 template<class T>
328 inline
330  Sort::Order order = Sort::Ascending, int options=0)
331  { return GenSort<T>::sort (data, nr, order, options); }
332 // </group>
333 
334 
335 // <summary> Global indirect sort functions </summary>
336 
337 // The following global functions easier to use than the static
338 // <linkto class=GenSortIndirect>GenSortIndirect</linkto> member functions.
339 // They do an indirect sort of data, thus the data themselves are not moved.
340 // Rather an index vector is returned giving the sorted data indices.
341 // <p>
342 // The sorting method used is merge sort, which is always stable.
343 // It is the fastest, especially if it can use multiple threads.
344 // <p>
345 // Unlike the <linkto name=genSortInPlace> in-place sorting methods
346 // </linkto>, all indirect sorting methods are stable (i.e. equal
347 // values are left in their original order).
348 // <p>
349 // All sort methods have an option to skip duplicate values. This is the
350 // only case where the returned number of values can be less than the
351 // original number of values.
352 
353 // <group name=genSortIndirect>
354 
355 template<class T, class INX=uInt>
356 inline
357 uInt genSort (Vector<INX>& indexVector, const T* data, INX nr,
358  Sort::Order order = Sort::Ascending, int options=0)
359  { return GenSortIndirect<T,INX>::sort (indexVector, data, nr, order, options); }
360 
361 template<class T, class INX=uInt>
362 inline
363 uInt genSort (Vector<INX>& indexVector, const Array<T>& data,
364  Sort::Order order = Sort::Ascending, int options=0)
365  { return GenSortIndirect<T,INX>::sort (indexVector, data, order, options); }
366 
367 template<class T, class INX=uInt>
368 inline
369 uInt genSort (Vector<INX>& indexVector, const Block<T>& data,
370  Sort::Order order = Sort::Ascending, int options=0)
371  { return GenSortIndirect<T,INX>::sort (indexVector, data, data.nelements(),
372  order, options); }
373 
374 template<class T, class INX=uInt>
375 inline
376 uInt genSort (Vector<INX>& indexVector, const Block<T>& data, INX nr,
377  Sort::Order order = Sort::Ascending, int options=0)
378  { return GenSortIndirect<T,INX>::sort (indexVector, data, nr, order, options); }
379 // </group>
380 
381 
382 
383 // Implement inline member functions.
384 
385 template<class T>
386 inline void GenSort<T>::swap (T& l, T& r)
387 {
388  T t = l;
389  l = r;
390  r = t;
391 }
392 
393 template<class T, class INX>
394 inline void GenSortIndirect<T,INX>::swapInx (INX& i, INX& j)
395 {
396  INX t = i;
397  i = j;
398  j = t;
399 }
400 template<class T, class INX>
401 inline int GenSortIndirect<T,INX>::isAscending (const T* data, INX i, INX j)
402 {
403  return (data[i] > data[j] || (data[i] == data[j] && i > j));
404 }
405 
406 
407 
408 } //# NAMESPACE CASACORE - END
409 
410 #ifndef CASACORE_NO_AUTO_TEMPLATES
411 #include <casacore/casa/Utilities/GenSort.tcc>
412 #endif //# CASACORE_NO_AUTO_TEMPLATES
413 #endif
simple 1-D array
Definition: Block.h:200
size_t nelements() const
The number of elements contained in this Block<T>.
Definition: Block.h:611
General indirect sort functions.
Definition: GenSort.h:206
static INX insSortAscNoDup(INX *inx, const T *, INX nr)
Insertion sort in ascending order allowing no duplicates.
static INX quickSort(INX *inx, const T *data, INX nr, Sort::Order, int options)
Sort container using quicksort.
static INX sort(Vector< INX > &indexVector, const Array< T > &data, Sort::Order=Sort::Ascending, int options=Sort::QuickSort)
Sort a C-array containing nr T-type objects.
static void heapSortAsc(INX *inx, const T *, INX nr)
Heapsort in ascending order.
static void swapInx(INX &index1, INX &index2)
Swap 2 indices.
Definition: GenSort.h:394
static INX insSort(INX *inx, const T *data, INX nr, Sort::Order, int options)
Sort container using insertion sort.
static INX insSortAsc(INX *inx, const T *, INX nr, int option)
Insertion sort in ascending order.
static INX sort(Vector< INX > &indexVector, const Block< T > &data, INX nr, Sort::Order=Sort::Ascending, int options=Sort::QuickSort)
Sort a C-array containing nr T-type objects.
static INX heapSort(INX *inx, const T *data, INX nr, Sort::Order, int options)
Sort container using heapsort.
static void heapAscSiftDown(INX *inx, INX, INX, const T *)
Helper function for ascending heapsort.
static INX insSortAscDup(INX *inx, const T *, INX nr)
Insertion sort in ascending order allowing duplicates.
static INX parSort(INX *inx, const T *data, INX nr, Sort::Order, int options, int nthreads=0)
Sort container using parallel merge sort (using OpenMP).
static INX * merge(const T *data, INX *inx, INX *tmp, INX nrrec, INX *index, INX nparts)
Thedata buffer is divided in nparts parts.
static INX kthLargest(T *data, INX nr, INX k)
Find the index of the k-th largest value.
static int isAscending(const T *data, INX index1, INX index2)
Check if 2 values are in ascending order.
Definition: GenSort.h:401
static void quickSortAsc(INX *inx, const T *, INX nr, Bool multiThread=False, Int rec_lim=128)
Quicksort in ascending order.
static INX sort(Vector< INX > &indexVector, const T *data, INX nr, Sort::Order=Sort::Ascending, int options=Sort::QuickSort)
Sort a C-array containing nr T-type objects.
static uInt insSortAscDup(T *, Int)
Insertion sort in ascending order allowing duplicates.
static uInt sort(T *, uInt nr, Sort::Order=Sort::Ascending, int options=0)
Sort a C-array containing nr T-type objects.
static uInt quickSort(T *, uInt nr, Sort::Order=Sort::Ascending, int options=0)
Sort C-array using quicksort.
static uInt sort(Block< T > &, uInt nr, Sort::Order=Sort::Ascending, int options=0)
static void reverse(T *data, const T *res, uInt nrrec)
Reverse the elements in res and put them into data.
static void heapAscSiftDown(Int, Int, T *)
Helper function for ascending heapsort.
static T * merge(T *data, T *tmp, uInt nrrec, uInt *index, uInt nparts)
Thedata buffer is divided in nparts parts.
static uInt sort(Array< T > &, Sort::Order=Sort::Ascending, int options=0)
static void swap(T &, T &)
Swap 2 elements in array.
Definition: GenSort.h:386
static uInt insSort(T *, uInt nr, Sort::Order=Sort::Ascending, int options=0)
Sort C-array using insertion sort.
static uInt heapSort(T *, uInt nr, Sort::Order=Sort::Ascending, int options=0)
Sort C-array using heapsort.
static void quickSortAsc(T *, Int, Bool multiThread=False, Int rec_lim=128)
Quicksort in ascending order.
static uInt parSort(T *, uInt nr, Sort::Order=Sort::Ascending, int options=0, int nthread=0)
Sort C-array using parallel merge sort (using OpenMP).
static void heapSortAsc(T *, Int)
Heapsort in ascending order.
static uInt insSortAsc(T *, Int, int option)
Insertion sort in ascending order.
static uInt insSortAscNoDup(T *, Int)
Insertion sort in ascending order allowing no duplicates.
static T kthLargest(T *data, uInt nr, uInt k)
Find the k-th largest value.
Order
Enumerate the sort order:
Definition: Sort.h:259
uInt genSort(T *data, uInt nr, Sort::Order order=Sort::Ascending, int options=0)
Global in-place sort functions The following global functions are easier to use than the static GenSo...
Definition: GenSort.h:311
this file contains all the compiler specific defines
Definition: mainpage.dox:28
const Bool False
Definition: aipstype.h:44
unsigned int uInt
Definition: aipstype.h:51
int Int
Definition: aipstype.h:50
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42