casacore
Loading...
Searching...
No Matches
ClassicalStatistics.h
Go to the documentation of this file.
1//# Copyright (C) 2000,2001
2//# Associated Universities, Inc. Washington DC, USA.
3//#
4//# This library is free software; you can redistribute it and/or modify it
5//# under the terms of the GNU Library General Public License as published by
6//# the Free Software Foundation; either version 2 of the License, or (at your
7//# option) any later version.
8//#
9//# This library is distributed in the hope that it will be useful, but WITHOUT
10//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12//# License for more details.
13//#
14//# You should have received a copy of the GNU Library General Public License
15//# along with this library; if not, write to the Free Software Foundation,
16//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17//#
18//# Correspondence concerning AIPS++ should be addressed as follows:
19//# Internet email: casa-feedback@nrao.edu.
20//# Postal address: AIPS++ Project Office
21//# National Radio Astronomy Observatory
22//# 520 Edgemont Road
23//# Charlottesville, VA 22903-2475 USA
24//#
25
26#ifndef SCIMATH_CLASSICALSTATISTICSS_H
27#define SCIMATH_CLASSICALSTATISTICSS_H
28
29#include <casacore/casa/aips.h>
30
31#include <casacore/scimath/StatsFramework/StatisticsAlgorithm.h>
32
33#include <casacore/scimath/StatsFramework/ClassicalQuantileComputer.h>
34#include <casacore/scimath/StatsFramework/StatisticsTypes.h>
35#include <casacore/scimath/StatsFramework/StatisticsUtilities.h>
36#include <set>
37#include <vector>
38#include <utility>
39
40namespace casacore {
41
42// Class to calculate statistics in a "classical" sense, ie using accumulators
43// with no special filtering beyond optional range filtering etc.
44//
45// setCalculateAsAdded() allows one to specify if statistics should be
46// calculated and updated upon each call to set/addData(). If False,
47// statistics will be calculated only when getStatistic(), getStatistics(), or
48// similar statistics computing methods are called. Setting this value to True
49// allows the caller to not have to keep all the data accessible at once. Note
50// however, that all data must be simultaneously accessible if quantile-like
51// (eg median) calculations are desired.
52//
53// Objects of this class are instantiated using a ClassicalQuantileComputer
54// object for computation of quantile-like statistics. See the documentation
55// of StatisticsAlgorithm for details relating to QuantileComputer classes.
56
57template <
58 class AccumType, class DataIterator, class MaskIterator=const Bool*,
59 class WeightsIterator=DataIterator
60>
62 : public StatisticsAlgorithm<CASA_STATP> {
63
65
66public:
67
69
70 // copy semantics
72
74
75 // copy semantics
77
78 // Clone this instance
80
81 // get the algorithm that this object uses for computing stats
85
86 // <group>
87 // In the following group of methods, if the size of the composite dataset
88 // is smaller than <src>binningThreshholdSizeBytes</src>, the composite
89 // dataset will be (perhaps partially) sorted and persisted in memory during
90 // the call. In that case, and if <src>persistSortedArray</src> is True,
91 // this sorted array will remain in memory after the call and will be used
92 // on subsequent calls of this method when
93 // <src>binningThreshholdSizeBytes</src> is greater than the size of the
94 // composite dataset. If <src>persistSortedArray</src> is False, the sorted
95 // array will not be stored after this call completes and so, on subsequent
96 // calls for which the dataset size is less than
97 // <src>binningThreshholdSizeBytes</src>, the dataset will be sorted from
98 // scratch. Values which are not included due to non-unity strides, are not
99 // included in any specified ranges, are masked, or have associated weights
100 // of zero are not considered as dataset members for quantile computations.
101 // If one has a priori information regarding the number of points (npts)
102 // and/or the minimum and maximum values of the data set, these can be
103 // supplied to improve performance. Note however, that if these values are
104 // not correct, the resulting median and/or quantile values will also not be
105 // correct (although see the following notes regarding max/min). Note that
106 // if this object has already had getStatistics() called, and the min and
107 // max were calculated, there is no need to pass these values in as they
108 // have been stored internally and used (although passing them in shouldn't
109 // hurt anything). If provided, npts, the number of points falling in the
110 // specified ranges which are not masked and have weights > 0, should be
111 // exactly correct. <src>min</src> can be less than the true minimum, and
112 // <src>max</src> can be greater than the true maximum, but for best
113 // performance, these should be as close to the actual min and max as
114 // possible. In order for quantile computations to occur over multiple
115 // datasets, all datasets must be available. This means that if
116 // setCalculateAsAdded() was previously called by passing in a value of
117 // True, these methods will throw an exception as the previous call
118 // indicates that there is no guarantee that all datasets will be available.
119 // If one uses a data provider (by having called setDataProvider()), then
120 // this should not be an issue.
121
122 // Get the median of the distribution. For a dataset with an odd number of
123 // good points, the median is just the value at index int(N/2) in the
124 // equivalent sorted dataset, where N is the number of points. For a dataset
125 // with an even number of points, the median is the mean of the values at
126 // indices int(N/2)-1 and int(N/2) in the sorted dataset. <src>nBins</src>
127 // is the number of bins, per histogram, to use to bin the data. More
128 // bins decrease the likelihood that multiple passes of the data set will be
129 // necessary, but also increase the amount of memory used. If nBins is set
130 // to less than 1,000, it is automatically increased to 1,000; there should
131 // be no reason to ever set nBins to be this small.
132 virtual AccumType getMedian(
133 std::shared_ptr<uInt64> knownNpts=nullptr,
134 std::shared_ptr<AccumType> knownMin=nullptr,
135 std::shared_ptr<AccumType> knownMax=nullptr,
136 uInt binningThreshholdSizeBytes=4096*4096,
137 Bool persistSortedArray=False, uInt nBins=10000
138 );
139
140 // If one needs to compute both the median and quantile values, it is better
141 // to call getMedianAndQuantiles() rather than getMedian() and
142 // getQuantiles() separately, as the first will scan large data sets fewer
143 // times than calling the separate methods. The return value is the median;
144 // the quantiles are returned in the <src>quantiles</src> map. Values in the
145 // <src>fractions</src> set represent the locations in the CDF and should be
146 // between 0 and 1, exclusive.
147 virtual AccumType getMedianAndQuantiles(
148 std::map<Double, AccumType>& quantiles,
149 const std::set<Double>& fractions, std::shared_ptr<uInt64> knownNpts=nullptr,
150 std::shared_ptr<AccumType> knownMin=nullptr,
151 std::shared_ptr<AccumType> knownMax=nullptr,
152 uInt binningThreshholdSizeBytes=4096*4096,
153 Bool persistSortedArray=False, uInt nBins=10000
154 );
155
156 // get the median of the absolute deviation about the median of the data.
157 virtual AccumType getMedianAbsDevMed(
158 std::shared_ptr<uInt64> knownNpts=nullptr,
159 std::shared_ptr<AccumType> knownMin=nullptr,
160 std::shared_ptr<AccumType> knownMax=nullptr,
161 uInt binningThreshholdSizeBytes=4096*4096,
162 Bool persistSortedArray=False, uInt nBins=10000
163 );
164
165 // Get the specified quantiles. <src>fractions</src> must be between 0 and
166 // 1, noninclusive.
167 virtual std::map<Double, AccumType> getQuantiles(
168 const std::set<Double>& fractions,
169 std::shared_ptr<uInt64> knownNpts=nullptr,
170 std::shared_ptr<AccumType> knownMin=nullptr,
171 std::shared_ptr<AccumType> knownMax=nullptr,
172 uInt binningThreshholdSizeBytes=4096*4096,
173 Bool persistSortedArray=False, uInt nBins=10000
174 );
175 // </group>
176
177 // <group>
178 // scan the dataset(s) that have been added, and find the min and max. This
179 // method may be called even if setStatsToCalculate has been called and MAX
180 // and MIN have been excluded. If setCalculateAsAdded(True) has previously
181 // been called after this object has been (re)initialized, an exception will
182 // be thrown. The second version also determines npts in the same scan.
183 virtual void getMinMax(AccumType& mymin, AccumType& mymax);
184
185 virtual void getMinMaxNpts(
186 uInt64& npts, AccumType& mymin, AccumType& mymax
187 );
188 // </group>
189
190 // scan the dataset(s) that have been added, and find the number of good
191 // points. This method may be called even if setStatsToCalculate has been
192 // called and NPTS has been excluded. If setCalculateAsAdded(True) has
193 // previously been called after this object has been (re)initialized, an
194 // exception will be thrown.
195 virtual uInt64 getNPts();
196
197 // see base class description
198 virtual std::pair<Int64, Int64> getStatisticIndex(
200 );
201
202 // reset object to initial state. Clears all private fields including data,
203 // accumulators, etc.
204 virtual void reset();
205
206 // Should statistics be updated with calls to addData or should they only be
207 // calculated upon calls to getStatistics() etc? Beware that calling this
208 // will automatically reinitialize the object, so that it will contain no
209 // references to data et al. after this method has been called.
210 virtual void setCalculateAsAdded(Bool c);
211
212 // An exception will be thrown if setCalculateAsAdded(True) has been called.
214
215 // Allow derived objects to set the quantile computer object. API developers
216 // shouldn't need to call this, unless they are writing derived classes
217 // of ClassicalStatistics. Purposefully non-virtual. Derived classes should
218 // not implement.
220 std::shared_ptr<ClassicalQuantileComputer<CASA_STATP>> qc
221 ) {
222 _qComputer = qc;
223 }
224
225 virtual void setStatsToCalculate(std::set<StatisticsData::STATS>& stats);
226
227protected:
228
229 // This constructor should be used by derived objects in order to set
230 // the proper quantile computer object
232
233 // <group>
234 // scan through the data set to determine the number of good (unmasked,
235 // weight > 0, within range) points. The first with no mask, no ranges, and
236 // no weights is trivial with npts = nr in this class, but is implemented
237 // here so that derived classes may override it.
238 virtual void _accumNpts(
239 uInt64& npts, const DataIterator& dataBegin, uInt64 nr, uInt dataStride
240 ) const;
241
242 virtual void _accumNpts(
243 uInt64& npts, const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
244 const DataRanges& ranges, Bool isInclude
245 ) const;
246
247 virtual void _accumNpts(
248 uInt64& npts, const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
249 const MaskIterator& maskBegin, uInt maskStride
250 ) const;
251
252 virtual void _accumNpts(
253 uInt64& npts, const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
254 const MaskIterator& maskBegin, uInt maskStride,
255 const DataRanges& ranges, Bool isInclude
256 ) const;
257
258 virtual void _accumNpts(
259 uInt64& npts, const DataIterator& dataBegin,
260 const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
261 ) const;
262
263 virtual void _accumNpts(
264 uInt64& npts, const DataIterator& dataBegin,
265 const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
266 const DataRanges& ranges, Bool isInclude
267 ) const;
268
269 virtual void _accumNpts(
270 uInt64& npts, const DataIterator& dataBegin,
271 const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
272 const MaskIterator& maskBegin, uInt maskStride,
273 const DataRanges& ranges, Bool isInclude
274 ) const;
275
276 virtual void _accumNpts(
277 uInt64& npts, const DataIterator& dataBegin,
278 const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
279 const MaskIterator& maskBegin, uInt maskStride
280 ) const;
281 // </group>
282
283 // <group>
284 inline void _accumulate(
285 StatsData<AccumType>& stats, const AccumType& datum,
286 const LocationType& location
287 );
288
289 inline void _accumulate(
290 StatsData<AccumType>& stats, const AccumType& datum,
291 const AccumType& weight, const LocationType& location
292 );
293 // </group>
294
295 void _addData();
296
298
// Accessor: returns the current value of the _doMaxMin member.
299 Bool _getDoMaxMin() const { return _doMaxMin; }
300
302
303 virtual AccumType _getStatistic(StatisticsData::STATS stat);
304
306
307 // Retrieve stats structure. Allows derived classes to maintain their own
308 // StatsData structs.
310
// Const overload of the stats accessor: hands back a read-only reference to
// the _statsData member.
311 virtual const StatsData<AccumType>& _getStatsData() const {
312 return _statsData;
313 }
314
315 // <group>
316 virtual void _minMax(
317 std::shared_ptr<AccumType>& mymin, std::shared_ptr<AccumType>& mymax,
318 const DataIterator& dataBegin, uInt64 nr, uInt dataStride
319 ) const;
320
321 virtual void _minMax(
322 std::shared_ptr<AccumType>& mymin, std::shared_ptr<AccumType>& mymax,
323 const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
324 const DataRanges& ranges, Bool isInclude
325 ) const;
326
327 virtual void _minMax(
328 std::shared_ptr<AccumType>& mymin, std::shared_ptr<AccumType>& mymax,
329 const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
330 const MaskIterator& maskBegin, uInt maskStride
331 ) const;
332
333 virtual void _minMax(
334 std::shared_ptr<AccumType>& mymin, std::shared_ptr<AccumType>& mymax,
335 const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
336 const MaskIterator& maskBegin, uInt maskStride,
337 const DataRanges& ranges, Bool isInclude
338 ) const;
339
340 virtual void _minMax(
341 std::shared_ptr<AccumType>& mymin, std::shared_ptr<AccumType>& mymax,
342 const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
343 uInt64 nr, uInt dataStride
344 ) const;
345
346 virtual void _minMax(
347 std::shared_ptr<AccumType>& mymin, std::shared_ptr<AccumType>& mymax,
348 const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
349 uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
350 ) const;
351
352 virtual void _minMax(
353 std::shared_ptr<AccumType>& mymin, std::shared_ptr<AccumType>& mymax,
354 const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
355 uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
356 uInt maskStride, const DataRanges& ranges, Bool isInclude
357 ) const;
358
359 virtual void _minMax(
360 std::shared_ptr<AccumType>& mymin, std::shared_ptr<AccumType>& mymax,
361 const DataIterator& dataBegin, const WeightsIterator& weightBegin,
362 uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
363 uInt maskStride
364 ) const;
365 // </group>
366
367 // <group>
368 // Sometimes we want the min, max, and npts all in one scan.
369 virtual void _minMaxNpts(
370 uInt64& npts, std::shared_ptr<AccumType>& mymin,
371 std::shared_ptr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
372 uInt dataStride
373 ) const;
374
375 virtual void _minMaxNpts(
376 uInt64& npts, std::shared_ptr<AccumType>& mymin,
377 std::shared_ptr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
378 uInt dataStride, const DataRanges& ranges, Bool isInclude
379 ) const;
380
381 virtual void _minMaxNpts(
382 uInt64& npts, std::shared_ptr<AccumType>& mymin,
383 std::shared_ptr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
384 uInt dataStride, const MaskIterator& maskBegin, uInt maskStride
385 ) const;
386
387 virtual void _minMaxNpts(
388 uInt64& npts, std::shared_ptr<AccumType>& mymin,
389 std::shared_ptr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
390 uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
391 const DataRanges& ranges, Bool isInclude
392 ) const;
393
394 virtual void _minMaxNpts(
395 uInt64& npts, std::shared_ptr<AccumType>& mymin,
396 std::shared_ptr<AccumType>& mymax, const DataIterator& dataBegin,
397 const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
398 ) const;
399
400 virtual void _minMaxNpts(
401 uInt64& npts, std::shared_ptr<AccumType>& mymin,
402 std::shared_ptr<AccumType>& mymax, const DataIterator& dataBegin,
403 const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
404 const DataRanges& ranges, Bool isInclude
405 ) const;
406
407 virtual void _minMaxNpts(
408 uInt64& npts, std::shared_ptr<AccumType>& mymin,
409 std::shared_ptr<AccumType>& mymax, const DataIterator& dataBegin,
410 const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
411 const MaskIterator& maskBegin, uInt maskStride,
412 const DataRanges& ranges, Bool isInclude
413 ) const;
414
415 virtual void _minMaxNpts(
416 uInt64& npts, std::shared_ptr<AccumType>& mymin,
417 std::shared_ptr<AccumType>& mymax, const DataIterator& dataBegin,
418 const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
419 const MaskIterator& maskBegin, uInt maskStride
420 ) const;
421 // </group>
422
423 std::shared_ptr<StatisticsAlgorithmQuantileComputer<CASA_STATP>>
425 return _qComputer;
426 }
427
428 // <group>
429 // no weights, no mask, no ranges
430 virtual void _unweightedStats(
431 StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
432 const DataIterator& dataBegin, uInt64 nr, uInt dataStride
433 );
434
435 // no weights, no mask
436 virtual void _unweightedStats(
437 StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
438 const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
439 const DataRanges& ranges, Bool isInclude
440 );
441
442 virtual void _unweightedStats(
443 StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
444 const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
445 const MaskIterator& maskBegin, uInt maskStride
446 );
447
448 virtual void _unweightedStats(
449 StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
450 const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
451 const MaskIterator& maskBegin, uInt maskStride,
452 const DataRanges& ranges, Bool isInclude
453 );
454
455 // </group>
457 const StatsData<AccumType>& threadStats
458 );
459
460 // <group>
461 // has weights, but no mask, no ranges
462 virtual void _weightedStats(
463 StatsData<AccumType>& stats, LocationType& location,
464 const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
465 uInt64 nr, uInt dataStride
466 );
467
468 virtual void _weightedStats(
469 StatsData<AccumType>& stats, LocationType& location,
470 const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
471 uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
472 );
473
474 virtual void _weightedStats(
475 StatsData<AccumType>& stats, LocationType& location,
476 const DataIterator& dataBegin, const WeightsIterator& weightBegin,
477 uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
478 uInt maskStride
479 );
480
481 virtual void _weightedStats(
482 StatsData<AccumType>& stats, LocationType& location,
483 const DataIterator& dataBegin, const WeightsIterator& weightBegin,
484 uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
485 uInt maskStride, const DataRanges& ranges, Bool isInclude
486 );
487 // </group>
488
489private:
492
493 std::shared_ptr<ClassicalQuantileComputer<CASA_STATP>> _qComputer{};
494
496 std::shared_ptr<AccumType>& mymax, std::shared_ptr<AccumType>& mymin,
497 DataIterator dataIter, MaskIterator maskIter,
498 WeightsIterator weightsIter, uInt64 dataCount, const ChunkType& chunk
499 );
500
502 uInt64& npts, std::shared_ptr<AccumType>& mymax,
503 std::shared_ptr<AccumType>& mymin, DataIterator dataIter,
504 MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount,
505 const ChunkType& chunk
506 );
507
509 uInt64& npts, DataIterator dataIter, MaskIterator maskIter,
510 WeightsIterator weightsIter, uInt64 dataCount, const ChunkType& chunk
511 );
512
514 StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
515 DataIterator dataIter, MaskIterator maskIter,
516 WeightsIterator weightsIter, uInt64 count, const ChunkType& chunk
517 );
518
519 // scan dataset(s) to find min and max
520 void _doMinMax(AccumType& vmin, AccumType& vmax);
521
522 uInt64 _doMinMaxNpts(AccumType& vmin, AccumType& vmax);
523
525
526 // for quantile computations, if necessary, determines npts, min, max to
527 // send to quantile calculator methods
529 uInt64& mynpts, AccumType& mymin, AccumType& mymax,
530 std::shared_ptr<uInt64> knownNpts, std::shared_ptr<AccumType> knownMin,
531 std::shared_ptr<AccumType> knownMax
532 );
533
534};
535
536}
537
538#ifndef CASACORE_NO_AUTO_TEMPLATES
539#include <casacore/scimath/StatsFramework/ClassicalStatistics.tcc>
540#endif
541
542#endif
#define DataRanges
This class is used internally by ClassicalStatistics objects.
Class to calculate statistics in a "classical" sense, ie using accumulators with no special filtering...
virtual AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, std::shared_ptr< uInt64 > knownNpts=nullptr, std::shared_ptr< AccumType > knownMin=nullptr, std::shared_ptr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
If one needs to compute both the median and quantile values, it is better to call getMedianAndQuantil...
virtual void _minMax(std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
virtual void reset()
reset object to initial state.
virtual const StatsData< AccumType > & _getStatsData() const
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
scan through the data set to determine the number of good (unmasked, weight > 0, within range) points...
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
void _computeMinMaxNpts(uInt64 &npts, std::shared_ptr< AccumType > &mymax, std::shared_ptr< AccumType > &mymin, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const ChunkType &chunk)
void _accumulate(StatsData< AccumType > &stats, const AccumType &datum, const LocationType &location)
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude)
no weights, no mask
virtual AccumType getMedian(std::shared_ptr< uInt64 > knownNpts=nullptr, std::shared_ptr< AccumType > knownMin=nullptr, std::shared_ptr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride)
virtual void _minMaxNpts(uInt64 &npts, std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual void _minMaxNpts(uInt64 &npts, std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
typename StatisticsDataset< CASA_STATP >::ChunkData ChunkType
virtual std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, std::shared_ptr< uInt64 > knownNpts=nullptr, std::shared_ptr< AccumType > knownMin=nullptr, std::shared_ptr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
Get the specified quantiles.
virtual AccumType _getStatistic(StatisticsData::STATS stat)
ClassicalStatistics & operator=(const ClassicalStatistics &other)
copy semantics
virtual StatsData< AccumType > _getInitialStats() const
virtual void _minMax(std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
virtual void _minMaxNpts(uInt64 &npts, std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
virtual std::pair< Int64, Int64 > getStatisticIndex(StatisticsData::STATS stat)
see base class description
virtual uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
virtual void _minMax(std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
virtual void _minMax(std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride)
has weights, but no mask, no ranges
void setQuantileComputer(std::shared_ptr< ClassicalQuantileComputer< CASA_STATP > > qc)
Allow derived objects to set the quantile computer object.
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude)
virtual void _updateDataProviderMaxMin(const StatsData< AccumType > &threadStats)
std::shared_ptr< StatisticsAlgorithmQuantileComputer< CASA_STATP > > _getQuantileComputer()
void _accumulate(StatsData< AccumType > &stats, const AccumType &datum, const AccumType &weight, const LocationType &location)
virtual void _minMaxNpts(uInt64 &npts, std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude)
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride)
virtual void setDataProvider(StatsDataProvider< CASA_STATP > *dataProvider)
An exception will be thrown if setCalculateAsAdded(True) has been called.
virtual void _minMaxNpts(uInt64 &npts, std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude)
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride)
no weights, no mask, no ranges
virtual void _minMax(std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride) const
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
void _doMinMax(AccumType &vmin, AccumType &vmax)
scan dataset(s) to find min and max
virtual void _minMaxNpts(uInt64 &npts, std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
virtual void _minMax(std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
ClassicalStatistics(const ClassicalStatistics &cs)
copy semantics
std::shared_ptr< ClassicalQuantileComputer< CASA_STATP > > _qComputer
virtual StatsData< AccumType > _getStatistics()
virtual void _minMaxNpts(uInt64 &npts, std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
Sometimes we want the min, max, and npts all in one scan.
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride) const
virtual StatisticsAlgorithm< CASA_STATP > * clone() const
Clone this instance.
virtual void setStatsToCalculate(std::set< StatisticsData::STATS > &stats)
Provide guidance to algorithms by specifying a priori which statistics the caller would like calculat...
virtual void getMinMaxNpts(uInt64 &npts, AccumType &mymin, AccumType &mymax)
void _computeStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 count, const ChunkType &chunk)
virtual void _minMax(std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual void _minMax(std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
virtual AccumType getMedianAbsDevMed(std::shared_ptr< uInt64 > knownNpts=nullptr, std::shared_ptr< AccumType > knownMin=nullptr, std::shared_ptr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
get the median of the absolute deviation about the median of the data.
virtual StatsData< AccumType > & _getStatsData()
Retrieve stats structure.
void _computeMinMax(std::shared_ptr< AccumType > &mymax, std::shared_ptr< AccumType > &mymin, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const ChunkType &chunk)
void _computeNpts(uInt64 &npts, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const ChunkType &chunk)
virtual void setCalculateAsAdded(Bool c)
Should statistics be updated with calls to addData or should they only be calculated upon calls to ge...
void _addData()
Allows derived classes to do things after data is set or added.
virtual void _minMaxNpts(uInt64 &npts, std::shared_ptr< AccumType > &mymin, std::shared_ptr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride) const
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
void _doNptsMinMax(uInt64 &mynpts, AccumType &mymin, AccumType &mymax, std::shared_ptr< uInt64 > knownNpts, std::shared_ptr< AccumType > knownMin, std::shared_ptr< AccumType > knownMax)
for quantile computations, if necessary, determines npts, min, max to send to quantile calculator met...
StatsData< AccumType > _statsData
uInt64 _doMinMaxNpts(AccumType &vmin, AccumType &vmax)
ClassicalStatistics(std::shared_ptr< ClassicalQuantileComputer< CASA_STATP > > qc)
This constructor should be used by derived objects in order to set the proper quantile computer objec...
Base class of statistics algorithm class hierarchy.
ALGORITHM
implemented algorithms
Abstract base class which defines interface for providing "datasets" to the statistics framework in c...
this file contains all the compiler specific defines
Definition mainpage.dox:28
const Bool False
Definition aipstype.h:42
unsigned int uInt
Definition aipstype.h:49
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40
const Bool True
Definition aipstype.h:41
unsigned long long uInt64
Definition aipsxtype.h:37
std::pair< Int64, Int64 > LocationType
holds information about a data chunk.