casacore
ClassicalStatistics.h
Go to the documentation of this file.
1 //# Copyright (C) 2000,2001
2 //# Associated Universities, Inc. Washington DC, USA.
3 //#
4 //# This library is free software; you can redistribute it and/or modify it
5 //# under the terms of the GNU Library General Public License as published by
6 //# the Free Software Foundation; either version 2 of the License, or (at your
7 //# option) any later version.
8 //#
9 //# This library is distributed in the hope that it will be useful, but WITHOUT
10 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 //# License for more details.
13 //#
14 //# You should have received a copy of the GNU Library General Public License
15 //# along with this library; if not, write to the Free Software Foundation,
16 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17 //#
18 //# Correspondence concerning AIPS++ should be addressed as follows:
19 //# Internet email: aips2-request@nrao.edu.
20 //# Postal address: AIPS++ Project Office
21 //# National Radio Astronomy Observatory
22 //# 520 Edgemont Road
23 //# Charlottesville, VA 22903-2475 USA
24 //#
25 
26 #ifndef SCIMATH_CLASSICALSTATISTICSS_H
27 #define SCIMATH_CLASSICALSTATISTICSS_H
28 
29 #include <casacore/casa/aips.h>
30 
31 #include <casacore/scimath/StatsFramework/StatisticsAlgorithm.h>
32 
33 #include <casacore/scimath/StatsFramework/ClassicalQuantileComputer.h>
34 #include <casacore/scimath/StatsFramework/StatisticsTypes.h>
35 #include <casacore/scimath/StatsFramework/StatisticsUtilities.h>
36 #include <set>
37 #include <vector>
38 #include <utility>
39 
40 namespace casacore {
41 
42 template <class T> class PtrHolder;
43 
44 // Class to calculate statistics in a "classical" sense, ie using accumulators
45 // with no special filtering beyond optional range filtering etc.
46 //
47 // setCalculateAsAdded() allows one to specify if statistics should be
48 // calculated and updated upon each call to set/addData(). If False,
49 // statistics will be calculated only when getStatistic(), getStatistics(), or
50 // similar statistics computing methods are called. Setting this value to True
51 // allows the caller to not have to keep all the data accessible at once. Note
52 // however, that all data must be simultaneously accessible if quantile-like
53 // (eg median) calculations are desired.
54 //
55 // Objects of this class are instantiated using a ClassicalQuantileComputer
56 // object for computation of quantile-like statistics. See the documentation
57 // of StatisticsAlgorithm for details relating to QuantileComputer classes.
58 
59 template <
60  class AccumType, class DataIterator, class MaskIterator=const Bool*,
61  class WeightsIterator=DataIterator
62 >
64  : public StatisticsAlgorithm<CASA_STATP> {
65 
67 
68 public:
69 
71 
72  // copy semantics
74 
76 
77  // copy semantics
79 
80  // Clone this instance
82 
83  // get the algorithm that this object uses for computing stats
86  };
87 
88  // <group>
89  // In the following group of methods, if the size of the composite dataset
90  // is smaller than <src>binningThreshholdSizeBytes</src>, the composite
91  // dataset will be (perhaps partially) sorted and persisted in memory during
92  // the call. In that case, and if <src>persistSortedArray</src> is True,
93  // this sorted array will remain in memory after the call and will be used
94  // on subsequent calls of this method when
95  // <src>binningThreshholdSizeBytes</src> is greater than the size of the
96  // composite dataset. If <src>persistSortedArray</src> is False, the sorted
97  // array will not be stored after this call completes and so, for any subsequent
98  // calls for which the dataset size is less than
99  // <src>binningThreshholdSizeBytes</src>, the dataset will be sorted from
100  // scratch. Values which are not included due to non-unity strides, are not
101  // included in any specified ranges, are masked, or have associated weights
102  // of zero are not considered as dataset members for quantile computations.
103  // If one has a priori information regarding the number of points (npts)
104  // and/or the minimum and maximum values of the data set, these can be
105  // supplied to improve performance. Note however, that if these values are
106  // not correct, the resulting median and/or quantile values will also not be
107  // correct (although see the following notes regarding max/min). Note that
108  // if this object has already had getStatistics() called, and the min and
109  // max were calculated, there is no need to pass these values in as they
110  // have been stored internally and used (although passing them in shouldn't
111  // hurt anything). If provided, npts, the number of points falling in the
112  // specified ranges which are not masked and have weights > 0, should be
113  // exactly correct. <src>min</src> can be less than the true minimum, and
114  // <src>max</src> can be greater than the true maximum, but for best
115  // performance, these should be as close to the actual min and max as
116  // possible. In order for quantile computations to occur over multiple
117  // datasets, all datasets must be available. This means that if
118  // setCalculateAsAdded() was previously called by passing in a value of
119  // True, these methods will throw an exception as the previous call
120  // indicates that there is no guarantee that all datasets will be available.
121  // If one uses a data provider (by having called setDataProvider()), then
122  // this should not be an issue.
123 
124  // Get the median of the distribution. For a dataset with an odd number of
125  // good points, the median is just the value at index int(N/2) in the
126  // equivalent sorted dataset, where N is the number of points. For a dataset
127  // with an even number of points, the median is the mean of the values at
128  // indices int(N/2)-1 and int(N/2) in the sorted dataset. <src>nBins</src>
129  // is the number of bins, per histogram, to use to bin the data. More
130  // bins decrease the likelihood that multiple passes of the data set will be
131  // necessary, but also increase the amount of memory used. If nBins is set
132  // to less than 1,000, it is automatically increased to 1,000; there should
133  // be no reason to ever set nBins to be this small.
134  virtual AccumType getMedian(
135  CountedPtr<uInt64> knownNpts=nullptr,
136  CountedPtr<AccumType> knownMin=nullptr,
137  CountedPtr<AccumType> knownMax=nullptr,
138  uInt binningThreshholdSizeBytes=4096*4096,
139  Bool persistSortedArray=False, uInt nBins=10000
140  );
141 
142  // If one needs to compute both the median and quantile values, it is better
143  // to call getMedianAndQuantiles() rather than getMedian() and
144  // getQuantiles() separately, as the first will scan large data sets fewer
145  // times than calling the separate methods. The return value is the median;
146  // the quantiles are returned in the <src>quantiles</src> map. Values in the
147  // <src>fractions</src> set represent the locations in the CDF and should be
148  // between 0 and 1, exclusive.
149  virtual AccumType getMedianAndQuantiles(
150  std::map<Double, AccumType>& quantiles,
151  const std::set<Double>& fractions, CountedPtr<uInt64> knownNpts=nullptr,
152  CountedPtr<AccumType> knownMin=nullptr,
153  CountedPtr<AccumType> knownMax=nullptr,
154  uInt binningThreshholdSizeBytes=4096*4096,
155  Bool persistSortedArray=False, uInt nBins=10000
156  );
157 
158  // get the median of the absolute deviation about the median of the data.
159  virtual AccumType getMedianAbsDevMed(
160  CountedPtr<uInt64> knownNpts=nullptr,
161  CountedPtr<AccumType> knownMin=nullptr,
162  CountedPtr<AccumType> knownMax=nullptr,
163  uInt binningThreshholdSizeBytes=4096*4096,
164  Bool persistSortedArray=False, uInt nBins=10000
165  );
166 
167  // Get the specified quantiles. <src>fractions</src> must be between 0 and
168  // 1, noninclusive.
169  virtual std::map<Double, AccumType> getQuantiles(
170  const std::set<Double>& fractions,
171  CountedPtr<uInt64> knownNpts=nullptr,
172  CountedPtr<AccumType> knownMin=nullptr,
173  CountedPtr<AccumType> knownMax=nullptr,
174  uInt binningThreshholdSizeBytes=4096*4096,
175  Bool persistSortedArray=False, uInt nBins=10000
176  );
177  // </group>
178 
179  // <group>
180  // scan the dataset(s) that have been added, and find the min and max. This
181  // method may be called even if setStatsToCalculate has been called and MAX
182  // and MIN have been excluded. If setCalculateAsAdded(True) has previously
183  // been called after this object has been (re)initialized, an exception will
184  // be thrown. The second version also determines npts in the same scan.
185  virtual void getMinMax(AccumType& mymin, AccumType& mymax);
186 
187  virtual void getMinMaxNpts(
188  uInt64& npts, AccumType& mymin, AccumType& mymax
189  );
190  // </group>
191 
192  // scan the dataset(s) that have been added, and find the number of good
193  // points. This method may be called even if setStatsToCalculate has been
194  // called and NPTS has been excluded. If setCalculateAsAdded(True) has
195  // previously been called after this object has been (re)initialized, an
196  // exception will be thrown.
197  virtual uInt64 getNPts();
198 
199  // see base class description
200  virtual std::pair<Int64, Int64> getStatisticIndex(
202  );
203 
204  // reset object to initial state. Clears all private fields including data,
205  // accumulators, etc.
206  virtual void reset();
207 
208  // Should statistics be updated with calls to addData or should they only be
209  // calculated upon calls to getStatistics() etc? Beware that calling this
210  // will automatically reinitialize the object, so that it will contain no
211  // references to data et al. after this method has been called.
212  virtual void setCalculateAsAdded(Bool c);
213 
214  // An exception will be thrown if setCalculateAsAdded(True) has been called.
215  virtual void setDataProvider(StatsDataProvider<CASA_STATP> *dataProvider);
216 
217  // Allow derived objects to set the quantile computer object. API developers
218  // shouldn't need to call this, unless they are writing derived classes
219  // of ClassicalStatistics. Purposefully non-virtual. Derived classes should
220  // not implement.
223  ) {
224  _qComputer = qc;
225  }
226 
227  virtual void setStatsToCalculate(std::set<StatisticsData::STATS>& stats);
228 
229 protected:
230 
231  // This constructor should be used by derived objects in order to set
232  // the proper quantile computer object
234 
235  // <group>
236  // scan through the data set to determine the number of good (unmasked,
237  // weight > 0, within range) points. The first with no mask, no ranges, and
238  // no weights is trivial with npts = nr in this class, but is implemented
239  // here so that derived classes may override it.
240  virtual void _accumNpts(
241  uInt64& npts, const DataIterator& dataBegin, uInt64 nr, uInt dataStride
242  ) const;
243 
244  virtual void _accumNpts(
245  uInt64& npts, const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
246  const DataRanges& ranges, Bool isInclude
247  ) const;
248 
249  virtual void _accumNpts(
250  uInt64& npts, const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
251  const MaskIterator& maskBegin, uInt maskStride
252  ) const;
253 
254  virtual void _accumNpts(
255  uInt64& npts, const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
256  const MaskIterator& maskBegin, uInt maskStride,
257  const DataRanges& ranges, Bool isInclude
258  ) const;
259 
260  virtual void _accumNpts(
261  uInt64& npts, const DataIterator& dataBegin,
262  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
263  ) const;
264 
265  virtual void _accumNpts(
266  uInt64& npts, const DataIterator& dataBegin,
267  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
268  const DataRanges& ranges, Bool isInclude
269  ) const;
270 
271  virtual void _accumNpts(
272  uInt64& npts, const DataIterator& dataBegin,
273  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
274  const MaskIterator& maskBegin, uInt maskStride,
275  const DataRanges& ranges, Bool isInclude
276  ) const;
277 
278  virtual void _accumNpts(
279  uInt64& npts, const DataIterator& dataBegin,
280  const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
281  const MaskIterator& maskBegin, uInt maskStride
282  ) const;
283  // </group>
284 
285  // <group>
286  inline void _accumulate(
287  StatsData<AccumType>& stats, const AccumType& datum,
288  const LocationType& location
289  );
290 
291  inline void _accumulate(
292  StatsData<AccumType>& stats, const AccumType& datum,
293  const AccumType& weight, const LocationType& location
294  );
295  // </group>
296 
// Hook called after data is set or added; allows derived classes to do
// things at that point.
297  void _addData();
298 
299  void _clearStats();
300 
// Returns the current value of the _doMaxMin member.
301  Bool _getDoMaxMin() const { return _doMaxMin; }
302 
304 
305  virtual AccumType _getStatistic(StatisticsData::STATS stat);
306 
308 
309  // Retrieve stats structure. Allows derived classes to maintain their own
310  // StatsData structs.
312 
// Const accessor: returns a const reference to the _statsData member.
// Virtual, so derived classes can override it.
313  virtual const StatsData<AccumType>& _getStatsData() const {
314  return _statsData;
315  }
316 
317  // <group>
318  virtual void _minMax(
320  const DataIterator& dataBegin, uInt64 nr, uInt dataStride
321  ) const;
322 
323  virtual void _minMax(
325  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
326  const DataRanges& ranges, Bool isInclude
327  ) const;
328 
329  virtual void _minMax(
331  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
332  const MaskIterator& maskBegin, uInt maskStride
333  ) const;
334 
335  virtual void _minMax(
337  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
338  const MaskIterator& maskBegin, uInt maskStride,
339  const DataRanges& ranges, Bool isInclude
340  ) const;
341 
342  virtual void _minMax(
344  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
345  uInt64 nr, uInt dataStride
346  ) const;
347 
348  virtual void _minMax(
350  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
351  uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
352  ) const;
353 
354  virtual void _minMax(
356  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
357  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
358  uInt maskStride, const DataRanges& ranges, Bool isInclude
359  ) const;
360 
361  virtual void _minMax(
363  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
364  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
365  uInt maskStride
366  ) const;
367  // </group>
368 
369  // <group>
370  // Sometimes we want the min, max, and npts all in one scan.
371  virtual void _minMaxNpts(
372  uInt64& npts, CountedPtr<AccumType>& mymin,
373  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
374  uInt dataStride
375  ) const;
376 
377  virtual void _minMaxNpts(
378  uInt64& npts, CountedPtr<AccumType>& mymin,
379  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
380  uInt dataStride, const DataRanges& ranges, Bool isInclude
381  ) const;
382 
383  virtual void _minMaxNpts(
384  uInt64& npts, CountedPtr<AccumType>& mymin,
385  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
386  uInt dataStride, const MaskIterator& maskBegin, uInt maskStride
387  ) const;
388 
389  virtual void _minMaxNpts(
390  uInt64& npts, CountedPtr<AccumType>& mymin,
391  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
392  uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
393  const DataRanges& ranges, Bool isInclude
394  ) const;
395 
396  virtual void _minMaxNpts(
397  uInt64& npts, CountedPtr<AccumType>& mymin,
398  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
399  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
400  ) const;
401 
402  virtual void _minMaxNpts(
403  uInt64& npts, CountedPtr<AccumType>& mymin,
404  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
405  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
406  const DataRanges& ranges, Bool isInclude
407  ) const;
408 
409  virtual void _minMaxNpts(
410  uInt64& npts, CountedPtr<AccumType>& mymin,
411  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
412  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
413  const MaskIterator& maskBegin, uInt maskStride,
414  const DataRanges& ranges, Bool isInclude
415  ) const;
416 
417  virtual void _minMaxNpts(
418  uInt64& npts, CountedPtr<AccumType>& mymin,
419  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
420  const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
421  const MaskIterator& maskBegin, uInt maskStride
422  ) const;
423  // </group>
424 
427  return _qComputer;
428  }
429 
430  // <group>
431  // no weights, no mask, no ranges
432  virtual void _unweightedStats(
433  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
434  const DataIterator& dataBegin, uInt64 nr, uInt dataStride
435  );
436 
437  // no weights, no mask
438  virtual void _unweightedStats(
439  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
440  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
441  const DataRanges& ranges, Bool isInclude
442  );
443 
// no weights, has mask, no ranges
444  virtual void _unweightedStats(
445  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
446  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
447  const MaskIterator& maskBegin, uInt maskStride
448  );
449 
// no weights, has mask and ranges
450  virtual void _unweightedStats(
451  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
452  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
453  const MaskIterator& maskBegin, uInt maskStride,
454  const DataRanges& ranges, Bool isInclude
455  );
456 
457  // </group>
459  const StatsData<AccumType>& threadStats
460  );
461 
462  // <group>
463  // has weights, but no mask, no ranges
464  virtual void _weightedStats(
465  StatsData<AccumType>& stats, LocationType& location,
466  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
467  uInt64 nr, uInt dataStride
468  );
469 
// has weights and ranges, but no mask
470  virtual void _weightedStats(
471  StatsData<AccumType>& stats, LocationType& location,
472  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
473  uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
474  );
475 
// has weights and mask, but no ranges
476  virtual void _weightedStats(
477  StatsData<AccumType>& stats, LocationType& location,
478  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
479  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
480  uInt maskStride
481  );
482 
// has weights, mask, and ranges
483  virtual void _weightedStats(
484  StatsData<AccumType>& stats, LocationType& location,
485  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
486  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
487  uInt maskStride, const DataRanges& ranges, Bool isInclude
488  );
489  // </group>
490 
491 private:
494 
496 
499  DataIterator dataIter, MaskIterator maskIter,
500  WeightsIterator weightsIter, uInt64 dataCount, const ChunkType& chunk
501  );
502 
504  uInt64& npts, CountedPtr<AccumType>& mymax,
505  CountedPtr<AccumType>& mymin, DataIterator dataIter,
506  MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount,
507  const ChunkType& chunk
508  );
509 
511  uInt64& npts, DataIterator dataIter, MaskIterator maskIter,
512  WeightsIterator weightsIter, uInt64 dataCount, const ChunkType& chunk
513  );
514 
516  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
517  DataIterator dataIter, MaskIterator maskIter,
518  WeightsIterator weightsIter, uInt64 count, const ChunkType& chunk
519  );
520 
521  // scan dataset(s) to find min and max
522  void _doMinMax(AccumType& vmin, AccumType& vmax);
523 
524  uInt64 _doMinMaxNpts(AccumType& vmin, AccumType& vmax);
525 
527 
528  // for quantile computations, if necessary, determines npts, min, max to
529  // send to quantile calculator methods
531  uInt64& mynpts, AccumType& mymin, AccumType& mymax,
532  CountedPtr<uInt64> knownNpts, CountedPtr<AccumType> knownMin,
533  CountedPtr<AccumType> knownMax
534  );
535 
536 };
537 
538 }
539 
540 #ifndef CASACORE_NO_AUTO_TEMPLATES
541 #include <casacore/scimath/StatsFramework/ClassicalStatistics.tcc>
542 #endif
543 
544 #endif
#define DataRanges
Class to calculate statistics in a "classical" sense, ie using accumulators with no special filtering...
virtual void _minMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
virtual void reset()
reset object to initial state.
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
scan through the data set to determine the number of good (unmasked, weight > 0, within range) points...
virtual AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
get the median of the absolute deviation about the median of the data.
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
void _computeMinMax(CountedPtr< AccumType > &mymax, CountedPtr< AccumType > &mymin, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const ChunkType &chunk)
void _accumulate(StatsData< AccumType > &stats, const AccumType &datum, const LocationType &location)
virtual StatsData< AccumType > _getInitialStats() const
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude)
no weights, no mask
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride)
virtual void _minMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual StatsData< AccumType > _getStatistics()
typename StatisticsDataset< CASA_STATP >::ChunkData ChunkType
virtual AccumType _getStatistic(StatisticsData::STATS stat)
virtual std::pair< Int64, Int64 > getStatisticIndex(StatisticsData::STATS stat)
see base class description
CountedPtr< ClassicalQuantileComputer< CASA_STATP > > _qComputer
void setQuantileComputer(CountedPtr< ClassicalQuantileComputer< CASA_STATP >> qc)
Allow derived objects to set the quantile computer object.
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
virtual uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
virtual void _minMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
Sometimes we want the min, max, and npts all in one scan.
virtual StatisticsAlgorithm< CASA_STATP > * clone() const
Clone this instance.
virtual const StatsData< AccumType > & _getStatsData() const
virtual AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
If one needs to compute both the median and quantile values, it is better to call getMedianAndQuantil...
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
void _doNptsMinMax(uInt64 &mynpts, AccumType &mymin, AccumType &mymax, CountedPtr< uInt64 > knownNpts, CountedPtr< AccumType > knownMin, CountedPtr< AccumType > knownMax)
for quantile computations, if necessary, determines npts, min, max to send to quantile calculator met...
virtual void _minMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride) const
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride)
has weights, but no mask, no ranges
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude)
virtual void _updateDataProviderMaxMin(const StatsData< AccumType > &threadStats)
virtual StatsData< AccumType > & _getStatsData()
Retrieve stats structure.
void _accumulate(StatsData< AccumType > &stats, const AccumType &datum, const AccumType &weight, const LocationType &location)
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude)
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride)
virtual void setDataProvider(StatsDataProvider< CASA_STATP > *dataProvider)
An exception will be thrown if setCalculateAsAdded(True) has been called.
ClassicalStatistics(CountedPtr< ClassicalQuantileComputer< CASA_STATP > > qc)
This constructor should be used by derived objects in order to set the proper quantile computer objec...
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude)
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride)
no weights, no mask, no ranges
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
virtual void _minMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
void _doMinMax(AccumType &vmin, AccumType &vmax)
scan dataset(s) to find min and max
ClassicalStatistics(const ClassicalStatistics &cs)
copy semantics
CountedPtr< StatisticsAlgorithmQuantileComputer< CASA_STATP > > _getQuantileComputer()
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride) const
virtual void setStatsToCalculate(std::set< StatisticsData::STATS > &stats)
Provide guidance to algorithms by specifying a priori which statistics the caller would like calculat...
virtual void getMinMaxNpts(uInt64 &npts, AccumType &mymin, AccumType &mymax)
virtual AccumType getMedian(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
void _computeStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 count, const ChunkType &chunk)
virtual void _minMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
virtual std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
Get the specified quantiles.
void _computeNpts(uInt64 &npts, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const ChunkType &chunk)
virtual void _minMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual void setCalculateAsAdded(Bool c)
Should statistics be updated with calls to addData or should they only be calculated upon calls to ge...
void _addData()
Allows derived classes to do things after data is set or added.
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride) const
ClassicalStatistics & operator=(const ClassicalStatistics &other)
copy semantics
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride) const
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
void _computeMinMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymax, CountedPtr< AccumType > &mymin, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const ChunkType &chunk)
StatsData< AccumType > _statsData
virtual void _minMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude) const
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude) const
uInt64 _doMinMaxNpts(AccumType &vmin, AccumType &vmax)
Reference-counted pointer for constant data.
Definition: CountedPtr.h:81
Base class of statistics algorithm class hierarchy.
ALGORITHM
implemented algorithms
Representation of a statistics dataset used in statistics framework calculations.
const Double c
Fundamental physical constants (SI units):
this file contains all the compiler specific defines
Definition: mainpage.dox:28
const Bool False
Definition: aipstype.h:44
unsigned int uInt
Definition: aipstype.h:51
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
PtrHolder(const PtrHolder< T > &other)
const Bool True
Definition: aipstype.h:43
unsigned long long uInt64
Definition: aipsxtype.h:39
std::pair< Int64, Int64 > LocationType