casacore
Loading...
Searching...
No Matches
FitToHalfStatistics.h
Go to the documentation of this file.
1//# Copyright (C) 2000,2001
2//# Associated Universities, Inc. Washington DC, USA.
3//#
4//# This library is free software; you can redistribute it and/or modify it
5//# under the terms of the GNU Library General Public License as published by
6//# the Free Software Foundation; either version 2 of the License, or (at your
7//# option) any later version.
8//#
9//# This library is distributed in the hope that it will be useful, but WITHOUT
10//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12//# License for more details.
13//#
14//# You should have received a copy of the GNU Library General Public License
15//# along with this library; if not, write to the Free Software Foundation,
16//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17//#
18//# Correspondence concerning AIPS++ should be addressed as follows:
19//# Internet email: casa-feedback@nrao.edu.
20//# Postal address: AIPS++ Project Office
21//# National Radio Astronomy Observatory
22//# 520 Edgemont Road
23//# Charlottesville, VA 22903-2475 USA
24//#
25
26#ifndef SCIMATH_FITTOHALFSTATISTICS_H
27#define SCIMATH_FITTOHALFSTATISTICS_H
28
29#include <casacore/casa/aips.h>
30
31#include <casacore/scimath/StatsFramework/ConstrainedRangeStatistics.h>
32#include <casacore/scimath/StatsFramework/FitToHalfStatisticsData.h>
33
34namespace casacore {
35
36// Class to calculate statistics using the so-called fit to half algorithm. In
37// this algorithm, a center value is specified, and only points greater or equal
38// or less or equal this value are included. Furthermore, each of the included
39// points is reflected about the center value, and these virtual points are
40// added to the included points and the union of sets of included real points
41// and virtual points are used for computing statistics. The specified center
42// point is therefore the mean and median of the resulting distribution, and the
43// total number of points is exactly twice the number of real data points that
44// are included.
45//
46// This class uses a ConstrainedRangeQuantileComputer object for computing
47// quantile-like statistics. See class documentation for StatisticsAlgorithm for
48// details regarding QuantileComputer classes.
49
50template <
51 class AccumType, class DataIterator, class MaskIterator=const Bool *,
52 class WeightsIterator=DataIterator
53>
55 : public ConstrainedRangeStatistics<CASA_STATP> {
56public:
57
58 const static AccumType TWO;
59
60 // <src>value</src> is only used if <src>center</src>=CVALUE
65 );
66
67 // copy semantics
69
71
72 // copy semantics
75 );
76
77 // Clone this instance. Caller is responsible for deleting the returned
78 // pointer.
80
81 // get the algorithm that this object uses for computing stats
85
86 // The median is just the center value, so none of the parameters to this
87 // method are used.
88 AccumType getMedian(
89 std::shared_ptr<uInt64> knownNpts=nullptr,
90 std::shared_ptr<AccumType> knownMin=nullptr,
91 std::shared_ptr<AccumType> knownMax=nullptr,
92 uInt binningThreshholdSizeBytes=4096*4096,
93 Bool persistSortedArray=False, uInt nBins=10000
94 );
95
96 // <group>
97 // In the following group of methods, if the size of the composite dataset
98 // is smaller than <src>binningThreshholdSizeBytes</src>, the composite
99 // dataset will be (perhaps partially) sorted and persisted in memory during
100 // the call. In that case, and if <src>persistSortedArray</src> is True,
101 // this sorted array will remain in memory after the call and will be used
102 // on subsequent calls of this method when
103 // <src>binningThreshholdSizeBytes</src> is greater than the size of the
104 // composite dataset. If <src>persistSortedArray</src> is False, the sorted
105 // array will not be stored after this call completes and so any subsequent
106 // calls for which the dataset size is less than
107 // <src>binningThreshholdSizeBytes</src>, the dataset will be sorted from
108 // scratch. Values which are not included due to non-unity strides, are not
109 // included in any specified ranges, are masked, or have associated weights
110 // of zero are not considered as dataset members for quantile computations.
111 // If one has a priori information regarding the number of points (npts)
112 // and/or the minimum and maximum values of the data set, these can be
113 // supplied to improve performance. Note however, that if these values are
114 // not correct, the resulting median and/or quantile values will also not be
115 // correct (although see the following notes regarding max/min). Note that
116 // if this object has already had getStatistics() called, and the min and
117 // max were calculated, there is no need to pass these values in as they
118 // have been stored internally and will be used (although passing them
119 // explicitly shouldn't hurt anything). If provided, npts, the number of
120 // points falling in the specified ranges which are not masked and have
121 // weights > 0, should be correct. <src>min</src> can be less than the true
122 // minimum, and <src>max</src> can be greater than the True maximum, but for
123 // best performance, these should be as close to the actual min and max as
124 // possible (and ideally the actual min/max values of the data set).
126 std::map<Double, AccumType>& quantiles,
127 const std::set<Double>& fractions,
128 std::shared_ptr<uInt64> knownNpts=nullptr,
129 std::shared_ptr<AccumType> knownMin=nullptr,
130 std::shared_ptr<AccumType> knownMax=nullptr,
131 uInt binningThreshholdSizeBytes=4096*4096,
132 Bool persistSortedArray=False, uInt nBins=10000
133 );
134
135 // get the median of the absolute deviation about the median of the data.
137 std::shared_ptr<uInt64> knownNpts=nullptr,
138 std::shared_ptr<AccumType> knownMin=nullptr,
139 std::shared_ptr<AccumType> knownMax=nullptr,
140 uInt binningThreshholdSizeBytes=4096*4096,
141 Bool persistSortedArray=False, uInt nBins=10000
142 );
143
144 // Get the specified quantiles. <src>fractions</src> must be between 0 and
145 // 1, noninclusive.
146 std::map<Double, AccumType> getQuantiles(
147 const std::set<Double>& fractions,
148 std::shared_ptr<uInt64> knownNpts=nullptr,
149 std::shared_ptr<AccumType> knownMin=nullptr,
150 std::shared_ptr<AccumType> knownMax=nullptr,
151 uInt binningThreshholdSizeBytes=4096*4096,
152 Bool persistSortedArray=False, uInt nBins=10000
153 );
154 // </group>
155
156 // scan the dataset(s) that have been added, and find the min and max.
157 // This method may be called even if setStatsToCaclulate has been called and
158 // MAX and MIN has been excluded.
159 virtual void getMinMax(AccumType& mymin, AccumType& mymax);
160
161 // scan the dataset(s) that have been added, and find the number of good
162 // points. This method may be called even if setStatsToCaclulate has been
163 // called and NPTS has been excluded. If setCalculateAsAdded(True) has
164 // previously been called after this object has been (re)initialized, an
165 // exception will be thrown.
167
168 // reset object to initial state. Clears all private fields including data,
169 // accumulators, global range. It does not affect the center type, center
170 // value, or which "side" to use which were set at construction.
171 virtual void reset();
172
173 // This class does not allow statistics to be calculated as datasets are
174 // added, so an exception will be thrown if <src>c</src> is True.
176
177 // Override base class method by requiring mean to be computed in addition
178 // to what is added in stats if the requested center value is CMEAN.
179 virtual void setStatsToCalculate(std::set<StatisticsData::STATS>& stats);
180
181protected:
182
184
186
188
189 inline const StatsData<AccumType>& _getStatsData() const {
190 return _statsData;
191 }
192
193 // <group>
194 // no weights, no mask, no ranges
196 StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
197 const DataIterator& dataBegin, uInt64 nr, uInt dataStride
198 );
199
200 // no weights, no mask
202 StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
203 const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
204 const DataRanges& ranges, Bool isInclude
205 );
206
208 StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
209 const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
210 const MaskIterator& maskBegin, uInt maskStride
211 );
212
214 StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
215 const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
216 const MaskIterator& maskBegin, uInt maskStride,
217 const DataRanges& ranges, Bool isInclude
218 );
219 // </group>
220
222
223 // <group>
224 // has weights, but no mask, no ranges
226 StatsData<AccumType>& stats, LocationType& location,
227 const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
228 uInt64 nr, uInt dataStride
229 );
230
232 StatsData<AccumType>& stats, LocationType& location,
233 const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
234 uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
235 );
236
238 StatsData<AccumType>& stats, LocationType& location,
239 const DataIterator& dataBegin, const WeightsIterator& weightBegin,
240 uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
241 uInt maskStride
242 );
243
245 StatsData<AccumType>& stats, LocationType& location,
246 const DataIterator& dataBegin, const WeightsIterator& weightBegin,
247 uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
248 uInt maskStride, const DataRanges& ranges, Bool isInclude
249 );
250 // </group>
251
252private:
255 AccumType _centerValue;
258 // these are the max and min for the real portion of the dataset
259 std::shared_ptr<AccumType> _realMax{}, _realMin{};
261 // This is used for convenience and performance. It should always
262 // be the same as the _range value used in the base
263 // ConstrainedRangeStatistics object
264 std::shared_ptr<std::pair<AccumType, AccumType>> _range;
265
266 // get the min max of the entire (real + virtual) data set. Only
267 // used for quantile computation
269 std::shared_ptr<AccumType>& realMin, std::shared_ptr<AccumType>& realMax,
270 std::shared_ptr<AccumType> knownMin, std::shared_ptr<AccumType> knownMax
271 );
272
273 // get the min/max of the real portion only of the dataset
274 void _getRealMinMax(AccumType& realMin, AccumType& realMax);
275
276 void _setRange();
277
278};
279
280}
281
282#ifndef CASACORE_NO_AUTO_TEMPLATES
283#include <casacore/scimath/StatsFramework/FitToHalfStatistics.tcc>
284#endif
285
286#endif
#define DataRanges
Abstract base class for statistics algorithms which are characterized by a range of good values.
USE_DATA
which section of data to use, greater than or less than the center value
CENTER
choice of center point based on the corresponding statistics from the entire distribution of data,...
Class to calculate statistics using the so-called fit to half algorithm.
virtual StatsData< AccumType > _getStatistics()
void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude)
void _updateDataProviderMaxMin(const StatsData< AccumType > &threadStats)
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
FitToHalfStatistics< CASA_STATP > & operator=(const FitToHalfStatistics< CASA_STATP > &other)
copy semantics
FitToHalfStatistics(const FitToHalfStatistics< CASA_STATP > &other)
copy semantics
virtual StatsData< AccumType > _getInitialStats() const
const StatsData< AccumType > & _getStatsData() const
AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, std::shared_ptr< uInt64 > knownNpts=nullptr, std::shared_ptr< AccumType > knownMin=nullptr, std::shared_ptr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
StatsData< AccumType > & _getStatsData()
Retrieve stats structure.
FitToHalfStatisticsData::CENTER _centerType
virtual StatisticsAlgorithm< CASA_STATP > * clone() const
Clone this instance.
AccumType getMedianAbsDevMed(std::shared_ptr< uInt64 > knownNpts=nullptr, std::shared_ptr< AccumType > knownMin=nullptr, std::shared_ptr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
get the median of the absolute deviation about the median of the data.
std::shared_ptr< AccumType > _realMin
void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude)
void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude)
void _setRange()
derived classes need to implement how to set their respective range
FitToHalfStatistics(FitToHalfStatisticsData::CENTER center=FitToHalfStatisticsData::CMEAN, FitToHalfStatisticsData::USE_DATA useData=FitToHalfStatisticsData::LE_CENTER, AccumType value=0)
value is only used if center=CVALUE
void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude)
no weights, no mask
std::shared_ptr< AccumType > _realMax
these are the max and min for the real portion of the dataset
void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride)
no weights, no mask, no ranges
virtual void setStatsToCalculate(std::set< StatisticsData::STATS > &stats)
Override base class method by requiring mean to be computed in addition to what is added in stats if ...
AccumType getMedian(std::shared_ptr< uInt64 > knownNpts=nullptr, std::shared_ptr< AccumType > knownMin=nullptr, std::shared_ptr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
The median is just the center value, so none of the parameters to this method are used.
std::shared_ptr< std::pair< AccumType, AccumType > > _range
This is used for convenience and performance.
StatsData< AccumType > _statsData
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride)
void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride)
uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
void _getMinMax(std::shared_ptr< AccumType > &realMin, std::shared_ptr< AccumType > &realMax, std::shared_ptr< AccumType > knownMin, std::shared_ptr< AccumType > knownMax)
get the min max of the entire (real + virtual) data set.
void setCalculateAsAdded(Bool c)
This class does not allow statistics to be calculated as datasets are added, so an exception will be ...
std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, std::shared_ptr< uInt64 > knownNpts=nullptr, std::shared_ptr< AccumType > knownMin=nullptr, std::shared_ptr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
Get the specified quantiles.
virtual void reset()
reset object to initial state.
void _getRealMinMax(AccumType &realMin, AccumType &realMax)
get the min/max of the real portion only of the dataset
void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride)
has weights, but no mask, no ranges
Base class of statistics algorithm class hierarchy.
ALGORITHM
implemented algorithms
this file contains all the compiler specific defines
Definition mainpage.dox:28
const Bool False
Definition aipstype.h:42
unsigned int uInt
Definition aipstype.h:49
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40
LatticeExprNode value(const LatticeExprNode &expr)
This function returns the value of the expression without a mask.
unsigned long long uInt64
Definition aipsxtype.h:37
std::pair< Int64, Int64 > LocationType