casacore
Loading...
Searching...
No Matches
StatisticsUtilities.h
Go to the documentation of this file.
1//# Copyright (C) 2000,2001
2//# Associated Universities, Inc. Washington DC, USA.
3//#
4//# This library is free software; you can redistribute it and/or modify it
5//# under the terms of the GNU Library General Public License as published by
6//# the Free Software Foundation; either version 2 of the License, or (at your
7//# option) any later version.
8//#
9//# This library is distributed in the hope that it will be useful, but WITHOUT
10//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12//# License for more details.
13//#
14//# You should have received a copy of the GNU Library General Public License
15//# along with this library; if not, write to the Free Software Foundation,
16//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17//#
18//# Correspondence concerning AIPS++ should be addressed as follows:
19//# Internet email: casa-feedback@nrao.edu.
20//# Postal address: AIPS++ Project Office
21//# National Radio Astronomy Observatory
22//# 520 Edgemont Road
23//# Charlottesville, VA 22903-2475 USA
24//#
25
26#ifndef SCIMATH_STATISTICSUTILITIES_H
27#define SCIMATH_STATISTICSUTILITIES_H
28
29#include <casacore/casa/Exceptions/Error.h>
30#include <casacore/scimath/StatsFramework/StatisticsTypes.h>
31#include <casacore/scimath/StatsFramework/StatsHistogram.h>
32#include <casacore/casa/Utilities/DataType.h>
33#include <casacore/casa/aips.h>
34
35#include <iostream>
36#include <casacore/casa/iosfwd.h>
37
38namespace casacore {
39
40CASA_STATD class StatsDataProvider;
41
42// Various statistics related methods for the statistics framework.
43
44template <class AccumType> class StatisticsUtilities {
45public:
46
48
50
51 // <group>
52 // accumulate values. It is the responsibility of the caller to keep track
53 // of the accumulated values after each call. This class does not since it
54 // has no state. The accumulation derivation for mean and variance can be
55 // found at
56 // www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weighvar.pdf
57 // nvariance is an accumulated value. It is related to the variance via
58 // variance = nvariance/npts or nvariance/(npts-1) depending on your
59 // preferred definition in the non-weighted case and
60 // wvariance = wnvariance/sumofweights or wnvariance/(sumofweights-1)
61 // in the weighted case. Its basic definition is
62 // nvariance = sum((x_i - mean)**2),
63 // wnvariance = sum(weight_i*(x_i - mean)**2)
64 // npts is a Double rather than an Int64 because of compilation issues when
65 // AccumType is a Complex
66 inline static void accumulate (
67 Double& npts, AccumType& sum, AccumType& mean, const AccumType& datum
68 );
69
70 // in order to optimize performance, no checking is done for the
71 // weight == 0 case. Callers should ensure that the weight is not zero before
72 // calling this method, and shouldn't call this method if the weight is 0.
73 // Expect a segfault because of division by zero if sumweights and weight
74 // are both zero.
75 inline static void waccumulate (
76 Double& npts, AccumType& sumweights, AccumType& wsum, AccumType& wmean,
77 const AccumType& datum, const AccumType& weight
78 );
79
80 inline static void accumulate (
81 Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
82 AccumType& sumsq, const AccumType& datum
83 );
84
85 // wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
86 inline static void waccumulate (
87 Double& npts, AccumType& sumweights, AccumType& wsum, AccumType& wmean,
88 AccumType& wnvariance, AccumType& wsumsq, const AccumType& datum,
89 const AccumType& weight
90 );
91 // </group>
92
93 // <group>
94 // The assignment operator of class LocationType should use copy, not
95 // reference, semantics.
96 template <class LocationType> inline static void accumulate (
97 Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
98 AccumType& sumsq, AccumType& datamin, AccumType& datamax,
99 LocationType& minpos, LocationType& maxpos, const AccumType& datum,
100 const LocationType& location
101 );
102
103 template <class LocationType, class DataType>
104 inline static void accumulate (
105 Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
106 AccumType& sumsq, DataType& datamin, DataType& datamax,
107 LocationType& minpos, LocationType& maxpos, const DataType& datum,
108 const LocationType& location
109 );
110
111 template <class LocationType>
112 inline static void waccumulate (
113 Double& npts, AccumType& sumofweights, AccumType& sum, AccumType& mean,
114 AccumType& nvariance, AccumType& sumsq, AccumType& datamin,
115 AccumType& datamax, LocationType& minpos, LocationType& maxpos,
116 const AccumType& datum, const AccumType& weight,
117 const LocationType& location
118 );
119 // </group>
120
121 // <group>
122 // return True if the max or min was updated, False otherwise.
123 template <class LocationType>
124 inline static Bool doMax(
125 AccumType& datamax, LocationType& maxpos, Bool isFirst,
126 const AccumType& datum, const LocationType& location
127 );
128
129 template <class LocationType>
130 inline static Bool doMin(
131 AccumType& datamin, LocationType& minpos, Bool isFirst,
132 const AccumType& datum, const LocationType& location
133 );
134 // </group>
135
136 // <group>
137 // These versions are for symmetric accumulation about a specified center
138 // point. The actual point is accumulated, as is a "virtual" point that is
139 // symmetric about the specified center. Of course, the trivial relationship
140 // that the mean is the specified center is used to simplify things.
141 inline static void accumulateSym (
142 Double& npts, AccumType& nvariance, AccumType& sumsq,
143 const AccumType& datum, const AccumType& center
144 );
145
146 // wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
147 inline static void waccumulateSym (
148 Double& npts, AccumType& sumweights, AccumType& wnvariance,
149 AccumType& wsumsq, const AccumType& datum, const AccumType& weight,
150 const AccumType& center
151 );
152
153 // <src>maxpos</src> and <src>minpos</src> refer to actual, not
154 // virtually created, data only.
155 template <class LocationType> inline static void accumulateSym (
156 Double& npts, AccumType& nvariance, AccumType& sumsq,
157 AccumType& datamin, AccumType& datamax, LocationType& minpos,
158 LocationType& maxpos, const AccumType& datum,
159 const LocationType& location, const AccumType& center
160 );
161
162 template <class LocationType> inline static void waccumulateSym (
163 Double& npts, AccumType& sumofweights, AccumType& nvariance,
164 AccumType& sumsq, AccumType& datamin, AccumType& datamax,
165 LocationType& minpos, LocationType& maxpos, const AccumType& datum,
166 const AccumType& weight, const LocationType& location,
167 const AccumType& center
168 );
169
170 // convert in place by taking the absolute value of the difference between
171 // each element of the array and the median
172 inline static void convertToAbsDevMedArray(
173 DataArray& myArray, AccumType median
174 );
175 // </group>
176
177 inline static Bool includeDatum(
178 const AccumType& datum, typename DataRanges::const_iterator beginRange,
179 typename DataRanges::const_iterator endRange, Bool isInclude
180 );
181
182
183 // The array can be changed by partially sorting it up to the largest index.
184 // Return a map of index to value in the sorted array.
185 static std::map<uInt64, AccumType> indicesToValues(
186 std::vector<AccumType>& myArray, const std::set<uInt64>& indices
187 );
188
189 static void mergeResults(
190 std::vector<BinCountArray>& bins,
191 std::vector<std::shared_ptr<AccumType>>& sameVal,
192 std::vector<Bool>& allSame,
193 const std::unique_ptr<std::vector<BinCountArray>[]>& tBins,
194 const std::unique_ptr<std::vector<std::shared_ptr<AccumType>>[]>& tSameVal,
195 const std::unique_ptr<std::vector<Bool>[]>& tAllSame, uInt nThreadsMax
196 );
197
198 // use two statistics sets to get the statistics set that would
199 // result from combining the two data sets used to produce the
200 // individual statistics sets. The quantile related stats are
201 // not considered, since it is not in general possible to determine
202 // the resultant quantiles from the information provided; only
203 // the aggregate statistics make sense.
204 static StatsData<AccumType> combine(
205 const std::vector<StatsData<AccumType>>& stats
206 );
207
208 template <class DataIterator, class MaskIterator, class WeightsIterator>
209 static uInt nThreadsMax(
210 const StatsDataProvider<CASA_STATP> *const dataProvider
211 );
212
213 static uInt threadIdx();
214
215private:
216
217 const static AccumType TWO;
218
219};
220
221}
222
223#ifndef CASACORE_NO_AUTO_TEMPLATES
224#include <casacore/scimath/StatsFramework/StatisticsUtilities.tcc>
225#endif //# CASACORE_NO_AUTO_TEMPLATES
226
227#endif
#define DataArray
Commonly used types in statistics framework.
#define CASA_STATD
because the template signature has become unwieldy
Various statistics related methods for the statistics framework.
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, const AccumType &datum)
accumulate values.
static void mergeResults(std::vector< BinCountArray > &bins, std::vector< std::shared_ptr< AccumType > > &sameVal, std::vector< Bool > &allSame, const std::unique_ptr< std::vector< BinCountArray >[]> &tBins, const std::unique_ptr< std::vector< std::shared_ptr< AccumType > >[]> &tSameVal, const std::unique_ptr< std::vector< Bool >[]> &tAllSame, uInt nThreadsMax)
static uInt nThreadsMax(const StatsDataProvider< CASA_STATP > *const dataProvider)
static Bool doMax(AccumType &datamax, LocationType &maxpos, Bool isFirst, const AccumType &datum, const LocationType &location)
return True if the max or min was updated, False otherwise.
static void accumulateSym(Double &npts, AccumType &nvariance, AccumType &sumsq, const AccumType &datum, const AccumType &center)
These versions are for symmetric accumulation about a specified center point.
static void waccumulateSym(Double &npts, AccumType &sumweights, AccumType &wnvariance, AccumType &wsumsq, const AccumType &datum, const AccumType &weight, const AccumType &center)
wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
static StatsData< AccumType > combine(const std::vector< StatsData< AccumType > > &stats)
use two statistics sets to get the statistics set that would result in combining the two data sets us...
static void accumulateSym(Double &npts, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const LocationType &location, const AccumType &center)
maxpos and minpos refer to actual, not virtually created, data only.
static void waccumulate(Double &npts, AccumType &sumofweights, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const AccumType &weight, const LocationType &location)
static void waccumulate(Double &npts, AccumType &sumweights, AccumType &wsum, AccumType &wmean, AccumType &wnvariance, AccumType &wsumsq, const AccumType &datum, const AccumType &weight)
wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
static void waccumulateSym(Double &npts, AccumType &sumofweights, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const AccumType &weight, const LocationType &location, const AccumType &center)
static std::map< uInt64, AccumType > indicesToValues(std::vector< AccumType > &myArray, const std::set< uInt64 > &indices)
The array can be changed by partially sorting it up to the largest index.
static void convertToAbsDevMedArray(DataArray &myArray, AccumType median)
convert in place by taking the absolute value of the difference of the std::vector and the median
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, const AccumType &datum)
static Bool includeDatum(const AccumType &datum, typename DataRanges::const_iterator beginRange, typename DataRanges::const_iterator endRange, Bool isInclude)
static void waccumulate(Double &npts, AccumType &sumweights, AccumType &wsum, AccumType &wmean, const AccumType &datum, const AccumType &weight)
in order to optimize performance, no checking is done for the weight == 0 case callers should ensure ...
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, DataType &datamin, DataType &datamax, LocationType &minpos, LocationType &maxpos, const DataType &datum, const LocationType &location)
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const LocationType &location)
The assignment operator of class LocationType should use copy, not reference, semantics.
static Bool doMin(AccumType &datamin, LocationType &minpos, Bool isFirst, const AccumType &datum, const LocationType &location)
Abstract base class which defines interface for providing "datasets" to the statistics framework in c...
this file contains all the compiler specific defines
Definition mainpage.dox:28
LatticeExprNode mean(const LatticeExprNode &expr)
LatticeExprNode sum(const LatticeExprNode &expr)
unsigned int uInt
Definition aipstype.h:49
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40
double Double
Definition aipstype.h:53
LatticeExprNode median(const LatticeExprNode &expr)
std::pair< Int64, Int64 > LocationType