casacore
StatisticsUtilities.h
Go to the documentation of this file.
1 //# Copyright (C) 2000,2001
2 //# Associated Universities, Inc. Washington DC, USA.
3 //#
4 //# This library is free software; you can redistribute it and/or modify it
5 //# under the terms of the GNU Library General Public License as published by
6 //# the Free Software Foundation; either version 2 of the License, or (at your
7 //# option) any later version.
8 //#
9 //# This library is distributed in the hope that it will be useful, but WITHOUT
10 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 //# License for more details.
13 //#
14 //# You should have received a copy of the GNU Library General Public License
15 //# along with this library; if not, write to the Free Software Foundation,
16 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17 //#
18 //# Correspondence concerning AIPS++ should be addressed as follows:
19 //# Internet email: aips2-request@nrao.edu.
20 //# Postal address: AIPS++ Project Office
21 //# National Radio Astronomy Observatory
22 //# 520 Edgemont Road
23 //# Charlottesville, VA 22903-2475 USA
24 //#
25 
26 #ifndef SCIMATH_STATISTICSUTILITIES_H
27 #define SCIMATH_STATISTICSUTILITIES_H
28 
29 #include <casacore/casa/Exceptions/Error.h>
30 #include <casacore/scimath/StatsFramework/StatisticsTypes.h>
31 #include <casacore/scimath/StatsFramework/StatsHistogram.h>
32 #include <casacore/casa/Utilities/DataType.h>
33 #include <casacore/casa/aips.h>
34 
35 #include <iostream>
36 #include <casacore/casa/iosfwd.h>
37 
38 namespace casacore {
39 
40 template <class T> class PtrHolder;
41 
42 CASA_STATD class StatsDataProvider;
43 
44 // Various statistics related methods for the statistics framework.
45 
46 template <class AccumType> class StatisticsUtilities {
47 public:
48 
49  StatisticsUtilities() = delete;
50 
52 
53  // <group>
54  // accumulate values. It is the responsibility of the caller to keep track
55  // of the accumulated values after each call. This class does not since it
56  // has no state. The accumulation derivation for mean and variance can be
57  // found at
58  // www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weighvar.pdf
59  // nvariance is an accumulated value. It is related to the variance via
60  // variance = nvariance/npts or nvariance/(npts-1) depending on your
61  // preferred definition in the non-weighted case and
62  // wvariance = wnvariance/sumofweights or wnvariance/(sumofweights-1)
63  // in the weighted case Its basic definition is
64  // nvariance = sum((x_i - mean)**2),
65  // wnvariance = sum((weight_i*(x_i - mean)**2)
66  // npts is a Double rather than an Int64 because of compilation issues when
67  // T is a Complex
68  inline static void accumulate (
69  Double& npts, AccumType& sum, AccumType& mean, const AccumType& datum
70  );
71 
72  // in order to optimize performance, no checking is done for the
73  // weight == 0 case callers should ensure that the weigth is not zero before
74  // calling this method, and shouldn't call this method if the weight is 0.
75  // Expect a segfault because of division by zero if sumweights and weight
76  // are both zero.
77  inline static void waccumulate (
78  Double& npts, AccumType& sumweights, AccumType& wsum, AccumType& wmean,
79  const AccumType& datum, const AccumType& weight
80  );
81 
82  inline static void accumulate (
83  Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
84  AccumType& sumsq, const AccumType& datum
85  );
86 
87  // wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
88  inline static void waccumulate (
89  Double& npts, AccumType& sumweights, AccumType& wsum, AccumType& wmean,
90  AccumType& wnvariance, AccumType& wsumsq, const AccumType& datum,
91  const AccumType& weight
92  );
93  // </group>
94 
95  // <group>
96  // The assignment operator of class LocationType should use copy, not
97  // reference, semantics.
98  template <class LocationType> inline static void accumulate (
99  Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
100  AccumType& sumsq, AccumType& datamin, AccumType& datamax,
101  LocationType& minpos, LocationType& maxpos, const AccumType& datum,
102  const LocationType& location
103  );
104 
105  template <class LocationType, class DataType>
106  inline static void accumulate (
107  Double& npts, AccumType& sum, AccumType& mean, AccumType& nvariance,
108  AccumType& sumsq, DataType& datamin, DataType& datamax,
109  LocationType& minpos, LocationType& maxpos, const DataType& datum,
110  const LocationType& location
111  );
112 
113  template <class LocationType>
114  inline static void waccumulate (
115  Double& npts, AccumType& sumofweights, AccumType& sum, AccumType& mean,
116  AccumType& nvariance, AccumType& sumsq, AccumType& datamin,
117  AccumType& datamax, LocationType& minpos, LocationType& maxpos,
118  const AccumType& datum, const AccumType& weight,
119  const LocationType& location
120  );
121  // </group>
122 
123  // <group>
124  // return True if the max or min was updated, False otherwise.
125  template <class LocationType>
126  inline static Bool doMax(
127  AccumType& datamax, LocationType& maxpos, Bool isFirst,
128  const AccumType& datum, const LocationType& location
129  );
130 
131  template <class LocationType>
132  inline static Bool doMin(
133  AccumType& datamin, LocationType& minpos, Bool isFirst,
134  const AccumType& datum, const LocationType& location
135  );
136  // </group>
137 
138  // <group>
139  // These versions are for symmetric accumulation about a specified center
140  // point. The actual point is accumulated, as is a "virtual" point that is
141  // symmetric about the specified center. Of course, the trivial relationship
142  // that the mean is the specified center is used to simplify things
143  inline static void accumulateSym (
144  Double& npts, AccumType& nvariance, AccumType& sumsq,
145  const AccumType& datum, const AccumType& center
146  );
147 
148  // wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
149  inline static void waccumulateSym (
150  Double& npts, AccumType& sumweights, AccumType& wnvariance,
151  AccumType& wsumsq, const AccumType& datum, const AccumType& weight,
152  const AccumType& center
153  );
154 
155  // <src>maxpos</src> and <src>minpos</src> refer to actual, not
156  // virtually created, data only.
157  template <class LocationType> inline static void accumulateSym (
158  Double& npts, AccumType& nvariance, AccumType& sumsq,
159  AccumType& datamin, AccumType& datamax, LocationType& minpos,
160  LocationType& maxpos, const AccumType& datum,
161  const LocationType& location, const AccumType& center
162  );
163 
164  template <class LocationType> inline static void waccumulateSym (
165  Double& npts, AccumType& sumofweights, AccumType& nvariance,
166  AccumType& sumsq, AccumType& datamin, AccumType& datamax,
167  LocationType& minpos, LocationType& maxpos, const AccumType& datum,
168  const AccumType& weight, const LocationType& location,
169  const AccumType& center
170  );
171 
172  // convert in place by taking the absolute value of the difference of the
173  // std::vector and the median
174  inline static void convertToAbsDevMedArray(
175  DataArray& myArray, AccumType median
176  );
177  // </group>
178 
179  inline static Bool includeDatum(
180  const AccumType& datum, typename DataRanges::const_iterator beginRange,
181  typename DataRanges::const_iterator endRange, Bool isInclude
182  );
183 
184 
185  // The array can be changed by partially sorting it up to the largest index.
186  // Return a map of index to value in the sorted array.
187  static std::map<uInt64, AccumType> indicesToValues(
188  std::vector<AccumType>& myArray, const std::set<uInt64>& indices
189  );
190 
191  static void mergeResults(
192  std::vector<BinCountArray>& bins,
193  std::vector<CountedPtr<AccumType> >& sameVal,
194  std::vector<Bool>& allSame,
195  const PtrHolder<std::vector<BinCountArray>>& tBins,
196  const PtrHolder<std::vector<CountedPtr<AccumType>>>& tSameVal,
197  const PtrHolder<std::vector<Bool>>& tAllSame, uInt nThreadsMax
198  );
199 
200  // use two statistics sets to get the statistics set that would
201  // result in combining the two data sets used to produce the
202  // individual statistics sets. The quantile related stats are
203  // not considered, since it is not in general possible to determine
204  // the resultant quantiles from the information provided; only
205  // the aggregate statistics make sense.
207  const std::vector<StatsData<AccumType>>& stats
208  );
209 
210  template <class DataIterator, class MaskIterator, class WeightsIterator>
212  const StatsDataProvider<CASA_STATP> *const dataProvider
213  );
214 
215  static uInt threadIdx();
216 
217 private:
218 
219  const static AccumType TWO;
220 
221 };
222 
223 }
224 
225 #ifndef CASACORE_NO_AUTO_TEMPLATES
226 #include <casacore/scimath/StatsFramework/StatisticsUtilities.tcc>
227 #endif //# CASACORE_NO_AUTO_TEMPLATES
228 
229 #endif
#define DataArray
Commonly used types in statistics framework.
#define CASA_STATD
because the template signature has become unwieldy
Various statistics related methods for the statistics framework.
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, const AccumType &datum)
accumulate values.
static std::map< uInt64, AccumType > indicesToValues(std::vector< AccumType > &myArray, const std::set< uInt64 > &indices)
The array can be changed by partially sorting it up to the largest index.
static uInt nThreadsMax(const StatsDataProvider< CASA_STATP > *const dataProvider)
static Bool doMax(AccumType &datamax, LocationType &maxpos, Bool isFirst, const AccumType &datum, const LocationType &location)
return True if the max or min was updated, False otherwise.
static void mergeResults(std::vector< BinCountArray > &bins, std::vector< CountedPtr< AccumType > > &sameVal, std::vector< Bool > &allSame, const PtrHolder< std::vector< BinCountArray >> &tBins, const PtrHolder< std::vector< CountedPtr< AccumType >>> &tSameVal, const PtrHolder< std::vector< Bool >> &tAllSame, uInt nThreadsMax)
static void accumulateSym(Double &npts, AccumType &nvariance, AccumType &sumsq, const AccumType &datum, const AccumType &center)
These versions are for symmetric accumulation about a specified center point.
static void waccumulateSym(Double &npts, AccumType &sumweights, AccumType &wnvariance, AccumType &wsumsq, const AccumType &datum, const AccumType &weight, const AccumType &center)
wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
static void accumulateSym(Double &npts, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const LocationType &location, const AccumType &center)
maxpos and minpos refer to actual, not virtually created, data only.
static StatsData< AccumType > combine(const std::vector< StatsData< AccumType >> &stats)
use two statistics sets to get the statistics set that would result in combining the two data sets us...
static void waccumulate(Double &npts, AccumType &sumofweights, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const AccumType &weight, const LocationType &location)
static void waccumulate(Double &npts, AccumType &sumweights, AccumType &wsum, AccumType &wmean, AccumType &wnvariance, AccumType &wsumsq, const AccumType &datum, const AccumType &weight)
wsumsq is the weighted sum of squares, sum(w_i*x_i*x_i)
static void waccumulateSym(Double &npts, AccumType &sumofweights, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const AccumType &weight, const LocationType &location, const AccumType &center)
static void convertToAbsDevMedArray(DataArray &myArray, AccumType median)
convert in place by taking the absolute value of the difference of the std::vector and the median
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, const AccumType &datum)
static Bool includeDatum(const AccumType &datum, typename DataRanges::const_iterator beginRange, typename DataRanges::const_iterator endRange, Bool isInclude)
static void waccumulate(Double &npts, AccumType &sumweights, AccumType &wsum, AccumType &wmean, const AccumType &datum, const AccumType &weight)
in order to optimize performance, no checking is done for the weight == 0 case callers should ensure ...
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, DataType &datamin, DataType &datamax, LocationType &minpos, LocationType &maxpos, const DataType &datum, const LocationType &location)
static void accumulate(Double &npts, AccumType &sum, AccumType &mean, AccumType &nvariance, AccumType &sumsq, AccumType &datamin, AccumType &datamax, LocationType &minpos, LocationType &maxpos, const AccumType &datum, const LocationType &location)
The assignment operator of class LocationType should use copy, not reference, semantics.
static Bool doMin(AccumType &datamin, LocationType &minpos, Bool isFirst, const AccumType &datum, const LocationType &location)
this file contains all the compiler specific defines
Definition: mainpage.dox:28
LatticeExprNode mean(const LatticeExprNode &expr)
LatticeExprNode sum(const LatticeExprNode &expr)
unsigned int uInt
Definition: aipstype.h:51
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
PtrHolder(const PtrHolder< T > &other)
double Double
Definition: aipstype.h:55
LatticeExprNode median(const LatticeExprNode &expr)
std::pair< Int64, Int64 > LocationType