casacore
Loading...
Searching...
No Matches
SparseDiff.h
Go to the documentation of this file.
1//# SparseDiff.h: An automatic differentiating class for functions
2//# Copyright (C) 2007,2008
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: casa-feedback@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25
26#ifndef SCIMATH_SPARSEDIFF_H
27#define SCIMATH_SPARSEDIFF_H
28
29//# Includes
30#include <casacore/casa/aips.h>
31#include <casacore/scimath/Mathematics/AutoDiff.h>
32#include <casacore/scimath/Mathematics/SparseDiffRep.h>
33#include <casacore/casa/vector.h>
34#include <utility>
35
36// Using
37using std::pair;
38
39namespace casacore { //# NAMESPACE CASACORE - BEGIN
40
41 //# Forward declarations
42 template <class T> class SparseDiff;
43
44 // <summary>
45 // Class that computes partial derivatives by automatic differentiation.
46 // </summary>
47 //
48 // <use visibility=export>
49 //
50 // <reviewed reviewer="UNKNOWN" date="" tests="tSparseDiff.cc" demos="dSparseDiff.cc">
51 // </reviewed>
52 //
53 // <prerequisite>
54 // <li> <linkto class=AutoDiff>AutoDiff</linkto> class
55 // </prerequisite>
56 //
57 // <etymology>
58 // Class that computes partial derivatives for some parameters by automatic
59 // differentiation, thus SparseDiff.
60 // </etymology>
61 //
62 // <synopsis>
63 // Class that computes partial derivatives by automatic differentiation.
64 // It does this by storing the value of a function and the values of its first
65 // derivatives with respect to some of its independent parameters.
66 // When a mathematical
67 // operation is applied to a SparseDiff object, the derivative values of the
68 // resulting new object are computed according to the chain rules
69 // of differentiation. SparseDiff operates like the
70 // <linkto class=AutoDiff>AutoDiff</linkto> class, but only determines the
71 // derivatives with respect to the actual dependent variables.
72 //
73 // Suppose we have a function f(x0,x1,...,xn) and its differential is
74 // <srcblock>
75 // df = (df/dx0)*dx0 + (df/dx1)*dx1 + ... + (df/dxn)*dxn
76 // </srcblock>
77 // We can build a class that has the value of the function,
78 // f(x0,x1,...,xn), and the values of the derivatives, (df/dx0), (df/dx1),
79 // ..., (df/dxn) at (x0,x1,...,xn), as class members.
80 //
81 // Now if we have another function, g(x0,x1,...,xn) and its differential is
82 // dg = (dg/dx0)*dx0 + (dg/dx1)*dx1 + ... + (dg/dxn)*dxn,
83 // since
84 // <srcblock>
85 // d(f+g) = df + dg,
86 // d(f*g) = g*df + f*dg,
87 // d(f/g) = df/g - fdg/g^2,
88 // dsin(f) = cos(f)df,
89 // ...,
90 // </srcblock>
91 // we can calculate
92 // <srcblock>
93 // d(f+g), d(f*g), ...,
94 // </srcblock> based on our information on
95 // <srcblock>
96 // df/dx0, df/dx1, ..., dg/dx0, dg/dx1, ..., dg/dxn.
97 // </srcblock>
98 // All we need to do is to define the operators and derivatives of common
99 // mathematical functions.
100 //
101 // To be able to use the class as an automatic differentiator of a function
102 // object, we need a templated function object, i.e. an object with:
103 // <ul>
104 // <li> a <src> template <class T> T operator()(const T)</src>
105 // <li> or multiple variable input like:
106 // <src> template <class T> T operator()(const Vector<T> &)</src>
107 // <li> all dependent variables used in the calculation of the function
108 // value should have been typed with T.
109 // </ul>
110 // A simple example of such a function object could be:
111 // <srcblock>
112 // template <class T> f {
113 // public:
114 // T operator()(const T &x, const T &a, const T &b) {
115 // return a*b*x; }
116 // };
117 // // Instantiate the following versions:
118 // template class f<Double>;
119 // template class f<SparseDiff<Double> >;
120 // </srcblock>
121 // A call with values will produce the function value:
122 // <srcblock>
123 // cout << f(7.0, 2.0, 3.0) << endl;
124 // // will produce the value at x=7 for a=2; b=3:
125 // 42
126 // // But a call indicating that we want derivatives to a and b:
127 // cout << f(SparseDiff<Double>(7.0), SparseDiff<Double>(2.0, 0),
128 // SparseDiff<Double>(3.0, 1)) << endl;
129 // // will produce the value at x=7 for a=2; b=3:
130 // // and the partial derivatives wrt a and b at x=7:
131 // (42, [21, 14])
132 // // The following will calculate the derivate wrt x:
133 // cout << f(SparseDiff<Double>(7.0, 0), SparseDiff<Double>(2.0),
134 // SparseDiff<Double>(3.0)) << endl;
135 // (42,[6])
136 // </srcblock>
137 // Note that in practice constants may be given as Double constants.
138 // In actual practice, there are a few rules to obey for the structure of
139 // the function object if you want to use the function object and its
140 // derivatives in least squares fitting procedures in the Fitting
141 // module. The major one is to view the function object having 'fixed' and
142 // 'variable' parameters. I.e., rather than viewing the function as
143 // depending on parameters <em>a, b, x</em> (<src>f(a,b,x)</src>), the
144 // function is considered to be <src>f(x; a,b)</src>, where <em>a, b</em>
145 // are 'fixed' parameters, and <em>x</em> a variable parameter.
146 // Fixed parameters should be contained in a
147 // <linkto class=FunctionParam>FunctionParam</linkto> container object;
148 // while the variable parameter(s) are given in the function
149 // <src>operator()</src>. See <linkto class=Function>Function</linkto> class
150 // for details.
151 //
152 // A Gaussian spectral profile would in general have the center frequency,
153 // the width and the amplitude as fixed parameters, and the frequency as
154 // a variable. Given a spectrum, you would solve for the fixed parameters,
155 // given spectrum values. However, in other cases the role of the
156 // parameters could be reversed. An example could be a whole stack of
157 // observed (in the laboratory) spectra at different temperatures at
158 // one frequency. In that case the width would be the variable parameter,
159 // and the frequency one of the fixed (and to be solved for)parameters.
160 //
161 // Since the calculation of the derivatives is done with simple overloading,
162 // the calculation of second (and higher) derivatives is easy. It should be
163 // noted that higher deivatives are inefficient in the current incarnation
164 // (there is no knowledge e.g. about symmetry in the Jacobian). However,
165 // it is a very good way to get the correct answers of the derivatives. In
166 // practice actual production code will be better off with specialization
167 // of the <src>f<SparseDiff<> ></src> implementation.
168 //
169 // The <src>SparseDiff</src> class is the class the user communicates with.
170 // Alias classes (<linkto class=SparseDiffA>SparseDiffA</linkto> and
171 // <linkto class=SparseDiffA>SparseDiffX</linkto>) exist
172 // to make it possible to have different incarnations of a templated
173 // method (e.g. a generic one and a specialized one). See the
174 // <src>dSparseDiff</src> demo for an example of its use.
175 //
176 // All operators and functions are declared in <linkto file=SparseDiffMath.h>
177 // SparseDiffMath</linkto>. The output operator in
178 // <linkto file=SparseDiffIO.h>SparseDiffIO</linkto>.
179 // The actual structure of the
180 // data block used by <src>SparseDiff</src> is described in
181 // <linkto class=SparseDiffRep>SparseDiffRep</linkto>.
182 //
183 // A SparseDiff can be constructed from an AutoDiff.
184 // <em>toAutoDiff(n)</em> can convert it to an AutoDiff.
185 // </synopsis>
186 //
187 // <example>
188 // <srcblock>
189 // // First a simple example.
190 // // We have a function of the form f(x,y,z); and want to know the
191 // // value of the function for x=10; y=20; z=30; and for
192 // // the derivatives at those points.
193 // // Specify the values; and indicate the parameter dependence:
194 // SparseDiff<Double> x(10.0, 0);
195 // SparseDiff<Double> y(20.0, 1);
196 // SparseDiff<Double> z(30.0, 2);
197 // // The result will be:
198 // SparseDiff<Double> result = x*y + sin(z);
199 // cout << result.value() << endl;
200 // // 199.012
201 // cout << result.derivatives() << endl;
202 // // [20, 10, 0.154251]
203 // // Note: sin(30) = -0.988; cos(30) = 0.154251;
204 // </srcblock>
205 //
206 // See for an extensive example the demo program dSparseDiff. It is
207 // based on the example given above, and shows also the use of second
208 // derivatives (which is just using <src>SparseDiff<SparseDiff<Double> ></src>
209 // as template argument).
210 // <srcblock>
211 // // The function, with fixed parameters a,b:
212 // template <class T> class f {
213 // public:
214 // T operator()(const T& x) { return a_p*a_p*a_p*b_p*b_p*x; }
215 // void set(const T& a, const T& b) { a_p = a; b_p = b; }
216 // private:
217 // T a_p;
218 // T b_p;
219 // };
220 // // Call it with different template arguments:
221 // Double a0(2), b0(3), x0(7);
222 // f<Double> f0; f0.set(a0, b0);
223 // cout << "Value: " << f0(x0) << endl;
224 //
225 // SparseDiff<Double> a1(2,0), b1(3,1), x1(7);
226 // f<SparseDiff<Double> > f1; f1.set(a1, b1);
227 // cout << "Diff a,b: " << f1(x1) << endl;
228 //
229 // SparseDiff<Double> a2(2), b2(3), x2(7,0);
230 // f<SparseDiff<Double> > f2; f2.set(a2, b2);
231 // cout << "Diff x: " << f2(x2) << endl;
232 //
233 // SparseDiff<SparseDiff<Double> > a3(SparseDiff<Double>(2,0),0),
234 // b3(SparseDiff<Double>(3,1),1), x3(SparseDiff<Double>(7));
235 // f<SparseDiff<SparseDiff<Double> > > f3; f3.set(a3, b3);
236 // cout << "Diff2 a,b: " << f3(x3) << endl;
237 //
238 // SparseDiff<SparseDiff<Double> > a4(SparseDiff<Double>(2)),
239 // b4(SparseDiff<Double>(3)),
240 // x4(SparseDiff<Double>(7,0),0);
241 // f<SparseDiff<SparseDiff<Double> > > f4; f4.set(a4, b4);
242 // cout << "Diff2 x: " << f4(x4) << endl;
243 //
244 // // Result will be:
245 // // Value: 504
246 // // Diff a,b: (504, [756, 336])
247 // // Diff x: (504, [72])
248 // // Diff2 a,b: ((504, [756, 336]), [(756, [756, 504]), (336, [504, 112])])
249 // // Diff2 x: ((504, [72]), [(72, [0])])
250 //
251 // // It needed the template instantiations definitions:
252 // template class f<Double>;
253 // template class f<SparseDiff<Double> >;
254 // template class f<SparseDiff<SparseDiff<Double> > >;
255 // </srcblock>
256 // </example>
257 //
258 // <motivation>
259 // The creation of the class was motivated by least-squares non-linear fits
260 // in cases where each individual condition equation depends only on a
261 // fraction of the fixed parameters (e.g. self-calibration where only pairs
262 // of antennas are present per equation), and hence only a few
263 // partial derivatives of a fitted function are needed. It would be tedious
264 // to create functionals for all partial derivatives of a function.
265 // </motivation>
266 //
267 // <templating arg=T>
268 // <li> any class that has the standard mathematical and comparison
269 // operators and functions defined.
270 // </templating>
271 //
272 // <todo asof="2007/11/27">
273 // <li> Nothing I know of.
274 // </todo>
275
276 template <class T> class SparseDiff {
277 public:
278 //# Typedefs
279 typedef T value_type;
284
285 //# Constructors
286 // Construct a constant with a value of zero. Zero derivatives.
288
289 // Construct a constant with a value of v. Zero derivatives.
290 SparseDiff(const T &v);
291
292 // A function f(x0,x1,...,xn,...) with a value of v. The
293 // nth derivative is one, and all other derivatives are zero.
294 SparseDiff(const T &v, const uInt n);
295
296 // A function f(x0,x1,...,xn,...) with a value of v. The
297 // nth derivative is der, and all other derivatives are zero.
298 SparseDiff(const T &v, const uInt n, const T &der);
299
300 // Construct from an AutoDiff
301 SparseDiff(const AutoDiff<T> &other);
302
303 // Construct one from another (deep copy)
305
306 // Destructor
308
309 // Assignment operator. Assign a constant to variable.
311
312 // Assignment operator. Add a gradient to variable.
313 SparseDiff<T> &operator=(const pair<uInt, T> &der);
314
315 // Assignment operator. Assign gradients to variable.
316 SparseDiff<T> &operator=(const vector<pair<uInt, T> > &der);
317
318 // Assign from an Autodiff
320
321 // Assign one to another (deep copy)
323
324 // Assignment operators
325 // <group>
326 void operator*=(const SparseDiff<T> &other);
327 void operator/=(const SparseDiff<T> &other);
328 void operator+=(const SparseDiff<T> &other);
329 void operator-=(const SparseDiff<T> &other);
330 void operator*=(const T other) { rep_p->operator*=(other);
331 value() *= other; }
332 void operator/=(const T other) { rep_p->operator/=(other);
333 value() /= other; }
334 void operator+=(const T other) { value() += other; }
335 void operator-=(const T other) { value() -= other; }
336 // </group>
337
338 // Convert to an AutoDiff of length <em>n</em>
340
341 // Returns the pointer to the structure of value and derivatives.
342 // <group>
344 const SparseDiffRep<T> *theRep() const { return rep_p; }
345 // </group>
346
347 // Returns the value of the function
348 // <group>
349 T &value() { return rep_p->val_p; }
350 const T &value() const { return rep_p->val_p; }
351 // </group>
352
353 // Returns a vector of the derivatives of a SparseDiff
354 // <group>
355 vector<pair<uInt, T> > &derivatives() const;
356 void derivatives(vector<pair<uInt, T> > &res) const;
357 const vector<pair<uInt, T> > &grad() const { return rep_p->grad_p; }
358 vector<pair<uInt, T> > &grad() { return rep_p->grad_p; }
359 // </group>
360
361 // Returns a specific derivative. No check for a valid which.
362 // <group>
363 pair<uInt, T> &derivative(uInt which) { return rep_p->grad_p[which]; }
364 const pair<uInt, T> &derivative(uInt which) const {
365 return rep_p->grad_p[which]; }
366 // </group>
367
368 // Return total number of derivatives
369 uInt nDerivatives() const { return rep_p->grad_p.size(); }
370
371 // Is it a constant, i.e., with zero derivatives?
372 Bool isConstant() const { return rep_p->grad_p.empty(); }
373
374 // Sort criterium
375 static Bool ltSort(pair<uInt, T> &lhs, pair<uInt, T> &rhs);
376
377 // Sort derivative list; cater for doubles and zeroes
378 void sort();
379
380 private:
381 //# Data
382 // Value representation
384
385 };
386
387
388} //# NAMESPACE CASACORE - END
389
390#ifndef CASACORE_NO_AUTO_TEMPLATES
391#include <casacore/scimath/Mathematics/SparseDiff.tcc>
392#endif //# CASACORE_NO_AUTO_TEMPLATES
393#endif
static Bool ltSort(pair< uInt, T > &lhs, pair< uInt, T > &rhs)
Sort criterium.
void sort()
Sort derivative list; cater for doubles and zeroes.
vector< pair< uInt, T > > & derivatives() const
Returns a vector of the derivatives of a SparseDiff.
const T & value() const
Definition SparseDiff.h:350
SparseDiff< T > & operator=(const vector< pair< uInt, T > > &der)
Assignment operator.
void operator*=(const SparseDiff< T > &other)
Assignment operators.
SparseDiff(const T &v, const uInt n)
A function f(x0,x1,...,xn,...) with a value of v.
SparseDiff(const T &v, const uInt n, const T &der)
A function f(x0,x1,...,xn,...) with a value of v.
SparseDiffRep< T > * theRep()
Returns the pointer to the structure of value and derivatives.
Definition SparseDiff.h:343
void operator/=(const SparseDiff< T > &other)
uInt nDerivatives() const
Return total number of derivatives.
Definition SparseDiff.h:369
void operator+=(const SparseDiff< T > &other)
SparseDiff< T > & operator=(const AutoDiff< T > &other)
Assign from an Autodiff.
void operator-=(const SparseDiff< T > &other)
pair< uInt, T > & derivative(uInt which)
Returns a specific derivative.
Definition SparseDiff.h:363
SparseDiff()
Construct a constant with a value of zero.
SparseDiff(const AutoDiff< T > &other)
Construct from an AutoDiff.
value_type & reference
Definition SparseDiff.h:280
const vector< pair< uInt, T > > & grad() const
Definition SparseDiff.h:357
~SparseDiff()
Destructor.
void operator-=(const T other)
Definition SparseDiff.h:335
const value_type & const_reference
Definition SparseDiff.h:281
SparseDiffRep< T > * rep_p
Value representation.
Definition SparseDiff.h:383
AutoDiff< T > toAutoDiff(uInt n) const
Convert to an AutoDiff of length n
SparseDiff< T > & operator=(const T &v)
Assignment operator.
const SparseDiffRep< T > * theRep() const
Definition SparseDiff.h:344
SparseDiff(const T &v)
Construct a constant with a value of v.
Bool isConstant() const
Is it a constant, i.e., with zero derivatives?
Definition SparseDiff.h:372
value_type * iterator
Definition SparseDiff.h:282
const pair< uInt, T > & derivative(uInt which) const
Definition SparseDiff.h:364
T & value()
Returns the value of the function.
Definition SparseDiff.h:349
SparseDiff(const SparseDiff< T > &other)
Construct one from another (deep copy)
SparseDiff< T > & operator=(const SparseDiff< T > &other)
Assign one to another (deep copy)
SparseDiff< T > & operator=(const pair< uInt, T > &der)
Assignment operator.
vector< pair< uInt, T > > & grad()
Definition SparseDiff.h:358
void operator+=(const T other)
Definition SparseDiff.h:334
void operator/=(const T other)
Definition SparseDiff.h:332
const value_type * const_iterator
Definition SparseDiff.h:283
void operator*=(const T other)
Definition SparseDiff.h:330
void derivatives(vector< pair< uInt, T > > &res) const
this file contains all the compiler specific defines
Definition mainpage.dox:28
unsigned int uInt
Definition aipstype.h:49
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40