casacore
Loading...
Searching...
No Matches
UDFBase.h
Go to the documentation of this file.
1//# UDFBase.h: Abstract base class for a user-defined TaQL function
2//# Copyright (C) 2010
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: casa-feedback@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25
26#ifndef TABLES_UDFBASE_H
27#define TABLES_UDFBASE_H
28
29//# Includes
30#include <casacore/casa/aips.h>
31#include <casacore/tables/TaQL/ExprNodeRep.h>
32#include <casacore/tables/Tables/Table.h>
33#include <casacore/tables/TaQL/TaQLStyle.h>
34#include <casacore/casa/Containers/Record.h>
35#include <casacore/casa/Containers/Block.h>
36#include <casacore/casa/stdmap.h>
37
38
39namespace casacore {
40
41 // <summary>
42 // Abstract base class for a user-defined TaQL function
43 // </summary>
44 //
45 // <synopsis>
46 // This class makes it possible to add user-defined functions (UDF) to TaQL.
47 // A UDF has to be implemented in a class derived from this class and can
48 // contain one or more user-defined functions.
49 // <br>A few functions have to be implemented in the class as described below.
50 // In this way TaQL can be extended with arbitrary functions, which can be
51 // normal functions as well as aggregate functions (often used with GROUPBY).
52 //
53 // A UDF is a class derived from this base class. It must contain the
54 // following member functions. See also the example below.
55 // <table border=0>
56 // <tr>
57 // <td><src>makeObject</src></td>
58 // <td>a static function to create an object of the UDF class. This function
59 // needs to be registered.
60 // </td>
61 // </tr>
62 // <tr>
63 // <td><src>setup</src></td>
64 // <td>this virtual function is called after the object has been created.
65 // It should initialize the object using the function arguments that
66 // can be obtained using the function <src>operands()</src>. The setup
67 // function should perform the following:
68 // <ul>
69 // <li>Define the data type of the result using <src>setDataType</src>.
70 // The data type should be derived from the data types of the function
71 // arguments. The possible data types are defined in class
72 // TableExprNodeRep.
73 // Note that a UDF can support multiple data types. For example, a
74 // function like <src>min</src> can be used for Int, Double, or a mix.
75 // Function 'checkDT' in class TableExprNodeMulti can be used to
76 // check the data types of the operands and determine the result
77 // data type.
78 // <li>Define if the function is an aggregate function calculating
79 // an aggregated value in a group (e.g., minimum or mean).
80 // <src>setAggregate</src> can be used to tell so.
81 // <li>Define the dimensionality of the result using <src>setNDim</src>.
82 // A value of 0 means a scalar. A value of -1 means an array with
83 // a dimensionality that can vary from row to row.
84 // <li>Optionally use <src>setShape</src> to define the shape if the
85 // results are arrays with a shape that is the same for all rows.
86 // It will also set ndim if setNDim was not used yet, otherwise
87 // it checks if ndim matches.
88 // <li>Optionally set the unit of the result using <src>setUnit</src>.
89 // TaQL has full support of units, so UDFs should behave the same.
90 // It is possible to change the unit of the function arguments.
91 // For example:
92 // <ul>
93 // <li>a function like 'sin' can force its argument to be
94 // in radians; TaQL will scale the argument as needed. This can be
95 // done like
96 // <src>TableExprNodeUnit::adaptUnit (operands()[i], "rad");</src>
97 // <li>A function like 'asin' will have a result in radians.
98 // Such a UDF should set its result unit to rad.
99 // <li>A function like 'min' wants its arguments to have the same
100 // unit and will set its result unit to it. It can be done like:
101 // <src>setUnit (TableExprFuncNode::makeEqualUnits
102 // (operands(), 0, operands().size()));</src>
103 // </ul>
104 // See class TableExprFuncNode for more info about these functions.
105 // <li>Optionally define attributes as a Record object. They can be used
106 // by UDFs to tell something more about the type of value.
107 // <li>Optionally define if the result is a constant value using
108 // <src>setConstant</src>. It means that the function is not
109 // dependent on the row number in the table being queried.
110 // This is usually the case if all UDF arguments are constant.
111 // </ul>
112 // </td>
113 // </tr>
114 // <tr>
115 // <td><src>getXXX</src></td>
116 // <td>these are virtual get functions for each possible data type. The
117 // get functions matching the data types set by the setup
118 // function need to be implemented.
119 // The <src>get</src> functions have an argument TableExprId
120 // defining the table row (or record) for which the function has
121 // to be evaluated.
122 // If the UDF is an aggregate function, the TableExprId has to be
123 // cast to a TableExprIdAggr object from which all TableExprId
124 // objects in an aggregation group can be retrieved.
125 // <srcblock>
126 // const TableExprIdAggr& aid = TableExprIdAggr::cast (id);
127 // const vector<TableExprId>& ids = aid.result().ids(id.rownr());
128 // </srcblock>
129 // </td>
130 // </tr>
131 // </table>
132 //
133 // A UDF has to be made known to TaQL by adding it to the UDF registry with
134 // its name and 'makeObject' function.
135 // UDFs will usually reside in a shared library that is loaded dynamically.
136 // TaQL will load a UDF in the following way:
137 // <ul>
138 // <li> The UDF name used in TaQL consists of two parts: a library name
139 // and a function name separated by a dot. Both parts need to be given.
140 // Note that the library name can also be seen as a UDF scope, so
141 // different UDFs with equal names can be used from different libraries.
142 // A UDF should be registered with this full name.
143 // <br>The "USING STYLE" clause can be used to define a synonym for
144 // a (long) library name in the TaQLStyle object. The library part
145 // of the UDF will always be looked up in this synonym map.
146 // <li> If a UDF is not found in the registry, TaQL tries to load
147 // a shared library using the library name part. The libraries it
148 // tries to load are lib<library>.so and libcasa_<library>.so.
149 // On Mac .dylib will be tried. If loaded successfully, a special
150 // function 'register_libname' will be called first. It should
151 // register each UDF in the shared library using UDFBase::registerUDF.
152 // </ul>
153 // </synopsis>
154 //
155 // <example>
156 // The following example shows a normal UDF function.
157 // <br>It returns True if the function argument matches 1.
158 // It can be seen that it checks if the argument is an integer scalar.
159 // <srcblock>
160 // class TestUDF: public UDFBase
161 // {
162 // public:
163 // TestUDF() {}
164 // // Registered function to create the UDF object.
165 // // The name of the function is not important here.
166 // static UDFBase* makeObject (const String&)
167 // { return new TestUDF(); }
168 // // Setup and check the details; result is a bool scalar value.
169 // virtual void setup (const Table&, const TaQLStyle&)
170 // {
171 // AlwaysAssert (operands().size() == 1, AipsError);
172 // AlwaysAssert (operands()[0]->dataType() == TableExprNodeRep::NTInt,
173 // AipsError);
174 // AlwaysAssert (operands()[0]->valueType() == TableExprNodeRep::VTScalar,
175 // AipsError);
176 // setDataType (TableExprNodeRep::NTBool);
177 // setNDim (0); // scalar result
178 // setConstant (operands()[0]->isConstant()); // constant result?
179 // }
180 // // Get the value for the given id.
181 // // It gets the value of the operand and checks if it is 1.
182 // Bool getBool (const TableExprId& id)
183 // { return operands()[0]->getInt(id) == 1; }
184 // };
185 // </srcblock>
186 // </example>
187
188 // <example>
189 // The following example shows an aggregate UDF function.
190 // It calculates the sum of the cubes of the values in a group.
191 // <srcblock>
192 // class TestUDFAggr: public UDFBase
193 // {
194 // public:
195 // TestUDFAggr() {}
196 // // Registered function to create the UDF object.
197 // // The name of the function is not important here.
198 // static UDFBase* makeObject (const String&) { return new TestUDFAggr(); }
199 // // Setup and check the details; result is an integer scalar value.
200 // // It aggregates the values of multiple rows.
201 // virtual void setup (const Table&, const TaQLStyle&)
202 // {
203 // AlwaysAssert (operands().size() == 1, AipsError);
204 // AlwaysAssert (operands()[0]->dataType() == TableExprNodeRep::NTInt, AipsError);
205 // AlwaysAssert (operands()[0]->valueType() == TableExprNodeRep::VTScalar, AipsError);
206 // setDataType (TableExprNodeRep::NTInt);
207 // setNDim (0); // scalar
208 // setAggregate (True); // aggregate function
209 // }
210 // // Get the value of a group.
211 // // It aggregates the values of multiple rows.
212 // Int64 getInt (const TableExprId& id)
213 // {
214 // // Cast the id to a TableExprIdAggr object.
215 // const TableExprIdAggr& aid = TableExprIdAggr::cast (id);
216 // // Get the vector of ids for this group.
217 // const vector<TableExprId>& ids = aid.result().ids(id.rownr());
218 // // Get the values for all ids and accumulate them.
219 // Int64 sum3 = 0;
220 // for (vector<TableExprId>::const_iterator it=ids.begin();
221 // it!=ids.end(); ++it){
222 // Int64 v = operands()[0]->getInt(*it);
223 // sum3 += v*v*v;
224 // }
225 // return sum3;
226 // }
227 // };
228 // </srcblock>
229 // </example>
230 // More examples of UDF functions can be found in classes UDFMSCal
231 // and DirectionUDF.
232
234 {
235 public:
236 // The signature of a global or static member function creating an object
237 // of the UDF.
238 typedef UDFBase* MakeUDFObject (const String& functionName);
239
240 // Only default constructor is needed.
242
243 // Destructor.
244 virtual ~UDFBase();
245
246 // Evaluate the function and return the result.
247 // The default implementations of these functions throw a "not implemented" exception.
248 // <group>
249 virtual Bool getBool (const TableExprId& id);
250 virtual Int64 getInt (const TableExprId& id);
251 virtual Double getDouble (const TableExprId& id);
252 virtual DComplex getDComplex (const TableExprId& id);
253 virtual String getString (const TableExprId& id);
254 virtual TaqlRegex getRegex (const TableExprId& id);
255 virtual MVTime getDate (const TableExprId& id);
262 // </group>
263
264 // Get the unit.
265 const String& getUnit() const
266 { return itsUnit; }
267
268 // Get the attributes.
269 const Record& getAttributes() const
270 { return itsAttributes; }
271
272 // Flatten the node tree by adding the node and its children to the vector.
273 virtual void flattenTree (std::vector<TableExprNodeRep*>&);
274
275 private:
276 // Set up the function object.
277 virtual void setup (const Table& table,
278 const TaQLStyle&) = 0;
279
280 protected:
281 // Get the operands.
282 std::vector<TENShPtr>& operands()
283 { return itsOperands; }
284
285 // Set the data type.
286 // This function must be called by the setup function of the derived class.
288
289 // Set the dimensionality of the results.
290 // <br> 0 means that the results are scalars.
291 // <br> -1 means that the results are arrays with unknown dimensionality.
292 // <br> >0 means that the results are arrays with that dimensionality.
293 // This function must be called by the setup function of the derived class.
295
296 // Set the shape of the results if it is fixed and known.
297 void setShape (const IPosition& shape);
298
299 // Set the unit of the result.
300 // If this function is not called by the setup function of the derived
301 // class, the result has no unit.
302 void setUnit (const String& unit);
303
304 // Set the attributes of the result.
305 // If this function is not called by the setup function of the derived
306 // class, the result has no attributes.
307 void setAttributes (const Record& attributes);
308
309 // Define if the result is constant (e.g. if all arguments are constant).
310 // If this function is not called by the setup function of the derived
311 // class, the result is not constant.
313
314 // Define if the UDF is an aggregate function (usually used in GROUPBY).
316
317 // Let a derived class recreate its column objects in case a selection
318 // has to be applied.
319 // The default implementation does nothing.
320 virtual void recreateColumnObjects (const Vector<rownr_t>& rownrs);
321
322 public:
323 // Register the name and construction function of a UDF (thread-safe).
324 // An exception is thrown if this name already exists with a different
325 // construction function.
326 static void registerUDF (const String& name, MakeUDFObject* func);
327
328 // Initialize the function object.
329 void init (const std::vector<TENShPtr>& arg,
330 const TableExprInfo& tableInfo, const TaQLStyle&);
331
332 // Get the data type.
335
336 // Get the dimensionality of the results.
337 // (0=scalar, -1=array with variable ndim, >0=array with fixed ndim)
338 Int ndim() const
339 { return itsNDim; }
340
341 // Get the result shape if the same for all results.
342 const IPosition& shape() const
343 { return itsShape; }
344
345 // Tell if the UDF gives a constant result.
347 { return itsIsConstant; }
348
349 // Tell if the UDF is an aggregate function.
351 { return itsIsAggregate; }
352
353 // Do not apply the selection.
356
357 // If needed, let the UDF re-create column objects for a selection of rows.
358 // It calls the function recreateColumnObjects.
359 void applySelection (const Vector<rownr_t>& rownrs);
360
361 // Create a UDF object (thread-safe).
362 // It looks in the map with fixed function names. If unknown,
363 // it looks if a wildcarded function name is supported (for PyTaQL).
364 static UDFBase* createUDF (const String& name, const TaQLStyle& style);
365
366 private:
367 //# Data members.
368 std::vector<TENShPtr> itsOperands;
377 //# The registry is used for two purposes:
378 //# 1. It is a map of known function names (lib.func) to funcptr.
379 //# Function name * means that the library can contain any function,
380 //# which is intended for python functions (through PyTaQL).
381 //# 2. The loaded libraries are kept in the map (with 0 funcptr).
382 static map<String, MakeUDFObject*> theirRegistry;
383 static std::recursive_mutex theirMutex;
384 };
385
386} // end namespace
387
388#endif
String: the storage and methods of handling collections of characters.
Definition String.h:223
Class to connect a Table and its alias name.
NodeDataType
Define the data types of a node.
virtual MArray< MVTime > getArrayDate(const TableExprId &id)
virtual MArray< Int64 > getArrayInt(const TableExprId &id)
virtual Double getDouble(const TableExprId &id)
const Record & getAttributes() const
Get the attributes.
Definition UDFBase.h:269
const String & getUnit() const
Get the unit.
Definition UDFBase.h:265
IPosition itsShape
Definition UDFBase.h:371
TableExprNodeRep::NodeDataType itsDataType
Definition UDFBase.h:369
void setAttributes(const Record &attributes)
Set the attributes of the result.
void init(const std::vector< TENShPtr > &arg, const TableExprInfo &tableInfo, const TaQLStyle &)
Initialize the function object.
std::vector< TENShPtr > & operands()
Get the operands.
Definition UDFBase.h:282
void setDataType(TableExprNodeRep::NodeDataType)
Set the data type.
virtual Int64 getInt(const TableExprId &id)
TableExprNodeRep::NodeDataType dataType() const
Get the data type.
Definition UDFBase.h:333
virtual MVTime getDate(const TableExprId &id)
Bool itsApplySelection
Definition UDFBase.h:376
virtual TaqlRegex getRegex(const TableExprId &id)
UDFBase * MakeUDFObject(const String &functionName)
The signature of a global or static member function creating an object of the UDF.
Definition UDFBase.h:238
void applySelection(const Vector< rownr_t > &rownrs)
If needed, let the UDF re-create column objects for a selection of rows.
Int ndim() const
Get the dimensionality of the results.
Definition UDFBase.h:338
void setAggregate(Bool isAggregate)
Define if the UDF is an aggregate function (usually used in GROUPBY).
Bool isAggregate() const
Tell if the UDF is an aggregate function.
Definition UDFBase.h:350
void setUnit(const String &unit)
Set the unit of the result.
virtual void recreateColumnObjects(const Vector< rownr_t > &rownrs)
Let a derived class recreate its column objects in case a selection has to be applied.
std::vector< TENShPtr > itsOperands
Definition UDFBase.h:368
virtual MArray< DComplex > getArrayDComplex(const TableExprId &id)
UDFBase()
Only default constructor is needed.
static UDFBase * createUDF(const String &name, const TaQLStyle &style)
Create a UDF object (thread-safe).
virtual Bool getBool(const TableExprId &id)
Evaluate the function and return the result.
virtual DComplex getDComplex(const TableExprId &id)
void setConstant(Bool isConstant)
Define if the result is constant (e.g. if all arguments are constant).
const IPosition & shape() const
Get the result shape if the same for all results.
Definition UDFBase.h:342
void setNDim(Int ndim)
Set the dimensionality of the results.
virtual void flattenTree(std::vector< TableExprNodeRep * > &)
Flatten the node tree by adding the node and its children to the vector.
static std::recursive_mutex theirMutex
Definition UDFBase.h:383
virtual MArray< Double > getArrayDouble(const TableExprId &id)
virtual ~UDFBase()
Destructor.
Record itsAttributes
Definition UDFBase.h:373
static map< String, MakeUDFObject * > theirRegistry
Definition UDFBase.h:382
Bool isConstant() const
Tell if the UDF gives a constant result.
Definition UDFBase.h:346
void disableApplySelection()
Do not apply the selection.
Definition UDFBase.h:354
virtual String getString(const TableExprId &id)
static void registerUDF(const String &name, MakeUDFObject *func)
Register the name and construction function of a UDF (thread-safe).
virtual MArray< String > getArrayString(const TableExprId &id)
void setShape(const IPosition &shape)
Set the shape of the results if it is fixed and known.
virtual MArray< Bool > getArrayBool(const TableExprId &id)
virtual void setup(const Table &table, const TaQLStyle &)=0
Set up the function object.
this file contains all the compiler specific defines
Definition mainpage.dox:28
const Bool False
Definition aipstype.h:42
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
Definition aipsxtype.h:36
int Int
Definition aipstype.h:48
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40
double Double
Definition aipstype.h:53