casacore
Loading...
Searching...
No Matches
TableParseGroupby.h
Go to the documentation of this file.
1//# TableParseGroupby.h: Class handling GROUPBY and aggregate functions
2//# Copyright (C) 1994-2022
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: casa-feedback@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25
26#ifndef TABLES_TABLEPARSEGROUPBY_H
27#define TABLES_TABLEPARSEGROUPBY_H
28
29//# Includes
30#include <casacore/casa/aips.h>
31#include <casacore/tables/TaQL/ExprNode.h>
32#include <casacore/tables/TaQL/ExprGroup.h>
33#include <vector>
34
35namespace casacore { //# NAMESPACE CASACORE - BEGIN
36
37 //# Forward declarations
38 class TableParseQuery;
39
40
41 // <summary>
42 // Class handling GROUPBY and aggregate functions
43 // </summary>
44
45 // <use visibility=local>
46
47 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="">
48 // </reviewed>
49
50 // <synopsis>
51 // This class is used by TableParseQuery to handle TaQL's GROUPBY and HAVING
52 // clauses and to setup and evaluate aggregate functions.
53 // It checks that the commands and functions are given in a valid way.
54 // <br>Note that some hooks are present for the ROLLUP keyword, but it is not
55 // possible to use it yet.
56 // </synopsis>
57
59 {
60 public:
66
67 // Keep the groupby expressions.
68 // It checks if they are all scalar expressions and do not contain
69 // aggregate functions.
70 void handleGroupby (const std::vector<TableExprNode>&, Bool rollup);
71
72 // Keep the having expression.
73 // It checks if the node results in a bool scalar value.
75
76 // Find if groupby and/or aggregate functions are given.
77 // The column nodes can only contain aggregate functions if SELECT is used.
78 // Finally it checks that HAVING is only used if a column node contains
79 // an aggregate function (it makes no sense otherwise).
80 void findGroupAggr (const Block<TableExprNode>& columnNodes,
81 Bool isSelect);
82
83 // Is GROUPBY and/or aggregation used, i.e. is itsGroupAggrUsed non-zero?
84 Bool isUsed() const
85 { return itsGroupAggrUsed != 0; }
86
87 // Is only aggregation used?
89 { return itsGroupAggrUsed != 0 && (itsGroupAggrUsed & GROUPBY) == 0; }
90
91 // Get the number of aggregation nodes (the size of itsAggrNodes).
92 uInt size() const
93 { return itsAggrNodes.size(); }
94
95 // Disable applySelection for the column nodes of aggregate functions.
97
98 // An exception is thrown if the node uses an aggregate function.
99 static void checkAggrFuncs (const TableExprNode& node);
100
101 // Execute the grouping and aggregation and return the results.
102 // The rownrs are adapted to the resulting rownrs consisting of the
103 // first row of each group.
104 std::shared_ptr<TableExprGroupResult> execGroupAggr (Vector<rownr_t>& rownrs) const;
105
106 // Execute the HAVING clause (if present).
107 // Return False if no HAVING.
109 const std::shared_ptr<TableExprGroupResult>& groups);
110
111 private:
112 // Do the grouping and aggregation and return the results.
113 // It distinguishes the immediate and lazy aggregate functions.
114 // The rownrs are adapted to the resulting rownrs consisting of the
115 // first row of each group.
116 std::shared_ptr<TableExprGroupResult> aggregate (Vector<rownr_t>& rownrs) const;
117
118 // Do the grouping and aggregation and return the results.
119 // It consists of a single COUNTALL operation.
120 // The rownrs are adapted to the resulting rownrs consisting of the
121 // first row of each group.
122 std::shared_ptr<TableExprGroupResult> countAll (Vector<rownr_t>& rownrs) const;
123
124 // Create the set of aggregate functions and groupby keys.
125 std::vector<std::shared_ptr<TableExprGroupFuncSet>> multiKey
126 (const std::vector<TableExprNodeRep*>&, const Vector<rownr_t>& rownrs) const;
127
128 // Create the set of aggregate functions and groupby keys in case
129 // a single groupby key is given.
130 // This offers much faster map access then the general multipleKeys.
131 template<typename T>
132 std::vector<std::shared_ptr<TableExprGroupFuncSet>> singleKey
133 (const std::vector<TableExprNodeRep*>& nodes,
134 const Vector<rownr_t>& rownrs) const
135 {
136 // We have to group the data according to the (possibly empty) groupby.
137 // We step through the table in the normal order which may not be the
138 // groupby order.
139 // A map<key,int> is used to keep track of the results where the int
140 // is the index in a vector of a set of aggregate function objects.
141 std::vector<std::shared_ptr<TableExprGroupFuncSet>> funcSets;
142 std::map<T, int> keyFuncMap;
143 T lastKey = std::numeric_limits<T>::max();
144 int groupnr = -1;
145 // Loop through all rows.
146 // For each row generate the key to get the right entry.
147 TableExprId rowid(0);
148 T key;
149 for (rownr_t i=0; i<rownrs.size(); ++i) {
150 rowid.setRownr (rownrs[i]);
151 itsGroupbyNodes[0].get (rowid, key);
152 if (key != lastKey) {
153 typename std::map<T, int>::iterator iter = keyFuncMap.find (key);
154 if (iter == keyFuncMap.end()) {
155 groupnr = funcSets.size();
156 keyFuncMap[key] = groupnr;
157 funcSets.push_back (std::shared_ptr<TableExprGroupFuncSet>
158 (new TableExprGroupFuncSet (nodes)));
159 } else {
160 groupnr = iter->second;
161 }
162 }
163 rowid.setRownr (rownrs[i]);
164 funcSets[groupnr]->apply (rowid);
165 }
166 return funcSets;
167 }
168
169 // Get pointers to the aggregate nodes in the node expression.
170 void getAggrNodes (const TableExprNode& node,
171 std::vector<TableExprNodeRep*>& aggrNodes) const;
172
173
174 //# Data members.
175 // The possible GROUPBY expressions.
176 std::vector<TableExprNode> itsGroupbyNodes;
177 Bool itsGroupbyRollup; //# use ROLLUP in GROUPBY?
178 // The possible HAVING expression.
180 // Pointers to the aggregate function nodes.
181 std::vector<TableExprNodeRep*> itsAggrNodes;
183 };
184
185
186} //# NAMESPACE CASACORE - END
187
188#endif
size_t size() const
Definition ArrayBase.h:103
simple 1-D array
Definition Block.h:198
Class containing the results of aggregated values in a group.
Definition ExprGroup.h:799
void setRownr(rownr_t rownr)
Set the row number.
std::shared_ptr< TableExprGroupResult > execGroupAggr(Vector< rownr_t > &rownrs) const
Execute the grouping and aggregation and return the results.
std::vector< std::shared_ptr< TableExprGroupFuncSet > > singleKey(const std::vector< TableExprNodeRep * > &nodes, const Vector< rownr_t > &rownrs) const
Create the set of aggregate functions and groupby keys in case a single groupby key is given.
TableExprNode itsHavingNode
The possible HAVING expression.
Bool execHaving(Vector< rownr_t > &rownrs, const std::shared_ptr< TableExprGroupResult > &groups)
Execute the HAVING clause (if present).
Bool isUsed() const
Is GROUPBY and/or aggregation used?
std::shared_ptr< TableExprGroupResult > countAll(Vector< rownr_t > &rownrs) const
Do the grouping and aggregation and return the results.
void getAggrNodes(const TableExprNode &node, std::vector< TableExprNodeRep * > &aggrNodes) const
Get pointers to the aggregate nodes in the node expression.
std::vector< std::shared_ptr< TableExprGroupFuncSet > > multiKey(const std::vector< TableExprNodeRep * > &, const Vector< rownr_t > &rownrs) const
Create the set of aggregate functions and groupby keys.
std::vector< TableExprNode > itsGroupbyNodes
The possible GROUPBY expressions.
void handleHaving(const TableExprNode &)
Keep the having expression.
uInt size() const
Get the number of aggregation nodes.
std::shared_ptr< TableExprGroupResult > aggregate(Vector< rownr_t > &rownrs) const
Do the grouping and aggregation and return the results.
Bool isOnlyAggr() const
Is only aggregation used?
void findGroupAggr(const Block< TableExprNode > &columnNodes, Bool isSelect)
Find if groupby and/or aggregate functions are given.
static void checkAggrFuncs(const TableExprNode &node)
An exception is thrown if the node uses an aggregate function.
void handleGroupby(const std::vector< TableExprNode > &, Bool rollup)
Keep the groupby expressions.
uInt disableApplySelection()
Disable applySelection for the column nodes of aggregate functions.
std::vector< TableExprNodeRep * > itsAggrNodes
Pointers to the aggregate function nodes.
this file contains all the compiler specific defines
Definition mainpage.dox:28
unsigned int uInt
Definition aipstype.h:49
int Int
Definition aipstype.h:48
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40
uInt64 rownr_t
Define the type of a row number in a table.
Definition aipsxtype.h:44