casacore
Loading...
Searching...
No Matches
ISMBucket.h
Go to the documentation of this file.
1//# ISMBucket.h: A bucket in the Incremental Storage Manager
2//# Copyright (C) 1996,1999,2000,2001
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: casa-feedback@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25
26#ifndef TABLES_ISMBUCKET_H
27#define TABLES_ISMBUCKET_H
28
29//# Includes
30#include <casacore/casa/aips.h>
31#include <casacore/casa/Containers/Block.h>
32#include <casacore/casa/BasicSL/String.h>
33#include <casacore/casa/iosfwd.h>
34
35namespace casacore { //# NAMESPACE CASACORE - BEGIN
36
37//# Forward declarations
38class ISMBase;
39
40// <summary>
41// A bucket in the Incremental Storage Manager
42// </summary>
43
44// <use visibility=local>
45
46// <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="">
47// </reviewed>
48
49// <prerequisite>
50//# Classes you should understand before using this one.
51// <li> <linkto class=IncrementalStMan>IncrementalStMan</linkto>
52// <li> <linkto class=BucketCache>BucketCache</linkto>
53// </prerequisite>
54
55// <etymology>
56// ISMBucket represents a bucket in the Incremental Storage Manager.
57// </etymology>
58
59// <synopsis>
60// The Incremental Storage Manager uses a <linkto class=BucketCache>
61// BucketCache</linkto> object to read/write/cache the buckets
62// containing the data. An <src>ISMBucket</src> object is the
63// internal representation of the contents of a bucket. <src>ISMBucket</src>
64// contains static callback functions which are called by
65// <src>BucketCache</src> when reading/writing a bucket. These callback
66// functions do the mapping of bucket data to <src>ISMBucket</src> object
67// and vice-versa.
68// <p>
69// A bucket contains the values of several rows
70// of all columns bound to this Incremental Storage Manager.
71// A bucket is split into a data part and an index part.
72// Each part has an arbitrary length but together they do not exceed
73// the fixed bucket length.
74// <p>
75// The beginning of the data part contains the values of all columns
76// bound. The remainder of the data part contains the values of
77// the rows/columns with a changed value.
78// <br>
79// The index part contains an index per column. Each index contains the
80// row number and an offset for a row with a stored value. The row numbers
81// are relative to the beginning of the bucket, so the bucket has
82// no knowledge about the absolute row numbers. In this way deletion of
83// rows is much simpler.
84// <p>
85// The contents of a bucket looks like:
86// <srcblock>
87// -------------------------------------------------------------------
88// | index offset | data part | index part | free |
89// -------------------------------------------------------------------
90// 0 4 4+length(data part)
91// <--------------------------bucketsize----------------------------->
92// </srcblock>
93// The data part contains all data values belonging to the bucket.
94// The index part contains for each column the following data:
95// <srcblock>
96// -----------------------------------------------------------------------
97// | #values stored | row numbers of values | offset in data part of |
98// | for column i | stored for column i | values stored for column i |
99// -----------------------------------------------------------------------
100// 0 4 4+4*nrval
101// </srcblock>
102// Note that the row numbers in the bucket start at 0, thus are relative
103// to the beginning of the bucket. The main index kept in
104// <linkto class=ISMIndex>ISMIndex</linkto> knows the starting row of
105// each bucket. In this way bucket splitting and especially row removal
106// is much easier.
107// <p>
108// The bucket can be stored in canonical or local (i.e. native) data format.
109// When a bucket is read into memory, its data are read, converted, and
110// stored in the ISMBucket object. When flushed, the contents are
111// written. ISMBucket takes care that the values stored in its object
112// do not exceed the size of the bucket. When full, the user can call
113// a function to split it into a left and right bucket. When the new
114// value has to be written at the end, the split merely consists of
115// creating a new bucket. In any case, care is taken that a row is
116// not split. Thus a row is always entirely contained in one bucket.
117// <p>
118// Class <linkto class=ISMColumn>ISMColumn</linkto> does the actual
119// writing of data in a bucket and uses the relevant ISMBucket functions.
120
121// <motivation>
122// ISMBucket encapsulates the data of a bucket.
123// </motivation>
124
125//# <todo asof="$DATE:$">
126//# A List of bugs, limitations, extensions or planned refinements.
127//# </todo>
128
129
131{
132public:
133
134 // Create a bucket with the given parent.
135 // When <src>bucketStorage</src> is non-zero, reconstruct the
136 // object from it.
137 // It keeps the pointer to its parent (but does not own it).
138 ISMBucket (ISMBase* parent, const char* bucketStorage);
139
141
142 // Forbid copy constructor.
143 ISMBucket (const ISMBucket&) = delete;
144
145 // Forbid assignment.
146 ISMBucket& operator= (const ISMBucket&) = delete;
147
148 // Get the row-interval for given column and row.
149 // It sets the start and end of the interval to which the row belongs
150 // and the offset of its current value.
151 // It returns the index where the row number can be put in the
152 // bucket index.
153 uInt getInterval (uInt colnr, rownr_t rownr, rownr_t bucketNrrow,
154 rownr_t& start, rownr_t& end, uInt& offset) const;
155
156 // Is the bucket large enough to add a value?
157 Bool canAddData (uInt leng) const;
158
159 // Add the data to the data part.
160 // It updates the bucket index at the given index.
161 // An exception is thrown if the bucket is too small.
162 void addData (uInt colnr, rownr_t rownr, uInt index,
163 const char* data, uInt leng);
164
165 // Is the bucket large enough to replace a value?
166 Bool canReplaceData (uInt newLeng, uInt oldLeng) const;
167
168 // Replace a data item.
169 // When its length is variable (indicated by fixedLength=0), the old
170 // value will be removed and the new one appended at the end.
171 // An exception is thrown if the bucket is too small.
172 void replaceData (uInt& offset, const char* data, uInt newLeng,
173 uInt fixedLength);
174
175 // Get a pointer to the data for the given offset.
176 const char* get (uInt offset) const;
177
178 // Get the length of the data value.
179 // It is <src>fixedLength</src> when non-zero,
180 // otherwise read it from the data value.
181 uInt getLength (uInt fixedLength, const char* data) const;
182
183 // Get access to the offset of the data for given column and row.
184 // It allows the caller to change it (used for example by replaceData).
185 uInt& getOffset (uInt colnr, rownr_t rownr);
186
187 // Get access to the index information for the given column.
188 // This is used by ISMColumn when putting the data.
189 // <group>
190 // Return the row numbers with a stored value.
192 // Return the offsets of the values stored in the data part.
193 Block<uInt>& offIndex (uInt colnr);
194 // Return the number of values stored.
195 uInt& indexUsed (uInt colnr);
196 // </group>
197
198 // Split the bucket in the middle.
199 // It returns the row number where the bucket was split and the
200 // new left and right bucket. The caller is responsible for
201 // deleting the newly created buckets.
202 // When possible a simple split is done.
203 // <br>
204 // The starting values in the right bucket may be copies of the
205 // values in the left bucket. The duplicated Block contains a switch
206 // per column indicating if the value is copied.
207 rownr_t split (ISMBucket*& left, ISMBucket*& right, Block<Bool>& duplicated,
208 rownr_t bucketStartRow, rownr_t bucketNrrow,
209 uInt colnr, rownr_t rownr, uInt lengToAdd);
210
211 // Determine whether a simple split is possible. If so, do it.
212 // This is possible if the new row is at the end of the last bucket,
213 // which will often be the case.
214 // <br>A simple split means adding a new bucket for the new row.
215 // If the old bucket already contains values for that row, those
216 // values are moved to the new bucket.
217 // <br>This function is only called by split, which created the
218 // left and right bucket.
220 Block<Bool>& duplicated,
221 rownr_t& splitRownr, rownr_t rownr);
222
223 // Return the index where the bucket should be split to get
224 // two parts with almost identical length.
225 uInt getSplit (uInt totLeng, const Block<uInt>& rowLeng,
226 const Block<uInt>& cumLeng);
227
228 // Remove <src>nr</src> items from data and index part by shifting
229 // to the left. The <src>rowIndex</src>, <src>offIndex</src>, and
230 // <src>nused</src> get updated. The caller is responsible for
231 // removing data when needed (e.g. <src>ISMIndColumn</src> removes
232 // the indirect arrays from its file).
234 Block<uInt>& offIndex, uInt& nused, uInt leng);
235
236 // Copy the contents of that bucket to this bucket.
237 // This is used after a split operation.
238 void copy (const ISMBucket& that);
239
240 // Callback function when BucketCache reads a bucket.
241 // It creates an ISMBucket object and converts the raw bucketStorage
242 // to that object.
243 // It returns the pointer to the ISMBucket object, which becomes part of the cache.
244 // The object gets deleted by the deleteCallBack function.
245 static char* readCallBack (void* owner, const char* bucketStorage);
246
247 // Callback function when BucketCache writes a bucket.
248 // It converts the ISMBucket bucket object to the raw bucketStorage.
249 static void writeCallBack (void* owner, char* bucketStorage,
250 const char* bucket);
251
252 // Callback function when BucketCache adds a new bucket to the data file.
253 // This function creates an empty ISMBucket object.
254 // It returns the pointer to the ISMBucket object, which becomes part of the cache.
255 // The object gets deleted by the deleteCallBack function.
256 static char* initCallBack (void* owner);
257
258 // Callback function when BucketCache removes a bucket from the cache.
259 // This function deletes the ISMBucket bucket object.
260 static void deleteCallBack (void*, char* bucket);
261
262 // Show the layout of the bucket.
263 void show (ostream& os) const;
264
265 // Check that there are no repeated rowIds in the bucket
266 Bool check (uInt& offendingCol, uInt& offendingIndex,
267 rownr_t& offendingRow, rownr_t& offendingPrevRow) const;
268
269private:
270 // Remove a data item with the given length.
271 // If the length is zero, its variable length is read first.
272 void removeData (uInt offset, uInt leng);
273
274 // Insert a data value by appending it to the end.
275 // It returns the offset of the data value.
276 uInt insertData (const char* data, uInt leng);
277
278 // Copy a data item from this bucket to the other bucket.
279 uInt copyData (ISMBucket& other, uInt colnr, rownr_t toRownr,
280 uInt fromIndex, uInt toIndex) const;
281
282 // Read the data from the storage into this bucket.
283 void read (const char* bucketStorage);
284
285 // Write the bucket into the storage.
286 void write (char* bucketStorage) const;
287
288
289 //# Declare member variables.
290 // Pointer to the parent storage manager.
292 // The size (in bytes) of an uInt and rownr_t (used in index, etc.).
295 // The size (in bytes) of the data.
297 // The size (in bytes) of the index.
299 // The row index per column; each index contains the row number
300 // of each value stored in the bucket (for that column).
302 // The offset index per column; each index contains the offset (in bytes)
303 // of each value stored in the bucket (for that column).
305 // Nr of used elements in each index; i.e. the number of stored values
306 // per column.
308 // The data space (in external (e.g. canonical) format).
309 char* data_p;
310};
311
312
// Return a read-only pointer into the bucket's data space (data_p),
// positioned <src>offset</src> bytes from its start.
// The offset is used as given; no range check is done here.
313inline const char* ISMBucket::get (uInt offset) const
314{
315 return data_p + offset;
316}
318{
319 return *(rowIndex_p[colnr]);
320}
322{
323 return *(offIndex_p[colnr]);
324}
326{
327 return indexUsed_p[colnr];
328}
329
330
331
332} //# NAMESPACE CASACORE - END
333
334#endif
simple 1-D array
Definition Block.h:198
void read(const char *bucketStorage)
Read the data from the storage into this bucket.
static char * initCallBack(void *owner)
Callback function when BucketCache adds a new bucket to the data file.
uInt & indexUsed(uInt colnr)
Return the number of values stored.
Definition ISMBucket.h:325
ISMBase * stmanPtr_p
Pointer to the parent storage manager.
Definition ISMBucket.h:291
const char * get(uInt offset) const
Get a pointer to the data for the given offset.
Definition ISMBucket.h:313
Block< rownr_t > & rowIndex(uInt colnr)
Get access to the index information for the given column.
Definition ISMBucket.h:317
ISMBucket & operator=(const ISMBucket &)=delete
Forbid assignment.
uInt uIntSize_p
The size (in bytes) of an uInt and rownr_t (used in index, etc.).
Definition ISMBucket.h:293
uInt getInterval(uInt colnr, rownr_t rownr, rownr_t bucketNrrow, rownr_t &start, rownr_t &end, uInt &offset) const
Get the row-interval for given column and row.
void removeData(uInt offset, uInt leng)
Remove a data item with the given length.
Block< uInt > indexUsed_p
Nr of used elements in each index; i.e.
Definition ISMBucket.h:307
uInt copyData(ISMBucket &other, uInt colnr, rownr_t toRownr, uInt fromIndex, uInt toIndex) const
Copy a data item from this bucket to the other bucket.
Bool canReplaceData(uInt newLeng, uInt oldLeng) const
Is the bucket large enough to replace a value?
PtrBlock< Block< uInt > * > offIndex_p
The offset index per column; each index contains the offset (in bytes) of each value stored in the bu...
Definition ISMBucket.h:304
Bool check(uInt &offendingCol, uInt &offendingIndex, rownr_t &offendingRow, rownr_t &offendingPrevRow) const
Check that there are no repeated rowIds in the bucket.
Block< uInt > & offIndex(uInt colnr)
Return the offsets of the values stored in the data part.
Definition ISMBucket.h:321
void copy(const ISMBucket &that)
Copy the contents of that bucket to this bucket.
char * data_p
The data space (in external (e.g.
Definition ISMBucket.h:309
static char * readCallBack(void *owner, const char *bucketStorage)
Callback function when BucketCache reads a bucket.
void shiftLeft(uInt index, uInt nr, Block< rownr_t > &rowIndex, Block< uInt > &offIndex, uInt &nused, uInt leng)
Remove nr items from data and index part by shifting to the left.
Bool simpleSplit(ISMBucket *left, ISMBucket *right, Block< Bool > &duplicated, rownr_t &splitRownr, rownr_t rownr)
Determine whether a simple split is possible.
uInt & getOffset(uInt colnr, rownr_t rownr)
Get access to the offset of the data for given column and row.
ISMBucket(const ISMBucket &)=delete
Forbid copy constructor.
uInt dataLeng_p
The size (in bytes) of the data.
Definition ISMBucket.h:296
void addData(uInt colnr, rownr_t rownr, uInt index, const char *data, uInt leng)
Add the data to the data part.
void write(char *bucketStorage) const
Write the bucket into the storage.
void show(ostream &os) const
Show the layout of the bucket.
uInt insertData(const char *data, uInt leng)
Insert a data value by appending it to the end.
static void deleteCallBack(void *, char *bucket)
Callback function when BucketCache removes a bucket from the cache.
uInt getLength(uInt fixedLength, const char *data) const
Get the length of the data value.
void replaceData(uInt &offset, const char *data, uInt newLeng, uInt fixedLength)
Replace a data item.
uInt indexLeng_p
The size (in bytes) of the index.
Definition ISMBucket.h:298
static void writeCallBack(void *owner, char *bucketStorage, const char *bucket)
Callback function when BucketCache writes a bucket.
Bool canAddData(uInt leng) const
Is the bucket large enough to add a value?
PtrBlock< Block< rownr_t > * > rowIndex_p
The row index per column; each index contains the row number of each value stored in the bucket (for ...
Definition ISMBucket.h:301
rownr_t split(ISMBucket *&left, ISMBucket *&right, Block< Bool > &duplicated, rownr_t bucketStartRow, rownr_t bucketNrrow, uInt colnr, rownr_t rownr, uInt lengToAdd)
Split the bucket in the middle.
uInt getSplit(uInt totLeng, const Block< uInt > &rowLeng, const Block< uInt > &cumLeng)
Return the index where the bucket should be split to get two parts with almost identical length.
ISMBucket(ISMBase *parent, const char *bucketStorage)
Create a bucket with the given parent.
A drop-in replacement for Block<T*>.
Definition Block.h:812
this file contains all the compiler specific defines
Definition mainpage.dox:28
unsigned int uInt
Definition aipstype.h:49
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:40
uInt64 rownr_t
Define the type of a row number in a table.
Definition aipsxtype.h:44