casacore
ISMBucket.h
Go to the documentation of this file.
1 //# ISMBucket.h: A bucket in the Incremental Storage Manager
2 //# Copyright (C) 1996,1999,2000,2001
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef TABLES_ISMBUCKET_H
29 #define TABLES_ISMBUCKET_H
30 
31 //# Includes
32 #include <casacore/casa/aips.h>
33 #include <casacore/casa/Containers/Block.h>
34 #include <casacore/casa/BasicSL/String.h>
35 #include <casacore/casa/iosfwd.h>
36 
37 namespace casacore { //# NAMESPACE CASACORE - BEGIN
38 
39 //# Forward declarations
40 class ISMBase;
41 
42 // <summary>
43 // A bucket in the Incremental Storage Manager
44 // </summary>
45 
46 // <use visibility=local>
47 
48 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="">
49 // </reviewed>
50 
51 // <prerequisite>
52 //# Classes you should understand before using this one.
53 // <li> <linkto class=IncrementalStMan>IncrementalStMan</linkto>
54 // <li> <linkto class=BucketCache>BucketCache</linkto>
55 // </prerequisite>
56 
57 // <etymology>
58 // ISMBucket represents a bucket in the Incremental Storage Manager.
59 // </etymology>
60 
61 // <synopsis>
62 // The Incremental Storage Manager uses a <linkto class=BucketCache>
63 // BucketCache</linkto> object to read/write/cache the buckets
64 // containing the data. An <src>ISMBucket</src> object is the
65 // internal representation of the contents of a bucket. <src>ISMBucket</src>
66 // contains static callback functions which are called by
67 // <src>BucketCache</src> when reading/writing a bucket. These callback
68 // functions do the mapping of bucket data to <src>ISMBucket</src> object
69 // and vice-versa.
70 // <p>
71 // A bucket contains the values of several rows
72 // of all columns bound to this Incremental Storage Manager.
73 // A bucket is split into a data part and an index part.
74 // Each part has an arbitrary length but together they do not exceed
75 // the fixed bucket length.
76 // <p>
77 // The beginning of the data part contains the values of all columns
78 // bound. The remainder of the data part contains the values of
79 // the rows/columns with a changed value.
80 // <br>
81 // The index part contains an index per column. Each index contains the
82 // row number and an offset for a row with a stored value. The row numbers
83 // are relative to the beginning of the bucket, so the bucket has
84 // no knowledge about the absolute row numbers. In this way deletion of
85 // rows is much simpler.
86 // <p>
87 // The contents of a bucket look like:
88 // <srcblock>
89 // -------------------------------------------------------------------
90 // | index offset | data part      | index part              | free  |
91 // -------------------------------------------------------------------
92 // 0              4                4+length(data part)
93 // <--------------------------bucketsize----------------------------->
94 // </srcblock>
95 // The data part contains all data values belonging to the bucket.
96 // The index part contains for each column the following data:
97 // <srcblock>
98 // -----------------------------------------------------------------------
99 // | #values stored | row numbers of values | offset in data part of     |
100 // | for column i   | stored for column i   | values stored for column i |
101 // -----------------------------------------------------------------------
102 // 0                4                       4+4*nrval
103 // </srcblock>
104 // Note that the row numbers in the bucket start at 0, thus are relative
105 // to the beginning of the bucket. The main index kept in
106 // <linkto class=ISMIndex>ISMIndex</linkto> knows the starting row of
107 // each bucket. In this way bucket splitting and especially row removal
108 // is much easier.
109 // <p>
110 // The bucket can be stored in canonical or local (i.e. native) data format.
111 // When a bucket is read into memory, its data are read, converted, and
112 // stored in the ISMBucket object. When flushed, the contents are
113 // written. ISMBucket takes care that the values stored in its object
114 // do not exceed the size of the bucket. When full, the user can call
115 // a function to split it into a left and right bucket. When the new
116 // value has to be written at the end, the split merely consists of
117 // creating a new bucket. In any case, care is taken that a row is
118 // not split. Thus a row is always entirely contained in one bucket.
119 // <p>
120 // Class <linkto class=ISMColumn>ISMColumn</linkto> does the actual
121 // writing of data in a bucket and uses the relevant ISMBucket functions.
122 
123 // <motivation>
124 // ISMBucket encapsulates the data of a bucket.
125 // </motivation>
126 
127 //# <todo asof="$DATE:$">
128 //# A List of bugs, limitations, extensions or planned refinements.
129 //# </todo>
130 
131 
133 {
134 public:
135 
136  // Create a bucket with the given parent.
137  // When <src>bucketStorage</src> is non-zero, reconstruct the
138  // object from it.
139  // It keeps the pointer to its parent (but does not own it).
140  ISMBucket (ISMBase* parent, const char* bucketStorage);
141 
143 
144  // Get the row-interval for given column and row.
145  // It sets the start and end of the interval to which the row belongs
146  // and the offset of its current value.
147  // It returns the index where the row number can be put in the
148  // bucket index.
149  uInt getInterval (uInt colnr, rownr_t rownr, rownr_t bucketNrrow,
150  rownr_t& start, rownr_t& end, uInt& offset) const;
151 
152  // Is the bucket large enough to add a value?
153  Bool canAddData (uInt leng) const;
154 
155  // Add the data to the data part.
156  // It updates the bucket index at the given index.
157  // An exception is thrown if the bucket is too small.
158  void addData (uInt colnr, rownr_t rownr, uInt index,
159  const char* data, uInt leng);
160 
161  // Is the bucket large enough to replace a value?
162  Bool canReplaceData (uInt newLeng, uInt oldLeng) const;
163 
164  // Replace a data item.
165  // When its length is variable (indicated by fixedLength=0), the old
166  // value will be removed and the new one appended at the end.
167  // An exception is thrown if the bucket is too small.
168  void replaceData (uInt& offset, const char* data, uInt newLeng,
169  uInt fixedLength);
170 
171  // Get a pointer to the data for the given offset.
172  const char* get (uInt offset) const;
173 
174  // Get the length of the data value.
175  // It is <src>fixedLength</src> when non-zero,
176  // otherwise read it from the data value.
177  uInt getLength (uInt fixedLength, const char* data) const;
178 
179  // Get access to the offset of the data for given column and row.
180  // It allows to change it (used for example by replaceData).
181  uInt& getOffset (uInt colnr, rownr_t rownr);
182 
183  // Get access to the index information for the given column.
184  // This is used by ISMColumn when putting the data.
185  // <group>
186  // Return the row numbers with a stored value.
187  Block<rownr_t>& rowIndex (uInt colnr);
188  // Return the offsets of the values stored in the data part.
189  Block<uInt>& offIndex (uInt colnr);
190  // Return the number of values stored.
191  uInt& indexUsed (uInt colnr);
192  // </group>
193 
194  // Split the bucket in the middle.
195  // It returns the row number where the bucket was split and the
196  // new left and right bucket. The caller is responsible for
197  // deleting the newly created buckets.
198  // When possible a simple split is done.
199  // <br>
200  // The starting values in the right bucket may be copies of the
201  // values in the left bucket. The duplicated Block contains a switch
202  // per column indicating if the value is copied.
203  rownr_t split (ISMBucket*& left, ISMBucket*& right, Block<Bool>& duplicated,
204  rownr_t bucketStartRow, rownr_t bucketNrrow,
205  uInt colnr, rownr_t rownr, uInt lengToAdd);
206 
207  // Determine whether a simple split is possible. If so, do it.
208  // This is possible if the new row is at the end of the last bucket,
209  // which will often be the case.
210  // <br>A simple split means adding a new bucket for the new row.
211  // If the old bucket already contains values for that row, those
212  // values are moved to the new bucket.
213  // <br>This function is only called by split, which created the
214  // left and right bucket.
216  Block<Bool>& duplicated,
217  rownr_t& splitRownr, rownr_t rownr);
218 
219  // Return the index where the bucket should be split to get
220  // two parts with almost identical length.
221  uInt getSplit (uInt totLeng, const Block<uInt>& rowLeng,
222  const Block<uInt>& cumLeng);
223 
224  // Remove <src>nr</src> items from data and index part by shifting
225  // to the left. The <src>rowIndex</src>, <src>offIndex</src>, and
226  // <src>nused</src> get updated. The caller is responsible for
227  // removing data when needed (e.g. <src>ISMIndColumn</src> removes
228  // the indirect arrays from its file).
230  Block<uInt>& offIndex, uInt& nused, uInt leng);
231 
232  // Copy the contents of that bucket to this bucket.
233  // This is used after a split operation.
234  void copy (const ISMBucket& that);
235 
236  // Callback function when BucketCache reads a bucket.
237  // It creates an ISMBucket object and converts the raw bucketStorage
238  // to that object.
239  // It returns the pointer to the ISMBucket object, which becomes part of the cache.
240  // The object gets deleted by the deleteCallBack function.
241  static char* readCallBack (void* owner, const char* bucketStorage);
242 
243  // Callback function when BucketCache writes a bucket.
244  // It converts the ISMBucket bucket object to the raw bucketStorage.
245  static void writeCallBack (void* owner, char* bucketStorage,
246  const char* bucket);
247 
248  // Callback function when BucketCache adds a new bucket to the data file.
249  // This function creates an empty ISMBucket object.
250  // It returns the pointer to the ISMBucket object, which becomes part of the cache.
251  // The object gets deleted by the deleteCallBack function.
252  static char* initCallBack (void* owner);
253 
254  // Callback function when BucketCache removes a bucket from the cache.
255  // This function deletes the ISMBucket bucket object.
256  static void deleteCallBack (void*, char* bucket);
257 
258  // Show the layout of the bucket.
259  void show (ostream& os) const;
260 
261  // Check that there are no repeated rowIds in the bucket
262  Bool check (uInt& offendingCol, uInt& offendingIndex,
263  rownr_t& offendingRow, rownr_t& offendingPrevRow) const;
264 
265 private:
266  // Forbid copy constructor.
268 
269  // Forbid assignment.
271 
272  // Remove a data item with the given length.
273  // If the length is zero, its variable length is read first.
274  void removeData (uInt offset, uInt leng);
275 
276  // Insert a data value by appending it to the end.
277  // It returns the offset of the data value.
278  uInt insertData (const char* data, uInt leng);
279 
280  // Copy a data item from this bucket to the other bucket.
281  uInt copyData (ISMBucket& other, uInt colnr, rownr_t toRownr,
282  uInt fromIndex, uInt toIndex) const;
283 
284  // Read the data from the storage into this bucket.
285  void read (const char* bucketStorage);
286 
287  // Write the bucket into the storage.
288  void write (char* bucketStorage) const;
289 
290 
291  //# Declare member variables.
292  // Pointer to the parent storage manager.
294  // The size (in bytes) of an uInt and rownr_t (used in index, etc.).
297  // The size (in bytes) of the data.
299  // The size (in bytes) of the index.
301  // The row index per column; each index contains the row number
302  // of each value stored in the bucket (for that column).
304  // The offset index per column; each index contains the offset (in bytes)
305  // of each value stored in the bucket (for that column).
307  // Nr of used elements in each index; i.e. the number of stored values
308  // per column.
310  // The data space (in external (e.g. canonical) format).
311  char* data_p;
312 };
313 
314 
// Inline implementation of ISMBucket::get: returns the address inside the
// bucket's data space (data_p) that lies <src>offset</src> bytes from its start.
// The offset is applied as given; no range check is done in this function.
315 inline const char* ISMBucket::get (uInt offset) const
316 {
317  return data_p + offset;
318 }
320 {
321  return *(rowIndex_p[colnr]);
322 }
324 {
325  return *(offIndex_p[colnr]);
326 }
328 {
329  return indexUsed_p[colnr];
330 }
331 
332 
333 
334 } //# NAMESPACE CASACORE - END
335 
336 #endif
void read(const char *bucketStorage)
Read the data from the storage into this bucket.
uInt & indexUsed(uInt colnr)
Return the number of values stored.
Definition: ISMBucket.h:327
ISMBase * stmanPtr_p
Pointer to the parent storage manager.
Definition: ISMBucket.h:293
const char * get(uInt offset) const
Get a pointer to the data for the given offset.
Definition: ISMBucket.h:315
Block< rownr_t > & rowIndex(uInt colnr)
Get access to the index information for the given column.
Definition: ISMBucket.h:319
uInt uIntSize_p
The size (in bytes) of an uInt and rownr_t (used in index, etc.).
Definition: ISMBucket.h:295
uInt getInterval(uInt colnr, rownr_t rownr, rownr_t bucketNrrow, rownr_t &start, rownr_t &end, uInt &offset) const
Get the row-interval for given column and row.
void removeData(uInt offset, uInt leng)
Remove a data item with the given length.
Block< uInt > indexUsed_p
Nr of used elements in each index; i.e. the number of stored values per column.
Definition: ISMBucket.h:309
uInt copyData(ISMBucket &other, uInt colnr, rownr_t toRownr, uInt fromIndex, uInt toIndex) const
Copy a data item from this bucket to the other bucket.
Bool canReplaceData(uInt newLeng, uInt oldLeng) const
Is the bucket large enough to replace a value?
PtrBlock< Block< uInt > * > offIndex_p
The offset index per column; each index contains the offset (in bytes) of each value stored in the bucket (for that column).
Definition: ISMBucket.h:306
Bool check(uInt &offendingCol, uInt &offendingIndex, rownr_t &offendingRow, rownr_t &offendingPrevRow) const
Check that there are no repeated rowIds in the bucket.
Block< uInt > & offIndex(uInt colnr)
Return the offsets of the values stored in the data part.
Definition: ISMBucket.h:323
void copy(const ISMBucket &that)
Copy the contents of that bucket to this bucket.
char * data_p
The data space (in external (e.g. canonical) format).
Definition: ISMBucket.h:311
ISMBucket(const ISMBucket &)
Forbid copy constructor.
void shiftLeft(uInt index, uInt nr, Block< rownr_t > &rowIndex, Block< uInt > &offIndex, uInt &nused, uInt leng)
Remove nr items from data and index part by shifting to the left.
Bool simpleSplit(ISMBucket *left, ISMBucket *right, Block< Bool > &duplicated, rownr_t &splitRownr, rownr_t rownr)
Determine whether a simple split is possible.
uInt & getOffset(uInt colnr, rownr_t rownr)
Get access to the offset of the data for given column and row.
uInt dataLeng_p
The size (in bytes) of the data.
Definition: ISMBucket.h:298
void addData(uInt colnr, rownr_t rownr, uInt index, const char *data, uInt leng)
Add the data to the data part.
void write(char *bucketStorage) const
Write the bucket into the storage.
void show(ostream &os) const
Show the layout of the bucket.
uInt insertData(const char *data, uInt leng)
Insert a data value by appending it to the end.
static void deleteCallBack(void *, char *bucket)
Callback function when BucketCache removes a bucket from the cache.
static char * initCallBack(void *owner)
Callback function when BucketCache adds a new bucket to the data file.
uInt getLength(uInt fixedLength, const char *data) const
Get the length of the data value.
void replaceData(uInt &offset, const char *data, uInt newLeng, uInt fixedLength)
Replace a data item.
uInt indexLeng_p
The size (in bytes) of the index.
Definition: ISMBucket.h:300
static void writeCallBack(void *owner, char *bucketStorage, const char *bucket)
Callback function when BucketCache writes a bucket.
Bool canAddData(uInt leng) const
Is the bucket large enough to add a value?
ISMBucket & operator=(const ISMBucket &)
Forbid assignment.
static char * readCallBack(void *owner, const char *bucketStorage)
Callback function when BucketCache reads a bucket.
PtrBlock< Block< rownr_t > * > rowIndex_p
The row index per column; each index contains the row number of each value stored in the bucket (for that column).
Definition: ISMBucket.h:303
rownr_t split(ISMBucket *&left, ISMBucket *&right, Block< Bool > &duplicated, rownr_t bucketStartRow, rownr_t bucketNrrow, uInt colnr, rownr_t rownr, uInt lengToAdd)
Split the bucket in the middle.
uInt getSplit(uInt totLeng, const Block< uInt > &rowLeng, const Block< uInt > &cumLeng)
Return the index where the bucket should be split to get two parts with almost identical length.
ISMBucket(ISMBase *parent, const char *bucketStorage)
Create a bucket with the given parent.
A drop-in replacement for Block<T*>.
Definition: Block.h:814
this file contains all the compiler specific defines
Definition: mainpage.dox:28
unsigned int uInt
Definition: aipstype.h:51
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
uInt64 rownr_t
Define the type of a row number in a table.
Definition: aipsxtype.h:46