casacore
SSMBase.h
Go to the documentation of this file.
1 //# SSMBase.h: Base class of the Standard Storage Manager
2 //# Copyright (C) 2000,2001,2002
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef TABLES_SSMBASE_H
29 #define TABLES_SSMBASE_H
30 
31 
32 //# Includes
33 #include <casacore/casa/aips.h>
34 #include <casacore/tables/DataMan/DataManager.h>
35 #include <casacore/casa/Containers/Block.h>
36 
37 namespace casacore { //# NAMESPACE CASACORE - BEGIN
38 
39 //# Forward declarations
40 class BucketCache;
41 class BucketFile;
42 class StManArrayFile;
43 class SSMIndex;
44 class SSMColumn;
45 class SSMStringHandler;
46 
47 // <summary>
48 // Base class of the Standard Storage Manager
49 // </summary>
50 
51 // <use visibility=local>
52 
53 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tStandardStMan.cc">
54 // </reviewed>
55 
56 // <prerequisite>
57 //# Classes you should understand before using this one.
58 // <li> <linkto class=StandardStMan>StandardStMan</linkto>
59 // <li> <linkto class=SSMColumn>SSMColumn</linkto>
60 // </prerequisite>
61 
62 // <etymology>
63 // SSMBase is the base class of the Standard Storage Manager.
64 // </etymology>
65 
66 // <synopsis>
67 // The global principles of this class are described in
68 // <linkto class="StandardStMan:description">StandardStMan</linkto>.
69 // <p>
70 // The Standard Storage Manager divides the data file in equally sized
71 // chunks called buckets. There are 3 types of buckets:
72 // <ul>
73 // <li> Data buckets containing the fixed length data (scalars and
74 // direct arrays of data type Int, Float, Bool, etc.).
75 // For variable shaped data (strings and indirect arrays) they
76 // contain references to the actual data position in the
77 // string buckets or in an external file.
78 // <li> String buckets containing strings and arrays of strings.
79 // <li> Index buckets containing the index info for the data buckets.
80 // </ul>
81 // Bucket access is handled by class
82 // <linkto class=BucketCache>BucketCache</linkto>.
83 // It also keeps a list of free buckets. A bucket is freed when it is
84 // not needed anymore (e.g. all data from it are deleted).
85 // <p>
86 // Data buckets form the main part of the SSM. The data can be viewed as
87 // a few streams of buckets, where each stream contains the data of
88 // a given number of columns. Each stream has an
89 // <linkto class=SSMIndex>SSMIndex</linkto> object describing the
90 // number of rows stored in each data bucket of the stream.
91 // The SSM starts with a single bucket stream (holding all columns),
92 // but when columns are added, new bucket streams might be created.
93 // <p>
94 // For example, we have an SSM with a bucket size of 100 bytes.
95 // There are 5 Int columns (A,B,C,D,E) each taking 4 bytes per row.
96 // Column A, B, C, and D are stored in bucket stream 1, while column
97 // E is stored in bucket stream 2. So in stream 1 each bucket can hold
98 // 6 rows, while in stream 2 each bucket can hold 25 rows.
99 // For a 100 row table it will result in 17+4 data buckets.
100 // <p>
101 // A few classes collaborate to make it work:
102 // <ul>
103 // <li> Each bucket stream has an <linkto class=SSMIndex>SSMIndex</linkto>
104 // object to map row number to bucket number.
105 // Note that in principle each bucket in a stream contains the same
106 // number of rows. However, when a row is deleted it is removed
107 // from its bucket shifting the remainder to the left. Data in the
108 // next buckets is not shifted, so that bucket has now one row less.
109 // <li> For each column SSMBase knows to which bucket stream it belongs
110 // and at which offset the column starts in a bucket.
111 // Note that column data in a bucket are adjacent, which is done
112 // to make it easier to use the
113 // <linkto class=ColumnCache>ColumnCache</linkto> object in SSMColumn
114 // and to be able to efficiently store Bool values as bits.
115 // <li> Each column has an <linkto class=SSMColumn>SSMColumn</linkto>
116 // object knowing how many bits each data cell takes in a bucket.
117 // The SSMColumn objects handle all access to data in the columns
118 // (using SSMBase and SSMIndex).
119 // </ul>
120 // <p>
121 // String buckets are used by class
122 // <linkto class=SSMStringHandler>SSMStringHandler</linkto> to
123 // store scalar strings and fixed and variable shaped arrays of strings.
124 // The bucketnr, offset, and length of such string (arrays) are stored
125 // in the data buckets.
126 // <br>
127 // Indirect arrays of other data types are also stored indirectly
128 // and their offset is stored in the data buckets. Such arrays are
129 // handled by class <linkto class=StIndArray>StIndArray</linkto>
130 // which uses an extra file to store the arrays.
131 // <p>
132 // Index buckets are used by SSMBase to make the SSMIndex data persistent.
133 // It uses alternately 2 sets of index buckets. In that way there is
134 // always an index available in case the system crashes.
135 // If possible 2 halves of a single bucket are used alternately, otherwise
136 // separate buckets are used.
137 // </synopsis>
138 
139 // <motivation>
140 // The public interface of SSMBase is quite large, because the other
141 // internal SSM classes need these functions. To have a class with a
142 // minimal interface for the normal user, class <src>StandardStMan</src>
143 // is derived from it.
144 // <br>StandardStMan needs an isA- instead of hasA-relation to be
145 // able to bind columns to it in class <linkto class=SetupNewTable>
146 // SetupNewTable</linkto>.
147 // </motivation>
148 
149 // <todo asof="$DATE:$">
150 //# A List of bugs, limitations, extensions or planned refinements.
151 // <li> Remove AipsIO argument from open and close.
152 // <li> When only 1 bucket in use addcolumn can check if there's enough
153 // room to fit the new column (so rearrange the bucket) in the free
154 // row space.
155 // </todo>
156 
157 
158 class SSMBase: public DataManager
159 {
160 public:
161  // Create a Standard storage manager with default name SSM.
162  explicit SSMBase (Int aBucketSize=0,
163  uInt aCacheSize=1);
164 
165  // Create a Standard storage manager with the given name.
166  explicit SSMBase (const String& aDataManName,
167  Int aBucketSize=0,
168  uInt aCacheSize=1);
169 
170  // Create a Standard storage manager with the given name.
171  // The specifications are part of the record (as created by dataManagerSpec).
172  SSMBase (const String& aDataManName,
173  const Record& spec);
174 
176 
177  // Clone this object.
178  // It does not clone SSMColumn objects possibly used.
179  // The caller has to delete the newly created object.
180  virtual DataManager* clone() const;
181 
182  // Get the type name of the data manager (i.e. StandardStMan).
183  virtual String dataManagerType() const;
184 
185  // Get the name given to the storage manager (in the constructor).
186  virtual String dataManagerName() const;
187 
188  // Return a record containing data manager specifications.
189  virtual Record dataManagerSpec() const;
190 
191  // Get data manager properties that can be modified.
192  // It is only ActualCacheSize (the actual cache size in buckets).
193  // It is a subset of the data manager specification.
194  virtual Record getProperties() const;
195 
196  // Modify data manager properties.
197  // Only MaxCacheSize can be used. It is similar to function setCacheSize
198  // with <src>canExceedNrBuckets=False</src>.
199  virtual void setProperties (const Record& spec);
200 
201  // Get the version of the class.
202  uInt getVersion() const;
203 
204  // Set the cache size (in buckets).
205  // If <src>canExceedNrBuckets=True</src>, the given cache size can be
206  // larger than the nr of buckets in the file. In this way the cache can
207  // be made large enough for a future file extension.
208  // Otherwise, it is limited to the actual number of buckets. This is useful
209  // if one wants the entire file to be cached.
210  void setCacheSize (uInt aCacheSize, Bool canExceedNrBuckets=True);
211 
212  // Get the current cache size (in buckets).
213  uInt getCacheSize() const;
214 
215  // Clear the cache used by this storage manager.
216  // It will flush the cache as needed and remove all buckets from it.
217  void clearCache();
218 
219  // Show the statistics of all caches used.
220  virtual void showCacheStatistics (ostream& anOs) const;
221 
222  // Show statistics of all indices used.
223  void showIndexStatistics (ostream & anOs) const;
224 
225  // Show statistics of the Base offsets/index etc.
226  void showBaseStatistics (ostream & anOs) const;
227 
228  // Get the bucket size.
229  uInt getBucketSize() const;
230 
231  // Get the number of rows in this storage manager.
232  rownr_t getNRow() const;
233 
234  // The storage manager can add rows.
235  virtual Bool canAddRow() const;
236 
237  // The storage manager can delete rows.
238  virtual Bool canRemoveRow() const;
239 
240  // The storage manager can add columns.
241  virtual Bool canAddColumn() const;
242 
243  // The storage manager can delete columns.
244  virtual Bool canRemoveColumn() const;
245 
246  // Make the object from the type name string.
247  // This function gets registered in the DataManager "constructor" map.
248  // The caller has to delete the object.
249  static DataManager* makeObject (const String& aDataManType,
250  const Record& spec);
251 
252  // Get access to the given column.
253  SSMColumn& getColumn (uInt aColNr);
254 
255  // Get access to the given Index.
256  SSMIndex& getIndex (uInt anIdxNr);
257 
258  // Make the current bucket in the cache dirty (i.e. something has been
259  // changed in it and it needs to be written when removed from the cache).
260  // (used by SSMColumn::putValue).
262 
263  // Open (if needed) the file for indirect arrays with the given mode.
264  // Return a pointer to the object.
266 
267  // Find the bucket containing the column and row and return the pointer
268  // to the beginning of the column data in that bucket.
269  // It also fills in the start and end row for the column data.
270  char* find (rownr_t aRowNr, uInt aColNr,
271  rownr_t& aStartRow, rownr_t& anEndRow,
272  const String& colName);
273 
274  // Add a new bucket and get its bucket number.
276 
277  // Read the bucket (if needed) and return the pointer to it.
278  char* getBucket (uInt aBucketNr);
279 
280  // Remove a bucket from the bucket cache.
281  void removeBucket (uInt aBucketNr);
282 
283  // Get rows per bucket for the given column.
284  uInt getRowsPerBucket (uInt aColumn) const;
285 
286  // Return a pointer to the (one and only) StringHandler object.
288 
289  // <group>
290  // Callbacks for BucketCache access.
291  static char* readCallBack (void* anOwner, const char* aBucketStorage);
292  static void writeCallBack (void* anOwner, char* aBucketStorage,
293  const char* aBucket);
294  static void deleteCallBack (void*, char* aBucket);
295  static char* initCallBack (void* anOwner);
296  // </group>
297 
298 private:
299  // Copy constructor (only meant for clone function).
300  SSMBase (const SSMBase& that);
301 
302  // Assignment cannot be used.
303  SSMBase& operator= (const SSMBase& that);
304 
305  // (Re)create the index, file, and cache object.
306  // It is used when all rows are deleted from the table.
307  void recreate();
308 
309  // The data manager supports use of MultiFile.
310  virtual Bool hasMultiFileSupport() const;
311 
312  // Flush and optionally fsync the data.
313  // It returns a True status if it had to flush (i.e. if data have changed).
314  virtual Bool flush (AipsIO&, Bool doFsync);
315 
316  // Let the storage manager create files as needed for a new table.
317  // This allows a column with an indirect array to create its file.
318  virtual void create64 (rownr_t aNrRows);
319 
320  // Open the storage manager file for an existing table, read in
321  // the data, and let the SSMColumn objects read their data.
322  virtual rownr_t open64 (rownr_t aRowNr, AipsIO&);
323 
324  // Resync the storage manager with the new file contents.
325  // This is done by clearing the cache.
326  virtual rownr_t resync64 (rownr_t aRowNr);
327 
328  // Reopen the storage manager files for read/write.
329  virtual void reopenRW();
330 
331  // The data manager will be deleted (because all its columns are
332  // requested to be deleted).
333  // So clean up the things needed (e.g. delete files).
334  virtual void deleteManager();
335 
336  // Let the storage manager initialize itself (upon creation).
337  // It determines the bucket size and fills the index.
338  void init();
339 
340  // Determine and set the bucket size.
341  // It returns the number of rows per bucket.
343 
344  // Get the number of indices in use.
345  uInt getNrIndices() const;
346 
347  // Add rows to the storage manager.
348  // Per column it extends number of rows.
349  virtual void addRow64 (rownr_t aNrRows);
350 
351  // Delete a row from all columns.
352  virtual void removeRow64 (rownr_t aRowNr);
353 
354  // Do the final addition of a column.
355  virtual void addColumn (DataManagerColumn*);
356 
357  // Remove a column from the data file.
359 
360  // Create a column in the storage manager on behalf of a table column.
361  // The caller has to delete the newly created object.
362  // <group>
363  // Create a scalar column.
364  virtual DataManagerColumn* makeScalarColumn (const String& aName,
365  int aDataType,
366  const String& aDataTypeID);
367  // Create a direct array column.
368  virtual DataManagerColumn* makeDirArrColumn (const String& aName,
369  int aDataType,
370  const String& aDataTypeID);
371  // Create an indirect array column.
372  virtual DataManagerColumn* makeIndArrColumn (const String& aName,
373  int aDataType,
374  const String& aDataTypeID);
375  // </group>
376 
377  // Get the cache object.
378  // This will construct the cache object if not present yet.
379  // The cache object will be deleted by the destructor.
381 
382  // Construct the cache object (if not constructed yet).
383  void makeCache();
384 
385  // Read the header.
386  void readHeader();
387 
388  // Read the index from its buckets.
390 
391  // Write the header and the indices.
392  void writeIndex();
393 
394 
395  //# Declare member variables.
396  // Name of data manager.
398 
399  // The file containing the indirect arrays.
401 
402  // The number of rows in the columns.
404 
405  // Column offset
407 
408  // Row Index ID containing all the columns in a bucket
410 
411  // Will contain all indices
413 
414  // The cache with the SSM buckets.
416 
417  // The file containing all data.
419 
420  // String handler class
422 
423  // The persistent cache size.
425 
426  // The actual cache size.
428 
429  // The initial number of buckets in the cache.
431 
432  // Nr of buckets needed for index.
434 
435  // Number of the first index bucket
437 
438  // Offset of index in first bucket.
439  // If >0, the index fits in a single bucket.
441 
442  // Number of the last String Bucket
444 
445  // length of index memoryblock
447 
448  // The nr of free buckets.
450 
451  // The first free bucket.
453 
454  // The bucket size.
457 
458  // The assembly of all columns.
460 
461  // Has the data changed since the last flush?
463 };
464 
465 
467 {
468  return itsPtrIndex.nelements();
469 }
470 
472 {
473  return itsCacheSize;
474 }
475 
476 inline rownr_t SSMBase::getNRow() const
477 {
478  return itsNrRows;
479 }
480 
482 {
483  return itsBucketSize;
484 }
485 
487 {
488  if (itsCache == 0) {
489  makeCache();
490  }
491  return *itsCache;
492 }
493 
495 {
496  return *(itsPtrColumn[aColNr]);
497 }
498 
499 inline SSMIndex& SSMBase::getIndex (uInt anIdxNr)
500 {
501  return *(itsPtrIndex[anIdxNr]);
502 }
503 
505 {
506  return itsStringHandler;
507 }
508 
509 
510 
511 } //# NAMESPACE CASACORE - END
512 
513 #endif
Cache for buckets in a part of a file.
Definition: BucketCache.h:218
OpenOption
Define the possible ByteIO open options.
Definition: ByteIO.h:65
Abstract base class for a data manager.
Definition: DataManager.h:221
A drop-in replacement for Block<T*>.
Definition: Block.h:814
SSMIndex & getIndex(uInt anIdxNr)
Get access to the given Index.
Definition: SSMBase.h:499
virtual void create64(rownr_t aNrRows)
Let the storage manager create files as needed for a new table.
static char * readCallBack(void *anOwner, const char *aBucketStorage)
Callbacks for BucketCache access.
SSMStringHandler * getStringHandler()
Return a pointer to the (one and only) StringHandler object.
Definition: SSMBase.h:504
SSMColumn & getColumn(uInt aColNr)
Get access to the given column.
Definition: SSMBase.h:494
void makeCache()
Construct the cache object (if not constructed yet).
uInt itsIndexLength
length of index memoryblock
Definition: SSMBase.h:446
static void deleteCallBack(void *, char *aBucket)
uInt getCacheSize() const
Get the current cache size (in buckets).
Definition: SSMBase.h:471
Int itsLastStringBucket
Number of the last String Bucket.
Definition: SSMBase.h:443
virtual void addRow64(rownr_t aNrRows)
Add rows to the storage manager.
uInt itsPersCacheSize
The persistent cache size.
Definition: SSMBase.h:424
StManArrayFile * openArrayFile(ByteIO::OpenOption anOpt)
Open (if needed) the file for indirect arrays with the given mode.
virtual Record dataManagerSpec() const
Return a record containing data manager specifications.
PtrBlock< SSMIndex * > itsPtrIndex
Will contain all indices.
Definition: SSMBase.h:412
uInt getRowsPerBucket(uInt aColumn) const
Get rows per bucket for the given column.
uInt itsBucketSize
The bucket size.
Definition: SSMBase.h:455
SSMStringHandler * itsStringHandler
String handler class.
Definition: SSMBase.h:421
BucketCache * itsCache
The cache with the SSM buckets.
Definition: SSMBase.h:415
uInt getNewBucket()
Add a new bucket and get its bucket number.
uInt itsNrBuckets
The initial number of buckets in the cache.
Definition: SSMBase.h:430
void recreate()
(Re)create the index, file, and cache object.
char * find(rownr_t aRowNr, uInt aColNr, rownr_t &aStartRow, rownr_t &anEndRow, const String &colName)
Find the bucket containing the column and row and return the pointer to the beginning of the column d...
String itsDataManName
Name of data manager.
Definition: SSMBase.h:397
void init()
Let the storage manager initialize itself (upon creation).
void readHeader()
Read the header.
void showBaseStatistics(ostream &anOs) const
Show statistics of the Base offsets/index etc.
virtual Bool canAddColumn() const
The storage manager can add columns.
uInt itsFreeBucketsNr
The nr of free buckets.
Definition: SSMBase.h:449
Int itsFirstIdxBucket
Number of the first index bucket.
Definition: SSMBase.h:436
BucketFile * itsFile
The file containing all data.
Definition: SSMBase.h:418
Block< uInt > itsColIndexMap
Row Index ID containing all the columns in a bucket.
Definition: SSMBase.h:409
rownr_t getNRow() const
Get the number of rows in this storage manager.
Definition: SSMBase.h:476
Int itsFirstFreeBucket
The first free bucket.
Definition: SSMBase.h:452
void setCacheSize(uInt aCacheSize, Bool canExceedNrBuckets=True)
Set the cache size (in buckets).
virtual Bool hasMultiFileSupport() const
The data manager supports use of MultiFile.
SSMBase(const String &aDataManName, Int aBucketSize=0, uInt aCacheSize=1)
Create a Standard storage manager with the given name.
uInt getVersion() const
Get the version of the class.
virtual Record getProperties() const
Get data manager properties that can be modified.
virtual void setProperties(const Record &spec)
Modify data manager properties.
static void writeCallBack(void *anOwner, char *aBucketStorage, const char *aBucket)
uInt itsIdxBucketOffset
Offset of index in first bucket.
Definition: SSMBase.h:440
void removeBucket(uInt aBucketNr)
Remove a bucket from the bucket cache.
rownr_t itsNrRows
The number of rows in the columns.
Definition: SSMBase.h:403
void showIndexStatistics(ostream &anOs) const
Show statistics of all indices used.
SSMBase(const String &aDataManName, const Record &spec)
Create a Standard storage manager with the given name.
BucketCache & getCache()
Get the cache object.
Definition: SSMBase.h:486
static DataManager * makeObject(const String &aDataManType, const Record &spec)
Make the object from the type name string.
virtual String dataManagerType() const
Get the type name of the data manager (i.e. StandardStMan).
virtual void removeRow64(rownr_t aRowNr)
Delete a row from all columns.
uInt getNrIndices() const
Get the number of indices in use.
Definition: SSMBase.h:466
uInt itsCacheSize
The actual cache size.
Definition: SSMBase.h:427
virtual DataManagerColumn * makeIndArrColumn(const String &aName, int aDataType, const String &aDataTypeID)
Create an indirect array column.
void setBucketDirty()
Make the current bucket in the cache dirty (i.e. something has been changed in it and it needs to be written when removed from the cache).
virtual DataManagerColumn * makeDirArrColumn(const String &aName, int aDataType, const String &aDataTypeID)
Create a direct array column.
SSMBase(Int aBucketSize=0, uInt aCacheSize=1)
Create a Standard storage manager with default name SSM.
virtual Bool flush(AipsIO &, Bool doFsync)
Flush and optionally fsync the data.
virtual Bool canAddRow() const
The storage manager can add rows.
Block< uInt > itsColumnOffset
Column offset.
Definition: SSMBase.h:406
virtual void reopenRW()
Reopen the storage manager files for read/write.
PtrBlock< SSMColumn * > itsPtrColumn
The assembly of all columns.
Definition: SSMBase.h:459
char * getBucket(uInt aBucketNr)
Read the bucket (if needed) and return the pointer to it.
virtual void showCacheStatistics(ostream &anOs) const
Show the statistics of all caches used.
virtual void deleteManager()
The data manager will be deleted (because all its columns are requested to be deleted).
void readIndexBuckets()
Read the index from its buckets.
virtual void removeColumn(DataManagerColumn *)
Remove a column from the data file.
uInt setBucketSize()
Determine and set the bucket size.
virtual Bool canRemoveRow() const
The storage manager can delete rows.
virtual rownr_t resync64(rownr_t aRowNr)
Resync the storage manager with the new file contents.
virtual String dataManagerName() const
Get the name given to the storage manager (in the constructor).
StManArrayFile * itsIosFile
The file containing the indirect arrays.
Definition: SSMBase.h:400
virtual Bool canRemoveColumn() const
The storage manager can delete columns.
virtual DataManagerColumn * makeScalarColumn(const String &aName, int aDataType, const String &aDataTypeID)
Create a column in the storage manager on behalf of a table column.
uInt getBucketSize() const
Get the bucket size.
Definition: SSMBase.h:481
SSMBase(const SSMBase &that)
Copy constructor (only meant for clone function).
void writeIndex()
Write the header and the indices.
uInt itsNrIdxBuckets
Nr of buckets needed for index.
Definition: SSMBase.h:433
static char * initCallBack(void *anOwner)
Bool isDataChanged
Has the data changed since the last flush?
Definition: SSMBase.h:462
void clearCache()
Clear the cache used by this storage manager.
virtual DataManager * clone() const
Clone this object.
uInt itsBucketRows
Definition: SSMBase.h:456
SSMBase & operator=(const SSMBase &that)
Assignment cannot be used.
virtual rownr_t open64(rownr_t aRowNr, AipsIO &)
Open the storage manager file for an existing table, read in the data, and let the SSMColumn objects ...
virtual void addColumn(DataManagerColumn *)
Do the final addition of a column.
String: the storage and methods of handling collections of characters.
Definition: String.h:225
this file contains all the compiler specific defines
Definition: mainpage.dox:28
unsigned int uInt
Definition: aipstype.h:51
int Int
Definition: aipstype.h:50
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
const Bool True
Definition: aipstype.h:43
uInt64 rownr_t
Define the type of a row number in a table.
Definition: aipsxtype.h:46