casacore
IncrementalStMan.h
Go to the documentation of this file.
1 //# IncrementalStMan.h: The Incremental Storage Manager
2 //# Copyright (C) 1996,1997,1999
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef TABLES_INCREMENTALSTMAN_H
29 #define TABLES_INCREMENTALSTMAN_H
30 
31 //# Includes
32 #include <casacore/casa/aips.h>
33 #include <casacore/tables/DataMan/ISMBase.h>
34 
35 
36 namespace casacore { //# NAMESPACE CASACORE - BEGIN
37 
38 // <summary>
39 // The Incremental Storage Manager
40 // </summary>
41 
42 // <use visibility=export>
43 
44 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tIncrementalStMan.cc">
45 // </reviewed>
46 
47 // <prerequisite>
48 //# Classes you should understand before using this one.
49 // <li> The Table Data Managers concept as described in module file
50 // <linkto module="Tables:Data Managers">Tables.h</linkto>
51 // <li> <linkto class=ROIncrementalStManAccessor>
52 // ROIncrementalStManAccessor</linkto>
53 // for a discussion of the cache size
54 // </prerequisite>
55 
56 // <etymology>
57 // IncrementalStMan is the data manager storing values in an incremental way
58 // (similar to an incremental backup). A value is only stored when it
59 // differs from the previous value.
60 // </etymology>
61 
62 // <synopsis>
63 // IncrementalStMan stores the data in a way that a value is only stored
64 // when it is different from the value in the previous row. This storage
65 // manager is very well suited for columns with slowly changing values,
66 // because the resulting file can be much smaller. It is not suited at
67 // all for columns with continuously changing data.
68 // <p>
69 // In general it can be advantageous to use this storage manager when a
70 // value changes no more often than once every 4 rows (although it depends on
71 // the length of the data values themselves). The following simple example
72 // shows the approximate savings that can be achieved when storing a column
73 // with double values changing every CH rows.
74 // <srcblock>
75 // #rows CH normal length ISM length compress ratio
76 // 50000 5 4000000 1606000 2.5
77 // 50000 50 4000000 164000 24.5
78 // 50000 500 4000000 32800 122
79 // </srcblock>
80 // There is a special test program <src>nISMBucket</src> in the Tables module
81 // doing a simple, but usually adequate, simulation of the amount of
82 // storage needed for a scenario.
83 // <p>
84 // IncrementalStMan stores the values (and associated indices) in
85 // fixed-length buckets. A <linkto class=BucketCache>BucketCache</linkto>
86 // object is used to read/write
87 // the buckets. The default cache size is 1 bucket (which is fine for
88 // sequential access), but for random access it can make sense to
89 // increase the size of the cache. This can be done using
90 // the class <linkto class=ROIncrementalStManAccessor>
91 // ROIncrementalStManAccessor</linkto>.
92 // <p>
93 // The IncrementalStMan can hold values of any standard data type (thus
94 // from Bool to String). It can handle scalars, direct and indirect
95 // arrays. It can support an arbitrary number of columns. The values in
96 // each of them can vary at its own speed.
97 // <br>
98 // A bucket contains the values of several consecutive rows.
99 // At the beginning of a bucket the values of the starting row of all
100 // columns for this storage manager are repeated. In this way the value
101 // of a cell can always be found in the bucket and no references
102 // to previous buckets are needed.
103 // <br>A bucket should be big enough to hold all starting values and
104 // a reasonable number of other values. As a rule of thumb it should be
105 // big enough to hold at least 100 values of each column. In general the
106 // default bucket size will do. Only in special cases (e.g. when storing
107 // large variable-length strings) should the bucket size be set explicitly.
108 // Giving a zero bucket size means that a suitable default bucket size
109 // will be calculated.
110 // <br>
111 // When a table is filled sequentially each bucket can be filled as
112 // much as possible. When writing in a random way, buckets can contain
113 // some unused space, because a bucket in the middle of the file
114 // has to be split when a new value has to be put in it.
115 // <p>
116 // Each column in the IncrementalStMan has the following properties to
117 // achieve the "store-different-values-only" behaviour.
118 // <ul>
119 // <li> When a row is not explicitly put, it has the same value as the
120 // previous row.
121 // The first row gets the standard undefined values when not put.
122 // The order of puts and addRows is not important.
123 // <br>E.g. when a table has N rows and row N and the following M rows
124 // have the same value, the following schematic code has the same effect:
125 // <br><src> add 1 row; put value in row N; add M rows;</src>
126 // <br><src> add M+1 rows; put value in row N;</src>
127 // <li> When putting a scalar or direct array, it is tested if it matches
128 // the previous row. If so, it is not stored again.
129 // This test is not done for indirect arrays, because those can
130 // be (very) big and it would be too time-consuming. So the only
131 // way to save space for indirect arrays is by not putting them
132 // as explained in the previous item.
133 // <li> For indirect arrays the buckets contain a pointer only. The
134 // arrays themselves are stored in a separate file.
135 // <li> When a value of an existing row is updated, only that one row is
136 // updated. The next row(s) keep their value, even if it was
137 // shared with the row being updated.
138 // <br>For scalars and direct arrays it will be tested if the
139 // new value matches the value in the previous and/or next row.
140 // If so, those rows will be combined to save storage.
141 // <li> The IncrementalStMan is optimized for sequential access to a table.
142 // <br>- A bucket is accessed only once, because a bucket contains
143 // consecutive rows.
144 // <br>- For each column a copy is kept of the last value read.
145 // So the value for the next rows (with that same value)
146 // is immediately available.
147 // <br>For random access the performance can be improved by setting
148 // the cache size using class
149 // <linkto class=ROIncrementalStManAccessor>
150 // ROIncrementalStManAccessor</linkto>.
151 // </ul>
152 //
153 // <note>This class contains many public functions which are only used
154 // by other ISM classes. The only useful function for the user is the
155 // constructor.
156 // </note>
157 
158 // <motivation>
159 // IncrementalStMan can save a lot of storage space.
160 // Unlike the old StManMirAIO it stores the values directly in the
161 // file to save on memory usage.
162 // </motivation>
163 
164 // <example>
165 // This example shows how to create a table and how to attach
166 // the storage manager to some columns.
167 // <srcblock>
168 // SetupNewTable newtab("name.data", tableDesc, Table::New);
169 // IncrementalStMan stman; // define storage manager
170 // newtab.bindColumn ("column1", stman); // bind column to st.man.
171 // newtab.bindColumn ("column2", stman); // bind column to st.man.
172 // Table tab(newtab); // actually create table
173 // </srcblock>
174 // </example>
175 
176 //# <todo asof="$DATE:$">
177 //# A List of bugs, limitations, extensions or planned refinements.
178 //# </todo>
179 
180 
181 class IncrementalStMan : public ISMBase
182 {
183 public:
184  // Create an incremental storage manager with the given name.
185  // If no name is used, it is set to an empty string.
186  // The name can be used to construct a
187  // <linkto class=ROIncrementalStManAccessor>ROIncrementalStManAccessor
188  // </linkto> object (e.g. to set the cache size).
189  // <br>
190  // The bucket size has to be given in bytes and the cache size in buckets.
191  // Bucket size 0 means that the storage manager will set the bucket
192  // size such that it can contain about 100 rows
193  // (with a minimum size of 32768 bytes). However, if that results
194  // in a very large bucket size (>327680) it'll make it smaller.
195  // Note it uses 32 bytes for the size of variable length strings,
196  // so this heuristic may fail when a column contains large strings.
197  // When <src>checkBucketSize</src> is set and Bucket size > 0
198  // the storage manager throws an exception
199  // when the size is too small to hold the values of at least 2 rows.
200  // For this check it uses 0 for the length of variable length strings.
201  // <group>
202  IncrementalStMan (uInt bucketSize = 0,
203  Bool checkBucketSize = True,
204  uInt cacheSize = 1);
205  IncrementalStMan (const String& dataManagerName,
206  uInt bucketSize = 0,
207  Bool checkBucketSize = True,
208  uInt cacheSize = 1);
209  // </group>
210 
212 
213 private:
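214  // Copy constructor cannot be used.
215  IncrementalStMan (const IncrementalStMan& that);
216 
217  // Assignment cannot be used.
218  IncrementalStMan& operator= (const IncrementalStMan& that);
219 };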
220 
221 
222 
223 } //# NAMESPACE CASACORE - END
224 
225 #endif
uInt bucketSize() const
Get the bucket size (in bytes).
Definition: ISMBase.h:407
virtual String dataManagerName() const
Get the name given to the storage manager (in the constructor).
uInt cacheSize() const
Get the current cache size (in buckets).
Definition: ISMBase.h:392
IncrementalStMan(const IncrementalStMan &that)
Copy constructor cannot be used.
IncrementalStMan & operator=(const IncrementalStMan &that)
Assignment cannot be used.
IncrementalStMan(uInt bucketSize=0, Bool checkBucketSize=True, uInt cacheSize=1)
Create an incremental storage manager with the given name.
IncrementalStMan(const String &dataManagerName, uInt bucketSize=0, Bool checkBucketSize=True, uInt cacheSize=1)
String: the storage and methods of handling collections of characters.
Definition: String.h:225
this file contains all the compiler specific defines
Definition: mainpage.dox:28
unsigned int uInt
Definition: aipstype.h:51
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
const Bool True
Definition: aipstype.h:43