casacore
Loading...
Searching...
No Matches
Tables.h
Go to the documentation of this file.
1//# Tables.h: The Tables module - Casacore data storage
2//# Copyright (C) 1994-2010
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: casa-feedback@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25
26#ifndef TABLES_TABLES_H
27#define TABLES_TABLES_H
28
29//# Includes
30//# table description
31#include <casacore/casa/aips.h>
32#include <casacore/tables/Tables/TableDesc.h>
33#include <casacore/tables/Tables/ColumnDesc.h>
34#include <casacore/tables/Tables/ScaColDesc.h>
35#include <casacore/tables/Tables/ArrColDesc.h>
36#include <casacore/tables/Tables/ScaRecordColDesc.h>
37
38//# table access
39#include <casacore/tables/Tables/Table.h>
40#include <casacore/tables/Tables/TableLock.h>
41#include <casacore/tables/Tables/SetupNewTab.h>
42#include <casacore/tables/Tables/ScalarColumn.h>
43#include <casacore/tables/Tables/ArrayColumn.h>
44#include <casacore/tables/Tables/TableRow.h>
45#include <casacore/tables/Tables/TableCopy.h>
46#include <casacore/tables/Tables/TableUtil.h>
47#include <casacore/casa/Arrays/Array.h>
48#include <casacore/casa/Arrays/Slicer.h>
49#include <casacore/casa/Arrays/Slice.h>
50
51//# keywords
52#include <casacore/tables/Tables/TableRecord.h>
53#include <casacore/casa/Containers/RecordField.h>
54
55//# table lookup
56#include <casacore/tables/Tables/ColumnsIndex.h>
57#include <casacore/tables/Tables/ColumnsIndexArray.h>
58
59//# table vectors
60#include <casacore/tables/Tables/TableVector.h>
61#include <casacore/tables/Tables/TabVecMath.h>
62#include <casacore/tables/Tables/TabVecLogic.h>
63
64//# data managers
65#include <casacore/tables/DataMan.h>
66
67//# table expressions (for selection of rows)
68#include <casacore/tables/TaQL.h>
69
70
71namespace casacore { //# NAMESPACE CASACORE - BEGIN
72
73// <module>
74
75// <summary>
76// CTDS (Casacore Table Data System) is the data storage mechanism for Casacore
77// </summary>
78
79// <use visibility=export>
80
81// <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos="">
82// </reviewed>
83
84// <prerequisite>
85// <li> <linkto class="Record:description">Record</linkto> class
86// </prerequisite>
87
88// <etymology>
89// "Table" is a formal term from relational database theory:
90// <em> "The organizing principle in a relational database is the TABLE,
91// a rectangular, row/column arrangement of data values."</em>
92// Casacore tables are extensions to traditional tables, but are similar
93// enough that we use the same name. There is also a strong resemblance
94// between the uses of Casacore tables, and FITS binary tables, which
95// provides another reason to use "Tables" to describe the Casacore data
96// storage mechanism.
97// </etymology>
98
99// <synopsis>
100// Tables are the fundamental storage mechanism for Casacore. This document
101// explains <A HREF="#Tables:motivation">why</A> they had to be made,
102// <A HREF="#Tables:properties">what</A> their properties are, and
103// <A HREF="#Tables:open">how</A> to use them. The last subject is
104// discussed and illustrated in a sequence of sections:
105// <UL>
106// <LI> <A HREF="#Tables:open">opening</A> an existing table,
107// <LI> <A HREF="#Tables:read">reading</A> from a table,
108// <LI> <A HREF="#Tables:creation">creating</A> a new table,
109// <LI> <A HREF="#Tables:write">writing</A> into a table,
110// <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table,
111// <LI> <A HREF="#Tables:select and sort">selection and sorting</A>
112// (see also <A HREF="../notes/199.html">Table Query Language</A>),
113// <LI> <A HREF="#Tables:concatenation">concatenating similar tables</A>,
114// <LI> <A HREF="#Tables:iterate">iterating</A> through a table,
115// <LI> <A HREF="#Tables:LockSync">locking/synchronization</A>
116// for concurrent access,
117// <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup,
118// <LI> <A HREF="#Tables:vectors">vector operations</A> on a column,
119// <LI> <A HREF="#Tables:performance">performance and robustness</A>
120// considerations with some information on
121// <A HREF="#Tables:iotracing">IO tracing</A>.
122// </UL>
123// A few <A HREF="#Tables:applications">applications</A> exist to inspect
124// and manipulate a table.
125//
126// Several UML diagrams describe the class structure of the Tables module.
127// <ul>
128// <li> <a href="TableOverview.drawio.svg.html">Global overview of Table access</a>.
129// <li> <a href="TableDesc.drawio.svg.html">Table and column descriptions</a>.
130// <li> <a href="TableRecord.drawio.svg.html">Table keywords</a>.
131// <li> <a href="Table.drawio.svg.html">Table class structure</a>.
132// <li> <a href="PlainTable.drawio.svg.html">Detailed PlainTable class structure</a>.
133// <li> <a href="DataManager.drawio.svg.html">DataManagers for storage</a>.
134// </ul>
135
136// <ANCHOR NAME="Tables:motivation">
137// <motivation></ANCHOR>
138//
139// The Casacore tables are mainly based upon the ideas of Allen Farris,
140// as laid out in the
141// <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz">
142// AIPS++ Database document</A>, from where the following paragraph is taken:
143//
144// <p>
145// Traditional relational database tables have two features that
146// decisively limit their applicability to scientific data. First, an item of
147// data in a column of a table must be atomic -- it must have no internal
148// structure. A consequence of this restriction is that relational
149// databases are unable to deal with arrays of data items. Second, an
150// item of data in a column of a table must not have any direct or
151// implied linkages to other items of data or data aggregates. This
152// restriction makes it difficult to model complex relationships between
153// collections of data. While these restrictions may make it easy to
154// define a mathematically complete set of data manipulation operations,
155// they are simply intolerable in a scientific data-handling context.
156// Multi-dimensional arrays are frequently the most natural modes in
157// which to discuss and think about scientific data. In addition,
158// scientific data often requires complex calibration operations that
159// must draw on large bodies of data about equipment and its performance
160// in various states. The restrictions imposed by the relational model
161// make it very difficult to deal with complex problems of this nature.
162// </p>
163//
164// In response to these limitations, and other needs, the Casacore tables were
165// designed.
166// </motivation>
167
168// <ANCHOR NAME="Tables:properties">
169// <h3>Table Properties</h3></ANCHOR>
170//
171// Casacore tables have the following properties:
172// <ul>
173// <li> A table consists of a number of rows and columns.
174// <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined
175// for the table as a whole and for individual columns. A keyword/value
176// pair for a column could, for instance, define its unit.
177// <li> Each table has a <A HREF="#Tables:Table Description">description</A>
178// which specifies the number and type of columns, and maybe initial
179// keyword sets and default values for the columns.
180// <li> A cell in a column may contain
181// <UL>
182// <LI> a scalar;
183// <LI> a "direct" array -- which must have the same shape in all
184// cells of a column, is usually small, and is stored in the
185// table itself;
186// <LI> an "indirect" array -- which may have different shapes in
187// different cells of the same column, is arbitrarily large,
188// and is stored in a separate file;
189// </UL>
190// <li> A column may be
191// <UL>
192// <LI> "filled" -- containing actual data, or
193// <LI> "virtual" -- containing a recipe telling how the data will
194// be generated dynamically
195// </UL>
196// <li> Only the standard Casacore data types can be used in filled
197// columns, be they scalars or arrays: Bool, uChar, Short, uShort,
198// Int, uInt, Int64, float, double, Complex, DComplex and String.
199// Furthermore scalars containing
200// <linkto class=TableRecord>record</linkto> values are possible.
201// <li> A column can have a default value, which will automatically be stored
202// in a cell of the column, when a row is added to the table.
203// <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the
204// reading, writing and generation of data. Each column in a table can
205// be assigned its own data manager, which allows for optimization of
206// the data storage per column. The choice of data manager determines
207// whether a column is filled or virtual.
208// <li> Table data are stored in a canonical format, so they can be read
209// on any machine. To avoid needless swapping of bytes, the data can
210// be stored in big endian (as used on e.g. SUN) or little endian
211// (as used on Intel PC-s) canonical format.
212// By default it uses the format specified in the aipsrc variable
213// <code>table.endianformat</code> which defaults to
214// <code>Table::LocalEndian</code> (the endian format of the
215// machine being used when creating the table).
216// <li> The SQL-like
217// <a href="../notes/199.html">Table Query Language</a> (TaQL)
218// can be used to do operations on tables like
219// select, sort, update, insert, delete, and create.
220// </ul>
221//
222// Tables can be in one of four forms:
223// <ul>
224// <li> A plain table is a table stored on disk.
225// It can be shared by multiple processes.
226// <li> A memory table is a table held in memory.
227// It is a process specific table, thus not sharable.
228// The <linkto class=Table>Table::copy</linkto> function can be used
229// to turn a memory table into a plain table.
230// <li> A reference table is a table referencing a plain or memory table.
231// It is the result of a selection or sort on another table.
232// A reference table references the data in the other table, thus
233// changing data in a reference table means that the data in the
234// original table are changed.
235// The <linkto class=Table>Table::deepCopy</linkto> function can be
236// used to turn a reference table into a plain table.
237// <li> <A HREF="#Tables:concatenation">a concatenated table</A>
238// is a union of tables (of any form) with the same description.
239// They are concatenated in a virtual way, thus no copy is made.
240// </ul>
241// Concurrent access from different processes to the same plain table is
242// fully supported by means of a <A HREF="#Tables:LockSync">
243// locking/synchronization</A> mechanism. Concurrent access over NFS is also
244// supported.
245// <p>
246// A (somewhat primitive) mechanism is available to do a
247// <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents
248// of a key.
249
250// <ANCHOR NAME="Tables:open">
251// <h3>Opening an Existing Table</h3></ANCHOR>
252//
253// To open an existing table you just create a
254// <linkto class="Table:description">Table</linkto> object giving
255// the name of the table, like:
256//
257// <srcblock>
258// Table readonly_table ("tableName");
259// // or
260// Table read_and_write_table ("tableName", Table::Update);
261// </srcblock>
262//
263// The constructor option determines whether the table will be opened as
264// readonly or as read/write. A readonly table file must be opened
265// as readonly, otherwise an exception is thrown. The function
266// <linkto class="Table">Table::isWritable(...)</linkto>
267// can be used to determine if a table is writable.
268//
269// When the table is opened, the data managers are reinstantiated
270// according to their definition at table creation.
271// <p>
272// <ANCHOR NAME="Tables:openTable"></ANCHOR>
273// The static function <src>TableUtil::openTable</src> can be used to open a table,
274// in particular a subtable, in a simple way by means of the :: notation like
275// <src>maintable::subtable</src>. The :: notation is much better than specifying
276// an explicit path (such as <src>maintable/subtable</src>), because it also works
277// fine if the main table is a reference table (e.g. the result of a selection).
278
279// <ANCHOR NAME="Tables:read">
280// <h3>Reading from a Table</h3></ANCHOR>
281//
282// You can read data from a table column with the "get" functions
283// in the classes
284// <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>
285// and
286// <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>.
287// For scalars of a standard data type (i.e. Bool, uChar, Int, Short,
288// uShort, uInt, float, double, Complex, DComplex and String) you could
289// instead use
290// <linkto class="TableColumn">TableColumn::getScalar(...)</linkto> or
291// <linkto class="TableColumn">TableColumn::asXXX(...)</linkto>.
292// These functions offer an extra: they do automatic data type promotion;
293// so that you can, for example, get a double value from a float column.
294//
295// These "get" functions are used in the same way as the simple "put"
296// functions described in the <A HREF="#Tables:write">section on writing</A>.
297// <p>
298// <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>
299// can be constructed for a non-writable column. However, an exception
300// is thrown if the put function is used for it.
301// The same is true for
302// <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto> and
303// <linkto class="TableColumn:description">TableColumn</linkto>.
304// <p>
305// A typical program could look like:
306// <srcblock>
307// #include <casacore/tables/Tables/Table.h>
308// #include <casacore/tables/Tables/ScalarColumn.h>
309// #include <casacore/tables/Tables/ArrayColumn.h>
310// #include <casacore/casa/Arrays/Vector.h>
311// #include <casacore/casa/Arrays/Slicer.h>
312// #include <casacore/casa/Arrays/ArrayMath.h>
313// #include <iostream>
314//
315// main()
316// {
317// // Open the table (readonly).
318// Table tab ("some.name");
319//
320// // Construct the various column objects.
321// // Their data type has to match the data type in the table description.
322// ScalarColumn<Int> acCol (tab, "ac");
323// ArrayColumn<Float> arr2Col (tab, "arr2");
324//
325// // Loop through all rows in the table.
326// uInt nrow = tab.nrow();
327// for (uInt i=0; i<nrow; i++) {
328// // Read the row for both columns.
329// cout << "Column ac in row i = " << acCol(i) << endl;
330// Array<Float> array = arr2Col.get (i);
331// }
332//
333// // Show the entire column ac,
334// // and show the 10th element of arr2 in each row.
335// cout << acCol.getColumn();
336// cout << arr2Col.getColumn (Slicer(Slice(10)));
337// }
338// </srcblock>
339
340// <ANCHOR NAME="Tables:creation">
341// <h3>Creating a Table</h3></ANCHOR>
342//
343// The creation of a table is a multi-step process:
344// <ol>
345// <li>
346// Create a <A HREF="#Tables:Table Description">table description</A>.
347// <li>
348// Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto>
349// object with the name of the new table.
350// <li>
351// Create the necessary <A HREF="#Tables:Data Managers">data managers</A>.
352// <li>
353// Bind each column to the appropriate data manager.
354// The system will bind unbound columns to data managers which
355// are created internally using the default data manager name
356// defined in the column description.
357// <li>
358// Define the shape of direct columns (if that was not already done in the
359// column description).
360// <li>
361// Create the <linkto class="Table:description">Table</linkto>
362// object from the SetupNewTable object. Here, a final check is performed
363// and the necessary files are created.
364// </ol>
365// The recipe above is meant for the creation of a plain table, but the
366// creation of a memory table is exactly the same. The only difference
367// is that in the call to construct the Table object the Table::Memory
368// type has to be given. Note that in the SetupNewTable object the columns
369// can be bound to any data manager. <src>MemoryTable</src> will rebind
370// stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto>
371// storage manager, but virtual column bindings are not changed.
372//
373// The following example shows how you can create a table. An example
374// specifically illustrating the creation of the
375// <A HREF="#Tables:Table Description">table description</A> is given
376// in that section. Other sections discuss the access to the table.
377//
378// <srcblock>
379// #include <casacore/tables/Tables/TableDesc.h>
380// #include <casacore/tables/Tables/SetupNewTab.h>
381// #include <casacore/tables/Tables/Table.h>
382// #include <casacore/tables/Tables/ScaColDesc.h>
383// #include <casacore/tables/Tables/ScaRecordColDesc.h>
384// #include <casacore/tables/Tables/ArrColDesc.h>
385// #include <casacore/tables/Tables/StandardStMan.h>
386// #include <casacore/tables/Tables/IncrementalStMan.h>
387//
388// main()
389// {
390// // Step1 -- Build the table description.
391// TableDesc td("tTableDesc", "1", TableDesc::Scratch);
392// td.comment() = "A test of class SetupNewTable";
393// td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab"));
394// td.addColumn (ScalarColumnDesc<Int> ("ac"));
395// td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad"));
396// td.addColumn (ScalarColumnDesc<Float> ("ae"));
397// td.addColumn (ScalarRecordColumnDesc ("arec"));
398// td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct));
399// td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
400// td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct));
401//
402// // Step 2 -- Setup a new table from the description.
403// SetupNewTable newtab("newtab.data", td, Table::New);
404//
405// // Step 3 -- Create storage managers for it.
406// StandardStMan stmanStand_1;
407// StandardStMan stmanStand_2;
408// IncrementalStMan stmanIncr;
409//
410// // Step 4 -- First, bind all columns to the first storage
411// // manager. Then, bind a few columns to another storage manager
412// // (which will overwrite the previous bindings).
413// newtab.bindAll (stmanStand_1);
414// newtab.bindColumn ("ab", stmanStand_2);
415// newtab.bindColumn ("ae", stmanIncr);
416// newtab.bindColumn ("arr3", stmanIncr);
417//
418// // Step 5 -- Define the shape of the direct columns.
419// // (this could have been done in the column description).
420// newtab.setShapeColumn( "arr1", IPosition(3,2,3,4));
421// newtab.setShapeColumn( "arr3", IPosition(3,3,4,5));
422//
423// // Step 6 -- Finally, create the table consisting of 10 rows.
424// Table tab(newtab, 10);
425//
426// // Now we can fill the table, which is shown in a next section.
427// // The Table destructor will flush the table to the files.
428// }
429// </srcblock>
430// To create a table in memory, only step 6 has to be modified slightly to:
431// <srcblock>
432// Table tab(newtab, Table::Memory, 10);
433// </srcblock>
434//
435// Note that the function <src>TableUtil::createTable</src> can be used to create a table
436// in a simpler way. It can also be used to create a subtable using the :: notation
437// similar to the <A HREF="#Tables:openTable"><src>TableUtil::openTable</src></A>
438// function described above.
439
440// <ANCHOR NAME="Tables:write">
441// <h3>Writing into a Table</h3></ANCHOR>
442//
443// Once a table has been created or has been opened for read/write,
444// you want to write data into it. Before doing that you may have
445// to add one or more rows to the table.
446// <note role=tip> If a table was created with a given number of rows, you
447// do not need to add rows; you may not even be able to do so.
448// </note>
449//
450// When adding new rows to the table, either via the
451// <linkto class="Table">Table(...) constructor</linkto>
452// or via the
453// <linkto class="Table">Table::addRow(...)</linkto>
454// function, you can choose to have those rows initialized with the
455// default values given in the description.
456//
457// To actually write the data into the table you need the classes
458// <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto> and
459// <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>.
460// For each column you can construct one or
461// more of these objects. Their put(...) functions
462// let you write a value at a time or the entire column in one go.
463// For arrays you can "put" subsections of the arrays.
464//
465// As an alternative for scalars of a standard data type (i.e. Bool,
466// uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex
467// and String) you could use the functions
468// <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>.
469// These functions offer an extra: automatic data type promotion; so that
470// you can, for example, put a float value in a double column.
471//
472// A typical program could look like:
473// <srcblock>
474// #include <casacore/tables/Tables/TableDesc.h>
475// #include <casacore/tables/Tables/SetupNewTab.h>
476// #include <casacore/tables/Tables/Table.h>
477// #include <casacore/tables/Tables/ScaColDesc.h>
478// #include <casacore/tables/Tables/ArrColDesc.h>
479// #include <casacore/tables/Tables/ScalarColumn.h>
480// #include <casacore/tables/Tables/ArrayColumn.h>
481// #include <casacore/casa/Arrays/Vector.h>
482// #include <casacore/casa/Arrays/Slicer.h>
483// #include <casacore/casa/Arrays/ArrayMath.h>
484// #include <iostream>
485//
486// main()
487// {
488// // First build the table description.
489// TableDesc td("tTableDesc", "1", TableDesc::Scratch);
490// td.comment() = "A test of class SetupNewTable";
491// td.addColumn (ScalarColumnDesc<Int> ("ac"));
492// td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
493//
494// // Setup a new table from the description,
495// // and create the (still empty) table.
496// // Note that since we do not explicitly bind columns to
497// // data managers, all columns will be bound to the default
498// // standard storage manager StandardStMan.
499// SetupNewTable newtab("newtab.data", td, Table::New);
500// Table tab(newtab);
501//
502// // Construct the various column objects.
503// // Their data type has to match the data type in the description.
504// ScalarColumn<Int> ac (tab, "ac");
505// ArrayColumn<Float> arr2 (tab, "arr2");
506// Vector<Float> vec2(100);
507//
508// // Write the data into the columns.
509// // In each cell arr2 will be a vector of length 100.
510// // Since its shape is not set explicitly, it is done implicitly.
511// for (uInt i=0; i<10; i++) {
512// tab.addRow(); // First add a row.
513// ac.put (i, i+10); // value is i+10 in row i
514// indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119
515// arr2.put (i, vec2);
516// }
517//
518// // Finally, show the entire column ac,
519// // and show the 10th element of arr2.
520// cout << ac.getColumn();
521// cout << arr2.getColumn (Slicer(Slice(10)));
522//
523// // The Table destructor writes the table.
524// }
525// </srcblock>
526//
527// In this example we added rows in the for loop, but we could also have
528// created 10 rows straightaway by constructing the Table object as:
529// <srcblock>
530// Table tab(newtab, 10);
531// </srcblock>
532// in which case we would not include
533// <srcblock>
534// tab.addRow()
535// </srcblock>
536//
537// The classes
538// <linkto class="TableColumn:description">TableColumn</linkto>,
539// <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>, and
540// <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>
541// contain several functions to put values into a single cell or into the
542// whole column. This may look confusing, but is actually quite simple.
543// The functions can be divided in two groups:
544// <ol>
545// <li>
546// Put the given value into the column cell(s).
547// <ul>
548// <li>
549// The simplest put functions,
550// <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and
551// <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
552// put a value into the given column cell. For convenience, there is an
553// <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto>
554// to put only a part of the array.
555// <li>
556// <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and
557// <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto>
558// fill an entire column by putting the given value into all the cells
559// of the column.
560// <li>
561// The simplest putColumn functions,
562// <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and
563// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>,
564// put an array of values into the column. There is a special
565// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
566// version which puts only a part of the arrays.
567// </ul>
568//
569// <li>
570// Copy values from another column to this column.<BR>
571// These functions have the advantage that the
572// data type of the input and/or output column can be unknown.
573// The generic TableColumn objects can be used for this purpose.
574// The put(Column) function checks the data types and, if possible,
575// converts them. If the conversion is not possible, it throws an
576// exception.
577// <ul>
578// <li>
579// The put functions copy the value in a cell of the input column
580// to a cell in the output column. The row numbers of the cells
581// in the columns can be different.
582// <li>
583// The putColumn functions copy the entire contents of the input column
584// to the output column. The lengths of the columns must be equal.
585// </ul>
586// Each class has its own set of these functions.
587// <ul>
588// <li>
589// <linkto class="TableColumn">TableColumn::put(...)</linkto> and
590// <linkto class="TableColumn">TableColumn::putColumn(...)</linkto>
591// are the most generic. They can be
592// used if the data types of both input and output column are unknown.
593// Note that these functions are virtual.
594// <li>
595// <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>,
596// <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
597// <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and
598// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
599// are less generic and therefore potentially more efficient.
600// The most efficient variants are the ones taking a
601// Scalar/ArrayColumn&lt;T&gt;, because they require no data type
602// conversion.
603// </ul>
604// </ol>
605
606// <ANCHOR NAME="Tables:row-access">
607// <h3>Accessing rows in a Table</h3></ANCHOR>
608//
609// Apart from accessing a table column-wise as described in the
610// previous two sections, it is also possible to access a table row-wise.
611// The <linkto class=TableRow>TableRow</linkto> class makes it possible
612// to access multiple fields in a table row as a whole. Note that like the
613// XXColumn classes described above, there is also an ROTableRow class
614// for access to readonly tables.
615// <p>
616// On construction of a TableRow object it has to be specified which
617// fields (i.e. columns) are part of the row. For these fields a
618// fixed structured <linkto class=TableRecord>TableRecord</linkto>
619// object is constructed as part of the TableRow object. The TableRow::get
620// function will fill this record with the table data for the given row.
621// The user has access to the record and can use
622// <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for
623// speedier access to the record.
624// <p>
625// The class could be used as shown in the following example.
626// <srcblock>
627// // Open the table as readonly and define a row object to contain
628// // the given columns.
629// // Note that the function stringToVector is a very convenient
630// // way to construct a Vector<String>.
631// // Show the description of the fields in the row.
632// Table table("Some.table");
633// ROTableRow row (table, stringToVector("col1,col2,col3"));
634// cout << row.record().description();
635// // Since the structure of the record is known, the RecordFieldPtr
636// // objects could be used to allow for easy and fast access to
637// // the record which is refilled for each get.
638// RORecordFieldPtr<String> col1(row.record(), "col1");
639// RORecordFieldPtr<Double> col2(row.record(), "col2");
640// RORecordFieldPtr<Array<Int> > col3(row.record(), "col3");
641// for (uInt i=0; i<table.nrow(); i++) {
642// row.get (i);
643// someString = *col1;
644// somedouble = *col2;
645// someArrayInt = *col3;
646// }
647// </srcblock>
648// The description of TableRow contains some more extensive examples.
649
650// <ANCHOR NAME="Tables:select and sort">
651// <h3>Table Selection and Sorting</h3></ANCHOR>
652//
653// The result of a select and sort of a table is another table,
654// which references the original table. This means that an update
655// of a sorted or selected table results in the update of the original
656// table. The result is, however, a table in itself, so all table
657// functions (including select and sort) can be used with it.
658// Note that a true copy of such a reference table can be made with
659// the <linkto class=Table>Table::deepCopy</linkto> function.
660// <p>
661// Rows or columns can be selected from a table. Columns can be selected
662// by the
663// <linkto class="Table">Table::project(...)</linkto>
664// function, while rows can be selected by the various
665// <linkto class="Table">Table operator()</linkto> functions.
666// Usually a row is selected by giving a select expression with
667// <linkto class="TableExprNode:description">TableExprNode</linkto>
668// objects. These objects represent the various nodes
669// in an expression, e.g. a constant, a column, or a subexpression.
670// The Table function
671// <linkto class="Table">Table::col(...)</linkto>
672// creates a TableExprNode object for a column. The function
673// <linkto class="Table">Table::key(...)</linkto>
674// does the same for a keyword by reading
675// the keyword value and storing it as a constant in an expression node.
676// All column nodes in an expression must belong to the same table,
677// otherwise an exception is thrown.
678// In the following example we select all rows with RA>10:
679// <srcblock>
680// #include <casacore/tables/Tables/ExprNode.h>
681// Table table ("Table.name");
682// Table result = table (table.col("RA") > 10);
683// </srcblock>
684// while in the next one we select rows with RA and DEC in the given
685// intervals:
686// <srcblock>
687// Table result = table (table.col("RA") > 10
688// && table.col("RA") < 14
689// && table.col("DEC") >= -10
690// && table.col("DEC") <= 10);
691// </srcblock>
692// The following operators can be used to form arbitrarily
693// complex expressions:
694// <ul>
695// <li> Relational operators ==, !=, >, >=, < and <=.
696// <li> Logical operators &&, || and !.
697// <li> Arithmetic operators +, -, *, /, %, and unary + and -.
698// <li> Bit operators ^, &, |, and unary ~.
699// <li> Operator() to take a subsection of an array.
700// </ul>
701// Many functions (like sin, max, conj) can be used in an expression.
702// Class <linkto class=TableExprNode>TableExprNode</linkto> shows
703// the available functions.
704// E.g.
705// <srcblock>
706// Table result = table (sin (table.col("RA")) > 0.5);
707// </srcblock>
708// Function <src>in</src> can be used to select from a set of values.
709// A value set can be constructed using class
710// <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>.
711// <srcblock>
712// TableExprNodeSet set;
713// set.add (TableExprNodeSetElem ("abc"));
714// set.add (TableExprNodeSetElem ("defg"));
715// set.add (TableExprNodeSetElem ("h"));
716// Table result = table (table.col("NAME").in (set));
717// </srcblock>
718// select rows with a NAME equal to <src>abc</src>,
719// <src>defg</src>, or <src>h</src>.
720//
721// <p>
722// You can sort a table on one or more columns containing scalars.
723// In this example we simply sort on column RA (default is ascending):
724// <srcblock>
725// Table table ("Table.name");
726// Table result = table.sort ("RA");
727// </srcblock>
728// Multiple
729// <linkto class="Table">Table::sort(...)</linkto>
730// functions exist which allow for more flexible control over the sort order.
731// In the next example we sort first on RA in descending order
732// and then on DEC in ascending order:
733// <srcblock>
734// Table table ("Table.name");
735// Block<String> sortKeys(2);
736// Block<int> sortOrders(2);
737// sortKeys(0) = "RA";
738// sortOrders(0) = Sort::Descending;
739// sortKeys(1) = "DEC";
740// sortOrders(1) = Sort::Ascending;
741// Table result = table.sort (sortKeys, sortOrders);
742// </srcblock>
743//
744// Tables stemming from the same root, can be combined in several
745// ways with the help of the various logical
746// <linkto class="Table">Table operators</linkto> (operator|, etc.).
747
748// <h4>Table Query Language</h4>
749// The selection and sorting mechanism described above can only be used
750// in a hard-coded way in a C++ program.
751// There is, however, another way. Strings containing selection and
752// sorting commands can be used.
753// The syntax of these commands is based on SQL and is described in the
754// <a href="../notes/199.html">Table Query Language</a> (TaQL) note 199.
755// The language supports UDFs (User Defined Functions) in dynamically
756// loadable libraries as explained in the note.
757// <br>A TaQL command can be executed with the static function
758// <src>tableCommand</src> defined in class
759// <linkto class=TableParse>TableParse</linkto>.
760
761// <ANCHOR NAME="Tables:concatenation">
762// <h3>Table Concatenation</h3></ANCHOR>
763// Tables with identical descriptions can be concatenated in a virtual way
764// using the Table concatenation constructor. Such a Table object behaves
765// as any other Table object, thus any operation can be performed on it.
766// An identical description means that the number of columns, the column names,
767// and the data types of the columns must be the same. The columns do not
768// need to be ordered in the same way nor to be stored in the same way.
769// <br>Note that if tables have different column names, it is possible
770// to form a projection (as described in the previous section) first
771// to make them appear identical.
772//
773// Sometimes a MeasurementSet is partitioned, for instance in chunks of
774// one hour. All those chunks can be virtually concatenated this way.
775// Note that all tables in the concatenation will be opened, thus one might
776// run out of file descriptors if there are many chunks.
777//
778// Similar to reference tables, it is possible to make a concatenated Table
779// persistent by using the <src>rename</src> function. It will not copy the
780// data; only the names of the tables used are written.
781//
782// The keywords of a concatenated table are taken from the first table.
783// It is possible to change or add keywords, but that is not persistent,
784// not even if the concatenated table is made persistent.
785// <br>The keywords holding subtables can be handled in a special way.
786// Normally the subtables of the first table are used as the subtables of
787// the concatenation, but it is possible to concatenate subtables as well by
788// giving their names in the constructor.
789// In this way the, say, SYSCAL subtable of a MeasurementSet can be
790// concatenated as well.
791// <srcblock>
792// // Create virtual concatenation of ms0 and ms1.
793// Block<String> names(2);
794// names[0] = "ms0";
795// names[1] = "ms1";
796// // Also concatenate their SYSCAL subtables.
797// Block<String> subNames(1, "SYSCAL");
798// Table concTab (names, subNames);
799// </srcblock>
800
801// <ANCHOR NAME="Tables:iterate">
802// <h3>Table Iterators</h3></ANCHOR>
803//
804// You can iterate through a table in an arbitrary order by getting
805// a subset of the table consisting of the rows in which the iteration
806// columns have the same value.
807// An iterator object is created by constructing a
808// <linkto class="TableIterator:description">TableIterator</linkto>
809// object with the appropriate column names.
810//
811// In the next example we define an iteration on the columns Time and
812// Baseline. Each iteration step returns a table subset in which Time and
813// Baseline have the same value.
814//
815// <srcblock>
816// // Iterate over Time and Baseline (by default in ascending order).
817// // Time is the main iteration order, thus the first column specified.
818// Table t;
819// Table tab ("UV_Table.data");
820// Block<String> iv0(2);
821// iv0[0] = "Time";
822// iv0[1] = "Baseline";
823// //
824// // Create the iterator. This will prepare the first subtable.
825// TableIterator iter(tab, iv0);
826// Int nr = 0;
827// while (!iter.pastEnd()) {
828// // Get the first subtable.
829// // This will contain rows with equal Time and Baseline.
830// t = iter.table();
831// cout << t.nrow() << " ";
832// nr++;
833// // Prepare the next subtable with the next Time,Baseline value.
834// iter.next();
835// }
836// cout << endl << nr << " iteration steps" << endl;
837// </srcblock>
838//
839// You can define more than one iterator on the same table; they operate
840// independently.
841//
842// Note that the result of each iteration step is a table in itself which
843// references the original table, just as in the case of a sort or select.
844// This means that the resulting table can be used again in a sort, select,
845// iteration, etc.
846
847// <ANCHOR NAME="Tables:vectors">
848// <h3>Table Vectors</h3></ANCHOR>
849//
850// A table vector makes it possible to treat a column in a table
851// as a vector. Almost all operators and functions defined for normal
852// vectors, are also defined for table vectors. So it is, for instance,
853// possible to add a constant to a table vector. This has the effect
854// that the underlying column gets changed.
855//
856// You can use the templated class
857// <linkto class="TableVector:description">TableVector</linkto>
858// to make a scalar column appear as a (table) vector.
859// Columns containing arrays or tables are not supported.
860// The data type of the TableVector object must match the
861// data type of the column.
862// A table vector can also hold a normal vector so that (temporary)
863// results of table vector operations can be handled.
864//
865// In the following example we double the data in column COL1 and
866// store the result in a temporary table vector.
867// <srcblock>
868// // Create a table vector for column COL1.
869// // Note that if the table is readonly, putting data in the table vector
870// // results in an exception.
871// Table tab ("Table.data");
872// TableVector<Int> tabvec(tab, "COL1");
873// // Multiply it by a constant. Result is kept in a Vector in memory.
874// TableVector<Int> temp = 2 * tabvec;
875// </srcblock>
876//
877// In the next example we double the data in COL1 and put the result back
878// in the column.
879// <srcblock>
880// // Create a table vector for column COL1.
881// // It has to be a TableVector to be able to change the column.
882// Table tab ("Table.data", Table::Update);
883// TableVector<Int> tabvec(tab, "COL1");
884// // Multiply it by a constant.
885// tabvec *= 2;
886// </srcblock>
887
888// <ANCHOR NAME="Tables:keywords">
889// <h3>Table Keywords</h3></ANCHOR>
890//
891// Any number of keyword/value pairs may be attached to the table as a whole,
892// or to any individual column. They may be freely added, retrieved,
893// re-assigned, or deleted. They are, in essence, a self-resizing list of
894// values (any of the primitive types) indexed by Strings (the keyword).
895//
896// A table keyword/value pair might be
897// <srcblock>
898// Observer = Grote Reber
899// Date = 10 october 1942
900// </srcblock>
901// Column keyword/value pairs might be
902// <srcblock>
903// Units = mJy
904// Reference Pixel = 320
905// </srcblock>
906// The class
907// <linkto class="TableRecord:description">TableRecord</linkto>
908// represents the keywords in a table.
909// It is (indirectly) derived from the standard record classes in the class
910// <linkto class="Record:description">Record</linkto>.
911
912// <ANCHOR NAME="Tables:Table Description">
913// <h3>Table Description</h3></ANCHOR>
914//
915// A table contains a description of itself, which defines the layout of the
916// columns and the keyword sets for the table and for the individual columns.
917// It may also define initial keyword sets and default values for the columns.
918// Such a default value is automatically stored in a cell in the table column,
919// whenever a row is added to the table.
920//
921// The creation of the table descriptor is the first step in the creation of
922// a new table. The description is part of the table itself, but may also
923// exist in a separate file. This is useful if you need to create a number
924// of tables with the same structure; in other circumstances it probably
925// should be avoided.
926//
927// The public classes to set up a table description are:
928// <ul>
929// <li> <linkto class="TableDesc:description">TableDesc</linkto>
930// -- holds the table description.
931// <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto>
932// -- holds a generic column description.
933// <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc&lt;T&gt;
934// </linkto>
935// -- defines a column containing a scalar value.
936// <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc
937// </linkto>
938// -- defines a column containing a scalar record value.
939// <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc&lt;T&gt;
940// </linkto>
941// -- defines a column containing an (in)direct array.
942// </ul>
943//
944// Here follows a typical example of the construction of a table
945// description. For more specialized things -- like the definition of a
946// default data manager -- we refer to the descriptions of the above
947// mentioned classes.
948//
949// <srcblock>
950// #include <casacore/tables/Tables/TableDesc.h>
951// #include <casacore/tables/Tables/ScaColDesc.h>
952// #include <casacore/tables/Tables/ArrColDesc.h>
953// #include <casacore/tables/Tables/ScaRecordColDesc.h>
954// #include <casacore/tables/Tables/TableRecord.h>
955// #include <casacore/casa/Arrays/IPosition.h>
956// #include <casacore/casa/Arrays/Vector.h>
957//
958// main()
959// {
960// // Create a new table description
961// // Define a comment for the table description.
962// // Define some keywords.
963// ColumnDesc colDesc1, colDesc2;
964// TableDesc td("tTableDesc", "1", TableDesc::New);
965// td.comment() = "A test of class TableDesc";
966// td.rwKeywordSet().define ("ra", float(3.14));
967// td.rwKeywordSet().define ("equinox", double(1950));
968// td.rwKeywordSet().define ("aa", Int(1));
969//
970// // Define an integer column ab.
971// td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab"));
972//
973// // Add a scalar integer column ac, define keywords for it
974// // and define a default value 0.
975// // Overwrite the value of keyword unit.
976// ScalarColumnDesc<Int> acColumn("ac");
977// acColumn.rwKeywordSet().define ("scale", Complex(0,0));
978// acColumn.rwKeywordSet().define ("unit", "");
979// acColumn.setDefault (0);
980// td.addColumn (acColumn);
981// td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG");
982//
983// // Add a scalar string column ad and define its comment string.
984// td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad"));
985//
986// // Now define array columns.
987// // This one is indirect and has no dimensionality mentioned yet.
988// td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1"));
989// // This one is indirect and has 3-dim arrays.
990// td.addColumn (ArrayColumnDesc<Int> ("A2r1","comment for Arr1",3));
991// // This one is direct and has 2-dim arrays with axes length 4 and 7.
992// td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr1",
993// IPosition(2,4,7),
994// ColumnDesc::Direct));
995//
996// // Add columns containing records.
997// td.addColumn (ScalarRecordColumnDesc ("Rec1"));
998// }
999// </srcblock>
1000
1001// <ANCHOR NAME="Tables:Data Managers">
1002// <h3>Data Managers</h3></ANCHOR>
1003//
1004// Data managers take care of the actual access to the data in a column.
1005// There are two kinds of data managers:
1006// <ol>
1007// <li> <A HREF="#Tables:storage managers">Storage managers</A> --
1008// which store the data as such. They can only handle the standard
1009// data types (Bool,...,String) as discussed in the section about the
1010// <A HREF="#Tables:properties">table properties</A>.
1011// <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A>
1012// -- which manipulate the data.
1013// An engine could be a simple thing like scaling the data (as done
1014// in classic AIPS to reduce data storage), but it could also be an
1015// elaborate thing like applying corrections on-the-fly.
1016// <br>A special engine is VirtualTaQLColumn which can be used to define
1017// the contents of a column by means of a TaQL expression. In particular,
1018// it can be used to define a constant value for the entire column.
1019// But it can also be used to calculate the UVW-coordinates on-the-fly.
1020// <br>An engine must be used when storing data objects with a non-standard type.
1021// It has to break down the object into items with standard data types
1022// which can be stored with a storage manager.
1023// </ol>
1024// In general the user of a table does not need to be aware which
1025// data managers are being used underneath. Only when the table is created
1026// data managers have to be bound to the columns. Thereafter it is
1027// completely transparent.
1028//
1029// Data managers need to be registered, so they can be found when a table is
1030// opened. All data managers mentioned below are part of the system and
1031// pre-registered.
1032// It is, however, also possible to load data managers on demand. If a data
1033// manager is not registered, an attempt is made to load a shared library named
1034// after the part of the data manager name (in lowercase) before a dot or left arrow.
1035// The dot makes it possible to have multiple data managers in a shared library,
1036// while the left arrow is meant for templated data manager classes.
1037// <br>E.g. if <src>BitFlagsEngine<uChar></src> was not registered, the shared
1038// library <src>libbitflagsengine.so</src> (or .dylib) will be loaded. If
1039// successful, its function <src>register_bitflagsengine()</src> will be
1040// executed which should register the data manager(s). Thereafter it is known
1041// and will be used. For example in a file Register.h and Register.cc:
1042// <srcblock>
1043// // Declare in .h file as C function, so no name mangling is done.
1044// extern "C" {
1045// void register_bitflagsengine();
1046// }
1047// // Implement in .cc file.
1048// void register_bitflagsengine()
1049// {
1050// BitFlagsEngine<uChar>::registerClass();
1051// BitFlagsEngine<Short>::registerClass();
1052// BitFlagsEngine<Int>::registerClass();
1053// }
1054// </srcblock>
1055// There are several functions that can give information which data managers
1056// are used for which columns and to obtain the characteristics and properties
1057// of them. Class RODataManAccessor and derived classes can be used for it
1058// as well as the functions <src>dataManagerInfo</src> and
1059// <src>showStructure</src> in class Table.
1060
1061// <ANCHOR NAME="Tables:storage managers">
1062// <h3>Storage Managers</h3></ANCHOR>
1063//
1064// Storage managers are used to store the data contained in the column cells.
1065// At table construction time the binding of columns to storage managers is done.
1066// <br>Each storage manager uses one or more files (usually called table.fi_xxx
1067// where i is a sequence number and _xxx is some kind of extension).
1068// Typically several files are used to store the data of the columns of a table.
1069// <br>In order to reduce the number of files (and to support large block sizes),
1070// it is possible to have a single container file (a MultiFile) containing all
1071// data files used by the storage managers. Such a file is called table.mf.
1072// Note that the program <em>lsmf</em> can be used to see which
1073// files are contained in a MultiFile. The program <em>tomf</em> can
1074// convert the files in a MultiFile to regular files.
1075// <br>At table creation time it is decided if a MultiFile will be used. It
1076// can be done by means of the StorageOption object given to the SetupNewTable
1077// constructor and/or by the aipsrc variables:
1078// <ul>
1079// <li> <src>table.storage.option</src> which can have the value
1080// 'multifile', 'sepfile' (meaning separate files), or 'default'.
1081// Currently the default is to use separate files.
1082// <li> <src>table.storage.blocksize</src> defines the block size to be
1083// used by a MultiFile. If 0 is given, the file system's block size
1084// will be used.
1085// </ul>
1086// Almost all standard storage managers support the MultiFile.
1087// The exception is StManAipsIO, because it is hardly ever used.
1088//
1089// Several storage managers exist, each with its own storage characteristics.
1090// The default and preferred storage manager is <src>StandardStMan</src>.
1091// Other storage managers should only be used if they pay off in
1092// file space (like <src>IncrementalStMan</src> for slowly varying data)
1093// or access speed (like the tiled storage managers for large data arrays).
1094// <br>The storage managers store the data in a big or little endian
1095// canonical format. The format can be specified when the table is created.
1096// By default it uses the endian format as specified in the aipsrc variable
1097// <code>table.endianformat</code> which can have the value local, big,
1098// or little. The default is local.
1099// <ol>
1100// <li>
1101// <linkto class="StandardStMan:description">StandardStMan</linkto>
1102// stores all the values in so-called buckets (equally sized chunks
1103// in the file). It requires little memory.
1104// <br>It replaces the old <src>StManAipsIO</src>.
1105//
1106// <li>
1107// <linkto class="IncrementalStMan:description">IncrementalStMan</linkto>
1108// uses a storage mechanism resembling "incremental backups". A value
1109// is only stored if it is different from the previous row. It is
1110// very well suited for slowly varying data.
1111// <br>The class <linkto class="ROIncrementalStManAccessor:description">
1112// ROIncrementalStManAccessor</linkto> can be used to tune the
1113// behaviour of the <src>IncrementalStMan</src>. It contains functions
1114// to deal with the cache size and to show the behaviour of the cache.
1115//
1116// <li>
1117// The <a href="#Tables:TiledStMan">Tiled Storage Managers</a>
1118// store the data as a tiled hypercube allowing for more or less equally
1119// efficient data access along all main axes. It can be used for
1120// UV-data as well as for image data.
1121//
1122// <li>
1123// <linkto class="StManAipsIO:description">StManAipsIO</linkto>
1124// uses <src>AipsIO</src> to store the data in the columns.
1125// It supports all table functionality, but its I/O is probably not
1126// as efficient as other storage managers. It also requires that
1127// a large part of the table fits in memory.
1128// <br>It should not be used anymore, because it uses a lot of memory
1129// for larger tables and because it is not very robust in case an
1130// application or system crashes.
1131//
1132// <li>
1133// <linkto class="MemoryStMan:description">MemoryStMan</linkto>
1134// holds the data in memory. It means that data 'stored' with this
1135// storage manager are NOT persistent.
1136// <br>This storage manager is primarily meant for tables held in
1137// memory, but it can also be useful for temporary columns in
1138// normal tables. Note, however, that if a table is accessed
1139// concurrently from multiple processes, MemoryStMan data cannot be
1140// synchronized.
1141//
1142// <li>
1143// @ref dyscostman.DyscoStMan is a class that stores data with lossy
1144// compression. It combines non-linear least-squares quantization and
1145// different kinds of normalization. With the typical factor of 4
1146// compression, the loss in accuracy from lossy compression is
1147// negligible. It should only be used for real (non-simulated) data
1148// that is in a Measurement Set.
1149// The method is described in this article:
1150// https://arxiv.org/abs/1609.02019.
1151//
1152// <li>
1153// <linkto class="Adios2StMan:description">Adios2StMan</linkto> uses the
1154// <A HREF="https://github.com/ornladios/ADIOS2">ADIOS2 framework</A> to
1155// store and load column data.
1156// <br>ADIOS2 itself has several configurable storage backends, and this
1157// flexibility is also available via Adios2StMan. This includes, among other
1158// things, storing compressed data, or choosing a different on-disk format.
1159// <br>This storage manager is also special in that it provides parallel
1160// writing capabilities for MPI processes, so that multiple processes can
1161// write into different sections of the same column concurrently.
1162// </ol>
1163//
1164// The storage manager framework makes it possible to support arbitrary files
1165// as tables. This has been used in a case where a file is filled
1166// by the data acquisition system of a telescope. The file is simultaneously
1167// used as a table using a dedicated storage manager. The table
1168// system and storage manager provide a sync function to synchronize
1169// the processes, i.e. to make CTDS aware of changes
1170// in the file size (thus in the table size) by the filling process.
1171//
1172// <note role=tip>
1173// Not all data managers support all the table functionality. So, the choice
1174// of a data manager can greatly influence the type of operations you can do
1175// on the table as a whole.
1176// For example, if a column uses the tiled storage manager,
1177// it is not possible to delete rows from the table, because that storage
1178// manager will not support deletion of rows.
1179// However, it is always possible to delete all columns of a data
1180// manager in one single call.
1181// </note>
1182
1183// <ANCHOR NAME="Tables:TiledStMan">
1184// <h3>Tiled Storage Manager</h3></ANCHOR>
1185// The Tiled Storage Managers allow one to store the data of
1186// one or more columns in a tiled way. Tiling means
1187// that the data are stored without a preferred order to make access
1188// along the different main axes equally efficient. This is done by
1189// storing the data in so-called tiles (i.e. equally shaped subsets of an
1190// array) to increase data locality. The user can define the tile shape
1191// to optimize for the most frequently used access.
1192// <p>
1193// The Tiled Storage Manager has the following properties:
1194// <ul>
1195// <li> There can be more than one Tiled Storage Manager in
1196// a table; each with its own (unique) name.
1197// <li> Each Tiled Storage Manager can store an
1198// N-dimensional so-called hypercolumn.
1199// Elaborate hypercolumns can be defined using
1200// <linkto file="TableDesc.h#defineHypercolumn">
1201// TableDesc::defineHypercolumn</linkto>.
1202// <br>Note that defining a hypercolumn is only necessary if it
1203// contains multiple columns or if the TiledDataStMan is used.
1204// It means that in practice it is hardly ever needed to define a
1205// hypercolumn.
1206// <br>A hypercolumn consists of up to three types of columns:
1207// <dl>
1208// <dt> Data columns
1209// <dd> contain the data to be stored in a tiled way. This will
1210// be done in tiled hypercubes.
1211// There must be at least one data column.
1212// <br> For example: a table contains UV-data with
1213// data columns "Visibility" and "Weight".
1214// <dt> Coordinate columns
1215// <dd> define the world coordinates of the pixels in the data columns.
1216// Coordinate columns are optional, but if given there must
1217// be N coordinate columns for an N-dimensional hypercolumn.
1218// <br>
1219// For example: the data in the example above is 4-dimensional
1220// and has coordinate columns "Time", "Baseline", "Frequency",
1221// and "Polarization".
1222// <dt> Id columns
1223// <dd> are needed if TiledDataStMan is used.
1224// Different rows in the data columns can be stored in different
1225// hypercubes. The values in the id column(s) uniquely identify
1226// the hypercube a row is stored in.
1227// <br>
1228// For example: the line and continuum data in a MeasurementSet
1229// table need to be stored in 2 different hypercubes (because
1230// their shapes are different (see below)). A column containing
1231// the type (line or continuum) has to be used as an id column.
1232// </dl>
1233// <li> If multiple data columns are used, the shape of their data
1234// must be conforming in each individual row.
1235// If data in different rows have different shapes, they must be
1236// stored in different hypercubes, because a hypercube can only hold
1237// data with conforming shapes.
1238// <br>
1239// Thus in the example above, rows with line data will have conforming
1240// shapes and can be stored in one hypercube. The continuum data
1241// will have another shape and can be stored in another hypercube.
1242// <br>
1243// The storage manager keeps track of the mapping of rows to/from
1244// hypercubes.
1245// <li> Each hypercube can be tiled in its own way. It is not required
1246// that an integer number of tiles fits in the hypercube. The last
1247// tiles will be padded as needed.
1248// <li> The last axis of a hypercube can be extensible. This means that
1249// the size of that axis does not need to be defined when the
1250// hypercube is defined in the storage manager. Instead, the hypercube
1251// can be extended when another chunk of data has to be stored.
1252// This can be very useful in, for example, a (quasi-)realtime
1253// environment where the size of the time axis is not known.
1254// <li> If coordinate columns are defined, they describe the coordinates
1255// of the axes of the hypercubes. Each hypercube has its own set of
1256// coordinates.
1257// <li> Data and id columns have to be stored with the Tiled
1258// Storage Manager. However, coordinate columns do not need to be
1259// stored with the Tiled Storage Manager.
1260// Especially in the case where the coordinates for a hypercube axis
1261// are varying (i.e. dependent on other axes), another storage manager
1262// has to be used (because the Tiled Storage Manager can only
1263// hold constant coordinates).
1264// </ul>
1265// <p>
1266// The following Tiled Storage Managers are available:
1267// <dl>
1268// <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto>
1269// <dd> can be seen as a specialization of <src>TiledDataStMan</src>
1270// by using the array shape as the id value.
1271// Similarly to <src>TiledDataStMan</src> it can maintain multiple
1272// hypercubes and store multiple rows in a hypercube, but it is
1273// easier to use, because the special <src>addHypercube</src> and
1274// <src>extendHypercube</src> functions are not needed.
1275// A hypercube is automatically added when a new array shape is
1276// encountered.
1277// <br>
1278// This storage manager could be used for a table with a column
1279// containing line and continuum data, which will result
1280// in 2 hypercubes.
1281// <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto>
1282// <dd> creates (automatically) a new hypercube for each row.
1283// Thus each row of the hypercolumn is stored in a separate hypercube.
1284// Note that the row number serves as the id value. So an id column
1285// is not needed, although there are multiple hypercubes.
1286// <br>
1287// This storage manager is meant for tables where the data arrays
1288// in the different rows are not accessed together. One can think
1289// of a column containing images. Each row contains an image and
1290// only one image is shown at a time.
1291// <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto>
1292// <dd> creates one hypercube for the entire hypercolumn. Thus all cells
1293// in the hypercube have to have the same shape and therefore this
1294// storage manager is only possible if all columns in the hypercolumn
1295// have the attribute FixedShape.
1296// <br>
1297// This storage manager could be used for a table with a column
1298// containing images for the Stokes parameters I, Q, U, and V.
1299// By storing them in one hypercube, it is possible to retrieve
1300// the 4 Stokes values for a subset of the image or for an individual
1301// pixel in a very efficient way.
1302// <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto>
1303// <dd> allows one to control the creation and extension of hypercubes.
1304// This is done by means of the class
1305// <linkto class=TiledDataStManAccessor:description>
1306// TiledDataStManAccessor</linkto>.
1307// It makes it possible to store, say, row 0-9 in hypercube A,
1308// row 10-34 in hypercube B, row 35-54 in hypercube A again, etc.
1309// <br>
1310// The drawback of this storage manager is that its hypercubes are not
1311// automatically extended when adding new rows. The special functions
1312// <src>addHypercube</src> and <src>extendHypercube</src> have to be
1313// used making it somewhat tedious to use.
1314// Therefore this storage manager may become obsolete in the near future.
1315// </dl>
1316// The Tiled Storage Managers have 3 ways to access and cache the data.
1317// Class <linkto class=TSMOption>TSMOption</linkto> can be used to setup an
1318// access choice and use it in a Table constructor.
1319// <ul>
1320// <li> The old way (the only way until January 2010) uses a cache
1321// of its own to keep tiles that might need to be reused. It will always
1322// access entire tiles, even if only a small part is needed.
1323// It is possible to define a maximum cache size. The description of class
1324// <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto>
1325// contains a discussion about the effect of defining a maximum cache
1326// size.
1327// <li> Memory-mapping the data files. In this way the operating system
1328// takes care of the IO and caching. However, the limited address space
1329// may preclude using it for large tables on 32-bit systems.
1330// <li> Use buffered IO and let the kernel's file cache take care of caching.
1331// It will access the data in chunks of the given buffer size, so the
1332// entire tile does not need to be accessed if only a small part is
1333// needed.
1334// </ul>
1335// Apart from reading, all access ways described above can also handle writing
1336// and extending tables. They create fully equal files. Both little and big
1337// endian data can be read or written.
1338
1339// <ANCHOR NAME="Tables:virtual column engines">
1340// <h3>Virtual Column Engines</h3></ANCHOR>
1341//
1342// Virtual column engines are used to implement the virtual (i.e.
1343// calculated-on-the-fly) columns. CTDS provides
1344// an abstract base class (or "interface class")
1345// <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto>
1346// that specifies the protocol for these engines.
1347// The programmer must derive a concrete class to implement
1348// the application-specific virtual column.
1349// <p>
1350// For example: the programmer
1351// needs a column in a table which is the difference between two other
1352// columns. (Perhaps these two other columns are updated periodically
1353// during the execution of a program.) A good way to handle this would
1354// be to have a virtual column in the table, and write a virtual column
1355// engine which knows how to calculate the difference between corresponding
1356// cells of the two other columns. So the result is that accessing a
1357// particular cell of the virtual column invokes the virtual column engine,
1358// which then gets the values from the other two columns, and returns their
1359// difference. This particular example could be done using
1360// <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>.
1361// <p>
1362// Several virtual column engines exist:
1363// <ol>
1364// <li> The class
1365// <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>
1366// makes it possible to define a column as an arbitrary expression of
1367// other columns. It uses the <a href="../notes/199.html">TaQL</a>
1368// CALC command. The virtual column can be a scalar or an array and
1369// can have one of the standard data types supported by CTDS.
1370// <li> The class
1371// <linkto class="BitFlagsEngine:description">BitFlagsEngine</linkto>
1372// maps an integer bit flags column to a Bool column. A read and write mask
1373// can be defined telling which bits to take into account when mapping
1374// to and from Bool (thus when reading or writing the Bool).
1375// <li> The class
1376// <linkto class="CompressFloat:description">CompressFloat</linkto>
1377// compresses a single precision floating point array by scaling the
1378// values to shorts (16-bit integer).
1379// <li> The class
1380// <linkto class="CompressComplex:description">CompressComplex</linkto>
1381// compresses a single precision complex array by scaling the
1382// values to shorts (16-bit integer). In fact, the 2 parts of the complex
1383// number are combined to a 32-bit integer.
1384// <li> The class
1385// <linkto class="CompressComplexSD:description">CompressComplexSD</linkto>
1386// does the same as CompressComplex, but optimizes for the case where the
1387// imaginary part is zero (which is often the case for Single Dish data).
1388// <li> The double templated class
1389// <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto>
1390// scales the data in an array from, for example,
1391// float to short before putting it.
1392// <li> The double templated class
1393// <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto>
1394// converts the data from one data type to another. Sometimes it might be
1395// needed to store the residual data in an MS in double precision.
1396// Because the imaging task can only handle single precision, this engine
1397// can be used to map the data from double to single precision.
1398// <li> The double templated class
1399// <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto>
1400// converts the data from one data type to another with the possibility
1401// to reduce the number of dimensions. For example, it can be used to
1402// store a 2-d array of StokesVector objects as a 3-d array of floats
1403// by treating the 4 data elements as an extra array axis. If the
1404// StokesVector class is simple, it can be done very efficiently.
1405// <li> The class
1406// <linkto class="ForwardColumnEngine:description">
1407// ForwardColumnEngine</linkto>
1408// forwards the gets and puts on a row in a column to the same row
1409// in a column with the same name in another table. This provides
1410// a virtual copy of the referenced column.
1411// <li> The class
1412// <linkto class="ForwardColumnIndexedRowEngine:description">
1413// ForwardColumnIndexedRowEngine</linkto>
1414// is similar to <src>ForwardColumnEngine</src>.
1415// However, instead of forwarding it to the same row it uses
1416// a column to map its row number to a row number in the referenced
1417// table. In this way multiple rows can share the same data.
1418// This data manager only allows for get operations.
1419// <li> The calibration module has implemented a virtual column engine
1420// to do on-the-fly calibration in a transparent way.
1421// </ol>
1422// To handle arbitrary data types the templated abstract base class
1423// <linkto class="VSCEngine:description">VSCEngine</linkto>
1424// has been written. An example of how to use this class can be
1425// found in the demo program <src>dVSCEngine.cc</src>.
1426
1427// <ANCHOR NAME="Tables:LockSync">
1428// <h3>Table locking and synchronization</h3></ANCHOR>
1429//
1430// Multiple concurrent readers and writers (also via NFS) of a
1431// table are supported by means of a locking/synchronization mechanism.
1432// This mechanism is not very sophisticated in the sense that it is
1433// very coarsely grained. When locking, the entire table gets locked.
1434// A special lock file is used to lock the table. This lock file also
1435// contains some synchronization data.
1436// <p>
1437// Six ways of locking are supported (see class
1438// <linkto class=TableLock>TableLock</linkto>):
1439// <dl>
1440// <dt> TableLock::PermanentLocking(Wait)
1441// <dd> locks the table permanently (from open till close). This means
1442// that one writer OR multiple readers are possible.
1443// <dt> TableLock::AutoLocking
1444// <dd> does the locking automatically. This is the default mode.
1445// This mode makes it possible that a table is shared amongst
1446// processes without the user needing to write any special code.
1447// It also means that a lock is only released when needed.
1448// <dt> TableLock::AutoNoReadLocking
1449// <dd> is similar to AutoLocking. However, no lock is acquired when
1450// reading the table making it possible to read the table while
1451// another process holds a write-lock. It also means that for read
1452// purposes no automatic synchronization is done when the table is
1453// updated in another process.
1454// Explicit synchronization can be done by means of the function
1455// <src>Table::resync</src>.
1456// <dt> TableLock::UserLocking
1457// <dd> requires that the programmer explicitly acquires and releases
1458// a lock on the table. This makes some kind of transaction
1459// processing possible. E.g. set a write lock, add a row,
1460// write all data into the row and release the lock.
1461// The Table functions <src>lock</src> and <src>unlock</src>
1462// have to be used to acquire and release a (read or write) lock.
1463// <dt> TableLock::UserNoReadLocking
1464// <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking
1465// no lock is needed to read the table.
1466// <dt> TableLock::NoLocking
1467// <dd> does not use table locking. It is the responsibility of the
1468// user to ensure that no concurrent access is done on the same
1469// bucket or tile in a storage manager, otherwise a table might
1470// get corrupted.
1471// <br>This mode is always used if Casacore is built with
1472// -DAIPS_TABLE_NOLOCKING.
1473// </dl>
1474// Synchronization of the processes accessing the same table is done
1475// by means of the lock file. When a lock is released, the storage
1476// managers flush their data into the table files. Some synchronization data
1477// is written into the lock file telling the new number of table rows
1478// and telling which storage managers have written data.
1479// This information is read when another process acquires the lock
1480// and is used to determine which storage managers have to refresh
1481// their internal caches.
1482// <br>Note that for the NoReadLocking modes (see above) explicit
1483// synchronization might be needed using <src>Table::resync</src>.
1484// <p>
1485// The function <src>Table::hasDataChanged</src> can be used to check
1486// if a table is (being) changed by another process. In this way
1487// a program can react to it. E.g. the table browser can refresh its
1488// screen when the underlying table is changed.
1489// <p>
1490// In general the default locking option will do.
1491// From the above it should be clear that heavy concurrent access
1492// results in a lot of flushing, thus will have a negative impact on
1493// performance. If uninterrupted access to a table is needed,
1494// the <src>PermanentLocking</src> option should be used.
1495// If transaction-like processing is done (e.g. updating a table
1496// containing an observation catalogue), the <src>UserLocking</src>
1497// option is probably best.
1498// <p>
1499// Creation or deletion of a table is not possible if that table
1500// is still open in another process. The function
1501// <src>Table::isMultiUsed()</src> can be used to check if a table
1502// is open in other processes.
1503// <br>
1504// The function <src>TableUtil::deleteTable</src> should be used to delete
1505// a table. Before deleting the table it ensures that it is writable
1506// and that it is not open in the current or another process.
1507// <p>
1508// The following example wants to read the table uninterrupted, thus it uses
1509// the <src>PermanentLocking</src> option. It also wants to wait
1510// until the lock is actually acquired.
1511// Note that the destructor closes the table and releases the lock.
1512// <srcblock>
1513// // Open the table (readonly).
1514// // Acquire a permanent (read) lock.
1515// // It waits until the lock is acquired.
1516// Table tab ("some.name",
1517// TableLock(TableLock::PermanentLockingWait));
1518// </srcblock>
1519//
1520// The following example uses the automatic locking.
1521// It tells the system to check about every 20 seconds if another
1522// process wants access to the table.
1523// <srcblock>
1524// // Open the table (readonly).
1525// Table tab ("some.name",
1526// TableLock(TableLock::AutoLocking, 20));
1527// </srcblock>
1528//
1529// The following example gets data (say from a GUI) and writes it
1530// as a row into the table. To lock the table as little as possible,
1531// the lock is acquired just before writing and released immediately
1532// thereafter.
1533// <srcblock>
1534// // Open the table (writable).
1535// Table tab ("some.name",
1536// TableLock(TableLock::UserLocking),
1537// Table::Update);
1538// while (True) {
1539// get input data
1540// tab.lock(); // Acquire a write lock and wait for it.
1541// tab.addRow();
1542// write data into the row
1543// tab.unlock(); // Release the lock.
1544// }
1545// </srcblock>
1546//
1547// The following example deletes a table if it is not used in
1548// another process.
1549// <srcblock>
1550// Table tab ("some.name");
1551// if (! tab.isMultiUsed()) {
1552// tab.markForDelete();
1553// }
1554// </srcblock>
1555
1556// <ANCHOR NAME="Tables:KeyLookup">
1557// <h3>Table lookup based on a key</h3></ANCHOR>
1558//
1559// Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the
1560// user a means to find the rows matching a given key or key range.
1561// It is a somewhat primitive replacement of a B-tree index and in the
1562// future it may be replaced by a proper B+-tree implementation.
1563// <p>
1564// The <src>ColumnsIndex</src> class makes it possible to build an
1565// in-core index on one or more columns. Looking up a key or key range
1566// is done using a binary search on that index. It returns a vector
1567// containing the row numbers of the rows matching the key (range).
1568// <p>
1569// The class is not capable of tracing changes in the underlying column(s).
1570// It detects a change in the number of rows and updates the index
1571// accordingly. However, it has to be told explicitly when a value
1572// in the underlying column(s) changes.
1573// <p>
1574// The following example shows how the class can be used.
1575// <example>
1576// Suppose one has an antenna table with key ANTENNA.
1577// <srcblock>
1578// // Open the table and make an index for column ANTENNA.
1579// Table tab("antenna.tab");
1580// ColumnsIndex colInx(tab, "ANTENNA");
1581// // Make a RecordFieldPtr for the ANTENNA field in the index key record.
1582// // Its data type has to match the data type of the column.
1583// RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA");
1584// // Now loop in some way and find the row for the antenna
1585// // involved in that loop.
1586// Bool found;
1587// while (...) {
1588// // Fill the key field and get the row number.
1589// // ANTENNA is a unique key, so only one row number matches.
1590// // Otherwise function getRowNumbers had to be used.
1591// *antFld = antenna;
1592// uInt antRownr = colInx.getRowNumber (found);
1593// if (!found) {
1594// cout << "Antenna " << antenna << " is unknown" << endl;
1595// } else {
1596// // antRownr can now be used to get data from that row in
1597// // the antenna table.
1598// }
1599// }
1600// </srcblock>
1601// </example>
1602// <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more
1603// advanced example. It shows how to use a private compare function
1604// to adjust the lookup if the index does not contain single
1605// key values, but intervals instead. This is useful if a row in
1606// a (sub)table is valid for, say, a time range instead of a single
1607// timestamp.
1608
1609// <ANCHOR NAME="Tables:performance">
1610// <h3>Performance and robustness considerations</h3></ANCHOR>
1611//
1612// CTDS resembles a database system, but it is not as robust.
1613// It lacks the transaction and logging facilities common to data base systems.
1614// It means that in case of a crash data might be lost.
1615// To reduce the risk of data loss to
1616// a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally
1617// with an <tt>fsync</tt> to ensure that all data are really written.
1618// However, that can degrade the performance because it involves extra writes.
1619// So one should find the right balance between robustness and performance.
1620//
1621// To get a good feeling for the performance issues, it is important to
1622// understand some of the internals of CTDS.
1623// <br>The storage managers drive the performance. All storage managers use
1624// buckets (called tiles for the TiledStMan) which contain the data.
1625// All IO is done by bucket. The bucket/tile size is defined when creating
1626// the storage manager objects. Sometimes the default will do, but usually
1627// it is better to set it explicitly.
1628//
1629// It is best to do a flush when a tile is full.
1630// For example: <br>
1631// When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines
1632// or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to
1633// store, say, N/2 rows in a tile and do a flush each time all baselines
1634// are written. In that way tiles are fully filled when doing the flush, so
1635// no extra IO is involved.
1636// <br>Here is some code showing this when creating a MeasurementSet.
1637// The code should speak for itself.
1638// <srcblock>
1639// MS* createMS (const String& msName, int nrchan, int nrant)
1640// {
1641// // Get the MS main default table description.
1642// TableDesc td = MS::requiredTableDesc();
1643// // Add the data column and its unit.
1644// MS::addColumnToDesc(td, MS::DATA, 2);
1645// td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet().
1646// define("UNIT","Jy");
1647// // Store the DATA and FLAG column in two separate files.
1648// // In this way accessing FLAG only is much cheaper than
1649// // when combining DATA and FLAG.
1650// // All data have the same shape, thus use TiledColumnStMan.
1651// // Also store UVW with TiledColumnStMan.
1652// Vector<String> tsmNames(1);
1653// tsmNames[0] = MS::columnName(MS::DATA);
1654// td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1655// td.defineHypercolumn("TiledData", 3, tsmNames);
1656// tsmNames[0] = MS::columnName(MS::FLAG);
1657// td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1658// td.defineHypercolumn("TiledFlag", 3, tsmNames);
1659// tsmNames[0] = MS::columnName(MS::UVW);
1660// td.defineHypercolumn("TiledUVW", 2, tsmNames);
1661// // Setup the new table.
1662// SetupNewTable newTab(msName, td, Table::New);
1663// // Most columns vary slowly and use the IncrStMan.
1664// IncrementalStMan incrStMan("ISMData");
1665// // A few columns use the StandardStMan (set an appropriate bucket size).
1666// StandardStMan stanStMan("SSMData", 32768);
1667// // Store all pol and freq and some rows in a single tile.
1668// // autocorrelations are written, thus in total there are
1669// // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an
1670// // integer number of tiles.
1671// TiledColumnStMan tiledData("TiledData",
1672// IPosition(3,4,nrchan,(nrant+1)/2));
1673// TiledColumnStMan tiledFlag("TiledFlag",
1674// IPosition(3,4,nrchan,8*(nrant+1)/2));
1675// TiledColumnStMan tiledUVW("TiledUVW",
1676// IPosition(2,3,nrant*(nrant+1)/2));
1677// newTab.bindAll (incrStMan);
1678// newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan);
1679// newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan);
1680// newTab.bindColumn(MS::columnName(MS::DATA),tiledData);
1681// newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag);
1682// newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW);
1683// // Create the MS and its subtables.
1684// // Get access to its columns.
1685// MS* msp = new MeasurementSet(newTab);
1686// // Create all subtables.
1687// // Do this after the creation of optional subtables,
1688// // so the MS will know about those optional subtables.
1689// msp->createDefaultSubtables (Table::New);
1690// return msp;
1691// }
1692// </srcblock>
1693
1694// <h4>Some more performance considerations</h4>
1695// Which storage managers to use and how to use them depends heavily on
1696// the type of data and the access patterns to the data. Here follow some
1697// guidelines:
1698// <ol>
1699// <li> Scalar data can be stored with the StandardStMan (SSM) or
1700// IncrementalStMan (ISM). For slowly varying data (e.g. the TIME column
1701// in a MeasurementSet) it is best to use the ISM. Otherwise the SSM.
1702// Note that very long strings (longer than the bucketsize) can only
1703// be stored with the SSM.
1704// <li> Any number of storage managers can be used. In fact, each column
1705// can have a storage manager of its own resulting in column-wise
1706// stored data which is more and more used in data base systems.
1707// In that way a query or sort on that column is very fast, because
1708// the buckets to read only contain data of that column.
1709// In practice one can decide to combine a few frequently used columns
1710// in a storage manager.
1711// <li> Array data can be stored with any column manager. Small fixed size
1712// arrays can be stored directly with the SSM
1713// (or ISM if not changing much).
1714// However, they can also be stored with a TiledStMan (TSM) as shown
1715// for the UVW column in the example above.
1716// <br> Large arrays should usually be stored with a TSM. However,
1717// if it must be possible to change the shape of an array after it
1718// was stored, the SSM (or ISM) must be used. Note that in that
1719// case a lot of disk space can be wasted, because the SSM and ISM
1720// store the array data at the end of the file if the array got
1721// bigger and do not reuse the old space. The only way to
1722// reclaim it is by making a deep copy of the entire table.
1723// <li> If an array is stored with a TSM, it is important to decide
1724// which TSM to use.
1725// <ol>
1726// <li> The TiledColumnStMan is the most efficient, but only suitable
1727// for arrays having the same shape in the entire column.
1728// <li> The TiledShapeStMan is suitable for columns where the arrays
1729// can have a few shapes.
1730// <li> The TiledCellStMan is suitable for columns where the arrays
1731// can have many different shapes.
1732// </ol>
1733// This is discussed in more detail
1734// <a href="#Tables:TiledStMan">above</a>.
1735// <li> If storing an array with a TSM, it can be very important to
1736// choose the right tile shape. Not only does this define the size
1737// of a tile, but it also defines if access in other directions
1738// than the natural direction can be fast. It is also discussed in
1739// more detail <a href="#Tables:TiledStMan">above</a>.
1740// <li> Columns can be combined in a single TiledStMan. For instance, combining DATA
1741// and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG
1742// is used on its own (e.g. in combination with CORRECTED_DATA), it is better
1743// to separate them, otherwise tiles containing FLAG also contain DATA making the
1744// tiles much bigger, thus more expensive to access.
1745// </ol>
1746//
1747// <ANCHOR NAME="Tables:iotracing">
1748// <h4>IO Tracing</h4></ANCHOR>
1749//
1750// Several forms of tracing can be done to see how the Table I/O performs.
1751// <ul>
1752// <li> On Linux/UNIX systems the <src>strace</src> command can be used to
1753// collect trace information about the physical IO.
1754// <li> The function <src>showCacheStatistics</src> in class
1755// TiledStManAccessor can be used to show the number of actual reads
1756// and writes and the percentage of cache hits.
1757// <li> The software has some options to trace the operations done on
1758// tables. It is possible to specify the columns and/or the operations
1759// to be traced. The following <src>aipsrc</src> variables can be used.
1760// <ul>
1761// <li> <src>table.trace.filename</src> specifies the file to write the
1762// trace output to. If not given or empty, no tracing will be done.
1763// The file name can contain environment variables or a tilde.
1764// <li> <src>table.trace.operation</src> specifies the operations to be
1765// traced. It is a string containing s, r, and/or w where
1766// s means tracing RefTable construction (selection/sort),
1767// r means column reads, and w means column writes.
1768// If empty, only the high level table operations (open, create, close)
1769// will be traced.
1770// <li> <src>table.trace.columntype</src> specifies the types of columns to
1771// be traced. It is a string containing the characters s, a, and/or r.
1772// s means all scalar columns, a all array columns, and r all record
1773// columns. If empty and if <src>table.trace.column</src> is empty,
1774// its default value is a.
1775// <li> <src>table.trace.column</src> specifies names of columns to be
1776// traced. Its value can be one or more glob-like patterns separated
1777// by commas without any whitespace. The default is empty.
1778// For example:
1779// <srcblock>
1780// table.trace.column: *DATA,FLAG,WEIGHT*
1781// </srcblock>
1782// to trace all DATA, the FLAG, and all WEIGHT columns.
1783// </ul>
1784// The trace output is a text file with the following columns
1785// separated by a space.
1786// <ul>
1787// <li> The UTC time the trace line was written (with msec accuracy).
1788// <li> The operation: n(ew), o(pen), c(lose), t(able), r(ead), w(rite),
1789// s(election/sort/iter), p(rojection).
1790// t means an arbitrary table operation as given in the name column.
1791// <li> The table-id (as t=i) given at table creation (new) or open.
1792// <li> The table name, column name, or table operation
1793// (as <src>*oper*</src>).
1794// <src>*reftable*</src> means that the operation is on a RefTable
1795// (thus result of selection, sort, projection, or iteration).
1796// <li> The row or rows to access (* means all rows).
1797// Multiple rows are given as a series of ranges like s:e:i,s:e:i,...
1798// where e and i are only given if applicable (default i is 1).
1799// Note that e is inclusive and defaults to s.
1800// <li> The optional array shape to access (none means scalar).
1801// In case multiple rows are accessed, the last shape value is the
1802// number of rows.
1803// <li> The optional slice of the array in each row as [start][end][stride].
1804// </ul>
1805// Shape, start, end, and stride are given in Fortran-order as
1806// [n1,n2,...].
1807// </ul>
1808
1809// <ANCHOR NAME="Tables:applications">
1810// <h4>Applications to inspect/manipulate a table</h4></ANCHOR>
1811// <ul>
1812// <li><em>showtableinfo</em> shows the structure of a table. It can show:
1813// <ul>
1814// <li> the columns and their format (optionally sorted on name)
1815// <li> the data managers used to store the column data
1816// <li> the table and/or column keywords and their values
1817// <li> recursively the same info of the subtables
1818// </ul>
1819// <li><em>showtablelock</em> shows if a table is locked or opened and by
1820// which process.
1821// <li><em>lsmf</em> shows the virtual files contained in a MultiFile.
1822// <li><em>tomf</em> copies the given files to a MultiFile.
1823// <li><em>taql</em> can be used to query a table using the
1824// <a href="../notes/199.html">Table Query Language</a> (TaQL).
1825// </ul>
1826//
1827// </synopsis>
1828// </module>
1829
1830
1831
1832} //# NAMESPACE CASACORE - END
1833
1834#endif
this file contains all the compiler specific defines
Definition mainpage.dox:28