casacore
MultiFileBase.h
Go to the documentation of this file.
1 //# MultiFileBase.h: Abstract base class to combine multiple files in a single one
2 //# Copyright (C) 2014
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
27 
28 #ifndef CASA_MULTIFILEBASE_H
29 #define CASA_MULTIFILEBASE_H
30 
31 //# Includes
32 #include <casacore/casa/aips.h>
33 #include <casacore/casa/IO/ByteIO.h>
34 #include <casacore/casa/BasicSL/String.h>
35 #include <casacore/casa/Utilities/CountedPtr.h>
36 #include <casacore/casa/vector.h>
37 #include <casacore/casa/ostream.h>
38 
39 
40 namespace casacore { //# NAMESPACE CASACORE - BEGIN
41 
42  //# Forward declaration.
43  class AipsIO;
44  class HDF5Group;
45  class HDF5DataSet;
46 
47 
48  // <summary>
49  // Helper class for MultiFileInfo holding a data buffer
50  // </summary>
51  // <synopsis>
52  // The buffer can be allocated with posix_memalign (for O_DIRECT support).
53  // Hence the memory must be freed using free, which makes it impossible
54  // to use a shared_ptr to that memory. Hence it is encapsulated in this class.
55  // </synopsis>
56  struct MultiFileBuffer {
57  MultiFileBuffer (size_t bufSize, Bool useODirect);
58  ~MultiFileBuffer()
59  { if (data) free (data); }
60  // Data member
61  char* data;
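62  private:
63  MultiFileBuffer (const MultiFileBuffer&);
64  MultiFileBuffer& operator= (const MultiFileBuffer&);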
65  };
66 
67  // <summary>
68  // Helper class for MultiFileBase containing info per internal file.
69  // </summary>
70  // <synopsis>
71  // This struct defines the various fields describing a logical file in a
72  // class derived from MultiFileBase (such as MultiFile or MultiHDF5).
73  // </synopsis>
74  // <use visibility=local>
75  struct MultiFileInfo {
76  // Initialize the object and create the buffer with the proper size.
77  // If useODirect=True (for use of O_DIRECT), the buffer is properly aligned and it
78  // is ensured that its size is a multiple of the alignment.
79  explicit MultiFileInfo (Int64 bufSize=0, Bool useODirect=False);
80  // Allocate the buffer.
81  void allocBuffer (Int64 bufSize, Bool useODirect=False)
82  { buffer = std::shared_ptr<MultiFileBuffer> (new MultiFileBuffer(bufSize, useODirect)); }
83  //# Data members.
84  vector<Int64> blockNrs; // physical blocknrs for this logical file
85  Int64 curBlock; // the data block held in buffer (<0 is none)
86  Int64 fsize; // file size (in bytes)
87  String name; // the virtual file name
88  Bool dirty; // has data in buffer been changed?
89  std::shared_ptr<MultiFileBuffer> buffer; // buffer holding a data block
90  CountedPtr<HDF5Group> group;
91  CountedPtr<HDF5DataSet> dataSet;
92  };
93  void operator<< (ostream&, const MultiFileInfo&);
96 
97 
98  // <summary>
99  // Abstract base class to combine multiple files in a single one.
100  // </summary>
101 
102  // <use visibility=export>
103 
104  // <reviewed reviewer="" date="" tests="tMultiFile" demos="">
105  // </reviewed>
106 
107  // <synopsis>
108  // This class is a container file holding multiple virtual files. It is
109  // primarily meant as a container file for the storage manager files of a
110  // table to reduce the number of files used (especially for Lustre) and to
111  // reduce the number of open files (especially when concatenating tables).
112  // <br>A secondary goal is offering the ability to use an IO buffer size
113  // that matches the file system well (large buffer size for e.g. ZFS).
114  //
115  // The SetupNewTable constructor has a StorageOption argument to define
116  // if a MultiFile has to be used and if so, the buffer size to use.
117  // It is also possible to specify that through aipsrc variables.
118  //
119  // A virtual file is spread over multiple (fixed size) data blocks in the
120  // MultiFile. A data block is never shared by multiple files.
121  // For each virtual file MultiFile keeps a MultiFileInfo object telling
122  // the file size and the block numbers used for the file. When flushing
123  // the MultiFile, this meta info is written into a header block and,
124  // if needed, continuation blocks. On open and resync, it is read back.
125  // <br>
126  //
127  // A virtual file is represented by an MFFileIO object, which is derived
128  // from ByteIO and as such part of the casacore IO framework. It makes it
129  // possible for applications to access a virtual file in the same way as
130  // a regular file.
131  //
132  // It is possible to delete a virtual file. Its blocks will be added to
133  // the free block list (which is also stored in the meta info).
134  // </synopsis>
135 
136  // <example>
137  // In principle it is possible to use the MultiFile functions directly.
138  // However, in general it is much easier to use an MFFileIO object
139  // per virtual file as shown below.
140  // <srcblock>
141  // // Create a new MultiFile using a block size of 1 MB.
142  // MultiFile mfile("file.mf", ByteIO::New, 1048576);
143  // // Create a virtual file in it.
144  // MFFileIO mf1(mfile, "mf1", ByteIO::New);
145  // // Use it (for example) as the sink of AipsIO.
146  // AipsIO stream (&mf1);
147  // // Write values.
148  // stream << (Int)10;
149  // stream << True;
150  // // Seek to beginning of file and read data in.
151  // stream.setpos (0);
152  // Int vali;
153  // Bool valb;
154  // stream >> vali >> valb;
155  // </srcblock>
156  // </example>
157 
158  // <todo>
159  // <li> write headers at alternating file positions (for robustness)
160  // <li> possibly write headers entirely at the end if larger than blocksize
161  // </todo>
162 
163 
164  class MultiFileBase
165  {
166  public:
167  // Open or create a MultiFileBase with the given name.
168  // Upon creation the block size can be given. If 0, it uses the block size
169  // of the file system the file is on.
170  // If useODirect=True, it means that O_DIRECT is used. If the OS does not
171  // support it, the flag will always be False. If True, the data buffers will
172  // have a proper alignment and size (as needed by O_DIRECT).
173  MultiFileBase (const String& name, Int blockSize, Bool useODirect);
174 
175  // The destructor flushes and closes the file.
176  virtual ~MultiFileBase();
177 
178  // Return the file id of a file in the MultiFileBase object.
179  // If the name is unknown, an exception is thrown if throwExcp is set.
180  // Otherwise it returns -1.
181  Int fileId (const String& name, Bool throwExcp=True) const;
182 
183  // Add a file to the MultiFileBase object. It returns the file id.
184  // Only the base name of the given file name is used. In this way the
185  // MultiFileBase container file can be moved.
186  Int addFile (const String& name);
187 
188  // Delete a file. It adds its blocks to the free block list.
189  void deleteFile (Int fileId);
190 
191  // Read a block at the given offset. It returns the actual size read.
192  Int64 read (Int fileId, void* buffer, Int64 size, Int64 offset);
193 
194  // Write a block at the given offset. It returns the actual size written.
195  Int64 write (Int fileId, const void* buffer, Int64 size, Int64 offset);
196 
197  // Flush the file by writing all dirty data and all header info.
198  void flush();
199 
200  // Resync with another process by clearing the buffers and rereading
201  // the header. The header is only read if its counter has changed.
202  void resync();
203 
204  // Reopen the underlying file for read/write access.
205  // Nothing will be done if the file is writable already.
206  // Otherwise it will be reopened and an exception will be thrown
207  // if it is not possible to reopen it for read/write access.
208  virtual void reopenRW() = 0;
209 
210  // Fsync the file (i.e., force the data to be physically written).
211  virtual void fsync() = 0;
212 
213  // Get the file name of the MultiFileBase.
214  String fileName() const
215  { return itsName; }
216 
217  // Is the file writable?
218  Bool isWritable() const
219  { return itsWritable; }
220 
221  // Will O_DIRECT be used?
222  Bool useODirect() const
223  { return itsUseODirect; }
224 
225  // Get the block size used.
226  Int64 blockSize() const
227  { return itsBlockSize; }
228 
229  // Get the nr of virtual files.
230  uInt nfile() const;
231 
232  // Get the total nr of data blocks used.
233  Int64 size() const
234  { return itsNrBlock; }
235 
236  // Get the info object (for test purposes mainly).
237  const vector<MultiFileInfo>& info() const
238  { return itsInfo; }
239 
240  // Get the free blocks (for test purposes mainly).
241  const vector<Int64>& freeBlocks() const
242  { return itsFreeBlocks; }
243 
244  private:
245  void writeDirty (MultiFileInfo& info)
246  {
247  writeBlock (info, info.curBlock, info.buffer->data);
248  info.dirty = False;
249  }
250 
251  // Do the class-specific actions on adding a file.
252  virtual void doAddFile (MultiFileInfo&) = 0;
253  // Do the class-specific actions on deleting a file.
254  virtual void doDeleteFile (MultiFileInfo&) = 0;
255  // Flush the file itself.
256  virtual void flushFile() = 0;
257  // Flush and close the file.
258  virtual void close() = 0;
259  // Write the header info.
260  virtual void writeHeader() = 0;
261  // Read the header info. If always==False, the info is only read if the
262  // header counter has changed.
263  virtual void readHeader (Bool always=True) = 0;
264  // Extend the virtual file to fit lastblk.
265  virtual void extend (MultiFileInfo& info, Int64 lastblk) = 0;
266  // Write a data block.
267  virtual void writeBlock (MultiFileInfo& info, Int64 blknr,
268  const void* buffer) = 0;
269  // Read a data block.
270  virtual void readBlock (MultiFileInfo& info, Int64 blknr,
271  void* buffer) = 0;
272 
273  protected:
274  // Set the flags and blockSize for a new MultiFile/HDF5.
275  void setNewFile();
276 
277  //# Data members
278  String itsName;
279  Int64 itsBlockSize; // The blocksize used
280  Int64 itsNrBlock; // The total nr of blocks actually used
281  Int64 itsHdrCounter; // Counter of header changes
282  vector<MultiFileInfo> itsInfo;
283  std::shared_ptr<MultiFileBuffer> itsBuffer;
284  Bool itsUseODirect; // use O_DIRECT?
285  Bool itsWritable; // Is the file writable?
286  Bool itsChanged; // Has header info changed since last flush?
287  vector<Int64> itsFreeBlocks;
288  };
289 
290 
291 } //# NAMESPACE CASACORE - END
292 
293 #endif
Reference-counted pointer for constant data.
Definition: CountedPtr.h:81
Abstract base class to combine multiple files in a single one.
Int64 blockSize() const
Get the block size used.
virtual void extend(MultiFileInfo &info, Int64 lastblk)=0
Extend the virtual file to fit lastblk.
virtual void writeHeader()=0
Write the header info.
MultiFileBase(const String &name, Int blockSize, Bool useODirect)
Open or create a MultiFileBase with the given name.
virtual void fsync()=0
Fsync the file (i.e., force the data to be physically written).
virtual void readHeader(Bool always=True)=0
Read the header info.
Int64 write(Int fileId, const void *buffer, Int64 size, Int64 offset)
Write a block at the given offset.
void resync()
Resync with another process by clearing the buffers and rereading the header.
vector< Int64 > itsFreeBlocks
vector< MultiFileInfo > itsInfo
Int addFile(const String &name)
Add a file to the MultiFileBase object.
Int fileId(const String &name, Bool throwExcp=True) const
Return the file id of a file in the MultiFileBase object.
Int64 size() const
Get the total nr of data blocks used.
void deleteFile(Int fileId)
Delete a file.
Int64 read(Int fileId, void *buffer, Int64 size, Int64 offset)
Read a block at the given offset.
String fileName() const
Get the file name of the MultiFileBase.
Bool useODirect() const
Will O_DIRECT be used?
virtual ~MultiFileBase()
The destructor flushes and closes the file.
Bool isWritable() const
Is the file writable?
virtual void doDeleteFile(MultiFileInfo &)=0
Do the class-specific actions on deleting a file.
virtual void writeBlock(MultiFileInfo &info, Int64 blknr, const void *buffer)=0
Write a data block.
virtual void reopenRW()=0
Reopen the underlying file for read/write access.
virtual void readBlock(MultiFileInfo &info, Int64 blknr, void *buffer)=0
Read a data block.
virtual void flushFile()=0
Flush the file itself.
const vector< MultiFileInfo > & info() const
Get the info object (for test purposes mainly).
virtual void close()=0
Flush and close the file.
void setNewFile()
Set the flags and blockSize for a new MultiFile/HDF5.
virtual void doAddFile(MultiFileInfo &)=0
Do the class-specific actions on adding a file.
void flush()
Flush the file by writing all dirty data and all header info.
const vector< Int64 > & freeBlocks() const
Get the free blocks (for test purposes mainly).
uInt nfile() const
Get the nr of virtual files.
std::shared_ptr< MultiFileBuffer > itsBuffer
void writeDirty(MultiFileInfo &info)
String: the storage and methods of handling collections of characters.
Definition: String.h:225
free(pool)
this file contains all the compiler specific defines
Definition: mainpage.dox:28
const Bool False
Definition: aipstype.h:44
unsigned int uInt
Definition: aipstype.h:51
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
Definition: aipsxtype.h:38
int Int
Definition: aipstype.h:50
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
ostream & operator<<(ostream &os, const IComplex &)
Show on ostream.
const Bool True
Definition: aipstype.h:43
AipsIO & operator>>(AipsIO &os, Record &rec)
Definition: Record.h:465
char * data
Data member.
Definition: MultiFileBase.h:61
MultiFileBuffer(const MultiFileBuffer &)
MultiFileBuffer(size_t bufSize, Bool useODirect)
MultiFileBuffer & operator=(const MultiFileBuffer &)
Helper class for MultiFileBase containing info per internal file.
Definition: MultiFileBase.h:75
void allocBuffer(Int64 bufSize, Bool useODirect=False)
Allocate the buffer.
Definition: MultiFileBase.h:81
vector< Int64 > blockNrs
Definition: MultiFileBase.h:84
std::shared_ptr< MultiFileBuffer > buffer
Definition: MultiFileBase.h:89
CountedPtr< HDF5Group > group
Definition: MultiFileBase.h:90
MultiFileInfo(Int64 bufSize=0, Bool useODirect=False)
Initialize the object and create the buffer with the proper size.
CountedPtr< HDF5DataSet > dataSet
Definition: MultiFileBase.h:91