casacore
Loading...
Searching...
No Matches
RowBasedFile.h
Go to the documentation of this file.
1#ifndef CASACORE_ROW_BASED_FILE_H_
2#define CASACORE_ROW_BASED_FILE_H_
3
4#include <fcntl.h>
5#include <unistd.h>
6
7#include <array>
8#include <cstdint>
9#include <string>
10#include <vector>
11
12namespace casacore {
13
15 public:
// Default constructor: creates an object that holds no file (file_ is -1, so
// IsOpen() returns false).
16 RowBasedFile() = default;
// Copy construction is disabled.
17 RowBasedFile(const RowBasedFile& rhs) = delete;
18 RowBasedFile(RowBasedFile&& rhs) noexcept
19 : file_(rhs.file_),
20 private_header_size_(rhs.private_header_size_),
21 n_rows_(rhs.n_rows_),
22 need_truncate_(rhs.need_truncate_),
23 stride_(rhs.stride_),
24 data_location_(rhs.data_location_),
25 filename_(rhs.filename_) {
26 rhs.file_ = -1;
27 rhs.private_header_size_ = kWriterPrivateHeaderSize;
28 rhs.n_rows_ = 0;
29 rhs.need_truncate_ = false;
30 rhs.stride_ = 0;
31 rhs.data_location_ = kWriterPrivateHeaderSize;
32 rhs.filename_ = "";
33 }
34
// Creates a new file and opens it for reading and writing. An existing file
// with the same name is truncated (O_TRUNC). The file is created with mode
// rw-r--r-- (owner read/write, group and others read).
// filename: path of the file to create; also stored for error messages.
// stride: number of bytes per row, stored in stride_.
// Throws std::runtime_error when the file cannot be created.
// NOTE(review): header_size is not used by any statement visible here, and
// the embedded line numbering skips two lines before the closing brace --
// the statements that use it appear to be missing from this listing; confirm
// against the original header.
38 RowBasedFile(const std::string& filename, uint64_t header_size,
39 uint64_t stride)
40 : stride_(stride), filename_(filename) {
41 file_ = open(filename.c_str(), O_CREAT | O_RDWR | O_TRUNC,
42 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
43 if (file_ < 0)
44 throw std::runtime_error("I/O error: could not create new file '" +
45 filename + "'");
48 }
49
// Opens an existing file. Read-write access is tried first; when that fails
// the file is opened read-only.
// The private header is then read and validated: the magic tag must equal
// kMagicFileTag, the major version stored in the file must not exceed the
// major version of kFileVersion, and the user header size stored in the file
// must equal header_size. Finally the number of rows is derived from the
// file size.
// Throws std::runtime_error when the file cannot be opened or any of these
// checks fails.
// NOTE(review): when a check throws after open() succeeded, no statement
// here closes file_, and the destructor does not run for a partially
// constructed object -- the descriptor looks leaked; confirm.
53 RowBasedFile(const std::string& filename, size_t header_size)
54 : filename_(filename) {
55 file_ = open(filename.c_str(), O_RDWR);
56 if (file_ < 0) {
// Fall back to read-only access.
57 file_ = open(filename.c_str(), O_RDONLY);
58 if (file_ < 0)
59 throw std::runtime_error("I/O error: could not open file '" + filename +
60 "'");
61 }
// NOTE(review): ReadData() only reports a negative result, so for a file
// shorter than four bytes magic_tag would be compared uninitialized.
62 uint32_t magic_tag;
63 ReadData(reinterpret_cast<unsigned char*>(&magic_tag), sizeof(uint32_t));
64 if (magic_tag != kMagicFileTag) {
65 throw std::runtime_error(
66 "Could not read file " + filename +
67 ": file does not obey the Casacore row-based file format: either the "
68 "file is damaged, or this is not a Casacore row-based file");
69 }
70
71 uint32_t file_version;
72 ReadData(reinterpret_cast<unsigned char*>(&file_version), sizeof(uint32_t));
// The major version is stored in bits 8-15 of the version word.
73 const uint32_t major_version = (file_version & 0xFF00) >> 8;
74 constexpr uint32_t kWriterMajorVersion = (kFileVersion & 0xFF00) >> 8;
75 if (major_version > kWriterMajorVersion) {
76 throw std::runtime_error("The file " + filename +
77 " requires a reader of at least major version " +
78 std::to_string(major_version) +
79 ". This reader is for major version " +
80 std::to_string(kWriterMajorVersion) + ".");
81 }
82
83 // Combine reading of private header size, stride and user header size in
84 // one read call.
85 std::array<unsigned char, sizeof(uint32_t) + 2 * sizeof(uint64_t)>
86 rest_of_private_header;
87 ReadData(rest_of_private_header.data(), rest_of_private_header.size());
// NOTE(review): the left-hand side of the next expression is missing from
// this listing (the embedded numbering skips a line); presumably it assigns
// private_header_size_ -- confirm against the original header.
89 reinterpret_cast<uint32_t&>(rest_of_private_header.data()[0]);
90 stride_ = reinterpret_cast<uint64_t&>(
91 rest_of_private_header.data()[sizeof(uint32_t)]);
92 size_t file_user_header_size = reinterpret_cast<uint64_t&>(
93 rest_of_private_header.data()[sizeof(uint32_t) + sizeof(uint64_t)]);
// NOTE(review): the value compared here is the user header size, although
// the message below speaks of the private header.
94 if (file_user_header_size != header_size) {
95 throw std::runtime_error("Error reading file " + filename +
96 ": inconsistent size of private header");
97 }
98
// Seeking to the end yields the file size; the row count is the number of
// whole strides after data_location_, or zero when the stride is zero.
100 const uint64_t pos = lseek(file_, 0, SEEK_END);
101 n_rows_ = stride_ == 0 ? 0 : (pos - data_location_) / stride_;
102 }
103 ~RowBasedFile() noexcept {
104 try {
105 Close();
106 } catch (...) {
107 }
108 }
110 Close();
111 std::swap(file_, rhs.file_);
112 std::swap(private_header_size_, rhs.private_header_size_);
113 std::swap(n_rows_, rhs.n_rows_);
114 std::swap(need_truncate_, rhs.need_truncate_);
115 std::swap(stride_, rhs.stride_);
116 std::swap(data_location_, rhs.data_location_);
117 std::swap(filename_, rhs.filename_);
118 return *this;
119 }
120
// Close the file. Does nothing when no file is open.
// When a truncate is pending (need_truncate_ is set), the file is first
// truncated to NRows() rows; an exception from Truncate() is rethrown.
// NOTE(review): the embedded line numbering skips one line inside the catch
// block and one after the truncate branch; the statements that actually
// close the descriptor appear to be missing from this listing -- confirm
// against the original header.
126 void Close() {
127 if (IsOpen()) {
128 if (need_truncate_) {
129 try {
// The flag is cleared before the call, so it stays cleared even when
// Truncate() throws.
130 need_truncate_ = false;
131 Truncate(NRows());
132 } catch (...) {
133 // Truncate failed, still try to close the file to prevent a dangling
134 // open file, before throwing the exception.
136 throw;
137 }
138 }
140 }
141 }
142
143 void Truncate(uint64_t n_rows) {
144 const int result = ftruncate(file_, n_rows_ * stride_ + data_location_);
145 if (result < 0) {
146 throw std::runtime_error(
147 "I/O error: could not truncate file '" + filename_ + "' to have " +
148 std::to_string(n_rows) + " rows: " + ErrorString());
149 }
150 }
151
152 void Seek(off_t pos, int seek_direction) {
153 const off_t result = lseek(file_, pos, seek_direction);
154 if (result < 0)
155 throw std::runtime_error("I/O error: could not seek through file '" +
156 filename_ + "'");
157 }
158
159 void ReadData(unsigned char* data, uint64_t size) {
160 const int result = ::read(file_, data, size);
161 if (result < 0)
162 throw std::runtime_error("I/O error: could not read from file '" +
163 filename_ + "'");
164 }
165
166 void WriteData(const unsigned char* data, uint64_t size) {
167 const int result = write(file_, data, size);
168 if (result < 0)
169 throw std::runtime_error("I/O error: could not write to file '" +
170 filename_ + "'");
171 }
// True when this object holds a file descriptor (file_ is non-negative).
172 bool IsOpen() const { return file_ >= 0; }
// Offset of the first row in the file.
177 uint64_t DataLocation() const { return data_location_; }
// The stored filename.
178 const std::string& Filename() const { return filename_; }
// Number of bytes reserved for an optional header: the distance between the
// end of the private header and the first row.
182 uint64_t HeaderSize() const { return data_location_ - private_header_size_; }
// Write an optional extra header to the file.
// Seeks to the position directly after the private header
// (private_header_size_ bytes from the start of the file).
// NOTE(review): the embedded line numbering skips a line after the seek; the
// statement that writes `data` appears to be missing from this listing --
// confirm against the original header.
189 void WriteHeader(const unsigned char* data) {
190 Seek(private_header_size_, SEEK_SET);
192 }
// Read the optional extra header from the file.
// Seeks to the position directly after the private header
// (private_header_size_ bytes from the start of the file).
// NOTE(review): the embedded line numbering skips a line after the seek; the
// statement that fills `data` appears to be missing from this listing --
// confirm against the original header.
196 void ReadHeader(unsigned char* data) {
197 Seek(private_header_size_, SEEK_SET);
199 }
// Total number of rows stored in this file (the value of n_rows_).
203 uint64_t NRows() const { return n_rows_; }
204
205 void SetNRows(uint64_t new_n_rows) {
206 need_truncate_ = true;
207 n_rows_ = new_n_rows;
208 }
209
// Total number of bytes in one row.
214 uint64_t Stride() const { return stride_; }
// Set the number of bytes per row for this file.
// Requests truncation to zero rows, resets the row count to zero, stores
// the new stride and seeks back to the start of the file.
// NOTE(review): header_size is not used by any statement visible here, and
// the embedded line numbering skips two lines after the seek; the remaining
// statements appear to be missing from this listing -- confirm against the
// original header.
219 void SetStride(uint64_t new_stride) {
// Taken before the members below are modified.
220 const uint64_t header_size = HeaderSize();
221 Truncate(0);
222 n_rows_ = 0;
223 stride_ = new_stride;
224 Seek(0, SEEK_SET);
227 }
228
// Adds a given number of rows to the back of the file. Implemented through
// SetNRows(), so only the row count changes and a truncate is marked as
// pending.
232 void AddRows(uint64_t n_rows) { SetNRows(NRows() + n_rows); }
233
237 void DeleteRow() {
238 if (NRows() > 0) {
239 SetNRows(NRows() - 1);
240 }
241 }
242
243 private:
245 int result = close(file_);
246 file_ = -1;
248 n_rows_ = 0;
249 stride_ = 0;
251 filename_ = "";
252 if (result < 0)
253 throw std::runtime_error("Could not close file " + filename_);
254 }
255
257 // Collect entire private header in one write call
258 std::array<unsigned char, kWriterPrivateHeaderSize> private_header_buffer;
259 reinterpret_cast<uint32_t&>(private_header_buffer[0]) = kMagicFileTag;
260 reinterpret_cast<uint32_t&>(private_header_buffer[4]) = kFileVersion;
261 reinterpret_cast<uint32_t&>(private_header_buffer[8]) =
263 reinterpret_cast<uint64_t&>(private_header_buffer[12]) = stride_;
264 reinterpret_cast<uint64_t&>(private_header_buffer[20]) = HeaderSize();
265
266 WriteData(private_header_buffer.data(), private_header_buffer.size());
267 }
268
// The size of the private header that the writer creates for the current
// file format: three 32-bit fields plus two 64-bit fields (28 bytes).
281 inline constexpr static uint32_t kWriterPrivateHeaderSize =
282 3 * sizeof(uint32_t) + 2 * sizeof(uint64_t);
283
// First four bytes of a file.
295 inline constexpr static uint32_t kMagicFileTag = 0x66627243;
296
// Version of this file, in format 0xaabb, where aa is the major version and
// bb is the minor version.
305 inline constexpr static uint32_t kFileVersion = 0x0100;
306
/// Overload for a strerror_r() that returns an int status: a status of zero
/// means the message was stored in @p buffer.
/// @param result_value Status returned by strerror_r().
/// @param buffer Buffer that was passed to strerror_r().
/// @return The contents of @p buffer on success, "Unknown error" otherwise.
static std::string ErrorStringHelper(int result_value, char* buffer) {
  return result_value == 0 ? std::string(buffer)
                           : std::string("Unknown error");
}
313
/// Overload for a strerror_r() that returns a pointer to the message; the
/// buffer that was supplied to strerror_r() is ignored.
/// @param returned_buffer Pointer returned by strerror_r().
/// @return A copy of the message pointed to by @p returned_buffer.
static std::string ErrorStringHelper(char* returned_buffer,
                                     char* /*supplied_buffer*/) {
  const std::string message = returned_buffer;
  return message;
}
318
319 static std::string ErrorString() {
320 char errstr[128];
321 // This is a small trick to allow both versions of strerror_r: by using
322 // function overloading, the right behaviour is picked.
323 return ErrorStringHelper(strerror_r(errno, errstr, 128), errstr);
324 }
325
326 // The "C" file API is used because we need to use (f)truncate, which is not
327 // available from the C++ fstream API.
// File descriptor; -1 when no file is held.
328 int file_ = -1;
// Number of rows, as reported by NRows().
330 uint64_t n_rows_ = 0;
// Set by SetNRows(); when set, Close() truncates the file to NRows() rows.
331 bool need_truncate_ = false;
// Total number of bytes in one row.
332 uint64_t stride_ = 0;
// Name of the file; used in error messages.
338 std::string filename_;
339};
340
341} // namespace casacore
342
343#endif
void WriteData(const unsigned char *data, uint64_t size)
static constexpr uint32_t kFileVersion
Version of this file, in format 0xaabb, where aa is the major version and bb is the minor version.
uint64_t data_location_
This variable is also used to set/calculate the header size, using the relation: data_location_ = private_header_size_ + HeaderSize().
static constexpr uint32_t kWriterPrivateHeaderSize
The size of the private header that the writer creates for the current file format.
RowBasedFile(RowBasedFile &&rhs) noexcept
void Seek(off_t pos, int seek_direction)
static std::string ErrorStringHelper(char *returned_buffer, char *)
uint64_t DataLocation() const
Offset of the first row in the file.
void SetStride(uint64_t new_stride)
Set the number of bytes per row for this file.
void ReadHeader(unsigned char *data)
Read the optional extra header from the file.
RowBasedFile(const std::string &filename, size_t header_size)
Open an existing row-based file.
const std::string & Filename() const
RowBasedFile & operator=(RowBasedFile &&rhs)
static constexpr uint32_t kMagicFileTag
First four bytes of a file.
void WriteHeader(const unsigned char *data)
Write an optional extra header to the file.
void AddRows(uint64_t n_rows)
Adds a given number of rows to the back of the file.
void Truncate(uint64_t n_rows)
int file_
The "C" file API is used because we need to use (f)truncate, which is not available from the C++ fstream API.
uint64_t HeaderSize() const
Number of bytes reserved for an optional header.
uint64_t NRows() const
Total number of rows stored in this file.
RowBasedFile(const RowBasedFile &rhs)=delete
void ReadData(unsigned char *data, uint64_t size)
void DeleteRow()
Deletes the last row.
static std::string ErrorStringHelper(int result_value, char *buffer)
static std::string ErrorString()
uint64_t Stride() const
Total number of bytes in one row.
void SetNRows(uint64_t new_n_rows)
void Close()
Close the file.
this file contains all the compiler specific defines
Definition mainpage.dox:28
void close() override
Flush and close the file.