forked from sqlite/sqlite
-
Notifications
You must be signed in to change notification settings - Fork 0
/
memjournal.c
434 lines (398 loc) · 13.4 KB
/
memjournal.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
/*
** 2008 October 7
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
**
** This file contains code used to implement an in-memory rollback journal.
** The in-memory rollback journal is used to journal transactions for
** ":memory:" databases and when the journal_mode=MEMORY pragma is used.
**
** Update: The in-memory journal is also used to temporarily cache
** smaller journals that are not critical for power-loss recovery.
** For example, statement journals that are not too big will be held
** entirely in memory, thus reducing the number of file I/O calls, and
** more importantly, reducing temporary file creation events. If these
** journals become too large for memory, they are spilled to disk. But
** in the common case, they are usually small and no file I/O needs to
** occur.
*/
#include "sqliteInt.h"
/* Forward references to internal structures. Declaring the typedefs up
** front lets the structure definitions that follow refer to each other
** (and to themselves) by their bare names. */
typedef struct MemJournal MemJournal;
typedef struct FilePoint FilePoint;
typedef struct FileChunk FileChunk;
/*
** The rollback journal is composed of a singly linked list of these
** structures, chained together through pNext.
**
** zChunk is declared with 8 bytes but is used as a variable-length trailing
** buffer: each FileChunk is allocated with fileChunkSize(nChunkSize) bytes,
** so the usable payload of a chunk is MemJournal.nChunkSize bytes. The
** declared length of 8 must stay in sync with the "-8" in fileChunkSize().
*/
struct FileChunk {
FileChunk *pNext; /* Next chunk in the journal, or 0 for the last chunk */
u8 zChunk[8]; /* Content of this chunk (nChunkSize bytes when allocated) */
};
/*
** By default, allocate this many bytes of memory for each FileChunk object.
** This is the size of the whole allocation (header plus payload), not of the
** payload alone; sqlite3JournalOpen() derives the payload size from it.
*/
#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024
/*
** For chunk size nChunkSize, return the number of bytes that should
** be allocated for each FileChunk structure.
**
** sizeof(FileChunk) already includes the 8 bytes declared for zChunk[],
** which is why 8 is subtracted from the requested payload size.
*/
#define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8))
/*
** An instance of this object serves as a cursor into the rollback journal.
** The cursor can be either for reading or writing.
**
** iOffset is a byte offset measured from the start of the journal and
** pChunk is the chunk associated with that position. pChunk may be 0,
** for example for the write cursor of a journal that holds no chunks.
*/
struct FilePoint {
sqlite3_int64 iOffset; /* Offset from the beginning of the file */
FileChunk *pChunk; /* Specific chunk into which cursor points */
};
/*
** This structure is a subclass of sqlite3_file. Each open memory-journal
** is an instance of this class.
**
** The same memory is handed to sqlite3OsOpen() as a plain sqlite3_file when
** the journal is opened on disk (see sqlite3JournalOpen() and
** memjrnlCreateFile()), which is why pMethod has to be the first member.
*/
struct MemJournal {
const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */
int nChunkSize; /* In-memory chunk-size (payload bytes per FileChunk) */
int nSpill; /* Bytes of data before flushing. >0: spill to disk when a
** write would extend past this offset. <0: never spill */
int nSize; /* Bytes of data currently in memory */
FileChunk *pFirst; /* Head of in-memory chunk-list */
FilePoint endpoint; /* Pointer to the end of the file */
FilePoint readpoint; /* Pointer to the end of the last xRead() */
int flags; /* xOpen flags */
sqlite3_vfs *pVfs; /* The "real" underlying VFS */
const char *zJournal; /* Name of the journal file */
};
/*
** Read data from the in-memory journal file. This routine is installed as
** the xRead method in the MemJournalMethods table.
**
** Copies iAmt bytes starting at journal offset iOfst into zBuf, walking the
** chunk list as required, and remembers where the read finished so that a
** following sequential read can continue without rescanning the list.
**
** The requested range must lie entirely within the journal. In builds with
** SQLITE_ENABLE_ATOMIC_WRITE or SQLITE_ENABLE_BATCH_ATOMIC_WRITE a read
** past the end returns SQLITE_IOERR_SHORT_READ; in other builds the
** condition is only asserted. Otherwise SQLITE_OK is returned.
*/
static int memjrnlRead(
sqlite3_file *pJfd, /* The journal file from which to read */
void *zBuf, /* Put the results here */
int iAmt, /* Number of bytes to read */
sqlite_int64 iOfst /* Begin reading at this offset */
){
MemJournal *p = (MemJournal *)pJfd;
u8 *zOut = zBuf;
int nRead = iAmt;
int iChunkOffset;
FileChunk *pChunk;
#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
|| defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
if( (iAmt+iOfst)>p->endpoint.iOffset ){
return SQLITE_IOERR_SHORT_READ;
}
#endif
assert( (iAmt+iOfst)<=p->endpoint.iOffset );
assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 );
/* Locate the chunk holding offset iOfst. If this read does not start where
** the previous one finished (or it starts at offset 0), walk the list from
** the head, skipping whole chunks. Otherwise reuse the cached chunk. */
if( p->readpoint.iOffset!=iOfst || iOfst==0 ){
sqlite3_int64 iOff = 0;
for(pChunk=p->pFirst;
ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst;
pChunk=pChunk->pNext
){
iOff += p->nChunkSize;
}
}else{
pChunk = p->readpoint.pChunk;
assert( pChunk!=0 );
}
iChunkOffset = (int)(iOfst%p->nChunkSize);
/* Copy chunk by chunk. nRead is reduced by iSpace (the bytes left in the
** current chunk) rather than by nCopy, so it becomes negative when the
** request finishes inside the current chunk and exactly zero when it
** finishes on a chunk boundary. In the zero case the loop condition still
** advances pChunk to the following chunk before stopping, so the saved
** read cursor refers to the chunk that holds the next unread byte. */
do {
int iSpace = p->nChunkSize - iChunkOffset;
int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset));
memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy);
zOut += nCopy;
nRead -= iSpace;
iChunkOffset = 0;
} while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 );
/* Save the read cursor. If the chunk list ran out (pChunk==0), store an
** offset of 0 so that the next read rescans from the head of the list. */
p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0;
p->readpoint.pChunk = pChunk;
return SQLITE_OK;
}
/*
** Release every FileChunk on the list that starts at p->pFirst and leave
** the list head set to 0. Other members of *p (including the endpoint and
** readpoint cursors) are not modified by this routine.
*/
static void memjrnlFreeChunks(MemJournal *p){
  FileChunk *pChunk = p->pFirst;
  while( pChunk ){
    /* Fetch the successor before the current chunk is released. */
    FileChunk *pFollowing = pChunk->pNext;
    sqlite3_free(pChunk);
    pChunk = pFollowing;
  }
  p->pFirst = 0;
}
/*
** Flush the contents of memory to a real file on disk.
**
** The MemJournal object is first saved in a local copy and then zeroed, so
** that the same memory can be handed to sqlite3OsOpen() as the handle of
** the real journal file. Every in-memory chunk is then written to that file
** in order; the final chunk is trimmed so that exactly
** copy.endpoint.iOffset bytes are written in total.
**
** On success the in-memory chunks are freed and SQLITE_OK is returned; *p
** is then whatever sqlite3OsOpen() made of it. On failure the real file is
** closed, *p is restored from the saved copy (chunk list intact) and the
** error code from sqlite3OsOpen() or sqlite3OsWrite() is returned.
*/
static int memjrnlCreateFile(MemJournal *p){
  int rc;
  sqlite3_file *pReal = (sqlite3_file*)p;
  MemJournal copy = *p;

  memset(p, 0, sizeof(MemJournal));
  rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0);
  if( rc==SQLITE_OK ){
    int nChunk = copy.nChunkSize;
    i64 iOff = 0;
    FileChunk *pIter;
    for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){
      if( iOff + nChunk > copy.endpoint.iOffset ){
        /* Last, partially filled chunk: write only the bytes in use. */
        nChunk = (int)(copy.endpoint.iOffset - iOff);
      }
      rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff);
      if( rc ) break;
      iOff += nChunk;
    }
    if( rc==SQLITE_OK ){
      /* No error has occurred. Free the in-memory buffers, which are now
      ** reachable only through the saved copy. */
      memjrnlFreeChunks(&copy);
    }
  }
  if( rc!=SQLITE_OK ){
    /* If an error occurred while creating or writing to the file, restore
    ** the original before returning. This way, SQLite uses the in-memory
    ** journal data to roll back changes made to the internal page-cache
    ** before this function was called. */
    sqlite3OsClose(pReal);
    *p = copy;
  }
  return rc;
}
/*
** Write data to the file. This routine is installed as the xWrite method
** in the MemJournalMethods table.
**
** If a positive spill threshold is configured and this write would extend
** the journal beyond it, the journal is first moved to a real file with
** memjrnlCreateFile() and the write is then forwarded to that file.
** Otherwise the data is appended to the in-memory chunk list, allocating
** new chunks as needed.
**
** Returns SQLITE_OK on success, SQLITE_IOERR_NOMEM_BKPT if a chunk cannot
** be allocated, or the error code from memjrnlCreateFile()/sqlite3OsWrite()
** on the spill path.
*/
static int memjrnlWrite(
sqlite3_file *pJfd, /* The journal file into which to write */
const void *zBuf, /* Take data to be written from here */
int iAmt, /* Number of bytes to write */
sqlite_int64 iOfst /* Begin writing at this offset into the file */
){
MemJournal *p = (MemJournal *)pJfd;
int nWrite = iAmt;
u8 *zWrite = (u8 *)zBuf;
/* If the file should be created now, create it and write the new data
** into the file on disk. */
if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){
int rc = memjrnlCreateFile(p);
if( rc==SQLITE_OK ){
rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst);
}
return rc;
}
/* If the contents of this write should be stored in memory */
else{
/* An in-memory journal file should only ever be appended to. Random
** access writes are not required. The only exception to this is when
** the in-memory journal is being used by a connection using the
** atomic-write optimization. In this case the first 28 bytes of the
** journal file may be written as part of committing the transaction. */
assert( iOfst==p->endpoint.iOffset || iOfst==0 );
/* In atomic-write builds a write at offset 0 of a non-empty journal
** overwrites the start of the first chunk in place. The "else" that ends
** that branch binds to the braced append block following the #endif. In
** other builds the append block always runs. */
#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
|| defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
if( iOfst==0 && p->pFirst ){
assert( p->nChunkSize>iAmt );
memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt);
}else
#else
assert( iOfst>0 || p->pFirst==0 );
#endif
{
/* Append loop: each pass copies as much as fits in the current chunk. */
while( nWrite>0 ){
FileChunk *pChunk = p->endpoint.pChunk;
int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize);
int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset);
/* An offset of 0 within the chunk means that either there is no chunk
** yet or the current one is completely full. */
if( iChunkOffset==0 ){
/* New chunk is required to extend the file. */
FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize));
if( !pNew ){
/* Data copied by earlier passes stays in the journal. */
return SQLITE_IOERR_NOMEM_BKPT;
}
pNew->pNext = 0;
if( pChunk ){
assert( p->pFirst );
pChunk->pNext = pNew;
}else{
assert( !p->pFirst );
p->pFirst = pNew;
}
p->endpoint.pChunk = pNew;
}
memcpy((u8*)p->endpoint.pChunk->zChunk + iChunkOffset, zWrite, iSpace);
zWrite += iSpace;
nWrite -= iSpace;
p->endpoint.iOffset += iSpace;
}
p->nSize = iAmt + iOfst;
}
}
return SQLITE_OK;
}
/*
** Truncate the journal. This routine is installed as the xTruncate method
** in the MemJournalMethods table.
**
** Only truncation to zero bytes is expected, hence the ALWAYS(). In that
** case every chunk is released and both the write cursor and the read
** cursor are rewound, leaving an empty journal. For any other size nothing
** is modified. SQLITE_OK is returned in every case.
*/
static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){
  MemJournal *pJrnl = (MemJournal *)pJfd;

  if( !ALWAYS(size==0) ){
    return SQLITE_OK;
  }

  /* Drop the content, then reset the bookkeeping that referred to it. */
  memjrnlFreeChunks(pJrnl);
  pJrnl->nSize = 0;
  pJrnl->endpoint.pChunk = 0;
  pJrnl->endpoint.iOffset = 0;
  pJrnl->readpoint.pChunk = 0;
  pJrnl->readpoint.iOffset = 0;
  return SQLITE_OK;
}
/*
** Close the journal. This routine is installed as the xClose method in the
** MemJournalMethods table.
**
** All in-memory chunks are released. The MemJournal object itself is not
** freed here. Always returns SQLITE_OK.
*/
static int memjrnlClose(sqlite3_file *pJfd){
  memjrnlFreeChunks((MemJournal *)pJfd);
  return SQLITE_OK;
}
/*
** Sync the file. This routine is installed as the xSync method in the
** MemJournalMethods table.
**
** Syncing an in-memory journal is a no-op: both arguments are ignored and
** SQLITE_OK is returned. NOTE(review): once memjrnlCreateFile() has moved
** the journal to disk the handle has been re-opened via sqlite3OsOpen(), so
** presumably syncs of the real file are served by that file's own method
** table rather than by this routine - confirm.
*/
static int memjrnlSync(sqlite3_file *pJfd, int flags){
UNUSED_PARAMETER2(pJfd, flags);
return SQLITE_OK;
}
/*
** Report the size of the journal in bytes. This routine is installed as
** the xFileSize method in the MemJournalMethods table.
**
** The size is the offset of the write cursor (the end of the journal). It
** is stored in *pSize. Always returns SQLITE_OK.
*/
static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){
  MemJournal *pJrnl = (MemJournal *)pJfd;
  sqlite_int64 nByte = (sqlite_int64)pJrnl->endpoint.iOffset;

  *pSize = nByte;
  return SQLITE_OK;
}
/*
** Table of methods for MemJournal sqlite3_file object.
**
** Only xClose, xRead, xWrite, xTruncate, xSync and xFileSize are provided;
** every other slot is 0. The address of this table is also used as a tag:
** sqlite3JournalIsInMemory() and sqlite3JournalCreate() compare a handle's
** method pointer against it to recognise a journal that is still in memory.
*/
static const struct sqlite3_io_methods MemJournalMethods = {
1, /* iVersion */
memjrnlClose, /* xClose */
memjrnlRead, /* xRead */
memjrnlWrite, /* xWrite */
memjrnlTruncate, /* xTruncate */
memjrnlSync, /* xSync */
memjrnlFileSize, /* xFileSize */
0, /* xLock */
0, /* xUnlock */
0, /* xCheckReservedLock */
0, /* xFileControl */
0, /* xSectorSize */
0, /* xDeviceCharacteristics */
0, /* xShmMap */
0, /* xShmLock */
0, /* xShmBarrier */
0, /* xShmUnmap */
0, /* xFetch */
0 /* xUnfetch */
};
/*
** Open a journal file.
**
** The behaviour of the journal file depends on the value of parameter
** nSpill:
**
**   nSpill==0   The journal is always created and accessed using the
**               underlying VFS. None of the code in this module runs for
**               calls made on the resulting file-handle.
**
**   nSpill<0    All content is always stored in main-memory, in chunks
**               whose total allocation is MEMJOURNAL_DFLT_FILECHUNKSIZE
**               bytes each.
**
**   nSpill>0    The journal is initially created in-memory, using a chunk
**               size of nSpill bytes, but may be flushed to disk later on:
**               either when it grows larger than nSpill bytes in size, or
**               when sqlite3JournalCreate() is called.
**
** pJfd must point to at least sqlite3JournalSize(pVfs) bytes of space; it
** is zeroed before use. Returns SQLITE_OK for the in-memory cases, or the
** result of sqlite3OsOpen() when nSpill is zero.
*/
int sqlite3JournalOpen(
  sqlite3_vfs *pVfs,         /* The VFS to use for actual file I/O */
  const char *zName,         /* Name of the journal file */
  sqlite3_file *pJfd,        /* Preallocated, blank file handle */
  int flags,                 /* Opening flags */
  int nSpill                 /* Bytes buffered before opening the file */
){
  MemJournal *pJrnl = (MemJournal*)pJfd;

  /* Start from a blank handle in every case. */
  memset(pJrnl, 0, sizeof(MemJournal));

  /* A zero threshold means "no buffering": open the real file directly. */
  if( nSpill==0 ){
    return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0);
  }

  /* nSpill is non-zero from here on, so it is either negative (pure
  ** in-memory journal) or positive (in-memory until it spills). */
  if( nSpill<0 ){
    pJrnl->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk);
    assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(pJrnl->nChunkSize) );
  }else{
    pJrnl->nChunkSize = nSpill;
  }

  /* Remember everything needed to create the real file later on. */
  pJrnl->pVfs = pVfs;
  pJrnl->zJournal = zName;
  pJrnl->flags = flags;
  pJrnl->nSpill = nSpill;
  pJrnl->pMethod = (const sqlite3_io_methods*)&MemJournalMethods;
  return SQLITE_OK;
}
/*
** Open a journal that lives in memory only. This is sqlite3JournalOpen()
** with no VFS, no file name, no flags and a negative spill threshold, which
** selects the mode in which content is never flushed to disk. The return
** value of sqlite3JournalOpen() is discarded.
*/
void sqlite3MemJournalOpen(sqlite3_file *pJfd){
  const int nNeverSpill = -1;      /* Negative: keep everything in memory */
  (void)sqlite3JournalOpen(0, 0, pJfd, 0, nNeverSpill);
}
#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
|| defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
/*
** If the argument p points to a MemJournal structure that is not an
** in-memory-only journal file (i.e. is one that was opened with a +ve
** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying
** file has not yet been created, create it now.
**
** This function is only compiled when SQLITE_ENABLE_ATOMIC_WRITE or
** SQLITE_ENABLE_BATCH_ATOMIC_WRITE is defined. A handle whose method
** pointer is not &MemJournalMethods is left alone. The SQLITE_OPEN_MAIN_JOURNAL
** test is only part of the condition in SQLITE_ENABLE_BATCH_ATOMIC_WRITE
** builds.
**
** Returns SQLITE_OK if there was nothing to do, otherwise the result of
** memjrnlCreateFile().
*/
int sqlite3JournalCreate(sqlite3_file *pJfd){
int rc = SQLITE_OK;
MemJournal *p = (MemJournal*)pJfd;
if( p->pMethod==&MemJournalMethods && (
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
p->nSpill>0
#else
/* While this appears to not be possible without ATOMIC_WRITE, the
** paths are complex, so it seems prudent to leave the test in as
** a NEVER(), in case our analysis is subtly flawed. */
NEVER(p->nSpill>0)
#endif
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
|| (p->flags & SQLITE_OPEN_MAIN_JOURNAL)
#endif
)){
rc = memjrnlCreateFile(p);
}
return rc;
}
#endif
/*
** The file-handle passed as the only argument is open on a journal file.
** Return 1 if the handle still uses the MemJournalMethods table, meaning
** that the "journal file" is currently stored in heap memory, or 0 if it
** uses any other method table.
*/
int sqlite3JournalIsInMemory(sqlite3_file *p){
  if( p->pMethods==&MemJournalMethods ){
    return 1;
  }
  return 0;
}
/*
** Return the number of bytes required to store a journal file-handle that
** uses vfs pVfs to create the underlying on-disk files. The same memory
** must be able to hold either a MemJournal or a file-handle of the VFS, so
** the result is the larger of the two sizes.
*/
int sqlite3JournalSize(sqlite3_vfs *pVfs){
  int nVfsHandle = pVfs->szOsFile;
  int nMemHandle = (int)sizeof(MemJournal);
  return nVfsHandle>nMemHandle ? nVfsHandle : nMemHandle;
}