Skip to content

Commit

Permalink
Expand cache= option and use write-through caching by default
Browse files Browse the repository at this point in the history
This patch changes the cache= option to accept none, writeback, or writethrough
to control the host page cache behavior.  By default, writethrough caching is
now used, which internally is implemented by using O_DSYNC to open the disk
images.  When using -snapshot, writeback is used by default since data integrity
is not at all an issue.

cache=none has the same behavior as cache=off previously.  The latter syntax is
still supported but is now deprecated.  I also cleaned up the O_DIRECT
implementation to avoid many of the #ifdefs.

Signed-off-by: Anthony Liguori <[email protected]>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5485 c046a42c-6fe2-441c-8c8c-71466251a162
  • Loading branch information
aliguori committed Oct 14, 2008
1 parent eeb438c commit 9f7965c
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 38 deletions.
41 changes: 17 additions & 24 deletions block-raw-posix.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
#endif

/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available.
 * With this fallback O_DIRECT is always defined, so code that uses it
 * does not need #ifdef guards.
 * NOTE(review): O_DSYNC makes writes synchronous but presumably does not
 * bypass the host page cache the way O_DIRECT does -- confirm that this
 * approximation is acceptable on platforms lacking O_DIRECT. */
#ifndef O_DIRECT
#define O_DIRECT O_DSYNC
#endif

#define FTYPE_FILE 0
#define FTYPE_CD 1
#define FTYPE_FD 2
Expand Down Expand Up @@ -101,9 +106,7 @@ typedef struct BDRVRawState {
int fd_got_error;
int fd_media_changed;
#endif
#if defined(O_DIRECT)
uint8_t* aligned_buf;
#endif
} BDRVRawState;

static int posix_aio_init(void);
Expand All @@ -129,10 +132,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
}
if (flags & BDRV_O_CREAT)
open_flags |= O_CREAT | O_TRUNC;
#ifdef O_DIRECT
if (flags & BDRV_O_DIRECT)

/* Use O_DSYNC for write-through caching, no flags for write-back caching,
* and O_DIRECT for no caching. */
if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
#endif
else if (!(flags & BDRV_O_CACHE_WB))
open_flags |= O_DSYNC;

s->type = FTYPE_FILE;

Expand All @@ -146,17 +152,15 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
s->fd = fd;
for (i = 0; i < RAW_FD_POOL_SIZE; i++)
s->fd_pool[i] = -1;
#if defined(O_DIRECT)
s->aligned_buf = NULL;
if (flags & BDRV_O_DIRECT) {
if ((flags & BDRV_O_NOCACHE)) {
s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
if (s->aligned_buf == NULL) {
ret = -errno;
close(fd);
return ret;
}
}
#endif
return 0;
}

Expand Down Expand Up @@ -281,7 +285,6 @@ static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
}


#if defined(O_DIRECT)
/*
* offset and count are in bytes and possibly not aligned. For files opened
* with O_DIRECT, necessary alignments are ensured before calling
Expand Down Expand Up @@ -432,12 +435,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
return raw_pwrite_aligned(bs, offset, buf, count) + sum;
}

#else
#define raw_pread raw_pread_aligned
#define raw_pwrite raw_pwrite_aligned
#endif


#ifdef CONFIG_AIO
/***********************************************************/
/* Unix AIO using POSIX AIO */
Expand Down Expand Up @@ -661,7 +658,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
#if defined(O_DIRECT)
BDRVRawState *s = bs->opaque;

if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
Expand All @@ -672,7 +668,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
qemu_bh_schedule(bh);
return &acb->common;
}
#endif

acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
Expand All @@ -694,7 +689,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
#if defined(O_DIRECT)
BDRVRawState *s = bs->opaque;

if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
Expand All @@ -705,7 +699,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
qemu_bh_schedule(bh);
return &acb->common;
}
#endif

acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
Expand Down Expand Up @@ -770,10 +763,8 @@ static void raw_close(BlockDriverState *bs)
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
#if defined(O_DIRECT)
if (s->aligned_buf != NULL)
qemu_free(s->aligned_buf);
#endif
}
raw_close_fd_pool(s);
}
Expand Down Expand Up @@ -1003,10 +994,12 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
open_flags |= O_RDONLY;
bs->read_only = 1;
}
#ifdef O_DIRECT
if (flags & BDRV_O_DIRECT)
/* Use O_DSYNC for write-through caching, no flags for write-back caching,
* and O_DIRECT for no caching. */
if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
#endif
else if (!(flags & BDRV_O_CACHE_WB))
open_flags |= O_DSYNC;

s->type = FTYPE_FILE;
#if defined(__linux__)
Expand Down
8 changes: 6 additions & 2 deletions block-raw-win32.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,10 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
#else
overlapped = FILE_ATTRIBUTE_NORMAL;
#endif
if (flags & BDRV_O_DIRECT)
if ((flags & BDRV_O_NOCACHE))
overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
else if (!(flags & BDRV_O_CACHE_WB))
overlapped |= FILE_FLAG_WRITE_THROUGH;
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
create_flags, overlapped, NULL);
Expand Down Expand Up @@ -440,8 +442,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
#else
overlapped = FILE_ATTRIBUTE_NORMAL;
#endif
if (flags & BDRV_O_DIRECT)
if ((flags & BDRV_O_NOCACHE))
overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
else if (!(flags & BDRV_O_CACHE_WB))
overlapped |= FILE_FLAG_WRITE_THROUGH;
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
create_flags, overlapped, NULL);
Expand Down
6 changes: 3 additions & 3 deletions block.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,12 +395,12 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
/* Note: for compatibility, we open disk image files as RDWR, and
RDONLY as fallback */
if (!(flags & BDRV_O_FILE))
open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT);
open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
else
open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
ret = drv->bdrv_open(bs, filename, open_flags);
if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY);
ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
bs->read_only = 1;
}
if (ret < 0) {
Expand All @@ -427,7 +427,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
}
path_combine(backing_filename, sizeof(backing_filename),
filename, bs->backing_file);
if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0)
if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0)
goto fail;
}

Expand Down
5 changes: 4 additions & 1 deletion block.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ typedef struct QEMUSnapshotInfo {
use a disk image format on top of
it (default for
bdrv_file_open()) */
#define BDRV_O_DIRECT 0x0020
/* Host cache mode flags.  The raw block drivers test BDRV_O_NOCACHE first,
 * then BDRV_O_CACHE_WB; when neither flag is set they open the image
 * write-through (O_DSYNC / FILE_FLAG_WRITE_THROUGH). */
#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */

/* All cache-mode bits, used to extract the cache mode from an open-flags word */
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB)

void bdrv_info(void);
void bdrv_info_stats(void);
Expand Down
17 changes: 16 additions & 1 deletion qemu-doc.texi
Original file line number Diff line number Diff line change
Expand Up @@ -267,13 +267,28 @@ These options have the same definition as they have in @option{-hdachs}.
@item snapshot=@var{snapshot}
@var{snapshot} is "on" or "off" and allows to enable snapshot for given drive (see @option{-snapshot}).
@item cache=@var{cache}
@var{cache} is "on" or "off" and allows to disable host cache to access data.
@var{cache} is "none", "writeback", or "writethrough" and controls how the host cache is used to access block data.
@item format=@var{format}
Specify which disk @var{format} will be used rather than detecting
the format. Can be used to specify format=raw to avoid interpreting
an untrusted format header.
@end table

By default, writethrough caching is used for all block devices. This means that
the host page cache will be used to read and write data but write notification
will be sent to the guest only when the data has been reported as written by
the storage subsystem.

Writeback caching will report data writes as completed as soon as the data is
present in the host page cache. This is safe as long as you trust your host.
If your host crashes or loses power, then the guest may experience data
corruption. When using the @option{-snapshot} option, writeback caching is
used by default.

The host page cache can be avoided entirely with @option{cache=none}. This will
attempt to do disk IO directly to the guest's memory. QEMU may still perform
an internal copy of the data.

Instead of @option{-cdrom} you can use:
@example
qemu -drive file=file,index=2,media=cdrom
Expand Down
2 changes: 1 addition & 1 deletion qemu-nbd.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ int main(int argc, char **argv)
flags |= BDRV_O_SNAPSHOT;
break;
case 'n':
flags |= BDRV_O_DIRECT;
flags |= BDRV_O_NOCACHE;
break;
case 'b':
bindto = optarg;
Expand Down
18 changes: 12 additions & 6 deletions vl.c
Original file line number Diff line number Diff line change
Expand Up @@ -5648,10 +5648,12 @@ static int drive_init(struct drive_opt *arg, int snapshot,
}

if (get_param_value(buf, sizeof(buf), "cache", str)) {
if (!strcmp(buf, "off"))
if (!strcmp(buf, "off") || !strcmp(buf, "none"))
cache = 0;
else if (!strcmp(buf, "on"))
else if (!strcmp(buf, "writethrough"))
cache = 1;
else if (!strcmp(buf, "writeback"))
cache = 2;
else {
fprintf(stderr, "qemu: invalid cache option\n");
return -1;
Expand Down Expand Up @@ -5770,10 +5772,14 @@ static int drive_init(struct drive_opt *arg, int snapshot,
if (!file[0])
return 0;
bdrv_flags = 0;
if (snapshot)
if (snapshot) {
bdrv_flags |= BDRV_O_SNAPSHOT;
if (!cache)
bdrv_flags |= BDRV_O_DIRECT;
cache = 2; /* always use write-back with snapshot */
}
if (cache == 0) /* no caching */
bdrv_flags |= BDRV_O_NOCACHE;
else if (cache == 2) /* write-back */
bdrv_flags |= BDRV_O_CACHE_WB;
if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0 || qemu_key_check(bdrv, file)) {
fprintf(stderr, "qemu: could not open disk image %s\n",
file);
Expand Down Expand Up @@ -8145,7 +8151,7 @@ static void help(int exitcode)
"-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n"
"-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
" [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
" [,cache=on|off][,format=f]\n"
" [,cache=writethrough|writeback|none][,format=f]\n"
" use 'file' as a drive image\n"
"-mtdblock file use 'file' as on-board Flash memory image\n"
"-sd file use 'file' as SecureDigital card image\n"
Expand Down

0 comments on commit 9f7965c

Please sign in to comment.