Merge tag 'dm-3.17-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper changes from Mike Snitzer:

 - Allow the thin target to be paired with any size external origin; also
   allow thin snapshots to be larger than the external origin.

 - Add support for quickly loading a repetitive pattern into the
   dm-switch target.

 - Use per-bio data in the dm-crypt target instead of always using a
   mempool for each allocation.  Required switching to kmalloc alignment
   for the bio slab.

 - Fix DM core to properly stack the QUEUE_FLAG_NO_SG_MERGE flag

 - Fix the dm-cache and dm-thin targets' export of the minimum_io_size
   to match the data block size -- this fixes an issue where mkfs.xfs
   would improperly infer raid striping was in place on the underlying
   storage.

 - Small cleanups in dm-io, dm-mpath and dm-cache

* tag 'dm-3.17-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm table: propagate QUEUE_FLAG_NO_SG_MERGE
  dm switch: efficiently support repetitive patterns
  dm switch: factor out switch_region_table_read
  dm cache: set minimum_io_size to cache's data block size
  dm thin: set minimum_io_size to pool's data block size
  dm crypt: use per-bio data
  block: use kmalloc alignment for bio slab
  dm table: make dm_table_supports_discards static
  dm cache metadata: use dm-space-map-metadata.h defined size limits
  dm cache: fail migrations in the do_worker error path
  dm cache: simplify deferred set reference count increments
  dm thin: relax external origin size constraints
  dm thin: switch to an atomic_t for tracking pending new block preparations
  dm mpath: eliminate pg_ready() wrapper
  dm io: simplify dec_count and sync_io
torvalds committed Aug 14, 2014
2 parents a8e4def + 200612e commit ba36899
Showing 12 changed files with 408 additions and 206 deletions.
12 changes: 12 additions & 0 deletions Documentation/device-mapper/switch.txt
@@ -106,6 +106,11 @@ which paths.
The path number in the range 0 ... (<num_paths> - 1).
Expressed in hexadecimal (WITHOUT any prefix like 0x).

R<n>,<m>
This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
slots.

Status
======

@@ -124,3 +129,10 @@ Create a switch device with 64kB region size:
Set mappings for the first 7 entries to point to devices switch0, switch1,
switch2, switch0, switch1, switch2, switch1:
dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1

Set repetitive mapping. This command:
dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
is equivalent to:
dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
:1 :2 :1 :2 :1 :2 :1 :2 :1 :2
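
For illustration only (not part of switch.txt or this commit), here is a standalone C sketch of the R<n>,<m> expansion described above; expand_repeat() and the in-memory table are hypothetical stand-ins for the target's region table. Running it prints the two explicit mappings followed by the sixteen repeated ones, matching the long-hand command.

/* Illustration only (hypothetical helper, not kernel code): expand_repeat()
 * models the R<n>,<m> rule by copying the last n mappings into the next
 * m slots of an in-memory region table. */
#include <stdio.h>

static void expand_repeat(unsigned *table, unsigned filled,
                          unsigned n, unsigned m)
{
    unsigned i;

    for (i = 0; i < m; i++)
        table[filled + i] = table[filled - n + (i % n)];
}

int main(void)
{
    /* "1000:1 :2 R2,10": region 0x1000 -> path 1, 0x1001 -> path 2,
     * then repeat the last 2 mappings into the next 0x10 (16) slots. */
    unsigned table[18] = { 1, 2 };
    unsigned i;

    expand_repeat(table, 2, 2, 0x10);

    for (i = 0; i < 18; i++)
        printf("region 0x%x -> path %u\n", 0x1000 + i, table[i]);

    return 0;
}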

3 changes: 2 additions & 1 deletion block/bio.c
@@ -112,7 +112,8 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
bslab = &bio_slabs[entry];

snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
SLAB_HWCACHE_ALIGN, NULL);
if (!slab)
goto out_unlock;

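The per-bio-data mechanism referenced in the merge description places target data in the same slab allocation as the bio, in place of a separate kmalloc/mempool allocation, so the bio slab needs kmalloc-equivalent alignment. A minimal userspace sketch of that constraint (MINALIGN and struct fake_bio are hypothetical stand-ins for ARCH_KMALLOC_MINALIGN and struct bio; this is not kernel code):

/* Standalone sketch: a payload carved out of a larger allocation is only
 * as aligned as the allocation base plus its offset, so the backing slab
 * must guarantee at least the alignment kmalloc() would have given. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define MINALIGN        8       /* stand-in for ARCH_KMALLOC_MINALIGN */
#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((size_t)(a) - 1))

struct fake_bio { char hdr[52]; };      /* deliberately not a multiple of 8 */

int main(void)
{
    size_t payload_off = ALIGN_UP(sizeof(struct fake_bio), MINALIGN);
    char *base = aligned_alloc(MINALIGN, payload_off + 64);  /* 56 + 64 = 120, multiple of 8 */
    char *payload = base + payload_off;

    printf("payload offset %zu, misalignment %zu\n", payload_off,
           (size_t)((uintptr_t)payload % MINALIGN));  /* 0 only because base is aligned */
    free(base);
    return 0;
}
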
4 changes: 2 additions & 2 deletions drivers/md/dm-cache-metadata.c
@@ -330,7 +330,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
disk_super->cache_blocks = cpu_to_le32(0);

@@ -478,7 +478,7 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
bool may_format_device)
{
int r;
cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE,
cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
CACHE_METADATA_CACHE_SIZE,
CACHE_MAX_CONCURRENT_LOCKS);
if (IS_ERR(cmd->bm)) {
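
To make the unit change concrete: DM_CACHE_METADATA_BLOCK_SIZE is now a sector count taken from DM_SM_METADATA_BLOCK_SIZE rather than a byte count, which is why the superblock field above drops its right shift while the block manager, whose block size argument is in bytes, gains a left shift. A tiny standalone illustration with hypothetical constants (not kernel code):

/* Illustrative sketch only: convert a sector-based block size to bytes. */
#include <stdio.h>

#define SECTOR_SHIFT            9       /* 512-byte sectors */
#define METADATA_BLOCK_SECTORS  8       /* 4096 bytes / 512 */

int main(void)
{
    unsigned block_sectors = METADATA_BLOCK_SECTORS;
    unsigned block_bytes = block_sectors << SECTOR_SHIFT;

    printf("metadata block: %u sectors = %u bytes\n", block_sectors, block_bytes);
    return 0;
}
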
8 changes: 3 additions & 5 deletions drivers/md/dm-cache-metadata.h
@@ -9,19 +9,17 @@

#include "dm-cache-block-types.h"
#include "dm-cache-policy-internal.h"
#include "persistent-data/dm-space-map-metadata.h"

/*----------------------------------------------------------------*/

#define DM_CACHE_METADATA_BLOCK_SIZE 4096
#define DM_CACHE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE

/* FIXME: remove this restriction */
/*
* The metadata device is currently limited in size.
*
* We have one block of index, which can hold 255 index entries. Each
* index entry contains allocation info about 16k metadata blocks.
*/
#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT)))
#define DM_CACHE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS

/*
* A metadata device larger than 16GB triggers a warning.
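
The limit encoded by the removed comment and macro is easy to check: 255 index entries, each covering 16k metadata blocks of 8 sectors (4096 bytes), lands just under the 16GB warning threshold mentioned above. A standalone sketch of that arithmetic (not kernel code):

/* Illustrative arithmetic only: 255 index entries * 16k metadata blocks
 * per entry * 8 sectors per 4096-byte block. */
#include <stdio.h>

int main(void)
{
    unsigned long long sectors = 255ULL * (1 << 14) * 8;
    unsigned long long bytes = sectors << 9;        /* 512-byte sectors */

    printf("%llu sectors = %llu bytes (about %.1f GiB)\n",
           sectors, bytes, (double)bytes / (1ULL << 30));
    return 0;
}
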
128 changes: 80 additions & 48 deletions drivers/md/dm-cache-target.c
@@ -718,6 +718,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio)
return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}

/*
* You must increment the deferred set whilst the prison cell is held. To
* encourage this, we ask for 'cell' to be passed in.
*/
static void inc_ds(struct cache *cache, struct bio *bio,
struct dm_bio_prison_cell *cell)
{
size_t pb_data_size = get_per_bio_data_size(cache);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

BUG_ON(!cell);
BUG_ON(pb->all_io_entry);

pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
}
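
A deliberately simplified standalone model of the pattern inc_ds() enforces (it is not the kernel's dm_deferred_set, which is generational and lock-protected): every in-flight bio takes an entry while its prison cell is held, and work queued against the set is released only when all entries have been dropped via dm_deferred_entry_dec().

/* Simplified standalone model, not kernel code. */
#include <stdio.h>

struct deferred_set {
    unsigned count;         /* in-flight entries */
    unsigned queued_work;   /* work waiting for the set to drain */
};

static void entry_inc(struct deferred_set *ds)
{
    ds->count++;
}

static void entry_dec(struct deferred_set *ds)
{
    if (--ds->count == 0 && ds->queued_work) {
        printf("set drained: releasing %u queued migrations\n", ds->queued_work);
        ds->queued_work = 0;
    }
}

int main(void)
{
    struct deferred_set ds = { 0, 0 };

    entry_inc(&ds);         /* bio A mapped: inc_ds() while its cell is held */
    entry_inc(&ds);         /* bio B mapped */
    ds.queued_work = 3;     /* migrations must wait for in-flight IO */

    entry_dec(&ds);         /* bio A completes */
    entry_dec(&ds);         /* bio B completes: queued work is released */
    return 0;
}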

static void issue(struct cache *cache, struct bio *bio)
{
unsigned long flags;
@@ -737,6 +753,12 @@ static void issue(struct cache *cache, struct bio *bio)
spin_unlock_irqrestore(&cache->lock, flags);
}

static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
{
inc_ds(cache, bio, cell);
issue(cache, bio);
}

static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
unsigned long flags;
@@ -1015,6 +1037,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)

dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);

/*
* No need to inc_ds() here, since the cell will be held for the
* duration of the io.
*/
generic_make_request(bio);
}

@@ -1115,8 +1142,7 @@ static void check_for_quiesced_migrations(struct cache *cache,
return;

INIT_LIST_HEAD(&work);
if (pb->all_io_entry)
dm_deferred_entry_dec(pb->all_io_entry, &work);
dm_deferred_entry_dec(pb->all_io_entry, &work);

if (!list_empty(&work))
queue_quiesced_migrations(cache, &work);
@@ -1252,6 +1278,11 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
else
remap_to_cache(cache, bio, 0);

/*
* REQ_FLUSH is not directed at any particular block so we don't
* need to inc_ds(). REQ_FUA's are split into a write + REQ_FLUSH
* by dm-core.
*/
issue(cache, bio);
}

@@ -1301,15 +1332,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
&cache->stats.read_miss : &cache->stats.write_miss);
}

static void issue_cache_bio(struct cache *cache, struct bio *bio,
struct per_bio_data *pb,
dm_oblock_t oblock, dm_cblock_t cblock)
{
pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_cache_dirty(cache, bio, oblock, cblock);
issue(cache, bio);
}

static void process_bio(struct cache *cache, struct prealloc *structs,
struct bio *bio)
{
@@ -1318,8 +1340,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
dm_oblock_t block = get_bio_block(cache, bio);
struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
struct policy_result lookup_result;
size_t pb_data_size = get_per_bio_data_size(cache);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
bool discarded_block = is_discarded_oblock(cache, block);
bool passthrough = passthrough_mode(&cache->features);
bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
@@ -1359,30 +1379,30 @@

} else {
/* FIXME: factor out issue_origin() */
pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_clear_discard(cache, bio, block);
issue(cache, bio);
inc_and_issue(cache, bio, new_ocell);
}
} else {
inc_hit_counter(cache, bio);

if (bio_data_dir(bio) == WRITE &&
writethrough_mode(&cache->features) &&
!is_dirty(cache, lookup_result.cblock)) {
pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
issue(cache, bio);
} else
issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
inc_and_issue(cache, bio, new_ocell);

} else {
remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
inc_and_issue(cache, bio, new_ocell);
}
}

break;

case POLICY_MISS:
inc_miss_counter(cache, bio);
pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
remap_to_origin_clear_discard(cache, bio, block);
issue(cache, bio);
inc_and_issue(cache, bio, new_ocell);
break;

case POLICY_NEW:
@@ -1501,6 +1521,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
bio_list_init(&cache->deferred_flush_bios);
spin_unlock_irqrestore(&cache->lock, flags);

/*
* These bios have already been through inc_ds()
*/
while ((bio = bio_list_pop(&bios)))
submit_bios ? generic_make_request(bio) : bio_io_error(bio);
}
@@ -1518,6 +1541,9 @@ static void process_deferred_writethrough_bios(struct cache *cache)
bio_list_init(&cache->deferred_writethrough_bios);
spin_unlock_irqrestore(&cache->lock, flags);

/*
* These bios have already been through inc_ds()
*/
while ((bio = bio_list_pop(&bios)))
generic_make_request(bio);
}
@@ -1694,6 +1720,7 @@ static void do_worker(struct work_struct *ws)

if (commit_if_needed(cache)) {
process_deferred_flush_bios(cache, false);
process_migrations(cache, &cache->need_commit_migrations, migration_failure);

/*
* FIXME: rollback metadata or just go into a
@@ -2406,16 +2433,13 @@ static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
return r;
}

static int cache_map(struct dm_target *ti, struct bio *bio)
static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
{
struct cache *cache = ti->private;

int r;
dm_oblock_t block = get_bio_block(cache, bio);
size_t pb_data_size = get_per_bio_data_size(cache);
bool can_migrate = false;
bool discarded_block;
struct dm_bio_prison_cell *cell;
struct policy_result lookup_result;
struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);

@@ -2437,15 +2461,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
/*
* Check to see if that block is currently migrating.
*/
cell = alloc_prison_cell(cache);
if (!cell) {
*cell = alloc_prison_cell(cache);
if (!*cell) {
defer_bio(cache, bio);
return DM_MAPIO_SUBMITTED;
}

r = bio_detain(cache, block, bio, cell,
r = bio_detain(cache, block, bio, *cell,
(cell_free_fn) free_prison_cell,
cache, &cell);
cache, cell);
if (r) {
if (r < 0)
defer_bio(cache, bio);
Expand All @@ -2458,11 +2482,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
bio, &lookup_result);
if (r == -EWOULDBLOCK) {
cell_defer(cache, cell, true);
cell_defer(cache, *cell, true);
return DM_MAPIO_SUBMITTED;

} else if (r) {
DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
cell_defer(cache, *cell, false);
bio_io_error(bio);
return DM_MAPIO_SUBMITTED;
}
@@ -2476,59 +2501,66 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
* We need to invalidate this block, so
* defer for the worker thread.
*/
cell_defer(cache, cell, true);
cell_defer(cache, *cell, true);
r = DM_MAPIO_SUBMITTED;

} else {
pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
inc_miss_counter(cache, bio);
remap_to_origin_clear_discard(cache, bio, block);

cell_defer(cache, cell, false);
}

} else {
inc_hit_counter(cache, bio);
pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
!is_dirty(cache, lookup_result.cblock))
remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
else
remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);

cell_defer(cache, cell, false);
}
break;

case POLICY_MISS:
inc_miss_counter(cache, bio);
pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

if (pb->req_nr != 0) {
/*
* This is a duplicate writethrough io that is no
* longer needed because the block has been demoted.
*/
bio_endio(bio, 0);
cell_defer(cache, cell, false);
return DM_MAPIO_SUBMITTED;
} else {
cell_defer(cache, *cell, false);
r = DM_MAPIO_SUBMITTED;

} else
remap_to_origin_clear_discard(cache, bio, block);
cell_defer(cache, cell, false);
}

break;

default:
DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
(unsigned) lookup_result.op);
cell_defer(cache, *cell, false);
bio_io_error(bio);
r = DM_MAPIO_SUBMITTED;
}

return r;
}

static int cache_map(struct dm_target *ti, struct bio *bio)
{
int r;
struct dm_bio_prison_cell *cell;
struct cache *cache = ti->private;

r = __cache_map(cache, bio, &cell);
if (r == DM_MAPIO_REMAPPED) {
inc_ds(cache, bio, cell);
cell_defer(cache, cell, false);
}

return r;
}
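
A hypothetical standalone model of the shape of this refactor: the inner mapper decides whether the bio was remapped or already submitted/deferred and hands the held cell back to its caller, so the shared epilogue (increment the deferred set, then release the cell) runs in exactly one place rather than in every branch.

/* Hypothetical model only, not kernel code. */
#include <stdio.h>

enum map_ret { MAP_SUBMITTED, MAP_REMAPPED };

struct cell { const char *name; };

static enum map_ret inner_map(int cache_hit, struct cell **out_cell)
{
    static struct cell c = { "prison cell" };

    *out_cell = &c;
    /* on the SUBMITTED paths a worker (not modelled here) owns the cell */
    return cache_hit ? MAP_REMAPPED : MAP_SUBMITTED;
}

static void outer_map(int cache_hit)
{
    struct cell *cell;

    if (inner_map(cache_hit, &cell) == MAP_REMAPPED)
        /* the one shared epilogue: inc deferred set, then release cell */
        printf("remapped: inc_ds() under %s, then release it\n", cell->name);
    else
        printf("submitted: deferred to the worker\n");
}

int main(void)
{
    outer_map(1);
    outer_map(0);
    return 0;
}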

static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
{
struct cache *cache = ti->private;
@@ -2808,7 +2840,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
residency = policy_residency(cache->policy);

DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
(unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
(unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
(unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
(unsigned long long)nr_blocks_metadata,
cache->sectors_per_block,
@@ -3062,7 +3094,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
*/
if (io_opt_sectors < cache->sectors_per_block ||
do_div(io_opt_sectors, cache->sectors_per_block)) {
blk_limits_io_min(limits, 0);
blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
}
set_discard_limits(cache, limits);
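
For a hypothetical 64 KiB data block, the hunk above now exports minimum_io_size equal to optimal_io_size; previously blk_limits_io_min(limits, 0) left minimum_io_size at the physical block size (typically 512 bytes), and that io_min smaller than io_opt combination is what led mkfs.xfs to infer RAID striping. An illustrative calculation only (not kernel code):

/* Illustration: io hints for a hypothetical 64 KiB cache data block. */
#include <stdio.h>

#define SECTOR_SHIFT 9

int main(void)
{
    unsigned sectors_per_block = 128;   /* 64 KiB data block */
    unsigned io_opt = sectors_per_block << SECTOR_SHIFT;
    unsigned io_min_old = 512;          /* what a zero io_min degraded to */
    unsigned io_min_new = sectors_per_block << SECTOR_SHIFT;

    printf("old hints: io_min=%u io_opt=%u (read as striping)\n", io_min_old, io_opt);
    printf("new hints: io_min=%u io_opt=%u\n", io_min_new, io_opt);
    return 0;
}
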
@@ -3072,7 +3104,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)

static struct target_type cache_target = {
.name = "cache",
.version = {1, 4, 0},
.version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,
