Skip to content

Commit

Permalink
drm/i915: Replace execbuf vma ht with an idr
Browse files Browse the repository at this point in the history
This was the competing idea long ago, but it was only with the rewrite
of the idr as an radixtree and using the radixtree directly ourselves,
along with the realisation that we can store the vma directly in the
radixtree and only need a list for the reverse mapping, that made the
patch performant enough to displace using a hashtable. Though the vma ht
is fast and doesn't require any extra allocation (as we can embed the node
inside the vma), it does require a thread for resizing and serialization
and will have the occasional slow lookup. That is hairy enough to
investigate alternatives and favour them if equivalent in peak performance.
One advantage of allocating an indirection entry is that we can support a
single shared bo between many clients, something that was done on a
first-come first-serve basis for shared GGTT vma previously. To offset
the extra allocations, we create yet another kmem_cache for them.

Signed-off-by: Chris Wilson <[email protected]>
Reviewed-by: Tvrtko Ursulin <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
  • Loading branch information
ickle committed Aug 18, 2017
1 parent 170fa29 commit d1b48c1
Show file tree
Hide file tree
Showing 11 changed files with 115 additions and 208 deletions.
6 changes: 0 additions & 6 deletions drivers/gpu/drm/i915/i915_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1954,12 +1954,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
seq_putc(m, '\n');
}

seq_printf(m,
"\tvma hashtable size=%u (actual %lu), count=%u\n",
ctx->vma_lut.ht_size,
BIT(ctx->vma_lut.ht_bits),
ctx->vma_lut.ht_count);

seq_putc(m, '\n');
}

Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -2156,6 +2156,7 @@ struct drm_i915_private {

struct kmem_cache *objects;
struct kmem_cache *vmas;
struct kmem_cache *luts;
struct kmem_cache *requests;
struct kmem_cache *dependencies;
struct kmem_cache *priorities;
Expand Down
46 changes: 31 additions & 15 deletions drivers/gpu/drm/i915/i915_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -3242,25 +3242,33 @@ i915_gem_idle_work_handler(struct work_struct *work)

void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
struct drm_i915_private *i915 = to_i915(gem->dev);
struct drm_i915_gem_object *obj = to_intel_bo(gem);
struct drm_i915_file_private *fpriv = file->driver_priv;
struct i915_vma *vma, *vn;
struct i915_lut_handle *lut, *ln;

mutex_lock(&obj->base.dev->struct_mutex);
list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
if (vma->vm->file == fpriv)
mutex_lock(&i915->drm.struct_mutex);

list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
struct i915_gem_context *ctx = lut->ctx;
struct i915_vma *vma;

if (ctx->file_priv != fpriv)
continue;

vma = radix_tree_delete(&ctx->handles_vma, lut->handle);

if (!i915_vma_is_ggtt(vma))
i915_vma_close(vma);

vma = obj->vma_hashed;
if (vma && vma->ctx->file_priv == fpriv)
i915_vma_unlink_ctx(vma);
list_del(&lut->obj_link);
list_del(&lut->ctx_link);

if (i915_gem_object_is_active(obj) &&
!i915_gem_object_has_active_reference(obj)) {
i915_gem_object_set_active_reference(obj);
i915_gem_object_get(obj);
kmem_cache_free(i915->luts, lut);
__i915_gem_object_release_unless_active(obj);
}
mutex_unlock(&obj->base.dev->struct_mutex);

mutex_unlock(&i915->drm.struct_mutex);
}

static unsigned long to_wait_timeout(s64 timeout_ns)
Expand Down Expand Up @@ -4252,6 +4260,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->global_link);
INIT_LIST_HEAD(&obj->userfault_link);
INIT_LIST_HEAD(&obj->vma_list);
INIT_LIST_HEAD(&obj->lut_list);
INIT_LIST_HEAD(&obj->batch_pool_link);

obj->ops = ops;
Expand Down Expand Up @@ -4495,8 +4504,8 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
{
lockdep_assert_held(&obj->base.dev->struct_mutex);

GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
if (i915_gem_object_is_active(obj))
if (!i915_gem_object_has_active_reference(obj) &&
i915_gem_object_is_active(obj))
i915_gem_object_set_active_reference(obj);
else
i915_gem_object_put(obj);
Expand Down Expand Up @@ -4888,12 +4897,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
if (!dev_priv->vmas)
goto err_objects;

dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
if (!dev_priv->luts)
goto err_vmas;

dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_TYPESAFE_BY_RCU);
if (!dev_priv->requests)
goto err_vmas;
goto err_luts;

dev_priv->dependencies = KMEM_CACHE(i915_dependency,
SLAB_HWCACHE_ALIGN |
Expand Down Expand Up @@ -4937,6 +4950,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
kmem_cache_destroy(dev_priv->dependencies);
err_requests:
kmem_cache_destroy(dev_priv->requests);
err_luts:
kmem_cache_destroy(dev_priv->luts);
err_vmas:
kmem_cache_destroy(dev_priv->vmas);
err_objects:
Expand All @@ -4959,6 +4974,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
kmem_cache_destroy(dev_priv->priorities);
kmem_cache_destroy(dev_priv->dependencies);
kmem_cache_destroy(dev_priv->requests);
kmem_cache_destroy(dev_priv->luts);
kmem_cache_destroy(dev_priv->vmas);
kmem_cache_destroy(dev_priv->objects);

Expand Down
87 changes: 19 additions & 68 deletions drivers/gpu/drm/i915/i915_gem_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,69 +93,28 @@

#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1

/* Initial size (as log2) to preallocate the handle->object hashtable */
#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */

static void resize_vma_ht(struct work_struct *work)
static void lut_close(struct i915_gem_context *ctx)
{
struct i915_gem_context_vma_lut *lut =
container_of(work, typeof(*lut), resize);
unsigned int bits, new_bits, size, i;
struct hlist_head *new_ht;

GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));

bits = 1 + ilog2(4*lut->ht_count/3 + 1);
new_bits = min_t(unsigned int,
max(bits, VMA_HT_BITS),
sizeof(unsigned int) * BITS_PER_BYTE - 1);
if (new_bits == lut->ht_bits)
goto out;

new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
if (!new_ht)
new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
if (!new_ht)
/* Pretend resize succeeded and stop calling us for a bit! */
goto out;

size = BIT(lut->ht_bits);
for (i = 0; i < size; i++) {
struct i915_vma *vma;
struct hlist_node *tmp;
struct i915_lut_handle *lut, *ln;
struct radix_tree_iter iter;
void __rcu **slot;

hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
hlist_add_head(&vma->ctx_node,
&new_ht[hash_32(vma->ctx_handle,
new_bits)]);
list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
list_del(&lut->obj_link);
kmem_cache_free(ctx->i915->luts, lut);
}
kvfree(lut->ht);
lut->ht = new_ht;
lut->ht_bits = new_bits;
out:
smp_store_release(&lut->ht_size, BIT(bits));
GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
}

static void vma_lut_free(struct i915_gem_context *ctx)
{
struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
unsigned int i, size;
radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
struct i915_vma *vma = rcu_dereference_raw(*slot);
struct drm_i915_gem_object *obj = vma->obj;

if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
cancel_work_sync(&lut->resize);
radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);

size = BIT(lut->ht_bits);
for (i = 0; i < size; i++) {
struct i915_vma *vma;
if (!i915_vma_is_ggtt(vma))
i915_vma_close(vma);

hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
vma->obj->vma_hashed = NULL;
vma->ctx = NULL;
i915_vma_put(vma);
}
__i915_gem_object_release_unless_active(obj);
}
kvfree(lut->ht);
}

static void i915_gem_context_free(struct i915_gem_context *ctx)
Expand All @@ -165,7 +124,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
lockdep_assert_held(&ctx->i915->drm.struct_mutex);
GEM_BUG_ON(!i915_gem_context_is_closed(ctx));

vma_lut_free(ctx);
i915_ppgtt_put(ctx->ppgtt);

for (i = 0; i < I915_NUM_ENGINES; i++) {
Expand Down Expand Up @@ -239,8 +197,11 @@ void i915_gem_context_release(struct kref *ref)
static void context_close(struct i915_gem_context *ctx)
{
i915_gem_context_set_closed(ctx);

lut_close(ctx);
if (ctx->ppgtt)
i915_ppgtt_close(&ctx->ppgtt->base);

ctx->file_priv = ERR_PTR(-EBADF);
i915_gem_context_put(ctx);
}
Expand Down Expand Up @@ -313,16 +274,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
ctx->i915 = dev_priv;
ctx->priority = I915_PRIORITY_NORMAL;

ctx->vma_lut.ht_bits = VMA_HT_BITS;
ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
sizeof(*ctx->vma_lut.ht),
GFP_KERNEL);
if (!ctx->vma_lut.ht)
goto err_out;

INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
INIT_LIST_HEAD(&ctx->handles_list);

/* Default context will never have a file_priv */
ret = DEFAULT_CONTEXT_HANDLE;
Expand Down Expand Up @@ -372,8 +325,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
put_pid(ctx->pid);
idr_remove(&file_priv->context_idr, ctx->user_handle);
err_lut:
kvfree(ctx->vma_lut.ht);
err_out:
context_close(ctx);
return ERR_PTR(ret);
}
Expand Down
39 changes: 13 additions & 26 deletions drivers/gpu/drm/i915/i915_gem_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/radix-tree.h>

struct pid;

Expand Down Expand Up @@ -149,32 +150,6 @@ struct i915_gem_context {
/** ggtt_offset_bias: placement restriction for context objects */
u32 ggtt_offset_bias;

struct i915_gem_context_vma_lut {
/** ht_size: last request size to allocate the hashtable for. */
unsigned int ht_size;
#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
/** ht_bits: real log2(size) of hashtable. */
unsigned int ht_bits;
/** ht_count: current number of entries inside the hashtable */
unsigned int ht_count;

/** ht: the array of buckets comprising the simple hashtable */
struct hlist_head *ht;

/**
* resize: After an execbuf completes, we check the load factor
* of the hashtable. If the hashtable is too full, or too empty,
* we schedule a task to resize the hashtable. During the
* resize, the entries are moved between different buckets and
* so we cannot simultaneously read the hashtable as it is
* being resized (unlike rhashtable). Therefore we treat the
* active work as a strong barrier, pausing a subsequent
* execbuf to wait for the resize worker to complete, if
* required.
*/
struct work_struct resize;
} vma_lut;

/** engine: per-engine logical HW state */
struct intel_context {
struct i915_vma *state;
Expand Down Expand Up @@ -205,6 +180,18 @@ struct i915_gem_context {

/** remap_slice: Bitmask of cache lines that need remapping */
u8 remap_slice;

/** handles_vma: rbtree to look up our context specific obj/vma for
* the user handle. (user handles are per fd, but the binding is
* per vm, which may be one per context or shared with the global GTT)
*/
struct radix_tree_root handles_vma;

/** handles_list: reverse list of all the rbtree entries in use for
* this context, which allows us to free all the allocations on
* context close.
*/
struct list_head handles_list;
};

static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
Expand Down
Loading

0 comments on commit d1b48c1

Please sign in to comment.