From 707526ad865dc4064c3984bcc061596a21bf9d3b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 18:47:59 -0700 Subject: [PATCH] cputlb: Merge and move memory_notdirty_write_{prepare,complete} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 9458a9a1df1a, all readers of the dirty bitmaps wait for the rcu lock, which means that they wait until the end of any executing TranslationBlock. As a consequence, there is no need for the actual access to happen in between the _prepare and _complete. Therefore, we can improve things by merging the two functions into notdirty_write and dropping the NotDirtyInfo structure. In addition, the only users of notdirty_write are in cputlb.c, so move the merged function there. Pass in the CPUIOTLBEntry from which the ram_addr_t may be computed. Reviewed-by: David Hildenbrand Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 76 +++++++++++++++++++--------------- exec.c | 44 -------------------- include/exec/memory-internal.h | 65 ----------------------------- 3 files changed, 42 insertions(+), 143 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 4f118d2cc97f..3e918385199b 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -33,6 +33,7 @@ #include "exec/helper-proto.h" #include "qemu/atomic.h" #include "qemu/atomic128.h" +#include "translate-all.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -1085,6 +1086,37 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) return qemu_ram_addr_from_host_nofail(p); } +static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, + CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) +{ + ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; + + trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); + + if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { + struct page_collection *pages + = page_collection_lock(ram_addr, ram_addr + size); + + /* We require mem_io_pc in tb_invalidate_phys_page_range. */ + cpu->mem_io_pc = retaddr; + + tb_invalidate_phys_page_fast(pages, ram_addr, size); + page_collection_unlock(pages); + } + + /* + * Set both VGA and migration bits for simplicity and to remove + * the notdirty callback faster. + */ + cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); + + /* We remove the notdirty callback only if the code has been flushed. */ + if (!cpu_physical_memory_is_clean(ram_addr)) { + trace_memory_notdirty_set_dirty(mem_vaddr); + tlb_set_dirty(cpu, mem_vaddr); + } +} + /* * Probe for whether the specified guest access is permitted. If it is not * permitted then an exception will be taken in the same way as if this @@ -1204,8 +1236,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, /* Probe for a read-modify-write atomic operation. Do not allow unaligned * operations, or io operations to proceed. Return the host address. */ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr, - NotDirtyInfo *ndi) + TCGMemOpIdx oi, uintptr_t retaddr) { size_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); @@ -1265,12 +1296,9 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, hostaddr = (void *)((uintptr_t)addr + tlbe->addend); - ndi->active = false; if (unlikely(tlb_addr & TLB_NOTDIRTY)) { - ndi->active = true; - memory_notdirty_write_prepare(ndi, env_cpu(env), addr, - qemu_ram_addr_from_host_nofail(hostaddr), - 1 << s_bits); + notdirty_write(env_cpu(env), addr, 1 << s_bits, + &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); } return hostaddr; @@ -1641,28 +1669,13 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, return; } - haddr = (void *)((uintptr_t)addr + entry->addend); - /* Handle clean RAM pages. */ if (tlb_addr & TLB_NOTDIRTY) { - NotDirtyInfo ndi; - - /* We require mem_io_pc in tb_invalidate_phys_page_range. */ - env_cpu(env)->mem_io_pc = retaddr; - - memory_notdirty_write_prepare(&ndi, env_cpu(env), addr, - addr + iotlbentry->addr, size); - - if (unlikely(need_swap)) { - store_memop(haddr, val, op ^ MO_BSWAP); - } else { - store_memop(haddr, val, op); - } - - memory_notdirty_write_complete(&ndi); - return; + notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); } + haddr = (void *)((uintptr_t)addr + entry->addend); + /* * Keep these two store_memop separate to ensure that the compiler * is able to fold the entire function to a single instruction. @@ -1793,14 +1806,9 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr #define ATOMIC_NAME(X) \ HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) -#define ATOMIC_MMU_DECLS NotDirtyInfo ndi -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi) -#define ATOMIC_MMU_CLEANUP \ - do { \ - if (unlikely(ndi.active)) { \ - memory_notdirty_write_complete(&ndi); \ - } \ - } while (0) +#define ATOMIC_MMU_DECLS +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) +#define ATOMIC_MMU_CLEANUP #define DATA_SIZE 1 #include "atomic_template.h" @@ -1828,7 +1836,7 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #undef ATOMIC_MMU_LOOKUP #define EXTRA_ARGS , TCGMemOpIdx oi #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) #define DATA_SIZE 1 #include "atomic_template.h" diff --git a/exec.c b/exec.c index 961d7d64971f..7d835b1a2b65 100644 --- a/exec.c +++ b/exec.c @@ -2718,50 +2718,6 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr) return block->offset + offset; } -/* Called within RCU critical section. */ -void memory_notdirty_write_prepare(NotDirtyInfo *ndi, - CPUState *cpu, - vaddr mem_vaddr, - ram_addr_t ram_addr, - unsigned size) -{ - ndi->cpu = cpu; - ndi->ram_addr = ram_addr; - ndi->mem_vaddr = mem_vaddr; - ndi->size = size; - ndi->pages = NULL; - - trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); - - assert(tcg_enabled()); - if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { - ndi->pages = page_collection_lock(ram_addr, ram_addr + size); - tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size); - } -} - -/* Called within RCU critical section. */ -void memory_notdirty_write_complete(NotDirtyInfo *ndi) -{ - if (ndi->pages) { - assert(tcg_enabled()); - page_collection_unlock(ndi->pages); - ndi->pages = NULL; - } - - /* Set both VGA and migration bits for simplicity and to remove - * the notdirty callback faster. - */ - cpu_physical_memory_set_dirty_range(ndi->ram_addr, ndi->size, - DIRTY_CLIENTS_NOCODE); - /* we remove the notdirty callback only if the code has been - flushed */ - if (!cpu_physical_memory_is_clean(ndi->ram_addr)) { - trace_memory_notdirty_set_dirty(ndi->mem_vaddr); - tlb_set_dirty(ndi->cpu, ndi->mem_vaddr); - } -} - /* Generate a debug exception if a watchpoint has been hit. */ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, MemTxAttrs attrs, int flags, uintptr_t ra) diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h index ef4fb9237127..9fcc2af25c88 100644 --- a/include/exec/memory-internal.h +++ b/include/exec/memory-internal.h @@ -49,70 +49,5 @@ void address_space_dispatch_free(AddressSpaceDispatch *d); void mtree_print_dispatch(struct AddressSpaceDispatch *d, MemoryRegion *root); - -struct page_collection; - -/* Opaque struct for passing info from memory_notdirty_write_prepare() - * to memory_notdirty_write_complete(). Callers should treat all fields - * as private, with the exception of @active. - * - * @active is a field which is not touched by either the prepare or - * complete functions, but which the caller can use if it wishes to - * track whether it has called prepare for this struct and so needs - * to later call the complete function. - */ -typedef struct { - CPUState *cpu; - struct page_collection *pages; - ram_addr_t ram_addr; - vaddr mem_vaddr; - unsigned size; - bool active; -} NotDirtyInfo; - -/** - * memory_notdirty_write_prepare: call before writing to non-dirty memory - * @ndi: pointer to opaque NotDirtyInfo struct - * @cpu: CPU doing the write - * @mem_vaddr: virtual address of write - * @ram_addr: the ram address of the write - * @size: size of write in bytes - * - * Any code which writes to the host memory corresponding to - * guest RAM which has been marked as NOTDIRTY must wrap those - * writes in calls to memory_notdirty_write_prepare() and - * memory_notdirty_write_complete(): - * - * NotDirtyInfo ndi; - * memory_notdirty_write_prepare(&ndi, ....); - * ... perform write here ... - * memory_notdirty_write_complete(&ndi); - * - * These calls will ensure that we flush any TCG translated code for - * the memory being written, update the dirty bits and (if possible) - * remove the slowpath callback for writing to the memory. - * - * This must only be called if we are using TCG; it will assert otherwise. - * - * We may take locks in the prepare call, so callers must ensure that - * they don't exit (via longjump or otherwise) without calling complete. - * - * This call must only be made inside an RCU critical section. - * (Note that while we're executing a TCG TB we're always in an - * RCU critical section, which is likely to be the case for callers - * of these functions.) - */ -void memory_notdirty_write_prepare(NotDirtyInfo *ndi, - CPUState *cpu, - vaddr mem_vaddr, - ram_addr_t ram_addr, - unsigned size); -/** - * memory_notdirty_write_complete: finish write to non-dirty memory - * @ndi: pointer to the opaque NotDirtyInfo struct which was initialized - * by memory_not_dirty_write_prepare(). - */ -void memory_notdirty_write_complete(NotDirtyInfo *ndi); - #endif #endif