@@ -64,28 +64,29 @@
} \
} while (0)
+/* run_on_cpu_data.target_ptr should always be big enough for a
+ * target_ulong even on 32-bit builds */
+QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
+
/* statistics */
int tlb_flush_count;
-/* NOTE:
- * If flush_global is true (the usual case), flush all tlb entries.
- * If flush_global is false, flush (at least) all tlb entries not
- * marked global.
- *
- * Since QEMU doesn't currently implement a global/not-global flag
- * for tlb entries, at the moment tlb_flush() will also flush all
- * tlb entries in the flush_global == false case. This is OK because
- * CPU architectures generally permit an implementation to drop
- * entries from the TLB at any time, so flushing more entries than
- * required is only an efficiency issue, not a correctness issue.
- */
-void tlb_flush(CPUState *cpu, int flush_global)
+static void tlb_flush_nocheck(CPUState *cpu, int flush_global)
{
CPUArchState *env = cpu->env_ptr;
+ /* The QOM tests will trigger tlb_flushes without setting up TCG,
+ * so we bail out here in that case.
+ */
+ if (!tcg_enabled()) {
+ return;
+ }
+
assert_cpu_is_self(cpu);
tlb_debug("(%d)\n", flush_global);
+ tb_lock();
+
memset(env->tlb_table, -1, sizeof(env->tlb_table));
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
@@ -94,6 +95,39 @@ void tlb_flush(CPUState *cpu, int flush_global)
env->tlb_flush_addr = -1;
env->tlb_flush_mask = 0;
tlb_flush_count++;
+
+ tb_unlock();
+
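+ /* The flush is done; allow a new cross-vCPU flush request to be
+ * queued for this vCPU.
+ */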
+ atomic_mb_set(&cpu->pending_tlb_flush, false);
+}
+
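+/* Helper invoked via async_run_on_cpu() so that the flush runs in the
+ * context of the target vCPU; the flush_global flag is passed in the
+ * host_int field of run_on_cpu_data.
+ */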
+static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
+{
+ tlb_flush_nocheck(cpu, data.host_int);
+}
+
+/* NOTE:
+ * If flush_global is true (the usual case), flush all tlb entries.
+ * If flush_global is false, flush (at least) all tlb entries not
+ * marked global.
+ *
+ * Since QEMU doesn't currently implement a global/not-global flag
+ * for tlb entries, at the moment tlb_flush() will also flush all
+ * tlb entries in the flush_global == false case. This is OK because
+ * CPU architectures generally permit an implementation to drop
+ * entries from the TLB at any time, so flushing more entries than
+ * required is only an efficiency issue, not a correctness issue.
+ */
+void tlb_flush(CPUState *cpu, int flush_global)
+{
+ if (cpu->created && !qemu_cpu_is_self(cpu)) {
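+ /* atomic_cmpxchg() returns the previous value of pending_tlb_flush,
+ * so the async work is only scheduled when no flush is already
+ * pending for this vCPU; the flag is cleared again by
+ * tlb_flush_nocheck() once the flush has run.
+ */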
+ if (atomic_cmpxchg(&cpu->pending_tlb_flush, false, true) == false) {
+ async_run_on_cpu(cpu, tlb_flush_global_async_work,
+ RUN_ON_CPU_HOST_INT(flush_global));
+ }
+ } else {
+ tlb_flush_nocheck(cpu, flush_global);
+ }
}
static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
@@ -103,6 +137,8 @@ static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
assert_cpu_is_self(cpu);
tlb_debug("start\n");
+ tb_lock();
+
for (;;) {
int mmu_idx = va_arg(argp, int);
@@ -117,6 +153,8 @@ static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
}
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+
+ tb_unlock();
}
void tlb_flush_by_mmuidx(CPUState *cpu, ...)
@@ -139,13 +177,15 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
}
}
-void tlb_flush_page(CPUState *cpu, target_ulong addr)
+static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
+ target_ulong addr = (target_ulong) data.target_ptr;
int i;
int mmu_idx;
assert_cpu_is_self(cpu);
+
tlb_debug("page :" TARGET_FMT_lx "\n", addr);
/* Check if we need to flush due to large pages. */
@@ -175,6 +215,18 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
tb_flush_jmp_cache(cpu, addr);
}
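+/* Flush one page from the TLB of the given vCPU. When called from a
+ * different thread the flush is deferred to the vCPU's own context via
+ * async_run_on_cpu(); otherwise it is performed immediately.
+ */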
+void tlb_flush_page(CPUState *cpu, target_ulong addr)
+{
+ tlb_debug("page :" TARGET_FMT_lx "\n", addr);
+
+ if (!qemu_cpu_is_self(cpu)) {
+ async_run_on_cpu(cpu, tlb_flush_page_async_work,
+ RUN_ON_CPU_TARGET_PTR(addr));
+ } else {
+ tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
+ }
+}
+
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
{
CPUArchState *env = cpu->env_ptr;
@@ -221,6 +273,16 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
tb_flush_jmp_cache(cpu, addr);
}
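+/* Queue an asynchronous page flush on every vCPU; each flush then runs
+ * in the context of its own vCPU.
+ */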
+void tlb_flush_page_all(target_ulong addr)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ async_run_on_cpu(cpu, tlb_flush_page_async_work,
+ RUN_ON_CPU_TARGET_PTR(addr));
+ }
+}
+
/* update the TLBs so that writes to code in the virtual page 'addr'
can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
@@ -160,6 +160,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr);
void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
uintptr_t retaddr);
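+/* Flush the given page from the TLB of every vCPU (queued as async work). */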
+void tlb_flush_page_all(target_ulong addr);
#else
static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
@@ -393,6 +393,12 @@ struct CPUState {
(absolute value) offset as small as possible. This reduces code
size, especially for hosts without large memory offsets. */
uint32_t tcg_exit_req;
+
+ /* pending_tlb_flush is set atomically when a global TLB flush is
+ * queued for this vCPU from another thread and cleared once that
+ * flush has run, so redundant flush requests are skipped rather
+ * than queued again.
+ */
+ bool pending_tlb_flush;
};
QTAILQ_HEAD(CPUTailQ, CPUState);