[Xen-devel,v5,7/8] xen: arm: use superpages in p2m when pages are suitably aligned

Message ID	1404907666-8594-7-git-send-email-ian.campbell@citrix.com
State	New
Headers	show Return-Path: <patchwork-forward+bncBC4Y5F6PT4PBB2XB6SOQKGQEOYQHDGA@linaro.org> Received-SPF: pass (google.com: domain of patch+caf_=patchwork-forward=linaro.org@linaro.org designates 209.85.128.172 as permitted sender) client-ip=209.85.128.172; Received-SPF: none (google.com: xen-devel-bounces@lists.xen.org does not designate permitted sender hosts) client-ip=50.57.142.19; From: Ian Campbell <ian.campbell@citrix.com> To: <xen-devel@lists.xen.org> Date: Wed, 9 Jul 2014 13:07:45 +0100 Message-ID: <1404907666-8594-7-git-send-email-ian.campbell@citrix.com> In-Reply-To: <1404907608.16789.18.camel@kazak.uk.xensource.com> References: <1404907608.16789.18.camel@kazak.uk.xensource.com> MIME-Version: 1.0 Cc: julien.grall@linaro.org, tim@xen.org, Ian Campbell <ian.campbell@citrix.com>, stefano.stabellini@eu.citrix.com Subject: [Xen-devel] [PATCH v5 7/8] xen: arm: use superpages in p2m when pages are suitably aligned Precedence: list Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org Mailing-list: list patchwork-forward@linaro.org; contact patchwork-forward+owners@linaro.org Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c index bb5c810..e275f7d 100644 --- a/xen/arch/arm/domain.c +++ b/xen/arch/arm/domain.c @@ -747,6 +747,7 @@ int domain_relinquish_resources(struct domain *d) void arch_dump_domain_info(struct domain *d) { + p2m_dump_info(d); } diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c index cb22e9c..642591b 100644 --- a/xen/arch/arm/p2m.c +++ b/xen/arch/arm/p2m.c @@ -1,6 +1,7 @@ #include <xen/config.h> #include <xen/sched.h> #include <xen/lib.h> +#include <xen/stdbool.h> #include <xen/errno.h> #include <xen/domain_page.h> #include <xen/bitops.h> @@ -25,12 +26,26 @@ static bool_t p2m_table(lpae_t pte) { return p2m_valid(pte) && pte.p2m.table; } -#if 0 static bool_t p2m_mapping(lpae_t pte) { return p2m_valid(pte) && !pte.p2m.table; } -#endif + +void p2m_dump_info(struct domain *d) +{ + struct p2m_domain *p2m = &d->arch.p2m; + + spin_lock(&p2m->lock); + printk("p2m mappings for domain %d (vmid %d):\n", + d->domain_id, p2m->vmid); + BUG_ON(p2m->stats.mappings[0] || p2m->stats.shattered[0]); + printk(" 1G mappings: %ld (shattered %ld)\n", + p2m->stats.mappings[1], p2m->stats.shattered[1]); + printk(" 2M mappings: %ld (shattered %ld)\n", + p2m->stats.mappings[2], p2m->stats.shattered[2]); + printk(" 4K mappings: %ld\n", p2m->stats.mappings[3]); + spin_unlock(&p2m->lock); +} void dump_p2m_lookup(struct domain *d, paddr_t addr) { @@ -287,15 +302,26 @@ static inline void p2m_write_pte(lpae_t *p, lpae_t pte, bool_t flush_cache) clean_xen_dcache(*p); } -/* Allocate a new page table page and hook it in via the given entry */ -static int p2m_create_table(struct domain *d, lpae_t *entry, bool_t flush_cache) +/* + * Allocate a new page table page and hook it in via the given entry. + * apply_one_level relies on this returning 0 on success + * and -ve on failure. + * + * If the existing entry is present then it must be a mapping and not + * a table and it will be shattered into the next level down. + * + * level_shift is the number of bits at the level we want to create. + */ +static int p2m_create_table(struct domain *d, lpae_t *entry, + int level_shift, bool_t flush_cache) { struct p2m_domain *p2m = &d->arch.p2m; struct page_info *page; - void *p; + lpae_t *p; lpae_t pte; + int splitting = p2m_valid(*entry); - BUG_ON(entry->p2m.valid); + BUG_ON(p2m_table(*entry)); page = alloc_domheap_page(NULL, 0); if ( page == NULL ) @@ -304,9 +330,37 @@ static int p2m_create_table(struct domain *d, lpae_t *entry, bool_t flush_cache) page_list_add(page, &p2m->pages); p = __map_domain_page(page); - clear_page(p); + if ( splitting ) + { + p2m_type_t t = entry->p2m.type; + unsigned long base_pfn = entry->p2m.base; + int i; + + /* + * We are either splitting a first level 1G page into 512 second level + * 2M pages, or a second level 2M page into 512 third level 4K pages. + */ + for ( i=0 ; i < LPAE_ENTRIES; i++ ) + { + pte = mfn_to_p2m_entry(base_pfn + (i<<(level_shift-LPAE_SHIFT)), + MATTR_MEM, t); + + /* + * First and second level super pages set p2m.table = 0, but + * third level entries set table = 1. + */ + if ( level_shift - LPAE_SHIFT ) + pte.p2m.table = 0; + + write_pte(&p[i], pte); + } + } + else + clear_page(p); + if ( flush_cache ) clean_xen_dcache_va_range(p, PAGE_SIZE); + unmap_domain_page(p); pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid); @@ -324,8 +378,14 @@ enum p2m_operation { CACHEFLUSH, }; -static void p2m_put_page(const lpae_t pte) +/* Put any references on the single 4K page referenced by pte. TODO: + * Handle superpages, for now we only take special references for leaf + * pages (specifically foreign ones, which can't be super mapped today). + */ +static void p2m_put_l3_page(const lpae_t pte) { + ASSERT(p2m_valid(pte)); + /* TODO: Handle other p2m types * * It's safe to do the put_page here because page_alloc will @@ -341,6 +401,263 @@ static void p2m_put_page(const lpae_t pte) } } +/* + * Returns true if start_gpaddr..end_gpaddr contains at least one + * suitably aligned level_size mappping of maddr. + * + * So long as the range is large enough the end_gpaddr need not be + * aligned (callers should create one superpage mapping based on this + * result and then call this again on the new range, eventually the + * slop at the end will cause this function to return false). + */ +static bool_t is_mapping_aligned(const paddr_t start_gpaddr, + const paddr_t end_gpaddr, + const paddr_t maddr, + const paddr_t level_size) +{ + const paddr_t level_mask = level_size - 1; + + /* No hardware superpages at level 0 */ + if ( level_size == ZEROETH_SIZE ) + return false; + + /* + * A range smaller than the size of a superpage at this level + * cannot be superpage aligned. + */ + if ( ( end_gpaddr - start_gpaddr ) < level_size - 1 ) + return false; + + /* Both the gpaddr and maddr must be aligned */ + if ( start_gpaddr & level_mask ) + return false; + if ( maddr & level_mask ) + return false; + return true; +} + +#define P2M_ONE_DESCEND 0 +#define P2M_ONE_PROGRESS_NOP 0x1 +#define P2M_ONE_PROGRESS 0x10 + +/* + * 0 == (P2M_ONE_DESCEND) continue to descend the tree + * +ve == (P2M_ONE_PROGRESS_*) handled at this level, continue, flush, + * entry, addr and maddr updated. Return value is an + * indication of the amount of work done (for preemption). + * -ve == (-Exxx) error. + */ +static int apply_one_level(struct domain *d, + lpae_t *entry, + unsigned int level, + bool_t flush_cache, + enum p2m_operation op, + paddr_t start_gpaddr, + paddr_t end_gpaddr, + paddr_t *addr, + paddr_t *maddr, + bool_t *flush, + int mattr, + p2m_type_t t) +{ + /* Helpers to lookup the properties of each level */ + const paddr_t level_sizes[] = + { ZEROETH_SIZE, FIRST_SIZE, SECOND_SIZE, THIRD_SIZE }; + const paddr_t level_masks[] = + { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK }; + const paddr_t level_shifts[] = + { ZEROETH_SHIFT, FIRST_SHIFT, SECOND_SHIFT, THIRD_SHIFT }; + const paddr_t level_size = level_sizes[level]; + const paddr_t level_mask = level_masks[level]; + const paddr_t level_shift = level_shifts[level]; + + struct p2m_domain *p2m = &d->arch.p2m; + lpae_t pte; + const lpae_t orig_pte = *entry; + int rc; + + BUG_ON(level > 3); + + switch ( op ) + { + case ALLOCATE: + ASSERT(level < 3 || !p2m_valid(orig_pte)); + ASSERT(*maddr == 0); + + if ( p2m_valid(orig_pte) ) + return P2M_ONE_DESCEND; + + if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) ) + { + struct page_info *page; + + page = alloc_domheap_pages(d, level_shift - PAGE_SHIFT, 0); + if ( page ) + { + pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t); + if ( level < 3 ) + pte.p2m.table = 0; + p2m_write_pte(entry, pte, flush_cache); + p2m->stats.mappings[level]++; + + *addr += level_size; + *maddr += level_size; + + return P2M_ONE_PROGRESS; + } + else if ( level == 3 ) + return -ENOMEM; + } + + /* L3 is always suitably aligned for mapping (handled, above) */ + BUG_ON(level == 3); + + /* + * If we get here then we failed to allocate a sufficiently + * large contiguous region for this level (which can't be + * L3). Create a page table and continue to descend so we try + * smaller allocations. + */ + rc = p2m_create_table(d, entry, 0, flush_cache); + if ( rc < 0 ) + return rc; + + return P2M_ONE_DESCEND; + + case INSERT: + if ( is_mapping_aligned(*addr, end_gpaddr, *maddr, level_size) && + /* We do not handle replacing an existing table with a superpage */ + (level == 3 || !p2m_table(orig_pte)) ) + { + /* New mapping is superpage aligned, make it */ + pte = mfn_to_p2m_entry(*maddr >> PAGE_SHIFT, mattr, t); + if ( level < 3 ) + pte.p2m.table = 0; /* Superpage entry */ + + p2m_write_pte(entry, pte, flush_cache); + + *flush |= p2m_valid(orig_pte); + + *addr += level_size; + *maddr += level_size; + + if ( p2m_valid(orig_pte) ) + { + /* + * We can't currently get here for an existing table + * mapping, since we don't handle replacing an + * existing table with a superpage. If we did we would + * need to handle freeing (and accounting) for the bit + * of the p2m tree which we would be about to lop off. + */ + BUG_ON(level < 3 && p2m_table(orig_pte)); + if ( level == 3 ) + p2m_put_l3_page(orig_pte); + } + else /* New mapping */ + p2m->stats.mappings[level]++; + + return P2M_ONE_PROGRESS; + } + else + { + /* New mapping is not superpage aligned, create a new table entry */ + + /* L3 is always suitably aligned for mapping (handled, above) */ + BUG_ON(level == 3); + + /* Not present -> create table entry and descend */ + if ( !p2m_valid(orig_pte) ) + { + rc = p2m_create_table(d, entry, 0, flush_cache); + if ( rc < 0 ) + return rc; + return P2M_ONE_DESCEND; + } + + /* Existing superpage mapping -> shatter and descend */ + if ( p2m_mapping(orig_pte) ) + { + *flush = true; + rc = p2m_create_table(d, entry, + level_shift - PAGE_SHIFT, flush_cache); + if ( rc < 0 ) + return rc; + + p2m->stats.shattered[level]++; + p2m->stats.mappings[level]--; + p2m->stats.mappings[level+1] += LPAE_ENTRIES; + } /* else: an existing table mapping -> descend */ + + BUG_ON(!p2m_table(*entry)); + + return P2M_ONE_DESCEND; + } + + break; + + case RELINQUISH: + case REMOVE: + if ( !p2m_valid(orig_pte) ) + { + /* Progress up to next boundary */ + *addr = (*addr + level_size) & level_mask; + return P2M_ONE_PROGRESS_NOP; + } + + if ( level < 3 && p2m_table(orig_pte) ) + return P2M_ONE_DESCEND; + + *flush = true; + + memset(&pte, 0x00, sizeof(pte)); + p2m_write_pte(entry, pte, flush_cache); + + *addr += level_size; + + p2m->stats.mappings[level]--; + + if ( level == 3 ) + p2m_put_l3_page(orig_pte); + + /* + * This is still a single pte write, no matter the level, so no need to + * scale. + */ + return P2M_ONE_PROGRESS; + + case CACHEFLUSH: + if ( !p2m_valid(orig_pte) ) + { + *addr = (*addr + level_size) & level_mask; + return P2M_ONE_PROGRESS_NOP; + } + + if ( level < 3 && p2m_table(orig_pte) ) + return P2M_ONE_DESCEND; + + /* + * could flush up to the next boundary, but would need to be + * careful about preemption, so just do one page now and loop. + */ + if ( p2m_is_ram(orig_pte.p2m.type) ) + { + unsigned long offset = paddr_to_pfn(*addr & ~level_mask); + flush_page_to_ram(orig_pte.p2m.base + offset); + + *addr += PAGE_SIZE; + return P2M_ONE_PROGRESS; + } + else + { + *addr += PAGE_SIZE; + return P2M_ONE_PROGRESS_NOP; + } + } + + BUG(); /* Should never get here */ +} + static int apply_p2m_changes(struct domain *d, enum p2m_operation op, paddr_t start_gpaddr, @@ -349,7 +666,7 @@ static int apply_p2m_changes(struct domain *d, int mattr, p2m_type_t t) { - int rc; + int rc, ret; struct p2m_domain *p2m = &d->arch.p2m; lpae_t *first = NULL, *second = NULL, *third = NULL; paddr_t addr; @@ -357,9 +674,7 @@ static int apply_p2m_changes(struct domain *d, cur_first_offset = ~0, cur_second_offset = ~0; unsigned long count = 0; - unsigned int flush = 0; - bool_t populate = (op == INSERT || op == ALLOCATE); - lpae_t pte; + bool_t flush = false; bool_t flush_pt; /* Some IOMMU don't support coherent PT walk. When the p2m is @@ -373,6 +688,25 @@ static int apply_p2m_changes(struct domain *d, addr = start_gpaddr; while ( addr < end_gpaddr ) { + /* + * Arbitrarily, preempt every 512 operations or 8192 nops. + * 512*P2M_ONE_PROGRESS == 8192*P2M_ONE_PROGRESS_NOP == 0x2000 + * + * count is initialised to 0 above, so we are guaranteed to + * always make at least one pass. + */ + + if ( op == RELINQUISH && count >= 0x2000 ) + { + if ( hypercall_preempt_check() ) + { + p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT; + rc = -ERESTART; + goto out; + } + count = 0; + } + if ( cur_first_page != p2m_first_level_index(addr) ) { if ( first ) unmap_domain_page(first); @@ -385,22 +719,18 @@ static int apply_p2m_changes(struct domain *d, cur_first_page = p2m_first_level_index(addr); } - if ( !p2m_valid(first[first_table_offset(addr)]) ) - { - if ( !populate ) - { - addr = (addr + FIRST_SIZE) & FIRST_MASK; - continue; - } + /* We only use a 3 level p2m at the moment, so no level 0, + * current hardware doesn't support super page mappings at + * level 0 anyway */ - rc = p2m_create_table(d, &first[first_table_offset(addr)], - flush_pt); - if ( rc < 0 ) - { - printk("p2m_populate_ram: L1 failed\n"); - goto out; - } - } + ret = apply_one_level(d, &first[first_table_offset(addr)], + 1, flush_pt, op, + start_gpaddr, end_gpaddr, + &addr, &maddr, &flush, + mattr, t); + if ( ret < 0 ) { rc = ret ; goto out; } + count += ret; + if ( ret != P2M_ONE_DESCEND ) continue; BUG_ON(!p2m_valid(first[first_table_offset(addr)])); @@ -412,23 +742,16 @@ static int apply_p2m_changes(struct domain *d, } /* else: second already valid */ - if ( !p2m_valid(second[second_table_offset(addr)]) ) - { - if ( !populate ) - { - addr = (addr + SECOND_SIZE) & SECOND_MASK; - continue; - } - - rc = p2m_create_table(d, &second[second_table_offset(addr)], - flush_pt); - if ( rc < 0 ) { - printk("p2m_populate_ram: L2 failed\n"); - goto out; - } - } + ret = apply_one_level(d,&second[second_table_offset(addr)], + 2, flush_pt, op, + start_gpaddr, end_gpaddr, + &addr, &maddr, &flush, + mattr, t); + if ( ret < 0 ) { rc = ret ; goto out; } + count += ret; + if ( ret != P2M_ONE_DESCEND ) continue; - BUG_ON(!second[second_table_offset(addr)].p2m.valid); + BUG_ON(!p2m_valid(second[second_table_offset(addr)])); if ( cur_second_offset != second_table_offset(addr) ) { @@ -438,84 +761,15 @@ static int apply_p2m_changes(struct domain *d, cur_second_offset = second_table_offset(addr); } - pte = third[third_table_offset(addr)]; - - flush |= pte.p2m.valid; - - switch (op) { - case ALLOCATE: - { - /* Allocate a new RAM page and attach */ - struct page_info *page; - - ASSERT(!pte.p2m.valid); - rc = -ENOMEM; - page = alloc_domheap_page(d, 0); - if ( page == NULL ) { - printk("p2m_populate_ram: failed to allocate page\n"); - goto out; - } - - pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t); - - p2m_write_pte(&third[third_table_offset(addr)], - pte, flush_pt); - } - break; - case INSERT: - { - if ( pte.p2m.valid ) - p2m_put_page(pte); - pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t); - p2m_write_pte(&third[third_table_offset(addr)], - pte, flush_pt); - maddr += PAGE_SIZE; - } - break; - case RELINQUISH: - case REMOVE: - { - if ( !pte.p2m.valid ) - { - count++; - break; - } - - p2m_put_page(pte); - - count += 0x10; - - memset(&pte, 0x00, sizeof(pte)); - p2m_write_pte(&third[third_table_offset(addr)], - pte, flush_pt); - count++; - } - break; - - case CACHEFLUSH: - { - if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) ) - break; - - flush_page_to_ram(pte.p2m.base); - } - break; - } - - /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */ - if ( op == RELINQUISH && count >= 0x2000 ) - { - if ( hypercall_preempt_check() ) - { - p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT; - rc = -ERESTART; - goto out; - } - count = 0; - } - - /* Got the next page */ - addr += PAGE_SIZE; + ret = apply_one_level(d, &third[third_table_offset(addr)], + 3, flush_pt, op, + start_gpaddr, end_gpaddr, + &addr, &maddr, &flush, + mattr, t); + if ( ret < 0 ) { rc = ret ; goto out; } + /* L3 had better have done something! We cannot descend any further */ + BUG_ON(ret == P2M_ONE_DESCEND); + count += ret; } if ( flush ) diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h index 911d32d..327a79d 100644 --- a/xen/include/asm-arm/p2m.h +++ b/xen/include/asm-arm/p2m.h @@ -29,6 +29,15 @@ struct p2m_domain { * resume the search. Apart from during teardown this can only * decrease. */ unsigned long lowest_mapped_gfn; + + /* Gather some statistics for information purposes only */ + struct { + /* Number of mappings at each p2m tree level */ + unsigned long mappings[4]; + /* Number of times we have shattered a mapping + * at each p2m tree level. */ + unsigned long shattered[4]; + } stats; }; /* List of possible type for each page in the p2m entry. @@ -79,6 +88,9 @@ int p2m_alloc_table(struct domain *d); void p2m_save_state(struct vcpu *p); void p2m_restore_state(struct vcpu *n); +/* Print debugging/statistial info about a domain's p2m */ +void p2m_dump_info(struct domain *d); + /* Look up the MFN corresponding to a domain's PFN. */ paddr_t p2m_lookup(struct domain *d, paddr_t gpfn, p2m_type_t *t);

[Xen-devel,v5,7/8] xen: arm: use superpages in p2m when pages are suitably aligned

Commit Message

Comments

Patch