Skip to content

Commit

Permalink
ARC: mm: PAE40 support
Browse files Browse the repository at this point in the history
This is the first working implementation of 40-bit physical address
extension on ARCv2.

Signed-off-by: Alexey Brodkin <[email protected]>
Signed-off-by: Vineet Gupta <[email protected]>
  • Loading branch information
vineetgarc committed Oct 29, 2015
1 parent 25d4641 commit 5a364c2
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 14 deletions.
15 changes: 15 additions & 0 deletions arch/arc/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,21 @@ config HIGHMEM
kernel. Enable this to potentially allow access to rest of 2G and PAE
in future

config ARC_HAS_PAE40
bool "Support for the 40-bit Physical Address Extension"
default n
depends on ISA_ARCV2
select HIGHMEM
help
Enable access to physical memory beyond 4G, only supported on
ARC cores with 40 bit Physical Addressing support

config ARCH_PHYS_ADDR_T_64BIT
def_bool ARC_HAS_PAE40

config ARCH_DMA_ADDR_T_64BIT
bool

config ARC_CURR_IN_REG
bool "Dedicate Register r25 for current_task pointer"
default y
Expand Down
2 changes: 2 additions & 0 deletions arch/arc/include/asm/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ extern int ioc_exists;
#if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4)
#define ARC_REG_IC_PTAG 0x1E
#endif
#define ARC_REG_IC_PTAG_HI 0x1F

/* Bit val in IC_CTRL */
#define IC_CTRL_CACHE_DISABLE 0x1
Expand All @@ -77,6 +78,7 @@ extern int ioc_exists;
#define ARC_REG_DC_FLSH 0x4B
#define ARC_REG_DC_FLDL 0x4C
#define ARC_REG_DC_PTAG 0x5C
#define ARC_REG_DC_PTAG_HI 0x5F

/* Bit val in DC_CTRL */
#define DC_CTRL_INV_MODE_FLUSH 0x40
Expand Down
7 changes: 7 additions & 0 deletions arch/arc/include/asm/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@
#if (CONFIG_ARC_MMU_VER < 4)
#define ARC_REG_TLBPD0 0x405
#define ARC_REG_TLBPD1 0x406
#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */
#define ARC_REG_TLBINDEX 0x407
#define ARC_REG_TLBCOMMAND 0x408
#define ARC_REG_PID 0x409
#define ARC_REG_SCRATCH_DATA0 0x418
#else
#define ARC_REG_TLBPD0 0x460
#define ARC_REG_TLBPD1 0x461
#define ARC_REG_TLBPD1HI 0x463
#define ARC_REG_TLBINDEX 0x464
#define ARC_REG_TLBCOMMAND 0x465
#define ARC_REG_PID 0x468
Expand Down Expand Up @@ -83,6 +85,11 @@ void arc_mmu_init(void);
extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
void read_decode_mmu_bcr(void);

static inline int is_pae40_enabled(void)
{
return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
}

#endif /* !__ASSEMBLY__ */

#endif
4 changes: 4 additions & 0 deletions arch/arc/include/asm/page.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,11 @@ typedef struct {

#else /* !STRICT_MM_TYPECHECKS */

#ifdef CONFIG_ARC_HAS_PAE40
typedef unsigned long long pte_t;
#else
typedef unsigned long pte_t;
#endif
typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;

Expand Down
6 changes: 3 additions & 3 deletions arch/arc/include/asm/pgalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)

static inline int __get_order_pgd(void)
{
return get_order(PTRS_PER_PGD * 4);
return get_order(PTRS_PER_PGD * sizeof(pgd_t));
}

static inline pgd_t *pgd_alloc(struct mm_struct *mm)
Expand Down Expand Up @@ -87,7 +87,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)

static inline int __get_order_pte(void)
{
return get_order(PTRS_PER_PTE * 4);
return get_order(PTRS_PER_PTE * sizeof(pte_t));
}

static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
Expand All @@ -110,7 +110,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
if (!pte_pg)
return 0;
memzero((void *)pte_pg, PTRS_PER_PTE * 4);
memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
page = virt_to_page(pte_pg);
if (!pgtable_page_ctor(page)) {
__free_page(page);
Expand Down
8 changes: 7 additions & 1 deletion arch/arc/include/asm/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,12 @@
/* Masks for actual TLB "PD"s */
#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)

#ifdef CONFIG_ARC_HAS_PAE40
#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
#else
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE)
#endif

/**************************************************************************
* Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
Expand Down Expand Up @@ -272,7 +277,8 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)

#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
#define pfn_pte(pfn, prot) (__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
#define pfn_pte(pfn, prot) (__pte(((pte_t)(pfn) << PAGE_SHIFT) | \
pgprot_val(prot)))
#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))

/*
Expand Down
44 changes: 40 additions & 4 deletions arch/arc/mm/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
}
}

/*
* For ARC700 MMUv3 I-cache and D-cache flushes
* Also reused for HS38 aliasing I-cache configuration
*/
static inline
void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz, const int op)
Expand Down Expand Up @@ -289,6 +293,16 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
if (full_page)
write_aux_reg(aux_tag, paddr);

/*
* This is technically for MMU v4, using the MMU v3 programming model
* Special work for HS38 aliasing I-cache configuratino with PAE40
* - upper 8 bits of paddr need to be written into PTAG_HI
* - (and needs to be written before the lower 32 bits)
* Note that PTAG_HI is hoisted outside the line loop
*/
if (is_pae40_enabled() && op == OP_INV_IC)
write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);

while (num_lines-- > 0) {
if (!full_page) {
write_aux_reg(aux_tag, paddr);
Expand All @@ -301,11 +315,17 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
}

/*
* In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache
* maintenance ops (in IVIL reg), as long as icache doesn't alias.
* In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
* Here's how cache ops are implemented
*
* - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
* - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
* - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
* respectively, similar to MMU v3 programming model, hence
* __cache_line_loop_v3() is used)
*
* For Aliasing icache, vaddr is also needed (in IVIL), while paddr is
* specified in PTAG (similar to MMU v3)
* If PAE40 is enabled, independent of aliasing considerations, the higher bits
* needs to be written into PTAG_HI
*/
static inline
void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
Expand Down Expand Up @@ -335,6 +355,22 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,

num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);

/*
* For HS38 PAE40 configuration
* - upper 8 bits of paddr need to be written into PTAG_HI
* - (and needs to be written before the lower 32 bits)
*/
if (is_pae40_enabled()) {
if (cacheop == OP_INV_IC)
/*
* Non aliasing I-cache in HS38,
* aliasing I-cache handled in __cache_line_loop_v3()
*/
write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
else
write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
}

while (num_lines-- > 0) {
write_aux_reg(aux_cmd, paddr);
paddr += L1_CACHE_BYTES;
Expand Down
27 changes: 22 additions & 5 deletions arch/arc/mm/tlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
static inline void __tlb_entry_erase(void)
{
write_aux_reg(ARC_REG_TLBPD1, 0);

if (is_pae40_enabled())
write_aux_reg(ARC_REG_TLBPD1HI, 0);

write_aux_reg(ARC_REG_TLBPD0, 0);
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
Expand Down Expand Up @@ -182,7 +186,7 @@ static void utlb_invalidate(void)

}

static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
{
unsigned int idx;

Expand Down Expand Up @@ -225,10 +229,14 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
}

static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
{
write_aux_reg(ARC_REG_TLBPD0, pd0);
write_aux_reg(ARC_REG_TLBPD1, pd1);

if (is_pae40_enabled())
write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);

write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
}

Expand All @@ -249,6 +257,10 @@ noinline void local_flush_tlb_all(void)

/* Load PD0 and PD1 with template for a Blank Entry */
write_aux_reg(ARC_REG_TLBPD1, 0);

if (is_pae40_enabled())
write_aux_reg(ARC_REG_TLBPD1HI, 0);

write_aux_reg(ARC_REG_TLBPD0, 0);

for (entry = 0; entry < num_tlb; entry++) {
Expand Down Expand Up @@ -503,7 +515,8 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
{
unsigned long flags;
unsigned int asid_or_sasid, rwx;
unsigned long pd0, pd1;
unsigned long pd0;
pte_t pd1;

/*
* create_tlb() assumes that current->mm == vma->mm, since
Expand Down Expand Up @@ -785,10 +798,11 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));

n += scnprintf(buf + n, len - n,
"MMU [v%x]\t: %dK PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d\n",
"MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n",
p_mmu->ver, p_mmu->pg_sz_k, super_pg,
p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
p_mmu->u_dtlb, p_mmu->u_itlb);
p_mmu->u_dtlb, p_mmu->u_itlb,
IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));

return buf;
}
Expand Down Expand Up @@ -821,6 +835,9 @@ void arc_mmu_init(void)
panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n",
(unsigned long)TO_MB(HPAGE_PMD_SIZE));

if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
panic("Hardware doesn't support PAE40\n");

/* Enable the MMU */
write_aux_reg(ARC_REG_PID, MMU_ENABLE);

Expand Down
11 changes: 10 additions & 1 deletion arch/arc/mm/tlbex.S
Original file line number Diff line number Diff line change
Expand Up @@ -223,12 +223,16 @@ ex_saved_reg1:
; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
; (3) z = (pgtbl + y * 4)

#ifdef CONFIG_ARC_HAS_PAE40
#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */
#else
#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */
#endif

; multiply in step (3) above avoided by shifting lesser in step (1)
lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG )
and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG )
ld.aw r0, [r1, r0] ; r0: PTE
ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40)
; r1: PTE ptr

2:
Expand All @@ -247,6 +251,7 @@ ex_saved_reg1:
;-----------------------------------------------------------------
; Convert Linux PTE entry into TLB entry
; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI])
; IN: r0 = PTE, r1 = ptr to PTE

.macro CONV_PTE_TO_TLB
Expand All @@ -259,6 +264,10 @@ ex_saved_reg1:
or r3, r3, r2

sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C
#ifdef CONFIG_ARC_HAS_PAE40
ld r3, [r1, 4] ; paddr[39..32]
sr r3, [ARC_REG_TLBPD1HI]
#endif

and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb

Expand Down

0 comments on commit 5a364c2

Please sign in to comment.