An introduction to the Linux kernel's virtual kernel memory layout (aarch64)
Related files:
memory.h
pgtable.h
fixmap.h
page.h
1. Important configuration options
This article uses VA_BITS=48 and PAGE_SIZE=4K throughout.
(1) VA_BITS
(arch/arm64/Kconfig)
config ARM64_VA_BITS_36
bool "36-bit" if EXPERT
depends on ARM64_16K_PAGES
config ARM64_VA_BITS_39
bool "39-bit"
depends on ARM64_4K_PAGES
config ARM64_VA_BITS_42
bool "42-bit"
depends on ARM64_64K_PAGES
config ARM64_VA_BITS_47
bool "47-bit"
depends on ARM64_16K_PAGES
config ARM64_VA_BITS_48
bool "48-bit"
With ARM64_VA_BITS_48 selected, the resulting .config contains:
CONFIG_ARM64_VA_BITS_48=y
CONFIG_ARM64_VA_BITS=48
(2) PAGE_SIZE and PAGE_SHIFT
If ARM64_4K_PAGES is selected, then PAGE_SIZE = 4K and PAGE_SHIFT = 12 (page.h):
#ifdef CONFIG_ARM64_64K_PAGES
#define PAGE_SHIFT 16
#define CONT_SHIFT 5
#elif defined(CONFIG_ARM64_16K_PAGES)
#define PAGE_SHIFT 14
#define CONT_SHIFT 7
#else
#define PAGE_SHIFT 12
#define CONT_SHIFT 4
#endif
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
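With PAGE_SHIFT = 12 this works out to PAGE_SIZE = (1UL << 12) = 0x1000 (4 KiB) and PAGE_MASK = ~(0x1000 - 1) = 0xffff_ffff_ffff_f000; CONT_SHIFT = 4 corresponds to the 16-page (64 KiB) block size used for "contiguous" mappings.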
2. Kernel 4.4 code: computing the address of each region
(memory.h):
#define VA_BITS (CONFIG_ARM64_VA_BITS)
#define VA_START (UL(0xffffffffffffffff) - \
(UL(1) << VA_BITS) + 1)
#define PAGE_OFFSET (UL(0xffffffffffffffff) - \
(UL(1) << (VA_BITS - 1)) + 1)
#define KIMAGE_VADDR (MODULES_END)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE)
#define MODULES_VSIZE (SZ_128M)
#define PCI_IO_END (PAGE_OFFSET - SZ_2M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
#define TASK_SIZE_64 (UL(1) << VA_BITS)
#ifdef CONFIG_KASAN
#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3))
#else
#define KASAN_SHADOW_SIZE (0)
#endif
#define SZ_128M 0x08000000
#define PCI_IO_SIZE SZ_16M // (0x01000000)
(fixmap.h)
#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
(pgtable.h)
#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define VMEMMAP_START (VMALLOC_END + SZ_64K)
KASAN_SHADOW_SIZE = 0x2000_0000_0000
MODULES_VSIZE = 0x0800_0000
VA_BITS = 48
VA_START = 0xffff_0000_0000_0000
PAGE_OFFSET = 0xffff_8000_0000_0000
MODULES_VADDR=(VA_START + KASAN_SHADOW_SIZE) = 0xffff_2000_0000_0000
KIMAGE_VADDR = (MODULES_END) = (MODULES_VADDR + MODULES_VSIZE) = 0xffff_2000_0800_0000
PCI_IO_END = PAGE_OFFSET - SZ_2M = 0xffff_8000_0000_0000 - 0x0020_0000 = 0xffff_7fff_ffe0_0000
PCI_IO_START = PCI_IO_END - SZ_16M = 0xffff_7fff_ffe0_0000 - 0x0100_0000 = 0xffff_7fff_fee0_0000
FIXADDR_TOP = (PCI_IO_START - SZ_2M) = 0xffff_7fff_fee0_0000 - 0x0020_0000 = 0xffff_7fff_fec0_0000 // (top end of the fixmap region)
As you can see, VMALLOC_START (= MODULES_END) is the same address as KIMAGE_VADDR; in other words, the kernel image has been moved into the vmalloc region.
Now look at VMEMMAP_SIZE (with VA_BITS=48 and PAGE_SHIFT=12):
(1UL << (VA_BITS - PAGE_SHIFT)) is the number of pages that a 48-bit virtual address space can cover;
multiplying by sizeof(struct page) gives the amount of memory needed to hold one struct page per page.
In other words, the VMEMMAP region stores the struct page structures for all page frames.
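A rough worked number: (1UL << (48 - 12)) = 2^36 pages; assuming sizeof(struct page) = 64 bytes (the exact size depends on the configuration), that is 2^36 × 64 B = 4 TiB, which is already a multiple of PUD_SIZE (1 GiB with 4K pages), so VMEMMAP_SIZE comes out to 0x400_0000_0000.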
Putting the addresses above together makes the layout much more intuitive; the values can also be recomputed with the small sketch below.
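The following is a minimal user-space sketch (not kernel code) that recomputes the 4.4-era constants for VA_BITS=48, 4K pages and CONFIG_KASAN=y; the SZ_* constants are spelled out as plain numbers, matching linux/sizes.h.
#include <stdio.h>

#define VA_BITS   48UL
#define SZ_2M     0x00200000UL
#define SZ_16M    0x01000000UL
#define SZ_128M   0x08000000UL

int main(void)
{
        unsigned long va_start      = 0xffffffffffffffffUL - (1UL << VA_BITS) + 1;
        unsigned long page_offset   = 0xffffffffffffffffUL - (1UL << (VA_BITS - 1)) + 1;
        unsigned long kasan_size    = 1UL << (VA_BITS - 3);   /* CONFIG_KASAN=y */
        unsigned long modules_vaddr = va_start + kasan_size;
        unsigned long modules_end   = modules_vaddr + SZ_128M; /* == KIMAGE_VADDR == VMALLOC_START */
        unsigned long pci_io_end    = page_offset - SZ_2M;
        unsigned long pci_io_start  = pci_io_end - SZ_16M;
        unsigned long fixaddr_top   = pci_io_start - SZ_2M;

        printf("VA_START      = 0x%016lx\n", va_start);
        printf("PAGE_OFFSET   = 0x%016lx\n", page_offset);
        printf("MODULES_VADDR = 0x%016lx\n", modules_vaddr);
        printf("KIMAGE_VADDR  = 0x%016lx\n", modules_end);
        printf("PCI_IO_END    = 0x%016lx\n", pci_io_end);
        printf("PCI_IO_START  = 0x%016lx\n", pci_io_start);
        printf("FIXADDR_TOP   = 0x%016lx\n", fixaddr_top);
        return 0;
}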
3. Kernel 4.14 code: computing the address of each region
(memory.h):
#define VA_BITS (CONFIG_ARM64_VA_BITS)
#define VA_START (UL(0xffffffffffffffff) - \
(UL(1) << VA_BITS) + 1)
#define PAGE_OFFSET (UL(0xffffffffffffffff) - \
(UL(1) << (VA_BITS - 1)) + 1)
#define KIMAGE_VADDR (MODULES_END)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE)
#define MODULES_VSIZE (SZ_128M)
#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE)
#define PCI_IO_END (VMEMMAP_START - SZ_2M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
Similarly, the resulting 4.14 layout can be pictured; a rough ordering sketch is given below.
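Derived purely from the macros above (together with the VMALLOC_START = MODULES_END definition shown earlier), the 4.14 ordering from low to high virtual addresses is roughly:
VA_START
KASAN shadow (only with CONFIG_KASAN=y)
MODULES_VADDR .. MODULES_END (128MB module area)
KIMAGE_VADDR = VMALLOC_START (kernel image at the bottom of the vmalloc area), followed by the rest of the vmalloc area
fixmap (FIXADDR_START .. FIXADDR_TOP), then a 2MB guard
PCI I/O space (PCI_IO_START .. PCI_IO_END, 16MB), then a 2MB guard
vmemmap (VMEMMAP_START .. PAGE_OFFSET)
linear mapping (PAGE_OFFSET up to the top of the address space)
The main difference from 4.4 is that vmemmap now sits directly below PAGE_OFFSET (VMEMMAP_START = PAGE_OFFSET - VMEMMAP_SIZE), and the PCI I/O window and the fixmap move down below it accordingly.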
4. How virt_to_phys changed after the kernel image moved into the vmalloc region
virt_to_phys converts a kernel virtual address into a physical address (for the linear mapping region).
When the kernel image still lived inside the linear mapping region, the virt_to_phys macro could convert any address in kernel code to a physical address, because in the linear region virtual and physical addresses differ only by a fixed offset, so the conversion is trivial.
Now that the kernel image and the linear mapping region are separate, how is the virt_to_phys macro implemented?
In the kernel, PAGE_OFFSET = 0xffff_8000_0000_0000, i.e. bit 47 (bit VA_BITS - 1) is set:
#define PAGE_OFFSET (UL(0xffffffffffffffff) - \
(UL(1) << (VA_BITS - 1)) + 1)
When __virt_to_phys() is called, it first tests bit 47 (bit VA_BITS - 1). If it is 1, the address belongs to the linear mapping of DRAM, so the physical address is simply the address with PAGE_OFFSET masked off, plus PHYS_OFFSET:
#define __virt_to_phys(x) ({ \
phys_addr_t __x = (phys_addr_t)(x); \
__x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \
(__x - kimage_voffset); })
PHYS_OFFSET is the real physical start address of DRAM (memstart_addr):
memstart_addr = round_down(memblock_start_of_DRAM(),
ARM64_MEMSTART_ALIGN);
If the bit is 0, the address is a kernel image address, and the physical address is obtained by subtracting kimage_voffset from the virtual address.
kimage_voffset is set up in the assembly routine __mmap_switched (head.S):
str_l x21, __fdt_pointer, x5 // Save FDT pointer
ldr_l x4, kimage_vaddr // Save the offset between
sub x4, x4, x24 // the kernel virtual and
str_l x4, kimage_voffset, x5 // physical mappings
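To make the two branches concrete, here is a minimal user-space sketch of the same logic; PHYS_OFFSET, kimage_voffset and the sample addresses below are made-up values used only for illustration.
#include <stdio.h>

#define VA_BITS      48
#define PAGE_OFFSET  0xffff800000000000UL
#define BIT(n)       (1UL << (n))

/* made-up values: in the real kernel these come from memstart_addr
 * and from __mmap_switched respectively */
static unsigned long PHYS_OFFSET    = 0x0000000080000000UL;
static unsigned long kimage_voffset = 0xffff1fff88000000UL;

static unsigned long virt_to_phys(unsigned long x)
{
        /* bit 47 set -> linear-mapping address, clear -> kernel-image address */
        return (x & BIT(VA_BITS - 1)) ? (x & ~PAGE_OFFSET) + PHYS_OFFSET
                                      : (x - kimage_voffset);
}

int main(void)
{
        printf("0x%016lx\n", virt_to_phys(0xffff800001234000UL)); /* -> 0x81234000 */
        printf("0x%016lx\n", virt_to_phys(0xffff200008080000UL)); /* -> 0x80080000 */
        return 0;
}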
In summary, virt_to_phys() can still only convert two kinds of addresses: linear-region addresses and kimg-region (kernel image) addresses.
5. The PCI I/O region
An x86 processor, for example, implements a separate address space dedicated to peripherals, called the "I/O address space" or "I/O port space"; the CPU accesses locations in this space with dedicated I/O instructions (x86's IN and OUT).
arm64 has no such dedicated port-I/O instructions; instead, the PCI I/O port space is emulated by mapping it into this region of the virtual address space and accessing it with ordinary memory (MMIO) accesses.
The generic accessors live in include/asm-generic/io.h,
where PCI_IOBASE corresponds to the base address of the PCI I/O region (PCI_IO_START).
#ifndef insb
#define insb insb
static inline void insb(unsigned long addr, void *buffer, unsigned int count)
{
        readsb(PCI_IOBASE + addr, buffer, count);
}
#endif

#ifndef insw
#define insw insw
static inline void insw(unsigned long addr, void *buffer, unsigned int count)
{
        readsw(PCI_IOBASE + addr, buffer, count);
}
#endif

#ifndef insl
#define insl insl
static inline void insl(unsigned long addr, void *buffer, unsigned int count)
{
        readsl(PCI_IOBASE + addr, buffer, count);
}
#endif
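For example, when a driver calls inb(0x3f8) (the traditional COM1 port) on arm64, the generic inb() in the same header simply performs readb(PCI_IOBASE + 0x3f8), i.e. an ordinary MMIO load somewhere inside the PCI_IO_START..PCI_IO_END window; outb() likewise turns into a writeb().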
6. ioremap
So which region does ioremap actually map into?
(arch/arm64/include/asm/io.h)
#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
#define ioremap_wt(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define iounmap __iounmap
(ioremap.c)
void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot)
{
        return __ioremap_caller(phys_addr, size, prot,
                                __builtin_return_address(0));
}
EXPORT_SYMBOL(__ioremap);

static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
                                      pgprot_t prot, void *caller)
{
        unsigned long last_addr;
        unsigned long offset = phys_addr & ~PAGE_MASK;
        int err;
        unsigned long addr;
        struct vm_struct *area;

        /*
         * Page align the mapping address and size, taking account of any
         * offset.
         */
        phys_addr &= PAGE_MASK;
        size = PAGE_ALIGN(size + offset);

        /*
         * Don't allow wraparound, zero size or outside PHYS_MASK.
         */
        last_addr = phys_addr + size - 1;
        if (!size || last_addr < phys_addr || (last_addr & ~PHYS_MASK))
                return NULL;

        /*
         * Don't allow RAM to be mapped.
         */
        if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
                return NULL;

        area = get_vm_area_caller(size, VM_IOREMAP, caller);
        if (!area)
                return NULL;
        addr = (unsigned long)area->addr;
        area->phys_addr = phys_addr;

        err = ioremap_page_range(addr, addr + size, phys_addr, prot);
        if (err) {
                vunmap((void *)addr);
                return NULL;
        }

        return (void __iomem *)(offset + addr);
}
__ioremap_caller() calls get_vm_area_caller(size, VM_IOREMAP, caller), which allocates the virtual range via __get_vm_area_node() with start = VMALLOC_START and end = VMALLOC_END:
(vmalloc.c)
struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
                                       unsigned long start, unsigned long end,
                                       const void *caller)
{
        return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
                                  GFP_KERNEL, caller);
}

static struct vm_struct *__get_vm_area_node(unsigned long size,
                unsigned long align, unsigned long flags, unsigned long start,
                unsigned long end, int node, gfp_t gfp_mask, const void *caller)
{
        struct vmap_area *va;
        struct vm_struct *area;

        BUG_ON(in_interrupt());
        if (flags & VM_IOREMAP)
                align = 1ul << clamp_t(int, fls_long(size),
                                       PAGE_SHIFT, IOREMAP_MAX_ORDER);

        size = PAGE_ALIGN(size);
        if (unlikely(!size))
                return NULL;

        area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
        if (unlikely(!area))
                return NULL;

        if (!(flags & VM_NO_GUARD))
                size += PAGE_SIZE;

        va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
        if (IS_ERR(va)) {
                kfree(area);
                return NULL;
        }

        setup_vmalloc_vm(area, va, flags, caller);

        return area;
}
We will not dig any deeper into this code here, but it is clear that ioremap allocates its virtual addresses from the vmalloc region; a small usage sketch follows.
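As a quick illustration (a hedged sketch, not taken from the article: the physical base address 0x09000000, the mapping size and the register offset are made-up values), a driver maps its device registers with ioremap() and gets back a virtual address inside [VMALLOC_START, VMALLOC_END):
#include <linux/errno.h>
#include <linux/io.h>

/* hypothetical device: 4KB of registers at physical address 0x09000000 */
#define DEMO_PHYS_BASE  0x09000000
#define DEMO_SIZE       0x1000

static void __iomem *demo_regs;

static int demo_map(void)
{
        /* the returned pointer lies in the vmalloc region */
        demo_regs = ioremap(DEMO_PHYS_BASE, DEMO_SIZE);
        if (!demo_regs)
                return -ENOMEM;
        return 0;
}

static u32 demo_read_reg0(void)
{
        return readl(demo_regs);        /* MMIO read through the new mapping */
}

static void demo_unmap(void)
{
        iounmap(demo_regs);
}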