深入浅出内存管理-memblock
memblock 介紹
memblock 內存管理機制主要用于 Linux Kernel 啟動階段 (從 kernel 啟動到 kernel 通用內存管理初始化完成), 或者可以認為到 free_initmem 為止. 在啟動階段, 內存分配器并不需要很復雜, memblock 基于靜態數組, 采用逆向最先適配的分配策略.
memblock 數據結構
memblock
struct memblock {bool bottom_up; /* is bottom up direction? */phys_addr_t current_limit;struct memblock_type memory;struct memblock_type reserved; #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAPstruct memblock_type physmem; #endif };
struct memblock 是 memblock 內存管理的核心數據結構
bottom_up 內存分配的方向
current_limit 內存分配最大限制值
memblock 的內存分為 3 類: memory, reserved 和 physmem
memory 可用的內存的集合
reserved 已分配出去的內存的集合
memblock_type
struct memblock_type {unsigned long cnt; /* number of regions */unsigned long max; /* size of the allocated array */phys_addr_t total_size; /* size of all regions */struct memblock_region *regions;char *name; };
memblock_type 用于描述當前 memblock 中此類型的 memory region 的數量
memblock_region
struct memblock_region {phys_addr_t base;phys_addr_t size;unsigned long flags; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAPint nid; #endif };
memblock_region 用于描述此內存 region 的基地址和大小
flags 定義于 include/linux/memblock.h
/* Definition of memblock flags. */ enum {MEMBLOCK_NONE = 0x0, /* No special request */MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */MEMBLOCK_MIRROR = 0x2, /* mirrored region */MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ };memblock API
在指定的 node 和范圍內尋找可用大小的內存
phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,phys_addr_t start, phys_addr_t end,int nid, ulong flags); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,phys_addr_t size, phys_addr_t align);內(nèi)存添加和刪除type為memory的memblock region
int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size);memblock的內(nèi)存分配和釋放
phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); int memblock_free(phys_addr_t base, phys_addr_t size); void memblock_allow_resize(void); int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); bool memblock_overlaps_region(struct memblock_type *type,phys_addr_t base, phys_addr_t size); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); ulong choose_memblock_flags(void);memblock 實現(xiàn)
memblock_init
#define INIT_MEMBLOCK_REGIONS 128 #define INIT_PHYSMEM_REGIONS 4static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock; #endifstruct memblock memblock __initdata_memblock = {.memory.regions = memblock_memory_init_regions,.memory.cnt = 1, /* empty dummy entry */.memory.max = INIT_MEMBLOCK_REGIONS,.memory.name = "memory",.reserved.regions = memblock_reserved_init_regions,.reserved.cnt = 1, /* empty dummy entry */.reserved.max = INIT_MEMBLOCK_REGIONS,.reserved.name = "reserved",#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP.physmem.regions = memblock_physmem_init_regions,.physmem.cnt = 1, /* empty dummy entry */.physmem.max = INIT_PHYSMEM_REGIONS,.physmem.name = "physmem", #endif.bottom_up = false,.current_limit = MEMBLOCK_ALLOC_ANYWHERE, };系統(tǒng)會初始化 memory 和 reserved 的 128 個 memblock_region
bottom_up = false 默認的分配方式是從上到下
此時 memblock_region 是沒有可用內存的, 初次添加可用內存發生在 setup_machine_fdt() 中, 它會解析 device tree 中的內存物理信息, 然后調用函數 memblock_add 添加到 memblock_region 中去.
current_limit 在 kernel的啟動過程的 sanity_check_meminfo() 中設定
當 kernel 啟動到此處, 已經可以使用 memblock 來進行內存分配, 從而進行頁表的建立.
memblock_add
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) {phys_addr_t end = base + size - 1;memblock_dbg("memblock_add: [%pa-%pa] %pF\n",&base, &end, (void *)_RET_IP_);return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0); }int __init_memblock memblock_add_range(struct memblock_type *type,phys_addr_t base, phys_addr_t size,int nid, unsigned long flags) {bool insert = false;phys_addr_t obase = base;phys_addr_t end = base + memblock_cap_size(base, &size);int idx, nr_new;struct memblock_region *rgn;if (!size)return 0;第一次添加可用內(nèi)存到memory類型的memblock_region /* special case for empty array */if (type->regions[0].size == 0) {WARN_ON(type->cnt != 1 || type->total_size);type->regions[0].base = base;type->regions[0].size = size;type->regions[0].flags = flags;memblock_set_region_node(&type->regions[0], nid);type->total_size = size;return 0;} repeat:所有要添加的memblock region 都需要跑2次才能最終插入第一次是判斷是否需要插入,以及是否需要擴充存放memory region的數(shù)量大小第二次才是真正的添加操作base = obase;nr_new = 0;for_each_memblock_type(type, rgn) {phys_addr_t rbase = rgn->base;phys_addr_t rend = rbase + rgn->size;if (rbase >= end)break;找到需要輸入的節(jié)點位置if (rend <= base)continue;將非重疊的部分插入類型為memory的memblock_regionif (rbase > base) { #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAPWARN_ON(nid != memblock_get_region_node(rgn)); #endifWARN_ON(flags != rgn->flags);nr_new++;if (insert)memblock_insert_region(type, idx++, base,rbase - base, nid,flags);}/* area below @rend is dealt with, forget about it */base = min(rend, end);}插入剩余的部分 或者是 當前要添加的內(nèi)存區(qū)域需要插入到memblock_region的尾部if (base < end) {nr_new++;if (insert)memblock_insert_region(type, idx, base, end - base,nid, flags);}如果第一次執(zhí)行檢查發(fā)現(xiàn)要添加的內(nèi)存已經(jīng)全部重疊,則直接退出if (!nr_new)return 0;第一次執(zhí)行檢查是否需要調(diào)整內(nèi)存區(qū)數(shù)組大小,第二次執(zhí)行合并操作 if (!insert) {while (type->cnt + nr_new > type->max)if (memblock_double_array(type, obase, size) < 0)return -ENOMEM;insert = true;goto repeat;} else {檢查是否需要做合并的動作memblock_merge_regions(type);return 0;} 
}memblock_alloc
static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,phys_addr_t align, phys_addr_t start,phys_addr_t end, int nid) {phys_addr_t found;if (!align)align = SMP_CACHE_BYTES;在類型為memory的memblock_region中尋找合適的分配位置found = memblock_find_in_range_node(size, align, start, end, nid);將找到的內(nèi)存保存到類型為reserved的memblock_region中,代表已經(jīng)分配if (found && !memblock_reserve(found, size)) {/** The min_count is set to 0 so that memblock allocations are* never reported as leaks.*/檢查是否發(fā)生了memleakkmemleak_alloc(__va(found), size, 0, 0);return found;}return 0; }memblock_alloc 實際就是調(diào)用 memblock_alloc_range_nid 來實現(xiàn)分配
memblock_find_in_range_node
phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,phys_addr_t align, phys_addr_t start,phys_addr_t end, int nid) {phys_addr_t kernel_end, ret;/* pump up @end */if (end == MEMBLOCK_ALLOC_ACCESSIBLE)end = memblock.current_limit;永遠都不會分配第一個頁面start = max_t(phys_addr_t, start, PAGE_SIZE);end = max(start, end);kernel_end = __pa_symbol(_end);判斷分配的方向if (memblock_bottom_up() && end > kernel_end) {phys_addr_t bottom_up_start;/* make sure we will allocate above the kernel */bottom_up_start = max(start, kernel_end);/* ok, try bottom-up allocation first */ret = __memblock_find_range_bottom_up(bottom_up_start, end,size, align, nid);if (ret)return ret;}默認使用的是從上到下的方式尋找可用的內(nèi)存return __memblock_find_range_top_down(start, end, size, align, nid); }memblock_reserve
static int __init_memblock memblock_reserve_region(phys_addr_t base,phys_addr_t size,int nid,unsigned long flags) {struct memblock_type *_rgn = &memblock.reserved;memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n",(unsigned long long)base,(unsigned long long)base + size - 1,flags, (void *)_RET_IP_);return memblock_add_range(_rgn, base, size, nid, flags); }memblock_reserve 和 memblock_add 一樣也是調(diào)用memblock_add_range 來添加,只不過對象從memory變成了memblock.reserved.
總結
以上是生活随笔為你收集整理的深入浅出内存管理-memblock的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 讯时网站管理系统通杀0DAY漏洞
- 下一篇: XJTUSE专业课与实验指南(已经开源)