在系统启动的最初阶段,BIOS起主导作用,内核通过它来了解物理内存的情况:内核调用BIOS过程,建立一组物理地址范围及其对应的内存类型的列表。
内核通过machine_specific_memory_setup()函数来建立物理地址映射表。如果BIOS列表可以获取,内核就在该列表的基础上构建这张表;否则内核按照保守的缺省设置构建这张表:从0x9f(lowmemsize())到0x100(high_memory)号的所有页框都标记为保留。随后setup_memory函数利用这张表来初始化引导期的内存管理。

/*
 * setup_memory - set up the boot-time memory bitmap for low memory.
 *
 * Simplified listing.  Works out the first page frame at or above
 * init_pg_tables_end and the highest low-memory page frame, initialises
 * the bootmem bitmap, marks usable low RAM as free and then reserves the
 * regions that must not be handed out.
 *
 * Returns the value computed by find_max_low_pfn().
 */
static unsigned long __init setup_memory(void)
{
        unsigned long bootmap_size, start_pfn, max_low_pfn;

        /* First page frame at or above init_pg_tables_end. */
        start_pfn = PFN_UP(init_pg_tables_end);
        find_max_pfn();
        max_low_pfn = find_max_low_pfn();
        /* The bitmap is placed at start_pfn; bootmap_size is its size in bytes. */
        bootmap_size = init_bootmem(start_pfn, max_low_pfn);
        register_bootmem_low_pages(max_low_pfn);
        /*
         * Reserve everything from HIGH_MEMORY up to the end of the bootmem
         * bitmap (presumably this covers the kernel image as well - confirm).
         */
        reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
        /*
         * Reserve the first page.  reserve_bootmem() marks pages as reserved
         * by setting their bits in the bootmem bitmap to 1.
         */
        reserve_bootmem(0, PAGE_SIZE);
        reserve_ebda_region();
        /* On AMD CPUs with x86 == 6, also reserve the 4096 bytes just below 0xa0000. */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 == 6)
                reserve_bootmem(0xa0000 - 4096, 4096);
        return max_low_pfn;
}

函数经过简化变成最基本的形态。
其中,三个unsigned long类型的局部变量分别是bootmap_size(位图bitmap的大小,以字节为单位)、start_pfn(开始的页框号)和max_low_pfn(被内核直接映射的最后一个页框的页框号)。
/*
 * Round address x up to the next page boundary and return the resulting
 * page frame number.  No trailing semicolon: the macro must be usable
 * inside larger expressions.
 */
#define PFN_UP(x)       (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
/*
 * Initialised to ~0UL (all bits set).  setup_memory() feeds this value to
 * PFN_UP() to obtain start_pfn, so it is presumably overwritten with the
 * real end of the initial page tables earlier in boot - confirm.
 */
unsigned long init_pg_tables_end __initdata = ~0UL;
三个相关的宏:
/* Page frame number of the first page boundary at or above address x (rounds up). */
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
/* Page frame number of the page that contains address x (rounds down). */
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
/* Byte address (or byte count) corresponding to page frame number (or page count) x. */
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)

上面三个宏的作用是:
前两个是将地址x转换为页面号(PFN即Page Frame Number的缩写),
二者之间的区别为:PFN_UP()向上取整,返回不小于地址x的第一个页面边界所对应的页面号;而PFN_DOWN()向下取整,返回地址x所在页面的页面号。
宏PFN_PHYS()返回页面号x的物理地址。
所以这里的start_pfn也是如此:它由PFN_UP(init_pg_tables_end)得到,里面装载的是内核映像之后第一个完整页面的页面号。

而接下来的函数则是对于e820成果的应用。目的很明确:顺次遍历e820的各个区间,找到类型为RAM的内存区,并把其中最大的结束页框号放到max_pfn中。
/*
 * find_max_pfn - record the highest RAM page frame boundary in max_pfn.
 *
 * Walks every e820 map entry.  For each entry of type E820_RAM the start
 * address is rounded up and the end address rounded down to page frame
 * numbers; entries that shrink to nothing are ignored.  max_pfn ends up
 * as the largest end page frame number seen, or 0 if there was none.
 */
void __init find_max_pfn(void)
{
        int idx;

        max_pfn = 0;
        for (idx = 0; idx < e820.nr_map; idx++) {
                unsigned long first_pfn, limit_pfn;

                /* Only RAM entries contribute. */
                if (e820.map[idx].type == E820_RAM) {
                        first_pfn = PFN_UP(e820.map[idx].addr);
                        limit_pfn = PFN_DOWN(e820.map[idx].addr + e820.map[idx].size);

                        /* Need at least one whole page, and a new maximum. */
                        if (first_pfn < limit_pfn && limit_pfn > max_pfn)
                                max_pfn = limit_pfn;
                }
        }
}
接下来的工作是确定低端内存的上界,即能被内核直接映射的最后一个页框的页框号max_low_pfn;如果max_pfn超过了MAXMEM_PFN,超出的部分就记为高端内存(highmem_pages)。
max_low_pfn = find_max_low_pfn();
/* Page frame number corresponding to MAXMEM (rounded down). */
#define MAXMEM_PFN      PFN_DOWN(MAXMEM)
/*
 * Negated __PAGE_OFFSET minus __VMALLOC_RESERVE.  Presumably the largest
 * amount of physical memory the kernel can map directly - confirm.
 */
#define MAXMEM      (-__PAGE_OFFSET-__VMALLOC_RESERVE)

/*
 * find_max_low_pfn - compute the upper page frame limit of low memory.
 *
 * Starts from max_pfn.  If that exceeds MAXMEM_PFN, low memory is capped
 * at MAXMEM_PFN and the remainder is accounted as highmem; max_pfn and
 * highmem_pages are adjusted so that they agree with each other.
 * Otherwise all memory is low memory.
 *
 * Returns the low-memory limit as a page frame number.
 */
unsigned long __init find_max_low_pfn(void)
{
        unsigned long max_low_pfn;

        max_low_pfn = max_pfn;
        if (max_low_pfn > MAXMEM_PFN) {
                /* -1 looks like the "not configured" sentinel: take everything above MAXMEM_PFN. */
                if (highmem_pages == -1)
                        highmem_pages = max_pfn - MAXMEM_PFN;
                /* Less highmem requested than exists: shrink max_pfn to match. */
                if (highmem_pages + MAXMEM_PFN < max_pfn)
                        max_pfn = MAXMEM_PFN + highmem_pages;
                /* More highmem requested than exists: warn and drop highmem. */
                if (highmem_pages + MAXMEM_PFN > max_pfn) {
                        printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
                        highmem_pages = 0;
                }
                max_low_pfn = MAXMEM_PFN;
        } else {
                /* Everything fits in low memory, so there is no highmem. */
                if (highmem_pages == -1)
                        highmem_pages = 0;
        }
        return max_low_pfn;
}
/mm/bootmem.c
bootmem(bootmem allocator)的结构,仅仅用在系统引导时,它为低端物理内存(从0到max_low_pfn)建立起一个页面位图。
这个位图存放在从start_pfn开始的地方,也就是紧接在内核映象之后的页框中。这个位图用来管理低区(例如小于896MB),因为在0到896MB的范围内,有些页面可能保留,有些页面可能有空洞,因此,建立这个位图的目的就是要搞清楚哪一些物理页面是可以动态分配的。

 29 typedef struct bootmem_data {
 30         unsigned long node_boot_start;存放位图的第一个页面(内核映像结束处的第一个页面)
 31         unsigned long node_low_pfn;表示物理内存的顶点,最高不超过896mb.
 32         void *node_bootmem_map;指向bootmem位图
 33         unsigned long last_offset;用来存放前一次分配中所分配的最后一个字节相对于last_pos的位移量
 34         unsigned long last_pos;用来存放前一次分配的最后一个页面的页面号。用在__alloc_bootmem_core函数中,通过合并相邻的内存来减少内部碎片。
 35         unsigned long last_success;     /* Previous allocation point.  To speed up searching */
 37 } bootmem_data_t;

/* Per-node memory descriptor; NODE_DATA(nid) yields a pointer to one of these. */
250 typedef struct pglist_data {
251         struct zone node_zones[MAX_NR_ZONES];
252         struct zonelist node_zonelists[GFP_ZONETYPES];
253         int nr_zones;
254         struct page *node_mem_map;
255         struct bootmem_data *bdata; /* boot-time bitmap state, read by init_bootmem_core() and free_bootmem() */
256         unsigned long node_start_pfn;
257         unsigned long node_present_pages; /* total number of physical pages */
258         unsigned long node_spanned_pages; /* total size of physical page range, including holes */
260         int node_id;
261         struct pglist_data *pgdat_next; /* next node on the pgdat_list chain */
262         wait_queue_head_t kswapd_wait;
263         struct task_struct *kswapd;
264         int kswapd_max_order;
265 } pg_data_t;

可以看出,init_bootmem是通过NODE_DATA(0)对pglist_data指针数组node_data中第0个节点进行操作的。
bootmap_size = init_bootmem(start_pfn, max_low_pfn);
/*
 * init_bootmem - initialise the bootmem bitmap for node 0.
 * @start: page frame number at which the bitmap itself is stored
 * @pages: end page frame number of the range the bitmap covers
 *
 * Records both limits in min_low_pfn / max_low_pfn and hands the real
 * work to init_bootmem_core() with a covered range starting at frame 0.
 *
 * Returns whatever init_bootmem_core() returns (the bitmap size in bytes).
 */
unsigned long __init init_bootmem (unsigned long start, unsigned long pages)
{
        unsigned long bitmap_bytes;

        min_low_pfn = start;
        max_low_pfn = pages;

        bitmap_bytes = init_bootmem_core(NODE_DATA(0), start, 0, pages);
        return bitmap_bytes;
}

 /* Array of per-node descriptors, indexed by node id. */
 24 extern struct pglist_data *node_data[];
 /* Descriptor of node nid. */
 25 #define NODE_DATA(nid)          (node_data[nid])

/*
 * init_bootmem_core - set up the bootmem bitmap of one node.
 * @pgdat:    node descriptor whose bdata is initialised
 * @mapstart: page frame number where the bitmap is stored
 * @start:    first page frame number covered by the bitmap
 * @end:      end page frame number covered by the bitmap
 *
 * Links the node into pgdat_list, fills in the bootmem_data fields and
 * marks every page as reserved.
 *
 * Returns the size of the bitmap in bytes.
 */
static unsigned long __init init_bootmem_core (pg_data_t *pgdat,unsigned long mapstart, unsigned long start, unsigned long end)
{
        bootmem_data_t *bdata = pgdat->bdata;
        /* One bit per page in [start, end): number of bytes, rounded up. */
        unsigned long mapsize = ((end - start)+7)/8;

        /*
         * Push this node onto the head of pgdat_list: its next pointer
         * takes the previous head, then the node becomes the new head.
         */
        pgdat->pgdat_next = pgdat_list;
        pgdat_list = pgdat;

        /* Round the bitmap size up to a multiple of sizeof(long). */
        mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL);
        /* mapstart is a frame number: make it a physical address, then a virtual one. */
        bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
        /* Physical address at which the covered range starts. */
        bdata->node_boot_start = (start << PAGE_SHIFT);
        /* End page frame number of the covered range. */
        bdata->node_low_pfn = end;

        /*
         * Initially all pages are reserved - setup_arch() has to
         * register free RAM areas explicitly.
         */
        memset(bdata->node_bootmem_map, 0xff, mapsize);

        return mapsize;
}

register_bootmem_low_pages函数顺次遍历e820上记录的每个区间:不是RAM的区间直接跳过;对于RAM区间,将起始地址向上、结束地址向下对齐到页边界,并把结束页框号限制在max_low_pfn以内,然后将该范围的页面标记为空闲,也就是将位图中对应的位清零。

/*
 * register_bootmem_low_pages - mark usable low RAM as free in the bootmem bitmap.
 * @max_low_pfn: page frame limit; nothing at or above it is freed
 *
 * Walks the e820 map and, for every E820_RAM entry, frees the whole pages
 * it contains that lie below max_low_pfn.
 */
977 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
978 {
979         int i;
985         for (i = 0; i < e820.nr_map; i++) {
986                 unsigned long curr_pfn, last_pfn, size;
987                 /* Reserve usable low memory */
990                 if (e820.map[i].type != E820_RAM)
991                         continue;
992                 /* We are rounding up the start address of usable memory: */
995                 curr_pfn = PFN_UP(e820.map[i].addr);
996                 if (curr_pfn >= max_low_pfn)
997                         continue;
998                 /* ... and at the end of the usable range downwards: */
1001                 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1002
1003                 if (last_pfn > max_low_pfn)
1004                         last_pfn = max_low_pfn;
1005                 /* .. finally, did all the rounding and playing around just make the area go away? */
1010                 if (last_pfn <= curr_pfn)
1011                         continue;
1012                 size = last_pfn - curr_pfn;
                     /* Convert the start frame and the page count to a byte address and a byte length. */
1014                 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1015         }
1016 }
通过宏PFN_PHYS将起始页框号和页面数分别转换为物理地址和字节数,再传入free_bootmem函数。
/*
 * free_bootmem - mark a byte range as free in node 0's bootmem bitmap.
 * @addr: start address of the range
 * @size: length of the range in bytes
 */
362 void __init free_bootmem (unsigned long addr, unsigned long size)
363 {
364         free_bootmem_core(NODE_DATA(0)->bdata, addr, size);
365 }
eidx(end index)等于(addr + size - node_boot_start)除以页面大小,即结束位置相对于node_boot_start的页面索引;end则是结束位置的绝对页面号。start是把addr向上取整后得到的第一个完整页面的页面号,sidx(start index)是它相对于node_boot_start的页面索引。最后把位图中[sidx, eidx)范围内的位清零,将这些页面标记为空闲。
/*
 * free_bootmem_core - clear the bitmap bits for the whole pages inside a byte range.
 * @bdata: bootmem state of the node
 * @addr:  start address of the range
 * @size:  length of the range in bytes; must be non-zero
 *
 * Triggers BUG if size is zero, if the range ends above node_low_pfn, or
 * if any page in the range was already marked free.
 */
104 static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
105 {
106         unsigned long i;
107         unsigned long start;
112         unsigned long sidx;
            /* End of the range as a page index relative to node_boot_start (rounded down). */
113         unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE;
            /* End of the range as an absolute page number. */
114         unsigned long end = (addr + size)/PAGE_SIZE;
115
116         BUG_ON(!size);
117         BUG_ON(end > bdata->node_low_pfn);
118
            /* Pull last_success back so that it is not above the freed address. */
119         if (addr < bdata->last_success)
120                 bdata->last_success = addr;
            /* Round the start up: a partially covered first page is not freed. */
125         start = (addr + PAGE_SIZE-1) / PAGE_SIZE;
126         sidx = start - (bdata->node_boot_start/PAGE_SIZE);
127
            /* Clear each bit; a bit that was already clear is treated as a bug. */
128         for (i = sidx; i < eidx; i++) {
129                 if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
130                         BUG();
131         }
132 }

Logo

瓜分20万奖金 获得内推名额 丰厚实物奖励 易参与易上手

更多推荐