/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2. See the file COPYING for more details.
 */

#include <linux/linkage.h>
#include <asm/page.h>
#include <asm/kexec.h>

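/*
 * Must be relocatable PIC code callable as a C function.
 *
 * The assembly stub performs the following steps:
 *   - Reads its arguments from the stack into registers, then disables
 *     interrupts.
 *   - Sets up a stack at the end of a page whose address is passed in as an
 *     argument (the control page, PA_CONTROL_PAGE in the list of pages).
 *   - Stores the start address of the new kernel image on that stack, so that
 *     the stub's final "ret" transfers control to the new kernel image.
 *   - Clears the appropriate bits in cr0 to disable paging.
 *   - Resets cr4 to 0.  (The source article calls cr4 the page-directory base
 *     register; that register is cr3.  cr4 holds CPU feature-control bits.)
 *   - Flushes the TLB (Translation Lookaside Buffers).
 *   - Copies all kernel image pages to their final destination pages.
 *   - Flushes the TLB again.
 *   - Resets every register to 0 except the stack pointer %esp, which points
 *     at the stack slot holding the new kernel's start address.
 *   - "Returns" from the stub, which boots the system into the new kernel.
 *
 * Once this sequence has completed the new kernel has control and the system
 * boots normally.
 * (Step list adapted from https://www.ibm.com/developerworks/cn/linux/l-kexec/)
 */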

/*
 * PTR(x): byte offset of slot x in the list of pages, whose slots are 4 bytes
 * wide (hence the shift by 2).  The argument is parenthesized so that an
 * expression argument such as PTR(a + 1) scales the whole expression and not
 * only its last term.
 */
#define PTR(x) ((x) << 2)
/* Alignment requested for the relocation stub: (1 << PAGE_SHIFT) bytes. */
#define PAGE_ALIGNED (1 << PAGE_SHIFT)
/* Attribute bits OR-ed into the page-directory and page-table entries below. */
#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */

    .text
    .align PAGE_ALIGNED
    .globl relocate_kernel
relocate_kernel:
    movl 8(%esp), %ebp /* list of pages */

#ifdef CONFIG_X86_PAE
    //略去PAE部分
#else
    /* map the control page at its virtual address */
 page_list->+----------------------------------+ <--ebp
                    0 | PA_CONTROL_PAGE |
                       |-----------------------------------|
                    1 | VA_CONTROL_PAGE |
                       |-----------------------------------|
                    2 | PA_PGD |
                       |-----------------------------------|
                    3 | VA_PGD |
                       |-----------------------------------|
                   4 | PA_PTE_0 |
                       |-----------------------------------|
                    5 | VA_PTE_0 |
                       |-----------------------------------|
                    6 | PA_PTE_1 |
                       |-----------------------------------|
                    7 | VA_PTE_1 |
                        +---------------------------------+
    //其实就是将PA_PTE_0和PA_PTE_1的地址存到页全局目录PGD中(通过VA_PGD定位),PA_CONTROL_PAGE的物理地址分别存到页表PA_PTE_0和PA_PTE_1中,
    //使之可以通过PA和VA两种方式来访问PA_CONTROL_PAGE页
    movl PTR(VA_PGD)(%ebp), %edi
    movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
    andl $0xffc00000, %eax //取高10位,得到偏移量(单位为4字节)
    shrl $20, %eax //取VA_CONTROL_PAGE对应的页地址,取高12位(本该右移22位,但考虑之后还有X4字节得到页表在pgd的地址,才右移20位)
    addl %edi, %eax //与VA_PGD的页地址相加,这个应该是PA_PTE_0在全局页目录(VA_PGD)中对应的页目录项的地址

    movl PTR(PA_PTE_0)(%ebp), %edx
    orl $PAGE_ATTR, %edx //1.取PA_PTE_0对应的页地址,并加上末12位的页属性(0x63),然后存到上述eax对应的地址所代表的内存中
    movl %edx, (%eax)

    movl PTR(VA_PTE_0)(%ebp), %edi
    movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
    andl $0x003ff000, %eax //取中间10位
    shrl $10, %eax //本应右移12位,但考虑之后还有X4字节得到页在页表中的地址,才右移10位
    addl %edi, %eax //与VA_PTE_0的页地址相加,这个应该是VA_CONTROL_PAGE在页表(VA_PTE_0)中对应的页表项的地址

    movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
    orl $PAGE_ATTR, %edx
    movl %edx, (%eax) //2.取PA_CONTROL_PAGE对应的页地址,并加上末12位的页属性(0x63),然后存到上述eax对应的地址所代表的内存中

    /* identity map the control page at its physical address */

    movl PTR(VA_PGD)(%ebp), %edi
    movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
    andl $0xffc00000, %eax
    shrl $20, %eax
    addl %edi, %eax

    movl PTR(PA_PTE_1)(%ebp), %edx
    orl $PAGE_ATTR, %edx
    movl %edx, (%eax) //3.

    movl PTR(VA_PTE_1)(%ebp), %edi
    movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
    andl $0x003ff000, %eax
    shrl $10, %eax
    addl %edi, %eax

    movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
    orl $PAGE_ATTR, %edx
    movl %edx, (%eax) //4.
#endif

/*
 * relocate_new_kernel - load the arguments into registers, switch to the new
 * page tables and jump to identity_mapped through the control page's
 * physical address.
 *
 * Stack on entry, as read by the loads below:
 *
 *     16(%esp)  cpu_has_pae     -> %ecx
 *     12(%esp)  start address   -> %edx
 *      8(%esp)  list of pages   -> %ebp
 *      4(%esp)  page_list       -> %ebx
 *      0(%esp)  not read here (presumably the caller's return address)
 *
 * Also leaves the control page's physical address in %edi and replaces %esp;
 * the original stack is not used again after this point.
 */
relocate_new_kernel:
    /* read the arguments and say goodbye to the stack */
    /* (the page tables are about to be switched) */
    movl 4(%esp), %ebx /* page_list */
    movl 8(%esp), %ebp /* list of pages */
    movl 12(%esp), %edx /* start address */
    movl 16(%esp), %ecx /* cpu_has_pae */

    /* zero out flags, and disable interrupts */
    pushl $0
    popfl

    /* get physical address of control page now */
    /* this is impossible after page table switch */
    movl PTR(PA_CONTROL_PAGE)(%ebp), %edi

    /* switch to new set of page tables; the old kernel's mappings are left behind */
    movl PTR(PA_PGD)(%ebp), %eax
    movl %eax, %cr3

    /* setup a new stack at the end of the physical control page */
    lea 4096(%edi), %esp

    /*
     * jump to identity mapped page
     *
     * The target is the control page's physical address plus the offset of
     * identity_mapped from relocate_kernel; push + ret performs the jump.
     * This presumes a copy of this code sits at the start of the control
     * page (set up by the caller, not shown here), so that the code and the
     * new stack share that page.
     */
    movl %edi, %eax
    addl $(identity_mapped - relocate_kernel), %eax
    pushl %eax
    ret

identity_mapped:
    /* store the start address on the stack */
    pushl %edx /* popped by the final ret of this block, which thereby jumps to the new kernel's entry point */

    /* Clear the relevant cr0 bits to disable paging, then clear cr4. */
    /* Set cr0 to a known state:
     * 31 0 == Paging disabled
     * 18 0 == Alignment check disabled
     * 16 0 == Write protect disabled
     * 3 0 == No task switch
     * 2 0 == Don't do FP software emulation.
     * 0 1 == Protected mode enabled
     */
    movl %cr0, %eax
    andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
    orl $(1<<0), %eax
    movl %eax, %cr0 /* paging is disabled from here on */

    /* clear cr4 if applicable */
    /* (skipped when %ecx is zero; %ecx was loaded from the cpu_has_pae argument) */
    testl %ecx, %ecx
    jz 1f
    /* Set cr4 to a known state:
     * Setting everything to zero seems safe.
     */
    movl %cr4, %eax
    andl $0, %eax
    movl %eax, %cr4 /* cr4 = 0 (the page-directory base register is cr3, not cr4) */

    /* NOTE(review): jump to the immediately following label; presumably kept
     * as a control-flow break after the control-register writes -- confirm. */
    jmp 1f
1:

    /* Flush the TLB (needed?) */
    xorl %eax, %eax
    movl %eax, %cr3
    /*
     * Copy all kernel image pages to their final destination pages.
     *
     * Annotator's note (unverified): according to some online material, this
     * step copies the old kernel image elsewhere in memory, overwrites the old
     * kernel and then starts the new kernel; the annotator had not fully
     * understood this part.
     *
     * What the loop below does: it reads a sequence of 32-bit entry words.
     * The low bits of each word select an action and the word with its low
     * 12 bits masked off is a page address:
     *   0x1  destination page   -> %edi
     *   0x2  indirection page   -> %ebx (continue reading entries from there)
     *   0x4  done               -> leave the loop
     *   0x8  source page        -> %esi, then copy 1024 dwords to (%edi)
     * Words with none of these bits set are ignored.
     */

    /* Do the copies */
    movl %ebx, %ecx /* first entry word is the page_list argument itself (image->head per the annotator) */
    jmp 1f

0: /* top, read another word from the indirection page */
    movl (%ebx), %ecx
    addl $4, %ebx
1:
    testl $0x1, %ecx /* is it a destination page */
    jz 2f
    movl %ecx, %edi
    andl $0xfffff000, %edi
    jmp 0b
2:
    testl $0x2, %ecx /* is it an indirection page */
    jz 2f
    movl %ecx, %ebx
    andl $0xfffff000, %ebx
    jmp 0b
2:
    testl $0x4, %ecx /* is it the done indicator */
    jz 2f
    jmp 3f
2:
    testl $0x8, %ecx /* is it the source indicator */
    jz 0b /* Ignore it otherwise */
    movl %ecx, %esi /* For every source page do a copy */
    andl $0xfffff000, %esi

    movl $1024, %ecx
    rep ; movsl /* repeat movsl %ecx (1024) times: 4096 bytes from (%esi) to (%edi) */
    jmp 0b
    /* end of the copy loop */

3:

    /* To be certain of avoiding problems with self-modifying code
     * I need to execute a serializing instruction here.
     * So I flush the TLB, it's handy, and not processor dependent.
     * (This is the second TLB flush.)
     */
    xorl %eax, %eax
    movl %eax, %cr3

    /* set all of the registers to known values */
    /* leave %esp alone */
    /* Every register except %esp is reset to 0; %esp still points at the stack
     * slot holding the new kernel's start address. */

    xorl %eax, %eax
    xorl %ebx, %ebx
    xorl %ecx, %ecx
    xorl %edx, %edx
    xorl %esi, %esi
    xorl %edi, %edi
    xorl %ebp, %ebp
    ret /* "return" from the stub: pops the start address pushed above and so enters the new kernel */




/*
 * References:
 * https://www.ibm.com/developerworks/cn/linux/l-kexec/
 * http://cncc.bingj.com/cache.aspx?q=linux%E5%86%85%E6%A0%B8%E8%B0%83%E8%AF%95++shangshuwu&d=4961035422072913&mkt=zh-CN&setlang=zh-CN&w=c6c2978d,2e8ac776
 */
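/*
 * (Residue of the web page this annotated listing was copied from; not part
 * of the source file.)
 *
 * Logo
 *
 * The OpenAtom Developer Workshop aims to encourage more people to take part
 * in open-source activities, to exchange development experience with
 * like-minded developers, to share what they have learned and to keep up with
 * cutting-edge technology trends. The workshop runs developer events in many
 * formats, such as meetups and training camps, with a focus on technical
 * exchange and practical content, and sincerely invites all developers to
 * take part!
 *
 * More recommendations
 */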