进程创建的时候,Linux内核会为其创建内核栈(arm手册也要求内核态有单独的栈)。例如应用进程在用户态通过系统调用陷入内核态的时候,上下文信息(如cpu寄存器)需要有个地方保存,这样从内核态切换回用户态的时候,才能继续从系统调用之后的代码开始执行,这个保存的地方就是进程的内核栈。本文主要描述arm32下内核栈的生成过程和结构。
1.内核栈数据结构
正如进程在用户态执行函数跳转有一个栈,在内核态执行的时候同样有一个内核态的栈。分成两个栈也是出于安全的考虑:如果都使用用户态的栈,那么内核的数据就可能被用户态访问,不安全。我们不禁要问如下几个问题:
- 内核栈大小/结构/创建过程
- 怎么找到内核栈(哪些数据结构和API可以索引到)
标识进程的核心数据结构task_struct中有一个void *stack成员指向进程内核栈:
/* Excerpt of the per-task descriptor; only the members related to the kernel stack are shown. */
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
* For reasons of header soup (see current_thread_info()), this
* must be the first element of task_struct.
*/
struct thread_info thread_info;
#endif
/* Pointer returned by task_stack_page(); task_pt_regs() locates the saved pt_regs relative to it. */
void * stack;
/* remaining members elided in this excerpt */
...
}
目前平台没有配置 CONFIG_THREAD_INFO_IN_TASK,所以thread_info没有嵌入task_struct,而是放在了stack指向的内存(即内核栈所在的内存区域)中。thread_info中存储了体系结构相关的信息。arm32内核栈大小为8KB(PAGE_SIZE为4KB时):
// ARM: the kernel stack spans 2^THREAD_SIZE_ORDER pages, i.e. 8 KiB
// (assumes PAGE_SIZE is 4 KiB; PAGE_SIZE is not defined in this excerpt)
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
// Byte offset from the stack base to a point 8 bytes below the end of the stack area
#define THREAD_START_SP (THREAD_SIZE - 8)
2.内核栈相关的API和数据结构
- task_stack_page
/*
* Return the stack pointer member of @task, i.e. the address that
* task_pt_regs() uses as the base of the task's kernel stack area.
*/
static inline void *task_stack_page(const struct task_struct *task)
{
return task->stack;
}
- task_pt_regs
/*
* Locate the struct pt_regs of task p inside its kernel stack: take the
* address THREAD_START_SP bytes above the stack base, treat it as a
* struct pt_regs pointer and step back one whole struct pt_regs, so the
* structure ends exactly at base + THREAD_START_SP.
* The byte-wise addition on the void * returned by task_stack_page()
* relies on the GNU C void-pointer arithmetic extension.
*/
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
- pt_regs
/*
* Saved register frame: 18 machine words, accessed through the ARM_*
* index macros below.
*/
struct pt_regs {
unsigned long uregs[18];
};
/* Slot 16: program status register (cpsr) */
#define ARM_cpsr uregs[16]
/* Slots 15..11: pc, lr, sp, ip, fp */
#define ARM_pc uregs[15]
#define ARM_lr uregs[14]
#define ARM_sp uregs[13]
#define ARM_ip uregs[12]
#define ARM_fp uregs[11]
/* Slots 10..0: general-purpose registers r10..r0 */
#define ARM_r10 uregs[10]
#define ARM_r9 uregs[9]
#define ARM_r8 uregs[8]
#define ARM_r7 uregs[7]
#define ARM_r6 uregs[6]
#define ARM_r5 uregs[5]
#define ARM_r4 uregs[4]
#define ARM_r3 uregs[3]
#define ARM_r2 uregs[2]
#define ARM_r1 uregs[1]
#define ARM_r0 uregs[0]
/* Slot 17: NOTE(review): presumably the original r0 kept for syscall restart -- confirm */
#define ARM_ORIG_r0 uregs[17]
进程从用户态陷入内核态时候,用户态的上下文信息保存在pt_regs数据结构中。
- struct thread_info
/*
* low level task data that entry.S needs immediate access to.
* __switch_to() assumes cpu_context follows immediately after cpu_domain.
*
* Member order matters: the assembly code addresses members through
* fixed offsets (TI_CPU_SAVE, TI_CPU_DOMAIN, TI_TP_VALUE, TI_TASK).
*/
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => bug */
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
__u32 cpu; /* cpu */
__u32 cpu_domain; /* cpu domain */
struct cpu_context_save cpu_context; /* cpu context: registers saved and reloaded by __switch_to() */
__u32 syscall; /* syscall number */
__u8 used_cp[16]; /* thread used copro */
unsigned long tp_value[2]; /* TLS registers */
#ifdef CONFIG_CRUNCH
struct crunch_state crunchstate;
#endif
union fp_state fpstate __attribute__((aligned(8)));
union vfp_state vfpstate;
#ifdef CONFIG_ARM_THUMBEE
unsigned long thumbee_state; /* ThumbEE Handler Base register */
#endif
/* NOTE(review): the two "aee" members appear to be vendor additions for exception reporting -- confirm */
void *regs_on_excp; /* aee */
int cpu_excp; /* aee */
};
/*
* Register save area used by __switch_to(). The member order matches the
* register list {r4 - sl, fp, sp, lr} stored with stmia for the outgoing
* task and the list {r4 - sl, fp, sp, pc} reloaded with ldmia for the
* incoming task, so it must not be rearranged.
*/
struct cpu_context_save {
__u32 r4;
__u32 r5;
__u32 r6;
__u32 r7;
__u32 r8;
__u32 r9;
__u32 sl;
__u32 fp;
__u32 sp; /* kernel-mode stack pointer loaded on switch-in */
__u32 pc; /* written from lr on switch-out, loaded into pc on switch-in */
__u32 extra[2]; /* Xscale 'acc' register, etc */
};
3.内核态SP寄存器
我们知道进程在内核态执行的时候,sp寄存器指向了内核栈,为什么内核的sp寄存器指向进程内核栈?这是什么时候设置的?
答案:进程上下文切换的时候(switch_to汇编)
首先,进程创建的时候,copy_thread会初始化新进程的内核栈(内核栈本身在调用copy_thread之前已经分配好,所以这里可以直接通过task_pt_regs(p)取到其中的pt_regs),并将内核栈中pt_regs的地址保存在thread_info->cpu_context.sp中,代码如下:
//参数p时指新建进程的task_struct
int
copy_thread(unsigned long clone_flags, unsigned long stack_start,
{
struct thread_info *thread = task_thread_info(p);
struct pt_regs *childregs = task_pt_regs(p);
memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
#ifdef CONFIG_CPU_USE_DOMAINS
/*
* Copy the initial value of the domain access control register
* from the current thread: thread->addr_limit will have been
* copied from the current thread via setup_thread_stack() in
* kernel/fork.c
*/
thread->cpu_domain = get_domain();
#endif
if (likely(!(p->flags & PF_KTHREAD))) {
*childregs = *current_pt_regs();
childregs->ARM_r0 = 0;
if (stack_start)
childregs->ARM_sp = stack_start;
} else {
memset(childregs, 0, sizeof(struct pt_regs));
thread->cpu_context.r4 = stk_sz;
thread->cpu_context.r5 = stack_start;
childregs->ARM_cpsr = SVC_MODE;
}
thread->cpu_context.pc = (unsigned long)ret_from_fork;
thread->cpu_context.sp = (unsigned long)childregs;
clear_ptrace_hw_breakpoint(p);
if (clone_flags & CLONE_SETTLS)
thread->tp_value[0] = childregs->ARM_r3;
thread->tp_value[1] = get_tpuser();
thread_notify(THREAD_NOTIFY_COPY, thread);
return 0;
}
thread->cpu_context.pc = (unsigned long) ret_from_fork设置新建进程的执行入口是ret_from_fork函数。
thread->cpu_context.sp = (unsigned long)childregs;thread_info成员cpu_context的sp成员指向了内核栈的pt_regs数据结构,pt_regs保存了用户态的通用寄存器。
上下文切换时,switch_to函数会将thread->cpu_context中保存的值加载到cpu的寄存器中,其中的sp就被设置到了内核态的sp寄存器中:
/*
* Register switch for ARMv3 and ARMv4 processors
* r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
* previous and next are guaranteed not to be the same.
*/
ENTRY(__switch_to)
UNWIND(.fnstart )
UNWIND(.cantunwind )
add ip, r1, #TI_CPU_SAVE @ ip points at the outgoing task's thread_info->cpu_context
ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack, i.e. save them into cpu_context
THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack
THUMB( str sp, [ip], #4 )
THUMB( str lr, [ip], #4 )
ldr r4, [r2, #TI_TP_VALUE]
ldr r5, [r2, #TI_TP_VALUE + 4]
#ifdef CONFIG_CPU_USE_DOMAINS
mrc p15, 0, r6, c3, c0, 0 @ Get domain register
str r6, [r1, #TI_CPU_DOMAIN] @ Save old domain register
ldr r6, [r2, #TI_CPU_DOMAIN]
#endif
switch_tls r1, r4, r5, r3, r7
#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
ldr r7, [r2, #TI_TASK]
ldr r8, =__stack_chk_guard
.if (TSK_STACK_CANARY > IMM12_MASK)
add r7, r7, #TSK_STACK_CANARY & ~IMM12_MASK
.endif
ldr r7, [r7, #TSK_STACK_CANARY & IMM12_MASK]
#endif
#ifdef CONFIG_CPU_USE_DOMAINS
mcr p15, 0, r6, c3, c0, 0 @ Set domain register
#endif
mov r5, r0
add r4, r2, #TI_CPU_SAVE @ r4 points at the incoming task's cpu_context
ldr r0, =thread_notify_head
mov r1, #THREAD_NOTIFY_SWITCH
bl atomic_notifier_call_chain
#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
str r7, [r8]
#endif
THUMB( mov ip, r4 )
mov r0, r5
ARM( ldmia r4, {r4 - sl, fp, sp, pc} ) @ Load all regs saved previously, i.e. load the cpu_context values into the CPU registers (sp and pc included)
THUMB( ldmia ip!, {r4 - sl, fp} ) @ Load all regs saved previously
THUMB( ldr sp, [ip], #4 )
THUMB( ldr pc, [ip] )
UNWIND(.fnend )
ENDPROC(__switch_to)
ARM( ldmia r4, {r4 - sl, fp, sp, pc} )会将换入进程thread_info->cpu_context中的值加载到cpu寄存器中。由上面的分析我们知道,进程创建的时候thread->cpu_context.sp = (unsigned long)childregs,这样childregs的值会被加载到cpu的sp寄存器,即内核态下sp指向了内核栈(更具体地说是内核栈中的pt_regs)。