* 18(%esp) - %eax
* 1C(%esp) - %ds
* 20(%esp) - %es
- * 24(%esp) - %gs
+ * 24(%esp) - %fs
* 28(%esp) - orig_eax
* 2C(%esp) - %eip
* 30(%esp) - %cs
#include <asm/dwarf2.h>
#include "irq_vectors.h"
+/*
+ * We use macros for low-level operations which need to be overridden
+ * for paravirtualization. The following will never clobber any registers:
+ * INTERRUPT_RETURN (aka. "iret")
+ * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
+ * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
+ *
+ * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
+ * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
+ * Allowing a register to be clobbered can shrink the paravirt replacement
+ * enough to patch inline, increasing performance.
+ */
+
#define nr_syscalls ((syscall_table_size)/4)
CF_MASK = 0x00000001
NT_MASK = 0x00004000
VM_MASK = 0x00020000
-/* These are replaces for paravirtualization */
-#define DISABLE_INTERRUPTS cli
-#define ENABLE_INTERRUPTS sti
-#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
-#define INTERRUPT_RETURN iret
-#define GET_CR0_INTO_EAX movl %cr0, %eax
-
#ifdef CONFIG_PREEMPT
-#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
+#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
-#define preempt_stop
+#define preempt_stop(clobbers)
#define resume_kernel restore_nocheck
#endif
#define SAVE_ALL \
cld; \
- pushl %gs; \
+ pushl %fs; \
CFI_ADJUST_CFA_OFFSET 4;\
- /*CFI_REL_OFFSET gs, 0;*/\
+ /*CFI_REL_OFFSET fs, 0;*/\
pushl %es; \
CFI_ADJUST_CFA_OFFSET 4;\
/*CFI_REL_OFFSET es, 0;*/\
movl %edx, %ds; \
movl %edx, %es; \
movl $(__KERNEL_PDA), %edx; \
- movl %edx, %gs
+ movl %edx, %fs
#define RESTORE_INT_REGS \
popl %ebx; \
2: popl %es; \
CFI_ADJUST_CFA_OFFSET -4;\
/*CFI_RESTORE es;*/\
-3: popl %gs; \
+3: popl %fs; \
CFI_ADJUST_CFA_OFFSET -4;\
- /*CFI_RESTORE gs;*/\
+ /*CFI_RESTORE fs;*/\
.pushsection .fixup,"ax"; \
4: movl $0,(%esp); \
jmp 1b; \
CFI_ADJUST_CFA_OFFSET -4
jmp syscall_exit
CFI_ENDPROC
+END(ret_from_fork)
/*
* Return to user mode is not as complex as all this looks,
ALIGN
RING0_PTREGS_FRAME
ret_from_exception:
- preempt_stop
+ preempt_stop(CLBR_ANY)
ret_from_intr:
GET_THREAD_INFO(%ebp)
check_userspace:
jb resume_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
movl TI_flags(%ebp), %ecx
# int/exception return?
jne work_pending
jmp restore_all
+END(ret_from_exception)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
- DISABLE_INTERRUPTS
+ DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_nocheck
need_resched:
jz restore_all
call preempt_schedule_irq
jmp need_resched
+END(resume_kernel)
#endif
CFI_ENDPROC
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
*/
- ENABLE_INTERRUPTS
+ ENABLE_INTERRUPTS(CLBR_NONE)
pushl $(__USER_DS)
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ss, 0*/
pushl $(__USER_CS)
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET cs, 0*/
+#ifndef CONFIG_COMPAT_VDSO
/*
* Push current_thread_info()->sysenter_return to the stack.
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
+#else
+ pushl $SYSENTER_RETURN
+#endif
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eip, 0
jae syscall_badsys
call *sys_call_table(,%eax,4)
movl %eax,PT_EAX(%esp)
- DISABLE_INTERRUPTS
+ DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
movl PT_OLDESP(%esp), %ecx
xorl %ebp,%ebp
TRACE_IRQS_ON
-1: mov PT_GS(%esp), %gs
+1: mov PT_FS(%esp), %fs
ENABLE_INTERRUPTS_SYSEXIT
CFI_ENDPROC
.pushsection .fixup,"ax"
-2: movl $0,PT_GS(%esp)
+2: movl $0,PT_FS(%esp)
jmp 1b
.section __ex_table,"a"
.align 4
.long 1b,2b
.popsection
+ENDPROC(sysenter_entry)
# system call handler stub
ENTRY(system_call)
call *sys_call_table(,%eax,4)
movl %eax,PT_EAX(%esp) # store the return value
syscall_exit:
- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
TRACE_IRQS_OFF
.section .fixup,"ax"
iret_exc:
TRACE_IRQS_ON
- ENABLE_INTERRUPTS
+ ENABLE_INTERRUPTS(CLBR_NONE)
pushl $0 # no error code
pushl $do_iret_error
jmp error_code
jnz restore_nocheck
testl $0x00400000, %eax # returning to 32bit stack?
jnz restore_nocheck # allright, normal return
+
+#ifdef CONFIG_PARAVIRT
+ /*
+ * The kernel can't run on a non-flat stack if paravirt mode
+ * is active. Rather than try to fixup the high bits of
+ * ESP, bypass this code entirely. This may break DOSemu
+ * and/or Wine support in a paravirt VM, although the option
+ * is still available to implement the setting of the high
+ * 16-bits in the INTERRUPT_RETURN paravirt-op.
+ */
+ cmpl $0, paravirt_ops+PARAVIRT_enabled
+ jne restore_nocheck
+#endif
+
/* If returning to userspace with 16bit stack,
* try to fix the higher word of ESP, as the CPU
* won't restore it.
CFI_ADJUST_CFA_OFFSET 4
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
- DISABLE_INTERRUPTS
+ DISABLE_INTERRUPTS(CLBR_EAX)
TRACE_IRQS_OFF
lss (%esp), %esp
CFI_ADJUST_CFA_OFFSET -8
jmp restore_nocheck
CFI_ENDPROC
+ENDPROC(system_call)
# perform work that needs to be done immediately before resumption
ALIGN
jz work_notifysig
work_resched:
call schedule
- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
TRACE_IRQS_OFF
work_notifysig: # deal with pending signals and
# notify-resume requests
+#ifdef CONFIG_VM86
testl $VM_MASK, PT_EFLAGS(%esp)
movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or
ALIGN
work_notifysig_v86:
-#ifdef CONFIG_VM86
pushl %ecx # save ti_flags for do_notify_resume
CFI_ADJUST_CFA_OFFSET 4
call save_v86_state # %eax contains pt_regs pointer
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
movl %eax, %esp
+#else
+ movl %esp, %eax
+#endif
xorl %edx, %edx
call do_notify_resume
jmp resume_userspace_sig
-#endif
+END(work_pending)
# perform syscall exit tracing
ALIGN
cmpl $(nr_syscalls), %eax
jnae syscall_call
jmp syscall_exit
+END(syscall_trace_entry)
# perform syscall exit tracing
ALIGN
testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
jz work_pending
TRACE_IRQS_ON
- ENABLE_INTERRUPTS # could let do_syscall_trace() call
+ ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
# schedule() instead
movl %esp, %eax
movl $1, %edx
call do_syscall_trace
jmp resume_userspace
+END(syscall_exit_work)
CFI_ENDPROC
RING0_INT_FRAME # can't unwind into user space anyway
GET_THREAD_INFO(%ebp)
movl $-EFAULT,PT_EAX(%esp)
jmp resume_userspace
+END(syscall_fault)
syscall_badsys:
movl $-ENOSYS,PT_EAX(%esp)
jmp resume_userspace
+END(syscall_badsys)
CFI_ENDPROC
#define FIXUP_ESPFIX_STACK \
/* since we are on a wrong stack, we cant make it a C code :( */ \
- GET_THREAD_INFO(%ebp); \
- movl TI_cpu(%ebp), %ebx; \
+ movl %fs:PDA_cpu, %ebx; \
PER_CPU(cpu_gdt_descr, %ebx); \
movl GDS_address(%ebx), %ebx; \
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
ENTRY(interrupt)
.text
-vector=0
ENTRY(irq_entries_start)
RING0_INT_FRAME
+vector=0
.rept NR_IRQS
ALIGN
.if vector
1: pushl $~(vector)
CFI_ADJUST_CFA_OFFSET 4
jmp common_interrupt
-.data
+ .previous
.long 1b
-.text
+ .text
vector=vector+1
.endr
+END(irq_entries_start)
+
+.previous
+END(interrupt)
+.previous
/*
* the CPU automatically disables interrupts when executing an IRQ vector,
movl %esp,%eax
call do_IRQ
jmp ret_from_intr
+ENDPROC(common_interrupt)
CFI_ENDPROC
#define BUILD_INTERRUPT(name, nr) \
movl %esp,%eax; \
call smp_/**/name; \
jmp ret_from_intr; \
- CFI_ENDPROC
+ CFI_ENDPROC; \
+ENDPROC(name)
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
+/* This alternate entry is needed because we hijack the apic LVTT */
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
+BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
+#endif
+
KPROBE_ENTRY(page_fault)
RING0_EC_FRAME
pushl $do_page_fault
CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
- /* the function address is in %gs's slot on the stack */
+ /* the function address is in %fs's slot on the stack */
pushl %es
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET es, 0*/
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
cld
- pushl %gs
+ pushl %fs
CFI_ADJUST_CFA_OFFSET 4
- /*CFI_REL_OFFSET gs, 0*/
+ /*CFI_REL_OFFSET fs, 0*/
movl $(__KERNEL_PDA), %ecx
- movl %ecx, %gs
+ movl %ecx, %fs
UNWIND_ESPFIX_STACK
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
/*CFI_REGISTER es, ecx*/
- movl PT_GS(%esp), %edi # get the function address
+ movl PT_FS(%esp), %edi # get the function address
movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
- mov %ecx, PT_GS(%esp)
- /*CFI_REL_OFFSET gs, ES*/
+ mov %ecx, PT_FS(%esp)
+ /*CFI_REL_OFFSET fs, ES*/
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(coprocessor_error)
ENTRY(simd_coprocessor_error)
RING0_INT_FRAME
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(simd_coprocessor_error)
ENTRY(device_not_available)
RING0_INT_FRAME
GET_CR0_INTO_EAX
testl $0x4, %eax # EM (math emulation bit)
jne device_not_available_emulate
- preempt_stop
+ preempt_stop(CLBR_ANY)
call math_state_restore
jmp ret_from_exception
device_not_available_emulate:
CFI_ADJUST_CFA_OFFSET -4
jmp ret_from_exception
CFI_ENDPROC
+END(device_not_available)
/*
* Debug traps and NMI can happen at the one SYSENTER instruction
.previous
KPROBE_END(nmi)
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_iret)
+1: iret
+.section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+.previous
+END(native_iret)
+
+ENTRY(native_irq_enable_sysexit)
+ sti
+ sysexit
+END(native_irq_enable_sysexit)
+#endif
+
KPROBE_ENTRY(int3)
RING0_INT_FRAME
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(overflow)
ENTRY(bounds)
RING0_INT_FRAME
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(bounds)
ENTRY(invalid_op)
RING0_INT_FRAME
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(invalid_op)
ENTRY(coprocessor_segment_overrun)
RING0_INT_FRAME
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(coprocessor_segment_overrun)
ENTRY(invalid_TSS)
RING0_EC_FRAME
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(invalid_TSS)
ENTRY(segment_not_present)
RING0_EC_FRAME
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(segment_not_present)
ENTRY(stack_segment)
RING0_EC_FRAME
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(stack_segment)
KPROBE_ENTRY(general_protection)
RING0_EC_FRAME
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(alignment_check)
ENTRY(divide_error)
RING0_INT_FRAME
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(divide_error)
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
+END(machine_check)
#endif
ENTRY(spurious_interrupt_bug)
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
-
-#ifdef CONFIG_STACK_UNWIND
-ENTRY(arch_unwind_init_running)
- CFI_STARTPROC
- movl 4(%esp), %edx
- movl (%esp), %ecx
- leal 4(%esp), %eax
- movl %ebx, PT_EBX(%edx)
- xorl %ebx, %ebx
- movl %ebx, PT_ECX(%edx)
- movl %ebx, PT_EDX(%edx)
- movl %esi, PT_ESI(%edx)
- movl %edi, PT_EDI(%edx)
- movl %ebp, PT_EBP(%edx)
- movl %ebx, PT_EAX(%edx)
- movl $__USER_DS, PT_DS(%edx)
- movl $__USER_DS, PT_ES(%edx)
- movl $0, PT_GS(%edx)
- movl %ebx, PT_ORIG_EAX(%edx)
- movl %ecx, PT_EIP(%edx)
- movl 12(%esp), %ecx
- movl $__KERNEL_CS, PT_CS(%edx)
- movl %ebx, PT_EFLAGS(%edx)
- movl %eax, PT_OLDESP(%edx)
- movl 8(%esp), %eax
- movl %ecx, 8(%esp)
- movl PT_EBX(%edx), %ebx
- movl $__KERNEL_DS, PT_OLDSS(%edx)
- jmpl *%eax
- CFI_ENDPROC
-ENDPROC(arch_unwind_init_running)
-#endif
+END(spurious_interrupt_bug)
ENTRY(kernel_thread_helper)
pushl $0 # fake return address for unwinder