Subject: x86/fpu: use fnsave when fxsave is not available

kernel-3.10.0-862.3.3.el7 uses the fxsave opcode unconditionally, which
causes a swapper panic on early boot on i586. Fix it to use fnsave when
use_fxsr() is false.

Unfortunately the eagerfpu code of RHEL kernel-3.10.0-862.3.3 isn't in
upstream kernel.org code, so this patch doesn't resemble the kernel.org
code either. Upstream kernel.org code uses the init_fpstate.fsave union
member to reset the FPU state in drop_init_fpu() (fpu__clear() in
kernel.org 4.4 code), but kernel-3.10.0-862.3.3 only has

	struct xsave_struct *init_xstate_buf;

so this patch adds and initializes

	struct fpu init_fpstate_fsave_buf;

explicitly for the frstor reset. Only init_fpstate_fsave_buf.state->fsave
is used, but the whole struct fpu is declared so that it can be
initialized with fpu_finit().

diff -up ./arch/x86/include/asm/fpu-internal.h.efp ./arch/x86/include/asm/fpu-internal.h
--- ./arch/x86/include/asm/fpu-internal.h.efp	2018-06-13 17:11:28.000000000 +0900
+++ ./arch/x86/include/asm/fpu-internal.h	2018-06-24 20:29:31.000000000 +0900
@@ -382,8 +382,10 @@ static inline void drop_init_fpu(struct
 	else {
 		if (use_xsave())
 			xrstor_state(init_xstate_buf, -1);
-		else
+		else if (use_fxsr())
 			fxrstor_checking(&init_xstate_buf->i387);
+		else
+			frstor_checking(&init_fpstate_fsave_buf.state->fsave);
 
 		if (boot_cpu_has(X86_FEATURE_OSPKE))
 			copy_init_pkru_to_fpregs();
@@ -512,15 +514,36 @@ static inline void user_fpu_begin(void)
 	preempt_enable();
 }
 
-static inline void __save_fpu(struct task_struct *tsk)
+/*
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact and we can
+ * keep registers active.
+ *
+ * The legacy FNSAVE instruction cleared all FPU state
+ * unconditionally, so registers are essentially destroyed.
+ * Modern FPU state can be kept in registers, if there are
+ * no pending FP exceptions.
+ */
+static inline int __save_fpu(struct task_struct *tsk)
 {
 	if (use_xsave()) {
 		if (unlikely(system_state == SYSTEM_BOOTING))
 			xsave_state_booting(&tsk->thread.fpu.state->xsave, -1);
 		else
 			xsave_state(&tsk->thread.fpu.state->xsave, -1);
-	} else
+		return 1;
+	}
+	if (use_fxsr()) {
 		fpu_fxsave(&tsk->thread.fpu);
+		return 1;
+	}
+	/*
+	 * Legacy FPU register saving, FNSAVE always clears FPU registers,
+	 * so we have to mark them inactive:
+	 */
+	asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (tsk->thread.fpu.state->fsave));
+
+	return 0;
 }
 
 /*
@@ -598,8 +621,34 @@ static inline void fpu_free(struct fpu *
 static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
 {
 	if (use_eager_fpu()) {
+		/*
+		 * Don't let 'init optimized' areas of the XSAVE area
+		 * leak into the child task:
+		 */
 		memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
-		__save_fpu(dst);
+
+		/*
+		 * Save current FPU registers directly into the child
+		 * FPU context, without any memory-to-memory copying.
+		 *
+		 * If the FPU context got destroyed in the process (FNSAVE
+		 * done on old CPUs) then copy it back into the source
+		 * context and mark the current task for lazy restore.
+		 *
+		 * We have to do all this with preemption disabled,
+		 * mostly because of the FNSAVE case, because in that
+		 * case we must not allow preemption in the window
+		 * between the FNSAVE and us marking the context lazy.
+		 *
+		 * It shouldn't be an issue as even FNSAVE is plenty
+		 * fast in terms of critical section length.
+		 */
+		preempt_disable();
+		if (!__save_fpu(dst)) {
+			memcpy(src->thread.fpu.state, dst->thread.fpu.state, xstate_size);
+			restore_fpu_checking(src);
+		}
+		preempt_enable();
 	} else {
 		struct fpu *dfpu = &dst->thread.fpu;
 		struct fpu *sfpu = &src->thread.fpu;
diff -up ./arch/x86/include/asm/xsave.h.efp ./arch/x86/include/asm/xsave.h
--- ./arch/x86/include/asm/xsave.h.efp	2018-06-13 17:11:28.000000000 +0900
+++ ./arch/x86/include/asm/xsave.h	2018-06-24 18:19:31.000000000 +0900
@@ -50,6 +50,7 @@ extern unsigned int xstate_size;
 extern u64 pcntxt_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 extern struct xsave_struct *init_xstate_buf;
+extern struct fpu init_fpstate_fsave_buf;
 
 extern void xsave_init(void);
 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
diff -up ./arch/x86/kernel/xsave.c.efp ./arch/x86/kernel/xsave.c
--- ./arch/x86/kernel/xsave.c.efp	2018-06-13 17:11:28.000000000 +0900
+++ ./arch/x86/kernel/xsave.c	2018-06-24 19:59:23.000000000 +0900
@@ -39,6 +39,7 @@ u64 pcntxt_mask;
  * Represents init state for the supported extended state.
  */
 struct xsave_struct *init_xstate_buf;
+struct fpu init_fpstate_fsave_buf;
 
 static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
 static unsigned int *xstate_offsets, *xstate_sizes;
@@ -558,6 +559,12 @@ static void __init setup_init_fpu_buf(vo
 					   __alignof__(struct xsave_struct));
 	fx_finit(&init_xstate_buf->i387);
 
+	/* Setup init_fpstate.fsave */
+	/*fpu_alloc(&init_fpstate_fsave_buf);*/ /* freezes on boot; instead: */
+	init_fpstate_fsave_buf.state = alloc_bootmem_align(sizeof(union thread_xstate),
+					__alignof__(union thread_xstate));
+	fpu_finit(&init_fpstate_fsave_buf);
+
 	if (!cpu_has_xsave)
 		return;
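
For reference (not part of the patch, and not kernel code): below is a
minimal user-space sketch of the same save/restore dispatch that the
patched __save_fpu() and drop_init_fpu() rely on -- use FXSAVE/FXRSTOR
when CPUID.01H:EDX bit 24 (FXSR) is set, otherwise fall back to the
legacy FNSAVE/FRSTOR pair. The cpu_has_fxsr() helper and the buffer
structs are illustrative assumptions, not kernel interfaces; only the
CPUID bit and the instruction semantics come from the architecture
manuals.

/*
 * Illustrative sketch only: pick FXSAVE when the CPU reports FXSR,
 * else the legacy FNSAVE.  FXSAVE needs a 512-byte, 16-byte aligned
 * area; FNSAVE writes a 108-byte area and reinitializes the FPU as a
 * side effect, which is why the patch treats the register state as
 * destroyed after FNSAVE.
 */
#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

struct fxregs { uint8_t data[512]; } __attribute__((aligned(16)));
struct fregs  { uint8_t data[108]; };

static int cpu_has_fxsr(void)		/* illustrative helper, not a kernel API */
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 0;
	return !!(edx & (1u << 24));	/* CPUID.01H:EDX bit 24 = FXSR */
}

int main(void)
{
	static struct fxregs fx;
	static struct fregs fs;

	if (cpu_has_fxsr()) {
		asm volatile("fxsave %0" : "=m" (fx));
		asm volatile("fxrstor %0" : : "m" (fx));
		puts("FPU state saved/restored with fxsave/fxrstor");
	} else {
		/* fnsave clears the FPU, so real code must restore or re-init */
		asm volatile("fnsave %0; fwait" : "=m" (fs));
		asm volatile("frstor %0" : : "m" (fs));
		puts("FPU state saved/restored with fnsave/frstor");
	}
	return 0;
}

This builds with a plain "gcc -O2" on x86; on anything newer than an
i586 it will always take the fxsave branch, which is exactly why the
unconditional-fxsave bug only shows up on old hardware.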