2264 lines
61 KiB
C
2264 lines
61 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Modify History
|
|
*
|
|
* who when what
|
|
* --- ---- ----
|
|
* stone 2004-09-02 Add SIMD floating emulation code
|
|
* fire3 2008-12-27 Add SIMD floating emulation code for SW64
|
|
*/
|
|
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <asm/ptrace.h>
|
|
|
|
#include "sfp-util.h"
|
|
|
|
#include <math-emu/soft-fp.h>
|
|
#include <math-emu/single.h>
|
|
#include <math-emu/double.h>
|
|
|
|
/* Debug switch: any non-zero value makes DEBUG_INFO() print. */
#define math_debug 0

/*
 * printk() a KERN_DEBUG message when math_debug is non-zero.  The guard is
 * a plain C `if`, not a preprocessor conditional, so the printk() call is
 * always compiled.
 */
#define DEBUG_INFO(fmt, args...)				\
	do {							\
		if (math_debug)					\
			printk(KERN_DEBUG fmt, ## args);	\
	} while (0)
|
|
|
|
/*
|
|
* This is for sw64
|
|
*/
|
|
|
|
#define IEEE_E_STATUS_MASK		IEEE_STATUS_MASK
#define IEEE_E_STATUS_TO_EXCSUM_SHIFT	0

/*
 * Operand classes stored in X##_c by the SW64_FP_NAN_x() and
 * SW64_FP_NORMAL_x() macros below.
 */
#define SW64_FP_DENOMAL	1	/* denormal: zero exponent, non-zero fraction */
#define SW64_FP_NORMAL	0	/* anything the classifying macro does not flag */
#define SW64_FP_NAN	2	/* NaN: all-ones exponent, non-zero fraction */
|
|
|
|
/*
 * Split the single-precision value at @val into X_f/X_e/X_s and set X_c to
 * SW64_FP_NAN when the exponent is 255 and the fraction is non-zero;
 * every other encoding (including infinity) is tagged SW64_FP_NORMAL.
 */
#define SW64_FP_NAN_S(X, val)						\
do {									\
	union _FP_UNION_S *_src = (union _FP_UNION_S *)(val);		\
									\
	X##_f = _src->bits.frac;					\
	X##_e = _src->bits.exp;						\
	X##_s = _src->bits.sign;					\
									\
	X##_c = (X##_e == 255 && !_FP_FRAC_ZEROP_1(X)) ?		\
		SW64_FP_NAN : SW64_FP_NORMAL;				\
} while (0)
|
|
|
|
|
|
/*
 * Split the double-precision value at @val into X_f/X_e/X_s and set X_c to
 * SW64_FP_NAN when the exponent is 2047 and the fraction is non-zero;
 * every other encoding (including infinity) is tagged SW64_FP_NORMAL.
 */
#define SW64_FP_NAN_D(X, val)						\
do {									\
	union _FP_UNION_D *_src = (union _FP_UNION_D *)(val);		\
									\
	X##_f = _src->bits.frac;					\
	X##_e = _src->bits.exp;						\
	X##_s = _src->bits.sign;					\
									\
	X##_c = (X##_e == 2047 && !_FP_FRAC_ZEROP_1(X)) ?		\
		SW64_FP_NAN : SW64_FP_NORMAL;				\
} while (0)
|
|
|
|
|
|
|
|
/*
 * Split the single-precision value at @val into X_f/X_e/X_s and set X_c to
 * SW64_FP_DENOMAL when the exponent is 0 and the fraction is non-zero;
 * every other encoding (including zero) is tagged SW64_FP_NORMAL.
 */
#define SW64_FP_NORMAL_S(X, val)					\
do {									\
	union _FP_UNION_S *_src = (union _FP_UNION_S *)(val);		\
									\
	X##_f = _src->bits.frac;					\
	X##_e = _src->bits.exp;						\
	X##_s = _src->bits.sign;					\
									\
	X##_c = (X##_e == 0 && !_FP_FRAC_ZEROP_1(X)) ?			\
		SW64_FP_DENOMAL : SW64_FP_NORMAL;			\
} while (0)
|
|
|
|
/*
 * Split the double-precision value at @val into X_f/X_e/X_s and set X_c to
 * SW64_FP_DENOMAL when the exponent is 0 and the fraction is non-zero;
 * every other encoding (including zero) is tagged SW64_FP_NORMAL.
 */
#define SW64_FP_NORMAL_D(X, val)					\
do {									\
	union _FP_UNION_D *_src = (union _FP_UNION_D *)(val);		\
									\
	X##_f = _src->bits.frac;					\
	X##_e = _src->bits.exp;						\
	X##_s = _src->bits.sign;					\
									\
	X##_c = (X##_e == 0 && !_FP_FRAC_ZEROP_1(X)) ?			\
		SW64_FP_DENOMAL : SW64_FP_NORMAL;			\
} while (0)
|
|
|
|
/* Major opcodes (instruction bits 31..26) dispatched by sw64_fp_emul(). */
#define OP_SIMD_1		0x1A
#define OP_SIMD_2		0x1B
#define OP_SIMD_MUL_ADD		0x1B
#define OP_SIMD_NORMAL		0x1A
#define OP_MUL_ADD		0x19

/*
 * Multiply-add function codes.
 * NOTE(review): presumably decoded by mul_add_fp_emul() for OP_MUL_ADD;
 * that function is not visible here - confirm.
 */
#define FNC_FMAS		0x0
#define FNC_FMAD		0x1
#define FNC_FMSS		0x2
#define FNC_FMSD		0x3
#define FNC_FNMAS		0x4
#define FNC_FNMAD		0x5
#define FNC_FNMSS		0x6
#define FNC_FNMSD		0x7

/* OP_SIMD_NORMAL function codes, matched against (insn >> 5) & 0xff. */
#define FNC_VADDS		0x80
#define FNC_VADDD		0x81
#define FNC_VSUBS		0x82
#define FNC_VSUBD		0x83
#define FNC_VMULS		0x84
#define FNC_VMULD		0x85
#define FNC_VDIVS		0x86
#define FNC_VDIVD		0x87
#define FNC_VSQRTS		0x88
#define FNC_VSQRTD		0x89

#define FNC_VFCMPEQ		0x8c
#define FNC_VFCMPLE		0x8d
#define FNC_VFCMPLT		0x8e
#define FNC_VFCMPUN		0x8f

#define FNC_VCPYS		0x90
#define FNC_VCPYSE		0x91
#define FNC_VCPYSN		0x92

/* OP_SIMD_MUL_ADD function codes, matched against (insn >> 10) & 0x3f. */
#define FNC_VMAS		0x0
#define FNC_VMAD		0x1
#define FNC_VMSS		0x2
#define FNC_VMSD		0x3
#define FNC_VNMAS		0x4
#define FNC_VNMAD		0x5
#define FNC_VNMSS		0x6
#define FNC_VNMSD		0x7
|
|
|
|
/*
 * Emulation entry points called from sw64_fp_emul().  Each takes the
 * address of the instruction to emulate and returns a value that
 * sw64_fp_emul() passes straight back to its caller.
 */
long simd_fp_emul_s(unsigned long pc);
long simd_fp_emul_d(unsigned long pc);
long mul_add_fp_emul(unsigned long pc);
long simd_cmp_emul_d(unsigned long pc);

long simd_mul_add_fp_emul_d(unsigned long pc);
long simd_mul_add_fp_emul_s(unsigned long pc);

/* Accessors that move a vector register as four unsigned long parts. */
void read_fp_reg_s(unsigned long reg,
		   unsigned long *p0, unsigned long *p1,
		   unsigned long *p2, unsigned long *p3);
void read_fp_reg_d(unsigned long reg,
		   unsigned long *val_p0, unsigned long *p1,
		   unsigned long *p2, unsigned long *p3);
void write_fp_reg_s(unsigned long reg,
		    unsigned long val_p0, unsigned long p1,
		    unsigned long p2, unsigned long p3);
void write_fp_reg_d(unsigned long reg,
		    unsigned long val_p0, unsigned long p1,
		    unsigned long p2, unsigned long p3);

#define LOW_64_WORKING		1
#define HIGH_64_WORKING		2
|
|
|
|
/*
|
|
* End for sw64
|
|
*/
|
|
|
|
/*
 * Major opcodes used by sw64_fp_emul_imprecise() to classify the
 * instructions it scans in the trap shadow.
 */
#define OPC_HMC			0x00
#define OPC_INTA		0x10
#define OPC_INTL		0x11
#define OPC_INTS		0x12
#define OPC_INTM		0x13
#define OPC_FLTC		0x14
#define OPC_FLTV		0x15
#define OPC_FLTI		0x16
#define OPC_FLTL		0x17
#define OPC_MISC		0x18
#define OPC_JSR			0x1a

#define FOP_SRC_S		0
#define FOP_SRC_T		2
#define FOP_SRC_Q		3

#define FOP_FNC_ADDx		0
#define FOP_FNC_CVTQL		0
#define FOP_FNC_SUBx		1
#define FOP_FNC_MULx		2
#define FOP_FNC_DIVx		3
#define FOP_FNC_CMPxUN		4
#define FOP_FNC_CMPxEQ		5
#define FOP_FNC_CMPxLT		6
#define FOP_FNC_CMPxLE		7
#define FOP_FNC_SQRTx		11
#define FOP_FNC_CVTxS		12
#define FOP_FNC_CVTxT		14
#define FOP_FNC_CVTxQ		15

/* this is for sw64 added by fire3 */
/* Scalar arithmetic function codes switched on by sw64_fp_emul(). */
#define FOP_FNC_ADDS		0
#define FOP_FNC_ADDD		1
#define FOP_FNC_SUBS		2
#define FOP_FNC_SUBD		3
#define FOP_FNC_MULS		4
#define FOP_FNC_MULD		5
#define FOP_FNC_DIVS		6
#define FOP_FNC_DIVD		7
#define FOP_FNC_SQRTS		8
#define FOP_FNC_SQRTD		9

/* Scalar compares; sw64_fp_emul() selects them with (func & ~0xf) == 0x10. */
#define FOP_FNC_CMPEQ		0x10
#define FOP_FNC_CMPLE		0x11
#define FOP_FNC_CMPLT		0x12
#define FOP_FNC_CMPUN		0x13

/*
 * Conversions.  For the CVTDL_x variants sw64_fp_emul() overrides the
 * dynamic rounding mode: _Z -> 0, _N -> 1, _G -> 2, _P -> 3.
 */
#define FOP_FNC_CVTSD		0x20
#define FOP_FNC_CVTDS		0x21
#define FOP_FNC_CVTLS		0x2D
#define FOP_FNC_CVTLD		0x2F
#define FOP_FNC_CVTDL		0x27
#define FOP_FNC_CVTDL_G		0x22
#define FOP_FNC_CVTDL_P		0x23
#define FOP_FNC_CVTDL_Z		0x24
#define FOP_FNC_CVTDL_N		0x25

#define FOP_FNC_CVTWL		0x28
#define FOP_FNC_CVTLW		0x29

/* fire3 added end */

/* OPC_MISC encodings (low 16 instruction bits) that end the trap shadow. */
#define MISC_TRAPB		0x0000
#define MISC_EXCB		0x0400
|
|
|
|
/*
 * Scalar FP register accessors defined outside this file.  sw64_fp_emul()
 * uses the plain pair for double-precision operands and the _s pair for
 * single-precision ones.
 */
unsigned long sw64_read_fp_reg(unsigned long reg);
void sw64_write_fp_reg(unsigned long reg, unsigned long val);
unsigned long sw64_read_fp_reg_s(unsigned long reg);
void sw64_write_fp_reg_s(unsigned long reg, unsigned long val);
|
|
|
|
|
|
#ifdef MODULE

MODULE_DESCRIPTION("FP Software completion module");

/* Hook pointers owned by the core kernel; this module swaps them. */
extern long (*sw64_fp_emul_imprecise)(struct pt_regs *, unsigned long);
extern long (*sw64_fp_emul)(unsigned long pc);

/* Previous hook values, restored on unload. */
static long (*save_emul_imprecise)(struct pt_regs *, unsigned long);
static long (*save_emul)(unsigned long pc);

long do_sw_fp_emul_imprecise(struct pt_regs *, unsigned long);
long do_sw_fp_emul(unsigned long);

/* Remember the current hooks, then install this module's emulators. */
int init_module(void)
{
	save_emul = sw64_fp_emul;
	save_emul_imprecise = sw64_fp_emul_imprecise;

	sw64_fp_emul = do_sw_fp_emul;
	sw64_fp_emul_imprecise = do_sw_fp_emul_imprecise;

	return 0;
}

/* Put back the hooks that were in place before init_module() ran. */
void cleanup_module(void)
{
	sw64_fp_emul = save_emul;
	sw64_fp_emul_imprecise = save_emul_imprecise;
}

/*
 * From here on the names sw64_fp_emul / sw64_fp_emul_imprecise expand to
 * the do_sw_* names, so the function definitions below provide the
 * module's implementations rather than clashing with the hook pointers.
 */
#undef sw64_fp_emul
#define sw64_fp_emul do_sw_fp_emul
#undef sw64_fp_emul_imprecise
#define sw64_fp_emul_imprecise do_sw_fp_emul_imprecise

#endif /* MODULE */
|
|
|
|
|
|
/*
|
|
* Emulate the floating point instruction at address PC. Returns -1 if the
|
|
* instruction to be emulated is illegal (such as with the opDEC trap), else
|
|
* the SI_CODE for a SIGFPE signal, else 0 if everything's ok.
|
|
*
|
|
* Notice that the kernel does not and cannot use FP regs. This is good
|
|
* because it means that instead of saving/restoring all fp regs, we simply
|
|
* stick the result of the operation into the appropriate register.
|
|
*/
|
|
long sw64_fp_emul(unsigned long pc)
|
|
{
|
|
FP_DECL_EX;
|
|
FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
|
|
FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
|
|
|
|
unsigned long fa, fb, fc, func, mode, mode_bk, src;
|
|
unsigned long res, va, vb, vc, swcr, fpcr;
|
|
__u32 insn;
|
|
long si_code;
|
|
unsigned long opcode;
|
|
|
|
get_user(insn, (__u32 *)pc);
|
|
opcode = (insn >> 26) & 0x3f;
|
|
fc = (insn >> 0) & 0x1f; /* destination register */
|
|
fb = (insn >> 16) & 0x1f;
|
|
fa = (insn >> 21) & 0x1f;
|
|
func = (insn >> 5) & 0xff;
|
|
fpcr = rdfpcr();
|
|
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
|
|
DEBUG_INFO("======= Entering Floating mathe emulation =====\n");
|
|
DEBUG_INFO("Floating math emulation insn = %#lx, opcode=%d, func=%d\n", insn, opcode, func);
|
|
DEBUG_INFO("SW64 hardware fpcr = %#lx\n", fpcr);
|
|
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
|
|
DEBUG_INFO("SW64 software swcr = %#lx\n", swcr);
|
|
DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode);
|
|
|
|
if (opcode == OP_SIMD_NORMAL) { /* float simd math */
|
|
if (func == FNC_VADDS || func == FNC_VSUBS || func == FNC_VSQRTS
|
|
|| func == FNC_VMULS || func == FNC_VDIVS)
|
|
si_code = simd_fp_emul_s(pc);
|
|
if (func == FNC_VADDD || func == FNC_VSUBD || func == FNC_VSQRTD
|
|
|| func == FNC_VMULD || func == FNC_VDIVD)
|
|
si_code = simd_fp_emul_d(pc);
|
|
if (func == FNC_VFCMPUN || func == FNC_VFCMPLT || func == FNC_VFCMPLE
|
|
|| func == FNC_VFCMPEQ)
|
|
si_code = simd_cmp_emul_d(pc);
|
|
return si_code;
|
|
}
|
|
if (opcode == OP_SIMD_MUL_ADD) {/* simd mul and add */
|
|
func = (insn >> 10) & 0x3f;
|
|
if (func == FNC_VMAS || func == FNC_VMSS || func == FNC_VNMAS
|
|
|| func == FNC_VNMSS) {
|
|
si_code = simd_mul_add_fp_emul_s(pc);
|
|
return si_code;
|
|
}
|
|
|
|
if (func == FNC_VMAD || func == FNC_VMSD || func == FNC_VNMAD
|
|
|| func == FNC_VNMSD) {
|
|
si_code = simd_mul_add_fp_emul_d(pc);
|
|
return si_code;
|
|
}
|
|
func = (insn >> 5) & 0xff;
|
|
}
|
|
|
|
if (opcode == OP_MUL_ADD) {
|
|
si_code = mul_add_fp_emul(pc);
|
|
return si_code;
|
|
}
|
|
switch (func) {
|
|
case FOP_FNC_SUBS:
|
|
va = sw64_read_fp_reg_s(fa);
|
|
vb = sw64_read_fp_reg_s(fb);
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_UNPACK_SP(SB, &vb);
|
|
FP_SUB_S(SR, SA, SB);
|
|
goto pack_s;
|
|
|
|
case FOP_FNC_SUBD:
|
|
va = sw64_read_fp_reg(fa);
|
|
vb = sw64_read_fp_reg(fb);
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_UNPACK_DP(DB, &vb);
|
|
FP_SUB_D(DR, DA, DB);
|
|
goto pack_d;
|
|
|
|
case FOP_FNC_ADDS:
|
|
va = sw64_read_fp_reg_s(fa);
|
|
vb = sw64_read_fp_reg_s(fb);
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_UNPACK_SP(SB, &vb);
|
|
FP_ADD_S(SR, SA, SB);
|
|
goto pack_s;
|
|
|
|
case FOP_FNC_ADDD:
|
|
va = sw64_read_fp_reg(fa);
|
|
vb = sw64_read_fp_reg(fb);
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_UNPACK_DP(DB, &vb);
|
|
FP_ADD_D(DR, DA, DB);
|
|
goto pack_d;
|
|
|
|
case FOP_FNC_MULS:
|
|
va = sw64_read_fp_reg_s(fa);
|
|
vb = sw64_read_fp_reg_s(fb);
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_UNPACK_SP(SB, &vb);
|
|
FP_MUL_S(SR, SA, SB);
|
|
goto pack_s;
|
|
|
|
case FOP_FNC_MULD:
|
|
va = sw64_read_fp_reg(fa);
|
|
vb = sw64_read_fp_reg(fb);
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_UNPACK_DP(DB, &vb);
|
|
FP_MUL_D(DR, DA, DB);
|
|
goto pack_d;
|
|
|
|
case FOP_FNC_DIVS:
|
|
DEBUG_INFO("FOP_FNC_DIVS\n");
|
|
va = sw64_read_fp_reg_s(fa);
|
|
vb = sw64_read_fp_reg_s(fb);
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_UNPACK_SP(SB, &vb);
|
|
FP_DIV_S(SR, SA, SB);
|
|
goto pack_s;
|
|
|
|
case FOP_FNC_DIVD:
|
|
DEBUG_INFO("FOP_FNC_DIVD\n");
|
|
va = sw64_read_fp_reg(fa);
|
|
vb = sw64_read_fp_reg(fb);
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_UNPACK_DP(DB, &vb);
|
|
FP_DIV_D(DR, DA, DB);
|
|
goto pack_d;
|
|
|
|
case FOP_FNC_SQRTS:
|
|
va = sw64_read_fp_reg_s(fa);
|
|
vb = sw64_read_fp_reg_s(fb);
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_UNPACK_SP(SB, &vb);
|
|
FP_SQRT_S(SR, SB);
|
|
goto pack_s;
|
|
case FOP_FNC_SQRTD:
|
|
va = sw64_read_fp_reg(fa);
|
|
vb = sw64_read_fp_reg(fb);
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_UNPACK_DP(DB, &vb);
|
|
FP_SQRT_D(DR, DB);
|
|
goto pack_d;
|
|
}
|
|
|
|
|
|
va = sw64_read_fp_reg(fa);
|
|
vb = sw64_read_fp_reg(fb);
|
|
if ((func & ~0xf) == FOP_FNC_CMPEQ) {
|
|
va = sw64_read_fp_reg(fa);
|
|
vb = sw64_read_fp_reg(fb);
|
|
|
|
FP_UNPACK_RAW_DP(DA, &va);
|
|
FP_UNPACK_RAW_DP(DB, &vb);
|
|
if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) {
|
|
FP_SET_EXCEPTION(FP_EX_DENORM);
|
|
if (FP_DENORM_ZERO)
|
|
_FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1);
|
|
}
|
|
if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) {
|
|
FP_SET_EXCEPTION(FP_EX_DENORM);
|
|
if (FP_DENORM_ZERO)
|
|
_FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1);
|
|
}
|
|
FP_CMP_D(res, DA, DB, 3);
|
|
vc = 0x4000000000000000;
|
|
/* CMPTEQ, CMPTUN don't trap on QNaN, while CMPTLT and CMPTLE do */
|
|
if (res == 3 && (((func == FOP_FNC_CMPLT) || (func == FOP_FNC_CMPLE))
|
|
|| FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB))) {
|
|
DEBUG_INFO("CMPLT CMPLE:func:%d, trap on QNaN.", func);
|
|
FP_SET_EXCEPTION(FP_EX_INVALID);
|
|
}
|
|
switch (func) {
|
|
case FOP_FNC_CMPUN:
|
|
if (res != 3)
|
|
vc = 0;
|
|
break;
|
|
case FOP_FNC_CMPEQ:
|
|
if (res)
|
|
vc = 0;
|
|
break;
|
|
case FOP_FNC_CMPLT:
|
|
if (res != -1)
|
|
vc = 0;
|
|
break;
|
|
case FOP_FNC_CMPLE:
|
|
if ((long)res > 0)
|
|
vc = 0;
|
|
break;
|
|
}
|
|
goto done_d;
|
|
}
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_UNPACK_DP(DB, &vb);
|
|
|
|
if (func == FOP_FNC_CVTSD) {
|
|
vb = sw64_read_fp_reg_s(fb);
|
|
FP_UNPACK_SP(SB, &vb);
|
|
DR_c = DB_c;
|
|
DR_s = DB_s;
|
|
DR_e = DB_e + (1024 - 128);
|
|
DR_f = SB_f << (52 - 23);
|
|
goto pack_d;
|
|
}
|
|
|
|
if (func == FOP_FNC_CVTDS) {
|
|
FP_CONV(S, D, 1, 1, SR, DB);
|
|
goto pack_s;
|
|
}
|
|
|
|
if (func == FOP_FNC_CVTDL || func == FOP_FNC_CVTDL_G || func == FOP_FNC_CVTDL_P
|
|
|| func == FOP_FNC_CVTDL_Z || func == FOP_FNC_CVTDL_N) {
|
|
mode_bk = mode;
|
|
if (func == FOP_FNC_CVTDL_Z)
|
|
mode = 0x0UL;
|
|
else if (func == FOP_FNC_CVTDL_N)
|
|
mode = 0x1UL;
|
|
else if (func == FOP_FNC_CVTDL_G)
|
|
mode = 0x2UL;
|
|
else if (func == FOP_FNC_CVTDL_P)
|
|
mode = 0x3UL;
|
|
|
|
if (DB_c == FP_CLS_NAN && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) {
|
|
/* AAHB Table B-2 says QNaN should not trigger INV */
|
|
vc = 0;
|
|
} else
|
|
FP_TO_INT_ROUND_D(vc, DB, 64, 2);
|
|
mode = mode_bk;
|
|
goto done_d;
|
|
}
|
|
|
|
vb = sw64_read_fp_reg(fb);
|
|
|
|
switch (func) {
|
|
case FOP_FNC_CVTLW:
|
|
/*
|
|
* Notice: We can get here only due to an integer
|
|
* overflow. Such overflows are reported as invalid
|
|
* ops. We return the result the hw would have
|
|
* computed.
|
|
*/
|
|
vc = ((vb & 0xc0000000) << 32 | /* sign and msb */
|
|
(vb & 0x3fffffff) << 29); /* rest of the int */
|
|
FP_SET_EXCEPTION(FP_EX_INVALID);
|
|
goto done_d;
|
|
|
|
case FOP_FNC_CVTLS:
|
|
FP_FROM_INT_S(SR, ((long)vb), 64, long);
|
|
goto pack_s;
|
|
|
|
case FOP_FNC_CVTLD:
|
|
FP_FROM_INT_D(DR, ((long)vb), 64, long);
|
|
goto pack_d;
|
|
}
|
|
goto bad_insn;
|
|
|
|
|
|
pack_s:
|
|
FP_PACK_SP(&vc, SR);
|
|
|
|
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
|
|
vc = 0;
|
|
DEBUG_INFO("SW64 Emulation S-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
|
|
DEBUG_INFO("SW64 Emulation S-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
|
|
sw64_write_fp_reg_s(fc, vc);
|
|
goto done;
|
|
|
|
pack_d:
|
|
FP_PACK_DP(&vc, DR);
|
|
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
|
|
vc = 0;
|
|
DEBUG_INFO("SW64 Emulation D-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
|
|
DEBUG_INFO("SW64 Emulation D-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
|
|
done_d:
|
|
sw64_write_fp_reg(fc, vc);
|
|
goto done;
|
|
|
|
/*
|
|
* Take the appropriate action for each possible
|
|
* floating-point result:
|
|
*
|
|
* - Set the appropriate bits in the FPCR
|
|
* - If the specified exception is enabled in the FPCR,
|
|
* return. The caller (entArith) will dispatch
|
|
* the appropriate signal to the translated program.
|
|
*
|
|
* In addition, properly track the exception state in software
|
|
* as described in the SW64 Architecture Handbook section 4.7.7.3.
|
|
*/
|
|
done:
|
|
if (_fex) {
|
|
/* Record exceptions in software control word. */
|
|
swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr |= ieee_swcr_to_fpcr(swcr);
|
|
DEBUG_INFO("SW64 before write fpcr = %#lx\n", fpcr);
|
|
wrfpcr(fpcr);
|
|
|
|
/* Do we generate a signal? */
|
|
_fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK;
|
|
si_code = 0;
|
|
if (_fex) {
|
|
if (_fex & IEEE_TRAP_ENABLE_DNO)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_INE)
|
|
si_code = FPE_FLTRES;
|
|
if (_fex & IEEE_TRAP_ENABLE_UNF)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_OVF)
|
|
si_code = FPE_FLTOVF;
|
|
if (_fex & IEEE_TRAP_ENABLE_DZE)
|
|
si_code = FPE_FLTDIV;
|
|
if (_fex & IEEE_TRAP_ENABLE_INV)
|
|
si_code = FPE_FLTINV;
|
|
}
|
|
|
|
return si_code;
|
|
}
|
|
|
|
/*
|
|
* We used to write the destination register here, but DEC FORTRAN
|
|
* requires that the result *always* be written... so we do the write
|
|
* immediately after the operations above.
|
|
*/
|
|
|
|
return 0;
|
|
|
|
bad_insn:
|
|
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
|
|
return -1;
|
|
}
|
|
|
|
long sw64_fp_emul_imprecise(struct pt_regs *regs, unsigned long write_mask)
|
|
{
|
|
unsigned long trigger_pc = regs->pc - 4;
|
|
unsigned long insn, opcode, rc, si_code = 0;
|
|
|
|
|
|
/*
|
|
* Turn off the bits corresponding to registers that are the
|
|
* target of instructions that set bits in the exception
|
|
* summary register. We have some slack doing this because a
|
|
* register that is the target of a trapping instruction can
|
|
* be written at most once in the trap shadow.
|
|
*
|
|
* Branches, jumps, TRAPBs, EXCBs and calls to HMcode all
|
|
* bound the trap shadow, so we need not look any further than
|
|
* up to the first occurrence of such an instruction.
|
|
*/
|
|
while (write_mask) {
|
|
get_user(insn, (__u32 *)(trigger_pc));
|
|
opcode = insn >> 26;
|
|
rc = insn & 0x1f;
|
|
|
|
switch (opcode) {
|
|
case OPC_HMC:
|
|
case OPC_JSR:
|
|
case 0x30 ... 0x3f: /* branches */
|
|
goto egress;
|
|
|
|
case OPC_MISC:
|
|
switch (insn & 0xffff) {
|
|
case MISC_TRAPB:
|
|
case MISC_EXCB:
|
|
goto egress;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case OPC_INTA:
|
|
case OPC_INTL:
|
|
case OPC_INTS:
|
|
case OPC_INTM:
|
|
write_mask &= ~(1UL << rc);
|
|
break;
|
|
|
|
case OPC_FLTC:
|
|
case OPC_FLTV:
|
|
case OPC_FLTI:
|
|
case OPC_FLTL:
|
|
write_mask &= ~(1UL << (rc + 32));
|
|
break;
|
|
}
|
|
if (!write_mask) {
|
|
/* Re-execute insns in the trap-shadow. */
|
|
regs->pc = trigger_pc + 4;
|
|
si_code = sw64_fp_emul(trigger_pc);
|
|
goto egress;
|
|
}
|
|
trigger_pc -= 4;
|
|
}
|
|
|
|
egress:
|
|
return si_code;
|
|
}
|
|
|
|
/* Indices of the four 64-bit parts the SIMD emulators process in turn. */
#define WORKING_PART_0		0
#define WORKING_PART_1		1
#define WORKING_PART_2		2
#define WORKING_PART_3		3
|
|
|
|
|
|
/*
|
|
* This is for sw64
|
|
*/
|
|
|
|
long simd_cmp_emul_d(unsigned long pc)
|
|
{
|
|
FP_DECL_EX;
|
|
FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); FP_DECL_D(DC);
|
|
unsigned long fa, fb, fc, func, mode, src;
|
|
unsigned long res, va, vb, vc, swcr, fpcr;
|
|
__u32 insn;
|
|
long si_code;
|
|
|
|
unsigned long va_p0, va_p1, va_p2, va_p3;
|
|
unsigned long vb_p0, vb_p1, vb_p2, vb_p3;
|
|
unsigned long vc_p0, vc_p1, vc_p2, vc_p3;
|
|
unsigned long fex_p0, fex_p1, fex_p2, fex_p3;
|
|
|
|
int working_part;
|
|
|
|
get_user(insn, (__u32 *)pc);
|
|
fc = (insn >> 0) & 0x1f; /* destination register */
|
|
fb = (insn >> 16) & 0x1f;
|
|
fa = (insn >> 21) & 0x1f;
|
|
func = (insn >> 5) & 0xff;
|
|
fpcr = rdfpcr();
|
|
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
|
|
|
|
DEBUG_INFO("======== Entering SIMD floating-CMP math emulation =======\n");
|
|
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
|
|
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
|
|
DEBUG_INFO("software swcr = %#lx\n", swcr);
|
|
DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode);
|
|
read_fp_reg_d(fa, &va_p0, &va_p1, &va_p2, &va_p3);
|
|
read_fp_reg_d(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3);
|
|
read_fp_reg_d(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3);
|
|
DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3);
|
|
DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3);
|
|
DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
|
|
working_part = WORKING_PART_0;
|
|
simd_working:
|
|
_fex = 0;
|
|
switch (working_part) {
|
|
case WORKING_PART_0:
|
|
DEBUG_INFO("WORKING_PART_0\n");
|
|
va = va_p0;
|
|
vb = vb_p0;
|
|
vc = vc_p0;
|
|
break;
|
|
case WORKING_PART_1:
|
|
DEBUG_INFO("WORKING_PART_1\n");
|
|
va = va_p1;
|
|
vb = vb_p1;
|
|
vc = vc_p1;
|
|
break;
|
|
case WORKING_PART_2:
|
|
DEBUG_INFO("WORKING_PART_2\n");
|
|
va = va_p2;
|
|
vb = vb_p2;
|
|
vc = vc_p2;
|
|
break;
|
|
case WORKING_PART_3:
|
|
DEBUG_INFO("WORKING_PART_3\n");
|
|
va = va_p3;
|
|
vb = vb_p3;
|
|
vc = vc_p3;
|
|
break;
|
|
}
|
|
DEBUG_INFO("Before unpack va:%#lx, vb:%#lx\n", va, vb);
|
|
FP_UNPACK_RAW_DP(DA, &va);
|
|
FP_UNPACK_RAW_DP(DB, &vb);
|
|
DEBUG_INFO("DA_e:%d, _FP_FRAC_ZEROP_1(DA):%d\n", DA_e, _FP_FRAC_ZEROP_1(DA));
|
|
DEBUG_INFO("DB_e:%d, _FP_FRAC_ZEROP_1(DB):%d\n", DA_e, _FP_FRAC_ZEROP_1(DA));
|
|
DEBUG_INFO("DA iszero:%d, DB iszero:%d\n", ((!DA_e && _FP_FRAC_ZEROP_1(DA)) ? 1 : 0),
|
|
((!DB_e && _FP_FRAC_ZEROP_1(DB))));
|
|
if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) {
|
|
FP_SET_EXCEPTION(FP_EX_DENORM);
|
|
if (FP_DENORM_ZERO)
|
|
_FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1);
|
|
}
|
|
if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) {
|
|
FP_SET_EXCEPTION(FP_EX_DENORM);
|
|
if (FP_DENORM_ZERO)
|
|
_FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1);
|
|
}
|
|
FP_CMP_D(res, DA, DB, 3);
|
|
vc = 0x4000000000000000;
|
|
/* CMPTEQ, CMPTUN don't trap on QNaN, while CMPTLT and CMPTLE do */
|
|
if (res == 3 && (((func == FOP_FNC_CMPLT) || (func == FOP_FNC_CMPLE))
|
|
|| FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB))) {
|
|
DEBUG_INFO("CMPLT CMPLE:func:%d, trap on QNaN.", func);
|
|
FP_SET_EXCEPTION(FP_EX_INVALID);
|
|
}
|
|
DEBUG_INFO("res:%d\n", res);
|
|
switch (func) {
|
|
case FNC_VFCMPUN:
|
|
if (res != 3)
|
|
vc = 0;
|
|
break;
|
|
case FNC_VFCMPEQ:
|
|
if (res)
|
|
vc = 0;
|
|
break;
|
|
case FNC_VFCMPLT:
|
|
if (res != -1)
|
|
vc = 0;
|
|
break;
|
|
case FNC_VFCMPLE:
|
|
if ((long)res > 0)
|
|
vc = 0;
|
|
break;
|
|
}
|
|
next_working_s:
|
|
switch (working_part) {
|
|
case WORKING_PART_0:
|
|
working_part = WORKING_PART_1;
|
|
vc_p0 = vc;
|
|
fex_p0 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_1:
|
|
working_part = WORKING_PART_2;
|
|
vc_p1 = vc;
|
|
fex_p1 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_2:
|
|
working_part = WORKING_PART_3;
|
|
vc_p2 = vc;
|
|
fex_p2 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_3:
|
|
vc_p3 = vc;
|
|
fex_p3 = _fex;
|
|
goto done;
|
|
}
|
|
done:
|
|
if (fex_p0 || fex_p1 || fex_p2 || fex_p3) {
|
|
unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3;
|
|
unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3;
|
|
|
|
fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0;
|
|
swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr;
|
|
/* manage fpcr_p0 */
|
|
if (fex_p0) {
|
|
swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p0 = fpcr;
|
|
fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0);
|
|
}
|
|
|
|
if (fex_p1) {
|
|
swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p1 = fpcr;
|
|
fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1);
|
|
}
|
|
|
|
if (fex_p2) {
|
|
swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p2 = fpcr;
|
|
fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2);
|
|
}
|
|
|
|
if (fex_p3) {
|
|
swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p3 = fpcr;
|
|
fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3);
|
|
}
|
|
|
|
fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3;
|
|
DEBUG_INFO("fex_p0 = %#lx\n", fex_p0);
|
|
DEBUG_INFO("fex_p1 = %#lx\n", fex_p1);
|
|
DEBUG_INFO("fex_p2 = %#lx\n", fex_p2);
|
|
DEBUG_INFO("fex_p3 = %#lx\n", fex_p3);
|
|
DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr);
|
|
wrfpcr(fpcr);
|
|
DEBUG_INFO("Before write fp: vc_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
|
|
write_fp_reg_d(fc, vc_p0, vc_p1, vc_p2, vc_p3);
|
|
|
|
/* Do we generate a signal? */
|
|
_fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK)
|
|
| (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK);
|
|
si_code = 0;
|
|
if (_fex) {
|
|
if (_fex & IEEE_TRAP_ENABLE_DNO)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_INE)
|
|
si_code = FPE_FLTRES;
|
|
if (_fex & IEEE_TRAP_ENABLE_UNF)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_OVF)
|
|
si_code = FPE_FLTOVF;
|
|
if (_fex & IEEE_TRAP_ENABLE_DZE)
|
|
si_code = FPE_FLTDIV;
|
|
if (_fex & IEEE_TRAP_ENABLE_INV)
|
|
si_code = FPE_FLTINV;
|
|
}
|
|
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
|
|
return si_code;
|
|
|
|
}
|
|
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
|
|
return 0;
|
|
|
|
bad_insn:
|
|
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
|
|
return -1;
|
|
}
|
|
|
|
|
|
long simd_fp_emul_d(unsigned long pc)
|
|
{
|
|
FP_DECL_EX;
|
|
FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); FP_DECL_D(DC);
|
|
unsigned long fa, fb, fc, func, mode, src;
|
|
unsigned long res, va, vb, vc, swcr, fpcr;
|
|
__u32 insn;
|
|
long si_code;
|
|
|
|
unsigned long va_p0, va_p1, va_p2, va_p3;
|
|
unsigned long vb_p0, vb_p1, vb_p2, vb_p3;
|
|
unsigned long vc_p0, vc_p1, vc_p2, vc_p3;
|
|
unsigned long fex_p0, fex_p1, fex_p2, fex_p3;
|
|
|
|
int working_part;
|
|
|
|
get_user(insn, (__u32 *)pc);
|
|
fc = (insn >> 0) & 0x1f; /* destination register */
|
|
fb = (insn >> 16) & 0x1f;
|
|
fa = (insn >> 21) & 0x1f;
|
|
func = (insn >> 5) & 0xff;
|
|
fpcr = rdfpcr();
|
|
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
|
|
|
|
DEBUG_INFO("======== Entering SIMD D-floating math emulation =======\n");
|
|
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
|
|
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
|
|
DEBUG_INFO("software swcr = %#lx\n", swcr);
|
|
DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode);
|
|
read_fp_reg_d(fa, &va_p0, &va_p1, &va_p2, &va_p3);
|
|
read_fp_reg_d(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3);
|
|
read_fp_reg_d(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3);
|
|
DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3);
|
|
DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3);
|
|
DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
|
|
working_part = WORKING_PART_0;
|
|
simd_working:
|
|
_fex = 0;
|
|
switch (working_part) {
|
|
case WORKING_PART_0:
|
|
DEBUG_INFO("WORKING_PART_0\n");
|
|
va = va_p0;
|
|
vb = vb_p0;
|
|
vc = vc_p0;
|
|
if ((fpcr & FPCR_STATUS_MASK0) == 0) {
|
|
SW64_FP_NORMAL_D(DA, &va);
|
|
SW64_FP_NORMAL_D(DB, &vb);
|
|
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("LOW: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
|
|
} else {
|
|
SW64_FP_NAN_D(DA, &va);
|
|
SW64_FP_NAN_D(DB, &vb);
|
|
if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN)))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_1:
|
|
DEBUG_INFO("WORKING_PART_1\n");
|
|
va = va_p1;
|
|
vb = vb_p1;
|
|
vc = vc_p1;
|
|
if ((fpcr & FPCR_STATUS_MASK1) == 0) {
|
|
SW64_FP_NORMAL_D(DA, &va);
|
|
SW64_FP_NORMAL_D(DB, &vb);
|
|
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
|
|
} else {
|
|
SW64_FP_NAN_D(DA, &va);
|
|
SW64_FP_NAN_D(DB, &vb);
|
|
if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN)))
|
|
goto next_working_s;
|
|
}
|
|
|
|
break;
|
|
case WORKING_PART_2:
|
|
DEBUG_INFO("WORKING_PART_2\n");
|
|
va = va_p2;
|
|
vb = vb_p2;
|
|
vc = vc_p2;
|
|
if ((fpcr & FPCR_STATUS_MASK2) == 0) {
|
|
SW64_FP_NORMAL_D(DA, &va);
|
|
SW64_FP_NORMAL_D(DB, &vb);
|
|
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
|
|
} else {
|
|
SW64_FP_NAN_D(DA, &va);
|
|
SW64_FP_NAN_D(DB, &vb);
|
|
if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN)))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_3:
|
|
DEBUG_INFO("WORKING_PART_3\n");
|
|
va = va_p3;
|
|
vb = vb_p3;
|
|
vc = vc_p3;
|
|
if ((fpcr & FPCR_STATUS_MASK3) == 0) {
|
|
SW64_FP_NORMAL_D(DA, &va);
|
|
SW64_FP_NORMAL_D(DB, &vb);
|
|
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
|
|
} else {
|
|
SW64_FP_NAN_D(DA, &va);
|
|
SW64_FP_NAN_D(DB, &vb);
|
|
if (((DA_c == SW64_FP_NAN) || (DB_c == SW64_FP_NAN)))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
}
|
|
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_UNPACK_DP(DB, &vb);
|
|
|
|
switch (func) {
|
|
case FNC_VSUBD:
|
|
DEBUG_INFO("FNC_VSUBD\n");
|
|
FP_SUB_D(DR, DA, DB);
|
|
goto pack_d;
|
|
case FNC_VMULD:
|
|
DEBUG_INFO("FNC_VMULD\n");
|
|
FP_MUL_D(DR, DA, DB);
|
|
goto pack_d;
|
|
case FNC_VADDD:
|
|
DEBUG_INFO("FNC_VADDD\n");
|
|
FP_ADD_D(DR, DA, DB);
|
|
goto pack_d;
|
|
case FNC_VDIVD:
|
|
DEBUG_INFO("FNC_VDIVD\n");
|
|
FP_DIV_D(DR, DA, DB);
|
|
goto pack_d;
|
|
case FNC_VSQRTD:
|
|
DEBUG_INFO("FNC_VSQRTD\n");
|
|
FP_SQRT_D(DR, DB);
|
|
goto pack_d;
|
|
}
|
|
pack_d:
|
|
FP_PACK_DP(&vc, DR);
|
|
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) {
|
|
DEBUG_INFO("pack_d, vc=0 !!!!\n");
|
|
vc = 0;
|
|
}
|
|
|
|
DEBUG_INFO("SW64 SIMD Emulation D-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
|
|
DEBUG_INFO("SW64 SIMD Emulation D-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
|
|
next_working_s:
|
|
switch (working_part) {
|
|
case WORKING_PART_0:
|
|
working_part = WORKING_PART_1;
|
|
vc_p0 = vc;
|
|
fex_p0 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_1:
|
|
working_part = WORKING_PART_2;
|
|
vc_p1 = vc;
|
|
fex_p1 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_2:
|
|
working_part = WORKING_PART_3;
|
|
vc_p2 = vc;
|
|
fex_p2 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_3:
|
|
vc_p3 = vc;
|
|
fex_p3 = _fex;
|
|
goto done;
|
|
}
|
|
done:
|
|
if (fex_p0 || fex_p1 || fex_p2 || fex_p3) {
|
|
unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3;
|
|
unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3;
|
|
|
|
fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0;
|
|
swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr;
|
|
/* manage fpcr_p0 */
|
|
if (fex_p0) {
|
|
swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p0 = fpcr;
|
|
fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0);
|
|
}
|
|
|
|
if (fex_p1) {
|
|
swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p1 = fpcr;
|
|
fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1);
|
|
}
|
|
|
|
if (fex_p2) {
|
|
swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p2 = fpcr;
|
|
fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2);
|
|
}
|
|
|
|
if (fex_p3) {
|
|
swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p3 = fpcr;
|
|
fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3);
|
|
}
|
|
|
|
fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3;
|
|
DEBUG_INFO("fex_p0 = %#lx\n", fex_p0);
|
|
DEBUG_INFO("fex_p1 = %#lx\n", fex_p1);
|
|
DEBUG_INFO("fex_p2 = %#lx\n", fex_p2);
|
|
DEBUG_INFO("fex_p3 = %#lx\n", fex_p3);
|
|
DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr);
|
|
wrfpcr(fpcr);
|
|
DEBUG_INFO("Before write fp: vp_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
|
|
write_fp_reg_d(fc, vc_p0, vc_p1, vc_p2, vc_p3);
|
|
|
|
/* Do we generate a signal? */
|
|
_fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK)
|
|
| (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK);
|
|
si_code = 0;
|
|
if (_fex) {
|
|
if (_fex & IEEE_TRAP_ENABLE_DNO)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_INE)
|
|
si_code = FPE_FLTRES;
|
|
if (_fex & IEEE_TRAP_ENABLE_UNF)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_OVF)
|
|
si_code = FPE_FLTOVF;
|
|
if (_fex & IEEE_TRAP_ENABLE_DZE)
|
|
si_code = FPE_FLTDIV;
|
|
if (_fex & IEEE_TRAP_ENABLE_INV)
|
|
si_code = FPE_FLTINV;
|
|
}
|
|
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
|
|
return si_code;
|
|
}
|
|
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
|
|
return 0;
|
|
|
|
bad_insn:
|
|
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
|
|
return -1;
|
|
}
|
|
|
|
long simd_fp_emul_s(unsigned long pc)
|
|
{
|
|
FP_DECL_EX;
|
|
FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
|
|
|
|
unsigned long fa, fb, fc, func, mode, src;
|
|
unsigned long res, va, vb, vc, swcr, fpcr;
|
|
__u32 insn;
|
|
long si_code;
|
|
|
|
unsigned long va_p0, va_p1, va_p2, va_p3;
|
|
unsigned long vb_p0, vb_p1, vb_p2, vb_p3;
|
|
unsigned long vc_p0, vc_p1, vc_p2, vc_p3;
|
|
unsigned long fex_p0, fex_p1, fex_p2, fex_p3;
|
|
|
|
int working_part;
|
|
|
|
get_user(insn, (__u32 *)pc);
|
|
fc = (insn >> 0) & 0x1f; /* destination register */
|
|
fb = (insn >> 16) & 0x1f;
|
|
fa = (insn >> 21) & 0x1f;
|
|
func = (insn >> 5) & 0xff;
|
|
fpcr = rdfpcr();
|
|
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
|
|
|
|
DEBUG_INFO("======== Entering SIMD S-floating math emulation =======\n");
|
|
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
|
|
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
|
|
DEBUG_INFO("software swcr = %#lx\n", swcr);
|
|
DEBUG_INFO("fa:%#lx,fb:%#lx,fc:%#lx,func:%#lx,mode:%#lx\n", fa, fb, fc, func, mode);
|
|
read_fp_reg_s(fa, &va_p0, &va_p1, &va_p2, &va_p3);
|
|
read_fp_reg_s(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3);
|
|
read_fp_reg_s(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3);
|
|
DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3);
|
|
DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3);
|
|
DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
|
|
working_part = WORKING_PART_0;
|
|
simd_working:
|
|
_fex = 0;
|
|
switch (working_part) {
|
|
case WORKING_PART_0:
|
|
DEBUG_INFO("WORKING_PART_0\n");
|
|
va = va_p0;
|
|
vb = vb_p0;
|
|
vc = vc_p0;
|
|
if ((fpcr & FPCR_STATUS_MASK0) == 0) {
|
|
SW64_FP_NORMAL_S(SA, &va);
|
|
SW64_FP_NORMAL_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("PART0: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
|
|
} else {
|
|
SW64_FP_NAN_S(SA, &va);
|
|
SW64_FP_NAN_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_1:
|
|
DEBUG_INFO("WORKING_PART_1\n");
|
|
va = va_p1;
|
|
vb = vb_p1;
|
|
vc = vc_p1;
|
|
if ((fpcr & FPCR_STATUS_MASK1) == 0) {
|
|
SW64_FP_NORMAL_S(SA, &va);
|
|
SW64_FP_NORMAL_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("PART1: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
|
|
} else {
|
|
SW64_FP_NAN_S(SA, &va);
|
|
SW64_FP_NAN_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_2:
|
|
DEBUG_INFO("WORKING_PART_2\n");
|
|
va = va_p2;
|
|
vb = vb_p2;
|
|
vc = vc_p2;
|
|
if ((fpcr & FPCR_STATUS_MASK2) == 0) {
|
|
SW64_FP_NORMAL_S(SA, &va);
|
|
SW64_FP_NORMAL_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("PART2: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
|
|
} else {
|
|
SW64_FP_NAN_S(SA, &va);
|
|
SW64_FP_NAN_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_3:
|
|
DEBUG_INFO("WORKING_PART_3\n");
|
|
va = va_p3;
|
|
vb = vb_p3;
|
|
vc = vc_p3;
|
|
if ((fpcr & FPCR_STATUS_MASK3) == 0) {
|
|
SW64_FP_NORMAL_S(SA, &va);
|
|
SW64_FP_NORMAL_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("PART3: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
|
|
} else {
|
|
SW64_FP_NAN_S(SA, &va);
|
|
SW64_FP_NAN_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
|
|
}
|
|
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_UNPACK_SP(SB, &vb);
|
|
|
|
switch (func) {
|
|
case FNC_VSUBS:
|
|
DEBUG_INFO("FNC_VSUBS\n");
|
|
FP_SUB_S(SR, SA, SB);
|
|
goto pack_s;
|
|
case FNC_VMULS:
|
|
DEBUG_INFO("FNC_VMULS\n");
|
|
FP_MUL_S(SR, SA, SB);
|
|
goto pack_s;
|
|
case FNC_VADDS:
|
|
DEBUG_INFO("FNC_VADDS\n");
|
|
FP_ADD_S(SR, SA, SB);
|
|
goto pack_s;
|
|
case FNC_VDIVS:
|
|
DEBUG_INFO("FNC_VDIVS\n");
|
|
FP_DIV_S(SR, SA, SB);
|
|
goto pack_s;
|
|
case FNC_VSQRTS:
|
|
DEBUG_INFO("FNC_VSQRTS\n");
|
|
FP_SQRT_S(SR, SB);
|
|
goto pack_s;
|
|
}
|
|
pack_s:
|
|
FP_PACK_SP(&vc, SR);
|
|
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) {
|
|
DEBUG_INFO("pack_s, vc=0 !!!!\n");
|
|
vc = 0;
|
|
}
|
|
|
|
DEBUG_INFO("SW64 SIMD Emulation S-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
|
|
DEBUG_INFO("SW64 SIMD Emulation S-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
|
|
next_working_s:
|
|
switch (working_part) {
|
|
case WORKING_PART_0:
|
|
working_part = WORKING_PART_1;
|
|
vc_p0 = vc;
|
|
fex_p0 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_1:
|
|
working_part = WORKING_PART_2;
|
|
vc_p1 = vc;
|
|
fex_p1 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_2:
|
|
working_part = WORKING_PART_3;
|
|
vc_p2 = vc;
|
|
fex_p2 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_3:
|
|
vc_p3 = vc;
|
|
fex_p3 = _fex;
|
|
goto done;
|
|
}
|
|
done:
|
|
if (fex_p0 || fex_p1 || fex_p2 || fex_p3) {
|
|
unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3;
|
|
unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3;
|
|
|
|
fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0;
|
|
swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr;
|
|
/* manage fpcr_p0 */
|
|
if (fex_p0) {
|
|
swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p0 = fpcr;
|
|
fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0);
|
|
DEBUG_INFO("fex_p0: fpcr_p0:%#lx\n", fpcr_p0);
|
|
}
|
|
|
|
if (fex_p1) {
|
|
swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p1 = fpcr;
|
|
fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1);
|
|
DEBUG_INFO("fex_p1: fpcr_p1:%#lx\n", fpcr_p1);
|
|
}
|
|
|
|
if (fex_p2) {
|
|
swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p2 = fpcr;
|
|
fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2);
|
|
DEBUG_INFO("fex_p2: fpcr_p2:%#lx\n", fpcr_p2);
|
|
}
|
|
|
|
if (fex_p3) {
|
|
swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p3 = fpcr;
|
|
fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3);
|
|
DEBUG_INFO("fex_p3: fpcr_p3:%#lx\n", fpcr_p3);
|
|
}
|
|
|
|
fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3;
|
|
DEBUG_INFO("fex_p0 = %#lx\n", fex_p0);
|
|
DEBUG_INFO("fex_p1 = %#lx\n", fex_p1);
|
|
DEBUG_INFO("fex_p2 = %#lx\n", fex_p2);
|
|
DEBUG_INFO("fex_p3 = %#lx\n", fex_p3);
|
|
DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr);
|
|
wrfpcr(fpcr);
|
|
|
|
DEBUG_INFO("Before write fp: vc_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
|
|
write_fp_reg_s(fc, vc_p0, vc_p1, vc_p2, vc_p3);
|
|
|
|
/* Do we generate a signal? */
|
|
_fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK)
|
|
| (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK);
|
|
si_code = 0;
|
|
if (_fex) {
|
|
if (_fex & IEEE_TRAP_ENABLE_DNO)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_INE)
|
|
si_code = FPE_FLTRES;
|
|
if (_fex & IEEE_TRAP_ENABLE_UNF)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_OVF)
|
|
si_code = FPE_FLTOVF;
|
|
if (_fex & IEEE_TRAP_ENABLE_DZE)
|
|
si_code = FPE_FLTDIV;
|
|
if (_fex & IEEE_TRAP_ENABLE_INV)
|
|
si_code = FPE_FLTINV;
|
|
}
|
|
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
|
|
return si_code;
|
|
}
|
|
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
|
|
return 0;
|
|
|
|
bad_insn:
|
|
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
|
|
return -1;
|
|
|
|
}
|
|
|
|
/*
 * Toggle bit 63 of @va and return the result; every other bit is passed
 * through unchanged.  The callers in this file use it on a double-precision
 * operand image to change its sign before unpacking.
 */
static inline unsigned long negative_value(unsigned long va)
{
	const unsigned long sign_bit = 1UL << 63;

	return va ^ sign_bit;
}
|
|
|
|
/*
 * Toggle bit 31 of @va and return the result; every other bit is passed
 * through unchanged.  The callers in this file use it on a single-precision
 * operand image to change its sign before unpacking.
 */
static inline unsigned long s_negative_value(unsigned long va)
{
	const unsigned long sign_bit = 1UL << 31;

	return va ^ sign_bit;
}
|
|
|
|
/*
|
|
* sw64 mul-add floating emulation
|
|
*/
|
|
long mul_add_fp_emul(unsigned long pc)
|
|
{
|
|
FP_DECL_EX;
|
|
FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(S_TMP); FP_DECL_S(SR);
|
|
FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(D_TMP); FP_DECL_D(DR);
|
|
FP_DECL_S(S_ZERO);
|
|
FP_DECL_D(D_ZERO);
|
|
FP_DECL_S(S_TMP2);
|
|
FP_DECL_D(D_TMP2);
|
|
|
|
unsigned long fa, fb, fc, fd, func, mode, src;
|
|
unsigned long res, va, vb, vc, vd, vtmp, vtmp2, swcr, fpcr;
|
|
__u32 insn;
|
|
long si_code;
|
|
unsigned long vzero = 0;
|
|
|
|
get_user(insn, (__u32 *)pc);
|
|
fd = (insn >> 0) & 0x1f; /* destination register */
|
|
fc = (insn >> 5) & 0x1f;
|
|
fb = (insn >> 16) & 0x1f;
|
|
fa = (insn >> 21) & 0x1f;
|
|
func = (insn >> 10) & 0x3f;
|
|
|
|
fpcr = rdfpcr();
|
|
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
|
|
|
|
DEBUG_INFO("===== Entering SW64 MUL-ADD Emulation =====\n");
|
|
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
|
|
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
|
|
DEBUG_INFO("software swcr = %#lx\n", swcr);
|
|
|
|
if (func == FNC_FMAS || func == FNC_FMSS || func == FNC_FNMAS || func == FNC_FNMSS) {
|
|
va = sw64_read_fp_reg_s(fa);
|
|
vb = sw64_read_fp_reg_s(fb);
|
|
vc = sw64_read_fp_reg_s(fc);
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_UNPACK_SP(SB, &vb);
|
|
FP_UNPACK_SP(SC, &vc);
|
|
FP_UNPACK_SP(S_ZERO, &vzero);
|
|
}
|
|
if (func == FNC_FMAD || func == FNC_FMSD || func == FNC_FNMAD || func == FNC_FNMSD) {
|
|
va = sw64_read_fp_reg(fa);
|
|
vb = sw64_read_fp_reg(fb);
|
|
vc = sw64_read_fp_reg(fc);
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_UNPACK_DP(DB, &vb);
|
|
FP_UNPACK_DP(DC, &vc);
|
|
FP_UNPACK_DP(D_ZERO, &vzero);
|
|
}
|
|
DEBUG_INFO("va = %#lx, vb = %#lx, vc = %#lx\n", va, vb, vc);
|
|
switch (func) {
|
|
case FNC_FMAS:
|
|
FP_MUL_S(S_TMP, SA, SB);
|
|
FP_ADD_S(SR, S_TMP, SC);
|
|
goto pack_s;
|
|
case FNC_FMSS:
|
|
FP_MUL_S(S_TMP, SA, SB);
|
|
FP_SUB_S(SR, S_TMP, SC);
|
|
goto pack_s;
|
|
case FNC_FNMAS: /* (-va*vb) + vc */
|
|
va = s_negative_value(va);
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_MUL_S(S_TMP, SA, SB);
|
|
FP_ADD_S(SR, S_TMP, SC);
|
|
goto pack_s;
|
|
case FNC_FNMSS: /* (-va*vb) - vc */
|
|
va = s_negative_value(va);
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_MUL_S(S_TMP, SA, SB);
|
|
FP_SUB_S(SR, S_TMP, SC);
|
|
goto pack_s;
|
|
case FNC_FMAD:
|
|
FP_MUL_D(D_TMP, DA, DB);
|
|
FP_ADD_D(DR, D_TMP, DC);
|
|
goto pack_d;
|
|
case FNC_FMSD:
|
|
FP_MUL_D(D_TMP, DA, DB);
|
|
FP_SUB_D(DR, D_TMP, DC);
|
|
goto pack_d;
|
|
case FNC_FNMAD:
|
|
va = negative_value(va);
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_MUL_D(D_TMP, DA, DB);
|
|
FP_ADD_D(DR, D_TMP, DC);
|
|
goto pack_d;
|
|
case FNC_FNMSD:
|
|
va = negative_value(va);
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_MUL_D(D_TMP, DA, DB);
|
|
FP_SUB_D(DR, D_TMP, DC);
|
|
goto pack_d;
|
|
default:
|
|
goto bad_insn;
|
|
|
|
}
|
|
pack_s:
|
|
FP_PACK_SP(&vd, SR);
|
|
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
|
|
vd = 0;
|
|
sw64_write_fp_reg_s(fd, vd);
|
|
goto done;
|
|
|
|
pack_d:
|
|
FP_PACK_DP(&vd, DR);
|
|
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
|
|
vd = 0;
|
|
sw64_write_fp_reg(fd, vd);
|
|
|
|
done:
|
|
DEBUG_INFO("vd = %#lx\n", vd);
|
|
if (_fex) {
|
|
/* Record exceptions in software control word. */
|
|
swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr |= ieee_swcr_to_fpcr(swcr);
|
|
wrfpcr(fpcr); /** wrfpcr will destroy vector register! */
|
|
if (func == FNC_FMAS || func == FNC_FMSS || func == FNC_FNMAS || func == FNC_FNMSS)
|
|
sw64_write_fp_reg_s(fd, vd);
|
|
if (func == FNC_FMAD || func == FNC_FMSD || func == FNC_FNMAD || func == FNC_FNMSD)
|
|
sw64_write_fp_reg(fd, vd);
|
|
|
|
/* Do we generate a signal? */
|
|
_fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK;
|
|
si_code = 0;
|
|
if (_fex) {
|
|
if (_fex & IEEE_TRAP_ENABLE_DNO)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_INE)
|
|
si_code = FPE_FLTRES;
|
|
if (_fex & IEEE_TRAP_ENABLE_UNF)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_OVF)
|
|
si_code = FPE_FLTOVF;
|
|
if (_fex & IEEE_TRAP_ENABLE_DZE)
|
|
si_code = FPE_FLTDIV;
|
|
if (_fex & IEEE_TRAP_ENABLE_INV)
|
|
si_code = FPE_FLTINV;
|
|
}
|
|
|
|
return si_code;
|
|
}
|
|
|
|
/*
|
|
* We used to write the destination register here, but DEC FORTRAN
|
|
* requires that the result *always* be written... so we do the write
|
|
* immediately after the operations above.
|
|
*/
|
|
|
|
return 0;
|
|
|
|
bad_insn:
|
|
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
|
|
return -1;
|
|
}
|
|
|
|
|
|
long simd_mul_add_fp_emul_s(unsigned long pc)
|
|
{
|
|
FP_DECL_EX;
|
|
FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SC); FP_DECL_S(S_TMP); FP_DECL_S(SR);
|
|
FP_DECL_S(S_ZERO);
|
|
FP_DECL_S(S_TMP2);
|
|
|
|
unsigned long fa, fb, fc, fd, func, mode, src;
|
|
unsigned long res, va, vb, vc, vd, vtmp, vtmp2, swcr, fpcr;
|
|
__u32 insn;
|
|
long si_code;
|
|
unsigned long vzero = 0;
|
|
|
|
get_user(insn, (__u32 *)pc);
|
|
fd = (insn >> 0) & 0x1f; /* destination register */
|
|
fc = (insn >> 5) & 0x1f;
|
|
fb = (insn >> 16) & 0x1f;
|
|
fa = (insn >> 21) & 0x1f;
|
|
func = (insn >> 10) & 0x3f;
|
|
|
|
fpcr = rdfpcr();
|
|
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
|
|
|
|
unsigned long va_p0, va_p1, va_p2, va_p3;
|
|
unsigned long vb_p0, vb_p1, vb_p2, vb_p3;
|
|
unsigned long vc_p0, vc_p1, vc_p2, vc_p3;
|
|
unsigned long vd_p0, vd_p1, vd_p2, vd_p3;
|
|
unsigned long fex_p0, fex_p1, fex_p2, fex_p3;
|
|
|
|
int working_part;
|
|
|
|
DEBUG_INFO("======== Entering SIMD S-floating mul-add emulation =======\n");
|
|
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
|
|
DEBUG_INFO("software swcr = %#lx\n", swcr);
|
|
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
|
|
read_fp_reg_s(fa, &va_p0, &va_p1, &va_p2, &va_p3);
|
|
read_fp_reg_s(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3);
|
|
read_fp_reg_s(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3);
|
|
read_fp_reg_s(fd, &vd_p0, &vd_p1, &vd_p2, &vd_p3);
|
|
DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3);
|
|
DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3);
|
|
DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
|
|
DEBUG_INFO("vd_p0:%#lx, vd_p1:%#lx, vd_p2:%#lx, vd_p3:%#lx\n", vd_p0, vd_p1, vd_p2, vd_p3);
|
|
working_part = WORKING_PART_0;
|
|
simd_working:
|
|
_fex = 0;
|
|
switch (working_part) {
|
|
case WORKING_PART_0:
|
|
DEBUG_INFO("WORKING_PART_0\n");
|
|
va = va_p0;
|
|
vb = vb_p0;
|
|
vc = vc_p0;
|
|
DEBUG_INFO("FPCR_STATUS_MASK0 : %#lx, fpcr :%#lx\n", FPCR_STATUS_MASK0, fpcr);
|
|
if ((fpcr & FPCR_STATUS_MASK0) == 0) {
|
|
SW64_FP_NORMAL_S(SA, &va);
|
|
SW64_FP_NORMAL_S(SB, &vb);
|
|
SW64_FP_NORMAL_S(SC, &vc);
|
|
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("LOW: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
|
|
} else {
|
|
SW64_FP_NAN_S(SA, &va);
|
|
SW64_FP_NAN_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_1:
|
|
DEBUG_INFO("WORKING_PART_1\n");
|
|
va = va_p1;
|
|
vb = vb_p1;
|
|
vc = vc_p1;
|
|
DEBUG_INFO("FPCR_STATUS_MASK1 : %#lx, fpcr :%#lx\n", FPCR_STATUS_MASK0, fpcr);
|
|
if ((fpcr & FPCR_STATUS_MASK1) == 0) {
|
|
SW64_FP_NORMAL_S(SA, &va);
|
|
SW64_FP_NORMAL_S(SB, &vb);
|
|
SW64_FP_NORMAL_S(SC, &vc);
|
|
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("HIGH: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
|
|
} else {
|
|
SW64_FP_NAN_S(SA, &va);
|
|
SW64_FP_NAN_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_2:
|
|
DEBUG_INFO("WORKING_PART_2\n");
|
|
va = va_p2;
|
|
vb = vb_p2;
|
|
vc = vc_p2;
|
|
if ((fpcr & FPCR_STATUS_MASK2) == 0) {
|
|
SW64_FP_NORMAL_S(SA, &va);
|
|
SW64_FP_NORMAL_S(SB, &vb);
|
|
SW64_FP_NORMAL_S(SC, &vc);
|
|
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("HIGH: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
|
|
} else {
|
|
SW64_FP_NAN_S(SA, &va);
|
|
SW64_FP_NAN_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_3:
|
|
DEBUG_INFO("WORKING_PART_3\n");
|
|
va = va_p3;
|
|
vb = vb_p3;
|
|
vc = vc_p3;
|
|
if ((fpcr & FPCR_STATUS_MASK3) == 0) {
|
|
SW64_FP_NORMAL_S(SA, &va);
|
|
SW64_FP_NORMAL_S(SB, &vb);
|
|
SW64_FP_NORMAL_S(SC, &vc);
|
|
if ((SA_c == SW64_FP_NORMAL) && (SB_c == SW64_FP_NORMAL) && (SC_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("HIGH: SA_c = %#lx, SB_c = %#lx\n", SA_c, SB_c);
|
|
} else {
|
|
SW64_FP_NAN_S(SA, &va);
|
|
SW64_FP_NAN_S(SB, &vb);
|
|
if ((SA_c == SW64_FP_NAN) && (SB_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
}
|
|
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_UNPACK_SP(SB, &vb);
|
|
FP_UNPACK_SP(SC, &vc);
|
|
FP_UNPACK_SP(S_ZERO, &vzero);
|
|
switch (func) {
|
|
case FNC_FMAS:
|
|
FP_MUL_S(S_TMP, SA, SB);
|
|
FP_ADD_S(SR, S_TMP, SC);
|
|
goto pack_s;
|
|
case FNC_FMSS:
|
|
FP_MUL_S(S_TMP, SA, SB);
|
|
FP_SUB_S(SR, S_TMP, SC);
|
|
goto pack_s;
|
|
case FNC_FNMAS: /* (-va*vb) + vc */
|
|
va = s_negative_value(va);
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_MUL_S(S_TMP, SA, SB);
|
|
FP_ADD_S(SR, S_TMP, SC);
|
|
goto pack_s;
|
|
case FNC_FNMSS: /* (-va*vb) - vc */
|
|
va = s_negative_value(va);
|
|
FP_UNPACK_SP(SA, &va);
|
|
FP_MUL_S(S_TMP, SA, SB);
|
|
FP_SUB_S(SR, S_TMP, SC);
|
|
goto pack_s;
|
|
default:
|
|
goto bad_insn;
|
|
}
|
|
|
|
pack_s:
|
|
FP_PACK_SP(&vd, SR);
|
|
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
|
|
vd = 0;
|
|
DEBUG_INFO("SW64 SIMD Emulation S-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
|
|
DEBUG_INFO("SW64 SIMD Emulation S-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
|
|
next_working_s:
|
|
switch (working_part) {
|
|
case WORKING_PART_0:
|
|
working_part = WORKING_PART_1;
|
|
vd_p0 = vd;
|
|
fex_p0 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_1:
|
|
working_part = WORKING_PART_2;
|
|
vd_p1 = vd;
|
|
fex_p1 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_2:
|
|
working_part = WORKING_PART_3;
|
|
vd_p2 = vd;
|
|
fex_p2 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_3:
|
|
vd_p3 = vd;
|
|
fex_p3 = _fex;
|
|
goto done;
|
|
}
|
|
done:
|
|
if (fex_p0 || fex_p1 || fex_p2 || fex_p3) {
|
|
unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3;
|
|
unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3;
|
|
|
|
fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0;
|
|
swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr;
|
|
/* manage fpcr_p0 */
|
|
if (fex_p0) {
|
|
swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p0 = fpcr;
|
|
fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0);
|
|
}
|
|
|
|
if (fex_p1) {
|
|
swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p1 = fpcr;
|
|
fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1);
|
|
}
|
|
|
|
if (fex_p2) {
|
|
swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p2 = fpcr;
|
|
fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2);
|
|
}
|
|
|
|
if (fex_p3) {
|
|
swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p3 = fpcr;
|
|
fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3);
|
|
}
|
|
|
|
fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3;
|
|
DEBUG_INFO("fex_p0 = %#lx\n", fex_p0);
|
|
DEBUG_INFO("fex_p1 = %#lx\n", fex_p1);
|
|
DEBUG_INFO("fex_p2 = %#lx\n", fex_p2);
|
|
DEBUG_INFO("fex_p3 = %#lx\n", fex_p3);
|
|
DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr);
|
|
wrfpcr(fpcr);
|
|
DEBUG_INFO("Before write fp: vp_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
|
|
write_fp_reg_s(fd, vd_p0, vd_p1, vd_p2, vd_p3); /* write to fd */
|
|
|
|
/* Do we generate a signal? */
|
|
_fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK)
|
|
| (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK);
|
|
si_code = 0;
|
|
if (_fex) {
|
|
if (_fex & IEEE_TRAP_ENABLE_DNO)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_INE)
|
|
si_code = FPE_FLTRES;
|
|
if (_fex & IEEE_TRAP_ENABLE_UNF)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_OVF)
|
|
si_code = FPE_FLTOVF;
|
|
if (_fex & IEEE_TRAP_ENABLE_DZE)
|
|
si_code = FPE_FLTDIV;
|
|
if (_fex & IEEE_TRAP_ENABLE_INV)
|
|
si_code = FPE_FLTINV;
|
|
}
|
|
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
|
|
return si_code;
|
|
|
|
}
|
|
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
|
|
return 0;
|
|
|
|
bad_insn:
|
|
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
|
|
return -1;
|
|
}
|
|
|
|
long simd_mul_add_fp_emul_d(unsigned long pc)
|
|
{
|
|
FP_DECL_EX;
|
|
FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DC); FP_DECL_D(D_TMP); FP_DECL_D(DR);
|
|
FP_DECL_D(D_ZERO);
|
|
FP_DECL_D(D_TMP2);
|
|
|
|
unsigned long fa, fb, fc, fd, func, mode, src;
|
|
unsigned long res, va, vb, vc, vd, vtmp, vtmp2, swcr, fpcr;
|
|
__u32 insn;
|
|
long si_code;
|
|
unsigned long vzero = 0;
|
|
|
|
get_user(insn, (__u32 *)pc);
|
|
fd = (insn >> 0) & 0x1f; /* destination register */
|
|
fc = (insn >> 5) & 0x1f;
|
|
fb = (insn >> 16) & 0x1f;
|
|
fa = (insn >> 21) & 0x1f;
|
|
func = (insn >> 10) & 0x3f;
|
|
|
|
fpcr = rdfpcr();
|
|
mode = (fpcr >> FPCR_DYN_SHIFT) & 0x3;
|
|
|
|
unsigned long va_p0, va_p1, va_p2, va_p3;
|
|
unsigned long vb_p0, vb_p1, vb_p2, vb_p3;
|
|
unsigned long vc_p0, vc_p1, vc_p2, vc_p3;
|
|
unsigned long vd_p0, vd_p1, vd_p2, vd_p3;
|
|
unsigned long fex_p0, fex_p1, fex_p2, fex_p3;
|
|
|
|
int working_part;
|
|
|
|
DEBUG_INFO("======== Entering SIMD D-floating mul-add emulation =======\n");
|
|
DEBUG_INFO("hardware fpcr = %#lx\n", fpcr);
|
|
swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
|
|
DEBUG_INFO("software swcr = %#lx\n", swcr);
|
|
read_fp_reg_d(fa, &va_p0, &va_p1, &va_p2, &va_p3);
|
|
read_fp_reg_d(fb, &vb_p0, &vb_p1, &vb_p2, &vb_p3);
|
|
read_fp_reg_d(fc, &vc_p0, &vc_p1, &vc_p2, &vc_p3);
|
|
read_fp_reg_d(fd, &vd_p0, &vd_p1, &vd_p2, &vd_p3);
|
|
DEBUG_INFO("va_p0:%#lx, va_p1:%#lx, va_p2:%#lx, va_p3:%#lx\n", va_p0, va_p1, va_p2, va_p3);
|
|
DEBUG_INFO("vb_p0:%#lx, vb_p1:%#lx, vb_p2:%#lx, vb_p3:%#lx\n", vb_p0, vb_p1, vb_p2, vb_p3);
|
|
DEBUG_INFO("vc_p0:%#lx, vc_p1:%#lx, vc_p2:%#lx, vc_p3:%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
|
|
DEBUG_INFO("vd_p0:%#lx, vd_p1:%#lx, vd_p2:%#lx, vd_p3:%#lx\n", vd_p0, vd_p1, vd_p2, vd_p3);
|
|
working_part = WORKING_PART_0;
|
|
simd_working:
|
|
_fex = 0;
|
|
switch (working_part) {
|
|
case WORKING_PART_0:
|
|
DEBUG_INFO("WORKING_PART_0\n");
|
|
va = va_p0;
|
|
vb = vb_p0;
|
|
vc = vc_p0;
|
|
vd = vd_p0;
|
|
if ((fpcr & FPCR_STATUS_MASK0) == 0) {
|
|
SW64_FP_NORMAL_D(DA, &va);
|
|
SW64_FP_NORMAL_D(DB, &vb);
|
|
SW64_FP_NORMAL_D(DC, &vc);
|
|
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("LOW: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
|
|
} else {
|
|
SW64_FP_NAN_D(DA, &va);
|
|
SW64_FP_NAN_D(DB, &vb);
|
|
SW64_FP_NAN_D(DC, &vc);
|
|
if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_1:
|
|
DEBUG_INFO("WORKING_PART_1\n");
|
|
va = va_p1;
|
|
vb = vb_p1;
|
|
vc = vc_p1;
|
|
vd = vd_p1;
|
|
if ((fpcr & FPCR_STATUS_MASK1) == 0) {
|
|
SW64_FP_NORMAL_D(DA, &va);
|
|
SW64_FP_NORMAL_D(DB, &vb);
|
|
SW64_FP_NORMAL_D(DC, &vc);
|
|
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
|
|
} else {
|
|
SW64_FP_NAN_D(DA, &va);
|
|
SW64_FP_NAN_D(DB, &vb);
|
|
SW64_FP_NAN_D(DC, &vc);
|
|
if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_2:
|
|
DEBUG_INFO("WORKING_PART_2\n");
|
|
va = va_p2;
|
|
vb = vb_p2;
|
|
vc = vc_p2;
|
|
vd = vd_p2;
|
|
if ((fpcr & FPCR_STATUS_MASK2) == 0) {
|
|
SW64_FP_NORMAL_D(DA, &va);
|
|
SW64_FP_NORMAL_D(DB, &vb);
|
|
SW64_FP_NORMAL_D(DC, &vc);
|
|
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
|
|
} else {
|
|
SW64_FP_NAN_D(DA, &va);
|
|
SW64_FP_NAN_D(DB, &vb);
|
|
SW64_FP_NAN_D(DC, &vc);
|
|
if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
case WORKING_PART_3:
|
|
DEBUG_INFO("WORKING_PART_3\n");
|
|
va = va_p3;
|
|
vb = vb_p3;
|
|
vc = vc_p3;
|
|
vd = vd_p3;
|
|
if ((fpcr & FPCR_STATUS_MASK3) == 0) {
|
|
SW64_FP_NORMAL_D(DA, &va);
|
|
SW64_FP_NORMAL_D(DB, &vb);
|
|
SW64_FP_NORMAL_D(DC, &vc);
|
|
if ((DA_c == SW64_FP_NORMAL) && (DB_c == SW64_FP_NORMAL) && (DC_c == SW64_FP_NORMAL))
|
|
goto next_working_s;
|
|
else
|
|
DEBUG_INFO("HIGH: DA_c = %#lx, DB_c = %#lx\n", DA_c, DB_c);
|
|
} else {
|
|
SW64_FP_NAN_D(DA, &va);
|
|
SW64_FP_NAN_D(DB, &vb);
|
|
SW64_FP_NAN_D(DC, &vc);
|
|
if ((DA_c == SW64_FP_NAN) && (DB_c == SW64_FP_NAN) && (DC_c == SW64_FP_NAN))
|
|
goto next_working_s;
|
|
}
|
|
break;
|
|
}
|
|
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_UNPACK_DP(DB, &vb);
|
|
FP_UNPACK_DP(DC, &vc);
|
|
FP_UNPACK_DP(D_ZERO, &vzero);
|
|
|
|
switch (func) {
|
|
case FNC_FMAD:
|
|
FP_MUL_D(D_TMP, DA, DB);
|
|
FP_ADD_D(DR, D_TMP, DC);
|
|
goto pack_d;
|
|
case FNC_FMSD:
|
|
FP_MUL_D(D_TMP, DA, DB);
|
|
FP_SUB_D(DR, D_TMP, DC);
|
|
goto pack_d;
|
|
case FNC_FNMAD:
|
|
va = negative_value(va);
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_MUL_D(D_TMP, DA, DB);
|
|
FP_ADD_D(DR, D_TMP, DC);
|
|
goto pack_d;
|
|
case FNC_FNMSD:
|
|
va = negative_value(va);
|
|
FP_UNPACK_DP(DA, &va);
|
|
FP_MUL_D(D_TMP, DA, DB);
|
|
FP_SUB_D(DR, D_TMP, DC);
|
|
|
|
goto pack_d;
|
|
default:
|
|
goto bad_insn;
|
|
}
|
|
|
|
pack_d:
|
|
FP_PACK_DP(&vd, DR);
|
|
if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
|
|
vd = 0;
|
|
DEBUG_INFO("SW64 SIMD Emulation D-floating _fex=%#lx, va=%#lx, vb=%#lx, vc=%#lx\n", _fex, va, vb, vc);
|
|
DEBUG_INFO("SW64 SIMD Emulation D-floating mode=%#lx,func=%#lx, swcr=%#lx\n", mode, func, swcr);
|
|
next_working_s:
|
|
switch (working_part) {
|
|
case WORKING_PART_0:
|
|
working_part = WORKING_PART_1;
|
|
vd_p0 = vd;
|
|
fex_p0 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_1:
|
|
working_part = WORKING_PART_2;
|
|
vd_p1 = vd;
|
|
fex_p1 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_2:
|
|
working_part = WORKING_PART_3;
|
|
vd_p2 = vd;
|
|
fex_p2 = _fex;
|
|
goto simd_working;
|
|
case WORKING_PART_3:
|
|
vd_p3 = vd;
|
|
fex_p3 = _fex;
|
|
goto done;
|
|
}
|
|
done:
|
|
if (fex_p0 || fex_p1 || fex_p2 || fex_p3) {
|
|
unsigned long fpcr_p0, fpcr_p1, fpcr_p2, fpcr_p3;
|
|
unsigned long swcr_p0, swcr_p1, swcr_p2, swcr_p3;
|
|
|
|
fpcr_p0 = fpcr_p1 = fpcr_p2 = fpcr_p3 = 0;
|
|
swcr_p0 = swcr_p1 = swcr_p2 = swcr_p3 = swcr;
|
|
/* manage fpcr_p0 */
|
|
if (fex_p0) {
|
|
swcr_p0 |= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p0 << IEEE_STATUS0_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p0 = fpcr;
|
|
fpcr_p0 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p0 |= ieee_swcr_to_fpcr(swcr_p0);
|
|
}
|
|
|
|
if (fex_p1) {
|
|
swcr_p1 |= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p1 << IEEE_STATUS1_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p1 = fpcr;
|
|
fpcr_p1 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p1 |= ieee_swcr_to_fpcr(swcr_p1);
|
|
}
|
|
|
|
if (fex_p2) {
|
|
swcr_p2 |= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p2 << IEEE_STATUS2_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p2 = fpcr;
|
|
fpcr_p2 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p2 |= ieee_swcr_to_fpcr(swcr_p2);
|
|
}
|
|
|
|
if (fex_p3) {
|
|
swcr_p3 |= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
|
|
current_thread_info()->ieee_state
|
|
|= (fex_p3 << IEEE_STATUS3_TO_EXCSUM_SHIFT);
|
|
|
|
/* Update hardware control register. */
|
|
fpcr_p3 = fpcr;
|
|
fpcr_p3 &= (~FPCR_MASK | FPCR_DYN_MASK);
|
|
fpcr_p3 |= ieee_swcr_to_fpcr(swcr_p3);
|
|
}
|
|
|
|
fpcr = fpcr_p0 | fpcr_p1 | fpcr_p2 | fpcr_p3;
|
|
DEBUG_INFO("fex_p0 = %#lx\n", fex_p0);
|
|
DEBUG_INFO("fex_p1 = %#lx\n", fex_p1);
|
|
DEBUG_INFO("fex_p2 = %#lx\n", fex_p2);
|
|
DEBUG_INFO("fex_p3 = %#lx\n", fex_p3);
|
|
DEBUG_INFO("SIMD emulation almost finished.before write fpcr = %#lx\n", fpcr);
|
|
wrfpcr(fpcr);
|
|
|
|
DEBUG_INFO("Before write fp: vp_p0=%#lx, vc_p1=%#lx, vc_p2=%#lx, vc_p3=%#lx\n", vc_p0, vc_p1, vc_p2, vc_p3);
|
|
write_fp_reg_d(fd, vd_p0, vd_p1, vd_p2, vd_p3); /* write to fd */
|
|
|
|
/* Do we generate a signal? */
|
|
_fex = (fex_p0 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p1 & swcr & IEEE_TRAP_ENABLE_MASK)
|
|
| (fex_p2 & swcr & IEEE_TRAP_ENABLE_MASK) | (fex_p3 & swcr & IEEE_TRAP_ENABLE_MASK);
|
|
si_code = 0;
|
|
if (_fex) {
|
|
if (_fex & IEEE_TRAP_ENABLE_DNO)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_INE)
|
|
si_code = FPE_FLTRES;
|
|
if (_fex & IEEE_TRAP_ENABLE_UNF)
|
|
si_code = FPE_FLTUND;
|
|
if (_fex & IEEE_TRAP_ENABLE_OVF)
|
|
si_code = FPE_FLTOVF;
|
|
if (_fex & IEEE_TRAP_ENABLE_DZE)
|
|
si_code = FPE_FLTDIV;
|
|
if (_fex & IEEE_TRAP_ENABLE_INV)
|
|
si_code = FPE_FLTINV;
|
|
}
|
|
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
|
|
return si_code;
|
|
}
|
|
DEBUG_INFO("SIMD finished.. si_code:%#lx\n", si_code);
|
|
return 0;
|
|
|
|
bad_insn:
|
|
printk(KERN_ERR "%s: Invalid FP insn %#x at %#lx\n", __func__, insn, pc);
|
|
return -1;
|
|
}
|
|
|
|
/*
 * Fetch SIMD register @reg through sw64_read_simd_fp_m_s() and split the
 * two words it hands back into four 32-bit lanes.
 *
 * Lane layout as extracted here:
 *   *val_p0 = bits  0..31 of word 0
 *   *val_p1 = bits 32..63 of word 0
 *   *val_p2 = bits  0..31 of word 1
 *   *val_p3 = bits 32..63 of word 1
 *
 * Each lane is masked to 32 bits before being stored in the caller's
 * unsigned long.
 */
void read_fp_reg_s(unsigned long reg, unsigned long *val_p0,
		unsigned long *val_p1, unsigned long *val_p2, unsigned long *val_p3)
{
	const unsigned long lane_mask = 0xffffffffUL;
	unsigned long words[2];
	unsigned long low_word, high_word;

	sw64_read_simd_fp_m_s(reg, words);
	low_word = words[0];
	high_word = words[1];

	/* Stores are kept in p0..p3 order, same as the lanes are laid out. */
	*val_p0 = low_word & lane_mask;
	*val_p1 = (low_word >> 32) & lane_mask;
	*val_p2 = high_word & lane_mask;
	*val_p3 = (high_word >> 32) & lane_mask;
}
|
|
|
|
/*
 * Fetch SIMD register @reg through sw64_read_simd_fp_m_d() and return its
 * four words, one per output pointer: word 0 goes to *val_p0, word 1 to
 * *val_p1, word 2 to *val_p2 and word 3 to *val_p3.
 */
void read_fp_reg_d(unsigned long reg, unsigned long *val_p0,
		unsigned long *val_p1, unsigned long *val_p2, unsigned long *val_p3)
{
	unsigned long lanes[4];
	unsigned long *const out[4] = { val_p0, val_p1, val_p2, val_p3 };
	int i;

	sw64_read_simd_fp_m_d(reg, lanes);

	/* Copy lane i to the i-th output, in ascending order. */
	for (i = 0; i < 4; i++)
		*out[i] = lanes[i];
}
|
|
|
|
/*
 * Pack four 32-bit lanes into two words and write them to SIMD register
 * @reg through sw64_write_simd_fp_reg_s().
 *
 * Only the low 32 bits of each val_pN are used:
 *   word 0 = val_p1[31:0] << 32 | val_p0[31:0]
 *   word 1 = val_p3[31:0] << 32 | val_p2[31:0]
 */
void write_fp_reg_s(unsigned long reg, unsigned long val_p0,
		unsigned long val_p1, unsigned long val_p2, unsigned long val_p3)
{
	const unsigned long lane_mask = 0xffffffffUL;
	unsigned long low_word, high_word;

	low_word = val_p0 & lane_mask;
	low_word |= (val_p1 & lane_mask) << 32;

	high_word = val_p2 & lane_mask;
	high_word |= (val_p3 & lane_mask) << 32;

	sw64_write_simd_fp_reg_s(reg, low_word, high_word);
}
|
|
|
|
/*
 * Write four words to SIMD register @reg.
 *
 * This is a plain forwarder: the values are handed to
 * sw64_write_simd_fp_reg_d() unchanged and in the same p0..p3 order.
 */
void write_fp_reg_d(unsigned long reg, unsigned long val_p0,
		unsigned long val_p1, unsigned long val_p2, unsigned long val_p3)
{
	sw64_write_simd_fp_reg_d(reg,
				 val_p0, val_p1,
				 val_p2, val_p3);
}
|