diff --git a/src/pkg/big/arith.go b/src/pkg/big/arith.go index 8a565e7901..3dcbe637f5 100644 --- a/src/pkg/big/arith.go +++ b/src/pkg/big/arith.go @@ -298,20 +298,18 @@ var ( ) -// UseAsm returns true if the assembly routines are enabled. -func useAsm() bool - func init() { - if useAsm() { - // Install assembly routines. - addVV = addVV_s; - subVV = subVV_s; - addVW = addVW_s; - subVW = subVW_s; - mulAddVWW = mulAddVWW_s; - addMulVVW = addMulVVW_s; - divWVW = divWVW_s; - } + // Uncomment to use generic routines. + //return; + + // Install assembly routines. + addVV = addVV_s; + subVV = subVV_s; + addVW = addVW_s; + subVW = subVW_s; + mulAddVWW = mulAddVWW_s; + addMulVVW = addMulVVW_s; + divWVW = divWVW_s; } diff --git a/src/pkg/big/arith_386.s b/src/pkg/big/arith_386.s index 885b152737..2f89182c1c 100644 --- a/src/pkg/big/arith_386.s +++ b/src/pkg/big/arith_386.s @@ -5,17 +5,171 @@ // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. -TEXT big·useAsm(SB),7,$0 - MOVB $0, 4(SP) // assembly routines disabled - RET - - -// TODO(gri) Implement these routines and enable them. +// func addVV_s(z, x, y *Word, n int) (c Word) TEXT big·addVV_s(SB),7,$0 -TEXT big·subVV_s(SB),7,$0 -TEXT big·addVW_s(SB),7,$0 -TEXT big·subVW_s(SB),7,$0 -TEXT big·mulAddVWW_s(SB),7,$0 -TEXT big·addMulVVW_s(SB),7,$0 -TEXT big·divWVW_s(SB),7,$0 + MOVL z+0(FP), DI + MOVL x+4(FP), SI + MOVL y+8(FP), CX + MOVL n+12(FP), BP + MOVL $0, BX // i = 0 + MOVL $0, DX // c = 0 + JMP E1 + +L1: MOVL (SI)(BX*4), AX + RCRL $1, DX + ADCL (CX)(BX*4), AX + RCLL $1, DX + MOVL AX, (DI)(BX*4) + ADDL $1, BX // i++ + +E1: CMPL BX, BP // i < n + JL L1 + + MOVL DX, c+16(FP) + RET + + +// func subVV_s(z, x, y *Word, n int) (c Word) +// (same as addVV_s except for SBBL instead of ADCL and label names) +TEXT big·subVV_s(SB),7,$0 + MOVL z+0(FP), DI + MOVL x+4(FP), SI + MOVL y+8(FP), CX + MOVL n+12(FP), BP + MOVL $0, BX // i = 0 + MOVL $0, DX // c = 0 + JMP E2 + +L2: MOVL (SI)(BX*4), AX + RCRL $1, DX + SBBL (CX)(BX*4), AX + RCLL $1, DX + MOVL AX, (DI)(BX*4) + ADDL $1, BX // i++ + +E2: CMPL BX, BP // i < n + JL L2 + + MOVL DX, c+16(FP) + RET + + +// func addVW_s(z, x *Word, y Word, n int) (c Word) +TEXT big·addVW_s(SB),7,$0 + MOVL z+0(FP), DI + MOVL x+4(FP), SI + MOVL y+8(FP), AX // c = y + MOVL n+12(FP), BP + MOVL $0, BX // i = 0 + JMP E3 + +L3: ADDL (SI)(BX*4), AX + MOVL AX, (DI)(BX*4) + RCLL $1, AX + ANDL $1, AX + ADDL $1, BX // i++ + +E3: CMPL BX, BP // i < n + JL L3 + + MOVL AX, c+16(FP) + RET + + +// func subVW_s(z, x *Word, y Word, n int) (c Word) +TEXT big·subVW_s(SB),7,$0 + MOVL z+0(FP), DI + MOVL x+4(FP), SI + MOVL y+8(FP), AX // c = y + MOVL n+12(FP), BP + MOVL $0, BX // i = 0 + JMP E4 + +L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL? + SUBL AX, DX + MOVL DX, (DI)(BX*4) + RCLL $1, AX + ANDL $1, AX + ADDL $1, BX // i++ + +E4: CMPL BX, BP // i < n + JL L4 + + MOVL AX, c+16(FP) + RET + + +// func mulAddVWW_s(z, x *Word, y, r Word, n int) (c Word) +TEXT big·mulAddVWW_s(SB),7,$0 + MOVL z+0(FP), DI + MOVL x+4(FP), SI + MOVL y+8(FP), BP + MOVL r+12(FP), CX // c = r + MOVL n+16(FP), BX + LEAL (SI)(BX*4), SI + LEAL (DI)(BX*4), DI + NEGL BX // i = -n + JMP E5 + +L5: MOVL (SI)(BX*4), AX + MULL BP + ADDL CX, AX + ADCL $0, DX + MOVL AX, (DI)(BX*4) + MOVL DX, CX + ADDL $1, BX // i++ + +E5: CMPL BX, $0 // i < 0 + JL L5 + + MOVL CX, c+20(FP) + RET + + +// func addMulVVW_s(z, x *Word, y Word, n int) (c Word) +TEXT big·addMulVVW_s(SB),7,$0 + MOVL z+0(FP), DI + MOVL x+4(FP), SI + MOVL y+8(FP), BP + MOVL n+12(FP), BX + LEAL (SI)(BX*4), SI + LEAL (DI)(BX*4), DI + NEGL BX // i = -n + MOVL $0, CX // c = 0 + JMP E6 + +L6: MOVL (SI)(BX*4), AX + MULL BP + ADDL (DI)(BX*4), AX + ADCL $0, DX + ADDL CX, AX + ADCL $0, DX + MOVL AX, (DI)(BX*4) + MOVL DX, CX + ADDL $1, BX // i++ + +E6: CMPL BX, $0 // i < 0 + JL L6 + + MOVL CX, c+16(FP) + RET + + +// divWVW_s(z* Word, xn Word, x *Word, y Word, n int) (r Word) +TEXT big·divWVW_s(SB),7,$0 + MOVL z+0(FP), DI + MOVL xn+4(FP), DX // r = xn + MOVL x+8(FP), SI + MOVL y+12(FP), CX + MOVL n+16(FP), BX // i = n + JMP E7 + +L7: MOVL (SI)(BX*4), AX + DIVL CX + MOVL AX, (DI)(BX*4) + +E7: SUBL $1, BX // i-- + JGE L7 // i >= 0 + + MOVL DX, r+20(FP) RET diff --git a/src/pkg/big/arith_amd64.s b/src/pkg/big/arith_amd64.s index 4733a7c3aa..f9b070b74d 100644 --- a/src/pkg/big/arith_amd64.s +++ b/src/pkg/big/arith_amd64.s @@ -5,11 +5,6 @@ // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. -TEXT big·useAsm(SB),7,$0 - MOVB $1, 8(SP) // assembly routines enabled - RET - - // TODO(gri) - experiment with unrolled loops for faster execution // func addVV_s(z, x, y *Word, n int) (c Word) diff --git a/src/pkg/big/arith_arm.s b/src/pkg/big/arith_arm.s index 885b152737..8bb1e9c282 100644 --- a/src/pkg/big/arith_arm.s +++ b/src/pkg/big/arith_arm.s @@ -5,17 +5,25 @@ // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. -TEXT big·useAsm(SB),7,$0 - MOVB $0, 4(SP) // assembly routines disabled - RET - - -// TODO(gri) Implement these routines and enable them. +// TODO(gri) Implement these routines. TEXT big·addVV_s(SB),7,$0 + JMP big·addVV_g(SB) + TEXT big·subVV_s(SB),7,$0 + JMP big·subVV_g(SB) + TEXT big·addVW_s(SB),7,$0 + JMP big·addVW_g(SB) + TEXT big·subVW_s(SB),7,$0 + JMP big·subVW_g(SB) + TEXT big·mulAddVWW_s(SB),7,$0 + JMP big·mulAddVWW_g(SB) + TEXT big·addMulVVW_s(SB),7,$0 + JMP big·addMulVVW_g(SB) + TEXT big·divWVW_s(SB),7,$0 - RET + JMP big·divWVW_g(SB) +