/* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file. Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define BITS_PER_MP_LIMB 64 #define BYTES_PER_MP_LIMB 8 /* Tell the toom3 multiply implementation to call low-level mpn functions instead of open-coding operations in C. 
*/ #ifndef USE_MORE_MPN #define USE_MORE_MPN 1 #endif #define MUL_KARATSUBA_THRESHOLD 28 #define MUL_TOOM3_THRESHOLD 93 #define SQR_BASECASE_THRESHOLD 11 #define SQR_KARATSUBA_THRESHOLD 70 #define SQR_TOOM3_THRESHOLD 99 #define MULLOW_BASECASE_THRESHOLD 24 #define MULLOW_DC_THRESHOLD 28 #define MULLOW_MUL_N_THRESHOLD 123 #define DIV_SB_PREINV_THRESHOLD 0 /* always */ #define DIV_DC_THRESHOLD 22 #define POWM_THRESHOLD 85 #define GCD_ACCEL_THRESHOLD 3 #define GCDEXT_THRESHOLD 20 #define JACOBI_BASE_METHOD 2 #define DIVREM_1_NORM_THRESHOLD 3 #define DIVREM_1_UNNORM_THRESHOLD 3 #define MOD_1_NORM_THRESHOLD 3 #define MOD_1_UNNORM_THRESHOLD 3 #define USE_PREINV_DIVREM_1 1 #define USE_PREINV_MOD_1 1 #define DIVREM_2_THRESHOLD 0 /* always */ #define DIVEXACT_1_THRESHOLD 0 /* always */ #define MODEXACT_1_ODD_THRESHOLD 0 /* always */ #define GET_STR_DC_THRESHOLD 14 #define GET_STR_PRECOMPUTE_THRESHOLD 20 #define SET_STR_THRESHOLD 2997 #define MUL_FFT_TABLE { 336, 736, 1728, 3328, 7168, 20480, 49152, 327680, 0 } #define MUL_FFT_MODF_THRESHOLD 280 #define MUL_FFT_THRESHOLD 1920 #define SQR_FFT_TABLE { 368, 800, 1856, 3328, 7168, 20480, 49152, 196608, 0 } #define SQR_FFT_MODF_THRESHOLD 280 #define SQR_FFT_THRESHOLD 1920 dnl SPARC v9 mpn_lshift dnl Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. dnl The GNU MP Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published dnl by the Free Software Foundation; either version 3 of the License, or (at dnl your option) any later version. dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. 
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C cycles/limb
C UltraSPARC 1&2: 2
C UltraSPARC 3: 3.25

C mpn_lshift shifts the n-limb operand at up left by cnt bits, stores the
C n-limb result at rp and returns the bits shifted out of the most
C significant limb.  Limbs are processed from the most significant end
C downwards: both pointers are first advanced past the last limb and then
C walked back with negative offsets.
C
C NOTE(review): the top limb is loaded before n is tested, so n >= 1 is
C assumed.  The complementary shift uses the negated count, which only
C yields the intended 64-cnt for 1 <= cnt <= 63 -- confirm both against the
C documented mpn_lshift contract.

C INPUT PARAMETERS
define(`rp',`%i0')
define(`up',`%i1')
define(`n',`%i2')
define(`cnt',`%i3')

C Four source limbs are kept in flight by the unrolled loop.
define(`u0',`%l0')
define(`u1',`%l2')
define(`u2',`%l4')
define(`u3',`%l6')

C Negated shift count, used for the right shifts that extract the bits
C crossing from one limb into the next higher one.
define(`tnc',`%i4')

C fanop fills branch delay slots and pads the loop groups below.  fmnop is
C defined here but not referenced anywhere in this routine.
define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe

ASM_START()
C %g2 and %g3 are used as temporaries below; REGISTER presumably emits the
C assembler declaration marking them as scratch -- see config.m4.
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(mpn_lshift)
C New register window: the arguments are now in %i0-%i3 as aliased above.
	save	%sp,-160,%sp

	sllx	n,3,%g1			C operand size in bytes (8 per limb)
	sub	%g0,cnt,tnc		C negate shift count
	add	up,%g1,up		C make up point just past the end of src
	add	rp,%g1,rp		C make rp point just past the end of res
	ldx	[up-8],u3		C load first (most significant) limb
	subcc	n,5,n			C fewer than 5 limbs: skip the unrolled code
	srlx	u3,tnc,%i5		C compute function result
	sllx	u3,cnt,%g3		C high part of the top result limb
	bl,pn	%icc,.Lend1234
	fanop				C branch delay slot

C At least 5 limbs: preload the next four limbs for the 4-way unrolled loop.
	subcc	n,4,n
	ldx	[up-16],u0
	ldx	[up-24],u1
	add	up,-32,up
	ldx	[up-0],u2
	ldx	[up-8],u3
	srlx	u0,tnc,%g2		C bits of u0 that move into the limb above
	bl,pn	%icc,.Lend5678		C fewer than 4 more limbs: just drain
	fanop				C branch delay slot

C The annul bit cancels the delay slot of this unconditional branch, so the
C padding inserted by the alignment directive is never executed.
	b,a	.Loop
	.align	16

C Main loop: four limbs per iteration.  %g3 and %g1 alternate as the result
C limb being assembled, %g2 carries the bits coming up from the next lower
C limb, and each load refills a limb register for the following iteration.
C The "C --" markers separate groups of four instructions, presumably the
C intended issue groups.
.Loop:
	sllx	u0,cnt,%g1
	or	%g3,%g2,%g3
	ldx	[up-16],u0
	fanop
C --
	srlx	u1,tnc,%g2
	subcc	n,4,n			C sets the flags tested by bge below
	stx	%g3,[rp-8]
	fanop
C --
	sllx	u1,cnt,%g3
	or	%g1,%g2,%g1
	ldx	[up-24],u1
	fanop
C --
	srlx	u2,tnc,%g2
	stx	%g1,[rp-16]
	add	up,-32,up
	fanop
C --
	sllx	u2,cnt,%g1
	or	%g3,%g2,%g3
	ldx	[up-0],u2
	fanop
C --
	srlx	u3,tnc,%g2
	stx	%g3,[rp-24]
	add	rp,-32,rp
	fanop
C --
	sllx	u3,cnt,%g3
	or	%g1,%g2,%g1
	ldx	[up-8],u3
	fanop
C --
	srlx	u0,tnc,%g2
	stx	%g1,[rp-0]
	bge,pt	%icc,.Loop
	fanop				C branch delay slot
C --
C Drain: combine and store the four limbs already held in u0-u3 without
C issuing any further loads.
.Lend5678:
	sllx	u0,cnt,%g1
	or	%g3,%g2,%g3
	srlx	u1,tnc,%g2
	stx	%g3,[rp-8]
	sllx	u1,cnt,%g3
	or	%g1,%g2,%g1
	srlx	u2,tnc,%g2
	stx	%g1,[rp-16]
	sllx	u2,cnt,%g1
	or	%g3,%g2,%g3
	srlx	u3,tnc,%g2
	stx	%g3,[rp-24]
	add	rp,-32,rp
	sllx	u3,cnt,%g3		C carry...
	or	%g1,%g2,%g1
	stx	%g1,[rp-0]

C Tail: undo the bias on n to get the number of limbs still to be loaded,
C then handle them one at a time.  %g3 always holds the pending high part.
.Lend1234:
	addcc	n,4,n
	bz,pn	%icc,.Lret
	fanop				C branch delay slot

.Loop0:
	add	rp,-8,rp
	subcc	n,1,n
	ldx	[up-16],u3
	add	up,-8,up
	srlx	u3,tnc,%g2
	or	%g3,%g2,%g3
	stx	%g3,[rp]
	sllx	u3,cnt,%g3
	bnz,pt	%icc,.Loop0
	fanop				C branch delay slot

C Store the last pending limb; nothing is ORed into its low cnt bits.  The
C saved out-shifted bits go to %i0, the return value register, and restore
C executes in the delay slot of ret.
.Lret:
	stx	%g3,[rp-8]
	mov	%i5,%i0
	ret
	restore
EPILOGUE(mpn_lshift)

/* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder.

Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2003 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "mpn/sparc64/sparc64.h"


/*                 64-bit divisor   32-bit divisor
                    cycles/limb      cycles/limb
                     (approx)         (approx)
   Ultrasparc 2i:      160              120
*/


/* 32-bit divisors are treated in special case code. This requires 4 mulx
   per limb instead of 8 in the general case.

   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
   addressing, to get the two halves of each limb read in the correct order.
   This is kept in an adj variable. Doing that measures about 6 c/l faster
   than just writing HALF_ENDIAN_ADJ(i) in the loop. The latter shouldn't
   be 6 cycles worth of work, but perhaps it doesn't schedule well (on gcc
   3.2.1 at least).

   A simple udivx/umulx loop for the 32-bit case was attempted for small
   sizes, but at size==2 it was only about the same speed and at size==3 was
   slower.
*/ mp_limb_t mpn_mod_1 (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb) { int norm, norm_rshift; mp_limb_t src_high_limb; mp_size_t i; ASSERT (size_limbs >= 0); ASSERT (d_limb != 0); if (UNLIKELY (size_limbs == 0)) return 0; src_high_limb = src_limbptr[size_limbs-1]; /* udivx is