April 1992/Multiple Copy Math Functions/Listing 2

Listing 2 (sin_4.c)

typedef struct {
   double X1, X2, X3, X4;
}      ARG_D_4;                 /* vector 4*/
#include "float.h"
ARG_D_4 sin_4(xx1, xx2, xx3, xx4)
   double xx1, xx2, xx3, xx4;
/* use where cos of same argument not needed
** 16 digits precision, compare to 15 digits in "dtrig.h"
** T C Prince */
{
   union dblfmt {
      double dbl;
      int idbl;
      struct dfmt {           /* IEEE p754 */
         unsigned int sign:1;
         unsigned int ex:11;
      }    fmt;
   }     xi1;
   double xr, x2, x4, x8;
#ifdef _STDC__
   long double pi = 3.1415926535897932385, pml;
#include <limits.h>
#else
   register double pi = 3.1415926535897932385, pml;
#define LONG_MIN 0x80000000
#endif
   union dblfmt pm, round;
   ARG_D_4 res;
#define BIAS DBL_MAX_EXP
#include <errno.h>
#ifndef errno
   extern int errno;
#endif
#define ODD(i) ((i)&1)
/* use identity sin(x + n pi) = (-1)^n sin(x)
**  to reduce range to -pi/2 < x < pi/2
**      pml=rint(xi1/pi) */
#if FLT_ROUNDS != 1
       #error "rounding mode not nearest; adjust code"
#endif
#if FLT_RADIX !=2 && FLT_RADIX != 10
       #error "code not optimum for accuracy in this RADIX"
#endif
#if DBL_DIG > 16
       #error "more terms needed for full accuracy"
#endif
/* shortcut test of sign, not portable to VAX */
   round.dbl = 1 / LDBL_EPSILON;
   xi1.dbl = xx1;
   round.idbl |= xi1.idbl & LONG_MIN;
   pml = xx1 / pi + round.dbl;
/* sign reversal may reduce register usage */
   xr = pi * (pml -= round.dbl) - xx1;
/* shortcut test for fabs(pml) > INT_MAX */
   pm.dbl = pml;
   if (pm.fmt.ex > BIAS +31)
      errno = ERANGE;
/* don't wait to calculate xr**2 until sign is fixed;
** another sign reversal is due if pm.dbl is odd */
   x2 = xr * xr;
/* first sign reversal compensated in coefficient signs;
** conditional sign fixed by testing odd/even
** first two results are obtained by straight Horner
** polynomial evaluation */
   res.X1 = (-.9999999999999999 + x2 * (.1666666666666607
   + x2 * (-.833333333328281e-2 + x2 * (.19841269824875e-3
      + x2 * (-.2755731661057e-5 + x2 * (.25051882036e-7
       + x2 * (-.160481709e-9 + x2 * .7374418e-12)))))))
      * (ODD((int) pm.dbl) ? -xr : xr);
/* sin(xi2) */
   round.dbl = 1 / LDBL_EPSILON;
   xi1.dbl = xx2;
   round.idbl |= xi1.idbl & LONG_MIN;
   pml = xx2 / pi + round.dbl;
   xr = pi * (pml -= round.dbl) - xx2;
   pm.dbl = pml;
   if (pm.fmt.ex > BIAS + 31)
      errno = ERANGE;
   x2 = xr * xr;
   res.X2 = (-.9999999999999999 + x2 * (.1666666666666607
   + x2 * (-.833333333328281e-2 + x2 * (.19841269824875e-3
      + x2 * (-.2755731661057e-5 + x2 * (.25051882036e-7
       + x2 * (-.160481709e-9 + x2 * .7374418e-12)))))))
      * (ODD((int) pm.dbl) ? -xr : xr);
/* sin(xi3) */
   round.dbl = 1 / LDBL_EPSILON;
   xi1.dbl = xx3;
   round.idbl |= xi1.idbl & LONG_MIN;
   pml = xx3 / pi + round.dbl;
   xr = pi * (pml -= round.dbl) - xx3;
   pm.dbl = pml;
   if (pm.fmt.ex > BIAS + 31)
      errno = ERANGE;
   x2 = xr * xr;
   x4 = x2 * x2;
/* split into 2 Horner polynomials to increase
** parallelism after 1st result finishes */
   res.X3 = (-.9999999999999999 + x2 * (.1666666666666607
                         + x2 * (-.833333333328281e-2
                          + x2 * .19841269824875e-3))
            + (-.2755731661057e-5 + x2 * (.25051882036e-7
                               + x2 * (-.160481709e-9
                    + x2 * .7374418e-12))) * x4 * x4) *
       (ODD((int) pm.dbl) ? -xr : xr);
/* sin(xi4) */
   round.dbl = 1 / LDBL_EPSILON;
   xi1.dbl = xx4;
   round.idbl |= xi1.idbl & LONG_MIN;
   pml = xx4 / pi + round.dbl;
   xr = pi * (pml -= round.dbl) - xi1.dbl;
/* errno is set to ERANGE if any of the arguments are too
** large for reasonable range reduction */
   pm.dbl = pml;
   if (pm.fmt.ex > BIAS + 31)
      errno = ERANGE;
   x2 = xr * xr;
   x4 = x2 * x2;
   x8 = x4 * x4;
/* multiply by 1 is K&R way to enforce parentheses */
   res.X4 = ((-.9999999999999999 + x2 * (.1666666666666607
                         + x2 * (-.833333333328281e-2
                      + x2 * .19841269824875e-3))) * 1
            + (-.2755731661057e-5 + x2 * (.25051882036e-7
      + x2 * (-.160481709e-9 + x2 * .7374418e-12))) * x8)
       * (ODD((int) pm.dbl) ? -xr : xr);
   return res;
}
/* End of File */