inline v8df
v1expd(v8df x)
{
    __mmask8 invalid = _mm512_cmp_pd_mask(x, exp_max_arg, _CMP_NLE_UQ);
    if( ! _mm512_kortestz(invalid, invalid) )
        throw domain_error( DEMsg("v1expd", x, invalid) );
    x = _mm512_max_pd(exp_min_arg, x);
    v8si m = v1expd_core(x);
    v8df p2n = v1pow2d_core(m);
    return _mm512_mul_pd(x, p2n);
}
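
// A minimal scalar sketch of the scheme used above (illustrative only:
// scalar_exp_sketch and its constants are assumptions, not part of this file).
// exp(x) is split as exp(x) = 2^m * exp(r) with x = m*ln(2) + r; the reduced
// exponential plays the role of the value produced via v1expd_core(), and the
// 2^m factor the role of v1pow2d_core().
#include <cmath>
#include <cstdint>
#include <cstring>

inline double scalar_exp_sketch(double x)
{
    double m = std::nearbyint(x*1.4426950408889634); // nearest integer to x/ln(2)
    double r = x - m*0.6931471805599453;             // reduced argument, |r| <= ln(2)/2
    int64_t bits = (int64_t(m) + 1023) << 52;        // 2^m as a raw double bit pattern
    double p2m;
    std::memcpy(&p2m, &bits, sizeof(p2m));
    return std::exp(r)*p2m;                          // exp(r) stands in for the core polynomial
}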

inline v8df
v1exp10d(v8df x)
{
    __mmask8 invalid = _mm512_cmp_pd_mask(x, exp10_max_arg, _CMP_NLE_UQ);
    if( ! _mm512_kortestz(invalid, invalid) )
        throw domain_error( DEMsg("v1exp10d", x, invalid) );
    x = _mm512_max_pd(exp10_min_arg, x);
    v8df h = _mm512_mul_pd(x, third);
    v8df y = _mm512_roundscale_pd(h, _MM_FROUND_TRUNC);
    v8df z = _mm512_mul_pd(y, three);
    x = _mm512_sub_pd(x, z);
    x = _mm512_mul_pd(x, ln_ten);
    x = _mm512_fmadd_pd(z, exp10_c1, x);
    v8si m1 = v1expd_core(x);
    y = _mm512_mul_pd(y, ten);
    v8si m2 = _mm512_cvtpd_epi32(y);
    v8si m = _mm256_add_epi32(m1, m2);
    m = _mm256_max_epi32(m, moff);
    v8df p2n = v1pow2d_core(m);
    return _mm512_mul_pd(x, p2n);
}
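
/* Sketch of the 10^x reduction above (a reading of the code, not an
 * authoritative derivation): split x = 3*y + r with y = trunc(x/3).  Since
 * 10^3 = 1000 and 2^10 = 1024,
 *     10^(3*y) = 2^(10*y) * (1000/1024)^y = 2^(10*y) * exp(3*y*c1)
 * with c1 = ln(1000/1024)/3, presumably the constant exp10_c1.  Hence
 *     10^x = 2^(10*y) * exp(r*ln(10) + 3*y*c1),
 * which is exactly the argument handed to v1expd_core(); the extra exponent
 * 10*y enters through m2, and the final max() against moff presumably clamps
 * the combined exponent so that 2^m underflows gracefully to zero. */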

inline v8df
v1expm1d(v8df x)
{
    __mmask8 invalid = _mm512_cmp_pd_mask(x, expm1_max_arg, _CMP_NLE_UQ);
    if( ! _mm512_kortestz(invalid, invalid) )
        throw domain_error( DEMsg("v1expm1d", x, invalid) );
    x = _mm512_max_pd(expm1_min_arg, x);
    return v1expm1d_core(x);
}
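
// Note on the clamp above: for large negative arguments expm1(x) saturates
// at -1, so flushing x to a finite lower bound (presumably what
// expm1_min_arg encodes) leaves the result correct to within rounding;
// only the upper bound needs the explicit domain check.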

inline v16sf
v1expf(v16sf x)
{
    __mmask16 invalid = _mm512_cmp_ps_mask(x, expf_max_arg, _CMP_NLE_UQ);
    if( ! _mm512_kortestz(invalid, invalid) )
        throw domain_error( DEMsg("v1expf", x, invalid) );
    x = _mm512_max_ps(expf_min_arg, x);
    v16si n = v1expf_core(x);
    v16sf p2n = v1pow2f_core(n);
    return _mm512_mul_ps(x, p2n);
}

inline v16sf
v1exp10f(v16sf x)
{
    __mmask16 invalid = _mm512_cmp_ps_mask(x, exp10f_max_arg, _CMP_NLE_UQ);
    if( ! _mm512_kortestz(invalid, invalid) )
        throw domain_error( DEMsg("v1exp10f", x, invalid) );
    x = _mm512_max_ps(exp10f_min_arg, x);
    v16sf h = _mm512_mul_ps(x, thirdf);
    v16sf y = _mm512_roundscale_ps(h, _MM_FROUND_TRUNC);
    v16sf z = _mm512_mul_ps(y, threef);
    x = _mm512_sub_ps(x, z);
    x = _mm512_mul_ps(x, ln_tenf);
    x = _mm512_fmadd_ps(z, exp10f_c1, x);
    v16si m1 = v1expf_core(x);
    y = _mm512_mul_ps(y, tenf);
    v16si m2 = _mm512_cvtps_epi32(y);
    v16si m = _mm512_add_epi32(m1, m2);
    m = _mm512_max_epi32(m, mofff);
    v16sf p2n = v1pow2f_core(m);
    return _mm512_mul_ps(x, p2n);
}

inline v16sf
v1expm1f(v16sf x)
{
    __mmask16 invalid = _mm512_cmp_ps_mask(x, expm1f_max_arg, _CMP_NLE_UQ);
    if( ! _mm512_kortestz(invalid, invalid) )
        throw domain_error( DEMsg("v1expm1f", x, invalid) );
    x = _mm512_max_ps(expm1f_min_arg, x);
    return v1expm1f_core(x);
}

#else

inline v4df
v1expd(v4df x)
{
    v4df invalid = _mm256_cmp_pd(x, exp_max_arg, _CMP_NLE_UQ);
    if( ! _mm256_testz_pd(invalid, invalid) )
        throw domain_error( DEMsg("v1expd", x, invalid) );
    x = _mm256_max_pd(exp_min_arg, x);
    v4si m = v1expd_core(x);
    v4df p2n = v1pow2d_core(m);
    return _mm256_mul_pd(x, p2n);
}
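
// On this pre-AVX512 path the compare result is a v4df rather than a mask
// register.  A hypothetical helper (not part of this file) showing how the
// offending lanes could be enumerated, should that ever be needed:
#include <immintrin.h>

inline int failed_lanes(v4df invalid)
{
    // _mm256_movemask_pd() packs the sign bit of each double lane into the
    // low 4 bits of an int; bit i is set iff lane i failed the domain check
    return _mm256_movemask_pd(invalid);
}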

inline v4df
v1exp10d(v4df x)
{
    v4df invalid = _mm256_cmp_pd(x, exp10_max_arg, _CMP_NLE_UQ);
    if( ! _mm256_testz_pd(invalid, invalid) )
        throw domain_error( DEMsg("v1exp10d", x, invalid) );
    x = _mm256_max_pd(exp10_min_arg, x);
    v4df h = _mm256_mul_pd(x, third);
    v4df y = _mm256_round_pd(h, _MM_FROUND_TRUNC);
    v4df z = _mm256_mul_pd(y, three);
    x = _mm256_sub_pd(x, z);
    x = _mm256_mul_pd(x, ln_ten);
#ifdef __FMA__
    x = _mm256_fmadd_pd(z, exp10_c1, x);
#else
    h = _mm256_mul_pd(z, exp10_c1);
    x = _mm256_add_pd(h, x);
#endif
    v4si m1 = v1expd_core(x);
    y = _mm256_mul_pd(y, ten);
    v4si m2 = _mm256_cvtpd_epi32(y);
    v4si m = _mm_add_epi32(m1, m2);
    m = _mm_max_epi32(m, moff);
    v4df p2n = v1pow2d_core(m);
    return _mm256_mul_pd(x, p2n);
}
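
// Note: _mm256_fmadd_pd() requires the FMA instruction set; it evaluates
// z*exp10_c1 + x with a single rounding, whereas the plain multiply-add
// fallback rounds twice and can lose a fraction of an ulp.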

inline v4df
v1expm1d(v4df x)
{
    v4df invalid = _mm256_cmp_pd(x, expm1_max_arg, _CMP_NLE_UQ);
    if( ! _mm256_testz_pd(invalid, invalid) )
        throw domain_error( DEMsg("v1expm1d", x, invalid) );
    x = _mm256_max_pd(expm1_min_arg, x);
    return v1expm1d_core(x);
}

inline v8sf
v1expf(v8sf x)
{
    v8sf invalid = _mm256_cmp_ps(x, expf_max_arg, _CMP_NLE_UQ);
    if( ! _mm256_testz_ps(invalid, invalid) )
        throw domain_error( DEMsg("v1expf", x, invalid) );
    x = _mm256_max_ps(expf_min_arg, x);
    v8si n = v1expf_core(x);
    v8sf p2n = v1pow2f_core(n);
    return _mm256_mul_ps(x, p2n);
}

inline v8sf
v1exp10f(v8sf x)
{
    v8sf invalid = _mm256_cmp_ps(x, exp10f_max_arg, _CMP_NLE_UQ);
    if( ! _mm256_testz_ps(invalid, invalid) )
        throw domain_error( DEMsg("v1exp10f", x, invalid) );
    x = _mm256_max_ps(exp10f_min_arg, x);
    v8sf h = _mm256_mul_ps(x, thirdf);
    v8sf y = _mm256_round_ps(h, _MM_FROUND_TRUNC);
    v8sf z = _mm256_mul_ps(y, threef);
    x = _mm256_sub_ps(x, z);
    x = _mm256_mul_ps(x, ln_tenf);
#ifdef __FMA__
    x = _mm256_fmadd_ps(z, exp10f_c1, x);
#else
    h = _mm256_mul_ps(z, exp10f_c1);
    x = _mm256_add_ps(h, x);
#endif
    v8si m1 = v1expf_core(x);
    y = _mm256_mul_ps(y, tenf);
    v8si m2 = _mm256_cvtps_epi32(y);
#ifdef __AVX2__
    v8si m = _mm256_add_epi32(m1, m2);
    m = _mm256_max_epi32(m, mofff);
    v8sf p2n = v1pow2f_core(m);
#else
    v4si m1l = _mm256_extractf128_si256(m1, 0);
    v4si m2l = _mm256_extractf128_si256(m2, 0);
    v4si m1u = _mm256_extractf128_si256(m1, 1);
    v4si m2u = _mm256_extractf128_si256(m2, 1);
    m1l = _mm_add_epi32(m1l, m2l);
    m1u = _mm_add_epi32(m1u, m2u);
    m1l = _mm_max_epi32(m1l, mofff);
    m1u = _mm_max_epi32(m1u, mofff);
    m1l = _mm_add_epi32(m1l, offf);
    m1u = _mm_add_epi32(m1u, offf);
    m1l = _mm_slli_epi32(m1l, 23);
    m1u = _mm_slli_epi32(m1u, 23);
    v8si m = _mm256_setzero_si256();
    m = _mm256_insertf128_si256(m, m1l, 0);
    m = _mm256_insertf128_si256(m, m1u, 1);
    v8sf p2n = _mm256_castsi256_ps(m);
#endif
    return _mm256_mul_ps(x, p2n);
}
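
// The AVX fallback above builds 2^n directly as an IEEE bit pattern: adding
// the exponent bias (presumably what offf holds, 127 for single precision)
// and shifting into bits 23..30 of the word yields the float 2^n.  A scalar
// sketch of the same trick (pow2f_sketch is an assumption, not part of this
// file):
#include <cstdint>
#include <cstring>

inline float pow2f_sketch(int n)
{
    uint32_t bits = uint32_t(n + 127) << 23;    // biased exponent field
    float p2n;
    std::memcpy(&p2n, &bits, sizeof(p2n));
    return p2n;                                 // exact 2^n for -126 <= n <= 127
}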

inline v8sf
v1expm1f(v8sf x)
{
    v8sf invalid = _mm256_cmp_ps(x, expm1f_max_arg, _CMP_NLE_UQ);
    if( ! _mm256_testz_ps(invalid, invalid) )
        throw domain_error( DEMsg("v1expm1f", x, invalid) );
    x = _mm256_max_ps(expm1f_min_arg, x);
    return v1expm1f_core(x);
}

#endif // __AVX512F__

void vexp(const double x[], double y[], long nlo, long nhi);

void vexp10(const double x[], double y[], long nlo, long nhi);

void vexpm1(const double x[], double y[], long nlo, long nhi);
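
// Example use of the array interface (illustrative; the half-open index
// convention [nlo, nhi) is inferred from the parameter names and should be
// checked against the implementation):
#include <vector>

inline void vexp_array_example()
{
    std::vector<double> x(64, 1.0), y(64);
    vexp( x.data(), y.data(), 0, long(x.size()) ); // y[i] = exp(x[i])
}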

void vexp(double *y, double x0, double x1, double x2, double x3);

void vexp(double *y, double x0, double x1, double x2, double x3,
          double x4, double x5, double x6, double x7);

void vexp10(double *y, double x0, double x1, double x2, double x3,
            double x4, double x5, double x6, double x7);

void vexpm1(double *y, double x0, double x1, double x2, double x3,
            double x4, double x5, double x6, double x7);
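
// Example use of the scalar-pack overloads (illustrative): each call
// evaluates all its arguments in one vectorized pass and writes the results
// to consecutive elements of y.
inline void vexp_pack_example()
{
    double y[8];
    vexp( y, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5 ); // y[i] = exp of the i-th argument
}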