cloudy  trunk
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TestVectorize.cpp
Go to the documentation of this file.
1 #include "cdstd.h"
2 #include <UnitTest++.h>
3 #include "cddefines.h"
4 #include "thirdparty.h"
5 #include "vectorize.h"
6 
7 namespace {
8 
9  TEST(TestAvxPool)
10  {
11  // there is not a lot we can check here since the interesting bits are hidden
12  // behind the interface, so the real testing will be done with valgrind-test
13  t_avx_pool pool;
14  init_genrand( 376356017L );
15  for( int i=0; i < 2048; ++i )
16  {
17  size_t sz = genrand_int32()%200000;
18  char *p = static_cast<char*>(pool.avx_alloc(sz));
19  size_t ip = reinterpret_cast<size_t>(p);
20  // test for correct alignment
21  CHECK( p != NULL && (ip/CD_ALIGN)*CD_ALIGN == ip );
22  for( size_t j=0; j < sz; ++j )
23  p[j] = 'c';
24  pool.avx_free(p);
25  }
26  // check that many simultaneous requests work correctly
27  const int ntest = 200;
28  char* pp[ntest];
29  for( int i=0; i < ntest; ++i )
30  {
31  pp[i] = static_cast<char*>(pool.avx_alloc(4000));
32  size_t ip = reinterpret_cast<size_t>(pp[i]);
33  CHECK( pp[i] != NULL && (ip/CD_ALIGN)*CD_ALIGN == ip );
34  for( size_t j=0; j < 4000; ++j )
35  pp[i][j] = 'c';
36  }
37  for( int i=0; i < ntest; ++i )
38  pool.avx_free(pp[i]);
39  }
40 
41  TEST(TestAvxPtr)
42  {
43  avx_ptr<double,true> p1(100);
44  CHECK( p1.data() != NULL && p1.data() == p1.ptr0() );
45  for( int i=0; i < 100; ++i )
46  p1[i] = 1.;
47  CHECK_THROW(( p1[-1] = 1. ),out_of_range);
48  CHECK_THROW(( p1[100] = 1. ),out_of_range);
49  avx_ptr<double> p2(0);
50  CHECK( p2.data() == NULL );
51  avx_ptr<double> p3(-10);
52  CHECK( p3.data() == NULL );
53  avx_ptr<double,true> p4(-10,100);
54  CHECK( p4.data() != NULL && p4.data() == p4.ptr0()-10 );
55  for( int i=-10; i < 100; ++i )
56  p4[i] = 1.;
57  CHECK_THROW(( p4[-11] = 1. ),out_of_range);
58  CHECK_THROW(( p1[100] = 1. ),out_of_range);
59  avx_ptr<double> p5(10,10);
60  CHECK( p5.data() == NULL );
61  avx_ptr<double> p6(10,-10);
62  CHECK( p6.data() == NULL );
63  }
64 
65  TEST(TestAllocatorAvx)
66  {
67  // there is not a lot we can check here since the interesting bits are hidden
68  // behind the interface, so the real testing will be done with valgrind-test
69  init_genrand( 376833017L );
70  for( int i=0; i < 2048; ++i )
71  {
72  size_t sz = genrand_int32()%200000;
73  vector< int, allocator_avx<int> > a(sz);
74  size_t ip = reinterpret_cast<size_t>(get_ptr(a));
75  CHECK( ip != 0 && (ip/CD_ALIGN)*CD_ALIGN == ip );
76  for( size_t j=0; j < sz; ++j )
77  a[j] = 3;
78  }
79  }
80 
81  TEST(TestReduceAd)
82  {
83  double a[32];
84  for( int i=0; i < 32; ++i )
85  a[i] = double(i);
86  for( int ilo=0; ilo < 32; ilo++ )
87  {
88  for( int ihi=ilo+1; ihi <= 32; ihi++ )
89  {
90  double val = reduce_a( a, ilo, ihi );
91  double expect = double(ihi*(ihi-1)/2 - ilo*(ilo-1)/2);
92  CHECK( fp_equal( val, expect ) );
93  }
94  }
95  }
96 
97  TEST(TestReduceAf)
98  {
99  sys_float a[32];
100  for( int i=0; i < 32; ++i )
101  a[i] = sys_float(i);
102  for( int ilo=0; ilo < 32; ilo++ )
103  {
104  for( int ihi=ilo+1; ihi <= 32; ihi++ )
105  {
106  sys_float val = reduce_a( a, ilo, ihi );
107  sys_float expect = sys_float(ihi*(ihi-1)/2 - ilo*(ilo-1)/2);
108  CHECK( fp_equal( val, expect ) );
109  }
110  }
111  }
112 
113  TEST(TestReduceABd)
114  {
115  double a[32], b[32];
116  for( int i=0; i < 32; ++i )
117  {
118  a[i] = double(i);
119  b[i] = double(i);
120  }
121  for( int ilo=0; ilo < 32; ilo++ )
122  {
123  for( int ihi=ilo+1; ihi <= 32; ihi++ )
124  {
125  double val = reduce_ab( a, b, ilo, ihi );
126  double expect = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
127  CHECK( fp_equal( val, expect ) );
128  }
129  }
130  }
131 
132  TEST(TestReduceABdf)
133  {
134  double a[32];
135  sys_float b[32];
136  for( int i=0; i < 32; ++i )
137  {
138  a[i] = double(i);
139  b[i] = sys_float(i);
140  }
141  for( int ilo=0; ilo < 32; ilo++ )
142  {
143  for( int ihi=ilo+1; ihi <= 32; ihi++ )
144  {
145  double val = reduce_ab( a, b, ilo, ihi );
146  double expect = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
147  CHECK( fp_equal( val, expect ) );
148  }
149  }
150  }
151 
152  TEST(TestReduceABf)
153  {
154  sys_float a[32], b[32];
155  for( int i=0; i < 32; ++i )
156  {
157  a[i] = sys_float(i);
158  b[i] = sys_float(i);
159  }
160  for( int ilo=0; ilo < 32; ilo++ )
161  {
162  for( int ihi=ilo+1; ihi <= 32; ihi++ )
163  {
164  sys_float val = reduce_ab( a, b, ilo, ihi );
165  sys_float expect = sys_float(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
166  CHECK( fp_equal( val, expect ) );
167  }
168  }
169  }
170 
171  TEST(TestReduceABCd)
172  {
173  double a[32], b[32], c[32];
174  for( int i=0; i < 32; ++i )
175  {
176  a[i] = double(i);
177  b[i] = double(i);
178  c[i] = double(i);
179  }
180  for( int ilo=0; ilo < 32; ilo++ )
181  {
182  for( int ihi=ilo+1; ihi <= 32; ihi++ )
183  {
184  double val = reduce_abc( a, b, c, ilo, ihi );
185  double expect = double(pow2(ihi*(ihi-1)/2) - pow2(ilo*(ilo-1)/2));
186  CHECK( fp_equal( val, expect ) );
187  }
188  }
189  }
190 
191  TEST(TestReduceABCddf)
192  {
193  double a[32], b[32];
194  sys_float c[32];
195  for( int i=0; i < 32; ++i )
196  {
197  a[i] = double(i);
198  b[i] = double(i);
199  c[i] = sys_float(i);
200  }
201  for( int ilo=0; ilo < 32; ilo++ )
202  {
203  for( int ihi=ilo+1; ihi <= 32; ihi++ )
204  {
205  double val = reduce_abc( a, b, c, ilo, ihi );
206  double expect = double(pow2(ihi*(ihi-1)/2) - pow2(ilo*(ilo-1)/2));
207  CHECK( fp_equal( val, expect ) );
208  }
209  }
210  }
211 
212  TEST(TestReduceABCdff)
213  {
214  double a[32];
215  sys_float b[32], c[32];
216  for( int i=0; i < 32; ++i )
217  {
218  a[i] = double(i);
219  b[i] = sys_float(i);
220  c[i] = sys_float(i);
221  }
222  for( int ilo=0; ilo < 32; ilo++ )
223  {
224  for( int ihi=ilo+1; ihi <= 32; ihi++ )
225  {
226  double val = reduce_abc( a, b, c, ilo, ihi );
227  double expect = double(pow2(ihi*(ihi-1)/2) - pow2(ilo*(ilo-1)/2));
228  CHECK( fp_equal( val, expect ) );
229  }
230  }
231  }
232 
233  TEST(TestReduceABCf)
234  {
235  sys_float a[32], b[32], c[32];
236  for( int i=0; i < 32; ++i )
237  {
238  a[i] = sys_float(i);
239  b[i] = sys_float(i);
240  c[i] = sys_float(i);
241  }
242  for( int ilo=0; ilo < 32; ilo++ )
243  {
244  for( int ihi=ilo+1; ihi <= 32; ihi++ )
245  {
246  sys_float val = reduce_abc( a, b, c, ilo, ihi );
247  sys_float expect = sys_float(pow2(ihi*(ihi-1)/2) - pow2(ilo*(ilo-1)/2));
248  CHECK( fp_equal( val, expect ) );
249  }
250  }
251  }
252 
253  TEST(TestReduceAB_Ad)
254  {
255  double a[32], b[32];
256  for( int i=0; i < 32; ++i )
257  {
258  a[i] = double(i);
259  b[i] = double(i);
260  }
261  for( int ilo=0; ilo < 32; ilo++ )
262  {
263  for( int ihi=ilo+1; ihi <= 32; ihi++ )
264  {
265  double val2, val = reduce_ab_a( a, b, ilo, ihi, &val2 );
266  double expect = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
267  double expect2 = double(ihi*(ihi-1)/2 - ilo*(ilo-1)/2);
268  CHECK( fp_equal( val, expect ) );
269  CHECK( fp_equal( val2, expect2 ) );
270  }
271  }
272  }
273 
274  TEST(TestReduceAB_Adf)
275  {
276  double a[32];
277  sys_float b[32];
278  for( int i=0; i < 32; ++i )
279  {
280  a[i] = double(i);
281  b[i] = sys_float(i);
282  }
283  for( int ilo=0; ilo < 32; ilo++ )
284  {
285  for( int ihi=ilo+1; ihi <= 32; ihi++ )
286  {
287  double val2, val = reduce_ab_a( a, b, ilo, ihi, &val2 );
288  double expect = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
289  double expect2 = double(ihi*(ihi-1)/2 - ilo*(ilo-1)/2);
290  CHECK( fp_equal( val, expect ) );
291  CHECK( fp_equal( val2, expect2 ) );
292  val = reduce_ab_a( b, a, ilo, ihi, &val2 );
293  CHECK( fp_equal( val, expect ) );
294  CHECK( fp_equal( val2, expect2 ) );
295  }
296  }
297  }
298 
299  TEST(TestReduceAB_Af)
300  {
301  double a[32], b[32];
302  for( int i=0; i < 32; ++i )
303  {
304  a[i] = double(i);
305  b[i] = double(i);
306  }
307  for( int ilo=0; ilo < 32; ilo++ )
308  {
309  for( int ihi=ilo+1; ihi <= 32; ihi++ )
310  {
311  double val2, val = reduce_ab_a( a, b, ilo, ihi, &val2 );
312  double expect = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
313  double expect2 = double(ihi*(ihi-1)/2 - ilo*(ilo-1)/2);
314  CHECK( fp_equal( val, expect ) );
315  CHECK( fp_equal( val2, expect2 ) );
316  }
317  }
318  }
319 
320  TEST(TestReduceABC_ABd)
321  {
322  double a[32], b[32], c[32];
323  for( int i=0; i < 32; ++i )
324  {
325  a[i] = double(i);
326  b[i] = double(i);
327  c[i] = double(i);
328  }
329  for( int ilo=0; ilo < 32; ilo++ )
330  {
331  for( int ihi=ilo+1; ihi <= 32; ihi++ )
332  {
333  double val2, val = reduce_abc_ab( a, b, c, ilo, ihi, &val2 );
334  double expect = double(pow2(ihi*(ihi-1)/2) - pow2(ilo*(ilo-1)/2));
335  double expect2 = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
336  CHECK( fp_equal( val, expect ) );
337  CHECK( fp_equal( val2, expect2 ) );
338  }
339  }
340  }
341 
342  TEST(TestExp10d)
343  {
344  // exp10(double) is defined in cddefines.h
345  double maxarg = log10(DBL_MAX);
346  init_genrand( 392916017L );
347  for( int i=0; i < 2048; ++i )
348  {
349  double arg = genrand_real1()*633. - 324.;
350  if( arg < maxarg )
351  {
352  double val1 = exp10(arg);
353  double val2 = pow(10.,arg);
354  // inaccuracies in pow(10.,x) will contribute to the error as well...
355  // so this test may differ a bit from one glibc version to another.
356  CHECK( fp_equal( val1, val2, 10 ) );
357  }
358  }
359  }
360 
361  TEST(TestExp10f)
362  {
363  // exp10(sys_float) is defined in cddefines.h
364  sys_float maxarg = log10f(FLT_MAX);
365  init_genrand( 392977017L );
366  for( int i=0; i < 2048; ++i )
367  {
368  sys_float arg = sys_float(genrand_real1())*84.f - 45.f;
369  if( arg < maxarg )
370  {
371  sys_float val1 = exp10(arg);
372  sys_float val2 = sys_float(pow(10.,double(arg)));
373  CHECK( fp_equal( val1, val2, 6 ) );
374  }
375  }
376  }
377 
378  TEST(TestVexpd)
379  {
380  const int sz = 2048;
381  ALIGNED(CD_ALIGN) double arg[sz];
382  ALIGNED(CD_ALIGN) double val[sz];
383 #ifdef __AVX__
384  double minarg = log(DBL_MIN);
385  // check correct behavior near the underflow margin
386  // our implementation doesn't support gradual underflow
387  double a1 = nextafter(minarg,-DBL_MAX);
388  vexp(val,a1,a1,a1,a1);
389  CHECK( val[0] == 0. );
390  double a2 = nextafter(minarg,DBL_MAX);
391  vexp(val,a2,a2,a2,a2);
392  CHECK( val[0] > 0. );
393  // check correct behavior near the overflow margin
394  double maxarg = log(DBL_MAX);
395  a1 = nextafter(maxarg,-DBL_MAX);
396  vexp(val,a1,a1,a1,a1);
397  CHECK( val[0] < DBL_MAX );
398  a2 = nextafter(maxarg,DBL_MAX);
399  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
400  a2 = -numeric_limits<double>().infinity();
401  vexp(val,a2,a2,a2,a2);
402  CHECK( val[0] == 0. );
403  a2 = numeric_limits<double>().infinity();
404  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
405  a2 = numeric_limits<double>().quiet_NaN();
406  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
407  // now check results over the entire range
408  init_genrand( 395256017L );
409  for( int i=0; i < sz; )
410  {
411  double x = (genrand_real1()-0.5)*1420.;
412  if( x < maxarg )
413  arg[i++] = x;
414  }
415  vexp( arg, val, 0, sz );
416  for( int i=0; i < sz; ++i )
417  {
418  if( arg[i] < minarg )
419  CHECK( val[i] == 0. );
420  else
421  CHECK( fp_equal( val[i], exp(arg[i]) ) );
422  }
423 #endif
424  for( int i=0; i < 32; ++i )
425  arg[i] = double(i);
426  // finally check that non-aligned arrays are treated correctly
427  for( int i=0; i < 32; ++i )
428  {
429  for( int j=i+1; j <= 32; ++j )
430  {
431  invalidate_array( val, 32*sizeof(double) );
432  vexp( arg, val, i, j );
433  for( int k=0; k < 32; ++k )
434  {
435  if( k < i || k >= j )
436  CHECK( isnan(val[k]) );
437  else
438  CHECK( fp_equal( val[k], exp(arg[k]) ) );
439  }
440  invalidate_array( val, 32*sizeof(double) );
441  vexp( &arg[i], val, 0, j-i );
442  for( int k=0; k < 32; ++k )
443  {
444  if( k >= j-i )
445  CHECK( isnan(val[k]) );
446  else
447  CHECK( fp_equal( val[k], exp(arg[k+i]) ) );
448  }
449  }
450  }
451  }
452 
453  TEST(TestVexp10d)
454  {
455 #ifdef __AVX__
456  const int sz = 2048;
457  ALIGNED(CD_ALIGN) double arg[sz];
458  ALIGNED(CD_ALIGN) double val[sz];
459 
460  double minarg = log10(DBL_MIN);
461  double a1 = nextafter(minarg,-DBL_MAX);
462  vexp10(val,a1,a1,a1,a1);
463  CHECK( val[0] == 0. );
464  double a2 = nextafter(minarg,DBL_MAX);
465  vexp10(val,a2,a2,a2,a2);
466  CHECK( val[0] > 0. );
467  double maxarg = log10(DBL_MAX);
468  a1 = nextafter(maxarg,-DBL_MAX);
469  vexp10(val,a1,a1,a1,a1);
470  CHECK( val[0] < DBL_MAX );
471  a2 = nextafter(maxarg,DBL_MAX);
472  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
473  a2 = -numeric_limits<double>().infinity();
474  vexp10(val,a2,a2,a2,a2);
475  CHECK( val[0] == 0. );
476  a2 = numeric_limits<double>().infinity();
477  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
478  a2 = numeric_limits<double>().quiet_NaN();
479  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
480  init_genrand( 395316017L );
481  for( int i=0; i < sz; )
482  {
483  double x = (genrand_real1()-0.5)*617.;
484  if( x < maxarg )
485  arg[i++] = x;
486  }
487  vexp10( arg, val, 0, sz );
488  for( int i=0; i < sz; ++i )
489  {
490  if( arg[i] < minarg )
491  CHECK( val[i] == 0. );
492  else
493  CHECK( fp_equal( val[i], pow(10.,arg[i]), 10 ) );
494  }
495 #else
496  CHECK( fp_equal( 1., 1. ) );
497 #endif
498  }
499 
500  TEST(TestVexpm1d)
501  {
502 #ifdef __AVX__
503  const int sz = 2048;
504  ALIGNED(CD_ALIGN) double arg[sz];
505  ALIGNED(CD_ALIGN) double val[sz];
506 
507  double maxarg = log(DBL_MAX);
508  double a1 = nextafter(maxarg,-DBL_MAX);
509  vexpm1(val,a1,a1,a1,a1);
510  CHECK( val[0] < DBL_MAX );
511  double a2 = nextafter(maxarg,DBL_MAX);
512  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
513  a2 = -numeric_limits<double>().infinity();
514  vexpm1(val,a2,a2,a2,a2);
515  CHECK( val[0] == -1. );
516  a2 = numeric_limits<double>().infinity();
517  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
518  a2 = numeric_limits<double>().quiet_NaN();
519  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
520  double y[8];
521  vexpm1( y, 1.e-10, 8.e-8, 4.e-5, 6.e-3, 2.e-1, 1., 1., 1. );
522  // constants below were derived from Abramowitz & Stegun, Handbook of Mathematical Functions
523  CHECK( fp_equal( y[0], 1.000000000050000e-10 ) );
524  CHECK( fp_equal( y[1], 8.00000032000000853e-8 ) );
525  CHECK( fp_equal( y[2], 4.00008000106667733342e-5 ) );
526  CHECK( fp_equal( y[3], 6.0180360540648648555845e-3 ) );
527  CHECK( fp_equal( y[4], 2.214027581601698339210720e-1 ) );
528  CHECK( fp_equal( y[5], 1.7182818284590452353602875 ) );
529  init_genrand( 377216017L );
530  for( int i=0; i < sz; )
531  {
532  double x = genrand_real1()*749.-39.;
533  if( x < maxarg )
534  arg[i++] = x;
535  }
536  vexpm1( arg, val, 0, sz );
537  for( int i=0; i < sz; ++i )
538  CHECK( fp_equal( val[i], expm1(arg[i]) ) );
539 #else
540  CHECK( fp_equal( 1., 1. ) );
541 #endif
542  }
543 
544  TEST(TestVexpf)
545  {
546  const int sz = 2048;
547  ALIGNED(CD_ALIGN) sys_float arg[sz];
548  ALIGNED(CD_ALIGN) sys_float val[sz];
549 #ifdef __AVX__
550  sys_float minarg = logf(FLT_MIN);
551  sys_float a1 = nextafterf(minarg,-FLT_MAX);
552  vexp(val,a1,a1,a1,a1);
553  CHECK( val[0] == 0.f );
554  sys_float a2 = nextafterf(minarg,FLT_MAX);
555  vexp(val,a2,a2,a2,a2);
556  CHECK( val[0] > 0.f );
557  sys_float maxarg = logf(FLT_MAX);
558  a1 = nextafterf(maxarg,-FLT_MAX);
559  vexp(val,a1,a1,a1,a1);
560  CHECK( val[0] < FLT_MAX );
561  a2 = nextafterf(maxarg,FLT_MAX);
562  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
563  a2 = -numeric_limits<sys_float>().infinity();
564  vexp(val,a2,a2,a2,a2);
565  CHECK( val[0] == 0.f );
566  a2 = numeric_limits<sys_float>().infinity();
567  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
568  a2 = numeric_limits<sys_float>().quiet_NaN();
569  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
570  init_genrand( 395288017L );
571  for( int i=0; i < sz; )
572  {
573  sys_float x = sys_float(genrand_real1()-0.5)*178.f;
574  if( x < maxarg )
575  arg[i++] = x;
576  }
577  vexp( arg, val, 0, sz );
578  for( int i=0; i < sz; ++i )
579  {
580  if( arg[i] < minarg )
581  CHECK( val[i] == 0.f );
582  else
583  CHECK( fp_equal( val[i], expf(arg[i]) ) );
584  }
585 #endif
586  for( int i=0; i < 32; ++i )
587  arg[i] = sys_float(i);
588  for( int i=0; i < 32; ++i )
589  {
590  for( int j=i+1; j <= 32; ++j )
591  {
592  invalidate_array( val, 32*sizeof(sys_float) );
593  vexp( arg, val, i, j );
594  for( int k=0; k < 32; ++k )
595  {
596  if( k < i || k >= j )
597  CHECK( isnan(val[k]) );
598  else
599  CHECK( fp_equal( val[k], expf(arg[k]) ) );
600  }
601  invalidate_array( val, 32*sizeof(sys_float) );
602  vexp( &arg[i], val, 0, j-i );
603  for( int k=0; k < 32; ++k )
604  {
605  if( k >= j-i )
606  CHECK( isnan(val[k]) );
607  else
608  CHECK( fp_equal( val[k], expf(arg[k+i]) ) );
609  }
610  }
611  }
612  }
613 
614  TEST(TestVexp10f)
615  {
616 #ifdef __AVX__
617  const int sz = 2048;
618  ALIGNED(CD_ALIGN) sys_float arg[sz];
619  ALIGNED(CD_ALIGN) sys_float val[sz];
620 
621  sys_float minarg = log10f(FLT_MIN);
622  sys_float a1 = nextafterf(minarg,-FLT_MAX);
623  vexp10(val,a1,a1,a1,a1);
624  CHECK( val[0] == 0.f );
625  sys_float a2 = nextafterf(minarg,FLT_MAX);
626  vexp10(val,a2,a2,a2,a2);
627  CHECK( val[0] > 0.f );
628  sys_float maxarg = log10f(FLT_MAX);
629  a1 = nextafterf(maxarg,-FLT_MAX);
630  vexp10(val,a1,a1,a1,a1);
631  CHECK( val[0] < FLT_MAX );
632  a2 = nextafterf(maxarg,FLT_MAX);
633  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
634  a2 = -numeric_limits<sys_float>().infinity();
635  vexp10(val,a2,a2,a2,a2);
636  CHECK( val[0] == 0.f );
637  a2 = numeric_limits<sys_float>().infinity();
638  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
639  a2 = numeric_limits<sys_float>().quiet_NaN();
640  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
641  init_genrand( 395313517L );
642  for( int i=0; i < sz; )
643  {
644  sys_float x = sys_float(genrand_real1()-0.5)*78.f;
645  if( x < maxarg )
646  arg[i++] = x;
647  }
648  vexp10( arg, val, 0, sz );
649  for( int i=0; i < sz; ++i )
650  {
651  if( arg[i] < minarg )
652  CHECK( val[i] == 0.f );
653  else
654  CHECK( fp_equal( val[i], powf(10.f,arg[i]), 8 ) );
655  }
656 #else
657  CHECK( fp_equal( 1., 1. ) );
658 #endif
659  }
660 
661  TEST(TestVexpm1f)
662  {
663 #ifdef __AVX__
664  const int sz = 2048;
665  ALIGNED(CD_ALIGN) sys_float arg[sz];
666  ALIGNED(CD_ALIGN) sys_float val[sz];
667 
668  sys_float maxarg = logf(FLT_MAX);
669  sys_float a1 = nextafterf(maxarg,-FLT_MAX);
670  vexpm1(val,a1,a1,a1,a1);
671  CHECK( val[0] < FLT_MAX );
672  sys_float a2 = nextafterf(maxarg,FLT_MAX);
673  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
674  a2 = -numeric_limits<sys_float>().infinity();
675  vexpm1(val,a2,a2,a2,a2);
676  CHECK( val[0] == -1.f );
677  a2 = numeric_limits<sys_float>().infinity();
678  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
679  a2 = numeric_limits<sys_float>().quiet_NaN();
680  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
681  sys_float y[8];
682  vexpm1( y, 1.e-10f, 8.e-8f, 4.e-5f, 6.e-3f, 2.e-1f, 1.f, 1.f, 1.f );
683  // constants below were derived from Abramowitz & Stegun, Handbook of Mathematical Functions
684  CHECK( fp_equal( y[0], 1.000000000050000e-10f ) );
685  CHECK( fp_equal( y[1], 8.00000032000000853e-8f ) );
686  CHECK( fp_equal( y[2], 4.00008000106667733342e-5f ) );
687  CHECK( fp_equal( y[3], 6.0180360540648648555845e-3f ) );
688  CHECK( fp_equal( y[4], 2.214027581601698339210720e-1f ) );
689  CHECK( fp_equal( y[5], 1.7182818284590452353602875f ) );
690  init_genrand( 395324417L );
691  for( int i=0; i < sz; )
692  {
693  sys_float x = sys_float(genrand_real1())*108.f-19.f;
694  if( x < maxarg )
695  arg[i++] = x;
696  }
697  vexpm1( arg, val, 0, sz );
698  for( int i=0; i < sz; ++i )
699  CHECK( fp_equal( val[i], expm1f(arg[i]) ) );
700 #else
701  CHECK( fp_equal( 1., 1. ) );
702 #endif
703  }
704 
705  TEST(TestVexpdx4)
706  {
707  double y[4];
708  vexp(y,1.1,1.2,1.3,1.4);
709  CHECK( fp_equal( y[0], exp(1.1) ) );
710  CHECK( fp_equal( y[1], exp(1.2) ) );
711  CHECK( fp_equal( y[2], exp(1.3) ) );
712  CHECK( fp_equal( y[3], exp(1.4) ) );
713  }
714 
715  TEST(TestVexp10dx4)
716  {
717  double y[4];
718  vexp10(y,1.1,1.2,1.3,1.4);
719  CHECK( fp_equal( y[0], pow(10.,1.1) ) );
720  CHECK( fp_equal( y[1], pow(10.,1.2) ) );
721  CHECK( fp_equal( y[2], pow(10.,1.3) ) );
722  CHECK( fp_equal( y[3], pow(10.,1.4) ) );
723  }
724 
725  TEST(TestVexpm1dx4)
726  {
727  double y[4];
728  vexpm1(y,1.e-10,1.2,1.3,1.4);
729  CHECK( fp_equal( y[0], expm1(1.e-10) ) );
730  CHECK( fp_equal( y[1], expm1(1.2) ) );
731  CHECK( fp_equal( y[2], expm1(1.3) ) );
732  CHECK( fp_equal( y[3], expm1(1.4) ) );
733  }
734 
735  TEST(TestVexpdx8)
736  {
737  double y[8];
738  vexp(y,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8);
739  CHECK( fp_equal( y[0], exp(1.1) ) );
740  CHECK( fp_equal( y[1], exp(1.2) ) );
741  CHECK( fp_equal( y[2], exp(1.3) ) );
742  CHECK( fp_equal( y[3], exp(1.4) ) );
743  CHECK( fp_equal( y[4], exp(1.5) ) );
744  CHECK( fp_equal( y[5], exp(1.6) ) );
745  CHECK( fp_equal( y[6], exp(1.7) ) );
746  CHECK( fp_equal( y[7], exp(1.8) ) );
747  }
748 
749  TEST(TestVexp10dx8)
750  {
751  double y[8];
752  vexp10(y,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8);
753  CHECK( fp_equal( y[0], pow(10.,1.1) ) );
754  CHECK( fp_equal( y[1], pow(10.,1.2) ) );
755  CHECK( fp_equal( y[2], pow(10.,1.3) ) );
756  CHECK( fp_equal( y[3], pow(10.,1.4) ) );
757  CHECK( fp_equal( y[4], pow(10.,1.5) ) );
758  CHECK( fp_equal( y[5], pow(10.,1.6) ) );
759  CHECK( fp_equal( y[6], pow(10.,1.7) ) );
760  CHECK( fp_equal( y[7], pow(10.,1.8) ) );
761  }
762 
763  TEST(TestVexpm1dx8)
764  {
765  double y[8];
766  vexpm1(y,1.e-10,1.2,1.3,1.4,1.5,1.6,1.7,1.8);
767  CHECK( fp_equal( y[0], expm1(1.e-10) ) );
768  CHECK( fp_equal( y[1], expm1(1.2) ) );
769  CHECK( fp_equal( y[2], expm1(1.3) ) );
770  CHECK( fp_equal( y[3], expm1(1.4) ) );
771  CHECK( fp_equal( y[4], expm1(1.5) ) );
772  CHECK( fp_equal( y[5], expm1(1.6) ) );
773  CHECK( fp_equal( y[6], expm1(1.7) ) );
774  CHECK( fp_equal( y[7], expm1(1.8) ) );
775  }
776 
777  TEST(TestVexpfx4)
778  {
779  sys_float y[4];
780  vexp(y,1.1f,1.2f,1.3f,1.4f);
781  CHECK( fp_equal( y[0], expf(1.1f) ) );
782  CHECK( fp_equal( y[1], expf(1.2f) ) );
783  CHECK( fp_equal( y[2], expf(1.3f) ) );
784  CHECK( fp_equal( y[3], expf(1.4f) ) );
785  }
786 
787  TEST(TestVexp10fx4)
788  {
789  sys_float y[4];
790  vexp10(y,1.1f,1.2f,1.3f,1.4f);
791  CHECK( fp_equal( y[0], powf(10.f,1.1f) ) );
792  CHECK( fp_equal( y[1], powf(10.f,1.2f) ) );
793  CHECK( fp_equal( y[2], powf(10.f,1.3f) ) );
794  CHECK( fp_equal( y[3], powf(10.f,1.4f) ) );
795  }
796 
797  TEST(TestVexpm1fx4)
798  {
799  sys_float y[4];
800  vexpm1(y,1.e-10f,1.2f,1.3f,1.4f);
801  CHECK( fp_equal( y[0], expm1f(1.e-10f) ) );
802  CHECK( fp_equal( y[1], expm1f(1.2f) ) );
803  CHECK( fp_equal( y[2], expm1f(1.3f) ) );
804  CHECK( fp_equal( y[3], expm1f(1.4f) ) );
805  }
806 
807  TEST(TestVexpfx8)
808  {
809  sys_float y[8];
810  vexp(y,1.1f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f);
811  CHECK( fp_equal( y[0], expf(1.1f) ) );
812  CHECK( fp_equal( y[1], expf(1.2f) ) );
813  CHECK( fp_equal( y[2], expf(1.3f) ) );
814  CHECK( fp_equal( y[3], expf(1.4f) ) );
815  CHECK( fp_equal( y[4], expf(1.5f) ) );
816  CHECK( fp_equal( y[5], expf(1.6f) ) );
817  CHECK( fp_equal( y[6], expf(1.7f) ) );
818  CHECK( fp_equal( y[7], expf(1.8f) ) );
819  }
820 
821  TEST(TestVexp10fx8)
822  {
823  sys_float y[8];
824  vexp10(y,1.1f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f);
825  CHECK( fp_equal( y[0], powf(10.f,1.1f) ) );
826  CHECK( fp_equal( y[1], powf(10.f,1.2f) ) );
827  CHECK( fp_equal( y[2], powf(10.f,1.3f) ) );
828  CHECK( fp_equal( y[3], powf(10.f,1.4f) ) );
829  CHECK( fp_equal( y[4], powf(10.f,1.5f) ) );
830  CHECK( fp_equal( y[5], powf(10.f,1.6f) ) );
831  CHECK( fp_equal( y[6], powf(10.f,1.7f) ) );
832  CHECK( fp_equal( y[7], powf(10.f,1.8f) ) );
833  }
834 
835  TEST(TestVexpm1fx8)
836  {
837  sys_float y[8];
838  vexpm1(y,1.e-10f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f);
839  CHECK( fp_equal( y[0], expm1f(1.e-10f) ) );
840  CHECK( fp_equal( y[1], expm1f(1.2f) ) );
841  CHECK( fp_equal( y[2], expm1f(1.3f) ) );
842  CHECK( fp_equal( y[3], expm1f(1.4f) ) );
843  CHECK( fp_equal( y[4], expm1f(1.5f) ) );
844  CHECK( fp_equal( y[5], expm1f(1.6f) ) );
845  CHECK( fp_equal( y[6], expm1f(1.7f) ) );
846  CHECK( fp_equal( y[7], expm1f(1.8f) ) );
847  }
848 
849  TEST(TestVexpfx16)
850  {
851  sys_float y[16];
852  vexp(y,1.1f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f,2.11f,2.22f,2.33f,2.44f,2.55f,2.66f,2.77f,2.88f);
853  CHECK( fp_equal( y[0], expf(1.1f) ) );
854  CHECK( fp_equal( y[1], expf(1.2f) ) );
855  CHECK( fp_equal( y[2], expf(1.3f) ) );
856  CHECK( fp_equal( y[3], expf(1.4f) ) );
857  CHECK( fp_equal( y[4], expf(1.5f) ) );
858  CHECK( fp_equal( y[5], expf(1.6f) ) );
859  CHECK( fp_equal( y[6], expf(1.7f) ) );
860  CHECK( fp_equal( y[7], expf(1.8f) ) );
861  CHECK( fp_equal( y[8], expf(2.11f) ) );
862  CHECK( fp_equal( y[9], expf(2.22f) ) );
863  CHECK( fp_equal( y[10], expf(2.33f) ) );
864  CHECK( fp_equal( y[11], expf(2.44f) ) );
865  CHECK( fp_equal( y[12], expf(2.55f) ) );
866  CHECK( fp_equal( y[13], expf(2.66f) ) );
867  CHECK( fp_equal( y[14], expf(2.77f) ) );
868  CHECK( fp_equal( y[15], expf(2.88f) ) );
869  }
870 
871  TEST(TestVexp10fx16)
872  {
873  sys_float y[16];
874  vexp10(y,1.1f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f,2.11f,2.22f,2.33f,2.44f,2.55f,2.66f,2.77f,2.88f);
875  CHECK( fp_equal( y[0], powf(10.f,1.1f) ) );
876  CHECK( fp_equal( y[1], powf(10.f,1.2f) ) );
877  CHECK( fp_equal( y[2], powf(10.f,1.3f) ) );
878  CHECK( fp_equal( y[3], powf(10.f,1.4f) ) );
879  CHECK( fp_equal( y[4], powf(10.f,1.5f) ) );
880  CHECK( fp_equal( y[5], powf(10.f,1.6f) ) );
881  CHECK( fp_equal( y[6], powf(10.f,1.7f) ) );
882  CHECK( fp_equal( y[7], powf(10.f,1.8f) ) );
883  CHECK( fp_equal( y[8], powf(10.f,2.11f) ) );
884  CHECK( fp_equal( y[9], powf(10.f,2.22f) ) );
885  CHECK( fp_equal( y[10], powf(10.f,2.33f) ) );
886  CHECK( fp_equal( y[11], powf(10.f,2.44f) ) );
887  CHECK( fp_equal( y[12], powf(10.f,2.55f) ) );
888  CHECK( fp_equal( y[13], powf(10.f,2.66f) ) );
889  CHECK( fp_equal( y[14], powf(10.f,2.77f) ) );
890  CHECK( fp_equal( y[15], powf(10.f,2.88f) ) );
891  }
892 
893  TEST(TestVexpm1fx16)
894  {
895  sys_float y[16];
896  vexpm1(y,1.e-10f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f,2.11f,2.22f,2.33f,2.44f,2.55f,2.66f,2.77f,2.88f);
897  CHECK( fp_equal( y[0], expm1f(1.e-10f) ) );
898  CHECK( fp_equal( y[1], expm1f(1.2f) ) );
899  CHECK( fp_equal( y[2], expm1f(1.3f) ) );
900  CHECK( fp_equal( y[3], expm1f(1.4f) ) );
901  CHECK( fp_equal( y[4], expm1f(1.5f) ) );
902  CHECK( fp_equal( y[5], expm1f(1.6f) ) );
903  CHECK( fp_equal( y[6], expm1f(1.7f) ) );
904  CHECK( fp_equal( y[7], expm1f(1.8f) ) );
905  CHECK( fp_equal( y[8], expm1f(2.11f) ) );
906  CHECK( fp_equal( y[9], expm1f(2.22f) ) );
907  CHECK( fp_equal( y[10], expm1f(2.33f) ) );
908  CHECK( fp_equal( y[11], expm1f(2.44f) ) );
909  CHECK( fp_equal( y[12], expm1f(2.55f) ) );
910  CHECK( fp_equal( y[13], expm1f(2.66f) ) );
911  CHECK( fp_equal( y[14], expm1f(2.77f) ) );
912  CHECK( fp_equal( y[15], expm1f(2.88f) ) );
913  }
914 
915  TEST(TestVlogd)
916  {
917 #ifdef __AVX__
918  double x, y[4];
919  x = DBL_MIN/10.;
920  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
921  x = 0.;
922  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
923  x = -1.;
924  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
925  x = numeric_limits<double>().infinity();
926  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
927  x = -numeric_limits<double>().infinity();
928  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
929  x = numeric_limits<double>().quiet_NaN();
930  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
931 
932  const int sz = 2048;
933  ALIGNED(CD_ALIGN) double arg[sz];
934  ALIGNED(CD_ALIGN) double val[sz];
935  init_genrand( 395317717L );
936  for( int i=0; i < sz; ++i )
937  arg[i] = exp(genrand_real1()*1418.17-708.39);
938  vlog( arg, val, 0, sz );
939  for( int i=0; i < sz; ++i )
940  CHECK( fp_equal( val[i], log(arg[i]) ) );
941 #else
942  CHECK( fp_equal( 1., 1. ) );
943 #endif
944  }
945 
946  TEST(TestVlog10d)
947  {
948 #ifdef __AVX__
949  double x, y[4];
950  x = DBL_MIN/10.;
951  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
952  x = 0.;
953  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
954  x = -1.;
955  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
956  x = numeric_limits<double>().infinity();
957  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
958  x = -numeric_limits<double>().infinity();
959  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
960  x = numeric_limits<double>().quiet_NaN();
961  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
962 
963  const int sz = 2048;
964  ALIGNED(CD_ALIGN) double arg[sz];
965  ALIGNED(CD_ALIGN) double val[sz];
966  init_genrand( 395288717L );
967  for( int i=0; i < sz; ++i )
968  arg[i] = exp(genrand_real1()*1418.17-708.39);
969  vlog10( arg, val, 0, sz );
970  for( int i=0; i < sz; ++i )
971  CHECK( fp_equal( val[i], log10(arg[i]) ) );
972 #else
973  CHECK( fp_equal( 1., 1. ) );
974 #endif
975  }
976 
977  TEST(TestVlog1pd)
978  {
979 #ifdef __AVX__
980  double x, y[4];
981  x = -1.;
982  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
983  x = -2.;
984  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
985  x = numeric_limits<double>().infinity();
986  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
987  x = -numeric_limits<double>().infinity();
988  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
989  x = numeric_limits<double>().quiet_NaN();
990  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
991 
992  const int sz = 2048;
993  ALIGNED(CD_ALIGN) double arg[sz];
994  ALIGNED(CD_ALIGN) double val[sz];
995  init_genrand( 337288717L );
996  // first test arguments close to zero
997  for( int i=0; i < sz; ++i )
998  {
999  arg[i] = exp(genrand_real1()*258.-260.);
1000  if( (i%2) == 1 )
1001  arg[i] *= -1.;
1002  }
1003  vlog1p( arg, val, 0, sz );
1004  for( int i=0; i < sz; ++i )
1005  CHECK( fp_equal( val[i], log1p(arg[i]) ) );
1006  // next test full range
1007  for( int i=0; i < sz; ++i )
1008  arg[i] = exp(genrand_real1()*1418.17-708.39) - 0.9999999999;
1009  vlog1p( arg, val, 0, sz );
1010  for( int i=0; i < sz; ++i )
1011  CHECK( fp_equal( val[i], log1p(arg[i]) ) );
1012 #else
1013  CHECK( fp_equal( 1., 1. ) );
1014 #endif
1015  }
1016 
1017  TEST(TestVlogf)
1018  {
1019 #ifdef __AVX__
1020  sys_float x, y[4];
1021  x = FLT_MIN/10.f;
1022  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1023  x = 0.f;
1024  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1025  x = -1.f;
1026  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1027  x = numeric_limits<sys_float>().infinity();
1028  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1029  x = -numeric_limits<sys_float>().infinity();
1030  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1031  x = numeric_limits<sys_float>().quiet_NaN();
1032  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1033 
1034  const int sz = 2048;
1035  ALIGNED(CD_ALIGN) sys_float arg[sz];
1036  ALIGNED(CD_ALIGN) sys_float val[sz];
1037  init_genrand( 395955717L );
1038  for( int i=0; i < sz; ++i )
1039  arg[i] = exp(genrand_real1()*176.05-87.33);
1040  vlog( arg, val, 0, sz );
1041  for( int i=0; i < sz; ++i )
1042  CHECK( fp_equal( val[i], logf(arg[i]) ) );
1043 #else
1044  CHECK( fp_equal( 1., 1. ) );
1045 #endif
1046  }
1047 
1048  TEST(TestVlog10f)
1049  {
1050 #ifdef __AVX__
1051  sys_float x, y[4];
1052  x = FLT_MIN/10.f;
1053  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1054  x = 0.f;
1055  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1056  x = -1.f;
1057  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1058  x = numeric_limits<sys_float>().infinity();
1059  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1060  x = -numeric_limits<sys_float>().infinity();
1061  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1062  x = numeric_limits<sys_float>().quiet_NaN();
1063  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1064 
1065  const int sz = 2048;
1066  ALIGNED(CD_ALIGN) sys_float arg[sz];
1067  ALIGNED(CD_ALIGN) sys_float val[sz];
1068  init_genrand( 395281237L );
1069  for( int i=0; i < sz; ++i )
1070  arg[i] = exp(genrand_real1()*176.05-87.33);
1071  vlog10( arg, val, 0, sz );
1072  for( int i=0; i < sz; ++i )
1073  CHECK( fp_equal( val[i], log10f(arg[i]) ) );
1074 #else
1075  CHECK( fp_equal( 1., 1. ) );
1076 #endif
1077  }
1078 
1079  TEST(TestVlog1pf)
1080  {
1081 #ifdef __AVX__
1082  sys_float x, y[4];
1083  x = -1.f;
1084  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1085  x = -2.f;
1086  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1087  x = numeric_limits<sys_float>().infinity();
1088  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1089  x = -numeric_limits<sys_float>().infinity();
1090  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1091  x = numeric_limits<sys_float>().quiet_NaN();
1092  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1093 
1094  const int sz = 2048;
1095  ALIGNED(CD_ALIGN) sys_float arg[sz];
1096  ALIGNED(CD_ALIGN) sys_float val[sz];
1097  init_genrand( 337898717L );
1098  // first test arguments close to zero
1099  for( int i=0; i < sz; ++i )
1100  {
1101  arg[i] = exp(genrand_real1()*78.-80.);
1102  if( (i%2) == 1 )
1103  arg[i] *= -1.f;
1104  }
1105  vlog1p( arg, val, 0, sz );
1106  for( int i=0; i < sz; ++i )
1107  CHECK( fp_equal( val[i], log1pf(arg[i]) ) );
1108  // next test full range
1109  for( int i=0; i < sz; ++i )
1110  arg[i] = exp(genrand_real1()*176.05-87.33) - 0.99999;
1111  vlog1p( arg, val, 0, sz );
1112  for( int i=0; i < sz; ++i )
1113  CHECK( fp_equal( val[i], log1pf(arg[i]) ) );
1114 #else
1115  CHECK( fp_equal( 1., 1. ) );
1116 #endif
1117  }
1118 
1119  TEST(TestVlogdx4)
1120  {
1121  double y[4];
1122  vlog(y,11.,22.,33.,44.);
1123  CHECK( fp_equal( y[0], log(11.) ) );
1124  CHECK( fp_equal( y[1], log(22.) ) );
1125  CHECK( fp_equal( y[2], log(33.) ) );
1126  CHECK( fp_equal( y[3], log(44.) ) );
1127  }
1128 
1129  TEST(TestVlog10dx4)
1130  {
1131  double y[4];
1132  vlog10(y,11.,22.,33.,44.);
1133  CHECK( fp_equal( y[0], log10(11.) ) );
1134  CHECK( fp_equal( y[1], log10(22.) ) );
1135  CHECK( fp_equal( y[2], log10(33.) ) );
1136  CHECK( fp_equal( y[3], log10(44.) ) );
1137  }
1138 
1139  TEST(TestVlog1pdx4)
1140  {
1141  double y[4];
1142  vlog1p(y,11.,22.,33.,44.);
1143  CHECK( fp_equal( y[0], log1p(11.) ) );
1144  CHECK( fp_equal( y[1], log1p(22.) ) );
1145  CHECK( fp_equal( y[2], log1p(33.) ) );
1146  CHECK( fp_equal( y[3], log1p(44.) ) );
1147  }
1148 
1149  TEST(TestVlogdx8)
1150  {
1151  double y[8];
1152  vlog(y,11.,22.,33.,44.,55.,66.,77.,88.);
1153  CHECK( fp_equal( y[0], log(11.) ) );
1154  CHECK( fp_equal( y[1], log(22.) ) );
1155  CHECK( fp_equal( y[2], log(33.) ) );
1156  CHECK( fp_equal( y[3], log(44.) ) );
1157  CHECK( fp_equal( y[4], log(55.) ) );
1158  CHECK( fp_equal( y[5], log(66.) ) );
1159  CHECK( fp_equal( y[6], log(77.) ) );
1160  CHECK( fp_equal( y[7], log(88.) ) );
1161  }
1162 
1163  TEST(TestVlog10dx8)
1164  {
1165  double y[8];
1166  vlog10(y,11.,22.,33.,44.,55.,66.,77.,88.);
1167  CHECK( fp_equal( y[0], log10(11.) ) );
1168  CHECK( fp_equal( y[1], log10(22.) ) );
1169  CHECK( fp_equal( y[2], log10(33.) ) );
1170  CHECK( fp_equal( y[3], log10(44.) ) );
1171  CHECK( fp_equal( y[4], log10(55.) ) );
1172  CHECK( fp_equal( y[5], log10(66.) ) );
1173  CHECK( fp_equal( y[6], log10(77.) ) );
1174  CHECK( fp_equal( y[7], log10(88.) ) );
1175  }
1176 
1177  TEST(TestVlog1pdx8)
1178  {
1179  double y[8];
1180  vlog1p(y,11.,22.,33.,44.,55.,66.,77.,88.);
1181  CHECK( fp_equal( y[0], log1p(11.) ) );
1182  CHECK( fp_equal( y[1], log1p(22.) ) );
1183  CHECK( fp_equal( y[2], log1p(33.) ) );
1184  CHECK( fp_equal( y[3], log1p(44.) ) );
1185  CHECK( fp_equal( y[4], log1p(55.) ) );
1186  CHECK( fp_equal( y[5], log1p(66.) ) );
1187  CHECK( fp_equal( y[6], log1p(77.) ) );
1188  CHECK( fp_equal( y[7], log1p(88.) ) );
1189  }
1190 
1191  TEST(TestVlogfx4)
1192  {
1193  sys_float y[4];
1194  vlog(y,11.f,22.f,33.f,44.f);
1195  CHECK( fp_equal( y[0], logf(11.f) ) );
1196  CHECK( fp_equal( y[1], logf(22.f) ) );
1197  CHECK( fp_equal( y[2], logf(33.f) ) );
1198  CHECK( fp_equal( y[3], logf(44.f) ) );
1199  }
1200 
1201  TEST(TestVlog10fx4)
1202  {
1203  sys_float y[4];
1204  vlog10(y,11.f,22.f,33.f,44.f);
1205  CHECK( fp_equal( y[0], log10f(11.f) ) );
1206  CHECK( fp_equal( y[1], log10f(22.f) ) );
1207  CHECK( fp_equal( y[2], log10f(33.f) ) );
1208  CHECK( fp_equal( y[3], log10f(44.f) ) );
1209  }
1210 
1211  TEST(TestVlog1pfx4)
1212  {
1213  sys_float y[4];
1214  vlog1p(y,11.f,22.f,33.f,44.f);
1215  CHECK( fp_equal( y[0], log1pf(11.f) ) );
1216  CHECK( fp_equal( y[1], log1pf(22.f) ) );
1217  CHECK( fp_equal( y[2], log1pf(33.f) ) );
1218  CHECK( fp_equal( y[3], log1pf(44.f) ) );
1219  }
1220 
1221  TEST(TestVlogfx8)
1222  {
1223  sys_float y[8];
1224  vlog(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1225  CHECK( fp_equal( y[0], logf(11.f) ) );
1226  CHECK( fp_equal( y[1], logf(22.f) ) );
1227  CHECK( fp_equal( y[2], logf(33.f) ) );
1228  CHECK( fp_equal( y[3], logf(44.f) ) );
1229  CHECK( fp_equal( y[4], logf(55.f) ) );
1230  CHECK( fp_equal( y[5], logf(66.f) ) );
1231  CHECK( fp_equal( y[6], logf(77.f) ) );
1232  CHECK( fp_equal( y[7], logf(88.f) ) );
1233  }
1234 
1235  TEST(TestVlog10fx8)
1236  {
1237  sys_float y[8];
1238  vlog10(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1239  CHECK( fp_equal( y[0], log10f(11.f) ) );
1240  CHECK( fp_equal( y[1], log10f(22.f) ) );
1241  CHECK( fp_equal( y[2], log10f(33.f) ) );
1242  CHECK( fp_equal( y[3], log10f(44.f) ) );
1243  CHECK( fp_equal( y[4], log10f(55.f) ) );
1244  CHECK( fp_equal( y[5], log10f(66.f) ) );
1245  CHECK( fp_equal( y[6], log10f(77.f) ) );
1246  CHECK( fp_equal( y[7], log10f(88.f) ) );
1247  }
1248 
1249  TEST(TestVlog1pfx8)
1250  {
1251  sys_float y[8];
1252  vlog1p(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1253  CHECK( fp_equal( y[0], log1pf(11.f) ) );
1254  CHECK( fp_equal( y[1], log1pf(22.f) ) );
1255  CHECK( fp_equal( y[2], log1pf(33.f) ) );
1256  CHECK( fp_equal( y[3], log1pf(44.f) ) );
1257  CHECK( fp_equal( y[4], log1pf(55.f) ) );
1258  CHECK( fp_equal( y[5], log1pf(66.f) ) );
1259  CHECK( fp_equal( y[6], log1pf(77.f) ) );
1260  CHECK( fp_equal( y[7], log1pf(88.f) ) );
1261  }
1262 
1263  TEST(TestVlogfx16)
1264  {
1265  sys_float y[16];
1266  vlog(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1267  CHECK( fp_equal( y[0], logf(11.f) ) );
1268  CHECK( fp_equal( y[1], logf(22.f) ) );
1269  CHECK( fp_equal( y[2], logf(33.f) ) );
1270  CHECK( fp_equal( y[3], logf(44.f) ) );
1271  CHECK( fp_equal( y[4], logf(55.f) ) );
1272  CHECK( fp_equal( y[5], logf(66.f) ) );
1273  CHECK( fp_equal( y[6], logf(77.f) ) );
1274  CHECK( fp_equal( y[7], logf(88.f) ) );
1275  CHECK( fp_equal( y[8], logf(111.f) ) );
1276  CHECK( fp_equal( y[9], logf(122.f) ) );
1277  CHECK( fp_equal( y[10], logf(133.f) ) );
1278  CHECK( fp_equal( y[11], logf(144.f) ) );
1279  CHECK( fp_equal( y[12], logf(155.f) ) );
1280  CHECK( fp_equal( y[13], logf(166.f) ) );
1281  CHECK( fp_equal( y[14], logf(177.f) ) );
1282  CHECK( fp_equal( y[15], logf(188.f) ) );
1283  }
1284 
1285  TEST(TestVlog10fx16)
1286  {
1287  sys_float y[16];
1288  vlog10(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1289  CHECK( fp_equal( y[0], log10f(11.f) ) );
1290  CHECK( fp_equal( y[1], log10f(22.f) ) );
1291  CHECK( fp_equal( y[2], log10f(33.f) ) );
1292  CHECK( fp_equal( y[3], log10f(44.f) ) );
1293  CHECK( fp_equal( y[4], log10f(55.f) ) );
1294  CHECK( fp_equal( y[5], log10f(66.f) ) );
1295  CHECK( fp_equal( y[6], log10f(77.f) ) );
1296  CHECK( fp_equal( y[7], log10f(88.f) ) );
1297  CHECK( fp_equal( y[8], log10f(111.f) ) );
1298  CHECK( fp_equal( y[9], log10f(122.f) ) );
1299  CHECK( fp_equal( y[10], log10f(133.f) ) );
1300  CHECK( fp_equal( y[11], log10f(144.f) ) );
1301  CHECK( fp_equal( y[12], log10f(155.f) ) );
1302  CHECK( fp_equal( y[13], log10f(166.f) ) );
1303  CHECK( fp_equal( y[14], log10f(177.f) ) );
1304  CHECK( fp_equal( y[15], log10f(188.f) ) );
1305  }
1306 
1307  TEST(TestVlog1pfx16)
1308  {
1309  sys_float y[16];
1310  vlog1p(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1311  CHECK( fp_equal( y[0], log1pf(11.f) ) );
1312  CHECK( fp_equal( y[1], log1pf(22.f) ) );
1313  CHECK( fp_equal( y[2], log1pf(33.f) ) );
1314  CHECK( fp_equal( y[3], log1pf(44.f) ) );
1315  CHECK( fp_equal( y[4], log1pf(55.f) ) );
1316  CHECK( fp_equal( y[5], log1pf(66.f) ) );
1317  CHECK( fp_equal( y[6], log1pf(77.f) ) );
1318  CHECK( fp_equal( y[7], log1pf(88.f) ) );
1319  CHECK( fp_equal( y[8], log1pf(111.f) ) );
1320  CHECK( fp_equal( y[9], log1pf(122.f) ) );
1321  CHECK( fp_equal( y[10], log1pf(133.f) ) );
1322  CHECK( fp_equal( y[11], log1pf(144.f) ) );
1323  CHECK( fp_equal( y[12], log1pf(155.f) ) );
1324  CHECK( fp_equal( y[13], log1pf(166.f) ) );
1325  CHECK( fp_equal( y[14], log1pf(177.f) ) );
1326  CHECK( fp_equal( y[15], log1pf(188.f) ) );
1327  }
1328 
1329  TEST(TestVsqrtd)
1330  {
1331 #ifdef __AVX__
1332  double x, y[4];
1333  x = -1.;
1334  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1335  x = -numeric_limits<double>().infinity();
1336  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1337  x = numeric_limits<double>().infinity();
1338  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1339  x = numeric_limits<double>().quiet_NaN();
1340  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1341  x = 0.;
1342  vsqrt(y,x,x,x,x);
1343  CHECK( y[0] == 0. );
1344 
1345  const int sz = 2048;
1346  ALIGNED(CD_ALIGN) double arg[sz];
1347  ALIGNED(CD_ALIGN) double val[sz];
1348  init_genrand( 335098717L );
1349  for( int i=0; i < sz; ++i )
1350  arg[i] = exp(genrand_real1()*1418.17-708.39);
1351  vsqrt( arg, val, 0, sz );
1352  for( int i=0; i < sz; ++i )
1353  CHECK( fp_equal( val[i], sqrt(arg[i]) ) );
1354 #else
1355  CHECK( fp_equal( 1., 1. ) );
1356 #endif
1357  }
1358 
1359  TEST(TestVhypotd)
1360  {
1361  const int sz = 2048;
1362  ALIGNED(CD_ALIGN) double arg1[sz];
1363  ALIGNED(CD_ALIGN) double arg2[sz];
1364  ALIGNED(CD_ALIGN) double val[sz];
1365 #ifdef __AVX__
1366  double y[4], x = -numeric_limits<double>().infinity();
1367  CHECK_THROW( vhypot(y,x,1.,x,1.,x,1.,x,1.), domain_error );
1368  CHECK_THROW( vhypot(y,1.,x,1.,x,1.,x,1.,x), domain_error );
1369  x = numeric_limits<double>().infinity();
1370  CHECK_THROW( vhypot(y,x,1.,x,1.,x,1.,x,1.), domain_error );
1371  CHECK_THROW( vhypot(y,1.,x,1.,x,1.,x,1.,x), domain_error );
1372  x = numeric_limits<double>().quiet_NaN();
1373  CHECK_THROW( vhypot(y,x,1.,x,1.,x,1.,x,1.), domain_error );
1374  CHECK_THROW( vhypot(y,1.,x,1.,x,1.,x,1.,x), domain_error );
1375  vhypot(y,0.,0.,1.,0.,0.,1.,DBL_MAX/2.,DBL_MAX/2.);
1376  CHECK( fp_equal( y[0], 0. ) );
1377  CHECK( fp_equal( y[1], 1. ) );
1378  CHECK( fp_equal( y[2], 1. ) );
1379  CHECK( fp_equal( y[3], DBL_MAX/sqrt(2.) ) );
1380  // now check results over the entire range
1381  init_genrand( 395933017L );
1382  for( int i=0; i < sz; ++i )
1383  {
1384  arg1[i] = exp(genrand_real1()*1417.82-708.39);
1385  if( (i%4) < 2 )
1386  arg1[i] = -arg1[i];
1387  arg2[i] = exp(genrand_real1()*1417.82-708.39);
1388  if( (i%2) == 1 )
1389  arg2[i] = -arg2[i];
1390  }
1391  vhypot( arg1, arg2, val, 0, sz );
1392  for( int i=0; i < sz; ++i )
1393  {
1394  CHECK( fp_equal( val[i], hypot(arg1[i], arg2[i]) ) );
1395  }
1396 #endif
1397  for( int i=0; i < 32; ++i )
1398  {
1399  arg1[i] = double(i);
1400  arg2[i] = double(i);
1401  }
1402  // finally check that non-aligned arrays are treated correctly
1403  for( int i=0; i < 32; ++i )
1404  {
1405  for( int j=i+1; j <= 32; ++j )
1406  {
1407  invalidate_array( val, 32*sizeof(double) );
1408  vhypot( arg1, arg2, val, i, j );
1409  for( int k=0; k < 32; ++k )
1410  {
1411  if( k < i || k >= j )
1412  CHECK( isnan(val[k]) );
1413  else
1414  CHECK( fp_equal( val[k], double(k)*sqrt(2.) ) );
1415  }
1416  invalidate_array( val, 32*sizeof(double) );
1417  vhypot( &arg1[i], &arg2[i], val, 0, j-i );
1418  for( int k=0; k < 32; ++k )
1419  {
1420  if( k >= j-i )
1421  CHECK( isnan(val[k]) );
1422  else
1423  CHECK( fp_equal( val[k], double(k+i)*sqrt(2.) ) );
1424  }
1425  invalidate_array( val, 32*sizeof(double) );
1426  vhypot( &arg1[i], arg2, &val[i], 0, j-i );
1427  for( int k=0; k < 32; ++k )
1428  {
1429  if( k < i || k >= j )
1430  CHECK( isnan(val[k]) );
1431  else
1432  CHECK( fp_equal( val[k], sqrt(double((k-i)*(k-i)+k*k)) ) );
1433  }
1434  invalidate_array( val, 32*sizeof(double) );
1435  vhypot( arg1, &arg2[i], &val[i], 0, j-i );
1436  for( int k=0; k < 32; ++k )
1437  {
1438  if( k < i || k >= j )
1439  CHECK( isnan(val[k]) );
1440  else
1441  CHECK( fp_equal( val[k], sqrt(double((k-i)*(k-i)+k*k)) ) );
1442  }
1443  invalidate_array( val, 32*sizeof(double) );
1444  vhypot( &arg1[i], &arg2[i/2], val, 0, j-i );
1445  for( int k=0; k < 32; ++k )
1446  {
1447  if( k >= j-i )
1448  CHECK( isnan(val[k]) );
1449  else
1450  {
1451  double ref2 = double((k+i)*(k+i) + (k+i/2)*(k+i/2));
1452  CHECK( fp_equal( val[k], sqrt(ref2) ) );
1453  }
1454  }
1455  }
1456  }
1457  }
1458 
1459  TEST(TestVsqrtf)
1460  {
1461 #ifdef __AVX__
1462  sys_float x, y[4];
1463  x = -1.f;
1464  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1465  x = -numeric_limits<sys_float>().infinity();
1466  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1467  x = numeric_limits<sys_float>().infinity();
1468  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1469  x = numeric_limits<sys_float>().quiet_NaN();
1470  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1471  x = 0.f;
1472  vsqrt(y,x,x,x,x);
1473  CHECK( y[0] == 0.f );
1474 
1475  const int sz = 2048;
1476  ALIGNED(CD_ALIGN) sys_float arg[sz];
1477  ALIGNED(CD_ALIGN) sys_float val[sz];
1478  init_genrand( 335036617L );
1479  for( int i=0; i < sz; ++i )
1480  arg[i] = exp(genrand_real1()*176.05-87.33);
1481  vsqrt( arg, val, 0, sz );
1482  for( int i=0; i < sz; ++i )
1483  CHECK( fp_equal( val[i], sqrtf(arg[i]) ) );
1484 #else
1485  CHECK( fp_equal( 1., 1. ) );
1486 #endif
1487  }
1488 
1489  TEST(TestVhypotf)
1490  {
1491  const int sz = 2048;
1492  ALIGNED(CD_ALIGN) sys_float arg1[sz];
1493  ALIGNED(CD_ALIGN) sys_float arg2[sz];
1494  ALIGNED(CD_ALIGN) sys_float val[sz];
1495 #ifdef __AVX__
1496  sys_float y[4], x = -numeric_limits<sys_float>().infinity();
1497  CHECK_THROW( vhypot(y,x,1.f,x,1.f,x,1.f,x,1.f), domain_error );
1498  CHECK_THROW( vhypot(y,1.f,x,1.f,x,1.f,x,1.f,x), domain_error );
1499  x = numeric_limits<sys_float>().infinity();
1500  CHECK_THROW( vhypot(y,x,1.f,x,1.f,x,1.f,x,1.f), domain_error );
1501  CHECK_THROW( vhypot(y,1.f,x,1.f,x,1.f,x,1.f,x), domain_error );
1502  x = numeric_limits<sys_float>().quiet_NaN();
1503  CHECK_THROW( vhypot(y,x,1.f,x,1.f,x,1.f,x,1.f), domain_error );
1504  CHECK_THROW( vhypot(y,1.f,x,1.f,x,1.f,x,1.f,x), domain_error );
1505  vhypot(y,0.f,0.f,1.f,0.f,0.f,1.f,FLT_MAX/2.f,FLT_MAX/2.f);
1506  CHECK( fp_equal( y[0], 0.f ) );
1507  CHECK( fp_equal( y[1], 1.f ) );
1508  CHECK( fp_equal( y[2], 1.f ) );
1509  CHECK( fp_equal( y[3], FLT_MAX/sqrtf(2.f) ) );
1510  // now check results over the entire range
1511  init_genrand( 395912717L );
1512  for( int i=0; i < sz; ++i )
1513  {
1514  arg1[i] = exp(genrand_real1()*175.70-87.33);
1515  if( (i%4) < 2 )
1516  arg1[i] = -arg1[i];
1517  arg2[i] = exp(genrand_real1()*175.70-87.33);
1518  if( (i%2) == 1 )
1519  arg2[i] = -arg2[i];
1520  }
1521  vhypot( arg1, arg2, val, 0, sz );
1522  for( int i=0; i < sz; ++i )
1523  {
1524  CHECK( fp_equal( val[i], hypotf(arg1[i], arg2[i]) ) );
1525  }
1526 #endif
1527  for( int i=0; i < 32; ++i )
1528  {
1529  arg1[i] = sys_float(i);
1530  arg2[i] = sys_float(i);
1531  }
1532  // finally check that non-aligned arrays are treated correctly
1533  for( int i=0; i < 32; ++i )
1534  {
1535  for( int j=i+1; j <= 32; ++j )
1536  {
1537  invalidate_array( val, 32*sizeof(sys_float) );
1538  vhypot( arg1, arg2, val, i, j );
1539  for( int k=0; k < 32; ++k )
1540  {
1541  if( k < i || k >= j )
1542  CHECK( isnanf(val[k]) );
1543  else
1544  CHECK( fp_equal( val[k], sys_float(k)*sqrtf(2.f) ) );
1545  }
1546  invalidate_array( val, 32*sizeof(sys_float) );
1547  vhypot( &arg1[i], &arg2[i], val, 0, j-i );
1548  for( int k=0; k < 32; ++k )
1549  {
1550  if( k >= j-i )
1551  CHECK( isnanf(val[k]) );
1552  else
1553  CHECK( fp_equal( val[k], sys_float(k+i)*sqrtf(2.f) ) );
1554  }
1555  invalidate_array( val, 32*sizeof(sys_float) );
1556  vhypot( &arg1[i], arg2, &val[i], 0, j-i );
1557  for( int k=0; k < 32; ++k )
1558  {
1559  if( k < i || k >= j )
1560  CHECK( isnanf(val[k]) );
1561  else
1562  CHECK( fp_equal( val[k], sqrtf(sys_float((k-i)*(k-i)+k*k)) ) );
1563  }
1564  invalidate_array( val, 32*sizeof(sys_float) );
1565  vhypot( arg1, &arg2[i], &val[i], 0, j-i );
1566  for( int k=0; k < 32; ++k )
1567  {
1568  if( k < i || k >= j )
1569  CHECK( isnanf(val[k]) );
1570  else
1571  CHECK( fp_equal( val[k], sqrtf(sys_float((k-i)*(k-i)+k*k)) ) );
1572  }
1573  invalidate_array( val, 32*sizeof(sys_float) );
1574  vhypot( &arg1[i], &arg2[i/2], val, 0, j-i );
1575  for( int k=0; k < 32; ++k )
1576  {
1577  if( k >= j-i )
1578  CHECK( isnanf(val[k]) );
1579  else
1580  {
1581  sys_float ref2 = sys_float((k+i)*(k+i) + (k+i/2)*(k+i/2));
1582  CHECK( fp_equal( val[k], sqrtf(ref2) ) );
1583  }
1584  }
1585  }
1586  }
1587  }
1588 
1589  TEST(TestVsqrtdx4)
1590  {
1591  double y[4];
1592  vsqrt(y,11.,22.,33.,44.);
1593  CHECK( fp_equal( y[0], sqrt(11.) ) );
1594  CHECK( fp_equal( y[1], sqrt(22.) ) );
1595  CHECK( fp_equal( y[2], sqrt(33.) ) );
1596  CHECK( fp_equal( y[3], sqrt(44.) ) );
1597  }
1598 
1599  TEST(TestVhypotdx4)
1600  {
1601  double y[4];
1602  vhypot(y,11.,22.,33.,44.,55.,66.,77.,88.);
1603  CHECK( fp_equal( y[0], hypot(11.,22.) ) );
1604  CHECK( fp_equal( y[1], hypot(33.,44.) ) );
1605  CHECK( fp_equal( y[2], hypot(55.,66.) ) );
1606  CHECK( fp_equal( y[3], hypot(77.,88.) ) );
1607  }
1608 
1609  TEST(TestVsqrtdx8)
1610  {
1611  double y[8];
1612  vsqrt(y,11.,22.,33.,44.,55.,66.,77.,88.);
1613  CHECK( fp_equal( y[0], sqrt(11.) ) );
1614  CHECK( fp_equal( y[1], sqrt(22.) ) );
1615  CHECK( fp_equal( y[2], sqrt(33.) ) );
1616  CHECK( fp_equal( y[3], sqrt(44.) ) );
1617  CHECK( fp_equal( y[4], sqrt(55.) ) );
1618  CHECK( fp_equal( y[5], sqrt(66.) ) );
1619  CHECK( fp_equal( y[6], sqrt(77.) ) );
1620  CHECK( fp_equal( y[7], sqrt(88.) ) );
1621  }
1622 
1623  TEST(TestVsqrtfx4)
1624  {
1625  sys_float y[4];
1626  vsqrt(y,11.f,22.f,33.f,44.f);
1627  CHECK( fp_equal( y[0], sqrtf(11.f) ) );
1628  CHECK( fp_equal( y[1], sqrtf(22.f) ) );
1629  CHECK( fp_equal( y[2], sqrtf(33.f) ) );
1630  CHECK( fp_equal( y[3], sqrtf(44.f) ) );
1631  }
1632 
1633  TEST(TestVhypotfx4)
1634  {
1635  sys_float y[4];
1636  vhypot(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1637  CHECK( fp_equal( y[0], hypotf(11.f,22.f) ) );
1638  CHECK( fp_equal( y[1], hypotf(33.f,44.f) ) );
1639  CHECK( fp_equal( y[2], hypotf(55.f,66.f) ) );
1640  CHECK( fp_equal( y[3], hypotf(77.f,88.f) ) );
1641  }
1642 
1643  TEST(TestVsqrtfx8)
1644  {
1645  sys_float y[8];
1646  vsqrt(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1647  CHECK( fp_equal( y[0], sqrtf(11.f) ) );
1648  CHECK( fp_equal( y[1], sqrtf(22.f) ) );
1649  CHECK( fp_equal( y[2], sqrtf(33.f) ) );
1650  CHECK( fp_equal( y[3], sqrtf(44.f) ) );
1651  CHECK( fp_equal( y[4], sqrtf(55.f) ) );
1652  CHECK( fp_equal( y[5], sqrtf(66.f) ) );
1653  CHECK( fp_equal( y[6], sqrtf(77.f) ) );
1654  CHECK( fp_equal( y[7], sqrtf(88.f) ) );
1655  }
1656 
1657  TEST(TestVhypotfx8)
1658  {
1659  sys_float y[8];
1660  vhypot(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1661  CHECK( fp_equal( y[0], hypotf(11.f,22.f) ) );
1662  CHECK( fp_equal( y[1], hypotf(33.f,44.f) ) );
1663  CHECK( fp_equal( y[2], hypotf(55.f,66.f) ) );
1664  CHECK( fp_equal( y[3], hypotf(77.f,88.f) ) );
1665  CHECK( fp_equal( y[4], hypotf(111.f,122.f) ) );
1666  CHECK( fp_equal( y[5], hypotf(133.f,144.f) ) );
1667  CHECK( fp_equal( y[6], hypotf(155.f,166.f) ) );
1668  CHECK( fp_equal( y[7], hypotf(177.f,188.f) ) );
1669  }
1670 
1671  TEST(TestVsqrtfx16)
1672  {
1673  sys_float y[16];
1674  vsqrt(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1675  CHECK( fp_equal( y[0], sqrtf(11.f) ) );
1676  CHECK( fp_equal( y[1], sqrtf(22.f) ) );
1677  CHECK( fp_equal( y[2], sqrtf(33.f) ) );
1678  CHECK( fp_equal( y[3], sqrtf(44.f) ) );
1679  CHECK( fp_equal( y[4], sqrtf(55.f) ) );
1680  CHECK( fp_equal( y[5], sqrtf(66.f) ) );
1681  CHECK( fp_equal( y[6], sqrtf(77.f) ) );
1682  CHECK( fp_equal( y[7], sqrtf(88.f) ) );
1683  CHECK( fp_equal( y[8], sqrtf(111.f) ) );
1684  CHECK( fp_equal( y[9], sqrtf(122.f) ) );
1685  CHECK( fp_equal( y[10], sqrtf(133.f) ) );
1686  CHECK( fp_equal( y[11], sqrtf(144.f) ) );
1687  CHECK( fp_equal( y[12], sqrtf(155.f) ) );
1688  CHECK( fp_equal( y[13], sqrtf(166.f) ) );
1689  CHECK( fp_equal( y[14], sqrtf(177.f) ) );
1690  CHECK( fp_equal( y[15], sqrtf(188.f) ) );
1691  }
1692 
1693  TEST(TestVasinhd)
1694  {
1695 #ifdef __AVX__
1696  double x, y[4];
1697  x = -numeric_limits<double>().infinity();
1698  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1699  x = numeric_limits<double>().infinity();
1700  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1701  x = -numeric_limits<double>().quiet_NaN();
1702  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1703  x = 0.;
1704  vasinh(y,x,x,x,x);
1705  CHECK( y[0] == 0. );
1706 
1707  const int sz = 2048;
1708  ALIGNED(CD_ALIGN) double arg[sz];
1709  ALIGNED(CD_ALIGN) double val[sz];
1710  init_genrand( 335366717L );
1711  for( int i=0; i < sz; ++i )
1712  {
1713  arg[i] = exp(genrand_real1()*1418.17-708.39);
1714  if( (i%2) == 1 )
1715  arg[i] = -arg[i];
1716  }
1717  vasinh( arg, val, 0, sz );
1718  for( int i=0; i < sz; ++i )
1719  CHECK( fp_equal( val[i], asinh(arg[i]) ) );
1720 #else
1721  CHECK( fp_equal( 1., 1. ) );
1722 #endif
1723  }
1724 
1725  TEST(TestVasinhdFast)
1726  {
1727 #ifdef __AVX__
1728  double x, y[4];
1729  x = -numeric_limits<double>().infinity();
1730  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1731  x = numeric_limits<double>().infinity();
1732  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1733  x = numeric_limits<double>().quiet_NaN();
1734  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1735  x = -1.;
1736  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1737  double z = sqrt(DBL_MAX);
1738  x = nextafter(z,DBL_MAX);
1739  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1740  x = nextafter(z,-DBL_MAX);
1741  vfast_asinh(y,x,x,x,x);
1742  CHECK( y[0] > 0. && y[0] < DBL_MAX );
1743  x = 0.;
1744  vfast_asinh(y,x,x,x,x);
1745  CHECK( y[0] == 0. );
1746 
1747  const int sz = 2048;
1748  ALIGNED(CD_ALIGN) double arg[sz];
1749  ALIGNED(CD_ALIGN) double val[sz];
1750  init_genrand( 335777717L );
1751  for( int i=0; i < sz; ++i )
1752  arg[i] = exp(genrand_real1()*1063.28-708.39);
1753  vfast_asinh( arg, val, 0, sz );
1754  for( int i=0; i < sz; ++i )
1755  CHECK( fp_equal( val[i], asinh(arg[i]) ) );
1756 #else
1757  CHECK( fp_equal( 1., 1. ) );
1758 #endif
1759  }
1760 
1761  TEST(TestVasinhf)
1762  {
1763 #ifdef __AVX__
1764  sys_float x, y[4];
1765  x = -numeric_limits<sys_float>().infinity();
1766  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1767  x = numeric_limits<sys_float>().infinity();
1768  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1769  x = numeric_limits<sys_float>().quiet_NaN();
1770  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1771  x = 0.f;
1772  vasinh(y,x,x,x,x);
1773  CHECK( y[0] == 0.f );
1774 
1775  const int sz = 2048;
1776  ALIGNED(CD_ALIGN) sys_float arg[sz];
1777  ALIGNED(CD_ALIGN) sys_float val[sz];
1778  init_genrand( 335391417L );
1779  for( int i=0; i < sz; ++i )
1780  {
1781  arg[i] = exp(genrand_real1()*176.05-87.33);
1782  if( (i%2) == 1 )
1783  arg[i] = -arg[i];
1784  }
1785  vasinh( arg, val, 0, sz );
1786  for( int i=0; i < sz; ++i )
1787  CHECK( fp_equal( val[i], asinhf(arg[i]) ) );
1788 #else
1789  CHECK( fp_equal( 1., 1. ) );
1790 #endif
1791  }
1792 
1793  TEST(TestVasinhfFast)
1794  {
1795 #ifdef __AVX__
1796  sys_float x, y[4];
1797  x = -numeric_limits<sys_float>().infinity();
1798  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1799  x = numeric_limits<sys_float>().infinity();
1800  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1801  x = numeric_limits<sys_float>().quiet_NaN();
1802  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1803  x = -1.f;
1804  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1805  sys_float z = sqrt(FLT_MAX);
1806  x = nextafterf(z,FLT_MAX);
1807  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1808  x = nextafterf(z,-FLT_MAX);
1809  vfast_asinh(y,x,x,x,x);
1810  CHECK( y[0] > 0.f && y[0] < FLT_MAX );
1811  x = 0.f;
1812  vfast_asinh(y,x,x,x,x);
1813  CHECK( y[0] == 0.f );
1814 
1815  const int sz = 2048;
1816  ALIGNED(CD_ALIGN) sys_float arg[sz];
1817  ALIGNED(CD_ALIGN) sys_float val[sz];
1818  init_genrand( 330286717L );
1819  for( int i=0; i < sz; ++i )
1820  arg[i] = exp(genrand_real1()*131.69-87.33);
1821  vfast_asinh( arg, val, 0, sz );
1822  for( int i=0; i < sz; ++i )
1823  CHECK( fp_equal( val[i], asinhf(arg[i]) ) );
1824 #else
1825  CHECK( fp_equal( 1., 1. ) );
1826 #endif
1827  }
1828 
1829  TEST(TestVasinhdx4)
1830  {
1831  double y[4];
1832  vasinh(y,11.,22.,33.,44.);
1833  CHECK( fp_equal( y[0], asinh(11.) ) );
1834  CHECK( fp_equal( y[1], asinh(22.) ) );
1835  CHECK( fp_equal( y[2], asinh(33.) ) );
1836  CHECK( fp_equal( y[3], asinh(44.) ) );
1837  }
1838 
1839  TEST(TestVasinhdFastx4)
1840  {
1841  double y[4];
1842  vfast_asinh(y,11.,22.,33.,44.);
1843  CHECK( fp_equal( y[0], asinh(11.) ) );
1844  CHECK( fp_equal( y[1], asinh(22.) ) );
1845  CHECK( fp_equal( y[2], asinh(33.) ) );
1846  CHECK( fp_equal( y[3], asinh(44.) ) );
1847  }
1848 
1849  TEST(TestVasinhdx8)
1850  {
1851  double y[8];
1852  vasinh(y,11.,22.,33.,44.,55.,66.,77.,88.);
1853  CHECK( fp_equal( y[0], asinh(11.) ) );
1854  CHECK( fp_equal( y[1], asinh(22.) ) );
1855  CHECK( fp_equal( y[2], asinh(33.) ) );
1856  CHECK( fp_equal( y[3], asinh(44.) ) );
1857  CHECK( fp_equal( y[4], asinh(55.) ) );
1858  CHECK( fp_equal( y[5], asinh(66.) ) );
1859  CHECK( fp_equal( y[6], asinh(77.) ) );
1860  CHECK( fp_equal( y[7], asinh(88.) ) );
1861  }
1862 
1863  TEST(TestVasinhdFastx8)
1864  {
1865  double y[8];
1866  vfast_asinh(y,11.,22.,33.,44.,55.,66.,77.,88.);
1867  CHECK( fp_equal( y[0], asinh(11.) ) );
1868  CHECK( fp_equal( y[1], asinh(22.) ) );
1869  CHECK( fp_equal( y[2], asinh(33.) ) );
1870  CHECK( fp_equal( y[3], asinh(44.) ) );
1871  CHECK( fp_equal( y[4], asinh(55.) ) );
1872  CHECK( fp_equal( y[5], asinh(66.) ) );
1873  CHECK( fp_equal( y[6], asinh(77.) ) );
1874  CHECK( fp_equal( y[7], asinh(88.) ) );
1875  }
1876 
1877  TEST(TestVasinhfx4)
1878  {
1879  sys_float y[4];
1880  vasinh(y,11.f,22.f,33.f,44.f);
1881  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1882  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1883  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1884  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1885  }
1886 
1887  TEST(TestVasinhfFastx4)
1888  {
1889  sys_float y[4];
1890  vfast_asinh(y,11.f,22.f,33.f,44.f);
1891  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1892  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1893  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1894  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1895  }
1896 
1897  TEST(TestVasinhfx8)
1898  {
1899  sys_float y[8];
1900  vasinh(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1901  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1902  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1903  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1904  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1905  CHECK( fp_equal( y[4], asinhf(55.f) ) );
1906  CHECK( fp_equal( y[5], asinhf(66.f) ) );
1907  CHECK( fp_equal( y[6], asinhf(77.f) ) );
1908  CHECK( fp_equal( y[7], asinhf(88.f) ) );
1909  }
1910 
1911  TEST(TestVasinhfFastx8)
1912  {
1913  sys_float y[8];
1914  vfast_asinh(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1915  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1916  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1917  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1918  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1919  CHECK( fp_equal( y[4], asinhf(55.f) ) );
1920  CHECK( fp_equal( y[5], asinhf(66.f) ) );
1921  CHECK( fp_equal( y[6], asinhf(77.f) ) );
1922  CHECK( fp_equal( y[7], asinhf(88.f) ) );
1923  }
1924 
1925  TEST(TestVasinhfx16)
1926  {
1927  sys_float y[16];
1928  vasinh(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1929  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1930  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1931  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1932  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1933  CHECK( fp_equal( y[4], asinhf(55.f) ) );
1934  CHECK( fp_equal( y[5], asinhf(66.f) ) );
1935  CHECK( fp_equal( y[6], asinhf(77.f) ) );
1936  CHECK( fp_equal( y[7], asinhf(88.f) ) );
1937  CHECK( fp_equal( y[8], asinhf(111.f) ) );
1938  CHECK( fp_equal( y[9], asinhf(122.f) ) );
1939  CHECK( fp_equal( y[10], asinhf(133.f) ) );
1940  CHECK( fp_equal( y[11], asinhf(144.f) ) );
1941  CHECK( fp_equal( y[12], asinhf(155.f) ) );
1942  CHECK( fp_equal( y[13], asinhf(166.f) ) );
1943  CHECK( fp_equal( y[14], asinhf(177.f) ) );
1944  CHECK( fp_equal( y[15], asinhf(188.f) ) );
1945  }
1946 
1947  TEST(TestVasinhfFastx16)
1948  {
1949  sys_float y[16];
1950  vfast_asinh(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1951  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1952  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1953  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1954  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1955  CHECK( fp_equal( y[4], asinhf(55.f) ) );
1956  CHECK( fp_equal( y[5], asinhf(66.f) ) );
1957  CHECK( fp_equal( y[6], asinhf(77.f) ) );
1958  CHECK( fp_equal( y[7], asinhf(88.f) ) );
1959  CHECK( fp_equal( y[8], asinhf(111.f) ) );
1960  CHECK( fp_equal( y[9], asinhf(122.f) ) );
1961  CHECK( fp_equal( y[10], asinhf(133.f) ) );
1962  CHECK( fp_equal( y[11], asinhf(144.f) ) );
1963  CHECK( fp_equal( y[12], asinhf(155.f) ) );
1964  CHECK( fp_equal( y[13], asinhf(166.f) ) );
1965  CHECK( fp_equal( y[14], asinhf(177.f) ) );
1966  CHECK( fp_equal( y[15], asinhf(188.f) ) );
1967  }
1968 }
double exp10(double x)
Definition: cddefines.h:1368
void avx_free(void *p_ptr_alloc)
Definition: vectorize.h:124
T * get_ptr(T *v)
Definition: cddefines.h:1109
double genrand_real1()
void vlog10(const double x[], double y[], long nlo, long nhi)
void vfast_asinh(const double x[], double y[], long nlo, long nhi)
void invalidate_array(T *p, size_t size)
Definition: cddefines.h:1091
ALIGNED(CD_ALIGN) static const double qg32_w[numPoints]
void vlog1p(const double x[], double y[], long nlo, long nhi)
void vhypot(const double x1[], const double x2[], double y[], long nlo, long nhi)
unsigned long genrand_int32()
void vlog(const double x[], double y[], long nlo, long nhi)
double reduce_a(const double *a, long ilo, long ihi)
void * avx_alloc(size_t sz)
Definition: vectorize.h:105
void vexp(const double x[], double y[], long nlo, long nhi)
bool fp_equal(sys_float x, sys_float y, int n=3)
Definition: cddefines.h:854
float sys_float
Definition: cddefines.h:127
static double a1[83]
void vexp10(const double x[], double y[], long nlo, long nhi)
double reduce_abc_ab(const double *a, const double *b, const double *c, long ilo, long ihi, double *sum_ab)
void vasinh(const double x[], double y[], long nlo, long nhi)
double reduce_ab(const double *a, const double *b, long ilo, long ihi)
double reduce_ab_a(const double *a, const double *b, long ilo, long ihi, double *sum_a)
void vsqrt(const double x[], double y[], long nlo, long nhi)
T pow2(T a)
Definition: cddefines.h:981
#define isnan
Definition: cddefines.h:659
double reduce_abc(const double *a, const double *b, const double *c, long ilo, long ihi)
void init_genrand(unsigned long s)
#define CD_ALIGN
Definition: cpu.h:156
void vexpm1(const double x[], double y[], long nlo, long nhi)
static double a2[63]