Cloudy
Spectral Synthesis Code for Astrophysics
Loading...
Searching...
No Matches
vectorize_hyper_core.h
Go to the documentation of this file.
1/* This file is part of Cloudy and is copyright (C)1978-2025 by Gary J. Ferland and
2 * others. For conditions of distribution and use see copyright notice in license.txt */
3
4#ifndef VECTORIZE_HYPER_CORE_H
5#define VECTORIZE_HYPER_CORE_H
6
7#include "vectorize_math.h"
10
11//
12// Written by Peter A.M. van Hoof, Royal Observatory of Belgium, Brussels
13//
14// this file contains vectorized versions of the single and double variants of the asinh()
15// function. They are vectorized using AVX instructions, but also make use of AVX2, FMA,
16// and AVX512 instructions when available. The basic algorithms for calculating the asinh()
17// functions were somewhat simplified from the openlibm library versions available at
18// http://openlibm.org/ which is subject to the following copyright:
19//
20// ====================================================
21// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
22//
23// Developed at SunSoft, a Sun Microsystems, Inc. business.
24// Permission to use, copy, modify, and distribute this
25// software is freely granted, provided that this notice
26// is preserved.
27// ====================================================
28//
29
30#ifdef __AVX__
31
// 64-bit integer masks: asinh_mask1 has every bit set except the most significant one,
// asinh_mask2 has only the most significant bit set.
// NOTE(review): neither mask is referenced in this file; presumably they are used by the
// callers of v1asinhd_core() to strip and restore the sign bit of the argument -- confirm.
32VECLL_CONST(asinh_mask1,0x7fffffffffffffff);
33VECLL_CONST(asinh_mask2,0x8000000000000000);
34
// The bit pattern below has a biased exponent of 0x41b (= 1023 + 28) and a zero mantissa.
// NOTE(review): not referenced in this file; presumably a large-argument threshold used by
// the callers of v1asinhd_core() -- confirm.
35VECDI_CONST(asinh_2p28,0x41b0000000000000); // 2^28
36
37#ifdef __AVX512F__
38
39inline v8df v1asinhd_core(v8df x)
40{
41 v8df x2 = _mm512_mul_pd(x, x);
42 v8df arg = _mm512_add_pd(x2, one);
43 arg = v1sqrtd_core(arg);
44 arg = _mm512_add_pd(arg, one);
45 arg = _mm512_div_pd(x2, arg);
46 arg = _mm512_add_pd(arg, x);
47 return v1log1pd_core(arg);
48}
49
50#else
51
52inline v4df v1asinhd_core(v4df x)
53{
54 v4df x2 = _mm256_mul_pd(x, x);
55 v4df arg = _mm256_add_pd(x2, one);
56 arg = v1sqrtd_core(arg);
57 arg = _mm256_add_pd(arg, one);
58 arg = _mm256_div_pd(x2, arg);
59 arg = _mm256_add_pd(arg, x);
60 return v1log1pd_core(arg);
61}
62
63#endif // __AVX512F__
64
// 32-bit integer masks: asinh_mask1f has every bit set except the most significant one,
// asinh_mask2f has only the most significant bit set.
// NOTE(review): neither mask is referenced in this file; presumably they are used by the
// callers of v1asinhf_core() to strip and restore the sign bit of the argument -- confirm.
65VECII_CONST(asinh_mask1f,0x7fffffff);
66VECII_CONST(asinh_mask2f,0x80000000);
67
// The bit pattern below has a biased exponent of 0x9b (= 127 + 28) and a zero mantissa.
// NOTE(review): not referenced in this file; presumably a large-argument threshold used by
// the callers of v1asinhf_core() -- confirm.
68VECFI_CONST(asinhf_2p28,0x4d800000); // 2^28
69
70#ifdef __AVX512F__
71
72inline v16sf v1asinhf_core(v16sf x)
73{
74 v16sf x2 = _mm512_mul_ps(x, x);
75 v16sf arg = _mm512_add_ps(x2, onef);
76 arg = v1sqrtf_core(arg);
77 arg = _mm512_add_ps(arg, onef);
78 arg = _mm512_div_ps(x2, arg);
79 arg = _mm512_add_ps(arg, x);
80 return v1log1pf_core(arg);
81}
82
83#else
84
85inline v8sf v1asinhf_core(v8sf x)
86{
87 v8sf x2 = _mm256_mul_ps(x, x);
88 v8sf arg = _mm256_add_ps(x2, onef);
89 arg = v1sqrtf_core(arg);
90 arg = _mm256_add_ps(arg, onef);
91 arg = _mm256_div_ps(x2, arg);
92 arg = _mm256_add_ps(arg, x);
93 return v1log1pf_core(arg);
94}
95
96#endif // __AVX512F__
97
98#endif // __AVX__
99
100#endif
static double x2[63]
Definition atmdat_3body.cpp:20