Cloudy
Spectral Synthesis Code for Astrophysics
Loading...
Searching...
No Matches
vectorize.h
Go to the documentation of this file.
1/* This file is part of Cloudy and is copyright (C)1978-2025 by Gary J. Ferland and
2 * others. For conditions of distribution and use see copyright notice in license.txt */
3
4#ifndef VECTORIZE_H
5#define VECTORIZE_H
6
7#include "vectorize_reduce.h"
8#include "vectorize_exp.h"
9#include "vectorize_log.h"
10#include "vectorize_sqrt.h"
11#include "vectorize_hyper.h"
12
13//
14// The t_avx_pool class maintains a pool of scratch arrays that can be used by the vectorization
15// routines. The memory is correctly aligned on an AVX boundary.
16//
17// To use the pool, declare a pointer variable as follows
18//
19// #include "vectorize.h"
20// avx_ptr<realnum> a(n), b(nlo, nhi), c(nlo, nhi);
21//
22// You can index the resulting pointers as you would a normal array
23//
24// realnum c = a[0]*b[2];
25//
26// Indexing the arrays is valid for the following indices:
27//
28// a[i] : for 0 <= i < n
29// b[i] : for nlo <= i < nhi
30//
31// In the case of the array b[], the element b[nlo] will be correctly aligned on an AVX boundary. To
32// pass the scratch arrays to the vectorization routine, use the following:
33//
34// vexp( b.ptr0(), c.ptr0(), nlo, nhi );
35//
36// When the avx_ptr variable goes out of scope, the scratch array will be released automatically.
37// An avx_ptr supports bounds checking when the BOUNDS_CHECK macro is defined at compile time
38// (similar to multi_arr and flex_arr).
39//
40// The t_avx_pool class maintains a minimum pool of p_min_size arrays. The size of the pool can grow
41// when needed. When a scratch array is released with an index number >= p_min_size, the memory will
42// be freed immediately to conserve memory.
43//
44// The default scratch array can hold p_def_size doubles. In the default setup that should be larger
45// than rfield.nflux_with_check for efficiency since an array of that size is plausible to be
46// requested. If the frequency mesh is larger (e.g. due to the SET CONTINUUM RESOLUTION command) the
47// following optimization will be used. The first time an array larger than p_def_size doubles is
48// requested, it will have to allocated. When that array is subsequently released, it is swapped
49// with an unused smaller array with index < p_min_size, and then the smaller array is freed. This
50// gives near optimal performance even for large frequency grids, at the expense of a slight
51// overhead when releasing the scratch array.
52//
53
55{
56 vector<void*> p_ptr;
57 vector<size_t> p_size;
58 vector<bool> p_used;
59
60 // for efficiency this number should be larger than rfield.nflux_with_check
61 // in the default setup -- adjust this constant if that is not the case.
62 static const size_t p_def_size = 8500*sizeof(double);
63 static const size_t p_min_size = 30;
64
65 void p_alloc(size_t sz, bool lgUsed)
66 {
67 void *p_ptr_alloc;
68 if( posix_memalign(&p_ptr_alloc, CD_ALIGN, sz) != 0 )
69 throw bad_alloc();
70 p_ptr.push_back(p_ptr_alloc);
71 p_size.push_back(sz);
72 p_used.push_back(lgUsed);
73 }
74
75public:
77 {
78 for( size_t i=0; i < p_min_size; ++i )
79 p_alloc(p_def_size,false);
80 }
82 {
83 for( size_t i=0; i < p_ptr.size(); ++i )
85 }
86 void* avx_alloc(size_t sz)
87 {
88 void* p_ptr_alloc = NULL;
89 for( size_t i=0; i < p_ptr.size(); i++ )
90 {
91 if( !p_used[i] && sz <= p_size[i] )
92 {
93 p_ptr_alloc = p_ptr[i];
94 p_used[i] = true;
95 break;
96 }
97 }
98 if( p_ptr_alloc == NULL )
99 {
100 p_alloc(sz,true);
101 p_ptr_alloc = p_ptr.back();
102 }
103 return p_ptr_alloc;
104 }
105 void avx_free(void* p_ptr_alloc)
106 {
107 size_t i;
108 for( i=0; i < p_ptr.size(); i++ )
109 {
110 if( p_ptr[i] == p_ptr_alloc )
111 {
112 if( i >= p_min_size )
113 {
114 //
115 size_t j = p_min_size;
116 for( j=0; j < p_min_size; ++j )
117 {
118 if( !p_used[j] && p_size[j] < p_size[i] )
119 break;
120 }
121 if( j < p_min_size )
122 {
124 p_ptr[j] = p_ptr[i];
125 p_size[j] = p_size[i];
126 }
127 else
128 {
130 }
131 p_ptr[i] = NULL;
132 p_size[i] = 0;
133 }
134 p_used[i] = false;
135 break;
136 }
137 }
138 // clean up unused entries at the back
139 while( p_ptr.back() == NULL )
140 {
141 p_ptr.pop_back();
142 p_size.pop_back();
143 p_used.pop_back();
144 }
145 }
146};
147
148extern t_avx_pool avx_pool;
149
150template<class T, bool lgBC=lgBOUNDSCHECKVAL>
152{
154 long p_end;
157
158 // make default constructor private since it shouldn't be used
160 {
161 p_ptr = NULL;
163 p_begin = 0;
164 p_end = 0;
165 }
166 void p_alloc(long begin, long end)
167 {
168 size_t sz = size_t(max(end-begin,0)*sizeof(T));
169 if( sz == 0 )
170 {
172 p_ptr = NULL;
173 p_begin = 0;
174 p_end = 0;
175 }
176 else
177 {
178 p_ptr_alloc = static_cast<T*>(avx_pool.avx_alloc(sz));
179 p_ptr = p_ptr_alloc - begin;
180 p_begin = begin;
181 p_end = end;
182 }
183 }
184public:
185 typedef T& reference;
186 typedef const T& const_reference;
187
188 explicit avx_ptr(long size)
189 {
190 p_alloc(0, size);
191 }
192 avx_ptr(long begin, long end)
193 {
194 p_alloc(begin, end);
195 }
197 {
198 if( p_ptr_alloc != NULL )
199 avx_pool.avx_free(p_ptr_alloc);
200 }
202 {
203 if( lgBC && ( i < p_begin || i >= p_end ) )
204 OUT_OF_RANGE( "avx_ptr::operator[]" );
205 return *(p_ptr+i);
206 }
208 {
209 if( lgBC && ( i < p_begin || i >= p_end ) )
210 OUT_OF_RANGE( "avx_ptr::operator[]" );
211 return *(p_ptr+i);
212 }
213 T* data()
214 {
215 return p_ptr_alloc;
216 }
217 const T* data() const
218 {
219 return p_ptr_alloc;
220 }
221 T* ptr0()
222 {
223 return p_ptr;
224 }
225 const T* ptr0() const
226 {
227 return p_ptr;
228 }
229};
230
231//
232// The allocator_avx class can be used to get a vector with internal data aligned on an AVX boundary
233//
234// Use the class as follows:
235//
236// #include "vectorize.h"
237// vector< realnum, allocator_avx<realnum> > arr;
238//
239// Now that C++11 is in effect, you can also use the following shorthand:
240//
241// vector_avx<realnum> arr;
242//
243
244template<class T>
245class allocator_avx;
246
247template<>
248class allocator_avx<void>
249{
250public:
251 typedef size_t size_type;
252 typedef ptrdiff_t difference_type;
253 typedef void* pointer;
254 typedef const void* const_pointer;
255 typedef void value_type;
256
257 template<class U>
258 struct rebind { typedef allocator_avx<U> other; };
259
261};
262
263template<class T>
264class allocator_avx : public allocator<T>
265{
266public:
267 typedef size_t size_type;
268 typedef ptrdiff_t difference_type;
269 typedef T* pointer;
270 typedef const T* const_pointer;
271 typedef T& reference;
272 typedef const T& const_reference;
273 typedef T value_type;
274
275 template<class U>
276 struct rebind
277 {
279 };
280
282
283 allocator_avx() throw() {}
284
285 allocator_avx(const allocator_avx& a) throw() : allocator<T>(a) {}
286
287 template<class U>
289
290 ~allocator_avx() throw() {}
291
293 {
294 void* p;
295 if( posix_memalign(&p, CD_ALIGN, n*sizeof(T)) != 0 )
296 throw bad_alloc();
297 return pointer(p);
298 }
299
300 void deallocate(pointer p, size_type) throw()
301 {
303 }
304};
305
306template<class T, class U>
308{
309 return true;
310}
311
312template<class T>
314{
315 return true;
316}
317
318template<class T, class U>
320{
321 return false;
322}
323
324template<class T>
326{
327 return false;
328}
329
330template<typename T>
331using vector_avx = typename std::vector<T,allocator_avx<T>>;
332
333#endif
#define NULL
Definition cddefines.h:115
NORETURN void OUT_OF_RANGE(const char *str)
Definition cddefines.h:646
long max(int a, long b)
Definition cddefines.h:821
void * pointer
Definition vectorize.h:253
ptrdiff_t difference_type
Definition vectorize.h:252
size_t size_type
Definition vectorize.h:251
true_type propagate_on_container_move_assignment
Definition vectorize.h:260
const void * const_pointer
Definition vectorize.h:254
void value_type
Definition vectorize.h:255
Definition vectorize.h:265
U & reference
Definition vectorize.h:271
U value_type
Definition vectorize.h:273
U * pointer
Definition vectorize.h:269
const U & const_reference
Definition vectorize.h:272
size_t size_type
Definition vectorize.h:267
ptrdiff_t difference_type
Definition vectorize.h:268
pointer allocate(size_type n, typename allocator_avx< void >::const_pointer=NULL)
Definition vectorize.h:292
~allocator_avx()
Definition vectorize.h:290
void deallocate(pointer p, size_type)
Definition vectorize.h:300
allocator_avx()
Definition vectorize.h:283
const U * const_pointer
Definition vectorize.h:270
true_type propagate_on_container_move_assignment
Definition vectorize.h:281
allocator_avx(const allocator_avx< U > &)
Definition vectorize.h:288
allocator_avx(const allocator_avx &a)
Definition vectorize.h:285
T * p_ptr
Definition vectorize.h:156
avx_ptr(long size)
Definition vectorize.h:188
void p_alloc(long begin, long end)
Definition vectorize.h:166
const T * data() const
Definition vectorize.h:217
T * p_ptr_alloc
Definition vectorize.h:155
reference operator[](long i)
Definition vectorize.h:201
long p_end
Definition vectorize.h:154
const T & const_reference
Definition vectorize.h:186
avx_ptr()
Definition vectorize.h:159
long p_begin
Definition vectorize.h:153
T & reference
Definition vectorize.h:185
T * data()
Definition vectorize.h:213
avx_ptr(long begin, long end)
Definition vectorize.h:192
~avx_ptr()
Definition vectorize.h:196
const T * ptr0() const
Definition vectorize.h:225
T * ptr0()
Definition vectorize.h:221
Definition vectorize.h:55
static const size_t p_def_size
Definition vectorize.h:62
static const size_t p_min_size
Definition vectorize.h:63
vector< void * > p_ptr
Definition vectorize.h:56
void p_alloc(size_t sz, bool lgUsed)
Definition vectorize.h:65
void avx_free(void *p_ptr_alloc)
Definition vectorize.h:105
t_avx_pool()
Definition vectorize.h:76
void * avx_alloc(size_t sz)
Definition vectorize.h:86
~t_avx_pool()
Definition vectorize.h:81
vector< size_t > p_size
Definition vectorize.h:57
vector< bool > p_used
Definition vectorize.h:58
void posix_memalign_free(void *p)
Definition cpu.h:143
#define CD_ALIGN
Definition cpu.h:127
Definition vectorize.h:277
allocator_avx< U > other
Definition vectorize.h:278
Definition vectorize.h:258
allocator_avx< U > other
Definition vectorize.h:258
t_avx_pool avx_pool
Definition vectorize.cpp:7
bool operator!=(const allocator_avx< T > &, const allocator_avx< U > &)
Definition vectorize.h:319
t_avx_pool avx_pool
Definition vectorize.cpp:7
bool operator==(const allocator_avx< T > &, const allocator_avx< U > &)
Definition vectorize.h:307
typename std::vector< T, allocator_avx< T > > vector_avx
Definition vectorize.h:331