Cloudy
Spectral Synthesis Code for Astrophysics
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
vectorize.h
Go to the documentation of this file.
1 /* This file is part of Cloudy and is copyright (C)1978-2023 by Gary J. Ferland and
2  * others. For conditions of distribution and use see copyright notice in license.txt */
3 
4 #ifndef VECTORIZE_H
5 #define VECTORIZE_H
6 
7 #include "vectorize_reduce.h"
8 #include "vectorize_exp.h"
9 #include "vectorize_log.h"
10 #include "vectorize_sqrt.h"
11 #include "vectorize_hyper.h"
12 
13 //
14 // The t_avx_pool class maintains a pool of scratch arrays that can be used by the vectorization
15 // routines. The memory is correctly aligned on an AVX boundary.
16 //
17 // To use the pool, declare a pointer variable as follows
18 //
19 // #include "vectorize.h"
20 // avx_ptr<realnum> a(n), b(nlo, nhi), c(nlo, nhi);
21 //
22 // You can index the resulting pointers as you would a normal array
23 //
24 // realnum c = a[0]*b[2];
25 //
26 // Indexing the arrays is valid for the following indices:
27 //
28 // a[i] : for 0 <= i < n
29 // b[i] : for nlo <= i < nhi
30 //
31 // In the case of the array b[], the element b[nlo] will be correctly aligned on an AVX boundary. To
32 // pass the scratch arrays to the vectorization routine, use the following:
33 //
34 // vexp( b.ptr0(), c.ptr0(), nlo, nhi );
35 //
36 // When the avx_ptr variable goes out of scope, the scratch array will be released automatically.
37 // An avx_ptr supports bounds checking when the BOUNDS_CHECK macro is defined at compile time
38 // (similar to multi_arr and flex_arr).
39 //
40 // The t_avx_pool class maintains a minimum pool of p_min_size arrays. The size of the pool can grow
41 // when needed. When a scratch array is released with an index number >= p_min_size, the memory will
42 // be freed immediately to conserve memory.
43 //
44 // The default scratch array can hold p_def_size doubles. In the default setup that should be larger
45 // than rfield.nflux_with_check for efficiency since an array of that size is plausible to be
46 // requested. If the frequency mesh is larger (e.g. due to the SET CONTINUUM RESOLUTION command) the
47 // following optimization will be used. The first time an array larger than p_def_size doubles is
48 // requested, it will have to allocated. When that array is subsequently released, it is swapped
49 // with an unused smaller array with index < p_min_size, and then the smaller array is freed. This
50 // gives near optimal performance even for large frequency grids, at the expense of a slight
51 // overhead when releasing the scratch array.
52 //
53 
55 {
56  vector<void*> p_ptr;
57  vector<size_t> p_size;
58  vector<bool> p_used;
59 
60  // for efficiency this number should be larger than rfield.nflux_with_check
61  // in the default setup -- adjust this constant if that is not the case.
62  static const size_t p_def_size = 8500*sizeof(double);
63  static const size_t p_min_size = 30;
64 
65  void p_alloc(size_t sz, bool lgUsed)
66  {
67  void *p_ptr_alloc;
68  if( posix_memalign(&p_ptr_alloc, CD_ALIGN, sz) != 0 )
69  throw bad_alloc();
70  p_ptr.push_back(p_ptr_alloc);
71  p_size.push_back(sz);
72  p_used.push_back(lgUsed);
73  }
74 
75 public:
77  {
78  for( size_t i=0; i < p_min_size; ++i )
79  p_alloc(p_def_size,false);
80  }
82  {
83  for( size_t i=0; i < p_ptr.size(); ++i )
85  }
86  void* avx_alloc(size_t sz)
87  {
88  void* p_ptr_alloc = NULL;
89  for( size_t i=0; i < p_ptr.size(); i++ )
90  {
91  if( !p_used[i] && sz <= p_size[i] )
92  {
93  p_ptr_alloc = p_ptr[i];
94  p_used[i] = true;
95  break;
96  }
97  }
98  if( p_ptr_alloc == NULL )
99  {
100  p_alloc(sz,true);
101  p_ptr_alloc = p_ptr.back();
102  }
103  return p_ptr_alloc;
104  }
105  void avx_free(void* p_ptr_alloc)
106  {
107  size_t i;
108  for( i=0; i < p_ptr.size(); i++ )
109  {
110  if( p_ptr[i] == p_ptr_alloc )
111  {
112  if( i >= p_min_size )
113  {
114  //
115  size_t j = p_min_size;
116  for( j=0; j < p_min_size; ++j )
117  {
118  if( !p_used[j] && p_size[j] < p_size[i] )
119  break;
120  }
121  if( j < p_min_size )
122  {
124  p_ptr[j] = p_ptr[i];
125  p_size[j] = p_size[i];
126  }
127  else
128  {
130  }
131  p_ptr[i] = NULL;
132  p_size[i] = 0;
133  }
134  p_used[i] = false;
135  break;
136  }
137  }
138  // clean up unused entries at the back
139  while( p_ptr.back() == NULL )
140  {
141  p_ptr.pop_back();
142  p_size.pop_back();
143  p_used.pop_back();
144  }
145  }
146 };
147 
148 extern t_avx_pool avx_pool;
149 
150 template<class T, bool lgBC=lgBOUNDSCHECKVAL>
151 class avx_ptr
152 {
153  long p_begin;
154  long p_end;
156  T* p_ptr;
157 
158  // make default constructor private since it shouldn't be used
160  {
161  p_ptr = NULL;
162  p_ptr_alloc = NULL;
163  p_begin = 0;
164  p_end = 0;
165  }
166  void p_alloc(long begin, long end)
167  {
168  size_t sz = size_t(max(end-begin,0)*sizeof(T));
169  if( sz == 0 )
170  {
171  p_ptr_alloc = NULL;
172  p_ptr = NULL;
173  p_begin = 0;
174  p_end = 0;
175  }
176  else
177  {
178  p_ptr_alloc = static_cast<T*>(avx_pool.avx_alloc(sz));
179  p_ptr = p_ptr_alloc - begin;
180  p_begin = begin;
181  p_end = end;
182  }
183  }
184 public:
185  typedef T& reference;
186  typedef const T& const_reference;
187 
188  explicit avx_ptr(long size)
189  {
190  p_alloc(0, size);
191  }
192  avx_ptr(long begin, long end)
193  {
194  p_alloc(begin, end);
195  }
197  {
198  if( p_ptr_alloc != NULL )
199  avx_pool.avx_free(p_ptr_alloc);
200  }
202  {
203  if( lgBC && ( i < p_begin || i >= p_end ) )
204  OUT_OF_RANGE( "avx_ptr::operator[]" );
205  return *(p_ptr+i);
206  }
208  {
209  if( lgBC && ( i < p_begin || i >= p_end ) )
210  OUT_OF_RANGE( "avx_ptr::operator[]" );
211  return *(p_ptr+i);
212  }
213  T* data()
214  {
215  return p_ptr_alloc;
216  }
217  const T* data() const
218  {
219  return p_ptr_alloc;
220  }
221  T* ptr0()
222  {
223  return p_ptr;
224  }
225  const T* ptr0() const
226  {
227  return p_ptr;
228  }
229 };
230 
231 //
232 // The allocator_avx class can be used to get a vector with internal data aligned on an AVX boundary
233 //
234 // Use the class as follows:
235 //
236 // #include "vectorize.h"
237 // vector< realnum, allocator_avx<realnum> > arr;
238 //
239 // Now that C++11 is in effect, you can also use the following shorthand:
240 //
241 // vector_avx<realnum> arr;
242 //
243 
244 template<class T>
246 
247 template<>
248 class allocator_avx<void>
249 {
250 public:
251  typedef size_t size_type;
252  typedef ptrdiff_t difference_type;
253  typedef void* pointer;
254  typedef const void* const_pointer;
255  typedef void value_type;
256 
257  template<class U>
258  struct rebind { typedef allocator_avx<U> other; };
259 
261 };
262 
263 template<class T>
264 class allocator_avx : public allocator<T>
265 {
266 public:
267  typedef size_t size_type;
268  typedef ptrdiff_t difference_type;
269  typedef T* pointer;
270  typedef const T* const_pointer;
271  typedef T& reference;
272  typedef const T& const_reference;
273  typedef T value_type;
274 
275  template<class U>
276  struct rebind
277  {
279  };
280 
282 
283  allocator_avx() throw() {}
284 
285  allocator_avx(const allocator_avx& a) throw() : allocator<T>(a) {}
286 
287  template<class U>
288  allocator_avx(const allocator_avx<U>&) throw() {}
289 
290  ~allocator_avx() throw() {}
291 
293  {
294  void* p;
295  if( posix_memalign(&p, CD_ALIGN, n*sizeof(T)) != 0 )
296  throw bad_alloc();
297  return pointer(p);
298  }
299 
300  void deallocate(pointer p, size_type) throw()
301  {
303  }
304 };
305 
306 template<class T, class U>
307 inline bool operator== (const allocator_avx<T>&, const allocator_avx<U>&)
308 {
309  return true;
310 }
311 
312 template<class T>
313 inline bool operator== (const allocator_avx<T>&, const allocator_avx<T>&)
314 {
315  return true;
316 }
317 
318 template<class T, class U>
319 inline bool operator!= (const allocator_avx<T>&, const allocator_avx<U>&)
320 {
321  return false;
322 }
323 
324 template<class T>
325 inline bool operator!= (const allocator_avx<T>&, const allocator_avx<T>&)
326 {
327  return false;
328 }
329 
330 template<typename T>
331 using vector_avx = typename std::vector<T,allocator_avx<T>>;
332 
333 #endif
T * p_ptr
Definition: vectorize.h:156
t_avx_pool()
Definition: vectorize.h:76
T & reference
Definition: vectorize.h:271
static const size_t p_min_size
Definition: vectorize.h:63
Definition: vectorize.h:245
Definition: vectorize.h:151
const T * const_pointer
Definition: vectorize.h:270
T * ptr0()
Definition: vectorize.h:221
~allocator_avx()
Definition: vectorize.h:290
void avx_free(void *p_ptr_alloc)
Definition: vectorize.h:105
ptrdiff_t difference_type
Definition: vectorize.h:268
allocator_avx< U > other
Definition: vectorize.h:258
allocator_avx(const allocator_avx< U > &)
Definition: vectorize.h:288
long p_end
Definition: vectorize.h:154
vector< size_t > p_size
Definition: vectorize.h:57
void * pointer
Definition: vectorize.h:253
avx_ptr(long size)
Definition: vectorize.h:188
void p_alloc(size_t sz, bool lgUsed)
Definition: vectorize.h:65
ptrdiff_t difference_type
Definition: vectorize.h:252
const T * ptr0() const
Definition: vectorize.h:225
NORETURN void OUT_OF_RANGE(const char *str)
Definition: cddefines.h:646
void * avx_alloc(size_t sz)
Definition: vectorize.h:86
allocator_avx< U > other
Definition: vectorize.h:278
true_type propagate_on_container_move_assignment
Definition: vectorize.h:281
void deallocate(pointer p, size_type)
Definition: vectorize.h:300
true_type propagate_on_container_move_assignment
Definition: vectorize.h:260
typename std::vector< T, allocator_avx< T >> vector_avx
Definition: vectorize.h:331
bool operator!=(const allocator_avx< T > &, const allocator_avx< U > &)
Definition: vectorize.h:319
bool operator==(const allocator_avx< T > &, const allocator_avx< U > &)
Definition: vectorize.h:307
avx_ptr()
Definition: vectorize.h:159
reference operator[](long i)
Definition: vectorize.h:201
T * data()
Definition: vectorize.h:213
long max(int a, long b)
Definition: cddefines.h:821
allocator_avx()
Definition: vectorize.h:283
#define NULL
Definition: cddefines.h:115
~avx_ptr()
Definition: vectorize.h:196
void p_alloc(long begin, long end)
Definition: vectorize.h:166
size_t size_type
Definition: vectorize.h:251
void posix_memalign_free(void *p)
Definition: cpu.h:143
const T & const_reference
Definition: vectorize.h:186
~t_avx_pool()
Definition: vectorize.h:81
t_avx_pool avx_pool
Definition: vectorize.cpp:7
const void * const_pointer
Definition: vectorize.h:254
Definition: vectorize.h:276
avx_ptr(long begin, long end)
Definition: vectorize.h:192
allocator_avx(const allocator_avx &a)
Definition: vectorize.h:285
T * pointer
Definition: vectorize.h:269
const T & const_reference
Definition: vectorize.h:272
pointer allocate(size_type n, typename allocator_avx< void >::const_pointer=NULL)
Definition: vectorize.h:292
T & reference
Definition: vectorize.h:185
const T * data() const
Definition: vectorize.h:217
Definition: vectorize.h:54
#define CD_ALIGN
Definition: cpu.h:127
vector< void * > p_ptr
Definition: vectorize.h:56
size_t size_type
Definition: vectorize.h:267
long p_begin
Definition: vectorize.h:153
vector< bool > p_used
Definition: vectorize.h:58
static const size_t p_def_size
Definition: vectorize.h:62
T value_type
Definition: vectorize.h:273
T * p_ptr_alloc
Definition: vectorize.h:155
void value_type
Definition: vectorize.h:255