Cloudy
Spectral Synthesis Code for Astrophysics
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ran.h
Go to the documentation of this file.
1 /* This file is part of Cloudy and is copyright (C)1978-2023 by Gary J. Ferland and
2  * others. For conditions of distribution and use see copyright notice in license.txt */
3 
4 #ifndef RAN_H
5 #define RAN_H
6 
7 class t_ran;
8 
9 // see: https://isocpp.org/wiki/faq/pointers-to-members
10 typedef void (t_ran::*t_ran_fun)(void* p, size_t s);
11 
12 //
13 // ran_pool holds a pool of PRNG variates, thus allowing efficient initialization using vectorization
14 //
15 // init() sets the size of the pool and the method for filling the pool
16 // reset() invalidates the remaining variates in the pool, thus forcing a reinitialization
17 // the next time next() is called; useful when the random seed is changed
18 // lgInitialized() true if init() was called
19 // next() returns the next variate; this routine is CPU time critical
20 // p_alloc() allocates the pool, making sure it is correctly aligned for SIMD access
21 // p_update_pool() (re)fill the pool with new random variates
22 // ~ran_pool() frees the pool
23 //
24 template<class T>
25 class ran_pool
26 {
27  void* p_pool; // start of the pool, correctly aligned for SIMD
28  T* p_next; // pointer to next data item that can be returned
29  T* p_end; // points just beyond the end of the pool
30  size_t p_size; // number of elements in the pool
31  size_t p_squad; // size of the pool in quadwords
32  t_ran* p_rc; // pointer to the instantiation of the random number class
33  t_ran_fun p_fill; // fills pool, member function of t_ran class
34 
35  void p_alloc()
36  {
37  // make sure the memory is correctly aligned for SIMD instructions
38  if( posix_memalign(&p_pool, CD_ALIGN, p_size*sizeof(T)) != 0 )
39  throw bad_alloc();
40  p_next = p_end = (T*)p_pool + p_size;
41  }
43  {
44  ASSERT( lgInitialized() );
45  if( p_pool == NULL )
46  p_alloc();
47  (p_rc->*p_fill)(p_pool, p_squad);
48  p_next = (T*)p_pool;
49  }
50 public:
51  void init(size_t s, t_ran* rc, t_ran_fun f)
52  {
53  ASSERT( !lgInitialized() );
54  ASSERT( s > 0 );
55  p_size = s;
56  p_squad = (s*sizeof(T))/8;
57  // make sure size is multiple of 8 bytes
58  ASSERT( p_squad*8 == s*sizeof(T) );
59  p_rc = rc;
60  p_fill = f;
61  }
62  void reset()
63  {
64  p_next = p_end;
65  }
66  bool lgInitialized() const
67  {
68  return ( p_size > 0 );
69  }
70  T next()
71  {
72  if( UNLIKELY(p_next == p_end) )
73  p_update_pool();
74  return *p_next++;
75  }
76 
78  {
79  p_pool = p_next = p_end = NULL;
80  p_size = p_squad = 0;
81  p_rc = NULL;
82  p_fill = NULL;
83  }
84  ran_pool(const ran_pool&) = delete;
85  ran_pool& operator= (const ran_pool&) = delete;
87  {
89  }
90 };
91 
92 //
93 // Two algorithms are supported here: xoroshiro128+ (v1.0) and xoshiro256** (v1.0)
94 //
95 // The 2016 version of xoroshiro128+ was the first to be added, and has since been upgraded to v1.0.
96 // The xoshiro256** algorithm was added later, and is described as an "all-purpose, rock-solid
97 // generator" with no known deficiencies. This is the default generator in Cloudy, with xoroshiro128+
98 // being kept as a backup. The latter has the advantage that it is somewhat faster, but has some mild
99 // deficiencies. To switch, simply change the instantiation of ran at the top of ran.cpp.
100 //
101 // For more information see: http://xoshiro.di.unimi.it/
102 //
103 
105 
106 //
107 // t_ran is a global class for efficiently generating pseudo-random numbers. It can generate integer
108 // as well as floating point variates (the latter with a uniform or normal distribution)
109 //
110 // The variates are stored in pools to allow efficient computation using SIMD instructions. The
111 // underlying PRNG algorithm to generate the random bits can be freely chosen. Currently supported
112 // are xoshiro256** and xoroshiro128+. Each quadword in a SIMD register should generate a different,
113 // non-overlapping stream of random numbers and therefore needs a different state. This is done
114 // automatically in the seeding process. The algorithm also takes care that each rank in an MPI run
115 // gets a different, non-overlapping stream of random numbers. The algorithm is not thread-safe in
116 // openMP runs.
117 //
118 // ND -- size of the pool of variates in quadwords
119 // p_algo -- which algorithm should be used as the underlying PRNG
120 // p_sq -- size of the state for the underlying PRNG algorithm in quadwords
121 // p_npack -- number of quadwords packed into a single SIMD register
122 // p_ns -- size of the state for a full SIMD register in quadwords (= p_sq*p_npack)
123 // p_s -- the seed that was used to generate the initial state
124 // p_state[p_ns] -- the state of the PRNG (needs to be correctly aligned for SIMD access).
125 //
126 // i7(), u8(), i15(), u16(), i31(), u32(), i63(), u64() -- return signed or unsigned integer variates
127 // dbl(), rnm() -- return uniformly distributed floating point variates on (0,1)
128 // normal() -- return normally distributed floating point variates (double precision only)
129 // these numbers are generated using the Ziggurat algorithm
130 // init() -- does a one time initialization using a randomly generated seed. Subsequent calls will
131 // be ignored. There is also a version with a fixed seed.
132 // new_rank() -- jump ahead in random number sequence for a different rank number. Mainly useful for
133 // forked threads.
134 // get_seed() -- return the seed that was used.
135 // print_seed() -- return the seed that was used as a string for printing.
136 //
137 class t_ran
138 {
139  // pool size in quadwords
140  static const size_t ND = 2048;
141  // size of the state in quadwords for a single xoroshiro128plus PRNG stream
142  static const size_t SQ_XOROSHIRO128 = 2;
143  static const size_t SQ_XOSHIRO256 = 4;
144 
147  size_t p_sq;
148  size_t p_npack;
149  size_t p_ns;
150  uint64 p_s;
151  uint64* p_state;
152 
153  const double* p_zigxd;
154  const double* p_zigrd;
155  const double* p_zige2d;
156 
157  void p_init(uint64 s, int nRANK);
158  void p_seed(uint64 s, int nRANK);
159  uint64 p_generate_random_seed();
160  uint64 p_random_seed();
161  void p_xoroshiro128plus(uint64* pool, size_t size);
162  void p_xoshiro256starstar(uint64* pool, size_t size);
163  double p_ZigTailNormal(bool lgNegative);
164 
165  // fill arrays with random variates
166  // NB NB -- the pool need to be correctly aligned for SIMD access
167  // NB NB -- the size needs to be a multiple of the SIMD vector size
168  void p_u64(void* pool, size_t size);
169  void p_dbl(void* pool, size_t size);
170  void p_flt(void* pool, size_t size);
171  void p_zig(void* pool, size_t size);
172 
180 public:
181  /* integer random variate on the [0,0x7f] interval */
182  int8 i7() { return int8(p_pc.next() >> 1); }
183  /* integer random variate on the [0,0xff] interval */
184  uint8 u8() { return p_pc.next(); }
185  /* integer random variate on the [0,0x7fff] interval */
186  int16 i15() { return int16(p_ps.next() >> 1); }
187  /* integer random variate on the [0,0xffff] interval */
188  uint16 u16() { return p_ps.next(); }
189  /* integer random variate on the [0,0x7fffffff] interval */
190  int32 i31() { return int32(p_pi.next() >> 1); }
191  /* integer random variate on the [0,0xffffffff] interval */
192  uint32 u32() { return p_pi.next(); }
193  /* integer random variate on the [0,0x7fffffffffffffff] interval */
194  int64 i63() { return int64(p_pl.next() >> 1); }
195  /* integer random variate on the [1,0xffffffffffffffff] interval */
196  uint64 u64() { return p_pl.next(); }
197  /* double precision uniform random variate on the (0,1) interval */
198  double dbl() { return p_pd.next(); }
199  /* single precision uniform random variate on the (0,1) interval */
201  {
202 #ifdef FLT_IS_DBL
203  return p_pd.next();
204 #else
205  return p_pf.next();
206 #endif
207  }
208  /* generates a random number with normal distribution and unit standard deviation using Ziggurat */
209  double normal()
210  {
211  while( true )
212  {
213  double u = p_zd.next();
214  uint8 i = u8();
215  if( LIKELY(fabs(u) < p_zigrd[i]) )
216  return u*p_zigxd[i];
217  if( UNLIKELY(i == 0) )
218  return p_ZigTailNormal( u < 0. );
219  double x = u*p_zigxd[i];
220  double e2 = exp( -0.5*pow2(x) );
221  if( p_zige2d[i+1] + dbl()*(p_zige2d[i]-p_zige2d[i+1]) < e2 )
222  return x;
223  }
224  }
225 
226  void init()
227  {
228  uint64 s = p_generate_random_seed();
229  p_init(s, 0);
230  }
231  void init(uint64 s, int nRANK)
232  {
233  p_init(s, nRANK);
234  }
235  void new_rank(int nRANK)
236  {
238  p_seed(p_s, nRANK);
239  }
240  uint64 get_seed() const { return p_s; }
241  string print_seed() const
242  {
243  ostringstream oss;
244  oss << "PRNG seed: 0x" << setw(16) << setfill('0') << hex << p_s;
245  return oss.str();
246  }
247 
248  explicit t_ran(algo_prng algo);
249  t_ran(const t_ran&) = delete;
250  t_ran& operator= (const t_ran&) = delete;
252  {
254  }
255 };
256 
257 extern t_ran ran;
258 
259 #endif
int32 i31()
Definition: ran.h:190
t_ran & operator=(const t_ran &)=delete
const double * p_zigrd
Definition: ran.h:154
void * p_pool
Definition: ran.h:27
int64 i63()
Definition: ran.h:194
size_t p_npack
Definition: ran.h:148
#define LIKELY(x)
Definition: cpu.h:479
ran_pool()
Definition: ran.h:77
ran_pool< sys_float > p_pf
Definition: ran.h:178
algo_prng
Definition: ran.h:104
void p_xoshiro256starstar(uint64 *pool, size_t size)
Definition: ran.cpp:294
Definition: ran.h:104
void p_dbl(void *pool, size_t size)
Definition: ran.cpp:315
void p_init(uint64 s, int nRANK)
Definition: ran.cpp:586
~ran_pool()
Definition: ran.h:86
const double * p_zigxd
Definition: ran.h:153
realnum rnm()
Definition: ran.h:200
void reset()
Definition: ran.h:62
bool p_lgInitialized
Definition: ran.h:145
uint64 * p_state
Definition: ran.h:151
static const size_t ND
Definition: ran.h:140
const double * p_zige2d
Definition: ran.h:155
ran_pool< uint32 > p_pi
Definition: ran.h:175
t_ran ran
t_ran * p_rc
Definition: ran.h:32
uint8 u8()
Definition: ran.h:184
ran_pool & operator=(const ran_pool &)=delete
~t_ran()
Definition: ran.h:251
void init()
Definition: ran.h:226
uint32 u32()
Definition: ran.h:192
void p_xoroshiro128plus(uint64 *pool, size_t size)
Definition: ran.cpp:289
T * p_next
Definition: ran.h:28
string print_seed() const
Definition: ran.h:241
float realnum
Definition: cddefines.h:127
ran_pool< uint8 > p_pc
Definition: ran.h:173
Definition: ran.h:25
uint64 p_random_seed()
uint64 p_generate_random_seed()
Definition: ran.cpp:625
void p_u64(void *pool, size_t size)
Definition: ran.cpp:302
void p_flt(void *pool, size_t size)
Definition: ran.cpp:327
#define NULL
Definition: cddefines.h:115
static const size_t SQ_XOSHIRO256
Definition: ran.h:143
ran_pool< uint64 > p_pl
Definition: ran.h:176
t_ran(algo_prng algo)
Definition: ran.cpp:653
Definition: ran.h:104
algo_prng p_algo
Definition: ran.h:146
uint64 u64()
Definition: ran.h:196
void posix_memalign_free(void *p)
Definition: cpu.h:143
uint16 u16()
Definition: ran.h:188
size_t p_squad
Definition: ran.h:31
double normal()
Definition: ran.h:209
#define ASSERT(exp)
Definition: cddefines.h:637
uint64 p_s
Definition: ran.h:150
ran_pool< double > p_pd
Definition: ran.h:177
T pow2(T a)
Definition: cddefines.h:987
void init(size_t s, t_ran *rc, t_ran_fun f)
Definition: ran.h:51
void p_alloc()
Definition: ran.h:35
double p_ZigTailNormal(bool lgNegative)
Definition: ran.cpp:571
double dbl()
Definition: ran.h:198
#define UNLIKELY(x)
Definition: cpu.h:490
T * p_end
Definition: ran.h:29
size_t p_size
Definition: ran.h:30
void p_seed(uint64 s, int nRANK)
Definition: ran.cpp:609
void init(uint64 s, int nRANK)
Definition: ran.h:231
void p_zig(void *pool, size_t size)
Definition: ran.cpp:339
double e2(double x)
Definition: thirdparty.cpp:3100
size_t p_sq
Definition: ran.h:147
bool lgInitialized() const
Definition: ran.h:66
Definition: ran.h:137
void p_update_pool()
Definition: ran.h:42
int16 i15()
Definition: ran.h:186
#define CD_ALIGN
Definition: cpu.h:127
ran_pool< double > p_zd
Definition: ran.h:179
T next()
Definition: ran.h:70
void new_rank(int nRANK)
Definition: ran.h:235
int8 i7()
Definition: ran.h:182
t_ran_fun p_fill
Definition: ran.h:33
ran_pool< uint16 > p_ps
Definition: ran.h:174
size_t p_ns
Definition: ran.h:149
uint64 get_seed() const
Definition: ran.h:240
static const size_t SQ_XOROSHIRO128
Definition: ran.h:142
void(t_ran::* t_ran_fun)(void *p, size_t s)
Definition: ran.h:10