This is the mail archive of the
gsl-discuss@sources.redhat.com
mailing list for the GSL project.
Re: a few questions about discrete rng
- From: Rodney Sparapani <rsparapa at post dot its dot mcw dot edu>
- To: bjg at network-theory dot co dot uk
- Cc: gsl-discuss at sources dot redhat dot com
- Date: Tue, 9 Jul 2002 10:37:41 -0500 (CDT)
- Subject: Re: a few questions about discrete rng
- Reply-to: Rodney Sparapani <rsparapa at post dot its dot mcw dot edu>
Brian:
Apparently, I didn't understand the standards documentation with respect to
size_t. But, it's too bad about gsl_vector. I think this decision should be
reconsidered. Also, I remember reading that there have been some developments
with respect to vectors in C99. Might this not also be a good reason to
re-think the design? It is alot easier to pass one parameter than 2 since you
can easily get them backwards.
But, back to discrete with size_t and without gsl_vector. I thought that it
would be better to streamline it a bit and make it look more like the other
rng's. I'm attaching an example of that which also contains a prototype for a
multinomial randist. Comments are welcome and please feel free to use this in
GSL.
Rodney Sparapani Medical College of Wisconsin
Sr. Biostatistician Patient Care & Outcomes Research (PCOR)
rsparapa@mcw.edu http://www.mcw.edu/pcor
Was 'Name That Tune' rigged? WWLD -- What Would Lombardi Do
#include <gsl/gsl_vector_uint.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include "discrete.c"
typedef struct { /* struct for Walker algorithm */
size_t K;
size_t *A;
double *F;
gsl_rng *r;
} gsl_rng_discrete;
gsl_rng_discrete *
gsl_rng_discrete_alloc(const gsl_rng_type * T, size_t Kevents, const double *ProbArray)
{
size_t k,b,s;
gsl_rng_discrete *g;
size_t nBigs, nSmalls;
gsl_stack_t *Bigs;
gsl_stack_t *Smalls;
double *E;
double pTotal = 0.0, mean, d;
if (Kevents < 1) {
/* Could probably treat Kevents=1 as a special case */
GSL_ERROR_VAL ("number of events must be a positive integer",
GSL_EINVAL, 0);
}
/* Make sure elements of ProbArray[] are positive.
* Won't enforce that sum is unity; instead will just normalize
*/
for (k=0; k<Kevents; ++k) {
if (ProbArray[k] < 0) {
GSL_ERROR_VAL ("probabilities must be non-negative",
GSL_EINVAL, 0) ;
}
pTotal += ProbArray[k];
}
/* Begin setting up the main "object" (just a struct, no steroids) */
g = (gsl_rng_discrete *)malloc(sizeof(gsl_rng_discrete));
g->r = gsl_rng_alloc (T);
g->K = Kevents;
g->F = (double *)malloc(sizeof(double)*Kevents);
g->A = (size_t *)malloc(sizeof(size_t)*Kevents);
E = (double *)malloc(sizeof(double)*Kevents);
if (E==NULL) {
GSL_ERROR_VAL ("Cannot allocate memory for randevent", ENOMEM, 0);
}
for (k=0; k<Kevents; ++k) {
E[k] = ProbArray[k]/pTotal;
}
/* Now create the Bigs and the Smalls */
mean = 1.0/Kevents;
nSmalls=nBigs=0;
for (k=0; k<Kevents; ++k) {
if (E[k] < mean) ++nSmalls;
else ++nBigs;
}
Bigs = new_stack(nBigs);
Smalls = new_stack(nSmalls);
for (k=0; k<Kevents; ++k) {
if (E[k] < mean) {
push_stack(Smalls,k);
}
else {
push_stack(Bigs,k);
}
}
/* Now work through the smalls */
while (size_stack(Smalls) > 0) {
s = pop_stack(Smalls);
if (size_stack(Bigs) == 0) {
/* Then we are on our last value */
(g->A)[s]=s;
(g->F)[s]=1.0;
break;
}
b = pop_stack(Bigs);
(g->A)[s]=b;
(g->F)[s]=Kevents*E[s];
#if DEBUG
fprintf(stderr,"s=%2d, A=%2d, F=%.4f\n",s,(g->A)[s],(g->F)[s]);
#endif
d = mean - E[s];
E[s] += d; /* now E[s] == mean */
E[b] -= d;
if (E[b] < mean) {
push_stack(Smalls,b); /* no longer big, join ranks of the small */
}
else if (E[b] > mean) {
push_stack(Bigs,b); /* still big, put it back where you found it */
}
else {
/* E[b]==mean implies it is finished too */
(g->A)[b]=b;
(g->F)[b]=1.0;
}
}
while (size_stack(Bigs) > 0) {
b = pop_stack(Bigs);
(g->A)[b]=b;
(g->F)[b]=1.0;
}
/* Stacks have been emptied, and A and F have been filled */
#if 0
/* if 1, then artificially set all F[k]'s to unity. This will
* give wrong answers, but you'll get them faster. But, not
* that much faster (I get maybe 20%); that's an upper bound
* on what the optimal preprocessing would give.
*/
for (k=0; k<Kevents; ++k) {
(g->F)[k] = 1.0;
}
#endif
#if KNUTH_CONVENTION
/* For convenience, set F'[k]=(k+F[k])/K */
/* This saves some arithmetic in gsl_rng_discrete(); I find that
* it doesn't actually make much difference.
*/
for (k=0; k<Kevents; ++k) {
(g->F)[k] += k;
(g->F)[k] /= Kevents;
}
#endif
free_stack(Bigs);
free_stack(Smalls);
free((char *)E);
return g;
}
size_t
gsl_rng_discrete_get(const gsl_rng_discrete *g)
{
size_t c=0;
double u,f;
u = gsl_rng_uniform(g->r);
#if KNUTH_CONVENTION
c = (u*(g->K));
#else
u *= g->K;
c = u;
u -= c;
#endif
f = (g->F)[c];
/* fprintf(stderr,"c,f,u: %d %.4f %f\n",c,f,u); */
if (f == 1.0) return c;
if (u < f) {
return c;
}
else {
return (g->A)[c];
}
}
void gsl_rng_discrete_free(gsl_rng_discrete *g)
{
gsl_rng_free(g->r);
free((char *)(g->r));
free((char *)(g->A));
free((char *)(g->F));
free((char *)g);
}
gsl_vector_uint * gsl_ran_multinomial(const gsl_rng_discrete *d, const size_t n)
{
gsl_vector_uint * X = gsl_vector_uint_calloc(d->K);
unsigned int i, j;
for(i=0; i<n; i++) {
j=gsl_rng_discrete_get(d);
gsl_vector_uint_set(X, j, gsl_vector_uint_get(X, j)+1);
}
return X;
}
int main()
{
double p[5]={0.05, 0.10, 0.15, 0.20, 0.5};
gsl_rng_discrete * d = gsl_rng_discrete_alloc(gsl_rng_gfsr4, 5, p);
gsl_vector_uint *x = gsl_ran_multinomial(d, 10000);
FILE * f = fopen("test.txt", "w");
gsl_vector_uint_fprintf (f, x, "%d");
fclose (f);
gsl_rng_discrete_free(d);
return 0;
}