This is the mail archive of the gsl-discuss@sources.redhat.com mailing list for the GSL project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: a few questions about discrete rng


Brian:

Apparently, I didn't understand the standards documentation with respect to 
size_t.  But, it's too bad about gsl_vector.  I think this decision should be 
reconsidered.  Also, I remember reading that there have been some developments 
with respect to vectors in C99.  Might this not also be a good reason to 
re-think the design?  It is alot easier to pass one parameter than 2 since you 
can easily get them backwards.

But, back to discrete with size_t and without gsl_vector.  I thought that it 
would be better to streamline it a bit and make it look more like the other 
rng's.  I'm attaching an example of that which also contains a prototype for a 
multinomial randist.  Comments are welcome and please feel free to use this in 
GSL.

Rodney Sparapani              Medical College of Wisconsin
Sr. Biostatistician           Patient Care & Outcomes Research (PCOR)
rsparapa@mcw.edu              http://www.mcw.edu/pcor
Was 'Name That Tune' rigged?  WWLD -- What Would Lombardi Do
#include <gsl/gsl_vector_uint.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include "discrete.c"

typedef struct {                /* struct for Walker algorithm */
    size_t K;
    size_t *A;
    double *F;
    gsl_rng *r;
} gsl_rng_discrete;

gsl_rng_discrete *
gsl_rng_discrete_alloc(const gsl_rng_type * T, size_t Kevents, const double *ProbArray)
{
    size_t k,b,s;
    gsl_rng_discrete *g;
    size_t nBigs, nSmalls;
    gsl_stack_t *Bigs;
    gsl_stack_t *Smalls;
    double *E;
    double pTotal = 0.0, mean, d;
    
    if (Kevents < 1) {
      /* Could probably treat Kevents=1 as a special case */

      GSL_ERROR_VAL ("number of events must be a positive integer", 
			GSL_EINVAL, 0);
    }

    /* Make sure elements of ProbArray[] are positive.
     * Won't enforce that sum is unity; instead will just normalize
     */

    for (k=0; k<Kevents; ++k) {
        if (ProbArray[k] < 0) {
	  GSL_ERROR_VAL ("probabilities must be non-negative",
			    GSL_EINVAL, 0) ;
        }
        pTotal += ProbArray[k];
    }

    /* Begin setting up the main "object" (just a struct, no steroids) */
    g = (gsl_rng_discrete *)malloc(sizeof(gsl_rng_discrete));
    g->r = gsl_rng_alloc (T);
    g->K = Kevents;
    g->F = (double *)malloc(sizeof(double)*Kevents);
    g->A = (size_t *)malloc(sizeof(size_t)*Kevents);

    E = (double *)malloc(sizeof(double)*Kevents);

    if (E==NULL) {
      GSL_ERROR_VAL ("Cannot allocate memory for randevent", ENOMEM, 0);
    }

    for (k=0; k<Kevents; ++k) {
        E[k] = ProbArray[k]/pTotal;
    }

    /* Now create the Bigs and the Smalls */
    mean = 1.0/Kevents;
    nSmalls=nBigs=0;
    for (k=0; k<Kevents; ++k) {
        if (E[k] < mean) ++nSmalls;
        else             ++nBigs;
    }
    Bigs   = new_stack(nBigs);
    Smalls = new_stack(nSmalls);
    for (k=0; k<Kevents; ++k) {
        if (E[k] < mean) {
            push_stack(Smalls,k);
        }
        else {
            push_stack(Bigs,k);
        }
    }
    /* Now work through the smalls */
    while (size_stack(Smalls) > 0) {
        s = pop_stack(Smalls);
        if (size_stack(Bigs) == 0) {
            /* Then we are on our last value */
            (g->A)[s]=s;
            (g->F)[s]=1.0;
            break;
        }
        b = pop_stack(Bigs);
        (g->A)[s]=b;
        (g->F)[s]=Kevents*E[s];
#if DEBUG
        fprintf(stderr,"s=%2d, A=%2d, F=%.4f\n",s,(g->A)[s],(g->F)[s]);
#endif        
        d = mean - E[s];
        E[s] += d;              /* now E[s] == mean */
        E[b] -= d;
        if (E[b] < mean) {
            push_stack(Smalls,b); /* no longer big, join ranks of the small */
        }
        else if (E[b] > mean) {
            push_stack(Bigs,b); /* still big, put it back where you found it */
        }
        else {
            /* E[b]==mean implies it is finished too */
            (g->A)[b]=b;
            (g->F)[b]=1.0;
        }
    }
    while (size_stack(Bigs) > 0) {
        b = pop_stack(Bigs);
        (g->A)[b]=b;
        (g->F)[b]=1.0;
    }
    /* Stacks have been emptied, and A and F have been filled */

    
#if 0
    /* if 1, then artificially set all F[k]'s to unity.  This will
     * give wrong answers, but you'll get them faster.  But, not
     * that much faster (I get maybe 20%); that's an upper bound
     * on what the optimal preprocessing would give.
     */
    for (k=0; k<Kevents; ++k) {
        (g->F)[k] = 1.0;
    }
#endif

#if KNUTH_CONVENTION
    /* For convenience, set F'[k]=(k+F[k])/K */
    /* This saves some arithmetic in gsl_rng_discrete(); I find that
     * it doesn't actually make much difference.
     */
    for (k=0; k<Kevents; ++k) {
        (g->F)[k] += k;
        (g->F)[k] /= Kevents;
    }
#endif    

    free_stack(Bigs);
    free_stack(Smalls);
    free((char *)E);

    return g;
}

size_t
gsl_rng_discrete_get(const gsl_rng_discrete *g)
{
    size_t c=0;
    double u,f;
    u = gsl_rng_uniform(g->r);
#if KNUTH_CONVENTION
    c = (u*(g->K));
#else
    u *= g->K;
    c = u;
    u -= c;
#endif
    f = (g->F)[c];
    /* fprintf(stderr,"c,f,u: %d %.4f %f\n",c,f,u); */
    if (f == 1.0) return c;

    if (u < f) {
        return c;
    }
    else {
        return (g->A)[c];
    }
}

void gsl_rng_discrete_free(gsl_rng_discrete *g)
{
    gsl_rng_free(g->r);
    free((char *)(g->r));
    free((char *)(g->A));
    free((char *)(g->F));
    free((char *)g);
}

gsl_vector_uint * gsl_ran_multinomial(const gsl_rng_discrete *d, const size_t n) 
{
  gsl_vector_uint * X = gsl_vector_uint_calloc(d->K);
  unsigned int i, j;

  for(i=0; i<n; i++) {
    j=gsl_rng_discrete_get(d);
    gsl_vector_uint_set(X, j, gsl_vector_uint_get(X, j)+1);
  }

  return X;
}

int main()
{
  double p[5]={0.05, 0.10, 0.15, 0.20, 0.5};

  
  gsl_rng_discrete * d = gsl_rng_discrete_alloc(gsl_rng_gfsr4, 5, p);

  gsl_vector_uint *x = gsl_ran_multinomial(d, 10000);
 
     FILE * f = fopen("test.txt", "w");
     gsl_vector_uint_fprintf (f, x, "%d");
     fclose (f);

     gsl_rng_discrete_free(d);

     return 0;
}

 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]