brokenStick.r

Revision 3 - 6/4/08 at 3:48 pm by eric.ward

Previous revision
Back to revision history for brokenStick.r
This file is part of the project Estimating the number of species in a community



   m <- 100
   Nm <- 30
   # Nm is observed species in sample
   # m is number of individuals sampled
   #m <- d$Total.Of.Count[w[ii]]
   #Nm <- d$numSpecies[w[ii]]
   #m <- 20;
   #Nm <- 10;

   maxSpecies <- 60 # could be larger ,but probability declines greatly 
   minN <- Nm;  # this is the smallest possible number of species (the number you observe in the sample)
   maxN <- maxSpecies;  # this is the largest possible number of species                                                  
   
   # generate a large number of samples based on broken stick rng
   num.integrals <- 1000;  
   pr.N <- rep(0,maxN);    # this is used to store pr(N | Nm, m)
   
## HERE'S THE EFFICIENT APPROACH (ALTERNATIVE BELOW)
   for(i in minN:(maxN)) {
      pr.N[i] <- mean(replicate(num.integrals,dpois(Nm, i*exp(-sample.size(broken.stick(i), m, i)/i))));   # this is pr(N | Nm, m)
   }
   predN <- which(pr.N==max(pr.N))     
   
## HERE'S THE MORE DETAILED APPROACH WITH LOOPS   
   # iterate over multiple values of N
   for(i in minN:(maxN)) {
      # this is used to store the probs for individual realizations of the vector p
      pr.Nm <- 0
      for(j in 1:num.integrals) {
         p <- broken.stick(i); # draw random vector
         samp <- sample.size(p, m, i);  # calc sample size conditioned on p,m
         lambda <- i*exp(-samp/i);  # calc lambda given p,m
         #lambda <- i*exp(-sample.size(broken.stick(i), m, i)/i);  # this is written in individual lines above
         # use poisson density, alternative = neg bin
         pr.Nm[j] <- dpois(Nm, lambda)   
      }
      pr.N[i] <- mean(pr.Nm);   # this is pr(N | Nm, m)
   }
   predN <- which(pr.N==max(pr.N))   
   


    
broken.stick <- function(n) {
   # there are several options for the broken stick model
   # McArthur and Wilson's broken stick model is not sequential
   #r <- c(0,runif(n-1),1);
   #s <- sort(r);
   #p <- s[2:(n+1)] - s[1:n];
   #p

   # second option is more along the lines of Sugihara
   # stick is broken sequentially
   l <- 0;
   r <- runif(n)
   l[1] <- r[1];
   l[2] <- (1-r[1]);
   # pick a random stick and break it at the place given in r[it]
   for(it in 2:(n-1)) {
      st <- sample(seq(1,it),1)
      # add the new length to the end
      l[it+1] <- (1-r[it])*l[st];   
      # reduce the length of the picked stick
      l[st] <- r[it]*l[st]; 
   }
   l
   # we want length of each of the n segments 
   #r <- s$x[2:(n+1)]-s$x[1:n]
   #r <- runif(n,0,1);
   #r <- r/sum(r)
   #r
}

sample.size <- function(p, m, n) {
   # p is vector of multinomial cell probabilities
   # m is the sample size (number of individuals)
   # n is the hypothetical number of species
   me <- -n*log(sum((1-p)^m)/n)
}
Sculpin 0.2 | xhtml | problems or comments? | report bugs