m <- 100
Nm <- 30
# Nm is observed species in sample
# m is number of individuals sampled
#m <- d$Total.Of.Count[w[ii]]
#Nm <- d$numSpecies[w[ii]]
#m <- 20;
#Nm <- 10;
maxSpecies <- 60 # could be larger ,but probability declines greatly
minN <- Nm; # this is the smallest possible number of species (the number you observe in the sample)
maxN <- maxSpecies; # this is the largest possible number of species
# generate a large number of samples based on broken stick rng
num.integrals <- 1000;
pr.N <- rep(0,maxN); # this is used to store pr(N | Nm, m)
## HERE'S THE EFFICIENT APPROACH (ALTERNATIVE BELOW)
for(i in minN:(maxN)) {
pr.N[i] <- mean(replicate(num.integrals,dpois(Nm, i*exp(-sample.size(broken.stick(i), m, i)/i)))); # this is pr(N | Nm, m)
}
predN <- which(pr.N==max(pr.N))
## HERE'S THE MORE DETAILED APPROACH WITH LOOPS
# iterate over multiple values of N
for(i in minN:(maxN)) {
# this is used to store the probs for individual realizations of the vector p
pr.Nm <- 0
for(j in 1:num.integrals) {
p <- broken.stick(i); # draw random vector
samp <- sample.size(p, m, i); # calc sample size conditioned on p,m
lambda <- i*exp(-samp/i); # calc lambda given p,m
#lambda <- i*exp(-sample.size(broken.stick(i), m, i)/i); # this is written in individual lines above
# use poisson density, alternative = neg bin
pr.Nm[j] <- dpois(Nm, lambda)
}
pr.N[i] <- mean(pr.Nm); # this is pr(N | Nm, m)
}
predN <- which(pr.N==max(pr.N))
broken.stick <- function(n) {
# there are several options for the broken stick model
# McArthur and Wilson's broken stick model is not sequential
#r <- c(0,runif(n-1),1);
#s <- sort(r);
#p <- s[2:(n+1)] - s[1:n];
#p
# second option is more along the lines of Sugihara
# stick is broken sequentially
l <- 0;
r <- runif(n)
l[1] <- r[1];
l[2] <- (1-r[1]);
# pick a random stick and break it at the place given in r[it]
for(it in 2:(n-1)) {
st <- sample(seq(1,it),1)
# add the new length to the end
l[it+1] <- (1-r[it])*l[st];
# reduce the length of the picked stick
l[st] <- r[it]*l[st];
}
l
# we want length of each of the n segments
#r <- s$x[2:(n+1)]-s$x[1:n]
#r <- runif(n,0,1);
#r <- r/sum(r)
#r
}
sample.size <- function(p, m, n) {
# p is vector of multinomial cell probabilities
# m is the sample size (number of individuals)
# n is the hypothetical number of species
me <- -n*log(sum((1-p)^m)/n)
}