univariate.R

Revision 1 - 7/5/08 at 5:58 pm by eric.ward

Back to revision history for univariate.R
This file is part of the project Bayesian MAR(1) model
# Code written by Eric Ward, 07.04.08
# email: eric.ward@noaa.gov
# 
# This is designed to do the univariate sampling for Bayesian linear regression
# Two versions are included, one utilizing multivariate sampling, one doing variable at a time.
# Output from this program can be used as an mcmc() object, and fed directly into CODA for
# diagnostics, by calling codamenu().  
#
# Sources:        Unless otherwise specified, all eqns and page refs refer to Gelman
# 1. Gelman (2004), Bayesian Data Analysis 2nd ed
# 2. Carlin and Louis () Bayes and Empirical Bayes methods for data analysis
# 3. Chib (1995) Marginal Likelihood from the Gibbs Output, JASA


library(coda)     # needed if mcmc objects are returned
library(mvtnorm)  # needed to call dmvnorm
log2pi <- log(2*pi)
#library(MASS)     # needed to call rmvnorm
# Functions for random number generation, not currently used
#rinvgamma<-function(shape, scale = 1) return(1/rgamma(1, shape, scale))
#rinvchi2<-function(shape, scale = 1) return(shape*scale/rchisq(1,shape))
#rinvchi2.2 <-function(shape, scale = 1) return(shape*scale/rchisq(1,shape))
# Function for estimating prior density
dinvgamma.log <- function(x, shape, scale = 1) 
{
    log.density <- shape * log(scale) - lgamma(shape) - (shape + 1) * log(x) - (scale/x)
    return(log.density)
}
dinvchi2.log <- function(x, shape, scale = 1) return(dinvgamma.log(x, shape/2, shape*scale/2))
dinvchi2 <- function(x, shape, scale = 1) return(exp(dinvgamma.log(x, shape/2, shape*scale/2)))      
# Function to compute MV normal random numbers, mvrnorm in MASS takes 137% longer, because it uses eigen 
rmvnorm.chol <- function(norm.01, u, Sigma) {
   # norm.01 is a vector of Normal(0,1) random numbers
   return(c(t(u) + (norm.01)%*%chol(Sigma)))
}


###############################################################################
#  Function for doing sampling posteriors for univariate normal linear regression, 
#  with conjugate priors.  The difference between univariate.exact and univariate.gibbs
#  is that univariate.exact draws all B parameters from a MVN distribution.
###############################################################################
univariate.all <- function(y, X, priorB = list(B0=NA, sigmaB=NA,B.init=NA), priorSig = list(form="shape-scale",n0=NA,sig20=NA,mu=NA,variance=NA,sig.init=NA), MCMCpars=list(n.burn=1000, n.iter=10000, n.thin=1),makePlots=FALSE) {
  # y is the data, can be passed in as a vector or matrix
  # X is a matrix of covariates, including column of 1s for intercept
  # priorB is an optional list containing: 
  #    B0, vector of prior means (defaults to 0)
  #    sigmaB, vector / matrix of prior covariances (defaults to diagonal)
  #    B.init, vector of initial values (defaults to 0)
  # priorSig is an optional list containing: 
  #    form, either "shape-scale" (default) or "mean-var"
  #    if the prior shape-scale (default): 
  #    n0: how many data points is this prior worth? (defaults to 1)
  #    sig20, prior scale parameter (defaults to 1) 
  #    if the prior mean-var:
  #    mu, prior mean parameter
  #    variance, prior var parameter
  #    sig.init, initial value (defaults to MLE)
  # MCMCpars is an optional list containing:
  #    n.burn, period of burn in (true length = burn * thin)
  #    n.iter, number of samples to save after the burn
  #    n.thin, thinning interval
  # makePlots is T/F, whether to plot the log(BF) and MCMC object, default = F

  # the "form" argument of the priorSig list allows the user to optionally specify the prior for sigma2
  # in terms of the mean and the variance
  if(priorSig$form=="mean-var") {
    # then mean and var need to be translated into shape / scale
    priorSig$n0 = (2 * priorSig$mu^2)/priorSig$variance + 4        # shape parameter
    priorSig$sig20 = (priorSig$mu)*(priorSig$n0-2)/priorSig$n0     # scale parameter
  }

  n = length(y)    # number of observations
  k = dim(X)[2]    # number of estimated pars, not including sig2
  # The next 4 variables are just constants to be used later
  n.min.k = n-k
  n0plusnk = priorSig$n0 + (n + k)
  nplusk = n+k
  n0sig20 = priorSig$n0+priorSig$sig20
  #if(is.na(sigmaY)==TRUE) sigmaY = diag(n)  else sigmaY = priorSig$sigmaY # if sigmaY is not passed in, use identity matrix
  Qy = diag(n)   # this is scaling cov matrix, sigmaY = Qy * sigma2
  # Make sure that the priors for B have been specified correctly
  if(is.null(priorB$B0) == TRUE || length(priorB$B0) != k || any(is.na(priorB$B0)==TRUE)) priorB$B0 = rep(0,k)         # default prior has mean 0
  if(is.null(priorB$B.init) == TRUE || length(priorB$B.init) != k || any(is.na(priorB$B.init)==TRUE)) priorB$B.init = rep(0,k)
  B = priorB$B.init # Assign B to the initial guesses               
  
  # Make sure that the priors for sigma2 have been specified correctly
  #if(priorSig$n0 <= 4) stop("Error: increase shape parameter for sigma2 prior")
  #if(priorSig$sig20 <= 0) stop("Error: increase scale parameter for sigma2 prior")
  if(is.null(priorSig$sig.init) == TRUE || is.na(priorSig$sig.init) == TRUE) {
      Bhat = chol2inv(chol(t(X)%*%X))%*%t(X)%*%y
      priorSig$sig.init = sqrt(t(y-X%*%Bhat)%*%(y-X%*%Bhat)/(n-k))
  }
  sigma2 = priorSig$sig.init
  # Create X.star and Y.star, eq 14.25 Gelman p 384
  X.star = rbind(X, diag(k))
  Y.star = as.matrix(c(y,priorB$B0))
  # These matrices are zeros to fill in the components of Sigma.star
  zeros.UR = matrix(0,n,k)
  zeros.LL = matrix(0,k,n)                                       
  
  if(is.null(priorB$sigmaB) == TRUE || dim(as.matrix(priorB$sigmaB))[1] != k || any(is.na(priorB$sigmaB)==TRUE)==TRUE) priorB$sigmaB = rep(1,k)
  if(is.matrix(priorB$sigmaB)==TRUE) diag.priorB = (priorB$sigmaB) else diag.priorB = diag(priorB$sigmaB)

  #################################################################
  # THIS IS THE BURN-IN PHASE
  #################################################################
  invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))
  rndchi2 = rchisq(n=MCMCpars$n.burn*MCMCpars$n.thin, df=n0plusnk)  # take advantage of vectors, draw all the Chi2 draws we'll need
  for(i in 1:(MCMCpars$n.burn*MCMCpars$n.thin)) {
     # Y | B, sig2 ~ norm(XB, sig2*I)
     # sig2 | y ~ InvX2(n0+n+k, (n0*sig20+(n+k)*s2)/(n0+n+k)  # n+k is length of Y.star
     y.min.XB = Y.star - X.star%*%B                           # residuals
     s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k)          # eqn 14.18
     # draw x ~ chi2 (v) RV, v*s2/x ~ InvChi2(v, s2)          # Inv Chi2, Appendix, p580
     sigma2 = (n0sig20+(nplusk)*s2)/rndchi2[i]                # p384 
     # Multivariate distribution of B | sigma2, y
     # MVN (Bhat, Vb)  Bhat = inv(t(X)*X) *t(X)*y, Vb = inv(t(X)*X)
     # We can build the star matrices (14.25), and then plug them in as known covariances (14.11-14.12)
     # this is for all cases, EW commented out the next 2 lines for speed in the independent case
     # invSigma.star = chol2inv(chol(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))   
     # avoid inverting the matrix because we can assume the data & pars are independent
     invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))
     # Use cholesky decomp to avoid calculating inverse
     Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star))  # eqn 14.3 or 14.11
     Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star           # 14.12 
     B = rmvnorm.chol(rnorm(k,0,1),Bhat,Vb*c(sigma2))         # 14.3
  }  
  
  #################################################################
  # THIS IS THE SAMPLING PHASE
  #################################################################
  c = 1
  rndchi2 = rchisq(n=MCMCpars$n.iter*MCMCpars$n.thin, df=n0plusnk)  # take advantage of vectors, draw all the Chi2 draws we'll need
  B.post = matrix(0,MCMCpars$n.iter,k)
  sigma2.post = 0
  for(i in 1:MCMCpars$n.iter) {    # Nested for loops avoid the if or modulus statements - makes speed 30-50% faster
     for(j in 1:MCMCpars$n.thin) {
        # Y | B, sig2 ~ norm(XB, sig2*I)
        # sig2 | y ~ InvX2(n0+n+k, (n0*sig20+(n+k)*s2)/(n0+n+k)  # n+k is length of Y.star
        y.min.XB = Y.star - X.star%*%B                           # residuals
        s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k)          # eqn 14.18
        # draw x ~ chi2 (v) RV, v*s2/x ~ InvChi2(v, s2)          # Inv Chi2, Appendix, p580
        sigma2 = (n0sig20+(nplusk)*s2)/rndchi2[c]                # p384
        c = c+1 
        # Multivariate distribution of B | sigma2, y
        # MVN (Bhat, Vb)  Bhat = inv(t(X)*X) *t(X)*y, Vb = inv(t(X)*X)
        # We can build the star matrices (14.25), and then plug them in as known covariances (14.11-14.12)
        # this is for all cases, EW commented out the next 2 lines for speed in the independent case
        # invSigma.star = chol2inv(chol(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))   
        # avoid inverting the matrix because we can assume the data & pars are independent
        invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))
        # Use cholesky decomp to avoid calculating inverse
        Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star))  # eqn 14.3 or 14.11
        Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star           # 14.12 
        B = rmvnorm.chol(rnorm(k,0,1),Bhat,Vb*c(sigma2))         # 14.3
     }
     # Store the parameter draws
     B.post[i,] = B
     sigma2.post[i] = sigma2
  }    

  #################################################################
  # THIS IS THE BAYES FACTOR CALCULATION / PARAMETER SUMMARY PHASE
  #################################################################  
  B.mode = 0        
  for(i in 1:k) {
     d = density(B.post[,i])
     B.mode[i] = d$x[which(d$y==max(d$y))]   # Calculate the mode
  }
  d = density(sqrt(sigma2.post))
  sigma.mode = d$x[which(d$y==max(d$y))]     # Calculate the mode
  sigma2.mode <- sigma.mode^2
  # calculate the log likelihood at the posterior mode
  logLike = sum(dnorm(x = y, mean = X%*%B.mode, sd = sigma.mode,log=T))  # calculate the log likelihood
  # calculate log prior for sigma2, alternatively could estimate mode for tau and use gamma  
  logPrior = dinvchi2.log(x = sigma.mode^2,shape = priorSig$n0, scale = priorSig$sig20)  # Appendix A
  logPrior = logPrior + dmvnorm(x=B.mode, mean = priorB$B0, sigma=diag.priorB, log=TRUE)
  
  # log(posterior) is broken down into two points,
  # Posterior(B*,sigma*|Y) = Posterior(B*|sigma*,Y)*Posterior(sigma*|Y)
  # See Chib, section 2.1.2 or maybe a better explanation in Carlin & Louis, p 209
  # Posterior(B*|sigma*,Y) is directly available:
  sigma2 <- sigma2.mode
  invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))
  Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star))  # eqn 14.3 or 14.11
  Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star           # 14.12 
  logPost.B <- dmvnorm(x=B.mode, mean = Bhat, sigma=Vb*c(sigma2), log=TRUE)
  logPost.sigma <- 0
  for(i in 1:MCMCpars$n.iter) {  # Rao-Blackwell approximation, Carlin & Louis, p 209 eqn 6.1.6
     # Calculate Posterior(sigma*|Y) as average over P(sigma*|B[i],Y) 
     y.min.XB = Y.star - X.star%*%B.post[i,]         # residuals
     invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2.mode),zeros.UR),cbind(zeros.LL,diag.priorB))))
     s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k)          # eqn 14.18
     logPost.sigma[i] <- dinvchi2(x = sigma2.mode, shape = n0plusnk, scale = s2)
  }
  logPost.sigma <- log(mean(logPost.sigma))
  
  # marginal likelihood - essentially log [p(model | data) ]
  # Chib refers to it as BMI, basic marginal likelihood identity
  BFML = logLike + logPrior - logPost.B - logPost.sigma  # Chib, section 2.1.2  
  
  # return data frame of saved values
  post = as.data.frame(cbind(B.post,sqrt(sigma2.post)))
  for(i in 1:k) names(post)[i] = paste("B",i-1,sep="")
  names(post)[k+1] = "sigma"
  print(summary(mcmc(post)))    # summarize output to console
  print(paste("Log(Bayes Factor): ",BFML,sep=""),quote=FALSE)
  ############  THIS IS TO MAKE OPTIONAL PLOTS
  if(makePlots == TRUE) {
     #par(ask=T) 
     plot(mcmc(post))         
     
     par(mfrow=c(2,2)) # make 2 x 2 panel plots of priors & posteriors
     #num.plots = ceil((k+1)/4)
     Bpriors <- matrix(0,20000,k)
     for(i in 1:20000) Bpriors[i,] = rmvnorm.chol(rnorm(k,0,1),priorB$B0, diag.priorB)
     for(i in 1:k) {
        # draw a sample from the prior variance to plot
        post = B.post[,i]
        prior = Bpriors[,i]
        #min.x = min(c(sigma2.post)); max.x = max(c(sigma2.post))
        min.x = min(post,prior); max.x = max(post,prior)
        d.prior = density(prior); d.prior$y = d.prior$y/sum(d.prior$y)
        d.post = density(post); d.post$y = d.post$y/sum(d.post$y) 
        #min.x = min(d.post$x[which(d.post$y > median(d.post$y))])
        #max.x = max(d.post$x[which(d.post$y > median(d.post$y))])
        #d.like = 
        min.y = 0.05*max(d.prior$y,d.post$y);  max.y = max(1.10*max(d.prior$y,d.post$y),0.01+max(d.prior$y,d.post$y))
        plot(d.prior$x,d.prior$y,main="",ylab="Density",xlab=paste("B",i-1,sep=""),xlim=c(min.x,max.x),ylim=c(min.y,max.y),type="l",col="blue",lwd=2)
        #lines(d.like$x,d.like$y,col="red",lwd=2)
        lines(d.post$x,d.post$y,lwd=2,col="purple")
        text(d.prior$x[which(d.prior$y==max(d.prior$y))], 0.004+max(d.prior$y),expression(paste("P(",theta,")")),cex=0.7)
        #text(d.like$x[which(d.like$y==max(d.like$y))], 0.006+max(d.like$y),expression(paste("L(Y|",theta,")")))
        text(d.post$x[which(d.post$y==max(d.post$y))], 0.004+max(d.post$y),expression(paste("P(",theta,"|Y)")),cex=0.7)     
     }
     
     # draw a sample from the prior variance to plot
     prior = log((priorSig$n0*priorSig$sig20)/rchisq(100000,df=priorSig$n0)) 
     post = sigma2.post
     #min.x = min(c(sigma2.post)); max.x = max(c(sigma2.post))
     min.x = min(post,prior); max.x = max(post,prior)
     d.prior = density(prior,from=0); d.prior$y = d.prior$y/sum(d.prior$y)
     d.post = density(post,from=0); d.post$y = d.post$y/sum(d.post$y) 
     #min.x = min(d.post$x[which(d.post$y > median(d.post$y))])
     #max.x = max(d.post$x[which(d.post$y > median(d.post$y))])
     #d.like = 
     min.y = 0.05*max(d.prior$y,d.post$y);  max.y = max(1.10*max(d.prior$y,d.post$y),0.01+max(d.prior$y,d.post$y))
     plot(d.prior$x,d.prior$y,main="",ylab="Density",xlab=expression(sigma[2]),xlim=c(min.x,max.x),ylim=c(min.y,max.y),type="l",col="blue",lwd=2)
     #lines(d.like$x,d.like$y,col="red",lwd=2)
     lines(d.post$x,d.post$y,lwd=2,col="purple")
     text(d.prior$x[which(d.prior$y==max(d.prior$y))], 0.004+max(d.prior$y),expression(paste("P(",theta,")")),cex=0.7)
     #text(d.like$x[which(d.like$y==max(d.like$y))], 0.006+max(d.like$y),expression(paste("L(Y|",theta,")")))
     text(d.post$x[which(d.post$y==max(d.post$y))], 0.004+max(d.post$y),expression(paste("P(",theta,"|Y)")),cex=0.7)  
  }
  list(saved = post, B.mode = B.mode,sigma.mode=sigma.mode,LogBayesFac=BFML)
}



###############################################################################
#  Function for doing variable at a time (vat) gibbs sampling for univariate normal
#  linear regression, with conjugate priors
###############################################################################

univariate.vat <- function(y, X, priorB = list(B0=NA, sigmaB=NA,B.init=NA), priorSig = list(form="shape-scale",n0=NA,sig20=NA,mu=NA,variance=NA,sig.init=NA), MCMCpars=list(n.burn=1000,n.iter=10000,n.thin=1),makePlots=FALSE) {
  # y is the data, can be passed in as a vector or matrix
  # X is a matrix of covariates, including column of 1s for intercept
  # priorB is an optional list containing: 
  #    B0, vector of prior means (defaults to 0)
  #    sigmaB, vector / matrix of prior covariances (defaults to diagonal)
  #    B.init, vector of initial values (defaults to 0)
  # priorSig is an optional list containing: 
  #    form, either "shape-scale" (default) or "mean-var"
  #    if the prior shape-scale (default): 
  #    n0, prior shape parameter: how many data points is this prior worth? (defaults to 1)
  #    sig20, prior scale parameter (defaults to 1) 
  #    if the prior mean-var:
  #    mu, prior mean parameter
  #    variance, prior var parameter
  #    sig.init, initial value (defaults to MLE)
  # MCMCpars is an optional list containing:
  #    n.burn, period of burn in (true length = burn * thin)
  #    n.iter, number of samples to save after the burn
  #    n.thin, thinning interval
  # makePlots is T/F, whether to plot the log(BF) and MCMC object, default = F

  # the "form" argument of the priorSig list allows the user to optionally specify the prior for sigma2
  # in terms of the mean and the variance
  if(priorSig$form=="mean-var") {
    # then mean and var need to be translated into shape / scale
    priorSig$n0 = (2 * priorSig$mu^2)/priorSig$variance + 4        # shape parameter
    priorSig$sig20 = (priorSig$mu)*(priorSig$n0-2)/priorSig$n0     # scale parameter
  }

  n = length(y)    # number of observations
  k = dim(X)[2]    # number of estimated pars, not including sig2
  # The next 4 variables are just constants to be used later
  n.min.k = n-k
  n0plusnk = priorSig$n0 + (n + k)
  nplusk = n+k
  n0sig20 = priorSig$n0+priorSig$sig20
  #if(is.na(sigmaY)==TRUE) sigmaY = diag(n)  else sigmaY = priorSig$sigmaY # if sigmaY is not passed in, use identity matrix
  Qy = diag(n)   # this is scaling cov matrix, sigmaY = Qy * sigma2
  # Make sure that the priors for B have been specified correctly
  if(is.null(priorB$B0) == TRUE || length(priorB$B0) != k || any(is.na(priorB$B0)==TRUE)) priorB$B0 = rep(0,k)         # default prior has mean 0
  if(is.null(priorB$B.init) == TRUE || length(priorB$B.init) != k || any(is.na(priorB$B.init)==TRUE)) priorB$B.init = rep(0,k)
  B = priorB$B.init # Assign B to the initial guesses               
  
  # Make sure that the priors for sigma2 have been specified correctly
  #if(priorSig$n0 <= 4) stop("Error: increase shape parameter for sigma2 prior")
  #if(priorSig$sig20 <= 0) stop("Error: increase scale parameter for sigma2 prior")
  if(is.null(priorSig$sig.init) == TRUE || is.na(priorSig$sig.init) == TRUE) {
      Bhat = chol2inv(chol(t(X)%*%X))%*%t(X)%*%y
      priorSig$sig.init = sqrt(t(y-X%*%Bhat)%*%(y-X%*%Bhat)/(n-k))
  }
  sigma2 = priorSig$sig.init
  # Create X.star and Y.star, eq 14.25 Gelman p 384
  X.star = rbind(X, diag(k))
  Y.star = as.matrix(c(y,priorB$B0))
  # These matrices are zeros to fill in the components of Sigma.star
  zeros.UR = matrix(0,n,k)
  zeros.LL = matrix(0,k,n)                                       
  
  if(is.null(priorB$sigmaB) == TRUE || dim(as.matrix(priorB$sigmaB))[1] != k || any(is.na(priorB$sigmaB)==TRUE)==TRUE) priorB$sigmaB = rep(1,k)
  if(is.matrix(priorB$sigmaB)==TRUE) diag.priorB = (priorB$sigmaB) else diag.priorB = diag(priorB$sigmaB)

  #################################################################
  # THIS IS THE BURN-IN PHASE
  #################################################################
  invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))
  rndchi2 = rchisq(n=MCMCpars$n.burn*MCMCpars$n.thin, df=n0plusnk)  # take advantage of vectors, draw all the Chi2 draws we'll need
  for(i in 1:(MCMCpars$n.burn*MCMCpars$n.thin)) {
     # Y | B, sig2 ~ norm(XB, sig2*I)
     # sig2 | y ~ InvX2(n0+n+k, (n0*sig20+(n+k)*s2)/(n0+n+k)  # n+k is length of Y.star
     y.min.XB = Y.star - X.star%*%B                           # residuals
     s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k)          # eqn 14.18
     # draw x ~ chi2 (v) RV, v*s2/x ~ InvChi2(v, s2)          # Inv Chi2, Appendix, p580
     sigma2 = (n0sig20+(nplusk)*s2)/rndchi2[i]                # p384 
     # Multivariate distribution of B | sigma2, y
     # MVN (Bhat, Vb)  Bhat = inv(t(X)*X) *t(X)*y, Vb = inv(t(X)*X)
     # We can build the star matrices (14.25), and then plug them in as known covariances (14.11-14.12)
     # this is for all cases, EW commented out the next 2 lines for speed in the independent case
     # invSigma.star = chol2inv(chol(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))   
     # avoid inverting the matrix because we can assume the data & pars are independent
     invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))
     # Use cholesky decomp to avoid calculating inverse
     Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star))  # eqn 14.3 or 14.11
     Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star           # 14.12 
     # Do variable at a time gibbs sampling
     Vb.inv = chol2inv(chol(Vb*c(sigma2)))  # calculate inverese of cov matrix
     ind.vars = (1/diag(Vb.inv))        # this is used in the var argument of A.2, and in sq brackets on line above
     C = diag(k)-diag(ind.vars)%*%Vb.inv # calculate conditional coefficients, eqn A.2 in Appendix
     for(j in 1:k) {
        # sample the conditional univariate distribution, A.2 in Appendix
        # Mean can be computed several ways: 2nd method preferred for speed
        #   mu[j] = Bhat[j] + Vb.sigma2[j,-j]%*%chol2inv(chol(Vb.sigma2[-j,-j]))%*%as.matrix((B[-j]-Bhat[-j]))
        #   mu[j] = Bhat[j] + C[j,]%*%as.matrix(B-Bhat)
        # Variance can be computed several ways: 2nd method preferred for speed
        #   var[j] = Vb.sigma2[j,j] - Vb.sigma2[j,-j]%*%chol2inv(chol(Vb.sigma2[-j,-j]))%*%Vb.sigma2[-j,j]
        #   var[j] = ind.vars[j]
        B[j] = rnorm(1, Bhat[j] + C[j,]%*%as.matrix(B-Bhat), sqrt(ind.vars[j]))
     }     
  }  
  
  #################################################################
  # THIS IS THE SAMPLING PHASE
  #################################################################
  c = 1
  rndchi2 = rchisq(n=MCMCpars$n.iter*MCMCpars$n.thin, df=n0plusnk)  # take advantage of vectors, draw all the Chi2 draws we'll need
  B.post = matrix(0,MCMCpars$n.iter,k)
  sigma2.post = 0
  for(i in 1:MCMCpars$n.iter) {    # Nested for loops avoid the if or modulus statements - makes speed 30-50% faster
     for(j in 1:MCMCpars$n.thin) {
        # Y | B, sig2 ~ norm(XB, sig2*I)
        # sig2 | y ~ InvX2(n0+n+k, (n0*sig20+(n+k)*s2)/(n0+n+k)  # n+k is length of Y.star
        y.min.XB = Y.star - X.star%*%B                           # residuals
        s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k)          # eqn 14.18
        # draw x ~ chi2 (v) RV, v*s2/x ~ InvChi2(v, s2)          # Inv Chi2, Appendix, p580
        sigma2 = (n0sig20+(nplusk)*s2)/rndchi2[c]                # p384
        c = c+1 
        # Multivariate distribution of B | sigma2, y
        # MVN (Bhat, Vb)  Bhat = inv(t(X)*X) *t(X)*y, Vb = inv(t(X)*X)
        # We can build the star matrices (14.25), and then plug them in as known covariances (14.11-14.12)
        # this is for all cases, EW commented out the next 2 lines for speed in the independent case
        # invSigma.star = chol2inv(chol(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))   
        # avoid inverting the matrix because we can assume the data & pars are independent
        invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))
        # Use cholesky decomp to avoid calculating inverse
        Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star))  # eqn 14.3 or 14.11
        Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star           # 14.12 
        # Do variable at a time gibbs sampling
        Vb.inv = chol2inv(chol(Vb*c(sigma2)))  # calculate inverese of cov matrix
        ind.vars = (1/diag(Vb.inv))        # this is used in the var argument of A.2, and in sq brackets on line above
        C = diag(k)-diag(ind.vars)%*%Vb.inv # calculate conditional coefficients, eqn A.2 in Appendix
        for(j in 1:k) {
           # sample the conditional univariate distribution, A.2 in Appendix
           # Mean can be computed several ways: 2nd method preferred for speed
           #   mu[j] = Bhat[j] + Vb.sigma2[j,-j]%*%chol2inv(chol(Vb.sigma2[-j,-j]))%*%as.matrix((B[-j]-Bhat[-j]))
           #   mu[j] = Bhat[j] + C[j,]%*%as.matrix(B-Bhat)
           # Variance can be computed several ways: 2nd method preferred for speed
           #   var[j] = Vb.sigma2[j,j] - Vb.sigma2[j,-j]%*%chol2inv(chol(Vb.sigma2[-j,-j]))%*%Vb.sigma2[-j,j]
           #   var[j] = ind.vars[j]
           B[j] = rnorm(1, Bhat[j] + C[j,]%*%as.matrix(B-Bhat), sqrt(ind.vars[j]))
        }     
     }
     # Store the parameter draws
     B.post[i,] = B
     sigma2.post[i] = sigma2
  }    
  
  #################################################################
  # THIS IS THE BAYES FACTOR CALCULATION / PARAMETER SUMMARY PHASE
  #################################################################  
  B.mode = 0        
  for(i in 1:k) {
     d = density(B.post[,i])
     B.mode[i] = d$x[which(d$y==max(d$y))]   # Calculate the mode
  }
  d = density(sqrt(sigma2.post))
  sigma.mode = d$x[which(d$y==max(d$y))]     # Calculate the mode
  sigma2.mode <- sigma.mode^2
  # calculate the log likelihood at the posterior mode
  logLike = sum(dnorm(x = y, mean = X%*%B.mode, sd = sigma.mode,log=T))  # calculate the log likelihood
  # calculate log prior for sigma2, alternatively could estimate mode for tau and use gamma  
  logPrior = dinvchi2.log(x = sigma.mode^2,shape = priorSig$n0, scale = priorSig$sig20)  # Appendix A
  logPrior = logPrior + dmvnorm(x=B.mode, mean = priorB$B0, sigma=diag.priorB, log=TRUE)
  
  # log(posterior) is broken down into two points,
  # Posterior(B*,sigma*|Y) = Posterior(B*|sigma*,Y)*Posterior(sigma*|Y)
  # See Chib, section 2.1.2 or maybe a better explanation in Carlin & Louis, p 209
  # Posterior(B*|sigma*,Y) is directly available:
  sigma2 <- sigma2.mode
  invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB))))
  Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star))  # eqn 14.3 or 14.11
  Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star           # 14.12 
  logPost.B <- dmvnorm(x=B.mode, mean = Bhat, sigma=Vb*c(sigma2), log=TRUE)
  logPost.sigma <- 0
  for(i in 1:MCMCpars$n.iter) {  # Rao-Blackwell approximation, Carlin & Louis, p 209 eqn 6.1.6
     # Calculate Posterior(sigma*|Y) as average over P(sigma*|B[i],Y) 
     y.min.XB = Y.star - X.star%*%B.post[i,]         # residuals
     invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2.mode),zeros.UR),cbind(zeros.LL,diag.priorB))))
     s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k)          # eqn 14.18
     logPost.sigma[i] <- dinvchi2(x = sigma2.mode, shape = n0plusnk, scale = s2)
  }
  logPost.sigma <- log(mean(logPost.sigma))
  
  # marginal likelihood - essentially log [p(model | data) ]
  # Chib refers to it as BMI, basic marginal likelihood identity
  BFML = logLike + logPrior - logPost.B - logPost.sigma  # Chib, section 2.1.2  
  
  # return data frame of saved values
  post = as.data.frame(cbind(B.post,sqrt(sigma2.post)))
  for(i in 1:k) names(post)[i] = paste("B",i-1,sep="")
  names(post)[k+1] = "sigma"
  print(summary(mcmc(post)))    # summarize output to console
  print(paste("Log(Bayes Factor): ",BFML,sep=""),quote=FALSE)
  ############  THIS IS TO MAKE OPTIONAL PLOTS
  if(makePlots == TRUE) {
     #par(ask=T) 
     plot(mcmc(post))         
     
     par(mfrow=c(2,2)) # make 2 x 2 panel plots of priors & posteriors
     #num.plots = ceil((k+1)/4)
     Bpriors <- matrix(0,20000,k)
     for(i in 1:20000) Bpriors[i,] = rmvnorm.chol(rnorm(k,0,1),priorB$B0, diag.priorB)
     for(i in 1:k) {
        # draw a sample from the prior variance to plot
        post = B.post[,i]
        prior = Bpriors[,i]
        #min.x = min(c(sigma2.post)); max.x = max(c(sigma2.post))
        min.x = min(post,prior); max.x = max(post,prior)
        d.prior = density(prior); d.prior$y = d.prior$y/sum(d.prior$y)
        d.post = density(post); d.post$y = d.post$y/sum(d.post$y) 
        #min.x = min(d.post$x[which(d.post$y > median(d.post$y))])
        #max.x = max(d.post$x[which(d.post$y > median(d.post$y))])
        #d.like = 
        min.y = 0.05*max(d.prior$y,d.post$y);  max.y = max(1.10*max(d.prior$y,d.post$y),0.01+max(d.prior$y,d.post$y))
        plot(d.prior$x,d.prior$y,main="",ylab="Density",xlab=paste("B",i-1,sep=""),xlim=c(min.x,max.x),ylim=c(min.y,max.y),type="l",col="blue",lwd=2)
        #lines(d.like$x,d.like$y,col="red",lwd=2)
        lines(d.post$x,d.post$y,lwd=2,col="purple")
        text(d.prior$x[which(d.prior$y==max(d.prior$y))], 0.004+max(d.prior$y),expression(paste("P(",theta,")")),cex=0.7)
        #text(d.like$x[which(d.like$y==max(d.like$y))], 0.006+max(d.like$y),expression(paste("L(Y|",theta,")")))
        text(d.post$x[which(d.post$y==max(d.post$y))], 0.004+max(d.post$y),expression(paste("P(",theta,"|Y)")),cex=0.7)     
     }
     
     # draw a sample from the prior variance to plot
     prior = log((priorSig$n0*priorSig$sig20)/rchisq(100000,df=priorSig$n0)) 
     post = sigma2.post
     #min.x = min(c(sigma2.post)); max.x = max(c(sigma2.post))
     min.x = min(post,prior); max.x = max(post,prior)
     d.prior = density(prior,from=0); d.prior$y = d.prior$y/sum(d.prior$y)
     d.post = density(post,from=0); d.post$y = d.post$y/sum(d.post$y) 
     #min.x = min(d.post$x[which(d.post$y > median(d.post$y))])
     #max.x = max(d.post$x[which(d.post$y > median(d.post$y))])
     #d.like = 
     min.y = 0.05*max(d.prior$y,d.post$y);  max.y = max(1.10*max(d.prior$y,d.post$y),0.01+max(d.prior$y,d.post$y))
     plot(d.prior$x,d.prior$y,main="",ylab="Density",xlab=expression(sigma[2]),xlim=c(min.x,max.x),ylim=c(min.y,max.y),type="l",col="blue",lwd=2)
     #lines(d.like$x,d.like$y,col="red",lwd=2)
     lines(d.post$x,d.post$y,lwd=2,col="purple")
     text(d.prior$x[which(d.prior$y==max(d.prior$y))], 0.004+max(d.prior$y),expression(paste("P(",theta,")")),cex=0.7)
     #text(d.like$x[which(d.like$y==max(d.like$y))], 0.006+max(d.like$y),expression(paste("L(Y|",theta,")")))
     text(d.post$x[which(d.post$y==max(d.post$y))], 0.004+max(d.post$y),expression(paste("P(",theta,"|Y)")),cex=0.7)  
  }
  list(saved = post, B.mode = B.mode,sigma.mode=sigma.mode,LogBayesFac=BFML)
}
Sculpin 0.2 | xhtml | problems or comments? | report bugs