# Code written by Eric Ward, 07.04.08 # email: eric.ward@noaa.gov # # This is designed to do the univariate sampling for Bayesian linear regression # Two versions are included, one utilizing multivariate sampling, one doing variable at a time. # Output from this program can be used as an mcmc() object, and fed directly into CODA for # diagnostics, by calling codamenu(). # # Sources: Unless otherwise specified, all eqns and page refs refer to Gelman # 1. Gelman (2004), Bayesian Data Analysis 2nd ed # 2. Carlin and Louis () Bayes and Empirical Bayes methods for data analysis # 3. Chib (1995) Marginal Likelihood from the Gibbs Output, JASA library(coda) # needed if mcmc objects are returned library(mvtnorm) # needed to call dmvnorm log2pi <- log(2*pi) #library(MASS) # needed to call rmvnorm # Functions for random number generation, not currently used #rinvgamma<-function(shape, scale = 1) return(1/rgamma(1, shape, scale)) #rinvchi2<-function(shape, scale = 1) return(shape*scale/rchisq(1,shape)) #rinvchi2.2 <-function(shape, scale = 1) return(shape*scale/rchisq(1,shape)) # Function for estimating prior density dinvgamma.log <- function(x, shape, scale = 1) { log.density <- shape * log(scale) - lgamma(shape) - (shape + 1) * log(x) - (scale/x) return(log.density) } dinvchi2.log <- function(x, shape, scale = 1) return(dinvgamma.log(x, shape/2, shape*scale/2)) dinvchi2 <- function(x, shape, scale = 1) return(exp(dinvgamma.log(x, shape/2, shape*scale/2))) # Function to compute MV normal random numbers, mvrnorm in MASS takes 137% longer, because it uses eigen rmvnorm.chol <- function(norm.01, u, Sigma) { # norm.01 is a vector of Normal(0,1) random numbers return(c(t(u) + (norm.01)%*%chol(Sigma))) } ############################################################################### # Function for doing sampling posteriors for univariate normal linear regression, # with conjugate priors. The difference between univariate.exact and univariate.gibbs # is that univariate.exact draws all B parameters from a MVN distribution. ############################################################################### univariate.all <- function(y, X, priorB = list(B0=NA, sigmaB=NA,B.init=NA), priorSig = list(form="shape-scale",n0=NA,sig20=NA,mu=NA,variance=NA,sig.init=NA), MCMCpars=list(n.burn=1000, n.iter=10000, n.thin=1),makePlots=FALSE) { # y is the data, can be passed in as a vector or matrix # X is a matrix of covariates, including column of 1s for intercept # priorB is an optional list containing: # B0, vector of prior means (defaults to 0) # sigmaB, vector / matrix of prior covariances (defaults to diagonal) # B.init, vector of initial values (defaults to 0) # priorSig is an optional list containing: # form, either "shape-scale" (default) or "mean-var" # if the prior shape-scale (default): # n0: how many data points is this prior worth? (defaults to 1) # sig20, prior scale parameter (defaults to 1) # if the prior mean-var: # mu, prior mean parameter # variance, prior var parameter # sig.init, initial value (defaults to MLE) # MCMCpars is an optional list containing: # n.burn, period of burn in (true length = burn * thin) # n.iter, number of samples to save after the burn # n.thin, thinning interval # makePlots is T/F, whether to plot the log(BF) and MCMC object, default = F # the "form" argument of the priorSig list allows the user to optionally specify the prior for sigma2 # in terms of the mean and the variance if(priorSig$form=="mean-var") { # then mean and var need to be translated into shape / scale priorSig$n0 = (2 * priorSig$mu^2)/priorSig$variance + 4 # shape parameter priorSig$sig20 = (priorSig$mu)*(priorSig$n0-2)/priorSig$n0 # scale parameter } n = length(y) # number of observations k = dim(X)[2] # number of estimated pars, not including sig2 # The next 4 variables are just constants to be used later n.min.k = n-k n0plusnk = priorSig$n0 + (n + k) nplusk = n+k n0sig20 = priorSig$n0+priorSig$sig20 #if(is.na(sigmaY)==TRUE) sigmaY = diag(n) else sigmaY = priorSig$sigmaY # if sigmaY is not passed in, use identity matrix Qy = diag(n) # this is scaling cov matrix, sigmaY = Qy * sigma2 # Make sure that the priors for B have been specified correctly if(is.null(priorB$B0) == TRUE || length(priorB$B0) != k || any(is.na(priorB$B0)==TRUE)) priorB$B0 = rep(0,k) # default prior has mean 0 if(is.null(priorB$B.init) == TRUE || length(priorB$B.init) != k || any(is.na(priorB$B.init)==TRUE)) priorB$B.init = rep(0,k) B = priorB$B.init # Assign B to the initial guesses # Make sure that the priors for sigma2 have been specified correctly #if(priorSig$n0 <= 4) stop("Error: increase shape parameter for sigma2 prior") #if(priorSig$sig20 <= 0) stop("Error: increase scale parameter for sigma2 prior") if(is.null(priorSig$sig.init) == TRUE || is.na(priorSig$sig.init) == TRUE) { Bhat = chol2inv(chol(t(X)%*%X))%*%t(X)%*%y priorSig$sig.init = sqrt(t(y-X%*%Bhat)%*%(y-X%*%Bhat)/(n-k)) } sigma2 = priorSig$sig.init # Create X.star and Y.star, eq 14.25 Gelman p 384 X.star = rbind(X, diag(k)) Y.star = as.matrix(c(y,priorB$B0)) # These matrices are zeros to fill in the components of Sigma.star zeros.UR = matrix(0,n,k) zeros.LL = matrix(0,k,n) if(is.null(priorB$sigmaB) == TRUE || dim(as.matrix(priorB$sigmaB))[1] != k || any(is.na(priorB$sigmaB)==TRUE)==TRUE) priorB$sigmaB = rep(1,k) if(is.matrix(priorB$sigmaB)==TRUE) diag.priorB = (priorB$sigmaB) else diag.priorB = diag(priorB$sigmaB) ################################################################# # THIS IS THE BURN-IN PHASE ################################################################# invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) rndchi2 = rchisq(n=MCMCpars$n.burn*MCMCpars$n.thin, df=n0plusnk) # take advantage of vectors, draw all the Chi2 draws we'll need for(i in 1:(MCMCpars$n.burn*MCMCpars$n.thin)) { # Y | B, sig2 ~ norm(XB, sig2*I) # sig2 | y ~ InvX2(n0+n+k, (n0*sig20+(n+k)*s2)/(n0+n+k) # n+k is length of Y.star y.min.XB = Y.star - X.star%*%B # residuals s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k) # eqn 14.18 # draw x ~ chi2 (v) RV, v*s2/x ~ InvChi2(v, s2) # Inv Chi2, Appendix, p580 sigma2 = (n0sig20+(nplusk)*s2)/rndchi2[i] # p384 # Multivariate distribution of B | sigma2, y # MVN (Bhat, Vb) Bhat = inv(t(X)*X) *t(X)*y, Vb = inv(t(X)*X) # We can build the star matrices (14.25), and then plug them in as known covariances (14.11-14.12) # this is for all cases, EW commented out the next 2 lines for speed in the independent case # invSigma.star = chol2inv(chol(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) # avoid inverting the matrix because we can assume the data & pars are independent invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) # Use cholesky decomp to avoid calculating inverse Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star)) # eqn 14.3 or 14.11 Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star # 14.12 B = rmvnorm.chol(rnorm(k,0,1),Bhat,Vb*c(sigma2)) # 14.3 } ################################################################# # THIS IS THE SAMPLING PHASE ################################################################# c = 1 rndchi2 = rchisq(n=MCMCpars$n.iter*MCMCpars$n.thin, df=n0plusnk) # take advantage of vectors, draw all the Chi2 draws we'll need B.post = matrix(0,MCMCpars$n.iter,k) sigma2.post = 0 for(i in 1:MCMCpars$n.iter) { # Nested for loops avoid the if or modulus statements - makes speed 30-50% faster for(j in 1:MCMCpars$n.thin) { # Y | B, sig2 ~ norm(XB, sig2*I) # sig2 | y ~ InvX2(n0+n+k, (n0*sig20+(n+k)*s2)/(n0+n+k) # n+k is length of Y.star y.min.XB = Y.star - X.star%*%B # residuals s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k) # eqn 14.18 # draw x ~ chi2 (v) RV, v*s2/x ~ InvChi2(v, s2) # Inv Chi2, Appendix, p580 sigma2 = (n0sig20+(nplusk)*s2)/rndchi2[c] # p384 c = c+1 # Multivariate distribution of B | sigma2, y # MVN (Bhat, Vb) Bhat = inv(t(X)*X) *t(X)*y, Vb = inv(t(X)*X) # We can build the star matrices (14.25), and then plug them in as known covariances (14.11-14.12) # this is for all cases, EW commented out the next 2 lines for speed in the independent case # invSigma.star = chol2inv(chol(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) # avoid inverting the matrix because we can assume the data & pars are independent invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) # Use cholesky decomp to avoid calculating inverse Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star)) # eqn 14.3 or 14.11 Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star # 14.12 B = rmvnorm.chol(rnorm(k,0,1),Bhat,Vb*c(sigma2)) # 14.3 } # Store the parameter draws B.post[i,] = B sigma2.post[i] = sigma2 } ################################################################# # THIS IS THE BAYES FACTOR CALCULATION / PARAMETER SUMMARY PHASE ################################################################# B.mode = 0 for(i in 1:k) { d = density(B.post[,i]) B.mode[i] = d$x[which(d$y==max(d$y))] # Calculate the mode } d = density(sqrt(sigma2.post)) sigma.mode = d$x[which(d$y==max(d$y))] # Calculate the mode sigma2.mode <- sigma.mode^2 # calculate the log likelihood at the posterior mode logLike = sum(dnorm(x = y, mean = X%*%B.mode, sd = sigma.mode,log=T)) # calculate the log likelihood # calculate log prior for sigma2, alternatively could estimate mode for tau and use gamma logPrior = dinvchi2.log(x = sigma.mode^2,shape = priorSig$n0, scale = priorSig$sig20) # Appendix A logPrior = logPrior + dmvnorm(x=B.mode, mean = priorB$B0, sigma=diag.priorB, log=TRUE) # log(posterior) is broken down into two points, # Posterior(B*,sigma*|Y) = Posterior(B*|sigma*,Y)*Posterior(sigma*|Y) # See Chib, section 2.1.2 or maybe a better explanation in Carlin & Louis, p 209 # Posterior(B*|sigma*,Y) is directly available: sigma2 <- sigma2.mode invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star)) # eqn 14.3 or 14.11 Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star # 14.12 logPost.B <- dmvnorm(x=B.mode, mean = Bhat, sigma=Vb*c(sigma2), log=TRUE) logPost.sigma <- 0 for(i in 1:MCMCpars$n.iter) { # Rao-Blackwell approximation, Carlin & Louis, p 209 eqn 6.1.6 # Calculate Posterior(sigma*|Y) as average over P(sigma*|B[i],Y) y.min.XB = Y.star - X.star%*%B.post[i,] # residuals invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2.mode),zeros.UR),cbind(zeros.LL,diag.priorB)))) s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k) # eqn 14.18 logPost.sigma[i] <- dinvchi2(x = sigma2.mode, shape = n0plusnk, scale = s2) } logPost.sigma <- log(mean(logPost.sigma)) # marginal likelihood - essentially log [p(model | data) ] # Chib refers to it as BMI, basic marginal likelihood identity BFML = logLike + logPrior - logPost.B - logPost.sigma # Chib, section 2.1.2 # return data frame of saved values post = as.data.frame(cbind(B.post,sqrt(sigma2.post))) for(i in 1:k) names(post)[i] = paste("B",i-1,sep="") names(post)[k+1] = "sigma" print(summary(mcmc(post))) # summarize output to console print(paste("Log(Bayes Factor): ",BFML,sep=""),quote=FALSE) ############ THIS IS TO MAKE OPTIONAL PLOTS if(makePlots == TRUE) { #par(ask=T) plot(mcmc(post)) par(mfrow=c(2,2)) # make 2 x 2 panel plots of priors & posteriors #num.plots = ceil((k+1)/4) Bpriors <- matrix(0,20000,k) for(i in 1:20000) Bpriors[i,] = rmvnorm.chol(rnorm(k,0,1),priorB$B0, diag.priorB) for(i in 1:k) { # draw a sample from the prior variance to plot post = B.post[,i] prior = Bpriors[,i] #min.x = min(c(sigma2.post)); max.x = max(c(sigma2.post)) min.x = min(post,prior); max.x = max(post,prior) d.prior = density(prior); d.prior$y = d.prior$y/sum(d.prior$y) d.post = density(post); d.post$y = d.post$y/sum(d.post$y) #min.x = min(d.post$x[which(d.post$y > median(d.post$y))]) #max.x = max(d.post$x[which(d.post$y > median(d.post$y))]) #d.like = min.y = 0.05*max(d.prior$y,d.post$y); max.y = max(1.10*max(d.prior$y,d.post$y),0.01+max(d.prior$y,d.post$y)) plot(d.prior$x,d.prior$y,main="",ylab="Density",xlab=paste("B",i-1,sep=""),xlim=c(min.x,max.x),ylim=c(min.y,max.y),type="l",col="blue",lwd=2) #lines(d.like$x,d.like$y,col="red",lwd=2) lines(d.post$x,d.post$y,lwd=2,col="purple") text(d.prior$x[which(d.prior$y==max(d.prior$y))], 0.004+max(d.prior$y),expression(paste("P(",theta,")")),cex=0.7) #text(d.like$x[which(d.like$y==max(d.like$y))], 0.006+max(d.like$y),expression(paste("L(Y|",theta,")"))) text(d.post$x[which(d.post$y==max(d.post$y))], 0.004+max(d.post$y),expression(paste("P(",theta,"|Y)")),cex=0.7) } # draw a sample from the prior variance to plot prior = log((priorSig$n0*priorSig$sig20)/rchisq(100000,df=priorSig$n0)) post = sigma2.post #min.x = min(c(sigma2.post)); max.x = max(c(sigma2.post)) min.x = min(post,prior); max.x = max(post,prior) d.prior = density(prior,from=0); d.prior$y = d.prior$y/sum(d.prior$y) d.post = density(post,from=0); d.post$y = d.post$y/sum(d.post$y) #min.x = min(d.post$x[which(d.post$y > median(d.post$y))]) #max.x = max(d.post$x[which(d.post$y > median(d.post$y))]) #d.like = min.y = 0.05*max(d.prior$y,d.post$y); max.y = max(1.10*max(d.prior$y,d.post$y),0.01+max(d.prior$y,d.post$y)) plot(d.prior$x,d.prior$y,main="",ylab="Density",xlab=expression(sigma[2]),xlim=c(min.x,max.x),ylim=c(min.y,max.y),type="l",col="blue",lwd=2) #lines(d.like$x,d.like$y,col="red",lwd=2) lines(d.post$x,d.post$y,lwd=2,col="purple") text(d.prior$x[which(d.prior$y==max(d.prior$y))], 0.004+max(d.prior$y),expression(paste("P(",theta,")")),cex=0.7) #text(d.like$x[which(d.like$y==max(d.like$y))], 0.006+max(d.like$y),expression(paste("L(Y|",theta,")"))) text(d.post$x[which(d.post$y==max(d.post$y))], 0.004+max(d.post$y),expression(paste("P(",theta,"|Y)")),cex=0.7) } list(saved = post, B.mode = B.mode,sigma.mode=sigma.mode,LogBayesFac=BFML) } ############################################################################### # Function for doing variable at a time (vat) gibbs sampling for univariate normal # linear regression, with conjugate priors ############################################################################### univariate.vat <- function(y, X, priorB = list(B0=NA, sigmaB=NA,B.init=NA), priorSig = list(form="shape-scale",n0=NA,sig20=NA,mu=NA,variance=NA,sig.init=NA), MCMCpars=list(n.burn=1000,n.iter=10000,n.thin=1),makePlots=FALSE) { # y is the data, can be passed in as a vector or matrix # X is a matrix of covariates, including column of 1s for intercept # priorB is an optional list containing: # B0, vector of prior means (defaults to 0) # sigmaB, vector / matrix of prior covariances (defaults to diagonal) # B.init, vector of initial values (defaults to 0) # priorSig is an optional list containing: # form, either "shape-scale" (default) or "mean-var" # if the prior shape-scale (default): # n0, prior shape parameter: how many data points is this prior worth? (defaults to 1) # sig20, prior scale parameter (defaults to 1) # if the prior mean-var: # mu, prior mean parameter # variance, prior var parameter # sig.init, initial value (defaults to MLE) # MCMCpars is an optional list containing: # n.burn, period of burn in (true length = burn * thin) # n.iter, number of samples to save after the burn # n.thin, thinning interval # makePlots is T/F, whether to plot the log(BF) and MCMC object, default = F # the "form" argument of the priorSig list allows the user to optionally specify the prior for sigma2 # in terms of the mean and the variance if(priorSig$form=="mean-var") { # then mean and var need to be translated into shape / scale priorSig$n0 = (2 * priorSig$mu^2)/priorSig$variance + 4 # shape parameter priorSig$sig20 = (priorSig$mu)*(priorSig$n0-2)/priorSig$n0 # scale parameter } n = length(y) # number of observations k = dim(X)[2] # number of estimated pars, not including sig2 # The next 4 variables are just constants to be used later n.min.k = n-k n0plusnk = priorSig$n0 + (n + k) nplusk = n+k n0sig20 = priorSig$n0+priorSig$sig20 #if(is.na(sigmaY)==TRUE) sigmaY = diag(n) else sigmaY = priorSig$sigmaY # if sigmaY is not passed in, use identity matrix Qy = diag(n) # this is scaling cov matrix, sigmaY = Qy * sigma2 # Make sure that the priors for B have been specified correctly if(is.null(priorB$B0) == TRUE || length(priorB$B0) != k || any(is.na(priorB$B0)==TRUE)) priorB$B0 = rep(0,k) # default prior has mean 0 if(is.null(priorB$B.init) == TRUE || length(priorB$B.init) != k || any(is.na(priorB$B.init)==TRUE)) priorB$B.init = rep(0,k) B = priorB$B.init # Assign B to the initial guesses # Make sure that the priors for sigma2 have been specified correctly #if(priorSig$n0 <= 4) stop("Error: increase shape parameter for sigma2 prior") #if(priorSig$sig20 <= 0) stop("Error: increase scale parameter for sigma2 prior") if(is.null(priorSig$sig.init) == TRUE || is.na(priorSig$sig.init) == TRUE) { Bhat = chol2inv(chol(t(X)%*%X))%*%t(X)%*%y priorSig$sig.init = sqrt(t(y-X%*%Bhat)%*%(y-X%*%Bhat)/(n-k)) } sigma2 = priorSig$sig.init # Create X.star and Y.star, eq 14.25 Gelman p 384 X.star = rbind(X, diag(k)) Y.star = as.matrix(c(y,priorB$B0)) # These matrices are zeros to fill in the components of Sigma.star zeros.UR = matrix(0,n,k) zeros.LL = matrix(0,k,n) if(is.null(priorB$sigmaB) == TRUE || dim(as.matrix(priorB$sigmaB))[1] != k || any(is.na(priorB$sigmaB)==TRUE)==TRUE) priorB$sigmaB = rep(1,k) if(is.matrix(priorB$sigmaB)==TRUE) diag.priorB = (priorB$sigmaB) else diag.priorB = diag(priorB$sigmaB) ################################################################# # THIS IS THE BURN-IN PHASE ################################################################# invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) rndchi2 = rchisq(n=MCMCpars$n.burn*MCMCpars$n.thin, df=n0plusnk) # take advantage of vectors, draw all the Chi2 draws we'll need for(i in 1:(MCMCpars$n.burn*MCMCpars$n.thin)) { # Y | B, sig2 ~ norm(XB, sig2*I) # sig2 | y ~ InvX2(n0+n+k, (n0*sig20+(n+k)*s2)/(n0+n+k) # n+k is length of Y.star y.min.XB = Y.star - X.star%*%B # residuals s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k) # eqn 14.18 # draw x ~ chi2 (v) RV, v*s2/x ~ InvChi2(v, s2) # Inv Chi2, Appendix, p580 sigma2 = (n0sig20+(nplusk)*s2)/rndchi2[i] # p384 # Multivariate distribution of B | sigma2, y # MVN (Bhat, Vb) Bhat = inv(t(X)*X) *t(X)*y, Vb = inv(t(X)*X) # We can build the star matrices (14.25), and then plug them in as known covariances (14.11-14.12) # this is for all cases, EW commented out the next 2 lines for speed in the independent case # invSigma.star = chol2inv(chol(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) # avoid inverting the matrix because we can assume the data & pars are independent invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) # Use cholesky decomp to avoid calculating inverse Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star)) # eqn 14.3 or 14.11 Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star # 14.12 # Do variable at a time gibbs sampling Vb.inv = chol2inv(chol(Vb*c(sigma2))) # calculate inverese of cov matrix ind.vars = (1/diag(Vb.inv)) # this is used in the var argument of A.2, and in sq brackets on line above C = diag(k)-diag(ind.vars)%*%Vb.inv # calculate conditional coefficients, eqn A.2 in Appendix for(j in 1:k) { # sample the conditional univariate distribution, A.2 in Appendix # Mean can be computed several ways: 2nd method preferred for speed # mu[j] = Bhat[j] + Vb.sigma2[j,-j]%*%chol2inv(chol(Vb.sigma2[-j,-j]))%*%as.matrix((B[-j]-Bhat[-j])) # mu[j] = Bhat[j] + C[j,]%*%as.matrix(B-Bhat) # Variance can be computed several ways: 2nd method preferred for speed # var[j] = Vb.sigma2[j,j] - Vb.sigma2[j,-j]%*%chol2inv(chol(Vb.sigma2[-j,-j]))%*%Vb.sigma2[-j,j] # var[j] = ind.vars[j] B[j] = rnorm(1, Bhat[j] + C[j,]%*%as.matrix(B-Bhat), sqrt(ind.vars[j])) } } ################################################################# # THIS IS THE SAMPLING PHASE ################################################################# c = 1 rndchi2 = rchisq(n=MCMCpars$n.iter*MCMCpars$n.thin, df=n0plusnk) # take advantage of vectors, draw all the Chi2 draws we'll need B.post = matrix(0,MCMCpars$n.iter,k) sigma2.post = 0 for(i in 1:MCMCpars$n.iter) { # Nested for loops avoid the if or modulus statements - makes speed 30-50% faster for(j in 1:MCMCpars$n.thin) { # Y | B, sig2 ~ norm(XB, sig2*I) # sig2 | y ~ InvX2(n0+n+k, (n0*sig20+(n+k)*s2)/(n0+n+k) # n+k is length of Y.star y.min.XB = Y.star - X.star%*%B # residuals s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k) # eqn 14.18 # draw x ~ chi2 (v) RV, v*s2/x ~ InvChi2(v, s2) # Inv Chi2, Appendix, p580 sigma2 = (n0sig20+(nplusk)*s2)/rndchi2[c] # p384 c = c+1 # Multivariate distribution of B | sigma2, y # MVN (Bhat, Vb) Bhat = inv(t(X)*X) *t(X)*y, Vb = inv(t(X)*X) # We can build the star matrices (14.25), and then plug them in as known covariances (14.11-14.12) # this is for all cases, EW commented out the next 2 lines for speed in the independent case # invSigma.star = chol2inv(chol(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) # avoid inverting the matrix because we can assume the data & pars are independent invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) # Use cholesky decomp to avoid calculating inverse Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star)) # eqn 14.3 or 14.11 Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star # 14.12 # Do variable at a time gibbs sampling Vb.inv = chol2inv(chol(Vb*c(sigma2))) # calculate inverese of cov matrix ind.vars = (1/diag(Vb.inv)) # this is used in the var argument of A.2, and in sq brackets on line above C = diag(k)-diag(ind.vars)%*%Vb.inv # calculate conditional coefficients, eqn A.2 in Appendix for(j in 1:k) { # sample the conditional univariate distribution, A.2 in Appendix # Mean can be computed several ways: 2nd method preferred for speed # mu[j] = Bhat[j] + Vb.sigma2[j,-j]%*%chol2inv(chol(Vb.sigma2[-j,-j]))%*%as.matrix((B[-j]-Bhat[-j])) # mu[j] = Bhat[j] + C[j,]%*%as.matrix(B-Bhat) # Variance can be computed several ways: 2nd method preferred for speed # var[j] = Vb.sigma2[j,j] - Vb.sigma2[j,-j]%*%chol2inv(chol(Vb.sigma2[-j,-j]))%*%Vb.sigma2[-j,j] # var[j] = ind.vars[j] B[j] = rnorm(1, Bhat[j] + C[j,]%*%as.matrix(B-Bhat), sqrt(ind.vars[j])) } } # Store the parameter draws B.post[i,] = B sigma2.post[i] = sigma2 } ################################################################# # THIS IS THE BAYES FACTOR CALCULATION / PARAMETER SUMMARY PHASE ################################################################# B.mode = 0 for(i in 1:k) { d = density(B.post[,i]) B.mode[i] = d$x[which(d$y==max(d$y))] # Calculate the mode } d = density(sqrt(sigma2.post)) sigma.mode = d$x[which(d$y==max(d$y))] # Calculate the mode sigma2.mode <- sigma.mode^2 # calculate the log likelihood at the posterior mode logLike = sum(dnorm(x = y, mean = X%*%B.mode, sd = sigma.mode,log=T)) # calculate the log likelihood # calculate log prior for sigma2, alternatively could estimate mode for tau and use gamma logPrior = dinvchi2.log(x = sigma.mode^2,shape = priorSig$n0, scale = priorSig$sig20) # Appendix A logPrior = logPrior + dmvnorm(x=B.mode, mean = priorB$B0, sigma=diag.priorB, log=TRUE) # log(posterior) is broken down into two points, # Posterior(B*,sigma*|Y) = Posterior(B*|sigma*,Y)*Posterior(sigma*|Y) # See Chib, section 2.1.2 or maybe a better explanation in Carlin & Louis, p 209 # Posterior(B*|sigma*,Y) is directly available: sigma2 <- sigma2.mode invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2),zeros.UR),cbind(zeros.LL,diag.priorB)))) Vb = chol2inv(chol(t(X.star)%*%invSigma.star%*%X.star)) # eqn 14.3 or 14.11 Bhat = Vb%*%t(X.star)%*%invSigma.star%*%Y.star # 14.12 logPost.B <- dmvnorm(x=B.mode, mean = Bhat, sigma=Vb*c(sigma2), log=TRUE) logPost.sigma <- 0 for(i in 1:MCMCpars$n.iter) { # Rao-Blackwell approximation, Carlin & Louis, p 209 eqn 6.1.6 # Calculate Posterior(sigma*|Y) as average over P(sigma*|B[i],Y) y.min.XB = Y.star - X.star%*%B.post[i,] # residuals invSigma.star = diag(1/diag(rbind(cbind(Qy * c(sigma2.mode),zeros.UR),cbind(zeros.LL,diag.priorB)))) s2 = t(y.min.XB)%*%invSigma.star%*%y.min.XB/(n.min.k) # eqn 14.18 logPost.sigma[i] <- dinvchi2(x = sigma2.mode, shape = n0plusnk, scale = s2) } logPost.sigma <- log(mean(logPost.sigma)) # marginal likelihood - essentially log [p(model | data) ] # Chib refers to it as BMI, basic marginal likelihood identity BFML = logLike + logPrior - logPost.B - logPost.sigma # Chib, section 2.1.2 # return data frame of saved values post = as.data.frame(cbind(B.post,sqrt(sigma2.post))) for(i in 1:k) names(post)[i] = paste("B",i-1,sep="") names(post)[k+1] = "sigma" print(summary(mcmc(post))) # summarize output to console print(paste("Log(Bayes Factor): ",BFML,sep=""),quote=FALSE) ############ THIS IS TO MAKE OPTIONAL PLOTS if(makePlots == TRUE) { #par(ask=T) plot(mcmc(post)) par(mfrow=c(2,2)) # make 2 x 2 panel plots of priors & posteriors #num.plots = ceil((k+1)/4) Bpriors <- matrix(0,20000,k) for(i in 1:20000) Bpriors[i,] = rmvnorm.chol(rnorm(k,0,1),priorB$B0, diag.priorB) for(i in 1:k) { # draw a sample from the prior variance to plot post = B.post[,i] prior = Bpriors[,i] #min.x = min(c(sigma2.post)); max.x = max(c(sigma2.post)) min.x = min(post,prior); max.x = max(post,prior) d.prior = density(prior); d.prior$y = d.prior$y/sum(d.prior$y) d.post = density(post); d.post$y = d.post$y/sum(d.post$y) #min.x = min(d.post$x[which(d.post$y > median(d.post$y))]) #max.x = max(d.post$x[which(d.post$y > median(d.post$y))]) #d.like = min.y = 0.05*max(d.prior$y,d.post$y); max.y = max(1.10*max(d.prior$y,d.post$y),0.01+max(d.prior$y,d.post$y)) plot(d.prior$x,d.prior$y,main="",ylab="Density",xlab=paste("B",i-1,sep=""),xlim=c(min.x,max.x),ylim=c(min.y,max.y),type="l",col="blue",lwd=2) #lines(d.like$x,d.like$y,col="red",lwd=2) lines(d.post$x,d.post$y,lwd=2,col="purple") text(d.prior$x[which(d.prior$y==max(d.prior$y))], 0.004+max(d.prior$y),expression(paste("P(",theta,")")),cex=0.7) #text(d.like$x[which(d.like$y==max(d.like$y))], 0.006+max(d.like$y),expression(paste("L(Y|",theta,")"))) text(d.post$x[which(d.post$y==max(d.post$y))], 0.004+max(d.post$y),expression(paste("P(",theta,"|Y)")),cex=0.7) } # draw a sample from the prior variance to plot prior = log((priorSig$n0*priorSig$sig20)/rchisq(100000,df=priorSig$n0)) post = sigma2.post #min.x = min(c(sigma2.post)); max.x = max(c(sigma2.post)) min.x = min(post,prior); max.x = max(post,prior) d.prior = density(prior,from=0); d.prior$y = d.prior$y/sum(d.prior$y) d.post = density(post,from=0); d.post$y = d.post$y/sum(d.post$y) #min.x = min(d.post$x[which(d.post$y > median(d.post$y))]) #max.x = max(d.post$x[which(d.post$y > median(d.post$y))]) #d.like = min.y = 0.05*max(d.prior$y,d.post$y); max.y = max(1.10*max(d.prior$y,d.post$y),0.01+max(d.prior$y,d.post$y)) plot(d.prior$x,d.prior$y,main="",ylab="Density",xlab=expression(sigma[2]),xlim=c(min.x,max.x),ylim=c(min.y,max.y),type="l",col="blue",lwd=2) #lines(d.like$x,d.like$y,col="red",lwd=2) lines(d.post$x,d.post$y,lwd=2,col="purple") text(d.prior$x[which(d.prior$y==max(d.prior$y))], 0.004+max(d.prior$y),expression(paste("P(",theta,")")),cex=0.7) #text(d.like$x[which(d.like$y==max(d.like$y))], 0.006+max(d.like$y),expression(paste("L(Y|",theta,")"))) text(d.post$x[which(d.post$y==max(d.post$y))], 0.004+max(d.post$y),expression(paste("P(",theta,"|Y)")),cex=0.7) } list(saved = post, B.mode = B.mode,sigma.mode=sigma.mode,LogBayesFac=BFML) }