Supplement

Computation of Credible Sets

In the last lecture, Keith implemented a function that finds the highest posterior density (HPD) credible set. Now we compare it with a “naive” method to introduce the concept of computational efficiency. First, we generate some data according to Example 5.10 (see the handwritten notes):

set.seed(4010)
n = 20
x = rbinom(n, 1, .4)
prior = function(theta) {
  (3*dnorm(theta,.2,.03) 
   +2*dnorm(theta,.5,.07)
   +6*dnorm(theta,.9,.07))*(theta>=0)*(theta<=1)
}
posterior = function(theta, x) {
    n = length(x)
    s = sum(x)
    theta^s *(1-theta)^(n-s) *prior(theta)
}
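
Before computing any credible set, it helps to see what we are working with. The quick plot below is a sketch we add for orientation (the grid of 301 points is an arbitrary choice); it shows the three-mode prior and the resulting unnormalized posterior:

# quick look at the prior and the unnormalized posterior on a grid
theta_grid = seq(from=0, to=1, length=301)
par(mfrow=c(1,2))
plot(theta_grid, prior(theta_grid), type="l", lwd=2, col="red4",
     xlab=bquote(theta), ylab="prior (unnormalized)")
plot(theta_grid, posterior(theta_grid, x), type="l", lwd=2, col="red4",
     xlab=bquote(theta), ylab="posterior (unnormalized)")
par(mfrow=c(1,1))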

Next, we slightly modify Keith’s implementation:

hpd_keith = function(x, posterior, support=c(0,1), n=301, alpha=0.005, plot=TRUE){
    # step 1: values of posterior at different values of theta in the support
    theta = seq(from=support[1], to=support[2], length=n)
    d = posterior(theta, x)
    d = d/sum(d)
    # step 2: find the theta values that satisfy the credibility requirement
    O = order(d, decreasing=TRUE)
    N = sum(cumsum(d[O])<1-alpha) +1
    selected = theta[O[1:N]]
    # step 3: plot
    if (plot) {
      plot(theta, d, type="l", lwd=2, col="red4",
           xlab=bquote(theta),
           ylab=bquote(symbol("\265")~pi(theta~"|"~italic(x[1:n]))))
      abline(v=selected, col="pink")
      abline(h=c(0,d[O[N]]), v=support, lty=2, lwd=.75)
    }
    invisible(selected)   # return the selected grid points (invisibly)
}
hpd_keith(x, posterior, alpha=0.05)
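
The function above only draws the HPD set. If we also want its numerical endpoints, one possible helper (a sketch of ours, not part of Keith’s implementation; the name hpd_intervals is arbitrary) reuses the same ordering trick and groups the selected grid points into disjoint intervals:

hpd_intervals = function(x, posterior, support=c(0,1), n=301, alpha=0.005){
    theta = seq(from=support[1], to=support[2], length=n)
    d = posterior(theta, x)
    d = d/sum(d)
    O = order(d, decreasing=TRUE)
    N = sum(cumsum(d[O])<1-alpha) +1
    idx = sort(O[1:N])                  # indices of the selected grid points
    grp = cumsum(c(1, diff(idx) > 1))   # start a new group at every jump in the indices
    out = t(sapply(split(theta[idx], grp), range))
    colnames(out) = c("lower", "upper") # one row per disjoint interval
    out
}
hpd_intervals(x, posterior, n=1001, alpha=0.05)

Each row corresponds to one contiguous block of selected grid points, i.e., one of the pink bands in the plot.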

We also implement a “naive” method, which searches for the cutoff by recomputing the partial sum inside an explicit loop:

hpd_naive = function(x, posterior, support=c(0,1), n=301, alpha=0.005, plot=TRUE){
    # step 1: values of posterior at different values of theta in the support
    theta = seq(from=support[1], to=support[2], length=n)
    d = posterior(theta, x)
    d = d/sum(d)
    # step 2: find the theta values that satisfy the credibility requirement
    dDsc = sort(d, decreasing=TRUE)
    # naively search for the cutoff, recomputing the partial sum at every step
    for (N in 1:n) {
      if (sum(dDsc[1:N]) >= 1-alpha)
        break
    }
    cutoff = dDsc[N]
    selected = theta[d >= cutoff]
    # step 3: plot
    if (plot) {
      plot(theta, d, type="l", lwd=2, col="red4",
           xlab=bquote(theta),
           ylab=bquote(symbol("\265")~pi(theta~"|"~italic(x[1:n]))))
      abline(v=selected, col="pink")
      abline(h=c(0,cutoff), v=support, lty=2, lwd=.75)
    }
    invisible(selected)   # return the selected grid points (invisibly)
}
hpd_naive(x, posterior, alpha=0.05)
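
Since both functions return their selected grid points invisibly, we can quickly confirm that they agree on the set itself and differ only in how they find it:

# both implementations should select the same grid points
# (up to ties at the cutoff, which essentially never occur here)
setequal(hpd_keith(x, posterior, alpha=0.05, plot=FALSE),
         hpd_naive(x, posterior, alpha=0.05, plot=FALSE))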

Finally, we compare their speed:

library(microbenchmark)
speed = microbenchmark(keith=hpd_keith(x, posterior, n=10000, plot=FALSE),
                       naive=hpd_naive(x, posterior, n=10000, plot=FALSE))
print(speed)
## Unit: milliseconds
##   expr     min       lq      mean  median      uq     max neval
##  keith  5.0879  5.23225  5.487325  5.2832  5.4324  7.7350   100
##  naive 48.5869 50.22255 55.010239 51.2785 54.6437 85.1875   100

We can see that Keith’s implementation, which uses the vectorized cumsum to obtain all partial sums in a single pass, is roughly ten times faster than naively searching for the cutoff. Theoretically, Keith’s method runs in \(O(n \log n)\) time (dominated by the sorting step inside order), while the naive method runs in \(O(n^2)\) time (the for loop recomputes sum(dDsc[1:N]) from scratch at every iteration).
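
To see the asymptotics rather than a single timing, one rough check (a sketch; the absolute numbers depend on the machine) is to time both implementations over a range of grid sizes and inspect how the median run times grow:

# doubling n should roughly double Keith's time (posterior evaluation plus sorting)
# but roughly quadruple the naive time (the quadratic loop dominates)
sizes = c(2000, 4000, 8000, 16000)
medians = sapply(sizes, function(m)
  c(keith = median(microbenchmark(hpd_keith(x, posterior, n=m, plot=FALSE), times=20)$time),
    naive = median(microbenchmark(hpd_naive(x, posterior, n=m, plot=FALSE), times=20)$time)))
colnames(medians) = sizes
medians / 1e6   # median run times in milliseconds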

Using the concept of time complexity, we can also explain why an equal-tailed (ET) credible set can be found even more efficiently, in \(O(n)\) time: no sorting is needed, since a single cumulative sum locates both quantiles. Let’s try it in R:

et = function(x, posterior, support=c(0,1), n=301, alpha=0.005, plot=TRUE){
    # step 1: values of posterior at different values of theta in the support
    theta = seq(from=support[1], to=support[2], length=n)
    d = posterior(theta, x)
    d = d/sum(d)
    # step 2: find the theta values that satisfy the credibility requirement
    csd = cumsum(d)
    L = sum(csd < alpha/2) + 1      # index of the (alpha/2)-quantile
    U = sum(csd < 1-alpha/2) + 1    # index of the (1-alpha/2)-quantile
    selected = theta[L:U]
    # step 3: plot
    if (plot) {
      plot(theta, d, type="l", lwd=2, col="red4",
           xlab=bquote(theta),
           ylab=bquote(symbol("\265")~pi(theta~"|"~italic(x[1:n]))))
      abline(v=selected, col="pink")
      abline(h=0, v=c(support, theta[c(L, U)]), lty=2, lwd=.75)
    }
    invisible(selected)   # return the selected grid points (invisibly)
}
et(x, posterior, alpha=0.05)
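
As in the HPD case, we may want the endpoints themselves rather than a plot. A small helper along the same lines (again a sketch of ours; the name et_interval is arbitrary) reads them off the cumulative sum:

et_interval = function(x, posterior, support=c(0,1), n=301, alpha=0.005){
    theta = seq(from=support[1], to=support[2], length=n)
    d = posterior(theta, x)
    d = d/sum(d)
    csd = cumsum(d)
    # the ET endpoints are the alpha/2 and (1-alpha/2) quantiles of the grid posterior
    c(lower = theta[sum(csd < alpha/2) + 1],
      upper = theta[sum(csd < 1-alpha/2) + 1])
}
et_interval(x, posterior, n=1001, alpha=0.05)

Unlike the HPD set, the ET set is always a single interval, which is why two endpoints suffice.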

We also compare their speed:

library(microbenchmark)
speed = microbenchmark(hpd=hpd_keith(x, posterior, n=10000, plot=FALSE),
                       et=et(x, posterior, n=10000, plot=FALSE))
print(speed)
## Unit: milliseconds
##  expr    min     lq     mean  median     uq     max neval
##   hpd 5.1966 5.4896 6.791105 6.08265 7.2393 14.0774   100
##    et 4.7908 5.1707 6.245704 5.75670 6.7996 14.4915   100