################################################################################################
## Code by Vanja Dukic, U of Colorado at Boulder
## Bayesian Statistics and Computing 2019
##
## LECTURE 14 - Monte Carlo and importance sampling
################################################################################################

# Recall the Startup example:
# A startup company A has a cool new product that they supply to company B, but it's a bit
# unreliable. Company A has in the past sent two large shipments to company B. Among those,
# 10% were defective. This is considered the prior information.
#
# Now, company B wants to give company A one more chance, and orders a shipment of 50 units.
# The shipment arrives, and out of that shipment, only 1 unit is defective.
# This is considered the data at hand.
#
# Encouraged, company B is considering ordering a large shipment of size 500 in the future.

# (a) Previously, we proposed 3 prior distributions based on the available prior information:
#       p ~ Beta(a, b)
#     Then we derived the posterior density based on the data (1 out of 50 defective items):
#       X ~ Bin(50, p);  x = 1
#       p ~ Beta(a, b)
#       (p | X = x) propto p^x * (1-p)^(n-x) * p^(a-1) * (1-p)^(b-1)
#                        = p^(x+a-1) * (1-p)^(n-x+b-1)
#     which is the Beta(x + a, n - x + b) density

x <- 1    # observed number of defective units in the shipment at hand
n <- 50   # size of the observed shipment
x.grid <- seq(0.001, 0.999, by = 0.001)  # grid over p for plotting densities

# pessimistic prior: Beta(5, 20), prior mean 5/25 = 0.20
a.pes <- 5
b.pes <- 20
a.post.pes <- x + a.pes        # posterior is Beta(x + a, n - x + b)
b.post.pes <- n - x + b.pes

# realistic prior: Beta(10, 90), prior mean 10/100 = 0.10
a.real <- 10
b.real <- 90
a.post.real <- x + a.real
b.post.real <- n - x + b.real

# optimistic prior: Beta(2, 28), prior mean 2/30 ~= 0.067
a.opt <- 2
b.opt <- 28
a.post.opt <- x + a.opt
b.post.opt <- n - x + b.opt

# FIX: the original called x11(), which errors on any system without an X server
# (headless sessions, macOS without XQuartz). dev.new() opens the platform's
# default graphics device and behaves the same where x11() worked.
dev.new()
par(mfrow = c(5, 3))  # 5 x 3 grid: 15 panels for the plots drawn below

#plot(x.grid, dbeta(x.grid, a.pes, b.pes), 'l', col='red', xlab="p", ylab="Pessimistic beta prior", main="Pessimistic beta prior")
#plot(x.grid, dbeta(x.grid, a.real, b.real), 'l', col='red', xlab="p", ylab="Realistic beta prior", main="Realistic beta prior")
#plot(x.grid, dbeta(x.grid, a.opt, b.opt), 'l', col='red', xlab="p", ylab="Optimistic beta prior", main="Optimistic beta prior")
# Posterior densities of p under the three priors, evaluated on the grid
plot(x.grid, dbeta(x.grid, a.post.pes, b.post.pes), 'l', col = 'red',
     xlab = "p", ylab = "Pessimistic beta prior", main = "Pessimistic beta posterior")
plot(x.grid, dbeta(x.grid, a.post.real, b.post.real), 'l', col = 'red',
     xlab = "p", ylab = "Realistic beta prior", main = "Realistic beta posterior")
plot(x.grid, dbeta(x.grid, a.post.opt, b.post.opt), 'l', col = 'red',
     xlab = "p", ylab = "Optimistic beta prior", main = "Optimistic beta posterior")

# Instead of evaluating the density on the grid, we can draw a Monte Carlo
# sample from each posterior and plot a histogram
Nsample <- 1000
psample.pes  <- rbeta(Nsample, a.post.pes,  b.post.pes)
psample.real <- rbeta(Nsample, a.post.real, b.post.real)
psample.opt  <- rbeta(Nsample, a.post.opt,  b.post.opt)
hist(psample.pes,  nclass = 30, xlim = c(0, 1), main = 'MC sample from pessimistic posterior')
hist(psample.real, nclass = 30, xlim = c(0, 1), main = 'MC sample from realistic posterior')
hist(psample.opt,  nclass = 30, xlim = c(0, 1), main = 'MC sample from optimistic posterior')

# Posterior predictive distribution for the number of defectives y in a future
# shipment of Nnew = 500:
#   f(y | x) = integral_0^1 f(y | p) g(p | X = x) dp
#            = integral_0^1 choose(Nnew, y) p^(y + x + a - 1) (1-p)^(Nnew - y + n - x + b - 1) dp
#            propto choose(Nnew, y) * Beta(y + x + a, Nnew - y + n - x + b)
# Below we evaluate this UNNORMALIZED pmf (the constant Beta(x+a, n-x+b) is
# dropped), which is why every summary divides by sum(post.pred.*)
y <- seq(0, 500, by = 1)
Nnew <- 500

post.pred.pes <- choose(Nnew, y) * beta(y + x + a.pes, Nnew - y + n - x + b.pes)
plot(y, post.pred.pes, 'l', col = 'red', xlab = "p", ylab = "Pessimistic beta prior",
     main = "Pessimistic posterior predictive")
post.pred.real <- choose(Nnew, y) * beta(y + x + a.real, Nnew - y + n - x + b.real)
plot(y, post.pred.real, 'l', col = 'red', xlab = "p", ylab = "Realistic beta prior",
     main = "Realistic posterior predictive")
post.pred.opt <- choose(Nnew, y) * beta(y + x + a.opt, Nnew - y + n - x + b.opt)
plot(y, post.pred.opt, 'l', col = 'red', xlab = "p", ylab = "Optimistic beta prior",
     main = "Optimistic posterior predictive")

# Instead of evaluating the integral analytically, sample from the posterior
# predictive by the method of composition: draw p from the posterior (we
# already have psample.pes / psample.real / psample.opt), then y | p ~ Bin(Nnew, p)
ynew.sample.pes  <- rbinom(Nsample, Nnew, psample.pes)
ynew.sample.real <- rbinom(Nsample, Nnew, psample.real)
ynew.sample.opt  <- rbinom(Nsample, Nnew, psample.opt)
hist(ynew.sample.pes,  nclass = 30, xlim = c(0, 500), main = 'MC sample from pessimistic post pred')
hist(ynew.sample.real, nclass = 30, xlim = c(0, 500), main = 'MC sample from realistic post pred')
hist(ynew.sample.opt,  nclass = 30, xlim = c(0, 500), main = 'MC sample from optimistic post pred')

# Mixture approximation to the posterior predictive, based on the MC sample of p's:
#   f(y | x) ~= (1 / Nsample) * sum_{i=1}^{Nsample} dbinom(y, Nnew, p_i)
# FIX: the original built a 501 x Nsample matrix of repeated y values and relied
# on dbinom() argument recycling to pair it with psample; that pairs every p_i
# with every y exactly once per row ONLY because gcd(length(y), Nsample) == 1,
# i.e. it was correct by accident. Build the matrix explicitly so that column j
# genuinely uses sample p_j, then average over columns.
mix.approx <- function(psample) {
  # 501 x Nsample matrix: column j holds dbinom(y, Nnew, p_j)
  rowMeans(vapply(psample, function(p) dbinom(y, Nnew, p), numeric(length(y))))
}
post.pred.mix.pes  <- mix.approx(psample.pes)
post.pred.mix.real <- mix.approx(psample.real)
post.pred.mix.opt  <- mix.approx(psample.opt)
plot(y, post.pred.mix.pes,  main = 'Mixture approx to pessimistic post pred')
plot(y, post.pred.mix.real, main = 'Mixture approx to realistic post pred')
plot(y, post.pred.mix.opt,  main = 'Mixture approx to optimistic post pred')

# Recall the posterior predictive means and central and HPD intervals were:

# posterior predictive means, analytic (normalizing the unnormalized pmf):
post.pred.mean.pes  <- sum(post.pred.pes  * y) / sum(post.pred.pes)
post.pred.mean.opt  <- sum(post.pred.opt  * y) / sum(post.pred.opt)
post.pred.mean.real <- sum(post.pred.real * y) / sum(post.pred.real)
c(post.pred.mean.pes, post.pred.mean.real, post.pred.mean.opt)
# 40.00000 36.66667 18.75000

# the MC (method of composition) versions:
post.pred.mean.mc.pes  <- mean(ynew.sample.pes)
post.pred.mean.mc.opt  <- mean(ynew.sample.opt)
post.pred.mean.mc.real <- mean(ynew.sample.real)
c(post.pred.mean.mc.pes, post.pred.mean.mc.real, post.pred.mean.mc.opt)
# e.g. 39.982 36.688 18.947 (values depend on the random seed)

# the mixture-approximation versions:
post.pred.mean.mix.pes  <- sum(post.pred.mix.pes  * y) / sum(post.pred.mix.pes)
post.pred.mean.mix.opt  <- sum(post.pred.mix.opt  * y) / sum(post.pred.mix.opt)
post.pred.mean.mix.real <- sum(post.pred.mix.real * y) / sum(post.pred.mix.real)
c(post.pred.mean.mix.pes, post.pred.mean.mix.real, post.pred.mean.mix.opt)
# e.g. 40.10235 36.84157 19.22061 (values depend on the random seed)

### CREDIBLE INTERVALS
Cred <- 0.95

# The central 95% credible interval for the number of defective items in the
# new shipment of 500: cut (1 - Cred)/2 probability from each tail of the
# normalized posterior predictive cdf
post.pred.cci.pes <- cumsum(post.pred.pes) / sum(post.pred.pes)
lb.cci.pes <- y[max(which(post.pred.cci.pes <= (1 - Cred) / 2))]
ub.cci.pes <- y[min(which(post.pred.cci.pes >= (1 + Cred) / 2))]
c(lb.cci.pes, ub.cci.pes)
# 12, 78

post.pred.cci.real <- cumsum(post.pred.real) / sum(post.pred.real)
lb.cci.real <- y[max(which(post.pred.cci.real <= (1 - Cred) / 2))]
ub.cci.real <- y[min(which(post.pred.cci.real >= (1 + Cred) / 2))]
c(lb.cci.real, ub.cci.real)
# 15, 63

post.pred.cci.opt <- cumsum(post.pred.opt) / sum(post.pred.opt)
lb.cci.opt <- y[max(which(post.pred.cci.opt <= (1 - Cred) / 2))]
ub.cci.opt <- y[min(which(post.pred.cci.opt >= (1 + Cred) / 2))]
c(lb.cci.opt, ub.cci.opt)
# 2, 46

# The 95% highest probability density (HPD) credible interval
# for the number of defective items in the new shipment of size 500
#
## NOTE: THE VERSION BELOW PRODUCES SLIGHTLY WIDER INTERVALS
## (A BIT ABOVE THE CRED LEVEL)
# HPD version 1: discard the LOWEST-mass y values while their total mass stays
# strictly below 1 - Cred, and keep everything else, so the kept region carries
# AT LEAST Cred of the mass (slightly wider than nominal).
#
# FIX: the original computed ix[-complement] — negative indexing by the
# complement's VALUES removes POSITIONS of the sorted index vector, not the
# complement values themselves — and then mapped indices to y via 501 - ix
# instead of ix - 1. For these unimodal pmfs the two mistakes nearly cancelled,
# but only by accident. The set arithmetic below is direct: keep the index
# values not in the complement, and map index -> y via ix - 1 (ix runs 1..501
# while y runs 0..500). The coverage check also indexes by ix, not by y values
# (the original was off by one there as well).

# PESSIMISTIC
post.pred.hpd.pes.sorted <- sort(post.pred.pes / sum(post.pred.pes),
                                 decreasing = FALSE, index.return = TRUE)
# indices (1..501) of the lowest-mass values whose cumulative mass is < 1 - Cred
yindex.pes.complement <- post.pred.hpd.pes.sorted$ix[
  which(cumsum(post.pred.hpd.pes.sorted$x) < (1 - Cred))]
yindex.pes <- setdiff(seq_along(post.pred.pes), yindex.pes.complement)
post.pred.hpd.pes <- sort(yindex.pes - 1)   # back to y values
# check to see if it's really (at least) the Cred level
sum(post.pred.pes[yindex.pes]) / sum(post.pred.pes)
# check to verify that it's a single interval
max(diff(sort(yindex.pes)))
# 1, so yes, can report a single interval
post.pred.hpd.pes.lb <- min(post.pred.hpd.pes)
post.pred.hpd.pes.ub <- max(post.pred.hpd.pes)
c(post.pred.hpd.pes.lb, post.pred.hpd.pes.ub)
# ~ 11, 73

#######################################
# REALISTIC
post.pred.hpd.real.sorted <- sort(post.pred.real / sum(post.pred.real),
                                  decreasing = FALSE, index.return = TRUE)
yindex.real.complement <- post.pred.hpd.real.sorted$ix[
  which(cumsum(post.pred.hpd.real.sorted$x) < (1 - Cred))]
yindex.real <- setdiff(seq_along(post.pred.real), yindex.real.complement)
post.pred.hpd.real <- sort(yindex.real - 1)
# check to see if it's really (at least) the Cred level
sum(post.pred.real[yindex.real]) / sum(post.pred.real)
# check to verify that it's a single interval
max(diff(sort(yindex.real)))
# 1, so yes, can report a single interval
post.pred.hpd.real.lb <- min(post.pred.hpd.real)
post.pred.hpd.real.ub <- max(post.pred.hpd.real)
c(post.pred.hpd.real.lb, post.pred.hpd.real.ub)
# ~ 15, 61

#######################################
# OPTIMISTIC
post.pred.hpd.opt.sorted <- sort(post.pred.opt / sum(post.pred.opt),
                                 decreasing = FALSE, index.return = TRUE)
yindex.opt.complement <- post.pred.hpd.opt.sorted$ix[
  which(cumsum(post.pred.hpd.opt.sorted$x) < (1 - Cred))]
yindex.opt <- setdiff(seq_along(post.pred.opt), yindex.opt.complement)
post.pred.hpd.opt <- sort(yindex.opt - 1)
# check to see if it's really (at least) the Cred level
sum(post.pred.opt[yindex.opt]) / sum(post.pred.opt)
# check to verify that it's a single interval
max(diff(sort(yindex.opt)))
# 1, so yes, can report a single interval
post.pred.hpd.opt.lb <- min(post.pred.hpd.opt)
post.pred.hpd.opt.ub <- max(post.pred.hpd.opt)
c(post.pred.hpd.opt.lb, post.pred.hpd.opt.ub)
# ~ 1, 41

#######################################
## HPD version 2: accumulate the HIGHEST-mass y values while their cumulative
## mass is still <= Cred, so the kept region carries AT MOST Cred of the mass.
## NOTE: THIS VERSION PRODUCES SLIGHTLY NARROWER INTERVALS
## (A BIT BELOW THE CRED LEVEL)

# PESSIMISTIC
post.pred.hpd.pes.sorted <- sort(post.pred.pes / sum(post.pred.pes),
                                 decreasing = TRUE, index.return = TRUE)
# note: ix goes from 1 to 501, so shifted by 1 from y — hence the "- 1"
yindex.pes <- post.pred.hpd.pes.sorted$ix[
  which(cumsum(post.pred.hpd.pes.sorted$x) <= Cred)] - 1
post.pred.hpd.pes <- sort(yindex.pes)
# check to verify that it's a single interval
max(diff(sort(yindex.pes)))
# 1, so yes, a single interval
post.pred.hpd.pes.lb <- min(post.pred.hpd.pes)
post.pred.hpd.pes.ub <- max(post.pred.hpd.pes)
c(post.pred.hpd.pes.lb, post.pred.hpd.pes.ub)
# 11, 72

# REALISTIC
post.pred.hpd.real.sorted <- sort(post.pred.real / sum(post.pred.real),
                                  decreasing = TRUE, index.return = TRUE)
yindex.real <- post.pred.hpd.real.sorted$ix[
  which(cumsum(post.pred.hpd.real.sorted$x) <= Cred)] - 1
post.pred.hpd.real <- sort(yindex.real)
# check to verify that it's a single interval
max(diff(sort(yindex.real)))
# 1, so yes, a single interval
post.pred.hpd.real.lb <- min(post.pred.hpd.real)
post.pred.hpd.real.ub <- max(post.pred.hpd.real)
c(post.pred.hpd.real.lb, post.pred.hpd.real.ub)
# 15, 60

# OPTIMISTIC
post.pred.hpd.opt.sorted <- sort(post.pred.opt / sum(post.pred.opt),
                                 decreasing = TRUE, index.return = TRUE)
yindex.opt <- post.pred.hpd.opt.sorted$ix[
  which(cumsum(post.pred.hpd.opt.sorted$x) <= Cred)] - 1
post.pred.hpd.opt <- sort(yindex.opt)
# check to verify that it's a single interval
max(diff(sort(yindex.opt)))
# 1, so yes, a single interval
post.pred.hpd.opt.lb <- min(post.pred.hpd.opt)
post.pred.hpd.opt.ub <- max(post.pred.hpd.opt)
c(post.pred.hpd.opt.lb, post.pred.hpd.opt.ub)
# 1, 40

#######################################################################################
## Importance sampling tricks
# Imagine we only ran the analysis with the realistic prior, and now want a
# sensitivity analysis under the pessimistic and optimistic priors WITHOUT new
# posterior draws. Importance resampling (SIR) reuses psample.real: resample it
# with replacement, with weights
#   target posterior density / realistic posterior density
w.pes <- dbeta(psample.real, a.post.pes, b.post.pes) /
  dbeta(psample.real, a.post.real, b.post.real)
w.opt <- dbeta(psample.real, a.post.opt, b.post.opt) /
  dbeta(psample.real, a.post.real, b.post.real)
psample.pes.sir <- sample(psample.real, Nsample, replace = TRUE, prob = w.pes)
psample.opt.sir <- sample(psample.real, Nsample, replace = TRUE, prob = w.opt)

# FIX: draw the predictive samples from the RESAMPLED values (.sir). The
# original passed psample.pes / psample.opt (the direct posterior draws), which
# silently bypassed the importance-resampling step this section illustrates.
ynew.imp.sample.pes <- rbinom(Nsample, Nnew, psample.pes.sir)
ynew.imp.sample.opt <- rbinom(Nsample, Nnew, psample.opt.sir)
hist(ynew.imp.sample.pes, nclass = 30, xlim = c(0, 500),
     main = 'Importance sample from pessimistic posterior predictive')
hist(ynew.sample.real, nclass = 30, xlim = c(0, 500),
     main = 'Importance sample from realistic posterior predictive')
hist(ynew.imp.sample.opt, nclass = 30, xlim = c(0, 500),
     main = 'Importance sample from optimistic posterior predictive')

# Posterior predictive means for comparison with the earlier versions:
#   analytic:  40.00000 36.66667 18.75000
#   MC:        39.982   36.688   18.947
#   mixture:   40.10235 36.84157 19.22061
# The importance-sample versions (realistic stays the plain MC sample —
# the realistic posterior is the proposal, so no reweighting is needed):
post.pred.mean.imp.pes  <- mean(ynew.imp.sample.pes)
post.pred.mean.imp.opt  <- mean(ynew.imp.sample.opt)
post.pred.mean.imp.real <- mean(ynew.sample.real)
c(post.pred.mean.imp.pes, post.pred.mean.imp.real, post.pred.mean.imp.opt)
# e.g. 40.147 36.688 19.252 (values depend on the random seed)