################################################################################################
## Code by Vanja Dukic, U of Colorado at Boulder
## Bayesian Statistics and Computing 2019
##
## LECTURE 14 - Monte Carlo and importance sampling
################################################################################################

# Recall the Startup example:
# A startup company A has a cool new product that they supply to company B, but it's a bit
# unreliable. Company A has in the past sent two large shipments to company B. Among those,
# 10% were defective. This is considered the prior information.
#
# Now, company B wants to give company A one more chance, and orders a shipment of 50 units.
# The shipment arrives, and out of that shipment, only 1 unit is defective.
# This is considered the data at hand.
#
# Encouraged, company B is considering ordering a large shipment of size 500 in the future.

# (a) Previously, we proposed 3 prior distributions based on the available prior information:
#       p ~ Beta(a, b)
#     Then we derived the posterior density based on the data (1 out of 50 defective items):
#       X ~ Bin(50, p);  x = 1
#       p ~ Beta(a, b)
#       (p | X = x) propto p^x * (1-p)^(n-x) * p^(a-1) * (1-p)^(b-1)
#                        = p^(x+a-1) * (1-p)^(n-x+b-1)
#     which is the Beta(x + a, n - x + b) density

x <- 1    # observed number of defective units in the shipment at hand
n <- 50   # size of the observed shipment
x.grid <- seq(0.001, 0.999, by = 0.001)  # grid over p for plotting densities

# pessimistic prior: Beta(5, 20), prior mean 5/25 = 0.20
a.pes <- 5
b.pes <- 20
a.post.pes <- x + a.pes        # posterior is Beta(x + a, n - x + b)
b.post.pes <- n - x + b.pes

# realistic prior: Beta(10, 90), prior mean 10/100 = 0.10
a.real <- 10
b.real <- 90
a.post.real <- x + a.real
b.post.real <- n - x + b.real

# optimistic prior: Beta(2, 28), prior mean 2/30 ~= 0.067
a.opt <- 2
b.opt <- 28
a.post.opt <- x + a.opt
b.post.opt <- n - x + b.opt

# FIX: the original called x11(), which errors on any system without an X server
# (headless sessions, macOS without XQuartz). dev.new() opens the platform's
# default graphics device and behaves the same where x11() worked.
dev.new()
par(mfrow = c(5, 3))  # 5 x 3 grid: 15 panels for the plots drawn below

#plot(x.grid, dbeta(x.grid, a.pes, b.pes), 'l', col='red', xlab="p", ylab="Pessimistic beta prior", main="Pessimistic beta prior")
#plot(x.grid, dbeta(x.grid, a.real, b.real), 'l', col='red', xlab="p", ylab="Realistic beta prior", main="Realistic beta prior")
#plot(x.grid, dbeta(x.grid, a.opt, b.opt), 'l', col='red', xlab="p", ylab="Optimistic beta prior", main="Optimistic beta prior")
# Posterior densities of p under the three priors, evaluated on the grid
plot(x.grid, dbeta(x.grid, a.post.pes, b.post.pes), 'l', col = 'red',
     xlab = "p", ylab = "Pessimistic beta prior", main = "Pessimistic beta posterior")
plot(x.grid, dbeta(x.grid, a.post.real, b.post.real), 'l', col = 'red',
     xlab = "p", ylab = "Realistic beta prior", main = "Realistic beta posterior")
plot(x.grid, dbeta(x.grid, a.post.opt, b.post.opt), 'l', col = 'red',
     xlab = "p", ylab = "Optimistic beta prior", main = "Optimistic beta posterior")

# Instead of evaluating the density on the grid, we can draw a Monte Carlo
# sample from each posterior and plot a histogram
Nsample <- 1000
psample.pes  <- rbeta(Nsample, a.post.pes,  b.post.pes)
psample.real <- rbeta(Nsample, a.post.real, b.post.real)
psample.opt  <- rbeta(Nsample, a.post.opt,  b.post.opt)
hist(psample.pes,  nclass = 30, xlim = c(0, 1), main = 'MC sample from pessimistic posterior')
hist(psample.real, nclass = 30, xlim = c(0, 1), main = 'MC sample from realistic posterior')
hist(psample.opt,  nclass = 30, xlim = c(0, 1), main = 'MC sample from optimistic posterior')

# Posterior predictive distribution for the number of defectives y in a future
# shipment of Nnew = 500:
#   f(y | x) = integral_0^1 f(y | p) g(p | X = x) dp
#            = integral_0^1 choose(Nnew, y) p^(y + x + a - 1) (1-p)^(Nnew - y + n - x + b - 1) dp
#            propto choose(Nnew, y) * Beta(y + x + a, Nnew - y + n - x + b)
# Below we evaluate this UNNORMALIZED pmf (the constant Beta(x+a, n-x+b) is
# dropped), which is why every summary divides by sum(post.pred.*)
y <- seq(0, 500, by = 1)
Nnew <- 500

post.pred.pes <- choose(Nnew, y) * beta(y + x + a.pes, Nnew - y + n - x + b.pes)
plot(y, post.pred.pes, 'l', col = 'red', xlab = "p", ylab = "Pessimistic beta prior",
     main = "Pessimistic posterior predictive")
post.pred.real <- choose(Nnew, y) * beta(y + x + a.real, Nnew - y + n - x + b.real)
plot(y, post.pred.real, 'l', col = 'red', xlab = "p", ylab = "Realistic beta prior",
     main = "Realistic posterior predictive")
post.pred.opt <- choose(Nnew, y) * beta(y + x + a.opt, Nnew - y + n - x + b.opt)
plot(y, post.pred.opt, 'l', col = 'red', xlab = "p", ylab = "Optimistic beta prior",
     main = "Optimistic posterior predictive")

# Instead of evaluating the integral analytically, sample from the posterior
# predictive by the method of composition: draw p from the posterior (we
# already have psample.pes / psample.real / psample.opt), then y | p ~ Bin(Nnew, p)
ynew.sample.pes  <- rbinom(Nsample, Nnew, psample.pes)
ynew.sample.real <- rbinom(Nsample, Nnew, psample.real)
ynew.sample.opt  <- rbinom(Nsample, Nnew, psample.opt)
hist(ynew.sample.pes,  nclass = 30, xlim = c(0, 500), main = 'MC sample from pessimistic post pred')
hist(ynew.sample.real, nclass = 30, xlim = c(0, 500), main = 'MC sample from realistic post pred')
hist(ynew.sample.opt,  nclass = 30, xlim = c(0, 500), main = 'MC sample from optimistic post pred')

# Mixture approximation to the posterior predictive, based on the MC sample of p's:
#   f(y | x) ~= (1 / Nsample) * sum_{i=1}^{Nsample} dbinom(y, Nnew, p_i)
# FIX: the original built a 501 x Nsample matrix of repeated y values and relied
# on dbinom() argument recycling to pair it with psample; that pairs every p_i
# with every y exactly once per row ONLY because gcd(length(y), Nsample) == 1,
# i.e. it was correct by accident. Build the matrix explicitly so that column j
# genuinely uses sample p_j, then average over columns.
mix.approx <- function(psample) {
  # 501 x Nsample matrix: column j holds dbinom(y, Nnew, p_j)
  rowMeans(vapply(psample, function(p) dbinom(y, Nnew, p), numeric(length(y))))
}
post.pred.mix.pes  <- mix.approx(psample.pes)
post.pred.mix.real <- mix.approx(psample.real)
post.pred.mix.opt  <- mix.approx(psample.opt)
plot(y, post.pred.mix.pes,  main = 'Mixture approx to pessimistic post pred')
plot(y, post.pred.mix.real, main = 'Mixture approx to realistic post pred')
plot(y, post.pred.mix.opt,  main = 'Mixture approx to optimistic post pred')

# Recall the posterior predictive means and central and HPD intervals were:

# posterior predictive means, analytic (normalizing the unnormalized pmf):
post.pred.mean.pes  <- sum(post.pred.pes  * y) / sum(post.pred.pes)
post.pred.mean.opt  <- sum(post.pred.opt  * y) / sum(post.pred.opt)
post.pred.mean.real <- sum(post.pred.real * y) / sum(post.pred.real)
c(post.pred.mean.pes, post.pred.mean.real, post.pred.mean.opt)
# 40.00000 36.66667 18.75000

# the MC (method of composition) versions:
post.pred.mean.mc.pes  <- mean(ynew.sample.pes)
post.pred.mean.mc.opt  <- mean(ynew.sample.opt)
post.pred.mean.mc.real <- mean(ynew.sample.real)
c(post.pred.mean.mc.pes, post.pred.mean.mc.real, post.pred.mean.mc.opt)
# e.g. 39.982 36.688 18.947 (values depend on the random seed)

# the mixture-approximation versions:
post.pred.mean.mix.pes  <- sum(post.pred.mix.pes  * y) / sum(post.pred.mix.pes)
post.pred.mean.mix.opt  <- sum(post.pred.mix.opt  * y) / sum(post.pred.mix.opt)
post.pred.mean.mix.real <- sum(post.pred.mix.real * y) / sum(post.pred.mix.real)
c(post.pred.mean.mix.pes, post.pred.mean.mix.real, post.pred.mean.mix.opt)
# e.g. 40.10235 36.84157 19.22061 (values depend on the random seed)

### CREDIBLE INTERVALS
Cred <- 0.95

# The central 95% credible interval for the number of defective items in the
# new shipment of 500: cut (1 - Cred)/2 probability from each tail of the
# normalized posterior predictive cdf
post.pred.cci.pes <- cumsum(post.pred.pes) / sum(post.pred.pes)
lb.cci.pes <- y[max(which(post.pred.cci.pes <= (1 - Cred) / 2))]
ub.cci.pes <- y[min(which(post.pred.cci.pes >= (1 + Cred) / 2))]
c(lb.cci.pes, ub.cci.pes)
# 12, 78

post.pred.cci.real <- cumsum(post.pred.real) / sum(post.pred.real)
lb.cci.real <- y[max(which(post.pred.cci.real <= (1 - Cred) / 2))]
ub.cci.real <- y[min(which(post.pred.cci.real >= (1 + Cred) / 2))]
c(lb.cci.real, ub.cci.real)
# 15, 63

post.pred.cci.opt <- cumsum(post.pred.opt) / sum(post.pred.opt)
lb.cci.opt <- y[max(which(post.pred.cci.opt <= (1 - Cred) / 2))]
ub.cci.opt <- y[min(which(post.pred.cci.opt >= (1 + Cred) / 2))]
c(lb.cci.opt, ub.cci.opt)
# 2, 46

# The 95% highest probability density (HPD) credible interval
# for the number of defective items in the new shipment of size 500
#
## NOTE: THE VERSION BELOW PRODUCES SLIGHTLY WIDER INTERVALS
## (A BIT ABOVE THE CRED LEVEL)
# HPD version 1: discard the LOWEST-mass y values while their total mass stays
# strictly below 1 - Cred, and keep everything else, so the kept region carries
# AT LEAST Cred of the mass (slightly wider than nominal).
#
# FIX: the original computed ix[-complement] — negative indexing by the
# complement's VALUES removes POSITIONS of the sorted index vector, not the
# complement values themselves — and then mapped indices to y via 501 - ix
# instead of ix - 1. For these unimodal pmfs the two mistakes nearly cancelled,
# but only by accident. The set arithmetic below is direct: keep the index
# values not in the complement, and map index -> y via ix - 1 (ix runs 1..501
# while y runs 0..500). The coverage check also indexes by ix, not by y values
# (the original was off by one there as well).

# PESSIMISTIC
post.pred.hpd.pes.sorted <- sort(post.pred.pes / sum(post.pred.pes),
                                 decreasing = FALSE, index.return = TRUE)
# indices (1..501) of the lowest-mass values whose cumulative mass is < 1 - Cred
yindex.pes.complement <- post.pred.hpd.pes.sorted$ix[
  which(cumsum(post.pred.hpd.pes.sorted$x) < (1 - Cred))]
yindex.pes <- setdiff(seq_along(post.pred.pes), yindex.pes.complement)
post.pred.hpd.pes <- sort(yindex.pes - 1)   # back to y values
# check to see if it's really (at least) the Cred level
sum(post.pred.pes[yindex.pes]) / sum(post.pred.pes)
# check to verify that it's a single interval
max(diff(sort(yindex.pes)))
# 1, so yes, can report a single interval
post.pred.hpd.pes.lb <- min(post.pred.hpd.pes)
post.pred.hpd.pes.ub <- max(post.pred.hpd.pes)
c(post.pred.hpd.pes.lb, post.pred.hpd.pes.ub)
# ~ 11, 73

#######################################
# REALISTIC
post.pred.hpd.real.sorted <- sort(post.pred.real / sum(post.pred.real),
                                  decreasing = FALSE, index.return = TRUE)
yindex.real.complement <- post.pred.hpd.real.sorted$ix[
  which(cumsum(post.pred.hpd.real.sorted$x) < (1 - Cred))]
yindex.real <- setdiff(seq_along(post.pred.real), yindex.real.complement)
post.pred.hpd.real <- sort(yindex.real - 1)
# check to see if it's really (at least) the Cred level
sum(post.pred.real[yindex.real]) / sum(post.pred.real)
# check to verify that it's a single interval
max(diff(sort(yindex.real)))
# 1, so yes, can report a single interval
post.pred.hpd.real.lb <- min(post.pred.hpd.real)
post.pred.hpd.real.ub <- max(post.pred.hpd.real)
c(post.pred.hpd.real.lb, post.pred.hpd.real.ub)
# ~ 15, 61

#######################################
# OPTIMISTIC
post.pred.hpd.opt.sorted <- sort(post.pred.opt / sum(post.pred.opt),
                                 decreasing = FALSE, index.return = TRUE)
yindex.opt.complement <- post.pred.hpd.opt.sorted$ix[
  which(cumsum(post.pred.hpd.opt.sorted$x) < (1 - Cred))]
yindex.opt <- setdiff(seq_along(post.pred.opt), yindex.opt.complement)
post.pred.hpd.opt <- sort(yindex.opt - 1)
# check to see if it's really (at least) the Cred level
sum(post.pred.opt[yindex.opt]) / sum(post.pred.opt)
# check to verify that it's a single interval
max(diff(sort(yindex.opt)))
# 1, so yes, can report a single interval
post.pred.hpd.opt.lb <- min(post.pred.hpd.opt)
post.pred.hpd.opt.ub <- max(post.pred.hpd.opt)
c(post.pred.hpd.opt.lb, post.pred.hpd.opt.ub)
# ~ 1, 41

#######################################
## HPD version 2: accumulate the HIGHEST-mass y values while their cumulative
## mass is still <= Cred, so the kept region carries AT MOST Cred of the mass.
## NOTE: THIS VERSION PRODUCES SLIGHTLY NARROWER INTERVALS
## (A BIT BELOW THE CRED LEVEL)

# PESSIMISTIC
post.pred.hpd.pes.sorted <- sort(post.pred.pes / sum(post.pred.pes),
                                 decreasing = TRUE, index.return = TRUE)
# note: ix goes from 1 to 501, so shifted by 1 from y — hence the "- 1"
yindex.pes <- post.pred.hpd.pes.sorted$ix[
  which(cumsum(post.pred.hpd.pes.sorted$x) <= Cred)] - 1
post.pred.hpd.pes <- sort(yindex.pes)
# check to verify that it's a single interval
max(diff(sort(yindex.pes)))
# 1, so yes, a single interval
post.pred.hpd.pes.lb <- min(post.pred.hpd.pes)
post.pred.hpd.pes.ub <- max(post.pred.hpd.pes)
c(post.pred.hpd.pes.lb, post.pred.hpd.pes.ub)
# 11, 72

# REALISTIC
post.pred.hpd.real.sorted <- sort(post.pred.real / sum(post.pred.real),
                                  decreasing = TRUE, index.return = TRUE)
yindex.real <- post.pred.hpd.real.sorted$ix[
  which(cumsum(post.pred.hpd.real.sorted$x) <= Cred)] - 1
post.pred.hpd.real <- sort(yindex.real)
# check to verify that it's a single interval
max(diff(sort(yindex.real)))
# 1, so yes, a single interval
post.pred.hpd.real.lb <- min(post.pred.hpd.real)
post.pred.hpd.real.ub <- max(post.pred.hpd.real)
c(post.pred.hpd.real.lb, post.pred.hpd.real.ub)
# 15, 60

# OPTIMISTIC
post.pred.hpd.opt.sorted <- sort(post.pred.opt / sum(post.pred.opt),
                                 decreasing = TRUE, index.return = TRUE)
yindex.opt <- post.pred.hpd.opt.sorted$ix[
  which(cumsum(post.pred.hpd.opt.sorted$x) <= Cred)] - 1
post.pred.hpd.opt <- sort(yindex.opt)
# check to verify that it's a single interval
max(diff(sort(yindex.opt)))
# 1, so yes, a single interval
post.pred.hpd.opt.lb <- min(post.pred.hpd.opt)
post.pred.hpd.opt.ub <- max(post.pred.hpd.opt)
c(post.pred.hpd.opt.lb, post.pred.hpd.opt.ub)
# 1, 40

#######################################################################################
## Importance sampling tricks
# Imagine we only ran the analysis with the realistic prior, and now want a
# sensitivity analysis under the pessimistic and optimistic priors WITHOUT new
# posterior draws. Importance resampling (SIR) reuses psample.real: resample it
# with replacement, with weights
#   target posterior density / realistic posterior density
w.pes <- dbeta(psample.real, a.post.pes, b.post.pes) /
  dbeta(psample.real, a.post.real, b.post.real)
w.opt <- dbeta(psample.real, a.post.opt, b.post.opt) /
  dbeta(psample.real, a.post.real, b.post.real)
psample.pes.sir <- sample(psample.real, Nsample, replace = TRUE, prob = w.pes)
psample.opt.sir <- sample(psample.real, Nsample, replace = TRUE, prob = w.opt)

# FIX: draw the predictive samples from the RESAMPLED values (.sir). The
# original passed psample.pes / psample.opt (the direct posterior draws), which
# silently bypassed the importance-resampling step this section illustrates.
ynew.imp.sample.pes <- rbinom(Nsample, Nnew, psample.pes.sir)
ynew.imp.sample.opt <- rbinom(Nsample, Nnew, psample.opt.sir)
hist(ynew.imp.sample.pes, nclass = 30, xlim = c(0, 500),
     main = 'Importance sample from pessimistic posterior predictive')
hist(ynew.sample.real, nclass = 30, xlim = c(0, 500),
     main = 'Importance sample from realistic posterior predictive')
hist(ynew.imp.sample.opt, nclass = 30, xlim = c(0, 500),
     main = 'Importance sample from optimistic posterior predictive')

# Posterior predictive means for comparison with the earlier versions:
#   analytic:  40.00000 36.66667 18.75000
#   MC:        39.982   36.688   18.947
#   mixture:   40.10235 36.84157 19.22061
# The importance-sample versions (realistic stays the plain MC sample —
# the realistic posterior is the proposal, so no reweighting is needed):
post.pred.mean.imp.pes  <- mean(ynew.imp.sample.pes)
post.pred.mean.imp.opt  <- mean(ynew.imp.sample.opt)
post.pred.mean.imp.real <- mean(ynew.sample.real)
c(post.pred.mean.imp.pes, post.pred.mean.imp.real, post.pred.mean.imp.opt)
# e.g. 40.147 36.688 19.252 (values depend on the random seed)