################################################################################################
## Code by Vanja Dukic, U of Colorado at Boulder
## Bayesian Statistics and Computing 2019
##
## LECTURE 9 - Beta Binomial model (a modified example based on Lafleur's notes)
################################################################################################

# A startup company A has a cool new product that they supply to company B, but it’s a bit unreliable.
# Company A has in the past sent two large shipments to company B. Among those, 10% were defective.
# This is considered the prior information.
#
# Now, company B wants to give company A one more chance, and orders a shipment of 50 units. The
# shipment arrives, and out of that shipment, only 1 unit is defective. 
# This is considered the data at hand.
#
# Encouraged, the company B is considering ordering a large shipment of size 500 in the future.

# (a) What would be the best frequentist estimate of the proportion of defective items 
# for this new shipment of size 500?

# Solution: The frequents estimate is based on the data at hand: 1 out of 50 defective.
x=1
n=50
phat.freq = x/n
# 0.02

# (b) What is the variance of the estimator you used in part a?
phat.freq.var = phat.freq*(1-phat.freq)/n 
# 0.000392
# st dev = 0.01979899


# (c) Let us  do a Bayesian analysis of this problem now. Propose a prior distribution 
# based on the available prior information.

# p ~ Beta(a,b)

x.grid = seq(0.001,0.999,by=0.001)

par(mfrow=c(3,1))


# pessimistic prior
a.pes = 5
b.pes = 20
a=a.pes
b=b.pes
plot(x.grid, dbeta(x.grid, a, b), 'l', col='red',xlab="p",ylab="Pessimistic beta prior",main="Pessimistic beta prior")


# realisic  prior
a.real = 10
b.real = 90
a=a.real
b=b.real
plot(x.grid, dbeta(x.grid, a, b), 'l', col='red',xlab="p",ylab="Realistic beta prior",main="Realistic beta prior")


# optimistic prior
a.opt = 2
b.opt = 28
a=a.opt
b=b.opt
plot(x.grid, dbeta(x.grid, a , b), 'l', col='red',xlab="p",ylab="Optimistic beta prior",main="Optimistic beta prior")


# (d) Derive the posterior density based on the data (1 out of 50 defective items) and the above prior

# X ~ Bin(50,p); x = 1
# p ~ Beta(a,b)
# (p|X=x) = p^x * (1-p)^(n-x) * p^(a-1) * (1-p)^(b-1)
#         = p^(x+a-1)  * (1-p)^(n-x+b-1)
# which is the Beta (x+a, n-x+b) density

a.post = x+a
b.post = n-x+b

x11()
par(mfrow=c(3,2))
# pessimistic prior
a.pes = 5
b.pes = 20
a=a.pes
b=b.pes
a.post = x+a
b.post = n-x+b
plot(x.grid, dbeta(x.grid, a, b), 'l', col='red',xlab="p",ylab="Pessimistic beta prior",main="Pessimistic beta prior")
plot(x.grid, dbeta(x.grid, a.post, b.post), 'l', col='red',xlab="p",ylab="Pessimistic beta prior",main="Pessimistic beta posterior")


# realisic  prior
a.real = 10
b.real = 90
a=a.real
b=b.real
a.post = x+a
b.post = n-x+b
plot(x.grid, dbeta(x.grid, a, b), 'l', col='red',xlab="p",ylab="Realistic beta prior",main="Realistic beta prior")
plot(x.grid, dbeta(x.grid, a.post, b.post), 'l', col='red',xlab="p",ylab="Realistic beta prior",main="Realistic beta posterior")


# optimistic prior
a.opt = 2
b.opt = 28
a=a.opt
b=b.opt
a.post = x+a
b.post = n-x+b
plot(x.grid, dbeta(x.grid, a , b), 'l', col='red',xlab="p",ylab="Optimistic beta prior",main="Optimistic beta prior")
plot(x.grid, dbeta(x.grid, a.post, b.post), 'l', col='red',xlab="p",ylab="Optimistic beta prior",main="Optimistic beta posterior")


# (e) Derive the posterior predictive distribution for the number of defective items 
# for a future sample of 500

# posterior predictive is given as 
# y|x = integral_0^1 of  f(y|p) g(p|X=x) dp
# where g(p|X=x) is the posterior given data at hand
# and f(y|p) is the pmf (or density) of the new data for a given p

# Nnew = 500
# integral_0^1  choose(Nnew,y) * p^y* (1-p)^(Nnew-y) * p^(x+a-1)  * (1-p)^(n-x+b-1)  = 
# integral_0^1  choose(Nnew,y) * p^(y + x + a - 1) *  (1-p)^(Nnew - y + n - x + b - 1)))  = 
# choose(Nnew,y) * beta(y + x + a , Nnew - y + n - x + b)

y = seq(0,500,by=1)
Nnew = 500

x11()
par(mfrow=c(3,3))

# pessimistic prior
a.pes = 5
b.pes = 20
a=a.pes
b=b.pes
a.post = x+a
b.post = n-x+b
post.pred.pes = choose(Nnew,y) * beta(y + x + a , Nnew - y + n - x + b)

plot(x.grid, dbeta(x.grid, a, b), 'l', col='red',xlab="p",ylab="Pessimistic beta prior",main="Pessimistic beta prior")
plot(x.grid, dbeta(x.grid, a.post, b.post), 'l', col='red',xlab="p",ylab="Pessimistic beta prior",main="Pessimistic beta posterior")
plot(y, post.pred.pes, 'l', col='red',xlab="p",ylab="Pessimistic beta prior",main="Pessimistic posterior predictive")


# realisic  prior
a.real = 10
b.real = 90
a=a.real
b=b.real
a.post = x+a
b.post = n-x+b
post.pred.real = choose(Nnew,y) * beta(y + x + a , Nnew - y + n - x + b)

plot(x.grid, dbeta(x.grid, a, b), 'l', col='red',xlab="p",ylab="Realistic beta prior",main="Realistic beta prior")
plot(x.grid, dbeta(x.grid, a.post, b.post), 'l', col='red',xlab="p",ylab="Realistic beta prior",main="Realistic beta posterior")
plot(y, post.pred.real, 'l', col='red',xlab="p",ylab="Realistic beta prior",main="Realistic posterior predictive")


# optimistic prior
a.opt = 2
b.opt = 28
a=a.opt
b=b.opt
a.post = x+a
b.post = n-x+b
post.pred.opt = choose(Nnew,y) * beta(y + x + a , Nnew - y + n - x + b)

plot(x.grid, dbeta(x.grid, a , b), 'l', col='red',xlab="p",ylab="Optimistic beta prior",main="Optimistic beta prior")
plot(x.grid, dbeta(x.grid, a.post, b.post), 'l', col='red',xlab="p",ylab="Optimistic beta prior",main="Optimistic beta posterior")
plot(y, post.pred.opt, 'l', col='red',xlab="p",ylab="Optimistic beta prior",main="Optimistic posterior predictive")


# (f) Plot the probability mass function derived above
# see above


# (g) Under the square error loss, find the Bayesian estimator of the number of 
# defective items for this new shipment of size 500
# 
# This is the posterior mean of the posterior predictive
# 

post.pred.mean.pes =  sum(post.pred.pes*y)/sum(post.pred.pes)

post.pred.mean.opt =  sum(post.pred.opt*y)/sum(post.pred.opt)

post.pred.mean.real =  sum(post.pred.real*y)/sum(post.pred.real)

c(post.pred.mean.pes, post.pred.mean.real, post.pred.mean.opt)
#  40.00000 36.66667 18.75000


# (h) Provide the actual estimate of the number of defective items for this 
# new shipment of size 500 using the estimator above

c(post.pred.mean.pes, post.pred.mean.real, post.pred.mean.opt)
#  40.00000 36.66667 18.75000


# (i) Provide the central 95% credible interval for the number of defective items 
# for this new shipment of size 500

post.pred.cci.pes =  cumsum(post.pred.pes)/sum(post.pred.pes)
lb.cci.pes = y[max(which(post.pred.cci.pes<=0.025))]
ub.cci.pes = y[min(which(post.pred.cci.pes>=0.975))]
c(lb.cci.pes,ub.cci.pes)
# 12, 78

post.pred.cci.real =  cumsum(post.pred.real)/sum(post.pred.real)
lb.cci.real = y[max(which(post.pred.cci.real<=0.025))]
ub.cci.real = y[min(which(post.pred.cci.real>=0.975))]
c(lb.cci.real,ub.cci.real)
# 15,63

post.pred.cci.opt =  cumsum(post.pred.opt)/sum(post.pred.opt)
lb.cci.opt = y[max(which(post.pred.cci.opt<=0.025))]
ub.cci.opt = y[min(which(post.pred.cci.opt>=0.975))]
c(lb.cci.opt,ub.cci.opt)
# 2,46

# (j) Provide the 95% highest probability density (HPD) credible interval 
# for the number of defective items for this new shipment of size 50


Cred=0.95

## NOTE THE VERSION BELOW PRODUCES SLIGHTLY WIDER INTERVALS 
## (A BIT ABOVE THE CRED LEVEl) 

post.pred.hpd.pes.sorted =  sort(post.pred.pes/sum(post.pred.pes),decreasing=FALSE, index.return=TRUE)
yindex.pes.complement=post.pred.hpd.pes.sorted$ix[which(cumsum(post.pred.hpd.pes.sorted$x)<(1-Cred))]
# note ix goes from 1 to 501, so shifted by 1 from y
yindex.pes=post.pred.hpd.pes.sorted$ix[-yindex.pes.complement]
post.pred.hpd.pes = sort(501-yindex.pes)
#check to see if it's really Cred level
sum(post.pred.pes[post.pred.hpd.pes]/sum(post.pred.pes))
# check to verify that it's a single interval
max(diff(sort(yindex.pes)))
# 1, so yes, can report a single interval
post.pred.hpd.pes.lb = min(post.pred.hpd.pes)
post.pred.hpd.pes.ub = max(post.pred.hpd.pes)
c(post.pred.hpd.pes.lb, post.pred.hpd.pes.ub)
# 11,73
#######################################


post.pred.hpd.real.sorted =  sort(post.pred.real/sum(post.pred.real),decreasing=FALSE, index.return=TRUE)
yindex.real.complement=post.pred.hpd.real.sorted$ix[which(cumsum(post.pred.hpd.real.sorted$x)<(1-Cred))]
# note ix goes from 1 to 501, so shifted by 1 from y
yindex.real=post.pred.hpd.real.sorted$ix[-yindex.real.complement]
post.pred.hpd.real = sort(501-yindex.real)
#check to see if it's really Cred level
sum(post.pred.real[post.pred.hpd.real]/sum(post.pred.real))
# check to verify that it's a single interval
max(diff(sort(yindex.real)))
# 1, so yes, can report a single interval
post.pred.hpd.real.lb = min(post.pred.hpd.real)
post.pred.hpd.real.ub = max(post.pred.hpd.real)
c(post.pred.hpd.real.lb, post.pred.hpd.real.ub)
# 15,61
#######################################


post.pred.hpd.opt.sorted =  sort(post.pred.opt/sum(post.pred.opt),decreasing=FALSE, index.return=TRUE)
yindex.opt.complement=post.pred.hpd.opt.sorted$ix[which(cumsum(post.pred.hpd.opt.sorted$x)<(1-Cred))]
# note ix goes from 1 to 501, so shifted by 1 from y
yindex.opt=post.pred.hpd.opt.sorted$ix[-yindex.opt.complement]
post.pred.hpd.opt = sort(501-yindex.opt)
#check to see if it's really Cred level
sum(post.pred.opt[post.pred.hpd.opt]/sum(post.pred.opt))
# check to verify that it's a single interval
max(diff(sort(yindex.opt)))
# 1, so yes, can report a single interval
post.pred.hpd.opt.lb = min(post.pred.hpd.opt)
post.pred.hpd.opt.ub = max(post.pred.hpd.opt)
c(post.pred.hpd.opt.lb, post.pred.hpd.opt.ub)
# 1,41
#######################################


## NOTE THE VERSION BELOW PRODUCES SLIGHTLY NARROWER INTERVALS 
## (A BIT BELOW THE CRED LEVEl) 

post.pred.hpd.pes.sorted =  sort(post.pred.pes/sum(post.pred.pes),decreasing=TRUE, index.return=TRUE)
# note ix goes from 1 to 501, so shifted by 1 from y
yindex.pes=post.pred.hpd.pes.sorted$ix[which(cumsum(post.pred.hpd.pes.sorted$x)<=Cred)]-1
# check to verify that it's a single interval
post.pred.hpd.pes = sort(yindex.pes)
max(diff(sort(yindex.pes)))
# 1, so yes, a single interval
post.pred.hpd.pes.lb = min(post.pred.hpd.pes)
post.pred.hpd.pes.ub = max(post.pred.hpd.pes)
c(post.pred.hpd.pes.lb, post.pred.hpd.pes.ub)
# 11,72


post.pred.hpd.real.sorted =  sort(post.pred.real/sum(post.pred.real),decreasing=TRUE, index.return=TRUE)
# note ix goes from 1 to 501, so shifted by 1 from y
yindex.real=post.pred.hpd.real.sorted$ix[which(cumsum(post.pred.hpd.real.sorted$x)<=Cred)]-1
# check to verify that it's a single interval
post.pred.hpd.real = sort(yindex.real)
max(diff(sort(yindex.real)))
# 1, so yes, a single interval
post.pred.hpd.real.lb = min(post.pred.hpd.real)
post.pred.hpd.real.ub = max(post.pred.hpd.real)
c(post.pred.hpd.real.lb, post.pred.hpd.real.ub)
# 15,60


post.pred.hpd.opt.sorted =  sort(post.pred.opt/sum(post.pred.opt),decreasing=TRUE, index.return=TRUE)
# note ix goes from 1 to 501, so shifted by 1 from y
yindex.opt=post.pred.hpd.opt.sorted$ix[which(cumsum(post.pred.hpd.opt.sorted$x)<=Cred)]-1
# check to verify that it's a single interval
post.pred.hpd.opt = sort(yindex.opt)
max(diff(sort(yindex.opt)))
# 1, so yes, a single interval
post.pred.hpd.opt.lb = min(post.pred.hpd.opt)
post.pred.hpd.opt.ub = max(post.pred.hpd.opt)
c(post.pred.hpd.opt.lb, post.pred.hpd.opt.ub)
# 1,40


# (k) Report the estimate and the 95% HPD for the analyses with 2 alternative priors; 
# one pessimistic and one optimistic
# see above