## LECTURE 2 - EXAMPLES OF DENSITIES, LIKELIHOODS, AND MLEs ##


## NORMAL DENSITY EXAMPLE
mu = 0
sd = 1
xgrid = seq(-5,5,by=0.1)
xdensity = dnorm(xgrid,mu,sd)
plot(xgrid, xdensity, 'l', lwd=2,col='red', main = 'Normal(0,1) density')
# See help(plot) for other plotting options


#RANDOM SAMPLE of size n from Normal
n = 10
X = rnorm(n,mu,sd)

# add sampled points to the density plot
par(new=TRUE)
points(X,rep(0,n))


# Likelihood of mu (assumes unknown mu, known sd)
LikNorm = function(m) {
exp(sum(dnorm(X,m,sd,log=TRUE))) }


# Negative log-likelihood of mu (assumes unknown mu, known sd)
NeglogLikNorm = function(m) {
-sum(dnorm(X,m,sd,log=TRUE)) }


# Plot these two functions
par(mfrow=c(1,2))
plot(xgrid,apply(t(t(xgrid)),1,LikNorm), 'l', lwd = 2, col='blue', xlab = expression(mu), ylab = 'Likelihood')
plot(xgrid,apply(t(t(xgrid)),1,NeglogLikNorm), 'l', lwd = 2, col='blue', xlab = expression(mu), ylab = 'Negative Log likelihood')


### FIND THE MLE: OPTIMIZATION
nrep=100
MLEsO = rep(0,nrep)
MLEsN = rep(0,nrep)
MLEvO = rep(0,nrep)
MLEvN = rep(0,nrep)


for(i in 1:nrep)
{
# generate repeated samples
n = 10
X = rnorm(n,mu,sd)

# optimize using 2 different R functions
tempOptim = optim(0.01,neglogLikNorm,method='L-BFGS-B',lower=-10,upper=10,hessian=TRUE)
tempNLM = nlm(neglogLikNorm,0.01,hessian=TRUE)

# store the results
MLEsO[i] = tempOptim$par
MLEsN[i] = tempNLM$estimate
MLEvO[i] = 1/tempOptim$hessian
MLEvN[i] = 1/tempNLM$hessian
}

# plot the histogram of MLE estimates, and asymptotic density
xgrid = seq(-1,1,by=0.01)
xdensity = dnorm(xgrid,mean(0),sd=sqrt(.1),log=FALSE)
par(mfrow=c(1,2))
hist(MLEsO, freq=FALSE); par(new=TRUE); plot(xgrid, xdensity, 'l', xlab='', ylab='',lwd=2,col='red', axes=FALSE)
hist(MLEsN, freq=FALSE); par(new=TRUE); plot(xgrid, xdensity, 'l', lwd=2,col='red', xlab='', ylab='', axes=FALSE)

# compute MSE
BiasO =  mean(MLEsO)  - 0
BiasN = mean(MLEsN) - 0
VarO = var(MLEsO)
VarN= var(MLEsN)

MSEO= BiasO^2 + VarO
MSEN= BiasN^2 + VarN


################################################
####### BINOMIAL EXAMPLE

p=0.5

#RANDOM SAMPLE of size n from Binomial
n = 100
Y = rbinom(n,1,p)

xgrid = seq(0.01,0.99,by=0.01)
xdensity = dbeta(xgrid,sum(Y),n-sum(Y))
plot(xgrid, xdensity, 'l', lwd=2,col='red', main = 'Beta density')

LikBeta = function(p) {
dbeta(p,sum(Y),n-sum(Y)) }

NeglogLikBeta = function(p) {
-dbeta(p,sum(Y),n-sum(Y),log=TRUE) }


par(mfrow=c(1,2))
plot(xgrid,apply(t(t(xgrid)),1,LikBeta), 'l', lwd = 2, col='blue', xlab = expression(p), ylab = 'Likelihood')
plot(xgrid,apply(t(t(xgrid)),1,NeglogLikBeta), 'l', lwd = 2, col='blue', xlab = expression(p), ylab = 'Negative Log likelihood')


### OPTIMIZATION
nrep=100
bMLEsO = rep(0,nrep)
bMLEsN = rep(0,nrep)
bMLEvO = rep(0,nrep)
bMLEvN = rep(0,nrep)


for(i in 1:nrep)
{
p=0.5
n = 100
Y = rbinom(n,1,p)
tempOptim = optim(0.01,NeglogLikBeta,method='L-BFGS-B',lower=0.00001,upper=0.999999,hessian=TRUE)
tempNLM = nlm(NeglogLikBeta,0.01,hessian=TRUE)
bMLEsO[i] = tempOptim$par
bMLEsN[i] = tempNLM$estimate
bMLEvO[i] = 1/tempOptim$hessian
bMLEvN[i] = 1/tempNLM$hessian
}

xgrid = seq(0,1,by=0.01)
xdensity = dnorm(xgrid,p,sd=sqrt(p*(1-p)/n),log=FALSE)
par(mfrow=c(1,2))
hist(bMLEsO, freq=FALSE,xlim=c(0,1)); par(new=TRUE); plot(xgrid, xdensity, 'l', xlab='', ylab='',lwd=2,col='red', axes=FALSE)
hist(bMLEsN, freq=FALSE,xlim=c(0,1)); par(new=TRUE); plot(xgrid, xdensity, 'l', lwd=2,col='red', xlab='', ylab='', axes=FALSE)

# MSE
BiasO =  mean(bMLEsO)  - 0
BiasN = mean(bMLEsN) - 0
VarO = var(bMLEsO)
VarN= var(bMLEsN)

MSEO= BiasO^2 + VarO
MSEN= BiasN^2 + VarN


##########################################################


### CHANGE OF MEASURE
# Say instead of X we'd like to work with exp(X)
# This is called a transformation, or more generally a change of variables 
# In probability and analysis, this is called change of measure
#

# Normal N(0,1) and logNormal(0,1)
# density and histogram of 1000 samples from N(0,1)
n = 1000
mu = 0
sd = 1
xgrid = seq(-5,5,by=0.1)
xdensity = dnorm(xgrid,mu,sd)
X = rnorm(n,mu,sd)

par(mfrow=c(1,2))
plot(xgrid, xlim=range(X), xdensity, 'l', lwd=2,col='red', xlab='X', main = 'Normal(0,1) density')
par(new=TRUE);
hist(X, xlim=range(X), freq=FALSE,main='', xlab='', ylab='');  


# lognormal (exp transform)
plot(exp(xgrid), xlim=c(0,20), dlnorm(exp(xgrid)), 'l', lwd=2,col='red', xlab='exp(X)', main = 'LogNormal(0,1) density')
par(new=TRUE);
hist(exp(X), xlim=c(0,20), nclass=10, freq=FALSE,main='', xlab='', ylab='');  


# Normal N(0,1) and ChiSquare(1)
# density and histogram of 1000 samples from N(0,1)
x11()
par(mfrow=c(1,2))
plot(xgrid, xlim=range(X), xdensity, 'l', lwd=2,col='red', xlab='X', main = 'Normal(0,1) density')
par(new=TRUE);
hist(X, xlim=range(X), freq=FALSE,main='', xlab='', ylab='');  


# chi square (square transform)
plot((xgrid^2), xlim=c(0,20), dchisq((xgrid)^2,1), 'l', lwd=2,col='red', xlab='X^2', main = 'Chi Square(1) density')
par(new=TRUE);
hist((X^2), xlim=c(0,20), nclass=10, freq=FALSE,main='', xlab='', ylab='');