# Examples for the correlated errors lectures (week 8)
# Written by: Vanja Dukic
# Date: March 4, 2014
#
# Simulations for the runs and Durbin-Watson tests
# Performance under the null hypothesis

require(tseries)

# Preliminary setting (small sample chosen on purpose)
n1 = 5        # ones  (number of positive residuals)
n2 = 7        # zeros (number of negative residuals)
n  = n1 + n2  # total number of observations

# To get exactly n1 +'s and n2 -'s, we permute a fixed 0/1 vector with
# sample().  From the help page: sample(x, size, replace = FALSE, prob = NULL)
# takes a sample of the specified size from the elements of 'x',
# with or without replacement.
xT = c(rep(0, n2), rep(1, n1))

K = 1000
nruns = rep(0, K)
rtp   = rep(0, K)

for (k in 1:K){
  x = sample(xT)   # a different random arrangement of xT every time
  nruns[k] = sum(abs(x[1:(n-1)] - x[2:n])) + 1   # runs = sign changes + 1
  rt = runs.test(as.factor(x))
  rtp[k] = rt$p.value
}

hist(nruns, nclass = 20)
hist(rtp, nclass = 10)
sum(rtp <= 0.05)   # number of rejections at the 5% level

# Observed vs. theoretical mean and variance of the number of runs
nrunMo  = mean(nruns)
nrunVo  = var(nruns)
nrunMth = 1 + 2*n1*n2/n
nrunVth = 2*n1*n2*(2*n1*n2 - n1 - n2)/(n^2 * (n1 + n2 - 1))

########################################################################
### EXAMPLE: CONSUMER EXPENDITURE
##  DATA P203 (consumer expenditure / money stock data, assumed already loaded)

stock = P203
stock.lm = lm(Expenditure ~ Stock, data = stock)
summary(stock.lm)
#              Estimate Std. Error t value Pr(>|t|)
# (Intercept) -154.7192    19.8500  -7.794 3.54e-07 ***
# Stock          2.3004     0.1146  20.080 8.99e-14 ***
# Multiple R-squared: 0.9573,  Adjusted R-squared: 0.9549
# F-statistic: 403.2 on 1 and 18 DF,  p-value: 8.988e-14

plot(rstudent(stock.lm))
abline(h = 0, col = 'red')

Tres = as.factor(sign(rstudent(stock.lm)))
runs.test(Tres, alternative = 'two.sided')
# Standard Normal = -2.6865, p-value = 0.007221

#######################################################################
# Simulation of runs test performance under H0 for regression residuals
# Using Y = a + X + e, with e ~ N(0, sdE^2) and X ~ N(0, sdX^2)

a = 5
K = 1000
sdX = 0.1
sdE = 1
nruns = rep(0, K)
rtp   = rep(0, K)
npos  = rep(0, K)

X = rnorm(n, mean = 0, sd = sdX)   # fixed design, generated once

for (k in 1:K){
  e = rnorm(n, mean = 0, sd = sdE)
  Y = a + X + e
  temp.lm = lm(Y ~ X)
  Tres = sign(rstudent(temp.lm))
  Tres[Tres == -1] = 0
  npos[k]  = sum(Tres)                                 # number of positive residuals
  nruns[k] = sum(abs(Tres[1:(n-1)] - Tres[2:n])) + 1   # runs = sign changes + 1
  Tres = as.factor(sign(rstudent(temp.lm)))
  rt = runs.test(Tres, alternative = 'two.sided')
  rtp[k] = rt$p.value
}

hist(nruns, nclass = 10)
hist(rtp, nclass = 10)
hist(npos, nclass = 10)
sum(rtp <= 0.05)
mean(npos)
mean(nruns)
var(nruns)

# Using approximate theory (n1 = npos, n2 = n - npos)
nrunMth = 1 + 2*npos*(n - npos)/n
nrunVth = 2*npos*(n - npos)*(2*npos*(n - npos) - n)/(n^2 * (n - 1))
hist(nrunMth, nclass = 10)
hist(nrunVth, nclass = 10)

###############################################################
# DURBIN-WATSON

library(lmtest)
help(dwtest)
# dwtest(formula, order.by = NULL,
#        alternative = c("greater", "two.sided", "less"))

stock = P203
stock.lm = lm(Expenditure ~ Stock, data = stock)
summary(stock.lm)
dws = dwtest(stock.lm)
dws
#         Durbin-Watson test
#
# data:  stock.lm
# DW = 0.3282, p-value = 2.303e-08
# alternative hypothesis: true autocorrelation is greater than 0

##############################################
# Simulation of the Durbin-Watson test for regression under the null
# Using Y = a + X + e, with e ~ N(0, 1) and X ~ N(0, 0.1^2)

dwp = rep(0, K)
X = rnorm(n, mean = 0, sd = sdX)

for (k in 1:K){
  e = rnorm(n, mean = 0, sd = sdE)
  Y = a + X + e
  temp.lm = lm(Y ~ X)
  dws = dwtest(temp.lm)
  dwp[k] = dws$p.value
}

hist(dwp, nclass = 10)
sum(dwp <= 0.05)   # number of rejections at the 5% level
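
##############################################
# A quick side-by-side check (a minimal sketch): it assumes the rtp and dwp
# vectors from the two simulation loops above are still in the workspace and
# were generated with the same K. Under H0 both empirical rejection rates
# should be near the nominal 0.05, up to a Monte Carlo standard error of
# roughly sqrt(0.05*0.95/K).

alpha = 0.05
rej   = c(runs = mean(rtp <= alpha), dw = mean(dwp <= alpha))
mcse  = sqrt(alpha*(1 - alpha)/K)   # binomial Monte Carlo standard error
round(rbind(rejection.rate = rej, mc.std.error = mcse), 4)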
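
##############################################
# Another minimal check (a sketch, assuming the dwp vector and K from above):
# under H0 the p-values of a well-calibrated test should look roughly
# Uniform(0,1), so the sorted p-values plotted against uniform quantiles
# should lie close to the 45-degree line.

plot(qunif(ppoints(K)), sort(dwp),
     xlab = 'Uniform(0,1) quantiles', ylab = 'sorted DW p-values')
abline(0, 1, col = 'red')   # 45-degree reference line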