#### R code for Crab example
#### Poisson Regression Model for Count Data
#### Vanja Dukic, U of Colorado at Boulder, 2019
#### based on the Crab code at Penn State Science


crab=read.table("~/HOME/TEACHING/4590/S2020/week13_Poisson/code/crab.txt")
colnames(crab)=c("Obs","Color","Spine","Width","Weight","Satelites")
attach(crab)

#### The intercept only model 

model1=glm(Satelites~1, family=poisson(link=log))
summary(model)

#### This model implies the expected number of satellites 
#### per each crab is the same
#### in this case: E(Satelites)=2.919=exp(1.0713)
model1$fitted


#### Poisson Regression of Satelites on Width

model2=glm(Satelites~1+Width,family=poisson(link=log))
summary(model2)
anova(model1, model2)

#### Get the expected count for each observation: 
#### e.g. for the first observation E(y1)=\hat mu = 3.810
muhat2 =  model2$fitted

crab = data.frame(crab,pred1=model1$fitted,pred2=model2$fitted)
attach(crab)

#### note the linear predictor values 
#### equal the fitted values
#### when exponentiated 
#### e.g., for the first observation, exp(1.3378)=3.810

model2$linear.predictors
exp(model2$linear.predictors)
sum(model2$fitted - exp(model2$linear.predictors))


#### Interpretation of the slope which is statistically significant here
#### e.g., exp(0.1640)=1.18
#### as the width increases by one unit, the number of satilies will 
#### increase (positive sign of the coef), 
#### it will be multiplied by 1.18
#### e.g., for Width=26 and Width=25, first for all values
#### then for specific rows

model$fitted[Width==26]/model$fitted[Width==25]

model$fitted[2]/model$fitted[6]


#### Several types of residuals
#     residuals(object, type = c("deviance", "pearson", "working",
#                                "response", "partial"), ...)
#     type: the type of residuals which should be returned.  The
#          alternatives are: ‘"deviance"’ (default), ‘"pearson"’,
#          ‘"working"’, ‘"response"’, and ‘"partial"’.  Can be
#          abbreviated.

# Deviance residuals
resDs.2 = rstandard(model2)
resD.2 = resid(model2)

# Note, this equals to the residual deviance of the model:
sum(res2.2^2)
# [1] 567.8786

deviance(model2)
# [1] 567.8786


# Pearson residuals
resP.2 = resid(model2,type = 'pearson')
resPs.2 = rstandard(model2, type='pearson')

# Pearson residuals by hand:
resP.2.hand = (Sat - pred2)/sqrt(pred2)

# Standardized Pearson residuals by hand
resPs.2.hand = resP.2.hand/sqrt(1-hatvalues(model2))

# Response (raw) residuals
resR.2 = residuals(model2, type='response')
resR.2.hand = Sat-model2$fitted


# a way to get a quick look at the diagnostics and
# patterns of these residuals
plot(model2)
plot(Width, model2$fitted)
plot(Width, log(model2$fitted))
plot(Width,resR.2)
plot(Width,resP.2)
plot(Width,resDs.2)


#### Based on the residual deviance the model does NOT fit well

1-pchisq(model$deviance, model$df.residual)

#### creating a scatter plot of Satelites vs. Width

plot(Width,Satelites)
identify(Width, Satelites) 

#### click on the plot to identify individual values
#### identified on the screen and the plot, \#48,101,165

#### Diagnostics measures (like in logistic regression)
#### But these work for ungrouped data too, 
#### as long as there is a variable with counts
#### You can do many more but here are a few indicating a lack of fit

influence(model2)
names(influence(model2))
# "hat"  "coefficients" "sigma"        "dev.res"      "pear.res"    

plot(influence(model2)$pear.res)
plot(model$linear.predictors, residuals(model, type="pearson"))

#### To predict a new value

newdt=data.frame(Width=26.3)
predict.glm(model, type="response", newdata=newdt)