#### R code for Crab example #### Poisson Regression Model for Count Data #### Vanja Dukic, U of Colorado at Boulder, 2019 #### based on the Crab code at Penn State Science crab=read.table("~/HOME/TEACHING/4590/S2020/week13_Poisson/code/crab.txt") colnames(crab)=c("Obs","Color","Spine","Width","Weight","Satelites") attach(crab) #### The intercept only model model1=glm(Satelites~1, family=poisson(link=log)) summary(model) #### This model implies the expected number of satellites #### per each crab is the same #### in this case: E(Satelites)=2.919=exp(1.0713) model1$fitted #### Poisson Regression of Satelites on Width model2=glm(Satelites~1+Width,family=poisson(link=log)) summary(model2) anova(model1, model2) #### Get the expected count for each observation: #### e.g. for the first observation E(y1)=\hat mu = 3.810 muhat2 = model2$fitted crab = data.frame(crab,pred1=model1$fitted,pred2=model2$fitted) attach(crab) #### note the linear predictor values #### equal the fitted values #### when exponentiated #### e.g., for the first observation, exp(1.3378)=3.810 model2$linear.predictors exp(model2$linear.predictors) sum(model2$fitted - exp(model2$linear.predictors)) #### Interpretation of the slope which is statistically significant here #### e.g., exp(0.1640)=1.18 #### as the width increases by one unit, the number of satilies will #### increase (positive sign of the coef), #### it will be multiplied by 1.18 #### e.g., for Width=26 and Width=25, first for all values #### then for specific rows model$fitted[Width==26]/model$fitted[Width==25] model$fitted[2]/model$fitted[6] #### Several types of residuals # residuals(object, type = c("deviance", "pearson", "working", # "response", "partial"), ...) # type: the type of residuals which should be returned. The # alternatives are: ‘"deviance"’ (default), ‘"pearson"’, # ‘"working"’, ‘"response"’, and ‘"partial"’. Can be # abbreviated. # Deviance residuals resDs.2 = rstandard(model2) resD.2 = resid(model2) # Note, this equals to the residual deviance of the model: sum(res2.2^2) # [1] 567.8786 deviance(model2) # [1] 567.8786 # Pearson residuals resP.2 = resid(model2,type = 'pearson') resPs.2 = rstandard(model2, type='pearson') # Pearson residuals by hand: resP.2.hand = (Sat - pred2)/sqrt(pred2) # Standardized Pearson residuals by hand resPs.2.hand = resP.2.hand/sqrt(1-hatvalues(model2)) # Response (raw) residuals resR.2 = residuals(model2, type='response') resR.2.hand = Sat-model2$fitted # a way to get a quick look at the diagnostics and # patterns of these residuals plot(model2) plot(Width, model2$fitted) plot(Width, log(model2$fitted)) plot(Width,resR.2) plot(Width,resP.2) plot(Width,resDs.2) #### Based on the residual deviance the model does NOT fit well 1-pchisq(model$deviance, model$df.residual) #### creating a scatter plot of Satelites vs. Width plot(Width,Satelites) identify(Width, Satelites) #### click on the plot to identify individual values #### identified on the screen and the plot, \#48,101,165 #### Diagnostics measures (like in logistic regression) #### But these work for ungrouped data too, #### as long as there is a variable with counts #### You can do many more but here are a few indicating a lack of fit influence(model2) names(influence(model2)) # "hat" "coefficients" "sigma" "dev.res" "pear.res" plot(influence(model2)$pear.res) plot(model$linear.predictors, residuals(model, type="pearson")) #### To predict a new value newdt=data.frame(Width=26.3) predict.glm(model, type="response", newdata=newdt)