edu = P228 edu.lm = lm(ACHV ~ FAM + PEER + SCHOOL, data = edu) summary(edu.lm) # Estimate Std. Error t value Pr(>|t|) #(Intercept) -0.06996 0.25064 -0.279 0.781 #FAM 1.10126 1.41056 0.781 0.438 #PEER 2.32206 1.48129 1.568 0.122 #SCHOOL -2.28100 2.22045 -1.027 0.308 # #Multiple R-squared: 0.2063, Adjusted R-squared: 0.1702 #F-statistic: 5.717 on 3 and 66 DF, p-value: 0.001535 #### Visualize pairwise correlations: matrix plot plot(edu) #### Visualize pairwise correlations: correlation matrix cor(edu) # ACHV FAM PEER SCHOOL #ACHV 1.0000000 0.4194588 0.4398464 0.4181006 #FAM 0.4194588 1.0000000 0.9600806 0.9856837 #PEER 0.4398464 0.9600806 1.0000000 0.9821601 #SCHOOL 0.4181006 0.9856837 0.9821601 1.0000000 #### Examine the VIF of predictors install.packages('car') #install.packages('rms') require(car) #require(rms) vif(edu.lm) # FAM PEER SCHOOL #37.58064 30.21166 83.15544 ################################################################################# # CONSTRAINTS # Example: French imports french = P233 # french YEAR IMPORT DOPROD STOCK CONSUM 1 49 15.9 149.3 4.2 108.1 2 50 16.4 161.2 4.1 114.8 ... 17 65 50.3 336.8 1.2 232.0 18 66 56.6 353.9 4.5 242.9 french.lm = lm(IMPORT ~ DOPROD + STOCK + CONSUM, data = french) summary(french.lm) # Estimate Std. Error t value Pr(>|t|) #(Intercept) -19.7251 4.1253 -4.782 0.000293 *** #DOPROD 0.0322 0.1869 0.172 0.865650 #STOCK 0.4142 0.3223 1.285 0.219545 #CONSUM 0.2427 0.2854 0.851 0.409268 #Multiple R-squared: 0.973, Adjusted R-squared: 0.9673 #F-statistic: 168.4 on 3 and 14 DF, p-value: 3.212e-11 vif(french.lm) # DOPROD STOCK CONSUM #469.742135 1.049877 469.371343 cor(french) # YEAR IMPORT DOPROD STOCK CONSUM #YEAR 1.0000000 0.9497300 0.9882170 0.1948879 0.9886330 #IMPORT 0.9497300 1.0000000 0.9841779 0.2659096 0.9847645 #DOPROD 0.9882170 0.9841779 1.0000000 0.2154456 0.9989330 #STOCK 0.1948879 0.2659096 0.2154456 1.0000000 0.2136902 #CONSUM 0.9886330 0.9847645 0.9989330 0.2136902 1.0000000 x11() jpeg(file = 'frenchres.jpg') plot(rstudent(french.lm),type='b', ylab='Studentized Residuals') dev.off() # Make an indicator for Year < 1960 french$Y60 = french$YEAR < 60 french60.lm = lm(IMPORT ~ DOPROD + STOCK + CONSUM, data = french[french$Y60,]) summary(french60.lm) # Estimate Std. Error t value Pr(>|t|) #(Intercept) -10.12799 1.21216 -8.355 6.9e-05 *** #DOPROD -0.05140 0.07028 -0.731 0.488344 #STOCK 0.58695 0.09462 6.203 0.000444 *** #CONSUM 0.28685 0.10221 2.807 0.026277 * #Multiple R-squared: 0.9919, Adjusted R-squared: 0.9884 #F-statistic: 285.6 on 3 and 7 DF, p-value: 1.112e-07 x11() jpeg(file = 'frenchres2.jpg') plot(rstudent(french60.lm),type='b', ylab='Studentized Residuals') dev.off() ## CONSTRAINED REGRESSION french$DC = french$DOPROD +french$CONSUM frenchDC.lm = lm(IMPORT ~ STOCK + DC, data = french[french$Y60,]) summary(frenchDC.lm) # Estimate Std. Error t value Pr(>|t|) #(Intercept) -9.00680 1.24502 -7.234 8.94e-05 *** #STOCK 0.61164 0.10922 5.600 0.00051 *** #DC 0.08638 0.00356 24.266 8.87e-09 *** #Multiple R-squared: 0.9874, Adjusted R-squared: 0.9843 #F-statistic: 314.4 on 2 and 8 DF, p-value: 2.489e-08 vif(frenchDC.lm) # STOCK DC #1.000893 1.000893 anova(frenchDC.lm, french60.lm) #Model 1: IMPORT ~ STOCK + DC #Model 2: IMPORT ~ DOPROD + STOCK + CONSUM # Res.Df RSS Df Sum of Sq F Pr(>F) #1 8 2.5932 #2 7 1.6729 1 0.92023 3.8504 0.09052 . ######################################################################################3 # PCA -- see slides