# LMR Chapter 4
library(faraway)
# savings data
# Load the savings data set and preview its first rows.
data(savings)
head(savings)
## sr pop15 pop75 dpi ddpi
## Australia 11.43 29.35 2.87 2329.68 2.87
## Austria 12.07 23.32 4.41 1507.99 3.93
## Belgium 13.17 23.80 4.43 2108.47 3.82
## Bolivia 5.75 41.89 1.67 189.13 0.22
## Brazil 12.88 42.19 0.83 728.47 4.56
## Canada 8.79 31.72 2.85 2982.88 2.43
# Full model: regress sr on every other column of the data frame.
g <- lm(sr ~ ., data = savings)
summary(g)
##
## Call:
## lm(formula = sr ~ ., data = savings)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.2422 -2.6857 -0.2488 2.4280 9.7509
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.5660865 7.3545161 3.884 0.000334 ***
## pop15 -0.4611931 0.1446422 -3.189 0.002603 **
## pop75 -1.6914977 1.0835989 -1.561 0.125530
## dpi -0.0003369 0.0009311 -0.362 0.719173
## ddpi 0.4096949 0.1961971 2.088 0.042471 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.803 on 45 degrees of freedom
## Multiple R-squared: 0.3385, Adjusted R-squared: 0.2797
## F-statistic: 5.756 on 4 and 45 DF, p-value: 0.0007904
# Overall F-test for g, computed by hand from the sums of squares.
# Total sum of squares of the response about its mean.
tss <- with(savings, sum((sr - mean(sr))^2))
tss
## [1] 983.6283
# Residual sum of squares of the fitted model g.
rss <- deviance(g)
rss
## [1] 650.713
n <- nrow(savings)
p <- 4 # number of predictors in g (pop15, pop75, dpi, ddpi)
q <- df.residual(g) # residual degrees of freedom of g
# F = (explained SS / p) / (residual SS / q)
f <- ((tss - rss) / p) / (rss / q)
f
## [1] 5.755681
# Upper-tail probability of F on (p, q) degrees of freedom.
1 - pf(f, df1 = p, df2 = q)
## [1] 0.0007903779
# Reduced model: same response, with pop15 left out of the predictors.
g2 <- lm(sr ~ pop75 + dpi + ddpi, data = savings)
summary(g2)
##
## Call:
## lm(formula = sr ~ pop75 + dpi + ddpi, data = savings)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0577 -3.2144 0.1687 2.4260 10.0763
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.4874944 1.4276619 3.844 0.00037 ***
## pop75 0.9528574 0.7637455 1.248 0.21849
## dpi 0.0001972 0.0010030 0.197 0.84499
## ddpi 0.4737951 0.2137272 2.217 0.03162 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.164 on 46 degrees of freedom
## Multiple R-squared: 0.189, Adjusted R-squared: 0.1361
## F-statistic: 3.573 on 3 and 46 DF, p-value: 0.02093
# Partial F-test for pop15: compare the reduced fit g2 with the full fit g.
rss2 <- deviance(g2)
rss2
## [1] 797.7249
# One coefficient is dropped, so the numerator has 1 degree of freedom; the
# denominator is the full model's residual mean square (rss on q df).
f <- (rss2 - rss) / (rss / q)
f
## [1] 10.16659
1 - pf(f, df1 = 1, df2 = q)
## [1] 0.002603019
# The same comparison carried out by anova().
anova(g2, g)
## Analysis of Variance Table
##
## Model 1: sr ~ pop75 + dpi + ddpi
## Model 2: sr ~ pop15 + pop75 + dpi + ddpi
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 46 797.72
## 2 45 650.71 1 147.01 10.167 0.002603 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# sat data
# Load the sat data set and preview its first rows.
data(sat)
head(sat)
## expend ratio salary takers verbal math total
## Alabama 4.405 17.2 31.144 8 491 538 1029
## Alaska 8.963 17.6 47.951 47 445 489 934
## Arizona 4.778 19.3 32.175 27 448 496 944
## Arkansas 4.459 17.1 28.934 6 482 523 1005
## California 4.992 24.0 41.078 45 417 485 902
## Colorado 5.443 18.4 34.571 29 462 518 980
# Base model: total regressed on expend, ratio and salary.
g <- lm(total ~ expend + ratio + salary, data = sat)
summary(g)
##
## Call:
## lm(formula = total ~ expend + ratio + salary, data = sat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -140.911 -46.740 -7.535 47.966 123.329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1069.234 110.925 9.639 1.29e-12 ***
## expend 16.469 22.050 0.747 0.4589
## ratio 6.330 6.542 0.968 0.3383
## salary -8.823 4.697 -1.878 0.0667 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 68.65 on 46 degrees of freedom
## Multiple R-squared: 0.2096, Adjusted R-squared: 0.1581
## F-statistic: 4.066 on 3 and 46 DF, p-value: 0.01209
# Overall F-test for g, computed by hand from the sums of squares.
# Total sum of squares of the response about its mean.
tss <- with(sat, sum((total - mean(total))^2))
tss
## [1] 274307.7
# Residual sum of squares of the fitted model g.
rss <- deviance(g)
rss
## [1] 216811.9
n <- nrow(sat)
p <- 3 # number of predictors in g (expend, ratio, salary)
q <- df.residual(g) # residual degrees of freedom of g
# F = (explained SS / p) / (residual SS / q)
f <- ((tss - rss) / p) / (rss / q)
f
## [1] 4.066203
# Upper-tail probability of F on (p, q) degrees of freedom.
1 - pf(f, df1 = p, df2 = q)
## [1] 0.01208607
# Extended model: the same three predictors plus takers.
g2 <- lm(total ~ expend + ratio + salary + takers, data = sat)
summary(g2)
##
## Call:
## lm(formula = total ~ expend + ratio + salary + takers, data = sat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -90.531 -20.855 -1.746 15.979 66.571
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1045.9715 52.8698 19.784 < 2e-16 ***
## expend 4.4626 10.5465 0.423 0.674
## ratio -3.6242 3.2154 -1.127 0.266
## salary 1.6379 2.3872 0.686 0.496
## takers -2.9045 0.2313 -12.559 2.61e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.7 on 45 degrees of freedom
## Multiple R-squared: 0.8246, Adjusted R-squared: 0.809
## F-statistic: 52.88 on 4 and 45 DF, p-value: < 2.2e-16
# Partial F-test for adding takers: compare the smaller fit g with the larger
# fit g2.
rss2 <- deviance(g2)
rss2
## [1] 48123.9
# The denominator is the residual mean square of the LARGER model, so its
# degrees of freedom must come from g2 (45). The variable q still holds
# df.residual(g) = 46 from the overall F-test on g; using it here inflated F
# to 161.2432 instead of the 157.74 that anova() reports.
# One coefficient (takers) is added, so the numerator has 1 degree of freedom.
f <- (rss - rss2) / (rss2 / df.residual(g2))
f
# Ask pf() for the upper tail directly: 1 - pf() cancels to machine precision
# (about 1.1e-16) for p-values this small.
pf(f, df1 = 1, df2 = df.residual(g2), lower.tail = FALSE)
# Expected: F is about 157.74 and the p-value about 2.607e-16, matching the
# anova() table produced by the next call.
anova(g, g2)
## Analysis of Variance Table
##
## Model 1: total ~ expend + ratio + salary
## Model 2: total ~ expend + ratio + salary + takers
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 46 216812
## 2 45 48124 1 168688 157.74 2.607e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1