LMR (Linear Models with R), Chapter 4

library(faraway)

savings data

# Load the savings data set from the attached packages (faraway is loaded
# at the top of the file) and show its first six rows.
data(savings)
head(savings, n = 6L)
##              sr pop15 pop75     dpi ddpi
## Australia 11.43 29.35  2.87 2329.68 2.87
## Austria   12.07 23.32  4.41 1507.99 3.93
## Belgium   13.17 23.80  4.43 2108.47 3.82
## Bolivia    5.75 41.89  1.67  189.13 0.22
## Brazil    12.88 42.19  0.83  728.47 4.56
## Canada     8.79 31.72  2.85 2982.88 2.43
# Full model: regress sr on every other column of savings.
g <- lm(formula = sr ~ ., data = savings)
summary(g)
## 
## Call:
## lm(formula = sr ~ ., data = savings)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.2422 -2.6857 -0.2488  2.4280  9.7509 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 28.5660865  7.3545161   3.884 0.000334 ***
## pop15       -0.4611931  0.1446422  -3.189 0.002603 ** 
## pop75       -1.6914977  1.0835989  -1.561 0.125530    
## dpi         -0.0003369  0.0009311  -0.362 0.719173    
## ddpi         0.4096949  0.1961971   2.088 0.042471 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.803 on 45 degrees of freedom
## Multiple R-squared:  0.3385, Adjusted R-squared:  0.2797 
## F-statistic: 5.756 on 4 and 45 DF,  p-value: 0.0007904
# Overall F-test of model g against the intercept-only model, computed by
# hand so it can be compared with the F-statistic line of summary(g).
# Total sum of squares of the response around its mean.
(tss <- sum((savings$sr - mean(savings$sr))^2))
## [1] 983.6283
# Residual sum of squares of the fitted model.
(rss <- deviance(g))
## [1] 650.713
n <- nrow(savings)
# Number of non-intercept terms, read from the fit (model rank minus the
# intercept) instead of being typed in as a literal.
p <- g$rank - 1L
# Residual degrees of freedom of g.
q <- df.residual(g)
(f <- ((tss - rss) / p) / (rss / q))
## [1] 5.755681
# Ask for the upper tail directly; 1 - pf() loses precision for small p-values.
pf(f, p, q, lower.tail = FALSE)
## [1] 0.0007903779
# Reduced model: the same regression with pop15 left out.
g2 <- lm(formula = sr ~ pop75 + dpi + ddpi, data = savings)
summary(g2)
## 
## Call:
## lm(formula = sr ~ pop75 + dpi + ddpi, data = savings)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.0577 -3.2144  0.1687  2.4260 10.0763 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.4874944  1.4276619   3.844  0.00037 ***
## pop75       0.9528574  0.7637455   1.248  0.21849    
## dpi         0.0001972  0.0010030   0.197  0.84499    
## ddpi        0.4737951  0.2137272   2.217  0.03162 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.164 on 46 degrees of freedom
## Multiple R-squared:  0.189,  Adjusted R-squared:  0.1361 
## F-statistic: 3.573 on 3 and 46 DF,  p-value: 0.02093
# Partial F-test for pop15: reduced model g2 against the larger model g.
(rss2 <- deviance(g2))
## [1] 797.7249
# Numerator df = number of terms removed; denominator df = residual df of
# the larger model g. Both are read from the fits rather than typed in.
df_num <- df.residual(g2) - df.residual(g)
df_den <- df.residual(g)
(f <- ((rss2 - rss) / df_num) / (rss / df_den))
## [1] 10.16659
# Ask for the upper tail directly; 1 - pf() loses precision for small p-values.
pf(f, df_num, df_den, lower.tail = FALSE)
## [1] 0.002603019
anova(g2, g)
## Analysis of Variance Table
## 
## Model 1: sr ~ pop75 + dpi + ddpi
## Model 2: sr ~ pop15 + pop75 + dpi + ddpi
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1     46 797.72                                
## 2     45 650.71  1    147.01 10.167 0.002603 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

sat data

# Load the sat data set from the attached packages (faraway is loaded at
# the top of the file) and show its first six rows.
data(sat)
head(sat, n = 6L)
##            expend ratio salary takers verbal math total
## Alabama     4.405  17.2 31.144      8    491  538  1029
## Alaska      8.963  17.6 47.951     47    445  489   934
## Arizona     4.778  19.3 32.175     27    448  496   944
## Arkansas    4.459  17.1 28.934      6    482  523  1005
## California  4.992  24.0 41.078     45    417  485   902
## Colorado    5.443  18.4 34.571     29    462  518   980
# Smaller model: total regressed on expend, ratio and salary only.
g <- lm(formula = total ~ expend + ratio + salary, data = sat)
summary(g)
## 
## Call:
## lm(formula = total ~ expend + ratio + salary, data = sat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -140.911  -46.740   -7.535   47.966  123.329 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1069.234    110.925   9.639 1.29e-12 ***
## expend        16.469     22.050   0.747   0.4589    
## ratio          6.330      6.542   0.968   0.3383    
## salary        -8.823      4.697  -1.878   0.0667 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 68.65 on 46 degrees of freedom
## Multiple R-squared:  0.2096, Adjusted R-squared:  0.1581 
## F-statistic: 4.066 on 3 and 46 DF,  p-value: 0.01209
# Overall F-test of model g against the intercept-only model, computed by
# hand so it can be compared with the F-statistic line of summary(g).
# Total sum of squares of the response around its mean.
(tss <- sum((sat$total - mean(sat$total))^2))
## [1] 274307.7
# Residual sum of squares of the fitted model.
(rss <- deviance(g))
## [1] 216811.9
n <- nrow(sat)
# Number of non-intercept terms, read from the fit (model rank minus the
# intercept) instead of being typed in as a literal.
p <- g$rank - 1L
# Residual degrees of freedom of g.
q <- df.residual(g)
(f <- ((tss - rss) / p) / (rss / q))
## [1] 4.066203
# Ask for the upper tail directly; 1 - pf() loses precision for small p-values.
pf(f, p, q, lower.tail = FALSE)
## [1] 0.01208607
# Larger model: the previous regression with takers added.
g2 <- lm(formula = total ~ expend + ratio + salary + takers, data = sat)
summary(g2)
## 
## Call:
## lm(formula = total ~ expend + ratio + salary + takers, data = sat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -90.531 -20.855  -1.746  15.979  66.571 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1045.9715    52.8698  19.784  < 2e-16 ***
## expend         4.4626    10.5465   0.423    0.674    
## ratio         -3.6242     3.2154  -1.127    0.266    
## salary         1.6379     2.3872   0.686    0.496    
## takers        -2.9045     0.2313 -12.559 2.61e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.7 on 45 degrees of freedom
## Multiple R-squared:  0.8246, Adjusted R-squared:  0.809 
## F-statistic: 52.88 on 4 and 45 DF,  p-value: < 2.2e-16
# Partial F-test for takers: smaller model g against the larger model g2.
(rss2 <- deviance(g2))
## [1] 48123.9
# The error variance in the denominator must use the residual df of the
# LARGER model g2 (45). The earlier version reused q = df.residual(g) = 46,
# which inflated F to 161.2432 and disagreed with anova(g, g2).
df_num <- df.residual(g) - df.residual(g2)
df_den <- df.residual(g2)
(f <- ((rss - rss2) / df_num) / (rss2 / df_den))
## [1] 157.74   (rounded, not re-run; equals the F reported by anova below)
# Ask for the upper tail directly: 1 - pf() bottoms out near 1e-16 here
# and cannot resolve a p-value this small.
pf(f, df_num, df_den, lower.tail = FALSE)
## [1] 2.607e-16   (rounded, not re-run; equals Pr(>F) from anova below)
anova(g, g2)
## Analysis of Variance Table
## 
## Model 1: total ~ expend + ratio + salary
## Model 2: total ~ expend + ratio + salary + takers
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     46 216812                                  
## 2     45  48124  1    168688 157.74 2.607e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1