Ex6.2

# Data for Ex6.2: 18 observations of three predictors (x1, x2, x3) and the
# response y.
# Columns are named with `=`. Writing `x1 <- c(...)` inside data.frame() passes
# an unnamed argument, so the column gets an auto-generated name and a stray
# vector `x1` is assigned in the calling environment as a side effect; model
# fits with `data = pho` would then silently use those stray vectors instead
# of the data frame's columns.
pho <- data.frame(
  x1 = c(0.4, 0.4, 3.1, 0.6, 4.7, 1.7, 9.4, 10.1, 11.6,
         12.6, 10.9, 23.1, 23.1, 21.6, 23.1, 1.9, 26.8, 29.9),
  x2 = c(52, 34, 19, 34, 24, 65, 44, 31, 29,
         58, 37, 46, 50, 44, 56, 36, 58, 51),
  x3 = c(158, 163, 37, 157, 59, 123, 46, 117, 173,
         112, 111, 114, 134, 73, 168, 143, 202, 124),
  y = c(64, 60, 71, 61, 54, 77, 81, 93, 93,
        51, 76, 96, 77, 93, 95, 54, 168, 99)
)
#(1) 
# Fit the full linear model of y on x1, x2 and x3, then show its coefficient
# table and overall fit statistics.
lm.sol <- lm(y ~ x1 + x2 + x3, data = pho)
summary(lm.sol)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = pho)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -27.6  -11.2   -2.8   11.6   48.8 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   44.929     18.341    2.45   0.0281 * 
## x1             1.803      0.529    3.41   0.0042 **
## x2            -0.134      0.444   -0.30   0.7677   
## x3             0.167      0.114    1.46   0.1657   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.9 on 14 degrees of freedom
## Multiple R-squared:  0.551,  Adjusted R-squared:  0.455 
## F-statistic: 5.73 on 3 and 14 DF,  p-value: 0.009

(1) 回归方程为 ŷ = 44.9290 + 1.8033 x1 - 0.1337 x2 + 0.1668 x3
(2) 回归方程显著(F检验的p值为0.009),但x2和x3的回归系数不显著(p值分别为0.7677和0.1657)。
(3)

# Run AIC-based stepwise selection starting from the full model; the selection
# trace is printed and the selected model is kept in lm.step.
lm.step <- step(lm.sol)
## Start:  AIC=111.2
## y ~ x1 + x2 + x3
## 
##        Df Sum of Sq   RSS AIC
## - x2    1        36  5599 109
## <none>               5563 111
## - x3    1       850  6413 112
## - x1    1      4618 10181 120
## 
## Step:  AIC=109.3
## y ~ x1 + x3
## 
##        Df Sum of Sq   RSS AIC
## <none>               5599 109
## - x3    1       833  6433 110
## - x1    1      5169 10769 119
# Show the coefficient table and fit statistics of the model chosen by step().
summary(lm.step)
## 
## Call:
## lm(formula = y ~ x1 + x3, data = pho)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29.71 -11.32  -2.95  11.29  48.68 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   41.479     13.883    2.99   0.0092 **
## x1             1.737      0.467    3.72   0.0020 **
## x3             0.155      0.104    1.49   0.1559   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.3 on 15 degrees of freedom
## Multiple R-squared:  0.548,  Adjusted R-squared:  0.488 
## F-statistic:  9.1 on 2 and 15 DF,  p-value: 0.00259

x3仍不够显著(p值为0.1559)。再用drop1函数考察逐个剔除变量对模型的影响。

# Single-term deletion table for the stepwise-selected model: RSS and AIC
# after removing each remaining term in turn.
drop1(lm.step)
## Single term deletions
## 
## Model:
## y ~ x1 + x3
##        Df Sum of Sq   RSS AIC
## <none>               5599 109
## x1      1      5169 10769 119
## x3      1       833  6433 110

去掉x3后AIC仅由109略升至110(RSS由5599增至6433),因此可以考虑再去掉x3。

# Refit with x1 as the only predictor, then show the coefficient table and
# fit statistics of the reduced model.
lm.opt <- lm(y ~ x1, data = pho)
summary(lm.opt)
## 
## Call:
## lm(formula = y ~ x1, data = pho)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31.49  -8.28  -1.67   5.62  59.34 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   59.259      7.420    7.99  5.7e-07 ***
## x1             1.843      0.479    3.85   0.0014 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.1 on 16 degrees of freedom
## Multiple R-squared:  0.481,  Adjusted R-squared:  0.448 
## F-statistic: 14.8 on 1 and 16 DF,  p-value: 0.00142

截距和x1的回归系数皆显著,回归方程也显著(p值为0.00142)。