# Load the data set from a CSV file whose first row holds the column names.
# NOTE(review): the path is absolute and machine-specific; adjust it when the
# script is run on another computer.
d2.5 <- read.csv("C:\\Users\\86167\\Desktop\\ex2.5.csv", header = TRUE)
# Fit the full linear model of y on all six predictors x1..x6.
lm.finance <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6, data = d2.5)
# Print the coefficient table, residual summary and overall fit statistics.
summary(lm.finance)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6, data = d2.5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3078.2 -713.3 -118.6 674.8 2852.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.716e+04 1.585e+04 2.345 0.028937 *
## x1 -7.792e-01 3.351e-01 -2.326 0.030138 *
## x2 2.308e-01 5.888e-02 3.920 0.000786 ***
## x3 5.425e-01 8.940e-01 0.607 0.550460
## x4 -3.059e-01 1.636e-01 -1.869 0.075580 .
## x5 4.600e-01 1.527e-01 3.012 0.006636 **
## x6 -5.757e-01 6.274e-01 -0.918 0.369255
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1428 on 21 degrees of freedom
## Multiple R-squared: 0.9985, Adjusted R-squared: 0.9981
## F-statistic: 2338 on 6 and 21 DF, p-value: < 2.2e-16
# Stepwise variable selection by AIC, starting from the full model; with
# direction = "both" a term may be dropped or added back at every step.
lm.step <- step(object = lm.finance, direction = "both")
## Start: AIC=412.73
## y ~ x1 + x2 + x3 + x4 + x5 + x6
##
## Df Sum of Sq RSS AIC
## - x3 1 750912 43571799 411.22
## - x6 1 1716840 44537727 411.83
## <none> 42820887 412.73
## - x4 1 7125754 49946641 415.04
## - x1 1 11028364 53849251 417.15
## - x5 1 18499558 61320445 420.78
## - x2 1 31333958 74154845 426.10
##
## Step: AIC=411.22
## y ~ x1 + x2 + x4 + x5 + x6
##
## Df Sum of Sq RSS AIC
## - x6 1 1114248 44686047 409.92
## <none> 43571799 411.22
## + x3 1 750912 42820887 412.73
## - x1 1 10574111 54145911 415.30
## - x4 1 21835301 65407100 420.59
## - x2 1 32092087 75663886 424.67
## - x5 1 112640001 156211801 444.97
##
## Step: AIC=409.92
## y ~ x1 + x2 + x4 + x5
##
## Df Sum of Sq RSS AIC
## <none> 44686047 409.92
## + x6 1 1114248 43571799 411.22
## + x3 1 148320 44537727 411.83
## - x1 1 9603822 54289869 413.37
## - x4 1 31433342 76119389 422.84
## - x2 1 32250373 76936420 423.14
## - x5 1 111626871 156312918 442.98
# Compute the ordinary and standardized residuals of the stepwise model.
# (Studentized residuals are not computed here; rstudent(lm.step) gives them.)
# The ordinary residuals are taken from lm.step, the same model that supplies
# the fitted values they are later plotted against, so the plot is consistent.
y.res <- residuals(lm.step)
# Standardized residuals of the stepwise model lm.step.
y.rst <- rstandard(lm.step)
# Print the standardized residuals.
print(y.rst)
## 1 2 3 4 5 6
## -1.21699581 -0.78721946 -0.38233142 -0.27703068 0.28527306 1.22685046
## 7 8 9 10 11 12
## 1.32333522 1.22932548 0.81626125 0.45556993 -0.10743258 -0.69287852
## 13 14 15 16 17 18
## -0.92329244 -0.49363842 -0.24834446 -0.53198827 0.18421338 -0.03401075
## 19 20 21 22 23 24
## -0.37768467 0.74526662 -0.27973593 -0.82441646 2.20553032 0.07556743
## 25 26 27 28
## -1.73291937 -2.37088887 1.36683593 1.96225224
# Fitted values of the stepwise model lm.step; used as the x-axis below.
y.fit <- predict(object = lm.step)
# Scatter plot of ordinary residuals (y-axis) against fitted values (x-axis).
plot(y.res ~ y.fit)
# Scatter plot of standardized residuals (y-axis) against fitted values.
plot(y.rst ~ y.fit)
# Refit the stepwise model with log(y) as the response and the same
# right-hand side.
lm.step_new <- update(lm.step, formula. = log(.) ~ .)
# Standardized residuals of the log-response model (overwrites y.rst).
y.rst <- rstandard(lm.step_new)
# Fitted values of the log-response model (overwrites y.fit).
y.fit <- predict(object = lm.step_new)
# Standardized residuals (y-axis) against fitted values (x-axis).
plot(y.rst ~ y.fit)
# Observation 26 is judged to be an outlier.
# Build a new data frame with row 26 removed.
d2.5.new <- d2.5[-26, ]
# Refit the full six-predictor model on the reduced data.
lm.new <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6, data = d2.5.new)
# Print the coefficient table and fit statistics of the refitted model.
summary(lm.new)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6, data = d2.5.new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2807.39 -630.53 -92.54 553.38 2549.24
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.992e+04 1.398e+04 2.140 0.044895 *
## x1 -9.727e-01 2.987e-01 -3.257 0.003947 **
## x2 2.394e-01 5.116e-02 4.679 0.000144 ***
## x3 9.499e-01 7.888e-01 1.204 0.242550
## x4 -2.323e-01 1.443e-01 -1.610 0.123113
## x5 4.221e-01 1.331e-01 3.170 0.004809 **
## x6 -6.316e-01 5.445e-01 -1.160 0.259769
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1239 on 20 degrees of freedom
## Multiple R-squared: 0.9988, Adjusted R-squared: 0.9984
## F-statistic: 2742 on 6 and 20 DF, p-value: < 2.2e-16
# Repeat the AIC-based stepwise selection (terms may be dropped or re-added)
# on the model refitted without observation 26; this overwrites lm.step.
lm.step <- step(object = lm.new, direction = "both")
## Start: AIC=390.47
## y ~ x1 + x2 + x3 + x4 + x5 + x6
##
## Df Sum of Sq RSS AIC
## - x6 1 2063605 32743411 390.23
## - x3 1 2224639 32904445 390.36
## <none> 30679805 390.47
## - x4 1 3975254 34655059 391.76
## - x5 1 15419533 46099339 399.46
## - x1 1 16272374 46952179 399.96
## - x2 1 33580276 64260081 408.43
##
## Step: AIC=390.23
## y ~ x1 + x2 + x3 + x4 + x5
##
## Df Sum of Sq RSS AIC
## - x3 1 1021533 33764944 389.06
## <none> 32743411 390.23
## + x6 1 2063605 30679805 390.47
## - x4 1 10881273 43624683 395.97
## - x1 1 14215993 46959404 397.96
## - x5 1 21284824 54028235 401.75
## - x2 1 34199139 66942550 407.53
##
## Step: AIC=389.06
## y ~ x1 + x2 + x4 + x5
##
## Df Sum of Sq RSS AIC
## <none> 33764944 389.06
## + x3 1 1021533 32743411 390.23
## + x6 1 860500 32904445 390.36
## - x1 1 13303749 47068694 396.02
## - x4 1 27700323 61465267 403.23
## - x2 1 34757775 68522719 406.16
## - x5 1 118952263 152717207 427.80
# Standardized residuals of the current stepwise model lm.step.
y.rst <- rstandard(lm.step)
# Fitted values of the same model.
y.fit <- predict(object = lm.step)
# Standardized residuals (y-axis) against fitted values (x-axis).
plot(y.rst ~ y.fit)
# Nearly all standardized residuals fall inside the band [-2, 2].
# Regression diagnostics
# Lay the diagnostic plots out on a 2 x 2 grid; par() returns the previous
# settings, which are restored afterwards so later plots are not left in
# the 2 x 2 layout.
old.par <- par(mfrow = c(2, 2))
# Draw the diagnostic plots for the stepwise model.
plot(lm.step)
par(old.par)
# Compute the influence diagnostics (DFBETAS, DFFIT, covariance ratio,
# Cook's distance, hat values) and mark potentially influential observations.
influence.measures(lm.step)
## Influence measures of
## lm(formula = y ~ x1 + x2 + x4 + x5, data = d2.5.new) :
##
## dfb.1_ dfb.x1 dfb.x2 dfb.x4 dfb.x5 dffit cov.r cook.d hat
## 1 -0.558569 -0.13628 -0.20363 0.51241 0.22704 -0.8385 1.090 0.134619 0.2621
## 2 -0.242060 -0.00375 -0.10961 0.21368 0.06571 -0.4591 1.298 0.042473 0.2008
## 3 -0.075919 0.02315 -0.03804 0.06298 0.00204 -0.1980 1.418 0.008132 0.1543
## 4 -0.029071 0.03086 -0.01370 0.02034 -0.01930 -0.1321 1.398 0.003636 0.1237
## 5 0.000163 -0.03530 0.00483 0.00578 0.02728 0.0908 1.393 0.001723 0.1084
## 6 0.012032 -0.09925 -0.03080 0.01108 0.10676 0.4147 0.877 0.032952 0.0804
## 7 -0.138470 -0.23148 -0.03887 0.16347 0.21654 0.4679 0.838 0.041482 0.0896
## 8 -0.208726 -0.27466 -0.01967 0.23099 0.23337 0.4488 0.922 0.038812 0.0991
## 9 -0.144915 -0.16998 -0.00983 0.15721 0.14040 0.2750 1.172 0.015308 0.0932
## 10 0.027333 0.05245 -0.03443 -0.02646 -0.02411 0.1472 1.242 0.004469 0.0609
## 11 0.018589 0.02737 -0.00925 -0.01938 -0.01672 0.0330 1.507 0.000228 0.1639
## 12 -0.177049 -0.26826 0.09890 0.18681 0.15558 -0.3013 1.609 0.018793 0.2617
## 13 -0.180391 -0.31672 0.12789 0.19317 0.18032 -0.3885 1.293 0.030583 0.1743
## 14 -0.043926 -0.11467 0.07633 0.04985 0.04459 -0.1643 1.430 0.005613 0.1490
## 15 0.011332 -0.02246 0.03772 -0.00925 -0.00595 -0.0702 1.394 0.001031 0.1037
## 16 0.128857 0.04862 0.07611 -0.12660 -0.08554 -0.2065 1.280 0.008772 0.0997
## 17 -0.037459 -0.01780 -0.02019 0.03695 0.02718 0.0528 1.428 0.000584 0.1204
## 18 0.061220 0.03825 0.02952 -0.06096 -0.05099 -0.0750 1.523 0.001178 0.1765
## 19 0.260243 0.20567 0.02077 -0.26280 -0.17399 -0.2946 1.418 0.017862 0.1862
## 20 0.030189 0.15968 -0.01595 -0.04161 -0.13685 0.3071 1.203 0.019095 0.1149
## 21 0.056058 0.03374 -0.07941 -0.05591 0.03973 -0.1598 1.346 0.005299 0.1083
## 22 0.316044 0.38861 -0.50125 -0.33163 0.07423 -0.7034 1.123 0.096114 0.2306
## 23 0.218665 -0.00330 1.41797 -0.20237 -1.09091 1.6752 0.529 0.457556 0.3192
## 24 -0.055840 -0.03574 -0.10989 0.05549 0.11125 -0.1510 1.825 0.004764 0.3166
## 25 0.606337 0.73269 -0.03294 -0.62661 -0.65526 -1.3530 0.286 0.274048 0.1791
## 27 0.206060 0.07269 0.08770 -0.20283 -0.06962 0.6277 1.544 0.079466 0.3250
## 28 0.176564 0.16558 -1.12511 -0.19273 0.88861 1.9498 2.873 0.738825 0.6985
## inf
## 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23 *
## 24 *
## 25 *
## 27
## 28 *
## Observations 23, 24, 25 and 28 are flagged as influential points.