# Load the data set; the first row of the CSV holds the column names.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
d2.5 <- read.csv("C:\\Users\\86167\\Desktop\\ex2.5.csv", header = TRUE)
# Fit the full linear model: y regressed on all six predictors x1..x6.
lm.finance <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6, data = d2.5)
# Print coefficient estimates, t-tests and overall fit statistics.
summary(lm.finance)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6, data = d2.5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3078.2  -713.3  -118.6   674.8  2852.2 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.716e+04  1.585e+04   2.345 0.028937 *  
## x1          -7.792e-01  3.351e-01  -2.326 0.030138 *  
## x2           2.308e-01  5.888e-02   3.920 0.000786 ***
## x3           5.425e-01  8.940e-01   0.607 0.550460    
## x4          -3.059e-01  1.636e-01  -1.869 0.075580 .  
## x5           4.600e-01  1.527e-01   3.012 0.006636 ** 
## x6          -5.757e-01  6.274e-01  -0.918 0.369255    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1428 on 21 degrees of freedom
## Multiple R-squared:  0.9985, Adjusted R-squared:  0.9981 
## F-statistic:  2338 on 6 and 21 DF,  p-value: < 2.2e-16
# Stepwise selection starting from the full model; terms may be dropped or
# re-added at each step, and the trace below ranks the candidates by AIC.
lm.step <- step(lm.finance, direction = "both")
## Start:  AIC=412.73
## y ~ x1 + x2 + x3 + x4 + x5 + x6
## 
##        Df Sum of Sq      RSS    AIC
## - x3    1    750912 43571799 411.22
## - x6    1   1716840 44537727 411.83
## <none>              42820887 412.73
## - x4    1   7125754 49946641 415.04
## - x1    1  11028364 53849251 417.15
## - x5    1  18499558 61320445 420.78
## - x2    1  31333958 74154845 426.10
## 
## Step:  AIC=411.22
## y ~ x1 + x2 + x4 + x5 + x6
## 
##        Df Sum of Sq       RSS    AIC
## - x6    1   1114248  44686047 409.92
## <none>               43571799 411.22
## + x3    1    750912  42820887 412.73
## - x1    1  10574111  54145911 415.30
## - x4    1  21835301  65407100 420.59
## - x2    1  32092087  75663886 424.67
## - x5    1 112640001 156211801 444.97
## 
## Step:  AIC=409.92
## y ~ x1 + x2 + x4 + x5
## 
##        Df Sum of Sq       RSS    AIC
## <none>               44686047 409.92
## + x6    1   1114248  43571799 411.22
## + x3    1    148320  44537727 411.83
## - x1    1   9603822  54289869 413.37
## - x4    1  31433342  76119389 422.84
## - x2    1  32250373  76936420 423.14
## - x5    1 111626871 156312918 442.98

## Compute the ordinary and standardized residuals of the stepwise model

# Ordinary residuals are taken from lm.step (not from the full model
# lm.finance) so that they belong to the same model as the fitted values
# they are plotted against further down.
y.res <- residuals(lm.step)
# Standardized residuals of the stepwise model lm.step.
y.rst <- rstandard(lm.step)
# Show the standardized residuals.
print(y.rst)
##           1           2           3           4           5           6 
## -1.21699581 -0.78721946 -0.38233142 -0.27703068  0.28527306  1.22685046 
##           7           8           9          10          11          12 
##  1.32333522  1.22932548  0.81626125  0.45556993 -0.10743258 -0.69287852 
##          13          14          15          16          17          18 
## -0.92329244 -0.49363842 -0.24834446 -0.53198827  0.18421338 -0.03401075 
##          19          20          21          22          23          24 
## -0.37768467  0.74526662 -0.27973593 -0.82441646  2.20553032  0.07556743 
##          25          26          27          28 
## -1.73291937 -2.37088887  1.36683593  1.96225224
# Fitted values of the stepwise model lm.step (no new data supplied).
y.fit <- predict(lm.step)
# Scatter plot: ordinary residuals (vertical) against fitted values (horizontal).
plot(y.res ~ y.fit)

# Scatter plot: standardized residuals (vertical) against fitted values.
plot(y.rst ~ y.fit)

# Refit the stepwise model with a log-transformed response; the right-hand
# side of the formula is kept as it is.
lm.step_new <- update(lm.step, log(.) ~ .)
# Fitted values of the refitted model (these overwrite the earlier y.fit).
y.fit <- predict(lm.step_new)
# Standardized residuals of the refitted model (these overwrite y.rst).
y.rst <- rstandard(lm.step_new)
# Scatter plot: standardized residuals (vertical) against fitted values.
plot(y.rst ~ y.fit)

## Observation 26 is treated as an outlier

# Build a new data frame without row 26.
d2.5.new <- d2.5[-26, ]
# Refit the full six-predictor model on the reduced data.
lm.new <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6, data = d2.5.new)
# Print coefficient estimates and fit statistics of the refitted model.
summary(lm.new)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6, data = d2.5.new)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2807.39  -630.53   -92.54   553.38  2549.24 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.992e+04  1.398e+04   2.140 0.044895 *  
## x1          -9.727e-01  2.987e-01  -3.257 0.003947 ** 
## x2           2.394e-01  5.116e-02   4.679 0.000144 ***
## x3           9.499e-01  7.888e-01   1.204 0.242550    
## x4          -2.323e-01  1.443e-01  -1.610 0.123113    
## x5           4.221e-01  1.331e-01   3.170 0.004809 ** 
## x6          -6.316e-01  5.445e-01  -1.160 0.259769    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1239 on 20 degrees of freedom
## Multiple R-squared:  0.9988, Adjusted R-squared:  0.9984 
## F-statistic:  2742 on 6 and 20 DF,  p-value: < 2.2e-16
# Stepwise selection (both directions, ranked by AIC in the trace below)
# starting from lm.new. This overwrites the earlier lm.step object.
lm.step <- step(lm.new, direction = "both")
## Start:  AIC=390.47
## y ~ x1 + x2 + x3 + x4 + x5 + x6
## 
##        Df Sum of Sq      RSS    AIC
## - x6    1   2063605 32743411 390.23
## - x3    1   2224639 32904445 390.36
## <none>              30679805 390.47
## - x4    1   3975254 34655059 391.76
## - x5    1  15419533 46099339 399.46
## - x1    1  16272374 46952179 399.96
## - x2    1  33580276 64260081 408.43
## 
## Step:  AIC=390.23
## y ~ x1 + x2 + x3 + x4 + x5
## 
##        Df Sum of Sq      RSS    AIC
## - x3    1   1021533 33764944 389.06
## <none>              32743411 390.23
## + x6    1   2063605 30679805 390.47
## - x4    1  10881273 43624683 395.97
## - x1    1  14215993 46959404 397.96
## - x5    1  21284824 54028235 401.75
## - x2    1  34199139 66942550 407.53
## 
## Step:  AIC=389.06
## y ~ x1 + x2 + x4 + x5
## 
##        Df Sum of Sq       RSS    AIC
## <none>               33764944 389.06
## + x3    1   1021533  32743411 390.23
## + x6    1    860500  32904445 390.36
## - x1    1  13303749  47068694 396.02
## - x4    1  27700323  61465267 403.23
## - x2    1  34757775  68522719 406.16
## - x5    1 118952263 152717207 427.80
# Fitted values of the current stepwise model lm.step.
y.fit <- predict(lm.step)
# Standardized residuals of the same model.
y.rst <- rstandard(lm.step)
# Scatter plot: standardized residuals (vertical) against fitted values.
plot(y.rst ~ y.fit)

## Observation from the plot above: almost all points fall within [-2, 2].
## Regression diagnostics

# Lay the diagnostic plots out on a 2 x 2 grid. par() returns the previous
# settings, which are kept so the layout can be restored afterwards instead
# of leaking into any later plots.
old.par <- par(mfrow = c(2, 2))
# Draw the standard lm diagnostic plots.
plot(lm.step)
par(old.par)

# Print the influence diagnostics table for each observation.
influence.measures(lm.step)
## Influence measures of
##   lm(formula = y ~ x1 + x2 + x4 + x5, data = d2.5.new) :
## 
##       dfb.1_   dfb.x1   dfb.x2   dfb.x4   dfb.x5   dffit cov.r   cook.d    hat
## 1  -0.558569 -0.13628 -0.20363  0.51241  0.22704 -0.8385 1.090 0.134619 0.2621
## 2  -0.242060 -0.00375 -0.10961  0.21368  0.06571 -0.4591 1.298 0.042473 0.2008
## 3  -0.075919  0.02315 -0.03804  0.06298  0.00204 -0.1980 1.418 0.008132 0.1543
## 4  -0.029071  0.03086 -0.01370  0.02034 -0.01930 -0.1321 1.398 0.003636 0.1237
## 5   0.000163 -0.03530  0.00483  0.00578  0.02728  0.0908 1.393 0.001723 0.1084
## 6   0.012032 -0.09925 -0.03080  0.01108  0.10676  0.4147 0.877 0.032952 0.0804
## 7  -0.138470 -0.23148 -0.03887  0.16347  0.21654  0.4679 0.838 0.041482 0.0896
## 8  -0.208726 -0.27466 -0.01967  0.23099  0.23337  0.4488 0.922 0.038812 0.0991
## 9  -0.144915 -0.16998 -0.00983  0.15721  0.14040  0.2750 1.172 0.015308 0.0932
## 10  0.027333  0.05245 -0.03443 -0.02646 -0.02411  0.1472 1.242 0.004469 0.0609
## 11  0.018589  0.02737 -0.00925 -0.01938 -0.01672  0.0330 1.507 0.000228 0.1639
## 12 -0.177049 -0.26826  0.09890  0.18681  0.15558 -0.3013 1.609 0.018793 0.2617
## 13 -0.180391 -0.31672  0.12789  0.19317  0.18032 -0.3885 1.293 0.030583 0.1743
## 14 -0.043926 -0.11467  0.07633  0.04985  0.04459 -0.1643 1.430 0.005613 0.1490
## 15  0.011332 -0.02246  0.03772 -0.00925 -0.00595 -0.0702 1.394 0.001031 0.1037
## 16  0.128857  0.04862  0.07611 -0.12660 -0.08554 -0.2065 1.280 0.008772 0.0997
## 17 -0.037459 -0.01780 -0.02019  0.03695  0.02718  0.0528 1.428 0.000584 0.1204
## 18  0.061220  0.03825  0.02952 -0.06096 -0.05099 -0.0750 1.523 0.001178 0.1765
## 19  0.260243  0.20567  0.02077 -0.26280 -0.17399 -0.2946 1.418 0.017862 0.1862
## 20  0.030189  0.15968 -0.01595 -0.04161 -0.13685  0.3071 1.203 0.019095 0.1149
## 21  0.056058  0.03374 -0.07941 -0.05591  0.03973 -0.1598 1.346 0.005299 0.1083
## 22  0.316044  0.38861 -0.50125 -0.33163  0.07423 -0.7034 1.123 0.096114 0.2306
## 23  0.218665 -0.00330  1.41797 -0.20237 -1.09091  1.6752 0.529 0.457556 0.3192
## 24 -0.055840 -0.03574 -0.10989  0.05549  0.11125 -0.1510 1.825 0.004764 0.3166
## 25  0.606337  0.73269 -0.03294 -0.62661 -0.65526 -1.3530 0.286 0.274048 0.1791
## 27  0.206060  0.07269  0.08770 -0.20283 -0.06962  0.6277 1.544 0.079466 0.3250
## 28  0.176564  0.16558 -1.12511 -0.19273  0.88861  1.9498 2.873 0.738825 0.6985
##    inf
## 1     
## 2     
## 3     
## 4     
## 5     
## 6     
## 7     
## 8     
## 9     
## 10    
## 11    
## 12    
## 13    
## 14    
## 15    
## 16    
## 17    
## 18    
## 19    
## 20    
## 21    
## 22    
## 23   *
## 24   *
## 25   *
## 27    
## 28   *

## Observations 23, 24, 25 and 28 are flagged as influential points