load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch10\\example10_1.RData")
library(corrgram)
corrgram(example10_1[2:7],order=TRUE,lower.panel = panel.shade,upper.panel = panel.pie,text.panel = panel.txt)
model_1=lm(y~x1+x2+x3+x4+x5,data = example10_1)
summary(model_1)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5, data = example10_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.7204 -6.0600 0.7152 3.2144 21.4805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.2604768 10.4679833 0.407 0.68856
## x1 0.1273254 0.0959790 1.327 0.20037
## x2 0.1605660 0.0556834 2.884 0.00952 **
## x3 0.0007636 0.0013556 0.563 0.57982
## x4 -0.3331990 0.3986248 -0.836 0.41362
## x5 -0.5746462 0.3087506 -1.861 0.07826 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.65 on 19 degrees of freedom
## Multiple R-squared: 0.8518, Adjusted R-squared: 0.8128
## F-statistic: 21.84 on 5 and 19 DF, p-value: 2.835e-07
confint(model_1,level = 0.95)#计算回归系数的置信区间
## 2.5 % 97.5 %
## (Intercept) -17.649264072 26.170217667
## x1 -0.073561002 0.328211809
## x2 0.044019355 0.277112598
## x3 -0.002073719 0.003600932
## x4 -1.167530271 0.501132297
## x5 -1.220868586 0.071576251
anova(model_1)#方差分析表
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 10508.9 10508.9 92.7389 9.625e-09 ***
## x2 1 1347.1 1347.1 11.8878 0.002696 **
## x3 1 85.4 85.4 0.7539 0.396074
## x4 1 40.5 40.5 0.3573 0.557082
## x5 1 392.5 392.5 3.4641 0.078262 .
## Residuals 19 2153.0 113.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#模型诊断:残差图
par(mfrow=c(1,2),mai=c(0.8,0.8,0.4,0.1),cex=0.8,cex.main=0.7)
plot(model_1,which = 1:2)
#考虑到第2、4个点的残差较大(见Q-Q图),剔除后继续分析
new_model_1=lm(y~x1+x2+x3+x4+x5,data=example10_1[-c(2,4),])
summary(new_model_1)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5, data = example10_1[-c(2,
## 4), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7788 -4.4404 0.7701 4.5470 13.0666
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.935e+00 8.211e+00 0.845 0.41000
## x1 2.588e-01 7.370e-02 3.511 0.00268 **
## x2 2.314e-02 5.768e-02 0.401 0.69331
## x3 6.481e-05 9.743e-04 0.067 0.94774
## x4 -2.572e-01 2.849e-01 -0.903 0.37922
## x5 -7.479e-01 2.171e-01 -3.446 0.00309 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.252 on 17 degrees of freedom
## Multiple R-squared: 0.9184, Adjusted R-squared: 0.8944
## F-statistic: 38.28 on 5 and 17 DF, p-value: 1.143e-08
#此时的回归诊断
par(mfrow=c(1,2),mai=c(0.8,0.8,0.4,0.1),cex=0.8)
plot(new_model_1,which = 1:2)
## 多重共线性:放宽回归模型假定1
library(psych)
corr.test(example10_1[3:7],use='complete')
## Call:corr.test(x = example10_1[3:7], use = "complete")
## Correlation matrix
## x1 x2 x3 x4 x5
## x1 1.00 0.74 0.88 -0.62 -0.28
## x2 0.74 1.00 0.55 -0.54 -0.32
## x3 0.88 0.55 1.00 -0.52 -0.29
## x4 -0.62 -0.54 -0.52 1.00 0.10
## x5 -0.28 -0.32 -0.29 0.10 1.00
## Sample Size
## [1] 25
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## x1 x2 x3 x4 x5
## x1 0.00 0.00 0.00 0.01 0.47
## x2 0.00 0.00 0.03 0.03 0.46
## x3 0.00 0.00 0.00 0.04 0.47
## x4 0.00 0.01 0.01 0.00 0.65
## x5 0.18 0.12 0.16 0.65 0.00
##
## To see confidence intervals of the correlations, print with the short=FALSE option
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
vif(model_1)
## x1 x2 x3 x4 x5
## 8.233159 2.629940 5.184365 1.702361 1.174053
tolerance=1/vif(model_1)#容忍度:VIF的倒数
tolerance
## x1 x2 x3 x4 x5
## 0.1214601 0.3802368 0.1928877 0.5874195 0.8517500
model_2=step(model_1)#变量选择
## Start: AIC=123.39
## y ~ x1 + x2 + x3 + x4 + x5
##
## Df Sum of Sq RSS AIC
## - x3 1 35.96 2189.0 121.81
## - x4 1 79.17 2232.2 122.30
## <none> 2153.0 123.39
## - x1 1 199.42 2352.4 123.61
## - x5 1 392.54 2545.6 125.58
## - x2 1 942.22 3095.2 130.47
##
## Step: AIC=121.81
## y ~ x1 + x2 + x4 + x5
##
## Df Sum of Sq RSS AIC
## - x4 1 78.22 2267.2 120.69
## <none> 2189.0 121.81
## - x5 1 445.69 2634.7 124.44
## - x2 1 925.88 3114.9 128.63
## - x1 1 1133.27 3322.3 130.24
##
## Step: AIC=120.69
## y ~ x1 + x2 + x5
##
## Df Sum of Sq RSS AIC
## <none> 2267.2 120.69
## - x5 1 404.28 2671.5 122.79
## - x2 1 1050.90 3318.1 128.21
## - x1 1 1661.83 3929.0 132.43
new_model_2=lm(y~x1+x2+x5,data = example10_1)#拟合逐步回归模型
summary(new_model_2)
##
## Call:
## lm(formula = y ~ x1 + x2 + x5, data = example10_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.027 -5.361 -1.560 2.304 23.001
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.68928 6.25242 -0.270 0.78966
## x1 0.19022 0.04848 3.923 0.00078 ***
## x2 0.15763 0.05052 3.120 0.00518 **
## x5 -0.56979 0.29445 -1.935 0.06656 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.39 on 21 degrees of freedom
## Multiple R-squared: 0.8439, Adjusted R-squared: 0.8216
## F-statistic: 37.85 on 3 and 21 DF, p-value: 1.187e-08
anova(new_model_2)#方差分析表
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 10508.9 10508.9 97.3392 2.452e-09 ***
## x2 1 1347.1 1347.1 12.4775 0.001976 **
## x5 1 404.3 404.3 3.7447 0.066558 .
## Residuals 21 2267.2 108.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#逐步回归模型的诊断
par(mfrow=c(1,2),mai=c(0.8,0.8,0.4,0.1),cex=0.8)
plot(new_model_2,which=1:2)
library(lm.beta)
mode1_1.beta=lm.beta(model_1)
summary(mode1_1.beta)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5, data = example10_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.7204 -6.0600 0.7152 3.2144 21.4805
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) 4.2604768 0.0000000 10.4679833 0.407 0.68856
## x1 0.1273254 0.3361822 0.0959790 1.327 0.20037
## x2 0.1605660 0.4130034 0.0556834 2.884 0.00952 **
## x3 0.0007636 0.1132753 0.0013556 0.563 0.57982
## x4 -0.3331990 -0.0963203 0.3986248 -0.836 0.41362
## x5 -0.5746462 -0.1781104 0.3087506 -1.861 0.07826 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.65 on 19 degrees of freedom
## Multiple R-squared: 0.8518, Adjusted R-squared: 0.8128
## F-statistic: 21.84 on 5 and 19 DF, p-value: 2.835e-07
anova(new_model_2,model_1)
## Analysis of Variance Table
##
## Model 1: y ~ x1 + x2 + x5
## Model 2: y ~ x1 + x2 + x3 + x4 + x5
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 21 2267.2
## 2 19 2153.0 2 114.17 0.5038 0.6121
AIC(new_model_2,model_1)
## df AIC
## new_model_2 5 193.6325
## model_1 7 196.3408
x_0=example10_1[,c(3,4,7)]
pre=predict(new_model_2)
res=residuals(new_model_2)
zre=rstandard(new_model_2)
con_int=predict(new_model_2,x_0,interval = 'confidence',level = 0.95)
pre_int=predict(new_model_2,x_0,interval = 'prediction',level = 0.95)
my_summary=data.frame(营业额=example10_1$y,点预测值=pre,残差=res,标准化残差=zre,置信下限=con_int[,2],置信上限=con_int[,3],预测下限=pre_int[,2],预测上限=pre_int[,3])
round(my_summary,3)#表示输出my_summary,保留小数点3位
## 营业额 点预测值 残差 标准化残差 置信下限 置信上限 预测下限 预测上限
## 1 53.2 52.189 1.011 0.102 45.457 58.921 29.557 74.822
## 2 18.5 -4.501 23.001 2.359 -11.969 2.967 -27.363 18.361
## 3 11.3 21.963 -10.663 -1.072 15.685 28.240 -0.539 44.464
## 4 84.7 65.114 19.586 2.766 49.300 80.928 38.337 91.891
## 5 7.3 6.128 1.172 0.122 -2.283 14.540 -17.059 29.316
## 6 17.9 22.408 -4.508 -0.466 14.581 30.235 -0.574 45.390
## 7 2.5 1.278 1.222 0.125 -6.113 8.670 -21.559 24.116
## 8 27.3 34.719 -7.419 -0.747 28.400 41.038 12.206 57.232
## 9 5.9 10.628 -4.728 -0.472 4.904 16.351 -11.726 32.981
## 10 23.9 37.843 -13.943 -1.390 32.193 43.493 15.509 60.178
## 11 69.4 62.852 6.548 0.709 52.939 72.766 39.079 86.626
## 12 20.6 18.296 2.304 0.229 13.036 23.556 -3.943 40.535
## 13 1.9 -5.510 7.410 0.771 -13.694 2.674 -28.616 17.596
## 14 3.0 14.956 -11.956 -1.202 8.687 21.224 -7.543 37.455
## 15 7.3 8.860 -1.560 -0.169 -1.050 18.770 -14.913 32.632
## 16 46.2 29.831 16.369 1.699 21.745 37.917 6.759 52.903
## 17 78.8 78.016 0.784 0.112 62.105 93.927 51.182 104.850
## 18 11.1 13.167 -2.067 -0.209 6.420 19.915 -9.470 35.805
## 19 8.6 15.847 -7.247 -0.815 4.670 27.024 -8.481 40.175
## 20 48.9 50.727 -1.827 -0.184 44.205 57.250 28.156 73.299
## 21 22.1 27.461 -5.361 -0.536 21.671 33.251 5.090 49.831
## 22 11.1 0.233 10.867 1.354 -13.486 13.953 -25.362 25.829
## 23 8.6 22.627 -14.027 -1.395 17.181 28.073 0.344 44.911
## 24 48.9 48.961 -0.061 -0.006 42.722 55.200 26.470 71.452
## 25 22.1 27.005 -4.905 -0.490 21.220 32.790 4.636 49.374
#新值预测
x_1=data.frame(x1=50,x2=100,x5=10)
predict(new_model_2,newdata = x_1)
## 1
## 17.88685
predict(new_model_2,data.frame(x1=50,x2=10,x5=10),interval = 'confidence',level = 0.95)
## fit lwr upr
## 1 3.700067 -4.684078 12.08421
predict(new_model_2,data.frame(x1=50,x2=10,x5=10),interval = 'prediction',level = 0.95)
## fit lwr upr
## 1 3.700067 -19.47764 26.87777