setwd("d:/data")
d2 <- read.csv("C:\\Users\\86167\\Desktop\\第一产业和一级指标.csv", header = T)
# 建立全变量回归方程
lm.one <- lm(y ~ x1 + x2 + x3, data = d2)
# 进行逐步回归
summary(lm.one)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = d2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -311.40 -130.26 -9.64 140.37 299.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3682.15792 244.04269 15.088 3.68e-07 ***
## x1 8.79002 3.36199 2.615 0.0309 *
## x2 -3.02661 2.96981 -1.019 0.3380
## x3 -0.07138 1.55629 -0.046 0.9645
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 245.8 on 8 degrees of freedom
## Multiple R-squared: 0.9168, Adjusted R-squared: 0.8855
## F-statistic: 29.37 on 3 and 8 DF, p-value: 0.0001142
lm.one.step<-step(lm.one,direction="both")
## Start: AIC=135.24
## y ~ x1 + x2 + x3
##
## Df Sum of Sq RSS AIC
## - x3 1 127 483365 133.24
## - x2 1 62737 545976 134.71
## <none> 483238 135.24
## - x1 1 412913 896151 140.65
##
## Step: AIC=133.24
## y ~ x1 + x2
##
## Df Sum of Sq RSS AIC
## - x2 1 63542 546907 132.73
## <none> 483365 133.24
## + x3 1 127 483238 135.24
## - x1 1 650578 1133944 141.48
##
## Step: AIC=132.73
## y ~ x1
##
## Df Sum of Sq RSS AIC
## <none> 546907 132.73
## + x2 1 63542 483365 133.24
## + x3 1 932 545976 134.71
## - x1 1 5258185 5805092 159.07
# 计算普通残差
y.res<-residuals(lm.one)
# 计算标准化残差
y.rst<-rstandard(lm.one.step)
print(y.rst)
## 1 2 3 4 5 6
## 0.06325835 -0.10394070 0.75730091 0.36486586 0.83474324 0.02767989
## 7 8 9 10 11 12
## -0.97477742 -1.40480704 -1.44027614 -0.92393516 1.20320283 1.83997295
# 计算预测值
y.fit<-predict(lm.one.step)
# 绘制普通残差与预测值的散点图
plot(y.res~y.fit)

# 绘制标准化残差与预测值的散点图
plot(y.rst~y.fit)

# 第7、12号点为异常点
d2_no_outlier <- d2[-c(7),]
# 重新建立回归模型
lm.one_no_outlier <- lm(y ~ x1 + x2 + x3 , data = d2_no_outlier)
summary(lm.one_no_outlier)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = d2_no_outlier)
##
## Residuals:
## Min 1Q Median 3Q Max
## -324.80 -122.85 2.14 180.08 270.99
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3662.10399 258.59401 14.162 2.08e-06 ***
## x1 8.19420 3.69908 2.215 0.0623 .
## x2 -2.29754 3.40342 -0.675 0.5213
## x3 -0.09299 1.63184 -0.057 0.9562
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 257.6 on 7 degrees of freedom
## Multiple R-squared: 0.9198, Adjusted R-squared: 0.8854
## F-statistic: 26.76 on 3 and 7 DF, p-value: 0.0003292
lm.step<-step(lm.one_no_outlier,direction="both") #用一切子集回归法来进行逐步回归
## Start: AIC=125.16
## y ~ x1 + x2 + x3
##
## Df Sum of Sq RSS AIC
## - x3 1 216 464811 123.17
## - x2 1 30246 494842 123.86
## <none> 464596 125.16
## - x1 1 325689 790285 129.00
##
## Step: AIC=123.17
## y ~ x1 + x2
##
## Df Sum of Sq RSS AIC
## - x2 1 30129 494941 121.86
## <none> 464811 123.17
## + x3 1 216 464596 125.16
## - x1 1 475204 940015 128.91
##
## Step: AIC=121.86
## y ~ x1
##
## Df Sum of Sq RSS AIC
## <none> 494941 121.86
## + x2 1 30129 464811 123.17
## + x3 1 99 494842 123.86
## - x1 1 5298591 5793532 146.92
y.rst<-rstandard(lm.step) #计算回归模型lm.step的标准化残差
y.fit<-predict(lm.step) #计算回归模型lm.step的预测值
plot(y.rst~ y.fit) #绘制以标准化残差为纵坐标,预测值为横坐标的散点

lm.step_new<-update(lm.step,log(.)~.) #对模型进行对数变换
y.rst<-rstandard(lm.step_new) #计算lm.step_new的标准化残差
y.fit<-predict(lm.step_new) #计算lm.step_new的预测值
plot(y.rst~ y.fit) #绘制以标准化残差为纵坐标,预测值为横坐标

# 全变量模型的拟合优度指标
summary(lm.one)$r.squared
## [1] 0.9167561
summary(lm.one)$adj.r.squared
## [1] 0.8855397
# 逐步回归模型的拟合优度指标
summary(lm.one.step)$r.squared
## [1] 0.9057883
summary(lm.one.step)$adj.r.squared
## [1] 0.8963672
par(mfrow=c(2,2)) #在一个2×2网格中创建4个绘图区
plot(lm.step_new) #绘制模型诊断图

influence.measures(lm.step_new)
## Influence measures of
## lm(formula = log(y) ~ x1, data = d2_no_outlier) :
##
## dfb.1_ dfb.x1 dffit cov.r cook.d hat inf
## 1 -0.44599 0.38486 -0.447 1.783 0.10745 0.3513 *
## 2 -0.21885 0.17604 -0.223 1.604 0.02746 0.2409
## 3 0.38820 -0.28063 0.415 1.240 0.08758 0.1674
## 4 0.15379 -0.08554 0.192 1.330 0.02007 0.1133
## 5 0.25982 -0.10905 0.377 1.041 0.06899 0.0992
## 6 0.03443 -0.00869 0.060 1.383 0.00201 0.0929
## 8 -0.00696 -0.24033 -0.549 0.848 0.13059 0.1125
## 9 0.10909 -0.38938 -0.659 0.827 0.18299 0.1397
## 10 0.12419 -0.30542 -0.450 1.202 0.10135 0.1684
## 11 -0.20291 0.38244 0.492 1.357 0.12344 0.2304
## 12 -0.41282 0.70564 0.856 1.167 0.33470 0.2840
preds<-data.frame(x1=407.76)
predict(lm.step,newdata=preds,interval="prediction", level=0.95)
## fit lwr upr
## 1 6049.366 5439.628 6659.104