load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch9\\example9_1.RData")
library(car)
## Loading required package: carData
scatterplot(example9_1$销售收入~example9_1$广告支出,pch=19,xlab='广告支出',ylab='销售收入',cex.lab=0.8)
#cex.lab: The magnification to be used for x and y labels relative to the current setting of cex.
cor(example9_1$销售收入,example9_1$广告支出)#也可以cor(example9_1[,2],example9_1[,3])
## [1] 0.937114
\[H_0:\rho=0 \quad v.s. \quad H_1: \rho \neq 0\]
cor.test(example9_1$销售收入,example9_1$广告支出)
##
## Pearson's product-moment correlation
##
## data: example9_1$销售收入 and example9_1$广告支出
## t = 11.391, df = 18, p-value = 1.161e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8450142 0.9752189
## sample estimates:
## cor
## 0.937114
model=lm(example9_1$销售收入~example9_1$广告支出)
summary(model)
##
## Call:
## lm(formula = example9_1$销售收入 ~ example9_1$广告支出)
##
## Residuals:
## Min 1Q Median 3Q Max
## -766.30 -273.85 -26.79 174.73 900.66
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2343.8916 274.4825 8.539 9.56e-08 ***
## example9_1$\u5e7f\u544a\u652f\u51fa 5.6735 0.4981 11.391 1.16e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 394 on 18 degrees of freedom
## Multiple R-squared: 0.8782, Adjusted R-squared: 0.8714
## F-statistic: 129.8 on 1 and 18 DF, p-value: 1.161e-09
confint(model,level = 0.95)#置信区间
## 2.5 % 97.5 %
## (Intercept) 1767.225152 2920.558006
## example9_1$\u5e7f\u544a\u652f\u51fa 4.627092 6.719825
#Actually, the level is 0.95 by default.
anova(model)#方差分析表
## Analysis of Variance Table
##
## Response: example9_1$销售收入
## Df Sum Sq Mean Sq F value Pr(>F)
## example9_1$\u5e7f\u544a\u652f\u51fa 1 20139304 20139304 129.76 1.161e-09 ***
## Residuals 18 2793629 155202
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#绘制拟合图
plot(example9_1$销售收入~example9_1$广告支出)
text(example9_1$销售收入~example9_1$广告支出,labels=example9_1$企业编号,cex=0.6,adj=c(-0.6,0.25),col=4)
abline(model,col=2,lwd=2)
n=nrow(example9_1)
for (i in 1:n) {
segments(example9_1[i,3],example9_1[i,2],example9_1[i,3],model$fitted.values[i])
}
mtext(expression(hat(y)==2343.8916+5.6735%*%example9_1$广告支出),cex=0.7,side=1,line=-6,adj=0.75)
arrows(600,4900,550,5350,code = 2,angle = 15,length = 0.08)
pre_model=predict(model)
con_int=predict(model,data.frame(广告支出=example9_1$广告支出),interval='confidence',level=0.95)
pre_int=predict(model,data.frame(广告支出=example9_1$广告支出),interval='prediction',level=0.95)
data.frame(销售收入=example9_1$销售收入,点预测值=pre_model,置信下限=con_int[,2],置信上限=con_int[,3],预测下限=pre_int[,2],预测上限=pre_int[,3])
## \u9500\u552e\u6536\u5165 \u70b9\u9884\u6d4b\u503c \u7f6e\u4fe1\u4e0b\u9650
## 1 4597.5 4264.925 3998.348
## 2 6611.0 6945.066 6590.492
## 3 7349.3 6448.639 6168.060
## 4 5525.7 5260.049 5074.789
## 5 4675.9 4763.054 4552.697
## 6 4418.6 4762.487 4552.080
## 7 5845.4 6196.170 5948.675
## 8 7313.0 7151.013 6763.533
## 9 5035.4 5015.523 4822.893
## 10 4322.6 4578.100 4349.549
## 11 6389.5 6320.986 6057.644
## 12 4152.2 4011.888 3709.980
## 13 5544.8 4854.964 4652.116
## 14 6095.1 5946.538 5726.896
## 15 3626.2 3821.828 3491.525
## 16 3745.4 4074.296 3781.397
## 17 5121.8 5888.101 5674.071
## 18 5674.5 5747.967 5545.680
## 19 4256.6 4043.660 3746.360
## 20 5803.7 6008.946 5782.898
## \u7f6e\u4fe1\u4e0a\u9650 \u9884\u6d4b\u4e0b\u9650 \u9884\u6d4b\u4e0a\u9650
## 1 4531.501 3395.383 5134.467
## 2 7299.641 6044.643 7845.490
## 3 6729.217 5574.703 7322.575
## 4 5445.310 4411.897 6108.201
## 5 4973.412 3909.069 5617.039
## 6 4972.894 3908.490 5616.484
## 7 6443.664 5332.287 7060.053
## 8 7538.493 6237.130 8064.896
## 9 5208.154 4165.731 5865.315
## 10 4806.650 3719.452 5436.747
## 11 6584.328 5452.430 7189.542
## 12 4313.796 3130.873 4892.904
## 13 5057.813 4002.798 5707.131
## 14 6166.179 5090.218 6802.857
## 15 4152.130 2930.682 4712.973
## 16 4367.196 3196.327 4952.266
## 17 6102.132 5033.204 6742.998
## 18 5950.254 4895.934 6600.000
## 19 4340.960 3164.212 4923.107
## 20 6234.994 5150.961 6866.931
#绘制置信区间、预测区间图
model_1=lm(销售收入~广告支出,data=example9_1)
#所以下面都是对应起来的,区分下上述的model,原因详见predict函数,data.frame这部分还不能改变量
#所以杜绝中文名变量,从你我做起 XD
x_0=seq(min(example9_1$广告支出),max(example9_1$广告支出))
con_int_1=predict(model_1,data.frame(广告支出=x_0),interval='confidence',level=0.95)
pre_int_1=predict(model_1,data.frame(广告支出=x_0),interval='prediction',level=0.95)
par(cex=0.8,mai=c(0.7,0.7,0.1,0.1))
n=nrow(example9_1)
plot(example9_1$销售收入~example9_1$广告支出)
abline(model,lwd=2)
for (i in 1:n) {
segments(example9_1[i,3],example9_1[i,2],example9_1[i,3],model$fitted.values[i])
}
lines(x_0,y=con_int_1[,2],lty=5,lwd=2,col='blue')#如果使用之前的model,就会报错 Error in xy.coords(x, y) : 'x' and 'y' lengths differ
lines(x_0,y=con_int_1[,3],lty=5,lwd=2,col='blue')
lines(x_0,y=pre_int_1[,2],lty=6,lwd=2,col='red')
lines(x_0,y=pre_int_1[,3],lty=6,lwd=2,col='red')
# lty: The line type. Line types can either be specified as an integer
# (0=blank, 1=solid (default), 2=dashed, 3=dotted, 4=dotdash, 5=longdash, 6=twodash)
# or as one of the character strings "blank", "solid", "dashed", "dotted", "dotdash", "longdash", or "twodash", where "blank" uses ‘invisible lines’ (i.e., does not draw them).
legend(x='topleft',legend = c('回归线','置信区间','预测区间'),lty=c(1,5,6),col=c(1,4,2),lwd=2,cex=0.8)
#计算x某点的点预测值、置信区间、预测区间
x_1=data.frame(广告支出 =500)
predict(model_1,newdata = x_1)
## 1
## 5180.621
predict(model_1,data.frame(广告支出=500),interval='confidence',level=0.95)
## fit lwr upr
## 1 5180.621 4994.127 5367.115
predict(model_1,data.frame(广告支出=500),interval='prediction',level=0.95)
## fit lwr upr
## 1 5180.621 4332.199 6029.043
pre=fitted(model)
res=residuals(model)
zre=model$residuals/sqrt(deviance(model)/df.residual(model))
data.frame(销售收入=example9_1$销售收入,点预测值=pre,残差=res,标准化残差=zre)
## \u9500\u552e\u6536\u5165 \u70b9\u9884\u6d4b\u503c \u6b8b\u5dee
## 1 4597.5 4264.925 332.57536
## 2 6611.0 6945.066 -334.06646
## 3 7349.3 6448.639 900.66117
## 4 5525.7 5260.049 265.65073
## 5 4675.9 4763.054 -87.15430
## 6 4418.6 4762.487 -343.88696
## 7 5845.4 6196.170 -350.76993
## 8 7313.0 7151.013 161.98700
## 9 5035.4 5015.523 19.87679
## 10 4322.6 4578.100 -255.49955
## 11 6389.5 6320.986 68.51398
## 12 4152.2 4011.888 140.31161
## 13 5544.8 4854.964 689.83567
## 14 6095.1 5946.538 148.56225
## 15 3626.2 3821.828 -195.62753
## 16 3745.4 4074.296 -328.89643
## 17 5121.8 5888.101 -766.30113
## 18 5674.5 5747.967 -73.46670
## 19 4256.6 4043.660 212.94024
## 20 5803.7 6008.946 -205.24580
## \u6807\u51c6\u5316\u6b8b\u5dee
## 1 0.8441934
## 2 -0.8479784
## 3 2.2861954
## 4 0.6743151
## 5 -0.2212283
## 6 -0.8729063
## 7 -0.8903777
## 8 0.4111801
## 9 0.0504543
## 10 -0.6485479
## 11 0.1739126
## 12 0.3561603
## 13 1.7510460
## 14 0.3771033
## 15 -0.4965716
## 16 -0.8348550
## 17 -1.9451423
## 18 -0.1864844
## 19 0.5405174
## 20 -0.5209861
load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch9\\example9_1.RData")
model=lm(example9_1$销售收入~example9_1$广告支出)
library(car)
crPlots(model)
par(mfrow=c(2,2),cex=0.8,cex.main=0.7)
plot(model)
load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch9\\example9_1.RData")
model=lm(example9_1$销售收入~example9_1$广告支出)
library(car)
ncvTest(model)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 1.126441, Df = 1, p = 0.28854
\(p = 0.28854 > 0.05\) ,故不拒绝原假设,可认为模型符合方差齐性。
#绘制散布-水平图
spreadLevelPlot(model)
##
## Suggested power transformation: 0.8730215
load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch9\\example9_1.RData")
model=lm(example9_1$销售收入~example9_1$广告支出)
library(car)
durbinWatsonTest(model)
## lag Autocorrelation D-W Statistic p-value
## 1 0.1330482 1.679232 0.506
## Alternative hypothesis: rho != 0