所有使用過之套件、程式碼
簡報網址(請複製到分頁後開啟): “https://www.canva.com/design/DAFBtoqJOrc/35DgosMR7EJKAdaI9U-9Sg/view?utm_content=DAFBtoqJOrc&utm_campaign=designshare&utm_medium=link2&utm_source=sharebutton”
library(readxl)
library("olsrr")
library("tidyverse")
library(ggplot2)
library(ggfortify)
library(ggpmisc)
library(lindia)
library(MASS)
library(car)
library(nortest)
library(lmtest)
library(lawstat)
library(gvlma)
data1 = read_excel("C:/Users/yujen/Desktop/data.xlsx")
df = read_excel("finaltest.xlsx")#盒鬚圖x
ggplot(data1 ,aes(y=rate))+
geom_boxplot(col="navy",
outlier.colour ="steel blue"
,outlier.shape = 8
,outlier.size = 4)+coord_flip()+
scale_x_discrete() +
ylab("確診率") #盒鬚圖Y
summary(data1$netflixpremium) Min. 1st Qu. Median Mean 3rd Qu. Max.
9.26 13.54 14.20 15.14 17.68 20.32
library(ggplot2)
ggplot(data1 ,aes(y=netflixpremium),col = "blue")+
geom_boxplot( col="navy",
outlier.colour ="steel blue"
,outlier.shape = 8
,outlier.size = 4)+coord_flip()+
scale_x_discrete() +
ylab("Netflix_premium")lm = lm(formula =netflixpremium~rate,data = data1)
summary(lm) #完整的表
Call:
lm(formula = netflixpremium ~ rate, data = data1)
Residuals:
Min 1Q Median 3Q Max
-5.7604 -1.8029 0.3785 1.5189 4.3338
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 12.5031 0.6446 19.396 < 2e-16 ***
rate 12.7490 2.6022 4.899 1.13e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.506 on 48 degrees of freedom
Multiple R-squared: 0.3334, Adjusted R-squared: 0.3195
F-statistic: 24 on 1 and 48 DF, p-value: 1.135e-05
library(ggplot2)
ggplot(data1 ,aes(x =rate ,y =netflixpremium)) +
geom_point(color = "steel blue",size = 2)ggplot(data1, aes(x =rate, y =netflixpremium)) +
geom_point(color = "steel blue") +
labs( x="確診率", y="Netflix premium價格",
title="散布圖",subtitle="紅色為迴歸線",size =2) +
theme(plot.title=element_text(color = 'black', size = 12)) +
theme(axis.title=element_text(color='steel blue', size=12,)) +
theme(axis.line = element_line(colour="black", size = 0.5)) +
geom_smooth(method ='lm',formula =y~x,se=FALSE, color='red') +
stat_poly_eq(formula = y~x, eq.with.lhs = "italic(hat(y))~`=`~",
aes(label = paste(..eq.label.., ..rr.label.., sep = "*plain (\",\")~")),
parse=TRUE) data1$residuals =residuals(lm)
autoplot(lm)#Kolmogorov-Smirnov Test
lillie.test(data1$residuals)
Lilliefors (Kolmogorov-Smirnov) normality test
data: data1$residuals
D = 0.089007, p-value = 0.414
#ncvtest、bptest
ncvTest(lm)Non-constant Variance Score Test
Variance formula: ~ fitted.values
Chisquare = 0.8661806, Df = 1, p = 0.35201
bptest(lm)
studentized Breusch-Pagan test
data: lm
BP = 1.2033, df = 1, p-value = 0.2727
#dwtest
dwtest(lm,alternative = "two.sided")
Durbin-Watson test
data: lm
DW = 1.9278, p-value = 0.8058
alternative hypothesis: true autocorrelation is not 0
durbinWatsonTest(lm) lag Autocorrelation D-W Statistic p-value
1 -0.01901518 1.927758 0.826
Alternative hypothesis: rho != 0
sr_1 = lm(y~1,data = df)
sr2 = lm(formula = y~x1+D1+x2+x3+x4+x5+x6+x7+
x1:D1+x2:D1+x3:D1+x5:D1+x6:D1+x7:D1,data = df)
sr = lm(formula =y~x1+D1+x2+x3+x4+x5+x6+x7,data = df)
anova(sr, sr2)#檢定Analysis of Variance Table
Model 1: y ~ x1 + D1 + x2 + x3 + x4 + x5 + x6 + x7
Model 2: y ~ x1 + D1 + x2 + x3 + x4 + x5 + x6 + x7 + x1:D1 + x2:D1 + x3:D1 +
x5:D1 + x6:D1 + x7:D1
Res.Df RSS Df Sum of Sq F Pr(>F)
1 31 41.794
2 25 32.434 6 9.3601 1.2025 0.3376
anova(sr)#檢定Analysis of Variance Table
Response: y
Df Sum Sq Mean Sq F value Pr(>F)
x1 1 173.411 173.411 128.6252 1.461e-12 ***
D1 1 17.690 17.690 13.1217 0.00103 **
x2 1 42.376 42.376 31.4316 3.775e-06 ***
x3 1 9.919 9.919 7.3573 0.01080 *
x4 1 0.047 0.047 0.0352 0.85233
x5 1 1.390 1.390 1.0313 0.31772
x6 1 0.742 0.742 0.5502 0.46383
x7 1 0.058 0.058 0.0431 0.83687
Residuals 31 41.794 1.348
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(sr)#完整的表
Call:
lm(formula = y ~ x1 + D1 + x2 + x3 + x4 + x5 + x6 + x7, data = df)
Residuals:
Min 1Q Median 3Q Max
-2.2694 -0.7087 0.0573 0.7950 1.6789
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 12.03518 1.99408 6.035 1.11e-06 ***
x1 0.02438 0.01185 2.057 0.048194 *
D1 -1.93081 0.52917 -3.649 0.000959 ***
x2 0.30583 0.22537 1.357 0.184583
x3 -0.27785 0.11349 -2.448 0.020216 *
x4 -0.41932 3.59133 -0.117 0.907803
x5 -0.44076 0.36655 -1.202 0.238287
x6 0.22390 0.29123 0.769 0.447835
x7 -0.41963 2.02094 -0.208 0.836867
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.161 on 31 degrees of freedom
Multiple R-squared: 0.8546, Adjusted R-squared: 0.8171
F-statistic: 22.77 on 8 and 31 DF, p-value: 6.003e-11
coef(sr) #係數(Intercept) x1 D1 x2 x3
12.03517809 0.02438157 -1.93081071 0.30582900 -0.27785317
x4 x5 x6 x7
-0.41932383 -0.44076144 0.22389801 -0.41963160
##逐步法
stepAIC(sr_1,direction = "both",
scope = list(upper=sr)) #aic
stepAIC(sr_1,direction = "both",scope = list(upper=sr),
k = log(nrow(df))) #bic
##向前法
stepAIC(sr_1,direction = "forward",
scope = list(lower=sr_1,upper=sr)) #aic
stepAIC(sr_1,direction = "forward",
scope = list(lower=sr_1,upper=sr),k = log(nrow(df))) #bic
##向後法
stepAIC(sr, direction = "backward",
scope = list(upper=sr)) #aic
stepAIC(sr, direction = "backward",
scope = list(upper=sr),k = log(nrow(df))) #bic#全子集迴歸
allp = ols_step_all_possible(sr)
as.data.frame(allp)
plot(allp) #最佳子集迴歸
best = ols_step_best_subset(sr)
best
plot(best) lmfit = lm(y~x1+D1+x2+x3,data =df)
coef(lmfit)(Intercept) x1 D1 x2 x3
10.78579807 0.01961141 -2.03666535 0.45364282 -0.28968680
anova(lmfit)Analysis of Variance Table
Response: y
Df Sum Sq Mean Sq F value Pr(>F)
x1 1 173.411 173.411 137.8415 1.085e-13 ***
D1 1 17.690 17.690 14.0619 0.0006393 ***
x2 1 42.376 42.376 33.6838 1.401e-06 ***
x3 1 9.919 9.919 7.8845 0.0080986 **
Residuals 35 44.031 1.258
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif(lmfit) x1 D1 x2 x3
3.511804 1.055721 3.242370 1.126611
autoplot(lmfit)summary(gvlma(lmfit))
Call:
lm(formula = y ~ x1 + D1 + x2 + x3, data = df)
Residuals:
Min 1Q Median 3Q Max
-2.52079 -0.64468 0.01549 0.89354 1.69804
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.78580 1.04684 10.303 3.86e-12 ***
x1 0.01961 0.01014 1.933 0.061335 .
D1 -2.03667 0.47956 -4.247 0.000152 ***
x2 0.45364 0.08347 5.435 4.30e-06 ***
x3 -0.28969 0.10317 -2.808 0.008099 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.122 on 35 degrees of freedom
Multiple R-squared: 0.8468, Adjusted R-squared: 0.8293
F-statistic: 48.37 on 4 and 35 DF, p-value: 8.726e-14
ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
Level of Significance = 0.05
Call:
gvlma(x = lmfit)
Value p-value Decision
Global Stat 2.5428 0.6370 Assumptions acceptable.
Skewness 1.6800 0.1949 Assumptions acceptable.
Kurtosis 0.1716 0.6787 Assumptions acceptable.
Link Function 0.4103 0.5218 Assumptions acceptable.
Heteroscedasticity 0.2809 0.5961 Assumptions acceptable.
##殘差分析
df$residuals =residuals(lmfit)
shapiro.test(df$residuals) #常態檢定p>0.05
lillie.test(df$residuals)
durbinWatsonTest(lmfit)#殘差獨立性檢定 dw test介於0~4 1.5~2.5 p>0.05
ncvTest(lmfit) #變異數同質性 p>0.05
bptest(lmfit) #變異數同質性 p>0.05
##boxcox
library("trafo")
bc = boxcox(lmfit, lambda="estim",method= "ml", lambdarange = c(-5,5),plotit = T)
bc
##Cook D
gg_cooksd(lmfit,label = T,show.threshold = T,
threshold = "convention",scale.factor = 0.5)