berikut kodingan saya
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
## Warning: package 'broom' was built under R version 4.3.3
library(datarium)
## Warning: package 'datarium' was built under R version 4.3.3
library(ggplot2)
data("marketing")
head(marketing, 3)
## youtube facebook newspaper sales
## 1 276.12 45.36 83.04 26.52
## 2 53.40 47.16 54.12 12.48
## 3 20.64 55.08 83.16 11.16
tail(marketing, 3)
## youtube facebook newspaper sales
## 198 212.40 11.16 7.68 15.36
## 199 340.32 50.40 79.44 30.60
## 200 278.52 10.32 10.44 16.08
model<-lm(sales ~ youtube, data = marketing)
model
##
## Call:
## lm(formula = sales ~ youtube, data = marketing)
##
## Coefficients:
## (Intercept) youtube
## 8.43911 0.04754
#Predicted regression line
model.diag.metrics<-augment(model)
ggplot(model.diag.metrics, aes(youtube, sales)) +
geom_point() +
stat_smooth(method=lm, se=FALSE) +
geom_segment(aes(xend=youtube, yend=.fitted), color = "pink", linewidth = 0.3)+
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
#Linearity
plot(model, 1)
#Homogeneity
plot(model, 3)
#Breush-Pagan Test
error2 <- residuals(model)^2
model.error<-lm(error2 ~ marketing$youtube)
chi_stat<-dim(marketing)[1]*summary(model.error)$r.square
p_value<-1-pchisq(chi_stat, df = 1)
cat("BP =",chi_stat, "; p-value =", p_value)
## BP = 48.03797 ; p-value = 4.180434e-12
#Compare Breush-Pagan Test
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 48.038, df = 1, p-value = 4.18e-12
p_value<-1-pchisq(chi_stat, df=1)
cat("BP =",chi_stat, "; p-value =", p_value)
## BP = 48.03797 ; p-value = 4.180434e-12
#Transformation on sales
log.model<-lm(log(sales) ~ youtube, data = marketing)
log.model
##
## Call:
## lm(formula = log(sales) ~ youtube, data = marketing)
##
## Coefficients:
## (Intercept) youtube
## 2.189514 0.003156
#Check homogeneity of variance
bptest(log.model)
##
## studentized Breusch-Pagan test
##
## data: log.model
## BP = 1.1959, df = 1, p-value = 0.2742
#Normality
plot(log.model, 2)
#Homogenitas
plot(log.model, 3)
#Normality with Kolmogrov-Smirnov
error <- residuals(log.model)
ecdf_function <- ecdf(error)
t_cdf <- pnorm(error, mean = 0, sd = sd(error))
e_cdf <- ecdf_function(error)
D <- max(abs(e_cdf - t_cdf))
print(D)
## [1] 0.0658037
#Normality with ks.test
ks.test(error, "pnorm", 0, sd(error))
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: error
## D = 0.070804, p-value = 0.2686
## alternative hypothesis: two-sided
#Check normality
ks.test(error, "pnorm", 0, sd(residuals(model)))
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: error
## D = 0.45656, p-value < 2.2e-16
## alternative hypothesis: two-sided
#Autocorrelation
plot(log.model, 1)
#Durbin-Watson
n<-dim(marketing)[1]
r<-cor(error[-1], error[-n])
DW<-2*(1-r)
DW
## [1] 1.819105
#Check Autocorrelation
dU<-1.778
if(DW>dU && DW<4-dU){
cat("No Autocorrelation, DW =", DW)
} else {
cat("Autocorrelation exists, DW =", DW)
}
## No Autocorrelation, DW = 1.819105
#Comparison
dwtest(log.model)
##
## Durbin-Watson test
##
## data: log.model
## DW = 1.81, p-value = 0.08847
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 1.9347, p-value = 0.3213
## alternative hypothesis: true autocorrelation is greater than 0
#Penambahan variabel
model.mlr<-lm(sales ~ youtube + facebook + newspaper, data = marketing)
#Model baru
model.mlr
##
## Call:
## lm(formula = sales ~ youtube + facebook + newspaper, data = marketing)
##
## Coefficients:
## (Intercept) youtube facebook newspaper
## 3.526667 0.045765 0.188530 -0.001037
#Linearity
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.3
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
marketing$fit.r<-model.mlr$residuals
marketing$fit.p<-model.mlr$fitted.values
marketing %>%
melt(measure.vars = c("youtube", "facebook", "newspaper", "fit.p")) %>%
ggplot(aes(value, fit.r, group = variable)) +
geom_point(shape = 1) +
geom_smooth(method = loess) +
geom_hline(yintercept = 0) +
facet_wrap(~ variable, scales = "free")
## `geom_smooth()` using formula = 'y ~ x'
#Multicolinierity
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.95 loaded
corrplot.mixed(cor(marketing[,1:4]), upper = "square")
#Variance Inflation Factor
m_youtube<-lm(youtube ~ facebook + newspaper, data = marketing)
m_facebook<-lm(facebook ~ youtube + newspaper, data = marketing)
m_newspaper<-lm(newspaper ~ youtube + facebook, data = marketing)
R2_youtube<-summary(m_youtube)$r.sq
R2_facebook<-summary(m_facebook)$r.sq
R2_newspaper<-summary(m_newspaper)$r.sq
VIF_y<-1/(1-R2_youtube)
VIF_f<-1/(1-R2_facebook)
VIF_n<-1/(1-R2_newspaper)
#Check VIF
data.frame(Youtube = VIF_y,
Facebook = VIF_f,
Newspape = VIF_n)
## Youtube Facebook Newspape
## 1 1.004611 1.144952 1.145187
#Compare VIF dari R
library(car)
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
vif(model.mlr)
## youtube facebook newspaper
## 1.004611 1.144952 1.145187
#Penambahan variabel colinear
marketing$X4 <- 0.5*marketing$facebook + marketing$newspaper + rnorm(n,0,2)
model.update <- lm(sales ~ youtube + facebook + newspaper + X4, data = marketing)
model.update
##
## Call:
## lm(formula = sales ~ youtube + facebook + newspaper + X4, data = marketing)
##
## Coefficients:
## (Intercept) youtube facebook newspaper X4
## 3.51451 0.04581 0.20482 0.03097 -0.03197
#Check VIF dari X4
m_x4 <- lm(X4 ~ newspaper + youtube + facebook, data = marketing)
R2_x4 <- summary(m_x4)$r.square
1/(1- R2_x4)
## [1] 211.6354
#Compare VIF dari package car
vif(model.update)
## youtube facebook newspaper X4
## 1.009896 19.680838 155.251826 211.635408