lapply(c("car","lmtest"),library,character.only=T)[[1]]
## Loading required package: carData
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## [1] "car" "carData" "stats" "graphics" "grDevices" "utils"
## [7] "datasets" "methods" "base"
library(readxl)
lapply(c("tidyverse","rvest","kableExtra"),library,character.only=T)[[1]]
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'rvest'
##
##
## The following object is masked from 'package:readr':
##
## guess_encoding
##
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "readxl" "lmtest"
## [13] "zoo" "car" "carData" "stats" "graphics" "grDevices"
## [19] "utils" "datasets" "methods" "base"
library(readxl)
pizza <- read_excel("C:/Users/user/Downloads/Pizza Price/pizza_v2.xlsx", sheet = "Sheet2" )
View(pizza)
Pada data kolom size telah dikoding ulang sehingga menjadi data ordinal dengan ketentuan small = 1 medium = 2 reguler = 3 large = 4 XL = 5 jumbo = 6
Selain itu, data kolom extra sauce, extra cheese, dan extra mushroom awalnya berupa yes or no, sehingga diubah menjadi data dummy dengan ketentuan yes = 1 no = 0
pizza$company <- as.factor(pizza$company)
pizza$topping <- as.factor(pizza$topping)
df <- pizza%>%mutate(company=relevel(pizza$company, ref = "A"));
df <- pizza%>%mutate(topping=relevel(pizza$topping, ref = "beef"));
df$company;df$price_rupiah;df$diameter;df$topping;df$variant;df$size_ordinal;df$extra_sauce;df$extra_cheese;df$extra_mushrooms
## [1] A A A A A A A A A A A A A A A A A A A A A A A A A B B B B B B B B B B B B
## [38] B B B B B B B B B B B B C C C C C C C C C C C C C C C C C C C C C C C C C
## [75] C C C C C D D D D D D D D D D D D D D D D D D D D E E E E E E E E E E E E
## [112] E E E E E E E E E E E E E E E E E
## Levels: A B C D E
## [1] 235000 198000 120000 155000 248000 140000 110000 70000 90000 90000
## [11] 140000 110000 70000 90000 90000 140000 110000 70000 90000 90000
## [21] 230000 188000 114000 149000 149000 23500 46000 72000 49000 83000
## [31] 96000 31000 69000 93000 75000 115000 123000 33000 46000 72000
## [41] 76000 119000 126500 75000 46000 72000 49000 83000 96000 39000
## [51] 72000 99000 44000 78000 100000 39000 72000 99000 35000 60000
## [61] 98000 35000 60000 98000 44000 78000 100000 28000 51000 84000
## [71] 39000 72000 99000 35000 60000 98000 32000 54000 92000 140000
## [81] 110000 70000 90000 90000 230000 188000 114000 149000 149000 23500
## [91] 46000 72000 49000 83000 96000 31000 69000 93000 75000 115000
## [101] 23500 46000 72000 49000 83000 96000 31000 69000 93000 115000
## [111] 123000 33000 46000 72000 76000 119000 126500 75000 46000 72000
## [121] 49000 83000 96000 39000 72000 99000 44000 78000
## [1] 22.0 20.0 16.0 14.0 18.0 18.5 16.0 8.0 12.0 12.0 18.5 16.0 8.0 12.0 12.0
## [16] 18.5 16.0 8.0 12.0 12.0 22.0 18.5 14.0 16.5 16.5 8.5 12.0 14.0 12.0 17.0
## [31] 12.0 8.5 12.0 14.0 12.0 17.0 12.0 8.5 12.0 14.0 12.0 17.0 12.0 8.5 12.0
## [46] 14.0 12.0 17.0 12.0 8.5 12.0 14.0 8.5 12.0 14.0 8.5 12.0 14.0 8.5 12.0
## [61] 14.0 8.5 12.0 14.0 8.5 12.0 14.0 8.5 12.0 14.0 8.5 12.0 14.0 8.5 12.0
## [76] 14.0 8.5 12.0 14.0 18.5 16.0 8.0 12.0 12.0 22.0 18.5 14.0 16.5 16.5 8.5
## [91] 12.0 14.0 12.0 17.0 12.0 8.5 12.0 14.0 12.0 17.0 8.5 12.0 14.0 12.0 17.0
## [106] 12.0 8.5 12.0 14.0 17.0 12.0 8.5 12.0 14.0 12.0 17.0 12.0 8.5 12.0 14.0
## [121] 12.0 17.0 12.0 8.5 12.0 14.0 8.5 12.0
## [1] chicken papperoni mushrooms smoked beef mozzarella
## [6] black papper smoked beef papperoni mushrooms smoked beef
## [11] mozzarella black papper smoked beef black papper mozzarella
## [16] mozzarella smoked beef chicken mushrooms mozzarella
## [21] chicken mushrooms chicken smoked beef chicken
## [26] mozzarella chicken smoked beef mozzarella chicken
## [31] chicken mushrooms chicken mushrooms mushrooms
## [36] mozzarella smoked beef chicken mozzarella chicken
## [41] chicken mushrooms smoked beef smoked beef mushrooms
## [46] mozzarella smoked beef chicken mushrooms tuna
## [51] tuna tuna meat meat meat
## [56] sausage sausage sausage mushrooms onion
## [61] mozzarella meat meat meat vegetables
## [66] vegetables vegetables vegetables vegetables vegetables
## [71] vegetables vegetables vegetables beef beef
## [76] beef tuna tuna tuna mozzarella
## [81] smoked beef chicken mushrooms mozzarella chicken
## [86] mushrooms chicken smoked beef chicken mozzarella
## [91] chicken smoked beef mozzarella chicken chicken
## [96] mushrooms chicken mushrooms mushrooms mozzarella
## [101] mozzarella chicken smoked beef mozzarella chicken
## [106] chicken mushrooms chicken mushrooms mozzarella
## [111] smoked beef chicken mozzarella chicken chicken
## [116] mushrooms smoked beef smoked beef mushrooms mozzarella
## [121] smoked beef chicken mushrooms tuna tuna
## [126] tuna meat meat
## 12 Levels: beef black papper chicken meat mozzarella mushrooms ... vegetables
## Warning: Unknown or uninitialised column: `variant`.
## NULL
## [1] 6 6 3 3 6 6 6 3 3 3 6 6 3 3 3 6 6 3 3 3 6 6 3 3 3 1 2 4 2 5 2 1 2 4 2 5 2
## [38] 1 2 4 2 5 2 1 2 4 2 5 2 1 2 4 1 2 4 1 2 4 1 2 4 1 2 4 1 2 4 1 2 4 1 2 4 1
## [75] 2 4 1 2 4 6 6 3 3 3 6 6 3 3 3 1 2 4 2 5 2 1 2 4 2 5 1 2 4 2 5 2 1 2 4 5 2
## [112] 1 2 4 2 5 2 1 2 4 2 5 2 1 2 4 1 2
## [1] 1 1 1 1 1 0 0 0 1 0 0 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 0 0 1
## [38] 0 1 1 0 1 1 1 1 1 0 0 0 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 0 0 0 1 0 1 0 1
## [75] 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 0 0 1 1 1 0 0 0 1 0 0 0 1
## [112] 0 1 1 0 1 1 1 1 1 0 0 0 1 1 1 1 0
## [1] 1 1 1 0 0 0 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1
## [38] 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 0 0 0
## [75] 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1
## [112] 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0
## [1] 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 1 1 1
## [75] 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 1 1
## [112] 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1
model <- lm(price_rupiah~company+diameter+topping+size_ordinal+extra_sauce+extra_cheese+extra_mushrooms, data=df)
summary(model)
##
## Call:
## lm(formula = price_rupiah ~ company + diameter + topping + size_ordinal +
## extra_sauce + extra_cheese + extra_mushrooms, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41621 -13511 586 10113 91721
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -44747.3 21805.4 -2.052 0.04260 *
## companyB -32489.7 7318.3 -4.440 2.20e-05 ***
## companyC -33290.6 11601.1 -2.870 0.00495 **
## companyD -20257.6 7332.5 -2.763 0.00675 **
## companyE -31945.5 7188.3 -4.444 2.16e-05 ***
## diameter 12016.9 1402.3 8.569 8.61e-14 ***
## toppingblack papper -13976.3 22452.5 -0.622 0.53495
## toppingchicken 500.7 17750.1 0.028 0.97755
## toppingmeat 8124.8 15606.2 0.521 0.60371
## toppingmozzarella -4965.3 17419.4 -0.285 0.77616
## toppingmushrooms 2586.5 17759.7 0.146 0.88448
## toppingonion -10326.3 26812.3 -0.385 0.70090
## toppingpapperoni 20778.9 24009.8 0.865 0.38874
## toppingsausage 12966.5 19319.2 0.671 0.50356
## toppingsmoked beef 12656.8 17843.0 0.709 0.47966
## toppingtuna 870.8 16021.3 0.054 0.95676
## toppingvegetables 11143.6 15743.1 0.708 0.48058
## size_ordinal -3951.8 3015.6 -1.310 0.19285
## extra_sauce 10433.1 4514.8 2.311 0.02276 *
## extra_cheese 1632.2 4780.9 0.341 0.73347
## extra_mushrooms 2964.9 4249.4 0.698 0.48687
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22470 on 107 degrees of freedom
## Multiple R-squared: 0.7884, Adjusted R-squared: 0.7488
## F-statistic: 19.93 on 20 and 107 DF, p-value: < 2.2e-16
Rsq_awal = summary(model)$r.squared
akurasi_awal = AIC(model)
Didapat R-sq model awal sebesar 0,7884 yang mana cukup tinggi, artinya peubah yang dipilih telah mewakili keragaman harga pizza sebesar 78,84% Sedangkan didapat AIC model awal sebesar 2949,442
uji.normal<-function(x, object.name="x", graph=TRUE, graph.transformed=TRUE){
lapply(c("fitdistrplus", "kSamples", "rcompanion"), library, character.only=T)
if(any(x<0))x<-x-min(x)+1
mean <- fitdist(x, "norm")$estimate[1]; sd <- fitdist(x, "norm")$estimate[2]
uji<-ks.test(x, "pnorm", mean=mean, sd=sd)
uji1<- ad.test(x, rnorm(length(x), mean=mean, sd=sd))
pvalue<-uji$p.value
PVALUE1<-uji1$ad[1,3]
PVALUE2<-uji1$ad[2,3]
t<-transformTukey(x,quiet = TRUE,plotit = FALSE)
pt<-ks.test(t, "pnorm", mean=fitdist(t,"norm")$estimate[1],
sd=fitdist(t,"norm")$estimate[2])$p.value
lambda<-transformTukey(x,returnLambda =TRUE,quiet=TRUE,plotit = FALSE)
if(graph==TRUE){
if(graph.transformed==FALSE){
par(mfrow=c(1,2))
hist(x, freq=F, col="steelblue", border="white",
main=paste("Histogram of ",object.name),xlab=object.name)
lines(density(x),lwd=2, col="coral")
qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
}
else{
par(mfrow=c(2,2))
hist(x, freq=F, col="steelblue", border="white",
main=paste("Histogram of ",object.name),xlab=object.name)
lines(density(x),lwd=2, col="coral")
hist(t, main=paste("Histogram of ",object.name,"transformed"),
xlab=paste(object.name,"transformed"), freq=F,
col="steelblue",border = "white")
lines(density(t),lwd=2, col="coral")
qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
qqnorm(t, col="coral");qqline(t,col="steelblue", lwd=2)
}
}
z<-ifelse((PVALUE1>=0.05 & PVALUE2<0.05 ||PVALUE1<0.05 & PVALUE2>=0.05),
ifelse(pvalue>=0.05,
return(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal")),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal"))))),
ifelse(((pvalue >= 0.05)&(PVALUE1 >= 0.05||PVALUE2>= 0.05)),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal"),
`Hasil Uji Anderson`=
data.frame(`P-Value`=
rbind(`Versi 1`=PVALUE1, `Versi 2`=PVALUE2),
Keputusan=rep("Terima H0, data menyebar normal", 2)))),
ifelse((pvalue >= 0.05&(PVALUE1 < 0.05||PVALUE2 < 0.05)),
return(list(`Hasil Uji Kolmogorov Smirnov`= data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`=
rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2),
Keputusan= rep("Tolak H0, data tidak menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=
data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal")))),
ifelse(pvalue < 0.05&(PVALUE1 >= 0.05||PVALUE2 >= 0.05),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`= rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2),
Keputusan=rep("Terima H0, data menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal")))),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`=rbind(`Versi 1`=PVALUE1,
`Versi 2`=PVALUE2),
Keputusan=rep("Tolak H0, data tidak menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal"))))))))
return(z)
}
uji.normal1<-function(x, object.name="x", graph=TRUE, graph.transformed=TRUE){
lapply(c("fitdistrplus", "kSamples", "rcompanion"), library, character.only=T)
if(any(x<0))x<-x-min(x)+1
mean <- mean(x); sd <- sd(x)
uji<-ks.test(x, "pnorm", mean=mean, sd=sd)
uji1<- ad.test(x, rnorm(length(x), mean=mean, sd=sd))
pvalue<-uji$p.value
PVALUE1<-uji1$ad[1,3]
PVALUE2<-uji1$ad[2,3]
t<-transformTukey(x,quiet = TRUE,plotit = FALSE)
pt<-ks.test(t, "pnorm", mean=mean(t),
sd=sd(t))$p.value
lambda<-transformTukey(x,returnLambda =TRUE,quiet=TRUE,plotit = FALSE)
if(graph==TRUE){
if(graph.transformed==FALSE){
par(mfrow=c(1,2))
hist(x, freq=F, col="steelblue", border="white",
main=paste("Histogram of ",object.name),xlab=object.name)
lines(density(x),lwd=2, col="coral")
qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
}
else{
par(mfrow=c(2,2))
hist(x, freq=F, col="steelblue", border="white",
main=paste("Histogram of ",object.name),xlab=object.name)
lines(density(x),lwd=2, col="coral")
hist(t, main=paste("Histogram of ",object.name,"transformed"),
xlab=paste(object.name,"transformed"), freq=F,
col="steelblue",border = "white")
lines(density(t),lwd=2, col="coral")
qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
qqnorm(t, col="coral");qqline(t,col="steelblue", lwd=2)
}
}
z<-ifelse((PVALUE1>=0.05 & PVALUE2<0.05 ||PVALUE1<0.05 & PVALUE2>=0.05),
ifelse(pvalue>=0.05,
return(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal")),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal"))))),
ifelse(((pvalue >= 0.05)&(PVALUE1 >= 0.05||PVALUE2>= 0.05)),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal"),
`Hasil Uji Anderson`=
data.frame(`P-Value`=
rbind(`Versi 1`=PVALUE1, `Versi 2`=PVALUE2),
Keputusan=rep("Terima H0, data menyebar normal", 2)))),
ifelse((pvalue >= 0.05&(PVALUE1 < 0.05||PVALUE2 < 0.05)),
return(list(`Hasil Uji Kolmogorov Smirnov`= data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`=
rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2),
Keputusan= rep("Tolak H0, data tidak menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=
data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal")))),
ifelse(pvalue < 0.05&(PVALUE1 >= 0.05||PVALUE2 >= 0.05),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`= rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2),
Keputusan=rep("Terima H0, data menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal")))),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`=rbind(`Versi 1`=PVALUE1,
`Versi 2`=PVALUE2),
Keputusan=rep("Tolak H0, data tidak menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal"))))))))
return(z)
}
library(fitdistrplus)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: survival
library(kSamples)
## Loading required package: SuppDists
library(rcompanion)
uji.normal(model$residuals,"residuals",graph.transformed = F)
## Warning in ks.test.default(x, "pnorm", mean = mean, sd = sd): ties should not
## be present for the Kolmogorov-Smirnov test
## Warning in ks.test.default(t, "pnorm", mean = fitdist(t, "norm")$estimate[1], :
## ties should not be present for the Kolmogorov-Smirnov test
## $`Hasil Uji Kolmogorov Smirnov`
## P.Value Keputusan
## 1 0.4507046 Terima H0, data menyebar normal
##
## $`Hasil Uji Anderson`
## P.Value Keputusan
## Versi 1 0.33204 Terima H0, data menyebar normal
## Versi 2 0.33500 Terima H0, data menyebar normal
Didapat p-value sebesar 0,4507046 sehingga terima H0 : data menyebar normal
ks.test(model$residuals, "pnorm", mean = mean(model$residuals), sd = sd(model$residuals))
## Warning in ks.test.default(model$residuals, "pnorm", mean =
## mean(model$residuals), : ties should not be present for the Kolmogorov-Smirnov
## test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: model$residuals
## D = 0.076694, p-value = 0.4388
## alternative hypothesis: two-sided
berdasarkan test Kolmogorov-Smirnov jufa didapat p-value sebesar 0,4388 sehingga dapat disimpulkan bahwa data menyebar normal
par(mfrow=c(1,2));plot(model,c(1,3))
## Warning: not plotting observations with leverage one:
## 60
gqtest(model)
##
## Goldfeld-Quandt test
##
## data: model
## GQ = 0.89713, df1 = 43, df2 = 43, p-value = 0.6382
## alternative hypothesis: variance increases from segment 1 to 2
Berdasarkan Goldfeld-Quandt test didapatkan p-Value sebesar 0,6382 sehingga dapat disimpulkan bahwa ragam sisaan homogen (homoskedastisitas)
bgtest(model)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: model
## LM test = 4.8112, df = 1, p-value = 0.02828
Berdasarkan uji Breusch-Godfrey didapat p-value sebesar 0,02828 sehingga tolak H0 dan dapat diartikan bahwa terjadi pelanggaran autokorelasi data
library(car)
vif(model)
## GVIF Df GVIF^(1/(2*Df))
## company 6.978501 4 1.274883
## diameter 5.334613 1 2.309678
## topping 10.322380 11 1.111939
## size_ordinal 5.833645 1 2.415294
## extra_sauce 1.246202 1 1.116334
## extra_cheese 1.292380 1 1.136829
## extra_mushrooms 1.130514 1 1.063256
Terjadi multikolinearitas pada peubah topping karena nilai VIF>10. Hal ini dapat diatasi pada pemilihan peubah menggunakan metode regresi Ridge Lasso yang umumnya digunakan dalam penanganan data multikolinearitas
t.test(model$residuals,mu=0,conf.level = 0.95)
##
## One Sample t-test
##
## data: model$residuals
## t = 4.5334e-16, df = 127, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -3607.797 3607.797
## sample estimates:
## mean of x
## 8.265333e-13
Didapat p-value = 1, yaitu lebih besar dari 0,05 sehingga terima H0 : nilai sisaan sama dengan nol
library(TTR)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
## The following object is masked from 'package:rcompanion':
##
## accuracy
library(lmtest)
library(dplyr)
library(orcutt)
library(HoRM)
library(ggplot2)
library(corrplot)
## corrplot 0.92 loaded
modelCO<-cochrane.orcutt(model)
modelCO
## Cochrane-orcutt estimation for first order autocorrelation
##
## Call:
## lm(formula = price_rupiah ~ company + diameter + topping + size_ordinal +
## extra_sauce + extra_cheese + extra_mushrooms, data = df)
##
## number of interaction: 15
## rho 0.282698
##
## Durbin-Watson statistic
## (original): 1.64631 , p-value: 2.04e-03
## (transformed): 1.90974 , p-value: 1.473e-01
##
## coefficients:
## (Intercept) companyB companyC companyD
## -34638.2944 -31767.7828 -30144.7238 -19349.2729
## companyE diameter toppingblack papper toppingchicken
## -30951.3866 11086.0482 -14093.6812 5891.1825
## toppingmeat toppingmozzarella toppingmushrooms toppingonion
## 6518.4344 2222.6240 2969.1165 -9731.8315
## toppingpapperoni toppingsausage toppingsmoked beef toppingtuna
## 28482.1711 14328.1802 16417.1007 991.1608
## toppingvegetables size_ordinal extra_sauce extra_cheese
## 10227.3735 -3373.6892 7629.9762 -2397.5474
## extra_mushrooms
## 2892.2826
bgtest(modelCO)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: modelCO
## LM test = 0.30658, df = 1, p-value = 0.5798
Setelah dilakukan penanganan menggunakan Cochrane Orcutt dan dites kembali menggunakan uji Breusch-Godfrey, dapat dilihat bahwa data sudah tidak ada pelanggaran autokorelasi
library(leaps)
forward<-regsubsets(price_rupiah~company+diameter+topping+size_ordinal+extra_sauce+extra_cheese+extra_mushrooms, nvmax=20 #banyak peubah yang muncul saat model awal;
,data=df,method="forward")
sf<-summary(forward);sf
## Subset selection object
## Call: regsubsets.formula(price_rupiah ~ company + diameter + topping +
## size_ordinal + extra_sauce + extra_cheese + extra_mushrooms,
## nvmax = 20, data = df, method = "forward")
## 20 Variables (and intercept)
## Forced in Forced out
## companyB FALSE FALSE
## companyC FALSE FALSE
## companyD FALSE FALSE
## companyE FALSE FALSE
## diameter FALSE FALSE
## toppingblack papper FALSE FALSE
## toppingchicken FALSE FALSE
## toppingmeat FALSE FALSE
## toppingmozzarella FALSE FALSE
## toppingmushrooms FALSE FALSE
## toppingonion FALSE FALSE
## toppingpapperoni FALSE FALSE
## toppingsausage FALSE FALSE
## toppingsmoked beef FALSE FALSE
## toppingtuna FALSE FALSE
## toppingvegetables FALSE FALSE
## size_ordinal FALSE FALSE
## extra_sauce FALSE FALSE
## extra_cheese FALSE FALSE
## extra_mushrooms FALSE FALSE
## 1 subsets of each size up to 20
## Selection Algorithm: forward
## companyB companyC companyD companyE diameter toppingblack papper
## 1 ( 1 ) " " " " " " " " "*" " "
## 2 ( 1 ) " " " " " " " " "*" " "
## 3 ( 1 ) " " " " " " " " "*" " "
## 4 ( 1 ) " " " " " " " " "*" " "
## 5 ( 1 ) "*" " " " " " " "*" " "
## 6 ( 1 ) "*" " " " " "*" "*" " "
## 7 ( 1 ) "*" "*" " " "*" "*" " "
## 8 ( 1 ) "*" "*" "*" "*" "*" " "
## 9 ( 1 ) "*" "*" "*" "*" "*" " "
## 10 ( 1 ) "*" "*" "*" "*" "*" " "
## 11 ( 1 ) "*" "*" "*" "*" "*" "*"
## 12 ( 1 ) "*" "*" "*" "*" "*" "*"
## 13 ( 1 ) "*" "*" "*" "*" "*" "*"
## 14 ( 1 ) "*" "*" "*" "*" "*" "*"
## 15 ( 1 ) "*" "*" "*" "*" "*" "*"
## 16 ( 1 ) "*" "*" "*" "*" "*" "*"
## 17 ( 1 ) "*" "*" "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*"
## 20 ( 1 ) "*" "*" "*" "*" "*" "*"
## toppingchicken toppingmeat toppingmozzarella toppingmushrooms
## 1 ( 1 ) " " " " " " " "
## 2 ( 1 ) " " " " " " " "
## 3 ( 1 ) " " " " " " " "
## 4 ( 1 ) " " " " " " " "
## 5 ( 1 ) " " " " " " " "
## 6 ( 1 ) " " " " " " " "
## 7 ( 1 ) " " " " " " " "
## 8 ( 1 ) " " " " " " " "
## 9 ( 1 ) " " " " " " " "
## 10 ( 1 ) " " " " "*" " "
## 11 ( 1 ) " " " " "*" " "
## 12 ( 1 ) " " " " "*" " "
## 13 ( 1 ) " " " " "*" " "
## 14 ( 1 ) " " " " "*" " "
## 15 ( 1 ) " " " " "*" " "
## 16 ( 1 ) " " "*" "*" " "
## 17 ( 1 ) " " "*" "*" " "
## 18 ( 1 ) " " "*" "*" "*"
## 19 ( 1 ) " " "*" "*" "*"
## 20 ( 1 ) "*" "*" "*" "*"
## toppingonion toppingpapperoni toppingsausage toppingsmoked beef
## 1 ( 1 ) " " " " " " " "
## 2 ( 1 ) " " " " " " " "
## 3 ( 1 ) " " " " " " "*"
## 4 ( 1 ) " " "*" " " "*"
## 5 ( 1 ) " " "*" " " "*"
## 6 ( 1 ) " " "*" " " "*"
## 7 ( 1 ) " " "*" " " "*"
## 8 ( 1 ) " " "*" " " "*"
## 9 ( 1 ) " " "*" " " "*"
## 10 ( 1 ) " " "*" " " "*"
## 11 ( 1 ) " " "*" " " "*"
## 12 ( 1 ) "*" "*" " " "*"
## 13 ( 1 ) "*" "*" " " "*"
## 14 ( 1 ) "*" "*" " " "*"
## 15 ( 1 ) "*" "*" "*" "*"
## 16 ( 1 ) "*" "*" "*" "*"
## 17 ( 1 ) "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*"
## 20 ( 1 ) "*" "*" "*" "*"
## toppingtuna toppingvegetables size_ordinal extra_sauce extra_cheese
## 1 ( 1 ) " " " " " " " " " "
## 2 ( 1 ) " " " " " " "*" " "
## 3 ( 1 ) " " " " " " "*" " "
## 4 ( 1 ) " " " " " " "*" " "
## 5 ( 1 ) " " " " " " "*" " "
## 6 ( 1 ) " " " " " " "*" " "
## 7 ( 1 ) " " " " " " "*" " "
## 8 ( 1 ) " " " " " " "*" " "
## 9 ( 1 ) " " " " "*" "*" " "
## 10 ( 1 ) " " " " "*" "*" " "
## 11 ( 1 ) " " " " "*" "*" " "
## 12 ( 1 ) " " " " "*" "*" " "
## 13 ( 1 ) " " " " "*" "*" " "
## 14 ( 1 ) " " "*" "*" "*" " "
## 15 ( 1 ) " " "*" "*" "*" " "
## 16 ( 1 ) " " "*" "*" "*" " "
## 17 ( 1 ) " " "*" "*" "*" "*"
## 18 ( 1 ) " " "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*"
## 20 ( 1 ) "*" "*" "*" "*" "*"
## extra_mushrooms
## 1 ( 1 ) " "
## 2 ( 1 ) " "
## 3 ( 1 ) " "
## 4 ( 1 ) " "
## 5 ( 1 ) " "
## 6 ( 1 ) " "
## 7 ( 1 ) " "
## 8 ( 1 ) " "
## 9 ( 1 ) " "
## 10 ( 1 ) " "
## 11 ( 1 ) " "
## 12 ( 1 ) " "
## 13 ( 1 ) "*"
## 14 ( 1 ) "*"
## 15 ( 1 ) "*"
## 16 ( 1 ) "*"
## 17 ( 1 ) "*"
## 18 ( 1 ) "*"
## 19 ( 1 ) "*"
## 20 ( 1 ) "*"
par(mfrow=c(2,2))
plot(sf$rsq, xlab="Jumlah Variabel", ylab="R2",type="l");points(1:20,sf$rsq,col=ifelse(sf$rsq==sf$rsq[which.max(sf$rsq)],"coral","steelblue"),cex=1.5,pch=16)
plot(sf$adjr2, xlab="Jumlah Variabel", ylab="Adj R2",type="l");points(1:20,sf$adjr2,col=ifelse(sf$adjr2==sf$adjr2[which.max(sf$adjr2)],"coral","steelblue"),cex=1.5,pch=16)
plot(sf$bic, xlab="Jumlah Variabel", ylab="BIC",type="l");points(1:20,sf$bic,col=ifelse(sf$bic==sf$bic[which.min(sf$bic)],"coral","steelblue"),cex=1.5,pch=16)
plot(sf$cp, xlab="Jumlah Variabel", ylab="CP",type="l");points(1:20,sf$cp,col=ifelse(sf$cp==sf$cp[which.min(sf$cp)],"coral","steelblue"),cex=1.5,pch=16)
Tampak berdasarkan plot adj R sq akan mencapai maksimal ketika 11
variabel
coef(forward,11)
## (Intercept) companyB companyC companyD
## -37140.813 -33372.057 -29656.775 -20941.642
## companyE diameter toppingblack papper toppingmozzarella
## -32212.970 11846.424 -17535.108 -7166.178
## toppingpapperoni toppingsmoked beef size_ordinal extra_sauce
## 17558.137 10085.357 -3716.072 8910.129
Berdasarkan model forward dengan 11 peubah terbaik yang akan memberikan nilai adj R sq terbesar terhadap peubah bebas harga pizza adalah peubah company B, company C, company D, company E, diameter, topping black papper, topping mozarella, topping papperoni, topping smoked beef, size, dan extra sauce
for_summary <- summary(forward)
# Menghitung AIC secara manual
n <- nrow(df) # Jumlah pengamatan
k <- for_summary$which[1] # Jumlah variabel yang dipilih dalam model terbaik
# Menghitung AIC
aic_for <- n * log(for_summary$rss[1] / n) + 2 * k
aic_for
## [1] 2595.677
#rsquared
rsq_for <- summary(forward)$rsq[which.max(summary(forward)$rsq)]
rsq_for
## [1] 0.7884006
Didapat nilai R sq 0,7884006 dan nilai AIC sebesar 2595,677
backward<-regsubsets(price_rupiah~company+diameter+topping+size_ordinal+extra_sauce+extra_cheese+extra_mushrooms, nvmax=20 #banyak peubah yang muncul saat model awal;
,data=df,method="backward")
sb<-summary(backward);sb
## Subset selection object
## Call: regsubsets.formula(price_rupiah ~ company + diameter + topping +
## size_ordinal + extra_sauce + extra_cheese + extra_mushrooms,
## nvmax = 20, data = df, method = "backward")
## 20 Variables (and intercept)
## Forced in Forced out
## companyB FALSE FALSE
## companyC FALSE FALSE
## companyD FALSE FALSE
## companyE FALSE FALSE
## diameter FALSE FALSE
## toppingblack papper FALSE FALSE
## toppingchicken FALSE FALSE
## toppingmeat FALSE FALSE
## toppingmozzarella FALSE FALSE
## toppingmushrooms FALSE FALSE
## toppingonion FALSE FALSE
## toppingpapperoni FALSE FALSE
## toppingsausage FALSE FALSE
## toppingsmoked beef FALSE FALSE
## toppingtuna FALSE FALSE
## toppingvegetables FALSE FALSE
## size_ordinal FALSE FALSE
## extra_sauce FALSE FALSE
## extra_cheese FALSE FALSE
## extra_mushrooms FALSE FALSE
## 1 subsets of each size up to 20
## Selection Algorithm: backward
## companyB companyC companyD companyE diameter toppingblack papper
## 1 ( 1 ) " " " " " " " " "*" " "
## 2 ( 1 ) "*" " " " " " " "*" " "
## 3 ( 1 ) "*" " " " " "*" "*" " "
## 4 ( 1 ) "*" "*" " " "*" "*" " "
## 5 ( 1 ) "*" "*" "*" "*" "*" " "
## 6 ( 1 ) "*" "*" "*" "*" "*" " "
## 7 ( 1 ) "*" "*" "*" "*" "*" " "
## 8 ( 1 ) "*" "*" "*" "*" "*" "*"
## 9 ( 1 ) "*" "*" "*" "*" "*" "*"
## 10 ( 1 ) "*" "*" "*" "*" "*" "*"
## 11 ( 1 ) "*" "*" "*" "*" "*" "*"
## 12 ( 1 ) "*" "*" "*" "*" "*" "*"
## 13 ( 1 ) "*" "*" "*" "*" "*" "*"
## 14 ( 1 ) "*" "*" "*" "*" "*" "*"
## 15 ( 1 ) "*" "*" "*" "*" "*" "*"
## 16 ( 1 ) "*" "*" "*" "*" "*" "*"
## 17 ( 1 ) "*" "*" "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*"
## 20 ( 1 ) "*" "*" "*" "*" "*" "*"
## toppingchicken toppingmeat toppingmozzarella toppingmushrooms
## 1 ( 1 ) " " " " " " " "
## 2 ( 1 ) " " " " " " " "
## 3 ( 1 ) " " " " " " " "
## 4 ( 1 ) " " " " " " " "
## 5 ( 1 ) " " " " " " " "
## 6 ( 1 ) " " " " " " " "
## 7 ( 1 ) " " " " "*" " "
## 8 ( 1 ) " " " " "*" " "
## 9 ( 1 ) " " " " "*" " "
## 10 ( 1 ) " " " " "*" " "
## 11 ( 1 ) " " " " "*" " "
## 12 ( 1 ) " " " " "*" " "
## 13 ( 1 ) " " " " "*" " "
## 14 ( 1 ) " " "*" "*" " "
## 15 ( 1 ) " " "*" "*" " "
## 16 ( 1 ) " " "*" "*" " "
## 17 ( 1 ) " " "*" "*" " "
## 18 ( 1 ) " " "*" "*" "*"
## 19 ( 1 ) " " "*" "*" "*"
## 20 ( 1 ) "*" "*" "*" "*"
## toppingonion toppingpapperoni toppingsausage toppingsmoked beef
## 1 ( 1 ) " " " " " " " "
## 2 ( 1 ) " " " " " " " "
## 3 ( 1 ) " " " " " " " "
## 4 ( 1 ) " " " " " " " "
## 5 ( 1 ) " " " " " " " "
## 6 ( 1 ) " " " " " " " "
## 7 ( 1 ) " " " " " " " "
## 8 ( 1 ) " " " " " " " "
## 9 ( 1 ) " " " " " " "*"
## 10 ( 1 ) " " " " " " "*"
## 11 ( 1 ) " " "*" " " "*"
## 12 ( 1 ) " " "*" " " "*"
## 13 ( 1 ) " " "*" "*" "*"
## 14 ( 1 ) " " "*" "*" "*"
## 15 ( 1 ) " " "*" "*" "*"
## 16 ( 1 ) "*" "*" "*" "*"
## 17 ( 1 ) "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*"
## 20 ( 1 ) "*" "*" "*" "*"
## toppingtuna toppingvegetables size_ordinal extra_sauce extra_cheese
## 1 ( 1 ) " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " "
## 4 ( 1 ) " " " " " " " " " "
## 5 ( 1 ) " " " " " " " " " "
## 6 ( 1 ) " " " " " " "*" " "
## 7 ( 1 ) " " " " " " "*" " "
## 8 ( 1 ) " " " " " " "*" " "
## 9 ( 1 ) " " " " " " "*" " "
## 10 ( 1 ) " " " " "*" "*" " "
## 11 ( 1 ) " " " " "*" "*" " "
## 12 ( 1 ) " " "*" "*" "*" " "
## 13 ( 1 ) " " "*" "*" "*" " "
## 14 ( 1 ) " " "*" "*" "*" " "
## 15 ( 1 ) " " "*" "*" "*" " "
## 16 ( 1 ) " " "*" "*" "*" " "
## 17 ( 1 ) " " "*" "*" "*" "*"
## 18 ( 1 ) " " "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*"
## 20 ( 1 ) "*" "*" "*" "*" "*"
## extra_mushrooms
## 1 ( 1 ) " "
## 2 ( 1 ) " "
## 3 ( 1 ) " "
## 4 ( 1 ) " "
## 5 ( 1 ) " "
## 6 ( 1 ) " "
## 7 ( 1 ) " "
## 8 ( 1 ) " "
## 9 ( 1 ) " "
## 10 ( 1 ) " "
## 11 ( 1 ) " "
## 12 ( 1 ) " "
## 13 ( 1 ) " "
## 14 ( 1 ) " "
## 15 ( 1 ) "*"
## 16 ( 1 ) "*"
## 17 ( 1 ) "*"
## 18 ( 1 ) "*"
## 19 ( 1 ) "*"
## 20 ( 1 ) "*"
par(mfrow=c(2,2))
plot(sb$rsq, xlab="Jumlah Variabel", ylab="R2",type="l");points(1:20,sb$rsq,col=ifelse(sb$rsq==sb$rsq[which.max(sb$rsq)],"coral","steelblue"),cex=1.5,pch=16)
plot(sb$adjr2, xlab="Jumlah Variabel", ylab="Adj R2",type="l");points(1:20,sb$adjr2,col=ifelse(sb$adjr2==sb$adjr2[which.max(sb$adjr2)],"coral","steelblue"),cex=1.5,pch=16)
plot(sb$bic, xlab="Jumlah Variabel", ylab="BIC",type="l");points(1:20,sb$bic,col=ifelse(sb$bic==sb$bic[which.min(sb$bic)],"coral","steelblue"),cex=1.5,pch=16)
plot(sb$cp, xlab="Jumlah Variabel", ylab="CP",type="l");points(1:20,sb$cp,col=ifelse(sb$cp==sb$cp[which.min(sb$cp)],"coral","steelblue"),cex=1.5,pch=16)
Tampak berdasarkan plot adj R sq akan mencapai maksimal ketika 11
variabel
coef(backward,11)
## (Intercept) companyB companyC companyD
## -37140.813 -33372.057 -29656.775 -20941.642
## companyE diameter toppingblack papper toppingmozzarella
## -32212.970 11846.424 -17535.108 -7166.178
## toppingpapperoni toppingsmoked beef size_ordinal extra_sauce
## 17558.137 10085.357 -3716.072 8910.129
Berdasarkan model backward dengan 11 peubah terbaik yang akan memberikan nilai adj R sq terbesar terhadap peubah bebas harga pizza adalah peubah company B, company C, company D, company E, diameter, topping black papper, topping mozarella, topping papperoni, topping smoked beef, size, dan extra sauce
back_summary <- summary(backward)
# Menghitung AIC secara manual
n <- nrow(pizza) # Jumlah pengamatan
k <- back_summary$which[1] # Jumlah variabel yang dipilih dalam model terbaik
# Menghitung AIC
aic_back <- n * log(back_summary$rss[1] / n) + 2 * k
aic_back
## [1] 2595.677
#rsquared
rsq_back <- summary(backward)$rsq[which.max(summary(backward)$rsq)]
rsq_back
## [1] 0.7884006
Didapat nilai R sq 0,7884006 dan nilai AIC sebesar 2595,677
library(MASS)
step <- step(model, direction = "both")
## Start: AIC=2584.19
## price_rupiah ~ company + diameter + topping + size_ordinal +
## extra_sauce + extra_cheese + extra_mushrooms
##
## Df Sum of Sq RSS AIC
## - topping 11 5.5932e+09 5.9629e+10 2574.8
## - extra_cheese 1 5.8861e+07 5.4095e+10 2582.3
## - extra_mushrooms 1 2.4584e+08 5.4282e+10 2582.8
## <none> 5.4036e+10 2584.2
## - size_ordinal 1 8.6725e+08 5.4904e+10 2584.2
## - extra_sauce 1 2.6968e+09 5.6733e+10 2588.4
## - company 4 1.2585e+10 6.6621e+10 2603.0
## - diameter 1 3.7084e+10 9.1121e+10 2649.1
##
## Step: AIC=2574.8
## price_rupiah ~ company + diameter + size_ordinal + extra_sauce +
## extra_cheese + extra_mushrooms
##
## Df Sum of Sq RSS AIC
## - extra_cheese 1 7.4040e+04 5.9630e+10 2572.8
## - extra_mushrooms 1 7.2581e+07 5.9702e+10 2573.0
## - size_ordinal 1 8.9091e+08 6.0520e+10 2574.7
## <none> 5.9629e+10 2574.8
## - extra_sauce 1 2.9952e+09 6.2625e+10 2579.1
## + topping 11 5.5932e+09 5.4036e+10 2584.2
## - company 4 1.5875e+10 7.5504e+10 2597.0
## - diameter 1 3.8393e+10 9.8022e+10 2636.4
##
## Step: AIC=2572.8
## price_rupiah ~ company + diameter + size_ordinal + extra_sauce +
## extra_mushrooms
##
## Df Sum of Sq RSS AIC
## - extra_mushrooms 1 7.3111e+07 5.9703e+10 2571.0
## - size_ordinal 1 9.1983e+08 6.0549e+10 2572.8
## <none> 5.9630e+10 2572.8
## + extra_cheese 1 7.4040e+04 5.9629e+10 2574.8
## - extra_sauce 1 3.0043e+09 6.2634e+10 2577.1
## + topping 11 5.5344e+09 5.4095e+10 2582.3
## - company 4 1.5990e+10 7.5620e+10 2595.2
## - diameter 1 3.9002e+10 9.8632e+10 2635.2
##
## Step: AIC=2570.96
## price_rupiah ~ company + diameter + size_ordinal + extra_sauce
##
## Df Sum of Sq RSS AIC
## <none> 5.9703e+10 2571.0
## - size_ordinal 1 9.4745e+08 6.0650e+10 2571.0
## + extra_mushrooms 1 7.3111e+07 5.9630e+10 2572.8
## + extra_cheese 1 6.0360e+05 5.9702e+10 2573.0
## - extra_sauce 1 3.0589e+09 6.2762e+10 2575.3
## + topping 11 5.3712e+09 5.4331e+10 2580.9
## - company 4 1.6166e+10 7.5868e+10 2593.6
## - diameter 1 3.9160e+10 9.8862e+10 2633.5
summary(step)
##
## Call:
## lm(formula = price_rupiah ~ company + diameter + size_ordinal +
## extra_sauce, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40555 -17008 -1573 11627 85192
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -35211 12314 -2.859 0.00501 **
## companyB -33454 6872 -4.868 3.46e-06 ***
## companyC -30965 6644 -4.660 8.24e-06 ***
## companyD -21417 6869 -3.118 0.00228 **
## companyE -32722 6576 -4.976 2.19e-06 ***
## diameter 11739 1323 8.872 8.22e-15 ***
## size_ordinal -3899 2826 -1.380 0.17016
## extra_sauce 10115 4080 2.480 0.01454 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22310 on 120 degrees of freedom
## Multiple R-squared: 0.7662, Adjusted R-squared: 0.7526
## F-statistic: 56.18 on 7 and 120 DF, p-value: < 2.2e-16
aic_step = AIC(step)
summary(step)$r.squared
## [1] 0.7662117
Didapat nilai R sq 0,7662117 dan nilai AIC sebesar 2936.207
akurasi <- matrix(c(akurasi_awal,aic_for,aic_back,aic_step),nrow=1,ncol=4,byrow = T)
colnames(akurasi) <- c("Model Awal", "Model Forward", "Model Backward", "Model Stepwise")
row.names(akurasi) <- c("AIC")
akurasi
## Model Awal Model Forward Model Backward Model Stepwise
## AIC 2949.442 2595.677 2595.677 2936.207
Tampak bahwa nilai AIC terkecil didapat pada model forward dan backward Setelah ditelusuri didapat pula bahwa nilai Rsq model forward dan backward bahkan nilai koefisien regresi hingga peubah yang diambil pun sama persis
Dipilih model forward
#Model Terbaik (ambil forward karena rsq terbesar dan aic terkecil tapi nilainya sama dengan backward)
forward_model <- stepAIC(model, direction = "forward")
## Start: AIC=2584.19
## price_rupiah ~ company + diameter + topping + size_ordinal +
## extra_sauce + extra_cheese + extra_mushrooms
summary(forward_model)
##
## Call:
## lm(formula = price_rupiah ~ company + diameter + topping + size_ordinal +
## extra_sauce + extra_cheese + extra_mushrooms, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41621 -13511 586 10113 91721
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -44747.3 21805.4 -2.052 0.04260 *
## companyB -32489.7 7318.3 -4.440 2.20e-05 ***
## companyC -33290.6 11601.1 -2.870 0.00495 **
## companyD -20257.6 7332.5 -2.763 0.00675 **
## companyE -31945.5 7188.3 -4.444 2.16e-05 ***
## diameter 12016.9 1402.3 8.569 8.61e-14 ***
## toppingblack papper -13976.3 22452.5 -0.622 0.53495
## toppingchicken 500.7 17750.1 0.028 0.97755
## toppingmeat 8124.8 15606.2 0.521 0.60371
## toppingmozzarella -4965.3 17419.4 -0.285 0.77616
## toppingmushrooms 2586.5 17759.7 0.146 0.88448
## toppingonion -10326.3 26812.3 -0.385 0.70090
## toppingpapperoni 20778.9 24009.8 0.865 0.38874
## toppingsausage 12966.5 19319.2 0.671 0.50356
## toppingsmoked beef 12656.8 17843.0 0.709 0.47966
## toppingtuna 870.8 16021.3 0.054 0.95676
## toppingvegetables 11143.6 15743.1 0.708 0.48058
## size_ordinal -3951.8 3015.6 -1.310 0.19285
## extra_sauce 10433.1 4514.8 2.311 0.02276 *
## extra_cheese 1632.2 4780.9 0.341 0.73347
## extra_mushrooms 2964.9 4249.4 0.698 0.48687
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22470 on 107 degrees of freedom
## Multiple R-squared: 0.7884, Adjusted R-squared: 0.7488
## F-statistic: 19.93 on 20 and 107 DF, p-value: < 2.2e-16
ks.test(forward_model$residuals, "pnorm",
mean = mean(forward_model$residuals),
sd = sd(forward_model$residuals))
## Warning in ks.test.default(forward_model$residuals, "pnorm", mean =
## mean(forward_model$residuals), : ties should not be present for the
## Kolmogorov-Smirnov test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: forward_model$residuals
## D = 0.076694, p-value = 0.4388
## alternative hypothesis: two-sided
berdasarkan test Kolmogorov-Smirnov jufa didapat p-value sebesar 0,4388 sehingga dapat disimpulkan bahwa data menyebar normal
par(mfrow=c(1,2));plot(forward_model,c(1,3))
## Warning: not plotting observations with leverage one:
## 60
gqtest(forward_model)
##
## Goldfeld-Quandt test
##
## data: forward_model
## GQ = 0.89713, df1 = 43, df2 = 43, p-value = 0.6382
## alternative hypothesis: variance increases from segment 1 to 2
Berdasarkan Goldfeld-Quandt test didapatkan p-Value sebesar 0,6382 sehingga dapat disimpulkan bahwa ragam sisaan homogen (homoskedastisitas)
bgtest(forward_model)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: forward_model
## LM test = 4.8112, df = 1, p-value = 0.02828
Berdasarkan uji Breusch-Godfrey didapat p-value sebesar 0,02828 sehingga tolak H0 dan dapat diartikan bahwa terjadi pelanggaran autokorelasi data
library(car)
vif(forward_model)
## GVIF Df GVIF^(1/(2*Df))
## company 6.978501 4 1.274883
## diameter 5.334613 1 2.309678
## topping 10.322380 11 1.111939
## size_ordinal 5.833645 1 2.415294
## extra_sauce 1.246202 1 1.116334
## extra_cheese 1.292380 1 1.136829
## extra_mushrooms 1.130514 1 1.063256
Terjadi multikolinearitas pada peubah topping karena nilai VIF>10. Hal ini dapat diatasi pada pemilihan peubah menggunakan metode regresi Ridge Lasso yang umumnya digunakan dalam penanganan data multikolinearitas
t.test(model$residuals,mu=0,conf.level = 0.95)
##
## One Sample t-test
##
## data: model$residuals
## t = 4.5334e-16, df = 127, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -3607.797 3607.797
## sample estimates:
## mean of x
## 8.265333e-13
Didapat p-value = 1, yaitu lebih besar dari 0,05 sehingga terima H0 : nilai sisaan sama dengan nol
library(TTR)
library(forecast)
library(lmtest)
library(dplyr)
library(orcutt)
library(HoRM)
library(ggplot2)
library(corrplot)
modelCO.for<-cochrane.orcutt(forward_model)
modelCO.for
## Cochrane-orcutt estimation for first order autocorrelation
##
## Call:
## lm(formula = price_rupiah ~ company + diameter + topping + size_ordinal +
## extra_sauce + extra_cheese + extra_mushrooms, data = df)
##
## number of interaction: 15
## rho 0.282698
##
## Durbin-Watson statistic
## (original): 1.64631 , p-value: 2.04e-03
## (transformed): 1.90974 , p-value: 1.473e-01
##
## coefficients:
## (Intercept) companyB companyC companyD
## -34638.2944 -31767.7828 -30144.7238 -19349.2729
## companyE diameter toppingblack papper toppingchicken
## -30951.3866 11086.0482 -14093.6812 5891.1825
## toppingmeat toppingmozzarella toppingmushrooms toppingonion
## 6518.4344 2222.6240 2969.1165 -9731.8315
## toppingpapperoni toppingsausage toppingsmoked beef toppingtuna
## 28482.1711 14328.1802 16417.1007 991.1608
## toppingvegetables size_ordinal extra_sauce extra_cheese
## 10227.3735 -3373.6892 7629.9762 -2397.5474
## extra_mushrooms
## 2892.2826
bgtest(modelCO.for)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: modelCO.for
## LM test = 0.30658, df = 1, p-value = 0.5798
Setelah dilakukan penanganan menggunakan Cochrane Orcutt dan dites kembali menggunakan uji Breusch-Godfrey, dapat dilihat bahwa data sudah tidak ada pelanggaran autokorelasi
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-7
Berdasarkan model forward dengan 11 peubah terbaik yang akan memberikan nilai adj R sq terbesar terhadap peubah bebas harga pizza adalah peubah company B, company C, company D, company E, diameter, topping black papper, topping mozarella, topping papperoni, topping smoked beef, size, dan extra sauce
Yang mana ke 11 peubah itu dari peubah company, diameter, topping, size_ordinal, dan extra sauce
x <- data.matrix(pizza[,c('company','diameter','topping','size_ordinal','extra_sauce')])
y <- pizza$price_rupiah
alphas <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1) # Ganti dengan nilai alpha yang sesuai
rss_values <- numeric(length(alphas))
num_parameters <- numeric(length(alphas))
for (i in 1:length(alphas)) {
model <- glmnet(x, y, alpha=alphas[i])
y_pred <- predict(model, newx=x, s=0) # s=0 untuk mendapatkan model Ridge
rss <- sum((y - y_pred)^2)
k <- sum(model$beta != 0) + 1 # Jumlah parameter termasuk intercept
rss_values[i] <- rss
num_parameters[i] <- k
}
n <- length(y) # Jumlah sampel
aic_values <- numeric(length(alphas))
for (i in 1:length(alphas)) {
aic <- n * log(rss_values[i] / n) + 2 * num_parameters[i]
aic_values[i] <- aic
}
best_alpha <- alphas[which.min(aic_values)]
best_alpha
## [1] 1
Karena alpha terbaik di 1 maka dipilih regresi lasso karena alpha yang digunakan regresi lasso sebesar 1
lasso<-cv.glmnet(x,y,alpha=1);plot(lasso)
best.ll<-lasso$lambda.min
bestlasso<-glmnet(x,y,alpha=1,lambda=best.ll);coef(bestlasso)
## 6 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) -53981.6321
## company -4407.3023
## diameter 11048.2153
## topping 716.8855
## size_ordinal .
## extra_sauce 11016.2986
rsq<-function(bestmodel,bestlambda,x,y){
#y duga
y.duga <- predict(bestmodel, s = bestlambda, newx = x)
#JKG dan JKT
jkt <- sum((y - mean(y))^2)
jkg <- sum((y.duga- y)^2)
#find R-Squared
rsq <- 1 - jkg/jkt
return(rsq)
}
rsq_lasso <- rsq(bestlasso,best.ll,x,y)
Didapat nilai Rsq regresi Lasso sebesar 0,7278051
# Cari aic lasso
alpha <- 1 # 0 untuk Lasso
lambda_seq <- seq(0.01, 1, by = 0.01) # Atur lambda sesuai kebutuhan
lasso_model <- glmnet(x, y, alpha = alpha, lambda = lambda_seq)
# Hitung RSS untuk setiap nilai lambda
rss_values <- rep(NA, length(lambda_seq))
for (i in 1:length(lambda_seq)) {
predictions <- predict(lasso_model, newx = x, s = lambda_seq[i])
residuals <- y - predictions
rss_values[i] <- sum(residuals^2)
}
# Hitung jumlah parameter
num_parameters <- sapply(coef(lasso_model, s = lambda_seq), function(x) sum(x != 0))
# Hitung AIC
n <- length(y) # Jumlah sampel
aic_values <- n * log(rss_values) + 2 * num_parameters
# Temukan lambda dengan AIC terkecil
best_lambda <- lambda_seq[which.min(aic_values)]
best_aic <- min(aic_values)
aic_lasso <- best_aic
aic_lasso
## [1] 3197.22
Serta didapat pula bahwa AIC regresi Lasso sebesar 3197,22
akurasi <- matrix(c(akurasi_awal,aic_for,aic_lasso,
Rsq_awal, rsq_for, rsq_lasso),nrow=2,ncol=3,byrow = T)
colnames(akurasi) <- c("Model Awal", "Model Forward", "Model Lasso")
row.names(akurasi) <- c("AIC", "Rsq")
akurasi
## Model Awal Model Forward Model Lasso
## AIC 2949.4423597 2595.6766765 3197.2203932
## Rsq 0.7884006 0.7884006 0.7277425
Berdasarkan tabel akurasi dapat dilihat bahwa Rsq terbesar didapat pada model awal dan model forward Sedangkan untuk nilai AIC terkecil didapat pada model forward(atau backward karena di awal didapat nilai AIC, Rsq, koefisien, bahkan peubah antara kedua model tersebut sama persis) Oleh karena itu, model terbaik dalam data pizza ini adalah model forward
Diduga model regresi Lasso tidak sepenuhnya baik karena multikolinearitas yang terjadi sangat kecil, memang lebih dari 10 namun hanya dikisaran 10,89 sehingga model lasso yang notabene bekerja sebagai penanganan multikolinearitas tidak memiliki Rsq yang tinggi