Library

lapply(c("car","lmtest"),library,character.only=T)[[1]]

## Loading required package: carData

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## [1] "car"       "carData"   "stats"     "graphics"  "grDevices" "utils"    
## [7] "datasets"  "methods"   "base"

library(readxl)
lapply(c("tidyverse","rvest","kableExtra"),library,character.only=T)[[1]]

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some()   masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'rvest'
## 
## 
## The following object is masked from 'package:readr':
## 
##     guess_encoding
## 
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "readxl"    "lmtest"   
## [13] "zoo"       "car"       "carData"   "stats"     "graphics"  "grDevices"
## [19] "utils"     "datasets"  "methods"   "base"

Input data

library(readxl)
pizza <- read_excel("C:/Users/user/Downloads/Pizza Price/pizza_v2.xlsx", sheet = "Sheet2" ) 
View(pizza)

Pada data kolom size telah dikoding ulang sehingga menjadi data ordinal dengan ketentuan small = 1 medium = 2 reguler = 3 large = 4 XL = 5 jumbo = 6

Selain itu, data kolom extra sauce, extra cheese, dan extra mushroom awalnya berupa yes or no, sehingga diubah menjadi data dummy dengan ketentuan yes = 1 no = 0

Faktor

pizza$company <- as.factor(pizza$company)
pizza$topping <- as.factor(pizza$topping)

Relevel

df <- pizza%>%mutate(company=relevel(pizza$company, ref = "A"));
df <- pizza%>%mutate(topping=relevel(pizza$topping, ref = "beef"));

df$company;df$price_rupiah;df$diameter;df$topping;df$variant;df$size_ordinal;df$extra_sauce;df$extra_cheese;df$extra_mushrooms

##   [1] A A A A A A A A A A A A A A A A A A A A A A A A A B B B B B B B B B B B B
##  [38] B B B B B B B B B B B B C C C C C C C C C C C C C C C C C C C C C C C C C
##  [75] C C C C C D D D D D D D D D D D D D D D D D D D D E E E E E E E E E E E E
## [112] E E E E E E E E E E E E E E E E E
## Levels: A B C D E

##   [1] 235000 198000 120000 155000 248000 140000 110000  70000  90000  90000
##  [11] 140000 110000  70000  90000  90000 140000 110000  70000  90000  90000
##  [21] 230000 188000 114000 149000 149000  23500  46000  72000  49000  83000
##  [31]  96000  31000  69000  93000  75000 115000 123000  33000  46000  72000
##  [41]  76000 119000 126500  75000  46000  72000  49000  83000  96000  39000
##  [51]  72000  99000  44000  78000 100000  39000  72000  99000  35000  60000
##  [61]  98000  35000  60000  98000  44000  78000 100000  28000  51000  84000
##  [71]  39000  72000  99000  35000  60000  98000  32000  54000  92000 140000
##  [81] 110000  70000  90000  90000 230000 188000 114000 149000 149000  23500
##  [91]  46000  72000  49000  83000  96000  31000  69000  93000  75000 115000
## [101]  23500  46000  72000  49000  83000  96000  31000  69000  93000 115000
## [111] 123000  33000  46000  72000  76000 119000 126500  75000  46000  72000
## [121]  49000  83000  96000  39000  72000  99000  44000  78000

##   [1] 22.0 20.0 16.0 14.0 18.0 18.5 16.0  8.0 12.0 12.0 18.5 16.0  8.0 12.0 12.0
##  [16] 18.5 16.0  8.0 12.0 12.0 22.0 18.5 14.0 16.5 16.5  8.5 12.0 14.0 12.0 17.0
##  [31] 12.0  8.5 12.0 14.0 12.0 17.0 12.0  8.5 12.0 14.0 12.0 17.0 12.0  8.5 12.0
##  [46] 14.0 12.0 17.0 12.0  8.5 12.0 14.0  8.5 12.0 14.0  8.5 12.0 14.0  8.5 12.0
##  [61] 14.0  8.5 12.0 14.0  8.5 12.0 14.0  8.5 12.0 14.0  8.5 12.0 14.0  8.5 12.0
##  [76] 14.0  8.5 12.0 14.0 18.5 16.0  8.0 12.0 12.0 22.0 18.5 14.0 16.5 16.5  8.5
##  [91] 12.0 14.0 12.0 17.0 12.0  8.5 12.0 14.0 12.0 17.0  8.5 12.0 14.0 12.0 17.0
## [106] 12.0  8.5 12.0 14.0 17.0 12.0  8.5 12.0 14.0 12.0 17.0 12.0  8.5 12.0 14.0
## [121] 12.0 17.0 12.0  8.5 12.0 14.0  8.5 12.0

##   [1] chicken      papperoni    mushrooms    smoked beef  mozzarella  
##   [6] black papper smoked beef  papperoni    mushrooms    smoked beef 
##  [11] mozzarella   black papper smoked beef  black papper mozzarella  
##  [16] mozzarella   smoked beef  chicken      mushrooms    mozzarella  
##  [21] chicken      mushrooms    chicken      smoked beef  chicken     
##  [26] mozzarella   chicken      smoked beef  mozzarella   chicken     
##  [31] chicken      mushrooms    chicken      mushrooms    mushrooms   
##  [36] mozzarella   smoked beef  chicken      mozzarella   chicken     
##  [41] chicken      mushrooms    smoked beef  smoked beef  mushrooms   
##  [46] mozzarella   smoked beef  chicken      mushrooms    tuna        
##  [51] tuna         tuna         meat         meat         meat        
##  [56] sausage      sausage      sausage      mushrooms    onion       
##  [61] mozzarella   meat         meat         meat         vegetables  
##  [66] vegetables   vegetables   vegetables   vegetables   vegetables  
##  [71] vegetables   vegetables   vegetables   beef         beef        
##  [76] beef         tuna         tuna         tuna         mozzarella  
##  [81] smoked beef  chicken      mushrooms    mozzarella   chicken     
##  [86] mushrooms    chicken      smoked beef  chicken      mozzarella  
##  [91] chicken      smoked beef  mozzarella   chicken      chicken     
##  [96] mushrooms    chicken      mushrooms    mushrooms    mozzarella  
## [101] mozzarella   chicken      smoked beef  mozzarella   chicken     
## [106] chicken      mushrooms    chicken      mushrooms    mozzarella  
## [111] smoked beef  chicken      mozzarella   chicken      chicken     
## [116] mushrooms    smoked beef  smoked beef  mushrooms    mozzarella  
## [121] smoked beef  chicken      mushrooms    tuna         tuna        
## [126] tuna         meat         meat        
## 12 Levels: beef black papper chicken meat mozzarella mushrooms ... vegetables

## Warning: Unknown or uninitialised column: `variant`.

## NULL

##   [1] 6 6 3 3 6 6 6 3 3 3 6 6 3 3 3 6 6 3 3 3 6 6 3 3 3 1 2 4 2 5 2 1 2 4 2 5 2
##  [38] 1 2 4 2 5 2 1 2 4 2 5 2 1 2 4 1 2 4 1 2 4 1 2 4 1 2 4 1 2 4 1 2 4 1 2 4 1
##  [75] 2 4 1 2 4 6 6 3 3 3 6 6 3 3 3 1 2 4 2 5 2 1 2 4 2 5 1 2 4 2 5 2 1 2 4 5 2
## [112] 1 2 4 2 5 2 1 2 4 2 5 2 1 2 4 1 2

##   [1] 1 1 1 1 1 0 0 0 1 0 0 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 0 0 1
##  [38] 0 1 1 0 1 1 1 1 1 0 0 0 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 0 0 0 1 0 1 0 1
##  [75] 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 0 0 1 1 1 0 0 0 1 0 0 0 1
## [112] 0 1 1 0 1 1 1 1 1 0 0 0 1 1 1 1 0

##   [1] 1 1 1 0 0 0 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1
##  [38] 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 0 0 0
##  [75] 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1
## [112] 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0

##   [1] 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 1 1 1
##  [75] 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 1 1
## [112] 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1

Model Regresi Awal

model <- lm(price_rupiah~company+diameter+topping+size_ordinal+extra_sauce+extra_cheese+extra_mushrooms, data=df)
summary(model)

## 
## Call:
## lm(formula = price_rupiah ~ company + diameter + topping + size_ordinal + 
##     extra_sauce + extra_cheese + extra_mushrooms, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -41621 -13511    586  10113  91721 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -44747.3    21805.4  -2.052  0.04260 *  
## companyB            -32489.7     7318.3  -4.440 2.20e-05 ***
## companyC            -33290.6    11601.1  -2.870  0.00495 ** 
## companyD            -20257.6     7332.5  -2.763  0.00675 ** 
## companyE            -31945.5     7188.3  -4.444 2.16e-05 ***
## diameter             12016.9     1402.3   8.569 8.61e-14 ***
## toppingblack papper -13976.3    22452.5  -0.622  0.53495    
## toppingchicken         500.7    17750.1   0.028  0.97755    
## toppingmeat           8124.8    15606.2   0.521  0.60371    
## toppingmozzarella    -4965.3    17419.4  -0.285  0.77616    
## toppingmushrooms      2586.5    17759.7   0.146  0.88448    
## toppingonion        -10326.3    26812.3  -0.385  0.70090    
## toppingpapperoni     20778.9    24009.8   0.865  0.38874    
## toppingsausage       12966.5    19319.2   0.671  0.50356    
## toppingsmoked beef   12656.8    17843.0   0.709  0.47966    
## toppingtuna            870.8    16021.3   0.054  0.95676    
## toppingvegetables    11143.6    15743.1   0.708  0.48058    
## size_ordinal         -3951.8     3015.6  -1.310  0.19285    
## extra_sauce          10433.1     4514.8   2.311  0.02276 *  
## extra_cheese          1632.2     4780.9   0.341  0.73347    
## extra_mushrooms       2964.9     4249.4   0.698  0.48687    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22470 on 107 degrees of freedom
## Multiple R-squared:  0.7884, Adjusted R-squared:  0.7488 
## F-statistic: 19.93 on 20 and 107 DF,  p-value: < 2.2e-16

Rsq_awal = summary(model)$r.squared
akurasi_awal = AIC(model)

Didapat R-sq model awal sebesar 0,7884 yang mana cukup tinggi, artinya peubah yang dipilih telah mewakili keragaman harga pizza sebesar 78,84% Sedangkan didapat AIC model awal sebesar 2949,442

Asumsi Klasik

Uji Asumsi Normalitas

uji.normal<-function(x, object.name="x", graph=TRUE, graph.transformed=TRUE){
  lapply(c("fitdistrplus", "kSamples", "rcompanion"), library, character.only=T) 
  if(any(x<0))x<-x-min(x)+1
  mean <- fitdist(x, "norm")$estimate[1]; sd <- fitdist(x, "norm")$estimate[2]
  uji<-ks.test(x, "pnorm", mean=mean, sd=sd)
  uji1<- ad.test(x, rnorm(length(x), mean=mean, sd=sd))
  pvalue<-uji$p.value
  PVALUE1<-uji1$ad[1,3]
  PVALUE2<-uji1$ad[2,3]
  t<-transformTukey(x,quiet = TRUE,plotit = FALSE)
  pt<-ks.test(t, "pnorm", mean=fitdist(t,"norm")$estimate[1], 
              sd=fitdist(t,"norm")$estimate[2])$p.value
  lambda<-transformTukey(x,returnLambda =TRUE,quiet=TRUE,plotit = FALSE)
  if(graph==TRUE){
    if(graph.transformed==FALSE){
      par(mfrow=c(1,2))
      hist(x, freq=F, col="steelblue", border="white", 
           main=paste("Histogram of ",object.name),xlab=object.name)
      lines(density(x),lwd=2, col="coral")
      qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
    }
    else{
      par(mfrow=c(2,2))
      hist(x, freq=F, col="steelblue", border="white", 
           main=paste("Histogram of ",object.name),xlab=object.name)
      lines(density(x),lwd=2, col="coral")
      hist(t, main=paste("Histogram of ",object.name,"transformed"), 
           xlab=paste(object.name,"transformed"), freq=F, 
           col="steelblue",border = "white")
      lines(density(t),lwd=2, col="coral")
      qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
      qqnorm(t, col="coral");qqline(t,col="steelblue", lwd=2)
    }
  }
  z<-ifelse((PVALUE1>=0.05 & PVALUE2<0.05 ||PVALUE1<0.05 & PVALUE2>=0.05), 
            
            ifelse(pvalue>=0.05, 
                   return(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                            Keputusan="Terima H0, data menyebar normal")), 
                   return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                 Keputusan="Tolak H0, data tidak menyebar normal"),
                                 `lambda transformasi`=lambda,
                                 `Data Hasil Transformasi Tukey`= t,
                                `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt, 
                                  `Keputusan`=ifelse(pt>=0.05, 
                                  "Terima H0, data menyebar normal", 
                                  "Tolak H0, data tidak menyebar normal"))))),
            
            ifelse(((pvalue >= 0.05)&(PVALUE1 >= 0.05||PVALUE2>= 0.05)), 
                   return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                 Keputusan="Terima H0, data menyebar normal"), 
                                 `Hasil Uji Anderson`=
                                 data.frame(`P-Value`=
                                rbind(`Versi 1`=PVALUE1, `Versi 2`=PVALUE2),
                                Keputusan=rep("Terima H0, data menyebar normal", 2)))), 
                   
                   ifelse((pvalue >= 0.05&(PVALUE1 < 0.05||PVALUE2 < 0.05)),
                          return(list(`Hasil Uji Kolmogorov Smirnov`= data.frame(`P-Value`=pvalue,
                                        Keputusan="Terima H0, data menyebar normal"), 
                                      `Hasil Uji Anderson`=data.frame(`P-Value`=
                                        rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2), 
                                        Keputusan= rep("Tolak H0, data tidak menyebar normal",2)), 
                                        `lambda transformasi`=lambda,
                                        `Data Hasil Transformasi Tukey`= t,
                                        `Setelah transformasi~Uji Kolmogorov-Smirnov`=
                                        data.frame(`P-Value`=pt, 
                                        `Keputusan`=ifelse(pt>=0.05, 
                                            "Terima H0, data menyebar normal",
                                            "Tolak H0, data tidak menyebar normal")))),
                          
                          ifelse(pvalue < 0.05&(PVALUE1 >= 0.05||PVALUE2 >= 0.05),
                                 return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                  Keputusan="Tolak H0, data tidak menyebar normal"), 
                                  `Hasil Uji Anderson`=data.frame(`P-Value`= rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2), 
                                    Keputusan=rep("Terima H0, data menyebar normal",2)),
                                  `lambda transformasi`=lambda,
                                  `Data Hasil Transformasi Tukey`= t,
                                  `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt, 
                                  `Keputusan`=ifelse(pt>=0.05, 
                                  "Terima H0, data menyebar normal", 
                                  "Tolak H0, data tidak menyebar normal")))),
                                 
                                 return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                               Keputusan="Tolak H0, data tidak menyebar normal"),
                                               `Hasil Uji Anderson`=data.frame(`P-Value`=rbind(`Versi 1`=PVALUE1,
                                                    `Versi 2`=PVALUE2), 
                                                                                                                                                                          Keputusan=rep("Tolak H0, data tidak menyebar normal",2)), 
                                             `lambda transformasi`=lambda,
                                             `Data Hasil Transformasi Tukey`= t,
                                             `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
                                              `Keputusan`=ifelse(pt>=0.05, 
                                              "Terima H0, data menyebar normal", 
                                              "Tolak H0, data tidak menyebar normal"))))))))
  return(z)
}

uji.normal1<-function(x, object.name="x", graph=TRUE, graph.transformed=TRUE){
  lapply(c("fitdistrplus", "kSamples", "rcompanion"), library, character.only=T) 
  if(any(x<0))x<-x-min(x)+1
  mean <- mean(x); sd <- sd(x)
  uji<-ks.test(x, "pnorm", mean=mean, sd=sd)
  uji1<- ad.test(x, rnorm(length(x), mean=mean, sd=sd))
  pvalue<-uji$p.value
  PVALUE1<-uji1$ad[1,3]
  PVALUE2<-uji1$ad[2,3]
  t<-transformTukey(x,quiet = TRUE,plotit = FALSE)
  pt<-ks.test(t, "pnorm", mean=mean(t), 
              sd=sd(t))$p.value
  lambda<-transformTukey(x,returnLambda =TRUE,quiet=TRUE,plotit = FALSE)
  if(graph==TRUE){
    if(graph.transformed==FALSE){
      par(mfrow=c(1,2))
      hist(x, freq=F, col="steelblue", border="white", 
           main=paste("Histogram of ",object.name),xlab=object.name)
      lines(density(x),lwd=2, col="coral")
      qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
    }
    else{
      par(mfrow=c(2,2))
      hist(x, freq=F, col="steelblue", border="white", 
           main=paste("Histogram of ",object.name),xlab=object.name)
      lines(density(x),lwd=2, col="coral")
      hist(t, main=paste("Histogram of ",object.name,"transformed"), 
           xlab=paste(object.name,"transformed"), freq=F, 
           col="steelblue",border = "white")
      lines(density(t),lwd=2, col="coral")
      qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
      qqnorm(t, col="coral");qqline(t,col="steelblue", lwd=2)
    }
  }
  z<-ifelse((PVALUE1>=0.05 & PVALUE2<0.05 ||PVALUE1<0.05 & PVALUE2>=0.05), 
            
            ifelse(pvalue>=0.05, 
                   return(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                    Keputusan="Terima H0, data menyebar normal")), 
                   return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                         Keputusan="Tolak H0, data tidak menyebar normal"),
                               `lambda transformasi`=lambda,
                               `Data Hasil Transformasi Tukey`= t,
                               `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt, 
                                                                                        `Keputusan`=ifelse(pt>=0.05, 
                                                                                                           "Terima H0, data menyebar normal", 
                                                                                                           "Tolak H0, data tidak menyebar normal"))))),
            
            ifelse(((pvalue >= 0.05)&(PVALUE1 >= 0.05||PVALUE2>= 0.05)), 
                   return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                         Keputusan="Terima H0, data menyebar normal"), 
                               `Hasil Uji Anderson`=
                                 data.frame(`P-Value`=
                                              rbind(`Versi 1`=PVALUE1, `Versi 2`=PVALUE2),
                                            Keputusan=rep("Terima H0, data menyebar normal", 2)))), 
                   
                   ifelse((pvalue >= 0.05&(PVALUE1 < 0.05||PVALUE2 < 0.05)),
                          return(list(`Hasil Uji Kolmogorov Smirnov`= data.frame(`P-Value`=pvalue,
                                                                                 Keputusan="Terima H0, data menyebar normal"), 
                                      `Hasil Uji Anderson`=data.frame(`P-Value`=
                                                                        rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2), 
                                                                      Keputusan= rep("Tolak H0, data tidak menyebar normal",2)), 
                                      `lambda transformasi`=lambda,
                                      `Data Hasil Transformasi Tukey`= t,
                                      `Setelah transformasi~Uji Kolmogorov-Smirnov`=
                                        data.frame(`P-Value`=pt, 
                                                   `Keputusan`=ifelse(pt>=0.05, 
                                                                      "Terima H0, data menyebar normal",
                                                                      "Tolak H0, data tidak menyebar normal")))),
                          
                          ifelse(pvalue < 0.05&(PVALUE1 >= 0.05||PVALUE2 >= 0.05),
                                 return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                                       Keputusan="Tolak H0, data tidak menyebar normal"), 
                                             `Hasil Uji Anderson`=data.frame(`P-Value`= rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2), 
                                                                             Keputusan=rep("Terima H0, data menyebar normal",2)),
                                             `lambda transformasi`=lambda,
                                             `Data Hasil Transformasi Tukey`= t,
                                             `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt, 
                                                                                                      `Keputusan`=ifelse(pt>=0.05, 
                                                                                                                         "Terima H0, data menyebar normal", 
                                                                                                                         "Tolak H0, data tidak menyebar normal")))),
                                 
                                 return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
                                                                                       Keputusan="Tolak H0, data tidak menyebar normal"),
                                             `Hasil Uji Anderson`=data.frame(`P-Value`=rbind(`Versi 1`=PVALUE1,
                                                                                             `Versi 2`=PVALUE2), 
                                                                             Keputusan=rep("Tolak H0, data tidak menyebar normal",2)), 
                                             `lambda transformasi`=lambda,
                                             `Data Hasil Transformasi Tukey`= t,
                                             `Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
                                                                                                      `Keputusan`=ifelse(pt>=0.05, 
                                                                                                                         "Terima H0, data menyebar normal", 
                                                                                                                         "Tolak H0, data tidak menyebar normal"))))))))
  return(z)
}

library(fitdistrplus)

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: survival

library(kSamples)

## Loading required package: SuppDists

library(rcompanion)
uji.normal(model$residuals,"residuals",graph.transformed = F)

## Warning in ks.test.default(x, "pnorm", mean = mean, sd = sd): ties should not
## be present for the Kolmogorov-Smirnov test

## Warning in ks.test.default(t, "pnorm", mean = fitdist(t, "norm")$estimate[1], :
## ties should not be present for the Kolmogorov-Smirnov test

## $`Hasil Uji Kolmogorov Smirnov`
##     P.Value                       Keputusan
## 1 0.4507046 Terima H0, data menyebar normal
## 
## $`Hasil Uji Anderson`
##         P.Value                       Keputusan
## Versi 1 0.33204 Terima H0, data menyebar normal
## Versi 2 0.33500 Terima H0, data menyebar normal

Didapat p-value sebesar 0,4507046 sehingga terima H0 : data menyebar normal

ks.test(model$residuals, "pnorm", mean = mean(model$residuals), sd = sd(model$residuals))

## Warning in ks.test.default(model$residuals, "pnorm", mean =
## mean(model$residuals), : ties should not be present for the Kolmogorov-Smirnov
## test

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  model$residuals
## D = 0.076694, p-value = 0.4388
## alternative hypothesis: two-sided

berdasarkan test Kolmogorov-Smirnov jufa didapat p-value sebesar 0,4388 sehingga dapat disimpulkan bahwa data menyebar normal

Uji Homoskedastisitas

par(mfrow=c(1,2));plot(model,c(1,3))

## Warning: not plotting observations with leverage one:
##   60

gqtest(model)

## 
##  Goldfeld-Quandt test
## 
## data:  model
## GQ = 0.89713, df1 = 43, df2 = 43, p-value = 0.6382
## alternative hypothesis: variance increases from segment 1 to 2

Berdasarkan Goldfeld-Quandt test didapatkan p-Value sebesar 0,6382 sehingga dapat disimpulkan bahwa ragam sisaan homogen (homoskedastisitas)

Uji Autokorelasi

bgtest(model)

## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model
## LM test = 4.8112, df = 1, p-value = 0.02828

Berdasarkan uji Breusch-Godfrey didapat p-value sebesar 0,02828 sehingga tolak H0 dan dapat diartikan bahwa terjadi pelanggaran autokorelasi data

Multikolinearitas

library(car)
vif(model)

##                      GVIF Df GVIF^(1/(2*Df))
## company          6.978501  4        1.274883
## diameter         5.334613  1        2.309678
## topping         10.322380 11        1.111939
## size_ordinal     5.833645  1        2.415294
## extra_sauce      1.246202  1        1.116334
## extra_cheese     1.292380  1        1.136829
## extra_mushrooms  1.130514  1        1.063256

Terjadi multikolinearitas pada peubah topping karena nilai VIF>10. Hal ini dapat diatasi pada pemilihan peubah menggunakan metode regresi Ridge Lasso yang umumnya digunakan dalam penanganan data multikolinearitas

Nilai sisaan sama dengan nol

t.test(model$residuals,mu=0,conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  model$residuals
## t = 4.5334e-16, df = 127, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -3607.797  3607.797
## sample estimates:
##    mean of x 
## 8.265333e-13

Didapat p-value = 1, yaitu lebih besar dari 0,05 sehingga terima H0 : nilai sisaan sama dengan nol

Penanganan Autokorelasi

library(TTR)
library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## 
## Attaching package: 'forecast'

## The following object is masked from 'package:rcompanion':
## 
##     accuracy

library(lmtest)
library(dplyr)
library(orcutt)
library(HoRM)
library(ggplot2)
library(corrplot)

## corrplot 0.92 loaded

modelCO<-cochrane.orcutt(model)
modelCO

## Cochrane-orcutt estimation for first order autocorrelation 
##  
## Call:
## lm(formula = price_rupiah ~ company + diameter + topping + size_ordinal + 
##     extra_sauce + extra_cheese + extra_mushrooms, data = df)
## 
##  number of interaction: 15
##  rho 0.282698
## 
## Durbin-Watson statistic 
## (original):    1.64631 , p-value: 2.04e-03
## (transformed): 1.90974 , p-value: 1.473e-01
##  
##  coefficients: 
##         (Intercept)            companyB            companyC            companyD 
##         -34638.2944         -31767.7828         -30144.7238         -19349.2729 
##            companyE            diameter toppingblack papper      toppingchicken 
##         -30951.3866          11086.0482         -14093.6812           5891.1825 
##         toppingmeat   toppingmozzarella    toppingmushrooms        toppingonion 
##           6518.4344           2222.6240           2969.1165          -9731.8315 
##    toppingpapperoni      toppingsausage  toppingsmoked beef         toppingtuna 
##          28482.1711          14328.1802          16417.1007            991.1608 
##   toppingvegetables        size_ordinal         extra_sauce        extra_cheese 
##          10227.3735          -3373.6892           7629.9762          -2397.5474 
##     extra_mushrooms 
##           2892.2826

bgtest(modelCO)

## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  modelCO
## LM test = 0.30658, df = 1, p-value = 0.5798

Setelah dilakukan penanganan menggunakan Cochrane Orcutt dan dites kembali menggunakan uji Breusch-Godfrey, dapat dilihat bahwa data sudah tidak ada pelanggaran autokorelasi

library(leaps)

Model Forward

forward<-regsubsets(price_rupiah~company+diameter+topping+size_ordinal+extra_sauce+extra_cheese+extra_mushrooms, nvmax=20 #banyak peubah yang muncul saat model awal;
                    ,data=df,method="forward")
sf<-summary(forward);sf

## Subset selection object
## Call: regsubsets.formula(price_rupiah ~ company + diameter + topping + 
##     size_ordinal + extra_sauce + extra_cheese + extra_mushrooms, 
##     nvmax = 20, data = df, method = "forward")
## 20 Variables  (and intercept)
##                     Forced in Forced out
## companyB                FALSE      FALSE
## companyC                FALSE      FALSE
## companyD                FALSE      FALSE
## companyE                FALSE      FALSE
## diameter                FALSE      FALSE
## toppingblack papper     FALSE      FALSE
## toppingchicken          FALSE      FALSE
## toppingmeat             FALSE      FALSE
## toppingmozzarella       FALSE      FALSE
## toppingmushrooms        FALSE      FALSE
## toppingonion            FALSE      FALSE
## toppingpapperoni        FALSE      FALSE
## toppingsausage          FALSE      FALSE
## toppingsmoked beef      FALSE      FALSE
## toppingtuna             FALSE      FALSE
## toppingvegetables       FALSE      FALSE
## size_ordinal            FALSE      FALSE
## extra_sauce             FALSE      FALSE
## extra_cheese            FALSE      FALSE
## extra_mushrooms         FALSE      FALSE
## 1 subsets of each size up to 20
## Selection Algorithm: forward
##           companyB companyC companyD companyE diameter toppingblack papper
## 1  ( 1 )  " "      " "      " "      " "      "*"      " "                
## 2  ( 1 )  " "      " "      " "      " "      "*"      " "                
## 3  ( 1 )  " "      " "      " "      " "      "*"      " "                
## 4  ( 1 )  " "      " "      " "      " "      "*"      " "                
## 5  ( 1 )  "*"      " "      " "      " "      "*"      " "                
## 6  ( 1 )  "*"      " "      " "      "*"      "*"      " "                
## 7  ( 1 )  "*"      "*"      " "      "*"      "*"      " "                
## 8  ( 1 )  "*"      "*"      "*"      "*"      "*"      " "                
## 9  ( 1 )  "*"      "*"      "*"      "*"      "*"      " "                
## 10  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "                
## 11  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 12  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 13  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 14  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 15  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 16  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 17  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 18  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 19  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 20  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
##           toppingchicken toppingmeat toppingmozzarella toppingmushrooms
## 1  ( 1 )  " "            " "         " "               " "             
## 2  ( 1 )  " "            " "         " "               " "             
## 3  ( 1 )  " "            " "         " "               " "             
## 4  ( 1 )  " "            " "         " "               " "             
## 5  ( 1 )  " "            " "         " "               " "             
## 6  ( 1 )  " "            " "         " "               " "             
## 7  ( 1 )  " "            " "         " "               " "             
## 8  ( 1 )  " "            " "         " "               " "             
## 9  ( 1 )  " "            " "         " "               " "             
## 10  ( 1 ) " "            " "         "*"               " "             
## 11  ( 1 ) " "            " "         "*"               " "             
## 12  ( 1 ) " "            " "         "*"               " "             
## 13  ( 1 ) " "            " "         "*"               " "             
## 14  ( 1 ) " "            " "         "*"               " "             
## 15  ( 1 ) " "            " "         "*"               " "             
## 16  ( 1 ) " "            "*"         "*"               " "             
## 17  ( 1 ) " "            "*"         "*"               " "             
## 18  ( 1 ) " "            "*"         "*"               "*"             
## 19  ( 1 ) " "            "*"         "*"               "*"             
## 20  ( 1 ) "*"            "*"         "*"               "*"             
##           toppingonion toppingpapperoni toppingsausage toppingsmoked beef
## 1  ( 1 )  " "          " "              " "            " "               
## 2  ( 1 )  " "          " "              " "            " "               
## 3  ( 1 )  " "          " "              " "            "*"               
## 4  ( 1 )  " "          "*"              " "            "*"               
## 5  ( 1 )  " "          "*"              " "            "*"               
## 6  ( 1 )  " "          "*"              " "            "*"               
## 7  ( 1 )  " "          "*"              " "            "*"               
## 8  ( 1 )  " "          "*"              " "            "*"               
## 9  ( 1 )  " "          "*"              " "            "*"               
## 10  ( 1 ) " "          "*"              " "            "*"               
## 11  ( 1 ) " "          "*"              " "            "*"               
## 12  ( 1 ) "*"          "*"              " "            "*"               
## 13  ( 1 ) "*"          "*"              " "            "*"               
## 14  ( 1 ) "*"          "*"              " "            "*"               
## 15  ( 1 ) "*"          "*"              "*"            "*"               
## 16  ( 1 ) "*"          "*"              "*"            "*"               
## 17  ( 1 ) "*"          "*"              "*"            "*"               
## 18  ( 1 ) "*"          "*"              "*"            "*"               
## 19  ( 1 ) "*"          "*"              "*"            "*"               
## 20  ( 1 ) "*"          "*"              "*"            "*"               
##           toppingtuna toppingvegetables size_ordinal extra_sauce extra_cheese
## 1  ( 1 )  " "         " "               " "          " "         " "         
## 2  ( 1 )  " "         " "               " "          "*"         " "         
## 3  ( 1 )  " "         " "               " "          "*"         " "         
## 4  ( 1 )  " "         " "               " "          "*"         " "         
## 5  ( 1 )  " "         " "               " "          "*"         " "         
## 6  ( 1 )  " "         " "               " "          "*"         " "         
## 7  ( 1 )  " "         " "               " "          "*"         " "         
## 8  ( 1 )  " "         " "               " "          "*"         " "         
## 9  ( 1 )  " "         " "               "*"          "*"         " "         
## 10  ( 1 ) " "         " "               "*"          "*"         " "         
## 11  ( 1 ) " "         " "               "*"          "*"         " "         
## 12  ( 1 ) " "         " "               "*"          "*"         " "         
## 13  ( 1 ) " "         " "               "*"          "*"         " "         
## 14  ( 1 ) " "         "*"               "*"          "*"         " "         
## 15  ( 1 ) " "         "*"               "*"          "*"         " "         
## 16  ( 1 ) " "         "*"               "*"          "*"         " "         
## 17  ( 1 ) " "         "*"               "*"          "*"         "*"         
## 18  ( 1 ) " "         "*"               "*"          "*"         "*"         
## 19  ( 1 ) "*"         "*"               "*"          "*"         "*"         
## 20  ( 1 ) "*"         "*"               "*"          "*"         "*"         
##           extra_mushrooms
## 1  ( 1 )  " "            
## 2  ( 1 )  " "            
## 3  ( 1 )  " "            
## 4  ( 1 )  " "            
## 5  ( 1 )  " "            
## 6  ( 1 )  " "            
## 7  ( 1 )  " "            
## 8  ( 1 )  " "            
## 9  ( 1 )  " "            
## 10  ( 1 ) " "            
## 11  ( 1 ) " "            
## 12  ( 1 ) " "            
## 13  ( 1 ) "*"            
## 14  ( 1 ) "*"            
## 15  ( 1 ) "*"            
## 16  ( 1 ) "*"            
## 17  ( 1 ) "*"            
## 18  ( 1 ) "*"            
## 19  ( 1 ) "*"            
## 20  ( 1 ) "*"

par(mfrow=c(2,2))
plot(sf$rsq, xlab="Jumlah Variabel", ylab="R2",type="l");points(1:20,sf$rsq,col=ifelse(sf$rsq==sf$rsq[which.max(sf$rsq)],"coral","steelblue"),cex=1.5,pch=16)
plot(sf$adjr2, xlab="Jumlah Variabel", ylab="Adj R2",type="l");points(1:20,sf$adjr2,col=ifelse(sf$adjr2==sf$adjr2[which.max(sf$adjr2)],"coral","steelblue"),cex=1.5,pch=16)
plot(sf$bic, xlab="Jumlah Variabel", ylab="BIC",type="l");points(1:20,sf$bic,col=ifelse(sf$bic==sf$bic[which.min(sf$bic)],"coral","steelblue"),cex=1.5,pch=16)
plot(sf$cp, xlab="Jumlah Variabel", ylab="CP",type="l");points(1:20,sf$cp,col=ifelse(sf$cp==sf$cp[which.min(sf$cp)],"coral","steelblue"),cex=1.5,pch=16)

Tampak berdasarkan plot adj R sq akan mencapai maksimal ketika 11 variabel

coef(forward,11)

##         (Intercept)            companyB            companyC            companyD 
##          -37140.813          -33372.057          -29656.775          -20941.642 
##            companyE            diameter toppingblack papper   toppingmozzarella 
##          -32212.970           11846.424          -17535.108           -7166.178 
##    toppingpapperoni  toppingsmoked beef        size_ordinal         extra_sauce 
##           17558.137           10085.357           -3716.072            8910.129

Berdasarkan model forward dengan 11 peubah terbaik yang akan memberikan nilai adj R sq terbesar terhadap peubah bebas harga pizza adalah peubah company B, company C, company D, company E, diameter, topping black papper, topping mozarella, topping papperoni, topping smoked beef, size, dan extra sauce

for_summary <- summary(forward)

# Menghitung AIC secara manual
n <- nrow(df)  # Jumlah pengamatan
k <- for_summary$which[1]  # Jumlah variabel yang dipilih dalam model terbaik

# Menghitung AIC
aic_for <- n * log(for_summary$rss[1] / n) + 2 * k
aic_for

## [1] 2595.677

#rsquared
rsq_for <- summary(forward)$rsq[which.max(summary(forward)$rsq)]
rsq_for

## [1] 0.7884006

Didapat nilai R sq 0,7884006 dan nilai AIC sebesar 2595,677

Model Backward

backward<-regsubsets(price_rupiah~company+diameter+topping+size_ordinal+extra_sauce+extra_cheese+extra_mushrooms, nvmax=20 #banyak peubah yang muncul saat model awal;
                     ,data=df,method="backward")
sb<-summary(backward);sb

## Subset selection object
## Call: regsubsets.formula(price_rupiah ~ company + diameter + topping + 
##     size_ordinal + extra_sauce + extra_cheese + extra_mushrooms, 
##     nvmax = 20, data = df, method = "backward")
## 20 Variables  (and intercept)
##                     Forced in Forced out
## companyB                FALSE      FALSE
## companyC                FALSE      FALSE
## companyD                FALSE      FALSE
## companyE                FALSE      FALSE
## diameter                FALSE      FALSE
## toppingblack papper     FALSE      FALSE
## toppingchicken          FALSE      FALSE
## toppingmeat             FALSE      FALSE
## toppingmozzarella       FALSE      FALSE
## toppingmushrooms        FALSE      FALSE
## toppingonion            FALSE      FALSE
## toppingpapperoni        FALSE      FALSE
## toppingsausage          FALSE      FALSE
## toppingsmoked beef      FALSE      FALSE
## toppingtuna             FALSE      FALSE
## toppingvegetables       FALSE      FALSE
## size_ordinal            FALSE      FALSE
## extra_sauce             FALSE      FALSE
## extra_cheese            FALSE      FALSE
## extra_mushrooms         FALSE      FALSE
## 1 subsets of each size up to 20
## Selection Algorithm: backward
##           companyB companyC companyD companyE diameter toppingblack papper
## 1  ( 1 )  " "      " "      " "      " "      "*"      " "                
## 2  ( 1 )  "*"      " "      " "      " "      "*"      " "                
## 3  ( 1 )  "*"      " "      " "      "*"      "*"      " "                
## 4  ( 1 )  "*"      "*"      " "      "*"      "*"      " "                
## 5  ( 1 )  "*"      "*"      "*"      "*"      "*"      " "                
## 6  ( 1 )  "*"      "*"      "*"      "*"      "*"      " "                
## 7  ( 1 )  "*"      "*"      "*"      "*"      "*"      " "                
## 8  ( 1 )  "*"      "*"      "*"      "*"      "*"      "*"                
## 9  ( 1 )  "*"      "*"      "*"      "*"      "*"      "*"                
## 10  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 11  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 12  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 13  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 14  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 15  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 16  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 17  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 18  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 19  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
## 20  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"                
##           toppingchicken toppingmeat toppingmozzarella toppingmushrooms
## 1  ( 1 )  " "            " "         " "               " "             
## 2  ( 1 )  " "            " "         " "               " "             
## 3  ( 1 )  " "            " "         " "               " "             
## 4  ( 1 )  " "            " "         " "               " "             
## 5  ( 1 )  " "            " "         " "               " "             
## 6  ( 1 )  " "            " "         " "               " "             
## 7  ( 1 )  " "            " "         "*"               " "             
## 8  ( 1 )  " "            " "         "*"               " "             
## 9  ( 1 )  " "            " "         "*"               " "             
## 10  ( 1 ) " "            " "         "*"               " "             
## 11  ( 1 ) " "            " "         "*"               " "             
## 12  ( 1 ) " "            " "         "*"               " "             
## 13  ( 1 ) " "            " "         "*"               " "             
## 14  ( 1 ) " "            "*"         "*"               " "             
## 15  ( 1 ) " "            "*"         "*"               " "             
## 16  ( 1 ) " "            "*"         "*"               " "             
## 17  ( 1 ) " "            "*"         "*"               " "             
## 18  ( 1 ) " "            "*"         "*"               "*"             
## 19  ( 1 ) " "            "*"         "*"               "*"             
## 20  ( 1 ) "*"            "*"         "*"               "*"             
##           toppingonion toppingpapperoni toppingsausage toppingsmoked beef
## 1  ( 1 )  " "          " "              " "            " "               
## 2  ( 1 )  " "          " "              " "            " "               
## 3  ( 1 )  " "          " "              " "            " "               
## 4  ( 1 )  " "          " "              " "            " "               
## 5  ( 1 )  " "          " "              " "            " "               
## 6  ( 1 )  " "          " "              " "            " "               
## 7  ( 1 )  " "          " "              " "            " "               
## 8  ( 1 )  " "          " "              " "            " "               
## 9  ( 1 )  " "          " "              " "            "*"               
## 10  ( 1 ) " "          " "              " "            "*"               
## 11  ( 1 ) " "          "*"              " "            "*"               
## 12  ( 1 ) " "          "*"              " "            "*"               
## 13  ( 1 ) " "          "*"              "*"            "*"               
## 14  ( 1 ) " "          "*"              "*"            "*"               
## 15  ( 1 ) " "          "*"              "*"            "*"               
## 16  ( 1 ) "*"          "*"              "*"            "*"               
## 17  ( 1 ) "*"          "*"              "*"            "*"               
## 18  ( 1 ) "*"          "*"              "*"            "*"               
## 19  ( 1 ) "*"          "*"              "*"            "*"               
## 20  ( 1 ) "*"          "*"              "*"            "*"               
##           toppingtuna toppingvegetables size_ordinal extra_sauce extra_cheese
## 1  ( 1 )  " "         " "               " "          " "         " "         
## 2  ( 1 )  " "         " "               " "          " "         " "         
## 3  ( 1 )  " "         " "               " "          " "         " "         
## 4  ( 1 )  " "         " "               " "          " "         " "         
## 5  ( 1 )  " "         " "               " "          " "         " "         
## 6  ( 1 )  " "         " "               " "          "*"         " "         
## 7  ( 1 )  " "         " "               " "          "*"         " "         
## 8  ( 1 )  " "         " "               " "          "*"         " "         
## 9  ( 1 )  " "         " "               " "          "*"         " "         
## 10  ( 1 ) " "         " "               "*"          "*"         " "         
## 11  ( 1 ) " "         " "               "*"          "*"         " "         
## 12  ( 1 ) " "         "*"               "*"          "*"         " "         
## 13  ( 1 ) " "         "*"               "*"          "*"         " "         
## 14  ( 1 ) " "         "*"               "*"          "*"         " "         
## 15  ( 1 ) " "         "*"               "*"          "*"         " "         
## 16  ( 1 ) " "         "*"               "*"          "*"         " "         
## 17  ( 1 ) " "         "*"               "*"          "*"         "*"         
## 18  ( 1 ) " "         "*"               "*"          "*"         "*"         
## 19  ( 1 ) "*"         "*"               "*"          "*"         "*"         
## 20  ( 1 ) "*"         "*"               "*"          "*"         "*"         
##           extra_mushrooms
## 1  ( 1 )  " "            
## 2  ( 1 )  " "            
## 3  ( 1 )  " "            
## 4  ( 1 )  " "            
## 5  ( 1 )  " "            
## 6  ( 1 )  " "            
## 7  ( 1 )  " "            
## 8  ( 1 )  " "            
## 9  ( 1 )  " "            
## 10  ( 1 ) " "            
## 11  ( 1 ) " "            
## 12  ( 1 ) " "            
## 13  ( 1 ) " "            
## 14  ( 1 ) " "            
## 15  ( 1 ) "*"            
## 16  ( 1 ) "*"            
## 17  ( 1 ) "*"            
## 18  ( 1 ) "*"            
## 19  ( 1 ) "*"            
## 20  ( 1 ) "*"

par(mfrow=c(2,2))
plot(sb$rsq, xlab="Jumlah Variabel", ylab="R2",type="l");points(1:20,sb$rsq,col=ifelse(sb$rsq==sb$rsq[which.max(sb$rsq)],"coral","steelblue"),cex=1.5,pch=16)
plot(sb$adjr2, xlab="Jumlah Variabel", ylab="Adj R2",type="l");points(1:20,sb$adjr2,col=ifelse(sb$adjr2==sb$adjr2[which.max(sb$adjr2)],"coral","steelblue"),cex=1.5,pch=16)
plot(sb$bic, xlab="Jumlah Variabel", ylab="BIC",type="l");points(1:20,sb$bic,col=ifelse(sb$bic==sb$bic[which.min(sb$bic)],"coral","steelblue"),cex=1.5,pch=16)
plot(sb$cp, xlab="Jumlah Variabel", ylab="CP",type="l");points(1:20,sb$cp,col=ifelse(sb$cp==sb$cp[which.min(sb$cp)],"coral","steelblue"),cex=1.5,pch=16)

Tampak berdasarkan plot adj R sq akan mencapai maksimal ketika 11 variabel

coef(backward,11)

##         (Intercept)            companyB            companyC            companyD 
##          -37140.813          -33372.057          -29656.775          -20941.642 
##            companyE            diameter toppingblack papper   toppingmozzarella 
##          -32212.970           11846.424          -17535.108           -7166.178 
##    toppingpapperoni  toppingsmoked beef        size_ordinal         extra_sauce 
##           17558.137           10085.357           -3716.072            8910.129

Berdasarkan model backward dengan 11 peubah terbaik yang akan memberikan nilai adj R sq terbesar terhadap peubah bebas harga pizza adalah peubah company B, company C, company D, company E, diameter, topping black papper, topping mozarella, topping papperoni, topping smoked beef, size, dan extra sauce

back_summary <- summary(backward)

# Menghitung AIC secara manual
n <- nrow(pizza)  # Jumlah pengamatan
k <- back_summary$which[1]  # Jumlah variabel yang dipilih dalam model terbaik

# Menghitung AIC
aic_back <- n * log(back_summary$rss[1] / n) + 2 * k
aic_back

## [1] 2595.677

#rsquared
rsq_back <- summary(backward)$rsq[which.max(summary(backward)$rsq)]
rsq_back

## [1] 0.7884006

Didapat nilai R sq 0,7884006 dan nilai AIC sebesar 2595,677

Model Stepwise

library(MASS)
step <- step(model, direction = "both")

## Start:  AIC=2584.19
## price_rupiah ~ company + diameter + topping + size_ordinal + 
##     extra_sauce + extra_cheese + extra_mushrooms
## 
##                   Df  Sum of Sq        RSS    AIC
## - topping         11 5.5932e+09 5.9629e+10 2574.8
## - extra_cheese     1 5.8861e+07 5.4095e+10 2582.3
## - extra_mushrooms  1 2.4584e+08 5.4282e+10 2582.8
## <none>                          5.4036e+10 2584.2
## - size_ordinal     1 8.6725e+08 5.4904e+10 2584.2
## - extra_sauce      1 2.6968e+09 5.6733e+10 2588.4
## - company          4 1.2585e+10 6.6621e+10 2603.0
## - diameter         1 3.7084e+10 9.1121e+10 2649.1
## 
## Step:  AIC=2574.8
## price_rupiah ~ company + diameter + size_ordinal + extra_sauce + 
##     extra_cheese + extra_mushrooms
## 
##                   Df  Sum of Sq        RSS    AIC
## - extra_cheese     1 7.4040e+04 5.9630e+10 2572.8
## - extra_mushrooms  1 7.2581e+07 5.9702e+10 2573.0
## - size_ordinal     1 8.9091e+08 6.0520e+10 2574.7
## <none>                          5.9629e+10 2574.8
## - extra_sauce      1 2.9952e+09 6.2625e+10 2579.1
## + topping         11 5.5932e+09 5.4036e+10 2584.2
## - company          4 1.5875e+10 7.5504e+10 2597.0
## - diameter         1 3.8393e+10 9.8022e+10 2636.4
## 
## Step:  AIC=2572.8
## price_rupiah ~ company + diameter + size_ordinal + extra_sauce + 
##     extra_mushrooms
## 
##                   Df  Sum of Sq        RSS    AIC
## - extra_mushrooms  1 7.3111e+07 5.9703e+10 2571.0
## - size_ordinal     1 9.1983e+08 6.0549e+10 2572.8
## <none>                          5.9630e+10 2572.8
## + extra_cheese     1 7.4040e+04 5.9629e+10 2574.8
## - extra_sauce      1 3.0043e+09 6.2634e+10 2577.1
## + topping         11 5.5344e+09 5.4095e+10 2582.3
## - company          4 1.5990e+10 7.5620e+10 2595.2
## - diameter         1 3.9002e+10 9.8632e+10 2635.2
## 
## Step:  AIC=2570.96
## price_rupiah ~ company + diameter + size_ordinal + extra_sauce
## 
##                   Df  Sum of Sq        RSS    AIC
## <none>                          5.9703e+10 2571.0
## - size_ordinal     1 9.4745e+08 6.0650e+10 2571.0
## + extra_mushrooms  1 7.3111e+07 5.9630e+10 2572.8
## + extra_cheese     1 6.0360e+05 5.9702e+10 2573.0
## - extra_sauce      1 3.0589e+09 6.2762e+10 2575.3
## + topping         11 5.3712e+09 5.4331e+10 2580.9
## - company          4 1.6166e+10 7.5868e+10 2593.6
## - diameter         1 3.9160e+10 9.8862e+10 2633.5

summary(step)

## 
## Call:
## lm(formula = price_rupiah ~ company + diameter + size_ordinal + 
##     extra_sauce, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -40555 -17008  -1573  11627  85192 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -35211      12314  -2.859  0.00501 ** 
## companyB       -33454       6872  -4.868 3.46e-06 ***
## companyC       -30965       6644  -4.660 8.24e-06 ***
## companyD       -21417       6869  -3.118  0.00228 ** 
## companyE       -32722       6576  -4.976 2.19e-06 ***
## diameter        11739       1323   8.872 8.22e-15 ***
## size_ordinal    -3899       2826  -1.380  0.17016    
## extra_sauce     10115       4080   2.480  0.01454 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22310 on 120 degrees of freedom
## Multiple R-squared:  0.7662, Adjusted R-squared:  0.7526 
## F-statistic: 56.18 on 7 and 120 DF,  p-value: < 2.2e-16

aic_step = AIC(step)
summary(step)$r.squared

## [1] 0.7662117

Didapat nilai R sq 0,7662117 dan nilai AIC sebesar 2936.207

Membandingkan AIC antar model

akurasi <- matrix(c(akurasi_awal,aic_for,aic_back,aic_step),nrow=1,ncol=4,byrow = T)
colnames(akurasi) <- c("Model Awal", "Model Forward", "Model Backward", "Model Stepwise")
row.names(akurasi) <- c("AIC")
akurasi

##     Model Awal Model Forward Model Backward Model Stepwise
## AIC   2949.442      2595.677       2595.677       2936.207

Tampak bahwa nilai AIC terkecil didapat pada model forward dan backward Setelah ditelusuri didapat pula bahwa nilai Rsq model forward dan backward bahkan nilai koefisien regresi hingga peubah yang diambil pun sama persis

Dipilih model forward

#Model Terbaik (ambil forward karena rsq terbesar dan aic terkecil tapi nilainya sama dengan backward)

forward_model <- stepAIC(model, direction = "forward")

## Start:  AIC=2584.19
## price_rupiah ~ company + diameter + topping + size_ordinal + 
##     extra_sauce + extra_cheese + extra_mushrooms

summary(forward_model)

## 
## Call:
## lm(formula = price_rupiah ~ company + diameter + topping + size_ordinal + 
##     extra_sauce + extra_cheese + extra_mushrooms, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -41621 -13511    586  10113  91721 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -44747.3    21805.4  -2.052  0.04260 *  
## companyB            -32489.7     7318.3  -4.440 2.20e-05 ***
## companyC            -33290.6    11601.1  -2.870  0.00495 ** 
## companyD            -20257.6     7332.5  -2.763  0.00675 ** 
## companyE            -31945.5     7188.3  -4.444 2.16e-05 ***
## diameter             12016.9     1402.3   8.569 8.61e-14 ***
## toppingblack papper -13976.3    22452.5  -0.622  0.53495    
## toppingchicken         500.7    17750.1   0.028  0.97755    
## toppingmeat           8124.8    15606.2   0.521  0.60371    
## toppingmozzarella    -4965.3    17419.4  -0.285  0.77616    
## toppingmushrooms      2586.5    17759.7   0.146  0.88448    
## toppingonion        -10326.3    26812.3  -0.385  0.70090    
## toppingpapperoni     20778.9    24009.8   0.865  0.38874    
## toppingsausage       12966.5    19319.2   0.671  0.50356    
## toppingsmoked beef   12656.8    17843.0   0.709  0.47966    
## toppingtuna            870.8    16021.3   0.054  0.95676    
## toppingvegetables    11143.6    15743.1   0.708  0.48058    
## size_ordinal         -3951.8     3015.6  -1.310  0.19285    
## extra_sauce          10433.1     4514.8   2.311  0.02276 *  
## extra_cheese          1632.2     4780.9   0.341  0.73347    
## extra_mushrooms       2964.9     4249.4   0.698  0.48687    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22470 on 107 degrees of freedom
## Multiple R-squared:  0.7884, Adjusted R-squared:  0.7488 
## F-statistic: 19.93 on 20 and 107 DF,  p-value: < 2.2e-16

Uji Asumsi Model Terbaik

Uji Normalitas

ks.test(forward_model$residuals, "pnorm", 
        mean = mean(forward_model$residuals), 
        sd = sd(forward_model$residuals))

## Warning in ks.test.default(forward_model$residuals, "pnorm", mean =
## mean(forward_model$residuals), : ties should not be present for the
## Kolmogorov-Smirnov test

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  forward_model$residuals
## D = 0.076694, p-value = 0.4388
## alternative hypothesis: two-sided

berdasarkan test Kolmogorov-Smirnov jufa didapat p-value sebesar 0,4388 sehingga dapat disimpulkan bahwa data menyebar normal

Uji Homoskedastisitas

par(mfrow=c(1,2));plot(forward_model,c(1,3))

## Warning: not plotting observations with leverage one:
##   60

gqtest(forward_model)

## 
##  Goldfeld-Quandt test
## 
## data:  forward_model
## GQ = 0.89713, df1 = 43, df2 = 43, p-value = 0.6382
## alternative hypothesis: variance increases from segment 1 to 2

Berdasarkan Goldfeld-Quandt test didapatkan p-Value sebesar 0,6382 sehingga dapat disimpulkan bahwa ragam sisaan homogen (homoskedastisitas)

Uji Autokorelasi

bgtest(forward_model)

## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  forward_model
## LM test = 4.8112, df = 1, p-value = 0.02828

Berdasarkan uji Breusch-Godfrey didapat p-value sebesar 0,02828 sehingga tolak H0 dan dapat diartikan bahwa terjadi pelanggaran autokorelasi data

Multikolinearitas

library(car)
vif(forward_model)

##                      GVIF Df GVIF^(1/(2*Df))
## company          6.978501  4        1.274883
## diameter         5.334613  1        2.309678
## topping         10.322380 11        1.111939
## size_ordinal     5.833645  1        2.415294
## extra_sauce      1.246202  1        1.116334
## extra_cheese     1.292380  1        1.136829
## extra_mushrooms  1.130514  1        1.063256

Nilai sisaan sama dengan nol

t.test(model$residuals,mu=0,conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  model$residuals
## t = 4.5334e-16, df = 127, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -3607.797  3607.797
## sample estimates:
##    mean of x 
## 8.265333e-13

Didapat p-value = 1, yaitu lebih besar dari 0,05 sehingga terima H0 : nilai sisaan sama dengan nol

Penanganan Autokorelasi

library(TTR)
library(forecast)
library(lmtest)
library(dplyr)
library(orcutt)
library(HoRM)
library(ggplot2)
library(corrplot)

modelCO.for<-cochrane.orcutt(forward_model)
modelCO.for

## Cochrane-orcutt estimation for first order autocorrelation 
##  
## Call:
## lm(formula = price_rupiah ~ company + diameter + topping + size_ordinal + 
##     extra_sauce + extra_cheese + extra_mushrooms, data = df)
## 
##  number of interaction: 15
##  rho 0.282698
## 
## Durbin-Watson statistic 
## (original):    1.64631 , p-value: 2.04e-03
## (transformed): 1.90974 , p-value: 1.473e-01
##  
##  coefficients: 
##         (Intercept)            companyB            companyC            companyD 
##         -34638.2944         -31767.7828         -30144.7238         -19349.2729 
##            companyE            diameter toppingblack papper      toppingchicken 
##         -30951.3866          11086.0482         -14093.6812           5891.1825 
##         toppingmeat   toppingmozzarella    toppingmushrooms        toppingonion 
##           6518.4344           2222.6240           2969.1165          -9731.8315 
##    toppingpapperoni      toppingsausage  toppingsmoked beef         toppingtuna 
##          28482.1711          14328.1802          16417.1007            991.1608 
##   toppingvegetables        size_ordinal         extra_sauce        extra_cheese 
##          10227.3735          -3373.6892           7629.9762          -2397.5474 
##     extra_mushrooms 
##           2892.2826

bgtest(modelCO.for)

## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  modelCO.for
## LM test = 0.30658, df = 1, p-value = 0.5798

Setelah dilakukan penanganan menggunakan Cochrane Orcutt dan dites kembali menggunakan uji Breusch-Godfrey, dapat dilihat bahwa data sudah tidak ada pelanggaran autokorelasi

Uji Shrinkage

library(glmnet)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loaded glmnet 4.1-7

Yang mana ke 11 peubah itu dari peubah company, diameter, topping, size_ordinal, dan extra sauce

x <- data.matrix(pizza[,c('company','diameter','topping','size_ordinal','extra_sauce')])
y <- pizza$price_rupiah

alphas <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)  # Ganti dengan nilai alpha yang sesuai
rss_values <- numeric(length(alphas))
num_parameters <- numeric(length(alphas))

for (i in 1:length(alphas)) {
  model <- glmnet(x, y, alpha=alphas[i])
  y_pred <- predict(model, newx=x, s=0)  # s=0 untuk mendapatkan model Ridge
  rss <- sum((y - y_pred)^2)
  k <- sum(model$beta != 0) + 1  # Jumlah parameter termasuk intercept
  rss_values[i] <- rss
  num_parameters[i] <- k
}

n <- length(y)  # Jumlah sampel

aic_values <- numeric(length(alphas))

for (i in 1:length(alphas)) {
  aic <- n * log(rss_values[i] / n) + 2 * num_parameters[i]
  aic_values[i] <- aic
}

best_alpha <- alphas[which.min(aic_values)]
best_alpha

## [1] 1

Karena alpha terbaik di 1 maka dipilih regresi lasso karena alpha yang digunakan regresi lasso sebesar 1

Regresi Lasso

lasso<-cv.glmnet(x,y,alpha=1);plot(lasso)

best.ll<-lasso$lambda.min
bestlasso<-glmnet(x,y,alpha=1,lambda=best.ll);coef(bestlasso)

## 6 x 1 sparse Matrix of class "dgCMatrix"
##                       s0
## (Intercept)  -53981.6321
## company       -4407.3023
## diameter      11048.2153
## topping         716.8855
## size_ordinal      .     
## extra_sauce   11016.2986

rsq<-function(bestmodel,bestlambda,x,y){
 #y duga
 y.duga <- predict(bestmodel, s = bestlambda, newx = x)

 #JKG dan JKT
 jkt <- sum((y - mean(y))^2)
 jkg <- sum((y.duga- y)^2)

#find R-Squared
rsq <- 1 - jkg/jkt
return(rsq) 
}

rsq_lasso <- rsq(bestlasso,best.ll,x,y)

Didapat nilai Rsq regresi Lasso sebesar 0,7278051

# Cari aic lasso
alpha <- 1  # 0 untuk Lasso
lambda_seq <- seq(0.01, 1, by = 0.01)  # Atur lambda sesuai kebutuhan
lasso_model <- glmnet(x, y, alpha = alpha, lambda = lambda_seq)

# Hitung RSS untuk setiap nilai lambda
rss_values <- rep(NA, length(lambda_seq))
for (i in 1:length(lambda_seq)) {
  predictions <- predict(lasso_model, newx = x, s = lambda_seq[i])
  residuals <- y - predictions
  rss_values[i] <- sum(residuals^2)
}

# Hitung jumlah parameter
num_parameters <- sapply(coef(lasso_model, s = lambda_seq), function(x) sum(x != 0))

# Hitung AIC
n <- length(y)  # Jumlah sampel
aic_values <- n * log(rss_values) + 2 * num_parameters

# Temukan lambda dengan AIC terkecil
best_lambda <- lambda_seq[which.min(aic_values)]
best_aic <- min(aic_values)
aic_lasso <- best_aic
aic_lasso

## [1] 3197.22

Serta didapat pula bahwa AIC regresi Lasso sebesar 3197,22

Membandingkan AIC model awal, model subset terbaik (forward), dan model regresi Lasso

akurasi <- matrix(c(akurasi_awal,aic_for,aic_lasso,
                    Rsq_awal, rsq_for, rsq_lasso),nrow=2,ncol=3,byrow = T)
colnames(akurasi) <- c("Model Awal", "Model Forward", "Model Lasso")
row.names(akurasi) <- c("AIC", "Rsq")
akurasi

##       Model Awal Model Forward  Model Lasso
## AIC 2949.4423597  2595.6766765 3197.2203932
## Rsq    0.7884006     0.7884006    0.7277425

Berdasarkan tabel akurasi dapat dilihat bahwa Rsq terbesar didapat pada model awal dan model forward Sedangkan untuk nilai AIC terkecil didapat pada model forward(atau backward karena di awal didapat nilai AIC, Rsq, koefisien, bahkan peubah antara kedua model tersebut sama persis) Oleh karena itu, model terbaik dalam data pizza ini adalah model forward

Diduga model regresi Lasso tidak sepenuhnya baik karena multikolinearitas yang terjadi sangat kecil, memang lebih dari 10 namun hanya dikisaran 10,89 sehingga model lasso yang notabene bekerja sebagai penanganan multikolinearitas tidak memiliki Rsq yang tinggi

PSD Individu UTS (Regresi Ridge Lasso)

G1401211094 Hanifa Rahmacindia Nasution

2023-10-06

Library

Input data

Faktor

Relevel

Model Regresi Awal

Asumsi Klasik

Uji Asumsi Normalitas

Uji Homoskedastisitas

Uji Autokorelasi

Multikolinearitas

Nilai sisaan sama dengan nol

Penanganan Autokorelasi

Model Forward

Model Backward

Model Stepwise

Membandingkan AIC antar model

Uji Asumsi Model Terbaik

Uji Normalitas

Uji Homoskedastisitas

Uji Autokorelasi

Multikolinearitas

Nilai sisaan sama dengan nol

Penanganan Autokorelasi

Uji Shrinkage

Regresi Lasso

Membandingkan AIC model awal, model subset terbaik (forward), dan model regresi Lasso