Praktikum PSD

2023-09-21

PEKAN 1

Packages

lapply(c("tidyverse","rvest","kableExtra"),library,character.only=T)[[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"

Kuliah

# Download the listing page and select every car card on it.
url <- "https://www.carsome.id/beli-mobil-bekas"
card <- html_elements(read_html(url), '.mod-card')

# Pull the <span> tags out of each card's tag list. Applied to the whole
# card set at once, paste() collapses the tags of ALL cards into one
# single comma-separated string (the per-card grouping is lost).
paste(
  html_text(
    html_elements(
      html_element(card, '.mod-card__high-tag-list'),
      'span'
    )
  ),
  collapse = ','
)
## [1] "Plat Ganjil,Electric Parking Brake,360 Camera View,Plat Genap,Sports Mode,Map Navigator,Plat Ganjil,Keyless Push Start,Sports Mode,Plat Ganjil,Plat Genap,Jarak Tempuh Rendah,Retract Mirror,Plat Genap,Sports Mode,Plat Ganjil,Electric Parking Brake,Paddle Shift,Plat Ganjil,Panoramic Sun Roof,Electric Parking Brake,Plat Genap,Retract Mirror,Paddle Shift,Plat Ganjil,360 Camera View,Keyless Push Start,Plat Ganjil,Kamera Mundur,Retract Mirror,Plat Genap,Plat Genap,Sun Roof,Retract Mirror,Plat Genap,Retract Mirror,Plat Genap,Sports Mode,Plat Genap,Jarak Tempuh Rendah,Auto Retract Mirror,Plat Ganjil,Retract Mirror,Electric Parking Brake,Plat Ganjil"
# Build one comma-separated tag string per card by running the extraction
# on each card separately.
# vapply() (instead of sapply()) guarantees a character vector even when
# the page yields no cards; sapply() would return an empty list there.
vapply(
  card,
  function(one_card) {
    one_card %>%
      html_element('.mod-card__high-tag-list') %>%
      html_elements('span') %>%
      html_text() %>%
      paste(collapse = ',')
  },
  character(1)
)
##  [1] "Plat Ganjil,Electric Parking Brake,360 Camera View"   
##  [2] "Plat Genap,Sports Mode,Map Navigator"                 
##  [3] "Plat Ganjil,Keyless Push Start,Sports Mode"           
##  [4] "Plat Ganjil"                                          
##  [5] "Plat Genap,Jarak Tempuh Rendah,Retract Mirror"        
##  [6] "Plat Genap,Sports Mode"                               
##  [7] "Plat Ganjil,Electric Parking Brake,Paddle Shift"      
##  [8] "Plat Ganjil,Panoramic Sun Roof,Electric Parking Brake"
##  [9] "Plat Genap,Retract Mirror,Paddle Shift"               
## [10] "Plat Ganjil,360 Camera View,Keyless Push Start"       
## [11] "Plat Ganjil,Kamera Mundur,Retract Mirror"             
## [12] "Plat Genap"                                           
## [13] "Plat Genap,Sun Roof,Retract Mirror"                   
## [14] "Plat Genap,Retract Mirror"                            
## [15] "Plat Genap,Sports Mode"                               
## [16] "Plat Genap,Jarak Tempuh Rendah,Auto Retract Mirror"   
## [17] "Plat Ganjil,Retract Mirror,Electric Parking Brake"    
## [18] "Plat Ganjil"

1 Page

# Scrape a single listing page (page 1) into a character matrix `data`
# with one row per car card.
url <- paste0("https://www.carsome.id/beli-mobil-bekas?pageNo=", 1)
card <- read_html(url) %>% html_elements('.mod-card')

# Feature tags, joined per card. vapply() pins the result to a character
# vector so an empty page cannot silently turn it into a list.
Detail <- vapply(
  card,
  function(one_card) {
    one_card %>%
      html_element('.mod-card__high-tag-list') %>%
      html_elements('span') %>%
      html_text() %>%
      paste(collapse = ',')
  },
  character(1)
)

# html_element() (singular) returns exactly one result per card, so every
# vector below has the same length as `card`.
Harga.Kredit.Diskon <- card %>%
  html_element('.mod-card__price__total') %>%
  html_element("strong") %>%
  html_text2()
Harga.Cash.Diskon <- card %>%
  html_element('.mod-card__price-cash') %>%
  html_text2()
Cicil <- card %>%
  html_element('.mod-tooltipMonthPay') %>%
  html_element("div") %>%
  html_text2()
# The title text contains line breaks; strip them.
Nama <- card %>%
  html_element('a.mod-card__title') %>%
  html_text2() %>%
  gsub("\n", "", .)

data <- cbind(Nama, Harga.Kredit.Diskon, Harga.Cash.Diskon, Cicil, Detail)
kable(head(data, 15), caption = "Tabel 1. Hasil Scraping Page 1")
Tabel 1. Hasil Scraping Page 1
Nama Harga.Kredit.Diskon Harga.Cash.Diskon Cicil Detail
2019 WulingALMAZ LT LUX 1.5 198.000.000 Rp 204.000.000 (Cash) Rp 4,03 jt/bln Plat Ganjil,Electric Parking Brake,360 Camera View
2019 BMW3 20I (CKD) 2.0 474.000.000 Rp 487.000.000 (Cash) Rp 9,66 jt/bln Plat Genap,Sports Mode,Map Navigator
2018 ToyotaYARIS S TRD 1.5 199.000.000 Rp 205.000.000 (Cash) Rp 4,05 jt/bln Plat Ganjil,Keyless Push Start,Sports Mode
2019 DaihatsuAYLA R 1.2 113.500.000 Rp 117.000.000 (Cash) Rp 2,31 jt/bln Plat Ganjil
2022 ToyotaAVANZA G 1.5 211.000.000 Rp 217.000.000 (Cash) Rp 4,30 jt/bln Plat Genap,Jarak Tempuh Rendah,Retract Mirror
2020 HondaBRIO SATYA E 1.2 153.000.000 Rp 158.000.000 (Cash) Rp 3,12 jt/bln Plat Genap,Sports Mode
2016 HondaHR-V E 1.5 197.000.000 Rp 203.000.000 (Cash) Rp 4,01 jt/bln Plat Ganjil,Electric Parking Brake,Paddle Shift
2020 BMWX1 SDRIVE18I XLINE 1.5 554.000.000 Rp 570.000.000 (Cash) Rp 11,29 jt/bln Plat Ganjil,Panoramic Sun Roof,Electric Parking Brake
2018 ToyotaYARIS S TRD 1.5 192.000.000 Rp 198.000.000 (Cash) Rp 3,91 jt/bln Plat Genap,Retract Mirror,Paddle Shift
2015 NissanX-TRAIL 2.5 184.000.000 Rp 190.000.000 (Cash) Rp 3,75 jt/bln Plat Ganjil,360 Camera View,Keyless Push Start
2019 NissanLIVINA VL 1.5 192.000.000 Rp 198.000.000 (Cash) Rp 3,91 jt/bln Plat Ganjil,Kamera Mundur,Retract Mirror
2017 ToyotaAGYA G TRD 1.2 101.000.000 Rp 104.500.000 (Cash) Rp 2,06 jt/bln Plat Genap
2017 ChevroletTRAX TURBO LTZ 1.4 168.000.000 Rp 173.000.000 (Cash) Rp 3,42 jt/bln Plat Genap,Sun Roof,Retract Mirror
2021 ToyotaAVANZA G 1.3 186.000.000 Rp 191.000.000 (Cash) Rp 3,79 jt/bln Plat Genap,Retract Mirror
2017 ToyotaKIJANG INNOVA REBORN G 2.0 236.000.000 Rp 243.000.000 (Cash) Rp 4,81 jt/bln Plat Genap,Sports Mode

Looping

# Scrape listing pages 1..n_pages and stack them into the data frame `df`
# (columns: Nama, Harga.Kredit.Diskon, Harga.Cash.Diskon, Cicil, Detail).
# Each page is stored in a preallocated list and bound once at the end,
# instead of growing `df` with rbind() on every iteration.
n_pages <- 10
pages <- vector("list", n_pages)
for (hal in seq_len(n_pages)) {
  url <- paste0("https://www.carsome.id/beli-mobil-bekas?pageNo=", hal)
  card <- read_html(url) %>% html_elements('.mod-card')

  # html_element() (singular) yields one result per card, so all vectors
  # below line up with `card`.
  Harga.Kredit.Diskon <- card %>%
    html_element('.mod-card__price__total') %>%
    html_element("strong") %>%
    html_text2()
  Harga.Cash.Diskon <- card %>%
    html_element('.mod-card__price-cash') %>%
    html_text2()
  Cicil <- card %>%
    html_element('.mod-tooltipMonthPay') %>%
    html_element("div") %>%
    html_text2()
  # The title text contains line breaks; strip them.
  Nama <- card %>%
    html_element('a.mod-card__title') %>%
    html_text2() %>%
    gsub("\n", "", .)
  # Feature tags joined per card; vapply() keeps this a character vector
  # even if a page returns no cards.
  Detail <- vapply(
    card,
    function(one_card) {
      one_card %>%
        html_element('.mod-card__high-tag-list') %>%
        html_elements('span') %>%
        html_text() %>%
        paste(collapse = ',')
    },
    character(1)
  )

  data <- cbind(Nama, Harga.Kredit.Diskon, Harga.Cash.Diskon, Cicil, Detail)
  pages[[hal]] <- as.data.frame(data, stringsAsFactors = FALSE)

  # Progress indicator, one line per finished page.
  cat("page ", hal, "\n", sep = "")
}
df <- do.call(rbind, pages)
## page 1
## page 2
## page 3
## page 4
## page 5
## page 6
## page 7
## page 8
## page 9
## page 10
# Render the first 15 scraped rows as Table 2.
df %>%
  head(15) %>%
  kable(caption = "Tabel 2. Hasil Scraping Page 1-10")
Tabel 2. Hasil Scraping Page 1-10
Nama Harga.Kredit.Diskon Harga.Cash.Diskon Cicil Detail
2019 WulingALMAZ LT LUX 1.5 198.000.000 Rp 204.000.000 (Cash) Rp 4,03 jt/bln Plat Ganjil,Electric Parking Brake,360 Camera View
2019 BMW3 20I (CKD) 2.0 474.000.000 Rp 487.000.000 (Cash) Rp 9,66 jt/bln Plat Genap,Sports Mode,Map Navigator
2018 ToyotaYARIS S TRD 1.5 199.000.000 Rp 205.000.000 (Cash) Rp 4,05 jt/bln Plat Ganjil,Keyless Push Start,Sports Mode
2019 DaihatsuAYLA R 1.2 113.500.000 Rp 117.000.000 (Cash) Rp 2,31 jt/bln Plat Ganjil
2022 ToyotaAVANZA G 1.5 211.000.000 Rp 217.000.000 (Cash) Rp 4,30 jt/bln Plat Genap,Jarak Tempuh Rendah,Retract Mirror
2020 HondaBRIO SATYA E 1.2 153.000.000 Rp 158.000.000 (Cash) Rp 3,12 jt/bln Plat Genap,Sports Mode
2016 HondaHR-V E 1.5 197.000.000 Rp 203.000.000 (Cash) Rp 4,01 jt/bln Plat Ganjil,Electric Parking Brake,Paddle Shift
2020 BMWX1 SDRIVE18I XLINE 1.5 554.000.000 Rp 570.000.000 (Cash) Rp 11,29 jt/bln Plat Ganjil,Panoramic Sun Roof,Electric Parking Brake
2018 ToyotaYARIS S TRD 1.5 192.000.000 Rp 198.000.000 (Cash) Rp 3,91 jt/bln Plat Genap,Retract Mirror,Paddle Shift
2015 NissanX-TRAIL 2.5 184.000.000 Rp 190.000.000 (Cash) Rp 3,75 jt/bln Plat Ganjil,360 Camera View,Keyless Push Start
2019 NissanLIVINA VL 1.5 192.000.000 Rp 198.000.000 (Cash) Rp 3,91 jt/bln Plat Ganjil,Kamera Mundur,Retract Mirror
2017 ToyotaAGYA G TRD 1.2 101.000.000 Rp 104.500.000 (Cash) Rp 2,06 jt/bln Plat Genap
2017 ChevroletTRAX TURBO LTZ 1.4 168.000.000 Rp 173.000.000 (Cash) Rp 3,42 jt/bln Plat Genap,Sun Roof,Retract Mirror
2021 ToyotaAVANZA G 1.3 186.000.000 Rp 191.000.000 (Cash) Rp 3,79 jt/bln Plat Genap,Retract Mirror
2017 ToyotaKIJANG INNOVA REBORN G 2.0 236.000.000 Rp 243.000.000 (Cash) Rp 4,81 jt/bln Plat Genap,Sports Mode

Export Data Scraping

# Save the raw scraping result as an Excel workbook in the working directory.
writexl::write_xlsx(x = df, path = "Data Scraping Mobil Bekas.xlsx")

PEKAN 2

Packages

lapply(c("readxl","stringr","ggridges","hrbrthemes"),library,character.only=T)[[1]]
##  [1] "readxl"     "kableExtra" "rvest"      "lubridate"  "forcats"   
##  [6] "stringr"    "dplyr"      "purrr"      "readr"      "tidyr"     
## [11] "tibble"     "ggplot2"    "tidyverse"  "stats"      "graphics"  
## [16] "grDevices"  "utils"      "datasets"   "methods"    "base"

Managing Data

Detail

Cleaning Detail menggunakan Excel

# Attach the manually cleaned detail columns from Detail.xlsx to `df`.
# cbind() pairs rows purely by position, so the spreadsheet must come from
# the very same scrape as `df`; the checks below make a stale or reordered
# spreadsheet visible instead of silently attaching tags to the wrong car.
detail <- read_xlsx("Detail.xlsx")
stopifnot(nrow(detail) == nrow(df))
if ("Plat" %in% names(detail)) {
  # Each scraped Detail string should start with the plate tag stored in
  # the spreadsheet for the same row.
  aligned <- startsWith(as.character(df$Detail), as.character(detail$Plat))
  if (any(!aligned, na.rm = TRUE)) {
    warning(
      sum(!aligned, na.rm = TRUE),
      " row(s) of Detail.xlsx do not match the scraped Detail column; ",
      "the spreadsheet may come from a different scrape.",
      call. = FALSE
    )
  }
}
df <- cbind(df, detail)
kable(head(df, 15), caption = "Tabel 3. Hasil Managing-Detail")
Tabel 3. Hasil Managing-Detail
Nama Harga.Kredit.Diskon Harga.Cash.Diskon Cicil Detail Plat Detail1 Detail2
2019 WulingALMAZ LT LUX 1.5 198.000.000 Rp 204.000.000 (Cash) Rp 4,03 jt/bln Plat Ganjil,Electric Parking Brake,360 Camera View Plat Genap Sports Mode Map Navigator
2019 BMW3 20I (CKD) 2.0 474.000.000 Rp 487.000.000 (Cash) Rp 9,66 jt/bln Plat Genap,Sports Mode,Map Navigator Plat Ganjil Panoramic Sun Roof Electric Parking Brake
2018 ToyotaYARIS S TRD 1.5 199.000.000 Rp 205.000.000 (Cash) Rp 4,05 jt/bln Plat Ganjil,Keyless Push Start,Sports Mode Plat Ganjil Auto Cruise Control Paddle Shift
2019 DaihatsuAYLA R 1.2 113.500.000 Rp 117.000.000 (Cash) Rp 2,31 jt/bln Plat Ganjil Plat Ganjil Electric Parking Brake 360 Camera View
2022 ToyotaAVANZA G 1.5 211.000.000 Rp 217.000.000 (Cash) Rp 4,30 jt/bln Plat Genap,Jarak Tempuh Rendah,Retract Mirror Plat Ganjil Retract Mirror Power Sliding Door
2020 HondaBRIO SATYA E 1.2 153.000.000 Rp 158.000.000 (Cash) Rp 3,12 jt/bln Plat Genap,Sports Mode Plat Genap Auto Retract Mirror Auto Cruise Control
2016 HondaHR-V E 1.5 197.000.000 Rp 203.000.000 (Cash) Rp 4,01 jt/bln Plat Ganjil,Electric Parking Brake,Paddle Shift Plat Ganjil Auto Retract Mirror Panoramic Sun Roof
2020 BMWX1 SDRIVE18I XLINE 1.5 554.000.000 Rp 570.000.000 (Cash) Rp 11,29 jt/bln Plat Ganjil,Panoramic Sun Roof,Electric Parking Brake Plat Genap Retract Mirror Paddle Shift
2018 ToyotaYARIS S TRD 1.5 192.000.000 Rp 198.000.000 (Cash) Rp 3,91 jt/bln Plat Genap,Retract Mirror,Paddle Shift Plat Ganjil Keyless Push Start Retract Mirror
2015 NissanX-TRAIL 2.5 184.000.000 Rp 190.000.000 (Cash) Rp 3,75 jt/bln Plat Ganjil,360 Camera View,Keyless Push Start Plat Ganjil 360 Camera View Keyless Push Start
2019 NissanLIVINA VL 1.5 192.000.000 Rp 198.000.000 (Cash) Rp 3,91 jt/bln Plat Ganjil,Kamera Mundur,Retract Mirror Plat Ganjil Kamera Mundur Retract Mirror
2017 ToyotaAGYA G TRD 1.2 101.000.000 Rp 104.500.000 (Cash) Rp 2,06 jt/bln Plat Genap Plat Ganjil Jarak Tempuh Rendah Electric Parking Brake
2017 ChevroletTRAX TURBO LTZ 1.4 168.000.000 Rp 173.000.000 (Cash) Rp 3,42 jt/bln Plat Genap,Sun Roof,Retract Mirror Plat Ganjil 360 Camera View Electric Parking Brake
2021 ToyotaAVANZA G 1.3 186.000.000 Rp 191.000.000 (Cash) Rp 3,79 jt/bln Plat Genap,Retract Mirror Plat Genap NA NA
2017 ToyotaKIJANG INNOVA REBORN G 2.0 236.000.000 Rp 243.000.000 (Cash) Rp 4,81 jt/bln Plat Genap,Sports Mode Plat Ganjil Auto Retract Mirror Keyless Push Start

Harga Kredit Diskon

# Drop the thousands separators (dots) and convert the credit price to numeric.
df$Harga.Kredit.Diskon <- as.numeric(
  str_remove_all(df$Harga.Kredit.Diskon, "[.]")
)
df$Harga.Kredit.Diskon
##   [1] 198000000 474000000 199000000 113500000 211000000 153000000 197000000
##   [8] 554000000 192000000 184000000 192000000 101000000 168000000 186000000
##  [15] 236000000 199000000 175000000 116500000 200000000 131000000 200000000
##  [22] 104500000 192000000 260000000 188000000 143000000 136000000 151000000
##  [29] 130000000 183000000  79500000 209000000 116500000 114500000 109500000
##  [36] 217000000 189000000 118500000 199000000 116500000 155000000 109500000
##  [43] 170000000 108500000  92000000 109500000 150000000 116500000 188000000
##  [50] 116000000 139500000 340000000 111500000 167000000 120500000 102000000
##  [57] 204000000 197000000 140000000 136000000 193000000 282000000 129500000
##  [64] 193000000 197000000 211000000 210000000 218000000 229000000 213000000
##  [71] 210000000  90000000 157000000 109500000 133500000 259000000 233000000
##  [78] 287000000 143000000 143000000 203000000 257000000 199000000 114500000
##  [85] 203000000  86000000 124500000 275000000 193000000 219000000 343000000
##  [92] 257000000 245000000 157000000 193000000 124000000 237000000 110000000
##  [99] 134000000 145000000 162000000 108000000 123500000 127000000 131000000
## [106] 110500000 140500000 195000000 119500000 204000000 124000000 209000000
## [113] 147000000 111000000 262000000 412000000 257000000 140000000 180000000
## [120] 113000000 123000000 370000000 429000000 220000000 195000000 166000000
## [127] 129000000  99000000 132000000 107000000 197000000 176000000 148000000
## [134] 222000000 232000000 170000000 278000000 109000000 100000000 206000000
## [141] 118000000 319000000  87000000 144000000 176000000 161000000 117000000
## [148] 211000000 140000000 204000000 149000000 230000000 213000000 219000000
## [155] 143000000 269000000 201000000 213000000 192000000 223000000  98000000
## [162] 185000000 164000000 190000000 360000000 119000000 139000000 207000000
## [169] 229000000 141000000 167000000 180000000 208000000 206000000  79000000
## [176] 236000000 184000000 134000000 188000000 394000000

Harga Cash Diskon

# Convert the cash price text (e.g. "Rp 204.000.000 (Cash)") to numeric:
# drop the thousands separators, then keep the first run of digits.
# str_extract() returns exactly one value (or NA) per row; the previous
# str_extract_all() returned a list, which as.numeric() cannot convert
# as soon as any row has zero or more than one digit run.
df$Harga.Cash.Diskon <- df$Harga.Cash.Diskon %>%
  str_remove_all("[.]") %>%
  str_extract("[0-9]+") %>%
  as.numeric()
df$Harga.Cash.Diskon
##   [1] 204000000 487000000 205000000 117000000 217000000 158000000 203000000
##   [8] 570000000 198000000 190000000 198000000 104500000 173000000 191000000
##  [15] 243000000 205000000 180000000 120000000 206000000 135000000 206000000
##  [22] 108000000 198000000 267000000 193000000 147000000 140500000 155000000
##  [29] 134500000 188000000  82000000 215000000 120000000 118000000 113000000
##  [36] 223000000 194000000 122000000 205000000 120000000 159000000 113000000
##  [43] 175000000 112000000  94500000 113000000 154000000 120000000 193000000
##  [50] 119500000 144000000 349000000 115000000 172000000 124000000 105500000
##  [57] 210000000 203000000 144000000 139500000 199000000 290000000 133000000
##  [64] 199000000 203000000 217000000 216000000 224000000 235000000 219000000
##  [71] 216000000  92500000 162000000 113000000 137000000 266000000 240000000
##  [78] 295000000 147000000 147000000 209000000 264000000 205000000 118000000
##  [85] 209000000  88500000 128000000 283000000 199000000 225000000 352000000
##  [92] 264000000 252000000 162000000 198000000 128000000 244000000 113500000
##  [99] 138000000 149000000 166500000 111000000 127000000 131000000 135000000
## [106] 114000000 145000000 200000000 123000000 210000000 127500000 215000000
## [113] 151000000 114500000 269000000 423000000 264000000 144500000 185000000
## [120] 116500000 126500000 380000000 440500000 226000000 200500000 171000000
## [127] 132500000 102000000 136000000 110000000 203000000 181500000 152000000
## [134] 228500000 238500000 175000000 286000000 112500000 103000000 212000000
## [141] 122000000 328000000  89500000 148000000 181500000 166000000 121000000
## [148] 217000000 144000000 210000000 153500000 236000000 219000000 225000000
## [155] 147000000 276000000 206500000 219000000 197000000 229000000 101000000
## [162] 190500000 169000000 195000000 370000000 123000000 143000000 212500000
## [169] 235500000 145500000 172000000 185000000 214000000 212000000  81500000
## [176] 243000000 189500000 138000000 193000000 405000000

Cicilan

# Convert the monthly instalment text (e.g. "Rp 4,03 jt/bln") to numeric:
# decimal comma -> decimal point, then strip letters and the slash.
# as.numeric() ignores the whitespace that is left around the number.
df$Cicil <- as.numeric(
  str_remove_all(
    str_remove_all(
      str_replace_all(df$Cicil, ",", "."),
      "[A-Za-z]+"
    ),
    "[/]"
  )
)
df$Cicil
##   [1]  4.03  9.66  4.05  2.31  4.30  3.12  4.01 11.29  3.91  3.75  3.91  2.06
##  [13]  3.42  3.79  4.81  4.05  3.57  2.37  4.08  2.67  4.08  2.13  3.91  5.30
##  [25]  3.83  2.91  2.77  3.08  2.65  3.73  1.62  4.26  2.37  2.33  2.23  4.42
##  [37]  3.85  2.41  4.05  2.37  3.16  2.23  3.46  2.21  1.87  2.23  3.06  2.37
##  [49]  3.83  2.36  2.84  6.93  2.27  3.40  2.46  2.08  4.16  4.01  2.85  2.77
##  [61]  3.93  5.75  2.64  3.93  4.01  4.30  4.28  4.44  4.67  4.34  4.28  1.83
##  [73]  3.20  2.23  2.72  5.28  4.75  5.85  2.91  2.91  4.14  5.24  4.05  2.33
##  [85]  4.14  1.75  2.54  5.60  3.93  4.46  6.99  5.24  4.99  3.20  3.93  2.53
##  [97]  4.83  2.24  2.73  2.95  3.30  2.20  2.52  2.59  2.67  2.25  2.86  3.97
## [109]  2.43  4.16  2.53  4.26  3.00  2.26  5.34  8.39  5.24  2.85  3.67  2.30
## [121]  2.51  7.54  8.74  4.48  3.97  3.38  2.63  2.02  2.69  2.18  4.01  3.59
## [133]  3.02  4.52  4.73  3.46  5.66  2.22  2.04  4.20  2.40  6.50  1.77  2.93
## [145]  3.59  3.28  2.38  4.30  2.85  4.16  3.04  4.69  4.34  4.46  2.91  5.48
## [157]  4.10  4.34  3.91  4.54  2.00  3.77  3.34  3.87  7.33  2.42  2.83  4.22
## [169]  4.67  2.87  3.40  3.67  4.24  4.20  1.61  4.81  3.75  2.73  3.83  8.03
# Record the unit in the column name. The column is selected by name, not
# by position, so the rename still hits the right column if columns are
# added or reordered upstream.
names(df)[names(df) == "Cicil"] <- "Cicil (jt/bln)"

Merek

# Known brands to look for inside the listing name.
Merek <- c("BMW", "Chevrolet", "Daihatsu", "Datsun",
           "Honda", "Ford", "Mazda", "Mitsubishi", "Nissan",
           "Suzuki", "Toyota", "Wuling")
# str_extract() returns exactly one value per row: the first brand found,
# or NA when the name contains none of the listed brands. The previous
# unlist(str_extract_all()) changed length in those cases, which either
# failed the assignment or shifted brands onto the wrong rows.
df$Merek <- str_extract(df$Nama, paste(Merek, collapse = "|"))
df$Merek
##   [1] "Wuling"     "BMW"        "Toyota"     "Daihatsu"   "Toyota"    
##   [6] "Honda"      "Honda"      "BMW"        "Toyota"     "Nissan"    
##  [11] "Nissan"     "Toyota"     "Chevrolet"  "Toyota"     "Toyota"    
##  [16] "Toyota"     "Honda"      "Toyota"     "Nissan"     "Honda"     
##  [21] "Wuling"     "Daihatsu"   "Toyota"     "Honda"      "Mitsubishi"
##  [26] "Honda"      "Honda"      "Daihatsu"   "Suzuki"     "Suzuki"    
##  [31] "Datsun"     "Honda"      "Toyota"     "Daihatsu"   "Daihatsu"  
##  [36] "Mitsubishi" "Wuling"     "Toyota"     "Wuling"     "Toyota"    
##  [41] "Honda"      "Daihatsu"   "Honda"      "Daihatsu"   "Suzuki"    
##  [46] "Daihatsu"   "Toyota"     "Daihatsu"   "Suzuki"     "Daihatsu"  
##  [51] "Suzuki"     "Honda"      "Wuling"     "Nissan"     "Toyota"    
##  [56] "Nissan"     "Wuling"     "Nissan"     "Toyota"     "Honda"     
##  [61] "Daihatsu"   "Honda"      "Toyota"     "Daihatsu"   "Daihatsu"  
##  [66] "Honda"      "Toyota"     "Mitsubishi" "Mitsubishi" "Nissan"    
##  [71] "Mitsubishi" "Daihatsu"   "Toyota"     "Daihatsu"   "Toyota"    
##  [76] "Honda"      "Mitsubishi" "Toyota"     "Honda"      "Toyota"    
##  [81] "Mitsubishi" "Honda"      "Wuling"     "Suzuki"     "Wuling"    
##  [86] "Daihatsu"   "Honda"      "Wuling"     "Daihatsu"   "Mitsubishi"
##  [91] "Honda"      "Honda"      "Toyota"     "Honda"      "Toyota"    
##  [96] "Toyota"     "Toyota"     "Suzuki"     "Suzuki"     "Toyota"    
## [101] "Suzuki"     "Toyota"     "Daihatsu"   "Honda"      "Daihatsu"  
## [106] "Daihatsu"   "Honda"      "Daihatsu"   "Suzuki"     "Wuling"    
## [111] "Honda"      "Daihatsu"   "Honda"      "Daihatsu"   "Honda"     
## [116] "Mazda"      "Honda"      "Honda"      "Honda"      "Toyota"    
## [121] "Toyota"     "Toyota"     "Mazda"      "Honda"      "Suzuki"    
## [126] "Suzuki"     "Suzuki"     "Toyota"     "Toyota"     "Toyota"    
## [131] "Toyota"     "Toyota"     "Suzuki"     "Toyota"     "Honda"     
## [136] "Wuling"     "Wuling"     "Wuling"     "Daihatsu"   "Wuling"    
## [141] "Daihatsu"   "Toyota"     "Toyota"     "Suzuki"     "Suzuki"    
## [146] "Suzuki"     "Daihatsu"   "Mitsubishi" "Daihatsu"   "Wuling"    
## [151] "Daihatsu"   "Wuling"     "Toyota"     "Toyota"     "Honda"     
## [156] "Honda"      "Honda"      "Honda"      "Honda"      "Honda"     
## [161] "Daihatsu"   "Honda"      "Honda"      "Toyota"     "Toyota"    
## [166] "Daihatsu"   "Toyota"     "Mitsubishi" "Toyota"     "Honda"     
## [171] "Daihatsu"   "Wuling"     "Wuling"     "Wuling"     "Datsun"    
## [176] "Honda"      "Honda"      "Honda"      "Mitsubishi" "Mitsubishi"

Tahun

# The listing name starts with the model year: keep its first four characters.
df$Tahun <- substring(df$Nama, first = 1, last = 4)
df$Tahun
##   [1] "2019" "2019" "2018" "2019" "2022" "2020" "2016" "2020" "2018" "2015"
##  [11] "2019" "2017" "2017" "2021" "2017" "2022" "2015" "2019" "2019" "2018"
##  [21] "2019" "2018" "2018" "2019" "2016" "2019" "2018" "2015" "2014" "2022"
##  [31] "2017" "2021" "2018" "2019" "2018" "2019" "2021" "2019" "2019" "2018"
##  [41] "2018" "2018" "2019" "2017" "2019" "2018" "2014" "2019" "2022" "2019"
##  [51] "2022" "2017" "2020" "2019" "2019" "2014" "2019" "2019" "2021" "2018"
##  [61] "2019" "2021" "2015" "2019" "2018" "2018" "2019" "2019" "2020" "2021"
##  [71] "2018" "2016" "2015" "2019" "2020" "2019" "2018" "2014" "2019" "2016"
##  [81] "2016" "2018" "2019" "2018" "2019" "2015" "2018" "2021" "2019" "2019"
##  [91] "2017" "2018" "2017" "2016" "2014" "2019" "2016" "2017" "2018" "2022"
## [101] "2018" "2016" "2020" "2014" "2020" "2018" "2018" "2021" "2013" "2019"
## [111] "2017" "2021" "2020" "2018" "2021" "2019" "2018" "2018" "2020" "2017"
## [121] "2019" "2016" "2019" "2017" "2019" "2019" "2018" "2017" "2019" "2016"
## [131] "2020" "2019" "2021" "2021" "2019" "2019" "2021" "2019" "2019" "2019"
## [141] "2019" "2017" "2014" "2019" "2019" "2019" "2018" "2018" "2020" "2019"
## [151] "2015" "2019" "2018" "2022" "2019" "2021" "2015" "2016" "2016" "2017"
## [161] "2017" "2020" "2020" "2018" "2022" "2018" "2017" "2017" "2020" "2016"
## [171] "2019" "2021" "2021" "2019" "2016" "2018" "2020" "2015" "2015" "2016"

Hasil

# Render the first 15 rows of the cleaned data as Table 4.
df %>%
  head(15) %>%
  kable(caption = "Tabel 4. Hasil Managing Final")
Tabel 4. Hasil Managing Final
Nama Harga.Kredit.Diskon Harga.Cash.Diskon Cicil (jt/bln) Detail Plat Detail1 Detail2 Merek Tahun
2019 WulingALMAZ LT LUX 1.5 198000000 204000000 4.03 Plat Ganjil,Electric Parking Brake,360 Camera View Plat Genap Sports Mode Map Navigator Wuling 2019
2019 BMW3 20I (CKD) 2.0 474000000 487000000 9.66 Plat Genap,Sports Mode,Map Navigator Plat Ganjil Panoramic Sun Roof Electric Parking Brake BMW 2019
2018 ToyotaYARIS S TRD 1.5 199000000 205000000 4.05 Plat Ganjil,Keyless Push Start,Sports Mode Plat Ganjil Auto Cruise Control Paddle Shift Toyota 2018
2019 DaihatsuAYLA R 1.2 113500000 117000000 2.31 Plat Ganjil Plat Ganjil Electric Parking Brake 360 Camera View Daihatsu 2019
2022 ToyotaAVANZA G 1.5 211000000 217000000 4.30 Plat Genap,Jarak Tempuh Rendah,Retract Mirror Plat Ganjil Retract Mirror Power Sliding Door Toyota 2022
2020 HondaBRIO SATYA E 1.2 153000000 158000000 3.12 Plat Genap,Sports Mode Plat Genap Auto Retract Mirror Auto Cruise Control Honda 2020
2016 HondaHR-V E 1.5 197000000 203000000 4.01 Plat Ganjil,Electric Parking Brake,Paddle Shift Plat Ganjil Auto Retract Mirror Panoramic Sun Roof Honda 2016
2020 BMWX1 SDRIVE18I XLINE 1.5 554000000 570000000 11.29 Plat Ganjil,Panoramic Sun Roof,Electric Parking Brake Plat Genap Retract Mirror Paddle Shift BMW 2020
2018 ToyotaYARIS S TRD 1.5 192000000 198000000 3.91 Plat Genap,Retract Mirror,Paddle Shift Plat Ganjil Keyless Push Start Retract Mirror Toyota 2018
2015 NissanX-TRAIL 2.5 184000000 190000000 3.75 Plat Ganjil,360 Camera View,Keyless Push Start Plat Ganjil 360 Camera View Keyless Push Start Nissan 2015
2019 NissanLIVINA VL 1.5 192000000 198000000 3.91 Plat Ganjil,Kamera Mundur,Retract Mirror Plat Ganjil Kamera Mundur Retract Mirror Nissan 2019
2017 ToyotaAGYA G TRD 1.2 101000000 104500000 2.06 Plat Genap Plat Ganjil Jarak Tempuh Rendah Electric Parking Brake Toyota 2017
2017 ChevroletTRAX TURBO LTZ 1.4 168000000 173000000 3.42 Plat Genap,Sun Roof,Retract Mirror Plat Ganjil 360 Camera View Electric Parking Brake Chevrolet 2017
2021 ToyotaAVANZA G 1.3 186000000 191000000 3.79 Plat Genap,Retract Mirror Plat Genap NA NA Toyota 2021
2017 ToyotaKIJANG INNOVA REBORN G 2.0 236000000 243000000 4.81 Plat Genap,Sports Mode Plat Ganjil Auto Retract Mirror Keyless Push Start Toyota 2017

Visualisasi

# Ridgeline density of the monthly instalment, one ridge per brand.
# The instalment is mapped to x and the brand to y, so the axis titles
# are set accordingly (they were previously swapped).
ggplot(df, aes(x = `Cicil (jt/bln)`, y = Merek, fill = Merek)) +
  geom_density_ridges() +
  scale_fill_brewer(palette = "RdBu") +
  theme_modern_rc(axis_title_just = "center", axis_title_size = 12) +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "none") +
  labs(
    x = "\nCicilan/Bulan (Juta Rupiah)",
    y = "Merek\n",
    title = "Sebaran Harga Cicilan/Bulan Tiap Merek Mobil"
  )

# Ridgeline density of the monthly instalment, one ridge per model year.
# The instalment is mapped to x and the year to y, so the axis titles are
# set accordingly (they were previously swapped and the x title said
# "Merek").
# NOTE(review): the ColorBrewer "Oranges" palette offers at most 9 colours;
# with more than 9 distinct years one ridge will lose its fill - confirm
# whether a larger palette is wanted.
ggplot(df, aes(x = `Cicil (jt/bln)`, y = Tahun, fill = Tahun)) +
  geom_density_ridges() +
  scale_fill_brewer(palette = "Oranges") +
  theme_modern_rc(axis_title_just = "center", axis_title_size = 12) +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "none") +
  labs(
    x = "\nCicilan/Bulan (Juta Rupiah)",
    y = "Tahun\n",
    title = "Sebaran Harga Cicilan/Bulan Tiap Tahun"
  )

# Monthly instalment per model year: one violin per year (outlined in
# coral) overlaid with box plots filled by brand.
# The y aesthetic refers to the column directly; using `df$...` inside
# aes() bypasses ggplot2's data masking and breaks as soon as the plot
# data is filtered, faceted or replaced.
ggplot(df, aes(x = Tahun, y = `Cicil (jt/bln)`, fill = Merek)) +
  geom_violin(aes(group = Tahun), col = "coral") +
  geom_boxplot() +
  scale_fill_brewer(palette = "Spectral") +
  theme_modern_rc(axis_title_just = "center", axis_title_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom",
    plot.subtitle = element_text(hjust = 0.5, color = "white")
  ) +
  labs(
    x = "\nTahun",
    y = "Cicilan/Bulan (Juta Rupiah)\n",
    title = "Sebaran Harga Cicilan/bulan Tiap Merek Mobil",
    subtitle = "Selama 10 Tahun Terakhir"
  )

PEKAN 3

Packages

lapply(c("car","lmtest"),library,character.only=T)[[1]]
##  [1] "car"        "carData"    "hrbrthemes" "ggridges"   "readxl"    
##  [6] "kableExtra" "rvest"      "lubridate"  "forcats"    "stringr"   
## [11] "dplyr"      "purrr"      "readr"      "tidyr"      "tibble"    
## [16] "ggplot2"    "tidyverse"  "stats"      "graphics"   "grDevices" 
## [21] "utils"      "datasets"   "methods"    "base"

Jadikan peubah kategorik sebagai faktor

# Treat brand and model year as categorical variables.
df[c("Merek", "Tahun")] <- lapply(df[c("Merek", "Tahun")], as.factor)

Relevel Factor sebagai kategori referensi

# Copy `df` into `DF` with "Daihatsu" moved to the first factor level, so
# it acts as the reference category for the brand dummies in the model.
DF <- mutate(df, Merek = relevel(.data$Merek, ref = "Daihatsu"))
DF$Merek
DF$Tahun
##   [1] Wuling     BMW        Toyota     Daihatsu   Toyota     Honda     
##   [7] Honda      BMW        Toyota     Nissan     Nissan     Toyota    
##  [13] Chevrolet  Toyota     Toyota     Toyota     Honda      Toyota    
##  [19] Nissan     Honda      Wuling     Daihatsu   Toyota     Honda     
##  [25] Mitsubishi Honda      Honda      Daihatsu   Suzuki     Suzuki    
##  [31] Datsun     Honda      Toyota     Daihatsu   Daihatsu   Mitsubishi
##  [37] Wuling     Toyota     Wuling     Toyota     Honda      Daihatsu  
##  [43] Honda      Daihatsu   Suzuki     Daihatsu   Toyota     Daihatsu  
##  [49] Suzuki     Daihatsu   Suzuki     Honda      Wuling     Nissan    
##  [55] Toyota     Nissan     Wuling     Nissan     Toyota     Honda     
##  [61] Daihatsu   Honda      Toyota     Daihatsu   Daihatsu   Honda     
##  [67] Toyota     Mitsubishi Mitsubishi Nissan     Mitsubishi Daihatsu  
##  [73] Toyota     Daihatsu   Toyota     Honda      Mitsubishi Toyota    
##  [79] Honda      Toyota     Mitsubishi Honda      Wuling     Suzuki    
##  [85] Wuling     Daihatsu   Honda      Wuling     Daihatsu   Mitsubishi
##  [91] Honda      Honda      Toyota     Honda      Toyota     Toyota    
##  [97] Toyota     Suzuki     Suzuki     Toyota     Suzuki     Toyota    
## [103] Daihatsu   Honda      Daihatsu   Daihatsu   Honda      Daihatsu  
## [109] Suzuki     Wuling     Honda      Daihatsu   Honda      Daihatsu  
## [115] Honda      Mazda      Honda      Honda      Honda      Toyota    
## [121] Toyota     Toyota     Mazda      Honda      Suzuki     Suzuki    
## [127] Suzuki     Toyota     Toyota     Toyota     Toyota     Toyota    
## [133] Suzuki     Toyota     Honda      Wuling     Wuling     Wuling    
## [139] Daihatsu   Wuling     Daihatsu   Toyota     Toyota     Suzuki    
## [145] Suzuki     Suzuki     Daihatsu   Mitsubishi Daihatsu   Wuling    
## [151] Daihatsu   Wuling     Toyota     Toyota     Honda      Honda     
## [157] Honda      Honda      Honda      Honda      Daihatsu   Honda     
## [163] Honda      Toyota     Toyota     Daihatsu   Toyota     Mitsubishi
## [169] Toyota     Honda      Daihatsu   Wuling     Wuling     Wuling    
## [175] Datsun     Honda      Honda      Honda      Mitsubishi Mitsubishi
## 11 Levels: Daihatsu BMW Chevrolet Datsun Honda Mazda Mitsubishi ... Wuling
##   [1] 2019 2019 2018 2019 2022 2020 2016 2020 2018 2015 2019 2017 2017 2021 2017
##  [16] 2022 2015 2019 2019 2018 2019 2018 2018 2019 2016 2019 2018 2015 2014 2022
##  [31] 2017 2021 2018 2019 2018 2019 2021 2019 2019 2018 2018 2018 2019 2017 2019
##  [46] 2018 2014 2019 2022 2019 2022 2017 2020 2019 2019 2014 2019 2019 2021 2018
##  [61] 2019 2021 2015 2019 2018 2018 2019 2019 2020 2021 2018 2016 2015 2019 2020
##  [76] 2019 2018 2014 2019 2016 2016 2018 2019 2018 2019 2015 2018 2021 2019 2019
##  [91] 2017 2018 2017 2016 2014 2019 2016 2017 2018 2022 2018 2016 2020 2014 2020
## [106] 2018 2018 2021 2013 2019 2017 2021 2020 2018 2021 2019 2018 2018 2020 2017
## [121] 2019 2016 2019 2017 2019 2019 2018 2017 2019 2016 2020 2019 2021 2021 2019
## [136] 2019 2021 2019 2019 2019 2019 2017 2014 2019 2019 2019 2018 2018 2020 2019
## [151] 2015 2019 2018 2022 2019 2021 2015 2016 2016 2017 2017 2020 2020 2018 2022
## [166] 2018 2017 2017 2020 2016 2019 2021 2021 2019 2016 2018 2020 2015 2015 2016
## Levels: 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022

Model Regresi dengan peubah dummy

# Linear regression of the monthly instalment on brand and model year;
# both predictors are factors, so lm() expands them into dummy variables.
model <- lm(`Cicil (jt/bln)` ~ Merek + Tahun, data = DF)
# Print the fitted coefficients, then the full summary table.
model
summary(model)
## 
## Call:
## lm(formula = `Cicil (jt/bln)` ~ Merek + Tahun, data = DF)
## 
## Coefficients:
##     (Intercept)         MerekBMW   MerekChevrolet      MerekDatsun  
##         2.31431          7.85866          0.40453         -1.33278  
##      MerekHonda       MerekMazda  MerekMitsubishi      MerekNissan  
##         1.21335          5.85839          1.91724          0.91946  
##     MerekSuzuki      MerekToyota      MerekWuling        Tahun2014  
##         0.11569          0.74859          1.12731          0.06956  
##       Tahun2015        Tahun2016        Tahun2017        Tahun2018  
##         0.05729          0.56578          0.70116          0.24898  
##       Tahun2019        Tahun2020        Tahun2021        Tahun2022  
##         0.39230          0.21175          1.23751          1.36069
## 
## Call:
## lm(formula = `Cicil (jt/bln)` ~ Merek + Tahun, data = DF)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7441 -0.7239 -0.0709  0.5319  3.9113 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.31431    1.08821   2.127 0.034977 *  
## MerekBMW         7.85866    0.76728  10.242  < 2e-16 ***
## MerekChevrolet   0.40453    1.08455   0.373 0.709646    
## MerekDatsun     -1.33278    0.78241  -1.703 0.090429 .  
## MerekHonda       1.21335    0.24843   4.884 2.50e-06 ***
## MerekMazda       5.85839    0.76634   7.645 1.83e-12 ***
## MerekMitsubishi  1.91724    0.35886   5.343 3.10e-07 ***
## MerekNissan      0.91946    0.44448   2.069 0.040190 *  
## MerekSuzuki      0.11569    0.33176   0.349 0.727764    
## MerekToyota      0.74859    0.25604   2.924 0.003960 ** 
## MerekWuling      1.12731    0.31953   3.528 0.000547 ***
## Tahun2014        0.06956    1.13663   0.061 0.951275    
## Tahun2015        0.05729    1.12442   0.051 0.959426    
## Tahun2016        0.56578    1.11036   0.510 0.611074    
## Tahun2017        0.70116    1.10094   0.637 0.525119    
## Tahun2018        0.24898    1.08175   0.230 0.818260    
## Tahun2019        0.39230    1.07682   0.364 0.716105    
## Tahun2020        0.21175    1.10893   0.191 0.848808    
## Tahun2021        1.23751    1.10223   1.123 0.263235    
## Tahun2022        1.36069    1.11621   1.219 0.224631    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.036 on 160 degrees of freedom
## Multiple R-squared:  0.567,  Adjusted R-squared:  0.5156 
## F-statistic: 11.03 on 19 and 160 DF,  p-value: < 2.2e-16
# Added-variable plots for the fitted regression model.
car::avPlots(model)

Uji Asumsi

Fungsi Uji Normal

# Assemble the object returned by uji.normal() / uji.normal1() from the
# p-values. All decisions use the 5% level.
#
# p_ks             p-value of the Kolmogorov-Smirnov test on the data
# p_ad1, p_ad2     the two p-values taken from the Anderson-Darling output
# lambda           Tukey lambda
# transformed      Tukey-transformed data
# p_ks_transformed p-value of the Kolmogorov-Smirnov test on `transformed`
#
# What is reported:
# * the two Anderson-Darling p-values disagree -> they are left out and only
#   the Kolmogorov-Smirnov verdict is used;
# * they agree -> they are reported in a second table;
# * any reported test rejects normality -> lambda, the transformed data and
#   the Kolmogorov-Smirnov verdict after transformation are appended.
.uji_normal_report <- function(p_ks, p_ad1, p_ad2, lambda, transformed,
                               p_ks_transformed) {
  alpha <- 0.05
  accept <- "Terima H0, data menyebar normal"
  reject <- "Tolak H0, data tidak menyebar normal"
  decide <- function(p) if (p >= alpha) accept else reject

  ks_ok <- p_ks >= alpha
  ad1_ok <- p_ad1 >= alpha
  ad2_ok <- p_ad2 >= alpha
  ad_agree <- ad1_ok == ad2_ok

  ks_table <- data.frame(`P-Value` = p_ks, Keputusan = decide(p_ks))

  # Historical behaviour kept for backward compatibility: in this one case
  # the bare data frame is returned instead of a named list.
  if (!ad_agree && ks_ok) {
    return(ks_table)
  }

  result <- list(`Hasil Uji Kolmogorov Smirnov` = ks_table)

  if (ad_agree) {
    # Both versions agree, so a single verdict applies to both rows.
    result <- c(result, list(
      `Hasil Uji Anderson` = data.frame(
        `P-Value` = rbind(`Versi 1` = p_ad1, `Versi 2` = p_ad2),
        Keputusan = rep(decide(p_ad1), 2)
      )
    ))
  }

  if (!ks_ok || (ad_agree && !ad1_ok)) {
    result <- c(result, list(
      `lambda transformasi` = lambda,
      `Data Hasil Transformasi Tukey` = transformed,
      `Setelah transformasi~Uji Kolmogorov-Smirnov` = data.frame(
        `P-Value` = p_ks_transformed,
        Keputusan = decide(p_ks_transformed)
      )
    ))
  }

  result
}

# Shared implementation of uji.normal() and uji.normal1(); the two public
# functions differ only in `estimate`, a function that maps a numeric vector
# to list(mean = , sd = ) used as the parameters of the reference normal
# distribution.
.uji_normal_impl <- function(x, object.name, graph, graph.transformed,
                             estimate) {
  # The packages are attached on every call, exactly as before, so code that
  # relies on them being on the search path afterwards keeps working.
  for (pkg in c("fitdistrplus", "kSamples", "rcompanion")) {
    library(pkg, character.only = TRUE)
  }

  # If any value is negative, shift the data so that its minimum becomes 1
  # (presumably so that the Tukey power/log transformation is defined).
  # All tests and plots below use the shifted data.
  if (any(x < 0)) x <- x - min(x) + 1

  # Kolmogorov-Smirnov test against a normal with the estimated parameters.
  est_x <- estimate(x)
  p_ks <- ks.test(x, "pnorm", mean = est_x$mean, sd = est_x$sd)$p.value

  # Two-sample Anderson-Darling test of the data against a *simulated* normal
  # sample: the result changes from run to run unless the caller sets a seed.
  # The function is namespace-qualified so that an `ad.test` from another
  # attached package cannot mask it.
  ad_table <- kSamples::ad.test(
    x, rnorm(length(x), mean = est_x$mean, sd = est_x$sd)
  )$ad
  p_ad1 <- ad_table[1, 3]
  p_ad2 <- ad_table[2, 3]

  # transformTukey() returns either the transformed vector or lambda,
  # depending on `returnLambda`, hence the two calls.
  transformed <- rcompanion::transformTukey(x, quiet = TRUE, plotit = FALSE)
  est_t <- estimate(transformed)
  p_ks_transformed <- ks.test(transformed, "pnorm",
                              mean = est_t$mean, sd = est_t$sd)$p.value
  lambda <- rcompanion::transformTukey(x, returnLambda = TRUE, quiet = TRUE,
                                       plotit = FALSE)

  if (graph) {
    draw_hist <- function(v, label) {
      hist(v, freq = FALSE, col = "steelblue", border = "white",
           main = paste("Histogram of ", label), xlab = label)
      lines(density(v), lwd = 2, col = "coral")
    }
    draw_qq <- function(v) {
      qqnorm(v, col = "coral")
      qqline(v, col = "steelblue", lwd = 2)
    }

    # One row (histogram, Q-Q plot) for the data alone, or a 2 x 2 grid with
    # the transformed data in the second column. The previous layout is
    # restored when the function exits.
    old_par <- par(mfrow = if (graph.transformed) c(2, 2) else c(1, 2))
    on.exit(par(old_par), add = TRUE)

    draw_hist(x, object.name)
    if (graph.transformed) {
      draw_hist(transformed, paste(object.name, "transformed"))
    }
    draw_qq(x)
    if (graph.transformed) {
      draw_qq(transformed)
    }
  }

  .uji_normal_report(p_ks, p_ad1, p_ad2, lambda, transformed,
                     p_ks_transformed)
}

# Normality check with the normal parameters estimated by
# fitdistrplus::fitdist(., "norm").
#
# x                 numeric vector to test
# object.name       label used in plot titles and axis labels
# graph             draw histogram and normal Q-Q plot?
# graph.transformed also draw them for the Tukey-transformed data?
#
# Returns a named list of result tables (see .uji_normal_report()); when the
# two Anderson-Darling p-values disagree and the Kolmogorov-Smirnov test does
# not reject, a single data frame is returned instead.
uji.normal <- function(x, object.name = "x", graph = TRUE,
                       graph.transformed = TRUE) {
  .uji_normal_impl(
    x, object.name, graph, graph.transformed,
    estimate = function(v) {
      # Fit once and reuse both parameters (first = mean, second = sd).
      est <- fitdistrplus::fitdist(v, "norm")$estimate
      list(mean = est[[1]], sd = est[[2]])
    }
  )
}

# Same as uji.normal(), but the normal parameters are the sample mean() and
# sd() of the data instead of the fitdist() estimates.
uji.normal1 <- function(x, object.name = "x", graph = TRUE,
                        graph.transformed = TRUE) {
  .uji_normal_impl(
    x, object.name, graph, graph.transformed,
    estimate = function(v) list(mean = mean(v), sd = sd(v))
  )
}

Normalitas

# Normality check of the model residuals; plots for the raw residuals only.
uji.normal(
  model$residuals,
  object.name = "residuals",
  graph.transformed = FALSE
)

## $`Hasil Uji Kolmogorov Smirnov`
##   P.Value                       Keputusan
## 1   0.353 Terima H0, data menyebar normal
## 
## $`Hasil Uji Anderson`
##         P.Value                       Keputusan
## Versi 1 0.10401 Terima H0, data menyebar normal
## Versi 2 0.10441 Terima H0, data menyebar normal

Normalitas sisaan terpenuhi (p-value > 0,05)

Homoskedastisitas dan Non-Autokorelasi

# Show diagnostic plots 1 and 3 of the fitted model side by side.
par(mfrow = c(1, 2))
plot(model, which = c(1, 3))

# Goldfeld-Quandt test, then Breusch-Godfrey test, on the fitted model.
gqtest(model)
bgtest(model)
## 
##  Goldfeld-Quandt test
## 
## data:  model
## GQ = 1.6863, df1 = 70, df2 = 70, p-value = 0.01516
## alternative hypothesis: variance increases from segment 1 to 2
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model
## LM test = 1.4281, df = 1, p-value = 0.2321

Non-autokorelasi pada sisaan terpenuhi (uji Breusch-Godfrey, p-value = 0,2321 > 0,05), tetapi homoskedastisitas tidak terpenuhi (uji Goldfeld-Quandt, p-value = 0,01516 < 0,05)

PEKAN 4

Packages

library(leaps)

Metode Forward

# Forward subset selection for the instalment model; store the summary and
# print it.
forward <- regsubsets(`Cicil (jt/bln)` ~ Merek + Tahun, DF, method = "forward")
sf <- summary(forward)
sf
## Subset selection object
## Call: regsubsets.formula(`Cicil (jt/bln)` ~ Merek + Tahun, DF, method = "forward")
## 19 Variables  (and intercept)
##                 Forced in Forced out
## MerekBMW            FALSE      FALSE
## MerekChevrolet      FALSE      FALSE
## MerekDatsun         FALSE      FALSE
## MerekHonda          FALSE      FALSE
## MerekMazda          FALSE      FALSE
## MerekMitsubishi     FALSE      FALSE
## MerekNissan         FALSE      FALSE
## MerekSuzuki         FALSE      FALSE
## MerekToyota         FALSE      FALSE
## MerekWuling         FALSE      FALSE
## Tahun2014           FALSE      FALSE
## Tahun2015           FALSE      FALSE
## Tahun2016           FALSE      FALSE
## Tahun2017           FALSE      FALSE
## Tahun2018           FALSE      FALSE
## Tahun2019           FALSE      FALSE
## Tahun2020           FALSE      FALSE
## Tahun2021           FALSE      FALSE
## Tahun2022           FALSE      FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: forward
##          MerekBMW MerekChevrolet MerekDatsun MerekHonda MerekMazda
## 1  ( 1 ) "*"      " "            " "         " "        " "       
## 2  ( 1 ) "*"      " "            " "         " "        "*"       
## 3  ( 1 ) "*"      " "            " "         " "        "*"       
## 4  ( 1 ) "*"      " "            " "         " "        "*"       
## 5  ( 1 ) "*"      " "            " "         "*"        "*"       
## 6  ( 1 ) "*"      " "            " "         "*"        "*"       
## 7  ( 1 ) "*"      " "            " "         "*"        "*"       
## 8  ( 1 ) "*"      " "            " "         "*"        "*"       
##          MerekMitsubishi MerekNissan MerekSuzuki MerekToyota MerekWuling
## 1  ( 1 ) " "             " "         " "         " "         " "        
## 2  ( 1 ) " "             " "         " "         " "         " "        
## 3  ( 1 ) "*"             " "         " "         " "         " "        
## 4  ( 1 ) "*"             " "         " "         " "         " "        
## 5  ( 1 ) "*"             " "         " "         " "         " "        
## 6  ( 1 ) "*"             " "         " "         " "         " "        
## 7  ( 1 ) "*"             " "         " "         " "         "*"        
## 8  ( 1 ) "*"             " "         " "         "*"         "*"        
##          Tahun2014 Tahun2015 Tahun2016 Tahun2017 Tahun2018 Tahun2019 Tahun2020
## 1  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 2  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 3  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 4  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 5  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 6  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 7  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 8  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
##          Tahun2021 Tahun2022
## 1  ( 1 ) " "       " "      
## 2  ( 1 ) " "       " "      
## 3  ( 1 ) " "       " "      
## 4  ( 1 ) "*"       " "      
## 5  ( 1 ) "*"       " "      
## 6  ( 1 ) "*"       "*"      
## 7  ( 1 ) "*"       "*"      
## 8  ( 1 ) "*"       "*"
# Plot R2, adjusted R2, BIC and Cp against the number of variables for the
# forward selection summary `sf`, highlighting the best size for each
# criterion (maximum for R2 / adjusted R2, minimum for BIC / Cp).
# The x positions come from the length of each criterion vector instead of a
# hard-coded 1:8, so the plots still work when the number of sizes differs.
par(mfrow = c(2, 2))
local({
  criteria <- list(
    list(values = sf$rsq, label = "R2", pick = which.max),
    list(values = sf$adjr2, label = "Adj R2", pick = which.max),
    list(values = sf$bic, label = "BIC", pick = which.min),
    list(values = sf$cp, label = "CP", pick = which.min)
  )
  for (crit in criteria) {
    best <- crit$values[crit$pick(crit$values)]
    plot(crit$values, xlab = "Jumlah Variabel", ylab = crit$label, type = "l")
    points(seq_along(crit$values), crit$values,
           col = ifelse(crit$values == best, "coral", "steelblue"),
           cex = 1.5, pch = 16)
  }
})

Jumlah dummy variabel yang digunakan sebanyak 8 jika berdasarkan R2

Metode Backward

# Backward subset selection for the instalment model; store the summary and
# print it.
backward <- regsubsets(`Cicil (jt/bln)` ~ Merek + Tahun, DF, method = "backward")
sb <- summary(backward)
sb
## Subset selection object
## Call: regsubsets.formula(`Cicil (jt/bln)` ~ Merek + Tahun, DF, method = "backward")
## 19 Variables  (and intercept)
##                 Forced in Forced out
## MerekBMW            FALSE      FALSE
## MerekChevrolet      FALSE      FALSE
## MerekDatsun         FALSE      FALSE
## MerekHonda          FALSE      FALSE
## MerekMazda          FALSE      FALSE
## MerekMitsubishi     FALSE      FALSE
## MerekNissan         FALSE      FALSE
## MerekSuzuki         FALSE      FALSE
## MerekToyota         FALSE      FALSE
## MerekWuling         FALSE      FALSE
## Tahun2014           FALSE      FALSE
## Tahun2015           FALSE      FALSE
## Tahun2016           FALSE      FALSE
## Tahun2017           FALSE      FALSE
## Tahun2018           FALSE      FALSE
## Tahun2019           FALSE      FALSE
## Tahun2020           FALSE      FALSE
## Tahun2021           FALSE      FALSE
## Tahun2022           FALSE      FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: backward
##          MerekBMW MerekChevrolet MerekDatsun MerekHonda MerekMazda
## 1  ( 1 ) "*"      " "            " "         " "        " "       
## 2  ( 1 ) "*"      " "            " "         " "        "*"       
## 3  ( 1 ) "*"      " "            " "         " "        "*"       
## 4  ( 1 ) "*"      " "            " "         "*"        "*"       
## 5  ( 1 ) "*"      " "            " "         "*"        "*"       
## 6  ( 1 ) "*"      " "            " "         "*"        "*"       
## 7  ( 1 ) "*"      " "            " "         "*"        "*"       
## 8  ( 1 ) "*"      " "            " "         "*"        "*"       
##          MerekMitsubishi MerekNissan MerekSuzuki MerekToyota MerekWuling
## 1  ( 1 ) " "             " "         " "         " "         " "        
## 2  ( 1 ) " "             " "         " "         " "         " "        
## 3  ( 1 ) "*"             " "         " "         " "         " "        
## 4  ( 1 ) "*"             " "         " "         " "         " "        
## 5  ( 1 ) "*"             " "         " "         " "         "*"        
## 6  ( 1 ) "*"             " "         " "         "*"         "*"        
## 7  ( 1 ) "*"             " "         " "         "*"         "*"        
## 8  ( 1 ) "*"             " "         " "         "*"         "*"        
##          Tahun2014 Tahun2015 Tahun2016 Tahun2017 Tahun2018 Tahun2019 Tahun2020
## 1  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 2  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 3  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 4  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 5  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 6  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 7  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
## 8  ( 1 ) " "       " "       " "       " "       " "       " "       " "      
##          Tahun2021 Tahun2022
## 1  ( 1 ) " "       " "      
## 2  ( 1 ) " "       " "      
## 3  ( 1 ) " "       " "      
## 4  ( 1 ) " "       " "      
## 5  ( 1 ) " "       " "      
## 6  ( 1 ) " "       " "      
## 7  ( 1 ) "*"       " "      
## 8  ( 1 ) "*"       "*"
# Plot R2, adjusted R2, BIC and Cp against the number of variables for the
# backward selection summary `sb`, highlighting the best size for each
# criterion (maximum for R2 / adjusted R2, minimum for BIC / Cp).
# The x positions come from the length of each criterion vector instead of a
# hard-coded 1:8, so the plots still work when the number of sizes differs.
par(mfrow = c(2, 2))
local({
  criteria <- list(
    list(values = sb$rsq, label = "R2", pick = which.max),
    list(values = sb$adjr2, label = "Adj R2", pick = which.max),
    list(values = sb$bic, label = "BIC", pick = which.min),
    list(values = sb$cp, label = "CP", pick = which.min)
  )
  for (crit in criteria) {
    best <- crit$values[crit$pick(crit$values)]
    plot(crit$values, xlab = "Jumlah Variabel", ylab = crit$label, type = "l")
    points(seq_along(crit$values), crit$values,
           col = ifelse(crit$values == best, "coral", "steelblue"),
           cex = 1.5, pch = 16)
  }
})

Jumlah dummy variabel yang digunakan sebanyak 8 jika berdasarkan R2

Model Final

# Coefficients of the 8-variable model found by the backward search.
coef(backward, id = 8)
##     (Intercept)        MerekBMW      MerekHonda      MerekMazda MerekMitsubishi 
##       2.7553536       7.7196464       1.1174198       5.8096464       1.8588131 
##     MerekToyota     MerekWuling       Tahun2021       Tahun2022 
##       0.6794349       1.0486764       0.9226860       1.0062496

PEKAN 5

Packages

lapply(c("glmnet","lmridge"),library,character.only=T)[[1]]
##  [1] "glmnet"       "Matrix"       "leaps"        "rcompanion"   "kSamples"    
##  [6] "SuppDists"    "fitdistrplus" "survival"     "MASS"         "lmtest"      
## [11] "zoo"          "car"          "carData"      "hrbrthemes"   "ggridges"    
## [16] "readxl"       "kableExtra"   "rvest"        "lubridate"    "forcats"     
## [21] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [26] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [31] "grDevices"    "utils"        "datasets"     "methods"      "base"

Regresi Ridge (glmnet)

Peubah

# Response vector and predictor matrix taken from the `economics` data.
y <- economics$unemploy
x <- data.matrix(economics[, c("pop", "psavert", "uempmed", "pce")])

CV

# Cross-validate the ridge fit (alpha = 0) and plot the cross-validation curve.
cv.r <- cv.glmnet(x, y, alpha = 0)
plot(cv.r)

Best Model

# Refit the ridge model at the lambda stored as `lambda.min` by the
# cross-validation, then print its coefficients.
best.lr <- cv.r$lambda.min
bestridge <- glmnet(x, y, alpha = 0, lambda = best.lr)
coef(bestridge)
## 5 x 1 sparse Matrix of class "dgCMatrix"
##                        s0
## (Intercept) -1.445024e+03
## pop          2.093831e-02
## psavert      3.951673e+01
## uempmed      4.704123e+02
## pce         -1.154226e-01

Fungsi R-Square

# Coefficient of determination of a fitted model on the data (x, y).
#
# bestmodel   fitted model object accepted by predict(..., s = , newx = )
# bestlambda  penalty value passed to predict() as `s`
# x           predictor matrix passed to predict() as `newx`
# y           observed response
#
# Returns 1 - (residual sum of squares) / (total sum of squares).
rsq <- function(bestmodel, bestlambda, x, y) {
  fitted_values <- predict(bestmodel, s = bestlambda, newx = x)
  ss_residual <- sum((fitted_values - y)^2)
  ss_total <- sum((y - mean(y))^2)
  1 - ss_residual / ss_total
}

R-Square Ridge

# R-squared of the selected ridge model on the data it was fitted to.
rsq(bestmodel = bestridge, bestlambda = best.lr, x = x, y = y)
## [1] 0.7666784

Regresi Lasso (glmnet)

CV

# Cross-validate the lasso fit (alpha = 1) and plot the cross-validation curve.
cv.l <- cv.glmnet(x, y, alpha = 1)
plot(cv.l)

Best Model

# Refit the lasso model at the lambda stored as `lambda.min` by the
# cross-validation, then print its coefficients.
best.ll <- cv.l$lambda.min
bestlasso <- glmnet(x, y, alpha = 1, lambda = best.ll)
coef(bestlasso)
## 5 x 1 sparse Matrix of class "dgCMatrix"
##                        s0
## (Intercept) -3.266128e+04
## pop          1.612626e-01
## psavert      1.734076e+02
## uempmed      5.779452e+02
## pce         -1.555702e+00

R-Square Lasso

# R-squared of the selected lasso model on the data it was fitted to.
rsq(bestmodel = bestlasso, bestlambda = best.ll, x = x, y = y)
## [1] 0.855554

Regresi Ridge (lmridge)

# Fit the ridge regression with lmridge (centered scaling), draw its plot,
# then print the variance inflation factors.
lmr <- lmridge(`Cicil (jt/bln)` ~ Merek + Tahun, DF, scaling = "centered")
plot(lmr)
vif(lmr)

##     MerekBMW MerekChevrolet MerekDatsun MerekHonda MerekMazda MerekMitsubishi
## k=0  0.54809        1.09507     0.56991    0.05746    0.54674         0.11989
##     MerekNissan MerekSuzuki MerekToyota MerekWuling Tahun2014 Tahun2015
## k=0     0.18393     0.10247     0.06103     0.09505   1.20277   1.17707
##     Tahun2016 Tahun2017 Tahun2018 Tahun2019 Tahun2020 Tahun2021 Tahun2022
## k=0   1.14782   1.12842   1.08943   1.07951   1.14485   1.13107   1.15994
# Print the summary of the lmridge fit `lmr`.
summary(lmr)
## 
## Call:
## lmridge.default(formula = `Cicil (jt/bln)` ~ Merek + Tahun, data = DF, 
##     scaling = "centered")
## 
## 
## Coefficients: for Ridge parameter K= 0 
##                 Estimate Estimate (Sc) StdErr (Sc) t-value (Sc) Pr(>|t|)    
## Intercept         2.3143        2.3143      0.4705       4.9192   <2e-16 ***
## MerekBMW          7.8587        7.8587      0.7649      10.2742   <2e-16 ***
## MerekChevrolet    0.4045        0.4045      1.0812       0.3742   0.7088    
## MerekDatsun      -1.3328       -1.3328      0.7800      -1.7087   0.0894 .  
## MerekHonda        1.2133        1.2134      0.2477       4.8994   <2e-16 ***
## MerekMazda        5.8584        5.8584      0.7640       7.6685   <2e-16 ***
## MerekMitsubishi   1.9172        1.9172      0.3577       5.3592   <2e-16 ***
## MerekNissan       0.9195        0.9195      0.4431       2.0751   0.0396 *  
## MerekSuzuki       0.1157        0.1157      0.3307       0.3498   0.7269    
## MerekToyota       0.7486        0.7486      0.2552       2.9329   0.0039 ** 
## MerekWuling       1.1273        1.1273      0.3185       3.5391   0.0005 ***
## Tahun2014         0.0696        0.0696      1.1331       0.0614   0.9511    
## Tahun2015         0.0573        0.0573      1.1209       0.0511   0.9593    
## Tahun2016         0.5658        0.5658      1.1069       0.5111   0.6100    
## Tahun2017         0.7012        0.7012      1.0975       0.6389   0.5238    
## Tahun2018         0.2490        0.2490      1.0784       0.2309   0.8177    
## Tahun2019         0.3923        0.3923      1.0735       0.3655   0.7153    
## Tahun2020         0.2118        0.2117      1.1055       0.1915   0.8483    
## Tahun2021         1.2375        1.2375      1.0988       1.1262   0.2618    
## Tahun2022         1.3607        1.3607      1.1127       1.2228   0.2232    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Ridge Summary
##         R2     adj-R2   DF ridge          F        AIC        BIC 
##    0.56700    0.51860   19.00020   11.09653   29.67161 1025.07066 
## Ridge minimum MSE= 14.56072 at K= 0 
## P-value for F-test ( 19.0002 , 161.0001 ) = 1.023754e-20 
## -------------------------------------------------------------------