#Praktikum 3. Regresi Logistik
#Regresi Logistik Multinomial dan Ordinal dengan tidymodels
#===Package
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.4.1 ──
## ✔ broom 1.0.9 ✔ rsample 1.3.1
## ✔ dials 1.4.2 ✔ tailor 0.1.0
## ✔ infer 1.0.9 ✔ tune 2.0.0
## ✔ modeldata 1.5.1 ✔ workflows 1.3.0
## ✔ parsnip 1.3.3 ✔ workflowsets 1.1.1
## ✔ recipes 1.3.1 ✔ yardstick 1.3.2
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
library(DataExplorer)
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
##
## The following object is masked from 'package:workflows':
##
## update_formula
library(broom.helpers)
#===Import Data
cust <- read_csv("D:/Kuliah/IPB 2025 Semester 3/Pemodelan Klasifikasi/Praktikum/Praktikum 3 dan 4/train.csv")
## Rows: 8068 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Gender, Ever_Married, Graduated, Profession, Spending_Score, Var_1,...
## dbl (4): ID, Age, Work_Experience, Family_Size
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(cust)
## Rows: 8,068
## Columns: 11
## $ ID <dbl> 462809, 462643, 466315, 461735, 462669, 461319, 460156…
## $ Gender <chr> "Male", "Female", "Female", "Male", "Female", "Male", …
## $ Ever_Married <chr> "No", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "No", "…
## $ Age <dbl> 22, 38, 67, 67, 40, 56, 32, 33, 61, 55, 26, 19, 19, 70…
## $ Graduated <chr> "No", "Yes", "Yes", "Yes", "Yes", "No", "Yes", "Yes", …
## $ Profession <chr> "Healthcare", "Engineer", "Engineer", "Lawyer", "Enter…
## $ Work_Experience <dbl> 1, NA, 1, 0, NA, 0, 1, 1, 0, 1, 1, 4, 0, NA, 0, 1, 9, …
## $ Spending_Score <chr> "Low", "Average", "Low", "High", "High", "Average", "L…
## $ Family_Size <dbl> 4, 3, 1, 2, 6, 2, 3, 3, 3, 4, 3, 4, NA, 1, 1, 2, 5, 6,…
## $ Var_1 <chr> "Cat_4", "Cat_4", "Cat_6", "Cat_6", "Cat_6", "Cat_6", …
## $ Segmentation <chr> "D", "A", "B", "B", "A", "C", "C", "D", "D", "C", "A",…
#Diilustrasi kali ini kita hanya akan menggunakan 400 amatan saja dengan 100 setiap kelas pada variabel respon Segmentation
set.seed(2023)
cust <- cust %>% slice_sample(n = 100,by = Segmentation)
glimpse(cust)
## Rows: 400
## Columns: 11
## $ ID <dbl> 460101, 465365, 461953, 467127, 464946, 462091, 459331…
## $ Gender <chr> "Female", "Male", "Male", "Male", "Male", "Male", "Fem…
## $ Ever_Married <chr> "No", "No", "Yes", "Yes", "Yes", "No", "No", "Yes", "N…
## $ Age <dbl> 31, 31, 33, 70, 53, 18, 70, 33, 22, 79, 25, 19, 39, 28…
## $ Graduated <chr> "Yes", "No", "No", "No", "Yes", "No", "Yes", "Yes", "N…
## $ Profession <chr> "Healthcare", "Entertainment", "Healthcare", "Lawyer",…
## $ Work_Experience <dbl> 9, 1, 11, NA, 9, 0, 0, 8, NA, 1, 12, 1, 9, 7, NA, 4, N…
## $ Spending_Score <chr> "Low", "Low", "High", "Low", "Low", "Low", "Low", "Hig…
## $ Family_Size <dbl> 3, 4, 2, 1, 1, 5, 1, 2, 4, 1, 1, 4, 6, 4, 1, 2, 4, 3, …
## $ Var_1 <chr> "Cat_3", "Cat_2", "Cat_6", "Cat_6", "Cat_4", "Cat_4", …
## $ Segmentation <chr> "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D",…
#Kemudian kita akan menghapus kolom ID
cust <- cust %>%
select(-ID)
glimpse(cust)
## Rows: 400
## Columns: 10
## $ Gender <chr> "Female", "Male", "Male", "Male", "Male", "Male", "Fem…
## $ Ever_Married <chr> "No", "No", "Yes", "Yes", "Yes", "No", "No", "Yes", "N…
## $ Age <dbl> 31, 31, 33, 70, 53, 18, 70, 33, 22, 79, 25, 19, 39, 28…
## $ Graduated <chr> "Yes", "No", "No", "No", "Yes", "No", "Yes", "Yes", "N…
## $ Profession <chr> "Healthcare", "Entertainment", "Healthcare", "Lawyer",…
## $ Work_Experience <dbl> 9, 1, 11, NA, 9, 0, 0, 8, NA, 1, 12, 1, 9, 7, NA, 4, N…
## $ Spending_Score <chr> "Low", "Low", "High", "Low", "Low", "Low", "Low", "Hig…
## $ Family_Size <dbl> 3, 4, 2, 1, 1, 5, 1, 2, 4, 1, 1, 4, 6, 4, 1, 2, 4, 3, …
## $ Var_1 <chr> "Cat_3", "Cat_2", "Cat_6", "Cat_6", "Cat_4", "Cat_4", …
## $ Segmentation <chr> "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D",…
#mengubah ke factor
cust <- cust %>%
mutate(across(where(is.character),as.factor))
glimpse(cust)
## Rows: 400
## Columns: 10
## $ Gender <fct> Female, Male, Male, Male, Male, Male, Female, Female, …
## $ Ever_Married <fct> No, No, Yes, Yes, Yes, No, No, Yes, No, Yes, No, No, Y…
## $ Age <dbl> 31, 31, 33, 70, 53, 18, 70, 33, 22, 79, 25, 19, 39, 28…
## $ Graduated <fct> Yes, No, No, No, Yes, No, Yes, Yes, No, NA, Yes, No, N…
## $ Profession <fct> Healthcare, Entertainment, Healthcare, Lawyer, Executi…
## $ Work_Experience <dbl> 9, 1, 11, NA, 9, 0, 0, 8, NA, 1, 12, 1, 9, 7, NA, 4, N…
## $ Spending_Score <fct> Low, Low, High, Low, Low, Low, Low, High, Low, Low, Lo…
## $ Family_Size <dbl> 3, 4, 2, 1, 1, 5, 1, 2, 4, 1, 1, 4, 6, 4, 1, 2, 4, 3, …
## $ Var_1 <fct> Cat_3, Cat_2, Cat_6, Cat_6, Cat_4, Cat_4, Cat_6, Cat_6…
## $ Segmentation <fct> D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, …
#===Ekplorasi Data
#1. PLot na
plot_intro(cust,theme_config = theme_classic())

#menghapus missing value
cust <- cust %>%
na.omit
#Boxplot variabel kontinue dengan target
plot_boxplot(data = cust,
by = "Segmentation",
geom_boxplot_args = list(fill="#03A9F4"),
ggtheme = theme_classic())

#Boxplot variabel diskret dengan target
plot_bar(data = cust,
by = "Segmentation",
ggtheme = theme_classic())
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the DataExplorer package.
## Please report the issue at
## <https://github.com/boxuancui/DataExplorer/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#===Pembagian Data
# K-fold Cross Validation
set.seed(2045)
folds <- vfold_cv(cust, v = 10,strata = "Segmentation" )
folds
## # 10-fold cross-validation using stratification
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [307/35]> Fold01
## 2 <split [307/35]> Fold02
## 3 <split [307/35]> Fold03
## 4 <split [307/35]> Fold04
## 5 <split [307/35]> Fold05
## 6 <split [308/34]> Fold06
## 7 <split [308/34]> Fold07
## 8 <split [308/34]> Fold08
## 9 <split [309/33]> Fold09
## 10 <split [310/32]> Fold10
cust %>%
mutate(Row=seq(nrow(cust))) %>%
select(Segmentation,Row) %>%
left_join(tidy(folds),by="Row")%>%
count(Segmentation,Fold,Data) %>%
group_by(Segmentation,Fold) %>%
mutate(percent=n*100/sum(n)) %>%
arrange(Fold,Data)
## # A tibble: 80 × 5
## # Groups: Segmentation, Fold [40]
## Segmentation Fold Data n percent
## <fct> <chr> <chr> <int> <dbl>
## 1 A Fold01 Analysis 76 89.4
## 2 B Fold01 Analysis 79 89.8
## 3 C Fold01 Analysis 81 90
## 4 D Fold01 Analysis 71 89.9
## 5 A Fold01 Assessment 9 10.6
## 6 B Fold01 Assessment 9 10.2
## 7 C Fold01 Assessment 9 10
## 8 D Fold01 Assessment 8 10.1
## 9 A Fold02 Analysis 76 89.4
## 10 B Fold02 Analysis 79 89.8
## # ℹ 70 more rows
#===Pemodelan Regresi Logistik
#B. Pemodelan dengan Regresi Logistik Ordinal (target memiliki tingkatan)
#B.1 Model Regresi
regresi_ord <- vglm(Segmentation~.,
data = cust %>% mutate(Segmentation=ordered(Segmentation)),
family = propodds)
#B.2 Mencari nilai Odds Ratio
tidy_parameters(regresi_ord,conf.int = TRUE,conf.level = 0.95,exponentiate = TRUE) %>%
mutate(across(where(is.numeric),~round(.x,4)))
## term estimate std.error conf.level conf.low conf.high
## 1 (Intercept):1 3.8072 4.2593 0.95 0.4249 34.1100
## 2 (Intercept):2 1.0569 1.1792 0.95 0.1187 9.4139
## 3 (Intercept):3 0.2451 0.2744 0.95 0.0273 2.1996
## 4 GenderMale 0.9417 0.2098 0.95 0.6085 1.4574
## 5 Ever_MarriedYes 1.0710 0.3491 0.95 0.5654 2.0290
## 6 Age 0.9987 0.0095 0.95 0.9804 1.0175
## 7 GraduatedYes 0.7980 0.1972 0.95 0.4916 1.2952
## 8 ProfessionDoctor 1.7029 0.6921 0.95 0.7677 3.7770
## 9 ProfessionEngineer 0.6142 0.2581 0.95 0.2696 1.3994
## 10 ProfessionEntertainment 0.5629 0.1932 0.95 0.2872 1.1031
## 11 ProfessionExecutive 2.4576 1.2027 0.95 0.9418 6.4131
## 12 ProfessionHealthcare 12.1385 4.8698 0.95 5.5294 26.6475
## 13 ProfessionHomemaker 1.1043 0.7379 0.95 0.2981 4.0914
## 14 ProfessionLawyer 0.9608 0.4525 0.95 0.3817 2.4182
## 15 ProfessionMarketing 1.3125 0.7330 0.95 0.4393 3.9219
## 16 Work_Experience 1.0392 0.0331 0.95 0.9764 1.1061
## 17 Spending_ScoreHigh 0.5807 0.2081 0.95 0.2877 1.1724
## 18 Spending_ScoreLow 0.6229 0.2035 0.95 0.3283 1.1818
## 19 Family_Size 1.0593 0.0804 0.95 0.9129 1.2292
## 20 Var_1Cat_2 0.7736 0.7189 0.95 0.1252 4.7816
## 21 Var_1Cat_3 0.8696 0.7628 0.95 0.1558 4.8525
## 22 Var_1Cat_4 0.6618 0.5755 0.95 0.1204 3.6390
## 23 Var_1Cat_5 0.3232 0.4055 0.95 0.0276 3.7798
## 24 Var_1Cat_6 0.9930 0.8261 0.95 0.1945 5.0709
## 25 Var_1Cat_7 6.4113 7.2518 0.95 0.6985 58.8497
## statistic df.error p.value
## 1 1.1950 Inf 0.2321
## 2 0.0496 Inf 0.9605
## 3 -1.2559 Inf 0.2091
## 4 -0.2694 Inf 0.7876
## 5 0.2105 Inf 0.8333
## 6 -0.1325 Inf 0.8946
## 7 -0.9133 Inf 0.3611
## 8 1.3097 Inf 0.1903
## 9 -1.1601 Inf 0.2460
## 10 -1.6742 Inf 0.0941
## 11 1.8375 Inf 0.0661
## 12 6.2225 Inf 0.0000
## 13 0.1485 Inf 0.8820
## 14 -0.0849 Inf 0.9323
## 15 0.4869 Inf 0.6263
## 16 1.2086 Inf 0.2268
## 17 -1.5163 Inf 0.1294
## 18 -1.4487 Inf 0.1474
## 19 0.7595 Inf 0.4476
## 20 -0.2762 Inf 0.7824
## 21 -0.1593 Inf 0.8735
## 22 -0.4746 Inf 0.6350
## 23 -0.9002 Inf 0.3680
## 24 -0.0084 Inf 0.9933
## 25 1.6427 Inf 0.1004
#B.3 Evaluasi Model
## K-fold Cross Validation
pred_regresi_ord_dummy <- predict(regresi_ord,newdata=cust %>% slice_sample(n=10),type="response")
pred_regresi_ord_dummy
## A B C D
## 1 0.3411697 0.3098414 0.2384252 0.11056370
## 2 0.3467739 0.3098583 0.2352228 0.10814492
## 3 0.3278681 0.3094469 0.2461066 0.11657838
## 4 0.3113825 0.3082311 0.2557696 0.12461683
## 5 0.4594402 0.2943585 0.1757963 0.07040495
## 6 0.1287477 0.2186485 0.3491712 0.30343260
## 7 0.3792903 0.3083286 0.2170765 0.09530460
## 8 0.1899902 0.2679838 0.3266819 0.21534410
## 9 0.4974308 0.2835336 0.1579704 0.06106514
## 10 0.3375271 0.3097835 0.2405177 0.11217168
pred_regresi_ord_dummy %>%
as.data.frame() %>%
apply(MARGIN=1,FUN=function(x) names(x[which.max(x)])) %>%
factor(levels = c("A","B","C","D"))
## 1 2 3 4 5 6 7 8 9 10
## A A A A A C A C A A
## Levels: A B C D
train_test_custom_mod <- function(split){
train_dt <- training(split)
test_dt <- testing(split)
model <- vglm(Segmentation~.,
data = train_dt %>% mutate(Segmentation=ordered(Segmentation)),
family = propodds)
pred_test_dt <- predict(model,newdata=test_dt,type="response") %>%
as.data.frame() %>%
apply(MARGIN=1,FUN=function(x) names(x[which.max(x)])) %>%
factor(levels = c("A","B","C","D"))
pred_test_dt <- data.frame(estimate=pred_test_dt,truth=test_dt$Segmentation)
return(pred_test_dt)
}
regresi_ord_cv <- map(folds$splits,train_test_custom_mod)
regresi_ord_cv
## [[1]]
## estimate truth
## 1 A D
## 2 C D
## 3 C D
## 4 B D
## 5 B D
## 6 C D
## 7 D D
## 8 D D
## 9 C A
## 10 C A
## 11 A A
## 12 A A
## 13 A A
## 14 C A
## 15 D A
## 16 B A
## 17 A A
## 18 C B
## 19 A B
## 20 A B
## 21 D B
## 22 C B
## 23 A B
## 24 C B
## 25 A B
## 26 D B
## 27 A C
## 28 A C
## 29 A C
## 30 D C
## 31 C C
## 32 A C
## 33 C C
## 34 D C
## 35 C C
##
## [[2]]
## estimate truth
## 1 D D
## 2 D D
## 3 A D
## 4 D D
## 5 A D
## 6 D D
## 7 D D
## 8 A D
## 9 A A
## 10 A A
## 11 C A
## 12 C A
## 13 A A
## 14 A A
## 15 B A
## 16 C A
## 17 A A
## 18 B B
## 19 B B
## 20 A B
## 21 A B
## 22 A B
## 23 C B
## 24 B B
## 25 A B
## 26 D B
## 27 A C
## 28 B C
## 29 A C
## 30 C C
## 31 A C
## 32 B C
## 33 C C
## 34 A C
## 35 D C
##
## [[3]]
## estimate truth
## 1 A D
## 2 B D
## 3 A D
## 4 D D
## 5 C D
## 6 A D
## 7 D D
## 8 D D
## 9 A A
## 10 A A
## 11 B A
## 12 A A
## 13 C A
## 14 C A
## 15 A A
## 16 B A
## 17 C A
## 18 A B
## 19 B B
## 20 C B
## 21 B B
## 22 B B
## 23 A B
## 24 C B
## 25 A B
## 26 B B
## 27 B C
## 28 C C
## 29 B C
## 30 C C
## 31 B C
## 32 A C
## 33 A C
## 34 A C
## 35 C C
##
## [[4]]
## estimate truth
## 1 D D
## 2 C D
## 3 A D
## 4 D D
## 5 D D
## 6 D D
## 7 D D
## 8 D D
## 9 C A
## 10 D A
## 11 A A
## 12 A A
## 13 B A
## 14 A A
## 15 D A
## 16 C A
## 17 A A
## 18 B B
## 19 B B
## 20 A B
## 21 A B
## 22 C B
## 23 C B
## 24 C B
## 25 B B
## 26 C B
## 27 D C
## 28 B C
## 29 A C
## 30 C C
## 31 A C
## 32 C C
## 33 C C
## 34 C C
## 35 A C
##
## [[5]]
## estimate truth
## 1 D D
## 2 B D
## 3 D D
## 4 A D
## 5 A D
## 6 B D
## 7 B D
## 8 D D
## 9 B A
## 10 B A
## 11 C A
## 12 A A
## 13 B A
## 14 A A
## 15 C A
## 16 B A
## 17 D A
## 18 B B
## 19 D B
## 20 A B
## 21 C B
## 22 A B
## 23 A B
## 24 B B
## 25 D B
## 26 A B
## 27 B C
## 28 A C
## 29 C C
## 30 A C
## 31 B C
## 32 B C
## 33 C C
## 34 C C
## 35 A C
##
## [[6]]
## estimate truth
## 1 D D
## 2 B D
## 3 D D
## 4 D D
## 5 D D
## 6 C D
## 7 D D
## 8 A D
## 9 A A
## 10 C A
## 11 C A
## 12 B A
## 13 D A
## 14 D A
## 15 A A
## 16 B A
## 17 A B
## 18 A B
## 19 B B
## 20 C B
## 21 B B
## 22 A B
## 23 B B
## 24 A B
## 25 C B
## 26 C C
## 27 D C
## 28 A C
## 29 A C
## 30 A C
## 31 C C
## 32 C C
## 33 C C
## 34 B C
##
## [[7]]
## estimate truth
## 1 A D
## 2 D D
## 3 C D
## 4 A D
## 5 D D
## 6 D D
## 7 D D
## 8 D D
## 9 A A
## 10 A A
## 11 A A
## 12 D A
## 13 A A
## 14 D A
## 15 A A
## 16 A A
## 17 A B
## 18 B B
## 19 D B
## 20 B B
## 21 A B
## 22 C B
## 23 D B
## 24 C B
## 25 C B
## 26 C C
## 27 D C
## 28 A C
## 29 B C
## 30 C C
## 31 C C
## 32 C C
## 33 A C
## 34 C C
##
## [[8]]
## estimate truth
## 1 D D
## 2 B D
## 3 B D
## 4 A D
## 5 D D
## 6 C D
## 7 D D
## 8 D D
## 9 C A
## 10 C A
## 11 A A
## 12 A A
## 13 A A
## 14 A A
## 15 A A
## 16 A A
## 17 B B
## 18 A B
## 19 A B
## 20 B B
## 21 A B
## 22 C B
## 23 A B
## 24 C B
## 25 C B
## 26 B C
## 27 B C
## 28 A C
## 29 D C
## 30 C C
## 31 B C
## 32 C C
## 33 C C
## 34 A C
##
## [[9]]
## estimate truth
## 1 B D
## 2 B D
## 3 D D
## 4 D D
## 5 B D
## 6 D D
## 7 D D
## 8 B D
## 9 A A
## 10 A A
## 11 C A
## 12 C A
## 13 A A
## 14 B A
## 15 D A
## 16 A A
## 17 D B
## 18 B B
## 19 A B
## 20 C B
## 21 D B
## 22 A B
## 23 A B
## 24 B B
## 25 B C
## 26 A C
## 27 D C
## 28 B C
## 29 C C
## 30 C C
## 31 A C
## 32 D C
## 33 B C
##
## [[10]]
## estimate truth
## 1 B D
## 2 A D
## 3 D D
## 4 D D
## 5 A D
## 6 A D
## 7 D D
## 8 D A
## 9 C A
## 10 A A
## 11 A A
## 12 C A
## 13 B A
## 14 B A
## 15 C A
## 16 C B
## 17 B B
## 18 C B
## 19 C B
## 20 A B
## 21 C B
## 22 A B
## 23 A B
## 24 C C
## 25 A C
## 26 C C
## 27 C C
## 28 C C
## 29 C C
## 30 A C
## 31 C C
## 32 A C
cm_regresi_ord_cv <- map(regresi_ord_cv,
function(x){
cm <- conf_mat(x,truth = truth,estimate = estimate)
autoplot(cm,type = "heatmap")+
scale_fill_viridis_c(direction = -1,option = "inferno",alpha = 0.6)
}
)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
cm_regresi_ord_cv
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

perf_regresi_ord_cv <- map(regresi_ord_cv,
function(x){
accuracy(x,truth = truth,estimate = estimate)
}
) %>%
list_rbind() %>%
mutate(id=str_c("fold",seq(nrow(folds)))) %>%
relocate(id)
perf_regresi_ord_cv
## # A tibble: 10 × 4
## id .metric .estimator .estimate
## <chr> <chr> <chr> <dbl>
## 1 fold1 accuracy multiclass 0.257
## 2 fold2 accuracy multiclass 0.429
## 3 fold3 accuracy multiclass 0.4
## 4 fold4 accuracy multiclass 0.486
## 5 fold5 accuracy multiclass 0.286
## 6 fold6 accuracy multiclass 0.412
## 7 fold7 accuracy multiclass 0.529
## 8 fold8 accuracy multiclass 0.441
## 9 fold9 accuracy multiclass 0.364
## 10 fold10 accuracy multiclass 0.375
perf_regresi_ord_cv %>%
group_by(.metric,.estimator) %>%
summarize(mean=mean(.estimate),n=n(),std_err=sd(.estimate)/sqrt(n))
## `summarise()` has grouped output by '.metric'. You can override using the
## `.groups` argument.
## # A tibble: 1 × 5
## # Groups: .metric [1]
## .metric .estimator mean n std_err
## <chr> <chr> <dbl> <int> <dbl>
## 1 accuracy multiclass 0.398 10 0.0263
#B.4 Prediksi Data Baru
set.seed(1234)
data_baru <- cust %>%
slice_sample(n = 2,by = Segmentation) %>%
select(-Segmentation)
data_baru
## # A tibble: 8 × 9
## Gender Ever_Married Age Graduated Profession Work_Experience Spending_Score
## <fct> <fct> <dbl> <fct> <fct> <dbl> <fct>
## 1 Male No 43 Yes Healthcare 1 Low
## 2 Male No 19 No Healthcare 3 Low
## 3 Male No 30 Yes Artist 9 Low
## 4 Male No 41 Yes Artist 7 Low
## 5 Female No 38 Yes Doctor 9 Low
## 6 Female Yes 47 Yes Artist 6 Low
## 7 Female Yes 60 Yes Artist 8 High
## 8 Male No 42 Yes Doctor 8 Low
## # ℹ 2 more variables: Family_Size <dbl>, Var_1 <fct>
regresi_ord_opt <- regresi_ord
pred_data_baru2 <-regresi_ord_opt %>%
predict(newdata=data_baru,type="response") %>%
as.data.frame() %>%
apply(MARGIN=1,FUN=function(x) names(x[which.max(x)])) %>%
factor(levels = c("A","B","C","D")) %>%
as_tibble() %>%
rename(.pred_class=value)
pred_data_baru2
## # A tibble: 8 × 1
## .pred_class
## <fct>
## 1 D
## 2 D
## 3 B
## 4 B
## 5 C
## 6 B
## 7 B
## 8 C