NAMA : FIRLY FARAH AULIA
NIM : 23031554047
MATA KULIAH : ANALISIS MULTIVARIAT
DOSEN PENGAMPU : Ike Fitriyaningsih, M.Si
NIDN : 0109049001
## battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt
## 1 842 0 2.2 0 1 0 7 0.6 188
## 2 1021 1 0.5 1 0 1 53 0.7 136
## 3 563 1 0.5 1 2 1 41 0.9 145
## 4 615 1 2.5 0 0 0 10 0.8 131
## 5 1821 1 1.2 0 13 1 44 0.6 141
## 6 1859 0 0.5 1 3 0 22 0.7 164
## n_cores pc px_height px_width ram sc_h sc_w talk_time three_g touch_screen
## 1 2 2 20 756 2549 9 7 19 0 0
## 2 3 6 905 1988 2631 17 3 7 1 1
## 3 5 6 1263 1716 2603 11 2 9 1 1
## 4 6 9 1216 1786 2769 16 8 11 1 0
## 5 2 14 1208 1212 1411 8 2 15 1 1
## 6 1 7 1004 1654 1067 17 1 10 1 0
## wifi price_range
## 1 1 1
## 2 0 2
## 3 0 2
## 4 0 2
## 5 0 1
## 6 0 1
## 'data.frame': 2000 obs. of 21 variables:
## $ battery_power: int 842 1021 563 615 1821 1859 1821 1954 1445 509 ...
## $ blue : int 0 1 1 1 1 0 0 0 1 1 ...
## $ clock_speed : num 2.2 0.5 0.5 2.5 1.2 0.5 1.7 0.5 0.5 0.6 ...
## $ dual_sim : int 0 1 1 0 0 1 0 1 0 1 ...
## $ fc : int 1 0 2 0 13 3 4 0 0 2 ...
## $ four_g : int 0 1 1 0 1 0 1 0 0 1 ...
## $ int_memory : int 7 53 41 10 44 22 10 24 53 9 ...
## $ m_dep : num 0.6 0.7 0.9 0.8 0.6 0.7 0.8 0.8 0.7 0.1 ...
## $ mobile_wt : int 188 136 145 131 141 164 139 187 174 93 ...
## $ n_cores : int 2 3 5 6 2 1 8 4 7 5 ...
## $ pc : int 2 6 6 9 14 7 10 0 14 15 ...
## $ px_height : int 20 905 1263 1216 1208 1004 381 512 386 1137 ...
## $ px_width : int 756 1988 1716 1786 1212 1654 1018 1149 836 1224 ...
## $ ram : int 2549 2631 2603 2769 1411 1067 3220 700 1099 513 ...
## $ sc_h : int 9 17 11 16 8 17 13 16 17 19 ...
## $ sc_w : int 7 3 2 8 2 1 8 3 1 10 ...
## $ talk_time : int 19 7 9 11 15 10 18 5 20 12 ...
## $ three_g : int 0 1 1 1 1 1 1 1 1 1 ...
## $ touch_screen : int 0 1 1 0 1 0 0 1 0 0 ...
## $ wifi : int 1 0 0 0 0 0 1 1 0 0 ...
## $ price_range : int 1 2 2 2 1 1 3 0 0 0 ...
## battery_power blue clock_speed dual_sim
## Min. : 501.0 Min. :0.000 Min. :0.500 Min. :0.0000
## 1st Qu.: 851.8 1st Qu.:0.000 1st Qu.:0.700 1st Qu.:0.0000
## Median :1226.0 Median :0.000 Median :1.500 Median :1.0000
## Mean :1238.5 Mean :0.495 Mean :1.522 Mean :0.5095
## 3rd Qu.:1615.2 3rd Qu.:1.000 3rd Qu.:2.200 3rd Qu.:1.0000
## Max. :1998.0 Max. :1.000 Max. :3.000 Max. :1.0000
## fc four_g int_memory m_dep
## Min. : 0.000 Min. :0.0000 Min. : 2.00 Min. :0.1000
## 1st Qu.: 1.000 1st Qu.:0.0000 1st Qu.:16.00 1st Qu.:0.2000
## Median : 3.000 Median :1.0000 Median :32.00 Median :0.5000
## Mean : 4.309 Mean :0.5215 Mean :32.05 Mean :0.5018
## 3rd Qu.: 7.000 3rd Qu.:1.0000 3rd Qu.:48.00 3rd Qu.:0.8000
## Max. :19.000 Max. :1.0000 Max. :64.00 Max. :1.0000
## mobile_wt n_cores pc px_height
## Min. : 80.0 Min. :1.000 Min. : 0.000 Min. : 0.0
## 1st Qu.:109.0 1st Qu.:3.000 1st Qu.: 5.000 1st Qu.: 282.8
## Median :141.0 Median :4.000 Median :10.000 Median : 564.0
## Mean :140.2 Mean :4.521 Mean : 9.916 Mean : 645.1
## 3rd Qu.:170.0 3rd Qu.:7.000 3rd Qu.:15.000 3rd Qu.: 947.2
## Max. :200.0 Max. :8.000 Max. :20.000 Max. :1960.0
## px_width ram sc_h sc_w
## Min. : 500.0 Min. : 256 Min. : 5.00 Min. : 0.000
## 1st Qu.: 874.8 1st Qu.:1208 1st Qu.: 9.00 1st Qu.: 2.000
## Median :1247.0 Median :2146 Median :12.00 Median : 5.000
## Mean :1251.5 Mean :2124 Mean :12.31 Mean : 5.767
## 3rd Qu.:1633.0 3rd Qu.:3064 3rd Qu.:16.00 3rd Qu.: 9.000
## Max. :1998.0 Max. :3998 Max. :19.00 Max. :18.000
## talk_time three_g touch_screen wifi
## Min. : 2.00 Min. :0.0000 Min. :0.000 Min. :0.000
## 1st Qu.: 6.00 1st Qu.:1.0000 1st Qu.:0.000 1st Qu.:0.000
## Median :11.00 Median :1.0000 Median :1.000 Median :1.000
## Mean :11.01 Mean :0.7615 Mean :0.503 Mean :0.507
## 3rd Qu.:16.00 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :20.00 Max. :1.0000 Max. :1.000 Max. :1.000
## price_range
## Min. :0.00
## 1st Qu.:0.75
## Median :1.50
## Mean :1.50
## 3rd Qu.:2.25
## Max. :3.00
Pre-processing
## Missing values per kolom:
## battery_power blue clock_speed dual_sim fc
## 0 0 0 0 0
## four_g int_memory m_dep mobile_wt n_cores
## 0 0 0 0 0
## pc px_height px_width ram sc_h
## 0 0 0 0 0
## sc_w talk_time three_g touch_screen wifi
## 0 0 0 0 0
## price_range
## 0
## Jumlah data duplikat: 0
dataset <- dataset %>%
mutate(
blue = as.factor(blue),
dual_sim = as.factor(dual_sim),
four_g = as.factor(four_g),
three_g = as.factor(three_g),
touch_screen = as.factor(touch_screen),
wifi = as.factor(wifi),
price_range = recode(as.character(price_range),
"0" = "low cost",
"1" = "medium cost",
"2" = "high cost",
"3" = "very high cost"),
price_range = ordered(price_range,
levels = c("low cost", "medium cost", "high cost", "very high cost"))
)## battery_power blue clock_speed dual_sim fc four_g
## Min. : 501.0 0:1010 Min. :0.500 0: 981 Min. : 0.000 0: 957
## 1st Qu.: 851.8 1: 990 1st Qu.:0.700 1:1019 1st Qu.: 1.000 1:1043
## Median :1226.0 Median :1.500 Median : 3.000
## Mean :1238.5 Mean :1.522 Mean : 4.309
## 3rd Qu.:1615.2 3rd Qu.:2.200 3rd Qu.: 7.000
## Max. :1998.0 Max. :3.000 Max. :19.000
## int_memory m_dep mobile_wt n_cores
## Min. : 2.00 Min. :0.1000 Min. : 80.0 Min. :1.000
## 1st Qu.:16.00 1st Qu.:0.2000 1st Qu.:109.0 1st Qu.:3.000
## Median :32.00 Median :0.5000 Median :141.0 Median :4.000
## Mean :32.05 Mean :0.5018 Mean :140.2 Mean :4.521
## 3rd Qu.:48.00 3rd Qu.:0.8000 3rd Qu.:170.0 3rd Qu.:7.000
## Max. :64.00 Max. :1.0000 Max. :200.0 Max. :8.000
## pc px_height px_width ram
## Min. : 0.000 Min. : 0.0 Min. : 500.0 Min. : 256
## 1st Qu.: 5.000 1st Qu.: 282.8 1st Qu.: 874.8 1st Qu.:1208
## Median :10.000 Median : 564.0 Median :1247.0 Median :2146
## Mean : 9.916 Mean : 645.1 Mean :1251.5 Mean :2124
## 3rd Qu.:15.000 3rd Qu.: 947.2 3rd Qu.:1633.0 3rd Qu.:3064
## Max. :20.000 Max. :1960.0 Max. :1998.0 Max. :3998
## sc_h sc_w talk_time three_g touch_screen
## Min. : 5.00 Min. : 0.000 Min. : 2.00 0: 477 0: 994
## 1st Qu.: 9.00 1st Qu.: 2.000 1st Qu.: 6.00 1:1523 1:1006
## Median :12.00 Median : 5.000 Median :11.00
## Mean :12.31 Mean : 5.767 Mean :11.01
## 3rd Qu.:16.00 3rd Qu.: 9.000 3rd Qu.:16.00
## Max. :19.00 Max. :18.000 Max. :20.00
## wifi price_range
## 0: 986 low cost :500
## 1:1014 medium cost :500
## high cost :500
## very high cost:500
##
##
## 'data.frame': 2000 obs. of 21 variables:
## $ battery_power: int 842 1021 563 615 1821 1859 1821 1954 1445 509 ...
## $ blue : Factor w/ 2 levels "0","1": 1 2 2 2 2 1 1 1 2 2 ...
## $ clock_speed : num 2.2 0.5 0.5 2.5 1.2 0.5 1.7 0.5 0.5 0.6 ...
## $ dual_sim : Factor w/ 2 levels "0","1": 1 2 2 1 1 2 1 2 1 2 ...
## $ fc : int 1 0 2 0 13 3 4 0 0 2 ...
## $ four_g : Factor w/ 2 levels "0","1": 1 2 2 1 2 1 2 1 1 2 ...
## $ int_memory : int 7 53 41 10 44 22 10 24 53 9 ...
## $ m_dep : num 0.6 0.7 0.9 0.8 0.6 0.7 0.8 0.8 0.7 0.1 ...
## $ mobile_wt : int 188 136 145 131 141 164 139 187 174 93 ...
## $ n_cores : int 2 3 5 6 2 1 8 4 7 5 ...
## $ pc : int 2 6 6 9 14 7 10 0 14 15 ...
## $ px_height : int 20 905 1263 1216 1208 1004 381 512 386 1137 ...
## $ px_width : int 756 1988 1716 1786 1212 1654 1018 1149 836 1224 ...
## $ ram : int 2549 2631 2603 2769 1411 1067 3220 700 1099 513 ...
## $ sc_h : int 9 17 11 16 8 17 13 16 17 19 ...
## $ sc_w : int 7 3 2 8 2 1 8 3 1 10 ...
## $ talk_time : int 19 7 9 11 15 10 18 5 20 12 ...
## $ three_g : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 2 2 2 ...
## $ touch_screen : Factor w/ 2 levels "0","1": 1 2 2 1 2 1 1 2 1 1 ...
## $ wifi : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
## $ price_range : Ord.factor w/ 4 levels "low cost"<"medium cost"<..: 2 3 3 3 2 2 4 1 1 1 ...
Analisis Statistik Deskriptif
statistik deskriptif untuk variabel numerik
## vars n mean sd median trimmed mad min max
## battery_power 1 2000 1238.52 439.42 1226.0 1236.25 566.35 501.0 1998
## clock_speed 2 2000 1.52 0.82 1.5 1.48 1.19 0.5 3
## fc 3 2000 4.31 4.34 3.0 3.69 4.45 0.0 19
## int_memory 4 2000 32.05 18.15 32.0 31.88 23.72 2.0 64
## m_dep 5 2000 0.50 0.29 0.5 0.50 0.44 0.1 1
## mobile_wt 6 2000 140.25 35.40 141.0 140.22 45.96 80.0 200
## n_cores 7 2000 4.52 2.29 4.0 4.53 2.97 1.0 8
## pc 8 2000 9.92 6.06 10.0 9.89 7.41 0.0 20
## px_height 9 2000 645.11 443.78 564.0 606.95 471.47 0.0 1960
## px_width 10 2000 1251.52 432.20 1247.0 1250.53 557.46 500.0 1998
## ram 11 2000 2124.21 1084.73 2146.5 2122.93 1382.52 256.0 3998
## sc_h 12 2000 12.31 4.21 12.0 12.37 5.93 5.0 19
## sc_w 13 2000 5.77 4.36 5.0 5.40 4.45 0.0 18
## talk_time 14 2000 11.01 5.46 11.0 11.01 7.41 2.0 20
## range skew kurtosis se
## battery_power 1497.0 0.03 -1.23 9.83
## clock_speed 2.5 0.18 -1.32 0.02
## fc 19.0 1.02 0.27 0.10
## int_memory 62.0 0.06 -1.22 0.41
## m_dep 0.9 0.09 -1.28 0.01
## mobile_wt 120.0 0.01 -1.21 0.79
## n_cores 7.0 0.00 -1.23 0.05
## pc 20.0 0.02 -1.17 0.14
## px_height 1960.0 0.67 -0.32 9.92
## px_width 1498.0 0.01 -1.19 9.66
## ram 3742.0 0.01 -1.19 24.26
## sc_h 14.0 -0.10 -1.19 0.09
## sc_w 18.0 0.63 -0.39 0.10
## talk_time 18.0 0.01 -1.22 0.12
dataset %>%
select_if(is.numeric) %>%
gather(key = "Variabel", value = "Nilai") %>%
ggplot(aes(x = Nilai)) +
facet_wrap(~ Variabel, scales = "free", ncol = 3) +
geom_histogram(bins = 10, fill = "steelblue", color = "black") +
theme_minimal()numerik_vars <- names(dataset)[sapply(dataset, is.numeric)]
for (var in numerik_vars) {
p <- ggplot(dataset, aes(x = price_range, y = dataset[[var]], fill = price_range)) +
geom_boxplot() +
labs(title = paste("Distribusi", var, "berdasarkan Harga")) +
theme_minimal() +
theme(legend.position = "none")
print(p)
}## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
## Warning: Use of `dataset[[var]]` is discouraged.
## ℹ Use `.data[[var]]` instead.
numeric_data <- dataset %>% select_if(is.numeric)
cor_matrix <- cor(numeric_data, use = "complete.obs")
ggcorr(numeric_data,label = TRUE,label_size = 3, hjust = 0.95, layout.exp = 3, name = "Correlation") +
ggtitle("Matriks Korelasi Tiap Variabel") +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10)
)ggplot(dataset, aes(x = price_range, fill = price_range)) +
geom_bar() +
scale_fill_brewer(palette = "Set2") +
labs(
title = "Distribusi Variabel Price Range",
x = "Price Range",
y = "Jumlah Data",
fill = "Price Range"
) +
theme_minimal(base_size = 14) +
theme(legend.position = "right")dataset %>%
group_by(price_range) %>%
summarise(across(where(is.numeric),
list(mean = ~mean(.x, na.rm = TRUE),
sd = ~sd(.x, na.rm = TRUE)),
.names = "{.col}_{.fn}"))## # A tibble: 4 × 29
## price_range battery_power_mean battery_power_sd clock_speed_mean
## <ord> <dbl> <dbl> <dbl>
## 1 low cost 1117. 411. 1.55
## 2 medium cost 1229. 439. 1.49
## 3 high cost 1228. 453. 1.53
## 4 very high cost 1380. 415. 1.52
## # ℹ 25 more variables: clock_speed_sd <dbl>, fc_mean <dbl>, fc_sd <dbl>,
## # int_memory_mean <dbl>, int_memory_sd <dbl>, m_dep_mean <dbl>,
## # m_dep_sd <dbl>, mobile_wt_mean <dbl>, mobile_wt_sd <dbl>,
## # n_cores_mean <dbl>, n_cores_sd <dbl>, pc_mean <dbl>, pc_sd <dbl>,
## # px_height_mean <dbl>, px_height_sd <dbl>, px_width_mean <dbl>,
## # px_width_sd <dbl>, ram_mean <dbl>, ram_sd <dbl>, sc_h_mean <dbl>,
## # sc_h_sd <dbl>, sc_w_mean <dbl>, sc_w_sd <dbl>, talk_time_mean <dbl>, …
## battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt
## 1 842 0 2.2 0 1 0 7 0.6 188
## 2 1021 1 0.5 1 0 1 53 0.7 136
## 3 563 1 0.5 1 2 1 41 0.9 145
## 4 615 1 2.5 0 0 0 10 0.8 131
## 5 1821 1 1.2 0 13 1 44 0.6 141
## 6 1859 0 0.5 1 3 0 22 0.7 164
## n_cores pc px_height px_width ram sc_h sc_w talk_time three_g touch_screen
## 1 2 2 20 756 2549 9 7 19 0 0
## 2 3 6 905 1988 2631 17 3 7 1 1
## 3 5 6 1263 1716 2603 11 2 9 1 1
## 4 6 9 1216 1786 2769 16 8 11 1 0
## 5 2 14 1208 1212 1411 8 2 15 1 1
## 6 1 7 1004 1654 1067 17 1 10 1 0
## wifi price_range
## 1 1 medium cost
## 2 0 high cost
## 3 0 high cost
## 4 0 high cost
## 5 0 medium cost
## 6 0 medium cost
memastikan price_range adalah factor ordinal
mengambil variabel numerik saja untuk PCA
standarisasi
Train-Test Split
PCA
train_num <- train_data %>% select_if(is.numeric)
train_scaled <- scale(train_num)
pca_model <- prcomp(train_scaled, center = TRUE, scale. = TRUE)## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.2948 1.2465 1.2084 1.03104 1.01890 1.01806 1.00530
## Proportion of Variance 0.1197 0.1110 0.1043 0.07593 0.07415 0.07403 0.07219
## Cumulative Proportion 0.1197 0.2307 0.3350 0.41097 0.48512 0.55916 0.63134
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.99505 0.98416 0.98057 0.94678 0.70256 0.70026 0.60054
## Proportion of Variance 0.07072 0.06918 0.06868 0.06403 0.03526 0.03503 0.02576
## Cumulative Proportion 0.70207 0.77125 0.83993 0.90396 0.93921 0.97424 1.00000
mengambil 10 komponen karena sudah menjelaskan 84% variasi data
## Call:
## polr(formula = price_range ~ ., data = train_pca, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## PC1 0.1007 0.06504 1.549
## PC2 1.3134 0.09058 14.500
## PC3 1.1329 0.08798 12.876
## PC4 3.4402 0.17467 19.696
## PC5 3.7709 0.18652 20.217
## PC6 2.8552 0.15328 18.627
## PC7 -0.8928 0.09266 -9.635
## PC8 -6.6378 0.30926 -21.463
## PC9 2.9044 0.15419 18.837
## PC10 2.1759 0.12908 16.857
##
## Intercepts:
## Value Std. Error t value
## low cost|medium cost -8.0296 0.3939 -20.3847
## medium cost|high cost 0.0822 0.1375 0.5982
## high cost|very high cost 7.8400 0.3850 20.3640
##
## Residual Deviance: 952.1686
## AIC: 978.1686
Asumsi Proportional Odds
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## --------------------------------------------
## Test for X2 df probability
## --------------------------------------------
## Omnibus 33.89 20 0.03
## PC1 1.18 2 0.55
## PC2 3.85 2 0.15
## PC3 2.28 2 0.32
## PC4 2.58 2 0.27
## PC5 2.97 2 0.23
## PC6 1.14 2 0.56
## PC7 7.3 2 0.03
## PC8 0.43 2 0.81
## PC9 0.19 2 0.91
## PC10 2.15 2 0.34
## --------------------------------------------
##
## H0: Parallel Regression Assumption holds
Transform test set menggunakan PCA yang sama
test_num <- test_data %>% select_if(is.numeric)
# standardisasi
test_scaled <- scale(test_num,
center = attr(train_scaled, "scaled:center"),
scale = attr(train_scaled, "scaled:scale"))
test_pca_data <- as.data.frame(predict(pca_model, newdata = test_scaled)[, 1:10])
label_mapping <- c("low cost", "medium cost", "high cost", "very high cost")
test_pca_data$price_range <- factor(test_data$price_range,
levels = label_mapping,
ordered = TRUE)Evaluasi Model
## Confusion Matrix and Statistics
##
## Reference
## Prediction low cost medium cost high cost very high cost
## low cost 90 7 0 0
## medium cost 10 74 16 0
## high cost 0 19 71 9
## very high cost 0 0 13 91
##
## Overall Statistics
##
## Accuracy : 0.815
## 95% CI : (0.7734, 0.8519)
## No Information Rate : 0.25
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7533
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: low cost Class: medium cost Class: high cost
## Sensitivity 0.9000 0.7400 0.7100
## Specificity 0.9767 0.9133 0.9067
## Pos Pred Value 0.9278 0.7400 0.7172
## Neg Pred Value 0.9670 0.9133 0.9037
## Prevalence 0.2500 0.2500 0.2500
## Detection Rate 0.2250 0.1850 0.1775
## Detection Prevalence 0.2425 0.2500 0.2475
## Balanced Accuracy 0.9383 0.8267 0.8083
## Class: very high cost
## Sensitivity 0.9100
## Specificity 0.9567
## Pos Pred Value 0.8750
## Neg Pred Value 0.9696
## Prevalence 0.2500
## Detection Rate 0.2275
## Detection Prevalence 0.2600
## Balanced Accuracy 0.9333
Cohen’s Kappa
kappa_val <- kappa2(data.frame(pred_label, test_pca_data$price_range), "unweighted")
print(kappa_val)## Cohen's Kappa for 2 Raters (Weights: unweighted)
##
## Subjects = 400
## Raters = 2
## Kappa = 0.753
##
## z = 26.1
## p-value = 0
MAE
mae_cat <- mean(abs(as.numeric(pred_label) - as.numeric(test_pca_data$price_range)))
cat("MAE Kategori:", round(mae_cat, 3), "\n")## MAE Kategori: 0.185
pred_train <- predict(model_olr, newdata = train_pca)
conf_train <- confusionMatrix(pred_train, train_pca$price_range)
cat("Akurasi Data Train:", round(conf_train$overall["Accuracy"], 4), "\n")## Akurasi Data Train: 0.8625
conf_test <- confusionMatrix(pred, test_pca_data$price_range)
cat("Akurasi Data Test :", round(conf_test$overall["Accuracy"], 4), "\n")## Akurasi Data Test : 0.815