# LOGISTIK ORDINAL (PREDIKSI TINGKAT KEBAHAGIAAN INDIVIDU)
# Install library yang diperlukan
library(ordinal)
## Warning: package 'ordinal' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::slice() masks ordinal::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Membaca dataset melalui penyimpanan
library(readr)
data <- read_csv("C:/Users/hp/OneDrive/Documents/2015.csv")
## Rows: 158 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Country, Region
## dbl (10): Happiness Rank, Happiness Score, Standard Error, Economy (GDP per ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(data)
# Mengecek nama kolom
names(data)
## [1] "Country" "Region"
## [3] "Happiness Rank" "Happiness Score"
## [5] "Standard Error" "Economy (GDP per Capita)"
## [7] "Family" "Health (Life Expectancy)"
## [9] "Freedom" "Trust (Government Corruption)"
## [11] "Generosity" "Dystopia Residual"
# Mengecek total nilai yang hilang
sum(is.na(data))
## [1] 0
# EDA
str(data)
## spc_tbl_ [158 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Country : chr [1:158] "Switzerland" "Iceland" "Denmark" "Norway" ...
## $ Region : chr [1:158] "Western Europe" "Western Europe" "Western Europe" "Western Europe" ...
## $ Happiness Rank : num [1:158] 1 2 3 4 5 6 7 8 9 10 ...
## $ Happiness Score : num [1:158] 7.59 7.56 7.53 7.52 7.43 ...
## $ Standard Error : num [1:158] 0.0341 0.0488 0.0333 0.0388 0.0355 ...
## $ Economy (GDP per Capita) : num [1:158] 1.4 1.3 1.33 1.46 1.33 ...
## $ Family : num [1:158] 1.35 1.4 1.36 1.33 1.32 ...
## $ Health (Life Expectancy) : num [1:158] 0.941 0.948 0.875 0.885 0.906 ...
## $ Freedom : num [1:158] 0.666 0.629 0.649 0.67 0.633 ...
## $ Trust (Government Corruption): num [1:158] 0.42 0.141 0.484 0.365 0.33 ...
## $ Generosity : num [1:158] 0.297 0.436 0.341 0.347 0.458 ...
## $ Dystopia Residual : num [1:158] 2.52 2.7 2.49 2.47 2.45 ...
## - attr(*, "spec")=
## .. cols(
## .. Country = col_character(),
## .. Region = col_character(),
## .. `Happiness Rank` = col_double(),
## .. `Happiness Score` = col_double(),
## .. `Standard Error` = col_double(),
## .. `Economy (GDP per Capita)` = col_double(),
## .. Family = col_double(),
## .. `Health (Life Expectancy)` = col_double(),
## .. Freedom = col_double(),
## .. `Trust (Government Corruption)` = col_double(),
## .. Generosity = col_double(),
## .. `Dystopia Residual` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
summary(data)
## Country Region Happiness Rank Happiness Score
## Length:158 Length:158 Min. : 1.00 Min. :2.839
## Class :character Class :character 1st Qu.: 40.25 1st Qu.:4.526
## Mode :character Mode :character Median : 79.50 Median :5.232
## Mean : 79.49 Mean :5.376
## 3rd Qu.:118.75 3rd Qu.:6.244
## Max. :158.00 Max. :7.587
## Standard Error Economy (GDP per Capita) Family
## Min. :0.01848 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.03727 1st Qu.:0.5458 1st Qu.:0.8568
## Median :0.04394 Median :0.9102 Median :1.0295
## Mean :0.04788 Mean :0.8461 Mean :0.9910
## 3rd Qu.:0.05230 3rd Qu.:1.1584 3rd Qu.:1.2144
## Max. :0.13693 Max. :1.6904 Max. :1.4022
## Health (Life Expectancy) Freedom Trust (Government Corruption)
## Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.4392 1st Qu.:0.3283 1st Qu.:0.06168
## Median :0.6967 Median :0.4355 Median :0.10722
## Mean :0.6303 Mean :0.4286 Mean :0.14342
## 3rd Qu.:0.8110 3rd Qu.:0.5491 3rd Qu.:0.18025
## Max. :1.0252 Max. :0.6697 Max. :0.55191
## Generosity Dystopia Residual
## Min. :0.0000 Min. :0.3286
## 1st Qu.:0.1506 1st Qu.:1.7594
## Median :0.2161 Median :2.0954
## Mean :0.2373 Mean :2.0990
## 3rd Qu.:0.3099 3rd Qu.:2.4624
## Max. :0.7959 Max. :3.6021
# Pengelompokan ordinal
# Ubah nama kolom agar bisa dipanggil tanpa tanda petik
names(data) <- make.names(names(data)) # ubah "Happiness Score" jadi "Happiness.Score"
# Pastikan tidak ada NA
data <- data %>% filter(!is.na(Happiness.Score))
# Buat kategori ordinal
data$Happiness.Level <- cut(
data$Happiness.Score,
breaks = c(-Inf, 4, 7, Inf),
labels = c("Low", "Medium", "High")
)
table(data$Happiness.Level)
##
## Low Medium High
## 21 122 15
data$Happiness.Level <- ordered(data$Happiness.Level, levels = c("Low", "Medium", "High"))
# Model
names(data) <- make.names(names(data))
model_data <- data %>%
select(Happiness.Level,
Economy..GDP.per.Capita.,
Family,
Health..Life.Expectancy.,
Freedom,
Trust..Government.Corruption.)
head(model_data)
## # A tibble: 6 × 6
## Happiness.Level Economy..GDP.per.Capita. Family Health..Life.Expecta…¹ Freedom
## <ord> <dbl> <dbl> <dbl> <dbl>
## 1 High 1.40 1.35 0.941 0.666
## 2 High 1.30 1.40 0.948 0.629
## 3 High 1.33 1.36 0.875 0.649
## 4 High 1.46 1.33 0.885 0.670
## 5 High 1.33 1.32 0.906 0.633
## 6 High 1.29 1.32 0.889 0.642
## # ℹ abbreviated name: ¹Health..Life.Expectancy.
## # ℹ 1 more variable: Trust..Government.Corruption. <dbl>
model_ord <- clm(Happiness.Level ~ Economy..GDP.per.Capita. + Family + Health..Life.Expectancy. + Freedom + Trust..Government.Corruption.,
data = model_data)
summary(model_ord)
## formula:
## Happiness.Level ~ Economy..GDP.per.Capita. + Family + Health..Life.Expectancy. + Freedom + Trust..Government.Corruption.
## data: model_data
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 158 -63.16 140.31 8(0) 2.45e-08 5.9e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Economy..GDP.per.Capita. 2.275 1.167 1.950 0.05114 .
## Family 4.548 1.340 3.394 0.00069 ***
## Health..Life.Expectancy. 4.164 1.706 2.441 0.01466 *
## Freedom 1.942 2.194 0.885 0.37609
## Trust..Government.Corruption. 1.524 2.183 0.698 0.48518
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## Low|Medium 6.065 1.355 4.477
## Medium|High 14.593 2.371 6.154
# Evaluasi model, Pastikan Y bertipe factor ordered
model_data$Happiness.Level <- factor(model_data$Happiness.Level, ordered = TRUE)
# Load package
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.3
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Build model polr
model_ord <- polr(
Happiness.Level ~ Economy..GDP.per.Capita. + Family + Health..Life.Expectancy. + Freedom + Trust..Government.Corruption.,
data = model_data, Hess = TRUE
)
# Predict probabilitas
predicted_prob <- predict(model_ord, type = "prob")
# Cek bentuk predicted_prob
dim(predicted_prob)
## [1] 158 3
# Ambil prediksi (kelas dengan probabilitas tertinggi)
predicted <- apply(predicted_prob, 1, function(x) colnames(predicted_prob)[which.max(x)])
predicted <- factor(predicted, levels = levels(model_data$Happiness.Level))
# Confusion Matrix
confusion_matrix <- table(Predicted = predicted, Actual = model_data$Happiness.Level)
print(confusion_matrix)
## Actual
## Predicted Low Medium High
## Low 8 5 0
## Medium 13 115 9
## High 0 2 6
# Hitung akurasi
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
cat("Akurasi:", round(accuracy * 100, 2), "%\n")
## Akurasi: 81.65 %
# Evaluasi
library(ggplot2)
df_pred <- data.frame(Actual = model_data$Happiness.Level, Predicted = predicted)
ggplot(df_pred, aes(x = Actual, fill = Predicted)) +
geom_bar(position = "dodge") +
labs(title = "Actual vs Predicted Happiness Level",
x = "Actual Level",
y = "Count") +
theme_minimal()
