# LOGISTIK ORDINAL (PREDIKSI TINGKAT KEBAHAGIAAN INDIVIDU)
# Install library yang diperlukan 
library(ordinal)
## Warning: package 'ordinal' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::slice()  masks ordinal::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Membaca dataset melalui penyimpanan 
library(readr)
data <- read_csv("C:/Users/hp/OneDrive/Documents/2015.csv")
## Rows: 158 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): Country, Region
## dbl (10): Happiness Rank, Happiness Score, Standard Error, Economy (GDP per ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(data)
# Mengecek nama kolom
names(data)
##  [1] "Country"                       "Region"                       
##  [3] "Happiness Rank"                "Happiness Score"              
##  [5] "Standard Error"                "Economy (GDP per Capita)"     
##  [7] "Family"                        "Health (Life Expectancy)"     
##  [9] "Freedom"                       "Trust (Government Corruption)"
## [11] "Generosity"                    "Dystopia Residual"
# Mengecek total nilai yang hilang
sum(is.na(data))
## [1] 0
# EDA 
str(data)
## spc_tbl_ [158 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Country                      : chr [1:158] "Switzerland" "Iceland" "Denmark" "Norway" ...
##  $ Region                       : chr [1:158] "Western Europe" "Western Europe" "Western Europe" "Western Europe" ...
##  $ Happiness Rank               : num [1:158] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Happiness Score              : num [1:158] 7.59 7.56 7.53 7.52 7.43 ...
##  $ Standard Error               : num [1:158] 0.0341 0.0488 0.0333 0.0388 0.0355 ...
##  $ Economy (GDP per Capita)     : num [1:158] 1.4 1.3 1.33 1.46 1.33 ...
##  $ Family                       : num [1:158] 1.35 1.4 1.36 1.33 1.32 ...
##  $ Health (Life Expectancy)     : num [1:158] 0.941 0.948 0.875 0.885 0.906 ...
##  $ Freedom                      : num [1:158] 0.666 0.629 0.649 0.67 0.633 ...
##  $ Trust (Government Corruption): num [1:158] 0.42 0.141 0.484 0.365 0.33 ...
##  $ Generosity                   : num [1:158] 0.297 0.436 0.341 0.347 0.458 ...
##  $ Dystopia Residual            : num [1:158] 2.52 2.7 2.49 2.47 2.45 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Country = col_character(),
##   ..   Region = col_character(),
##   ..   `Happiness Rank` = col_double(),
##   ..   `Happiness Score` = col_double(),
##   ..   `Standard Error` = col_double(),
##   ..   `Economy (GDP per Capita)` = col_double(),
##   ..   Family = col_double(),
##   ..   `Health (Life Expectancy)` = col_double(),
##   ..   Freedom = col_double(),
##   ..   `Trust (Government Corruption)` = col_double(),
##   ..   Generosity = col_double(),
##   ..   `Dystopia Residual` = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
summary(data)
##    Country             Region          Happiness Rank   Happiness Score
##  Length:158         Length:158         Min.   :  1.00   Min.   :2.839  
##  Class :character   Class :character   1st Qu.: 40.25   1st Qu.:4.526  
##  Mode  :character   Mode  :character   Median : 79.50   Median :5.232  
##                                        Mean   : 79.49   Mean   :5.376  
##                                        3rd Qu.:118.75   3rd Qu.:6.244  
##                                        Max.   :158.00   Max.   :7.587  
##  Standard Error    Economy (GDP per Capita)     Family      
##  Min.   :0.01848   Min.   :0.0000           Min.   :0.0000  
##  1st Qu.:0.03727   1st Qu.:0.5458           1st Qu.:0.8568  
##  Median :0.04394   Median :0.9102           Median :1.0295  
##  Mean   :0.04788   Mean   :0.8461           Mean   :0.9910  
##  3rd Qu.:0.05230   3rd Qu.:1.1584           3rd Qu.:1.2144  
##  Max.   :0.13693   Max.   :1.6904           Max.   :1.4022  
##  Health (Life Expectancy)    Freedom       Trust (Government Corruption)
##  Min.   :0.0000           Min.   :0.0000   Min.   :0.00000              
##  1st Qu.:0.4392           1st Qu.:0.3283   1st Qu.:0.06168              
##  Median :0.6967           Median :0.4355   Median :0.10722              
##  Mean   :0.6303           Mean   :0.4286   Mean   :0.14342              
##  3rd Qu.:0.8110           3rd Qu.:0.5491   3rd Qu.:0.18025              
##  Max.   :1.0252           Max.   :0.6697   Max.   :0.55191              
##    Generosity     Dystopia Residual
##  Min.   :0.0000   Min.   :0.3286   
##  1st Qu.:0.1506   1st Qu.:1.7594   
##  Median :0.2161   Median :2.0954   
##  Mean   :0.2373   Mean   :2.0990   
##  3rd Qu.:0.3099   3rd Qu.:2.4624   
##  Max.   :0.7959   Max.   :3.6021
# Pengelompokan ordinal 
# Ubah nama kolom agar bisa dipanggil tanpa tanda petik
names(data) <- make.names(names(data))  # ubah "Happiness Score" jadi "Happiness.Score"

# Pastikan tidak ada NA
data <- data %>% filter(!is.na(Happiness.Score))

# Buat kategori ordinal
data$Happiness.Level <- cut(
  data$Happiness.Score,
  breaks = c(-Inf, 4, 7, Inf),
  labels = c("Low", "Medium", "High")
)

table(data$Happiness.Level)
## 
##    Low Medium   High 
##     21    122     15
data$Happiness.Level <- ordered(data$Happiness.Level, levels = c("Low", "Medium", "High"))
# Model 
names(data) <- make.names(names(data))
model_data <- data %>%
  select(Happiness.Level,
         Economy..GDP.per.Capita.,
         Family,
         Health..Life.Expectancy.,
         Freedom,
         Trust..Government.Corruption.)

head(model_data)
## # A tibble: 6 × 6
##   Happiness.Level Economy..GDP.per.Capita. Family Health..Life.Expecta…¹ Freedom
##   <ord>                              <dbl>  <dbl>                  <dbl>   <dbl>
## 1 High                                1.40   1.35                  0.941   0.666
## 2 High                                1.30   1.40                  0.948   0.629
## 3 High                                1.33   1.36                  0.875   0.649
## 4 High                                1.46   1.33                  0.885   0.670
## 5 High                                1.33   1.32                  0.906   0.633
## 6 High                                1.29   1.32                  0.889   0.642
## # ℹ abbreviated name: ¹​Health..Life.Expectancy.
## # ℹ 1 more variable: Trust..Government.Corruption. <dbl>
model_ord <- clm(Happiness.Level ~ Economy..GDP.per.Capita. + Family + Health..Life.Expectancy. + Freedom + Trust..Government.Corruption.,
                 data = model_data)

summary(model_ord)
## formula: 
## Happiness.Level ~ Economy..GDP.per.Capita. + Family + Health..Life.Expectancy. + Freedom + Trust..Government.Corruption.
## data:    model_data
## 
##  link  threshold nobs logLik AIC    niter max.grad cond.H 
##  logit flexible  158  -63.16 140.31 8(0)  2.45e-08 5.9e+02
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)    
## Economy..GDP.per.Capita.         2.275      1.167   1.950  0.05114 .  
## Family                           4.548      1.340   3.394  0.00069 ***
## Health..Life.Expectancy.         4.164      1.706   2.441  0.01466 *  
## Freedom                          1.942      2.194   0.885  0.37609    
## Trust..Government.Corruption.    1.524      2.183   0.698  0.48518    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##             Estimate Std. Error z value
## Low|Medium     6.065      1.355   4.477
## Medium|High   14.593      2.371   6.154
# Evaluasi model, Pastikan Y bertipe factor ordered
model_data$Happiness.Level <- factor(model_data$Happiness.Level, ordered = TRUE)
# Load package
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.3
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Build model polr
model_ord <- polr(
  Happiness.Level ~ Economy..GDP.per.Capita. + Family + Health..Life.Expectancy. + Freedom + Trust..Government.Corruption.,
  data = model_data, Hess = TRUE
)
# Predict probabilitas
predicted_prob <- predict(model_ord, type = "prob")
# Cek bentuk predicted_prob
dim(predicted_prob)
## [1] 158   3
# Ambil prediksi (kelas dengan probabilitas tertinggi)
predicted <- apply(predicted_prob, 1, function(x) colnames(predicted_prob)[which.max(x)])
predicted <- factor(predicted, levels = levels(model_data$Happiness.Level))
# Confusion Matrix
confusion_matrix <- table(Predicted = predicted, Actual = model_data$Happiness.Level)
print(confusion_matrix)
##          Actual
## Predicted Low Medium High
##    Low      8      5    0
##    Medium  13    115    9
##    High     0      2    6
# Hitung akurasi
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
cat("Akurasi:", round(accuracy * 100, 2), "%\n")
## Akurasi: 81.65 %
# Evaluasi
library(ggplot2)

df_pred <- data.frame(Actual = model_data$Happiness.Level, Predicted = predicted)

ggplot(df_pred, aes(x = Actual, fill = Predicted)) +
  geom_bar(position = "dodge") +
  labs(title = "Actual vs Predicted Happiness Level",
       x = "Actual Level",
       y = "Count") +
  theme_minimal()