knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
# library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.3
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(ordinal)
## Warning: package 'ordinal' was built under R version 4.4.3
##
## Attaching package: 'ordinal'
##
## The following object is masked from 'package:dplyr':
##
## slice
Dataset ini berasal dari kaggle yang menunjukkan variabel-variabel yang menyebabkan anemia pada anak berdasarkan karakteristik ibu nya.
#1 Load dan Cek data
# load data
anemia_data <- read.csv("children anemia.csv")
# tampilkan data
print(head(anemia_data))
## Age.in.5.year.groups Type.of.place.of.residence Highest.educational.level
## 1 40-44 Urban Higher
## 2 35-39 Urban Higher
## 3 25-29 Urban Higher
## 4 25-29 Urban Secondary
## 5 20-24 Urban Secondary
## 6 30-34 Urban Higher
## Wealth.index.combined Births.in.last.five.years
## 1 Richest 1
## 2 Richest 1
## 3 Richest 1
## 4 Richest 1
## 5 Richest 1
## 6 Richest 1
## Age.of.respondent.at.1st.birth
## 1 22
## 2 28
## 3 26
## 4 25
## 5 21
## 6 30
## Hemoglobin.level.adjusted.for.altitude.and.smoking..g.dl...1.decimal.
## 1 NA
## 2 NA
## 3 NA
## 4 95
## 5 NA
## 6 113
## Anemia.level
## 1
## 2
## 3
## 4 Moderate
## 5
## 6 Mild
## Have.mosquito.bed.net.for.sleeping..from.household.questionnaire.
## 1 Yes
## 2 Yes
## 3 No
## 4 Yes
## 5 Yes
## 6 Yes
## Smokes.cigarettes Current.marital.status
## 1 No Living with partner
## 2 No Married
## 3 No Married
## 4 No Married
## 5 No No longer living together/separated
## 6 No Married
## Currently.residing.with.husband.partner When.child.put.to.breast
## 1 Staying elsewhere Immediately
## 2 Living with her Hours: 1
## 3 Living with her Immediately
## 4 Living with her 105.0
## 5 Immediately
## 6 Living with her
## Had.fever.in.last.two.weeks
## 1 No
## 2 No
## 3 No
## 4 No
## 5 No
## 6 No
## Hemoglobin.level.adjusted.for.altitude..g.dl...1.decimal. Anemia.level.1
## 1 NA
## 2 NA
## 3 NA
## 4 114 Not anemic
## 5 NA
## 6 119 Not anemic
## Taking.iron.pills..sprinkles.or.syrup
## 1 Yes
## 2 No
## 3 No
## 4 No
## 5 No
## 6 No
# tampilkan struktur
print(str(anemia_data))
## 'data.frame': 33924 obs. of 17 variables:
## $ Age.in.5.year.groups : chr "40-44" "35-39" "25-29" "25-29" ...
## $ Type.of.place.of.residence : chr "Urban" "Urban" "Urban" "Urban" ...
## $ Highest.educational.level : chr "Higher" "Higher" "Higher" "Secondary" ...
## $ Wealth.index.combined : chr "Richest" "Richest" "Richest" "Richest" ...
## $ Births.in.last.five.years : int 1 1 1 1 1 1 2 2 1 1 ...
## $ Age.of.respondent.at.1st.birth : int 22 28 26 25 21 30 32 32 32 19 ...
## $ Hemoglobin.level.adjusted.for.altitude.and.smoking..g.dl...1.decimal.: num NA NA NA 95 NA 113 121 121 NA 108 ...
## $ Anemia.level : chr "" "" "" "Moderate" ...
## $ Have.mosquito.bed.net.for.sleeping..from.household.questionnaire. : chr "Yes" "Yes" "No" "Yes" ...
## $ Smokes.cigarettes : chr "No" "No" "No" "No" ...
## $ Current.marital.status : chr "Living with partner" "Married" "Married" "Married" ...
## $ Currently.residing.with.husband.partner : chr "Staying elsewhere" "Living with her" "Living with her" "Living with her" ...
## $ When.child.put.to.breast : chr "Immediately" "Hours: 1" "Immediately" "105.0" ...
## $ Had.fever.in.last.two.weeks : chr "No" "No" "No" "No" ...
## $ Hemoglobin.level.adjusted.for.altitude..g.dl...1.decimal. : num NA NA NA 114 NA 119 102 NA NA 113 ...
## $ Anemia.level.1 : chr "" "" "" "Not anemic" ...
## $ Taking.iron.pills..sprinkles.or.syrup : chr "Yes" "No" "No" "No" ...
## NULL
#2 Pre processing data
Pada tahap ini akan dilakukan pembersihan data agar bisa diproses lebih lanjut untuk modelling
# ubah semua karakter jadi faktor
anemia_data <- anemia_data %>%
mutate(across(where(is.character), as.factor))
# ubah target ke ordinal
anemia_data$Anemia.level. <- factor(anemia_data$Anemia.level,
levels = c("Not anemic", "Mild", "Moderate", "Severe"),
ordered = TRUE)
# hapus missing values
anemia_data <- na.omit(anemia_data)
# sampling data
set.seed(123)
data_sample <- anemia_data[sample(nrow(anemia_data), 1000), ]
# cek distribusi target
print(table(data_sample$Anemia.level))
##
## Mild Moderate Not anemic Severe
## 0 273 286 429 12
#3 Bangun Model
#modeling
model_ordinal <- polr(Anemia.level ~ Age.in.5.year.groups +
Type.of.place.of.residence +
Highest.educational.level +
Wealth.index.combined +
Births.in.last.five.years +
Age.of.respondent.at.1st.birth +
Smokes.cigarettes +
Had.fever.in.last.two.weeks +
Taking.iron.pills..sprinkles.or.syrup,
data = data_sample, Hess = TRUE)
# hasil modeling
print(summary(model_ordinal))
## Call:
## polr(formula = Anemia.level ~ Age.in.5.year.groups + Type.of.place.of.residence +
## Highest.educational.level + Wealth.index.combined + Births.in.last.five.years +
## Age.of.respondent.at.1st.birth + Smokes.cigarettes + Had.fever.in.last.two.weeks +
## Taking.iron.pills..sprinkles.or.syrup, data = data_sample,
## Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## Age.in.5.year.groups20-24 0.60001 0.32999 1.8182
## Age.in.5.year.groups25-29 0.79465 0.32093 2.4761
## Age.in.5.year.groups30-34 0.75037 0.32996 2.2741
## Age.in.5.year.groups35-39 0.87018 0.33772 2.5766
## Age.in.5.year.groups40-44 0.83304 0.37401 2.2273
## Age.in.5.year.groups45-49 1.48557 0.50337 2.9512
## Type.of.place.of.residenceUrban -0.05636 0.14409 -0.3911
## Highest.educational.levelNo education -0.39397 0.29410 -1.3396
## Highest.educational.levelPrimary -0.06665 0.30062 -0.2217
## Highest.educational.levelSecondary -0.33639 0.25562 -1.3160
## Wealth.index.combinedPoorer -0.02623 0.18521 -0.1416
## Wealth.index.combinedPoorest -0.08940 0.20017 -0.4466
## Wealth.index.combinedRicher 0.13681 0.18791 0.7280
## Wealth.index.combinedRichest 0.29680 0.23075 1.2862
## Births.in.last.five.years -0.03881 0.09027 -0.4300
## Age.of.respondent.at.1st.birth -0.01296 0.01681 -0.7712
## Had.fever.in.last.two.weeksNo 0.02875 0.13522 0.2126
## Taking.iron.pills..sprinkles.or.syrupDon't know 0.53182 1.25453 0.4239
## Taking.iron.pills..sprinkles.or.syrupNo 0.07668 0.15331 0.5002
##
## Intercepts:
## Value Std. Error t value
## |Mild -11.6031 13.2353 -0.8767
## Mild|Moderate -0.7408 0.5526 -1.3405
## Moderate|Not anemic 0.5009 0.5524 0.9066
## Not anemic|Severe 4.7188 0.6208 7.6014
##
## Residual Deviance: 2232.501
## AIC: 2278.501
-Kelompok Usia: Usia ibu yang lebih tua (20-49 tahun) cenderung memiliki risiko anemia yang lebih tinggi.
-Tempat Tinggal Urban: Tidak menunjukkan pengaruh signifikan terhadap tingkat anemia.
-Pendidikan: Tingkat pendidikan ibu tidak memiliki pengaruh signifikan terhadap status anemia.
-Indeks Kekayaan: Tidak ada pengaruh signifikan antara status kekayaan dan anemia.
-Jumlah Kelahiran & Usia Ibu saat Melahirkan Pertama Kali: Tidak menunjukkan pengaruh signifikan.
-Gejala Demam & Penggunaan Suplemen Besi: Tidak memiliki pengaruh signifikan terhadap tingkat anemia.
-Intersep: Menunjukkan batasan antara kategori-kategori tingkat anemia, namun tidak cukup signifikan.
#4 Hitung P-value
ctable <- coef(summary(model_ordinal))
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
ctable <- cbind(ctable, "p value" = p)
# Tampilkan tabel koefisien + p-value
print(ctable)
## Value Std. Error
## Age.in.5.year.groups20-24 0.60000823 0.32999346
## Age.in.5.year.groups25-29 0.79464892 0.32093408
## Age.in.5.year.groups30-34 0.75036681 0.32995685
## Age.in.5.year.groups35-39 0.87017568 0.33772093
## Age.in.5.year.groups40-44 0.83303846 0.37400876
## Age.in.5.year.groups45-49 1.48557312 0.50337183
## Type.of.place.of.residenceUrban -0.05635530 0.14409464
## Highest.educational.levelNo education -0.39396761 0.29409576
## Highest.educational.levelPrimary -0.06665301 0.30062141
## Highest.educational.levelSecondary -0.33639278 0.25561948
## Wealth.index.combinedPoorer -0.02623049 0.18521194
## Wealth.index.combinedPoorest -0.08939596 0.20017414
## Wealth.index.combinedRicher 0.13680595 0.18791277
## Wealth.index.combinedRichest 0.29680283 0.23075353
## Births.in.last.five.years -0.03881426 0.09026689
## Age.of.respondent.at.1st.birth -0.01296130 0.01680583
## Had.fever.in.last.two.weeksNo 0.02874999 0.13522079
## Taking.iron.pills..sprinkles.or.syrupDon't know 0.53181587 1.25452869
## Taking.iron.pills..sprinkles.or.syrupNo 0.07668383 0.15331226
## |Mild -11.60309064 13.23528830
## Mild|Moderate -0.74082285 0.55263486
## Moderate|Not anemic 0.50087647 0.55244783
## Not anemic|Severe 4.71884168 0.62078971
## t value p value
## Age.in.5.year.groups20-24 1.8182428 6.902703e-02
## Age.in.5.year.groups25-29 2.4760503 1.328449e-02
## Age.in.5.year.groups30-34 2.2741362 2.295780e-02
## Age.in.5.year.groups35-39 2.5766117 9.977397e-03
## Age.in.5.year.groups40-44 2.2273234 2.592567e-02
## Age.in.5.year.groups45-49 2.9512440 3.164967e-03
## Type.of.place.of.residenceUrban -0.3910992 6.957239e-01
## Highest.educational.levelNo education -1.3395896 1.803788e-01
## Highest.educational.levelPrimary -0.2217175 8.245338e-01
## Highest.educational.levelSecondary -1.3159904 1.881773e-01
## Wealth.index.combinedPoorer -0.1416242 8.873769e-01
## Wealth.index.combinedPoorest -0.4465910 6.551704e-01
## Wealth.index.combinedRicher 0.7280290 4.665958e-01
## Wealth.index.combinedRichest 1.2862331 1.983617e-01
## Births.in.last.five.years -0.4299944 6.671997e-01
## Age.of.respondent.at.1st.birth -0.7712387 4.405655e-01
## Had.fever.in.last.two.weeksNo 0.2126152 8.316271e-01
## Taking.iron.pills..sprinkles.or.syrupDon't know 0.4239169 6.716264e-01
## Taking.iron.pills..sprinkles.or.syrupNo 0.5001807 6.169479e-01
## |Mild -0.8766783 3.806614e-01
## Mild|Moderate -1.3405286 1.800735e-01
## Moderate|Not anemic 0.9066494 3.645923e-01
## Not anemic|Severe 7.6013529 2.930508e-14
Hanya variabel usia ibu (kelompok 25–29, 30–34, 35–39, 40–44, 45–49 tahun) yang memiliki koefisien positif signifikan (p<0,05), artinya semakin tua rentang usia ibu, semakin besar odds anak berada di kategori anemia yang lebih berat. Semua variabel lain—tempat tinggal (urban), tingkat pendidikan, indeks kekayaan, jumlah kelahiran, usia ibu saat lahir pertama, riwayat demam, dan status suplementasi besi—memiliki p‑value >0,05 dan tidak menunjukkan pengaruh signifikan. Intersep (threshold) untuk kategori “Not anemic|Severe” satu‑satunya yang signifikan, menegaskan batas logit antara anak tidak anemis dan yang sangat parah
Model menunjukkan bahwa usia ibu pada kelompok 25–29 s.d. 45–49 tahun secara signifikan meningkatkan odds anak berada di tingkat anemia lebih berat (p < 0,05). Semua variabel lain—tempat tinggal, pendidikan, status kekayaan, jumlah kelahiran, usia saat melahirkan pertama, riwayat demam, dan suplemen besi—tidak signifikan (p > 0,05), dan satu intersep signifikan hanya menegaskan batas logit antara kategori “Not anemic” dan “Severe.”