Berikut adalah package yang akan digunakan, yaitu
1. ggplot2 dan ggpubr untuk memvisualisasikan data
2. dplyr untuk memanipulasi data
4. ggthemes menyediakan tema, skala, dan geom tambahan untuk 'ggplot2'
5. RColorBrewer menyediakan skema warna untuk grafik
5. gridExtra menyediakan sejumlah fungsi untuk grid grafik
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.6.3
## Loading required package: magrittr
library(kableExtra)
library(data.table)
## Warning: package 'data.table' was built under R version 3.6.3
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.6.3
library(RColorBrewer)
library(gridExtra)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the heart-failure clinical records. The file is read as
# semicolon-delimited, with the first row used as column names.
data <- read.csv(
  "heart_failure_clinical_records_dataset.csv",
  sep = ";",
  header = TRUE
)
head(data) # Show the first rows of the data
## umur anaemia creatinine_phosphokinase diabetes ejection_fraction
## 1 75 0 582 0 20
## 2 55 0 7861 0 38
## 3 65 0 146 0 20
## 4 50 1 111 0 20
## 5 65 1 160 1 20
## 6 90 1 47 0 40
## tekanan_darah_tinggi platelets serum_creatinine serum_sodium Jenis_kelamin
## 1 1 265000 1.9 130 1
## 2 0 263358 1.1 136 1
## 3 0 162000 1.3 129 1
## 4 0 210000 1.9 137 1
## 5 0 327000 2.7 116 0
## 6 1 204000 2.1 132 1
## smoking time Peristiwa_Kematian
## 1 0 4 1
## 2 0 6 1
## 3 1 7 1
## 4 0 7 1
## 5 0 8 1
## 6 1 8 1
tail(data, n = 6L) # Show the last rows of the data
## umur anaemia creatinine_phosphokinase diabetes ejection_fraction
## 294 63 1 103 1 35
## 295 62 0 61 1 38
## 296 55 0 1820 0 38
## 297 45 0 2060 1 60
## 298 45 0 2413 0 38
## 299 50 0 196 0 45
## tekanan_darah_tinggi platelets serum_creatinine serum_sodium Jenis_kelamin
## 294 0 179000 0.9 136 1
## 295 1 155000 1.1 143 1
## 296 0 270000 1.2 139 0
## 297 0 742000 0.8 138 0
## 298 0 140000 1.4 140 1
## 299 0 395000 1.6 136 1
## smoking time Peristiwa_Kematian
## 294 1 270 0
## 295 1 270 0
## 296 0 271 0
## 297 0 278 0
## 298 1 280 0
## 299 1 285 0
# Build a data dictionary with one row per column of `data`: the column name,
# a short description, and its unit of measurement. The hand-written vectors
# below are expected to follow the column order of `data`.
fitur <- names(data)

Deskripsi <- c(
  "Menjelaskan usia subjek dalam kumpulan data",
  "Kondisi di mana seseorang kekurangan sel darah merah",
  "Tingkat enzim CPK dalam darah",
  "Penyakit metabolisme yang menyebabkan gula darah tinggi",
  "Persentase darah yang keluar",
  "Menunjukkan apakah tekanan darah tinggi atau tidak",
  "Jumlah trombosit dalam darah",
  "Mengukur tingkat kreatinin dalam darah dan memberikan perkiraan seberapa baik ginjal menyaring",
  "Tingkat natrium dalam darah",
  "Laki-laki atau Perempuan",
  "Ya atau Tidak",
  "Waktu",
  "Meninggal atau Tidak Meninggal"
)

Pengukuran <- c(
  "Tahun",
  "Boolean",
  "mcg/L",
  "Boolean",
  "Persen",
  "Boolean",
  "kiloplatelets/mL",
  "mg/dL",
  "mEq/L",
  "Binary",
  "Boolean",
  "Hari",
  "Boolean"
)

# Fail early if the hand-written vectors no longer match the number of
# columns, instead of letting shorter vectors be recycled into the table.
stopifnot(
  length(Deskripsi) == length(fitur),
  length(Pengukuran) == length(fitur)
)

# Assemble the table directly as a data frame (no intermediate matrix).
n <- data.frame(fitur, Deskripsi, Pengukuran)

# Render the dictionary as a styled table with a caption citing the source.
n %>%
  kable(caption = "Penjelasan Data, sumber: https://doi.org/10.1186/s12911-020-1023-5") %>%
  kable_styling()
fitur | Deskripsi | Pengukuran |
---|---|---|
umur | Menjelaskan usia subjek dalam kumpulan data | Tahun |
anaemia | Kondisi di mana seseorang kekurangan sel darah merah | Boolean |
creatinine_phosphokinase | Tingkat enzim CPK dalam darah | mcg/L |
diabetes | Penyakit metabolisme yang menyebabkan gula darah tinggi | Boolean |
ejection_fraction | Persentase darah yang keluar | Persen |
tekanan_darah_tinggi | Menunjukkan apakah tekanan darah tinggi atau tidak | Boolean |
platelets | Jumlah trombosit dalam darah | kiloplatelets/mL |
serum_creatinine | Mengukur tingkat kreatinin dalam darah dan memberikan perkiraan seberapa baik ginjal menyaring | mg/dL |
serum_sodium | Tingkat natrium dalam darah | mEq/L |
Jenis_kelamin | Laki-laki atau Perempuan | Binary |
smoking | Ya atau Tidak | Boolean |
time | Waktu | Hari |
Peristiwa_Kematian | Meninggal atau Tidak Meninggal | Boolean |
# Convert the death-event indicator to a factor.
data[["Peristiwa_Kematian"]] <- factor(data[["Peristiwa_Kematian"]])

# Histogram of age using bins of width 5, with a centred italic caption.
age <- ggplot(data, aes(x = umur)) +
  geom_histogram(
    binwidth = 5,
    color = "white",
    fill = "#5757bc",
    alpha = 0.5
  ) +
  theme_fivethirtyeight() +
  labs(title = "Distribusi Umur", caption = "i. Distribusi Umur") +
  theme(plot.caption = element_text(hjust = 0.5, face = "italic")) +
  scale_x_continuous(breaks = seq(from = 40, to = 100, by = 10))
# Age histogram filled by death event. position = "identity" overlays the
# groups in the same bins rather than stacking them.
de <- ggplot(data, aes(x = umur, fill = Peristiwa_Kematian)) +
  geom_histogram(
    binwidth = 5,
    position = "identity",
    alpha = 0.5,
    color = "white"
  ) +
  theme_fivethirtyeight() +
  scale_fill_manual(values = c("#b3d3dd", "#2d7291")) +
  labs(caption = "ii. Distribusi Usia dengan Peristiwa Kematian") +
  theme(plot.caption = element_text(hjust = 0.5, face = "italic")) +
  scale_x_continuous(breaks = seq(from = 40, to = 100, by = 10))

# Draw both age plots in a single figure.
gridExtra::grid.arrange(age, de)
# Density of ejection fraction. Dashed lines at 50 and 70 split the axis into
# the regions labelled "Rendah", "Normal" and "Tinggi".
ef1 <- ggplot(data, aes(x = ejection_fraction)) +
  geom_density(fill = "#2043b7", alpha = 0.5) +
  theme_fivethirtyeight() +
  geom_vline(xintercept = 50, linetype = "dashed") +
  geom_vline(xintercept = 70, linetype = "dashed") +
  scale_x_continuous(breaks = seq(from = 20, to = 80, by = 10)) +
  annotate("text", x = 60, y = 0.03, label = "Normal", color = "#0a4c41") +
  annotate("text", x = 78, y = 0.03, label = "Tinggi", color = "#ad652a") +
  annotate("text", x = 35, y = 0.03, label = "Rendah", color = "#082451") +
  labs(
    title = "Distribusi ejection_fraction",
    caption = "i. Distribusi ejection_fraction"
  ) +
  theme(plot.caption = element_text(hjust = 0.5, face = "italic"))
# Mean ejection fraction for each outcome, computed once up front. Mapping
# mean(...) inside aes() recycles the single value across every row of
# `data`, so each reference line and arrow would be drawn once per row.
rata_ef_hidup <- mean(data$ejection_fraction[data$Peristiwa_Kematian == 0])
rata_ef_meninggal <- mean(data$ejection_fraction[data$Peristiwa_Kematian == 1])

# One-row data sets describing the arrows that point from each text label to
# the corresponding group mean.
panah_ef_hidup <- data.frame(
  x = 50, y = 0.05, xend = rata_ef_hidup, yend = 0.04
)
panah_ef_meninggal <- data.frame(
  x = 27, y = 0.05, xend = rata_ef_meninggal, yend = 0.04
)

# Density of ejection fraction filled by death event, with a vertical line at
# each group mean, a labelled arrow to each mean, and the dashed 50/70
# reference lines.
ef2 <- ggplot(data, aes(x = ejection_fraction, fill = Peristiwa_Kematian)) +
  geom_density(alpha = 0.5) +
  theme_fivethirtyeight() +
  scale_fill_manual(values = c("#a9d5e0", "#56c7e2")) +
  scale_x_continuous(breaks = seq(20, 80, 10)) +
  # Passing xintercept as a parameter draws exactly one line per call.
  geom_vline(xintercept = rata_ef_hidup, color = "#a8efe3") +
  geom_vline(xintercept = rata_ef_meninggal, color = "#39d1b8") +
  # inherit.aes = FALSE: the one-row arrow data has no fill column.
  geom_curve(
    data = panah_ef_hidup,
    mapping = aes(x = x, y = y, xend = xend, yend = yend),
    inherit.aes = FALSE,
    arrow = arrow(length = unit(0.2, "cm")),
    color = "black"
  ) +
  geom_curve(
    data = panah_ef_meninggal,
    mapping = aes(x = x, y = y, xend = xend, yend = yend),
    inherit.aes = FALSE,
    arrow = arrow(length = unit(0.2, "cm")),
    color = "black"
  ) +
  annotate("text", x = 50, y = 0.048, label = "Rata-rata kejadian tidak meninggal", size = 3) +
  annotate("text", x = 27, y = 0.052, label = "Rata-rata kejadian meninggal", size = 3) +
  geom_vline(xintercept = 50, linetype = "dashed") +
  geom_vline(xintercept = 70, linetype = "dashed") +
  theme(plot.caption = element_text(hjust = 0.5, face = "italic")) +
  labs(caption = "ii. Distribusi ejection_fraction dengan peristiwa kematian")

# Draw both ejection-fraction plots in a single figure.
gridExtra::grid.arrange(ef1, ef2)
# Density of serum creatinine. Dashed lines at 0.84 and 1.4 bracket the band
# labelled "Normal"; the area to the right carries the kidney-damage note.
sc1 <- ggplot(data, aes(x = serum_creatinine)) +
  geom_density(fill = "#bca6bc", alpha = 0.5) +
  theme_fivethirtyeight() +
  geom_vline(xintercept = 0.84, linetype = "dashed") +
  geom_vline(xintercept = 1.4, linetype = "dashed") +
  annotate(
    "text",
    x = 1.05, y = 0.5,
    label = "Normal",
    color = "darkgreen",
    angle = 90
  ) +
  annotate(
    "text",
    x = 3, y = 0.5,
    label = "Kemungkinan kerusakan \nginjal",
    color = "#af6c35"
  ) +
  labs(
    title = "Distribusi serum_creatinine",
    caption = "i. Distribusi serum_creatinine"
  ) +
  theme(plot.caption = element_text(hjust = 0.5, face = "italic"))
# Mean serum creatinine for each outcome, computed once up front. Mapping
# mean(...) inside aes() recycles the single value across every row of
# `data`, so each reference line and arrow would be drawn once per row.
rata_sc_hidup <- mean(data$serum_creatinine[data$Peristiwa_Kematian == 0])
rata_sc_meninggal <- mean(data$serum_creatinine[data$Peristiwa_Kematian == 1])

# One-row data sets describing the arrows that point from each text label to
# the corresponding group mean.
panah_sc_hidup <- data.frame(
  x = 2.5, y = 1.25, xend = rata_sc_hidup, yend = 0.9
)
panah_sc_meninggal <- data.frame(
  x = 3, y = 0.9, xend = rata_sc_meninggal, yend = 0.5
)

# Density of serum creatinine filled by death event, with a vertical line at
# each group mean, a labelled arrow to each mean, and the dashed 0.84/1.4
# reference lines.
sc2 <- ggplot(data, aes(x = serum_creatinine, fill = Peristiwa_Kematian)) +
  geom_density(alpha = 0.5) +
  theme_fivethirtyeight() +
  scale_fill_manual(values = c("#76b5a1", "#108963")) +
  # Passing xintercept as a parameter draws exactly one line per call.
  geom_vline(xintercept = rata_sc_hidup, color = "#a8efe3") +
  geom_vline(xintercept = rata_sc_meninggal, color = "#39d1b8") +
  # inherit.aes = FALSE: the one-row arrow data has no fill column.
  geom_curve(
    data = panah_sc_hidup,
    mapping = aes(x = x, y = y, xend = xend, yend = yend),
    inherit.aes = FALSE,
    arrow = arrow(length = unit(0.2, "cm")),
    color = "#19299b"
  ) +
  geom_curve(
    data = panah_sc_meninggal,
    mapping = aes(x = x, y = y, xend = xend, yend = yend),
    inherit.aes = FALSE,
    arrow = arrow(length = unit(0.2, "cm")),
    color = "#19299b"
  ) +
  annotate("text", x = 2.5, y = 1.2, label = "Rata-rata kejadian tidak meninggal", size = 2.5) +
  annotate("text", x = 3, y = 0.85, label = "Rata-rata kejadian meninggal", size = 2.5) +
  geom_vline(xintercept = 0.84, linetype = "dashed") +
  geom_vline(xintercept = 1.4, linetype = "dashed") +
  theme(plot.caption = element_text(hjust = 0.5, face = "italic")) +
  labs(caption = "ii. Distribution of creatinine with death event") +
  annotate("text", label = "creatinine > 2.5 \n60% kemungkinan kematian", x = 5, y = 0.5)

# Draw both serum-creatinine plots in a single figure.
gridExtra::grid.arrange(sc1, sc2)
# Build a proportion ("fill") bar chart with one bar per death-event level,
# split by the levels of one column of the data.
#
# Arguments:
#   kolom         Name (string) of the column used for the fill colour; it is
#                 converted to a factor inside the plot.
#   nama_legenda  Title of the fill legend.
#   label_legenda Labels for the fill legend entries.
#   subjudul      Plot subtitle; defaults to the legend title.
#   sumber        Data frame to plot; defaults to the global `data`.
# Returns: a ggplot object.
plot_proporsi_kematian <- function(kolom, nama_legenda, label_legenda,
                                   subjudul = nama_legenda, sumber = data) {
  ggplot(
    sumber,
    # .data[[kolom]] looks the column up by its string name at build time.
    aes(x = Peristiwa_Kematian, fill = factor(.data[[kolom]]))
  ) +
    geom_bar(position = "fill") +
    theme_fivethirtyeight() +
    scale_x_discrete(
      labels = c("Peristiwa \nKematian:Tidak", "Peristiwa \nKematian:Ya")
    ) +
    scale_fill_manual(
      values = c("#a8efe3", "#39d1b8"),
      name = nama_legenda,
      labels = label_legenda
    ) +
    labs(subtitle = subjudul)
}

# Anaemia: the legend title and the subtitle are spelled differently.
vis1 <- plot_proporsi_kematian(
  "anaemia", "Anaemia", c("Tidak", "Ya"),
  subjudul = "Anemia"
)
vis1

vis2 <- plot_proporsi_kematian("diabetes", "Diabetes", c("No", "Yes"))
vis2

vis3 <- plot_proporsi_kematian(
  "tekanan_darah_tinggi", "tekanan darah tinggi", c("No", "Yes")
)
vis3

vis4 <- plot_proporsi_kematian(
  "Jenis_kelamin", "Jenis Kelamin", c("Perempuan", "Laki-laki")
)
vis4

vis5 <- plot_proporsi_kematian("smoking", "Smoking", c("Tidak", "Ya"))
vis5
# Base-graphics box plots of three numeric columns, each split by death event
# and drawn without the surrounding frame.
boxplot(
  umur ~ Peristiwa_Kematian,
  data = data,
  ylab = "Umur",
  frame = FALSE,
  col = "#8fb2e5"
)
boxplot(
  platelets ~ Peristiwa_Kematian,
  data = data,
  ylab = "platelets",
  frame = FALSE,
  col = "#c38fe5"
)
boxplot(
  ejection_fraction ~ Peristiwa_Kematian,
  data = data,
  ylab = "ejection_fraction",
  frame = FALSE,
  col = "#d35eb2"
)