library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
drs <- read_csv("hospital_dataset.csv")
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
drs
## # A tibble: 700 × 8
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Michael Ander… 01/04/1957 112/… 0.69 42
## 2 N/A 20/09/1975 140 … 1.5 41.9
## 3 Tan Wei Ming 12/04/1965 134/… 0.76 26.3
## 4 Shen Yi-Ching 11/09/1980 120/… 1.92 NA
## 5 Kung Mei-Lin 22/08/1985 99/77 0.81 25.5
## 6 Ho Chuan-Wei 10/08/1962 149/… 0.61 42.2
## 7 <NA> 18/01/1994 110/… 1.04 2
## 8 Betty Lewis 02/08/1982 108/… 2.24 9.5
## 9 Joseph Garcia 06/12/1982 <NA> 0.18 24.8
## 10 Ong Lay Kheng 26/02/1951 128/… NA 40.9
## # ℹ 690 more rows
## # ℹ abbreviated name: ¹Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## # Peak_Plantar_Pressure_kPa <dbl>
drs <- drs %>%
rename_with(tolower) %>%
distinct()
colnames(drs)
## [1] "nama" "tanggal_lahir"
## [3] "tensi" "skin_stiffness_n_per_mm"
## [5] "microcirculation_pu" "suhu_tubuh_celcius"
## [7] "penyakit" "peak_plantar_pressure_kpa"
drs <- drs %>%
mutate(suhu_tubuh_celcius = str_replace(suhu_tubuh_celcius, "°C", ""),
suhu_tubuh_celcius = as.numeric(suhu_tubuh_celcius))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `suhu_tubuh_celcius = as.numeric(suhu_tubuh_celcius)`.
## Caused by warning:
## ! NAs introduced by coercion
glimpse(drs)
## Rows: 698
## Columns: 8
## $ nama <chr> "Michael Anderson", "N/A", "Tan Wei Ming", "…
## $ tanggal_lahir <chr> "01/04/1957", "20/09/1975", "12/04/1965", "1…
## $ tensi <chr> "112/67", "140 / 91", "134/72", "120/79", "9…
## $ skin_stiffness_n_per_mm <dbl> 0.69, 1.50, 0.76, 1.92, 0.81, 0.61, 1.04, 2.…
## $ microcirculation_pu <dbl> 42.0, 41.9, 26.3, NA, 25.5, 42.2, 2.0, 9.5, …
## $ suhu_tubuh_celcius <dbl> 37.6, 36.5, 37.5, 37.0, 36.0, 36.8, 36.3, 36…
## $ penyakit <chr> "Non-Diabetic", "Non-Diabetic", "Non-Diabeti…
## $ peak_plantar_pressure_kpa <dbl> 294.0, NA, 431.8, 577.5, 502.3, 201.4, 512.8…
drs <- read_csv("hospital_dataset.csv")
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
drs <- drs %>%
setNames(tolower(names(.))) %>%
mutate(tensi = str_replace_all(tensi, "[-|]", "/"),
tensi = str_replace_all(tensi, "Sys:", ""),
tensi = str_replace_all(tensi, " Dia:", "/"),
tensi = str_replace_all(tensi, " ", "")) %>%
separate(tensi, into = c("sistolik", "diastolik"), sep = "/", convert = TRUE) %>%
mutate(suhu_tubuh_celcius = str_replace(suhu_tubuh_celcius, "°C", ""),
suhu_tubuh_celcius = as.numeric(suhu_tubuh_celcius))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 2 rows [78,
## 440].
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `suhu_tubuh_celcius = as.numeric(suhu_tubuh_celcius)`.
## Caused by warning:
## ! NAs introduced by coercion
drs
## # A tibble: 700 × 9
## nama tanggal_lahir sistolik diastolik skin_stiffness_n_per_mm
## <chr> <chr> <chr> <chr> <dbl>
## 1 Michael Anderson 01/04/1957 112 67 0.69
## 2 N/A 20/09/1975 140 91 1.5
## 3 Tan Wei Ming 12/04/1965 134 72 0.76
## 4 Shen Yi-Ching 11/09/1980 120 79 1.92
## 5 Kung Mei-Lin 22/08/1985 99 77 0.81
## 6 Ho Chuan-Wei 10/08/1962 149 65 0.61
## 7 <NA> 18/01/1994 110 71 1.04
## 8 Betty Lewis 02/08/1982 108 67 2.24
## 9 Joseph Garcia 06/12/1982 <NA> <NA> 0.18
## 10 Ong Lay Kheng 26/02/1951 128 78 NA
## # ℹ 690 more rows
## # ℹ 4 more variables: microcirculation_pu <dbl>, suhu_tubuh_celcius <dbl>,
## # penyakit <chr>, peak_plantar_pressure_kpa <dbl>
drs <- drs %>%
rename_with(~tolower(gsub("[[:punct:] ]+", "_", .x))) %>%
filter(
sistolik > 0,
diastolik > 0,
peak_plantar_pressure_kpa < 5000,
skin_stiffness_n_per_mm >= 0,
suhu_tubuh_celcius > 30 & suhu_tubuh_celcius < 45,
!is.na(nama)
)
View(drs)
summary(drs)
## nama tanggal_lahir sistolik diastolik
## Length:490 Length:490 Length:490 Length:490
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## skin_stiffness_n_per_mm microcirculation_pu suhu_tubuh_celcius
## Min. : 0.100 Min. :-32.50 Min. :35.50
## 1st Qu.: 0.710 1st Qu.: 17.70 1st Qu.:36.50
## Median : 1.145 Median : 27.70 Median :36.80
## Mean : 1.460 Mean : 27.81 Mean :36.79
## 3rd Qu.: 1.610 3rd Qu.: 38.35 3rd Qu.:37.00
## Max. :150.000 Max. : 77.30 Max. :42.50
## NA's :31
## penyakit peak_plantar_pressure_kpa
## Length:490 Min. :-100.0
## Class :character 1st Qu.: 272.0
## Mode :character Median : 386.4
## Mean : 387.0
## 3rd Qu.: 504.1
## Max. : 715.4
##
summary(drs)
## nama tanggal_lahir sistolik diastolik
## Length:490 Length:490 Length:490 Length:490
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## skin_stiffness_n_per_mm microcirculation_pu suhu_tubuh_celcius
## Min. : 0.100 Min. :-32.50 Min. :35.50
## 1st Qu.: 0.710 1st Qu.: 17.70 1st Qu.:36.50
## Median : 1.145 Median : 27.70 Median :36.80
## Mean : 1.460 Mean : 27.81 Mean :36.79
## 3rd Qu.: 1.610 3rd Qu.: 38.35 3rd Qu.:37.00
## Max. :150.000 Max. : 77.30 Max. :42.50
## NA's :31
## penyakit peak_plantar_pressure_kpa
## Length:490 Min. :-100.0
## Class :character 1st Qu.: 272.0
## Mode :character Median : 386.4
## Mean : 387.0
## 3rd Qu.: 504.1
## Max. : 715.4
##
drs <- drs %>%
mutate(sistolik = as.numeric(sistolik))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `sistolik = as.numeric(sistolik)`.
## Caused by warning:
## ! NAs introduced by coercion
class(drs$sistolik)
## [1] "numeric"
# Jalankan ggplot-nya sekarang
ggplot(drs, aes(x = sistolik)) +
geom_histogram(bins = 20, fill = "yellow", color = "black") +
theme_minimal() +
labs(title = "Distribusi Tekanan Darah Sistolik",
x = "Sistolik (mmHg)",
y = "Frekuensi")
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_bin()`).

drs <- drs %>%
drop_na(sistolik, diastolik)
summary(drs)
## nama tanggal_lahir sistolik diastolik
## Length:487 Length:487 Min. : 78.0 Length:487
## Class :character Class :character 1st Qu.:112.0 Class :character
## Mode :character Mode :character Median :122.0 Mode :character
## Mean :121.6
## 3rd Qu.:131.0
## Max. :164.0
##
## skin_stiffness_n_per_mm microcirculation_pu suhu_tubuh_celcius
## Min. : 0.100 Min. :-32.50 Min. :35.50
## 1st Qu.: 0.720 1st Qu.: 17.70 1st Qu.:36.50
## Median : 1.150 Median : 27.40 Median :36.80
## Mean : 1.467 Mean : 27.72 Mean :36.79
## 3rd Qu.: 1.610 3rd Qu.: 37.92 3rd Qu.:37.00
## Max. :150.000 Max. : 77.30 Max. :42.50
## NA's :31
## penyakit peak_plantar_pressure_kpa
## Length:487 Min. :-100.0
## Class :character 1st Qu.: 274.6
## Mode :character Median : 386.7
## Mean : 388.4
## 3rd Qu.: 504.8
## Max. : 715.4
##