library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'tidyr' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw hospital dataset (use this in place of the earlier loading step).
# NOTE(review): readr reports parsing issues for this file; inspect them with
# problems(datars) before trusting the guessed column types.
# NOTE(review): the literal string "N/A" appears in Nama and is not treated as
# missing by the default `na` setting -- confirm whether it should be.
datars <- read_csv(file = "D:/PSD/rsconnect/documents/hospital_dataset.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Quick structural overview of the raw data: column names, types, first values.
datars %>% glimpse()
## Rows: 700
## Columns: 8
## $ Nama <chr> "Michael Anderson", "N/A", "Tan Wei Ming", "…
## $ Tanggal_Lahir <chr> "01/04/1957", "20/09/1975", "12/04/1965", "1…
## $ Tensi <chr> "112/67", "140 / 91", "134/72", "120/79", "9…
## $ Skin_Stiffness_N_per_mm <dbl> 0.69, 1.50, 0.76, 1.92, 0.81, 0.61, 1.04, 2.…
## $ Microcirculation_PU <dbl> 42.0, 41.9, 26.3, NA, 25.5, 42.2, 2.0, 9.5, …
## $ Suhu_Tubuh_Celcius <chr> "37.6", "36.5°C", "37.5", "37.0", "36.0", "…
## $ Penyakit <chr> "Non-Diabetic", "Non-Diabetic", "Non-Diabeti…
## $ Peak_Plantar_Pressure_kPa <dbl> 294.0, NA, 431.8, 577.5, 502.3, 201.4, 512.8…
# Per-column summary statistics of the raw data (ranges, quartiles, NA counts).
datars %>% summary()
## Nama Tanggal_Lahir Tensi
## Length:700 Length:700 Length:700
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## Min. : -2.180 Min. : -32.50 Length:700
## 1st Qu.: 0.700 1st Qu.: 18.00 Class :character
## Median : 1.100 Median : 27.70 Mode :character
## Mean : 1.343 Mean : 35.60
## 3rd Qu.: 1.590 3rd Qu.: 39.05
## Max. :150.000 Max. :5000.00
## NA's :39 NA's :52
## Penyakit Peak_Plantar_Pressure_kPa
## Length:700 Min. : -100.0
## Class :character 1st Qu.: 268.9
## Mode :character Median : 384.3
## Mean : 993.8
## 3rd Qu.: 508.9
## Max. :99999.0
## NA's :45
# Count missing values per column in the raw data.
datars %>%
  is.na() %>%
  colSums()
## Nama Tanggal_Lahir Tensi
## 40 44 49
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## 39 52 51
## Penyakit Peak_Plantar_Pressure_kPa
## 46 45
# Build the cleaned dataset in a single pass:
#   1. drop rows with any missing value,
#   2. strip non-numeric characters (e.g. the "°C" suffix) from the temperature
#      and convert it to numeric,
#   3. keep only blood-pressure readings written as "systolic/diastolic" and
#      split them into two numeric columns (stray spaces such as "140 / 91"
#      are removed by the digit-only filter),
#   4. drop rows where any conversion produced NA,
#   5. remove duplicate rows.
# De-duplication is done LAST on purpose: rows that differ only in formatting
# (e.g. "140 / 91" vs "140/91", "36.5°C" vs "36.5") only become identical after
# normalisation, so running distinct() earlier leaves duplicates behind.
# NOTE(review): no range filtering is applied here; the summary of df_clean
# still shows implausible values (e.g. temperature 99.9, pressure 99999 and
# negative readings) -- confirm whether outlier handling is intended elsewhere.
df_clean <- datars %>%
  drop_na() %>%
  mutate(
    Suhu_Tubuh_Celcius = as.numeric(
      str_remove_all(Suhu_Tubuh_Celcius, "[^0-9.]")
    )
  ) %>%
  filter(str_detect(Tensi, "/")) %>%
  separate(
    Tensi,
    into = c("Sistolik", "Diastolik"),
    sep = "/",
    remove = TRUE
  ) %>%
  mutate(
    across(
      c(Sistolik, Diastolik),
      \(x) as.numeric(str_remove_all(x, "[^0-9]"))
    )
  ) %>%
  drop_na() %>%
  distinct()
# Verify that no missing values remain after cleaning.
colSums(is.na(df_clean))
## Nama Tanggal_Lahir Sistolik
## 0 0 0
## Diastolik Skin_Stiffness_N_per_mm Microcirculation_PU
## 0 0 0
## Suhu_Tubuh_Celcius Penyakit Peak_Plantar_Pressure_kPa
## 0 0 0
# Verify that no duplicate rows remain after cleaning.
sum(duplicated(df_clean))
## [1] 0
# Structural overview of the cleaned data (Tensi is now Sistolik/Diastolik).
df_clean %>% glimpse()
## Rows: 411
## Columns: 9
## $ Nama <chr> "Michael Anderson", "Tan Wei Ming", "Kung Me…
## $ Tanggal_Lahir <chr> "01/04/1957", "12/04/1965", "22/08/1985", "1…
## $ Sistolik <dbl> 112, 134, 99, 149, 108, 105, 128, 135, 106, …
## $ Diastolik <dbl> 67, 72, 77, 65, 67, 90, 62, 64, 67, 91, 83, …
## $ Skin_Stiffness_N_per_mm <dbl> 0.69, 0.76, 0.81, 0.61, 2.24, 1.92, 1.07, 0.…
## $ Microcirculation_PU <dbl> 42.0, 26.3, 25.5, 42.2, 9.5, 6.5, 20.0, 31.9…
## $ Suhu_Tubuh_Celcius <dbl> 37.6, 37.5, 36.0, 36.8, 36.4, 37.1, 37.1, 36…
## $ Penyakit <chr> "Non-Diabetic", "Non-Diabetic", "Diabetic", …
## $ Peak_Plantar_Pressure_kPa <dbl> 294.0, 431.8, 502.3, 201.4, 327.7, 623.0, 51…
# Per-column summary statistics of the cleaned data.
df_clean %>% summary()
## Nama Tanggal_Lahir Sistolik Diastolik
## Length:411 Length:411 Min. : 78.0 Min. : 46.00
## Class :character Class :character 1st Qu.:112.0 1st Qu.: 71.00
## Mode :character Mode :character Median :122.0 Median : 78.00
## Mean :121.8 Mean : 78.18
## 3rd Qu.:132.0 3rd Qu.: 85.00
## Max. :164.0 Max. :113.00
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## Min. : -1.500 Min. :-32.50 Min. :35.60
## 1st Qu.: 0.700 1st Qu.: 17.70 1st Qu.:36.50
## Median : 1.150 Median : 27.30 Median :36.80
## Mean : 1.501 Mean : 27.63 Mean :36.93
## 3rd Qu.: 1.610 3rd Qu.: 37.95 3rd Qu.:37.00
## Max. :150.000 Max. : 77.30 Max. :99.90
## Penyakit Peak_Plantar_Pressure_kPa
## Length:411 Min. : -100.0
## Class :character 1st Qu.: 272.4
## Mode :character Median : 384.3
## Mean : 871.0
## 3rd Qu.: 505.8
## Max. :99999.0