library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw hospital records from the working directory into a tibble.
# NOTE(review): only readr's default missing-value markers are applied here,
# so a literal "N/A" string (seen in `Nama`) is kept as ordinary text.
datars <- read_csv(file = "hospital_dataset.csv")
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Compact column-by-column preview: name, type and the first few values.
glimpse(x = datars)
## Rows: 700
## Columns: 8
## $ Nama <chr> "Michael Anderson", "N/A", "Tan Wei Ming", "…
## $ Tanggal_Lahir <chr> "01/04/1957", "20/09/1975", "12/04/1965", "1…
## $ Tensi <chr> "112/67", "140 / 91", "134/72", "120/79", "9…
## $ Skin_Stiffness_N_per_mm <dbl> 0.69, 1.50, 0.76, 1.92, 0.81, 0.61, 1.04, 2.…
## $ Microcirculation_PU <dbl> 42.0, 41.9, 26.3, NA, 25.5, 42.2, 2.0, 9.5, …
## $ Suhu_Tubuh_Celcius <chr> "37.6", "36.5°C", "37.5", "37.0", "36.0", "3…
## $ Penyakit <chr> "Non-Diabetic", "Non-Diabetic", "Non-Diabeti…
## $ Peak_Plantar_Pressure_kPa <dbl> 294.0, NA, 431.8, 577.5, 502.3, 201.4, 512.8…
# Per-column summary: quantiles and NA counts for numeric columns,
# length/class/mode for character columns.
summary(object = datars)
## Nama Tanggal_Lahir Tensi
## Length:700 Length:700 Length:700
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## Min. : -2.180 Min. : -32.50 Length:700
## 1st Qu.: 0.700 1st Qu.: 18.00 Class :character
## Median : 1.100 Median : 27.70 Mode :character
## Mean : 1.342 Mean : 35.58
## 3rd Qu.: 1.595 3rd Qu.: 39.00
## Max. :150.000 Max. :5000.00
## NA's :37 NA's :50
## Penyakit Peak_Plantar_Pressure_kPa
## Length:700 Min. : -100.0
## Class :character 1st Qu.: 268.6
## Mode :character Median : 384.3
## Mean : 991.9
## 3rd Qu.: 508.5
## Max. :99999.0
## NA's :43
# Count missing values per column: build the logical NA mask, then sum each
# column of that mask.
datars %>%
  is.na() %>%
  colSums()
## Nama Tanggal_Lahir Tensi
## 40 42 47
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## 37 50 49
## Penyakit Peak_Plantar_Pressure_kPa
## 45 43
# Listwise deletion: keep only the rows that have no NA in any column.
data_clean <- drop_na(datars)
data_clean
## # A tibble: 423 × 8
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Michael Ander… 01/04/1957 112/… 0.69 42
## 2 Tan Wei Ming 12/04/1965 134/… 0.76 26.3
## 3 Kung Mei-Lin 22/08/1985 99/77 0.81 25.5
## 4 Ho Chuan-Wei 10/08/1962 149/… 0.61 42.2
## 5 Betty Lewis 02/08/1982 108/… 2.24 9.5
## 6 Tan Wei Ming 03/10/1946 105/… 1.92 6.5
## 7 N/A 02/11/1957 128/… 1.07 20
## 8 Lee Siew Eng 04/07/1964 135/… 0.42 31.9
## 9 John Smith 1967 106/… 0.83 49.5
## 10 Karen Thompson 08/02/1988 121/… 0.71 40.8
## # ℹ 413 more rows
## # ℹ abbreviated name: ¹Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## # Peak_Plantar_Pressure_kPa <dbl>
# Drop only the rows whose `Nama` is NA; NAs in other columns are kept.
# NOTE(review): rows where `Nama` is the literal string "N/A" are not removed
# by this step, because drop_na() only looks at real NA values.
df_clean <- drop_na(datars, Nama)
df_clean
## # A tibble: 660 × 8
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Michael Ander… 01/04/1957 112/… 0.69 42
## 2 N/A 20/09/1975 140 … 1.5 41.9
## 3 Tan Wei Ming 12/04/1965 134/… 0.76 26.3
## 4 Shen Yi-Ching 11/09/1980 120/… 1.92 NA
## 5 Kung Mei-Lin 22/08/1985 99/77 0.81 25.5
## 6 Ho Chuan-Wei 10/08/1962 149/… 0.61 42.2
## 7 Betty Lewis 02/08/1982 108/… 2.24 9.5
## 8 Joseph Garcia 06/12/1982 <NA> 0.18 24.8
## 9 Ong Lay Kheng 26/02/1951 128/… NA 40.9
## 10 Lin Mei-Ling 16/02/1944 113/… 0.25 44
## # ℹ 650 more rows
## # ℹ abbreviated name: ¹Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## # Peak_Plantar_Pressure_kPa <dbl>
# Median imputation: fill every NA in `Microcirculation_PU` with the median
# of the observed (non-NA) values of that same column. All rows are kept.
df_isimedian <- mutate(
  datars,
  Microcirculation_PU = replace_na(
    Microcirculation_PU,
    median(Microcirculation_PU, na.rm = TRUE)
  )
)
df_isimedian
## # A tibble: 700 × 8
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Michael Ander… 01/04/1957 112/… 0.69 42
## 2 N/A 20/09/1975 140 … 1.5 41.9
## 3 Tan Wei Ming 12/04/1965 134/… 0.76 26.3
## 4 Shen Yi-Ching 11/09/1980 120/… 1.92 27.7
## 5 Kung Mei-Lin 22/08/1985 99/77 0.81 25.5
## 6 Ho Chuan-Wei 10/08/1962 149/… 0.61 42.2
## 7 <NA> 18/01/1994 110/… 1.04 2
## 8 Betty Lewis 02/08/1982 108/… 2.24 9.5
## 9 Joseph Garcia 06/12/1982 <NA> 0.18 24.8
## 10 Ong Lay Kheng 26/02/1951 128/… NA 40.9
## # ℹ 690 more rows
## # ℹ abbreviated name: ¹Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## # Peak_Plantar_Pressure_kPa <dbl>
# Remove exact duplicate rows (rows identical across all columns), keeping
# the first occurrence of each. Despite its name, `df_duplicate` holds the
# de-duplicated data, not the duplicates themselves.
df_duplicate <- distinct(datars)
df_duplicate
## # A tibble: 698 × 8
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Michael Ander… 01/04/1957 112/… 0.69 42
## 2 N/A 20/09/1975 140 … 1.5 41.9
## 3 Tan Wei Ming 12/04/1965 134/… 0.76 26.3
## 4 Shen Yi-Ching 11/09/1980 120/… 1.92 NA
## 5 Kung Mei-Lin 22/08/1985 99/77 0.81 25.5
## 6 Ho Chuan-Wei 10/08/1962 149/… 0.61 42.2
## 7 <NA> 18/01/1994 110/… 1.04 2
## 8 Betty Lewis 02/08/1982 108/… 2.24 9.5
## 9 Joseph Garcia 06/12/1982 <NA> 0.18 24.8
## 10 Ong Lay Kheng 26/02/1951 128/… NA 40.9
## # ℹ 688 more rows
## # ℹ abbreviated name: ¹Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## # Peak_Plantar_Pressure_kPa <dbl>