library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'tidyr' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw hospital dataset.
# "N/A" is declared as a missing-value marker on top of readr's defaults
# ("" and "NA"): the raw file uses it as a placeholder (e.g. in `Nama`), and
# without this it is read as an ordinary string that `drop_na()` cannot catch.
# NOTE(review): this absolute Windows path is machine-specific; consider a
# project-relative path so the script runs elsewhere.
datars <- read_csv(
  "D:/PSD/rsconnect/documents/hospital_dataset.csv",
  na = c("", "NA", "N/A")
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Quick structural overview of the raw import: column names, types, first values.
datars %>%
  glimpse()
## Rows: 700
## Columns: 8
## $ Nama                      <chr> "Michael Anderson", "N/A", "Tan Wei Ming", "…
## $ Tanggal_Lahir             <chr> "01/04/1957", "20/09/1975", "12/04/1965", "1…
## $ Tensi                     <chr> "112/67", "140 / 91", "134/72", "120/79", "9…
## $ Skin_Stiffness_N_per_mm   <dbl> 0.69, 1.50, 0.76, 1.92, 0.81, 0.61, 1.04, 2.…
## $ Microcirculation_PU       <dbl> 42.0, 41.9, 26.3, NA, 25.5, 42.2, 2.0, 9.5, …
## $ Suhu_Tubuh_Celcius        <chr> "37.6", "36.5°C", "37.5", "37.0", "36.0", "…
## $ Penyakit                  <chr> "Non-Diabetic", "Non-Diabetic", "Non-Diabeti…
## $ Peak_Plantar_Pressure_kPa <dbl> 294.0, NA, 431.8, 577.5, 502.3, 201.4, 512.8…
# Per-column summary of the raw import.
datars %>%
  summary()
##      Nama           Tanggal_Lahir         Tensi          
##  Length:700         Length:700         Length:700        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
##  Min.   : -2.180         Min.   : -32.50     Length:700        
##  1st Qu.:  0.700         1st Qu.:  18.00     Class :character  
##  Median :  1.100         Median :  27.70     Mode  :character  
##  Mean   :  1.343         Mean   :  35.60                       
##  3rd Qu.:  1.590         3rd Qu.:  39.05                       
##  Max.   :150.000         Max.   :5000.00                       
##  NA's   :39              NA's   :52                            
##    Penyakit         Peak_Plantar_Pressure_kPa
##  Length:700         Min.   : -100.0          
##  Class :character   1st Qu.:  268.9          
##  Mode  :character   Median :  384.3          
##                     Mean   :  993.8          
##                     3rd Qu.:  508.9          
##                     Max.   :99999.0          
##                     NA's   :45
# Count the missing values in each column of the raw import.
datars %>%
  is.na() %>%
  colSums()
##                      Nama             Tanggal_Lahir                     Tensi 
##                        40                        44                        49 
##   Skin_Stiffness_N_per_mm       Microcirculation_PU        Suhu_Tubuh_Celcius 
##                        39                        52                        51 
##                  Penyakit Peak_Plantar_Pressure_kPa 
##                        46                        45
# Build the cleaned analysis table from the raw import.
#
# Steps, in order:
#   1. Drop rows that already contain missing values.
#   2. Temperature: strip every character except digits and "." (so a value
#      such as "36.5°C" becomes "36.5") and convert to numeric.
#   3. Blood pressure: keep only readings that contain "/", split them into
#      `Sistolik` / `Diastolik`, strip non-digits (handles "140 / 91") and
#      convert to numeric.
#   4. Drop rows whose cleaned values could not be parsed (they become NA).
#   5. De-duplicate LAST. Rows that differ only in raw formatting
#      ("140 / 91" vs "140/91", "36.5°C" vs "36.5") become identical only
#      after normalisation, so de-duplicating before cleaning leaves
#      duplicate rows in the result.
#
# NOTE(review): this pipeline does not filter implausible measurements
# (negative or extreme sensor values); handle outliers separately if needed.
df_clean <- datars %>%
  drop_na() %>%
  mutate(
    Suhu_Tubuh_Celcius = as.numeric(
      str_replace_all(Suhu_Tubuh_Celcius, "[^0-9.]", "")
    )
  ) %>%
  filter(str_detect(Tensi, "/")) %>%
  separate(
    Tensi,
    into = c("Sistolik", "Diastolik"),
    sep = "/",
    remove = TRUE
  ) %>%
  mutate(
    Sistolik = as.numeric(str_replace_all(Sistolik, "[^0-9]", "")),
    Diastolik = as.numeric(str_replace_all(Diastolik, "[^0-9]", ""))
  ) %>%
  drop_na() %>%
  distinct()
# Count the missing values in each column of the cleaned table.
df_clean %>%
  is.na() %>%
  colSums()
##                      Nama             Tanggal_Lahir                  Sistolik 
##                         0                         0                         0 
##                 Diastolik   Skin_Stiffness_N_per_mm       Microcirculation_PU 
##                         0                         0                         0 
##        Suhu_Tubuh_Celcius                  Penyakit Peak_Plantar_Pressure_kPa 
##                         0                         0                         0
# Number of rows in the cleaned table that repeat an earlier row.
df_clean %>%
  duplicated() %>%
  sum()
## [1] 2
# Keep only the first occurrence of each fully identical row.
df_clean <- distinct(df_clean)
# Re-count repeated rows after de-duplication.
df_clean %>%
  duplicated() %>%
  sum()
## [1] 0
# Structural overview of the cleaned table: column names, types, first values.
df_clean %>%
  glimpse()
## Rows: 411
## Columns: 9
## $ Nama                      <chr> "Michael Anderson", "Tan Wei Ming", "Kung Me…
## $ Tanggal_Lahir             <chr> "01/04/1957", "12/04/1965", "22/08/1985", "1…
## $ Sistolik                  <dbl> 112, 134, 99, 149, 108, 105, 128, 135, 106, …
## $ Diastolik                 <dbl> 67, 72, 77, 65, 67, 90, 62, 64, 67, 91, 83, …
## $ Skin_Stiffness_N_per_mm   <dbl> 0.69, 0.76, 0.81, 0.61, 2.24, 1.92, 1.07, 0.…
## $ Microcirculation_PU       <dbl> 42.0, 26.3, 25.5, 42.2, 9.5, 6.5, 20.0, 31.9…
## $ Suhu_Tubuh_Celcius        <dbl> 37.6, 37.5, 36.0, 36.8, 36.4, 37.1, 37.1, 36…
## $ Penyakit                  <chr> "Non-Diabetic", "Non-Diabetic", "Diabetic", …
## $ Peak_Plantar_Pressure_kPa <dbl> 294.0, 431.8, 502.3, 201.4, 327.7, 623.0, 51…
# Per-column summary of the cleaned table.
df_clean %>%
  summary()
##      Nama           Tanggal_Lahir         Sistolik       Diastolik     
##  Length:411         Length:411         Min.   : 78.0   Min.   : 46.00  
##  Class :character   Class :character   1st Qu.:112.0   1st Qu.: 71.00  
##  Mode  :character   Mode  :character   Median :122.0   Median : 78.00  
##                                        Mean   :121.8   Mean   : 78.18  
##                                        3rd Qu.:132.0   3rd Qu.: 85.00  
##                                        Max.   :164.0   Max.   :113.00  
##  Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
##  Min.   : -1.500         Min.   :-32.50      Min.   :35.60     
##  1st Qu.:  0.700         1st Qu.: 17.70      1st Qu.:36.50     
##  Median :  1.150         Median : 27.30      Median :36.80     
##  Mean   :  1.501         Mean   : 27.63      Mean   :36.93     
##  3rd Qu.:  1.610         3rd Qu.: 37.95      3rd Qu.:37.00     
##  Max.   :150.000         Max.   : 77.30      Max.   :99.90     
##    Penyakit         Peak_Plantar_Pressure_kPa
##  Length:411         Min.   : -100.0          
##  Class :character   1st Qu.:  272.4          
##  Mode  :character   Median :  384.3          
##                     Mean   :  871.0          
##                     3rd Qu.:  505.8          
##                     Max.   :99999.0