library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw hospital records. "N/A" is added to readr's default
# missing-value markers ("" and "NA") so that placeholder strings such as the
# "N/A" entries seen in `Nama` are parsed as real NA and are therefore counted
# by is.na() and removed by drop_na() further down.
# NOTE(review): NA counts and row counts recorded in earlier rendered output
# were produced before this change and will differ on re-run.
datars <- read_csv("hospital_dataset.csv", na = c("", "NA", "N/A"))
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the raw table: one line per column with its type and first values.
glimpse(datars)
## Rows: 700
## Columns: 8
## $ Nama                      <chr> "Michael Anderson", "N/A", "Tan Wei Ming", "…
## $ Tanggal_Lahir             <chr> "01/04/1957", "20/09/1975", "12/04/1965", "1…
## $ Tensi                     <chr> "112/67", "140 / 91", "134/72", "120/79", "9…
## $ Skin_Stiffness_N_per_mm   <dbl> 0.69, 1.50, 0.76, 1.92, 0.81, 0.61, 1.04, 2.…
## $ Microcirculation_PU       <dbl> 42.0, 41.9, 26.3, NA, 25.5, 42.2, 2.0, 9.5, …
## $ Suhu_Tubuh_Celcius        <chr> "37.6", "36.5°C", "37.5", "37.0", "36.0", "3…
## $ Penyakit                  <chr> "Non-Diabetic", "Non-Diabetic", "Non-Diabeti…
## $ Peak_Plantar_Pressure_kPa <dbl> 294.0, NA, 431.8, 577.5, 502.3, 201.4, 512.8…
# Per-column summary: quantiles, mean and NA count for numeric columns;
# length, class and mode for character columns.
datars %>% summary()
##      Nama           Tanggal_Lahir         Tensi          
##  Length:700         Length:700         Length:700        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
##  Min.   : -2.180         Min.   : -32.50     Length:700        
##  1st Qu.:  0.700         1st Qu.:  18.00     Class :character  
##  Median :  1.100         Median :  27.70     Mode  :character  
##  Mean   :  1.342         Mean   :  35.58                       
##  3rd Qu.:  1.595         3rd Qu.:  39.00                       
##  Max.   :150.000         Max.   :5000.00                       
##  NA's   :37              NA's   :50                            
##    Penyakit         Peak_Plantar_Pressure_kPa
##  Length:700         Min.   : -100.0          
##  Class :character   1st Qu.:  268.6          
##  Mode  :character   Median :  384.3          
##                     Mean   :  991.9          
##                     3rd Qu.:  508.5          
##                     Max.   :99999.0          
##                     NA's   :43
# Count the NA entries in every column.
datars %>%
  is.na() %>%
  colSums()
##                      Nama             Tanggal_Lahir                     Tensi 
##                        40                        42                        47 
##   Skin_Stiffness_N_per_mm       Microcirculation_PU        Suhu_Tubuh_Celcius 
##                        37                        50                        49 
##                  Penyakit Peak_Plantar_Pressure_kPa 
##                        45                        43
# Complete-case version of the data: keep only rows with no NA in any column.
data_clean <- drop_na(datars)
data_clean
## # A tibble: 423 × 8
##    Nama           Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
##    <chr>          <chr>         <chr>                  <dbl>               <dbl>
##  1 Michael Ander… 01/04/1957    112/…                   0.69                42  
##  2 Tan Wei Ming   12/04/1965    134/…                   0.76                26.3
##  3 Kung Mei-Lin   22/08/1985    99/77                   0.81                25.5
##  4 Ho Chuan-Wei   10/08/1962    149/…                   0.61                42.2
##  5 Betty Lewis    02/08/1982    108/…                   2.24                 9.5
##  6 Tan Wei Ming   03/10/1946    105/…                   1.92                 6.5
##  7 N/A            02/11/1957    128/…                   1.07                20  
##  8 Lee Siew Eng   04/07/1964    135/…                   0.42                31.9
##  9 John Smith     1967          106/…                   0.83                49.5
## 10 Karen Thompson 08/02/1988    121/…                   0.71                40.8
## # ℹ 413 more rows
## # ℹ abbreviated name: ¹​Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## #   Peak_Plantar_Pressure_kPa <dbl>
# Drop only the rows whose `Nama` is NA; NAs in other columns are kept.
df_clean <- drop_na(datars, Nama)
df_clean
## # A tibble: 660 × 8
##    Nama           Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
##    <chr>          <chr>         <chr>                  <dbl>               <dbl>
##  1 Michael Ander… 01/04/1957    112/…                   0.69                42  
##  2 N/A            20/09/1975    140 …                   1.5                 41.9
##  3 Tan Wei Ming   12/04/1965    134/…                   0.76                26.3
##  4 Shen Yi-Ching  11/09/1980    120/…                   1.92                NA  
##  5 Kung Mei-Lin   22/08/1985    99/77                   0.81                25.5
##  6 Ho Chuan-Wei   10/08/1962    149/…                   0.61                42.2
##  7 Betty Lewis    02/08/1982    108/…                   2.24                 9.5
##  8 Joseph Garcia  06/12/1982    <NA>                    0.18                24.8
##  9 Ong Lay Kheng  26/02/1951    128/…                  NA                   40.9
## 10 Lin Mei-Ling   16/02/1944    113/…                   0.25                44  
## # ℹ 650 more rows
## # ℹ abbreviated name: ¹​Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## #   Peak_Plantar_Pressure_kPa <dbl>
# Median imputation: fill each missing `Microcirculation_PU` with the median
# of the observed (non-NA) values; all other columns are left unchanged.
df_isimedian <- mutate(
  datars,
  Microcirculation_PU = coalesce(
    Microcirculation_PU,
    median(Microcirculation_PU, na.rm = TRUE)
  )
)
df_isimedian
## # A tibble: 700 × 8
##    Nama           Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
##    <chr>          <chr>         <chr>                  <dbl>               <dbl>
##  1 Michael Ander… 01/04/1957    112/…                   0.69                42  
##  2 N/A            20/09/1975    140 …                   1.5                 41.9
##  3 Tan Wei Ming   12/04/1965    134/…                   0.76                26.3
##  4 Shen Yi-Ching  11/09/1980    120/…                   1.92                27.7
##  5 Kung Mei-Lin   22/08/1985    99/77                   0.81                25.5
##  6 Ho Chuan-Wei   10/08/1962    149/…                   0.61                42.2
##  7 <NA>           18/01/1994    110/…                   1.04                 2  
##  8 Betty Lewis    02/08/1982    108/…                   2.24                 9.5
##  9 Joseph Garcia  06/12/1982    <NA>                    0.18                24.8
## 10 Ong Lay Kheng  26/02/1951    128/…                  NA                   40.9
## # ℹ 690 more rows
## # ℹ abbreviated name: ¹​Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## #   Peak_Plantar_Pressure_kPa <dbl>
# Remove exact duplicate rows (rows identical across all columns), keeping
# the first occurrence of each. Despite its name, `df_duplicate` holds the
# de-duplicated data.
df_duplicate <- distinct(datars)
df_duplicate
## # A tibble: 698 × 8
##    Nama           Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
##    <chr>          <chr>         <chr>                  <dbl>               <dbl>
##  1 Michael Ander… 01/04/1957    112/…                   0.69                42  
##  2 N/A            20/09/1975    140 …                   1.5                 41.9
##  3 Tan Wei Ming   12/04/1965    134/…                   0.76                26.3
##  4 Shen Yi-Ching  11/09/1980    120/…                   1.92                NA  
##  5 Kung Mei-Lin   22/08/1985    99/77                   0.81                25.5
##  6 Ho Chuan-Wei   10/08/1962    149/…                   0.61                42.2
##  7 <NA>           18/01/1994    110/…                   1.04                 2  
##  8 Betty Lewis    02/08/1982    108/…                   2.24                 9.5
##  9 Joseph Garcia  06/12/1982    <NA>                    0.18                24.8
## 10 Ong Lay Kheng  26/02/1951    128/…                  NA                   40.9
## # ℹ 688 more rows
## # ℹ abbreviated name: ¹​Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## #   Peak_Plantar_Pressure_kPa <dbl>