library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw hospital records; the relative path is resolved against the
# current working directory. Column types are guessed by readr.
data <- read_csv(file = "hospital.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows (the default for head()) to eyeball raw values.
head(data, n = 6L)
## # A tibble: 6 × 8
##   Nama            Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
##   <chr>           <chr>         <chr>                  <dbl>               <dbl>
## 1 Michael Anders… 01/04/1957    112/…                   0.69                42  
## 2 N/A             20/09/1975    140 …                   1.5                 41.9
## 3 Tan Wei Ming    12/04/1965    134/…                   0.76                26.3
## 4 Shen Yi-Ching   11/09/1980    120/…                   1.92                NA  
## 5 Kung Mei-Lin    22/08/1985    99/77                   0.81                25.5
## 6 Ho Chuan-Wei    10/08/1962    149/…                   0.61                42.2
## # ℹ abbreviated name: ¹​Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## #   Peak_Plantar_Pressure_kPa <dbl>
# Show the compact structure: column names, storage types and leading values.
utils::str(object = data)
## spc_tbl_ [700 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Nama                     : chr [1:700] "Michael Anderson" "N/A" "Tan Wei Ming" "Shen Yi-Ching" ...
##  $ Tanggal_Lahir            : chr [1:700] "01/04/1957" "20/09/1975" "12/04/1965" "11/09/1980" ...
##  $ Tensi                    : chr [1:700] "112/67" "140 / 91" "134/72" "120/79" ...
##  $ Skin_Stiffness_N_per_mm  : num [1:700] 0.69 1.5 0.76 1.92 0.81 0.61 1.04 2.24 0.18 NA ...
##  $ Microcirculation_PU      : num [1:700] 42 41.9 26.3 NA 25.5 42.2 2 9.5 24.8 40.9 ...
##  $ Suhu_Tubuh_Celcius       : chr [1:700] "37.6" "36.5°C" "37.5" "37.0" ...
##  $ Penyakit                 : chr [1:700] "Non-Diabetic" "Non-Diabetic" "Non-Diabetic" "Diabetic" ...
##  $ Peak_Plantar_Pressure_kPa: num [1:700] 294 NA 432 578 502 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Nama = col_character(),
##   ..   Tanggal_Lahir = col_character(),
##   ..   Tensi = col_character(),
##   ..   Skin_Stiffness_N_per_mm = col_double(),
##   ..   Microcirculation_PU = col_double(),
##   ..   Suhu_Tubuh_Celcius = col_character(),
##   ..   Penyakit = col_character(),
##   ..   Peak_Plantar_Pressure_kPa = col_double()
##   .. )
##  - attr(*, "problems")=<pointer: 0x00000208ee185ee0>
# Count missing (NA) cells per column. Placeholder strings are not counted
# here because is.na() only recognises real NA values.
data %>%
  is.na() %>%
  colSums()
##                      Nama             Tanggal_Lahir                     Tensi 
##                        40                        44                        49 
##   Skin_Stiffness_N_per_mm       Microcirculation_PU        Suhu_Tubuh_Celcius 
##                        39                        52                        51 
##                  Penyakit Peak_Plantar_Pressure_kPa 
##                        46                        45
# Clean the raw hospital records.
#
# Steps, in order:
#   1. Turn the literal placeholder "N/A" into a real NA in every character
#      column. Nama contains the text "N/A" (visible in the head()/str()
#      output above), which is.na() does not detect, so without this step
#      rows with a placeholder name survive the missing-name filter. The
#      other character columns get the same treatment in case the
#      placeholder occurs there too; it is a no-op where it does not.
#   2. Drop rows that have no patient name.
#   3. Normalise the text columns:
#        - Tensi: remove spaces ("140 / 91" -> "140/91").
#        - Suhu_Tubuh_Celcius: strip a "°C" suffix and convert to numeric.
#          as.numeric() yields NA (with a coercion warning) for values that
#          are still not plain numbers after stripping.
#        - Penyakit: lower-case and trim surrounding whitespace.
#        - Tanggal_Lahir: parse as day/month/4-digit-year; values in any
#          other layout become NA.
#   4. Remove exact duplicate rows.
#   5. Keep only rows whose temperature lies in [35, 42]. filter() discards
#      rows where the condition is NA, so rows with a missing or unparseable
#      temperature are dropped here as well.
#
# NOTE: recorded output further down (e.g. from summary()) was produced
# before step 1 was added; re-run the script to refresh it.
data <- data %>%
  mutate(across(where(is.character), \(col) na_if(col, "N/A"))) %>%
  filter(!is.na(Nama)) %>%
  mutate(
    Tensi = gsub(" ", "", Tensi),
    Suhu_Tubuh_Celcius = as.numeric(gsub("°C", "", Suhu_Tubuh_Celcius)),
    Penyakit = trimws(tolower(Penyakit)),
    Tanggal_Lahir = as.Date(Tanggal_Lahir, format = "%d/%m/%Y")
  ) %>%
  distinct() %>%
  filter(between(Suhu_Tubuh_Celcius, 35, 42))
# Per-column summary of the cleaned data, used to spot remaining NAs and
# implausible extremes in the numeric columns.
data %>%
  summary()
##         Nama     Tanggal_Lahir              Tensi     Skin_Stiffness_N_per_mm
##  Length   :600   Min.   :1940-03-28   Length   :600   Min.   : -2.180        
##  N.unique : 58   1st Qu.:1957-02-05   N.unique :483   1st Qu.:  0.700        
##  N.blank  :  0   Median :1973-12-31   N.blank  :  0   Median :  1.110        
##  Min.nchar:  1   Mean   :1973-04-20   Min.nchar:  3   Mean   :  1.387        
##  Max.nchar: 16   3rd Qu.:1989-07-11   Max.nchar: 13   3rd Qu.:  1.610        
##                  Max.   :2005-11-27   NAs      : 39   Max.   :150.000        
##                  NAs    :51                           NAs    :34             
##  Microcirculation_PU Suhu_Tubuh_Celcius      Penyakit  
##  Min.   : -32.5      Min.   :35.50      Length   :600  
##  1st Qu.:  18.0      1st Qu.:36.50      N.unique : 11  
##  Median :  27.8      Median :36.80      N.blank  :  0  
##  Mean   :  36.9      Mean   :36.78      Min.nchar:  1  
##  3rd Qu.:  39.1      3rd Qu.:37.10      Max.nchar: 12  
##  Max.   :5000.0      Max.   :38.00      NAs      : 35  
##  NAs    :40                                            
##  Peak_Plantar_Pressure_kPa
##  Min.   : -100.0          
##  1st Qu.:  268.8          
##  Median :  379.4          
##  Mean   :  917.2          
##  3rd Qu.:  508.5          
##  Max.   :99999.0          
##  NAs    :38