library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
drs <- read_csv("hospital_dataset.csv")
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
drs
## # A tibble: 700 × 8
##    Nama           Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
##    <chr>          <chr>         <chr>                  <dbl>               <dbl>
##  1 Michael Ander… 01/04/1957    112/…                   0.69                42  
##  2 N/A            20/09/1975    140 …                   1.5                 41.9
##  3 Tan Wei Ming   12/04/1965    134/…                   0.76                26.3
##  4 Shen Yi-Ching  11/09/1980    120/…                   1.92                NA  
##  5 Kung Mei-Lin   22/08/1985    99/77                   0.81                25.5
##  6 Ho Chuan-Wei   10/08/1962    149/…                   0.61                42.2
##  7 <NA>           18/01/1994    110/…                   1.04                 2  
##  8 Betty Lewis    02/08/1982    108/…                   2.24                 9.5
##  9 Joseph Garcia  06/12/1982    <NA>                    0.18                24.8
## 10 Ong Lay Kheng  26/02/1951    128/…                  NA                   40.9
## # ℹ 690 more rows
## # ℹ abbreviated name: ¹​Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## #   Peak_Plantar_Pressure_kPa <dbl>
drs <- drs %>%                  
  rename_with(tolower) %>% 
  distinct()

colnames(drs)
## [1] "nama"                      "tanggal_lahir"            
## [3] "tensi"                     "skin_stiffness_n_per_mm"  
## [5] "microcirculation_pu"       "suhu_tubuh_celcius"       
## [7] "penyakit"                  "peak_plantar_pressure_kpa"
drs <- drs %>%
  mutate(suhu_tubuh_celcius = str_replace(suhu_tubuh_celcius, "°C", ""),
         suhu_tubuh_celcius = as.numeric(suhu_tubuh_celcius))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `suhu_tubuh_celcius = as.numeric(suhu_tubuh_celcius)`.
## Caused by warning:
## ! NAs introduced by coercion
glimpse(drs)
## Rows: 698
## Columns: 8
## $ nama                      <chr> "Michael Anderson", "N/A", "Tan Wei Ming", "…
## $ tanggal_lahir             <chr> "01/04/1957", "20/09/1975", "12/04/1965", "1…
## $ tensi                     <chr> "112/67", "140 / 91", "134/72", "120/79", "9…
## $ skin_stiffness_n_per_mm   <dbl> 0.69, 1.50, 0.76, 1.92, 0.81, 0.61, 1.04, 2.…
## $ microcirculation_pu       <dbl> 42.0, 41.9, 26.3, NA, 25.5, 42.2, 2.0, 9.5, …
## $ suhu_tubuh_celcius        <dbl> 37.6, 36.5, 37.5, 37.0, 36.0, 36.8, 36.3, 36…
## $ penyakit                  <chr> "Non-Diabetic", "Non-Diabetic", "Non-Diabeti…
## $ peak_plantar_pressure_kpa <dbl> 294.0, NA, 431.8, 577.5, 502.3, 201.4, 512.8…
drs <- read_csv("hospital_dataset.csv")
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
drs <- drs %>%
  
  setNames(tolower(names(.))) %>% 
  
  mutate(tensi = str_replace_all(tensi, "[-|]", "/"),
         tensi = str_replace_all(tensi, "Sys:", ""),
         tensi = str_replace_all(tensi, " Dia:", "/"),
         tensi = str_replace_all(tensi, " ", "")) %>%
  
  separate(tensi, into = c("sistolik", "diastolik"), sep = "/", convert = TRUE) %>%
  
  mutate(suhu_tubuh_celcius = str_replace(suhu_tubuh_celcius, "°C", ""),
         suhu_tubuh_celcius = as.numeric(suhu_tubuh_celcius))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 2 rows [78,
## 440].
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `suhu_tubuh_celcius = as.numeric(suhu_tubuh_celcius)`.
## Caused by warning:
## ! NAs introduced by coercion
drs
## # A tibble: 700 × 9
##    nama             tanggal_lahir sistolik diastolik skin_stiffness_n_per_mm
##    <chr>            <chr>         <chr>    <chr>                       <dbl>
##  1 Michael Anderson 01/04/1957    112      67                           0.69
##  2 N/A              20/09/1975    140      91                           1.5 
##  3 Tan Wei Ming     12/04/1965    134      72                           0.76
##  4 Shen Yi-Ching    11/09/1980    120      79                           1.92
##  5 Kung Mei-Lin     22/08/1985    99       77                           0.81
##  6 Ho Chuan-Wei     10/08/1962    149      65                           0.61
##  7 <NA>             18/01/1994    110      71                           1.04
##  8 Betty Lewis      02/08/1982    108      67                           2.24
##  9 Joseph Garcia    06/12/1982    <NA>     <NA>                         0.18
## 10 Ong Lay Kheng    26/02/1951    128      78                          NA   
## # ℹ 690 more rows
## # ℹ 4 more variables: microcirculation_pu <dbl>, suhu_tubuh_celcius <dbl>,
## #   penyakit <chr>, peak_plantar_pressure_kpa <dbl>
drs <- drs %>%
 
  rename_with(~tolower(gsub("[[:punct:] ]+", "_", .x))) %>%

  filter(
    sistolik > 0, 
    diastolik > 0,
    peak_plantar_pressure_kpa < 5000,   
    skin_stiffness_n_per_mm >= 0,
    suhu_tubuh_celcius > 30 & suhu_tubuh_celcius < 45,
    !is.na(nama)
  )

View(drs)
summary(drs)
##      nama           tanggal_lahir        sistolik          diastolik        
##  Length:490         Length:490         Length:490         Length:490        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  skin_stiffness_n_per_mm microcirculation_pu suhu_tubuh_celcius
##  Min.   :  0.100         Min.   :-32.50      Min.   :35.50     
##  1st Qu.:  0.710         1st Qu.: 17.70      1st Qu.:36.50     
##  Median :  1.145         Median : 27.70      Median :36.80     
##  Mean   :  1.460         Mean   : 27.81      Mean   :36.79     
##  3rd Qu.:  1.610         3rd Qu.: 38.35      3rd Qu.:37.00     
##  Max.   :150.000         Max.   : 77.30      Max.   :42.50     
##                          NA's   :31                            
##    penyakit         peak_plantar_pressure_kpa
##  Length:490         Min.   :-100.0           
##  Class :character   1st Qu.: 272.0           
##  Mode  :character   Median : 386.4           
##                     Mean   : 387.0           
##                     3rd Qu.: 504.1           
##                     Max.   : 715.4           
## 
summary(drs)
##      nama           tanggal_lahir        sistolik          diastolik        
##  Length:490         Length:490         Length:490         Length:490        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  skin_stiffness_n_per_mm microcirculation_pu suhu_tubuh_celcius
##  Min.   :  0.100         Min.   :-32.50      Min.   :35.50     
##  1st Qu.:  0.710         1st Qu.: 17.70      1st Qu.:36.50     
##  Median :  1.145         Median : 27.70      Median :36.80     
##  Mean   :  1.460         Mean   : 27.81      Mean   :36.79     
##  3rd Qu.:  1.610         3rd Qu.: 38.35      3rd Qu.:37.00     
##  Max.   :150.000         Max.   : 77.30      Max.   :42.50     
##                          NA's   :31                            
##    penyakit         peak_plantar_pressure_kpa
##  Length:490         Min.   :-100.0           
##  Class :character   1st Qu.: 272.0           
##  Mode  :character   Median : 386.4           
##                     Mean   : 387.0           
##                     3rd Qu.: 504.1           
##                     Max.   : 715.4           
## 
drs <- drs %>%
  mutate(sistolik = as.numeric(sistolik))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `sistolik = as.numeric(sistolik)`.
## Caused by warning:
## ! NAs introduced by coercion
class(drs$sistolik)
## [1] "numeric"
# Jalankan ggplot-nya sekarang
ggplot(drs, aes(x = sistolik)) +
  geom_histogram(bins = 20, fill = "yellow", color = "black") +
  theme_minimal() +
  labs(title = "Distribusi Tekanan Darah Sistolik", 
       x = "Sistolik (mmHg)", 
       y = "Frekuensi")
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_bin()`).

drs <- drs %>% 
  drop_na(sistolik, diastolik)
summary(drs)
##      nama           tanggal_lahir         sistolik      diastolik        
##  Length:487         Length:487         Min.   : 78.0   Length:487        
##  Class :character   Class :character   1st Qu.:112.0   Class :character  
##  Mode  :character   Mode  :character   Median :122.0   Mode  :character  
##                                        Mean   :121.6                     
##                                        3rd Qu.:131.0                     
##                                        Max.   :164.0                     
##                                                                          
##  skin_stiffness_n_per_mm microcirculation_pu suhu_tubuh_celcius
##  Min.   :  0.100         Min.   :-32.50      Min.   :35.50     
##  1st Qu.:  0.720         1st Qu.: 17.70      1st Qu.:36.50     
##  Median :  1.150         Median : 27.40      Median :36.80     
##  Mean   :  1.467         Mean   : 27.72      Mean   :36.79     
##  3rd Qu.:  1.610         3rd Qu.: 37.92      3rd Qu.:37.00     
##  Max.   :150.000         Max.   : 77.30      Max.   :42.50     
##                          NA's   :31                            
##    penyakit         peak_plantar_pressure_kpa
##  Length:487         Min.   :-100.0           
##  Class :character   1st Qu.: 274.6           
##  Mode  :character   Median : 386.7           
##                     Mean   : 388.4           
##                     3rd Qu.: 504.8           
##                     Max.   : 715.4           
##