library(readxl)
# Load the raw hospital workbook into a tibble. Every column arrives as
# character (see the <chr> headers in the printout below), so numeric and date
# fields have to be converted explicitly later on.
data <- read_excel("hospital_dataset.xlsx")
# Auto-print the tibble for a first look at the raw values.
data
## # A tibble: 700 × 8
##    Nama           Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
##    <chr>          <chr>         <chr> <chr>                  <chr>              
##  1 Michael Ander… 20911         112/… 0.69                   42.0               
##  2 N/A            27657         140 … 1.5                    41.9               
##  3 Tan Wei Ming   23844         134/… 0.76                   26.3               
##  4 Shen Yi-Ching  29475         120/… 1.92                   <NA>               
##  5 Kung Mei-Lin   31281         99/77 0.81                   25.5               
##  6 Ho Chuan-Wei   22868         149/… 0.61                   42.2               
##  7 <NA>           34352         110/… 1.04                   2.0                
##  8 Betty Lewis    30165         108/… 2.24                   9.5                
##  9 Joseph Garcia  30291         <NA>  0.18                   24.8               
## 10 Ong Lay Kheng  18685         128/… <NA>                   40.9               
## # ℹ 690 more rows
## # ℹ abbreviated name: ¹​Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## #   Peak_Plantar_Pressure_kPa <chr>
# Preview the first six rows of the raw data.
head(data)
## # A tibble: 6 × 8
##   Nama            Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
##   <chr>           <chr>         <chr> <chr>                  <chr>              
## 1 Michael Anders… 20911         112/… 0.69                   42.0               
## 2 N/A             27657         140 … 1.5                    41.9               
## 3 Tan Wei Ming    23844         134/… 0.76                   26.3               
## 4 Shen Yi-Ching   29475         120/… 1.92                   <NA>               
## 5 Kung Mei-Lin    31281         99/77 0.81                   25.5               
## 6 Ho Chuan-Wei    22868         149/… 0.61                   42.2               
## # ℹ abbreviated name: ¹​Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## #   Peak_Plantar_Pressure_kPa <chr>
# Compact per-column summary: storage type plus the first few raw values, which
# exposes formatting problems such as "140 / 91" or unit suffixes.
str(data)
## tibble [700 × 8] (S3: tbl_df/tbl/data.frame)
##  $ Nama                     : chr [1:700] "Michael Anderson" "N/A" "Tan Wei Ming" "Shen Yi-Ching" ...
##  $ Tanggal_Lahir            : chr [1:700] "20911" "27657" "23844" "29475" ...
##  $ Tensi                    : chr [1:700] "112/67" "140 / 91" "134/72" "120/79" ...
##  $ Skin_Stiffness_N_per_mm  : chr [1:700] "0.69" "1.5" "0.76" "1.92" ...
##  $ Microcirculation_PU      : chr [1:700] "42.0" "41.9" "26.3" NA ...
##  $ Suhu_Tubuh_Celcius       : chr [1:700] "37.6" "36.5掳C" "37.5" "37.0" ...
##  $ Penyakit                 : chr [1:700] "Non-Diabetic" "Non-Diabetic" "Non-Diabetic" "Diabetic" ...
##  $ Peak_Plantar_Pressure_kPa: chr [1:700] "294.0" NA "431.8" "577.5" ...
# List the column names exactly as they are spelled in the workbook.
colnames(data)
## [1] "Nama"                      "Tanggal_Lahir"            
## [3] "Tensi"                     "Skin_Stiffness_N_per_mm"  
## [5] "Microcirculation_PU"       "Suhu_Tubuh_Celcius"       
## [7] "Penyakit"                  "Peak_Plantar_Pressure_kPa"
# Count true NA cells per column. Textual placeholders such as "N/A" are
# ordinary strings at this point and are NOT included in these counts.
colSums(is.na(data))
##                      Nama             Tanggal_Lahir                     Tensi 
##                        40                        42                        47 
##   Skin_Stiffness_N_per_mm       Microcirculation_PU        Suhu_Tubuh_Celcius 
##                        37                        50                        49 
##                  Penyakit Peak_Plantar_Pressure_kPa 
##                        45                        43
# Body temperature: drop every character that is not a digit or a decimal
# point (unit suffixes and similar residue). NA entries stay NA and the column
# remains character.
data[["Suhu_Tubuh_Celcius"]] <- gsub(
  pattern = "[^0-9.]",
  replacement = "",
  x = data[["Suhu_Tubuh_Celcius"]]
)
# Auto-print to inspect the table after the clean-up.
data
## # A tibble: 700 × 8
##    Nama           Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
##    <chr>          <chr>         <chr> <chr>                  <chr>              
##  1 Michael Ander… 20911         112/… 0.69                   42.0               
##  2 N/A            27657         140 … 1.5                    41.9               
##  3 Tan Wei Ming   23844         134/… 0.76                   26.3               
##  4 Shen Yi-Ching  29475         120/… 1.92                   <NA>               
##  5 Kung Mei-Lin   31281         99/77 0.81                   25.5               
##  6 Ho Chuan-Wei   22868         149/… 0.61                   42.2               
##  7 <NA>           34352         110/… 1.04                   2.0                
##  8 Betty Lewis    30165         108/… 2.24                   9.5                
##  9 Joseph Garcia  30291         <NA>  0.18                   24.8               
## 10 Ong Lay Kheng  18685         128/… <NA>                   40.9               
## # ℹ 690 more rows
## # ℹ abbreviated name: ¹​Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## #   Peak_Plantar_Pressure_kPa <chr>
# ---- Missing values --------------------------------------------------------
# "N/A" is a textual placeholder for a missing name; turn it into a real NA so
# that na.omit() treats it like any other missing cell. `%in%` is used instead
# of `==` because it never yields NA for rows whose name is already missing.
data$Nama[data$Nama %in% "N/A"] <- NA
# Keep only rows in which every column is present.
data <- na.omit(data)

# ---- Diagnosis labels ------------------------------------------------------
# Collapse the spelling variants onto two canonical labels. Matching is done on
# a trimmed, lower-cased key so that variants such as "NON-DIABETIC" or
# " Diabetic" are recognised too; values outside both sets are left unchanged.
diabetic_labels <- c("diabetic", "dm", "yes", "1")
non_diabetic_labels <- c(
  "non-diabetic", "sehat", "healthy", "tidak", "no", "normal"
)
penyakit_key <- tolower(trimws(data$Penyakit))
data$Penyakit[penyakit_key %in% diabetic_labels] <- "Diabetic"
data$Penyakit[penyakit_key %in% non_diabetic_labels] <- "Non-Diabetic"

# ---- Microcirculation ------------------------------------------------------
data$Microcirculation_PU <- round(as.numeric(data$Microcirculation_PU), 2)
# Drop rows whose reading is negative or could not be parsed. The mask is built
# with is.na() so that it contains no NA: subsetting a tibble with an NA index
# would insert an all-NA row instead of removing the offending one.
valid_pu <- !is.na(data$Microcirculation_PU) & data$Microcirculation_PU >= 0
data <- data[valid_pu, ]

# ---- Blood pressure --------------------------------------------------------
# Remove the unit in any capitalisation, rewrite "Sys ... Dia ..." notations as
# "systolic/diastolic", then strip all remaining whitespace ("140 / 91" ->
# "140/91").
data$Tensi <- gsub("mmHg", "", data$Tensi, ignore.case = TRUE)
data$Tensi <- gsub(".*[Ss]ys[: ]*([0-9]+).*?[Dd]ia[: ]*([0-9]+).*",
                   "\\1/\\2",
                   data$Tensi)
data$Tensi <- gsub("\\s+", "", data$Tensi)

# Open the spreadsheet viewer only when a user is present; in a knitted or
# batch run there is nobody to look at it.
if (interactive()) {
  View(data)
}

# ---- Negative measurements -------------------------------------------------
# Flags entries that parse as a number below zero. The comparison is done on
# the numeric value: comparing a character column with 0 directly would be a
# locale-dependent string comparison. Entries that do not parse are not
# flagged, hence the targeted suppressWarnings().
is_negative_number <- function(x) {
  value <- suppressWarnings(as.numeric(x))
  !is.na(value) & value < 0
}
# NOTE(review): both columns deliberately stay character here; convert them to
# numeric once any remaining non-numeric formats have been dealt with.
data$Skin_Stiffness_N_per_mm[
  is_negative_number(data$Skin_Stiffness_N_per_mm)
] <- NA
# NOTE(review): these junk names are blanked after na.omit(), so the affected
# rows are kept with a missing name, unlike the "N/A" rows dropped above.
# Confirm that this asymmetry is intended.
data$Nama[data$Nama %in% c("???", "123456")] <- NA
data$Peak_Plantar_Pressure_kPa[
  is_negative_number(data$Peak_Plantar_Pressure_kPa)
] <- NA

# ---- Date of birth ---------------------------------------------------------
# The column holds day counts stored as text; the conversion below uses
# 1899-12-30 as day zero. Entries that are not plain numbers become NA (this is
# the source of the coercion warning).
angka_bersih <- as.numeric(data$Tanggal_Lahir)
## Warning: NAs introduced by coercion
data$Tanggal_Lahir <- as.Date(angka_bersih, origin = "1899-12-30")
print(head(data$Tanggal_Lahir))
## [1] "1957-04-01" "1965-04-12" "1985-08-22" "1962-08-10" "1982-08-02"
## [6] "1946-10-03"