library(readxl)
# Load the raw hospital workbook into a tibble, then print it for a first look.
data <- readxl::read_excel(path = "hospital_dataset.xlsx")
data
## # A tibble: 700 × 8
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
## <chr> <chr> <chr> <chr> <chr>
## 1 Michael Ander… 20911 112/… 0.69 42.0
## 2 N/A 27657 140 … 1.5 41.9
## 3 Tan Wei Ming 23844 134/… 0.76 26.3
## 4 Shen Yi-Ching 29475 120/… 1.92 <NA>
## 5 Kung Mei-Lin 31281 99/77 0.81 25.5
## 6 Ho Chuan-Wei 22868 149/… 0.61 42.2
## 7 <NA> 34352 110/… 1.04 2.0
## 8 Betty Lewis 30165 108/… 2.24 9.5
## 9 Joseph Garcia 30291 <NA> 0.18 24.8
## 10 Ong Lay Kheng 18685 128/… <NA> 40.9
## # ℹ 690 more rows
## # ℹ abbreviated name: ¹Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## # Peak_Plantar_Pressure_kPa <chr>
# Preview the first six rows.
head(data, n = 6L)
## # A tibble: 6 × 8
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
## <chr> <chr> <chr> <chr> <chr>
## 1 Michael Anders… 20911 112/… 0.69 42.0
## 2 N/A 27657 140 … 1.5 41.9
## 3 Tan Wei Ming 23844 134/… 0.76 26.3
## 4 Shen Yi-Ching 29475 120/… 1.92 <NA>
## 5 Kung Mei-Lin 31281 99/77 0.81 25.5
## 6 Ho Chuan-Wei 22868 149/… 0.61 42.2
## # ℹ abbreviated name: ¹Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## # Peak_Plantar_Pressure_kPa <chr>
# Show column types and leading values of every column.
str(object = data)
## tibble [700 × 8] (S3: tbl_df/tbl/data.frame)
## $ Nama : chr [1:700] "Michael Anderson" "N/A" "Tan Wei Ming" "Shen Yi-Ching" ...
## $ Tanggal_Lahir : chr [1:700] "20911" "27657" "23844" "29475" ...
## $ Tensi : chr [1:700] "112/67" "140 / 91" "134/72" "120/79" ...
## $ Skin_Stiffness_N_per_mm : chr [1:700] "0.69" "1.5" "0.76" "1.92" ...
## $ Microcirculation_PU : chr [1:700] "42.0" "41.9" "26.3" NA ...
## $ Suhu_Tubuh_Celcius : chr [1:700] "37.6" "36.5°C" "37.5" "37.0" ...
## $ Penyakit : chr [1:700] "Non-Diabetic" "Non-Diabetic" "Non-Diabetic" "Diabetic" ...
## $ Peak_Plantar_Pressure_kPa: chr [1:700] "294.0" NA "431.8" "577.5" ...
# List the column names of the data set.
names(data)
## [1] "Nama" "Tanggal_Lahir"
## [3] "Tensi" "Skin_Stiffness_N_per_mm"
## [5] "Microcirculation_PU" "Suhu_Tubuh_Celcius"
## [7] "Penyakit" "Peak_Plantar_Pressure_kPa"
# Count the missing values in each column.
vapply(data, function(column) sum(is.na(column)), numeric(1))
## Nama Tanggal_Lahir Tensi
## 40 42 47
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## 37 50 49
## Penyakit Peak_Plantar_Pressure_kPa
## 45 43
# Keep only digits and the decimal point in the body-temperature strings,
# which removes unit suffixes such as a degree sign or "C". The column stays
# character; missing values stay missing.
data[["Suhu_Tubuh_Celcius"]] <- gsub(
  pattern = "[^0-9.]",
  replacement = "",
  x = data[["Suhu_Tubuh_Celcius"]]
)
data
## # A tibble: 700 × 8
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
## <chr> <chr> <chr> <chr> <chr>
## 1 Michael Ander… 20911 112/… 0.69 42.0
## 2 N/A 27657 140 … 1.5 41.9
## 3 Tan Wei Ming 23844 134/… 0.76 26.3
## 4 Shen Yi-Ching 29475 120/… 1.92 <NA>
## 5 Kung Mei-Lin 31281 99/77 0.81 25.5
## 6 Ho Chuan-Wei 22868 149/… 0.61 42.2
## 7 <NA> 34352 110/… 1.04 2.0
## 8 Betty Lewis 30165 108/… 2.24 9.5
## 9 Joseph Garcia 30291 <NA> 0.18 24.8
## 10 Ong Lay Kheng 18685 128/… <NA> 40.9
## # ℹ 690 more rows
## # ℹ abbreviated name: ¹Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## # Peak_Plantar_Pressure_kPa <chr>
# Treat the literal placeholder "N/A" in the name column as a real missing value.
is.na(data$Nama) <- which(data$Nama == "N/A")
# Keep only the rows that have no missing value in any column.
data <- stats::na.omit(data)
# Collapse the spelling variants of the diagnosis label into two canonical
# values. Labels not listed here are left exactly as they are.
data$Penyakit <- replace(
  data$Penyakit,
  data$Penyakit %in% c("Diabetic", "diabetic", "DIABETIC", "DM", "Yes", "1"),
  "Diabetic"
)
data$Penyakit <- replace(
  data$Penyakit,
  data$Penyakit %in% c(
    "Non-Diabetic", "non-diabetic", "Sehat",
    "Healthy", "Tidak", "No", "Normal"
  ),
  "Non-Diabetic"
)
# Parse the microcirculation readings (read in as text) and round them to two
# decimals. Text that is not a number becomes NA (with a coercion warning).
data$Microcirculation_PU <- round(as.numeric(data$Microcirculation_PU), 2)
# Remove the " mmHg" unit text from the blood-pressure strings.
data$Tensi <- gsub(" mmHg", "", data$Tensi)
# Drop rows whose reading is negative or could not be parsed. which() discards
# the NA comparisons; indexing rows with a logical vector that contains NA
# would instead insert rows in which every column is NA.
data <- data[which(data$Microcirculation_PU >= 0), ]
# Normalise blood-pressure strings in three passes, innermost first:
#   1. remove any "mmHg" unit, regardless of case;
#   2. rewrite "sys ... dia ..." style entries as "<systolic>/<diastolic>";
#   3. remove all remaining whitespace (e.g. "140 / 91" becomes "140/91").
data$Tensi <- gsub(
  pattern = "\\s+",
  replacement = "",
  x = gsub(
    pattern = ".*[Ss]ys[: ]*([0-9]+).*?[Dd]ia[: ]*([0-9]+).*",
    replacement = "\\1/\\2",
    x = gsub(
      pattern = "mmHg",
      replacement = "",
      x = data$Tensi,
      ignore.case = TRUE
    )
  )
)
# Open the spreadsheet-style viewer only in an interactive session; View()
# needs a GUI and may fail or block a non-interactive run (Rscript, knitting).
if (interactive()) {
  View(data)
}
# Flag negative measurements as missing. Both measurement columns are still
# character here, so comparing them directly with 0 would coerce 0 to "0" and
# perform a locale-dependent string comparison. Compare the parsed numeric
# value instead. The columns keep their type, and text that does not parse as
# a number is left untouched (which() skips the resulting NA comparisons).
data$Skin_Stiffness_N_per_mm[
  which(suppressWarnings(as.numeric(data$Skin_Stiffness_N_per_mm)) < 0)
] <- NA
# Placeholder strings that are not real patient names.
data$Nama[data$Nama %in% c("???", "123456")] <- NA
data$Peak_Plantar_Pressure_kPa[
  which(suppressWarnings(as.numeric(data$Peak_Plantar_Pressure_kPa)) < 0)
] <- NA
# The birth dates are stored as text; the values look like spreadsheet day
# serials. Coerce them to numbers first. Entries that are not plain numbers
# become NA here, which is what the recorded warning below reports.
angka_bersih <- as.numeric(as.character(data[["Tanggal_Lahir"]]))
## Warning: NAs introduced by coercion
# Convert the day counts to Date values, counting from 1899-12-30.
data[["Tanggal_Lahir"]] <- as.Date(x = angka_bersih, origin = "1899-12-30")
print(head(x = data[["Tanggal_Lahir"]]))
## [1] "1957-04-01" "1965-04-12" "1985-08-22" "1962-08-10" "1982-08-02"
## [6] "1946-10-03"