library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(lubridate)
data <- read.csv("hospital_dataset.csv", stringsAsFactors = FALSE)
str(data)
## 'data.frame':    700 obs. of  8 variables:
##  $ Nama                     : chr  "Michael Anderson" "N/A" "Tan Wei Ming" "Shen Yi-Ching" ...
##  $ Tanggal_Lahir            : chr  "01/04/1957" "20/09/1975" "12/04/1965" "11/09/1980" ...
##  $ Tensi                    : chr  "112/67" "140 / 91" "134/72" "120/79" ...
##  $ Skin_Stiffness_N_per_mm  : num  0.69 1.5 0.76 1.92 0.81 0.61 1.04 2.24 0.18 NA ...
##  $ Microcirculation_PU      : num  42 41.9 26.3 NA 25.5 42.2 2 9.5 24.8 40.9 ...
##  $ Suhu_Tubuh_Celcius       : chr  "37.6" "36.5°C" "37.5" "37.0" ...
##  $ Penyakit                 : chr  "Non-Diabetic" "Non-Diabetic" "Non-Diabetic" "Diabetic" ...
##  $ Peak_Plantar_Pressure_kPa: num  294 NA 432 578 502 ...
head(data)
##               Nama Tanggal_Lahir    Tensi Skin_Stiffness_N_per_mm
## 1 Michael Anderson    01/04/1957   112/67                    0.69
## 2              N/A    20/09/1975 140 / 91                    1.50
## 3     Tan Wei Ming    12/04/1965   134/72                    0.76
## 4    Shen Yi-Ching    11/09/1980   120/79                    1.92
## 5     Kung Mei-Lin    22/08/1985    99/77                    0.81
## 6     Ho Chuan-Wei    10/08/1962   149/65                    0.61
##   Microcirculation_PU Suhu_Tubuh_Celcius     Penyakit Peak_Plantar_Pressure_kPa
## 1                42.0               37.6 Non-Diabetic                     294.0
## 2                41.9             36.5°C Non-Diabetic                        NA
## 3                26.3               37.5 Non-Diabetic                     431.8
## 4                  NA               37.0     Diabetic                     577.5
## 5                25.5               36.0     Diabetic                     502.3
## 6                42.2               36.8 Non-Diabetic                     201.4
data <- clean_names(data)
colSums(is.na(data))
##                       nama              tanggal_lahir 
##                          0                          0 
##                      tensi    skin_stiffness_n_per_mm 
##                          0                         37 
##        microcirculation_pu         suhu_tubuh_celcius 
##                         50                          0 
##                   penyakit peak_plantar_pressure_k_pa 
##                          0                         43
data <- data %>%
  mutate(across(where(is.character), ~trimws(.)))
data <- data %>%
  mutate(across(where(is.numeric),
                ~ifelse(is.na(.), median(., na.rm = TRUE), .)))
sum(duplicated(data))
## [1] 2
data[duplicated(data), ]
##              nama tanggal_lahir  tensi skin_stiffness_n_per_mm
## 342   Betty Lewis    23/05/1988 121/96                    1.66
## 556 Hsieh Shu-Hui    15/01/1967  96/73                    1.43
##     microcirculation_pu suhu_tubuh_celcius penyakit peak_plantar_pressure_k_pa
## 342                14.6               36.6 Diabetic                      475.8
## 556                36.9               37.0 Diabetic                      393.2
data <- data %>% distinct()
sum(duplicated(data))
## [1] 0
data$tensi <- gsub("\\s+", "", data$tensi)
head(data$tensi)
## [1] "112/67" "140/91" "134/72" "120/79" "99/77"  "149/65"
data$suhu_tubuh_celcius <- gsub(",", ".", data$suhu_tubuh_celcius)
data$suhu_tubuh_celcius <- gsub("[^0-9.]", "", data$suhu_tubuh_celcius)
data$suhu_tubuh_celcius <- as.numeric(data$suhu_tubuh_celcius)

head(data$suhu_tubuh_celcius)
## [1] 37.6 36.5 37.5 37.0 36.0 36.8
data[is.na(data$suhu_tubuh_celcius), ]
##                nama tanggal_lahir   tensi skin_stiffness_n_per_mm
## 66    Ong Lay Kheng    16/03/1995  132/76                    0.84
## 69      Ng Boon Hua    29/04/1990  114/79                    2.07
## 79     Kung Mei-Lin    08/05/1959  117/83                    1.76
## 82      Wu Ming-Hui    01/12/1991  136/69                    0.33
## 97      Betty Lewis    23/03/1955  115/84                    1.01
## 125  Karen Thompson    24/11/1996  102/71                    1.10
## 147  Richard Martin    22/03/1949  118/79                    1.06
## 163     Chou Mei-Yu    01/02/1992                            0.53
## 175        Wang Jie    07/02/1945  135/64                    1.42
## 187                    22/09/1949  109/94                    0.17
## 189    David Harris    26/01/1998  106/71                    2.27
## 193   Charles Clark    12/08/1944  145/84                    0.94
## 195  Patricia Davis    08/05/1972  110/81                    1.28
## 206   Hsu Kuo-Chang                113/98                    1.50
## 208   Jessica White    25/05/1998  119/85                    2.22
## 209  Lu Hsiang-Ling    12/05/1990                            1.41
## 262      John Smith    20/03/1971                            1.56
## 284     Ng Boon Hua                126/76                    0.97
## 311   Pan Mei-Hsuan    03/07/1981  115/87                    1.29
## 318    Tan Wei Ming    21/10/1965                            1.74
## 337  Richard Martin    06/04/1950                            0.68
## 353  Lu Hsiang-Ling    17/11/1997  139/84                    0.99
## 370   Ong Lay Kheng    15/04/1991  105/83                    2.00
## 387    Mary Johnson    08/11/1975  128/76                    1.04
## 410   Joseph Walker    13/10/1942  107/63                    1.82
## 425    Yen Kuo-Jung    11/05/2005  102/83                    0.85
## 443  William Thomas    18/06/1962  115/84                    1.07
## 494      Helen Hall    17/12/1965  122/85                    1.59
## 499      John Smith    31/03/1942  135/96                    1.10
## 508                    27/01/1996  117/78                    0.63
## 522    Kung Mei-Lin                116/84                    0.58
## 542    Kung Mei-Lin    26/07/1965  129/69                    1.97
## 544   Hsu Kuo-Chang    13/07/1954  127/69                    1.42
## 548     James Brown    05/04/1993  120/81                    0.28
## 552 Chang Chung-Wei    30/07/1969  123/66                    0.84
## 570  Lu Hsiang-Ling    17/05/1964   92/75                    0.35
## 572   Ong Lay Kheng    06/01/1987  114/67                    1.10
## 578     Ng Boon Hua    21/02/1940  146/66                    0.42
## 587                    01/12/1998  148/79                    1.50
## 595                                105/79                    1.28
## 599   Huang Li-Chen    26/08/1961  132/76                    1.13
## 600   Huang Li-Chen    15/07/1960  140/93                    1.10
## 607  Lu Hsiang-Ling    23/12/1980 133/103                    0.91
## 611    Chiu Yu-Chin    05/09/1944  114/69                    1.99
## 646 Tseng Wen-Liang                150/82                    0.10
## 663        Wang Jie    19/06/2005  119/56                    0.66
## 669     Wu Ming-Hui    13/02/2004  122/90                    1.10
## 686   Joseph Garcia    24/09/1980  122/89                    0.10
## 693   Liu Hsiao-Fen                120/89                    1.73
##     microcirculation_pu suhu_tubuh_celcius     penyakit
## 66                 48.7                 NA Non-Diabetic
## 69                 19.9                 NA     Diabetic
## 79                 19.3                 NA     Diabetic
## 82                 43.4                 NA Non-Diabetic
## 97                 22.2                 NA Non-Diabetic
## 125                28.5                 NA     Diabetic
## 147                 8.8                 NA     Diabetic
## 163                41.8                 NA Non-Diabetic
## 175                53.2                 NA             
## 187                16.0                 NA Non-Diabetic
## 189                 8.5                 NA     Diabetic
## 193                41.7                 NA Non-Diabetic
## 195                 6.9                 NA     Diabetic
## 206                18.0                 NA     Diabetic
## 208                26.5                 NA     Diabetic
## 209                27.7                 NA     Diabetic
## 262                27.7                 NA     Diabetic
## 284                28.8                 NA     DIABETIC
## 311                 1.0                 NA             
## 318                13.1                 NA     Diabetic
## 337                55.1                 NA Non-Diabetic
## 353                16.0                 NA     Diabetic
## 370                27.7                 NA     Diabetic
## 387                37.2                 NA Non-Diabetic
## 410                25.6                 NA     Diabetic
## 425                32.4                 NA Non-Diabetic
## 443                20.5                 NA Non-Diabetic
## 494                 6.3                 NA     Diabetic
## 499                20.4                 NA     Diabetic
## 508                51.6                 NA Non-Diabetic
## 522                27.7                 NA Non-Diabetic
## 542                16.2                 NA     Diabetic
## 544                18.9                 NA Non-Diabetic
## 548                29.0                 NA Non-Diabetic
## 552                27.7                 NA Non-Diabetic
## 570                27.0                 NA Non-Diabetic
## 572                45.2                 NA Non-Diabetic
## 578                45.6                 NA Non-Diabetic
## 587                15.0                 NA             
## 595                17.4                 NA     Diabetic
## 599                 1.0                 NA     Diabetic
## 600                13.9                 NA     Diabetic
## 607                32.9                 NA Non-Diabetic
## 611                 1.0                 NA             
## 646                39.9                 NA Non-Diabetic
## 663                27.7                 NA Non-Diabetic
## 669                27.2                 NA Non-Diabetic
## 686                39.2                 NA Non-Diabetic
## 693                21.2                 NA           DM
##     peak_plantar_pressure_k_pa
## 66                       352.3
## 69                       538.7
## 79                       384.3
## 82                       337.5
## 97                       390.4
## 125                      710.6
## 147                    99999.0
## 163                      274.7
## 175                      422.5
## 187                      239.8
## 189                      591.0
## 193                      125.8
## 195                      517.0
## 206                      601.8
## 208                      537.1
## 209                      350.6
## 262                      409.9
## 284                      166.9
## 311                      503.8
## 318                      502.7
## 337                      150.9
## 353                      514.0
## 370                      575.0
## 387                      120.1
## 410                      292.7
## 425                      349.8
## 443                      147.0
## 494                      560.2
## 499                      537.8
## 508                      298.5
## 522                      390.5
## 542                      583.9
## 544                      262.1
## 548                      204.1
## 552                      364.9
## 570                      391.4
## 572                      388.4
## 578                      384.3
## 587                      630.1
## 595                      463.4
## 599                      641.8
## 600                      482.8
## 607                      138.5
## 611                      610.9
## 646                      384.3
## 663                      238.5
## 669                      305.1
## 686                      333.7
## 693                      377.2
unique(data$tanggal_lahir)
##   [1] "01/04/1957"        "20/09/1975"        "12/04/1965"       
##   [4] "11/09/1980"        "22/08/1985"        "10/08/1962"       
##   [7] "18/01/1994"        "02/08/1982"        "06/12/1982"       
##  [10] "26/02/1951"        "16/02/1944"        ""                 
##  [13] "03/10/1946"        "02/11/1957"        "18/03/1973"       
##  [16] "04/07/1964"        "1967"              "08/02/1988"       
##  [19] "02/05/1996"        "24/02/1988"        "25/08/1987"       
##  [22] "19/11/1946"        "07/11/1977"        "06/03/1982"       
##  [25] "19/02/1969"        "05/11/1965"        "07/07/1985"       
##  [28] "22/09/2001"        "29/12/2001"        "05/11/2001"       
##  [31] "30/04/1989"        "30/11/1944"        "10/05/1946"       
##  [34] "17/11/1942"        "17/09/1972"        "18/08/1971"       
##  [37] "19/05/1988"        "01/02/1951"        "08/06/1951"       
##  [40] "12/11/1940"        "03/05/1993"        "16/05/1970"       
##  [43] "26/09/1951"        "12/05/1992"        "29/07/1942"       
##  [46] "11/07/1989"        "16/09/1967"        "11/01/1958"       
##  [49] "05/10/1970"        "03/08/1942"        "16/03/1947"       
##  [52] "03/10/1989"        "04/03/1992"        "23/04/1952"       
##  [55] "27/11/2005"        "10/11/1959"        "1977"             
##  [58] "17/03/1966"        "10/01/1992"        "1980"             
##  [61] "20-02-2003"        "16/03/1995"        "31/10/1971"       
##  [64] "27/07/1989"        "29/04/1990"        "06/11/1991"       
##  [67] "05/12/1964"        "26/01/2001"        "26/05/1992"       
##  [70] "17/02/1954"        "23/12/1943"        "06/09/1941"       
##  [73] "20/11/1952"        "08/05/1959"        "17/05/1973"       
##  [76] "30/10/1983"        "01/12/1991"        "22/04/1999"       
##  [79] "07/06/1982"        "01/10/1985"        "27/07/1978"       
##  [82] "01/10/1989"        "22/04/1962"        "21/06/1978"       
##  [85] "29/10/1995"        "19/11/1990"        "11/10/1994"       
##  [88] "27/04/1984"        "19/09/1974"        "21/06/1981"       
##  [91] "23/03/1955"        "02/09/1976"        "18/04/1954"       
##  [94] "24/08/1968"        "24/03/1962"        "13/11/1962"       
##  [97] "16/08/1975"        "26/03/1988"        "05/08/1993"       
## [100] "01/01/1995"        "04/03/1946"        "15/01/1967"       
## [103] "04/05/1963"        "29/04/1999"        "28/03/1940"       
## [106] "06/02/2000"        "29/04/1951"        "09/12/1960"       
## [109] "28/03/1953"        "08/06/1969"        "09/05/1963"       
## [112] "05/07/1967"        "29/09/1978"        "17/11/1976"       
## [115] "12/11/1953"        "14/04/1949"        "24/11/1996"       
## [118] "06/03/1970"        "26/03/1995"        "03/02/1992"       
## [121] "31/05/2005"        "17/06/1989"        "17/11/1952"       
## [124] "14/11/1957"        "20/08/1979"        "23/07/1985"       
## [127] "13/04/1967"        "11/11/1944"        "29/05/1992"       
## [130] "07/04/1979"        "09/09/1956"        "29/06/1941"       
## [133] "April 10, 1989"    "06/01/1985"        "05/03/1952"       
## [136] "04/06/1961"        "20/05/1953"        "22/03/1949"       
## [139] "28/01/1979"        "24/04/1947"        "30/07/1982"       
## [142] "18/09/1948"        "11/04/1981"        "13/03/1970"       
## [145] "16/06/1961"        "17/06/1999"        "02/09/1998"       
## [148] "24/10/1972"        "14/09/1980"        "18/12/1996"       
## [151] "01/02/1992"        "20/03/1980"        "09/03/2005"       
## [154] "25/06/1965"        "14-12-1963"        "13/04/1954"       
## [157] "06/11/1953"        "03/09/1948"        "1945"             
## [160] "23/09/1953"        "23-08-1998"        "07/02/1945"       
## [163] "26/11/1957"        "15/02/1990"        "25/04/1992"       
## [166] "21/01/1947"        "09/11/1951"        "07/04/2005"       
## [169] "19/01/1999"        "17/03/1954"        "19/07/1941"       
## [172] "27/06/1962"        "22/09/1949"        "09/09/1999"       
## [175] "26/01/1998"        "13/08/1946"        "22/10/1996"       
## [178] "24/12/1952"        "12/08/1944"        "09/11/1998"       
## [181] "08/05/1972"        "06/01/1973"        "11/09/1943"       
## [184] "02/21/1995"        "06/06/1995"        "11/06/1992"       
## [187] "12/04/1970"        "22/11/1999"        "25/05/1998"       
## [190] "12/05/1990"        "12/09/1957"        "17/09/1982"       
## [193] "24/11/2005"        "12/01/1968"        "28/08/1979"       
## [196] "11/07/1947"        "27/03/1986"        "21/06/1998"       
## [199] "20/10/1955"        "29/04/1997"        "21/01/1993"       
## [202] "12/11/1957"        "07/03/1951"        "19/04/1980"       
## [205] "03/02/1973"        "22/10/1990"        "20/01/2003"       
## [208] "02/06/1951"        "05-05-1994"        "04/10/1959"       
## [211] "04/04/2002"        "23/04/1991"        "11/04/1994"       
## [214] "February 16, 1961" "19/12/1967"        "29/11/1992"       
## [217] "19/08/1955"        "08/09/1961"        "15/04/2004"       
## [220] "26/11/1997"        "21/08/1978"        "04/12/1954"       
## [223] "20/04/1959"        "29/07/1990"        "13/08/2001"       
## [226] "29/11/1958"        "16/06/1941"        "06/06/1973"       
## [229] "30/11/1957"        "19/04/2003"        "18/07/1985"       
## [232] "13/05/1968"        "17/05/1972"        "20/09/1979"       
## [235] "29/10/1941"        "13/12/1946"        "20/03/1971"       
## [238] "02/07/1990"        "05/11/1958"        "14/04/1986"       
## [241] "18/09/1995"        "02/24/2002"        "05/07/1980"       
## [244] "31/05/1959"        "26/07/1949"        "02/12/1972"       
## [247] "11/02/1980"        "06/07/1977"        "12/07/1940"       
## [250] "14/05/1953"        "23/05/1988"        "07/05/1950"       
## [253] "07/03/1977"        "22/09/1940"        "11/11/1987"       
## [256] "19/11/1955"        "22/09/1952"        "10/04/1980"       
## [259] "03/02/1956"        "04/12/1988"        "28/05/1948"       
## [262] "11/02/1984"        "13/12/1975"        "19/10/1981"       
## [265] "15/09/1955"        "04/10/1956"        "14/06/2001"       
## [268] "08/12/1942"        "26/08/1981"        "24/06/1994"       
## [271] "19/07/2002"        "17/11/1981"        "12/01/1992"       
## [274] "07 Nov 2004"       "25/03/1996"        "18/06/1961"       
## [277] "10/11/2000"        "23/02/1946"        "24/11/1984"       
## [280] "08/09/1959"        "15/08/1969"        "16/12/2002"       
## [283] "14/08/1984"        "03/07/1981"        "16/12/1963"       
## [286] "02/01/1980"        "20/07/1985"        "23/02/1944"       
## [289] "11/12/1949"        "21/10/1965"        "28/08/1991"       
## [292] "06/12/1950"        "10/09/1954"        "27/08/1987"       
## [295] "30/12/1963"        "17/05/1965"        "16/09/1949"       
## [298] "02/06/1981"        "01/01/1969"        "15/04/1977"       
## [301] "17/11/1997"        "19/03/1970"        "07/11/1967"       
## [304] "27/03/1974"        "23/10/1988"        "27/04/1966"       
## [307] "06/04/1950"        "28/04/1947"        "03/05/1985"       
## [310] "29/10/1961"        "20/06/1969"        "13/01/1998"       
## [313] "12/01/1987"        "20/10/1968"        "25/09/1988"       
## [316] "17/05/1945"        "24/04/2000"        "07/01/2000"       
## [319] "04/03/1959"        "11/01/1956"        "19/12/1950"       
## [322] "08/03/1956"        "03/08/1969"        "02 Feb 1967"      
## [325] "27/11/1962"        "05/03/1991"        "29/10/1992"       
## [328] "08/03/1948"        "31/12/1973"        "17/08/1997"       
## [331] "23/01/1961"        "13/12/1998"        "14/02/1942"       
## [334] "15/04/1991"        "05/02/1957"        "02/09/1964"       
## [337] "12/03/1959"        "22/12/1972"        "27/07/1983"       
## [340] "26/10/1977"        "01/06/1997"        "24/09/1951"       
## [343] "03/07/2003"        "01/02/1968"        "16/12/1950"       
## [346] "10/11/1982"        "04/07/1998"        "03/03/1971"       
## [349] "24/01/1992"        "08/11/1975"        "30/03/1986"       
## [352] "16/07/1950"        "13/10/1959"        "14/09/1989"       
## [355] "02/01/2005"        "01/05/1941"        "21/08/1952"       
## [358] "30/10/1963"        "28/05/1957"        "07/12/1970"       
## [361] "19/05/1945"        "25/11/1947"        "02/09/1978"       
## [364] "18/12/1985"        "21/04/1958"        "24/03/1941"       
## [367] "02/12/1969"        "02/06/1994"        "10/03/1999"       
## [370] "02/03/2001"        "12/12/1982"        "10/07/1966"       
## [373] "13/10/1942"        "16/07/1956"        "23/01/1994"       
## [376] "14/04/2003"        "03/01/1990"        "28/04/1954"       
## [379] "11/02/1988"        "15/11/1977"        "31/05/1953"       
## [382] "06/09/1972"        "25/08/1991"        "30 Jan 1980"      
## [385] "13/10/1964"        "21/05/1999"        "11/05/2005"       
## [388] "20/10/1998"        "07/03/1979"        "10/05/1950"       
## [391] "08/09/1980"        "26/02/1970"        "05/08/1955"       
## [394] "19/07/1940"        "09/12/1975"        "18/10/1973"       
## [397] "12/02/1993"        "30/09/1967"        "22/03/1956"       
## [400] "19/07/1979"        "09/08/1999"        "15/08/1996"       
## [403] "18/06/1962"        "03/11/1994"        "20/01/1963"       
## [406] "19/08/1984"        "20/03/1947"        "01/05/1970"       
## [409] "31/05/1957"        "17/04/1952"        "15/02/2000"       
## [412] "31/01/1955"        "14/04/1947"        "06 May 1947"      
## [415] "10/12/1952"        "17/07/1999"        "25/11/1990"       
## [418] "20/12/1944"        "08/29/1959"        "17/06/1948"       
## [421] "21/01/2003"        "05/12/1975"        "11/10/1944"       
## [424] "02/11/1966"        "24/12/1975"        "01/02/1979"       
## [427] "24/03/1948"        "06/03/1950"        "19/05/1967"       
## [430] "22/10/1954"        "16/05/1951"        "10/02/1997"       
## [433] "05/12/1963"        "22/04/1979"        "14/09/1941"       
## [436] "15/01/1984"        "09-01-1941"        "07/11/1999"       
## [439] "23/08/1960"        "21/07/2001"        "26/01/1961"       
## [442] "09/11/1970"        "06/01/1999"        "28/05/1946"       
## [445] "02/12/1990"        "28/08/1970"        "03/02/1988"       
## [448] "22/05/1994"        "13/04/1976"        "13 Jan 2001"      
## [451] "17/12/1965"        "09/02/1948"        "05/04/1976"       
## [454] "18/11/1959"        "06/10/1953"        "31/03/1942"       
## [457] "22/05/1970"        "17/06/1970"        "31/07/1987"       
## [460] "22/04/1953"        "22/05/1990"        "24/03/1960"       
## [463] "15/01/1965"        "28/04/1960"        "27/01/1996"       
## [466] "14/07/1953"        "21/05/1982"        "17/05/1969"       
## [469] "10/05/1947"        "21/08/1993"        "28/04/2001"       
## [472] "26/10/1967"        "10/11/1944"        "22/10/1952"       
## [475] "28/06/1955"        "18/08/1979"        "17/05/1988"       
## [478] "24/07/1984"        "06/12/1958"        "04/04/2000"       
## [481] "13/03/1991"        "12/04/1988"        "22/06/1943"       
## [484] "15/12/1943"        "09/08/1944"        "15/01/2001"       
## [487] "26/09/2004"        "06/04/1996"        "16/03/1961"       
## [490] "14/05/1991"        "27/09/1965"        "13/01/1950"       
## [493] "01/04/1941"        "23/05/1985"        "28/10/1969"       
## [496] "31/03/1945"        "26/07/1965"        "26/07/1976"       
## [499] "13/07/1954"        "20/09/1967"        "07/10/1986"       
## [502] "05/04/1993"        "09/05/1985"        "23/08/1990"       
## [505] "15/03/1992"        "30/07/1969"        "21/06/1982"       
## [508] "05/12/1942"        "03/07/1997"        "14/06/1979"       
## [511] "23/06/1966"        "10/01/1972"        "24/02/2000"       
## [514] "18/09/1953"        "12/09/1941"        "14/03/1951"       
## [517] "24/04/1956"        "12/11/1993"        "17/04/1949"       
## [520] "03/05/1977"        "31/10/1967"        "17/05/1964"       
## [523] "23/01/1980"        "06/01/1987"        "09/09/1981"       
## [526] "31/01/1978"        "18/08/1976"        "13/02/1954"       
## [529] "26/08/1990"        "21/02/1940"        "25/11/1948"       
## [532] "28/06/1995"        "23/09/1991"        "22/11/1978"       
## [535] "21/04/1998"        "22/02/2002"        "03/04/1975"       
## [538] "01/12/1998"        "27/04/1987"        "06/05/1971"       
## [541] "15/02/1946"        "17/07/2000"        "28/06/1989"       
## [544] "17/05/1959"        "31/08/1977"        "13/09/1985"       
## [547] "15/06/1972"        "26/08/1961"        "15/07/1960"       
## [550] "06/08/1967"        "1988"              "26/03/1943"       
## [553] "08/01/1964"        "29/05/1970"        "10/09/1978"       
## [556] "23/12/1980"        "18/11/1986"        "09/01/1962"       
## [559] "05/03/1976"        "05/09/1944"        "18/11/2001"       
## [562] "08/07/1952"        "27/12/1974"        "13/01/1966"       
## [565] "18/05/1989"        "31/01/1944"        "10/09/1989"       
## [568] "14/03/1985"        "07/05/1974"        "19/02/1968"       
## [571] "29/08/1978"        "04/09/1980"        "12/03/1984"       
## [574] "13/06/1969"        "02/07/1979"        "03/09/1985"       
## [577] "10/10/1966"        "13/07/1953"        "19/08/1989"       
## [580] "22/07/1958"        "07/06/1986"        "12/02/1960"       
## [583] "12/04/1948"        "25/03/1990"        "26/06/2002"       
## [586] "27/12/1958"        "13/06/1973"        "03 Jan 1947"      
## [589] "17/10/1963"        "28/11/1940"        "16/12/1953"       
## [592] "11/01/1986"        "22/08/1979"        "08/03/1947"       
## [595] "15/09/1979"        "12/07/1942"        "28/03/1960"       
## [598] "31/10/1975"        "12/12/1992"        "29/04/1966"       
## [601] "24/11/1946"        "04/06/1981"        "15/08/1984"       
## [604] "25/12/1943"        "24/09/1972"        "18/04/1993"       
## [607] "1985"              "19/06/2005"        "22/11/1994"       
## [610] "28/12/1964"        "23/07/1986"        "15/11/1987"       
## [613] "13/02/2004"        "07/11/1976"        "03/12/1946"       
## [616] "28/12/1980"        "02/09/1997"        "27/02/1960"       
## [619] "15/05/1981"        "24/07/1949"        "15/07/1972"       
## [622] "04/09/1988"        "07/04/1962"        "26/06/1962"       
## [625] "06/02/1976"        "13/11/1968"        "24/09/1980"       
## [628] "02/10/2002"        "16/04/1996"        "19/07/1973"       
## [631] "26/07/1947"        "18/09/1997"        "28/08/2004"       
## [634] "29/04/1958"        "04/06/2001"        "06/03/2001"
data$penyakit <- trimws(data$penyakit)
data$penyakit <- tolower(data$penyakit)

data$penyakit <- recode(data$penyakit,
                        "diabetic" = "Diabetic",
                        "non-diabetic" = "Non-Diabetic")

table(data$penyakit)
## 
##                         1     Diabetic           dm      healthy           no 
##           45            2          297            5            1            3 
## Non-Diabetic       normal        sakit        sehat        tidak          yes 
##          337            1            1            1            3            2
remove_outlier <- function(x) {
  Q1 <- quantile(x, 0.25, na.rm = TRUE)
  Q3 <- quantile(x, 0.75, na.rm = TRUE)
  IQR_val <- Q3 - Q1

  lower <- Q1 - 1.5 * IQR_val
  upper <- Q3 + 1.5 * IQR_val

  x[x < lower | x > upper] <- median(x, na.rm = TRUE)

  return(x)
}

data$suhu_tubuh_celcius <- remove_outlier(data$suhu_tubuh_celcius)
head(data$suhu_tubuh_celcius)
## [1] 37.6 36.5 37.5 37.0 36.0 36.8
summary(data$suhu_tubuh_celcius)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.     NAs 
##   35.60   36.50   36.80   36.78   37.00   38.00      49
summary(data$skin_stiffness_n_per_mm)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -2.180   0.710   1.100   1.329   1.560 150.000
summary(data$microcirculation_pu)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -32.50   19.32   27.70   35.04   37.88 5000.00
summary(data$peak_plantar_pressure_k_pa)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -100.0   276.6   384.3   956.1   502.6 99999.0
summary(data$suhu_tubuh_celcius)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.     NAs 
##   35.60   36.50   36.80   36.78   37.00   38.00      49
boxplot(data$skin_stiffness_n_per_mm)

remove_outlier <- function(x) {
  Q1 <- quantile(x, 0.25, na.rm = TRUE)
  Q3 <- quantile(x, 0.75, na.rm = TRUE)
  IQR_val <- Q3 - Q1

  lower <- Q1 - 1.5 * IQR_val
  upper <- Q3 + 1.5 * IQR_val

  outliers <- sum(x < lower | x > upper, na.rm = TRUE)
  cat("Jumlah outlier:", outliers, "\n")

  x[x < lower | x > upper] <- median(x, na.rm = TRUE)

  return(x)
}
data$skin_stiffness_n_per_mm <- remove_outlier(data$skin_stiffness_n_per_mm)
## Jumlah outlier: 6
write.csv(data, "hospital_dataset_cleaned.csv", row.names = FALSE)

cat("File berhasil disimpan!\n")
## File berhasil disimpan!
summary(data)
##         nama       tanggal_lahir       tensi     skin_stiffness_n_per_mm
##  Length   :698   Length   :698   Length   :698   Min.   :-0.530         
##  N.unique : 59   N.unique :636   N.unique :547   1st Qu.: 0.720         
##  N.blank  : 40   N.blank  : 42   N.blank  : 47   Median : 1.100         
##  Min.nchar:  0   Min.nchar:  0   Min.nchar:  0   Mean   : 1.128         
##  Max.nchar: 16   Max.nchar: 17   Max.nchar: 13   3rd Qu.: 1.560         
##                                                  Max.   : 2.590         
##                                                                         
##  microcirculation_pu suhu_tubuh_celcius      penyakit  
##  Min.   : -32.50     Min.   :35.60      Length   :698  
##  1st Qu.:  19.32     1st Qu.:36.50      N.unique : 12  
##  Median :  27.70     Median :36.80      N.blank  : 45  
##  Mean   :  35.04     Mean   :36.78      Min.nchar:  0  
##  3rd Qu.:  37.88     3rd Qu.:37.00      Max.nchar: 12  
##  Max.   :5000.00     Max.   :38.00                     
##                      NAs    :49                        
##  peak_plantar_pressure_k_pa
##  Min.   : -100.0           
##  1st Qu.:  276.6           
##  Median :  384.3           
##  Mean   :  956.1           
##  3rd Qu.:  502.6           
##  Max.   :99999.0           
## 
head(data)
##               nama tanggal_lahir  tensi skin_stiffness_n_per_mm
## 1 Michael Anderson    01/04/1957 112/67                    0.69
## 2              N/A    20/09/1975 140/91                    1.50
## 3     Tan Wei Ming    12/04/1965 134/72                    0.76
## 4    Shen Yi-Ching    11/09/1980 120/79                    1.92
## 5     Kung Mei-Lin    22/08/1985  99/77                    0.81
## 6     Ho Chuan-Wei    10/08/1962 149/65                    0.61
##   microcirculation_pu suhu_tubuh_celcius     penyakit
## 1                42.0               37.6 Non-Diabetic
## 2                41.9               36.5 Non-Diabetic
## 3                26.3               37.5 Non-Diabetic
## 4                27.7               37.0     Diabetic
## 5                25.5               36.0     Diabetic
## 6                42.2               36.8 Non-Diabetic
##   peak_plantar_pressure_k_pa
## 1                      294.0
## 2                      384.3
## 3                      431.8
## 4                      577.5
## 5                      502.3
## 6                      201.4
str(data)
## 'data.frame':    698 obs. of  8 variables:
##  $ nama                      : chr  "Michael Anderson" "N/A" "Tan Wei Ming" "Shen Yi-Ching" ...
##  $ tanggal_lahir             : chr  "01/04/1957" "20/09/1975" "12/04/1965" "11/09/1980" ...
##  $ tensi                     : chr  "112/67" "140/91" "134/72" "120/79" ...
##  $ skin_stiffness_n_per_mm   : num  0.69 1.5 0.76 1.92 0.81 0.61 1.04 2.24 0.18 1.1 ...
##  $ microcirculation_pu       : num  42 41.9 26.3 27.7 25.5 42.2 2 9.5 24.8 40.9 ...
##  $ suhu_tubuh_celcius        : num  37.6 36.5 37.5 37 36 36.8 36.3 36.4 36.9 36.6 ...
##  $ penyakit                  : chr  "Non-Diabetic" "Non-Diabetic" "Non-Diabetic" "Diabetic" ...
##  $ peak_plantar_pressure_k_pa: num  294 384 432 578 502 ...