library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Load the raw hospital records into `data`.
data <- read.csv(file = "hospital_dataset.csv")

# Per-column count of cells that R parsed as NA.
# NOTE(review): only true NA values are counted. The incomplete-row listing
# further down shows blank cells (e.g. Tensi) and an "N/A" name that this
# tally reports as 0 -- consider `na.strings` if those should count as missing.
vapply(data, function(column) sum(is.na(column)), numeric(1))
##                      Nama             Tanggal_Lahir                     Tensi 
##                         0                         0                         0 
##   Skin_Stiffness_N_per_mm       Microcirculation_PU        Suhu_Tubuh_Celcius 
##                        37                        50                         0 
##                  Penyakit Peak_Plantar_Pressure_kPa 
##                         0                        43
# Show every record that has at least one NA cell (original row names kept).
data[rowSums(is.na(data)) > 0, ]
##                Nama Tanggal_Lahir    Tensi Skin_Stiffness_N_per_mm
## 2               N/A    20/09/1975 140 / 91                    1.50
## 4     Shen Yi-Ching    11/09/1980   120/79                    1.92
## 9     Joseph Garcia    06/12/1982                             0.18
## 10    Ong Lay Kheng    26/02/1951   128/78                      NA
## 11     Lin Mei-Ling    16/02/1944   113/75                    0.25
## 23   Barbara Taylor    07/11/1977   111/67                      NA
## 28    Charles Clark    22/09/2001   135/78                    1.26
## 32   William Thomas    30/11/1944   103/76                      NA
## 33    Fang Shu-Chen    10/05/1946   105/88                      NA
## 35     Tung Li-Fang    17/09/1972   129/90                    1.63
## 39           Pasien    08/06/1951   122/77                   -2.18
## 40          UNKNOWN                 129/80                    1.64
## 46    Fang Shu-Chen    29/07/1942   128/85                    0.79
## 48                                  122/69                    0.67
## 53    Jessica White    16/03/1947   101/78                    0.95
## 58    Susan Jackson    10/11/1959   119/83                    0.94
## 60    Huang Li-Chen                 106/74                    0.95
## 63   Barbara Taylor    10/01/1992   121/70                    0.10
## 74    Shen Yi-Ching    26/05/1992   115/74                      NA
## 79     Kung Mei-Lin    08/05/1959   117/83                    1.76
## 80     Kung Mei-Lin    17/05/1973   112/86                      NA
## 87     Kung Mei-Lin    01/10/1989   113/49                      NA
## 107    Yen Kuo-Jung    04/03/1946   119/93                    1.17
## 111   Hsu Kuo-Chang    28/03/1940   121/73                   -1.50
## 124   Ong Lay Kheng    14/04/1949   134/92                    1.75
## 125  Karen Thompson    24/11/1996 102 / 71                      NA
## 130  Tsai Chin-Lung    17/06/1989                             1.13
## 135   Liu Hsiao-Fen    23/07/1985   113/69                      NA
## 138   Liu Hsiao-Fen    29/05/1992   140/76                      NA
## 146   Yang Hsiu-Mei    20/05/1953   132/83                    1.74
## 164      John Smith    20/03/1980   132/92                      NA
## 172          123456          1945   149/83                    1.24
## 183  Nancy Robinson    17/03/1954   142/89                    1.74
## 204    Tung Li-Fang                 121/70                    0.56
## 209  Lu Hsiang-Ling    12/05/1990                             1.41
## 224  Richard Martin    07/03/1951    94/83                    1.06
## 225 Tseng Wen-Liang    19/04/1980   116/72                    0.16
## 227   Yang Hsiu-Mei    22/10/1990   117/95                    1.45
## 228   Susan Jackson                 157/67                    0.86
## 238  Nancy Robinson    19/12/1967   111/86                    1.07
## 241   Kao Chin-Feng    08/09/1961   104/87                    2.35
## 244   Charles Clark    26/11/1997    92/90                      NA
## 247  Lu Hsiang-Ling    20/04/1959   106/78                    0.57
## 251    Chiu Yu-Chin    16/06/1941   128/81                    0.69
## 256          Pasien    13/05/1968                             0.10
## 259 Liao Chih-Cheng    29/10/1941   116/85                    1.01
## 262      John Smith    20/03/1971                             1.56
## 273   Susan Jackson    06/07/1977   125/75                      NA
## 275  Karen Thompson                 132/84                      NA
## 278    Lin Mei-Ling    07/05/1950   111/77                      NA
## 280   Pan Mei-Hsuan    22/09/1940   124/92                    0.75
## 295                    08/12/1942   124/82                    1.57
## 303    David Harris    18/06/1961  133/100                      NA
## 304  Linda Martinez    10/11/2000                               NA
## 308  Richard Martin    15/08/1969                             1.42
## 309     Wu Ming-Hui    16/12/2002   136/92                    0.98
## 320    Tung Li-Fang    06/12/1950   143/78                    2.03
## 324   Cheng Shu-Fen    30/12/1963    94/89                      NA
## 338   Shen Yi-Ching    28/04/1947    97/71                      NA
## 350   Joseph Walker    24/04/2000                             2.12
## 353  Karen Thompson    04/03/1959   121/86                    1.42
## 357    Lin Mei-Ling    08/03/1956   126/88                    0.50
## 360      Tan Ah Kow                 137/81                    0.93
## 363  Lu Hsiang-Ling    29/10/1992   111/71                      NA
## 364                    08/03/1948   117/80                    1.10
## 366   Kao Chin-Feng    17/08/1997   138/77                    1.88
## 371   Ong Lay Kheng    15/04/1991   105/83                    2.00
## 383 Liao Chih-Cheng    16/12/1950    78/76                    0.88
## 389    Ho Chuan-Wei    30/03/1986   115/85                      NA
## 392    Kung Mei-Lin    14/09/1989   117/88                    1.61
## 394  Tsai Chin-Lung    01/05/1941   126/96                    1.76
## 400    Tan Wei Ming    25/11/1947   118/80                      NA
## 409      John Smith    12/12/1982   130/81                    0.65
## 410  Richard Martin    10/07/1966   112/74                    0.59
## 413   Ong Lay Kheng    23/01/1994   117/77                    0.48
## 417        Chen Wei    28/04/1954   123/58                      NA
## 427   Hsieh Shu-Hui    20/10/1998   118/64                    0.89
## 431   Shen Yi-Ching    26/02/1970   123/72                    1.31
## 442  Lu Hsiang-Ling    15/08/1996    91/97                      NA
## 462    Yen Kuo-Jung    21/01/2003   101/46                    0.92
## 470  Lu Hsiang-Ling    06/03/1950   132/75                    1.49
## 472   Fang Shu-Chen    29/07/1942   128/85                      NA
## 474   Liu Hsiao-Fen    16/05/1951   145/73                    0.41
## 475 Tseng Wen-Liang    10/02/1997   128/80                    0.95
## 481  Patricia Davis    07/11/1999   128/78                      NA
## 483      Helen Hall    23/08/1960                             0.95
## 486    Yen Kuo-Jung    09/11/1970   124/97                    0.55
## 487  William Thomas    06/01/1999   108/88                    1.76
## 490   Hsieh Shu-Hui    28/08/1970  146/100                    0.93
## 500      John Smith    31/03/1942   135/96                      NA
## 511    Ho Chuan-Wei    21/05/1982  130/107                    0.86
## 512   Hsu Kuo-Chang    17/05/1969   128/92                    1.27
## 517   Charles Clark    10/11/1944   119/67                      NA
## 523    Kung Mei-Lin                 116/84                    0.58
## 532    Tung Li-Fang    26/09/2004   115/82                      NA
## 535   Joseph Walker    25/08/1991   122-71                    1.56
## 536        Wang Jie    14/05/1991   138/82                    1.76
## 537    Tung Li-Fang    27/09/1965   132/76                    1.59
## 540 Chang Chung-Wei    23/05/1985   110/81                      NA
## 547    Chiu Yu-Chin    01/01/1969   134/76                    0.87
## 553 Chang Chung-Wei    30/07/1969   123/66                    0.84
## 555    Yen Kuo-Jung    27/07/1978                               NA
## 558     James Brown    03/07/1997   121/63                    1.42
## 560   Huang Li-Chen    23/06/1966   122/76                    1.66
## 577   Jessica White    18/08/1976   106/62                    0.87
## 580     Ng Boon Hua    21/02/1940   146/66                    0.42
## 587   Hsu Kuo-Chang    03/04/1975   121/56                    0.61
## 594     Wu Ming-Hui    17/07/2000   106/69                      NA
## 602   Huang Li-Chen    15/07/1960   140/93                      NA
## 606      Tan Ah Kow    08/01/1964   125/75                    0.41
## 607    Lee Siew Eng    29/05/1970   130/79                    2.30
## 611   Kao Chin-Feng    09/01/1962    95/88                    0.53
## 621  Barbara Taylor    10/09/1989   155/99                      NA
## 640  Richard Martin    26/06/2002   113/69                    0.32
## 648 Tseng Wen-Liang                 150/82                    0.10
## 656     James Brown    12/12/1992    86/80                    2.57
## 663         unknown    18/04/1993   145/88                    1.71
## 664   Robert Wilson          1985   127/87                    0.82
## 665        Wang Jie    19/06/2005   119/56                    0.66
## 672   Joseph Garcia                  88/83                      NA
## 677   Huang Li-Chen    27/02/1960    89/78                      NA
## 687    Tan Wei Ming    13/11/1968    96/53                    0.53
## 691   Yang Hsiu-Mei    16/04/1996   117/75                    2.31
## 692     James Brown    19/07/1973   103/92                      NA
##     Microcirculation_PU Suhu_Tubuh_Celcius     Penyakit
## 2                  41.9             36.5°C Non-Diabetic
## 4                    NA               37.0     Diabetic
## 9                  24.8               36.9 Non-Diabetic
## 10                 40.9               36.6 Non-Diabetic
## 11                 44.0        37.2celcius Non-Diabetic
## 23                 15.7               36.8     Diabetic
## 28                   NA               36.3 Non-Diabetic
## 32                 25.6               36.9 Non-Diabetic
## 33                  8.6               36.6     Diabetic
## 35                   NA               36.6     Diabetic
## 39                   NA               37.2     Diabetic
## 40                   NA               36.5     Diabetic
## 46                 52.8               36.3 Non-Diabetic
## 48                   NA               36.6 Non-Diabetic
## 53                 49.4               37.4 Non-Diabetic
## 58                   NA               37.1 Non-Diabetic
## 60                   NA               36.3 Non-Diabetic
## 63                   NA               37.4 Non-Diabetic
## 74                 36.5               37.2 Non-Diabetic
## 79                 19.3                        Diabetic
## 80                 45.1               37.0 Non-Diabetic
## 87                 37.4               36.4 Non-Diabetic
## 107                  NA               37.0        Tidak
## 111                  NA               36.8     Diabetic
## 124                19.5               36.4     Diabetic
## 125                28.5                        Diabetic
## 130                  NA               37.1 Non-Diabetic
## 135                33.0               37.1     Diabetic
## 138                 8.2               36.6     Diabetic
## 146                22.9               37.1     Diabetic
## 164                26.5               36.4     Diabetic
## 172                 4.7               36.7     Diabetic
## 183                28.7               36.8     Diabetic
## 204                  NA               36.2 Non-Diabetic
## 209                  NA                        Diabetic
## 224                  NA               36.4 Non-Diabetic
## 225                  NA               36.6 Non-Diabetic
## 227                21.8               36.9     Diabetic
## 228                  NA               42.5     Diabetic
## 238                36.3               37.3     Diabetic
## 241                22.2               36.9     Diabetic
## 244                42.4               37.2 Non-Diabetic
## 247                  NA               37.4 Non-Diabetic
## 251                41.4               37.2 Non-Diabetic
## 256                  NA               37.3 Non-Diabetic
## 259                  NA               36.8             
## 262                  NA                        Diabetic
## 273                40.1               36.9 Non-Diabetic
## 275                22.7               37.6     Diabetic
## 278                27.7             36.9°C Non-Diabetic
## 280                44.7               36.4           No
## 295                  NA               37.0 Non-Diabetic
## 303                  NA               37.0 Non-Diabetic
## 304                18.0               36.3     Diabetic
## 308                 1.0               36.6     Diabetic
## 309                29.2               37.6 Non-Diabetic
## 320                24.0               37.7     Diabetic
## 324                43.0               36.2 Non-Diabetic
## 338                36.6               36.8     Diabetic
## 350                  NA               36.6     Diabetic
## 353                  NA               37.2 Non-Diabetic
## 357                  NA               36.0 Non-Diabetic
## 360                  NA               36.5 Non-Diabetic
## 363                42.6               36.9 Non-Diabetic
## 364                  NA               36.3 Non-Diabetic
## 366                46.1               36.8     Diabetic
## 371                  NA                        Diabetic
## 383                  NA               36.4 Non-Diabetic
## 389                20.6               36.6     Diabetic
## 392                 3.9               36.4     Diabetic
## 394                  NA               37.4     Diabetic
## 400                 3.0               36.9             
## 409                34.3               36.6 Non-Diabetic
## 410                26.2               36.6 Non-Diabetic
## 413              5000.0               36.3 Non-Diabetic
## 417                  NA               36.5             
## 427                  NA               37.2 Non-Diabetic
## 431                  NA               37.2     Diabetic
## 442                15.3               36.6     Diabetic
## 462                47.0               -1.0 Non-Diabetic
## 470                  NA               36.7 Non-Diabetic
## 472                52.8               36.3 Non-Diabetic
## 474                  NA               36.4 Non-Diabetic
## 475                32.7               35.9             
## 481                28.4               36.8 Non-Diabetic
## 483                  NA               36.8 Non-Diabetic
## 486                26.2               36.8     Diabetic
## 487                  NA               36.5     Diabetic
## 490                 7.8               36.7     Diabetic
## 500                20.4                        Diabetic
## 511                  NA               37.1     Diabetic
## 512                  NA               36.8     Diabetic
## 517                43.4               36.4 Non-Diabetic
## 523                  NA                    Non-Diabetic
## 532                51.0               36.5 Non-Diabetic
## 535                43.8               36.5     diabetic
## 536                  NA               36.6     Diabetic
## 537                39.4               37.5             
## 540                38.2               36.9 Non-Diabetic
## 547                34.1               36.9 Non-Diabetic
## 553                  NA                    Non-Diabetic
## 555                37.9               37.2     Diabetic
## 558                 7.6               36.6     Diabetic
## 560                  NA               37.2     Diabetic
## 577                  NA               36.7 Non-Diabetic
## 580                45.6                    Non-Diabetic
## 587                15.8               37.3 Non-Diabetic
## 594                29.7               36.8 Non-Diabetic
## 602                13.9                        Diabetic
## 606                  NA               36.6 Non-Diabetic
## 607                  NA               36.6     Diabetic
## 611                63.6               36.6 Non-Diabetic
## 621                27.7               37.1     Diabetic
## 640                  NA               38.0 Non-Diabetic
## 648                39.9                    Non-Diabetic
## 656                25.8               36.8     Diabetic
## 663                 8.1               36.5            1
## 664                25.0               37.0 Non-Diabetic
## 665                  NA                    Non-Diabetic
## 672                40.4               37.6 Non-Diabetic
## 677                29.8               37.1 Non-Diabetic
## 687                25.8               36.6 Non-Diabetic
## 691                  NA               36.3     Diabetic
## 692                26.3               36.7 Non-Diabetic
##     Peak_Plantar_Pressure_kPa
## 2                          NA
## 4                       577.5
## 9                          NA
## 10                      308.9
## 11                         NA
## 23                      386.0
## 28                      173.3
## 32                      267.6
## 33                      415.3
## 35                      612.7
## 39                         NA
## 40                      667.4
## 46                         NA
## 48                      348.0
## 53                         NA
## 58                      351.0
## 60                      253.2
## 63                      354.1
## 74                      456.8
## 79                         NA
## 80                      310.4
## 87                      238.8
## 107                     475.2
## 111                     544.1
## 124                        NA
## 125                     710.6
## 130                     302.3
## 135                     495.6
## 138                     530.2
## 146                        NA
## 164                     571.2
## 172                        NA
## 183                        NA
## 204                     167.4
## 209                     350.6
## 224                     276.2
## 225                     364.6
## 227                        NA
## 228                     474.5
## 238                        NA
## 241                        NA
## 244                     198.3
## 247                     328.4
## 251                        NA
## 256                     114.1
## 259                     546.3
## 262                     409.9
## 273                     192.7
## 275                     607.6
## 278                     224.0
## 280                        NA
## 295                     205.9
## 303                     348.3
## 304                     536.0
## 308                        NA
## 309                        NA
## 320                        NA
## 324                     185.6
## 338                     400.9
## 350                     483.3
## 353                     276.9
## 357                     233.2
## 360                     354.2
## 363                     304.9
## 364                     196.6
## 366                        NA
## 371                     575.0
## 383                     286.9
## 389                     446.3
## 392                        NA
## 394                     555.8
## 400                     537.4
## 409                        NA
## 410                        NA
## 413                        NA
## 417                     328.9
## 427                        NA
## 431                     548.7
## 442                     547.1
## 462                        NA
## 470                     151.1
## 472                     264.5
## 474                     217.2
## 475                        NA
## 481                     287.5
## 483                     227.2
## 486                        NA
## 487                        NA
## 490                        NA
## 500                     537.8
## 511                     509.8
## 512                     429.0
## 517                     264.8
## 523                     390.5
## 532                     339.6
## 535                        NA
## 536                     463.3
## 537                        NA
## 540                     416.2
## 547                        NA
## 553                     364.9
## 555                     508.5
## 558                        NA
## 560                     615.9
## 577                     396.3
## 580                        NA
## 587                        NA
## 594                     222.7
## 602                     482.8
## 606                     348.5
## 607                     487.9
## 611                        NA
## 621                     609.5
## 640                     379.6
## 648                        NA
## 656                        NA
## 663                        NA
## 664                        NA
## 665                     238.5
## 672                        NA
## 677                     355.0
## 687                        NA
## 691                     573.4
## 692                     445.4
# Snapshot containing only the rows with no NA in any column. It is taken
# before the imputation below, so it is unaffected by the filled-in values.
data_clean <- na.omit(data)

# Mean-impute missing skin stiffness: every NA is replaced by the average of
# the observed values in the same column.
# NOTE(review): Microcirculation_PU and Peak_Plantar_Pressure_kPa also contain
# NAs according to the column tally above and are not imputed here.
data$Skin_Stiffness_N_per_mm <- replace(
  data$Skin_Stiffness_N_per_mm,
  is.na(data$Skin_Stiffness_N_per_mm),
  mean(data$Skin_Stiffness_N_per_mm, na.rm = TRUE)
)
# Number of rows that exactly repeat an earlier row across all columns.
length(which(duplicated(data)))
## [1] 2
# List those repeated rows (the first occurrence of each is not flagged).
data[which(duplicated(data)), ]
##              Nama Tanggal_Lahir  Tensi Skin_Stiffness_N_per_mm
## 342   Betty Lewis    23/05/1988 121/96                    1.66
## 556 Hsieh Shu-Hui    15/01/1967  96/73                    1.43
##     Microcirculation_PU Suhu_Tubuh_Celcius Penyakit Peak_Plantar_Pressure_kPa
## 342                14.6               36.6 Diabetic                     475.8
## 556                36.9               37.0 Diabetic                     393.2
# Drop exact duplicate rows, keeping the first occurrence of each record.
data <- unique(data)
# Quartiles and interquartile range of skin stiffness.
Q1 <- quantile(data[["Skin_Stiffness_N_per_mm"]], probs = 0.25, na.rm = TRUE)
Q3 <- quantile(data[["Skin_Stiffness_N_per_mm"]], probs = 0.75, na.rm = TRUE)
IQR_value <- IQR(data[["Skin_Stiffness_N_per_mm"]], na.rm = TRUE)

# Outlier fences: 1.5 interquartile ranges beyond each quartile.
lower_bound <- Q1 - IQR_value * 1.5
upper_bound <- Q3 + IQR_value * 1.5

# Keep only the rows whose stiffness lies inside the fences (inclusive).
data_no_outlier <- filter(
  data,
  Skin_Stiffness_N_per_mm >= lower_bound,
  Skin_Stiffness_N_per_mm <= upper_bound
)

# Boxplot of the unfiltered column, so the outliers are still visible.
ggplot(data = data, mapping = aes(y = Skin_Stiffness_N_per_mm)) +
  geom_boxplot()

# Re-check after de-duplication: count of rows repeating an earlier row.
length(which(duplicated(data)))
## [1] 0
# Listing of any remaining duplicated rows (expected to be empty).
data[which(duplicated(data)), ]
## [1] Nama                      Tanggal_Lahir            
## [3] Tensi                     Skin_Stiffness_N_per_mm  
## [5] Microcirculation_PU       Suhu_Tubuh_Celcius       
## [7] Penyakit                  Peak_Plantar_Pressure_kPa
## <0 rows> (or 0-length row.names)
# Outlier screening for Skin_Stiffness_N_per_mm with 1.5 * IQR fences,
# followed by a boxplot of the screened data.
#
# The de-duplication that used to open this block is dropped: the check
# immediately above reports zero duplicated rows, so it was a no-op.
Q1 <- quantile(data$Skin_Stiffness_N_per_mm, 0.25, na.rm = TRUE)
Q3 <- quantile(data$Skin_Stiffness_N_per_mm, 0.75, na.rm = TRUE)
IQR_value <- IQR(data$Skin_Stiffness_N_per_mm, na.rm = TRUE)

# Values outside [lower_bound, upper_bound] are treated as outliers.
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value

# filter() keeps rows where the condition is TRUE, so rows with an NA
# stiffness value would be dropped as well.
data_no_outlier <- data %>%
  filter(Skin_Stiffness_N_per_mm >= lower_bound &
         Skin_Stiffness_N_per_mm <= upper_bound)

# Plot the screened frame. Previously this re-plotted the unfiltered `data`,
# which reproduced the earlier boxplot exactly and left `data_no_outlier`
# unused; plotting the filtered rows makes the before/after effect visible.
ggplot(data_no_outlier, aes(y = Skin_Stiffness_N_per_mm)) +
  geom_boxplot() +
  labs(title = "Skin stiffness after IQR outlier removal")

# Inspect the distinct diagnosis labels before normalising them.
unique(data[["Penyakit"]])
##  [1] "Non-Diabetic" "Diabetic"     ""             "Sehat"        "Sakit"       
##  [6] "Tidak"        "Yes"          "No"           "NON-DIABETIC" "Normal"      
## [11] "DIABETIC"     "DM"           "diabetic"     "1"            "non-diabetic"
## [16] "Healthy"
# Normalise the free-text diagnosis labels in `Penyakit`.
#
# Step 1: lower-case and trim, so that spellings differing only in case or
# surrounding whitespace collapse together.
data$Penyakit <- trimws(tolower(data$Penyakit))

# Step 2: map every remaining synonym onto one of two canonical labels.
# The previous version only recoded "fluu", a value that does not occur in
# the labels listed by unique(data$Penyakit), so the synonyms below were
# written out unchanged.
# NOTE(review): the direction of each synonym (e.g. "1", "yes", "sakit" ->
# diabetic; "tidak", "sehat", "normal" -> non-diabetic) is inferred from the
# label text -- confirm against the data dictionary.
penyakit_lookup <- c(
  "diabetic"     = "diabetic",
  "dm"           = "diabetic",
  "yes"          = "diabetic",
  "sakit"        = "diabetic",
  "1"            = "diabetic",
  "non-diabetic" = "non-diabetic",
  "no"           = "non-diabetic",
  "tidak"        = "non-diabetic",
  "sehat"        = "non-diabetic",
  "normal"       = "non-diabetic",
  "healthy"      = "non-diabetic",
  # Retained from the original recode for backward compatibility.
  "fluu"         = "flu"
)

# Only rewrite labels that have a known mapping; blank strings, NA and any
# unexpected label are left exactly as they are.
recognised <- data$Penyakit %in% names(penyakit_lookup)
data$Penyakit[recognised] <- unname(penyakit_lookup[data$Penyakit[recognised]])
# Export the working frame `data` (not `data_clean` or `data_no_outlier`)
# to disk without the row-name column.
write.csv(x = data, file = "data_clean.csv", row.names = FALSE)