library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
data <- read.csv("hospital_dataset.csv", stringsAsFactors = FALSE)
str(data)
## 'data.frame': 700 obs. of 8 variables:
## $ Nama : chr "Michael Anderson" "N/A" "Tan Wei Ming" "Shen Yi-Ching" ...
## $ Tanggal_Lahir : chr "01/04/1957" "20/09/1975" "12/04/1965" "11/09/1980" ...
## $ Tensi : chr "112/67" "140 / 91" "134/72" "120/79" ...
## $ Skin_Stiffness_N_per_mm : num 0.69 1.5 0.76 1.92 0.81 0.61 1.04 2.24 0.18 NA ...
## $ Microcirculation_PU : num 42 41.9 26.3 NA 25.5 42.2 2 9.5 24.8 40.9 ...
## $ Suhu_Tubuh_Celcius : chr "37.6" "36.5°C" "37.5" "37.0" ...
## $ Penyakit : chr "Non-Diabetic" "Non-Diabetic" "Non-Diabetic" "Diabetic" ...
## $ Peak_Plantar_Pressure_kPa: num 294 NA 432 578 502 ...
head(data)
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per_mm
## 1 Michael Anderson 01/04/1957 112/67 0.69
## 2 N/A 20/09/1975 140 / 91 1.50
## 3 Tan Wei Ming 12/04/1965 134/72 0.76
## 4 Shen Yi-Ching 11/09/1980 120/79 1.92
## 5 Kung Mei-Lin 22/08/1985 99/77 0.81
## 6 Ho Chuan-Wei 10/08/1962 149/65 0.61
## Microcirculation_PU Suhu_Tubuh_Celcius Penyakit Peak_Plantar_Pressure_kPa
## 1 42.0 37.6 Non-Diabetic 294.0
## 2 41.9 36.5°C Non-Diabetic NA
## 3 26.3 37.5 Non-Diabetic 431.8
## 4 NA 37.0 Diabetic 577.5
## 5 25.5 36.0 Diabetic 502.3
## 6 42.2 36.8 Non-Diabetic 201.4
data <- clean_names(data)
colSums(is.na(data))
## nama tanggal_lahir
## 0 0
## tensi skin_stiffness_n_per_mm
## 0 37
## microcirculation_pu suhu_tubuh_celcius
## 50 0
## penyakit peak_plantar_pressure_k_pa
## 0 43
data <- data %>%
mutate(across(where(is.character), ~trimws(.)))
data <- data %>%
mutate(across(where(is.numeric),
~ifelse(is.na(.), median(., na.rm = TRUE), .)))
sum(duplicated(data))
## [1] 2
data[duplicated(data), ]
## nama tanggal_lahir tensi skin_stiffness_n_per_mm
## 342 Betty Lewis 23/05/1988 121/96 1.66
## 556 Hsieh Shu-Hui 15/01/1967 96/73 1.43
## microcirculation_pu suhu_tubuh_celcius penyakit peak_plantar_pressure_k_pa
## 342 14.6 36.6 Diabetic 475.8
## 556 36.9 37.0 Diabetic 393.2
data <- data %>% distinct()
sum(duplicated(data))
## [1] 0
data$tensi <- gsub("\\s+", "", data$tensi)
head(data$tensi)
## [1] "112/67" "140/91" "134/72" "120/79" "99/77" "149/65"
data$suhu_tubuh_celcius <- gsub(",", ".", data$suhu_tubuh_celcius)
data$suhu_tubuh_celcius <- gsub("[^0-9.]", "", data$suhu_tubuh_celcius)
data$suhu_tubuh_celcius <- as.numeric(data$suhu_tubuh_celcius)
head(data$suhu_tubuh_celcius)
## [1] 37.6 36.5 37.5 37.0 36.0 36.8
data[is.na(data$suhu_tubuh_celcius), ]
## nama tanggal_lahir tensi skin_stiffness_n_per_mm
## 66 Ong Lay Kheng 16/03/1995 132/76 0.84
## 69 Ng Boon Hua 29/04/1990 114/79 2.07
## 79 Kung Mei-Lin 08/05/1959 117/83 1.76
## 82 Wu Ming-Hui 01/12/1991 136/69 0.33
## 97 Betty Lewis 23/03/1955 115/84 1.01
## 125 Karen Thompson 24/11/1996 102/71 1.10
## 147 Richard Martin 22/03/1949 118/79 1.06
## 163 Chou Mei-Yu 01/02/1992 0.53
## 175 Wang Jie 07/02/1945 135/64 1.42
## 187 22/09/1949 109/94 0.17
## 189 David Harris 26/01/1998 106/71 2.27
## 193 Charles Clark 12/08/1944 145/84 0.94
## 195 Patricia Davis 08/05/1972 110/81 1.28
## 206 Hsu Kuo-Chang 113/98 1.50
## 208 Jessica White 25/05/1998 119/85 2.22
## 209 Lu Hsiang-Ling 12/05/1990 1.41
## 262 John Smith 20/03/1971 1.56
## 284 Ng Boon Hua 126/76 0.97
## 311 Pan Mei-Hsuan 03/07/1981 115/87 1.29
## 318 Tan Wei Ming 21/10/1965 1.74
## 337 Richard Martin 06/04/1950 0.68
## 353 Lu Hsiang-Ling 17/11/1997 139/84 0.99
## 370 Ong Lay Kheng 15/04/1991 105/83 2.00
## 387 Mary Johnson 08/11/1975 128/76 1.04
## 410 Joseph Walker 13/10/1942 107/63 1.82
## 425 Yen Kuo-Jung 11/05/2005 102/83 0.85
## 443 William Thomas 18/06/1962 115/84 1.07
## 494 Helen Hall 17/12/1965 122/85 1.59
## 499 John Smith 31/03/1942 135/96 1.10
## 508 27/01/1996 117/78 0.63
## 522 Kung Mei-Lin 116/84 0.58
## 542 Kung Mei-Lin 26/07/1965 129/69 1.97
## 544 Hsu Kuo-Chang 13/07/1954 127/69 1.42
## 548 James Brown 05/04/1993 120/81 0.28
## 552 Chang Chung-Wei 30/07/1969 123/66 0.84
## 570 Lu Hsiang-Ling 17/05/1964 92/75 0.35
## 572 Ong Lay Kheng 06/01/1987 114/67 1.10
## 578 Ng Boon Hua 21/02/1940 146/66 0.42
## 587 01/12/1998 148/79 1.50
## 595 105/79 1.28
## 599 Huang Li-Chen 26/08/1961 132/76 1.13
## 600 Huang Li-Chen 15/07/1960 140/93 1.10
## 607 Lu Hsiang-Ling 23/12/1980 133/103 0.91
## 611 Chiu Yu-Chin 05/09/1944 114/69 1.99
## 646 Tseng Wen-Liang 150/82 0.10
## 663 Wang Jie 19/06/2005 119/56 0.66
## 669 Wu Ming-Hui 13/02/2004 122/90 1.10
## 686 Joseph Garcia 24/09/1980 122/89 0.10
## 693 Liu Hsiao-Fen 120/89 1.73
## microcirculation_pu suhu_tubuh_celcius penyakit
## 66 48.7 NA Non-Diabetic
## 69 19.9 NA Diabetic
## 79 19.3 NA Diabetic
## 82 43.4 NA Non-Diabetic
## 97 22.2 NA Non-Diabetic
## 125 28.5 NA Diabetic
## 147 8.8 NA Diabetic
## 163 41.8 NA Non-Diabetic
## 175 53.2 NA
## 187 16.0 NA Non-Diabetic
## 189 8.5 NA Diabetic
## 193 41.7 NA Non-Diabetic
## 195 6.9 NA Diabetic
## 206 18.0 NA Diabetic
## 208 26.5 NA Diabetic
## 209 27.7 NA Diabetic
## 262 27.7 NA Diabetic
## 284 28.8 NA DIABETIC
## 311 1.0 NA
## 318 13.1 NA Diabetic
## 337 55.1 NA Non-Diabetic
## 353 16.0 NA Diabetic
## 370 27.7 NA Diabetic
## 387 37.2 NA Non-Diabetic
## 410 25.6 NA Diabetic
## 425 32.4 NA Non-Diabetic
## 443 20.5 NA Non-Diabetic
## 494 6.3 NA Diabetic
## 499 20.4 NA Diabetic
## 508 51.6 NA Non-Diabetic
## 522 27.7 NA Non-Diabetic
## 542 16.2 NA Diabetic
## 544 18.9 NA Non-Diabetic
## 548 29.0 NA Non-Diabetic
## 552 27.7 NA Non-Diabetic
## 570 27.0 NA Non-Diabetic
## 572 45.2 NA Non-Diabetic
## 578 45.6 NA Non-Diabetic
## 587 15.0 NA
## 595 17.4 NA Diabetic
## 599 1.0 NA Diabetic
## 600 13.9 NA Diabetic
## 607 32.9 NA Non-Diabetic
## 611 1.0 NA
## 646 39.9 NA Non-Diabetic
## 663 27.7 NA Non-Diabetic
## 669 27.2 NA Non-Diabetic
## 686 39.2 NA Non-Diabetic
## 693 21.2 NA DM
## peak_plantar_pressure_k_pa
## 66 352.3
## 69 538.7
## 79 384.3
## 82 337.5
## 97 390.4
## 125 710.6
## 147 99999.0
## 163 274.7
## 175 422.5
## 187 239.8
## 189 591.0
## 193 125.8
## 195 517.0
## 206 601.8
## 208 537.1
## 209 350.6
## 262 409.9
## 284 166.9
## 311 503.8
## 318 502.7
## 337 150.9
## 353 514.0
## 370 575.0
## 387 120.1
## 410 292.7
## 425 349.8
## 443 147.0
## 494 560.2
## 499 537.8
## 508 298.5
## 522 390.5
## 542 583.9
## 544 262.1
## 548 204.1
## 552 364.9
## 570 391.4
## 572 388.4
## 578 384.3
## 587 630.1
## 595 463.4
## 599 641.8
## 600 482.8
## 607 138.5
## 611 610.9
## 646 384.3
## 663 238.5
## 669 305.1
## 686 333.7
## 693 377.2
unique(data$tanggal_lahir)
## [1] "01/04/1957" "20/09/1975" "12/04/1965"
## [4] "11/09/1980" "22/08/1985" "10/08/1962"
## [7] "18/01/1994" "02/08/1982" "06/12/1982"
## [10] "26/02/1951" "16/02/1944" ""
## [13] "03/10/1946" "02/11/1957" "18/03/1973"
## [16] "04/07/1964" "1967" "08/02/1988"
## [19] "02/05/1996" "24/02/1988" "25/08/1987"
## [22] "19/11/1946" "07/11/1977" "06/03/1982"
## [25] "19/02/1969" "05/11/1965" "07/07/1985"
## [28] "22/09/2001" "29/12/2001" "05/11/2001"
## [31] "30/04/1989" "30/11/1944" "10/05/1946"
## [34] "17/11/1942" "17/09/1972" "18/08/1971"
## [37] "19/05/1988" "01/02/1951" "08/06/1951"
## [40] "12/11/1940" "03/05/1993" "16/05/1970"
## [43] "26/09/1951" "12/05/1992" "29/07/1942"
## [46] "11/07/1989" "16/09/1967" "11/01/1958"
## [49] "05/10/1970" "03/08/1942" "16/03/1947"
## [52] "03/10/1989" "04/03/1992" "23/04/1952"
## [55] "27/11/2005" "10/11/1959" "1977"
## [58] "17/03/1966" "10/01/1992" "1980"
## [61] "20-02-2003" "16/03/1995" "31/10/1971"
## [64] "27/07/1989" "29/04/1990" "06/11/1991"
## [67] "05/12/1964" "26/01/2001" "26/05/1992"
## [70] "17/02/1954" "23/12/1943" "06/09/1941"
## [73] "20/11/1952" "08/05/1959" "17/05/1973"
## [76] "30/10/1983" "01/12/1991" "22/04/1999"
## [79] "07/06/1982" "01/10/1985" "27/07/1978"
## [82] "01/10/1989" "22/04/1962" "21/06/1978"
## [85] "29/10/1995" "19/11/1990" "11/10/1994"
## [88] "27/04/1984" "19/09/1974" "21/06/1981"
## [91] "23/03/1955" "02/09/1976" "18/04/1954"
## [94] "24/08/1968" "24/03/1962" "13/11/1962"
## [97] "16/08/1975" "26/03/1988" "05/08/1993"
## [100] "01/01/1995" "04/03/1946" "15/01/1967"
## [103] "04/05/1963" "29/04/1999" "28/03/1940"
## [106] "06/02/2000" "29/04/1951" "09/12/1960"
## [109] "28/03/1953" "08/06/1969" "09/05/1963"
## [112] "05/07/1967" "29/09/1978" "17/11/1976"
## [115] "12/11/1953" "14/04/1949" "24/11/1996"
## [118] "06/03/1970" "26/03/1995" "03/02/1992"
## [121] "31/05/2005" "17/06/1989" "17/11/1952"
## [124] "14/11/1957" "20/08/1979" "23/07/1985"
## [127] "13/04/1967" "11/11/1944" "29/05/1992"
## [130] "07/04/1979" "09/09/1956" "29/06/1941"
## [133] "April 10, 1989" "06/01/1985" "05/03/1952"
## [136] "04/06/1961" "20/05/1953" "22/03/1949"
## [139] "28/01/1979" "24/04/1947" "30/07/1982"
## [142] "18/09/1948" "11/04/1981" "13/03/1970"
## [145] "16/06/1961" "17/06/1999" "02/09/1998"
## [148] "24/10/1972" "14/09/1980" "18/12/1996"
## [151] "01/02/1992" "20/03/1980" "09/03/2005"
## [154] "25/06/1965" "14-12-1963" "13/04/1954"
## [157] "06/11/1953" "03/09/1948" "1945"
## [160] "23/09/1953" "23-08-1998" "07/02/1945"
## [163] "26/11/1957" "15/02/1990" "25/04/1992"
## [166] "21/01/1947" "09/11/1951" "07/04/2005"
## [169] "19/01/1999" "17/03/1954" "19/07/1941"
## [172] "27/06/1962" "22/09/1949" "09/09/1999"
## [175] "26/01/1998" "13/08/1946" "22/10/1996"
## [178] "24/12/1952" "12/08/1944" "09/11/1998"
## [181] "08/05/1972" "06/01/1973" "11/09/1943"
## [184] "02/21/1995" "06/06/1995" "11/06/1992"
## [187] "12/04/1970" "22/11/1999" "25/05/1998"
## [190] "12/05/1990" "12/09/1957" "17/09/1982"
## [193] "24/11/2005" "12/01/1968" "28/08/1979"
## [196] "11/07/1947" "27/03/1986" "21/06/1998"
## [199] "20/10/1955" "29/04/1997" "21/01/1993"
## [202] "12/11/1957" "07/03/1951" "19/04/1980"
## [205] "03/02/1973" "22/10/1990" "20/01/2003"
## [208] "02/06/1951" "05-05-1994" "04/10/1959"
## [211] "04/04/2002" "23/04/1991" "11/04/1994"
## [214] "February 16, 1961" "19/12/1967" "29/11/1992"
## [217] "19/08/1955" "08/09/1961" "15/04/2004"
## [220] "26/11/1997" "21/08/1978" "04/12/1954"
## [223] "20/04/1959" "29/07/1990" "13/08/2001"
## [226] "29/11/1958" "16/06/1941" "06/06/1973"
## [229] "30/11/1957" "19/04/2003" "18/07/1985"
## [232] "13/05/1968" "17/05/1972" "20/09/1979"
## [235] "29/10/1941" "13/12/1946" "20/03/1971"
## [238] "02/07/1990" "05/11/1958" "14/04/1986"
## [241] "18/09/1995" "02/24/2002" "05/07/1980"
## [244] "31/05/1959" "26/07/1949" "02/12/1972"
## [247] "11/02/1980" "06/07/1977" "12/07/1940"
## [250] "14/05/1953" "23/05/1988" "07/05/1950"
## [253] "07/03/1977" "22/09/1940" "11/11/1987"
## [256] "19/11/1955" "22/09/1952" "10/04/1980"
## [259] "03/02/1956" "04/12/1988" "28/05/1948"
## [262] "11/02/1984" "13/12/1975" "19/10/1981"
## [265] "15/09/1955" "04/10/1956" "14/06/2001"
## [268] "08/12/1942" "26/08/1981" "24/06/1994"
## [271] "19/07/2002" "17/11/1981" "12/01/1992"
## [274] "07 Nov 2004" "25/03/1996" "18/06/1961"
## [277] "10/11/2000" "23/02/1946" "24/11/1984"
## [280] "08/09/1959" "15/08/1969" "16/12/2002"
## [283] "14/08/1984" "03/07/1981" "16/12/1963"
## [286] "02/01/1980" "20/07/1985" "23/02/1944"
## [289] "11/12/1949" "21/10/1965" "28/08/1991"
## [292] "06/12/1950" "10/09/1954" "27/08/1987"
## [295] "30/12/1963" "17/05/1965" "16/09/1949"
## [298] "02/06/1981" "01/01/1969" "15/04/1977"
## [301] "17/11/1997" "19/03/1970" "07/11/1967"
## [304] "27/03/1974" "23/10/1988" "27/04/1966"
## [307] "06/04/1950" "28/04/1947" "03/05/1985"
## [310] "29/10/1961" "20/06/1969" "13/01/1998"
## [313] "12/01/1987" "20/10/1968" "25/09/1988"
## [316] "17/05/1945" "24/04/2000" "07/01/2000"
## [319] "04/03/1959" "11/01/1956" "19/12/1950"
## [322] "08/03/1956" "03/08/1969" "02 Feb 1967"
## [325] "27/11/1962" "05/03/1991" "29/10/1992"
## [328] "08/03/1948" "31/12/1973" "17/08/1997"
## [331] "23/01/1961" "13/12/1998" "14/02/1942"
## [334] "15/04/1991" "05/02/1957" "02/09/1964"
## [337] "12/03/1959" "22/12/1972" "27/07/1983"
## [340] "26/10/1977" "01/06/1997" "24/09/1951"
## [343] "03/07/2003" "01/02/1968" "16/12/1950"
## [346] "10/11/1982" "04/07/1998" "03/03/1971"
## [349] "24/01/1992" "08/11/1975" "30/03/1986"
## [352] "16/07/1950" "13/10/1959" "14/09/1989"
## [355] "02/01/2005" "01/05/1941" "21/08/1952"
## [358] "30/10/1963" "28/05/1957" "07/12/1970"
## [361] "19/05/1945" "25/11/1947" "02/09/1978"
## [364] "18/12/1985" "21/04/1958" "24/03/1941"
## [367] "02/12/1969" "02/06/1994" "10/03/1999"
## [370] "02/03/2001" "12/12/1982" "10/07/1966"
## [373] "13/10/1942" "16/07/1956" "23/01/1994"
## [376] "14/04/2003" "03/01/1990" "28/04/1954"
## [379] "11/02/1988" "15/11/1977" "31/05/1953"
## [382] "06/09/1972" "25/08/1991" "30 Jan 1980"
## [385] "13/10/1964" "21/05/1999" "11/05/2005"
## [388] "20/10/1998" "07/03/1979" "10/05/1950"
## [391] "08/09/1980" "26/02/1970" "05/08/1955"
## [394] "19/07/1940" "09/12/1975" "18/10/1973"
## [397] "12/02/1993" "30/09/1967" "22/03/1956"
## [400] "19/07/1979" "09/08/1999" "15/08/1996"
## [403] "18/06/1962" "03/11/1994" "20/01/1963"
## [406] "19/08/1984" "20/03/1947" "01/05/1970"
## [409] "31/05/1957" "17/04/1952" "15/02/2000"
## [412] "31/01/1955" "14/04/1947" "06 May 1947"
## [415] "10/12/1952" "17/07/1999" "25/11/1990"
## [418] "20/12/1944" "08/29/1959" "17/06/1948"
## [421] "21/01/2003" "05/12/1975" "11/10/1944"
## [424] "02/11/1966" "24/12/1975" "01/02/1979"
## [427] "24/03/1948" "06/03/1950" "19/05/1967"
## [430] "22/10/1954" "16/05/1951" "10/02/1997"
## [433] "05/12/1963" "22/04/1979" "14/09/1941"
## [436] "15/01/1984" "09-01-1941" "07/11/1999"
## [439] "23/08/1960" "21/07/2001" "26/01/1961"
## [442] "09/11/1970" "06/01/1999" "28/05/1946"
## [445] "02/12/1990" "28/08/1970" "03/02/1988"
## [448] "22/05/1994" "13/04/1976" "13 Jan 2001"
## [451] "17/12/1965" "09/02/1948" "05/04/1976"
## [454] "18/11/1959" "06/10/1953" "31/03/1942"
## [457] "22/05/1970" "17/06/1970" "31/07/1987"
## [460] "22/04/1953" "22/05/1990" "24/03/1960"
## [463] "15/01/1965" "28/04/1960" "27/01/1996"
## [466] "14/07/1953" "21/05/1982" "17/05/1969"
## [469] "10/05/1947" "21/08/1993" "28/04/2001"
## [472] "26/10/1967" "10/11/1944" "22/10/1952"
## [475] "28/06/1955" "18/08/1979" "17/05/1988"
## [478] "24/07/1984" "06/12/1958" "04/04/2000"
## [481] "13/03/1991" "12/04/1988" "22/06/1943"
## [484] "15/12/1943" "09/08/1944" "15/01/2001"
## [487] "26/09/2004" "06/04/1996" "16/03/1961"
## [490] "14/05/1991" "27/09/1965" "13/01/1950"
## [493] "01/04/1941" "23/05/1985" "28/10/1969"
## [496] "31/03/1945" "26/07/1965" "26/07/1976"
## [499] "13/07/1954" "20/09/1967" "07/10/1986"
## [502] "05/04/1993" "09/05/1985" "23/08/1990"
## [505] "15/03/1992" "30/07/1969" "21/06/1982"
## [508] "05/12/1942" "03/07/1997" "14/06/1979"
## [511] "23/06/1966" "10/01/1972" "24/02/2000"
## [514] "18/09/1953" "12/09/1941" "14/03/1951"
## [517] "24/04/1956" "12/11/1993" "17/04/1949"
## [520] "03/05/1977" "31/10/1967" "17/05/1964"
## [523] "23/01/1980" "06/01/1987" "09/09/1981"
## [526] "31/01/1978" "18/08/1976" "13/02/1954"
## [529] "26/08/1990" "21/02/1940" "25/11/1948"
## [532] "28/06/1995" "23/09/1991" "22/11/1978"
## [535] "21/04/1998" "22/02/2002" "03/04/1975"
## [538] "01/12/1998" "27/04/1987" "06/05/1971"
## [541] "15/02/1946" "17/07/2000" "28/06/1989"
## [544] "17/05/1959" "31/08/1977" "13/09/1985"
## [547] "15/06/1972" "26/08/1961" "15/07/1960"
## [550] "06/08/1967" "1988" "26/03/1943"
## [553] "08/01/1964" "29/05/1970" "10/09/1978"
## [556] "23/12/1980" "18/11/1986" "09/01/1962"
## [559] "05/03/1976" "05/09/1944" "18/11/2001"
## [562] "08/07/1952" "27/12/1974" "13/01/1966"
## [565] "18/05/1989" "31/01/1944" "10/09/1989"
## [568] "14/03/1985" "07/05/1974" "19/02/1968"
## [571] "29/08/1978" "04/09/1980" "12/03/1984"
## [574] "13/06/1969" "02/07/1979" "03/09/1985"
## [577] "10/10/1966" "13/07/1953" "19/08/1989"
## [580] "22/07/1958" "07/06/1986" "12/02/1960"
## [583] "12/04/1948" "25/03/1990" "26/06/2002"
## [586] "27/12/1958" "13/06/1973" "03 Jan 1947"
## [589] "17/10/1963" "28/11/1940" "16/12/1953"
## [592] "11/01/1986" "22/08/1979" "08/03/1947"
## [595] "15/09/1979" "12/07/1942" "28/03/1960"
## [598] "31/10/1975" "12/12/1992" "29/04/1966"
## [601] "24/11/1946" "04/06/1981" "15/08/1984"
## [604] "25/12/1943" "24/09/1972" "18/04/1993"
## [607] "1985" "19/06/2005" "22/11/1994"
## [610] "28/12/1964" "23/07/1986" "15/11/1987"
## [613] "13/02/2004" "07/11/1976" "03/12/1946"
## [616] "28/12/1980" "02/09/1997" "27/02/1960"
## [619] "15/05/1981" "24/07/1949" "15/07/1972"
## [622] "04/09/1988" "07/04/1962" "26/06/1962"
## [625] "06/02/1976" "13/11/1968" "24/09/1980"
## [628] "02/10/2002" "16/04/1996" "19/07/1973"
## [631] "26/07/1947" "18/09/1997" "28/08/2004"
## [634] "29/04/1958" "04/06/2001" "06/03/2001"
data$penyakit <- trimws(data$penyakit)
data$penyakit <- tolower(data$penyakit)
data$penyakit <- recode(data$penyakit,
"diabetic" = "Diabetic",
"non-diabetic" = "Non-Diabetic")
table(data$penyakit)
##
## 1 Diabetic dm healthy no
## 45 2 297 5 1 3
## Non-Diabetic normal sakit sehat tidak yes
## 337 1 1 1 3 2
remove_outlier <- function(x) {
Q1 <- quantile(x, 0.25, na.rm = TRUE)
Q3 <- quantile(x, 0.75, na.rm = TRUE)
IQR_val <- Q3 - Q1
lower <- Q1 - 1.5 * IQR_val
upper <- Q3 + 1.5 * IQR_val
x[x < lower | x > upper] <- median(x, na.rm = TRUE)
return(x)
}
data$suhu_tubuh_celcius <- remove_outlier(data$suhu_tubuh_celcius)
head(data$suhu_tubuh_celcius)
## [1] 37.6 36.5 37.5 37.0 36.0 36.8
summary(data$suhu_tubuh_celcius)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
## 35.60 36.50 36.80 36.78 37.00 38.00 49
summary(data$skin_stiffness_n_per_mm)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.180 0.710 1.100 1.329 1.560 150.000
summary(data$microcirculation_pu)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -32.50 19.32 27.70 35.04 37.88 5000.00
summary(data$peak_plantar_pressure_k_pa)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -100.0 276.6 384.3 956.1 502.6 99999.0
summary(data$suhu_tubuh_celcius)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
## 35.60 36.50 36.80 36.78 37.00 38.00 49
boxplot(data$skin_stiffness_n_per_mm)

remove_outlier <- function(x) {
Q1 <- quantile(x, 0.25, na.rm = TRUE)
Q3 <- quantile(x, 0.75, na.rm = TRUE)
IQR_val <- Q3 - Q1
lower <- Q1 - 1.5 * IQR_val
upper <- Q3 + 1.5 * IQR_val
outliers <- sum(x < lower | x > upper, na.rm = TRUE)
cat("Jumlah outlier:", outliers, "\n")
x[x < lower | x > upper] <- median(x, na.rm = TRUE)
return(x)
}
data$skin_stiffness_n_per_mm <- remove_outlier(data$skin_stiffness_n_per_mm)
## Jumlah outlier: 6
write.csv(data, "hospital_dataset_cleaned.csv", row.names = FALSE)
cat("File berhasil disimpan!\n")
## File berhasil disimpan!
summary(data)
## nama tanggal_lahir tensi skin_stiffness_n_per_mm
## Length :698 Length :698 Length :698 Min. :-0.530
## N.unique : 59 N.unique :636 N.unique :547 1st Qu.: 0.720
## N.blank : 40 N.blank : 42 N.blank : 47 Median : 1.100
## Min.nchar: 0 Min.nchar: 0 Min.nchar: 0 Mean : 1.128
## Max.nchar: 16 Max.nchar: 17 Max.nchar: 13 3rd Qu.: 1.560
## Max. : 2.590
##
## microcirculation_pu suhu_tubuh_celcius penyakit
## Min. : -32.50 Min. :35.60 Length :698
## 1st Qu.: 19.32 1st Qu.:36.50 N.unique : 12
## Median : 27.70 Median :36.80 N.blank : 45
## Mean : 35.04 Mean :36.78 Min.nchar: 0
## 3rd Qu.: 37.88 3rd Qu.:37.00 Max.nchar: 12
## Max. :5000.00 Max. :38.00
## NAs :49
## peak_plantar_pressure_k_pa
## Min. : -100.0
## 1st Qu.: 276.6
## Median : 384.3
## Mean : 956.1
## 3rd Qu.: 502.6
## Max. :99999.0
##
head(data)
## nama tanggal_lahir tensi skin_stiffness_n_per_mm
## 1 Michael Anderson 01/04/1957 112/67 0.69
## 2 N/A 20/09/1975 140/91 1.50
## 3 Tan Wei Ming 12/04/1965 134/72 0.76
## 4 Shen Yi-Ching 11/09/1980 120/79 1.92
## 5 Kung Mei-Lin 22/08/1985 99/77 0.81
## 6 Ho Chuan-Wei 10/08/1962 149/65 0.61
## microcirculation_pu suhu_tubuh_celcius penyakit
## 1 42.0 37.6 Non-Diabetic
## 2 41.9 36.5 Non-Diabetic
## 3 26.3 37.5 Non-Diabetic
## 4 27.7 37.0 Diabetic
## 5 25.5 36.0 Diabetic
## 6 42.2 36.8 Non-Diabetic
## peak_plantar_pressure_k_pa
## 1 294.0
## 2 384.3
## 3 431.8
## 4 577.5
## 5 502.3
## 6 201.4
str(data)
## 'data.frame': 698 obs. of 8 variables:
## $ nama : chr "Michael Anderson" "N/A" "Tan Wei Ming" "Shen Yi-Ching" ...
## $ tanggal_lahir : chr "01/04/1957" "20/09/1975" "12/04/1965" "11/09/1980" ...
## $ tensi : chr "112/67" "140/91" "134/72" "120/79" ...
## $ skin_stiffness_n_per_mm : num 0.69 1.5 0.76 1.92 0.81 0.61 1.04 2.24 0.18 1.1 ...
## $ microcirculation_pu : num 42 41.9 26.3 27.7 25.5 42.2 2 9.5 24.8 40.9 ...
## $ suhu_tubuh_celcius : num 37.6 36.5 37.5 37 36 36.8 36.3 36.4 36.9 36.6 ...
## $ penyakit : chr "Non-Diabetic" "Non-Diabetic" "Non-Diabetic" "Diabetic" ...
## $ peak_plantar_pressure_k_pa: num 294 384 432 578 502 ...