library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
# Load the raw hospital records. Blank fields and the literal "N/A" are read
# as NA so that missing text values (names, dates, blood pressure, diagnosis)
# are counted by the missing-value audit instead of passing as valid strings.
# NOTE(review): re-knit after this change; counts printed by earlier runs
# were produced without `na.strings`.
data <- read.csv(
  "hospital_dataset.csv",
  na.strings = c("", "NA", "N/A")
)
# Number of missing values per column.
colSums(is.na(data))
## Nama Tanggal_Lahir Tensi
## 0 0 0
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## 37 50 0
## Penyakit Peak_Plantar_Pressure_kPa
## 0 43
# Show every record that has at least one missing field.
data[rowSums(is.na(data)) > 0, ]
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per_mm
## 2 N/A 20/09/1975 140 / 91 1.50
## 4 Shen Yi-Ching 11/09/1980 120/79 1.92
## 9 Joseph Garcia 06/12/1982 0.18
## 10 Ong Lay Kheng 26/02/1951 128/78 NA
## 11 Lin Mei-Ling 16/02/1944 113/75 0.25
## 23 Barbara Taylor 07/11/1977 111/67 NA
## 28 Charles Clark 22/09/2001 135/78 1.26
## 32 William Thomas 30/11/1944 103/76 NA
## 33 Fang Shu-Chen 10/05/1946 105/88 NA
## 35 Tung Li-Fang 17/09/1972 129/90 1.63
## 39 Pasien 08/06/1951 122/77 -2.18
## 40 UNKNOWN 129/80 1.64
## 46 Fang Shu-Chen 29/07/1942 128/85 0.79
## 48 122/69 0.67
## 53 Jessica White 16/03/1947 101/78 0.95
## 58 Susan Jackson 10/11/1959 119/83 0.94
## 60 Huang Li-Chen 106/74 0.95
## 63 Barbara Taylor 10/01/1992 121/70 0.10
## 74 Shen Yi-Ching 26/05/1992 115/74 NA
## 79 Kung Mei-Lin 08/05/1959 117/83 1.76
## 80 Kung Mei-Lin 17/05/1973 112/86 NA
## 87 Kung Mei-Lin 01/10/1989 113/49 NA
## 107 Yen Kuo-Jung 04/03/1946 119/93 1.17
## 111 Hsu Kuo-Chang 28/03/1940 121/73 -1.50
## 124 Ong Lay Kheng 14/04/1949 134/92 1.75
## 125 Karen Thompson 24/11/1996 102 / 71 NA
## 130 Tsai Chin-Lung 17/06/1989 1.13
## 135 Liu Hsiao-Fen 23/07/1985 113/69 NA
## 138 Liu Hsiao-Fen 29/05/1992 140/76 NA
## 146 Yang Hsiu-Mei 20/05/1953 132/83 1.74
## 164 John Smith 20/03/1980 132/92 NA
## 172 123456 1945 149/83 1.24
## 183 Nancy Robinson 17/03/1954 142/89 1.74
## 204 Tung Li-Fang 121/70 0.56
## 209 Lu Hsiang-Ling 12/05/1990 1.41
## 224 Richard Martin 07/03/1951 94/83 1.06
## 225 Tseng Wen-Liang 19/04/1980 116/72 0.16
## 227 Yang Hsiu-Mei 22/10/1990 117/95 1.45
## 228 Susan Jackson 157/67 0.86
## 238 Nancy Robinson 19/12/1967 111/86 1.07
## 241 Kao Chin-Feng 08/09/1961 104/87 2.35
## 244 Charles Clark 26/11/1997 92/90 NA
## 247 Lu Hsiang-Ling 20/04/1959 106/78 0.57
## 251 Chiu Yu-Chin 16/06/1941 128/81 0.69
## 256 Pasien 13/05/1968 0.10
## 259 Liao Chih-Cheng 29/10/1941 116/85 1.01
## 262 John Smith 20/03/1971 1.56
## 273 Susan Jackson 06/07/1977 125/75 NA
## 275 Karen Thompson 132/84 NA
## 278 Lin Mei-Ling 07/05/1950 111/77 NA
## 280 Pan Mei-Hsuan 22/09/1940 124/92 0.75
## 295 08/12/1942 124/82 1.57
## 303 David Harris 18/06/1961 133/100 NA
## 304 Linda Martinez 10/11/2000 NA
## 308 Richard Martin 15/08/1969 1.42
## 309 Wu Ming-Hui 16/12/2002 136/92 0.98
## 320 Tung Li-Fang 06/12/1950 143/78 2.03
## 324 Cheng Shu-Fen 30/12/1963 94/89 NA
## 338 Shen Yi-Ching 28/04/1947 97/71 NA
## 350 Joseph Walker 24/04/2000 2.12
## 353 Karen Thompson 04/03/1959 121/86 1.42
## 357 Lin Mei-Ling 08/03/1956 126/88 0.50
## 360 Tan Ah Kow 137/81 0.93
## 363 Lu Hsiang-Ling 29/10/1992 111/71 NA
## 364 08/03/1948 117/80 1.10
## 366 Kao Chin-Feng 17/08/1997 138/77 1.88
## 371 Ong Lay Kheng 15/04/1991 105/83 2.00
## 383 Liao Chih-Cheng 16/12/1950 78/76 0.88
## 389 Ho Chuan-Wei 30/03/1986 115/85 NA
## 392 Kung Mei-Lin 14/09/1989 117/88 1.61
## 394 Tsai Chin-Lung 01/05/1941 126/96 1.76
## 400 Tan Wei Ming 25/11/1947 118/80 NA
## 409 John Smith 12/12/1982 130/81 0.65
## 410 Richard Martin 10/07/1966 112/74 0.59
## 413 Ong Lay Kheng 23/01/1994 117/77 0.48
## 417 Chen Wei 28/04/1954 123/58 NA
## 427 Hsieh Shu-Hui 20/10/1998 118/64 0.89
## 431 Shen Yi-Ching 26/02/1970 123/72 1.31
## 442 Lu Hsiang-Ling 15/08/1996 91/97 NA
## 462 Yen Kuo-Jung 21/01/2003 101/46 0.92
## 470 Lu Hsiang-Ling 06/03/1950 132/75 1.49
## 472 Fang Shu-Chen 29/07/1942 128/85 NA
## 474 Liu Hsiao-Fen 16/05/1951 145/73 0.41
## 475 Tseng Wen-Liang 10/02/1997 128/80 0.95
## 481 Patricia Davis 07/11/1999 128/78 NA
## 483 Helen Hall 23/08/1960 0.95
## 486 Yen Kuo-Jung 09/11/1970 124/97 0.55
## 487 William Thomas 06/01/1999 108/88 1.76
## 490 Hsieh Shu-Hui 28/08/1970 146/100 0.93
## 500 John Smith 31/03/1942 135/96 NA
## 511 Ho Chuan-Wei 21/05/1982 130/107 0.86
## 512 Hsu Kuo-Chang 17/05/1969 128/92 1.27
## 517 Charles Clark 10/11/1944 119/67 NA
## 523 Kung Mei-Lin 116/84 0.58
## 532 Tung Li-Fang 26/09/2004 115/82 NA
## 535 Joseph Walker 25/08/1991 122-71 1.56
## 536 Wang Jie 14/05/1991 138/82 1.76
## 537 Tung Li-Fang 27/09/1965 132/76 1.59
## 540 Chang Chung-Wei 23/05/1985 110/81 NA
## 547 Chiu Yu-Chin 01/01/1969 134/76 0.87
## 553 Chang Chung-Wei 30/07/1969 123/66 0.84
## 555 Yen Kuo-Jung 27/07/1978 NA
## 558 James Brown 03/07/1997 121/63 1.42
## 560 Huang Li-Chen 23/06/1966 122/76 1.66
## 577 Jessica White 18/08/1976 106/62 0.87
## 580 Ng Boon Hua 21/02/1940 146/66 0.42
## 587 Hsu Kuo-Chang 03/04/1975 121/56 0.61
## 594 Wu Ming-Hui 17/07/2000 106/69 NA
## 602 Huang Li-Chen 15/07/1960 140/93 NA
## 606 Tan Ah Kow 08/01/1964 125/75 0.41
## 607 Lee Siew Eng 29/05/1970 130/79 2.30
## 611 Kao Chin-Feng 09/01/1962 95/88 0.53
## 621 Barbara Taylor 10/09/1989 155/99 NA
## 640 Richard Martin 26/06/2002 113/69 0.32
## 648 Tseng Wen-Liang 150/82 0.10
## 656 James Brown 12/12/1992 86/80 2.57
## 663 unknown 18/04/1993 145/88 1.71
## 664 Robert Wilson 1985 127/87 0.82
## 665 Wang Jie 19/06/2005 119/56 0.66
## 672 Joseph Garcia 88/83 NA
## 677 Huang Li-Chen 27/02/1960 89/78 NA
## 687 Tan Wei Ming 13/11/1968 96/53 0.53
## 691 Yang Hsiu-Mei 16/04/1996 117/75 2.31
## 692 James Brown 19/07/1973 103/92 NA
## Microcirculation_PU Suhu_Tubuh_Celcius Penyakit
## 2 41.9 36.5°C Non-Diabetic
## 4 NA 37.0 Diabetic
## 9 24.8 36.9 Non-Diabetic
## 10 40.9 36.6 Non-Diabetic
## 11 44.0 37.2celcius Non-Diabetic
## 23 15.7 36.8 Diabetic
## 28 NA 36.3 Non-Diabetic
## 32 25.6 36.9 Non-Diabetic
## 33 8.6 36.6 Diabetic
## 35 NA 36.6 Diabetic
## 39 NA 37.2 Diabetic
## 40 NA 36.5 Diabetic
## 46 52.8 36.3 Non-Diabetic
## 48 NA 36.6 Non-Diabetic
## 53 49.4 37.4 Non-Diabetic
## 58 NA 37.1 Non-Diabetic
## 60 NA 36.3 Non-Diabetic
## 63 NA 37.4 Non-Diabetic
## 74 36.5 37.2 Non-Diabetic
## 79 19.3 Diabetic
## 80 45.1 37.0 Non-Diabetic
## 87 37.4 36.4 Non-Diabetic
## 107 NA 37.0 Tidak
## 111 NA 36.8 Diabetic
## 124 19.5 36.4 Diabetic
## 125 28.5 Diabetic
## 130 NA 37.1 Non-Diabetic
## 135 33.0 37.1 Diabetic
## 138 8.2 36.6 Diabetic
## 146 22.9 37.1 Diabetic
## 164 26.5 36.4 Diabetic
## 172 4.7 36.7 Diabetic
## 183 28.7 36.8 Diabetic
## 204 NA 36.2 Non-Diabetic
## 209 NA Diabetic
## 224 NA 36.4 Non-Diabetic
## 225 NA 36.6 Non-Diabetic
## 227 21.8 36.9 Diabetic
## 228 NA 42.5 Diabetic
## 238 36.3 37.3 Diabetic
## 241 22.2 36.9 Diabetic
## 244 42.4 37.2 Non-Diabetic
## 247 NA 37.4 Non-Diabetic
## 251 41.4 37.2 Non-Diabetic
## 256 NA 37.3 Non-Diabetic
## 259 NA 36.8
## 262 NA Diabetic
## 273 40.1 36.9 Non-Diabetic
## 275 22.7 37.6 Diabetic
## 278 27.7 36.9°C Non-Diabetic
## 280 44.7 36.4 No
## 295 NA 37.0 Non-Diabetic
## 303 NA 37.0 Non-Diabetic
## 304 18.0 36.3 Diabetic
## 308 1.0 36.6 Diabetic
## 309 29.2 37.6 Non-Diabetic
## 320 24.0 37.7 Diabetic
## 324 43.0 36.2 Non-Diabetic
## 338 36.6 36.8 Diabetic
## 350 NA 36.6 Diabetic
## 353 NA 37.2 Non-Diabetic
## 357 NA 36.0 Non-Diabetic
## 360 NA 36.5 Non-Diabetic
## 363 42.6 36.9 Non-Diabetic
## 364 NA 36.3 Non-Diabetic
## 366 46.1 36.8 Diabetic
## 371 NA Diabetic
## 383 NA 36.4 Non-Diabetic
## 389 20.6 36.6 Diabetic
## 392 3.9 36.4 Diabetic
## 394 NA 37.4 Diabetic
## 400 3.0 36.9
## 409 34.3 36.6 Non-Diabetic
## 410 26.2 36.6 Non-Diabetic
## 413 5000.0 36.3 Non-Diabetic
## 417 NA 36.5
## 427 NA 37.2 Non-Diabetic
## 431 NA 37.2 Diabetic
## 442 15.3 36.6 Diabetic
## 462 47.0 -1.0 Non-Diabetic
## 470 NA 36.7 Non-Diabetic
## 472 52.8 36.3 Non-Diabetic
## 474 NA 36.4 Non-Diabetic
## 475 32.7 35.9
## 481 28.4 36.8 Non-Diabetic
## 483 NA 36.8 Non-Diabetic
## 486 26.2 36.8 Diabetic
## 487 NA 36.5 Diabetic
## 490 7.8 36.7 Diabetic
## 500 20.4 Diabetic
## 511 NA 37.1 Diabetic
## 512 NA 36.8 Diabetic
## 517 43.4 36.4 Non-Diabetic
## 523 NA Non-Diabetic
## 532 51.0 36.5 Non-Diabetic
## 535 43.8 36.5 diabetic
## 536 NA 36.6 Diabetic
## 537 39.4 37.5
## 540 38.2 36.9 Non-Diabetic
## 547 34.1 36.9 Non-Diabetic
## 553 NA Non-Diabetic
## 555 37.9 37.2 Diabetic
## 558 7.6 36.6 Diabetic
## 560 NA 37.2 Diabetic
## 577 NA 36.7 Non-Diabetic
## 580 45.6 Non-Diabetic
## 587 15.8 37.3 Non-Diabetic
## 594 29.7 36.8 Non-Diabetic
## 602 13.9 Diabetic
## 606 NA 36.6 Non-Diabetic
## 607 NA 36.6 Diabetic
## 611 63.6 36.6 Non-Diabetic
## 621 27.7 37.1 Diabetic
## 640 NA 38.0 Non-Diabetic
## 648 39.9 Non-Diabetic
## 656 25.8 36.8 Diabetic
## 663 8.1 36.5 1
## 664 25.0 37.0 Non-Diabetic
## 665 NA Non-Diabetic
## 672 40.4 37.6 Non-Diabetic
## 677 29.8 37.1 Non-Diabetic
## 687 25.8 36.6 Non-Diabetic
## 691 NA 36.3 Diabetic
## 692 26.3 36.7 Non-Diabetic
## Peak_Plantar_Pressure_kPa
## 2 NA
## 4 577.5
## 9 NA
## 10 308.9
## 11 NA
## 23 386.0
## 28 173.3
## 32 267.6
## 33 415.3
## 35 612.7
## 39 NA
## 40 667.4
## 46 NA
## 48 348.0
## 53 NA
## 58 351.0
## 60 253.2
## 63 354.1
## 74 456.8
## 79 NA
## 80 310.4
## 87 238.8
## 107 475.2
## 111 544.1
## 124 NA
## 125 710.6
## 130 302.3
## 135 495.6
## 138 530.2
## 146 NA
## 164 571.2
## 172 NA
## 183 NA
## 204 167.4
## 209 350.6
## 224 276.2
## 225 364.6
## 227 NA
## 228 474.5
## 238 NA
## 241 NA
## 244 198.3
## 247 328.4
## 251 NA
## 256 114.1
## 259 546.3
## 262 409.9
## 273 192.7
## 275 607.6
## 278 224.0
## 280 NA
## 295 205.9
## 303 348.3
## 304 536.0
## 308 NA
## 309 NA
## 320 NA
## 324 185.6
## 338 400.9
## 350 483.3
## 353 276.9
## 357 233.2
## 360 354.2
## 363 304.9
## 364 196.6
## 366 NA
## 371 575.0
## 383 286.9
## 389 446.3
## 392 NA
## 394 555.8
## 400 537.4
## 409 NA
## 410 NA
## 413 NA
## 417 328.9
## 427 NA
## 431 548.7
## 442 547.1
## 462 NA
## 470 151.1
## 472 264.5
## 474 217.2
## 475 NA
## 481 287.5
## 483 227.2
## 486 NA
## 487 NA
## 490 NA
## 500 537.8
## 511 509.8
## 512 429.0
## 517 264.8
## 523 390.5
## 532 339.6
## 535 NA
## 536 463.3
## 537 NA
## 540 416.2
## 547 NA
## 553 364.9
## 555 508.5
## 558 NA
## 560 615.9
## 577 396.3
## 580 NA
## 587 NA
## 594 222.7
## 602 482.8
## 606 348.5
## 607 487.9
## 611 NA
## 621 609.5
## 640 379.6
## 648 NA
## 656 NA
## 663 NA
## 664 NA
## 665 238.5
## 672 NA
## 677 355.0
## 687 NA
## 691 573.4
## 692 445.4
# Listwise-deleted copy: only the rows that contain no NA at all.
# NOTE(review): `data_clean` is not referenced again in the visible script.
data_clean <- na.omit(data)
# Fill missing skin-stiffness readings with the mean of the observed values.
data$Skin_Stiffness_N_per_mm <- with(
  data,
  replace(
    Skin_Stiffness_N_per_mm,
    is.na(Skin_Stiffness_N_per_mm),
    mean(Skin_Stiffness_N_per_mm, na.rm = TRUE)
  )
)
# How many rows are exact repeats of an earlier row?
length(which(duplicated(data)))
## [1] 2
# List those repeated rows.
data[which(duplicated(data)), ]
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per_mm
## 342 Betty Lewis 23/05/1988 121/96 1.66
## 556 Hsieh Shu-Hui 15/01/1967 96/73 1.43
## Microcirculation_PU Suhu_Tubuh_Celcius Penyakit Peak_Plantar_Pressure_kPa
## 342 14.6 36.6 Diabetic 475.8
## 556 36.9 37.0 Diabetic 393.2
# Drop exact duplicate rows, keeping the first occurrence of each.
data <- unique(data)
# Confirm that no duplicates remain: the count is 0 and the listing is empty.
sum(duplicated(data))
## [1] 0
data[duplicated(data), ]
## [1] Nama Tanggal_Lahir
## [3] Tensi Skin_Stiffness_N_per_mm
## [5] Microcirculation_PU Suhu_Tubuh_Celcius
## [7] Penyakit Peak_Plantar_Pressure_kPa
## <0 rows> (or 0-length row.names)
# Tukey fences for skin stiffness: readings further than 1.5 * IQR beyond the
# first or third quartile fall outside [lower_bound, upper_bound].
Q1 <- quantile(data[["Skin_Stiffness_N_per_mm"]], probs = 0.25, na.rm = TRUE)
Q3 <- quantile(data[["Skin_Stiffness_N_per_mm"]], probs = 0.75, na.rm = TRUE)
IQR_value <- IQR(data[["Skin_Stiffness_N_per_mm"]], na.rm = TRUE)
upper_bound <- Q3 + IQR_value * 1.5
lower_bound <- Q1 - IQR_value * 1.5
# Keep only the rows whose reading lies inside the fences.
# NOTE(review): `data_no_outlier` is not used again in the visible script.
data_no_outlier <- filter(
  data,
  Skin_Stiffness_N_per_mm >= lower_bound,
  Skin_Stiffness_N_per_mm <= upper_bound
)
# Boxplot of the unfiltered `data`, so out-of-fence points are still drawn.
ggplot(data = data, mapping = aes(y = Skin_Stiffness_N_per_mm)) +
  geom_boxplot()

# Inspect the raw diagnosis labels before harmonising them.
unique(data$Penyakit)
## [1] "Non-Diabetic" "Diabetic" "" "Sehat" "Sakit"
## [6] "Tidak" "Yes" "No" "NON-DIABETIC" "Normal"
## [11] "DIABETIC" "DM" "diabetic" "1" "non-diabetic"
## [16] "Healthy"
# Normalise case and surrounding whitespace so that spelling variants such as
# "DIABETIC" / "Diabetic" collapse into a single label.
data$Penyakit <- trimws(tolower(data$Penyakit))
# Fold the remaining synonyms into the two canonical classes; without this
# step the column keeps about ten different labels for a two-class variable.
# NOTE(review): treating "sakit", "yes", "1" and "dm" as diabetic, and
# "sehat", "tidak", "no", "normal" and "healthy" as non-diabetic, is an
# assumption about how the data was entered - confirm with the data owner.
diabetic_labels <- c("diabetic", "dm", "yes", "sakit", "1")
non_diabetic_labels <- c(
  "non-diabetic", "no", "tidak", "sehat", "normal", "healthy"
)
# `%in%` never yields NA, so blank, missing or unrecognised labels are left
# exactly as they are.
data$Penyakit[data$Penyakit %in% diabetic_labels] <- "diabetic"
data$Penyakit[data$Penyakit %in% non_diabetic_labels] <- "non-diabetic"
# Kept from the original script; "fluu" is not among the labels listed above,
# so this is a no-op for that output.
data$Penyakit[data$Penyakit %in% "fluu"] <- "flu"
# Export the processed `data` frame without the row-name column.
# NOTE(review): this writes `data`, not `data_clean` or `data_no_outlier` -
# confirm that this is the intended result set.
write.csv(x = data, file = "data_clean.csv", row.names = FALSE)