This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.]
library(readr)
## Warning: package 'readr' was built under R version 4.5.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
IMPORT DATA
data <- read_csv("hospital_dataset.csv")
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
MENGECEK DATA
str(data)
## spc_tbl_ [700 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Nama : chr [1:700] "Michael Anderson" "N/A" "Tan Wei Ming" "Shen Yi-Ching" ...
## $ Tanggal_Lahir : chr [1:700] "1/4/1957" "20/09/1975" "12/4/1965" "11/9/1980" ...
## $ Tensi : chr [1:700] "112/67" "140 / 91" "134/72" "120/79" ...
## $ Skin_Stiffness_N_per_mm : num [1:700] 0.69 1.5 0.76 1.92 0.81 0.61 1.04 2.24 0.18 NA ...
## $ Microcirculation_PU : num [1:700] 42 41.9 26.3 NA 25.5 42.2 2 9.5 24.8 40.9 ...
## $ Suhu_Tubuh_Celcius : chr [1:700] "37.6" "36.5°C" "37.5" "37" ...
## $ Penyakit : chr [1:700] "Non-Diabetic" "Non-Diabetic" "Non-Diabetic" "Diabetic" ...
## $ Peak_Plantar_Pressure_kPa: num [1:700] 294 NA 432 578 502 ...
## - attr(*, "spec")=
## .. cols(
## .. Nama = col_character(),
## .. Tanggal_Lahir = col_character(),
## .. Tensi = col_character(),
## .. Skin_Stiffness_N_per_mm = col_double(),
## .. Microcirculation_PU = col_double(),
## .. Suhu_Tubuh_Celcius = col_character(),
## .. Penyakit = col_character(),
## .. Peak_Plantar_Pressure_kPa = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
summary(data)
## Nama Tanggal_Lahir Tensi
## Length:700 Length:700 Length:700
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## Min. : -2.180 Min. : -32.50 Length:700
## 1st Qu.: 0.700 1st Qu.: 18.00 Class :character
## Median : 1.100 Median : 27.70 Mode :character
## Mean : 1.342 Mean : 35.58
## 3rd Qu.: 1.595 3rd Qu.: 39.00
## Max. :150.000 Max. :5000.00
## NA's :37 NA's :50
## Penyakit Peak_Plantar_Pressure_kPa
## Length:700 Min. : -100.0
## Class :character 1st Qu.: 268.6
## Mode :character Median : 384.3
## Mean : 991.9
## 3rd Qu.: 508.5
## Max. :99999.0
## NA's :43
head(data)
## # A tibble: 6 × 8
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Michael Anders… 1/4/1957 112/… 0.69 42
## 2 N/A 20/09/1975 140 … 1.5 41.9
## 3 Tan Wei Ming 12/4/1965 134/… 0.76 26.3
## 4 Shen Yi-Ching 11/9/1980 120/… 1.92 NA
## 5 Kung Mei-Lin 22/08/1985 99/77 0.81 25.5
## 6 Ho Chuan-Wei 10/8/1962 149/… 0.61 42.2
## # ℹ abbreviated name: ¹Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## # Peak_Plantar_Pressure_kPa <dbl>
MISSING VALUE
data$Peak_Plantar_Pressure_kPa[is.na(data$Peak_Plantar_Pressure_kPa)] <-
mean(data$Peak_Plantar_Pressure_kPa, na.rm = TRUE)
colSums(is.na(data))
## Nama Tanggal_Lahir Tensi
## 40 42 47
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## 37 50 49
## Penyakit Peak_Plantar_Pressure_kPa
## 45 0
data$Microcirculation_PU[is.na(data$Microcirculation_PU)] <-
mean(data$Microcirculation_PU, na.rm = TRUE)
data$Peak_Plantar_Pressure_kPa[is.na(data$Peak_Plantar_Pressure_kPa)] <-
mean(data$Peak_Plantar_Pressure_kPa, na.rm = TRUE)
kategorik->modus
modus <- function(x){
ux <- na.omit(unique(x))
ux[which.max(tabulate(match(x, ux)))]
}
data$Penyakit[is.na(data$Penyakit)] <- modus(data$Penyakit)
DATA TIDAK KONSISTEN TENSI
data$Tensi <- gsub(" ", "", data$Tensi)
SUHU
data$Suhu_Tubuh_Celcius <- gsub("°C", "", data$Suhu_Tubuh_Celcius)
data$Suhu_Tubuh_Celcius <- as.numeric(data$Suhu_Tubuh_Celcius)
## Warning: NAs introduced by coercion
DATA DUPLIKAT
sum(duplicated(data))
## [1] 2
data <- data[!duplicated(data), ]
DETEKSI OUTLIER
Q1 <- quantile(data$Peak_Plantar_Pressure_kPa, 0.25)
Q3 <- quantile(data$Peak_Plantar_Pressure_kPa, 0.75)
IQR <- Q3 - Q1
Q1 <- quantile(data$Peak_Plantar_Pressure_kPa, 0.25, na.rm = TRUE)
Q3 <- quantile(data$Peak_Plantar_Pressure_kPa, 0.75, na.rm = TRUE)
IQR <- Q3 - Q1
lower <- Q1 - 1.5 * IQR
upper <- Q3 + 1.5 * IQR
outlier <- data$Peak_Plantar_Pressure_kPa < lower |
data$Peak_Plantar_Pressure_kPa > upper
sum(outlier)
## [1] 51
MENGHAPUS OUTLIER
data_clean <- data[!outlier, ]
summary(data_clean)
## Nama Tanggal_Lahir Tensi
## Length:647 Length:647 Length:647
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## Min. : -1.500 Min. :-32.50 Min. :35.5
## 1st Qu.: 0.700 1st Qu.: 19.20 1st Qu.:36.5
## Median : 1.100 Median : 29.00 Median :36.8
## Mean : 1.364 Mean : 28.43 Mean :36.9
## 3rd Qu.: 1.590 3rd Qu.: 37.40 3rd Qu.:37.1
## Max. :150.000 Max. : 77.30 Max. :99.9
## NA's :36 NA's :52
## Penyakit Peak_Plantar_Pressure_kPa
## Length:647 Min. : 0.001
## Class :character 1st Qu.:270.450
## Mode :character Median :379.600
## Mean :388.257
## 3rd Qu.:507.350
## Max. :715.400
##
VISUALISASI
ggplot(data, aes(y = Peak_Plantar_Pressure_kPa)) +
geom_boxplot(fill = "red") +
ggtitle("Deteksi Outlier")