R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print a per-column summary of `cars` (the output below covers `speed` and `dist`).
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(readr)
## Warning: package 'readr' was built under R version 4.5.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2

IMPORT DATA

# Load the hospital measurements from CSV with readr; column types are guessed.
# NOTE(review): the str() output further down shows the literal string "N/A"
# in `Nama`, so that marker is not read as a missing value here. Confirm
# whether it should be declared via `na = c("", "NA", "N/A")`.
data <- read_csv("hospital_dataset.csv")
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

MENGECEK DATA

# Show each column's type together with its first few values.
str(data)
## spc_tbl_ [700 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Nama                     : chr [1:700] "Michael Anderson" "N/A" "Tan Wei Ming" "Shen Yi-Ching" ...
##  $ Tanggal_Lahir            : chr [1:700] "1/4/1957" "20/09/1975" "12/4/1965" "11/9/1980" ...
##  $ Tensi                    : chr [1:700] "112/67" "140 / 91" "134/72" "120/79" ...
##  $ Skin_Stiffness_N_per_mm  : num [1:700] 0.69 1.5 0.76 1.92 0.81 0.61 1.04 2.24 0.18 NA ...
##  $ Microcirculation_PU      : num [1:700] 42 41.9 26.3 NA 25.5 42.2 2 9.5 24.8 40.9 ...
##  $ Suhu_Tubuh_Celcius       : chr [1:700] "37.6" "36.5°C" "37.5" "37" ...
##  $ Penyakit                 : chr [1:700] "Non-Diabetic" "Non-Diabetic" "Non-Diabetic" "Diabetic" ...
##  $ Peak_Plantar_Pressure_kPa: num [1:700] 294 NA 432 578 502 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Nama = col_character(),
##   ..   Tanggal_Lahir = col_character(),
##   ..   Tensi = col_character(),
##   ..   Skin_Stiffness_N_per_mm = col_double(),
##   ..   Microcirculation_PU = col_double(),
##   ..   Suhu_Tubuh_Celcius = col_character(),
##   ..   Penyakit = col_character(),
##   ..   Peak_Plantar_Pressure_kPa = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Per-column summary: quartiles and NA counts for the numeric columns,
# length/class/mode for the character columns.
summary(data)
##      Nama           Tanggal_Lahir         Tensi          
##  Length:700         Length:700         Length:700        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
##  Min.   : -2.180         Min.   : -32.50     Length:700        
##  1st Qu.:  0.700         1st Qu.:  18.00     Class :character  
##  Median :  1.100         Median :  27.70     Mode  :character  
##  Mean   :  1.342         Mean   :  35.58                       
##  3rd Qu.:  1.595         3rd Qu.:  39.00                       
##  Max.   :150.000         Max.   :5000.00                       
##  NA's   :37              NA's   :50                            
##    Penyakit         Peak_Plantar_Pressure_kPa
##  Length:700         Min.   : -100.0          
##  Class :character   1st Qu.:  268.6          
##  Mode  :character   Median :  384.3          
##                     Mean   :  991.9          
##                     3rd Qu.:  508.5          
##                     Max.   :99999.0          
##                     NA's   :43
# Preview the first six rows of the table.
head(data)
## # A tibble: 6 × 8
##   Nama            Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
##   <chr>           <chr>         <chr>                  <dbl>               <dbl>
## 1 Michael Anders… 1/4/1957      112/…                   0.69                42  
## 2 N/A             20/09/1975    140 …                   1.5                 41.9
## 3 Tan Wei Ming    12/4/1965     134/…                   0.76                26.3
## 4 Shen Yi-Ching   11/9/1980     120/…                   1.92                NA  
## 5 Kung Mei-Lin    22/08/1985    99/77                   0.81                25.5
## 6 Ho Chuan-Wei    10/8/1962     149/…                   0.61                42.2
## # ℹ abbreviated name: ¹​Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## #   Peak_Plantar_Pressure_kPa <dbl>

MISSING VALUE

# Fill missing peak plantar pressure readings with the column median.
# The median is used instead of the mean because the column still contains
# extreme values at this point (the summary above shows a minimum of -100 and
# a maximum of 99999), which pull the mean up to about 992. Rows filled with
# that mean land above the upper IQR fence and are deleted again by the
# outlier step, so mean imputation recovers nothing.
# NOTE(review): the outputs recorded further down were produced with mean
# imputation; re-knit the document to refresh them.
data$Peak_Plantar_Pressure_kPa[is.na(data$Peak_Plantar_Pressure_kPa)] <-
  median(data$Peak_Plantar_Pressure_kPa, na.rm = TRUE)

# Count the remaining missing values in every column.
colSums(is.na(data))
##                      Nama             Tanggal_Lahir                     Tensi 
##                        40                        42                        47 
##   Skin_Stiffness_N_per_mm       Microcirculation_PU        Suhu_Tubuh_Celcius 
##                        37                        50                        49 
##                  Penyakit Peak_Plantar_Pressure_kPa 
##                        45                         0
# Fill missing microcirculation readings with the column median. The summary
# above shows a maximum of 5000 against a third quartile of 39, so the mean
# (about 35.6, versus a median of 27.7) is inflated by extreme values.
# Peak_Plantar_Pressure_kPa is not imputed again here: the colSums() output
# above already reports 0 missing values for it, so a second pass would be a
# no-op.
# NOTE(review): Skin_Stiffness_N_per_mm still has 37 missing values after this
# step; confirm whether it should be imputed as well. Outputs recorded further
# down were produced with mean imputation; re-knit to refresh them.
data$Microcirculation_PU[is.na(data$Microcirculation_PU)] <-
  median(data$Microcirculation_PU, na.rm = TRUE)

kategorik->modus

# Return the most frequent non-missing value of `x` (the statistical mode).
#
# x: an atomic vector, possibly containing NA.
# Returns a length-one vector of the same type as `x`. When several values
# share the highest count, the one that appears first in `x` is returned.
# When `x` has no non-missing values, the result is NA.
modus <- function(x) {
  # Distinct non-missing values, in order of first appearance.
  candidates <- unique(x[!is.na(x)])
  # Position of every element among the candidates; NA entries map to NA and
  # are ignored by tabulate().
  counts <- tabulate(match(x, candidates))
  candidates[which.max(counts)]
}
# Replace missing disease labels with the most frequent label in the column.
data$Penyakit <- replace(
  data$Penyakit,
  is.na(data$Penyakit),
  modus(data$Penyakit)
)

DATA TIDAK KONSISTEN TENSI

# Normalise blood-pressure strings such as "140 / 91" to "140/91" by removing
# every space character (literal match, no regular expression needed).
data$Tensi <- gsub(" ", "", data$Tensi, fixed = TRUE)

SUHU

# Strip the "°C" suffix (e.g. "36.5°C") and convert the temperatures to
# numeric in one step. Entries that are still not numeric after stripping
# become NA, which is what the coercion warning below reports.
data$Suhu_Tubuh_Celcius <- as.numeric(
  gsub("°C", "", data$Suhu_Tubuh_Celcius)
)
## Warning: NAs introduced by coercion

DATA DUPLIKAT

# Mark every row that exactly repeats an earlier row, then report the count.
is_duplicate <- duplicated(data)
sum(is_duplicate)
## [1] 2
# Keep only the first occurrence of each row.
data <- data[!is_duplicate, ]

DETEKSI OUTLIER

# 1.5 * IQR rule for Peak_Plantar_Pressure_kPa: readings more than 1.5 times
# the interquartile range below the first quartile or above the third
# quartile are flagged as outliers.
# The quartiles are computed once, with na.rm = TRUE so the step also works
# when the column contains missing values.
Q1 <- quantile(data$Peak_Plantar_Pressure_kPa, 0.25, na.rm = TRUE)
Q3 <- quantile(data$Peak_Plantar_Pressure_kPa, 0.75, na.rm = TRUE)
# The name `IQR` is kept so later code can still read it. It shadows
# stats::IQR only as a variable; calls such as IQR(x) still find the function.
IQR <- Q3 - Q1
lower <- Q1 - 1.5 * IQR
upper <- Q3 + 1.5 * IQR

# Missing readings are never counted as outliers, so the mask contains no NA
# and can be used directly for row subsetting.
outlier <- !is.na(data$Peak_Plantar_Pressure_kPa) &
  (data$Peak_Plantar_Pressure_kPa < lower |
    data$Peak_Plantar_Pressure_kPa > upper)
sum(outlier)
## [1] 51

MENGHAPUS OUTLIER

# Drop the rows flagged as peak-pressure outliers.
data_clean <- data[!outlier, ]

# Summarise the cleaned table.
# NOTE(review): only Peak_Plantar_Pressure_kPa was screened for outliers; the
# summary below still shows extreme values in other columns (Skin_Stiffness
# maximum 150, Suhu_Tubuh_Celcius maximum 99.9). Confirm whether they need
# the same treatment.
summary(data_clean)
##      Nama           Tanggal_Lahir         Tensi          
##  Length:647         Length:647         Length:647        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
##  Min.   : -1.500         Min.   :-32.50      Min.   :35.5      
##  1st Qu.:  0.700         1st Qu.: 19.20      1st Qu.:36.5      
##  Median :  1.100         Median : 29.00      Median :36.8      
##  Mean   :  1.364         Mean   : 28.43      Mean   :36.9      
##  3rd Qu.:  1.590         3rd Qu.: 37.40      3rd Qu.:37.1      
##  Max.   :150.000         Max.   : 77.30      Max.   :99.9      
##  NA's   :36                                  NA's   :52        
##    Penyakit         Peak_Plantar_Pressure_kPa
##  Length:647         Min.   :  0.001          
##  Class :character   1st Qu.:270.450          
##  Mode  :character   Median :379.600          
##                     Mean   :388.257          
##                     3rd Qu.:507.350          
##                     Max.   :715.400          
## 

VISUALISASI

# Box plot of peak plantar pressure. It is drawn from `data`, i.e. the table
# before the flagged outlier rows were removed.
ggplot(data = data, mapping = aes(y = Peak_Plantar_Pressure_kPa)) +
  geom_boxplot(fill = "red") +
  labs(title = "Deteksi Outlier")