library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw hospital records. The file marks some missing entries with the
# literal text "N/A" (it shows up as a string in the Nama column), which
# readr's default `na = c("", "NA")` would keep as ordinary text. Listing it
# here makes is.na() and the later NA filters treat those cells as missing.
# NOTE(review): readr reports parsing issues for this file; inspect them with
# problems(data) before trusting the parsed columns.
# NOTE(review): summary() further down reports 1-character values in Nama and
# Penyakit — possibly other placeholder markers; confirm against the raw file.
data <- read_csv("hospital.csv", na = c("", "NA", "N/A"))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 700 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Nama, Tanggal_Lahir, Tensi, Suhu_Tubuh_Celcius, Penyakit
## dbl (3): Skin_Stiffness_N_per_mm, Microcirculation_PU, Peak_Plantar_Pressure...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the import.
data %>% head(n = 6)
## # A tibble: 6 × 8
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per…¹ Microcirculation_PU
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Michael Anders… 01/04/1957 112/… 0.69 42
## 2 N/A 20/09/1975 140 … 1.5 41.9
## 3 Tan Wei Ming 12/04/1965 134/… 0.76 26.3
## 4 Shen Yi-Ching 11/09/1980 120/… 1.92 NA
## 5 Kung Mei-Lin 22/08/1985 99/77 0.81 25.5
## 6 Ho Chuan-Wei 10/08/1962 149/… 0.61 42.2
## # ℹ abbreviated name: ¹Skin_Stiffness_N_per_mm
## # ℹ 3 more variables: Suhu_Tubuh_Celcius <chr>, Penyakit <chr>,
## # Peak_Plantar_Pressure_kPa <dbl>
# Show each column's storage type alongside a few sample values.
data %>% str()
## spc_tbl_ [700 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Nama : chr [1:700] "Michael Anderson" "N/A" "Tan Wei Ming" "Shen Yi-Ching" ...
## $ Tanggal_Lahir : chr [1:700] "01/04/1957" "20/09/1975" "12/04/1965" "11/09/1980" ...
## $ Tensi : chr [1:700] "112/67" "140 / 91" "134/72" "120/79" ...
## $ Skin_Stiffness_N_per_mm : num [1:700] 0.69 1.5 0.76 1.92 0.81 0.61 1.04 2.24 0.18 NA ...
## $ Microcirculation_PU : num [1:700] 42 41.9 26.3 NA 25.5 42.2 2 9.5 24.8 40.9 ...
## $ Suhu_Tubuh_Celcius : chr [1:700] "37.6" "36.5°C" "37.5" "37.0" ...
## $ Penyakit : chr [1:700] "Non-Diabetic" "Non-Diabetic" "Non-Diabetic" "Diabetic" ...
## $ Peak_Plantar_Pressure_kPa: num [1:700] 294 NA 432 578 502 ...
## - attr(*, "spec")=
## .. cols(
## .. Nama = col_character(),
## .. Tanggal_Lahir = col_character(),
## .. Tensi = col_character(),
## .. Skin_Stiffness_N_per_mm = col_double(),
## .. Microcirculation_PU = col_double(),
## .. Suhu_Tubuh_Celcius = col_character(),
## .. Penyakit = col_character(),
## .. Peak_Plantar_Pressure_kPa = col_double()
## .. )
## - attr(*, "problems")=<pointer: 0x00000208ee185ee0>
# Count the missing (NA) cells in every column.
data %>%
  is.na() %>%
  colSums()
## Nama Tanggal_Lahir Tensi
## 40 44 49
## Skin_Stiffness_N_per_mm Microcirculation_PU Suhu_Tubuh_Celcius
## 39 52 51
## Penyakit Peak_Plantar_Pressure_kPa
## 46 45
# Discard records that have no patient name (Nama is NA).
data <- drop_na(data, Nama)
# Remove every space character from the Tensi strings, so that entries such
# as "140 / 91" take the same compact "140/91" form as the other readings.
data <- data %>%
  mutate(Tensi = gsub(" ", "", Tensi, fixed = TRUE))
# Convert the temperature column from text to numbers: delete the literal
# "°C" suffix, then coerce. Anything that is still not a plain number becomes
# NA, which is what raises the "NAs introduced by coercion" warning.
# NOTE(review): the raw values behind those NAs are not inspected in the
# visible part of this script — worth checking, since the temperature range
# filter further down removes NA rows.
data[["Suhu_Tubuh_Celcius"]] <- as.numeric(
  gsub("°C", "", data[["Suhu_Tubuh_Celcius"]], fixed = TRUE)
)
## Warning: NAs introduced by coercion
# Standardise two text columns in one pass:
#   * Penyakit      - lower-cased and stripped of leading/trailing whitespace,
#                     so case and padding differences do not create separate
#                     labels;
#   * Tanggal_Lahir - parsed from day/month/year text into Date values.
# NOTE(review): as.Date() returns NA without a warning for any entry that does
# not match "%d/%m/%Y"; confirm no differently formatted dates are lost here.
data <- data %>%
  mutate(
    Penyakit = trimws(tolower(Penyakit)),
    Tanggal_Lahir = as.Date(Tanggal_Lahir, format = "%d/%m/%Y")
  )
# Drop exact duplicate rows, then keep only records whose temperature lies
# between 35 and 42 inclusive. Rows with an NA temperature are removed as
# well, because filter() only retains rows where the condition is TRUE.
data <- data %>%
  distinct() %>%
  filter(between(Suhu_Tubuh_Celcius, 35, 42))
# Summarise every column after cleaning to check value ranges and the NAs
# that remain.
data %>% summary()
## Nama Tanggal_Lahir Tensi Skin_Stiffness_N_per_mm
## Length :600 Min. :1940-03-28 Length :600 Min. : -2.180
## N.unique : 58 1st Qu.:1957-02-05 N.unique :483 1st Qu.: 0.700
## N.blank : 0 Median :1973-12-31 N.blank : 0 Median : 1.110
## Min.nchar: 1 Mean :1973-04-20 Min.nchar: 3 Mean : 1.387
## Max.nchar: 16 3rd Qu.:1989-07-11 Max.nchar: 13 3rd Qu.: 1.610
## Max. :2005-11-27 NAs : 39 Max. :150.000
## NAs :51 NAs :34
## Microcirculation_PU Suhu_Tubuh_Celcius Penyakit
## Min. : -32.5 Min. :35.50 Length :600
## 1st Qu.: 18.0 1st Qu.:36.50 N.unique : 11
## Median : 27.8 Median :36.80 N.blank : 0
## Mean : 36.9 Mean :36.78 Min.nchar: 1
## 3rd Qu.: 39.1 3rd Qu.:37.10 Max.nchar: 12
## Max. :5000.0 Max. :38.00 NAs : 35
## NAs :40
## Peak_Plantar_Pressure_kPa
## Min. : -100.0
## 1st Qu.: 268.8
## Median : 379.4
## Mean : 917.2
## 3rd Qu.: 508.5
## Max. :99999.0
## NAs :38