# Example claims data: one row per patient, with the patient's age and a
# 0/1 Readmitted_30Days flag.
claims <- data.frame(
  Patient_ID = 1:10,
  Age = c(45, 34, 66, 50, 72, 55, 48, 39, 60, 52),
  Readmitted_30Days = c(1, 0, 1, 0, 0, 1, 0, 1, 0, 0)
)

# Drop exact duplicate rows. Base unique() is used rather than
# dplyr::distinct() because no library(dplyr) call is visible in this
# script, so an unqualified distinct() fails with "could not find function".
claims_clean <- unique(claims)
# unique() keeps the original row names; reset them so rows are numbered
# consecutively after any duplicates are removed.
rownames(claims_clean) <- NULL

claims_clean
##    Patient_ID Age Readmitted_30Days
## 1           1  45                 1
## 2           2  34                 0
## 3           3  66                 1
## 4           4  50                 0
## 5           5  72                 0
## 6           6  55                 1
## 7           7  48                 0
## 8           8  39                 1
## 9           9  60                 0
## 10         10  52                 0
# Summary Statistics ----
# Print the per-column summary of the cleaned claims data.
claims_clean |>
  summary()
##    Patient_ID         Age        Readmitted_30Days
##  Min.   : 1.00   Min.   :34.00   Min.   :0.0
##  1st Qu.: 3.25   1st Qu.:45.75   1st Qu.:0.0
##  Median : 5.50   Median :51.00   Median :0.0
##  Mean   : 5.50   Mean   :52.10   Mean   :0.4
##  3rd Qu.: 7.75   3rd Qu.:58.75   3rd Qu.:1.0
##  Max.   :10.00   Max.   :72.00   Max.   :1.0
# Visualization Example ----
library(ggplot2)

# Stacked histogram of patient age, filled by the Readmitted_30Days flag.
ggplot(
  claims_clean,
  aes(
    x = Age,
    # Label the 0/1 flag so the legend reads "No"/"Yes" instead of raw codes.
    fill = factor(
      Readmitted_30Days,
      levels = c(0, 1),
      labels = c("No", "Yes")
    )
  )
) +
  # boundary = 0 puts bin edges on multiples of 5 (30, 35, 40, ...); by
  # default the 5-year bins are centred on those values (e.g. 42.5-47.5),
  # which do not correspond to real age groups.
  # closed = "left" makes each bin [lo, hi), so age 50 falls in 50-55.
  geom_histogram(
    binwidth = 5,
    boundary = 0,
    closed = "left",
    color = "white"
  ) +
  labs(
    title = "Readmissions by Age Group",
    x = "Age",
    y = "Count",
    fill = "Readmitted in 30 Days"
  ) +
  theme_minimal()
