# Load the animal-care workbook and keep only the two columns under study.
animals <- read_excel("animal_care.xlsx")
dc_new <- select(animals, `Number of Employees`, `Fostered Animals`)

# Five-number summaries (plus mean) for each column; the `##` lines are the
# console output recorded when the script was run.
summary(dc_new[["Number of Employees"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.15 17.05 17.71 17.94 18.97 21.00
summary(dc_new[["Fostered Animals"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 380.0 700.0 760.0 920.2 972.0 2140.0 1

# Report how many values are missing in each column.
cat(
  "Missing `Number of Employees`:",
  sum(is.na(dc_new[["Number of Employees"]])),
  "\n"
)
## Missing `Number of Employees`: 0
cat(
  "Missing `Fostered Animals`:",
  sum(is.na(dc_new[["Fostered Animals"]])),
  "\n"
)
## Missing `Fostered Animals`: 1

# Keep complete rows only, so the correlation is computed on paired values.
dc_clean <- drop_na(dc_new)

# Pearson correlation (the cor() default) between staffing and fostering.
cor(dc_clean[["Number of Employees"]], dc_clean[["Fostered Animals"]])
## [1] 0.8885505
cat("Remaining observations:", nrow(dc_clean), "\n")
## Remaining observations: 21
# Scatter plot of fostered animals (y) against number of employees (x),
# using the complete-case data in `dc_clean`.
# The axis labels now match the mapped columns: the original labels were
# swapped (x was captioned as fostered animals, y as employee count).
# "Percent" was dropped from the fostered-animals label because the recorded
# summary of that column ranges from 380 to 2140, so it is not a percentage.
ggplot(dc_clean, aes(x = `Number of Employees`, y = `Fostered Animals`)) +
  geom_point() +
  labs(
    title = "Number of Employees vs Fostered Animals",
    x = "Employee Count (`Number of Employees`)",
    y = "Fostered Animals (`Fostered Animals`)"
  )
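The number of fostered animals increases as the number of employees increases (correlation of about 0.89), which suggests that workload and processing grow along with staffing; note that a correlation alone does not show which one drives the other.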
# Distribution of `Fostered Animals`, drawn as a density-scaled histogram with
# a kernel density curve overlaid in red.

# Rows with a missing `Fostered Animals` value are removed for the second plot.
animals_clean <- animals[!is.na(animals$`Fostered Animals`), ]

# Plot 1: raw column. hist() discards non-finite values itself and density()
# is told to via na.rm = TRUE, so the NA does not need to be removed first.
# Explicit main/xlab/ylab replace the defaults, which would otherwise print
# the code expression (e.g. "Histogram of animals$`Fostered Animals`").
hist(
  animals$`Fostered Animals`,
  breaks = 10,
  probability = TRUE,
  main = "Fostered Animals (raw column, NAs ignored)",
  xlab = "Fostered Animals",
  ylab = "Density"
)
lines(density(animals$`Fostered Animals`, na.rm = TRUE), col = "red", lwd = 2)

# Plot 2: same distribution, drawn from the explicitly cleaned data.
hist(
  animals_clean$`Fostered Animals`,
  breaks = 10,
  probability = TRUE,
  main = "Fostered Animals (missing values removed)",
  xlab = "Fostered Animals",
  ylab = "Density"
)
lines(density(animals_clean$`Fostered Animals`), col = "red", lwd = 2)