# Load the animal-care workbook.
animals <- read_excel("animal_care.xlsx")
# Keep only the two columns examined below.
dc_new <- select(animals, `Number of Employees`, `Fostered Animals`)
# Summary statistics for each column.
summary(dc_new[["Number of Employees"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15.15   17.05   17.71   17.94   18.97   21.00
summary(dc_new[["Fostered Animals"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   380.0   700.0   760.0   920.2   972.0  2140.0       1
# Report how many values are missing in each column.
cat("Missing `Number of Employees`:", sum(is.na(dc_new[["Number of Employees"]])), "\n")
## Missing `Number of Employees`: 0
cat("Missing `Fostered Animals`:", sum(is.na(dc_new[["Fostered Animals"]])), "\n")
## Missing `Fostered Animals`: 1
# Discard every row that has a missing value in either column.
dc_clean <- drop_na(dc_new)
# Correlation between the two columns, computed on the complete rows.
cor(dc_clean[["Number of Employees"]], dc_clean[["Fostered Animals"]])
## [1] 0.8885505
cat("Remaining observations:", nrow(dc_clean), "\n")
## Remaining observations: 21
# Scatter plot of fostered animals against number of employees.
# The axis labels name the column actually mapped to each axis: x is
# `Number of Employees`, y is `Fostered Animals` (the previous labels were
# swapped and described the fostered values as a percentage).
ggplot(dc_clean, aes(x = `Number of Employees`, y = `Fostered Animals`)) +
  geom_point() +
  labs(
    title = "Number of Employees vs Fostered Animals",
    x = "Number of Employees",
    y = "Fostered Animals"
  )

The number of fostered animals increases as the number of employees increases (correlation of about 0.89), which in turn increases the workload and processing.

# Rows of `animals` whose `Fostered Animals` value is present.
animals_clean <- animals[complete.cases(animals$`Fostered Animals`), ]

# Density-scaled histogram of the unfiltered column, with a kernel density
# curve drawn on top; the density call is told to skip missing values.
hist(animals$`Fostered Animals`, probability = TRUE, breaks = 10)
lines(density(animals$`Fostered Animals`, na.rm = TRUE), lwd = 2, col = "red")

# The same plot drawn from the filtered rows.
hist(animals_clean$`Fostered Animals`, probability = TRUE, breaks = 10)
lines(density(animals_clean$`Fostered Animals`), lwd = 2, col = "red")