This section focuses on older adults (65+) and highlights preventive-care disparities across Texas counties. Key variables: “Vacc” (% of Medicare enrollees with an annual flu shot), “Hosp” (hospital stays per 100k Medicare enrollees), and “Mammo” (% of female Medicare enrollees aged 65–74 with an annual mammogram screening).
# Build the older-adult (Medicare) subset of the county data.
# Columns are selected by position in raw_data.
# NOTE(review): confirm that positions 3, 74, 99 and 106 still correspond to
# county name, flu vaccination, hospital stays and mammography screening if
# the layout of raw_data ever changes.
older_data <- raw_data[, c(3, 74, 99, 106)]
colnames(older_data) <- c("County", "Vacc", "Hosp", "Mammo")

# Convert the three measures to numeric in a single pass. Going through
# as.character() first keeps a factor column from being replaced by its
# integer level codes. Entries that are not numbers become NA, and R reports
# this with an "NAs introduced by coercion" warning, which is expected here.
older_data[c("Vacc", "Hosp", "Mammo")] <- lapply(
  older_data[c("Vacc", "Hosp", "Mammo")],
  function(column) as.numeric(as.character(column))
)

# Keep only rows that have a value in every column (County included).
older_data <- older_data[complete.cases(older_data), ]
nrow(older_data) # number of counties with complete data
## [1] 239
# Preview the first six cleaned rows.
head(older_data, n = 6L)
## # A tibble: 6 × 4
## County Vacc Hosp Mammo
## <chr> <dbl> <dbl> <dbl>
## 1 Anderson 25 3528 38
## 2 Andrews 28 3684 37
## 3 Angelina 26.7 3332 40
## 4 Aransas 26 1640 45
## 5 Archer 19.7 2705 40
## 6 Armstrong 18.5 1964 36
SUMMARY
# Minimum, quartiles, mean and maximum of each measure; the rendered output
# of each call is shown directly beneath it.
summary(older_data[["Vacc"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.60 22.80 25.20 25.87 28.25 44.20
summary(older_data[["Hosp"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 906 2432 3023 3036 3674 6214
summary(older_data[["Mammo"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.00 30.00 36.00 34.67 40.00 55.00
HISTOGRAMS
# Distribution of each measure across counties. Titles and axis labels are
# set explicitly so the figures do not show the default "older_data$..."
# expressions.
hist(
  older_data$Vacc,
  main = "Flu vaccination among Medicare enrollees",
  xlab = "% of Medicare enrollees with annual flu shot",
  ylab = "Number of counties"
)
hist(
  older_data$Hosp,
  main = "Hospital stays among Medicare enrollees",
  xlab = "Hospital stays per 100k Medicare enrollees",
  ylab = "Number of counties"
)
hist(
  older_data$Mammo,
  main = "Mammography screening among female Medicare enrollees",
  xlab = "% of female Medicare enrollees 65-74 with annual mammogram",
  ylab = "Number of counties"
)
PLOTS
# Pairwise scatter plots of the three measures, one point per county.
# Axis labels carry the units so the figures can be read on their own,
# instead of showing the default "older_data$..." expressions.
plot(
  older_data$Vacc, older_data$Hosp,
  main = "Flu vaccination vs. hospital stays",
  xlab = "% of Medicare enrollees with annual flu shot",
  ylab = "Hospital stays per 100k Medicare enrollees"
)
plot(
  older_data$Mammo, older_data$Hosp,
  main = "Mammography screening vs. hospital stays",
  xlab = "% of female Medicare enrollees 65-74 with annual mammogram",
  ylab = "Hospital stays per 100k Medicare enrollees"
)
plot(
  older_data$Vacc, older_data$Mammo,
  main = "Flu vaccination vs. mammography screening",
  xlab = "% of Medicare enrollees with annual flu shot",
  ylab = "% of female Medicare enrollees 65-74 with annual mammogram"
)
CORRELATIONS
# Pairwise correlations between the measures (cor() defaults, i.e. Pearson);
# the rendered output of each call is shown directly beneath it.
with(older_data, cor(Vacc, Hosp))
## [1] 0.1196028
with(older_data, cor(Mammo, Hosp))
## [1] -0.1335233
with(older_data, cor(Vacc, Mammo))
## [1] -0.5878529