Load libraries
# Use a fixed CRAN mirror so installation never prompts for one.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Install only the packages that are not already present. Reinstalling on
# every run is slow, and on Windows reinstalling a package whose DLL is in use
# fails with "cannot remove prior installation" / "Permission denied"
# (an earlier run of this script hit exactly that for jsonlite).
required_packages <- c("httr", "jsonlite", "tidyverse", "ggplot2")

# system.file() returns "" for a package that is not installed and, unlike
# requireNamespace(), does not load the package while checking.
is_installed <- vapply(
  required_packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
missing_packages <- required_packages[!is_installed]

if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library(httr)
## Warning: package 'httr' was built under R version 4.4.3
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
Data set 1
# Download the CDC dataset (view id "fhky-rtsk") as JSON and parse it.
url <- "https://data.cdc.gov/api/views/fhky-rtsk/rows.json?accessType=DOWNLOAD"
response <- GET(url)
# Stop on an HTTP error (4xx/5xx) instead of trying to parse an error
# response as if it were the dataset.
stop_for_status(response)
# State the encoding explicitly rather than leaving httr to guess it.
json_data <- content(response, as = "text", encoding = "UTF-8")
vaccine_data_list <- fromJSON(json_data, flatten = TRUE)
# Inspect the structure of the resulting list
str(vaccine_data_list)
## List of 2
## $ meta:List of 1
## ..$ view:List of 40
## .. ..$ id : chr "fhky-rtsk"
## .. ..$ name : chr "Vaccination Coverage among Young Children (0-35 Months)"
## .. ..$ assetType : chr "dataset"
## .. ..$ attribution : chr "National Center for Immunization and Respiratory Diseases (NCIRD)"
## .. ..$ averageRating : int 0
## .. ..$ category : chr "Child Vaccinations"
## .. ..$ createdAt : int 1620841972
## .. ..$ description : chr "Vaccination Coverage among Young Children (0-35 Months)\n\n• National, regional, state, and selected local area"| __truncated__
## .. ..$ diciBackend : logi FALSE
## .. ..$ displayType : chr "table"
## .. ..$ downloadCount : int 4862
## .. ..$ hideFromCatalog : logi FALSE
## .. ..$ hideFromDataJson : logi FALSE
## .. ..$ locked : logi FALSE
## .. ..$ newBackend : logi TRUE
## .. ..$ numberOfComments : int 0
## .. ..$ oid : int 37173135
## .. ..$ provenance : chr "official"
## .. ..$ publicationAppendEnabled: logi FALSE
## .. ..$ publicationDate : int 1620874584
## .. ..$ publicationGroup : int 18303992
## .. ..$ publicationStage : chr "published"
## .. ..$ rowsUpdatedAt : int 1727374116
## .. ..$ rowsUpdatedBy : chr "b3ca-i9my"
## .. ..$ tableId : int 18303992
## .. ..$ totalTimesRated : int 0
## .. ..$ viewCount : int 21704
## .. ..$ viewLastModified : int 1644612339
## .. ..$ viewType : chr "tabular"
## .. ..$ approvals :'data.frame': 1 obs. of 16 variables:
## .. .. ..$ reviewedAt : int 1644522373
## .. .. ..$ reviewedAutomatically : logi TRUE
## .. .. ..$ state : chr "approved"
## .. .. ..$ submissionId : int 4427834
## .. .. ..$ submissionObject : chr "public_audience_request"
## .. .. ..$ submissionOutcome : chr "change_audience"
## .. .. ..$ submittedAt : int 1644522373
## .. .. ..$ targetAudience : chr "public"
## .. .. ..$ workflowId : int 2100
## .. .. ..$ submissionDetails.permissionType : chr "READ"
## .. .. ..$ submissionOutcomeApplication.endedAt : int 1644522373
## .. .. ..$ submissionOutcomeApplication.failureCount: int 0
## .. .. ..$ submissionOutcomeApplication.startedAt : int 1644522373
## .. .. ..$ submissionOutcomeApplication.status : chr "success"
## .. .. ..$ submitter.id : chr "x3nx-2a2w"
## .. .. ..$ submitter.displayName : chr "Julia A. Falvey"
## .. ..$ clientContext :List of 2
## .. .. ..$ clientContextVariables: list()
## .. .. ..$ inheritedVariables : Named list()
## .. ..$ columns :'data.frame': 18 obs. of 16 variables:
## .. .. ..$ id : int [1:18] -1 -1 -1 -1 -1 -1 -1 -1 540437842 540437841 ...
## .. .. ..$ name : chr [1:18] "sid" "id" "position" "created_at" ...
## .. .. ..$ dataTypeName : chr [1:18] "meta_data" "meta_data" "meta_data" "meta_data" ...
## .. .. ..$ fieldName : chr [1:18] ":sid" ":id" ":position" ":created_at" ...
## .. .. ..$ position : int [1:18] 0 0 0 0 0 0 0 0 2 3 ...
## .. .. ..$ renderTypeName : chr [1:18] "meta_data" "meta_data" "meta_data" "meta_data" ...
## .. .. ..$ flags :List of 18
## .. .. .. ..$ : chr "hidden"
## .. .. .. ..$ : chr "hidden"
## .. .. .. ..$ : chr "hidden"
## .. .. .. ..$ : chr "hidden"
## .. .. .. ..$ : chr "hidden"
## .. .. .. ..$ : chr "hidden"
## .. .. .. ..$ : chr "hidden"
## .. .. .. ..$ : chr "hidden"
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. ..$ description : chr [1:18] NA NA NA NA ...
## .. .. ..$ tableColumnId : int [1:18] NA NA NA NA NA NA NA NA 137964304 137964303 ...
## .. .. ..$ cachedContents.non_null : chr [1:18] NA NA NA NA ...
## .. .. ..$ cachedContents.largest : chr [1:18] NA NA NA NA ...
## .. .. ..$ cachedContents.null : chr [1:18] NA NA NA NA ...
## .. .. ..$ cachedContents.top :List of 18
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. .. ..$ :'data.frame': 11 obs. of 2 variables:
## .. .. .. .. ..$ item : chr [1:11] "Hib" "Hep B" "PCV" "DTaP" ...
## .. .. .. .. ..$ count: chr [1:11] "25377" "20094" "17274" "17273" ...
## .. .. .. ..$ :'data.frame': 10 obs. of 2 variables:
## .. .. .. .. ..$ item : chr [1:10] "≥3 Doses" "" "≥2 Doses" "≥1 Dose" ...
## .. .. .. .. ..$ count: chr [1:10] "38729" "25061" "21186" "12826" ...
## .. .. .. ..$ :'data.frame': 2 obs. of 2 variables:
## .. .. .. .. ..$ item : chr [1:2] "States/Local Areas" "HHS Regions/National"
## .. .. .. .. ..$ count: chr [1:2] "112272" "15916"
## .. .. .. ..$ :'data.frame': 20 obs. of 2 variables:
## .. .. .. .. ..$ item : chr [1:20] "Georgia" "Texas" "Pennsylvania" "United States" ...
## .. .. .. .. ..$ count: chr [1:20] "1856" "1856" "1856" "1856" ...
## .. .. .. ..$ :'data.frame': 20 obs. of 2 variables:
## .. .. .. .. ..$ item : chr [1:20] "2016-2019" "2014-2017" "2016-2017" "2016" ...
## .. .. .. .. ..$ count: chr [1:20] "12450" "12435" "5624" "5623" ...
## .. .. .. ..$ :'data.frame': 6 obs. of 2 variables:
## .. .. .. .. ..$ item : chr [1:6] "Age" "Race and Ethnicity" "Poverty" "Insurance Coverage" ...
## .. .. .. .. ..$ count: chr [1:6] "103303" "6946" "5655" "5339" ...
## .. .. .. ..$ :'data.frame': 20 obs. of 2 variables:
## .. .. .. .. ..$ item : chr [1:20] "24 Months" "35 Months" "19 Months" "13 Months" ...
## .. .. .. .. ..$ count: chr [1:20] "20917" "19527" "18153" "13958" ...
## .. .. .. ..$ :'data.frame': 20 obs. of 2 variables:
## .. .. .. .. ..$ item : chr [1:20] "92.6" "91.7" "92.0" "92.8" ...
## .. .. .. .. ..$ count: chr [1:20] "800" "790" "787" "775" ...
## .. .. .. ..$ :'data.frame': 20 obs. of 2 variables:
## .. .. .. .. ..$ item : chr [1:20] "91.4 to 96.9" "91.0 to 96.7" "89.0 to 95.5" "90.1 to 97.4" ...
## .. .. .. .. ..$ count: chr [1:20] "32" "31" "25" "24" ...
## .. .. .. ..$ :'data.frame': 20 obs. of 2 variables:
## .. .. .. .. ..$ item : chr [1:20] "293" "271" "243" "248" ...
## .. .. .. .. ..$ count: chr [1:20] "859" "681" "680" "652" ...
## .. .. ..$ cachedContents.smallest : chr [1:18] NA NA NA NA ...
## .. .. ..$ cachedContents.count : chr [1:18] NA NA NA NA ...
## .. .. ..$ cachedContents.cardinality: chr [1:18] NA NA NA NA ...
## .. ..$ grants :'data.frame': 1 obs. of 3 variables:
## .. .. ..$ inherited: logi FALSE
## .. .. ..$ type : chr "viewer"
## .. .. ..$ flags :List of 1
## .. .. .. ..$ : chr "public"
## .. ..$ metadata :List of 2
## .. .. ..$ custom_fields :List of 1
## .. .. .. ..$ Common Core:List of 5
## .. .. .. .. ..$ Contact Email: chr "VaxView@cdc.gov"
## .. .. .. .. ..$ Homepage : chr "https://www.cdc.gov/vaccines/imz-managers/coverage/childvaxview/index.html"
## .. .. .. .. ..$ Contact Name : chr " "
## .. .. .. .. ..$ Program Code : chr "009:020"
## .. .. .. .. ..$ Bureau Code : chr "009:20"
## .. .. ..$ availableDisplayTypes: chr [1:3] "table" "fatrow" "page"
## .. ..$ owner :List of 5
## .. .. ..$ id : chr "knbc-mfp8"
## .. .. ..$ displayName: chr "NCIRD"
## .. .. ..$ screenName : chr "NCIRD"
## .. .. ..$ type : chr "interactive"
## .. .. ..$ flags : chr [1:2] "acceptedEula" "mayBeStoriesCoOwner"
## .. ..$ query : Named list()
## .. ..$ rights : chr "read"
## .. ..$ tableAuthor :List of 5
## .. .. ..$ id : chr "knbc-mfp8"
## .. .. ..$ displayName: chr "NCIRD"
## .. .. ..$ screenName : chr "NCIRD"
## .. .. ..$ type : chr "interactive"
## .. .. ..$ flags : chr [1:2] "acceptedEula" "mayBeStoriesCoOwner"
## .. ..$ tags : chr [1:18] "vaxviews" "vaccination" "immunization" "vaccination coverage" ...
## .. ..$ flags : chr [1:4] "default" "ownerMayBeContacted" "restorable" "restorePossibleForType"
## $ data: chr [1:128188, 1:18] "row-s9mi-m2cf_n7rp" "row-3eg9_5em9.j2vi" "row-cm7i_br7b_5rn8" "row-iikw.rebt_d3tm" ...
# Convert the "data" element of the parsed JSON into a data frame.
# If the element is missing, stop here with a clear message: everything below
# depends on `vaccine_data`, so carrying on would only fail later with a less
# helpful "object not found" error.
if (!("data" %in% names(vaccine_data_list))) {
  stop(
    "The key containing the data was not found in the JSON structure",
    call. = FALSE
  )
}
vaccine_data <- as.data.frame(vaccine_data_list$data, stringsAsFactors = FALSE)
# Get the column names (the data arrives as an unnamed matrix, so these are
# the automatic V1, V2, ... names) and print them.
column_names <- colnames(vaccine_data)
print(column_names)
## [1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11" "V12"
## [13] "V13" "V14" "V15" "V16" "V17" "V18"
Clean Data set 1
# Build a new data frame holding only the columns needed for the analysis,
# and give those columns descriptive names in the same step.
new_column_names <- c(
  "Vaccine Type",
  "Geography",
  "Birth.Year.Birth.Cohort",
  "Dimension.Type",
  "Dimension",
  "Estimate"
)
vaccine_data_filtered <- setNames(
  vaccine_data[, c("V9", "V12", "V13", "V14", "V15", "V16")],
  new_column_names
)
# Keep only the first year of a "YYYY-YYYY" cohort range; values that do not
# match that exact pattern are left unchanged.
# sub() is already vectorised, so it is applied to the whole column at once
# instead of once per row through sapply() (which also attached a names
# attribute to the column).
vaccine_data_filtered[["Birth.Year.Birth.Cohort"]] <- sub(
  "^(\\d{4})-\\d{4}$",
  "\\1",
  vaccine_data_filtered[["Birth.Year.Birth.Cohort"]]
)
# Rename three vaccine labels for the analysis; every other label is kept.
vaccine_data_filtered[["Vaccine Type"]] <- local({
  vaccine_type <- vaccine_data_filtered[["Vaccine Type"]]
  vaccine_type <- replace(
    vaccine_type,
    which(vaccine_type == "≥1 Dose Varicella"),
    "Varicella"
  )
  vaccine_type <- replace(
    vaccine_type,
    which(vaccine_type == "≥1 Dose MMR"),
    "Measles, Mumps, Rubella"
  )
  vaccine_type <- replace(
    vaccine_type,
    which(vaccine_type == "Combined 7 Series"),
    "Combined Series"
  )
  vaccine_type
})
# Fold sub-state areas into their state: any Geography value that starts with
# a state abbreviation (TX, IL, NY, PA) is replaced by the full state name.
vaccine_data_filtered <- vaccine_data_filtered %>%
  mutate(
    Geography = case_when(
      grepl("^TX", Geography) ~ "Texas",
      grepl("^IL", Geography) ~ "Illinois",
      grepl("^NY", Geography) ~ "New York",
      grepl("^PA", Geography) ~ "Pennsylvania",
      TRUE ~ Geography
    )
  )
# Keep only the rows broken down by age, build a short age label, convert the
# estimate to a number, and average the estimates per
# vaccine / geography / birth cohort / age.
clean_vaccine_data <- vaccine_data_filtered %>%
  filter(Dimension.Type == "Age") %>%
  mutate(
    # Short age label: "19 Months" -> "19m", "0-3 Days" -> "0-3d".
    # Both branches strip only the unit, so a range keeps its hyphen.
    # Removing every non-digit instead would turn "0-3 Days" into "03d" and
    # could merge different ranges into one label.
    Age_Value = case_when(
      grepl("Months", Dimension) ~
        paste0(gsub("\\s*Months.*", "", Dimension), "m"),
      grepl("Days", Dimension) ~
        paste0(gsub("\\s*Days.*", "", Dimension), "d"),
      TRUE ~ Dimension
    ),
    # Estimates that are not numeric text become NA here (R warns about the
    # coercion); those rows are dropped by the filter that follows.
    Estimate = as.numeric(as.character(Estimate))
  ) %>%
  filter(!is.na(Estimate)) %>%
  group_by(`Vaccine Type`, Geography, Birth.Year.Birth.Cohort, Age_Value) %>%
  summarize(Estimate = mean(Estimate, na.rm = TRUE), .groups = "drop")
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Estimate = as.numeric(as.character(Estimate))`.
## Caused by warning:
## ! NAs introduced by coercion
# Wide layout: one row per vaccine / geography / birth cohort and one column
# per age label, which makes coverage by age easier to compare across states.
clean_vaccine_data_wide <- pivot_wider(
  clean_vaccine_data,
  id_cols = all_of(c("Vaccine Type", "Geography", "Birth.Year.Birth.Cohort")),
  names_from = "Age_Value",
  values_from = "Estimate"
)
Data set 2
# Data set 2 is read straight from a CSV file hosted on GitHub.
vaccine_data_2 <- read.csv(
  file = "https://raw.githubusercontent.com/mirajpatel289/Data607/refs/heads/main/Vaccination%20Coverage%2019-35month%20.csv"
)
Clean Data set 2
# Keep only the columns used in the analysis.
vaccine_data_2_filtered <- vaccine_data_2 %>%
  select(Year, Vaccination, Category, Group, Estimate)

# Shorten the vaccination labels; the three "Combined series" variants are
# merged into one label, and any label not listed here is kept as it is.
clean_vaccine_data_2 <- mutate(
  vaccine_data_2_filtered,
  Vaccination = case_when(
    Vaccination %in% c(
      "Combined series (4:3:1:4:3:1:4)",
      "Combined series (4:3:1:3:3:1:4)",
      "Combined series (4:3:1:3:3:1)"
    ) ~ "Combined Series",
    Vaccination %in% "DTP/DT/DTaP (4 doses or more)" ~ "DTaP",
    Vaccination %in% "Polio (3 doses or more)" ~ "Polio",
    Vaccination %in% c(
      "Hib (3 doses or more)",
      "Hib (primary series plus booster dose)"
    ) ~ "Hib",
    Vaccination %in% "Hepatitis B (3 doses or more)" ~ "Hep B",
    Vaccination %in% "PCV (4 doses or more)" ~ "PCV",
    TRUE ~ Vaccination
  )
)
Graphs
# Data Set 1: mean estimate per vaccine type and birth year/cohort.
summarized_data_1 <- summarise(
  group_by(clean_vaccine_data, `Vaccine Type`, Birth.Year.Birth.Cohort),
  Average_Estimate = mean(Estimate, na.rm = TRUE),
  .groups = "drop"
)
# Tag the rows with their source; used when the two data sets are combined.
summarized_data_1$Dataset <- "Dataset 1"
# Bar graph: one dodged bar per vaccine type within each birth year/cohort.
ggplot(
  data = summarized_data_1,
  mapping = aes(
    x = Birth.Year.Birth.Cohort,
    y = Average_Estimate,
    fill = `Vaccine Type`
  )
) +
  geom_col(position = position_dodge()) +
  labs(
    title = "Average Vaccination Estimate by Birth Year/Cohort",
    x = "Birth Year/Cohort",
    y = "Average Estimate",
    fill = "Vaccine Type"
  ) +
  theme_minimal()
# Heat map of the same summary; the percentage is easier to read from the
# fill colour than from the bar heights.
ggplot(
  data = summarized_data_1,
  mapping = aes(
    x = Birth.Year.Birth.Cohort,
    y = `Vaccine Type`,
    fill = Average_Estimate
  )
) +
  geom_tile() +
  labs(
    title = "Heatmap of Vaccination Estimates",
    x = "Birth Year/Cohort",
    y = "Vaccine Type",
    fill = "Average Estimate"
  ) +
  theme_minimal()
# Data Set 2: mean estimate per year and vaccination.
summarized_data_2 <- summarise(
  group_by(clean_vaccine_data_2, Year, Vaccination),
  Average_Estimate = mean(Estimate, na.rm = TRUE),
  .groups = "drop"
)
# Extra columns used when the two data sets are combined: a source tag, plus
# copies of Year and Vaccination under the column names used by data set 1.
summarized_data_2$Dataset <- "Dataset 2"
summarized_data_2[["Birth.Year.Birth.Cohort"]] <- summarized_data_2$Year
summarized_data_2[["Vaccine Type"]] <- summarized_data_2$Vaccination
# Bar graph: one dodged bar per vaccination within each year.
ggplot(
  data = summarized_data_2,
  mapping = aes(x = Year, y = Average_Estimate, fill = Vaccination)
) +
  geom_col(position = position_dodge()) +
  labs(
    title = "Average Vaccination Estimate by Year",
    x = "Year",
    y = "Average Estimate",
    fill = "Vaccination"
  ) +
  theme_minimal()
# Heat map of the data set 2 summary: year against vaccination, coloured by
# the average estimate.
ggplot(
  data = summarized_data_2,
  mapping = aes(x = Year, y = Vaccination, fill = Average_Estimate)
) +
  geom_tile() +
  labs(
    title = "Heatmap of Vaccination Estimates",
    x = "Year",
    y = "Vaccination",
    fill = "Average Estimate"
  ) +
  theme_minimal()
### Combine two datasets
# Store the year/cohort column as character in both summaries so it has the
# same type in each table before they are stacked.
summarized_data_1[["Birth.Year.Birth.Cohort"]] <-
  as.character(summarized_data_1[["Birth.Year.Birth.Cohort"]])
summarized_data_2[["Birth.Year.Birth.Cohort"]] <-
  as.character(summarized_data_2[["Year"]])
# Data set 2 calls the vaccine column "Vaccination"; mirror it under the
# name used by data set 1.
summarized_data_2[["Vaccine Type"]] <- summarized_data_2[["Vaccination"]]

combined_data <- bind_rows(summarized_data_1, summarized_data_2)

# Put "Dataset 2" first in the factor levels so its facet is drawn before
# "Dataset 1" and the years read in order.
combined_data <- mutate(
  combined_data,
  Dataset = factor(Dataset, levels = c("Dataset 2", "Dataset 1"))
)
# Combined bar graph: one facet per data set, each with its own x axis.
ggplot(
  data = combined_data,
  mapping = aes(
    x = Birth.Year.Birth.Cohort,
    y = Average_Estimate,
    fill = `Vaccine Type`
  )
) +
  geom_col(position = position_dodge()) +
  facet_wrap(vars(Dataset), scales = "free_x") +
  labs(
    title = "Average Vaccination Estimate by Year/Cohort",
    x = "Year or Birth Year/Cohort",
    y = "Average Estimate",
    fill = "Vaccine Type"
  ) +
  theme_minimal() +
  # Rotate the x-axis labels so they are easier to read.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Combined heat map: one facet per data set, each with its own x axis.
ggplot(
  data = combined_data,
  mapping = aes(
    x = Birth.Year.Birth.Cohort,
    y = `Vaccine Type`,
    fill = Average_Estimate
  )
) +
  geom_tile() +
  facet_wrap(vars(Dataset), scales = "free_x") +
  labs(
    title = "Heatmap of Vaccination Estimates",
    x = "Year or Birth Year/Cohort",
    y = "Vaccine Type",
    fill = "Average Estimate"
  ) +
  theme_minimal() +
  # Rotate the x-axis labels so they are easier to read.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Statistical Analysis
# Pearson correlation between the estimate and the year in data set 2.
correlation_dataset_2 <- cor.test(
  x = as.numeric(clean_vaccine_data_2$Estimate),
  y = as.numeric(clean_vaccine_data_2$Year),
  method = "pearson"
)
print(correlation_dataset_2)
##
## Pearson's product-moment correlation
##
## data: as.numeric(clean_vaccine_data_2$Estimate) and as.numeric(clean_vaccine_data_2$Year)
## t = -5.568, df = 1010, p-value = 3.303e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2317320 -0.1121404
## sample estimates:
## cor
## -0.1725721
# Pearson correlation between the estimate and the birth year/cohort in
# data set 1.
correlation_dataset_1 <- cor.test(
  x = as.numeric(clean_vaccine_data$Estimate),
  y = as.numeric(clean_vaccine_data$`Birth.Year.Birth.Cohort`),
  method = "pearson"
)
print(correlation_dataset_1)
##
## Pearson's product-moment correlation
##
## data: as.numeric(clean_vaccine_data$Estimate) and as.numeric(clean_vaccine_data$Birth.Year.Birth.Cohort)
## t = 6.8852, df = 36981, p-value = 5.861e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02559843 0.04595588
## sample estimates:
## cor
## 0.03578087
# Scatter plot of estimate against year for data set 2, with a fitted
# straight line and the correlation coefficient shown in the caption.
ggplot(
  data = clean_vaccine_data_2,
  mapping = aes(x = Year, y = Estimate)
) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Vaccination Estimate vs. Year (Dataset 2)",
    x = "Year",
    y = "Estimate",
    caption = paste0(
      "Correlation Coefficient: ",
      round(correlation_dataset_2$estimate, 2)
    )
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 104 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 104 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Convert the birth year/cohort column to numeric so the straight-line fit
# treats it as a continuous variable.
clean_vaccine_data[["Birth.Year.Birth.Cohort"]] <-
  as.numeric(clean_vaccine_data[["Birth.Year.Birth.Cohort"]])

# Scatter plot of estimate against birth year/cohort for data set 1, with a
# fitted straight line and the correlation coefficient shown in the caption.
ggplot(
  data = clean_vaccine_data,
  mapping = aes(x = Birth.Year.Birth.Cohort, y = Estimate)
) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Vaccination Estimate vs. Birth Year/Cohort (Dataset 1)",
    x = "Birth Year/Cohort",
    y = "Estimate",
    caption = paste0(
      "Correlation Coefficient: ",
      round(correlation_dataset_1$estimate, 3)
    )
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Data Set 1: There is a statistically significant but very weak positive correlation (r ≈ 0.036) between vaccination estimates and birth year/birth cohort from 2011 to 2021. This suggests that vaccination coverage increases slightly with each successive birth year, although the relationship is very weak.
Data Set 2: There is a statistically significant but weak negative correlation (r ≈ -0.17) between vaccination estimates and year from 1995 to 2009. This suggests that vaccination coverage may decrease slightly with each successive year, although the relationship is quite weak.