Load in Data

# Load the saved workspace holding three pre-filtered vectors: case counts, county names, and vaccination rates.
# case_count - provided by the NC department of public health: rate of COVID infections per 100k residents, by county
# vaccination_rate - provided by the CDC: % of residents that are fully vaccinated, by county
# nc_county_list - the county name for each entry in the case count and vaccination vectors,
# i.e. the first entry in nc_county_list is 'Transylvania', so the first values in case_count and vaccination_rate belong to Transylvania County
# NOTE: the path below is absolute and machine-specific; the file must exist at this location for the rest of the script to run.
load(file = "C:/Users/a.furgang/OneDrive - ulrich medical USA/Documents/Personal/Grad School/Term 1/Programming for Data Analytics & Visualization/Individual Assignment 1/NC_Covid_Data_Sep-4-2021.RData")

#load dplyr package to use the tibble function
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Combine the three vectors into a single tibble (one row per entry);
# each column keeps the data type of the vector it came from.
all_data <- tibble(
  NC_counties = nc_county_list,
  vax_rate = vaccination_rate,
  cases = case_count
)

Task 1 - Averages

# Averages
# Average % of residents that are fully vaccinated in NC.
# mean() over the per-county vector gives every county equal weight.
nc_average_vax <- mean(vaccination_rate)
print(nc_average_vax)
## [1] 40.446
# ? Get average cases? - taken as the mean of the case_count vector
# Average rate of COVID infections per 100k residents of NC (again, each county weighted equally).
nc_average_cases <- mean(case_count)
print(nc_average_cases)
## [1] 508.07
# ? which county has the highest vaccination rate? - find the maximum of the vax_rate column,
# then pick the county name(s) on the row(s) whose rate equals that maximum (ties would all be returned).
highest_vax_rate <- with(all_data, NC_counties[vax_rate == max(vax_rate)])
print(highest_vax_rate)
## [1] "Martin"

Task 2 - Add Filters

# Filter function
# ? Found this on Stack overflow - how it works is explained below.
# filter_func() keeps only the values of a vector that belong to a chosen set of counties.
# Parameters:
#   input_vector  - the vector to filter (in this example - vaccination_rate or case_count)
#   county_names  - vector of county names where entry i names the county of input_vector[i]
#                   (in this example - nc_county_list); must be the same length as input_vector
#   filt_counties - the county names to KEEP (in this example - durham_counties)
# Returns: the entries of input_vector whose county name appears in filt_counties,
#   in the original order of input_vector (NOT in the order of filt_counties).
# Errors: stops if input_vector and county_names differ in length, because the logical
#   mask would otherwise be silently recycled or introduce NA entries.
filter_func <- function(input_vector, county_names, filt_counties) {
  stopifnot(length(input_vector) == length(county_names))
  # TRUE for each position whose county is one of the requested counties
  keep <- county_names %in% filt_counties
  input_vector[keep]
}

# County names to keep when filtering (referred to below as the Durham counties)
durham_counties <- c("Wake", "Granville", "Person", "Orange", "Chatham", "Durham")


# To compare the sizes of the filtered vectors, run filter_func() once on the case counts
# and once on the vaccination rates, then compare the lengths of the two results.

# Case counts restricted to the counties listed in durham_counties
filtered_cases <- filter_func(case_count, nc_county_list, durham_counties)


# Vaccination rates restricted to the same counties
filtered_vax <- filter_func(vaccination_rate, nc_county_list, durham_counties)

# Report whether the two filtered vectors have the same length
print(
  if (length(filtered_vax) == length(filtered_cases)) {
    "The vectors are the same length"
  } else {
    "The vectors are different length"
  }
)
## [1] "The vectors are the same length"
# ? what are the sizes of the resulting vectors? - 6 entries each
# Are they the same size? - Yes, as expected: both case count and vaccination rate are filtered with the same county list (durham_counties)

Including Plots

# Plots
# First plot: one point per entry, with % of residents fully vaccinated on the x-axis
# and the rate of COVID infections per 100k residents on the y-axis.
plot(
  vaccination_rate, case_count,
  main = "Vaccination Rates vs COVID Infection Rates",
  xlab = "Vaccination Rate (%)",
  ylab = "COVID Infection Rate (Per 100k)",
  pch = 20
)

# Second plot: the same comparison (infections per 100k residents vs % fully vaccinated),
# restricted to the Durham counties.

# filtered_vax and filtered_cases were already computed in Task 2, so they are reused here
# instead of using a 'for' loop to index the dataset again.

# filter_func() returns values in nc_county_list order, not in durham_counties order, so the
# labels are produced with the same filter to guarantee each name lines up with its own point.
filtered_names <- filter_func(input_vector = nc_county_list,
                              county_names = nc_county_list,
                              filt_counties = durham_counties)

# Draw the scatter plot first, then add the county labels on top of it. plot() returns NULL,
# so its result must not be piped into text() or wrapped in points().
plot(x = filtered_vax,
     y = filtered_cases,
     pch = 8, col = "blue",
     main = "Vaccination Rates vs COVID Infection Rates in Durham Counties",
     xlab = "Vaccination Rate (%) for Durham Counties",
     ylab = "COVID Infection Rate (Per 100k) for Durham Counties")

# pos = 4 places each label to the right of its point; font = 1 is plain text
# (font is an integer graphical parameter).
text(x = filtered_vax, y = filtered_cases, labels = filtered_names,
     cex = 0.9, font = 1, pos = 4)

#  ? what does this mean? - this code plots case count vs vaccination rate for just the Durham counties. We are looking at a specific subset of the data instead of the data for the whole state of NC

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.