This dataset contains California’s COVID-19 vaccine administration data
reported before May 10, 2021, so the results shown here may differ
from the current situation.
# Read the county-level vaccine administration CSV into a data frame
vaccine_admin <- read.csv("covid-19-progress-county-051021.csv")
# Report how many rows and columns were loaded
dim(vaccine_admin)
## [1] 8683 17
# Preview the first six rows to check the column layout
head(vaccine_admin, n = 6L)
## county administered_date total_doses cumulative_total_doses
## 1 All CA Counties 2020-12-15 1134 1134
## 2 All CA Counties 2020-12-16 8119 9253
## 3 All CA Counties 2020-12-17 25719 34972
## 4 All CA Counties 2020-12-18 43750 78722
## 5 All CA Counties 2020-12-19 25203 103925
## 6 All CA Counties 2020-12-20 18945 122870
## pfizer_doses cumulative_pfizer_doses moderna_doses cumulative_moderna_doses
## 1 1134 1134 0 0
## 2 8119 9253 0 0
## 3 25719 34972 0 0
## 4 43696 78668 54 54
## 5 25180 103848 23 77
## 6 18913 122761 32 109
## jj_doses cumulative_jj_doses partially_vaccinated total_partially_vaccinated
## 1 0 0 1134 1134
## 2 0 0 8119 9253
## 3 0 0 25721 34974
## 4 0 0 43751 78725
## 5 0 0 25202 103926
## 6 0 0 18944 122869
## fully_vaccinated cumulative_fully_vaccinated at_least_one_dose
## 1 0 0 1134
## 2 0 0 8119
## 3 0 0 25721
## 4 0 0 43751
## 5 1 1 25202
## 6 1 2 18944
## cumulative_at_least_one_dose california_flag
## 1 1134
## 2 9253
## 3 34974
## 4 78725
## 5 103927
## 6 122871
Simple Data Cleaning
# Strip any stray leading/trailing whitespace from the county names
vaccine_admin[["county"]] <- trimws(vaccine_admin[["county"]])
# Parse the year-month-day date strings into Date objects
vaccine_admin[["administered_date"]] <- as.Date(
  vaccine_admin[["administered_date"]],
  format = "%Y-%m-%d"
)
Calculating the percentage of those vaccinated:
# Work on a subset of the data: only the statewide rows, i.e. the ones whose
# county is labelled "All CA Counties".
vaccine_CA <- vaccine_admin[vaccine_admin$county %in% "All CA Counties", ]
# The LA Times article "Tracking coronavirus vaccinations in California" does
# not state the population count behind its percentages, so 39,466,917 (found
# through some web searches) is used here. Feel free to change this number
# based on other sources!
CA_pop <- 39466917
# Percentage of the population with at least one dose, rounded to two decimals.
# NOTE(review): in the rows printed by head(), cumulative_at_least_one_dose
# equals total_partially_vaccinated + cumulative_fully_vaccinated, so this
# figure appears to include fully vaccinated people as well -- confirm against
# the data dictionary.
vaccine_CA$partial_percent <- with(
  vaccine_CA,
  round(cumulative_at_least_one_dose / CA_pop * 100, digits = 2)
)
vaccine_CA$partial_percent
## [1] 0.00 0.02 0.09 0.20 0.26 0.31 0.44 0.56 0.69 0.75 0.76 0.79
## [13] 0.82 0.93 1.06 1.22 1.30 1.31 1.35 1.37 1.48 1.61 1.76 1.91
## [25] 2.08 2.15 2.18 2.33 2.52 2.73 2.99 3.32 3.50 3.61 3.79 4.07
## [37] 4.39 4.78 5.17 5.42 5.56 5.84 6.17 6.53 6.92 7.29 7.57 7.73
## [49] 8.03 8.39 8.78 9.21 9.63 9.94 10.07 10.37 10.69 11.04 11.41 11.79
## [61] 12.03 12.16 12.37 12.72 13.05 13.39 13.73 13.94 14.04 14.25 14.53 14.89
## [73] 15.32 15.84 16.20 16.40 16.76 17.19 17.69 18.22 18.76 19.19 19.41 19.80
## [85] 20.22 20.72 21.30 21.90 22.37 22.62 23.08 23.68 24.33 24.98 25.64 26.15
## [97] 26.40 26.88 27.49 28.10 28.74 29.38 29.82 30.06 30.53 31.19 31.85 32.63
## [109] 33.38 33.90 34.10 34.64 35.37 36.13 36.94 37.71 38.29 38.58 39.08 39.69
## [121] 40.29 40.99 41.63 42.12 42.32 42.71 43.22 43.74 44.25 44.75 45.17 45.38
## [133] 45.69 46.05 46.43 46.77 47.15 47.39 47.51 47.72 47.99 48.23 48.45 48.62
## [145] 48.71
# Percentage of the population that is fully vaccinated, rounded to two
# decimals (uses the same CA_pop population figure as partial_percent)
vaccine_CA$fully_percent <- with(
  vaccine_CA,
  round(cumulative_fully_vaccinated / CA_pop * 100, digits = 2)
)
vaccine_CA$fully_percent
## [1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [13] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.05 0.13
## [25] 0.23 0.27 0.30 0.41 0.52 0.60 0.65 0.69 0.71 0.72 0.77 0.84
## [37] 0.93 0.98 1.01 1.04 1.05 1.14 1.24 1.33 1.42 1.50 1.54 1.56
## [49] 1.65 1.77 1.90 2.05 2.25 2.34 2.38 2.53 2.75 3.02 3.32 3.64
## [61] 3.82 3.93 4.09 4.38 4.71 5.06 5.37 5.54 5.63 5.84 6.14 6.51
## [73] 6.89 7.28 7.55 7.68 7.93 8.22 8.57 8.93 9.25 9.49 9.61 9.80
## [85] 10.11 10.47 10.93 11.43 11.76 11.93 12.26 12.65 13.01 13.40 13.83 14.09
## [97] 14.22 14.45 14.76 15.16 15.59 16.07 16.45 16.64 16.97 17.36 17.81 18.35
## [109] 18.88 19.28 19.47 19.91 20.49 21.09 21.75 22.40 22.93 23.20 23.65 24.12
## [121] 24.65 25.18 25.73 26.09 26.28 26.69 27.20 27.74 28.35 28.94 29.33 29.50
## [133] 29.88 30.39 30.90 31.47 32.04 32.43 32.61 33.04 33.61 34.16 34.74 35.16
## [145] 35.43
Plotting Percentages
# List the columns now available in the statewide subset, including the two
# percentage columns added above
colnames(vaccine_CA)
## [1] "county" "administered_date"
## [3] "total_doses" "cumulative_total_doses"
## [5] "pfizer_doses" "cumulative_pfizer_doses"
## [7] "moderna_doses" "cumulative_moderna_doses"
## [9] "jj_doses" "cumulative_jj_doses"
## [11] "partially_vaccinated" "total_partially_vaccinated"
## [13] "fully_vaccinated" "cumulative_fully_vaccinated"
## [15] "at_least_one_dose" "cumulative_at_least_one_dose"
## [17] "california_flag" "partial_percent"
## [19] "fully_percent"
# First layer: area chart of the at-least-one-dose percentage over time
# NOTE(review): assumes ggplot2 is already attached; no library() call is
# visible in this part of the document.
ggplot(
  data = vaccine_CA,
  mapping = aes(x = administered_date, y = partial_percent)
) +
  geom_area(fill = "darkolivegreen3")

# Second layer: overlay the fully vaccinated percentage on the same axes.
# Both areas are semi-transparent so the overlap stays visible.
ggplot(
  data = vaccine_CA,
  mapping = aes(x = administered_date, y = partial_percent)
) +
  geom_area(fill = "darkolivegreen3", alpha = 0.5) +
  # x is inherited from the plot-level mapping; only y changes for this layer
  geom_area(mapping = aes(y = fully_percent), fill = "forestgreen", alpha = 0.5)

# Same two areas drawn opaque on a fixed 0-100 percent axis, with a dashed
# horizontal reference line at 80 percent.
# NOTE(review): the 80% line is presumably a vaccination target; its source is
# not stated here -- confirm.
ggplot(
  data = vaccine_CA,
  mapping = aes(x = administered_date, y = partial_percent)
) +
  geom_area(fill = "green3") +
  geom_area(mapping = aes(y = fully_percent), fill = "blue3") +
  geom_hline(yintercept = 80, linetype = "longdash") +
  ylim(0, 100)

# 7-day rolling mean of daily doses, rounded to whole doses.
# zoo::rollmean() centers its window by default, so with k = 7 and fill = NA
# the first three and last three days have no average (NA).
vaccine_CA$wk_avg <- round(
  zoo::rollmean(vaccine_CA$total_doses, k = 7, fill = NA),
  digits = 0
)
# Daily doses as bars with the rolling average drawn on top as a line.
# na.rm = TRUE silently drops the NA padding at both ends of wk_avg instead of
# warning about removed rows. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
ggplot(vaccine_CA, aes(x = administered_date, y = total_doses)) +
  geom_col(fill = "yellowgreen") +
  geom_line(aes(y = wk_avg), color = "darkgreen", na.rm = TRUE)
