This dataset contains California’s COVID-19 vaccination data as reported before May 10, 2021, so the results shown here may differ from current figures.

# Read the county-level vaccine progress CSV into a data frame, then report
# its row and column counts
vaccine_admin <- read.csv("covid-19-progress-county-051021.csv")
c(nrow(vaccine_admin), ncol(vaccine_admin))
## [1] 8683   17
# Preview the first six rows
head(vaccine_admin, n = 6L)
##            county administered_date total_doses cumulative_total_doses
## 1 All CA Counties        2020-12-15        1134                   1134
## 2 All CA Counties        2020-12-16        8119                   9253
## 3 All CA Counties        2020-12-17       25719                  34972
## 4 All CA Counties        2020-12-18       43750                  78722
## 5 All CA Counties        2020-12-19       25203                 103925
## 6 All CA Counties        2020-12-20       18945                 122870
##   pfizer_doses cumulative_pfizer_doses moderna_doses cumulative_moderna_doses
## 1         1134                    1134             0                        0
## 2         8119                    9253             0                        0
## 3        25719                   34972             0                        0
## 4        43696                   78668            54                       54
## 5        25180                  103848            23                       77
## 6        18913                  122761            32                      109
##   jj_doses cumulative_jj_doses partially_vaccinated total_partially_vaccinated
## 1        0                   0                 1134                       1134
## 2        0                   0                 8119                       9253
## 3        0                   0                25721                      34974
## 4        0                   0                43751                      78725
## 5        0                   0                25202                     103926
## 6        0                   0                18944                     122869
##   fully_vaccinated cumulative_fully_vaccinated at_least_one_dose
## 1                0                           0              1134
## 2                0                           0              8119
## 3                0                           0             25721
## 4                0                           0             43751
## 5                1                           1             25202
## 6                1                           2             18944
##   cumulative_at_least_one_dose california_flag
## 1                         1134                
## 2                         9253                
## 3                        34974                
## 4                        78725                
## 5                       103927                
## 6                       122871

Simple Data Cleaning

# Strip leading and trailing whitespace from the county names so that later
# exact-match comparisons are not thrown off by stray spaces
vaccine_admin[["county"]] <- trimws(vaccine_admin[["county"]])
# Parse the year-month-day strings (e.g. "2020-12-15") into Date objects
vaccine_admin[["administered_date"]] <- as.Date(
  vaccine_admin[["administered_date"]],
  format = "%Y-%m-%d"
)

Calculating the percentage of those vaccinated:

# Work with the statewide rows only, i.e. those labelled "All CA Counties"
vaccine_CA <- vaccine_admin[vaccine_admin[["county"]] %in% "All CA Counties", ]
# The LA Times article "Tracking coronavirus vaccinations in California" does
# not state the population figure behind its percentages, so 39,466,917 (found
# through web searches) is used here. Swap in another figure if you have a
# better source.
CA_pop <- 39466917

# Percentage of the population with at least one dose, rounded to two
# decimals. The column is stored as `partial_percent`.
# NOTE(review): going by the column name, "at least one dose" presumably also
# counts people who are now fully vaccinated -- confirm against the data
# dictionary.
vaccine_CA[["partial_percent"]] <- round(
  vaccine_CA[["cumulative_at_least_one_dose"]] / CA_pop * 100,
  digits = 2
)
vaccine_CA[["partial_percent"]]
##   [1]  0.00  0.02  0.09  0.20  0.26  0.31  0.44  0.56  0.69  0.75  0.76  0.79
##  [13]  0.82  0.93  1.06  1.22  1.30  1.31  1.35  1.37  1.48  1.61  1.76  1.91
##  [25]  2.08  2.15  2.18  2.33  2.52  2.73  2.99  3.32  3.50  3.61  3.79  4.07
##  [37]  4.39  4.78  5.17  5.42  5.56  5.84  6.17  6.53  6.92  7.29  7.57  7.73
##  [49]  8.03  8.39  8.78  9.21  9.63  9.94 10.07 10.37 10.69 11.04 11.41 11.79
##  [61] 12.03 12.16 12.37 12.72 13.05 13.39 13.73 13.94 14.04 14.25 14.53 14.89
##  [73] 15.32 15.84 16.20 16.40 16.76 17.19 17.69 18.22 18.76 19.19 19.41 19.80
##  [85] 20.22 20.72 21.30 21.90 22.37 22.62 23.08 23.68 24.33 24.98 25.64 26.15
##  [97] 26.40 26.88 27.49 28.10 28.74 29.38 29.82 30.06 30.53 31.19 31.85 32.63
## [109] 33.38 33.90 34.10 34.64 35.37 36.13 36.94 37.71 38.29 38.58 39.08 39.69
## [121] 40.29 40.99 41.63 42.12 42.32 42.71 43.22 43.74 44.25 44.75 45.17 45.38
## [133] 45.69 46.05 46.43 46.77 47.15 47.39 47.51 47.72 47.99 48.23 48.45 48.62
## [145] 48.71
# Percentage of the population that is fully vaccinated, rounded to two
# decimals
vaccine_CA[["fully_percent"]] <- round(
  vaccine_CA[["cumulative_fully_vaccinated"]] / CA_pop * 100,
  digits = 2
)
vaccine_CA[["fully_percent"]]
##   [1]  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
##  [13]  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.01  0.05  0.13
##  [25]  0.23  0.27  0.30  0.41  0.52  0.60  0.65  0.69  0.71  0.72  0.77  0.84
##  [37]  0.93  0.98  1.01  1.04  1.05  1.14  1.24  1.33  1.42  1.50  1.54  1.56
##  [49]  1.65  1.77  1.90  2.05  2.25  2.34  2.38  2.53  2.75  3.02  3.32  3.64
##  [61]  3.82  3.93  4.09  4.38  4.71  5.06  5.37  5.54  5.63  5.84  6.14  6.51
##  [73]  6.89  7.28  7.55  7.68  7.93  8.22  8.57  8.93  9.25  9.49  9.61  9.80
##  [85] 10.11 10.47 10.93 11.43 11.76 11.93 12.26 12.65 13.01 13.40 13.83 14.09
##  [97] 14.22 14.45 14.76 15.16 15.59 16.07 16.45 16.64 16.97 17.36 17.81 18.35
## [109] 18.88 19.28 19.47 19.91 20.49 21.09 21.75 22.40 22.93 23.20 23.65 24.12
## [121] 24.65 25.18 25.73 26.09 26.28 26.69 27.20 27.74 28.35 28.94 29.33 29.50
## [133] 29.88 30.39 30.90 31.47 32.04 32.43 32.61 33.04 33.61 34.16 34.74 35.16
## [145] 35.43

Plotting Percentages

# List the columns currently available in the statewide data frame
colnames(vaccine_CA)
##  [1] "county"                       "administered_date"           
##  [3] "total_doses"                  "cumulative_total_doses"      
##  [5] "pfizer_doses"                 "cumulative_pfizer_doses"     
##  [7] "moderna_doses"                "cumulative_moderna_doses"    
##  [9] "jj_doses"                     "cumulative_jj_doses"         
## [11] "partially_vaccinated"         "total_partially_vaccinated"  
## [13] "fully_vaccinated"             "cumulative_fully_vaccinated" 
## [15] "at_least_one_dose"            "cumulative_at_least_one_dose"
## [17] "california_flag"              "partial_percent"             
## [19] "fully_percent"
# First layer: filled area showing the at-least-one-dose percentage over time
ggplot(
  data = vaccine_CA,
  mapping = aes(x = administered_date, y = partial_percent)
) +
  geom_area(fill = "darkolivegreen3")

# Second layer: overlay the fully vaccinated percentage on the same axes.
# Both areas are drawn at 50% opacity; the overlay inherits the x mapping
# from the base plot.
ggplot(
  data = vaccine_CA,
  mapping = aes(x = administered_date, y = partial_percent)
) +
  geom_area(fill = "darkolivegreen3", alpha = 0.5) +
  geom_area(mapping = aes(y = fully_percent), fill = "forestgreen", alpha = 0.5)

# Same two layers with opaque fills, the y-axis fixed to the 0-100 range, and
# a long-dashed horizontal reference line at 80 percent
ggplot(
  data = vaccine_CA,
  mapping = aes(x = administered_date, y = partial_percent)
) +
  geom_area(fill = "green3") +
  geom_area(mapping = aes(y = fully_percent), fill = "blue3") +
  geom_hline(yintercept = 80, linetype = "longdash") +
  ylim(0, 100)

# 7-day rolling mean of daily doses, rounded to whole doses. rollmean()
# centers its window by default (spelled out here), and fill = NA pads the
# positions at each end where a full 7-day window is not available.
vaccine_CA[["wk_avg"]] <- round(
  zoo::rollmean(
    vaccine_CA[["total_doses"]],
    k = 7,
    fill = NA,
    align = "center"
  ),
  digits = 0
)

# Daily doses as bars, with the 7-day rolling average drawn on top as a line.
# na.rm = TRUE drops the NA-padded ends of wk_avg silently instead of with a
# warning; TRUE is spelled out because T is an ordinary variable that can be
# reassigned. The argument-less scale_y_continuous() was removed: ggplot2
# already applies the default continuous y scale.
ggplot(vaccine_CA, aes(administered_date, total_doses)) +
  geom_col(fill = "yellowgreen") +
  geom_line(aes(y = wk_avg), color = "darkgreen", na.rm = TRUE)