Loading the necessary packages

# Installing the needed packages
#install.packages("tidyverse") 
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.8
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Setting the working directory

# Point the session at the project folder so that the relative file name
# passed to read_csv() further down resolves. The path is anchored at "~",
# so a preliminary setwd("~/") is not needed.
# NOTE(review): setwd() ties the script to one machine's folder layout;
# consider an RStudio project or paths built with file.path() instead.
setwd("~/data related articles")

Loading the vaccine dataset (CSV file)

# Read the state-level vaccination table into a tibble. The file name is
# relative, so it is resolved against the current working directory.
Nigeria_covid_data <- read_csv(
  file = "Nigerian Covid-19 vaccination dataset.csv"
)
## Rows: 37 Columns: 7
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): State
## dbl (6): Latitude, Longitude, Population, Total Vaccinated Population, First Dose (Partially Vaccinated), Second Dose (Fully Vaccinated)
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

Performing exploratory data analysis (EDA) on the data

# List the column names to confirm they are spelled and formatted consistently
names(Nigeria_covid_data)
## [1] "State"                             "Latitude"                         
## [3] "Longitude"                         "Population"                       
## [5] "Total Vaccinated Population"       "First Dose (Partially Vaccinated)"
## [7] "Second Dose (Fully Vaccinated)"
# Per-column summary: quartiles, mean and range for the numeric columns,
# length and class for the character column
summary(object = Nigeria_covid_data)
##     State              Latitude        Longitude        Population      
##  Length:37          Min.   : 4.664   Min.   : 3.350   Min.   : 2277961  
##  Class :character   1st Qu.: 6.340   1st Qu.: 5.470   1st Qu.: 3727347  
##  Mode  :character   Median : 7.870   Median : 7.320   Median : 4671695  
##                     Mean   : 8.486   Mean   : 7.291   Mean   : 5268934  
##                     3rd Qu.:10.620   3rd Qu.: 8.520   3rd Qu.: 5741815  
##                     Max.   :13.060   Max.   :13.270   Max.   :15076892  
##  Total Vaccinated Population First Dose (Partially Vaccinated)
##  Min.   : 30774              Min.   : 22693                   
##  1st Qu.: 60382              1st Qu.: 39852                   
##  Median : 83952              Median : 54972                   
##  Mean   :106458              Mean   : 68492                   
##  3rd Qu.:102626              3rd Qu.: 70049                   
##  Max.   :647788              Max.   :404414                   
##  Second Dose (Fully Vaccinated)
##  Min.   :  8081                
##  1st Qu.: 22162                
##  Median : 28980                
##  Mean   : 37966                
##  3rd Qu.: 34718                
##  Max.   :243374
# Compact overview: one line per column with its type and first few values
glimpse(x = Nigeria_covid_data)
## Rows: 37
## Columns: 7
## $ State                               <chr> "Abia", "Adamawa", "Akwa Ibom", "A~
## $ Latitude                            <dbl> 5.532003, 10.270341, 5.007996, 6.2~
## $ Longitude                           <dbl> 7.486002, 13.270032, 7.849999, 7.0~
## $ Population                          <dbl> 3727347, 4248436, 5482177, 5527809~
## $ `Total Vaccinated Population`       <dbl> 56852, 68510, 66166, 60382, 97045,~
## $ `First Dose (Partially Vaccinated)` <dbl> 34029, 39852, 41134, 41334, 65625,~
## $ `Second Dose (Fully Vaccinated)`    <dbl> 22823, 28658, 25032, 19048, 31420,~
# Vaccination coverage per state, each expressed as a percentage of the
# state's population.

# Fully vaccinated: people who received the second dose. This must be derived
# from `Second Dose (Fully Vaccinated)`; using `Total Vaccinated Population`
# here would overstate it, because in this dataset the total appears to be
# first dose + second dose (e.g. 34029 + 22823 = 56852 in the first row).
Nigeria_covid_data$percent_fully_vaccinated <-
  Nigeria_covid_data[["Second Dose (Fully Vaccinated)"]] * 100 /
  Nigeria_covid_data[["Population"]]

# Partially vaccinated: people who have received the first dose
Nigeria_covid_data$percent_first_dose <-
  Nigeria_covid_data[["First Dose (Partially Vaccinated)"]] * 100 /
  Nigeria_covid_data[["Population"]]

# Total vaccinated population (the dataset's combined count). Added as the
# last column so the positions of the two columns above are unchanged.
Nigeria_covid_data$percent_total_vaccinated <-
  Nigeria_covid_data[["Total Vaccinated Population"]] * 100 /
  Nigeria_covid_data[["Population"]]

Exporting to CSV for visualization

# Save the table, including the derived percentage columns, as a CSV file for
# use in a visualization tool. The destination is assembled with file.path()
# so the separators are well-formed (the literal previously contained "//").
# row.names = FALSE keeps R's row index out of the file.
write.csv(
  Nigeria_covid_data,
  file = file.path("~", "data related articles", "naija_covid_vaccine.csv"),
  row.names = FALSE
)