Loading the necessary packages
# Attach the tidyverse (readr, dplyr, tibble, ...) used throughout this script.
# One-time setup if the package is not installed yet:
#   install.packages("tidyverse")
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v dplyr 1.0.8
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Setting the working directory
# Point the working directory at the project folder so the relative file name
# passed to read_csv() below resolves. A preceding setwd("~/") was dropped: this
# path is already anchored at the home directory, so that call had no effect.
# NOTE(review): setwd() ties the script to one machine's folder layout; consider
# an RStudio project or here::here() if the script needs to be shared.
setwd("~/data related articles")
Loading the vaccine dataset (CSV file)
# Read the state-level vaccination table from the working directory into a
# tibble; column types are guessed by readr.
Nigeria_covid_data <- read_csv(
  file = "Nigerian Covid-19 vaccination dataset.csv"
)
## Rows: 37 Columns: 7
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): State
## dbl (3): Latitude, Longitude, Total Vaccinated Population
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Performing exploratory data analysis (EDA) on the data
# List the column names to check spelling and naming consistency
names(Nigeria_covid_data)
## [1] "State" "Latitude"
## [3] "Longitude" "Population"
## [5] "Total Vaccinated Population" "First Dose (Partially Vaccinated)"
## [7] "Second Dose (Fully Vaccinated)"
# Per-column summary: quartiles and mean for numeric columns, length and class
# for the character column
summary(object = Nigeria_covid_data)
## State Latitude Longitude Population
## Length:37 Min. : 4.664 Min. : 3.350 Min. : 2277961
## Class :character 1st Qu.: 6.340 1st Qu.: 5.470 1st Qu.: 3727347
## Mode :character Median : 7.870 Median : 7.320 Median : 4671695
## Mean : 8.486 Mean : 7.291 Mean : 5268934
## 3rd Qu.:10.620 3rd Qu.: 8.520 3rd Qu.: 5741815
## Max. :13.060 Max. :13.270 Max. :15076892
## Total Vaccinated Population First Dose (Partially Vaccinated)
## Min. : 30774 Min. : 22693
## 1st Qu.: 60382 1st Qu.: 39852
## Median : 83952 Median : 54972
## Mean :106458 Mean : 68492
## 3rd Qu.:102626 3rd Qu.: 70049
## Max. :647788 Max. :404414
## Second Dose (Fully Vaccinated)
## Min. : 8081
## 1st Qu.: 22162
## Median : 28980
## Mean : 37966
## 3rd Qu.: 34718
## Max. :243374
# Compact overview: one line per column showing its type and first values
Nigeria_covid_data %>%
  glimpse()
## Rows: 37
## Columns: 7
## $ State <chr> "Abia", "Adamawa", "Akwa Ibom", "A~
## $ Latitude <dbl> 5.532003, 10.270341, 5.007996, 6.2~
## $ Longitude <dbl> 7.486002, 13.270032, 7.849999, 7.0~
## $ Population <dbl> 3727347, 4248436, 5482177, 5527809~
## $ `Total Vaccinated Population` <dbl> 56852, 68510, 66166, 60382, 97045,~
## $ `First Dose (Partially Vaccinated)` <dbl> 34029, 39852, 41134, 41334, 65625,~
## $ `Second Dose (Fully Vaccinated)` <dbl> 22823, 28658, 25032, 19048, 31420,~
# Derive vaccination-coverage percentages relative to each state's population.
#
# In the rows shown by glimpse(), `Total Vaccinated Population` equals
# `First Dose (Partially Vaccinated)` + `Second Dose (Fully Vaccinated)`, so the
# total is NOT the fully vaccinated count. `percent_fully_vaccinated` is
# therefore computed from the second-dose column (it was previously computed
# from the total), and the total-based figure is kept under its own,
# accurately named column.
Nigeria_covid_data <- mutate(
  Nigeria_covid_data,
  # Share of the population counted as fully vaccinated (second dose)
  percent_fully_vaccinated =
    `Second Dose (Fully Vaccinated)` * 100 / Population,
  # Share of the population counted under the first-dose (partial) column
  percent_first_dose =
    `First Dose (Partially Vaccinated)` * 100 / Population,
  # Share of the population counted in the total vaccinated figure
  percent_total_vaccinated =
    `Total Vaccinated Population` * 100 / Population
)
Export to CSV for visualization
# Write the enriched table (original columns plus the derived percentages) to
# the project folder for use in the visualization step. The path is assembled
# with file.path() so separators are inserted consistently (the previous
# literal contained a stray double slash). row.names = FALSE omits the
# row-index column from the file.
write.csv(
  Nigeria_covid_data,
  file.path("~", "data related articles", "naija_covid_vaccine.csv"),
  row.names = FALSE
)