Project2

Approach

Code Base

Load Libraries

library(tidyr)   
library(dplyr)    

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)

Import Data

# Raw CSV of per-country vaccination records hosted on GitHub
url <- "https://raw.githubusercontent.com/AslamF/DATA607-Project-2/refs/heads/main/country_vaccinations.csv"

# Download the file into a data frame, then preview its columns and types
raw_data <- url |>
  read.csv()

glimpse(raw_data)
Rows: 86,512
Columns: 15
$ country                             <chr> "Afghanistan", "Afghanistan", "Afg…
$ iso_code                            <chr> "AFG", "AFG", "AFG", "AFG", "AFG",…
$ date                                <chr> "2021-02-22", "2021-02-23", "2021-…
$ total_vaccinations                  <dbl> 0, NA, NA, NA, NA, NA, 8200, NA, N…
$ people_vaccinated                   <dbl> 0, NA, NA, NA, NA, NA, 8200, NA, N…
$ people_fully_vaccinated             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ daily_vaccinations_raw              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ daily_vaccinations                  <dbl> NA, 1367, 1367, 1367, 1367, 1367, …
$ total_vaccinations_per_hundred      <dbl> 0.00, NA, NA, NA, NA, NA, 0.02, NA…
$ people_vaccinated_per_hundred       <dbl> 0.00, NA, NA, NA, NA, NA, 0.02, NA…
$ people_fully_vaccinated_per_hundred <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ daily_vaccinations_per_million      <dbl> NA, 34, 34, 34, 34, 34, 34, 40, 45…
$ vaccines                            <chr> "Johnson&Johnson, Oxford/AstraZene…
$ source_name                         <chr> "World Health Organization", "Worl…
$ source_website                      <chr> "https://covid19.who.int/", "https…

Identify wide columns we want to collapse and clean up

# Names of the wide measurement columns that get collapsed into
# metric/value pairs; order matches the column order in the raw data.
metric_cols <- c(
  # cumulative counts
  "total_vaccinations", "people_vaccinated", "people_fully_vaccinated",
  # daily counts
  "daily_vaccinations_raw", "daily_vaccinations",
  # population-scaled figures
  "total_vaccinations_per_hundred", "people_vaccinated_per_hundred",
  "people_fully_vaccinated_per_hundred", "daily_vaccinations_per_million"
)

# Reshape wide -> long: every column listed in metric_cols becomes one row
# per original record, with the former column name stored in `metric` and
# its number stored in `value`. All other columns are carried along as-is.
tidy_data <- pivot_longer(
  raw_data,
  cols = all_of(metric_cols),
  names_to = "metric",
  values_to = "value"
)

# Standardise column names and types on the long table:
#   * lower-case every column name,
#   * give three columns more descriptive / consistent names,
#   * parse `date` from text into a Date,
#   * swap underscores for spaces in the metric labels for readability.
tidy_data <- tidy_data |>
  rename_with(tolower) |>
  rename(country_name = country, iso = iso_code, vaccine_used = vaccines) |>
  mutate(
    date = as.Date(date),
    # fixed = TRUE: "_" is matched literally, no regex needed
    metric = gsub("_", " ", metric, fixed = TRUE)
  )

## Handle missing values: drop every row whose `value` is NA

tidy_data <- drop_na(tidy_data, value)


# Show the first 20 rows of the cleaned long table (n = 20 so the tibble
# printer does not truncate the preview to its default row count)
print(head(tidy_data, 20), n = 20)
# A tibble: 20 × 8
   country_name iso   date       vaccine_used  source_name source_website metric
   <chr>        <chr> <date>     <chr>         <chr>       <chr>          <chr> 
 1 Afghanistan  AFG   2021-02-22 Johnson&John… World Heal… https://covid… total…
 2 Afghanistan  AFG   2021-02-22 Johnson&John… World Heal… https://covid… peopl…
 3 Afghanistan  AFG   2021-02-22 Johnson&John… World Heal… https://covid… total…
 4 Afghanistan  AFG   2021-02-22 Johnson&John… World Heal… https://covid… peopl…
 5 Afghanistan  AFG   2021-02-23 Johnson&John… World Heal… https://covid… daily…
 6 Afghanistan  AFG   2021-02-23 Johnson&John… World Heal… https://covid… daily…
 7 Afghanistan  AFG   2021-02-24 Johnson&John… World Heal… https://covid… daily…
 8 Afghanistan  AFG   2021-02-24 Johnson&John… World Heal… https://covid… daily…
 9 Afghanistan  AFG   2021-02-25 Johnson&John… World Heal… https://covid… daily…
10 Afghanistan  AFG   2021-02-25 Johnson&John… World Heal… https://covid… daily…
11 Afghanistan  AFG   2021-02-26 Johnson&John… World Heal… https://covid… daily…
12 Afghanistan  AFG   2021-02-26 Johnson&John… World Heal… https://covid… daily…
13 Afghanistan  AFG   2021-02-27 Johnson&John… World Heal… https://covid… daily…
14 Afghanistan  AFG   2021-02-27 Johnson&John… World Heal… https://covid… daily…
15 Afghanistan  AFG   2021-02-28 Johnson&John… World Heal… https://covid… total…
16 Afghanistan  AFG   2021-02-28 Johnson&John… World Heal… https://covid… peopl…
17 Afghanistan  AFG   2021-02-28 Johnson&John… World Heal… https://covid… daily…
18 Afghanistan  AFG   2021-02-28 Johnson&John… World Heal… https://covid… total…
19 Afghanistan  AFG   2021-02-28 Johnson&John… World Heal… https://covid… peopl…
20 Afghanistan  AFG   2021-02-28 Johnson&John… World Heal… https://covid… daily…
# ℹ 1 more variable: value <dbl>
# ── Analysis 1: Top 20 countries by % of people fully vaccinated ──
# Keep each country's most recent reported value, then rank countries by it.
# with_ties = FALSE is set on both steps on purpose:
#   * per country, a duplicated latest date would otherwise yield two rows,
#     which geom_col() stacks into a single inflated bar;
#   * overall, a tie at rank 20 would otherwise return more than 20
#     countries, contradicting the chart title.
fully_vax <- tidy_data |>
  filter(metric == "people fully vaccinated per hundred") |>
  group_by(country_name) |>
  slice_max(date, n = 1, with_ties = FALSE) |>   # latest snapshot per country
  ungroup() |>
  slice_max(value, n = 20, with_ties = FALSE)    # at most 20 countries

# Horizontal bar chart, countries ordered by their vaccination rate
ggplot(fully_vax, aes(x = reorder(country_name, value), y = value)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 20 Countries by Full Vaccination Rate",
    x     = "Country",
    y     = "People Fully Vaccinated per 100"
  ) +
  theme_minimal()

Conclusion

This data set was originally wide, with multiple metric columns: each vaccination metric was stored in its own column, but these are far better presented as rows, so the metric columns were pivoted into a long format. Missing values were also removed. From the data we can see that wealthier nations had higher rates of vaccination than lower-income countries, which have less access to healthcare. There is an overall upward trend in vaccination in all countries.