Analyze global COVID-19 cases and deaths using real-world data from Our World in Data.
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.3.3
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.3.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load the raw Our World in Data COVID-19 export into a tibble.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
covid <- read_csv(file = "D:/R Programming/data/owid-covid-data.csv")
## Rows: 429435 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): iso_code, continent, location, tests_units
## dbl (62): total_cases, new_cases, new_cases_smoothed, total_deaths, new_dea...
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
This dataset has over 429,000 rows and 67 columns, including cases, deaths, dates, locations, and more.
# Reduce the raw data to the five columns used below and discard rows that
# lack either a continent or a cumulative case count.
# NOTE(review): rows with a missing continent are presumably OWID aggregates
# (e.g. "World") - confirm against the dataset codebook.
covid_clean <- covid %>%
  filter(!(is.na(continent) | is.na(total_cases))) %>%
  select(location, continent, date, total_cases, total_deaths)
# Most recent date that still carries a case total after cleaning; printed so
# the reader can see the snapshot date.
latest_date <- max(covid_clean[["date"]])
latest_date
## [1] "2024-08-04"
# Continent-level totals built from each location's most recent record.
# total_cases / total_deaths are summed as-is, so every location must
# contribute exactly one row (presumably these are cumulative counts - confirm
# against the OWID codebook).
# Filtering on the single global `latest_date` silently drops any location
# that has no row on that exact day and undercounts its continent, so the
# latest date is resolved per location instead.
covid_latest <- covid_clean %>%
  group_by(location) %>%
  filter(date == max(date)) %>%
  # group_by() replaces the location grouping here (default .add = FALSE)
  group_by(continent) %>%
  summarise(
    total_cases = sum(total_cases, na.rm = TRUE),
    total_deaths = sum(total_deaths, na.rm = TRUE)
  )
covid_latest
## # A tibble: 6 × 3
## continent total_cases total_deaths
## <chr> <dbl> <dbl>
## 1 Africa 13145380 259117
## 2 Asia 301499099 1637249
## 3 Europe 252916868 2102483
## 4 North America 124492666 1671178
## 5 Oceania 15003352 32918
## 6 South America 68809418 1354187
# Reshape the continent summary from wide to long: one row per
# continent/metric pair, with the metric name in `metric` and its value in
# `count`.
covid_tidy <- pivot_longer(
  covid_latest,
  cols = all_of(c("total_cases", "total_deaths")),
  names_to = "metric",
  values_to = "count"
)
covid_tidy
## # A tibble: 12 × 3
## continent metric count
## <chr> <chr> <dbl>
## 1 Africa total_cases 13145380
## 2 Africa total_deaths 259117
## 3 Asia total_cases 301499099
## 4 Asia total_deaths 1637249
## 5 Europe total_cases 252916868
## 6 Europe total_deaths 2102483
## 7 North America total_cases 124492666
## 8 North America total_deaths 1671178
## 9 Oceania total_cases 15003352
## 10 Oceania total_deaths 32918
## 11 South America total_cases 68809418
## 12 South America total_deaths 1354187
Asia has the highest total case count of any continent (301,499,099 cases).
# Global death toll: add up the per-continent death totals.
covid_latest %>%
  pull(total_deaths) %>%
  sum(na.rm = TRUE)
## [1] 7057132
Long format is preferred for plotting grouped metrics because it works better
with ggplot2 (for example `facet_wrap()`) and other tidyverse tools.