Introduction

This project analyzes global COVID-19 pandemic trends using data provided by the World Health Organization (WHO). The project focuses on understanding trends in cases, deaths, and the impact of the pandemic across different countries and time periods through a series of data visualizations.

Data Source

The dataset used in this project was obtained from the World Health Organization (WHO) COVID-19 global data repository. The dataset contains daily updates on confirmed cases and deaths reported worldwide.

R Markdown

Data Import

# Absolute path to the WHO daily COVID-19 CSV extract.
covid_csv_path <- "/Users/prasannameda/Downloads/covid-19-global-daily-data.csv"
# Parse the CSV into a tibble; readr guesses the column types and reports them.
dat <- read_csv(file = covid_csv_path)
## Rows: 554880 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Country_code, Country, WHO_region
## dbl  (4): New_cases, Cumulative_cases, New_deaths, Cumulative_deaths
## date (1): Date_reported
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first rows of the imported data as a quick sanity check.
head(dat)
## # A tibble: 6 × 8
##   Date_reported Country_code Country       WHO_region New_cases Cumulative_cases
##   <date>        <chr>        <chr>         <chr>          <dbl>            <dbl>
## 1 2020-01-04    AF           Afghanistan   EMR               NA                0
## 2 2020-01-04    DZ           Algeria       AFR               NA                0
## 3 2020-01-04    AL           Albania       EUR               NA                0
## 4 2020-01-04    AI           Anguilla      AMR               NA                0
## 5 2020-01-04    AS           American Sam… WPR                0                0
## 6 2020-01-04    AM           Armenia       EUR               NA                0
## # ℹ 2 more variables: New_deaths <dbl>, Cumulative_deaths <dbl>

Visualization 1: Global COVID-19 Cases Over Time

# Total new cases per reporting date: sum New_cases across all rows sharing a
# date, ignoring missing values.
by_report_date <- group_by(dat, Date_reported)
fig1 <- summarise(by_report_date, total_cases = sum(New_cases, na.rm = TRUE))

# Line chart of the daily totals.
ggplot(data = fig1, mapping = aes(Date_reported, total_cases)) +
  geom_line(colour = "purple") +
  labs(title = "Global COVID-19 Cases Over Time", x = "Date", y = "New Cases") +
  theme_minimal()

Visualization 2: Top Countries by Total Cases

# Countries to compare, keyed by their Country_code value and mapped to the
# label shown on the chart. Matching on the code instead of the display name
# keeps a country in the chart even if its spelling in the Country column
# differs from the label used here (the literal "United Kingdom" may not match
# the name used in the data; check unique(dat$Country) to confirm).
fig2_countries <- c(
  IN = "India",
  US = "United States of America",
  BR = "Brazil",
  GB = "United Kingdom"
)

# Highest reported cumulative case count for each selected country.
fig2 <- dat %>%
  filter(Country_code %in% names(fig2_countries)) %>%
  # Replace the source name with the chart label for the matched code.
  mutate(Country = unname(fig2_countries[Country_code])) %>%
  group_by(Country) %>%
  summarise(total_cases = max(Cumulative_cases, na.rm = TRUE))

# Bar chart with one bar per selected country.
ggplot(fig2, aes(x = Country, y = total_cases, fill = Country)) +
  geom_col() +
  labs(
    title = "Top Countries by COVID-19 Cases",
    x = "Country",
    y = "Cumulative Cases"
  ) +
  theme_minimal()

Visualization 3: Cases vs Deaths

# One row per country holding its highest reported cumulative case and death
# counts (missing values ignored).
per_country <- group_by(dat, Country)
fig3 <- summarise(
  per_country,
  total_cases = max(Cumulative_cases, na.rm = TRUE),
  total_deaths = max(Cumulative_deaths, na.rm = TRUE)
)

# Scatter plot with one point per country.
ggplot(data = fig3, mapping = aes(total_cases, total_deaths)) +
  geom_point(colour = "red", alpha = 0.6) +
  labs(title = "COVID-19 Cases vs Deaths", x = "Total Cases", y = "Total Deaths") +
  theme_minimal()

Visualization 4: Daily Global COVID Cases Trend

# Total new cases per reporting date, ignoring missing values.
dates_grouped <- group_by(dat, Date_reported)
fig4 <- summarise(dates_grouped, total_cases = sum(New_cases, na.rm = TRUE))

# Filled area chart of the daily totals.
ggplot(data = fig4, mapping = aes(Date_reported, total_cases)) +
  geom_area(fill = "skyblue", alpha = 0.7) +
  labs(title = "Daily Global COVID-19 Cases Trend", x = "Date", y = "New Cases") +
  theme_minimal()

Visualization 5: Countries with Highest COVID-19 Deaths

# Countries to compare, keyed by their Country_code value and mapped to the
# label shown on the chart. Matching on the code instead of the display name
# keeps a country in the chart even if its spelling in the Country column
# differs from the label used here (the literal "United Kingdom" may not match
# the name used in the data; check unique(dat$Country) to confirm).
fig5_countries <- c(
  IN = "India",
  US = "United States of America",
  BR = "Brazil",
  GB = "United Kingdom"
)

# Highest reported cumulative death count for each selected country.
fig5 <- dat %>%
  filter(Country_code %in% names(fig5_countries)) %>%
  # Replace the source name with the chart label for the matched code.
  mutate(Country = unname(fig5_countries[Country_code])) %>%
  group_by(Country) %>%
  summarise(total_deaths = max(Cumulative_deaths, na.rm = TRUE))

# Horizontal bar chart; reorder() sorts the bars by death count.
ggplot(fig5,
       aes(x = reorder(Country, total_deaths),
           y = total_deaths,
           fill = Country)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Countries with Highest COVID-19 Deaths",
    x = "Country",
    y = "Total Deaths"
  ) +
  theme_minimal()

Visualization 6: Heatmap of COVID-19 Cases

# Monthly new-case totals for three countries. The month key is a "YYYY-MM"
# string, so its alphabetical order on the x axis is also chronological.
fig6 <- dat %>%
  filter(Country %in% c("India",
                        "United States of America",
                        "Brazil")) %>%
  mutate(month = format(Date_reported, "%Y-%m")) %>%
  group_by(Country, month) %>%
  # Drop all grouping so fig6 is a plain tibble and dplyr does not emit its
  # "regrouped the output" message.
  summarise(monthly_cases = sum(New_cases, na.rm = TRUE), .groups = "drop")

# Heatmap: one tile per country and month, coloured by the monthly total.
ggplot(fig6,
       aes(x = month,
           y = Country,
           fill = monthly_cases)) +
  geom_tile() +
  labs(
    title = "Monthly COVID-19 Cases Heatmap",
    x = "Month",
    y = "Country"
  ) +
  theme_minimal() +
  # Rotate the month labels so the many x-axis ticks do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Visualization 7: Comparative COVID-19 Cases

# Highest reported cumulative case count for India, the United States of
# America, and Brazil.
fig7_rows <- filter(
  dat,
  Country %in% c("India", "United States of America", "Brazil")
)
fig7 <- summarise(
  group_by(fig7_rows, Country),
  total_cases = max(Cumulative_cases, na.rm = TRUE)
)

# Bar chart whose bar heights are taken directly from total_cases.
ggplot(data = fig7, mapping = aes(Country, total_cases, fill = Country)) +
  geom_col() +
  labs(title = "Comparative COVID-19 Cases", x = "Country", y = "Cumulative Cases") +
  theme_minimal()

Visualization 8: Interactive Visualization

# Static line chart of the daily totals in fig1.
p <- ggplot(data = fig1, mapping = aes(Date_reported, total_cases)) +
  geom_line(colour = "darkgreen") +
  labs(title = "Interactive Global COVID-19 Trend")

# Convert the ggplot object into a plotly widget.
p %>% ggplotly()

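Note that the chart above is produced with ggplotly(), which converts the ggplot object into an interactive plotly widget; the R code that generated the plot is shown above it (it would be hidden only if echo = FALSE were set on the code chunk).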

Conclusion

The visualizations in this project highlight the global impact of the COVID-19 pandemic and demonstrate patterns in case growth, deaths, and country-level comparisons. The project also demonstrates the use of R and data visualization techniques to communicate complex public health data effectively.