# This project is based on the "Visualizing COVID-19" project available at learn.datacamp.com.
# Load the packages used throughout this script: dplyr for the pipe, the data
# verbs and glimpse(); tibble for tribble(); ggplot2 for every plot.
library(dplyr)
library(tibble)
library(ggplot2)

# Read the worldwide cumulative confirmed-case counts used for the first
# line plot. Columns are parsed positionally as (Date, integer).
confirmed_cases_worldwide <- read.csv(
  "datasets/confirmed_cases_worldwide.csv",
  colClasses = c("Date", "integer")
)

# Show the result
glimpse(confirmed_cases_worldwide)
## Rows: 56
## Columns: 2
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01-...
## $ cum_cases <int> 555, 653, 941, 1434, 2118, 2927, 5578, 6166, 8234, 9927, ...
# Line plot of the cumulative number of cases confirmed globally over time.
# `date` is already parsed as Date by the colClasses argument of read.csv(),
# so it is plotted directly without a second as.Date() conversion.
g1 <- ggplot(confirmed_cases_worldwide, aes(x = date, y = cum_cases)) +
  geom_line() +
  labs(
    title = "Cumulative Number of Cases Confirmed Globally over Time",
    x = "",
    y = "Number of Cases"
  ) +
  theme(plot.title = element_text(hjust = 0.5))  # center the plot title
g1
# Read the data behind the China vs. rest-of-world comparison plot.
# Columns are parsed positionally as (factor, Date, integer, integer).
confirmed_cases_china_vs_world <- read.csv(
  file = "datasets/confirmed_cases_china_vs_world.csv",
  colClasses = c(
    "factor",
    "Date",
    "integer",
    "integer"
  )
)

# Show the result
glimpse(confirmed_cases_china_vs_world)
## Rows: 112
## Columns: 4
## $ is_china <fct> China, China, China, China, China, China, China, China, C...
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01-...
## $ cases <int> 548, 95, 277, 486, 669, 802, 2632, 578, 2054, 1661, 2089,...
## $ cum_cases <int> 548, 643, 920, 1406, 2075, 2877, 5509, 6087, 8141, 9802, ...
# Line plot comparing cumulative cases in China with cases elsewhere.
# The aesthetics are set on the line layer, not in ggplot(), so layers added
# to g2 later with their own data do not inherit them.
g2 <- ggplot(confirmed_cases_china_vs_world) +
  geom_line(
    mapping = aes(
      x = date,
      y = cum_cases,
      group = is_china,
      colour = is_china
    )
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),  # center the plot title
    legend.title = element_blank()           # no legend heading
  ) +
  labs(
    x = "",
    y = "Cumulative Confirmed Cases",
    title = "Cumulative Number of Confirmed Cases over Time"
  )
g2
# WHO event dates and their plot labels (data originally provided by DataCamp).
# Rows are kept in the same order as the original table.
who_events <- tibble(
  date = as.Date(c("2020-01-30", "2020-03-11", "2020-02-13")),
  event = c(
    "Global health\nemergency declared",
    "Pandemic\ndeclared",
    "China reporting\nchange"
  )
)
# Overlay the WHO events on the China vs. elsewhere plot: a dashed vertical
# line at each event date, plus its label rotated 90 degrees and drawn at a
# fixed height of y = 100000.
g2 +
  geom_vline(
    mapping = aes(xintercept = date),
    data = who_events,
    linetype = "dashed"
  ) +
  geom_text(
    mapping = aes(x = date, label = event),
    data = who_events,
    y = 100000,
    angle = 90,
    size = 3.5
  )
# Keep only the China rows dated 2020-02-15 or later (the bound is inclusive).
china_after_feb15 <- confirmed_cases_china_vs_world %>%
  filter(
    is_china == "China",
    date >= as.Date("2020-02-15")
  )
# Cumulative cases in China from Feb. 15 onward, with a GAM smooth (no
# confidence band) drawn over the raw line.
g3 <- ggplot(china_after_feb15, aes(x = date, y = cum_cases)) +
  geom_line() +
  geom_smooth(se = FALSE, method = "gam") +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    x = "Date",
    y = "Cumulative Confirmed Cases",
    title = "Cumulative Number of Cases Confirmed in China after Feb. 15"
  )
g3
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
# Keep only the rows for countries other than China.
not_china <- filter(
  confirmed_cases_china_vs_world,
  is_china == "Not China"
)
# Base line plot of cumulative cases outside China; g4 is reused below with
# different smoothers.
g4 <- ggplot(not_china, aes(x = date, y = cum_cases)) +
  geom_line() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    x = "Date",
    y = "Cumulative Confirmed Cases",
    title = "Cumulative Number of Cases Confirmed in Countries Other Than China"
  )

# Fit a GAM to cases in countries other than China (no confidence band).
g4 +
  geom_smooth(se = FALSE, method = "gam")
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
# Same plot on a log10 y axis with a linear-model trend line (no confidence
# band); the y-axis label from g4 is replaced to reflect the log scale.
g4 +
  geom_smooth(se = FALSE, method = "lm") +
  scale_y_log10() +
  labs(y = "log Cumulative Confirmed Cases")
## `geom_smooth()` using formula 'y ~ x'
# Read the per-country case data. Columns are parsed positionally as
# (factor, factor, Date, integer, integer).
confirmed_cases_by_country <- read.csv(
  file = "datasets/confirmed_cases_by_country.csv",
  colClasses = c(
    "factor",
    "factor",
    "Date",
    "integer",
    "integer"
  )
)
glimpse(confirmed_cases_by_country)
## Rows: 13,272
## Columns: 5
## $ country <fct> Afghanistan, Albania, Algeria, Andorra, Antigua and Barbu...
## $ province <fct> , , , , , , , , , , , , , , , , , , , , , , , , ,
## $ date <date> 2020-01-22, 2020-01-22, 2020-01-22, 2020-01-22, 2020-01-...
## $ cases <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ cum_cases <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
# Total cases per country: sum the `cases` column within each country, keep
# the 7 largest totals (ties included) and list them largest first.
# slice_max() names the ranking column explicitly; the superseded top_n()
# ranked by the last column implicitly and printed a "Selecting by" message.
top_countries_by_total_cases <- confirmed_cases_by_country %>%
  group_by(country) %>%
  summarize(total_cases = sum(cases)) %>%
  slice_max(total_cases, n = 7) %>%
  arrange(desc(total_cases))
## `summarise()` ungrouping output (override with `.groups` argument)

# See the result
top_countries_by_total_cases
## # A tibble: 7 x 2
## country total_cases
## <fct> <int>
## 1 Italy 31506
## 2 Iran 16169
## 3 Spain 11748
## 4 Germany 9257
## 5 Korea, South 8320
## 6 France 7699
## 7 US 6421
# Bar chart of the top 7 countries by number of cases as of March 17, 2020.
# Bars are ordered from the largest total to the smallest and filled by
# country.
g5 <- ggplot(
  data = top_countries_by_total_cases,
  mapping = aes(
    x = reorder(country, -total_cases),
    y = total_cases,
    fill = country
  )
) +
  geom_col() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank()
  ) +
  labs(
    x = "",
    y = "Cumulative Confirmed Cases",
    title = "Cumulative Number of Confirmed Cases by Country (3/17/2020)"
  )
g5
# Read the cumulative case series for the top 7 countries outside China.
# Columns are parsed positionally as (factor, Date, integer).
confirmed_cases_top7_outside_china <- read.csv(
  file = "datasets/confirmed_cases_top7_outside_china.csv",
  colClasses = c(
    "factor",
    "Date",
    "integer"
  )
)
glimpse(confirmed_cases_top7_outside_china)
## Rows: 2,030
## Columns: 3
## $ country <fct> "Germany", "Iran", "Italy", "Korea, South", "Spain", "US"...
## $ date <date> 2020-02-18, 2020-02-18, 2020-02-18, 2020-02-18, 2020-02-...
## $ cum_cases <int> 16, 0, 3, 31, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, ...
# Line plot of cum_cases vs. date from confirmed_cases_top7_outside_china,
# with one line per country, colored by country.
g6 <- ggplot(confirmed_cases_top7_outside_china) +
  geom_line(
    mapping = aes(
      x = date,
      y = cum_cases,
      group = country,
      colour = country
    )
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank()
  ) +
  labs(
    x = "",
    y = "Cumulative Confirmed Cases",
    title = "Number of Confirmed Cases by Country (Top 7)"
  )
g6