# Read the ECDC daily national case/death CSV for the EU/EEA straight from its
# URL. The resulting data frame is called "data"; empty strings become NA and
# the "UTF-8-BOM" encoding drops a leading byte-order mark if one is present.
data <- read.csv(
  file = "https://opendata.ecdc.europa.eu/covid19/nationalcasedeath_eueea_daily_ei/csv",
  na.strings = "",
  fileEncoding = "UTF-8-BOM"
)
#loading additional packages
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggthemes)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(directlabels)
# Build the working data frame "covid_eu" from the raw download:
#   * parse dateRep with dmy(), i.e. as day-month-year, into a new Date column
#   * store countriesAndTerritories and continentExp as factors
#   * place the new date column immediately before dateRep
#   * sort the rows chronologically
covid_eu <- data %>%
  mutate(
    date = dmy(dateRep),
    countriesAndTerritories = factor(countriesAndTerritories),
    continentExp = factor(continentExp)
  ) %>%
  relocate(date, .before = dateRep) %>%
  arrange(date)
# Daily incidence per 100K population and its 7-day rolling average, by country.
# On inspection, the case counts on the first reported dates (2021-02-28 and
# 2021-03-01) are very high for each country, so those dates are omitted.
covid_eu_1 <- covid_eu %>%
  # Drop the few negative case counts (filter() also drops rows where the
  # condition is NA) and keep only dates after 2021-03-01. The cutoff is an
  # explicit Date rather than a bare string.
  filter(cases > -1, date > as.Date("2021-03-01")) %>%
  # Group by country so the rolling window never spans two countries.
  group_by(countriesAndTerritories) %>%
  mutate(
    # Cases per 100K population.
    cases_100k = (cases / popData2020) * 100000,
    # Trailing 7-day mean: each value averages that row and the six rows before
    # it within the country (rows are expected to be in date order already).
    # rollapplyr() is right-aligned by default; the previous align = "left"
    # override turned this into a forward-looking window. partial = TRUE
    # averages whatever rows exist at the start of each series instead of
    # returning NA.
    weekly.inc.rate = rollapplyr(
      cases_100k,
      width = 7,
      FUN = mean,
      partial = TRUE
    )
  ) %>%
  # Return an ungrouped table so later steps do not inherit the grouping.
  ungroup()
# Plotting incidence rate over time for a specific country (France).
# The aesthetics are declared in ggplot() so that geom_line() draws the
# filtered rows coming down the pipe. Passing `data = covid_eu_1` to
# geom_line() would override that filtered data and draw every country as a
# single zig-zag (near-vertical) line.
covid_eu_1 %>%
  filter(countriesAndTerritories == "France") %>%
  ggplot(aes(x = date, y = weekly.inc.rate)) +
  geom_line()
# Single-country line chart: France's rolling incidence rate over time, with a
# cleaned-up theme and a formatted two-line title.
covid_eu_1 %>%
  # Just looking at one country, France.
  filter(countriesAndTerritories == "France") %>%
  group_by(countriesAndTerritories) %>%
  ggplot(mapping = aes(x = date, y = weekly.inc.rate)) +
  geom_line() +
  # Apply the base theme first; the title tweaks below are layered on top.
  theme_classic() +
  labs(
    title = "France 2021:\nCOVID-19 Incidence Rate per 100K Population",
    x = "Date",
    y = "Incidence per 100,000"
  ) +
  # All title formatting in one call: centered, line height 1, bold, size 16.
  theme(
    plot.title = element_text(
      hjust = 0.5,
      lineheight = 1,
      face = "bold",
      size = 16
    )
  )
# Now let's look at two countries within the same line graph.
covid_eu_1 %>%
  # `%in% c(...)` keeps the rows belonging to any of the listed countries.
  filter(countriesAndTerritories %in% c("France", "Spain")) %>%
  group_by(countriesAndTerritories) %>%
  ggplot(
    mapping = aes(
      x = date,
      y = weekly.inc.rate,
      # `group` gives one line per country; `color` tells the lines apart and
      # also generates the legend.
      group = countriesAndTerritories,
      color = countriesAndTerritories
    )
  ) +
  # Thicker lines.
  geom_line(size = 1) +
  # The complete theme goes first; added after the theme() call below it would
  # undo those tweaks.
  theme_classic() +
  labs(
    title = "2021 COVID-19 Incidence Rate: France & Spain",
    # Axis titles.
    x = "Date",
    y = "Cases per 100,000 Persons",
    # The lines are distinguished by colour, so the legend title is set through
    # the colour label.
    colour = "Country"
  ) +
  theme(
    # Centered, bold, larger plot title.
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    # Legend moved from the right-hand side to the bottom of the graph.
    legend.position = "bottom"
  ) +
  # Line and legend colours.
  scale_colour_manual(values = c("Dark Blue", "Dark Red")) +
  # One break/label per month; "%b" is the abbreviated month name ("%B" would
  # show the full name).
  scale_x_date(date_breaks = "1 month", date_labels = "%b")
#########################################################################################################################################################################
# CREATE A TABLE
#########################################################################################################################################################################
# Table of cumulative cases, deaths and case fatality rate per country.

# Step 1: attach the per-country totals to every row.
covid_eu_2 <- covid_eu %>%
  # The minimum of deaths in the summary is -3; drop rows with negative cases
  # or deaths before summing.
  filter(cases > -1, deaths > -1) %>%
  group_by(countriesAndTerritories) %>%
  mutate(
    tot_cases = sum(cases),
    tot_deaths = sum(deaths),
    # Deaths per 1,000 reported cases.
    fatality_rate = (tot_deaths / tot_cases) * 1000
  ) %>%
  # Drop the grouping so later steps start from an ungrouped table.
  ungroup()

# Step 2: the totals and the rate are identical on every row of a country, so
# keeping the distinct combinations of these four columns yields exactly one
# row per country with only the columns the table needs. Rows are then ordered
# by fatality rate, lowest to highest.
cov_table <- covid_eu_2 %>%
  distinct(countriesAndTerritories, tot_cases, tot_deaths, fatality_rate) %>%
  arrange(fatality_rate)
library(knitr) #knitr and kableExtra packages to create and format simple tables
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Display cov_table as a formatted table: kable() (knitr) builds the table and
# kable_styling() (kableExtra) applies nicer default formatting.
# NOTE(review): fatality_rate is computed as deaths per 1,000 cases, so the
# last column label ("per 1,000 Persons") may be better worded as
# "per 1,000 Cases" - confirm before changing the published label.
knitr::kable(
  cov_table,
  # Display names for the four columns, in column order.
  col.names = c(
    "Country",
    "Total Cases",
    "Total Deaths",
    "Case Fatality per 1,000 Persons"
  ),
  # Alignment per column, left to right: left, center, center, center.
  align = "lccc",
  # <b> ... </b> bolds the caption; the closing tag needs the slash.
  caption = "<b> 2021 COVID Cases and Deaths in Europe </b>"
) %>%
  kable_styling()
| Country | Total Cases | Total Deaths | Case Fatality per 1,000 Persons |
|---|---|---|---|
| Iceland | 19861 | 36 | 1.812598 |
| Norway | 328713 | 1202 | 3.656685 |
| Cyprus | 142878 | 614 | 4.297373 |
| Denmark | 581276 | 3038 | 5.226433 |
| Finland | 209258 | 1446 | 6.910130 |
| Netherlands | 2920438 | 20255 | 6.935604 |
| Estonia | 230514 | 1869 | 8.107967 |
| Ireland | 635979 | 5838 | 9.179548 |
| Luxembourg | 95222 | 896 | 9.409590 |
| Austria | 1236181 | 12950 | 10.475812 |
| Malta | 40889 | 471 | 11.518990 |
| Slovakia | 1294019 | 15516 | 11.990550 |
| Sweden | 1242733 | 15221 | 12.248005 |
| Liechtenstein | 5503 | 68 | 12.356896 |
| Slovenia | 445228 | 5876 | 13.197732 |
| Belgium | 1980025 | 27729 | 14.004369 |
| Lithuania | 497643 | 7059 | 14.184867 |
| France | 8399846 | 120983 | 14.403002 |
| Czechia | 2378728 | 34923 | 14.681376 |
| Portugal | 1205993 | 18698 | 15.504236 |
| Germany | 6670407 | 107202 | 16.071283 |
| Spain | 5393268 | 88619 | 16.431410 |
| Latvia | 264031 | 4408 | 16.695009 |
| Croatia | 662397 | 11779 | 17.782387 |
| Greece | 1017445 | 19553 | 19.217746 |
| Poland | 3881349 | 89714 | 23.114129 |
| Italy | 5282076 | 135178 | 25.591832 |
| Hungary | 1213318 | 37376 | 30.804785 |
| Romania | 1795418 | 57935 | 32.268252 |
| Bulgaria | 720376 | 29847 | 41.432530 |