Completed as part of the Johns Hopkins University Data Science Specialization, Developing Data Products course.
The task for this assignment was simply to “create a web page using R Markdown that features a map created with Leaflet”.
For this task, I have decided to display, for each country on a map, the total number of Covid-19 deaths up until August 24th 2020 and the total number of Covid-19 deaths up until August 24th 2021. By way of comparison, I will also add the percentage increase between the two dates for each country.
## Getting and Cleaning the Data
I will access three datasets: one with the Covid-19 information for 24 August 2020, one with the same information for 24 August 2021, and one with the coordinates for each country.
A little further processing is required as some of the country names don’t match up and in a few cases the coordinates are not provided. I first edit the (factor) country names, then add missing country coordinate information, and then create the final clean dataset for the coordinate information.
I display the first few entries of each dataframe.
library(leaflet)
library(dplyr)
# Remote sources: the two daily Covid-19 reports and the country coordinates.
URL2020 <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/08-24-2020.csv"
URL2021 <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/08-24-2021.csv"
URLcountries <- "https://raw.githubusercontent.com/albertyw/avenews/master/old/data/average-latitude-longitude-countries.csv"

# Local copies are written to the current working directory.
path2020 <- "./08-24-2020.csv"
path2021 <- "./08-24-2021.csv"
pathCountries <- "./countries.csv"

# Fetch all three files first, then parse them.
download.file(url = URL2020, destfile = path2020)
download.file(url = URL2021, destfile = path2021)
download.file(url = URLcountries, destfile = pathCountries)

data2020 <- read.csv(file = path2020)
data2021 <- read.csv(file = path2021)
countries <- read.csv(file = pathCountries)
# Replace values of a factor or character vector according to a lookup table.
#
# The `levels(x)[levels(x) == old] <- new` idiom only renames anything when
# `x` is a factor. Since R 4.0 `read.csv()` returns character columns by
# default, so that idiom leaves the names untouched and the affected
# countries are later dropped by the merges. This helper handles both
# representations and keeps the type of `x` unchanged.
#
# x:       factor or character vector of names.
# mapping: named character vector; names are the old values, elements the
#          replacements.
# Returns `x` with every value found in `names(mapping)` replaced.
rename_values <- function(x, mapping) {
  if (is.factor(x)) {
    hit <- levels(x) %in% names(mapping)
    levels(x)[hit] <- unname(mapping[levels(x)[hit]])
  } else {
    hit <- x %in% names(mapping)
    x[hit] <- unname(mapping[x[hit]])
  }
  x
}

# Coordinate-table names (left) rewritten to the spelling used by the
# Covid-19 reports (right), so that the two sources can be merged by country.
country_renames <- c(
  "United States" = "US",
  "Myanmar" = "Burma",
  "Cape Verde" = "Cabo Verde",
  "Congo" = "Congo (Brazzaville)",
  "Congo, The Democratic Republic of the" = "Congo (Kinshasa)",
  "Czech Republic" = "Czechia",
  "Swaziland" = "Eswatini",
  "Iran, Islamic Republic of" = "Iran",
  "Korea, Republic of" = "Korea, South",
  "Lao People's Democratic Republic" = "Laos",
  "Libyan Arab Jamahiriya" = "Libya",
  "Moldova, Republic of" = "Moldova",
  "Russian Federation" = "Russia",
  "Syrian Arab Republic" = "Syria",
  "Tanzania, United Republic of" = "Tanzania",
  "Palestinian Territory" = "West Bank and Gaza"
)
countries$Country <- rename_values(countries$Country, country_renames)

# In the reports themselves only Taiwan needs adjusting (trailing asterisk).
report_renames <- c("Taiwan*" = "Taiwan")
data2020$Country_Region <- rename_values(data2020$Country_Region, report_renames)
data2021$Country_Region <- rename_values(data2021$Country_Region, report_renames)
# Countries present in the Covid-19 reports but absent from the coordinate
# table; their rows are appended so they survive the later merge.
missing_codes <- c("NM", "KO", "SS")
missing_names <- c("North Macedonia", "Kosovo", "South Sudan")
missing_lat <- c(41.6086, 42.6026, 6.8770)
missing_lng <- c(21.7453, 20.9030, 31.3070)

missCountries <- data.frame(
  ISO.3166.Country.Code = missing_codes,
  Country = missing_names,
  Latitude = missing_lat,
  Longitude = missing_lng
)
countries <- rbind(countries, missCountries)

# Preview the first six rows of the 2020 report.
head(data2020, n = 6L)
## FIPS Admin2 Province_State Country_Region Last_Update Lat
## 1 NA Afghanistan 2020-08-25 04:28:02 33.93911
## 2 NA Albania 2020-08-25 04:28:02 41.15330
## 3 NA Algeria 2020-08-25 04:28:02 28.03390
## 4 NA Andorra 2020-08-25 04:28:02 42.50630
## 5 NA Angola 2020-08-25 04:28:02 -11.20270
## 6 NA Antigua and Barbuda 2020-08-25 04:28:02 17.06080
## Long_ Confirmed Deaths Recovered Active Combined_Key
## 1 67.70995 38045 1390 28360 8305 Afghanistan
## 2 20.16830 8605 254 4413 3938 Albania
## 3 1.65960 41858 1446 29369 11043 Algeria
## 4 1.52180 1060 53 877 130 Andorra
## 5 17.87390 2222 100 877 1245 Angola
## 6 -61.79640 94 3 89 2 Antigua and Barbuda
## Incidence_Rate Case.Fatality_Ratio
## 1 97.753973 3.650076
## 2 299.013135 2.951772
## 3 95.454970 3.454537
## 4 1371.901896 5.000000
## 5 6.760731 4.500450
## 6 95.988890 3.191489
# Preview the first six rows of the 2021 report.
head(data2021, n = 6L)
## FIPS Admin2 Province_State Country_Region Last_Update Lat
## 1 NA Afghanistan 2021-08-25 04:21:41 33.93911
## 2 NA Albania 2021-08-25 04:21:41 41.15330
## 3 NA Algeria 2021-08-25 04:21:41 28.03390
## 4 NA Andorra 2021-08-25 04:21:41 42.50630
## 5 NA Angola 2021-08-25 04:21:41 -11.20270
## 6 NA Antigua and Barbuda 2021-08-25 04:21:41 17.06080
## Long_ Confirmed Deaths Recovered Active Combined_Key Incident_Rate
## 1 67.70995 152660 7083 NA NA Afghanistan 392.1564
## 2 20.16830 140521 2480 NA NA Albania 4882.9314
## 3 1.65960 192626 5063 NA NA Algeria 439.2735
## 4 1.52180 15003 130 NA NA Andorra 19417.5888
## 5 17.87390 46340 1166 NA NA Angola 140.9956
## 6 -61.79640 1540 43 NA NA Antigua and Barbuda 1572.5839
## Case_Fatality_Ratio
## 1 4.6397223
## 2 1.7648608
## 3 2.6284095
## 4 0.8664934
## 5 2.5161847
## 6 2.7922078
# Preview the first six rows of the country coordinate table.
head(countries, n = 6L)
## ISO.3166.Country.Code Country Latitude Longitude
## 1 AD Andorra 42.50 1.50
## 2 AE United Arab Emirates 24.00 54.00
## 3 AF Afghanistan 33.00 65.00
## 4 AG Antigua and Barbuda 17.05 -61.80
## 5 AI Anguilla 18.25 -63.17
## 6 AL Albania 41.00 20.00
Next, I will combine the Covid-19 death counts of countries that have been separated into multiple regions, so that each country is represented by a single observation.
# Collapse a daily report to one row per country.
#
# Some countries are reported as several province/state rows; their `Deaths`
# are summed so that each country appears exactly once.
#
# This is a vectorised replacement for a per-country for loop that grew the
# result with `rbind(df, c(country, total))`. When the country names are
# character strings (the `read.csv()` default since R 4.0), `c()` coerces the
# total to character as well, so the death counts ended up as text and the
# later growth calculation failed.
#
# report:        data frame with `Country_Region` and `Deaths` columns.
# deaths_column: name to give the summed-deaths column in the result.
# Returns a data frame with a character `Country` column and one numeric
# column named `deaths_column`, ordered by first appearance in `report`.
sum_deaths_by_country <- function(report, deaths_column) {
  region <- as.character(report$Country_Region)
  # reorder = FALSE keeps the countries in their order of first appearance.
  totals <- rowsum(report$Deaths, group = region, reorder = FALSE)
  result <- data.frame(Country = rownames(totals), stringsAsFactors = FALSE)
  result[[deaths_column]] <- as.vector(totals[, 1])
  result
}

df1 <- sum_deaths_by_country(data2020, "Deaths_2020")
df2 <- sum_deaths_by_country(data2021, "Deaths_2021")

# Preview the first six per-country totals for 2020.
head(df1)
## Country Deaths_2020
## 1 Afghanistan 1390
## 2 Albania 254
## 3 Algeria 1446
## 4 Andorra 53
## 5 Angola 100
## 6 Antigua and Barbuda 3
# Preview the first six per-country totals for 2021.
head(df2, n = 6L)
## Country Deaths_2021
## 1 Afghanistan 7083
## 2 Albania 2480
## 3 Algeria 5063
## 4 Andorra 130
## 5 Angola 1166
## 6 Antigua and Barbuda 43
Next, I will merge all of the datasets together and add a column for the percentage growth.
# Combine the 2020 and 2021 totals (countries present in both reports only)
# and compute the percentage increase in deaths between the two dates.
#
# Percentage_Growth is 100 * (new - old) / old, rounded to 3 significant
# digits. The plain ratio new / old is a multiplier rather than a percentage.
# Where the 2020 total is 0 the increase is undefined and is reported as NA
# instead of Inf/NaN.
df3 <- merge(df1, df2, by = "Country", all = FALSE) %>%
  mutate(
    Percentage_Growth = ifelse(
      Deaths_2020 > 0,
      signif(100 * (Deaths_2021 - Deaths_2020) / Deaths_2020, 3),
      NA_real_
    )
  )

# Attach coordinates, drop the ISO code, and use the `lat`/`lng` column names
# for the coordinates. Columns are referenced by name rather than position so
# a change in column order cannot silently select the wrong ones.
df4 <- merge(countries, df3, by = "Country", all = FALSE) %>%
  select(-ISO.3166.Country.Code) %>%
  rename(lat = Latitude, lng = Longitude)
head(df4)
## Country lat lng Deaths_2020 Deaths_2021 Percentage_Growth
## 1 Afghanistan 33.00 65.0 1390 7083 410
## 2 Albania 41.00 20.0 254 2480 876
## 3 Algeria 28.00 3.0 1446 5063 250
## 4 Andorra 42.50 1.5 53 130 145
## 5 Angola -12.50 18.5 100 1166 1070
## 6 Antigua and Barbuda 17.05 -61.8 3 43 1330
Finally, I create and add an icon, and then I create and present the map.
# Marker icon: the image is drawn 31 px high, with its width scaled by
# 215/230, and anchored at half that width horizontally and 16 px vertically.
icon_height <- 31
icon_width <- icon_height * 215 / 230

covidIcon <- makeIcon(
  iconUrl = "https://apsic-apac.org/wp-content/uploads/2020/03/Coronavirus-CDC-645x645-statnews.jpg",
  iconWidth = icon_width,
  iconHeight = icon_height,
  iconAnchorX = icon_width / 2,
  iconAnchorY = 16
)
# Popup text for each marker, evaluated by leaflet against the columns of df4.
popup_html <- ~paste0(
  Country,
  "<br/> Deaths 2020: ", Deaths_2020,
  "<br/> Deaths 2021: ", Deaths_2021,
  "<br/> Percentage Growth: ", Percentage_Growth
)

# Base map with the default tile layer, then one clustered marker per row.
base_map <- addTiles(leaflet(data = df4))
my_map <- addMarkers(
  base_map,
  icon = covidIcon,
  clusterOptions = markerClusterOptions(),
  popup = popup_html
)

# Render the map.
my_map