Completed as part of the Johns Hopkins University Data Science Specialization, Developing Data Products course.

The task for this assignment was simply to “create a web page using R Markdown that features a map created with Leaflet”.

For this task, I have decided to display, for each country on a map, the total number of Covid-19 deaths up to August 24th 2020 and the total number of Covid-19 deaths up to August 24th 2021. By way of comparison, I will also add the percentage increase for each country.

## Getting and Cleaning the Data

I will access three datasets: one with the Covid-19 data for 24 August 2020, one with the Covid-19 data for 24 August 2021, and one with the coordinates of each country.

A little further processing is required as some of the country names don’t match up and in a few cases the coordinates are not provided. I first edit the (factor) country names, then add missing country coordinate information, and then create the final clean dataset for the coordinate information.

I display the first few entries of each dataframe.

library(leaflet)
library(dplyr)

# Source files: the CSSEGISandData/COVID-19 daily reports for 24 August 2020
# and 24 August 2021, plus a table of average latitude/longitude per country.
URL2020 <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/08-24-2020.csv"
URL2021 <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/08-24-2021.csv"
URLcountries <- "https://raw.githubusercontent.com/albertyw/avenews/master/old/data/average-latitude-longitude-countries.csv"

# Save local copies in the working directory, then read from those copies.
download.file(URL2020, "./08-24-2020.csv")
download.file(URL2021, "./08-24-2021.csv")
download.file(URLcountries, "./countries.csv")

# The cleaning steps further down treat the country names as factors (they
# rename values through levels()). Request factors explicitly instead of
# relying on read.csv()'s default, which became stringsAsFactors = FALSE in
# R 4.0.0.
data2020 <- read.csv("./08-24-2020.csv", stringsAsFactors = TRUE)
data2021 <- read.csv("./08-24-2021.csv", stringsAsFactors = TRUE)
countries <- read.csv("./countries.csv", stringsAsFactors = TRUE)

# Country names as spelled in the coordinate table (left) and the spelling
# they are renamed to (right).
country_renames <- c(
  "United States" = "US",
  "Myanmar" = "Burma",
  "Cape Verde" = "Cabo Verde",
  "Congo" = "Congo (Brazzaville)",
  "Congo, The Democratic Republic of the" = "Congo (Kinshasa)",
  "Czech Republic" = "Czechia",
  "Swaziland" = "Eswatini",
  "Iran, Islamic Republic of" = "Iran",
  "Korea, Republic of" = "Korea, South",
  "Lao People's Democratic Republic" = "Laos",
  "Libyan Arab Jamahiriya" = "Libya",
  "Moldova, Republic of" = "Moldova",
  "Russian Federation" = "Russia",
  "Syrian Arab Republic" = "Syria",
  "Tanzania, United Republic of" = "Tanzania",
  "Palestinian Territory" = "West Bank and Gaza"
)

# The Covid-19 reports spell Taiwan with a trailing asterisk; it is renamed to
# the plain form (presumably to match the coordinate table -- verify).
report_renames <- c("Taiwan*" = "Taiwan")

# Replace values of `x` according to a lookup table.
#
# x:       factor or character vector of names.
# renames: named character vector; names are the old values, elements the new.
#
# Returns `x` with every value found in names(renames) replaced. Values not in
# the table are left alone. Factors are renamed through their levels, so level
# order is kept. Character vectors are renamed element-wise, so the result does
# not depend on whether read.csv() produced factors or strings.
rename_values <- function(x, renames) {
  if (is.factor(x)) {
    lv <- levels(x)
    hit <- lv %in% names(renames)
    lv[hit] <- unname(renames[lv[hit]])
    levels(x) <- lv
  } else {
    hit <- x %in% names(renames)
    x[hit] <- unname(renames[x[hit]])
  }
  x
}

countries$Country <- rename_values(countries$Country, country_renames)
data2020$Country_Region <- rename_values(data2020$Country_Region, report_renames)
data2021$Country_Region <- rename_values(data2021$Country_Region, report_renames)

# Hand-entered coordinates for three countries that are appended to the
# coordinate table, keyed by country name.
# NOTE(review): "NM" and "KO" do not look like standard ISO 3166 codes. The
# code column is dropped after the merge further down, so this appears
# harmless -- confirm.
missing_lat <- c(
  "North Macedonia" = 41.6086,
  "Kosovo" = 42.6026,
  "South Sudan" = 6.8770
)
missing_lng <- c(
  "North Macedonia" = 21.7453,
  "Kosovo" = 20.9030,
  "South Sudan" = 31.3070
)

# unname() keeps the country names from being picked up as row names.
missCountries <- data.frame(
  ISO.3166.Country.Code = c("NM", "KO", "SS"),
  Country = names(missing_lat),
  Latitude = unname(missing_lat),
  Longitude = unname(missing_lng)
)

countries <- rbind(countries, missCountries)

# Preview the first rows of the 24 August 2020 report. The `##` lines are the
# printed output, wrapped across three column groups.
head(data2020)
##   FIPS Admin2 Province_State      Country_Region         Last_Update       Lat
## 1   NA                               Afghanistan 2020-08-25 04:28:02  33.93911
## 2   NA                                   Albania 2020-08-25 04:28:02  41.15330
## 3   NA                                   Algeria 2020-08-25 04:28:02  28.03390
## 4   NA                                   Andorra 2020-08-25 04:28:02  42.50630
## 5   NA                                    Angola 2020-08-25 04:28:02 -11.20270
## 6   NA                       Antigua and Barbuda 2020-08-25 04:28:02  17.06080
##       Long_ Confirmed Deaths Recovered Active        Combined_Key
## 1  67.70995     38045   1390     28360   8305         Afghanistan
## 2  20.16830      8605    254      4413   3938             Albania
## 3   1.65960     41858   1446     29369  11043             Algeria
## 4   1.52180      1060     53       877    130             Andorra
## 5  17.87390      2222    100       877   1245              Angola
## 6 -61.79640        94      3        89      2 Antigua and Barbuda
##   Incidence_Rate Case.Fatality_Ratio
## 1      97.753973            3.650076
## 2     299.013135            2.951772
## 3      95.454970            3.454537
## 4    1371.901896            5.000000
## 5       6.760731            4.500450
## 6      95.988890            3.191489
# Preview the first rows of the 24 August 2021 report. In this output the rate
# columns are named Incident_Rate and Case_Fatality_Ratio, and Recovered and
# Active are NA for the rows shown.
head(data2021)
##   FIPS Admin2 Province_State      Country_Region         Last_Update       Lat
## 1   NA                               Afghanistan 2021-08-25 04:21:41  33.93911
## 2   NA                                   Albania 2021-08-25 04:21:41  41.15330
## 3   NA                                   Algeria 2021-08-25 04:21:41  28.03390
## 4   NA                                   Andorra 2021-08-25 04:21:41  42.50630
## 5   NA                                    Angola 2021-08-25 04:21:41 -11.20270
## 6   NA                       Antigua and Barbuda 2021-08-25 04:21:41  17.06080
##       Long_ Confirmed Deaths Recovered Active        Combined_Key Incident_Rate
## 1  67.70995    152660   7083        NA     NA         Afghanistan      392.1564
## 2  20.16830    140521   2480        NA     NA             Albania     4882.9314
## 3   1.65960    192626   5063        NA     NA             Algeria      439.2735
## 4   1.52180     15003    130        NA     NA             Andorra    19417.5888
## 5  17.87390     46340   1166        NA     NA              Angola      140.9956
## 6 -61.79640      1540     43        NA     NA Antigua and Barbuda     1572.5839
##   Case_Fatality_Ratio
## 1           4.6397223
## 2           1.7648608
## 3           2.6284095
## 4           0.8664934
## 5           2.5161847
## 6           2.7922078
# Preview the first rows of the coordinate table: a country code, the country
# name, and one latitude/longitude pair per country.
head(countries)
##   ISO.3166.Country.Code              Country Latitude Longitude
## 1                    AD              Andorra    42.50      1.50
## 2                    AE United Arab Emirates    24.00     54.00
## 3                    AF          Afghanistan    33.00     65.00
## 4                    AG  Antigua and Barbuda    17.05    -61.80
## 5                    AI             Anguilla    18.25    -63.17
## 6                    AL              Albania    41.00     20.00

## Data Processing

Next, using `for` loops, I will combine the Covid-19 death counts of countries that are split across multiple regions into a single observation per country.

# Sum the `Deaths` column of a daily report per country.
#
# reports:    data frame with `Country_Region` and `Deaths` columns.
# deaths_col: name to give the column of per-country totals.
#
# Returns a data frame with one row per distinct country, in order of first
# appearance: a character `Country` column and a numeric totals column named
# `deaths_col`.
total_deaths_by_country <- function(reports, deaths_col) {
  # Work with names as character so the result is the same whether read.csv()
  # produced factors or strings.
  region <- as.character(reports$Country_Region)
  country <- unique(region)

  # Preallocate the totals instead of growing a data frame with rbind() on
  # every pass. Keeping them in a numeric vector also means they can never be
  # coerced to character alongside the country name.
  totals <- numeric(length(country))
  for (i in seq_along(country)) {
    totals[i] <- sum(reports$Deaths[region == country[i]])
  }

  result <- data.frame(Country = country, stringsAsFactors = FALSE)
  result[[deaths_col]] <- totals
  result
}

df1 <- total_deaths_by_country(data2020, "Deaths_2020")
df2 <- total_deaths_by_country(data2021, "Deaths_2021")

# Preview the 2020 per-country death totals.
head(df1)
##               Country Deaths_2020
## 1         Afghanistan        1390
## 2             Albania         254
## 3             Algeria        1446
## 4             Andorra          53
## 5              Angola         100
## 6 Antigua and Barbuda           3
# Preview the 2021 per-country death totals.
head(df2)
##               Country Deaths_2021
## 1         Afghanistan        7083
## 2             Albania        2480
## 3             Algeria        5063
## 4             Andorra         130
## 5              Angola        1166
## 6 Antigua and Barbuda          43

Next, I will merge all of the datasets together and add a column for the percentage growth.

# Keep only countries present in both yearly summaries (inner join) and
# express the change in deaths as a percentage of the 2020 total, to three
# significant figures. A country with no deaths recorded in 2020 has no
# defined percentage change, so it gets NA rather than Inf or NaN.
df3 <- merge(df1, df2, by = "Country", all = FALSE) %>%
  mutate(
    Percentage_Growth = ifelse(
      Deaths_2020 > 0,
      signif(100 * (Deaths_2021 - Deaths_2020) / Deaths_2020, 3),
      NA_real_
    )
  )

# Attach the coordinates, drop the country-code column, and rename the
# coordinate columns by name rather than by position. The map code passes no
# explicit coordinates, so it presumably relies on the `lat`/`lng` names.
df4 <- merge(countries, df3, by = "Country", all = FALSE) %>%
  select(-ISO.3166.Country.Code) %>%
  rename(lat = Latitude, lng = Longitude)

head(df4)
##               Country    lat   lng Deaths_2020 Deaths_2021 Percentage_Growth
## 1         Afghanistan  33.00  65.0        1390        7083               410
## 2             Albania  41.00  20.0         254        2480               876
## 3             Algeria  28.00   3.0        1446        5063               250
## 4             Andorra  42.50   1.5          53         130               145
## 5              Angola -12.50  18.5         100        1166              1070
## 6 Antigua and Barbuda  17.05 -61.8           3          43              1330

## The Map

Finally, I create a custom marker icon and use it to build and display the map.

# Custom marker icon. The width is derived from the height (31) using a
# 215:230 ratio, and the anchor is placed at half the width and 16 down.
covidIcon <- local({
  icon_height <- 31
  icon_width <- icon_height * 215 / 230
  makeIcon(
    iconUrl = "https://apsic-apac.org/wp-content/uploads/2020/03/Coronavirus-CDC-645x645-statnews.jpg",
    iconWidth = icon_width,
    iconHeight = icon_height,
    iconAnchorX = icon_width / 2,
    iconAnchorY = 16
  )
})

# Popup text per country: its name, then both death totals and the growth
# figure, separated by HTML line breaks.
popup_html <- ~paste0(
  Country,
  "<br/> Deaths 2020: ", Deaths_2020,
  "<br/> Deaths 2021: ", Deaths_2021,
  "<br/> Percentage Growth: ", Percentage_Growth
)

# Base tiles first, then one clustered marker per row of df4. No coordinates
# are passed explicitly here.
base_map <- addTiles(leaflet(df4))
my_map <- addMarkers(
  base_map,
  icon = covidIcon,
  clusterOptions = markerClusterOptions(),
  popup = popup_html
)

my_map