Assignment1_G4D

Assignment with visualizing spatial data

This is my first attempt to create an R Markdown script with spatial data. First I will need some packages:

#install.packages("rgdal")
#install.packages("sf")
#install.packages("raster")
#install.packages("sp")
#install.packages("cartography")
#install.packages("units", type='binary') # was necessary like this for loading sf
#install.packages("shiny", type='binary')
#install.packages("leaflet")
#install.packages("tmap", dependencies = TRUE, type="binary")
#install.packages("dplyr")
library(rgdal)
library(sf)
library(raster)
library(sp)
library(cartography)
library(dplyr)
#library(tmap)

1. Static Mapping

import a geospatial vector data file

a) with rgdal & sp way

#download.file("http://biogeo.ucdavis.edu/data/diva/adm/DEU_adm.zip", 
#destfile = "./spatialR/G4D/datos/DEU_adm.zip" , mode='wb')

#unzip("./spatialR/G4D/datos/DEU_adm.zip", exdir = "./spatialR")
spdeu <- readOGR(dsn = "./datos/DEU_adm1.shp", verbose = FALSE)
class(spdeu)

## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

head(spdeu@data) # shows data slot with attribute-columns in SpatialPolygonsDataFrame object

##   ID_0 ISO  NAME_0 ID_1             NAME_1 TYPE_1 ENGTYPE_1 NL_NAME_1
## 0   86 DEU Germany    1 Baden-WÃ¼rttemberg   Land     State      <NA>
## 1   86 DEU Germany    2             Bayern   Land     State      <NA>
## 2   86 DEU Germany    3             Berlin   Land     State      <NA>
## 3   86 DEU Germany    4        Brandenburg   Land     State      <NA>
## 4   86 DEU Germany    5             Bremen   Land     State      <NA>
## 5   86 DEU Germany    6            Hamburg   Land     State      <NA>
##   VARNAME_1
## 0      <NA>
## 1   Bavaria
## 2      <NA>
## 3      <NA>
## 4      <NA>
## 5      <NA>

pick html colors (with HEX codes from here) for plotting:

spdeu@data$COLOUR <- "#FFFFFF"  # add column wich contains color white
# add specified colours according to ID_1 column (ID for Bundesland so that every Bundesland gets its own colour)
spdeu@data$COLOUR[spdeu@data$ID_1 == 1] <- "#006837"
spdeu@data$COLOUR[spdeu@data$ID_1 == 2] <- "#28B463"
spdeu@data$COLOUR[spdeu@data$ID_1 == 3] <- "#D4EFDF"
spdeu@data$COLOUR[spdeu@data$ID_1 == 4] <- "#3fa25d"
spdeu@data$COLOUR[spdeu@data$ID_1 == 5] <- "#76D7C4"
spdeu@data$COLOUR[spdeu@data$ID_1 == 6] <- "#A3E4D7"
spdeu@data$COLOUR[spdeu@data$ID_1 == 7] <- "#d16156"
spdeu@data$COLOUR[spdeu@data$ID_1 == 8] <- "#D0ECE7"
spdeu@data$COLOUR[spdeu@data$ID_1 == 9] <- "#A9CCE3"
spdeu@data$COLOUR[spdeu@data$ID_1 == 10] <- "#598c81"
spdeu@data$COLOUR[spdeu@data$ID_1 == 11] <- "#467466"
spdeu@data$COLOUR[spdeu@data$ID_1 == 12] <- "#749188"
spdeu@data$COLOUR[spdeu@data$ID_1 == 13] <- "#184832"
spdeu@data$COLOUR[spdeu@data$ID_1 == 14] <- "#72d1a5"
spdeu@data$COLOUR[spdeu@data$ID_1 == 15] <- "#1bcb7a"
spdeu@data$COLOUR[spdeu@data$ID_1 == 16] <- "#FAE5D3"


plot(spdeu, col=spdeu$COLOUR, main = "Germany's Departments") # plot sp object with colours accoding to COLOUR column and give title

b) with sf way

sfdeu <- st_read(dsn = "./datos/DEU_adm1.shp", quiet = TRUE)
class(sfdeu)

## [1] "sf"         "data.frame"

head(sfdeu)

## Simple feature collection with 6 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 7.512127 ymin: 47.26986 xmax: 14.74682 ymax: 53.74812
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
##   ID_0 ISO  NAME_0 ID_1            NAME_1 TYPE_1 ENGTYPE_1 NL_NAME_1
## 1   86 DEU Germany    1 Baden-Württemberg   Land     State      <NA>
## 2   86 DEU Germany    2            Bayern   Land     State      <NA>
## 3   86 DEU Germany    3            Berlin   Land     State      <NA>
## 4   86 DEU Germany    4       Brandenburg   Land     State      <NA>
## 5   86 DEU Germany    5            Bremen   Land     State      <NA>
## 6   86 DEU Germany    6           Hamburg   Land     State      <NA>
##   VARNAME_1                       geometry
## 1      <NA> MULTIPOLYGON (((8.708373 47...
## 2   Bavaria MULTIPOLYGON (((10.13386 50...
## 3      <NA> MULTIPOLYGON (((13.17789 52...
## 4      <NA> MULTIPOLYGON (((13.87951 53...
## 5      <NA> MULTIPOLYGON (((8.50506 53....
## 6      <NA> MULTIPOLYGON (((10.07162 53...

sfdeu$COLOUR <- spdeu@data$COLOUR # uses the above created colours
head(sfdeu)  # cotains now column COLOUR

## Simple feature collection with 6 features and 10 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 7.512127 ymin: 47.26986 xmax: 14.74682 ymax: 53.74812
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
##   ID_0 ISO  NAME_0 ID_1            NAME_1 TYPE_1 ENGTYPE_1 NL_NAME_1
## 1   86 DEU Germany    1 Baden-Württemberg   Land     State      <NA>
## 2   86 DEU Germany    2            Bayern   Land     State      <NA>
## 3   86 DEU Germany    3            Berlin   Land     State      <NA>
## 4   86 DEU Germany    4       Brandenburg   Land     State      <NA>
## 5   86 DEU Germany    5            Bremen   Land     State      <NA>
## 6   86 DEU Germany    6           Hamburg   Land     State      <NA>
##   VARNAME_1  COLOUR                       geometry
## 1      <NA> #006837 MULTIPOLYGON (((8.708373 47...
## 2   Bavaria #28B463 MULTIPOLYGON (((10.13386 50...
## 3      <NA> #D4EFDF MULTIPOLYGON (((13.17789 52...
## 4      <NA> #3fa25d MULTIPOLYGON (((13.87951 53...
## 5      <NA> #76D7C4 MULTIPOLYGON (((8.50506 53....
## 6      <NA> #A3E4D7 MULTIPOLYGON (((10.07162 53...

plot(st_geometry(sfdeu), col=sfdeu$COLOUR, main = "Germany's Departments") # plot only geometry without attributes and assign the colour mapping to the COLOUR column

c) with cartography way
The __cartography_ package seems to provide useful mapping functionalities. Besides, it comes with geospatial data for Europe.

# Load data
data(nuts2006) # nuts refers to Nomenclature of Territorial Units for Statistics and is a geocode standard for referencing the subdivisions of countries

# Plot a layer with the extent of the EU28 countries with only a background color
plot(nuts0.spdf, border = NA, col = NA, bg = "#b1dfec")
# Plot non european space
plot(world.spdf, col  = "#E3DEBF", border=NA, add=TRUE) 
# Plot a layer of countries borders
plot(nuts0.spdf, border = "grey20", lwd = 2, add = TRUE)
# Plot a layer of NUTS1
plot(nuts1.spdf, border = "grey30", lwd = 2, add = TRUE)
# Plot a layer of NUTS2
plot(nuts2.spdf, border = "grey40", lwd = 0.5, add = TRUE)
# Plot a layer of NUTS3
plot(nuts3.spdf, border = "grey20", lwd = 0.1, add = TRUE)

Label map e.g. as a population map (from here):

# Layout plot
layoutLayer(title = "Countries with highest birth rates in Europe", # title of the map
            author = "Author: M. Hahn",  # 
            sources = "Sources: eurostat", # 
            scale = NULL, # no scale
            col = NA, # no color for the title box 
            coltitle = "black", # color of the title
            frame = FALSE,  # no frame around the map
            bg = "#A6CAE0", # background of the map
            extent = nuts0.spdf) # set the extent of the map

# Non European space
plot(world.spdf, col = "#AAB7B8", border = NA, add = TRUE)
# European (EU28) countries
plot(nuts0.spdf, col = "#c5c5c7",border = "white", lwd = 1, add = TRUE)

# using nuts dataframes
head(nuts0.df)

##      id emp2008 act2008 unemp2008 birth_2008 death_2008 gdppps1999
## 1418 AT  4089.9  4252.2     162.3      77752      75083     186761
## 1782 BE  4445.9  4779.3     333.4     127205     104587     224031
## 1783 BG  3360.7  3560.4     199.7      77712     110523      40238
## 274  CH  4228.8  4375.4     146.6      76691      61233     186180
## 1894 CY   382.9   397.4      14.5       9205       5194      10684
## 1893 CZ  5002.5  5232.3     229.8     119570     104948     127323
##      gdppps2008  pop1999  pop2008
## 1418     259221  7982461  8318592
## 1782     308142 10213752 10666866
## 1783      82922  8230371  7640238
## 274      273871  7123537  7593494
## 1894      19316   682862   789269
## 1893     210336 10289621 10381130

# Selection of the 10 countries with the most births per overall population in 2008 of Europe
nuts0.df$birthperpop <- nuts0.df$birth_2008/nuts0.df$pop2008
dflab <- nuts0.df[order(nuts0.df$birthperpop, decreasing = TRUE),][1:10,] # from nuts0 (country level) dataframe select new column birthperpop and order incresing, then select first 10 rows to get fewest, savae to new dataframe
dflab

##      id emp2008 act2008 unemp2008 birth_2008 death_2008 gdppps1999
## 1417 TR 21193.0 23471.0    2278.0    1262333         NA     447913
## 1127 IE  2101.2  2235.9     134.7      73996      28274      84087
## 759  IS   177.1   182.4       5.4       4835       1987       6866
## 1840 UK 29363.9 31116.4    1752.5     794383     579697    1231099
## 1484 FR 26462.9 28698.5    2235.6     828404     542575    1232943
## 1592 NO  2513.7  2579.5      65.8      60497      41712     115035
## 1091 EE   656.5   694.9      38.4      16028      16675      10443
## 1782 BE  4445.9  4779.3     333.4     127205     104587     224031
## 999  SE  4593.0  4898.4     305.4     109301      91449     199063
## 422  DK  2853.8  2951.8      98.0      65038      54591     123949
##      gdppps2008  pop1999  pop2008 birthperpop
## 1417     829869 66531941 70586256  0.01788355
## 1127     147840  3732201  4401335  0.01681217
## 759        9726   275712   315459  0.01532687
## 1840    1763903 58637924 61192129  0.01298178
## 1484    1713350 60122665 63961956  0.01295151
## 1592     225592  4445329  4737171  0.01277070
## 1091      22812  1379237  1340935  0.01195285
## 1782     308142 10213752 10666866  0.01192525
## 999      284000  8854322  9182927  0.01190263
## 422      169250  5313577  5475791  0.01187737

# Label creation
# make new column called lab, nuts0 and thus dflab contains column id (country code), line break, birth rate per 1000 of population, round to 1 digit
dflab$lab <- paste(dflab$id, "\n", round(dflab$birthperpop*1000,1), sep ="")

# Label plot of the 10 countries with the highest birth rate
labelLayer(spdf = nuts0.spdf, # SpatialPolygonsDataFrame used to plot he labels
           df = dflab, # data frame containing the lables
           txt = "lab", # label field in df
           col = "#19511f", # color of the labels
           cex = 0.6, # size of the labels
           font = 2) # label font

# Add an explanation text
text(x = 5477360, y = 4177311, labels = "The 10 countries of Europe with highest
birth rates in 2008 [per 1000 residents]", cex = 0.7, adj = 0)

Choropleth Map

Choropleth maps display geographical areas or regions which are coloured, shaded or patterned in relation to a data variable. This provides a way to visualise values over a geographical area, which can show variation or patterns across the displayed location.

A common error when making choropleth maps is to encode raw data values (such as population) rather than using normalized values (calculating population per square kilometre for example) to produce a density map.

Let’s make a map of annual population growth rate, that is the rate at which the number of individuals in a region increases in a given time period, expressed as a fraction of the initial population. These data comes from the Eurostat website. For an explanation of NUTS Levels (Nomenclature of Territorial Units for Statistics) see here.

# Compute the compound annual growth rate
nuts2.df$cagr <- (((nuts2.df$pop2008 / nuts2.df$pop1999)^(1/9)) - 1) * 100

# Set a custom color palette
cols <- carto.pal(pal1 = "green.pal", # first color gradient
                  n1 = 2, # number of colors in the first gradiant
                  pal2 = "red.pal", # second color gradient
                  n2 = 4) # number of colors in the second gradiant

# Plot a layer with the extent of the EU28 countries with only a background color
plot(nuts0.spdf, border = NA, col = NA, bg = "#A6CAE0")
# Plot non european space
plot(world.spdf, col  = "#E3DEBF", border=NA, add=TRUE)

# Plot the compound annual growth rate
choroLayer(spdf = nuts2.spdf, # SpatialPolygonsDataFrame of the regions
           df = nuts2.df, # data frame with compound annual growth rate
           var = "cagr", # compound annual growth rate field in df
           breaks = c(-2.43,-1,0,0.5,1,2,3.1), # list of breaks
           col = cols, # colors 
           border = "grey40", # color of the polygons borders
           lwd = 0.5, # width of the borders
           legend.pos = "right", # position of the legend
           legend.title.txt = "Compound Annual\nGrowth Rate", # title of the legend
           legend.values.rnd = 2, # number of decimal in the legend values
           add = TRUE) # add the layer to the current plot

# Plot a layer of countries borders
plot(nuts0.spdf,border = "grey20", lwd=0.75, add=TRUE)

# Layout plot
layoutLayer(title = "Demographic Trends", author = "cartography", 
            sources = "Eurostat, 2008", frame = TRUE, col = NA, 
            scale = NULL,coltitle = "black",
            south = TRUE) # add a south arrow

another version (just an example) could be:

# Where are more births than deaths in 2008?
nuts2.df$bvsd <- nuts2.df$birth_2008 - nuts2.df$death_2008 # positive numbers -> more births
# nuts2.df$bvsd
# Set a custom color palette
cols <- carto.pal(pal1 = "red.pal", # first color gradient
                  n1 = 4, # number of colors in the first gradiant
                  pal2 = "green.pal", # second color gradient
                  n2 = 4) # number of colors in the second gradiant

# Plot a layer with the extent of the EU28 countries with only a background color
plot(nuts0.spdf, border = NA, col = NA, bg = "#A6CAE0")
# Plot non european space
plot(world.spdf, col  = "#E3DEBF", border=NA, add=TRUE)

# Plot the birth vs. death relations
choroLayer(spdf = nuts2.spdf, # SpatialPolygonsDataFrame of the regions
           df = nuts2.df, # data frame with birth and death data
           var = "bvsd", # difference between birth an death field in df
           breaks = c(-13000,-10000,-5000, -1000, 0, 1000,5000, 15000,120000), # list of breaks
           col = cols, # colors 
           border = "grey40", # color of the polygons borders
           lwd = 0.5, # width of the borders
           legend.pos = "topright", # position of the legend
           legend.title.txt = "more births (>0)\nvs more deaths (<0)", # title of legend
           legend.values.rnd = 0, # number of decimal in the legend values
           add = TRUE) # add the layer to the current plot

# Plot a layer of countries borders
plot(nuts0.spdf,border = "grey20", lwd=0.75, add=TRUE)

# Layout plot
layoutLayer(title = "Demographic Trends", author = "cartography", 
            sources = "Eurostat, 2008", frame = TRUE, col = NA, 
            scale = NULL,coltitle = "black",
            north(pos = "bottomright", col = "grey20", south = FALSE)) # add a north arrow

2. Interactive Mapping

with ggiraph
packages needed:

#install.packages("ggplot2", dependencies = TRUE, type="binary")
#install.packages("ggiraph", type="binary")
#install.packages("rnaturalearth")
#install.packages("readr")
#install.packages("RCurl")
library(ggplot2)
library(ggiraph)
library(rnaturalearth) # contains different types of vector data!
library(readr)
library(RCurl)

## Loading required package: bitops

library(dplyr)

urlfile <- "https://raw.github.com/bhaskarvk/user2017.geodataviz/master/inst/extdata/africa-internet_usage-2015.csv"

internet_usage <- read.csv(urlfile)

head(internet_usage)

##   ï..Country.Name Country.Code
## 1         Algeria          DZA
## 2          Angola          AGO
## 3         Bahrain          BHR
## 4           Benin          BEN
## 5        Botswana          BWA
## 6    Burkina Faso          BFA
##                                        Series.Name    Series.Code
## 1 Individuals using the Internet (% of population) IT.NET.USER.ZS
## 2 Individuals using the Internet (% of population) IT.NET.USER.ZS
## 3 Individuals using the Internet (% of population) IT.NET.USER.ZS
## 4 Individuals using the Internet (% of population) IT.NET.USER.ZS
## 5 Individuals using the Internet (% of population) IT.NET.USER.ZS
## 6 Individuals using the Internet (% of population) IT.NET.USER.ZS
##   X2014..YR2014. X2015..YR2015. X2016..YR2016.
## 1       25.00000      38.200000             ..
## 2       10.20000      12.400000             ..
## 3       90.50313      93.478301             ..
## 4        6.00000       6.787703             ..
## 5       18.50000      27.500000             ..
## 6        9.40000      11.387646             ..

names(internet_usage) <- c("Country Name",  "Country Code",   "Series Name",  "Series Code", 
"2014 [YR2014]", "2015 [YR2015]", "2015 [YR20156]")  # rename the column headers


names(internet_usage)

## [1] "Country Name"   "Country Code"   "Series Name"    "Series Code"   
## [5] "2014 [YR2014]"  "2015 [YR2015]"  "2015 [YR20156]"

world <- sf::st_as_sf(rnaturalearth::countries110) # convert to sf object, data of countries in scale110 comes from package rnaturalearth
# class(countries110)

length(unique(world$iso_a3)) # how many

filter for column region_un(continent) and then join with the data of internet usage but only the cols Country Code and 2015, rename the cols and specifiy the key for joining, then transform the crs to a projected crs

africa <- dplyr::filter(world, region_un=='Africa' | region_un=="Asia") %>%   
  dplyr::left_join(internet_usage %>% dplyr::select(
    `Country Code`, `2015 [YR2015]`
  ) %>% dplyr::rename(iso_a3=`Country Code`, internet.usage.2015=`2015 [YR2015]`),  
  by = 'iso_a3') %>%
  st_transform(crs="+proj=laea +lon_0=18.984375")

## Warning: Column `iso_a3` joining character vector and factor, coercing into
## character vector

#install.packages("broom")
library(broom)

africa.centers <- st_centroid(africa)  # calculate center points of polygons in africa

africa.spdf <- methods::as(africa, 'Spatial') # convert to spatial class object -- why?
africa.spdf@data$id <- row.names(africa.spdf@data)  # create id column with numbers
# africa.spdf@data$id
africa.tidy <- broom::tidy(africa.spdf)  # now it is just a regular dataframe..but how and why?

#install.packages("htmlwidgets")
#install.packages("hrbrthemes", type="binary")
#install.packages("Rttf2pt1", type="binary") # necessary for hrbrthemes
#install.packages("colormap")
#install.packages("widgetframe")
library(htmlwidgets)

## Please upgrade the 'shiny' package to (at least) version 1.1

library(hrbrthemes) 
library(colormap)
library(widgetframe)

africa.tidy <- dplyr::left_join(africa.tidy, africa.spdf@data, by='id')  # we join the data from the data slot of the spatial object to the tidy dataframe

g <- ggplot(africa.tidy) +
  geom_polygon_interactive(
    color='black',
    aes(long, lat, group=group, fill=internet.usage.2015,
        tooltip=sprintf("%s<br/>%s",iso_a3,internet.usage.2015))) +
 hrbrthemes::theme_ipsum() +
  colormap::scale_fill_colormap(
    colormap=colormap::colormaps$copper, reverse = T) +
  labs(title='Internet Usage in Africa (and Asia) in 2015', subtitle='As Percent of Population',
       caption='Source: World Bank Open Data.')

g

Our World in Data

Our World in Data is an online publication that was mentioned in our first G4D session. This publication shows how living conditions are changing. It covers a wide range of topics: Trends in health, food provision, the growth and distribution of incomes, violence, rights, wars, culture, energy use, education, and environmental changes. For each topic the quality of the data is discussed and, by pointing the visitor to the sources, this website is also a database of databases. Covering all of these aspects in one resource makes it possible to understand how the observed long-run sustainable development trends are interlinked.

The project Our World in Data is produced by the Oxford Martin Programme on Global Development at the University of Oxford. Visualizations are licensed under CC BY-SA and may be freely adapted for any purpose. Data is available for download in CSV format. You are free to make use of anything you find there in your academic work.

As an illustration, we will go now to Our World in Data and download a CSV file containing all data used in The size of the poverty gap: some hints regarding the cost of ending extreme poverty. Then, we will upload the file to our Rstudio Cloud Project using the cloud interface. After uploading the file, we can continue:

poverty_gap <- read.csv("./datos/poverty-gap-index.csv")  # download csv, save in folder and shorten the name,then read in

unique(poverty_gap$Code)

##   [1] ALB      AGO      ARG      ARM      AZE      BGD      BLR     
##   [8] BLZ      BEN      BTN      BOL      BIH      BWA      BRA     
##  [15] BGR      BFA      BDI      KHM      CMR      CPV      CAF     
##  [22] TCD      CHL      CHN      COL      COM      COG      CRI     
##  [29] CIV      HRV      CZE      COD      DJI      DOM              
##  [36] ECU      SLV      EST      ETH      FJI      GAB      GMB     
##  [43] GEO      GHA      GTM      GIN      GNB      GUY      HTI     
##  [50] HND      HUN      IND      IDN      IRN      JAM      KAZ     
##  [57] KEN      KIR      OWID_KOS KGZ      LAO      LVA      LSO     
##  [64] LBR      LTU      MKD      MDG      MWI      MYS      MDV     
##  [71] MLI      MRT      MUS      MEX      FSM      MDA      MNG     
##  [78] MNE      MAR      MOZ      NAM      NPL      NIC      NER     
##  [85] NGA      PAK      PSE      PAN      PNG      PRY      PER     
##  [92] PHL      POL      ROU      RUS      RWA      LCA      WSM     
##  [99] STP      SEN      SRB      SYC      SLE      SVK      SVN     
## [106] SLB      ZAF      SSD      LKA      SDN      SUR      SWZ     
## [113] TJK      TZA      THA      TLS      TGO      TON      TTO     
## [120] TUN      TUR      TKM      TUV      UGA      UKR      URY     
## [127] UZB      VUT      VEN      VNM      OWID_WRL ZMB      ZWE     
## 133 Levels:  AGO ALB ARG ARM AZE BDI BEN BFA BGD BGR BIH BLR BLZ ... ZWE

length(unique(poverty_gap$Code))

## [1] 133

head(poverty_gap)

##    Entity Code Year  X...
## 1 Albania  ALB 1996  0.19
## 2 Albania  ALB 2002  0.39
## 3 Albania  ALB 2005  0.18
## 4 Albania  ALB 2008  0.07
## 5 Albania  ALB 2012  0.22
## 6  Angola  AGO 2000 14.65

select only one year for mapping povert_gap with dplyr filter function

npov <- filter(poverty_gap, Year==1992)

names(npov) <-   c("Entity", "Code",   "Year",   "Pov_1992")  # name column of interest

head(npov)

##                     Entity Code Year Pov_1992
## 1                Argentina  ARG 1992     1.18
## 2                  Bolivia  BOL 1992     4.81
## 3                   Brazil  BRA 1992     9.09
## 4                 Bulgaria  BGR 1992     0.00
## 5                  Burundi  BDI 1992    36.79
## 6 Central African Republic  CAF 1992    58.85

Now, we need to join our world geospatial object with the poverty_gap non-spatial object:

nworld <- left_join(world, npov, by = c('iso_a3' = 'Code'))  
st_transform(nworld, crs="+proj=laea +lon_0=18.984375") # poverty joined to world by key (which is iso_a3 in world data and Code in poverty_gap data) and then transform crs

# head(nworld)
# names(nworld)

check (and solve) if there are missing values in poverty column

nworld$Pov_1992  # shows values of column -> there are NAs

##   [1]    NA    NA    NA    NA  1.18    NA    NA    NA    NA    NA    NA
##  [12] 36.79    NA    NA    NA    NA  0.00    NA    NA    NA    NA  4.81
##  [23]  9.09    NA    NA    NA 58.85    NA    NA  1.48    NA  6.64    NA
##  [34]    NA    NA  4.48  4.83    NA    NA    NA    NA    NA    NA    NA
##  [45]  1.50    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
##  [56]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
##  [67]    NA 12.48 10.80    NA    NA    NA    NA    NA    NA    NA    NA
##  [78]    NA    NA    NA    NA    NA    NA    NA  7.93    NA    NA    NA
##  [89]    NA    NA  4.83    NA    NA    NA    NA    NA    NA    NA    NA
## [100]    NA  3.28    NA  2.88    NA    NA    NA    NA    NA    NA    NA
## [111]    NA  0.10    NA    NA 34.12 27.37    NA    NA    NA    NA    NA
## [122]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## [133]    NA    NA  0.30    NA    NA    NA    NA    NA    NA    NA    NA
## [144]    NA    NA    NA    NA    NA    NA  0.00    NA    NA    NA    NA
## [155]    NA    NA  1.13    NA    NA    NA  0.86    NA    NA    NA    NA
## [166] 28.61  0.68  0.15    NA    NA  2.79 14.95    NA    NA    NA    NA
## [177]    NA

nworld$Pov_1992[is.na(nworld$Pov_1992)] <- 0  # select NAs and assign 0 for TRUE

# like above
world.centers <- st_centroid(nworld)  # center points

world.spdf <- methods::as(nworld, 'Spatial')  # convert to spatial class object
world.spdf@data$id <- row.names(world.spdf@data) # create id column with numbers

world.tidy <- broom::tidy(world.spdf) # now it is just a regular dataframe

joining tables:

world.tidy <- dplyr::left_join(world.tidy, world.spdf@data, by='id')
summary(world.tidy$Pov_1992)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   1.429   0.000  58.850

mapping:

g <- ggplot(world.tidy) +
  geom_polygon_interactive(
    color='black',
    aes(long, lat, group=group, fill=(Pov_1992),
        tooltip=sprintf("%s<br/>%s",iso_a3,Pov_1992))) +
 hrbrthemes::theme_ipsum() +
  colormap::scale_fill_colormap(
    colormap=colormap::colormaps$freesurface_red, reverse = T) +
  labs(title='Poverty Gap in the World in 1992', subtitle='As Percent of Population',
       caption='Source: World Bank Open Data.')

g

Plastic waste data

# spatial data
world <- rnaturalearth::countries110 %>% st_as_sf()
europe <- world[world$region_un=="Europe"&world$name!='Russia',]  %>% dplyr::select("iso_a3", "name")

# plastic waste data
plastic <- read.csv("./datos/plastic-waste-per-capita.csv") # contains data of per capita plastic waste in kg per person and day
names(plastic) <- c("Country", "Code", "Year", "plasticwaste") # rename cols
head(plastic)

##               Country Code Year plasticwaste
## 1             Albania  ALB 2010        0.069
## 2             Algeria  DZA 2010        0.144
## 3              Angola  AGO 2010        0.062
## 4            Anguilla  AIA 2010        0.252
## 5 Antigua and Barbuda  ATG 2010        0.660
## 6           Argentina  ARG 2010        0.183

# join spatial and plastic data and project crs
plasticspatial <- inner_join(europe, plastic, by=c("iso_a3" = "Code"))

## Warning: Column `iso_a3`/`Code` joining character vector and factor,
## coercing into character vector

st_transform(plasticspatial, crs="+proj=laea +lon_0=18.984375")

## Simple feature collection with 27 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -7627730 ymin: 281334.3 xmax: 1687110 ymax: 8234905
## epsg (SRID):    NA
## proj4string:    +proj=laea +lat_0=0 +lon_0=18.984375 +x_0=0 +y_0=0 +ellps=WGS84 +units=m +no_defs
## First 10 features:
##    iso_a3             name                Country Year plasticwaste
## 1     ALB          Albania                Albania 2010        0.069
## 2     BEL          Belgium                Belgium 2010        0.080
## 3     BGR         Bulgaria               Bulgaria 2010        0.154
## 4     BIH Bosnia and Herz. Bosnia and Herzegovina 2010        0.144
## 5     DEU          Germany                Germany 2010        0.485
## 6     DNK          Denmark                Denmark 2010        0.047
## 7     ESP            Spain                  Spain 2010        0.277
## 8     EST          Estonia                Estonia 2010        0.176
## 9     FIN          Finland                Finland 2010        0.234
## 10    FRA           France                 France 2010        0.192
##                          geometry
## 1  MULTIPOLYGON (((142770.1 45...
## 2  MULTIPOLYGON (((-1205140 55...
## 3  MULTIPOLYGON (((316667.9 47...
## 4  MULTIPOLYGON (((1805.338 48...
## 5  MULTIPOLYGON (((-652941.9 5...
## 6  MULTIPOLYGON (((-448127.7 5...
## 7  MULTIPOLYGON (((-2454567 46...
## 8  MULTIPOLYGON (((361807.8 61...
## 9  MULTIPOLYGON (((464138.2 72...
## 10 MULTIPOLYGON (((-7450322 34...

#head(plasticspatial)
class(plasticspatial)

## [1] "sf"         "data.frame"

# plot with base R
plot(plasticspatial["plasticwaste"], key.pos = 4, axes = TRUE, key.width = lcm(1.3), key.length = 1.0)

# get back longitude and latitude for plotting with ggplot (is this the only reason?)
eu.centers <- st_centroid(plasticspatial)  # center points

eu.spdf <- methods::as(plasticspatial, 'Spatial')  # convert to spatial class object
eu.spdf@data$id <- row.names(eu.spdf@data) # create id column with numbers

eu.tidy <- broom::tidy(eu.spdf) # now it is just a regular dataframe

eu.tidy <- dplyr::left_join(eu.tidy, eu.spdf@data, by='id')

# mapping with ggplot
g <- ggplot(eu.tidy) +
  geom_polygon_interactive(
    color='black',
    aes(long, lat, group=group, fill=plasticwaste,
        tooltip=sprintf("%s<br/>%s",iso_a3,plasticwaste))) +
  coord_fixed(xlim=c(-28,40),ylim=c(30,74)) +
 hrbrthemes::theme_ipsum() +
   viridis::scale_fill_viridis(option="plasma", direction = -1)+
  labs(title='Plastic Waste', subtitle='in Europe',
       caption='Source: Jambeck et al. 2015', fill="plastic waste\n[kg per capita and day]")

g

Using plotly

The plotly package provides access to an online visualitation platform. Before running code in this section, you need to sign up at plotly

#install.packages("plotly", type="binary")
library(plotly)

Sys.setenv("plotly_username"="mh_09")
Sys.setenv("plotly_api_key"="DND0VFXPT45R7m2ppD22")

df <- read.csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')

# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)

# specify map projection/options
g <- list(
  showframe = FALSE,
  showcoastlines = FALSE,
  projection = list(type = 'Mercator')
)

p <- plot_geo(df) %>%
  add_trace(
    z = ~GDP..BILLIONS., color = ~GDP..BILLIONS., colors = 'Blues',
    text = ~COUNTRY, locations = ~CODE, marker = list(line = l)
  ) %>%
  colorbar(title = 'GDP Billions US$', tickprefix = '$') %>%
  layout(
    title = '2014 Global GDP<br>Source:<a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">CIA World Factbook</a>',
    geo = g
  )

# Create a shareable link to your chart
# Set up API credentials: https://plot.ly/r/getting-started
options(browser = 'false') # suppress auto open in browser
chart_link = api_create(p, filename="nchoropleth-ag")

## Found a grid already named: 'nchoropleth-ag Grid'. Since fileopt='overwrite', I'll try to update it

## Found a plot already named: 'nchoropleth-ag'. Since fileopt='overwrite', I'll try to update it

chart_link

the plot is now available here

Using leaflet

The following code is a just a plain visualization:

library(leaflet)
plt <- leaflet() %>%
  setView(lat = 50.85045, lng = 4.34878, zoom=13) %>%
  addTiles(group="OSM") %>%
  addProviderTiles(providers$CartoDB.DarkMatter, group="Dark") %>%
  addProviderTiles(providers$CartoDB.Positron, group="Light") %>%
  addLayersControl(baseGroups=c('OSM','Dark','Light'))
  
plt

More important is the next example which uses a dataset from the recent 2014-2016 West African Ebola outbreak. This example is taken from here.

urlfile2 <- "https://raw.github.com/amcrisan/EpiDesignPattern/master/data/ebola_metadata.RDS"
myfile <- download.file(urlfile2,"./datos/ebola_metadata.csv", method="auto")
# try other data:
#urlfile3 <- "https://github.com/nickloman/ebov/blob/master/metadata/metadata_samples.txt"
#myfile3 <- download.file(urlfile3,"./datos/ebola_metadata2.csv", method="auto")

unfortunately, the rest from here does not work because the data is not accessible

#metadata <- readRDS("./G4D/datos/ebola_metadata.RDS")  # tried a lot but doesnt work (even though file is in folder)
#metadata2 <- read.csv("ebola_metadata2.csv")

#install.packages("ape")
#install.packages("lubridate")
#install.packages("dygraphs")
#install.packages("xts")
#install.packages("ggmap")
library(ape)
#library(ggtree)
library(lubridate)
library(dygraphs)
library(xts)
library(ggmap)
library(RColorBrewer)
library(tidyr)
library(dplyr)

# First, we need to create some new data
# Counting number cases per country
# aggDat<-metadata %>%
#        filter(Country !="?") %>%
#        group_by(Country,country_lon,country_lat) %>%
#        dplyr::count()%>%
#        mutate(popup=sprintf("%s = %d cases",Country,n)) #create a popup for the map
# Here's a very quick look at what this command generates for us:
#aggDat

# Now, we'll define the color palette
# pal <- colorFactor(
#   palette = c('red', 'blue', 'green', 'purple', 'orange'),
#   domain = aggDat$Country)

# Now, we'll create the Map
# This first command will creat an empty map
# m<-leaflet(aggDat)
#   m %>%
#         addTiles()%>% 
#         addCircleMarkers(
#           lng=~country_lon,
#           lat= ~country_lat,
#           radius=~sqrt(n)*2,
#           color = ~pal(Country),
#           stroke = FALSE, fillOpacity = 0.7,
#           label=~as.character(popup),
#           labelOptions = labelOptions(noHide = T),
#           options = leafletOptions(minZoom = 0, maxZoom = 10,scroolWheelZoom=FALSE))

By modifying the code, it is possible to get a dissagregated visualization:

# aggDat<-metadata %>%
#         filter(Country !="?") %>%
#         group_by(Country,Region,region_lon,region_lat) %>%
#         dplyr::count()%>% 
#         mutate(popup=sprintf("%s (%s) = %d cases",Region,Country,n))
#       
# m<-leaflet(aggDat)
#       
# m %>%
#   addTiles()%>% 
#   addCircleMarkers(
#     lng=~region_lon,
#     lat= ~region_lat,
#     radius=~sqrt(n)*2,
#     color = ~pal(Country), #we actually colour the points by country here
#     stroke = FALSE, fillOpacity = 0.7,
#     label=~as.character(popup),
#     labelOptions = labelOptions(noHide = F)
#  )

Leaflet also allows you to automatically generate clusters that will vary when you zoom in and out on the map. We didn’t implement this feature in the shiny application, but I wanted to let you know this is available.

# m<-leaflet(metadata) 
# # by providing the region latitude and longtitude co-ordinates we allow clustering of regional samples
# m %>%
#   addTiles()%>%
#   addCircleMarkers(
#     lng=~region_lon,
#     lat= ~region_lat,
#     stroke = FALSE, fillOpacity = 0.5,
#     clusterOptions= markerClusterOptions(titile="regional clusters") #cluster options
#   )

The cluster colour here are automatically assigned by the Leaflet package, and are as follows:

orange / red for the largest clusters
yellow for “medium” clusters
green for small clusters

By click on a point, the clustering algorithm reveals the geospatial locations that were aggregated into a cluster (which its a neat and handy feature).

Assignment1_G4D_MH