This is my first attempt to create an R Markdown script with spatial data. First I will need some packages:
#install.packages("rgdal")
#install.packages("sf")
#install.packages("raster")
#install.packages("sp")
#install.packages("cartography")
#install.packages("units", type='binary') # was necessary like this for loading sf
#install.packages("shiny", type='binary')
#install.packages("leaflet")
#install.packages("tmap", dependencies = TRUE, type="binary")
#install.packages("dplyr")
library(rgdal)
library(sf)
library(raster)
library(sp)
library(cartography)
library(dplyr)
#library(tmap)
import a geospatial vector data file
a) with rgdal & sp way
#download.file("http://biogeo.ucdavis.edu/data/diva/adm/DEU_adm.zip",
#destfile = "./spatialR/G4D/datos/DEU_adm.zip" , mode='wb')
#unzip("./spatialR/G4D/datos/DEU_adm.zip", exdir = "./spatialR")
spdeu <- readOGR(dsn = "./datos/DEU_adm1.shp", verbose = FALSE)
class(spdeu)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
head(spdeu@data) # shows data slot with attribute-columns in SpatialPolygonsDataFrame object
## ID_0 ISO NAME_0 ID_1 NAME_1 TYPE_1 ENGTYPE_1 NL_NAME_1
## 0 86 DEU Germany 1 Baden-Württemberg Land State <NA>
## 1 86 DEU Germany 2 Bayern Land State <NA>
## 2 86 DEU Germany 3 Berlin Land State <NA>
## 3 86 DEU Germany 4 Brandenburg Land State <NA>
## 4 86 DEU Germany 5 Bremen Land State <NA>
## 5 86 DEU Germany 6 Hamburg Land State <NA>
## VARNAME_1
## 0 <NA>
## 1 Bavaria
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
pick html colors (with HEX codes from here) for plotting:
spdeu@data$COLOUR <- "#FFFFFF" # add column wich contains color white
# add specified colours according to ID_1 column (ID for Bundesland so that every Bundesland gets its own colour)
spdeu@data$COLOUR[spdeu@data$ID_1 == 1] <- "#006837"
spdeu@data$COLOUR[spdeu@data$ID_1 == 2] <- "#28B463"
spdeu@data$COLOUR[spdeu@data$ID_1 == 3] <- "#D4EFDF"
spdeu@data$COLOUR[spdeu@data$ID_1 == 4] <- "#3fa25d"
spdeu@data$COLOUR[spdeu@data$ID_1 == 5] <- "#76D7C4"
spdeu@data$COLOUR[spdeu@data$ID_1 == 6] <- "#A3E4D7"
spdeu@data$COLOUR[spdeu@data$ID_1 == 7] <- "#d16156"
spdeu@data$COLOUR[spdeu@data$ID_1 == 8] <- "#D0ECE7"
spdeu@data$COLOUR[spdeu@data$ID_1 == 9] <- "#A9CCE3"
spdeu@data$COLOUR[spdeu@data$ID_1 == 10] <- "#598c81"
spdeu@data$COLOUR[spdeu@data$ID_1 == 11] <- "#467466"
spdeu@data$COLOUR[spdeu@data$ID_1 == 12] <- "#749188"
spdeu@data$COLOUR[spdeu@data$ID_1 == 13] <- "#184832"
spdeu@data$COLOUR[spdeu@data$ID_1 == 14] <- "#72d1a5"
spdeu@data$COLOUR[spdeu@data$ID_1 == 15] <- "#1bcb7a"
spdeu@data$COLOUR[spdeu@data$ID_1 == 16] <- "#FAE5D3"
plot(spdeu, col=spdeu$COLOUR, main = "Germany's Departments") # plot sp object with colours accoding to COLOUR column and give title
b) with sf way
sfdeu <- st_read(dsn = "./datos/DEU_adm1.shp", quiet = TRUE)
class(sfdeu)
## [1] "sf" "data.frame"
head(sfdeu)
## Simple feature collection with 6 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 7.512127 ymin: 47.26986 xmax: 14.74682 ymax: 53.74812
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
## ID_0 ISO NAME_0 ID_1 NAME_1 TYPE_1 ENGTYPE_1 NL_NAME_1
## 1 86 DEU Germany 1 Baden-Württemberg Land State <NA>
## 2 86 DEU Germany 2 Bayern Land State <NA>
## 3 86 DEU Germany 3 Berlin Land State <NA>
## 4 86 DEU Germany 4 Brandenburg Land State <NA>
## 5 86 DEU Germany 5 Bremen Land State <NA>
## 6 86 DEU Germany 6 Hamburg Land State <NA>
## VARNAME_1 geometry
## 1 <NA> MULTIPOLYGON (((8.708373 47...
## 2 Bavaria MULTIPOLYGON (((10.13386 50...
## 3 <NA> MULTIPOLYGON (((13.17789 52...
## 4 <NA> MULTIPOLYGON (((13.87951 53...
## 5 <NA> MULTIPOLYGON (((8.50506 53....
## 6 <NA> MULTIPOLYGON (((10.07162 53...
sfdeu$COLOUR <- spdeu@data$COLOUR # uses the above created colours
head(sfdeu) # cotains now column COLOUR
## Simple feature collection with 6 features and 10 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 7.512127 ymin: 47.26986 xmax: 14.74682 ymax: 53.74812
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
## ID_0 ISO NAME_0 ID_1 NAME_1 TYPE_1 ENGTYPE_1 NL_NAME_1
## 1 86 DEU Germany 1 Baden-Württemberg Land State <NA>
## 2 86 DEU Germany 2 Bayern Land State <NA>
## 3 86 DEU Germany 3 Berlin Land State <NA>
## 4 86 DEU Germany 4 Brandenburg Land State <NA>
## 5 86 DEU Germany 5 Bremen Land State <NA>
## 6 86 DEU Germany 6 Hamburg Land State <NA>
## VARNAME_1 COLOUR geometry
## 1 <NA> #006837 MULTIPOLYGON (((8.708373 47...
## 2 Bavaria #28B463 MULTIPOLYGON (((10.13386 50...
## 3 <NA> #D4EFDF MULTIPOLYGON (((13.17789 52...
## 4 <NA> #3fa25d MULTIPOLYGON (((13.87951 53...
## 5 <NA> #76D7C4 MULTIPOLYGON (((8.50506 53....
## 6 <NA> #A3E4D7 MULTIPOLYGON (((10.07162 53...
plot(st_geometry(sfdeu), col=sfdeu$COLOUR, main = "Germany's Departments") # plot only geometry without attributes and assign the colour mapping to the COLOUR column
c) with cartography way
The __cartography_ package seems to provide useful mapping functionalities. Besides, it comes with geospatial data for Europe.
# Load data
data(nuts2006) # nuts refers to Nomenclature of Territorial Units for Statistics and is a geocode standard for referencing the subdivisions of countries
# Plot a layer with the extent of the EU28 countries with only a background color
plot(nuts0.spdf, border = NA, col = NA, bg = "#b1dfec")
# Plot non european space
plot(world.spdf, col = "#E3DEBF", border=NA, add=TRUE)
# Plot a layer of countries borders
plot(nuts0.spdf, border = "grey20", lwd = 2, add = TRUE)
# Plot a layer of NUTS1
plot(nuts1.spdf, border = "grey30", lwd = 2, add = TRUE)
# Plot a layer of NUTS2
plot(nuts2.spdf, border = "grey40", lwd = 0.5, add = TRUE)
# Plot a layer of NUTS3
plot(nuts3.spdf, border = "grey20", lwd = 0.1, add = TRUE)
Label map e.g. as a population map (from here):
# Layout plot
layoutLayer(title = "Countries with highest birth rates in Europe", # title of the map
author = "Author: M. Hahn", #
sources = "Sources: eurostat", #
scale = NULL, # no scale
col = NA, # no color for the title box
coltitle = "black", # color of the title
frame = FALSE, # no frame around the map
bg = "#A6CAE0", # background of the map
extent = nuts0.spdf) # set the extent of the map
# Non European space
plot(world.spdf, col = "#AAB7B8", border = NA, add = TRUE)
# European (EU28) countries
plot(nuts0.spdf, col = "#c5c5c7",border = "white", lwd = 1, add = TRUE)
# using nuts dataframes
head(nuts0.df)
## id emp2008 act2008 unemp2008 birth_2008 death_2008 gdppps1999
## 1418 AT 4089.9 4252.2 162.3 77752 75083 186761
## 1782 BE 4445.9 4779.3 333.4 127205 104587 224031
## 1783 BG 3360.7 3560.4 199.7 77712 110523 40238
## 274 CH 4228.8 4375.4 146.6 76691 61233 186180
## 1894 CY 382.9 397.4 14.5 9205 5194 10684
## 1893 CZ 5002.5 5232.3 229.8 119570 104948 127323
## gdppps2008 pop1999 pop2008
## 1418 259221 7982461 8318592
## 1782 308142 10213752 10666866
## 1783 82922 8230371 7640238
## 274 273871 7123537 7593494
## 1894 19316 682862 789269
## 1893 210336 10289621 10381130
# Selection of the 10 countries with the most births per overall population in 2008 of Europe
nuts0.df$birthperpop <- nuts0.df$birth_2008/nuts0.df$pop2008
dflab <- nuts0.df[order(nuts0.df$birthperpop, decreasing = TRUE),][1:10,] # from nuts0 (country level) dataframe select new column birthperpop and order incresing, then select first 10 rows to get fewest, savae to new dataframe
dflab
## id emp2008 act2008 unemp2008 birth_2008 death_2008 gdppps1999
## 1417 TR 21193.0 23471.0 2278.0 1262333 NA 447913
## 1127 IE 2101.2 2235.9 134.7 73996 28274 84087
## 759 IS 177.1 182.4 5.4 4835 1987 6866
## 1840 UK 29363.9 31116.4 1752.5 794383 579697 1231099
## 1484 FR 26462.9 28698.5 2235.6 828404 542575 1232943
## 1592 NO 2513.7 2579.5 65.8 60497 41712 115035
## 1091 EE 656.5 694.9 38.4 16028 16675 10443
## 1782 BE 4445.9 4779.3 333.4 127205 104587 224031
## 999 SE 4593.0 4898.4 305.4 109301 91449 199063
## 422 DK 2853.8 2951.8 98.0 65038 54591 123949
## gdppps2008 pop1999 pop2008 birthperpop
## 1417 829869 66531941 70586256 0.01788355
## 1127 147840 3732201 4401335 0.01681217
## 759 9726 275712 315459 0.01532687
## 1840 1763903 58637924 61192129 0.01298178
## 1484 1713350 60122665 63961956 0.01295151
## 1592 225592 4445329 4737171 0.01277070
## 1091 22812 1379237 1340935 0.01195285
## 1782 308142 10213752 10666866 0.01192525
## 999 284000 8854322 9182927 0.01190263
## 422 169250 5313577 5475791 0.01187737
# Label creation
# make new column called lab, nuts0 and thus dflab contains column id (country code), line break, birth rate per 1000 of population, round to 1 digit
dflab$lab <- paste(dflab$id, "\n", round(dflab$birthperpop*1000,1), sep ="")
# Label plot of the 10 countries with the highest birth rate
labelLayer(spdf = nuts0.spdf, # SpatialPolygonsDataFrame used to plot he labels
df = dflab, # data frame containing the lables
txt = "lab", # label field in df
col = "#19511f", # color of the labels
cex = 0.6, # size of the labels
font = 2) # label font
# Add an explanation text
text(x = 5477360, y = 4177311, labels = "The 10 countries of Europe with highest
birth rates in 2008 [per 1000 residents]", cex = 0.7, adj = 0)
Choropleth maps display geographical areas or regions which are coloured, shaded or patterned in relation to a data variable. This provides a way to visualise values over a geographical area, which can show variation or patterns across the displayed location.
A common error when making choropleth maps is to encode raw data values (such as population) rather than using normalized values (calculating population per square kilometre for example) to produce a density map.
Let’s make a map of annual population growth rate, that is the rate at which the number of individuals in a region increases in a given time period, expressed as a fraction of the initial population. These data comes from the Eurostat website. For an explanation of NUTS Levels (Nomenclature of Territorial Units for Statistics) see here.
# Compute the compound annual growth rate
nuts2.df$cagr <- (((nuts2.df$pop2008 / nuts2.df$pop1999)^(1/9)) - 1) * 100
# Set a custom color palette
cols <- carto.pal(pal1 = "green.pal", # first color gradient
n1 = 2, # number of colors in the first gradiant
pal2 = "red.pal", # second color gradient
n2 = 4) # number of colors in the second gradiant
# Plot a layer with the extent of the EU28 countries with only a background color
plot(nuts0.spdf, border = NA, col = NA, bg = "#A6CAE0")
# Plot non european space
plot(world.spdf, col = "#E3DEBF", border=NA, add=TRUE)
# Plot the compound annual growth rate
choroLayer(spdf = nuts2.spdf, # SpatialPolygonsDataFrame of the regions
df = nuts2.df, # data frame with compound annual growth rate
var = "cagr", # compound annual growth rate field in df
breaks = c(-2.43,-1,0,0.5,1,2,3.1), # list of breaks
col = cols, # colors
border = "grey40", # color of the polygons borders
lwd = 0.5, # width of the borders
legend.pos = "right", # position of the legend
legend.title.txt = "Compound Annual\nGrowth Rate", # title of the legend
legend.values.rnd = 2, # number of decimal in the legend values
add = TRUE) # add the layer to the current plot
# Plot a layer of countries borders
plot(nuts0.spdf,border = "grey20", lwd=0.75, add=TRUE)
# Layout plot
layoutLayer(title = "Demographic Trends", author = "cartography",
sources = "Eurostat, 2008", frame = TRUE, col = NA,
scale = NULL,coltitle = "black",
south = TRUE) # add a south arrow
another version (just an example) could be:
# Where are more births than deaths in 2008?
nuts2.df$bvsd <- nuts2.df$birth_2008 - nuts2.df$death_2008 # positive numbers -> more births
# nuts2.df$bvsd
# Set a custom color palette
cols <- carto.pal(pal1 = "red.pal", # first color gradient
n1 = 4, # number of colors in the first gradiant
pal2 = "green.pal", # second color gradient
n2 = 4) # number of colors in the second gradiant
# Plot a layer with the extent of the EU28 countries with only a background color
plot(nuts0.spdf, border = NA, col = NA, bg = "#A6CAE0")
# Plot non european space
plot(world.spdf, col = "#E3DEBF", border=NA, add=TRUE)
# Plot the birth vs. death relations
choroLayer(spdf = nuts2.spdf, # SpatialPolygonsDataFrame of the regions
df = nuts2.df, # data frame with birth and death data
var = "bvsd", # difference between birth an death field in df
breaks = c(-13000,-10000,-5000, -1000, 0, 1000,5000, 15000,120000), # list of breaks
col = cols, # colors
border = "grey40", # color of the polygons borders
lwd = 0.5, # width of the borders
legend.pos = "topright", # position of the legend
legend.title.txt = "more births (>0)\nvs more deaths (<0)", # title of legend
legend.values.rnd = 0, # number of decimal in the legend values
add = TRUE) # add the layer to the current plot
# Plot a layer of countries borders
plot(nuts0.spdf,border = "grey20", lwd=0.75, add=TRUE)
# Layout plot
layoutLayer(title = "Demographic Trends", author = "cartography",
sources = "Eurostat, 2008", frame = TRUE, col = NA,
scale = NULL,coltitle = "black",
north(pos = "bottomright", col = "grey20", south = FALSE)) # add a north arrow
with ggiraph
packages needed:
#install.packages("ggplot2", dependencies = TRUE, type="binary")
#install.packages("ggiraph", type="binary")
#install.packages("rnaturalearth")
#install.packages("readr")
#install.packages("RCurl")
library(ggplot2)
library(ggiraph)
library(rnaturalearth) # contains different types of vector data!
library(readr)
library(RCurl)
## Loading required package: bitops
library(dplyr)
urlfile <- "https://raw.github.com/bhaskarvk/user2017.geodataviz/master/inst/extdata/africa-internet_usage-2015.csv"
internet_usage <- read.csv(urlfile)
head(internet_usage)
## ï..Country.Name Country.Code
## 1 Algeria DZA
## 2 Angola AGO
## 3 Bahrain BHR
## 4 Benin BEN
## 5 Botswana BWA
## 6 Burkina Faso BFA
## Series.Name Series.Code
## 1 Individuals using the Internet (% of population) IT.NET.USER.ZS
## 2 Individuals using the Internet (% of population) IT.NET.USER.ZS
## 3 Individuals using the Internet (% of population) IT.NET.USER.ZS
## 4 Individuals using the Internet (% of population) IT.NET.USER.ZS
## 5 Individuals using the Internet (% of population) IT.NET.USER.ZS
## 6 Individuals using the Internet (% of population) IT.NET.USER.ZS
## X2014..YR2014. X2015..YR2015. X2016..YR2016.
## 1 25.00000 38.200000 ..
## 2 10.20000 12.400000 ..
## 3 90.50313 93.478301 ..
## 4 6.00000 6.787703 ..
## 5 18.50000 27.500000 ..
## 6 9.40000 11.387646 ..
names(internet_usage) <- c("Country Name", "Country Code", "Series Name", "Series Code",
"2014 [YR2014]", "2015 [YR2015]", "2015 [YR20156]") # rename the column headers
names(internet_usage)
## [1] "Country Name" "Country Code" "Series Name" "Series Code"
## [5] "2014 [YR2014]" "2015 [YR2015]" "2015 [YR20156]"
world <- sf::st_as_sf(rnaturalearth::countries110) # convert to sf object, data of countries in scale110 comes from package rnaturalearth
# class(countries110)
length(unique(world$iso_a3)) # how many
filter for column region_un(continent) and then join with the data of internet usage but only the cols Country Code and 2015, rename the cols and specifiy the key for joining, then transform the crs to a projected crs
africa <- dplyr::filter(world, region_un=='Africa' | region_un=="Asia") %>%
dplyr::left_join(internet_usage %>% dplyr::select(
`Country Code`, `2015 [YR2015]`
) %>% dplyr::rename(iso_a3=`Country Code`, internet.usage.2015=`2015 [YR2015]`),
by = 'iso_a3') %>%
st_transform(crs="+proj=laea +lon_0=18.984375")
## Warning: Column `iso_a3` joining character vector and factor, coercing into
## character vector
#install.packages("broom")
library(broom)
africa.centers <- st_centroid(africa) # calculate center points of polygons in africa
africa.spdf <- methods::as(africa, 'Spatial') # convert to spatial class object -- why?
africa.spdf@data$id <- row.names(africa.spdf@data) # create id column with numbers
# africa.spdf@data$id
africa.tidy <- broom::tidy(africa.spdf) # now it is just a regular dataframe..but how and why?
#install.packages("htmlwidgets")
#install.packages("hrbrthemes", type="binary")
#install.packages("Rttf2pt1", type="binary") # necessary for hrbrthemes
#install.packages("colormap")
#install.packages("widgetframe")
library(htmlwidgets)
## Please upgrade the 'shiny' package to (at least) version 1.1
library(hrbrthemes)
library(colormap)
library(widgetframe)
africa.tidy <- dplyr::left_join(africa.tidy, africa.spdf@data, by='id') # we join the data from the data slot of the spatial object to the tidy dataframe
g <- ggplot(africa.tidy) +
geom_polygon_interactive(
color='black',
aes(long, lat, group=group, fill=internet.usage.2015,
tooltip=sprintf("%s<br/>%s",iso_a3,internet.usage.2015))) +
hrbrthemes::theme_ipsum() +
colormap::scale_fill_colormap(
colormap=colormap::colormaps$copper, reverse = T) +
labs(title='Internet Usage in Africa (and Asia) in 2015', subtitle='As Percent of Population',
caption='Source: World Bank Open Data.')
g
Our World in Data is an online publication that was mentioned in our first G4D session. This publication shows how living conditions are changing. It covers a wide range of topics: Trends in health, food provision, the growth and distribution of incomes, violence, rights, wars, culture, energy use, education, and environmental changes. For each topic the quality of the data is discussed and, by pointing the visitor to the sources, this website is also a database of databases. Covering all of these aspects in one resource makes it possible to understand how the observed long-run sustainable development trends are interlinked.
The project Our World in Data is produced by the Oxford Martin Programme on Global Development at the University of Oxford. Visualizations are licensed under CC BY-SA and may be freely adapted for any purpose. Data is available for download in CSV format. You are free to make use of anything you find there in your academic work.
As an illustration, we will go now to Our World in Data and download a CSV file containing all data used in The size of the poverty gap: some hints regarding the cost of ending extreme poverty. Then, we will upload the file to our Rstudio Cloud Project using the cloud interface. After uploading the file, we can continue:
poverty_gap <- read.csv("./datos/poverty-gap-index.csv") # download csv, save in folder and shorten the name,then read in
unique(poverty_gap$Code)
## [1] ALB AGO ARG ARM AZE BGD BLR
## [8] BLZ BEN BTN BOL BIH BWA BRA
## [15] BGR BFA BDI KHM CMR CPV CAF
## [22] TCD CHL CHN COL COM COG CRI
## [29] CIV HRV CZE COD DJI DOM
## [36] ECU SLV EST ETH FJI GAB GMB
## [43] GEO GHA GTM GIN GNB GUY HTI
## [50] HND HUN IND IDN IRN JAM KAZ
## [57] KEN KIR OWID_KOS KGZ LAO LVA LSO
## [64] LBR LTU MKD MDG MWI MYS MDV
## [71] MLI MRT MUS MEX FSM MDA MNG
## [78] MNE MAR MOZ NAM NPL NIC NER
## [85] NGA PAK PSE PAN PNG PRY PER
## [92] PHL POL ROU RUS RWA LCA WSM
## [99] STP SEN SRB SYC SLE SVK SVN
## [106] SLB ZAF SSD LKA SDN SUR SWZ
## [113] TJK TZA THA TLS TGO TON TTO
## [120] TUN TUR TKM TUV UGA UKR URY
## [127] UZB VUT VEN VNM OWID_WRL ZMB ZWE
## 133 Levels: AGO ALB ARG ARM AZE BDI BEN BFA BGD BGR BIH BLR BLZ ... ZWE
length(unique(poverty_gap$Code))
## [1] 133
head(poverty_gap)
## Entity Code Year X...
## 1 Albania ALB 1996 0.19
## 2 Albania ALB 2002 0.39
## 3 Albania ALB 2005 0.18
## 4 Albania ALB 2008 0.07
## 5 Albania ALB 2012 0.22
## 6 Angola AGO 2000 14.65
select only one year for mapping povert_gap with dplyr filter function
npov <- filter(poverty_gap, Year==1992)
names(npov) <- c("Entity", "Code", "Year", "Pov_1992") # name column of interest
head(npov)
## Entity Code Year Pov_1992
## 1 Argentina ARG 1992 1.18
## 2 Bolivia BOL 1992 4.81
## 3 Brazil BRA 1992 9.09
## 4 Bulgaria BGR 1992 0.00
## 5 Burundi BDI 1992 36.79
## 6 Central African Republic CAF 1992 58.85
Now, we need to join our world geospatial object with the poverty_gap non-spatial object:
nworld <- left_join(world, npov, by = c('iso_a3' = 'Code'))
st_transform(nworld, crs="+proj=laea +lon_0=18.984375") # poverty joined to world by key (which is iso_a3 in world data and Code in poverty_gap data) and then transform crs
# head(nworld)
# names(nworld)
check (and solve) if there are missing values in poverty column
nworld$Pov_1992 # shows values of column -> there are NAs
## [1] NA NA NA NA 1.18 NA NA NA NA NA NA
## [12] 36.79 NA NA NA NA 0.00 NA NA NA NA 4.81
## [23] 9.09 NA NA NA 58.85 NA NA 1.48 NA 6.64 NA
## [34] NA NA 4.48 4.83 NA NA NA NA NA NA NA
## [45] 1.50 NA NA NA NA NA NA NA NA NA NA
## [56] NA NA NA NA NA NA NA NA NA NA NA
## [67] NA 12.48 10.80 NA NA NA NA NA NA NA NA
## [78] NA NA NA NA NA NA NA 7.93 NA NA NA
## [89] NA NA 4.83 NA NA NA NA NA NA NA NA
## [100] NA 3.28 NA 2.88 NA NA NA NA NA NA NA
## [111] NA 0.10 NA NA 34.12 27.37 NA NA NA NA NA
## [122] NA NA NA NA NA NA NA NA NA NA NA
## [133] NA NA 0.30 NA NA NA NA NA NA NA NA
## [144] NA NA NA NA NA NA 0.00 NA NA NA NA
## [155] NA NA 1.13 NA NA NA 0.86 NA NA NA NA
## [166] 28.61 0.68 0.15 NA NA 2.79 14.95 NA NA NA NA
## [177] NA
nworld$Pov_1992[is.na(nworld$Pov_1992)] <- 0 # select NAs and assign 0 for TRUE
# like above
world.centers <- st_centroid(nworld) # center points
world.spdf <- methods::as(nworld, 'Spatial') # convert to spatial class object
world.spdf@data$id <- row.names(world.spdf@data) # create id column with numbers
world.tidy <- broom::tidy(world.spdf) # now it is just a regular dataframe
joining tables:
world.tidy <- dplyr::left_join(world.tidy, world.spdf@data, by='id')
summary(world.tidy$Pov_1992)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 1.429 0.000 58.850
mapping:
g <- ggplot(world.tidy) +
geom_polygon_interactive(
color='black',
aes(long, lat, group=group, fill=(Pov_1992),
tooltip=sprintf("%s<br/>%s",iso_a3,Pov_1992))) +
hrbrthemes::theme_ipsum() +
colormap::scale_fill_colormap(
colormap=colormap::colormaps$freesurface_red, reverse = T) +
labs(title='Poverty Gap in the World in 1992', subtitle='As Percent of Population',
caption='Source: World Bank Open Data.')
g
# spatial data
world <- rnaturalearth::countries110 %>% st_as_sf()
europe <- world[world$region_un=="Europe"&world$name!='Russia',] %>% dplyr::select("iso_a3", "name")
# plastic waste data
plastic <- read.csv("./datos/plastic-waste-per-capita.csv") # contains data of per capita plastic waste in kg per person and day
names(plastic) <- c("Country", "Code", "Year", "plasticwaste") # rename cols
head(plastic)
## Country Code Year plasticwaste
## 1 Albania ALB 2010 0.069
## 2 Algeria DZA 2010 0.144
## 3 Angola AGO 2010 0.062
## 4 Anguilla AIA 2010 0.252
## 5 Antigua and Barbuda ATG 2010 0.660
## 6 Argentina ARG 2010 0.183
# join spatial and plastic data and project crs
plasticspatial <- inner_join(europe, plastic, by=c("iso_a3" = "Code"))
## Warning: Column `iso_a3`/`Code` joining character vector and factor,
## coercing into character vector
st_transform(plasticspatial, crs="+proj=laea +lon_0=18.984375")
## Simple feature collection with 27 features and 5 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -7627730 ymin: 281334.3 xmax: 1687110 ymax: 8234905
## epsg (SRID): NA
## proj4string: +proj=laea +lat_0=0 +lon_0=18.984375 +x_0=0 +y_0=0 +ellps=WGS84 +units=m +no_defs
## First 10 features:
## iso_a3 name Country Year plasticwaste
## 1 ALB Albania Albania 2010 0.069
## 2 BEL Belgium Belgium 2010 0.080
## 3 BGR Bulgaria Bulgaria 2010 0.154
## 4 BIH Bosnia and Herz. Bosnia and Herzegovina 2010 0.144
## 5 DEU Germany Germany 2010 0.485
## 6 DNK Denmark Denmark 2010 0.047
## 7 ESP Spain Spain 2010 0.277
## 8 EST Estonia Estonia 2010 0.176
## 9 FIN Finland Finland 2010 0.234
## 10 FRA France France 2010 0.192
## geometry
## 1 MULTIPOLYGON (((142770.1 45...
## 2 MULTIPOLYGON (((-1205140 55...
## 3 MULTIPOLYGON (((316667.9 47...
## 4 MULTIPOLYGON (((1805.338 48...
## 5 MULTIPOLYGON (((-652941.9 5...
## 6 MULTIPOLYGON (((-448127.7 5...
## 7 MULTIPOLYGON (((-2454567 46...
## 8 MULTIPOLYGON (((361807.8 61...
## 9 MULTIPOLYGON (((464138.2 72...
## 10 MULTIPOLYGON (((-7450322 34...
#head(plasticspatial)
class(plasticspatial)
## [1] "sf" "data.frame"
# plot with base R
plot(plasticspatial["plasticwaste"], key.pos = 4, axes = TRUE, key.width = lcm(1.3), key.length = 1.0)
# get back longitude and latitude for plotting with ggplot (is this the only reason?)
eu.centers <- st_centroid(plasticspatial) # center points
eu.spdf <- methods::as(plasticspatial, 'Spatial') # convert to spatial class object
eu.spdf@data$id <- row.names(eu.spdf@data) # create id column with numbers
eu.tidy <- broom::tidy(eu.spdf) # now it is just a regular dataframe
eu.tidy <- dplyr::left_join(eu.tidy, eu.spdf@data, by='id')
# mapping with ggplot
g <- ggplot(eu.tidy) +
geom_polygon_interactive(
color='black',
aes(long, lat, group=group, fill=plasticwaste,
tooltip=sprintf("%s<br/>%s",iso_a3,plasticwaste))) +
coord_fixed(xlim=c(-28,40),ylim=c(30,74)) +
hrbrthemes::theme_ipsum() +
viridis::scale_fill_viridis(option="plasma", direction = -1)+
labs(title='Plastic Waste', subtitle='in Europe',
caption='Source: Jambeck et al. 2015', fill="plastic waste\n[kg per capita and day]")
g
The plotly package provides access to an online visualitation platform. Before running code in this section, you need to sign up at plotly
#install.packages("plotly", type="binary")
library(plotly)
Sys.setenv("plotly_username"="mh_09")
Sys.setenv("plotly_api_key"="DND0VFXPT45R7m2ppD22")
df <- read.csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)
# specify map projection/options
g <- list(
showframe = FALSE,
showcoastlines = FALSE,
projection = list(type = 'Mercator')
)
p <- plot_geo(df) %>%
add_trace(
z = ~GDP..BILLIONS., color = ~GDP..BILLIONS., colors = 'Blues',
text = ~COUNTRY, locations = ~CODE, marker = list(line = l)
) %>%
colorbar(title = 'GDP Billions US$', tickprefix = '$') %>%
layout(
title = '2014 Global GDP<br>Source:<a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">CIA World Factbook</a>',
geo = g
)
# Create a shareable link to your chart
# Set up API credentials: https://plot.ly/r/getting-started
options(browser = 'false') # suppress auto open in browser
chart_link = api_create(p, filename="nchoropleth-ag")
## Found a grid already named: 'nchoropleth-ag Grid'. Since fileopt='overwrite', I'll try to update it
## Found a plot already named: 'nchoropleth-ag'. Since fileopt='overwrite', I'll try to update it
chart_link
the plot is now available here
The following code is a just a plain visualization:
library(leaflet)
plt <- leaflet() %>%
setView(lat = 50.85045, lng = 4.34878, zoom=13) %>%
addTiles(group="OSM") %>%
addProviderTiles(providers$CartoDB.DarkMatter, group="Dark") %>%
addProviderTiles(providers$CartoDB.Positron, group="Light") %>%
addLayersControl(baseGroups=c('OSM','Dark','Light'))
plt
More important is the next example which uses a dataset from the recent 2014-2016 West African Ebola outbreak. This example is taken from here.
urlfile2 <- "https://raw.github.com/amcrisan/EpiDesignPattern/master/data/ebola_metadata.RDS"
myfile <- download.file(urlfile2,"./datos/ebola_metadata.csv", method="auto")
# try other data:
#urlfile3 <- "https://github.com/nickloman/ebov/blob/master/metadata/metadata_samples.txt"
#myfile3 <- download.file(urlfile3,"./datos/ebola_metadata2.csv", method="auto")
unfortunately, the rest from here does not work because the data is not accessible
#metadata <- readRDS("./G4D/datos/ebola_metadata.RDS") # tried a lot but doesnt work (even though file is in folder)
#metadata2 <- read.csv("ebola_metadata2.csv")
#install.packages("ape")
#install.packages("lubridate")
#install.packages("dygraphs")
#install.packages("xts")
#install.packages("ggmap")
library(ape)
#library(ggtree)
library(lubridate)
library(dygraphs)
library(xts)
library(ggmap)
library(RColorBrewer)
library(tidyr)
library(dplyr)
# First, we need to create some new data
# Counting number cases per country
# aggDat<-metadata %>%
# filter(Country !="?") %>%
# group_by(Country,country_lon,country_lat) %>%
# dplyr::count()%>%
# mutate(popup=sprintf("%s = %d cases",Country,n)) #create a popup for the map
# Here's a very quick look at what this command generates for us:
#aggDat
# Now, we'll define the color palette
# pal <- colorFactor(
# palette = c('red', 'blue', 'green', 'purple', 'orange'),
# domain = aggDat$Country)
# Now, we'll create the Map
# This first command will creat an empty map
# m<-leaflet(aggDat)
# m %>%
# addTiles()%>%
# addCircleMarkers(
# lng=~country_lon,
# lat= ~country_lat,
# radius=~sqrt(n)*2,
# color = ~pal(Country),
# stroke = FALSE, fillOpacity = 0.7,
# label=~as.character(popup),
# labelOptions = labelOptions(noHide = T),
# options = leafletOptions(minZoom = 0, maxZoom = 10,scroolWheelZoom=FALSE))
By modifying the code, it is possible to get a dissagregated visualization:
# aggDat<-metadata %>%
# filter(Country !="?") %>%
# group_by(Country,Region,region_lon,region_lat) %>%
# dplyr::count()%>%
# mutate(popup=sprintf("%s (%s) = %d cases",Region,Country,n))
#
# m<-leaflet(aggDat)
#
# m %>%
# addTiles()%>%
# addCircleMarkers(
# lng=~region_lon,
# lat= ~region_lat,
# radius=~sqrt(n)*2,
# color = ~pal(Country), #we actually colour the points by country here
# stroke = FALSE, fillOpacity = 0.7,
# label=~as.character(popup),
# labelOptions = labelOptions(noHide = F)
# )
Leaflet also allows you to automatically generate clusters that will vary when you zoom in and out on the map. We didn’t implement this feature in the shiny application, but I wanted to let you know this is available.
# m<-leaflet(metadata)
# # by providing the region latitude and longtitude co-ordinates we allow clustering of regional samples
# m %>%
# addTiles()%>%
# addCircleMarkers(
# lng=~region_lon,
# lat= ~region_lat,
# stroke = FALSE, fillOpacity = 0.5,
# clusterOptions= markerClusterOptions(titile="regional clusters") #cluster options
# )
The cluster colour here are automatically assigned by the Leaflet package, and are as follows:
orange / red for the largest clusters
yellow for “medium” clusters
green for small clusters
By click on a point, the clustering algorithm reveals the geospatial locations that were aggregated into a cluster (which its a neat and handy feature).