Welcome to the RPubs for the book ‘Applied Spatial Statistics And Econometrics: Data Analysis in R’ (Routledge, 2nd edition, 2026) (ed.Katarzyna Kopczewska).

This part is for Chapters 1-3.

To download and read data, please see RPubs with Intro.

This site is a collection of all R codes included in this book. These are just the codes - no comments, no explanations what, why and how. All those details are in the book.

The book was written by Spatial Warsaw team based at the Faculty of Economic Sciences at University of Warsaw. Visit us on LinkedIn.

Chapter 1: Basic operations in the R software (Mateusz Kopyt)

1.6 Basic operations on objects

3+5
vector1<-c(1, 2, 3, 4, 5)   # vector of numbers 1,2,3,4,5
vector2<-10:20          # vector of integers from 10 to 20
vector3<-seq(5,10,0.2) # a sequence of numbers from 5 to 10 with a 0.2 step

class(vector1)
class(vector2)
class(vector3)

vector1i<-as.integer(vector1)
class(vector1i)

vector2[3] # the third element of vector2
vector2[2:4]    # elements from 2 to 4 from vector2
vector2[c(1,3,5)] # numbers in c function mean indexes

vector4<-c("abc",3,"xyz",5)
vector4
class(vector4)
vector4i<-as.integer(vector4)
vector5<-c(FALSE,TRUE,TRUE,FALSE)
vector5
class(vector5)

vector3<-seq(5, 10, 0.2)    # reminder of vector
new_vector3<-vector3        # assign a new name
new_vector3             # display the object
rm(vector3)             # delete object from the memory 

list1<-list(1:5, c(FALSE, TRUE, FALSE, TRUE), c(2.3, 5.9), "a", c("John", "Mary", "Sam"))
list1

list2<-list(first=c(2,3,4), second="My plants", third=c(FALSE, TRUE))
list2

str(list1)
str(list2)

list2$third     # by name
list2[[3]]      # by element number
list2[3]        # by element number
list2[[1]][[3]]

df1<-data.frame(1:5, 101:105)
df1 # see column names created automatically

# vector with names of days of the week
wd_names<-c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", 
            "Friday", "Saturday") 
# sample() randomly selects an element from a vector 
df2<-data.frame(let=sample(letters, 100, replace=TRUE),
                num=sample(1:100, 100, replace=TRUE), 
                wd=sample(wd_names, 100, replace=TRUE))
head(df2)   # returns the first 6 rows of the object by default
str(df2)

names(df2)  # request to display headers (column names)
colnames(df2)<-c("letters", "numbers", "days") # add or change column names 
head(df2)           # display the df object after changes

x<-seq(1, 20, 0.4)  # generate a variable x
y<-seq(2,21,0.4)    # generate a variable y
z<-seq(3,22,0.4)    # generate a variable z
xyz<-cbind(x,y,z)   # combine variables into one xyz matrix object 
is.data.frame(xyz)      # check if xyz is a data.frame object
xyz.df<-as.data.frame(xyz)  # convert xyz to data frame
head(xyz.df)        # display the beginning of xyz data.frame

# observations from rows 1 to 4,  variables from columns 1 and 3
df2_section<-df2[1:4, c(1,3)] 
df2_section

# only rows 2,7,10,15 and all columns
df2_section2<-df2[c(2, 7, 10, 15), ]

# rows 1 to 10, but without a selected column - here is the third column 
# character (-) removes the given row or column from the result
df2_section3<-df2[1:10, -3] 
df2_selection4<-df2[df2$days=="Sunday",]
head(df2_selection4) # subset for Sundays
df2_selection5<-df2[df2$days=="Monday" | df2$days=="Tuesday", ]

df3<-data.frame(var_x=sample(1000:10000, 100, replace=TRUE),
                var_y=sample(5000:20000, 100, replace=TRUE), 
                var_z=sample(1000:5000, 100, replace=TRUE))
head(df3)
# a new object containing a marker (ind) and values
df3_stacked<-stack(df3)
df3_stacked
unstack(df3_stacked) # return to the original data

df<-data.frame(ID=1:15, Name=paste("Person", 1:15), Score=sample(50:100, 15, replace=TRUE))
df$Score_new<-df$Score

# create missing observations coded as 99999
df$Score_new[c(3, 5, 9, 10)]<-99999 
head(df$Score_new, 8) # display of the first 8 rows of Score variable

# assign NA to missing observations
is.na(df$Score_new)<-df$Score_new>=99998 

df$Score_new # display all observations of Score_new variable

1.7 Defining and loading data

getwd()             # check the current working directory
setwd("C:/R/Data/")     # set the required working directory - example

library(foreign)        # library supporting additional file formats
getwd()             # checks the location of the default directory

# lines are hashed as this is an example only how to read different formats
# load data from a text file
#data1<-read.table("data.txt", header=TRUE, sep="\t", encoding="UTF-8") 

# load data from a CSV file
#data2<-read.csv("data.csv", header=TRUE, sep=";",dec=".", encoding="UTF-8")

# load data from a tab delimited file
#data3<-read.delim("data.dat", header=FALSE, sep="\t") 

# load data from a dbf file
#data4<-read.dbf("data.dbf") 

# load data from the SPSS format
#data5<-read.spss("data.sav", use.value.labels=FALSE, to.data.frame=TRUE) 

# load data from the STATA format
#data6<-read.dta("data.dta", convert.factors=FALSE) 

library(readxl)         # for read_xlsx()
# only the second sheet from myfile.xlsx will be loaded
#data7<-read_xlsx("myfile.xlsx", sheet=2, col_names=TRUE) 

# reading regional data
data<-read.csv("regional_dataset.csv", header=TRUE, sep=";", dec=",", encoding="UTF-8")
names(data)     # printout of a few lines only
class(data)

# convert data to the data.frame class object and create a new object 
data.df<-as.data.frame(data) 
str(data) # structure of the object
attributes(data) # object attributes 

1.8 Basic statistics of the data set

data23<-data[data$year==2023,] # subset of regional data for the year 2023
summary(data23)         # printout for a few variables

demography<-as.data.frame(cbind(data23$birth_per_1K, data23$persons_in_K, data23$persons_productive_age_K))
colnames(demography)<-c("births", "persons", "working_age") 
# average calculated from all data.frame object variables using lapply()
lapply(demography, mean,na.rm=TRUE)

# average calculated from all data.frame object variables using sapply()
sapply(demography, mean, na.rm=TRUE) 

# average calculated for one variable (column) with data.frame objects
mean(demography$working_age, na.rm=TRUE)

var1<-rnorm(1000)   # 1000 numbers generated from the normal distribution
quantile(var1)

# user-defined probability thresholds
quantile(var1,  probs=c(1,2,5,95,98,99)/100) 

fivenum(data23$unemployment_rate) # position statistics

test_factor<-as.factor(data$region_name) # enforcing the factor class
class(test_factor) # checking if the variable is factor

levels(test_factor) #  factor classes / level names
nlevels(test_factor)  # number of classes / levels

cor(data23$unemployment_rate, data23$salary_avg_Poland_100p)
cor.test(data23$unemployment_rate, data23$salary_avg_Poland_100p)
# creating a new object containing only analysed variables: 
# unemployment rate, salary average, share of the working age population

variables.df<-data.frame(cbind(data23$unemployment_rate, data23$salary_avg_Poland_100p, data23$persons_productive_age_K/data23$persons_in_K*100))
colnames(variables.df)<-c("unemployment rate", "salary avg", "working age share")
# correlation matrix, the rounding function has been used 
# up to two decimal places to improve readability
round(cor(variables.df, use="pairwise"),2)

# Fig.1.6a - correlation scatterplot
pairs(variables.df) 

# Fig.1.6b – correlation between two variables in hexagonal bins
library(hexbin)
bin<-hexbin(data23$unemployment_rate, data23$salary_avg_Poland_100p, xbins=40)
plot(bin)

# Fig.1.6c – correloram with GGally:: package
library(GGally)                 # for ggpairs()
ggpairs(variables.df)           

# Fig.1.6d – correlation with PerformanceAnalytics:: package
library(PerformanceAnalytics)       # for chart.Correlation()
chart.Correlation(variables.df, method="spearman", hist=F)  

Figure 1.6: Graphical representation of correlation: a) with pairs(), b) with hexbin(), c) with ggpairs(), d) with chart.Correlation()

variable<-data23$salary_average

# expression standardising variable - the new variable is Z_variable
Z_variable<-(variable-mean(variable))/sd(variable)

mean(Z_variable) # the mean of the standardised variable is close to zero
var(Z_variable)     # the variance is equal to 1

variable1<-data23$salary_average
Z_variable1<-scale(variable1)   # the standardised variable

mean(Z_variable1)
var(Z_variable1)

1.9 Basic visualisations

1.9.1 Scatterplot and line chart

# aggregate regional data by year
unemp<-aggregate(data$unemployment_rate, by=list(data$year), mean, na.rm=TRUE) 
names(unemp)<-c("year", "value")
head(unemp)     # 6 years out of 16

# Fig.1.7a – basic plot with points only
# par(mfrow=c(1,2), mar=c(2,2,2,1)) # to split the window and set margins
plot(unemp$value) # the simplest point plot

# Fig.1.7b – next layers to Fig.1.7a
# line chart with additional options
plot(unemp$value, type="l", lwd=2, axes=FALSE, xlab="year", ylim=c(5,20), ylab="Annual unemployment rate")
axis(1, at=1:16, labels=unemp$year, cex=0.8)    # x axis
axis(2)                         # y axis
abline(h=(5:20), lty=3, col="grey80")       # horizontal lines
abline(h=(1:4)*5, lty=2, col="grey20")      # horizontal darker lines
abline(v=(1:16)*1, lty=3)               # vertical lines
points(unemp$value, pch=21, bg="red", cex=1.5)  # points
title(main="An average unemployment rate in Poland based on poviat data", cex.main=0.8) # title of the figure
legend(3, 20, pch=21, pt.bg="red", col="black", lwd=2, c("unemployment rate"), bty="n", cex=0.8)    # legend
text(1:16, unemp$value+0.5, round(unemp$value,2), cex=0.8)  # text at points

Figure 1.7: Visualisation of time series: a) simple point plot, b) point plot with line and additional options

# Fig.1.8
library(ggplot2)
unemp$labels<-round(unemp$value, 2) # extra column for point labels  

ggplot(data=unemp,  mapping=aes(x=year, y=labels)) + geom_line() + geom_point(mapping=aes(color="red"), show.legend=FALSE, size=3) + geom_label(aes(label=labels), nudge_y=0.5) + labs(title="Average unemployment rate in Poland \t based on poviat data") + theme(plot.title=element_text(size=20, hjust=0.5), axis.text=element_text(size=16), axis.title=element_blank())

Figure 1.8: Visualisation of time series with ggplot2:: package

1.9.2 Barplot (column chart)

# data processing for Fig.1.9a
p_preprod<-aggregate(data$persons_preproductive_age_K, by=list(data$year), sum, na.rm=TRUE)
colnames(p_preprod)<-c("year","preprod")

# Fig. 1.9a – simple bar plot
# the names.arg option gives labels under the bars
barplot(p_preprod$preprod, col="grey", ylim=c(0,8000), ylab="in thousend",
  names.arg=p_preprod$year, cex.axis=1.2, main="Pre-working age population", cex.names=1.2, cex.lab=1.2, cex.main=1.5) 
abline(h=c(6000,7000,8000), lty=3, col="red") # horizontal lines

# data processing for Fig.1.9b
p_preprod<-aggregate(data$persons_preproductive_age_K, by=list(data$year), sum, na.rm=TRUE)
p_prod<-aggregate(data$persons_productive_age_K, by=list(data$year), sum, na.rm=TRUE)
p_postprod<-aggregate(data$persons_postproductive_age_K, by=list(data$year), sum, na.rm=TRUE)
popul_str<-cbind(p_preprod, p_prod$x, p_postprod$x) # population structure 
colnames(popul_str)<-c("year", "pre-working", "working", "post-working")

# Fig.1.9b - stacked bar graph with barplot()
# convert the data.frame object to the matrix for the barplot command
popul.m<-as.matrix(popul_str) 
barplot(t(popul.m[,2:4]), ylim=c(0,40000), cex.axis=1.2, names.arg=popul.m[,1], legend.text=TRUE, args.legend=list(x="right", bg="white"), cex.names=1.2, main="Population structure by years", cex.main=1.5)
abline(h= popul.m[1,2]/1000, lty=3, lwd=2, col="blue")
abline(h=sum(popul.m[1,2:3]/1000), lty=3, lwd=2, col="blue")
abline(h=sum(popul.m[1,2:4]/1000), lty=3, lwd=2, col="blue")

Figure 1.9: Structural figures: a) simple barplot, b) cumulative barplot

t(popul.m[,2:4])    # statistics by years (1:16)

# Fig.1.10 – barplot with ggplot()
library(ggplot2)
popul_long<-reshape(popul_str, direction="long", idvar="year", timevar="type", times=names(popul_str)[2:4], varying=2:4, v.names="Popul", new.row.names=1:48)
names<-c("post-working", "working", "pre-working") # names of columns

ggplot(data=popul_long,  mapping=aes(x=year, y=Popul, fill=factor(type, names))) + geom_bar(stat="identity") + geom_line(mapping=aes(x=year, y=popul_long[1,3]), color="blue", linetype="dashed") + geom_line(mapping=aes(x=year, y=popul_long[1,3] + popul_long[17,3]), color="blue", linetype="dashed") + geom_line(mapping=aes(x=year, y=popul_long[1,3] + popul_long[17,3] + popul_long[33,3]), color="blue", linetype="dashed") + ggtitle("Population structure by years") + theme(plot.title=element_text(size=20, hjust=0.5), axis.text=element_text(size=16), axis.title=element_blank(), legend.title= element_blank())

Figure 1.10: Barplot using ggplot2:: package

1.9.3 Boxplot

data23<-data[data$year==2023,]
boxplot(data23$unemployment_rate, col="grey78", ylab="unemployment rate", main=" Unemployment rate in poviats in 2023")

unemp<-aggregate(data23$unemployment_rate, by=list(data23$region_name), mean, na.rm=TRUE)
unemp$ID<-c(1:16)
colnames(unemp)<-c("region", "value", "ID")

# Fig 1.11 – multi-layer boxplot
par(mar=c(5,5,5,5))
boxplot(data23$unemployment_rate~data23$region_name, axes=FALSE, ann=FALSE, ylim=c(0,65), col="bisque", border="bisque4")
# average unemployment rate line
abline(h=mean(data23$unemployment_rate, na.rm=TRUE), lty=3, col="red") 
axis(1, at=1:16, labels=unemp$ID, cex.axis=0.8)     # x axis
axis(2, at=(0:6)*10, cex.axis=0.8, ylab="%")        # y axis
for(i in 1:8){          # legend text in two columns
  text(1,60-3.5*i, unemp$ID[i], cex=0.9)
  text(4,60-3.5*i, unemp$region[i], cex=0.9)}
for(i in 9:16){
  text(8,60-3.5*(i-8), unemp$ID[i], cex=0.9)
  text(11,60-3.5*(i-8), unemp$region[i], cex=0.9)}
title(main="The unemployment rate in 2023 by voivodships", cex.main=0.99,
      sub="Data aggregated from the poviat level", cex.sub=0.9)

Figure 1.11: A boxplot grouping by the selected variable: unemployment rate by regions

# Fig.1.12 – boxplot using ggplot()
ggplot(data=data23,  mapping=aes(x=region_name, y=unemployment_rate, fill=region_name)) + geom_boxplot(staplewidth=0.3) + ggtitle("The unemployment rate in 2023 by voivodships", subtitle="Data aggregated from the poviat level") + scale_x_discrete(labels=1:16) + labs(fill="Regions") + theme(plot.title=element_text(size=20, hjust=0.5), plot.subtitle= element_text(size=16, hjust=0.5), axis.text=element_text(size=16), axis.title=element_blank())  

Figure 1.12: Visualisation of boxplot chart with ggplot2:: package

1.10 Regression in examples

# Fig.1.13 – scatterplot of x and y variables
plot(log(data23$salary_average), data23$unemployment_rate, xlab="Logarithm of average salary in PLN", ylab="Registered unemployment rate (in %)") 

Figure 1.13: The dependency between the explanatory (x) and dependent (y) variables selected for the model with the fitted regression line

model1<-lm(unemployment_rate ~ log(salary_average), data=data23)
model1  # regression result - basic version
summary(model1) # regression result - full version

plot(log(data23$salary_average),data23$unemployment_rate, xlab="Logarithm of average salary in PLN", ylab="Registered unemployment rate (in %)")
abline(model1, col="red") # Fig.1.13, fitted line

intercept<-model1$coefficients[1]  # extract the constant term
slope<-model1$coefficients[2] # extract the slope of the regression line
intercept
slope

# Fig.1.14a
# par(mfrow=c(1,2)) # the option to show two charts side by side
res<-residuals(model1) 
plot(res, ylab="Residuals") 
abline(h=0, lty=2, col="red") 
title(main="Residual scatter plot")

# Fig.1.14b - histogram of residuals
hist(res, breaks=30, main="Residual histogram", freq=FALSE, xlab="Residuals", xlim=c(-15,15))

# create the normal distribution density function 
# with parameters consistent with the empirical distribution of the rest
mean.res<-mean(res)
sd.res<-sd(res)
# theoretical normal distribution curve
curve(dnorm(x, mean=mean.res, sd=sd.res), add=TRUE, col="red")

Figure 1.14: Diagnostics of model residuals: a) scatterplot, b) histogram

model1.fit<-predict(model1)  # theoretical values obtained from the model 
error<-data23$unemployment_rate-model1.fit # error of the model 

# normality test for the distribution of unemployment variable
shapiro.test(data23$unemployment_rate) 
shapiro.test(res) # normality test of residual distribution 

Chapter 2: Spatial data and classes in R (Maria Kubara, Piotr Wójcik)

2.2. Point geometry in sf

# create a simple point dataset in data.frame class
my.points.df<-data.frame(name=c("A", "B", "C"), value=c(20, 53, 40), long=c(20.9, 21.0, 21.1), lat=c(52.2, 52.3, 52.4))
my.points.df        # view the initial data.frame

library(sf)     # for st_as_sf(), to convert data.frame to sf object
my.points.sf<-st_as_sf(my.points.df, coords=c("long", "lat"), crs=4326)
my.points.sf        # view the sf class object

names(my.points.sf)[1]<-"PointName" # change the name of the 1.st column
my.points.sf$value<-my.points.sf$value-2 # modify values of variable
my.points.sf
mean(my.points.sf$value)

2.3. Line geometry in sf

lines.df<-data.frame(name=c("Line 1", "Line 2"), type=c("road", "rail"),
  wkt=c("LINESTRING(20.9 52.2, 21.0 52.3, 21.1 52.4)", 
          "LINESTRING(20.5 52.1, 20.7 52.5)"))
lines.df

lines.sf<-st_as_sf(lines.df, wkt="wkt", crs=4326)
lines.sf

2.4. Polygon geometry in sf

polygon.df<-data.frame(name="First area", wkt="POLYGON((20.9 52.2, 21.0 52.3, 21.1 52.4, 20.9 52.2))")
polygon.sf<-st_as_sf(polygon.df, wkt="wkt", crs=4326)
polygon.sf

2.5. Comparing sf:: and sp::

library(sp)     # for proj4string() and as()to convert sf points to sp
my.points.sp<-as(my.points.sf, "Spatial") # convert from sf to sp
proj4string(my.points.sp)<-CRS("+proj=longlat +datum=WGS84 +no_defs")
# points_again_sf<-st_as_sf(my.points.sp) # reverse conversion if needed
class(my.points.sp)
my.points.sp

str(my.points.sf)
str(my.points.sp)
mean(my.points.sf$value)       # direct access in sf
mean(my.points.sp@data$value)  # requires accessing the @data slot

polygon.sp<-as(polygon.sf, "Spatial")   # convert sf polygons to sp
polygon.sp                      # shorter output to save space

str(polygon.sf)     # check the internal structure of the object
str(polygon.sp) # internal structure uses the slot based system

2.6. Reading spatial data from external files

pov<-st_read("powiaty.shp") # read the shapefile
 
st_is_longlat(pov)
st_crs(pov)

pov<-st_transform(pov, crs="+proj=longlat +datum=NAD83") # change projection

table(st_is_valid(pov))
which_incorrect<-which(!st_is_valid(pov))
pov[which_incorrect, c("JPT_KOD_JE", "JPT_NAZWA_")]

pov<-st_make_valid(pov) # automatically correct all the geometries
which_incorrect<-which(!st_is_valid(pov))

2.7. Geographical operations on the sf objects

2.7.1. Handling the geometry column and filtering sf objects

# filter out one row - get the region of interest
warsaw_poviat<-pov[pov$JPT_NAZWA_=="powiat Warszawa", ] 

# limit the descriptive columns in the dataset
warsaw_poviat<-warsaw_poviat[ ,c("JPT_NAZWA_", "SHAPE_AREA")]
warsaw_poviat           # two descriptive columns plus geometry

head(st_set_geometry(warsaw_poviat, value=NULL)) 
head(st_drop_geometry(warsaw_poviat)) # both provide the same output
class(warsaw_poviat)
class(st_drop_geometry(warsaw_poviat))
class(warsaw_poviat$geometry)

2.7.2. Merging spatial objects with st_union()

# filter out ‘powiat pruszkowski’ and limit descriptive columns
pruszkow_poviat<-pov[pov$JPT_NAZWA_=="powiat pruszkowski", c("JPT_NAZWA_", "SHAPE_AREA")] 

# bind two objects into a single object that still includes two areas
wawpru_separated<-rbind(warsaw_poviat, pruszkow_poviat) 
wawpru_separated # two areas remain as separate rows
# bind two objects into a single object that includes one big area
wawpru_union<-st_union(warsaw_poviat, pruszkow_poviat)
wawpru_union # now one area is represented as a single row

2.7.3. Creating a buffer around a polygon

waw_buffer<-st_buffer(warsaw_poviat, dist=2000) 
st_area(warsaw_poviat) # coverage of the original polygon
st_area(waw_buffer) # larger polygon created

2.7.4. Finding centroids of polygons

pov_centroids<-st_centroid(pov) # centroid of region
pov_centroids[, c("JPT_NAZWA_", "SHAPE_AREA")] # printout limited to 3 rows

2.7.5. Checking intersections between polygons

st_intersects(warsaw_poviat, pruszkow_poviat)
st_intersects(warsaw_poviat, pov[pov$JPT_NAZWA_=='powiat Lublin',])

2.7.6. Extracting the intersection of two geometries

circle_waw<-st_buffer(st_centroid(warsaw_poviat), dist=10000) # 10 km
common_area<-st_intersection(warsaw_poviat, circle_waw)
common_area

plot(st_geometry(warsaw_poviat))    # plot the contour of the region
plot(st_geometry(circle_waw), add=TRUE) # plot the circle
plot(st_geometry(common_area), add=TRUE, col="cornsilk")    # plot joint area

plot(st_geometry(st_buffer(warsaw_poviat, dist=2000)), col="cornsilk") # 2km
plot(st_geometry(warsaw_poviat), col="white", add=TRUE) # plot smaller region

Figure 2.1: Buffers and intersections of regions: a) region and circle (buffer of centroid), b) region and buffered region

st_area(st_buffer(warsaw_poviat, dist=2000)) # size of buffered region
st_area(warsaw_poviat) # size of region “warsaw_poviat”
st_area(circle_waw) # size of full circle
st_area(common_area) # size of intersection of region and circle

2.7.7. Filtering points within a polygon

# unify projection of my.points.sf with warsaw_poviat
my.points.sf<-st_transform(my.points.sf, crs="+proj=longlat +datum=NAD83") 
st_within(my.points.sf, warsaw_poviat)

inside_warsaw<-my.points.sf[st_within(my.points.sf, warsaw_poviat), ]
# Error in unclass(x)[i] : invalid subscript type 'list'

st_within(my.points.sf, warsaw_poviat, sparse=FALSE)

inside_warsaw<-my.points.sf[st_within(my.points.sf, warsaw_poviat, sparse=FALSE),]

2.7.8. Measuring distances between points

st_distance(my.points.sf, my.points.sf) # distance in meters

2.8. Merging spatial data with other statistical datasets

# read the file with statistical variables (using relative path)
data<-read.csv("regional_dataset.csv", header=TRUE, sep=";", dec=",", encoding="UTF-8")
head(data) # check if data was read properly
summary(data) # glimpse inside of the dataset structure

names(data) # output shortened, full in chapter Introduction
length(unique(data$code)) # unique poviat names
length(unique(data$year)) # number of years included in the sample

pov[1:3, 1:5]   # rows 1:3 and columns 1:5
data[1:3, 1:5]

# example on using ID_MAP as the merging key
# allows for a direct binding due to proper data ordering
data[data$year==2010,]$ID_MAP 

# cbind() works on datasets with the same number of rows
# hence, filtering by year is needed to have 380 observations in each
dataSpatial2010<-cbind(pov, data[data$year==2010,]) #bind datasets

# show a sample of the merged dataset, printout with rows only
dataSpatial2010[,c("JPT_NAZWA_", "JPT_KOD_JE", "subregion_name_PL_font", "subregion_number", "year", "housing_price_sqm_PLN")] 

pov$JPT_KOD_JE[1:10] # treated as a character, leading zeros included
data[data$year==2010,]$subregion_number[1:10] # as a number, no leading zeros

# 1st option – remove leading zeros from JPT_KOD_JE
pov$JPT_KOD_JE_numeric<-as.numeric(pov$JPT_KOD_JE)

# 2nd option – add leading zeros to the shorter subregion_number codes
data$subregion_number_padded<-sprintf("%04d", data$subregion_number)

data2010<-data[data$year==2010,] # subset of data for 2010 – 380 rows
merged_data1<-merge(pov, data2010, by.x="JPT_KOD_JE", by.y= "subregion_number_padded")  # merge the original JPT_KOD_JE
merged_data2<-merge(pov, data2010, by.x="JPT_KOD_JE_numeric", by.y= "subregion_number")     # merge the original subregion_number
merged_data1[1:5, c("JPT_KOD_JE", "subregion_name_noPL_font", "area_km2")]
merged_data2[1:5, c("JPT_KOD_JE", "subregion_name_noPL_font", "area_km2")]

#1st option – put subregion_name_PL_font to lowercase to match with JPT_NAZWA_
data2010$subreg_name_PL_font_lower<-tolower(data2010$subregion_name_PL_fon)

# merge the original JPT_NAZWA_ and lowercase subregion_name_PL_font
merged_data3<-merge(pov, data2010, by.x="JPT_NAZWA_", by.y = "subreg_name_PL_font_lower")

#2nd option – put names to lowercase and remove latin letters
library(stringi)         # for stri_trans_general()
data2010$subreg_name_noPL_font_lower<-tolower(data2010$subregion_name_noPL_font)
pov$JPT_NAZWA_clean<-stri_trans_general(tolower(pov$JPT_NAZWA_), "Latin-ASCII")

# merge the original JPT_NAZWA_ and lowercase subregion_name_PL_font
merged_data3<-merge(pov, data2010, by.x="JPT_NAZWA_", by.y="subreg_name_PL_font_lower")

# merge with no special letters - using JPT_NAZWA_clean
merged_data4<-merge(pov, data2010, by.x="JPT_NAZWA_clean",  by.y="subreg_name_noPL_font_lower")

merged_data3[1:5, c("JPT_KOD_JE", "subregion_name_noPL_font", "area_km2")]
merged_data4[1:5, c("JPT_KOD_JE", "subregion_name_noPL_font", "area_km2")]

2.9. Enhancing the spatial data manipulation with tidyverse::

library(dplyr)
library(stringr)
class(merged_data1)
glimpse(merged_data1)       # printout was shortened

select(merged_data1, JPT_KOD_JE, year, forest_km2) # selected columns
select(merged_data1, persons_in_K:persons_preproductive_age_K)
select(merged_data1, starts_with("subregion"))
select(merged_data1, JPT_KOD_JE, ends_with("01"), matches("_.{4}_"), -JPT_KJ_I_1)

limited_data<-select(merged_data1, JPT_KOD_JE, ends_with("01"), matches("_.{4}_"), -JPT_KJ_I_1) # save new object

filter(merged_data1, str_detect(JPT_NAZWA_, "ice")) # rows 1-3 only
filter(merged_data1, str_detect(JPT_NAZWA_, "ice"), unemployment_rate>10)

merged_data1<-mutate(merged_data1, code_voi=str_sub(JPT_KJ_I_1, 1, 2))
merged_data1<-rename(merged_data1, NUTS2_id=code_voi)

nrow(merged_data1)
summarize(st_set_geometry(merged_data1, NULL), n())
summarize(st_set_geometry(merged_data1, NULL), number=n())

poviats_voi<-group_by(st_set_geometry(merged_data1, NULL), NUTS2_id) 
class(poviats_voi)
summarize(poviats_voi, number=n()) # first 6 only
arrange(summarize(poviats_voi, number=n()), number) # first 6 only
arrange(summarize(poviats_voi, number=n()), desc(number)) # first 6 only

poviats_voi %>% summarize(number=n()) %>% arrange(desc(number)) #first 6 only

poviats_voi %>% summarize(number=n()) %>% arrange(desc(number)) %>% .[["number"]] %>% hist()    # display a histogram of poviats in regions

merged_data1 %>% select(JPT_NAZWA_, JPT_KJ_I_1) %>% 
  mutate(code_voi=str_sub(JPT_KJ_I_1, 1, 2)) %>% 
  rename(NUTS2_id=code_voi) %>% st_set_geometry(NULL) %>% 
  group_by(NUTS2_id) %>% summarize(number=n()) %>% arrange(desc(number))

merged_data1 %>% mutate(area=st_area(.)) %>%  st_set_geometry(NULL) %>% 
  group_by(NUTS2_id) %>% summarise(area=sum(as.numeric(area))/1e6) #1-3 only

Chapter 3: Spatial visualisations (Katarzyna Kopczewska)

3.1 Plotting basic contour maps

# Fig.3.1a - plot shape only using plot() (contours in the sf class)
library(sp)     # for degAxis()
plot(st_geometry(pov), border="darkgreen")  
plot(st_geometry(voi), add=TRUE, lwd=2)
degAxis(1)      # x-coordinate values, from sp::
degAxis(2)      # y-coordinate values

# Fig.3.1b - the same with ggplot() - plot two maps simultaneously
ggplot() + geom_sf(data=pov) + geom_sf(data=voi, color="red", fill=NA)

Figure 3.1: Contour maps with two layers: a) with plot(), b) with ggplot()

# Fig.3.2a
library(ggspatial)  # for annotations  

# graticule for a map
bbox<-st_as_sfc(st_bbox(voi)) # bounding box
Bbox

lon_breaks<-seq(14, 24, by=2)  # longitude every 2 degrees
lat_breaks<-seq(49, 55, by=1)  # latitude every 1 degree
graticule<-st_transform(st_graticule(lat=lat_breaks, lon=lon_breaks), crs=4326)  
graticule_clipped<-st_intersection(graticule, bbox)

ggplot() + geom_sf(data=voi, fill="white", color="black") + geom_sf(data=graticule_clipped, color="black", linetype="solid", size=0.3) + annotation_scale(location="bl", width_hint=0.3) + annotation_north_arrow(location="tl", which_north="true", style= north_arrow_fancy_orienteering) + theme_minimal() + theme(axis.text=element_text(size=12), panel.grid.major=element_blank(), panel.grid.minor=element_blank())

# Fig.3.2b
library(maps)                   # for map.scale()
library(sp)                     # for compassRose()
plot(st_geometry(region.pov), graticule=TRUE, axes=TRUE)
plot(st_geometry(region.voi), add=TRUE, lwd=2)
map.scale(x=21.1, y=50.4, ratio=FALSE, relwidth=0.15, metric=TRUE, cex=0.65)    # from maps::
compassRose(21.5, 52.1,rot=0,cex=1)     # from sp::

Figure 3.2: Contour maps with additional features (annotations, compass rose, scale): a) using ggplot(), b) using plot()

3.2 Contour maps with a colour layer

3.2.1 Plots using ggplot() function from the ggplot2:: package

# regional data – distance to the core city
data23<-data[data$year==2023, ] # select one year for the analysis 
summary(data23$dist)    # variable to be coloured, distance to core city

pov$dist<-data23$dist   # transfer variable between objects
ggplot() + geom_sf(data=pov, aes(fill=dist)) + labs(title="Distance to the core cities within the NUTS2 division")  # Fig.3.3a

# Fig.3.3b
plot1<-ggplot() + geom_sf(data=pov, aes(fill=dist)) + scale_fill_gradient(low='white', high='grey20') + geom_sf(data=voi, color="white", fill=NA) + labs(title="Grey-scale figure of in-region distances") + theme_minimal()        # no grey background
plot1           # figure was saved to memory and now is called

Figure 3.3: Contour maps with colour layer using ggplot(): a) with automatic colours, b) in shades of grey with second layer of contour map

# Fig.3.4a
pov$unemployment_rate<-data23$unemployment_rate # transfer the variable

# define the color palette
library(RColorBrewer)   # for brewer.pal()
maxy<-25            # maximum value of a variable for a colour range
breaks<-c(0, 5, 10, 15, 20, 25)  
shades<-brewer.pal(8, "Reds")       # it assumes 8 shades of red colours
fillRed<-colorRampPalette(shades)   # it makes a function 
colcode<-fillRed(maxy)[round(data23$ unemployment_rate) + 1]
colcode # vector of continous shades for a given variable

ggplot() + geom_sf(data=pov, aes(fill=data23$unemployment_rate), color=NA) + geom_sf(data=voi, color="gray60", fill=NA, size=1) + scale_fill_gradientn(colours=fillRed(maxy), breaks=breaks, labels=breaks, name="Unemployment Rate") + theme_void() + labs(title="Unemployment rate in 2023") + theme(legend.position="right", legend.title=element_text( size=10), legend.text=element_text(size=8), plot.title=element_text( hjust=0.5), plot.subtitle=element_text(hjust=0.5)) + annotation_north_arrow(location="bl", which_north="true", style= north_arrow_fancy_orienteering) 

# the same graphics with plot()
library(shape)  # for colorlegend()
library(maps)   # for map.scale()
library(sp)     # for compassRose()

plot(st_geometry(pov), col=colcode, lty=0, border="gray")  
plot(st_geometry(voi), add=TRUE, lwd=1, border="gray60")  

# add-ins to map
map.scale(x=17.5, y=49.17, ratio=FALSE, relwidth=0.15, metric=TRUE, cex=0.65)                       # with maps::
compassRose(15, 49.7,rot=0,cex=1)           # with sp::
colorlegend(posy=c(0.05,0.9), posx=c(0.9,0.92), col=fillRed(maxy), zlim=c(0, maxy), zval=breaks, main.cex=0.9)      # with shape::
title(main="Unemployment rate in 2023")

library(viridis)        # for scale_fill_viridis()
ggplot() + geom_sf(data=pov, aes(fill=unemployment_rate), color=NA) +   
scale_fill_viridis(option="A", direction=-1, breaks=breaks, labels=breaks, name="Unemployment Rate") + theme_minimal()  # Fig.3.4b

Figure 3.4: Contour maps with colour layers created with different functions: a) with colorRampPalette(), b) with viridis()

3.2.2 Plots using plot() function

# Fig.3.5a
brks<-c(0, 25, 50, 75, 100, 125, 150, 175) # intervals related to data values
cols<-brewer.pal(8, "Greys")    # shades of grey from RColorBrewer:: 
par(mar=c(5,5,5,5))     # bigger margins, to accommodate subtitle
plot(st_geometry(pov), col=cols[findInterval(pov$dist, brks)], border="transparent") # no borders of regions due to ‘transparent’ colour
plot(st_geometry(voi), border="grey60", add=TRUE)
title(main="Distance from poviats to core regional cities", sub="Euclidean distance in km calculated between regional centroids")
legend("bottomleft", legend=paste("from ",brks[1:7], "to ", brks[2:8]), pt.bg=cols, cex=0.8, bty="n", pch=21)

Figure 3.5: Visualisations using plot(): a) colours assigned with findInterval(), b) colours assigned with findColours() from classInt::

library(classInt)       # for classIntervals()
summary(pov$unemployment_rate)

intervals<-5 
colors<-brewer.pal(intervals, "BuPu") # from blue (Bu) to purple (Pu)
classes<-classIntervals(pov$unemployment_rate, intervals, style="fixed",  
fixedBreaks=c(0, 5, 10, 15, 20, 25)) 
color.table<-findColours(classes, colors)  
 
plot(pov$geometry, col=color.table)     # Fig.3.5b
plot(voi$geometry, lwd=2, add=TRUE) 
legend("bottomleft", legend=names(attr(color.table, "table")), 
fill=attr(color.table, "palette"), cex=0.8, bty="n")
title(main="Unemployment rate in poviats in 2023")

# Fig.3.6a 
library(scales)     # for show_col()
# prepare colours and density 
variable<-unique(data$region_number[data$year==2023])
n<-16               # number of regions
brks<-(0:n)*2       # IDs of regions as in the database
dens<-round(seq.int(from=1, to=48, length.out=n), 0)    # number of stripes
voi$fill_col<-gray.colors(n, start=0.9, end=0.3)  # shades of grey
scales::show_col(voi$fill_col) # plot colours as coloured matrix
voi$fill_dens<-dens[findInterval(variable, brks, all.inside=TRUE)]

# plot shaded polygons based on border points coordinates
plot(st_geometry(voi), col=NA, border=NA, main="Shaded density map")
for(i in seq_len(nrow(voi))) {
   geom<-st_geometry(voi)[[i]]
   if(st_geometry_type(geom)=="MULTIPOLYGON") {
    sub_polys<-geom[[1]]
  } else {
    sub_polys<-list(geom)
  }
for(poly in sub_polys) {
 x<-poly[,1]
 y<-poly[,2]
 polygon(x, y, col=voi$fill_col[i], border="grey80", density=voi$fill_dens[i], angle=45)
  }
}
plot(voi$geometry, add=TRUE, border="grey70")

# Fig.3.6b – automated graphics with plot() 
plot(pov["dist"], key.pos=4, main="Distance to core city")

Figure 3.6: Regional maps: a) with shaded layer, b) with automated division into intervals and selection of colours

library(mapsf)  # for mf_map()
mf_map(pov, var="dist", type="choro")   # figure not shown
title("Distance from poviat to regional core city")

3.2.3 Plots using choropleth() function from the GISTools:: package

library(GISTools)
library(RColorBrewer)

data23<-data[data$year==2023, ]     # select one year for the analysis 
pov$dist<-data23$dist           # transfer variable between objects
pov$unemp_rate23<-data23$unemployment_rate 

# Fig.3.7a - with default colours and number of intervals
choropleth(pov, "dist")         # from the GISTools:: package
shades<-auto.shading(pov[["dist"]]) # default number of intervals
choro.legend(15, 50, shades, cex=0.65, bty="n")

# Fig.3.7b - with own colours and number of intervals
shades<-auto.shading(pov[["dist"]], n=6, cols=brewer.pal(6, "Purples"))
choropleth(pov, "dist", shading=shades)
choro.legend(14.2, 50.5, shades, cex=0.8, bty="n", title='Distance in km')

# not shown - division into intervals by mean and std.dev, 
shades<-auto.shading(pov[["dist"]], n=6, cols=add.alpha(brewer.pal(6, "Greens"), 0.5), cutter=sdCuts)
choropleth(pov, "dist", shading=shades)
choro.legend(14.2, 50.2, shades, under="below", over="above", between="to", cex=0.6, bty="n")

Figure 3.7: Regional maps with GISTools:: package: a) with default setting, b) with user setting

3.3 Plotting basic point data

# Fig.3.8a - basic figure with plot() and points()
# plot based on data in data.frame class
par(bg="white")                     # colour of background
par(mar=c(1,1,1,1))         # almost no margins for bigger map 
plot(st_geometry(region.voi))               # boundary of the region
points(points[,c("crds.x", "crds.y")], pch=".") # layer of points (dots)

# the same plot but based on data in sf class
points.sf<-st_as_sf(points, coords=c("crds.x", "crds.y"), crs=4326)
plot(st_geometry(region.voi))           # boundary of the region
points(st_geometry(points.sf), pch=".") # layer of points (dots)

# Fig.3.8b – more sophisticated plot with a grey background
par(bg="grey95")        # grey colour of background 
par(mar=c(2,2,2,1)) # moderate size of margins
plot(st_geometry(region.voi), border="grey70", col="grey80")
title(main="Location of firms (10%) in lubelskie NUTS2 region")
points(points[,c("crds.x", "crds.y")], pch=21, col="white", bg="cornsilk", cex=0.1) # points in light colour, plotted as small circles
#sf points in light colour, plotted as small circles
#points(st_geometry(points.sf), pch=21, col="white", bg="cornsilk", cex=0.1) 

# layer including location of cities in this region
cities<-read.csv("cities in lubelskie region.csv", sep=";", dec=",", header=TRUE)
cities$label<-paste(cities$position, cities$city, sep=".")
cities$rel.pop<-cities$population/mean(cities$population)
cities$xx<-as.numeric(as.character(cities$xx))  # make variable numeric
cities$yy<-as.numeric(as.character(cities$yy))
cities

points(cities[,1:2], pch=21, bg="black", cex=0.8) # points for cities
text(cities[,1:2], cities$label, font=2) # city name and ranking position

legend("bottomleft", legend=paste0(cities$label,", popul.", cities$population), pt.cex=cities$rel.pop, bty="n", pch=21, cex=0.7)
abline(h=c(50.5, 51, 51.5, 52), col="white", lwd=1, lty=3) # graticule
abline(v=c(21.5, 22, 22.5, 23, 23.5, 24), col="white", lwd=1, lty=3)
degAxis(1)          # x-axis
degAxis(2)          # y-axis
par(bg="white")     # white background for another plots

Figure 3.8: Point data within regional contour: a) basic visualisation, b) coloured visualisation with additional layers

# Fig.3.8a – the same basic plot as before but with ggplot() and data.frame
library(ggplot2)
# points in data.frame class
ggplot() + geom_sf(data=region.voi, fill=NA, color="black", linewidth=0.3) + geom_point(data=points, aes(x=crds.x, y=crds.y), shape=46, size=0.5) + coord_sf(expand=FALSE) + theme_void() + theme(panel.background= element_rect(fill="white", color=NA), plot.background =element_rect(fill= "white", color=NA), plot.margin=margin(4, 4, 4, 4))

# the same plot of points – from sf class
ggplot() + geom_sf(data=region.voi, fill=NA, color="black", linewidth=0.3) + geom_sf(data=points.sf, shape=46, size=0.5) + coord_sf(expand=FALSE) + theme_void() + theme(panel.background= element_rect(fill="white", color=NA), plot.background =element_rect(fill= "white", color=NA), plot.margin=margin(4, 4, 4, 4))
# Fig.3.8b – the same sophisticated plot in ggplot()
x_breaks<-c(21.5, 22, 22.5, 23, 23.5, 24) # "graticule" breaks 
y_breaks<-c(50.5, 51, 51.5, 52)

ggplot() +      # start the ggplot, then each line comes with ‘+’
# region boundary
geom_sf(data=region.voi, fill="grey80", color="grey70", linewidth=0.4) +

# dashed white graticule lines (like abline())
geom_vline(xintercept=x_breaks, linetype=3, linewidth=0.4, colour="white") + geom_hline(yintercept=y_breaks, linetype=3, linewidth=0.4, colour="white") +

# point data as light circles – for data.frame class point data
geom_point(data=points, aes(x=crds.x, y=crds.y), shape=21, size=0.6, stroke=0.2, colour="white", fill="cornsilk") +

# point data as light circles – for sf class point data
# geom_sf(data=points.sf, shape=21, size=0.6, stroke=0.2, colour="white", fill="cornsilk") +

# cities as black circles with labels
geom_point(data=cities, aes(x=xx, y=yy, size=rel.pop), shape=21, fill="black", colour="black", show.legend=TRUE) +
geom_text(data=cities, aes(x=xx, y=yy, label=label), fontface="bold", nudge_y=0.04, size=3) +

# axes 
coord_sf(expand=FALSE) +
scale_x_continuous(breaks=x_breaks, labels=function(x) paste0(x, "°E")) + scale_y_continuous(breaks=y_breaks, labels=function(y) paste0(y, "°N")) +

# legend for city sizes (relative population)
scale_size(name="City population\n(relative to mean)", range=c(2, 6), guide= guide_legend(override.aes=list(shape=21, fill="black", colour="black"))) +

# title and theming (grey background)
labs(title="Location of firms (10%) in lubelskie NUTS2 region") + theme_void(base_size=11) + theme(plot.title=element_text(hjust=0.5), plot.background = element_rect(fill="grey90", colour=NA), panel.background= element_rect(fill="grey90", colour=NA), plot.margin=margin(8, 8, 8, 4), legend.position=c(0.03, 0.15), legend.justification=c(0, 0))
# end of the ggplot() function
# xy coordinates in a matrix class
centroids<-st_centroid(st_make_valid(pov))
crds<-st_coordinates(centroids) 
head(crds)      # head() is for 6 lines, only 3 are presented

# Fig.3.9a – using plot()
plot(st_geometry(pov))  # plot the contour
plot(st_geometry(centroids), pch=21, bg='red', add=TRUE)

# the same with ggplot()
ggplot() + geom_sf(data=pov, fill="grey85", color="black", linewidth=0.3) + geom_sf(data=centroids), shape=21, fill="red", colour="black") + theme_minimal()

crds.voi<-st_coordinates(st_centroid(st_make_valid(voi))) 
labels<-data.frame(X=crds.voi[,1], Y=crds.voi[,2], Name=as.character(voi$ JPT_NAZWA_))
labels  # just the 3 first lines

# Fig.3.9b – using ggplot() 
ggplot() + geom_sf(data=voi, fill="white", color="black") + geom_text(data= labels, aes(x=X, y=Y, label=Name), size=3, hjust=0.5, vjust=-0.5, fontface="bold") + theme_void()

# the same (Fig.3.9b) with plot()
plot(st_geometry(voi), border="grey60")
library(car)    # for plotting labels without overlaps
pointLabel(labels[,c("X", "Y")], labels$Name, cex=0.8) 

Figure 3.9: Centroids of regions: a) as point data, b) as locators for labels

# interactive edition of regions’ names
plot(st_geometry(voi))
identify(labels[,c("X", "Y")], labels=labels$Name)
# ggplot() with text in boxes
library(ggrepel)        # for geom_label_repel()
ggplot() + geom_sf(data=voi, aes(fill=as.numeric(JPT_KOD_JE))) + geom_point(aes(x=X, y=Y, stroke=2), colour='grey80', data=labels, size=0.5) + geom_label_repel(aes(x=X, y=Y, label=Name), data=labels, family='Times', size=2, box.padding=0.2, point.padding=0.3, segment.color='grey50') + theme(legend.position="none")
# Fig.3.10a
# Create own data
x<-c(5,6,7,8,8,7,6,5,4,3,2,2,3,4,5) # x coordinates of points
y<-c(2,3,4,5,6,7,7,6,7,7,6,5,4,3,2) # y coordinates of points
points_df<-data.frame(ID=1:length(x), x=x, y=y) # integrated data.frame
points_sf<-st_as_sf(points_df, coords=c("x", "y"), crs=4326) # sf class
line_sf<-st_as_sf(data.frame(geometry=st_sfc(st_linestring( as.matrix(points_df[, c("x", "y")])))), crs=4326) # lines between points
bbox<-st_bbox(points_sf)    # bounding box of dataset

x_breaks<-seq(floor(bbox$xmin), ceiling(bbox$xmax), by=1)
y_breaks<-seq(floor(bbox$ymin), ceiling(bbox$ymax), by=1)

# option panel.grid=element_blank() removes all gridlines
ggplot() + geom_sf(data=points_sf, color="black", shape=3, size=3) + coord_sf(xlim=c(bbox$xmin, bbox$xmax), ylim=c(bbox$ymin, bbox$ymax), expand=FALSE) + theme_minimal() + theme(panel.grid=element_blank(), panel.background=element_rect(fill="white", color=NA)) + labs(x="Longitude", y="Latitude")

# Fig.3.10b
ggplot() + geom_sf(data=line_sf, color="black", linewidth=1.2) + geom_sf(data=points_sf, color="black", shape=3, size=3) + coord_sf(xlim= c(bbox$xmin, bbox$xmax), ylim=c(bbox$ymin, bbox$ymax), expand=FALSE) + scale_x_continuous(breaks=x_breaks) + scale_y_continuous(breaks=y_breaks) + theme_minimal() + labs(x="Longitude", y="Latitude")

Figure 3.10: Generating point data and merging the points with line

3.4 Plotting point data in colour

3.4.1 Plots using the ggplot() function from the ggplot2:: package

# Fig.3.11a - typical point plot in ggplot() – points and contour in sf
ggplot() + geom_sf(data=points.sf, aes(col=poviat)) + geom_sf(data=region.pov, fill=NA)

# Fig.3.11b
ggplot(points.sf) + geom_sf(aes(color=as.factor(SEC_PKD7)), size=1) + scale_color_viridis_d(option="turbo", name="Industry Code") + theme_minimal() + theme(legend.position="right", axis.text= element_text(size=10), legend.text=element_text(size=9), plot.margin=margin(2, 2, 2, 2))

Figure 3.11: Categorial data in colours with ggplot(): a) points in colours by poviats, b) points in colours by sectors

# Fig.3.12a – simple plot of numeric variable
ggplot()+ geom_sf(data=points.sf, aes(col=subreg))

# Fig.3.12b – multiplot by other variable
ggplot() + geom_point(data=points, alpha=0.5, aes(x=crds.x, y=crds.y, color=roa)) + scale_colour_gradient(low="yellow", high="red") + facet_wrap(~SEC_agg) + ggtitle("Roa of firms by sectors") + labs(fill="ROA", x="", y="") + geom_sf(data=region.voi, fill=NA)

Figure 3.12: Numeric data in colours with ggplot(): a) points in colours by subregions, b) points in colours by profitability, subfigures by sectors

# random observations – subset of geo-located points
nn<-5000    # sample size
selector<-sample(size=nn, 1:dim(points)[1], replace=FALSE) # random IDs
points.lim<-points[selector, c("crds.x", "crds.y", "SEC_PKD7", "SEC_agg")] 
points.lim.sf<-st_as_sf(points.lim, coords=c("crds.x", "crds.y"), crs=4326)

# coordinates of regional cities
city.biala<-c(23.0682406, 52.0291252) 
city.zamosc<-c(23.2134931, 50.7213773)
city.chelm<-c(23.4196661, 51.1355623) 
city.lublin<-c(22.4236853, 51.2180254)

# calculating the distances – from all firms to centre of city 
# st_distance calculates distances in pairs of observations from two objects
# city location was multiplied

# distance to Biala Podlaska
city.biala.df<-data.frame(ID=1:nn, x=rep(city.biala[1], times=nn), y=rep(city.biala[2], times=nn))
city.biala.sf<-st_as_sf(city.biala.df, coords=c("x", "y"), crs=4326) # in sf
dist.to.biala<-st_distance(points.lim.sf, city.biala.sf, by_element=TRUE)   # one gets raw result in meters, from sf: 
points.lim$dist.biala<-as.numeric(dist.to.biala)/1000    # convert to km

# distance to Zamość
city.zamosc.df<-data.frame(ID=1:nn, x=rep(city.zamosc[1], times=nn), y=rep(city.zamosc[2], times=nn))
city.zamosc.sf<-st_as_sf(city.zamosc.df, coords=c("x", "y"), crs=4326)
dist.to.zamosc<-st_distance(points.lim.sf, city.zamosc.sf, by_element=TRUE)
points.lim$dist.zamosc<-as.numeric(dist.to.zamosc)/1000

# distance to Chelm
city.chelm.df<-data.frame(ID=1:nn, x=rep(city.chelm[1], times=nn), y=rep(city.chelm[2], times=nn))
city.chelm.sf<-st_as_sf(city.chelm.df, coords=c("x", "y"), crs=4326)
dist.to.chelm<-st_distance(points.lim.sf, city.chelm.sf, by_element=TRUE)
points.lim$dist.chelm<-as.numeric(dist.to.chelm)/1000

# distance to Lublin
city.lublin.df<-data.frame(ID=1:nn, x=rep(city.lublin[1], times=nn), y=rep(city.lublin[2], times=nn))
city.lublin.sf<-st_as_sf(city.lublin.df, coords=c("x", "y"), crs=4326)
dist.to.lublin<-st_distance(points.lim.sf, city.lublin.sf, by_element=TRUE)
points.lim$dist.lublin<-as.numeric(dist.to.lublin)/1000

# minimum distance (out of four distances)
points.lim$min.dist<-apply(as.matrix(points.lim[,c("dist.biala", "dist.zamosc", "dist.chelm", "dist.lublin")]), 1, FUN=min)
head(points.lim)    # head() is for the first 6 lines, only 3 presented

# Fig.3.13a - blue plot
ggplot() + geom_point(data=points.lim, alpha=0.5, aes(x=crds.x, y=crds.y, color=min.dist)) + geom_sf(data=region.voi, fill=NA)

# Fig.3.13b - green plot
cols<-RColorBrewer::brewer.pal(4,'Paired')
ggplot() + geom_point(data=points.lim, alpha=0.5, aes(x=crds.x, y=crds.y, color=min.dist)) + scale_colour_gradient(low=cols[1], high=cols[4]) + ggtitle("Distance from points to the clostest city") 

Figure 3.13: Distances in colours with ggplot(): a) basic colours, b) own colours

# Fig.3.14a - map of points density with isolines
ggplot() + geom_sf(data=region.voi) + stat_density2d(aes(x=crds.x, y=crds.y, fill=after_stat(level), alpha=0.25), linewidth=0.01, bins=30, data=points, geom="polygon") + geom_density2d(data=points, aes(x=crds.x, y=crds.y), size=0.3)

# Fig.3.14b - map of points density with isolines and labels of cities
# cities    # see Fig.3.8 for structure of object

library(ggrepel)    # for geom_label_repel()
ggplot() + geom_sf(data=region.voi, fill=NA, color="grey60", linewidth=0.2) + stat_density_2d(data=points, aes(x=crds.x, y=crds.y, fill=after_stat(level)), geom="polygon", bins=30, colour=NA, alpha=0.55) + scale_fill_viridis_c(name="Point density", option="C") + geom_point(data=cities, aes(x=xx, y=yy), shape=21, fill="grey85", color="grey40", size=1.6, stroke=0.5) + geom_label_repel(data=cities, aes(x=xx, y=yy, label=label), size=2.5, box.padding=0.2, point.padding=0.3, segment.color="grey50", fill="white", label.size=0.2, max.overlaps=Inf) + coord_sf(expand=FALSE) + theme_minimal(base_size=11) + theme(panel.grid.major=element_line(linewidth=0.2, colour="grey90"), axis.title=element_blank())

Figure 3.14: Density plots with ggplot(): a) density with isolines, b) lables of cities added

3.4.2 Plots using plot() function

# Fig.3.15 - automated solution with plot()
# for categorical variable – Fig.3.15a
plot(points.sf["subreg"], pch=16, main="Categorical variable ‘subreg’ in colours")
# for continuous variable – Fig.3.15b
plot(points.sf["roa"], pch=16, main="Continous variable ‘roa’ in colours")

Figure 3.15: Coloured points with plot() and sf class object: a) for a categorical variable, b) for a numeric variable

# Fig.3.16a - plot of points in colours with findInterval()
var<-points$subreg          # levels of variable to be coloured
locs<-points[, c("crds.x", "crds.y")] # xy locations
brks<-c(1, 2, 3, 4)         # lower bins of intervals
cols<-c("blue3", "cornflowerblue", "seagreen1", "green2") # colours

plot(st_geometry(region.voi))       # contour of region
points(locs, col=cols[findInterval(var, brks)], pch=16, cex=0.7)
legend("bottomleft", legend=brks, fill=cols, cex=0.8, bty="n")
title(main="Points - colors by values") # title of figure

# Fig.3.16b – coloured points representing regional statistics
var<-data$salary_average[data$year==2023]
summary(var)
brks<-c(5000, 5500, 6000, 6500, 7000, 9000, 11000, 13000)
crds<-st_coordinates(st_centroid(st_make_valid(pov))) 
size<-(brks/100)*1.2
library(RColorBrewer)
cols<-brewer.pal(9, "Reds") # from RColorBrewer::, max 9 colours
cols

plot(pov$geometry, border="grey90")     
plot(voi$geometry, border="grey50", add=TRUE) 
points(crds, col=cols[findInterval(var, brks)], pch=16, cex=size[findInterval(var, brks)]/40)     
legend("bottomleft", legend=brks, pt.bg=cols, pt.cex=size/40, bty="n", pch=21)
title(main="Average salary in Poland in year 2023")
savePlot(filename="Regional salary as points", type="jpg")

Figure 3.16: Regional map with plot() for points in colour: a) point data coloured by subregion, b) centroids of regions coloured and sized by regional average salary

# Fig.3.17a
locs<-points.lim[ ,c("crds.x", "crds.y")]
var<-points.lim$min.dist
summary(var)
brks<-c(0, 10, 20, 30, 40, 81)
cols<-c("red", "blue3", "cornflowerblue", "seagreen1", "green2")

plot(st_geometry(region.voi))       # regional contour
points(locs, col=cols[findInterval(var, brks)], pch=16, cex=0.7) # points
legend("bottomleft", legend=paste("from", brks[1:5], "to", brks[2:6], "km"), fill=cols, cex=0.8, bty="n") # legend
title(main="Points - colors by distance") # title

# Fig.3.17b – dataset in sp class and plot with spplot()
library(sp)             # to create SpatialPointsDataFrame class
points.lim.sp<-points.lim   # copy of the object
coordinates(points.lim.sp)<-c("crds.x", "crds.y") # class changes to sp
proj4string(points.lim.sp)<-CRS("+proj=longlat +datum=WGS84") #projection
class(points.lim.sp)
spplot(points.lim.sp["min.dist"])       # plot for sp class

Figure 3.17: Points coloured by distance: a) plotted using plot() and findInterval() from data.frame class, b) plottted using spplot() from sp class

3.5 Interactive maps

3.5.1 Package tmap::

# Fig.3.18 – statistic and interacvtive maps with tmap::
pov$dist<-data$dist[data$year==2023]    # add variable to a contour map
library(tmap)
tmap_mode("view") # "view" for interactive plots and "plot" for static maps
tm_shape(pov) + tm_fill("dist", palette=sf::sf.colors(5)) 
ttm()   # informs about the active mode, "view" or "plot"
tmap_last() # plots the statistic map in R device

Figure 3.18: Static (a) and interactive (b) maps with tmap:: for a distance to from counties to core regional city

3.5.2 Package mapview::

# Fig.3.19
library(mapview)
mapview(pov["dist"], col.regions=sf.colors(10)) # for regional data
mapview(points.sf, zcol="SEC_PKD7") # for point data, Fig.3.19b

Figure 3.19 Interactive plotting with mapview:: a) for regional data, b) for point data

# Fig.3.20a
# cut a contour map from a bigger map and add colouring variable
region.pov$unemp<-data$unemployment_rate[data$region_name=="lubelskie" & data$year==2023]

mapview(region.voi, zcol=NULL, alpha.regions=0.1) +     # regional contours
mapview(region.pov, zcol="unemp", alpha.regions=0.3) + # coloured areas mapview(points.sf[points.sf$SEC_PKD7=="M",], zcol="roa", cex="empl_av_size")    # point data

#location of a few big cities – data.frame object
cities<-data.frame(city=0, x=0, y=0) # empty object 
cities[1,]<-c("Warszawa", 20.99909, 52.2330269) #location of Warszawa
cities[2,]<-c("Kraków", 19.8647884, 50.0469018) # location of Kraków
cities[3,]<-c("Gdańsk", 18.5499432, 54.3612063) # location of Gdańsk
cities[4,]<-c("Poznań", 16.9016659, 52.3997116) # location of Poznań

# check if data are numeric, if not convert to numeric
summary(cities) 
cities$x<-as.numeric(as.character(cities$x)) 
cities$y<-as.numeric(as.character(cities$y)) 
summary(cities)

# change class – from data.frame to sf
cities.sf<-st_as_sf(cities, coords=c("x", "y"), crs=4326)

# geometries – circles with r=100 km from city, 1 km is dist=1000
circle.waw<-st_buffer(cities.sf[1,], dist=100000) 
circle.kra<-st_buffer(cities.sf[2,], dist=100000) 
circle.gda<-st_buffer(cities.sf[3,], dist=100000) 
circle.poz<-st_buffer(cities.sf[4,], dist=100000)

# make projections the same – NAD27
pov<-st_transform(pov, 4267) # 4326=WGS84, 4267=NAD27 
circle.waw<-st_transform(circle.waw, 4267) # 
circle.kra<-st_transform(circle.kra, 4267) # 
circle.gda<-st_transform(circle.gda, 4267) # 
circle.poz<-st_transform(circle.poz, 4267) #

# cut poviats by circles
pov.circle.waw<-st_intersection(st_make_valid(pov), circle.waw) 
pov.circle.kra<-st_intersection(st_make_valid(pov), circle.kra) 
pov.circle.gda<-st_intersection(st_make_valid(pov), circle.gda) 
pov.circle.poz<-st_intersection(st_make_valid(pov), circle.poz)

# graphics in R window – to check circular geometry with coloured regions
plot(st_geometry(pov.circle.waw)) # plotting a contour 
ggplot() + geom_sf(data=pov.circle.waw, aes(fill=dist))

#interactive multilayer maps in browser
mapview(cities.sf, legend=FALSE) +              # cities as points
mapview(pov.circle.waw, zcol="dist", legend=FALSE) +    # circle for Warsaw
mapview(pov.circle.kra, zcol="dist", legend=FALSE) + # circle for Kraków
mapview(pov.circle.gda, zcol="dist", legend=FALSE) + # circle for Gdańsk
mapview(pov.circle.poz, zcol="dist", legend=FALSE) + # circle for Poznań
mapview(pov, zcol="dist", legend=FALSE, alpha=0.1)  # all regions coloured

Figure 3.20: Multilayer plots using mapview(): a) regional and point data, b) regional data highlighted in radius

3.5.3 Package OpenStreetMap::

# Fig.3.21
library(osmdata)

# get data from OSM: roads, rivers and rails
getbb("Lublin") %>%     # get bounding box for a given place name
opq() %>%                   # build an overpass query
add_osm_feature(                # add a feature to an overpass query 
key="highway", value=c("motorway", "primary", "motorway_link", "primary_link"))             # specify which data to import
%>% osmdata_sf() -> big_streets     # save main roads as sf object

getbb("Lublin") %>% opq() %>% add_osm_feature(key="highway", value=c("secondary", "tertiary", "secondary_link", "tertiary_link")) %>% osmdata_sf() -> med_streets   # medium streets saved as sf object

getbb("Lublin") %>% opq() %>% add_osm_feature(key="waterway", value="river") %>% osmdata_sf() -> river  # save rivers as sf object

getbb("Lublin") %>% opq() %>% add_osm_feature(key="railway", value="rail") %>% osmdata_sf()->railway    # save railways as sf object

sub<-points[points$poviat=="powiat Lublin",]    # subset for the city

sub %>%     # use subset only, in data.frame class
ggplot(mapping=aes(x=crds.x, y=crds.y, color=SEC_PKD7)) +
geom_point() + 
geom_sf(data=river$osm_lines, inherit.aes=FALSE, color="deepskyblue", size=0.8, alpha=0.3) +
geom_sf(data=railway$osm_lines, inherit.aes=FALSE, color="#ffbe7f", size=0.2, linetype="dotdash", alpha=0.5) +
geom_sf(data=med_streets$osm_lines, inherit.aes=FALSE, color="#ffbe7f", size=0.3, alpha=0.5) +
geom_sf(data=big_streets$osm_lines, inherit.aes=FALSE, color="#ffbe7f", size=0.5, alpha=0.6) +
coord_sf(xlim=c(22.45, 22.7), ylim=c(51.15, 51.3)) +
labs(title="LUBLIN CITY, POLAND", subtitle="business locations", color="Sectors of firms")

Figure 3.21: Point data plotted together with OpenStreetMap data

3.5.4 Packages leaflet and shiny::

# variable for analysis
pov$dist<-data$dist[data$year==2023]

# solution with mapview::
library(mapview)
mapview(pov, zcol="dist")   # auto colors + legend + popups

# solution with leaflet::
library(leaflet)
pal<-colorNumeric("viridis", domain=pov$dist, na.color=NA)
leaflet(pov) |>
  addProviderTiles(providers$CartoDB.Positron) |>
  addPolygons(
    fillColor=~pal(dist), color="#444444", weight=1,
    fillOpacity=0.8, popup=~paste0("<b>dist:</b> ", dist)) |>
  addLegend("bottomright", pal=pal, values=~dist, title="dist")

# solution with mapview::, leaflet:: and shiny::
library(sf)
library(shiny)
library(leaflet)
library(mapview)
ui<-fluidPage(leafletOutput("m", height=600))
server<-function(input, output, session) {
  output$m<-renderLeaflet({
    mapview(pov, zcol="dist")@map   # grab the underlying leaflet widget
  })}
shinyApp(ui, server)

# Fig.3.22
pov$dist<-data$dist[data$year==2023]

library(leaflet)
library(viridisLite) # for nice, perceptually uniform color palettes

breaks<-pretty(pov$dist, n=6) # breaks at pretty numbers (0,50,100,150,200)

# color palette (reversed) - from leaflet::
pal<-colorNumeric(      # function to fit the colours to data
  palette=viridis(length(breaks), option="A", direction=-1),
  domain=range(pov$dist, na.rm=TRUE))

# optional: a label to show on hover (adjust 'region_name' to own field)
hover_lab<-sprintf("<b>%s</b><br/>Distance: %s",
  if ("region_name" %in% names(pov)) pov$region_name else "Region",
  format(pov$dist, trim=TRUE, digits=3)) %>% lapply(htmltools::HTML)

leaflet(pov, options=leafletOptions(minZoom=2)) |>
  addProviderTiles(providers$CartoDB.Positron) |>
  addPolygons(fillColor=~pal(dist), weight=0.5, color="#FFFFFF", opacity=1,   
  fillOpacity=0.9, smoothFactor=0.2, label=hover_lab, popup=~paste0("<b>",   
  if ("region_name" %in% names(pov)) region_name else "Region", 
  "</b><br/>", "Distance: ", dist), highlightOptions=highlightOptions(
  weight=2, color="#444444", fillOpacity=0.95, bringToFront=TRUE)) |>
  addLegend(pal=pal, values=~dist, title="Distance to regional core city", 
  labFormat=labelFormat(digits=2), position="bottomright", opacity=0.9) |>
  addScaleBar(position="bottomleft", options=scaleBarOptions(imperial= 
  FALSE))

Figure 3.22: Interactive graphics with leaflet::

3.6 Colours and palettes

n<-6
vec<-c("red", "green", "blue", "yellow", "pink", "brown")

image(1:n, 1, as.matrix(1:n), col=vec, xlab="…") # general code

library(terra)
r<-rast(nrows=2, ncols=3, xmin=0, xmax=3, ymin=0, ymax=2)
values(r)<-1:ncell(r)
plot(r, col=vec,  axes=FALSE, main="Coloured raster (3 columns and 2 rows)")

library(scales)
show_col(vec)

library(colorspace)
swatchplot("vec"=vec, "rev.vec"=rev(vec))

library(ggplot2)
df<-data.frame(x=1:n, y=1, shade=vec)
ggplot(df, aes(x=factor(x), y=y, fill=shade)) + geom_tile(width=1, height=1) + scale_fill_identity() + theme_void() 

colors()[1:21] # the first few color names

rgb(runif(5,0,1), runif(5,0,1), runif(5,0,1))
rgb(sample(0:255, 5), sample(0:255, 5), sample(0:255, 5), maxColorValue=255)
hsv(runif(5,0,1), runif(5,0,1), runif(5,0,1))
adjustcolor("#0F58D6", alpha.f=0.5)
col2rgb(c("azure", "azure1", "azure2"), alpha=FALSE)
col2rgb(c("#4B424F", "#BFD15C", "#A44845"), alpha=FALSE)

# current palette to be tested – to find names for listed shades
pal<-palette() # 8 basic colours that can be refereed as 1,2,3, in col option
pal_hex<-as.data.frame(t(grDevices::col2rgb(pal))) # colours in rgb notation
pal_hex # RGB values of eight colours

# all named colours with hex / rgb components
all_named<-grDevices::colors()  # 657 named colours
all_hex<-as.data.frame(t(grDevices::col2rgb(all_named)))
all_hex<-cbind(all_hex, all_named)
head(all_hex)   # RGB values and name of colour

exact_match<-merge(pal_hex, all_hex, by=c("red", "green", "blue"), all.x=TRUE)
exact_match

library(nabor)  # for knn()
merged<-knn(as.matrix(all_hex[,1:3]), as.matrix(pal_hex[,1:3]), k=2)
full_match<-data.frame(nn1_id=merged$nn.idx[,1], nn2_id=merged$nn.idx[,2], nn1_dist=merged$nn.dists[,1], nn2_dist=merged$nn.dists[,2], 
nn1_col=all_named[merged$nn.idx[,1]], nn2_col=all_named[merged$nn.idx[,2]]) 
full_match

rc<-rainbow(10)

library(RColorBrewer)
display.brewer.all()        # all pallets from the package, Fig.3.23a
display.brewer.pal(11,'Spectral') # displaying the palette, Fig.3.23b

Figure 3.23: RColorBrewer palettes: a) all colours together, b) divergent Spetral colours

brewer.pal(n=9, name="OrRd")    # create sequential colours
display.brewer.pal(9,'OrRd')    # display the sequential palette, Fig.3.24a
brewer.pal(4,'Paired')      # create qualitative colours
display.brewer.pal(4,'Paired') # show the qualitative palette, Fig.3.24b

Figure 3.24: RColorBrewer palettes: a) sequential OrRd, b) qualitative Paired

library(viridis)
library(scales)
viridis.map
col1<-viridis(16, option="D") # the default viridis colors
show_col(col1)  # Fig.3.25a

col2<-viridis(16, option="B") # inferno palette
show_col(col2)  # Fig.3.25b

Figure 3.25: Viridis palettes: a) default viridis, b) inferno

library(ggplot2)
# coloured hexagons circled, Fig.3.26a
ggplot(data.frame(x=rnorm(10000),y=rnorm(10000)), aes(x=x, y=y)) + geom_hex() + coord_fixed() + scale_fill_viridis() + theme_bw() 

# creating a data set in the data.frame class
data<-data.frame(x=rnorm(1000), y=rnorm(1000), z=sample(LETTERS[1:5], 1000, replace=TRUE))
head(data)

# scatterplot in colours, Fig.3.26b
ggplot(data) + aes(x, y, color=z) + geom_point(size=4) + scale_color_viridis_d(option="turbo") + theme_bw() 

Figure 3.26: Colours from viridis used with ggplot(): a) for regions with viridis palette, b) for points with turbo palette

library(colorspace)
# sequential palettes
# from deep-blue to light-blue (nice for heatmaps / scales)
seq_blue<-sequential_hcl(7, h=260, c=c(80, 0), l=c(30, 95), power=1.2)
seq_blue

# from teal to light-teal with a subtle hue drift
seq_teal<-sequential_hcl(7, h=c(200,190), c=c(70,0), l=c(28,96), power=1.1)

# use a named predefined palette
hcl_palettes("sequential") # display the available palettes
seq_named<-sequential_hcl(7, palette="Blues 3")

# display as swatchpot the sequential palettes – Fig.3.27a
swatchplot("seq_blue"=seq_blue, "seq_teal"=seq_teal, "seq_named"=seq_named)

# diverging palettes
# from blue to red via alight centre
div_blue.red<-diverging_hcl(11, h=c(260, 0), c=80, l=c(30, 95), power=1.1)

# from green to purple with slightly softer chroma
div_green.purple<-diverging_hcl(11, h=c(130, 300), c=70, l=c(30, 96), power=1.0)

# use a named palette
hcl_palettes("diverging") # display the available palettes

div_named<-diverging_hcl(11, palette="Lisbon")
# display as swatchpot the diverging palettes – Fig.3.27b
swatchplot("div_blue.red"=div_blue.red, "div_named"=div_named,  "div_green.purple"=div_green.purple)

Figure 3.27: Swatchplots of palettes from colorspace:: package: a) sequential palettes, b) diverging palettes

# qualitative palettes
# balanced, vivid colours 
qual_vivid<-qualitative_hcl(8, h=c(0, 360), c=60, l=65)

# soft pastel categories (higher L, lower C)
qual_pastel<-qualitative_hcl(8, h=c(0, 360), c=30, l=85)

# use a named palette
hcl_palettes("qualitative") # display the available palettes

qual_named<-qualitative_hcl(8, palette="Dark 3")
# display as swatchpot the diverging palettes – Fig.3.28a
swatchplot("qual_vivid"=qual_vivid, "qual_pastel"= qual_pastel,  "qual_named"=qual_named)
colorspace::choose_palette()    # Fig.3.28b
colorspace::hclwizard()     # Fig.3.29

Figure 3.28: Qualitative palettes with colorspace:: package: a) qualitative colours, b) interactive interface to choose palette

Figure 3.29: Interactive interface of hclwizard() function from the colorspace:: package

# generate 21 divergent colors with quickPlot::divergentColors() 
library(quickPlot)
# Fig.3.30a - palette from the command example
col3<-divergentColors("darkred", "darkblue", -10, 10, 0, "white")
col3

col3.n<-length(col3)
image(1:col3.n, 1, as.matrix(1:col3.n), col=col3, xlab="quickPlot::divergentColors() / darkred-darkblue")

# Fig.3.30b - another palette
col4<-divergentColors("chocolate4", "peachpuff4", -10, 10, 0, "bisque")
col4

col4.n<-length(col4)
image(1:col4.n, 1, as.matrix(1:col4.n), col=col4, xlab="quickPlot::divergentColors() / chocolate-peachpuff")

Figure 3.30: Divergent palettes with quickPlot:: package: a) from dark red to dark blue, b) in brown shades

pal<-wesanderson::wes_palette("Zissou1", 5, type="continuous")
pal<-ghibli::ghibli_palette("PonyoLight", n=6)
ochre_palettes  # call the palettes object, displayed first two palettes
pal<-ochRe::ochre_palettes$emu_woman_paired[2:10] # select few shades
pal<-PNWColors::pnw_palette("Cascades", 6)
pal<-MetBrewer::met.brewer("Hokusai3", 7)

install.packages("devtools")
devtools::install_github("BlakeRMills/MoMAColors")
library(MoMAColors)
display.all.moma()          # Fig.3.31b
pal<-moma.colors("Smith", 3)

library(ggplot2)
library(patchwork)
library(ggsci)
library(wesanderson)
library(ghibli)
library(nord)
library(ochRe)
library(PNWColors)
library(MetBrewer) 

# helper: stripe plot
palette_stripes<-function(pal, title="Palette") {
if(is.null(pal) || length(pal)==0) {stop(paste("Empty palette:", title))}
df<-data.frame(x=seq_along(pal), y=1, fill_hex=pal)
ggplot(df, aes(x=factor(x), y=y)) + geom_tile(aes(fill=fill_hex), width=1, height=1) + scale_fill_identity() + theme_void() + theme(plot.title= element_text(hjust=0.5, face="bold", size=12), plot.margin=margin(4, 4, 4, 4)) + ggtitle(title)}

# define palettes
palettes <- list(
"ggsci: NPG"           = ggsci::pal_npg("nrc")(8),
"ggsci: AAAS"          = ggsci::pal_aaas("default")(8),
"wesanderson: Zissou1" = wesanderson::wes_palette("Zissou1", 5, type="discrete"),
"ghibli: PonyoMedium" = ghibli::ghibli_palette("PonyoMedium", 6, type= "discrete"),
"nord: Aurora"    = tryCatch(nord::nord("aurora", 5),
                 error = function(e) nord::nord_palettes$aurora[1:5]),
"ochRe: Lorikeet"      = ochRe::ochre_palettes$lorikeet,
"PNWColors: Cascades"  = PNWColors::pnw_palette("Cascades", 6),
"MetBrewer: Hokusai3"  = MetBrewer::met.brewer("Hokusai3", 6))

# build gallery
stripe_plots<-lapply(names(palettes), function(nm) {
  palette_stripes(palettes[[nm]], nm)})
wrap_plots(stripe_plots, ncol=2) # final visualisation, Fig.3.31a

Figure 3.31: Palettes from various packages a) collection of packages plotted with ggplot(), b) collection from MoMA package plotted with internal function

library(jpeg)       # for readJPEG()
photo<-readJPEG("wne-building.jpg")

library(imgpalr)        # for image_pal()
col1<-image_pal("wne-building.jpg", n=8, type="div", saturation=c(0.75, 1), brightness=c(0.75, 1), plot=TRUE)       # Fig.3.32a
col2<-image_pal("wne-building.jpg", n=8, type="div", k=2, saturation=c(0.5, 1), brightness=c(0.25, 1), plot=TRUE)   # Fig.3.32b

Figure 3.32: Palettes created from image with imgpalr::image_pal()