Welcome to the RPubs for the book ‘Applied Spatial Statistics And Econometrics: Data Analysis in R’ (Routledge, 2nd edition, 2026) (ed.Katarzyna Kopczewska).
This part is for Chapters 1-3.
To download and read data, please see RPubs with Intro.
This site is a collection of all R codes included in this book. These are just the codes - no comments, no explanations what, why and how. All those details are in the book.
The book was written by Spatial Warsaw team based at the Faculty of Economic Sciences at University of Warsaw. Visit us on LinkedIn.
3+5
vector1<-c(1, 2, 3, 4, 5) # vector of numbers 1,2,3,4,5
vector2<-10:20 # vector of integers from 10 to 20
vector3<-seq(5,10,0.2) # a sequence of numbers from 5 to 10 with a 0.2 step
class(vector1)
class(vector2)
class(vector3)
vector1i<-as.integer(vector1)
class(vector1i)
vector2[3] # the third element of vector2
vector2[2:4] # elements from 2 to 4 from vector2
vector2[c(1,3,5)] # numbers in c function mean indexes
vector4<-c("abc",3,"xyz",5)
vector4
class(vector4)
vector4i<-as.integer(vector4)
vector5<-c(FALSE,TRUE,TRUE,FALSE)
vector5
class(vector5)
vector3<-seq(5, 10, 0.2) # reminder of vector
new_vector3<-vector3 # assign a new name
new_vector3 # display the object
rm(vector3) # delete object from the memory
list1<-list(1:5, c(FALSE, TRUE, FALSE, TRUE), c(2.3, 5.9), "a", c("John", "Mary", "Sam"))
list1
list2<-list(first=c(2,3,4), second="My plants", third=c(FALSE, TRUE))
list2
str(list1)
str(list2)
list2$third # by name
list2[[3]] # by element number
list2[3] # by element number
list2[[1]][[3]]
df1<-data.frame(1:5, 101:105)
df1 # see column names created automatically
# vector with names of days of the week
wd_names<-c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
"Friday", "Saturday")
# sample() randomly selects an element from a vector
df2<-data.frame(let=sample(letters, 100, replace=TRUE),
num=sample(1:100, 100, replace=TRUE),
wd=sample(wd_names, 100, replace=TRUE))
head(df2) # returns the first 6 rows of the object by default
str(df2)
names(df2) # request to display headers (column names)
colnames(df2)<-c("letters", "numbers", "days") # add or change column names
head(df2) # display the df object after changes
x<-seq(1, 20, 0.4) # generate a variable x
y<-seq(2,21,0.4) # generate a variable y
z<-seq(3,22,0.4) # generate a variable z
xyz<-cbind(x,y,z) # combine variables into one xyz matrix object
is.data.frame(xyz) # check if xyz is a data.frame object
xyz.df<-as.data.frame(xyz) # convert xyz to data frame
head(xyz.df) # display the beginning of xyz data.frame
# observations from rows 1 to 4, variables from columns 1 and 3
df2_section<-df2[1:4, c(1,3)]
df2_section
# only rows 2,7,10,15 and all columns
df2_section2<-df2[c(2, 7, 10, 15), ]
# rows 1 to 10, but without a selected column - here is the third column
# character (-) removes the given row or column from the result
df2_section3<-df2[1:10, -3]
df2_selection4<-df2[df2$days=="Sunday",]
head(df2_selection4) # subset for Sundays
df2_selection5<-df2[df2$days=="Monday" | df2$days=="Tuesday", ]
df3<-data.frame(var_x=sample(1000:10000, 100, replace=TRUE),
var_y=sample(5000:20000, 100, replace=TRUE),
var_z=sample(1000:5000, 100, replace=TRUE))
head(df3)
# a new object containing a marker (ind) and values
df3_stacked<-stack(df3)
df3_stacked
unstack(df3_stacked) # return to the original data
df<-data.frame(ID=1:15, Name=paste("Person", 1:15), Score=sample(50:100, 15, replace=TRUE))
df$Score_new<-df$Score
# create missing observations coded as 99999
df$Score_new[c(3, 5, 9, 10)]<-99999
head(df$Score_new, 8) # display of the first 8 rows of Score variable
# assign NA to missing observations
is.na(df$Score_new)<-df$Score_new>=99998
df$Score_new # display all observations of Score_new variablegetwd() # check the current working directory
setwd("C:/R/Data/") # set the required working directory - example
library(foreign) # library supporting additional file formats
getwd() # checks the location of the default directory
# lines are hashed as this is an example only how to read different formats
# load data from a text file
#data1<-read.table("data.txt", header=TRUE, sep="\t", encoding="UTF-8")
# load data from a CSV file
#data2<-read.csv("data.csv", header=TRUE, sep=";",dec=".", encoding="UTF-8")
# load data from a tab delimited file
#data3<-read.delim("data.dat", header=FALSE, sep="\t")
# load data from a dbf file
#data4<-read.dbf("data.dbf")
# load data from the SPSS format
#data5<-read.spss("data.sav", use.value.labels=FALSE, to.data.frame=TRUE)
# load data from the STATA format
#data6<-read.dta("data.dta", convert.factors=FALSE)
library(readxl) # for read_xlsx()
# only the second sheet from myfile.xlsx will be loaded
#data7<-read_xlsx("myfile.xlsx", sheet=2, col_names=TRUE)
# reading regional data
data<-read.csv("regional_dataset.csv", header=TRUE, sep=";", dec=",", encoding="UTF-8")
names(data) # printout of a few lines only
class(data)
# convert data to the data.frame class object and create a new object
data.df<-as.data.frame(data)
str(data) # structure of the object
attributes(data) # object attributes data23<-data[data$year==2023,] # subset of regional data for the year 2023
summary(data23) # printout for a few variables
demography<-as.data.frame(cbind(data23$birth_per_1K, data23$persons_in_K, data23$persons_productive_age_K))
colnames(demography)<-c("births", "persons", "working_age")
# average calculated from all data.frame object variables using lapply()
lapply(demography, mean,na.rm=TRUE)
# average calculated from all data.frame object variables using sapply()
sapply(demography, mean, na.rm=TRUE)
# average calculated for one variable (column) with data.frame objects
mean(demography$working_age, na.rm=TRUE)
var1<-rnorm(1000) # 1000 numbers generated from the normal distribution
quantile(var1)
# user-defined probability thresholds
quantile(var1, probs=c(1,2,5,95,98,99)/100)
fivenum(data23$unemployment_rate) # position statistics
test_factor<-as.factor(data$region_name) # enforcing the factor class
class(test_factor) # checking if the variable is factor
levels(test_factor) # factor classes / level names
nlevels(test_factor) # number of classes / levels
cor(data23$unemployment_rate, data23$salary_avg_Poland_100p)
cor.test(data23$unemployment_rate, data23$salary_avg_Poland_100p)# creating a new object containing only analysed variables:
# unemployment rate, salary average, share of the working age population
variables.df<-data.frame(cbind(data23$unemployment_rate, data23$salary_avg_Poland_100p, data23$persons_productive_age_K/data23$persons_in_K*100))
colnames(variables.df)<-c("unemployment rate", "salary avg", "working age share")
# correlation matrix, the rounding function has been used
# up to two decimal places to improve readability
round(cor(variables.df, use="pairwise"),2)
# Fig.1.6a - correlation scatterplot
pairs(variables.df)
# Fig.1.6b – correlation between two variables in hexagonal bins
library(hexbin)
bin<-hexbin(data23$unemployment_rate, data23$salary_avg_Poland_100p, xbins=40)
plot(bin)
# Fig.1.6c – correloram with GGally:: package
library(GGally) # for ggpairs()
ggpairs(variables.df)
# Fig.1.6d – correlation with PerformanceAnalytics:: package
library(PerformanceAnalytics) # for chart.Correlation()
chart.Correlation(variables.df, method="spearman", hist=F) Figure 1.6: Graphical representation of correlation: a) with
pairs(), b) with hexbin(), c) with ggpairs(), d) with
chart.Correlation()
variable<-data23$salary_average
# expression standardising variable - the new variable is Z_variable
Z_variable<-(variable-mean(variable))/sd(variable)
mean(Z_variable) # the mean of the standardised variable is close to zero
var(Z_variable) # the variance is equal to 1
variable1<-data23$salary_average
Z_variable1<-scale(variable1) # the standardised variable
mean(Z_variable1)
var(Z_variable1)# aggregate regional data by year
unemp<-aggregate(data$unemployment_rate, by=list(data$year), mean, na.rm=TRUE)
names(unemp)<-c("year", "value")
head(unemp) # 6 years out of 16
# Fig.1.7a – basic plot with points only
# par(mfrow=c(1,2), mar=c(2,2,2,1)) # to split the window and set margins
plot(unemp$value) # the simplest point plot
# Fig.1.7b – next layers to Fig.1.7a
# line chart with additional options
plot(unemp$value, type="l", lwd=2, axes=FALSE, xlab="year", ylim=c(5,20), ylab="Annual unemployment rate")
axis(1, at=1:16, labels=unemp$year, cex=0.8) # x axis
axis(2) # y axis
abline(h=(5:20), lty=3, col="grey80") # horizontal lines
abline(h=(1:4)*5, lty=2, col="grey20") # horizontal darker lines
abline(v=(1:16)*1, lty=3) # vertical lines
points(unemp$value, pch=21, bg="red", cex=1.5) # points
title(main="An average unemployment rate in Poland based on poviat data", cex.main=0.8) # title of the figure
legend(3, 20, pch=21, pt.bg="red", col="black", lwd=2, c("unemployment rate"), bty="n", cex=0.8) # legend
text(1:16, unemp$value+0.5, round(unemp$value,2), cex=0.8) # text at pointsFigure 1.7: Visualisation of time series: a) simple point
plot, b) point plot with line and additional options
# Fig.1.8
library(ggplot2)
unemp$labels<-round(unemp$value, 2) # extra column for point labels
ggplot(data=unemp, mapping=aes(x=year, y=labels)) + geom_line() + geom_point(mapping=aes(color="red"), show.legend=FALSE, size=3) + geom_label(aes(label=labels), nudge_y=0.5) + labs(title="Average unemployment rate in Poland \t based on poviat data") + theme(plot.title=element_text(size=20, hjust=0.5), axis.text=element_text(size=16), axis.title=element_blank())Figure 1.8: Visualisation of time series with ggplot2:: package
# data processing for Fig.1.9a
p_preprod<-aggregate(data$persons_preproductive_age_K, by=list(data$year), sum, na.rm=TRUE)
colnames(p_preprod)<-c("year","preprod")
# Fig. 1.9a – simple bar plot
# the names.arg option gives labels under the bars
barplot(p_preprod$preprod, col="grey", ylim=c(0,8000), ylab="in thousend",
names.arg=p_preprod$year, cex.axis=1.2, main="Pre-working age population", cex.names=1.2, cex.lab=1.2, cex.main=1.5)
abline(h=c(6000,7000,8000), lty=3, col="red") # horizontal lines
# data processing for Fig.1.9b
p_preprod<-aggregate(data$persons_preproductive_age_K, by=list(data$year), sum, na.rm=TRUE)
p_prod<-aggregate(data$persons_productive_age_K, by=list(data$year), sum, na.rm=TRUE)
p_postprod<-aggregate(data$persons_postproductive_age_K, by=list(data$year), sum, na.rm=TRUE)
popul_str<-cbind(p_preprod, p_prod$x, p_postprod$x) # population structure
colnames(popul_str)<-c("year", "pre-working", "working", "post-working")
# Fig.1.9b - stacked bar graph with barplot()
# convert the data.frame object to the matrix for the barplot command
popul.m<-as.matrix(popul_str)
barplot(t(popul.m[,2:4]), ylim=c(0,40000), cex.axis=1.2, names.arg=popul.m[,1], legend.text=TRUE, args.legend=list(x="right", bg="white"), cex.names=1.2, main="Population structure by years", cex.main=1.5)
abline(h= popul.m[1,2]/1000, lty=3, lwd=2, col="blue")
abline(h=sum(popul.m[1,2:3]/1000), lty=3, lwd=2, col="blue")
abline(h=sum(popul.m[1,2:4]/1000), lty=3, lwd=2, col="blue")Figure 1.9: Structural figures: a) simple barplot, b)
cumulative barplot
t(popul.m[,2:4]) # statistics by years (1:16)
# Fig.1.10 – barplot with ggplot()
library(ggplot2)
popul_long<-reshape(popul_str, direction="long", idvar="year", timevar="type", times=names(popul_str)[2:4], varying=2:4, v.names="Popul", new.row.names=1:48)
names<-c("post-working", "working", "pre-working") # names of columns
ggplot(data=popul_long, mapping=aes(x=year, y=Popul, fill=factor(type, names))) + geom_bar(stat="identity") + geom_line(mapping=aes(x=year, y=popul_long[1,3]), color="blue", linetype="dashed") + geom_line(mapping=aes(x=year, y=popul_long[1,3] + popul_long[17,3]), color="blue", linetype="dashed") + geom_line(mapping=aes(x=year, y=popul_long[1,3] + popul_long[17,3] + popul_long[33,3]), color="blue", linetype="dashed") + ggtitle("Population structure by years") + theme(plot.title=element_text(size=20, hjust=0.5), axis.text=element_text(size=16), axis.title=element_blank(), legend.title= element_blank())Figure 1.10: Barplot using ggplot2:: package
data23<-data[data$year==2023,]
boxplot(data23$unemployment_rate, col="grey78", ylab="unemployment rate", main=" Unemployment rate in poviats in 2023")
unemp<-aggregate(data23$unemployment_rate, by=list(data23$region_name), mean, na.rm=TRUE)
unemp$ID<-c(1:16)
colnames(unemp)<-c("region", "value", "ID")
# Fig 1.11 – multi-layer boxplot
par(mar=c(5,5,5,5))
boxplot(data23$unemployment_rate~data23$region_name, axes=FALSE, ann=FALSE, ylim=c(0,65), col="bisque", border="bisque4")
# average unemployment rate line
abline(h=mean(data23$unemployment_rate, na.rm=TRUE), lty=3, col="red")
axis(1, at=1:16, labels=unemp$ID, cex.axis=0.8) # x axis
axis(2, at=(0:6)*10, cex.axis=0.8, ylab="%") # y axis
for(i in 1:8){ # legend text in two columns
text(1,60-3.5*i, unemp$ID[i], cex=0.9)
text(4,60-3.5*i, unemp$region[i], cex=0.9)}
for(i in 9:16){
text(8,60-3.5*(i-8), unemp$ID[i], cex=0.9)
text(11,60-3.5*(i-8), unemp$region[i], cex=0.9)}
title(main="The unemployment rate in 2023 by voivodships", cex.main=0.99,
sub="Data aggregated from the poviat level", cex.sub=0.9)Figure 1.11: A boxplot grouping by the selected variable:
unemployment rate by regions
# Fig.1.12 – boxplot using ggplot()
ggplot(data=data23, mapping=aes(x=region_name, y=unemployment_rate, fill=region_name)) + geom_boxplot(staplewidth=0.3) + ggtitle("The unemployment rate in 2023 by voivodships", subtitle="Data aggregated from the poviat level") + scale_x_discrete(labels=1:16) + labs(fill="Regions") + theme(plot.title=element_text(size=20, hjust=0.5), plot.subtitle= element_text(size=16, hjust=0.5), axis.text=element_text(size=16), axis.title=element_blank()) Figure 1.12: Visualisation of boxplot chart with ggplot2:: package
# Fig.1.13 – scatterplot of x and y variables
plot(log(data23$salary_average), data23$unemployment_rate, xlab="Logarithm of average salary in PLN", ylab="Registered unemployment rate (in %)") Figure 1.13: The dependency between the explanatory (x) and
dependent (y) variables selected for the model with the fitted
regression line
model1<-lm(unemployment_rate ~ log(salary_average), data=data23)
model1 # regression result - basic version
summary(model1) # regression result - full version
plot(log(data23$salary_average),data23$unemployment_rate, xlab="Logarithm of average salary in PLN", ylab="Registered unemployment rate (in %)")
abline(model1, col="red") # Fig.1.13, fitted line
intercept<-model1$coefficients[1] # extract the constant term
slope<-model1$coefficients[2] # extract the slope of the regression line
intercept
slope
# Fig.1.14a
# par(mfrow=c(1,2)) # the option to show two charts side by side
res<-residuals(model1)
plot(res, ylab="Residuals")
abline(h=0, lty=2, col="red")
title(main="Residual scatter plot")
# Fig.1.14b - histogram of residuals
hist(res, breaks=30, main="Residual histogram", freq=FALSE, xlab="Residuals", xlim=c(-15,15))
# create the normal distribution density function
# with parameters consistent with the empirical distribution of the rest
mean.res<-mean(res)
sd.res<-sd(res)
# theoretical normal distribution curve
curve(dnorm(x, mean=mean.res, sd=sd.res), add=TRUE, col="red")Figure 1.14: Diagnostics of model residuals: a) scatterplot,
b) histogram
# create a simple point dataset in data.frame class
my.points.df<-data.frame(name=c("A", "B", "C"), value=c(20, 53, 40), long=c(20.9, 21.0, 21.1), lat=c(52.2, 52.3, 52.4))
my.points.df # view the initial data.frame
library(sf) # for st_as_sf(), to convert data.frame to sf object
my.points.sf<-st_as_sf(my.points.df, coords=c("long", "lat"), crs=4326)
my.points.sf # view the sf class object
names(my.points.sf)[1]<-"PointName" # change the name of the 1.st column
my.points.sf$value<-my.points.sf$value-2 # modify values of variable
my.points.sf
mean(my.points.sf$value)library(sp) # for proj4string() and as()to convert sf points to sp
my.points.sp<-as(my.points.sf, "Spatial") # convert from sf to sp
proj4string(my.points.sp)<-CRS("+proj=longlat +datum=WGS84 +no_defs")
# points_again_sf<-st_as_sf(my.points.sp) # reverse conversion if needed
class(my.points.sp)
my.points.sp
str(my.points.sf)
str(my.points.sp)
mean(my.points.sf$value) # direct access in sf
mean(my.points.sp@data$value) # requires accessing the @data slot
polygon.sp<-as(polygon.sf, "Spatial") # convert sf polygons to sp
polygon.sp # shorter output to save space
str(polygon.sf) # check the internal structure of the object
str(polygon.sp) # internal structure uses the slot based systempov<-st_read("powiaty.shp") # read the shapefile
st_is_longlat(pov)
st_crs(pov)
pov<-st_transform(pov, crs="+proj=longlat +datum=NAD83") # change projection
table(st_is_valid(pov))
which_incorrect<-which(!st_is_valid(pov))
pov[which_incorrect, c("JPT_KOD_JE", "JPT_NAZWA_")]
pov<-st_make_valid(pov) # automatically correct all the geometries
which_incorrect<-which(!st_is_valid(pov))# filter out one row - get the region of interest
warsaw_poviat<-pov[pov$JPT_NAZWA_=="powiat Warszawa", ]
# limit the descriptive columns in the dataset
warsaw_poviat<-warsaw_poviat[ ,c("JPT_NAZWA_", "SHAPE_AREA")]
warsaw_poviat # two descriptive columns plus geometry
head(st_set_geometry(warsaw_poviat, value=NULL))
head(st_drop_geometry(warsaw_poviat)) # both provide the same output
class(warsaw_poviat)
class(st_drop_geometry(warsaw_poviat))
class(warsaw_poviat$geometry)# filter out ‘powiat pruszkowski’ and limit descriptive columns
pruszkow_poviat<-pov[pov$JPT_NAZWA_=="powiat pruszkowski", c("JPT_NAZWA_", "SHAPE_AREA")]
# bind two objects into a single object that still includes two areas
wawpru_separated<-rbind(warsaw_poviat, pruszkow_poviat)
wawpru_separated # two areas remain as separate rows
# bind two objects into a single object that includes one big area
wawpru_union<-st_union(warsaw_poviat, pruszkow_poviat)
wawpru_union # now one area is represented as a single rowcircle_waw<-st_buffer(st_centroid(warsaw_poviat), dist=10000) # 10 km
common_area<-st_intersection(warsaw_poviat, circle_waw)
common_area
plot(st_geometry(warsaw_poviat)) # plot the contour of the region
plot(st_geometry(circle_waw), add=TRUE) # plot the circle
plot(st_geometry(common_area), add=TRUE, col="cornsilk") # plot joint area
plot(st_geometry(st_buffer(warsaw_poviat, dist=2000)), col="cornsilk") # 2km
plot(st_geometry(warsaw_poviat), col="white", add=TRUE) # plot smaller regionFigure 2.1: Buffers and intersections of regions: a) region
and circle (buffer of centroid), b) region and buffered region
# unify projection of my.points.sf with warsaw_poviat
my.points.sf<-st_transform(my.points.sf, crs="+proj=longlat +datum=NAD83")
st_within(my.points.sf, warsaw_poviat)
inside_warsaw<-my.points.sf[st_within(my.points.sf, warsaw_poviat), ]
# Error in unclass(x)[i] : invalid subscript type 'list'
st_within(my.points.sf, warsaw_poviat, sparse=FALSE)
inside_warsaw<-my.points.sf[st_within(my.points.sf, warsaw_poviat, sparse=FALSE),]# read the file with statistical variables (using relative path)
data<-read.csv("regional_dataset.csv", header=TRUE, sep=";", dec=",", encoding="UTF-8")
head(data) # check if data was read properly
summary(data) # glimpse inside of the dataset structure
names(data) # output shortened, full in chapter Introduction
length(unique(data$code)) # unique poviat names
length(unique(data$year)) # number of years included in the sample
pov[1:3, 1:5] # rows 1:3 and columns 1:5
data[1:3, 1:5]
# example on using ID_MAP as the merging key
# allows for a direct binding due to proper data ordering
data[data$year==2010,]$ID_MAP
# cbind() works on datasets with the same number of rows
# hence, filtering by year is needed to have 380 observations in each
dataSpatial2010<-cbind(pov, data[data$year==2010,]) #bind datasets
# show a sample of the merged dataset, printout with rows only
dataSpatial2010[,c("JPT_NAZWA_", "JPT_KOD_JE", "subregion_name_PL_font", "subregion_number", "year", "housing_price_sqm_PLN")]
pov$JPT_KOD_JE[1:10] # treated as a character, leading zeros included
data[data$year==2010,]$subregion_number[1:10] # as a number, no leading zeros
# 1st option – remove leading zeros from JPT_KOD_JE
pov$JPT_KOD_JE_numeric<-as.numeric(pov$JPT_KOD_JE)
# 2nd option – add leading zeros to the shorter subregion_number codes
data$subregion_number_padded<-sprintf("%04d", data$subregion_number)
data2010<-data[data$year==2010,] # subset of data for 2010 – 380 rows
merged_data1<-merge(pov, data2010, by.x="JPT_KOD_JE", by.y= "subregion_number_padded") # merge the original JPT_KOD_JE
merged_data2<-merge(pov, data2010, by.x="JPT_KOD_JE_numeric", by.y= "subregion_number") # merge the original subregion_number
merged_data1[1:5, c("JPT_KOD_JE", "subregion_name_noPL_font", "area_km2")]
merged_data2[1:5, c("JPT_KOD_JE", "subregion_name_noPL_font", "area_km2")]
#1st option – put subregion_name_PL_font to lowercase to match with JPT_NAZWA_
data2010$subreg_name_PL_font_lower<-tolower(data2010$subregion_name_PL_fon)
# merge the original JPT_NAZWA_ and lowercase subregion_name_PL_font
merged_data3<-merge(pov, data2010, by.x="JPT_NAZWA_", by.y = "subreg_name_PL_font_lower")
#2nd option – put names to lowercase and remove latin letters
library(stringi) # for stri_trans_general()
data2010$subreg_name_noPL_font_lower<-tolower(data2010$subregion_name_noPL_font)
pov$JPT_NAZWA_clean<-stri_trans_general(tolower(pov$JPT_NAZWA_), "Latin-ASCII")
# merge the original JPT_NAZWA_ and lowercase subregion_name_PL_font
merged_data3<-merge(pov, data2010, by.x="JPT_NAZWA_", by.y="subreg_name_PL_font_lower")
# merge with no special letters - using JPT_NAZWA_clean
merged_data4<-merge(pov, data2010, by.x="JPT_NAZWA_clean", by.y="subreg_name_noPL_font_lower")
merged_data3[1:5, c("JPT_KOD_JE", "subregion_name_noPL_font", "area_km2")]
merged_data4[1:5, c("JPT_KOD_JE", "subregion_name_noPL_font", "area_km2")]library(dplyr)
library(stringr)
class(merged_data1)
glimpse(merged_data1) # printout was shortened
select(merged_data1, JPT_KOD_JE, year, forest_km2) # selected columns
select(merged_data1, persons_in_K:persons_preproductive_age_K)
select(merged_data1, starts_with("subregion"))
select(merged_data1, JPT_KOD_JE, ends_with("01"), matches("_.{4}_"), -JPT_KJ_I_1)
limited_data<-select(merged_data1, JPT_KOD_JE, ends_with("01"), matches("_.{4}_"), -JPT_KJ_I_1) # save new object
filter(merged_data1, str_detect(JPT_NAZWA_, "ice")) # rows 1-3 only
filter(merged_data1, str_detect(JPT_NAZWA_, "ice"), unemployment_rate>10)
merged_data1<-mutate(merged_data1, code_voi=str_sub(JPT_KJ_I_1, 1, 2))
merged_data1<-rename(merged_data1, NUTS2_id=code_voi)
nrow(merged_data1)
summarize(st_set_geometry(merged_data1, NULL), n())
summarize(st_set_geometry(merged_data1, NULL), number=n())
poviats_voi<-group_by(st_set_geometry(merged_data1, NULL), NUTS2_id)
class(poviats_voi)
summarize(poviats_voi, number=n()) # first 6 only
arrange(summarize(poviats_voi, number=n()), number) # first 6 only
arrange(summarize(poviats_voi, number=n()), desc(number)) # first 6 only
poviats_voi %>% summarize(number=n()) %>% arrange(desc(number)) #first 6 only
poviats_voi %>% summarize(number=n()) %>% arrange(desc(number)) %>% .[["number"]] %>% hist() # display a histogram of poviats in regions
merged_data1 %>% select(JPT_NAZWA_, JPT_KJ_I_1) %>%
mutate(code_voi=str_sub(JPT_KJ_I_1, 1, 2)) %>%
rename(NUTS2_id=code_voi) %>% st_set_geometry(NULL) %>%
group_by(NUTS2_id) %>% summarize(number=n()) %>% arrange(desc(number))
merged_data1 %>% mutate(area=st_area(.)) %>% st_set_geometry(NULL) %>%
group_by(NUTS2_id) %>% summarise(area=sum(as.numeric(area))/1e6) #1-3 only# Fig.3.1a - plot shape only using plot() (contours in the sf class)
library(sp) # for degAxis()
plot(st_geometry(pov), border="darkgreen")
plot(st_geometry(voi), add=TRUE, lwd=2)
degAxis(1) # x-coordinate values, from sp::
degAxis(2) # y-coordinate values
# Fig.3.1b - the same with ggplot() - plot two maps simultaneously
ggplot() + geom_sf(data=pov) + geom_sf(data=voi, color="red", fill=NA)Figure 3.1: Contour maps with two layers: a) with plot(), b)
with ggplot()
# Fig.3.2a
library(ggspatial) # for annotations
# graticule for a map
bbox<-st_as_sfc(st_bbox(voi)) # bounding box
Bbox
lon_breaks<-seq(14, 24, by=2) # longitude every 2 degrees
lat_breaks<-seq(49, 55, by=1) # latitude every 1 degree
graticule<-st_transform(st_graticule(lat=lat_breaks, lon=lon_breaks), crs=4326)
graticule_clipped<-st_intersection(graticule, bbox)
ggplot() + geom_sf(data=voi, fill="white", color="black") + geom_sf(data=graticule_clipped, color="black", linetype="solid", size=0.3) + annotation_scale(location="bl", width_hint=0.3) + annotation_north_arrow(location="tl", which_north="true", style= north_arrow_fancy_orienteering) + theme_minimal() + theme(axis.text=element_text(size=12), panel.grid.major=element_blank(), panel.grid.minor=element_blank())
# Fig.3.2b
library(maps) # for map.scale()
library(sp) # for compassRose()
plot(st_geometry(region.pov), graticule=TRUE, axes=TRUE)
plot(st_geometry(region.voi), add=TRUE, lwd=2)
map.scale(x=21.1, y=50.4, ratio=FALSE, relwidth=0.15, metric=TRUE, cex=0.65) # from maps::
compassRose(21.5, 52.1,rot=0,cex=1) # from sp::Figure 3.2: Contour maps with additional features
(annotations, compass rose, scale): a) using ggplot(), b) using
plot()
# regional data – distance to the core city
data23<-data[data$year==2023, ] # select one year for the analysis
summary(data23$dist) # variable to be coloured, distance to core city
pov$dist<-data23$dist # transfer variable between objects
ggplot() + geom_sf(data=pov, aes(fill=dist)) + labs(title="Distance to the core cities within the NUTS2 division") # Fig.3.3a
# Fig.3.3b
plot1<-ggplot() + geom_sf(data=pov, aes(fill=dist)) + scale_fill_gradient(low='white', high='grey20') + geom_sf(data=voi, color="white", fill=NA) + labs(title="Grey-scale figure of in-region distances") + theme_minimal() # no grey background
plot1 # figure was saved to memory and now is calledFigure 3.3: Contour maps with colour layer using ggplot(): a) with automatic colours, b) in shades of grey with second layer of contour map
# Fig.3.4a
pov$unemployment_rate<-data23$unemployment_rate # transfer the variable
# define the color palette
library(RColorBrewer) # for brewer.pal()
maxy<-25 # maximum value of a variable for a colour range
breaks<-c(0, 5, 10, 15, 20, 25)
shades<-brewer.pal(8, "Reds") # it assumes 8 shades of red colours
fillRed<-colorRampPalette(shades) # it makes a function
colcode<-fillRed(maxy)[round(data23$ unemployment_rate) + 1]
colcode # vector of continous shades for a given variable
ggplot() + geom_sf(data=pov, aes(fill=data23$unemployment_rate), color=NA) + geom_sf(data=voi, color="gray60", fill=NA, size=1) + scale_fill_gradientn(colours=fillRed(maxy), breaks=breaks, labels=breaks, name="Unemployment Rate") + theme_void() + labs(title="Unemployment rate in 2023") + theme(legend.position="right", legend.title=element_text( size=10), legend.text=element_text(size=8), plot.title=element_text( hjust=0.5), plot.subtitle=element_text(hjust=0.5)) + annotation_north_arrow(location="bl", which_north="true", style= north_arrow_fancy_orienteering)
# the same graphics with plot()
library(shape) # for colorlegend()
library(maps) # for map.scale()
library(sp) # for compassRose()
plot(st_geometry(pov), col=colcode, lty=0, border="gray")
plot(st_geometry(voi), add=TRUE, lwd=1, border="gray60")
# add-ins to map
map.scale(x=17.5, y=49.17, ratio=FALSE, relwidth=0.15, metric=TRUE, cex=0.65) # with maps::
compassRose(15, 49.7,rot=0,cex=1) # with sp::
colorlegend(posy=c(0.05,0.9), posx=c(0.9,0.92), col=fillRed(maxy), zlim=c(0, maxy), zval=breaks, main.cex=0.9) # with shape::
title(main="Unemployment rate in 2023")
library(viridis) # for scale_fill_viridis()
ggplot() + geom_sf(data=pov, aes(fill=unemployment_rate), color=NA) +
scale_fill_viridis(option="A", direction=-1, breaks=breaks, labels=breaks, name="Unemployment Rate") + theme_minimal() # Fig.3.4bFigure 3.4: Contour maps with colour layers created with
different functions: a) with colorRampPalette(), b) with
viridis()
# Fig.3.5a
brks<-c(0, 25, 50, 75, 100, 125, 150, 175) # intervals related to data values
cols<-brewer.pal(8, "Greys") # shades of grey from RColorBrewer::
par(mar=c(5,5,5,5)) # bigger margins, to accommodate subtitle
plot(st_geometry(pov), col=cols[findInterval(pov$dist, brks)], border="transparent") # no borders of regions due to ‘transparent’ colour
plot(st_geometry(voi), border="grey60", add=TRUE)
title(main="Distance from poviats to core regional cities", sub="Euclidean distance in km calculated between regional centroids")
legend("bottomleft", legend=paste("from ",brks[1:7], "to ", brks[2:8]), pt.bg=cols, cex=0.8, bty="n", pch=21)Figure 3.5: Visualisations using plot(): a) colours assigned with findInterval(), b) colours assigned with findColours() from classInt::
library(classInt) # for classIntervals()
summary(pov$unemployment_rate)
intervals<-5
colors<-brewer.pal(intervals, "BuPu") # from blue (Bu) to purple (Pu)
classes<-classIntervals(pov$unemployment_rate, intervals, style="fixed",
fixedBreaks=c(0, 5, 10, 15, 20, 25))
color.table<-findColours(classes, colors)
plot(pov$geometry, col=color.table) # Fig.3.5b
plot(voi$geometry, lwd=2, add=TRUE)
legend("bottomleft", legend=names(attr(color.table, "table")),
fill=attr(color.table, "palette"), cex=0.8, bty="n")
title(main="Unemployment rate in poviats in 2023")
# Fig.3.6a
library(scales) # for show_col()
# prepare colours and density
variable<-unique(data$region_number[data$year==2023])
n<-16 # number of regions
brks<-(0:n)*2 # IDs of regions as in the database
dens<-round(seq.int(from=1, to=48, length.out=n), 0) # number of stripes
voi$fill_col<-gray.colors(n, start=0.9, end=0.3) # shades of grey
scales::show_col(voi$fill_col) # plot colours as coloured matrix
voi$fill_dens<-dens[findInterval(variable, brks, all.inside=TRUE)]
# plot shaded polygons based on border points coordinates
plot(st_geometry(voi), col=NA, border=NA, main="Shaded density map")
for(i in seq_len(nrow(voi))) {
geom<-st_geometry(voi)[[i]]
if(st_geometry_type(geom)=="MULTIPOLYGON") {
sub_polys<-geom[[1]]
} else {
sub_polys<-list(geom)
}
for(poly in sub_polys) {
x<-poly[,1]
y<-poly[,2]
polygon(x, y, col=voi$fill_col[i], border="grey80", density=voi$fill_dens[i], angle=45)
}
}
plot(voi$geometry, add=TRUE, border="grey70")
# Fig.3.6b – automated graphics with plot()
plot(pov["dist"], key.pos=4, main="Distance to core city")Figure 3.6: Regional maps: a) with shaded layer, b) with
automated division into intervals and selection of colours
library(GISTools)
library(RColorBrewer)
data23<-data[data$year==2023, ] # select one year for the analysis
pov$dist<-data23$dist # transfer variable between objects
pov$unemp_rate23<-data23$unemployment_rate
# Fig.3.7a - with default colours and number of intervals
choropleth(pov, "dist") # from the GISTools:: package
shades<-auto.shading(pov[["dist"]]) # default number of intervals
choro.legend(15, 50, shades, cex=0.65, bty="n")
# Fig.3.7b - with own colours and number of intervals
shades<-auto.shading(pov[["dist"]], n=6, cols=brewer.pal(6, "Purples"))
choropleth(pov, "dist", shading=shades)
choro.legend(14.2, 50.5, shades, cex=0.8, bty="n", title='Distance in km')
# not shown - division into intervals by mean and std.dev,
shades<-auto.shading(pov[["dist"]], n=6, cols=add.alpha(brewer.pal(6, "Greens"), 0.5), cutter=sdCuts)
choropleth(pov, "dist", shading=shades)
choro.legend(14.2, 50.2, shades, under="below", over="above", between="to", cex=0.6, bty="n")Figure 3.7: Regional maps with GISTools:: package: a) with
default setting, b) with user setting
# Fig.3.8a - basic figure with plot() and points()
# plot based on data in data.frame class
par(bg="white") # colour of background
par(mar=c(1,1,1,1)) # almost no margins for bigger map
plot(st_geometry(region.voi)) # boundary of the region
points(points[,c("crds.x", "crds.y")], pch=".") # layer of points (dots)
# the same plot but based on data in sf class
points.sf<-st_as_sf(points, coords=c("crds.x", "crds.y"), crs=4326)
plot(st_geometry(region.voi)) # boundary of the region
points(st_geometry(points.sf), pch=".") # layer of points (dots)
# Fig.3.8b – more sophisticated plot with a grey background
par(bg="grey95") # grey colour of background
par(mar=c(2,2,2,1)) # moderate size of margins
plot(st_geometry(region.voi), border="grey70", col="grey80")
title(main="Location of firms (10%) in lubelskie NUTS2 region")
points(points[,c("crds.x", "crds.y")], pch=21, col="white", bg="cornsilk", cex=0.1) # points in light colour, plotted as small circles
#sf points in light colour, plotted as small circles
#points(st_geometry(points.sf), pch=21, col="white", bg="cornsilk", cex=0.1)
# layer including location of cities in this region
cities<-read.csv("cities in lubelskie region.csv", sep=";", dec=",", header=TRUE)
cities$label<-paste(cities$position, cities$city, sep=".")
cities$rel.pop<-cities$population/mean(cities$population)
cities$xx<-as.numeric(as.character(cities$xx)) # make variable numeric
cities$yy<-as.numeric(as.character(cities$yy))
cities
points(cities[,1:2], pch=21, bg="black", cex=0.8) # points for cities
text(cities[,1:2], cities$label, font=2) # city name and ranking position
legend("bottomleft", legend=paste0(cities$label,", popul.", cities$population), pt.cex=cities$rel.pop, bty="n", pch=21, cex=0.7)
abline(h=c(50.5, 51, 51.5, 52), col="white", lwd=1, lty=3) # graticule
abline(v=c(21.5, 22, 22.5, 23, 23.5, 24), col="white", lwd=1, lty=3)
degAxis(1) # x-axis
degAxis(2) # y-axis
par(bg="white") # white background for another plotsFigure 3.8: Point data within regional contour: a) basic
visualisation, b) coloured visualisation with additional layers
# Fig.3.8a – the same basic plot as before but with ggplot() and data.frame
library(ggplot2)
# points in data.frame class
ggplot() + geom_sf(data=region.voi, fill=NA, color="black", linewidth=0.3) + geom_point(data=points, aes(x=crds.x, y=crds.y), shape=46, size=0.5) + coord_sf(expand=FALSE) + theme_void() + theme(panel.background= element_rect(fill="white", color=NA), plot.background =element_rect(fill= "white", color=NA), plot.margin=margin(4, 4, 4, 4))
# the same plot of points – from sf class
ggplot() + geom_sf(data=region.voi, fill=NA, color="black", linewidth=0.3) + geom_sf(data=points.sf, shape=46, size=0.5) + coord_sf(expand=FALSE) + theme_void() + theme(panel.background= element_rect(fill="white", color=NA), plot.background =element_rect(fill= "white", color=NA), plot.margin=margin(4, 4, 4, 4))# Fig.3.8b – the same sophisticated plot in ggplot()
x_breaks<-c(21.5, 22, 22.5, 23, 23.5, 24) # "graticule" breaks
y_breaks<-c(50.5, 51, 51.5, 52)
ggplot() + # start the ggplot, then each line comes with ‘+’
# region boundary
geom_sf(data=region.voi, fill="grey80", color="grey70", linewidth=0.4) +
# dashed white graticule lines (like abline())
geom_vline(xintercept=x_breaks, linetype=3, linewidth=0.4, colour="white") + geom_hline(yintercept=y_breaks, linetype=3, linewidth=0.4, colour="white") +
# point data as light circles – for data.frame class point data
geom_point(data=points, aes(x=crds.x, y=crds.y), shape=21, size=0.6, stroke=0.2, colour="white", fill="cornsilk") +
# point data as light circles – for sf class point data
# geom_sf(data=points.sf, shape=21, size=0.6, stroke=0.2, colour="white", fill="cornsilk") +
# cities as black circles with labels
geom_point(data=cities, aes(x=xx, y=yy, size=rel.pop), shape=21, fill="black", colour="black", show.legend=TRUE) +
geom_text(data=cities, aes(x=xx, y=yy, label=label), fontface="bold", nudge_y=0.04, size=3) +
# axes
coord_sf(expand=FALSE) +
scale_x_continuous(breaks=x_breaks, labels=function(x) paste0(x, "°E")) + scale_y_continuous(breaks=y_breaks, labels=function(y) paste0(y, "°N")) +
# legend for city sizes (relative population)
scale_size(name="City population\n(relative to mean)", range=c(2, 6), guide= guide_legend(override.aes=list(shape=21, fill="black", colour="black"))) +
# title and theming (grey background)
labs(title="Location of firms (10%) in lubelskie NUTS2 region") + theme_void(base_size=11) + theme(plot.title=element_text(hjust=0.5), plot.background = element_rect(fill="grey90", colour=NA), panel.background= element_rect(fill="grey90", colour=NA), plot.margin=margin(8, 8, 8, 4), legend.position=c(0.03, 0.15), legend.justification=c(0, 0))
# end of the ggplot() function# xy coordinates in a matrix class
centroids<-st_centroid(st_make_valid(pov))
crds<-st_coordinates(centroids)
head(crds) # head() is for 6 lines, only 3 are presented
# Fig.3.9a – using plot()
plot(st_geometry(pov)) # plot the contour
plot(st_geometry(centroids), pch=21, bg='red', add=TRUE)
# the same with ggplot()
ggplot() + geom_sf(data=pov, fill="grey85", color="black", linewidth=0.3) + geom_sf(data=centroids), shape=21, fill="red", colour="black") + theme_minimal()
crds.voi<-st_coordinates(st_centroid(st_make_valid(voi)))
labels<-data.frame(X=crds.voi[,1], Y=crds.voi[,2], Name=as.character(voi$ JPT_NAZWA_))
labels # just the 3 first lines
# Fig.3.9b – using ggplot()
ggplot() + geom_sf(data=voi, fill="white", color="black") + geom_text(data= labels, aes(x=X, y=Y, label=Name), size=3, hjust=0.5, vjust=-0.5, fontface="bold") + theme_void()
# the same (Fig.3.9b) with plot()
plot(st_geometry(voi), border="grey60")
library(car) # for plotting labels without overlaps
pointLabel(labels[,c("X", "Y")], labels$Name, cex=0.8) Figure 3.9: Centroids of regions: a) as point data, b) as
locators for labels
# interactive edition of regions’ names
plot(st_geometry(voi))
identify(labels[,c("X", "Y")], labels=labels$Name)# ggplot() with text in boxes
library(ggrepel) # for geom_label_repel()
ggplot() + geom_sf(data=voi, aes(fill=as.numeric(JPT_KOD_JE))) + geom_point(aes(x=X, y=Y, stroke=2), colour='grey80', data=labels, size=0.5) + geom_label_repel(aes(x=X, y=Y, label=Name), data=labels, family='Times', size=2, box.padding=0.2, point.padding=0.3, segment.color='grey50') + theme(legend.position="none")# Fig.3.10a
# Create own data
x<-c(5,6,7,8,8,7,6,5,4,3,2,2,3,4,5) # x coordinates of points
y<-c(2,3,4,5,6,7,7,6,7,7,6,5,4,3,2) # y coordinates of points
points_df<-data.frame(ID=1:length(x), x=x, y=y) # integrated data.frame
points_sf<-st_as_sf(points_df, coords=c("x", "y"), crs=4326) # sf class
line_sf<-st_as_sf(data.frame(geometry=st_sfc(st_linestring( as.matrix(points_df[, c("x", "y")])))), crs=4326) # lines between points
bbox<-st_bbox(points_sf) # bounding box of dataset
x_breaks<-seq(floor(bbox$xmin), ceiling(bbox$xmax), by=1)
y_breaks<-seq(floor(bbox$ymin), ceiling(bbox$ymax), by=1)
# option panel.grid=element_blank() removes all gridlines
ggplot() + geom_sf(data=points_sf, color="black", shape=3, size=3) + coord_sf(xlim=c(bbox$xmin, bbox$xmax), ylim=c(bbox$ymin, bbox$ymax), expand=FALSE) + theme_minimal() + theme(panel.grid=element_blank(), panel.background=element_rect(fill="white", color=NA)) + labs(x="Longitude", y="Latitude")
# Fig.3.10b
ggplot() + geom_sf(data=line_sf, color="black", linewidth=1.2) + geom_sf(data=points_sf, color="black", shape=3, size=3) + coord_sf(xlim= c(bbox$xmin, bbox$xmax), ylim=c(bbox$ymin, bbox$ymax), expand=FALSE) + scale_x_continuous(breaks=x_breaks) + scale_y_continuous(breaks=y_breaks) + theme_minimal() + labs(x="Longitude", y="Latitude")Figure 3.10: Generating point data and merging the points with line
# Fig.3.11a - typical point plot in ggplot() – points and contour in sf
ggplot() + geom_sf(data=points.sf, aes(col=poviat)) + geom_sf(data=region.pov, fill=NA)
# Fig.3.11b
ggplot(points.sf) + geom_sf(aes(color=as.factor(SEC_PKD7)), size=1) + scale_color_viridis_d(option="turbo", name="Industry Code") + theme_minimal() + theme(legend.position="right", axis.text= element_text(size=10), legend.text=element_text(size=9), plot.margin=margin(2, 2, 2, 2))Figure 3.11: Categorial data in colours with ggplot(): a)
points in colours by poviats, b) points in colours by sectors
# Fig.3.12a – simple plot of numeric variable
ggplot()+ geom_sf(data=points.sf, aes(col=subreg))
# Fig.3.12b – multiplot by other variable
ggplot() + geom_point(data=points, alpha=0.5, aes(x=crds.x, y=crds.y, color=roa)) + scale_colour_gradient(low="yellow", high="red") + facet_wrap(~SEC_agg) + ggtitle("Roa of firms by sectors") + labs(fill="ROA", x="", y="") + geom_sf(data=region.voi, fill=NA)Figure 3.12: Numeric data in colours with ggplot(): a) points in colours by subregions, b) points in colours by profitability, subfigures by sectors
# random observations – subset of geo-located points
nn<-5000 # sample size
selector<-sample(size=nn, 1:dim(points)[1], replace=FALSE) # random IDs
points.lim<-points[selector, c("crds.x", "crds.y", "SEC_PKD7", "SEC_agg")]
points.lim.sf<-st_as_sf(points.lim, coords=c("crds.x", "crds.y"), crs=4326)
# coordinates of regional cities
city.biala<-c(23.0682406, 52.0291252)
city.zamosc<-c(23.2134931, 50.7213773)
city.chelm<-c(23.4196661, 51.1355623)
city.lublin<-c(22.4236853, 51.2180254)
# calculating the distances – from all firms to centre of city
# st_distance calculates distances in pairs of observations from two objects
# city location was multiplied
# distance to Biala Podlaska
city.biala.df<-data.frame(ID=1:nn, x=rep(city.biala[1], times=nn), y=rep(city.biala[2], times=nn))
city.biala.sf<-st_as_sf(city.biala.df, coords=c("x", "y"), crs=4326) # in sf
dist.to.biala<-st_distance(points.lim.sf, city.biala.sf, by_element=TRUE) # one gets raw result in meters, from sf:
points.lim$dist.biala<-as.numeric(dist.to.biala)/1000 # convert to km
# distance to Zamość
city.zamosc.df<-data.frame(ID=1:nn, x=rep(city.zamosc[1], times=nn), y=rep(city.zamosc[2], times=nn))
city.zamosc.sf<-st_as_sf(city.zamosc.df, coords=c("x", "y"), crs=4326)
dist.to.zamosc<-st_distance(points.lim.sf, city.zamosc.sf, by_element=TRUE)
points.lim$dist.zamosc<-as.numeric(dist.to.zamosc)/1000
# distance to Chelm
city.chelm.df<-data.frame(ID=1:nn, x=rep(city.chelm[1], times=nn), y=rep(city.chelm[2], times=nn))
city.chelm.sf<-st_as_sf(city.chelm.df, coords=c("x", "y"), crs=4326)
dist.to.chelm<-st_distance(points.lim.sf, city.chelm.sf, by_element=TRUE)
points.lim$dist.chelm<-as.numeric(dist.to.chelm)/1000
# distance to Lublin
city.lublin.df<-data.frame(ID=1:nn, x=rep(city.lublin[1], times=nn), y=rep(city.lublin[2], times=nn))
city.lublin.sf<-st_as_sf(city.lublin.df, coords=c("x", "y"), crs=4326)
dist.to.lublin<-st_distance(points.lim.sf, city.lublin.sf, by_element=TRUE)
points.lim$dist.lublin<-as.numeric(dist.to.lublin)/1000
# minimum distance (out of four distances)
points.lim$min.dist<-apply(as.matrix(points.lim[,c("dist.biala", "dist.zamosc", "dist.chelm", "dist.lublin")]), 1, FUN=min)
head(points.lim) # head() is for the first 6 lines, only 3 presented
# Fig.3.13a - blue plot
ggplot() + geom_point(data=points.lim, alpha=0.5, aes(x=crds.x, y=crds.y, color=min.dist)) + geom_sf(data=region.voi, fill=NA)
# Fig.3.13b - green plot
cols<-RColorBrewer::brewer.pal(4,'Paired')
ggplot() + geom_point(data=points.lim, alpha=0.5, aes(x=crds.x, y=crds.y, color=min.dist)) + scale_colour_gradient(low=cols[1], high=cols[4]) + ggtitle("Distance from points to the clostest city") Figure 3.13: Distances in colours with ggplot(): a) basic
colours, b) own colours
# Fig.3.14a - map of points density with isolines
ggplot() + geom_sf(data=region.voi) + stat_density2d(aes(x=crds.x, y=crds.y, fill=after_stat(level), alpha=0.25), linewidth=0.01, bins=30, data=points, geom="polygon") + geom_density2d(data=points, aes(x=crds.x, y=crds.y), size=0.3)
# Fig.3.14b - map of points density with isolines and labels of cities
# cities # see Fig.3.8 for structure of object
library(ggrepel) # for geom_label_repel()
ggplot() + geom_sf(data=region.voi, fill=NA, color="grey60", linewidth=0.2) + stat_density_2d(data=points, aes(x=crds.x, y=crds.y, fill=after_stat(level)), geom="polygon", bins=30, colour=NA, alpha=0.55) + scale_fill_viridis_c(name="Point density", option="C") + geom_point(data=cities, aes(x=xx, y=yy), shape=21, fill="grey85", color="grey40", size=1.6, stroke=0.5) + geom_label_repel(data=cities, aes(x=xx, y=yy, label=label), size=2.5, box.padding=0.2, point.padding=0.3, segment.color="grey50", fill="white", label.size=0.2, max.overlaps=Inf) + coord_sf(expand=FALSE) + theme_minimal(base_size=11) + theme(panel.grid.major=element_line(linewidth=0.2, colour="grey90"), axis.title=element_blank())Figure 3.14: Density plots with ggplot(): a) density with
isolines, b) lables of cities added
# Fig.3.15 - automated solution with plot()
# for categorical variable – Fig.3.15a
plot(points.sf["subreg"], pch=16, main="Categorical variable ‘subreg’ in colours")
# for continuous variable – Fig.3.15b
plot(points.sf["roa"], pch=16, main="Continous variable ‘roa’ in colours")Figure 3.15: Coloured points with plot() and sf class object:
a) for a categorical variable, b) for a numeric variable
# Fig.3.16a - plot of points in colours with findInterval()
var<-points$subreg # levels of variable to be coloured
locs<-points[, c("crds.x", "crds.y")] # xy locations
brks<-c(1, 2, 3, 4) # lower bins of intervals
cols<-c("blue3", "cornflowerblue", "seagreen1", "green2") # colours
plot(st_geometry(region.voi)) # contour of region
points(locs, col=cols[findInterval(var, brks)], pch=16, cex=0.7)
legend("bottomleft", legend=brks, fill=cols, cex=0.8, bty="n")
title(main="Points - colors by values") # title of figure
# Fig.3.16b – coloured points representing regional statistics
var<-data$salary_average[data$year==2023]
summary(var)
brks<-c(5000, 5500, 6000, 6500, 7000, 9000, 11000, 13000)
crds<-st_coordinates(st_centroid(st_make_valid(pov)))
size<-(brks/100)*1.2
library(RColorBrewer)
cols<-brewer.pal(9, "Reds") # from RColorBrewer::, max 9 colours
cols
plot(pov$geometry, border="grey90")
plot(voi$geometry, border="grey50", add=TRUE)
points(crds, col=cols[findInterval(var, brks)], pch=16, cex=size[findInterval(var, brks)]/40)
legend("bottomleft", legend=brks, pt.bg=cols, pt.cex=size/40, bty="n", pch=21)
title(main="Average salary in Poland in year 2023")
savePlot(filename="Regional salary as points", type="jpg")Figure 3.16: Regional map with plot() for points in colour: a) point data coloured by subregion, b) centroids of regions coloured and sized by regional average salary
# Fig.3.17a
locs<-points.lim[ ,c("crds.x", "crds.y")]
var<-points.lim$min.dist
summary(var)
brks<-c(0, 10, 20, 30, 40, 81)
cols<-c("red", "blue3", "cornflowerblue", "seagreen1", "green2")
plot(st_geometry(region.voi)) # regional contour
points(locs, col=cols[findInterval(var, brks)], pch=16, cex=0.7) # points
legend("bottomleft", legend=paste("from", brks[1:5], "to", brks[2:6], "km"), fill=cols, cex=0.8, bty="n") # legend
title(main="Points - colors by distance") # title
# Fig.3.17b – dataset in sp class and plot with spplot()
library(sp) # to create SpatialPointsDataFrame class
points.lim.sp<-points.lim # copy of the object
coordinates(points.lim.sp)<-c("crds.x", "crds.y") # class changes to sp
proj4string(points.lim.sp)<-CRS("+proj=longlat +datum=WGS84") #projection
class(points.lim.sp)
spplot(points.lim.sp["min.dist"]) # plot for sp classFigure 3.17: Points coloured by distance: a) plotted using plot() and findInterval() from data.frame class, b) plottted using spplot() from sp class
# Fig.3.18 – statistic and interacvtive maps with tmap::
pov$dist<-data$dist[data$year==2023] # add variable to a contour map
library(tmap)
tmap_mode("view") # "view" for interactive plots and "plot" for static maps
tm_shape(pov) + tm_fill("dist", palette=sf::sf.colors(5))
ttm() # informs about the active mode, "view" or "plot"
tmap_last() # plots the statistic map in R deviceFigure 3.18: Static (a) and interactive (b) maps with tmap::
for a distance to from counties to core regional city
# Fig.3.19
library(mapview)
mapview(pov["dist"], col.regions=sf.colors(10)) # for regional data
mapview(points.sf, zcol="SEC_PKD7") # for point data, Fig.3.19bFigure 3.19 Interactive plotting with mapview:: a) for
regional data, b) for point data
# Fig.3.20a
# cut a contour map from a bigger map and add colouring variable
region.pov$unemp<-data$unemployment_rate[data$region_name=="lubelskie" & data$year==2023]
mapview(region.voi, zcol=NULL, alpha.regions=0.1) + # regional contours
mapview(region.pov, zcol="unemp", alpha.regions=0.3) + # coloured areas mapview(points.sf[points.sf$SEC_PKD7=="M",], zcol="roa", cex="empl_av_size") # point data
#location of a few big cities – data.frame object
cities<-data.frame(city=0, x=0, y=0) # empty object
cities[1,]<-c("Warszawa", 20.99909, 52.2330269) #location of Warszawa
cities[2,]<-c("Kraków", 19.8647884, 50.0469018) # location of Kraków
cities[3,]<-c("Gdańsk", 18.5499432, 54.3612063) # location of Gdańsk
cities[4,]<-c("Poznań", 16.9016659, 52.3997116) # location of Poznań
# check if data are numeric, if not convert to numeric
summary(cities)
cities$x<-as.numeric(as.character(cities$x))
cities$y<-as.numeric(as.character(cities$y))
summary(cities)
# change class – from data.frame to sf
cities.sf<-st_as_sf(cities, coords=c("x", "y"), crs=4326)
# geometries – circles with r=100 km from city, 1 km is dist=1000
circle.waw<-st_buffer(cities.sf[1,], dist=100000)
circle.kra<-st_buffer(cities.sf[2,], dist=100000)
circle.gda<-st_buffer(cities.sf[3,], dist=100000)
circle.poz<-st_buffer(cities.sf[4,], dist=100000)
# make projections the same – NAD27
pov<-st_transform(pov, 4267) # 4326=WGS84, 4267=NAD27
circle.waw<-st_transform(circle.waw, 4267) #
circle.kra<-st_transform(circle.kra, 4267) #
circle.gda<-st_transform(circle.gda, 4267) #
circle.poz<-st_transform(circle.poz, 4267) #
# cut poviats by circles
pov.circle.waw<-st_intersection(st_make_valid(pov), circle.waw)
pov.circle.kra<-st_intersection(st_make_valid(pov), circle.kra)
pov.circle.gda<-st_intersection(st_make_valid(pov), circle.gda)
pov.circle.poz<-st_intersection(st_make_valid(pov), circle.poz)
# graphics in R window – to check circular geometry with coloured regions
plot(st_geometry(pov.circle.waw)) # plotting a contour
ggplot() + geom_sf(data=pov.circle.waw, aes(fill=dist))
#interactive multilayer maps in browser
mapview(cities.sf, legend=FALSE) + # cities as points
mapview(pov.circle.waw, zcol="dist", legend=FALSE) + # circle for Warsaw
mapview(pov.circle.kra, zcol="dist", legend=FALSE) + # circle for Kraków
mapview(pov.circle.gda, zcol="dist", legend=FALSE) + # circle for Gdańsk
mapview(pov.circle.poz, zcol="dist", legend=FALSE) + # circle for Poznań
mapview(pov, zcol="dist", legend=FALSE, alpha=0.1) # all regions colouredFigure 3.20: Multilayer plots using mapview(): a) regional
and point data, b) regional data highlighted in radius
# Fig.3.21
library(osmdata)
# get data from OSM: roads, rivers and rails
getbb("Lublin") %>% # get bounding box for a given place name
opq() %>% # build an overpass query
add_osm_feature( # add a feature to an overpass query
key="highway", value=c("motorway", "primary", "motorway_link", "primary_link")) # specify which data to import
%>% osmdata_sf() -> big_streets # save main roads as sf object
getbb("Lublin") %>% opq() %>% add_osm_feature(key="highway", value=c("secondary", "tertiary", "secondary_link", "tertiary_link")) %>% osmdata_sf() -> med_streets # medium streets saved as sf object
getbb("Lublin") %>% opq() %>% add_osm_feature(key="waterway", value="river") %>% osmdata_sf() -> river # save rivers as sf object
getbb("Lublin") %>% opq() %>% add_osm_feature(key="railway", value="rail") %>% osmdata_sf()->railway # save railways as sf object
sub<-points[points$poviat=="powiat Lublin",] # subset for the city
sub %>% # use subset only, in data.frame class
ggplot(mapping=aes(x=crds.x, y=crds.y, color=SEC_PKD7)) +
geom_point() +
geom_sf(data=river$osm_lines, inherit.aes=FALSE, color="deepskyblue", size=0.8, alpha=0.3) +
geom_sf(data=railway$osm_lines, inherit.aes=FALSE, color="#ffbe7f", size=0.2, linetype="dotdash", alpha=0.5) +
geom_sf(data=med_streets$osm_lines, inherit.aes=FALSE, color="#ffbe7f", size=0.3, alpha=0.5) +
geom_sf(data=big_streets$osm_lines, inherit.aes=FALSE, color="#ffbe7f", size=0.5, alpha=0.6) +
coord_sf(xlim=c(22.45, 22.7), ylim=c(51.15, 51.3)) +
labs(title="LUBLIN CITY, POLAND", subtitle="business locations", color="Sectors of firms")Figure 3.21: Point data plotted together with OpenStreetMap data
# variable for analysis
pov$dist<-data$dist[data$year==2023]
# solution with mapview::
library(mapview)
mapview(pov, zcol="dist") # auto colors + legend + popups
# solution with leaflet::
library(leaflet)
pal<-colorNumeric("viridis", domain=pov$dist, na.color=NA)
leaflet(pov) |>
addProviderTiles(providers$CartoDB.Positron) |>
addPolygons(
fillColor=~pal(dist), color="#444444", weight=1,
fillOpacity=0.8, popup=~paste0("<b>dist:</b> ", dist)) |>
addLegend("bottomright", pal=pal, values=~dist, title="dist")
# solution with mapview::, leaflet:: and shiny::
library(sf)
library(shiny)
library(leaflet)
library(mapview)
ui<-fluidPage(leafletOutput("m", height=600))
server<-function(input, output, session) {
output$m<-renderLeaflet({
mapview(pov, zcol="dist")@map # grab the underlying leaflet widget
})}
shinyApp(ui, server)
# Fig.3.22
pov$dist<-data$dist[data$year==2023]
library(leaflet)
library(viridisLite) # for nice, perceptually uniform color palettes
breaks<-pretty(pov$dist, n=6) # breaks at pretty numbers (0,50,100,150,200)
# color palette (reversed) - from leaflet::
pal<-colorNumeric( # function to fit the colours to data
palette=viridis(length(breaks), option="A", direction=-1),
domain=range(pov$dist, na.rm=TRUE))
# optional: a label to show on hover (adjust 'region_name' to own field)
hover_lab<-sprintf("<b>%s</b><br/>Distance: %s",
if ("region_name" %in% names(pov)) pov$region_name else "Region",
format(pov$dist, trim=TRUE, digits=3)) %>% lapply(htmltools::HTML)
leaflet(pov, options=leafletOptions(minZoom=2)) |>
addProviderTiles(providers$CartoDB.Positron) |>
addPolygons(fillColor=~pal(dist), weight=0.5, color="#FFFFFF", opacity=1,
fillOpacity=0.9, smoothFactor=0.2, label=hover_lab, popup=~paste0("<b>",
if ("region_name" %in% names(pov)) region_name else "Region",
"</b><br/>", "Distance: ", dist), highlightOptions=highlightOptions(
weight=2, color="#444444", fillOpacity=0.95, bringToFront=TRUE)) |>
addLegend(pal=pal, values=~dist, title="Distance to regional core city",
labFormat=labelFormat(digits=2), position="bottomright", opacity=0.9) |>
addScaleBar(position="bottomleft", options=scaleBarOptions(imperial=
FALSE))Figure 3.22: Interactive graphics with leaflet::
n<-6
vec<-c("red", "green", "blue", "yellow", "pink", "brown")
image(1:n, 1, as.matrix(1:n), col=vec, xlab="…") # general code
library(terra)
r<-rast(nrows=2, ncols=3, xmin=0, xmax=3, ymin=0, ymax=2)
values(r)<-1:ncell(r)
plot(r, col=vec, axes=FALSE, main="Coloured raster (3 columns and 2 rows)")
library(scales)
show_col(vec)
library(colorspace)
swatchplot("vec"=vec, "rev.vec"=rev(vec))
library(ggplot2)
df<-data.frame(x=1:n, y=1, shade=vec)
ggplot(df, aes(x=factor(x), y=y, fill=shade)) + geom_tile(width=1, height=1) + scale_fill_identity() + theme_void()
colors()[1:21] # the first few color names
rgb(runif(5,0,1), runif(5,0,1), runif(5,0,1))
rgb(sample(0:255, 5), sample(0:255, 5), sample(0:255, 5), maxColorValue=255)
hsv(runif(5,0,1), runif(5,0,1), runif(5,0,1))
adjustcolor("#0F58D6", alpha.f=0.5)
col2rgb(c("azure", "azure1", "azure2"), alpha=FALSE)
col2rgb(c("#4B424F", "#BFD15C", "#A44845"), alpha=FALSE)
# current palette to be tested – to find names for listed shades
pal<-palette() # 8 basic colours that can be refereed as 1,2,3, in col option
pal_hex<-as.data.frame(t(grDevices::col2rgb(pal))) # colours in rgb notation
pal_hex # RGB values of eight colours
# all named colours with hex / rgb components
all_named<-grDevices::colors() # 657 named colours
all_hex<-as.data.frame(t(grDevices::col2rgb(all_named)))
all_hex<-cbind(all_hex, all_named)
head(all_hex) # RGB values and name of colour
exact_match<-merge(pal_hex, all_hex, by=c("red", "green", "blue"), all.x=TRUE)
exact_match
library(nabor) # for knn()
merged<-knn(as.matrix(all_hex[,1:3]), as.matrix(pal_hex[,1:3]), k=2)
full_match<-data.frame(nn1_id=merged$nn.idx[,1], nn2_id=merged$nn.idx[,2], nn1_dist=merged$nn.dists[,1], nn2_dist=merged$nn.dists[,2],
nn1_col=all_named[merged$nn.idx[,1]], nn2_col=all_named[merged$nn.idx[,2]])
full_match
rc<-rainbow(10)
library(RColorBrewer)
display.brewer.all() # all pallets from the package, Fig.3.23a
display.brewer.pal(11,'Spectral') # displaying the palette, Fig.3.23bFigure 3.23: RColorBrewer palettes: a) all colours together,
b) divergent Spetral colours
brewer.pal(n=9, name="OrRd") # create sequential colours
display.brewer.pal(9,'OrRd') # display the sequential palette, Fig.3.24a
brewer.pal(4,'Paired') # create qualitative colours
display.brewer.pal(4,'Paired') # show the qualitative palette, Fig.3.24bFigure 3.24: RColorBrewer palettes: a) sequential OrRd, b)
qualitative Paired
library(viridis)
library(scales)
viridis.map
col1<-viridis(16, option="D") # the default viridis colors
show_col(col1) # Fig.3.25a
col2<-viridis(16, option="B") # inferno palette
show_col(col2) # Fig.3.25bFigure 3.25: Viridis palettes: a) default viridis, b) inferno
library(ggplot2)
# coloured hexagons circled, Fig.3.26a
ggplot(data.frame(x=rnorm(10000),y=rnorm(10000)), aes(x=x, y=y)) + geom_hex() + coord_fixed() + scale_fill_viridis() + theme_bw()
# creating a data set in the data.frame class
data<-data.frame(x=rnorm(1000), y=rnorm(1000), z=sample(LETTERS[1:5], 1000, replace=TRUE))
head(data)
# scatterplot in colours, Fig.3.26b
ggplot(data) + aes(x, y, color=z) + geom_point(size=4) + scale_color_viridis_d(option="turbo") + theme_bw() Figure 3.26: Colours from viridis used with ggplot(): a) for
regions with viridis palette, b) for points with turbo palette
library(colorspace)
# sequential palettes
# from deep-blue to light-blue (nice for heatmaps / scales)
seq_blue<-sequential_hcl(7, h=260, c=c(80, 0), l=c(30, 95), power=1.2)
seq_blue
# from teal to light-teal with a subtle hue drift
seq_teal<-sequential_hcl(7, h=c(200,190), c=c(70,0), l=c(28,96), power=1.1)
# use a named predefined palette
hcl_palettes("sequential") # display the available palettes
seq_named<-sequential_hcl(7, palette="Blues 3")
# display as swatchpot the sequential palettes – Fig.3.27a
swatchplot("seq_blue"=seq_blue, "seq_teal"=seq_teal, "seq_named"=seq_named)
# diverging palettes
# from blue to red via alight centre
div_blue.red<-diverging_hcl(11, h=c(260, 0), c=80, l=c(30, 95), power=1.1)
# from green to purple with slightly softer chroma
div_green.purple<-diverging_hcl(11, h=c(130, 300), c=70, l=c(30, 96), power=1.0)
# use a named palette
hcl_palettes("diverging") # display the available palettes
div_named<-diverging_hcl(11, palette="Lisbon")
# display as swatchpot the diverging palettes – Fig.3.27b
swatchplot("div_blue.red"=div_blue.red, "div_named"=div_named, "div_green.purple"=div_green.purple)Figure 3.27: Swatchplots of palettes from colorspace::
package: a) sequential palettes, b) diverging palettes
# qualitative palettes
# balanced, vivid colours
qual_vivid<-qualitative_hcl(8, h=c(0, 360), c=60, l=65)
# soft pastel categories (higher L, lower C)
qual_pastel<-qualitative_hcl(8, h=c(0, 360), c=30, l=85)
# use a named palette
hcl_palettes("qualitative") # display the available palettes
qual_named<-qualitative_hcl(8, palette="Dark 3")
# display as swatchpot the diverging palettes – Fig.3.28a
swatchplot("qual_vivid"=qual_vivid, "qual_pastel"= qual_pastel, "qual_named"=qual_named)
colorspace::choose_palette() # Fig.3.28b
colorspace::hclwizard() # Fig.3.29Figure 3.28: Qualitative palettes with colorspace:: package:
a) qualitative colours, b) interactive interface to choose
palette
Figure 3.29: Interactive interface of hclwizard() function
from the colorspace:: package
# generate 21 divergent colors with quickPlot::divergentColors()
library(quickPlot)
# Fig.3.30a - palette from the command example
col3<-divergentColors("darkred", "darkblue", -10, 10, 0, "white")
col3
col3.n<-length(col3)
image(1:col3.n, 1, as.matrix(1:col3.n), col=col3, xlab="quickPlot::divergentColors() / darkred-darkblue")
# Fig.3.30b - another palette
col4<-divergentColors("chocolate4", "peachpuff4", -10, 10, 0, "bisque")
col4
col4.n<-length(col4)
image(1:col4.n, 1, as.matrix(1:col4.n), col=col4, xlab="quickPlot::divergentColors() / chocolate-peachpuff")Figure 3.30: Divergent palettes with quickPlot:: package: a)
from dark red to dark blue, b) in brown shades
pal<-wesanderson::wes_palette("Zissou1", 5, type="continuous")
pal<-ghibli::ghibli_palette("PonyoLight", n=6)
ochre_palettes # call the palettes object, displayed first two palettes
pal<-ochRe::ochre_palettes$emu_woman_paired[2:10] # select few shades
pal<-PNWColors::pnw_palette("Cascades", 6)
pal<-MetBrewer::met.brewer("Hokusai3", 7)
install.packages("devtools")
devtools::install_github("BlakeRMills/MoMAColors")
library(MoMAColors)
display.all.moma() # Fig.3.31b
pal<-moma.colors("Smith", 3)
library(ggplot2)
library(patchwork)
library(ggsci)
library(wesanderson)
library(ghibli)
library(nord)
library(ochRe)
library(PNWColors)
library(MetBrewer)
# helper: stripe plot
palette_stripes<-function(pal, title="Palette") {
if(is.null(pal) || length(pal)==0) {stop(paste("Empty palette:", title))}
df<-data.frame(x=seq_along(pal), y=1, fill_hex=pal)
ggplot(df, aes(x=factor(x), y=y)) + geom_tile(aes(fill=fill_hex), width=1, height=1) + scale_fill_identity() + theme_void() + theme(plot.title= element_text(hjust=0.5, face="bold", size=12), plot.margin=margin(4, 4, 4, 4)) + ggtitle(title)}
# define palettes
palettes <- list(
"ggsci: NPG" = ggsci::pal_npg("nrc")(8),
"ggsci: AAAS" = ggsci::pal_aaas("default")(8),
"wesanderson: Zissou1" = wesanderson::wes_palette("Zissou1", 5, type="discrete"),
"ghibli: PonyoMedium" = ghibli::ghibli_palette("PonyoMedium", 6, type= "discrete"),
"nord: Aurora" = tryCatch(nord::nord("aurora", 5),
error = function(e) nord::nord_palettes$aurora[1:5]),
"ochRe: Lorikeet" = ochRe::ochre_palettes$lorikeet,
"PNWColors: Cascades" = PNWColors::pnw_palette("Cascades", 6),
"MetBrewer: Hokusai3" = MetBrewer::met.brewer("Hokusai3", 6))
# build gallery
stripe_plots<-lapply(names(palettes), function(nm) {
palette_stripes(palettes[[nm]], nm)})
wrap_plots(stripe_plots, ncol=2) # final visualisation, Fig.3.31aFigure 3.31: Palettes from various packages a) collection of packages plotted with ggplot(), b) collection from MoMA package plotted with internal function
library(jpeg) # for readJPEG()
photo<-readJPEG("wne-building.jpg")
library(imgpalr) # for image_pal()
col1<-image_pal("wne-building.jpg", n=8, type="div", saturation=c(0.75, 1), brightness=c(0.75, 1), plot=TRUE) # Fig.3.32a
col2<-image_pal("wne-building.jpg", n=8, type="div", k=2, saturation=c(0.5, 1), brightness=c(0.25, 1), plot=TRUE) # Fig.3.32bFigure 3.32: Palettes created from image with imgpalr::image_pal()