Welcome to the RPubs for the book ‘Applied Spatial Statistics And Econometrics: Data Analysis in R’ (Routledge, 2nd edition, 2026) (ed.Katarzyna Kopczewska).

This part is for Chapter 4.

To download and read data, please see RPubs with Intro.

This site is a collection of all R codes included in this book. These are just the codes - no comments, no explanations what, why and how. All those details are in the book.

The book was written by Spatial Warsaw team based at the Faculty of Economic Sciences at University of Warsaw. Visit us on LinkedIn.

Chapter 4: Spatial Data with the Web APIs (Mateusz Kopyt)

4.2 Spatial data in vector format - example of the OSM database

library(osmdata)
head(available_features(), 21)
available_tags("building")
available_tags("amenity")

pov<-st_read("powiaty.shp", stringsAsFactors=FALSE)     # import shapefile
Lublin<-pov[pov$JPT_KOD_JE=="0663",]    # select one region, city of Lublin
latlong<-"EPSG:4326" # assign the WGS84 (EPSG:4326) coordinate system code
Lublin<-st_transform(Lublin, crs=latlong)   # transform to WGS84 ("EPSG:4326")
Lublin_box<-st_bbox(Lublin)             # get bounding box from contour map
Lublin_box                  # bounding box – extreme coordinates

Lublin_box2<-getbb("Lublin, Polska") # bounding box online using osmdata::
Lublin_box2

# query in traditional way
query_temp<-opq(Lublin_box)
query<-add_osm_feature(query_temp, key='amenity', value='fuel')

# query using pipe operator
query<-opq(Lublin_box) %>% add_osm_feature(key='amenity', value='fuel')
query

petrol_lub.sf<-osmdata_sf(query)
petrol_lub.sf

plot(st_geometry(Lublin))   # plot data and contour – not shown
plot(st_geometry(petrol_lub.sf$osm_points), add=TRUE) # plot of raw data
 
petrol_lub_clear.sf<-unique_osmdata(petrol_lub.sf) # cleaned data

plot(st_geometry(Lublin))
plot(st_geometry(petrol_lub_clear.sf$osm_points), pch=3, add=TRUE)
plot(st_geometry(petrol_lub_clear.sf$osm_polygons), add=TRUE)
 
petrol_lub_clear_center<-st_centroid(petrol_lub_clear.sf$osm_polygons)
plot(st_geometry(Lublin))   # Fig.4.1a
plot(st_geometry(petrol_lub_clear.sf$osm_points), pch=3, add=TRUE)
plot(st_geometry(petrol_lub_clear_center), pch=3, col="red", add=TRUE)

Lublin_box2.poly<-getbb("Lublin, Polska", format_out='polygon')
head(Lublin_box2.poly[[1]]) # only 2 rows shown

# Fig.4.1b - object limited to the borders of Lublin
petrol_lub_clear2.sf<-trim_osmdata(petrol_lub_clear.sf, Lublin_box2.poly)
plot(st_geometry(Lublin))
plot(st_geometry(petrol_lub_clear2.sf$osm_points), pch=3, add=TRUE)

Figure 4.1: Location of gas stations in Lublin - data from the OpenStreetMap (OSM) database downloaded using the API service: a) cleaned point data with buildings transformed into point data, b) points after removing objects outside of Lublin using the trim_data() function

# petrol stations limited to one brand only (Shell)
query1<-opq(Lublin_box)%>%
          add_osm_feature(key='amenity', value='fuel') %>%
          add_osm_feature(key='name', value='Shell', value_exact=FALSE)
petrol_lub_Shell.sf<-osmdata_sf(query1)

petrol_lub_Shell.sf<-trim_osmdata(petrol_lub_Shell.sf, Lublin_box2.poly)
plot(st_geometry(Lublin))       # figure not shown
plot(st_geometry(petrol_lub_Shell.sf$osm_points), pch=3, add=TRUE)
plot(st_geometry(petrol_lub_Shell.sf$osm_polygons), add=TRUE)

petrol_lub_Shell.sf # the whole structure of the object
petrol_lub_Shell.sf$osm_points # first two observations

4.3 Background raster maps from free sources - an example through API

library(OpenStreetMap)      # for openmap(), openproj()
# define the codes for used geo-reference system
latlong<-"EPSG:4326"        # WGS84 (EPSG:4326) geo-reference system code
osmap<-"EPSG:3857"      # native OSM and Google Maps coordinate system
polish_borders<-"EPSG:2180"     # official coordinate system for Poland
european<-"EPSG:4258"       # European reference system 

region.voi<-voi[voi$JPT_NAZWA_=="lubelskie",] # select Lubelskie region
region.voi.latlong<-st_transform(region.voi, latlong) 
region.voi.osmap<-st_transform(region.voi, osmap)
region.voi.box<-st_bbox(region.voi.latlong) # bounding box of the region
region.voi.box

region.voi.osm.map<-openmap(c(region.voi.box[2], region.voi.box[1]), c(region.voi.box[4], region.voi.box[3]))

# raster geoprojection changed to EPSG:4258 to match the boundary 
region.voi.osm.4258<-openproj(region.voi.osm.map, european)

par(mar=c(0,0,0,0))     # no margins in the figure
# Fig.4.3a – 1st method – in the original layout of the OSM background
# the Lubelskie region boundaries changed to native OSM georeference system
plot(region.voi.osm.map)
plot(st_geometry(region.voi.osmap), add=TRUE)

# Fig.4.3 – 2nd method - in a georeference system consistent with shapefile
plot(region.voi.osm.4258)
plot(st_geometry(region.voi), lwd=2, border="red", add=TRUE)
plot(st_geometry(Lublin), lwd=2, add=TRUE) # add contour of city of Lublin

Figure 4.2: Contour of the Lubelskie Voivodeship with the default OpenStreetMap raster background: a) method I - original system, b) method II - EPSG coordinate system: 4258 according to the boundary vector layer

european<-"EPSG:4258"
region.voi.4258<-st_transform(region.voi, european)
Lublin.4258<-st_transform(Lublin, european)
p<-autoplot(region.voi.osm.4258)

p + geom_sf(data=region.voi.4258, inherit.aes=FALSE, fill=NA, color="red", linewidth=1) + geom_sf(data=Lublin.4258, inherit.aes=FALSE, fill=NA, color="blue", linewidth=0.8) + theme(axis.text=element_text(size=16))

Figure 4.3: Contour of the Lubelskie Voivodeship with the OpenStreetMap raster background created using the autoplot() function

nm<-c("osm","osm-german","esri","esri-topo","esri-physical", "esri-imagery", "esri-natgeo","nps","apple-iphoto")
par(mfrow=c(3,3))       # divide graphics window into 3x3=9 figures
par(mar=c(1,0,1,0)) # narrow margins
for(i in 1:length(nm)){     # for each map style
  map<-openmap(c(50.25181,21.61554),c(52.28784,24.14578),type=nm[i])
  plot(map)
  title(main=nm[i])}

Figure 4.4: Selected raster map styles available for download using the openmap() function from the OpenStreetMap:: package

4.4 Access to non-spatial internet databases and resources via application programming interfaces (APIs) - examples

library(rdbnomics)      # Visit <https://db.nomics.world>.
rdb_prov<-rdb_providers()   # data providers for the DB NOMICS website
rdb_upd<-rdb_last_updates()     # list of the last 100 updates
head(rdb_prov, 3)       # the first three items from the received list

rdb_providers # remember to run without parentheses

rdb_upd[99] # this is dynamic database, results may differ

# download data series from DB NOMICS
milk_PL_dbn<-rdb('Eurostat/APRO_MK_POBTA/A.D2100.PRO.PL') 
class(milk_PL_dbn)
tail(milk_PL_dbn, 3) # last 3 records retrieved

ggplot(milk_PL_dbn, aes(x=period, y=value, color=series_code)) + geom_line(linewidth=1) + theme(legend.position="bottom",  legend.title= element_blank(), axis.text=element_text(size=16), axis.title =element_text(size=16), legend.text=element_text(size=16))
 
milk_EU28_dbn<-rdb(provider_code='Eurostat', dataset_code='APRO_MK_POBTA', dimensions=list(dairyprod="D2100", milkitem="PRO", geo="EU28"))
head(milk_EU28_dbn, 3) # the first 2 observations only as output

# download two datasets (for PL and RO)
milk_PLRO_dbn<-rdb(provider_code='Eurostat', dataset_code='APRO_MK_POBTA', dimensions=list(dairyprod="D2100", milkitem="PRO", geo=c("PL", "RO")))

# the second version for the same
milk_PLRO_dbn<-rdb(c('Eurostat/APRO_MK_POBTA/A.D2100.PRO.PL', 'Eurostat/APRO_MK_POBTA/A.D2100.PRO.RO'))
tail(milk_PL_dbn, 3) # the last 3 records from downloaded dataset

ggplot(milk_PLRO_dbn, aes(x=period , y=value, color=series_code)) + geom_line(linewidth=1) + theme(legend.position="bottom",  legend.title= element_blank())    # Fig.4.6

Figure 4.6: Milk production in Poland and Romania - data gathered from the DB NOMICS WebAPI

milk_ALL_dbn<-rdb(provider_code='Eurostat', dataset_code='APRO_MK_POBTA', mask='A.D2100.PRO.') 
dim(milk_ALL_dbn)

table(milk_ALL_dbn$geo) # number of observations for a given country

milk_AT_dbn<-rdb(api_link = 'https://api.db.nomics.world/v22/series/Eurostat/APRO_MK_POBTA?dimensions=%7B%22dairyprod%22%3A%5B%22D2100%22%5D%2C%22milkitem%22%3A%5B%22PRO%22%5D%2C%22geo%22%3A%5B%22AT%22%5D%7D&facets=1&format=json&limit=1000&observations=1&q=apro_mk_pobta')

library(httr2)      # for request(), resp_body_string(), req_perform()
library(jsonlite)       # for fromJSON()

# build simple query based on API URL 
U1_request<-request("https://radon.nauka.gov.pl/opendata/polon/institutions?resultNumbers=100") 
U1_result<-req_perform(U1_request) # executes the request constructed earlier

# parse raw response to readable form in the list
U1_parsed<-fromJSON(resp_body_string(U1_result), flatten=TRUE) 
class(U1_parsed)

# convert/extract to data.frame
U1.df<-as.data.frame(U1_parsed$results) 
dim(U1.df)

# show rows 37:39 and selected important column
U1.df[37:39, c(8, 20, 33, 35, 37, 41, 48)] 

U1_result
U1_result$status_code # direct reference to the status of the response

U1_request<-request("https://radon.nauka.gov.pl/opendata/polon/institutions?iKindCd=13&iKindCd=10&statusCode=1&resultNumbers=100")

U1_result<-req_perform(U1_request)
U1_parsed<-fromJSON(resp_body_string(U1_result), flatten=TRUE)
U1.df<-as.data.frame(U1_parsed$results)

U1_parsed$pagination

# NOTE! The U1.df object from the previous example is needed to run correctly
# rm(output, output_temp, out_final, parsed, request, resp, i, q_url) 
# clearing variables from any previous launches of this code (if needed)
# the code repeats exactly the above schema - it forms a fully working example
request<-request("https://radon.nauka.gov.pl/opendata/polon/institutions?iKindCd=13&iKindCd=10&statusCode=1&resultNumbers=100")
resp<-req_perform(request)
parsed<-fromJSON(resp_body_string(resp), flatten=TRUE)
output<-as.data.frame(parsed$results)

# check if there are more than 100 results and further queries are needed
if(parsed$pagination$maxCount>100) 
{
  # determine the necessary number of API requests to obtain all results; 
  nreq<-ceiling(parsed$pagination$maxCount/100) # rounds the number up
    for(i in 2:nreq) # subsequent API requests in a loop
     {
q_url<-paste("https://radon.nauka.gov.pl/opendata/polon/institutions?iKindCd=13&iKindCd=10&statusCode=1&resultNumbers=100&token=",parsed$pagination$token,sep="")
request<-request(q_url)
resp<-req_perform(request)
parsed<-fromJSON(resp_body_string(resp), flatten=TRUE)
output_temp<-as.data.frame(parsed$results) # temporary object
    #output[setdiff(names(output_temp), names(output))]<-NA
    #output_temp[setdiff(names(output), names(output_temp))]<-NA
    # add a temporary object to previously downloaded data
output<-rbind(output, output_temp) 
    }
}
# selected columns from the results 
out_final<-output[, c(8,35,33,37,47,25,9,23,42,48,1)]
head(out_final, 2) # print data, the first 2 records presented

# save object as a csv file, path and filename can be adjusted
write.csv2(out_final, file="uni_PL.csv", quote=TRUE, row.names=FALSE) 

4.5 Geocoding data

4.5.1 Geocoding with tmaptools:: package based on Nominatim server

library(tmaptools)          # for geocode_OSM()
geocode_OSM("Warszawa, Polska") # the simplest form of query
# address request, detailed result as a data.frame class
geocode_OSM(URLencode("Miodowa 5, Warszawa, Polska"), details=TRUE, as.data.frame=TRUE)

addresses.df<-data.frame(Lp=integer(), adres=character(), lon=numeric(), lat=numeric(), stringsAsFactors=FALSE) # empty data.frame
addresses.df[1,]<-c(1,"Dluga 44/50, Warszawa, Polska", NA, NA)
addresses.df[2,]<-c(2,"Wydzial Nauk Ekonomicznych, Warszawa, Polska",NA,NA)

result<-geocode_OSM(URLencode(addresses.df[,2])) # geocode both addresses
result

addresses.df[,3:4]<-result[,2:3] # add geocoding results to the source object
addresses.df

# read data on cities and their population, described in I.4.5
popul<-read.csv("population_in_cities.csv",sep=",", stringsAsFactors=FALSE, encoding="UTF-8")
head(popul)

# create a town and country name encoded for query
popul$query_full<-URLencode(paste(popul$towns, "Poland", sep=", "))
towns_geo<-geocode_OSM(popul$query_full)        # it may take time
popul<-cbind(popul, towns_geo[,2:3])        # add results to popul object
rm(towns_geo)       # remove temporary object with geocoding results
head(popul[,-6])                    # without column no 6 with query

# write an object to a new file after geocoding
write.csv(population, file="populationxy.csv", row.names=FALSE)

# read data on firms’ addresses as described in I.4.3
firms_waw<-read.csv("firms_warszawa.csv", stringsAsFactors=FALSE, encoding="UTF-8")     # read data
# create a variable with a full address
firms_waw$Address_full<-paste(firms_waw$Address, firms_waw$Postcode, firms_waw$City, firms_waw$Country, sep=", ")
firms_geo<-geocode_OSM(URLencode(firms_waw$Address_full))
firms_waw<-cbind(firms_waw, firms_geo[,2:3]) # add geocode to firms object
# write the result (after geocoding) to a new file
write.csv(firms_waw, file="firms_warszawa_xy.csv", row.names=FALSE) 

# define the codes for used geo-reference system
latlong<-"EPSG:4326"    # WGS84 (EPSG:4326) geo-reference system code
osmap<-"EPSG:3857"  # native Google Maps and OSM geo-reference system
polish_borders<-"EPSG:2180" # native geo-reference system for boundary

pov<-st_read("powiaty.shp", stringsAsFactors=FALSE) # read shapefile
WAW<-pov[pov$JPT_KOD_JE==1465,]         # select the area of city of Warsaw
WAW_latlong<-st_transform(WAW, latlong)     # transform to WGS84 system
WAW_osmap<-st_transform(WAW, osmap)     # transform to native OSM system
WAW_box<-st_bbox(WAW_latlong)       # bounding box for a contour
WAW_osm.map<-openmap(c(WAW_box[2], WAW_box[1]), c(WAW_box[4], WAW_box[3]))

# Fig.4.8a
par(mar=c(0,0,0,0))
WAW_osm2.map<-openproj(WAW_osm.map, latlong) # OSM map reprojected to WGS84
plot(WAW_osm2.map)
plot(st_geometry(WAW_latlong), add=TRUE)        # borders of Warsaw area
crds<-as.data.frame(cbind(firms_waw$lon, firms_waw$lat)) # xy of firms
points(crds, pch=16, col="red")

# Fig.4.8b
plot(WAW_osm.map)       # draw a map in native OSM system
plot(WAW_osmap, add=TRUE)   # draw boundaries in native OSM system

# transform firms’ coordinate matrix into OSM
crds_osm<-sf_project(latlong, osmap, crds)  # from sf::
points(crds_osm, pch=16, col="red")

Figure 4.8: The result of geocoding using the Nominatim server (OSM) - location on the map of Warsaw. A comparison of the use of two coordinate systems: a) EPSG: 4326 (latitude / longitude) and b) EPSG: 3857 (native OSM)

osm() # parameters of the original reference system used by OSM maps

firms2_geo<-geocode_OSM(URLencode(firms_waw$Address_full[1:2]), projection= osm())
firms2_geo # the result in x and y is from the Cartesian coordinate system

# geocoding result directly as an sf class object
firms_geo.sf<-geocode_OSM(firms_waw$Address_full[1:2], as.sf=TRUE)
firms_geo.sf

class(firms_geo.sf)

# clasic plot method - plain contour and points inside
plot(st_geometry(WAW_latlong))
plot(st_geometry(firms_geo.sf), add=TRUE)

# Fig.4.9 – ggplot() approach
ggplot() + geom_sf(data=WAW_latlong, fill=NA) + geom_sf(data=firms_geo.sf, color="blue", size=5) + theme(axis.text=element_text(size=16))

Figure 4.9: The result of geocoding directly to sf class using the Nominatim server (OSM) – illustration with ggplot() from ggplot2:: package

rev_geocode_OSM(21.0072, 52.12356)

library(OpenStreetMap)  # if not loaded previously
firms3<-read.csv("firms_warszawa_xy.csv", stringsAsFactors=FALSE, encoding="UTF-8")
firms3_rev<-rev_geocode_OSM(firms_waw$lon, firms_waw$lat)
firms3<-cbind(firms3,firms3_rev[,3:25]) # combine the results
firms3[,c(8,11)] # display the original and re-geocoded addresses

4.5.2 Geocoding with photon:: package based on Photon server

library(httr2)
library(jsonlite)

# simple location request (address)
adr_request<-request("https://photon.komoot.io/api/?q='Długa+44/50+Warszawa'")
adr_result<-req_perform(adr_request)
adr_parsed<-fromJSON(resp_body_string(adr_result), flatten=TRUE)
adr_parsed$features # result as an object of data.frame class

# information about the geographical coordinates of the objects found
adr_parsed$features$geometry.coordinates
adr_parsed$features$geometry.coordinates[[1]][[1]] # longitude of 1st object
adr_parsed$features$geometry.coordinates[[1]][[2]] # latitude of 1st object

# limit the results to the first one
adr_request<-request("https://photon.komoot.io/api/?q='Długa+44/50+Warszawa'&limit=1")
adr_result<-req_perform(adr_request)
adr_parsed<-fromJSON(resp_body_string(adr_result), flatten=TRUE)
adr_parsed$features

# limit the results to a specific value of the OSM attribute (here: dance)
adr_request<-request("https://photon.komoot.io/api/?q='Długa+44/50+Warszawa'&osm_tag=:dance")
adr_result<-req_perform(adr_request)
adr_parsed<-fromJSON(resp_body_string(adr_result), flatten=TRUE)
adr_parsed$features

library(httr2)
library(jsonlite)
head(firms_waw) # use the previously prepared object, only 2 rows shown

firms_waw$lat<-NA
firms_waw$lon<-NA
firms_waw$geoadres<-NA
n<-nrow(firms_waw)      # how many rows
res<-data.frame(id=numeric(length=n),  result=numeric(length=n),  status=character(length=n),  whichone=numeric(length=n), stringsAsFactors=FALSE)  # new data.frame object
res                 # structure of the newly created object

for(i in 1:nrow(firms_waw)) {
  q_url<-paste("https://photon.komoot.io/api/?q='", firms_waw$Address_full[i],"'",sep="")
  q_url<-URLencode(q_url)
  resp<-req_perform(request(q_url))
  parsed<-fromJSON(resp_body_string(resp), flatten=TRUE)
  parsed<-parsed$features
  print(paste(i, nrow(parsed)))
  res$id[i]<-i
  if (!is.null(nrow(parsed))) {
    res$result[i]<-nrow(parsed)
    if (length(which(parsed$properties.osm_key=="building"))!=0) {
      j<-min(which(parsed$properties.osm_key=="building"))
    } else {
      j<-1
    }
    firms_waw$lat[i]<-as.numeric(parsed$geometry.coordinates[[j]][2])
    firms_waw$lon[i]<-as.numeric(parsed$geometry.coordinates[[j]][1])
    firms_waw$geoadres[i]<-as.character(paste(parsed$properties.street[[j]],parsed$properties.housenumber[[j]],parsed$properties.postcode[[j]],parsed$properties.city[[j]],sep=" "))
    res$status[i]<-"OK"
    res$whichone[i]<-j
  }  else {
    res$result[i]<-NA
    res$status[i]<-"BAD"
  }
    Sys.sleep(1)
}
res

# final output with the result of geocoding, the first three records
head(firms_waw, 2)
rm(resp, parsed, q_url, i, j)

adr_request<-request("https://photon.komoot.io/reverse/?lon=21.14358&lat=52.22834")
adr_result<-req_perform(adr_request)
adr_parsed<-fromJSON(resp_body_string(adr_result), flatten=TRUE)
adr_parsed$features