library(tidyverse)
library(httr)
library(sf)
library(dplyr)
library(tmap)
library(rgdal)
library(geojsonsf)
library(raster)
library(speciesRaster)
library(fasterize)
library(ggstatsplot)
get_X_Y_coordinates <- function(x) {
sftype <- as.character(sf::st_geometry_type(x, by_geometry = FALSE))
if(sftype == "POINT") {
xy <- as.data.frame(sf::st_coordinates(x))
dplyr::bind_cols(x, xy)
} else {
x
}
}
sf_fisbroker <- function(url) {
typenames <- basename(url)
url <- httr::parse_url(url)
url$query <- list(service = "wfs",
version = "2.0.0",
request = "GetFeature",
srsName = "EPSG:25833",
TYPENAMES = typenames)
request <- httr::build_url(url)
print(request)
out <- sf::read_sf(request)
out <- sf::st_transform(out, 3035)
out <- get_X_Y_coordinates(out)
out <- st_as_sf(as.data.frame(out))
return(out)
}
export_format <- c(
"geojson",
"sqlite"
)
sf_save <- function(z, fname) {
ifelse(!dir.exists(fname), dir.create(fname), "Folder exists already")
ff <- paste(file.path(fname, fname), export_format, sep = ".")
purrr::walk(ff, ~{ sf::st_write(z, .x, delete_dsn = TRUE)})
saveRDS(z, paste0(file.path(fname, fname), ".rds"))
}
#Berlin polygon
load(url("https://userpage.fu-berlin.de/soga/300/30100_data_sets/berlin_district.RData"))
berlin.sf <- st_transform(berlin.sf, 3035)
#Verkehrszellen (2003 file)
vz <- sf_fisbroker("https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_vz")
## [1] "https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_vz?service=wfs&version=2.0.0&request=GetFeature&srsName=EPSG%3A25833&TYPENAMES=s_vz"
vz <- vz %>% separate(gml_id, c("s_vz.", "gml_id"), sep=5) %>%
dplyr::select("gml_id", "geometry")
vz$s_vz. <- NULL
#LOR areas (2011 file)
lor <- sf_fisbroker("https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_lor_plan")
## [1] "https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_lor_plan?service=wfs&version=2.0.0&request=GetFeature&srsName=EPSG%3A25833&TYPENAMES=s_lor_plan"
lor <- lor %>%
separate(gml_id, c("s_lor_plan", "gml_id"), sep=11) %>%
dplyr::select("gml_id", "geometry")
Downloading from Berlin’s open data portal using their API; subsetting and cleaning
brw_processing <- function(x){
y <- x %>%
dplyr::filter(NUTZUNG != "G - Gewerbe") %>%
dplyr::select(gml_id, BRW, geometry) %>%
filter(!st_is_empty(.))
}
brw_outliers <- function(x){
val <- x$BRW
yUL <- val %>% quantile(.97, na.rm = TRUE)
out <- x %>%
dplyr::filter(BRW < yUL)
return(out)
}
brw2012 <- sf_fisbroker("https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2012")
## [1] "https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2012?service=wfs&version=2.0.0&request=GetFeature&srsName=EPSG%3A25833&TYPENAMES=s_brw_2012"
brw2011 <- sf_fisbroker("https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2011")
## [1] "https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2011?service=wfs&version=2.0.0&request=GetFeature&srsName=EPSG%3A25833&TYPENAMES=s_brw_2011"
brw2010 <- sf_fisbroker("https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2010")
## [1] "https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2010?service=wfs&version=2.0.0&request=GetFeature&srsName=EPSG%3A25833&TYPENAMES=s_brw_2010"
brw2004 <- sf_fisbroker("https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2004")
## [1] "https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2004?service=wfs&version=2.0.0&request=GetFeature&srsName=EPSG%3A25833&TYPENAMES=s_brw_2004"
brw2003 <- sf_fisbroker("https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2003")
## [1] "https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2003?service=wfs&version=2.0.0&request=GetFeature&srsName=EPSG%3A25833&TYPENAMES=s_brw_2003"
brw2002 <- sf_fisbroker("https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2002")
## [1] "https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_brw_2002?service=wfs&version=2.0.0&request=GetFeature&srsName=EPSG%3A25833&TYPENAMES=s_brw_2002"
brw2012 <- brw_processing(brw2012) %>% brw_outliers()
brw2011 <- brw_processing(brw2011) %>% brw_outliers()
brw2010 <- brw_processing(brw2010) %>% brw_outliers()
brw2004 <- brw_processing(brw2004) %>% brw_outliers()
brw2003 <- brw_processing(brw2003) %>% brw_outliers()
brw2002 <- brw_processing(brw2002) %>% brw_outliers()
Loading and cleaning demographic data (already partially cleaned in Excel)
mss_2003 <- read.csv("~/Desktop/Code/Thesis/demographic/2003_MSS_cut.csv")
mss_2003 <- mss_2003 %>% separate(Verkehrszellen,
c("VKZ_num", "VKZ_name"),
sep = 4)
mss_2003 <- mss_2003 %>%
dplyr::select("VKZ_num",
"VKZ_name",
"Langzeit.arbeitslose.über.1.Jahr.am.31.12.02.pro.100.EW.18.60.J.",
"Arbeitslose.insgesamt.31.12.02.pro.100.EW.18.60.J.",
"Langzeitfälle..über.2.Jahre..unter.den.Sozialhilfebeziehern.pro.100.EW..",
"Über.64.Jährige.pro.100.EW",
"Unter.18.Jährige.pro.100.EW",
"Ausländer.pro.100.EW",
"EW.aus.EU.Staaten.pro.100.EW") %>%
rename(long_unemp2003 = Langzeit.arbeitslose.über.1.Jahr.am.31.12.02.pro.100.EW.18.60.J.,
unemp2003 = Arbeitslose.insgesamt.31.12.02.pro.100.EW.18.60.J.,
gWelfare2003 = Langzeitfälle..über.2.Jahre..unter.den.Sozialhilfebeziehern.pro.100.EW..,
seniors2003 = Über.64.Jährige.pro.100.EW,
youth2003 = Unter.18.Jährige.pro.100.EW,
foreign2003 = Ausländer.pro.100.EW,
european2003 = EW.aus.EU.Staaten.pro.100.EW)
mss_2003[,3:9] <- lapply(mss_2003[,3:9], as.numeric)
## Warning in lapply(mss_2003[, 3:9], as.numeric): NAs introduced by coercion
## Warning in lapply(mss_2003[, 3:9], as.numeric): NAs introduced by coercion
mss_2003[339,1] <- c("Berlin")
mss_2003[339,2] <- c("Berlin")
##### MSS 2011
mss_2011 <- read.csv("~/Desktop/Code/Thesis/demographic/2011_MSS_cut.csv")
mss_2011 <- mss_2011 %>%
dplyr::select("Raumid",
"Gebiet",
"Status3",
"Status1",
"E22",
"E6",
"E5",
"E7",
"E18") %>%
rename(long_unemp2011 = Status3,
unemp2011 = Status1,
gWelfare2011 = E22,
seniors2011 = E6,
youth2011 = E5,
foreign2011 = E7,
european2011 = E18)
mss_2011[,3:9] <- lapply(mss_2011[,3:9], as.numeric)
Prerasterization: setup functions are used to reduce code; the first function creates a raster from the given sf object and the second smooths the data (some parcels are overly valued)
#template raster for Berlin
berlin_template <- raster(extent(berlin.sf),
resolution = 216,
crs = st_crs(berlin.sf)$proj4string)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
#function to create a raster from file
brwrast <- function(x){
out <- fasterize(x, berlin_template, field = "BRW", background = NA)
return(out)
}
#function to smooth data
brwfocal <- function(x){
fweight <- focalWeight(x, d=3, type="Gauss")
out <- focal(x, w=fweight, fun = "mean", NAonly=TRUE)
return(out)
}
Rasterizing files based on functions
brw2012rast <- brw2012 %>% brwrast() %>% brwfocal()
brw2011rast <- brw2011 %>% brwrast() %>% brwfocal()
brw2010rast <- brw2010 %>% brwrast() %>% brwfocal()
brw2004rast <- brw2004 %>% brwrast() %>% brwfocal()
brw2003rast <- brw2003 %>% brwrast() %>% brwfocal()
brw2002rast <- brw2002 %>% brwrast() %>% brwfocal()
Consolidating files: The result of the rasterization is our first variable to analyze: the change in average parcel valuation from 2003 to 2011. The map displays this raster.
brw2011ave <- stack(brw2012rast, brw2011rast, brw2010rast) %>%
approxNA(method = "linear", rule = 2)
brw2011ave <- overlay(brw2011ave, fun = "mean")
brw2003ave <- stack(brw2004rast, brw2003rast, brw2002rast) %>%
approxNA(method = "linear", rule = 2)
brw2003ave <- overlay(brw2003ave, fun = "mean")
brwChange <- brw2011ave - brw2003ave
tmap_mode("view")
## tmap mode set to interactive viewing
berlinbase <- tm_shape(berlin.sf) + tm_fill(alpha = .4)
testmap <- berlinbase + tm_shape(brwChange) + tm_raster(alpha = .8)
testmap
## Variable(s) "NA" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
First, the data is spatialized via a join to the correct shapefile:
mss_2003sf <- left_join(mss_2003, vz, by = c("VKZ_num" = "gml_id")) %>%
st_as_sf() %>% st_transform(3035) %>% na.omit() %>% filter(!st_is_empty(.))
mss_2011sf <- left_join(mss_2011, lor, by = c("Raumid" = "gml_id")) %>%
st_as_sf() %>%
st_transform(3035) %>%
filter(!st_is_empty(.))
Then, a separate raster is created for each variable for each year
unemp2003r <- rasterize(mss_2003sf, berlin_template, field = "unemp2003")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
longunemp2003r <- rasterize(mss_2003sf, berlin_template, field = "long_unemp2003")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
gWelfare2003r <- rasterize(mss_2003sf, berlin_template, field = "gWelfare2003")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
seniors2003r <- rasterize(mss_2003sf, berlin_template, field = "seniors2003")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
youth2003r <- rasterize(mss_2003sf, berlin_template, field = "youth2003")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
foreign2003r <- rasterize(mss_2003sf, berlin_template, field = "foreign2003")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
eu2003r <- rasterize(mss_2003sf, berlin_template, field = "european2003")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
unemp2011r <- rasterize(mss_2011sf, berlin_template, field = "unemp2011")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
longunemp2011r <- rasterize(mss_2011sf, berlin_template, field = "long_unemp2011")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
gWelfare2011r <- rasterize(mss_2011sf, berlin_template, field = "gWelfare2011")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
seniors2011r <- rasterize(mss_2011sf, berlin_template, field = "seniors2011")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
youth2011r <- rasterize(mss_2011sf, berlin_template, field = "youth2011")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
foreign2011r <- rasterize(mss_2011sf, berlin_template, field = "foreign2011")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
eu2011r <- rasterize(mss_2011sf, berlin_template, field = "european2011")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in Proj4
## definition
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum European Terrestrial Reference System 1989 in
## Proj4 definition
Note that after dropping all instances of NA values, we are left with approximately 21% of the total cells created in the initial raster — while this seems dramatic, we have to remember that the raster was created in a square surrounding Berlin, including the numerous suburbs, extensive parkland, and various airports and other non-residential land.
unempChange <- unemp2011r - unemp2003r
longunempChange <- longunemp2011r - longunemp2003r
gWelfareChange <- gWelfare2011r - gWelfare2003r
seniorsChange <- seniors2011r - seniors2003r
youthChange <- youth2011r - youth2003r
foreignChange <- foreign2011r - foreign2003r
euChange <- eu2011r - eu2003r
brwChange <- brw2011ave - brw2003ave
demo_change_stack = stack(list(
unempC = unempChange,
longunempC = longunempChange,
gWelfareC = gWelfareChange,
seniorsC = seniorsChange,
youthC = youthChange,
foreignC = foreignChange,
euC = euChange,
brwC = brwChange
)
)
bdataC <- raster::as.data.frame(demo_change_stack, xy = TRUE) %>% na.omit()
Our final output is a dataframe bdataC (meaning Berlin data: change) that shows how all seven variables have changed in areas where data for all seven variables is available. We will save this dataframe to the disk and load it in the analysis file.
write.csv(bdataC, file="~/Desktop/Code/Thesis/bdataC.csv", row.names = FALSE)