This report summarizes the process of matching Bamse coordinates to Sweden DeSO. The inputs are the .rda file containing the geographical coordinates, the DeSO layer, and the SEI data, which were extracted directly from the SCB website.
Installing package into 'C:/Users/racmur/AppData/Local/R/win-library/4.4'
(as 'lib' is unspecified)
package 'rmarkdown' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\racmur\AppData\Local\Temp\RtmpqUEJkz\downloaded_packages
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(jsonlite)
Attaching package: 'jsonlite'
The following object is masked from 'package:purrr':
flatten
#|echo: false
#|eval: true
#|warning: false

# Point the session at the project folder
setwd("C:/Users/racmur/OneDrive - Karolinska Institutet/Documents/PROJECTS/Olena")

# Load the .rda file; the code below expects it to provide an object `result`
load("C:/Users/racmur/OneDrive - Karolinska Institutet/Documents/PROJECTS/Olena/BAMSE2023_coordinates_AP_2020-2022.rda")
# ls()  # uncomment to list the loaded objects

# Coerce the loaded object to a data frame
my.df <- as.data.frame(result)
# head(my.df)            # first rows
# print(colnames(my.df)) # column names
# str(my.df)             # structure

# Transpose the data frame (if needed)
# NOTE(review): data.frame() has no `drop` argument, so `drop = FALSE` is stored
# as an extra column named `drop` -- confirm this is intended. Also confirm the
# transposition is wanted: it turns the original columns into rows.
my.df <- data.frame(
  t(my.df),
  drop = FALSE
)
Accessing DeSO file
Accessing the json file for Sweden_DeSO locally and transforming it if necessary
#|echo: false
#|eval: true
#|warning: false

# Local path of the Sweden DeSO boundary file
Sweden_DeSO_path <- "C:/Users/racmur/OneDrive - Karolinska Institutet/Documents/PROJECTS/Olena/DeSO_Shapes/main.DeSO_2018.json"

# Read the layer quietly, then reproject it to SWEREF 99 TM (EPSG:3006)
Sweden_DeSO <- st_transform(
  st_read(Sweden_DeSO_path, quiet = TRUE),
  3006
)

# print(st_crs(Sweden_DeSO))  # uncomment to confirm the layer is in EPSG 3006
Visualizing the layer of Sweden DeSO
#|echo: true
#|eval: true
#|warning: false

# Plot the Sweden DeSO layer on its own.
ggplot() +
  # Map of Sweden DeSO
  geom_sf(data = Sweden_DeSO, fill = "white", color = "black") +
  theme_minimal() +
  # Adjusted limits for whole Sweden, expressed in EPSG:3006 metres.
  # The stray trailing comma (an empty argument) has been removed.
  coord_sf(
    xlim = c(220000, 900000),
    ylim = c(6000000, 8000000),
    crs = st_crs(3006)
  )
#|echo: TRUE
#|eval: false
#|warning: FALSE

# Load the Bamse coordinates .rda file and keep its `result` object as my.df2
load("C:/Users/racmur/OneDrive - Karolinska Institutet/Documents/PROJECTS/Olena/BAMSE2023_coordinates_AP_2020-2022.rda")
my.df2 <- result

# Optional inspection helpers
# ls()
# head(my.df2)
# print(colnames(my.df2))

# Report whether the loaded object is already a data frame
is.data.frame(my.df2)
[1] TRUE
# Build the sf point layer from the Bamse coordinates.
my.df2 <- as.data.frame(my.df2)
# str(my.df2)
# head(my.df2)

# Fail early if the coordinate columns are absent, instead of silently
# producing an empty layer further down.
stopifnot(all(c("x_sv99", "y_sv99") %in% names(my.df2)))

# Remove rows with missing values in the coordinate columns.
# The filter has to be applied to my.df2 itself. The previous code subset the
# separate, transposed object `my.df`, which left zero rows and made st_as_sf()
# warn "no non-missing arguments to min/max".
my.df2 <- my.df2[!is.na(my.df2$x_sv99) & !is.na(my.df2$y_sv99), , drop = FALSE]

# Ensure coordinates are numeric
my.df2$x_sv99 <- as.numeric(my.df2$x_sv99)
my.df2$y_sv99 <- as.numeric(my.df2$y_sv99)

# Convert the data to an sf object with an explicit CRS (EPSG:3006)
my.df2_sf <- st_as_sf(my.df2, coords = c("x_sv99", "y_sv99"), crs = 3006)
Warning in min(cc[[1]], na.rm = TRUE): no non-missing arguments to min;
returning Inf
Warning in min(cc[[2]], na.rm = TRUE): no non-missing arguments to min;
returning Inf
Warning in max(cc[[1]], na.rm = TRUE): no non-missing arguments to max;
returning -Inf
Warning in max(cc[[2]], na.rm = TRUE): no non-missing arguments to max;
returning -Inf
# Show the CRS currently attached to the point layer
st_crs(my.df2_sf)

# Set it explicitly to EPSG:3006 in case it was missing
my.df2_sf <- st_set_crs(my.df2_sf, 3006)

# Drop rows lacking either coordinate, then rebuild the sf object so that it
# carries exactly the CRS of Sweden_DeSO
my.df2 <- my.df2[!(is.na(my.df2$x_sv99) | is.na(my.df2$y_sv99)), ]
my.df2_sf <- st_as_sf(
  my.df2,
  coords = c("x_sv99", "y_sv99"),
  crs = st_crs(Sweden_DeSO)
)
Warning in min(cc[[1]], na.rm = TRUE): no non-missing arguments to min;
returning Inf
Warning in min(cc[[2]], na.rm = TRUE): no non-missing arguments to min;
returning Inf
Warning in max(cc[[1]], na.rm = TRUE): no non-missing arguments to max;
returning -Inf
Warning in max(cc[[2]], na.rm = TRUE): no non-missing arguments to max;
returning -Inf
# Reproject the points onto the DeSO layer's CRS when the two differ, so the
# layers do not end up misaligned once plotted together.
if (!(st_crs(Sweden_DeSO) == st_crs(my.df2_sf))) {
  my.df2_sf <- st_transform(my.df2_sf, crs = st_crs(Sweden_DeSO))
}
Linking Sweden DeSO with geographic coordinates and SEI from SCB database
Step 1: Plotting coordinates in the boundaries of Sweden DeSO and performing their spatial join to generate a table with DeSO id and coordinate points
Note
Note: some coordinates were outliers that fall far outside Sweden.
#|echo: true
#|eval: true
#|warning: false

# Make sure both layers are sf objects
my.df2_sf <- st_as_sf(my.df2_sf)
Sweden_DeSO <- st_as_sf(Sweden_DeSO)

# For every point, list the DeSO polygons that contain it
within_swe <- st_within(my.df2_sf, Sweden_DeSO)

# TRUE for points that fall in at least one polygon
within_swe_logical <- lengths(within_swe) > 0

# Points inside Sweden (taken before the flag column is added below)
my.df2_sf_within_swe <- dplyr::filter(my.df2_sf, within_swe_logical)

# Bounding box of Sweden
bbox_swe <- st_bbox(Sweden_DeSO)

# Record the inside/outside flag on the full point layer
my.df2_sf$within_swe <- within_swe_logical

# Same subset taken from the flagged layer (outliers removed)
my.df2_sf_filtered <- my.df2_sf[within_swe_logical, , drop = FALSE]

# Count points whose flag is NA
sum(is.na(my.df2_sf$within_swe))
[1] 0
# Overlay the Bamse points that fall inside Sweden on the DeSO polygons
ggplot() +
  # Sweden DeSO layer
  geom_sf(
    data = Sweden_DeSO,
    fill = "lightgray",
    color = "black"
  ) +
  # Bamse coordinates layer
  geom_sf(
    data = my.df2_sf_within_swe,
    color = "blue",
    size = 3
  ) +
  labs(
    title = "Coordinates in Sweden-DeSO",
    x = "Easting (meters)",
    y = "Northing (meters)"
  ) +
  theme_minimal() +
  coord_sf()
Spatial join
#|echo: false
#|eval: true
#|warning: false

# CRS of the filtered point layer
st_crs(my.df2_sf_within_swe)

# colnames(my.df2_sf_within_swe)

# Class of the filtered point layer
class(my.df2_sf_within_swe)
[1] "sf" "data.frame"
# Class of the DeSO layer, printed for comparison with the point layer
class(Sweden_DeSO)
[1] "sf" "data.frame"
# Repair any invalid geometries in both layers before joining
my.df2_sf_within_swe <- st_make_valid(my.df2_sf_within_swe)
Sweden_DeSO <- st_make_valid(Sweden_DeSO)

# Spatial left join: attach the attributes of intersecting DeSO polygons to
# each point
Bamse_coord_DeSO <- st_join(
  x = my.df2_sf_within_swe,
  y = Sweden_DeSO,
  join = st_intersects,
  left = TRUE
)

# Count and report geometries that are NA after the join
na_count <- sum(is.na(st_geometry(Bamse_coord_DeSO)))
cat("Number of NA geometries:", na_count, "\n")
Number of NA geometries: 0
# Number of joined rows with no value in the 'deso' field
Bamse_coord_DeSO$deso %>%
  is.na() %>%
  sum()
[1] 0
# Keep only rows whose geometry is not empty
Bamse_coord_DeSO <- Bamse_coord_DeSO[
  !st_is_empty(st_geometry(Bamse_coord_DeSO)),
]

# Re-validate the remaining geometries
Bamse_coord_DeSO <- st_make_valid(Bamse_coord_DeSO)

# Drop unwanted columns: any whose name starts with "remove_", plus the
# columns at positions 7 through 161.
# NOTE(review): the positional range depends on the exact column layout of
# the joined table -- confirm it still matches whenever the inputs change.
Bamse_coord_DeSO <- select(
  Bamse_coord_DeSO,
  -starts_with("remove_"),
  -7:-161
)

# Re-check for missing values in the 'deso' field
sum(is.na(Bamse_coord_DeSO$deso))
[1] 0
# Peek at the 'deso' field alongside the geometry
# head(Bamse_coord_DeSO[, c("deso", "geometry")])

# Turn the joined layer into a regular data frame
Bamse_coord_DeSO <- Bamse_coord_DeSO %>%
  as.data.frame()

# Open the full table in the viewer
# View(Bamse_coord_DeSO)
Note
Step 2: Fetching SEI from SCB database
Note
SEI 1: Income (Nettoinkomst (2011 -2023))
Extracted from scb database: Inkomststruktur nettoinkomst efter region och kön. År 2011 - 2023 - Medelvärde för samtliga, tkr efter region, år, inkomstkomponent.
library(pxweb)
#|echo: TRUE
#|eval: true
#|warning: false
#|

# SCB API endpoint used for both the metadata and the data request
url1 <- "https://api.scb.se/OV0104/v1/doris/sv/ssd/START/HE/HE0110/HE0110I/Tab2InkDesoN"

# Metadata describing the variables the table offers
pxmd1 <- pxweb_get(url1)
# print(pxmd1)

# Region codes listed by the first metadata variable
valid_regions <- pxmd1$variables[[1]]$values

# Keep only codes shaped like a DeSO id: 4 digits, one capital letter, 4 digits
deso_regions <- valid_regions[
  grepl("^(^[0-9]{4}[A-Z][0-9]{4}$)", valid_regions)
]
# print(deso_regions)

# Query definition for the SCB data
query <- pxweb_query(
  list(
    "Region" = deso_regions,                # DeSO region codes only
    "Inkomstkomponenter" = c("240"),        # selected income component
    "Kon" = c("1", "2"),                    # sex codes 1 and 2
    "ContentsCode" = c("000005FW"),         # selected contents code
    "Tid" = as.character(2011:2023)         # years 2011 through 2023
  )
)

# Validate the query against the metadata before sending it
pxweb_validate_query_with_metadata(query, pxmd1)

# Download the data from the SCB API
data1 <- pxweb_get(url = url1, query = query)
Downloading large query (in 2 batches):
|
| | 0%
|
|=================================== | 50%
|
|======================================================================| 100%
# Abort if the download returned nothing
if (is.null(data1)) {
  stop("Data fetching failed. Please check the query and URL.")
}

# Flatten the pxweb result into a data frame
Nettoink_long <- data1 %>%
  as.data.frame()
Warning in pxweb_as_data_frame.pxweb_data(x, row.names = row.names, optional =
optional, : NAs introduced by coercion
# Rename the 'region' column to 'deso'
Nettoink_long <- rename(Nettoink_long, deso = region)

# First rows of the data
# head(Nettoink_long)
SEI 2: unemployment
Extracted from statistikdatabasen Arbetsmarknad / Registerbaserad arbetsmarknadsstatistik (RAMS) / DeSo, tabeller avseende 2018-2021 / Befolkningen 20-64 år efter region, sysselsättning och kön. År 2018
#|echo: false
#|eval: true
#|warning: false

# URL for the metadata and data request
url2 <- "https://api.scb.se/OV0104/v1/doris/sv/ssd/START/AM/AM0207/AM0207I/BefDeSoSyss"

# Fetching the metadata of the unemployment table to see the available variables
pxmd2 <- pxweb_get(url2)
# print(pxmd2)

# Extracting the region codes from the first metadata variable
valid_regions <- pxmd2$variables[[1]]$values

# Defining the query for the SCB unemployment data.
# NOTE(review): every region code in the metadata is requested, with no DeSO
# pattern filter as in the income query -- confirm the table lists only DeSO codes.
query <- pxweb_query(list(
  "Region" = valid_regions,
  "Sysselsattning" = c("EJFÖRV"),   # unemployment component of interest
  "Kon" = c("1", "2"),              # sex codes 1 and 2
  "ContentsCode" = c('000004JY'),   # selected contents code
  "Tid" = as.character(2018)        # single year
))

# Validating the query with the metadata
pxweb_validate_query_with_metadata(query, pxmd2)

# Fetching the data from the SCB API
data2 <- pxweb_get(url = url2, query = query)

# Checking that data came back. The guard must test the downloaded object;
# testing `url2`, a constant string, could never trigger.
if (is.null(data2)) {
  stop("Data fetching failed. Please check the query and URL2.")
}

# Converting the data to a data frame for easier handling
Ejförv_long <- as.data.frame(data2)

# Renaming the 'region' column to 'deso' to avoid a later mismatch
Ejförv_long <- Ejförv_long %>%
  rename(deso = region)

# head(Ejförv_long)
SEI 3: Education level
The extracted indicator is the number of people with förgymnasial utbildning
From statistic database Utbildning och forskning / Befolkningens utbildning / DeSo / Befolkning 25-64 år (fr.o.m. 2023, 25–65 år) efter region och utbildningsnivå. År 2015 - 2023.
#|echo: false
#|eval: true
#|warning: false

# URL for the metadata and data request
url3 <- "https://api.scb.se/OV0104/v1/doris/sv/ssd/START/UF/UF0506/UF0506YDeso/UtbSUNBefDesoRegso"

# Fetching the metadata to understand the available variables
pxmd3 <- pxweb_get(url3)
# print(pxmd3)

# Extracting the region codes from the first metadata variable
valid_regions <- pxmd3$variables[[1]]$values

# Defining the query for SCB data.
# NOTE(review): every region code in the metadata is requested, with no DeSO
# pattern filter as in the income query -- confirm whether non-DeSO codes
# (the table name mentions RegSO) should be excluded.
query <- pxweb_query(list(
  "Region" = valid_regions,
  "UtbildningsNiva" = c("21"),            # förgymnasial utbildning
  "ContentsCode" = c("000005MO"),         # selected contents code
  "Tid" = as.character(seq(2015, 2023))   # years 2015 through 2023
))

# Validating the query with the metadata
pxweb_validate_query_with_metadata(query, pxmd3)

# Fetching the data from the SCB API
data3 <- pxweb_get(url = url3, query = query)

# Checking that data came back. The guard now runs after the download and
# tests the downloaded object; it previously tested `url3` before the fetch,
# so it could never trigger.
if (is.null(data3)) {
  stop("Data fetching failed. Please check the query and URL3.")
}

# Converting the data to a data frame for easier handling
Utbild_long <- as.data.frame(data3)

# Renaming the 'region' column to 'deso'
Utbild_long <- Utbild_long %>%
  rename(deso = region)

# head(Utbild_long)
Inkomstklass (Kvartiler 1, 2, 3 and 4, andel personer, procent)
Note: This section will be relocated so that SEIs of the same category are grouped together, giving the report a logical flow.
#|echo: false
#|eval: true
#|warning: false

# SCB API endpoint used for both the metadata and the data request
url5 <- "https://api.scb.se/OV0104/v1/doris/sv/ssd/START/HE/HE0110/HE0110I/TabVX3InkDesoN"

# Metadata describing the variables the income-quartile table offers
pxmd5 <- pxweb_get(url5)
# print(pxmd5)

# Region codes listed by the first metadata variable
valid_regions <- pxmd5$variables[[1]]$values

# Query definition: all listed regions, the four quartile contents codes
# (Kvartiler 1, 2, 3 and 4, andel personer, procent) and four selected years
query <- pxweb_query(
  list(
    "Region" = valid_regions,
    "ContentsCode" = c("000006T4", "000006T5", "000006T6", "000006T7"),
    "Tid" = c("2011", "2015", "2020", "2023")
  )
)

# Validate the query against the metadata before sending it
pxweb_validate_query_with_metadata(query, pxmd5)

# Download the data from the SCB API
data5 <- pxweb_get(url = url5, query = query)
Downloading large query (in 2 batches):
|
| | 0%
|
|=================================== | 50%
|
|======================================================================| 100%
# Checking that data came back. The guard tests the downloaded object (it
# previously tested the constant `url5`, so it could never trigger), and the
# message now names the matching URL.
if (is.null(data5)) {
  stop("Data fetching failed. Please check the query and URL5.")
}

# Converting the data to a data frame for easier handling
InkKvart_long <- as.data.frame(data5)
Warning in pxweb_as_data_frame.pxweb_data(x, row.names = row.names, optional =
optional, : NAs introduced by coercion
Warning in pxweb_as_data_frame.pxweb_data(x, row.names = row.names, optional =
optional, : NAs introduced by coercion
Warning in pxweb_as_data_frame.pxweb_data(x, row.names = row.names, optional =
optional, : NAs introduced by coercion
Warning in pxweb_as_data_frame.pxweb_data(x, row.names = row.names, optional =
optional, : NAs introduced by coercion
# Rename the 'region' column to 'deso' to avoid a later mismatch
InkKvart_long <- rename(InkKvart_long, deso = region)

# First rows of the data
# head(InkKvart_long)
Step 3: Linking DeSO with geographic coordinates and SCB data
Spatial join of DeSO and Bamse coordinates
In this step, the geographical coordinates and the Sweden DeSO layer were spatially joined so that both are in the same table.
Joining the spatially joined DeSO and Bamse coordinates with SEI
Note: as a reminder, Nettoink_long was used, not the pivoted Nettoink_wide.