Bamse_DeSO_SEI

Author

Rachel

Bamse coordinates - DeSO matching

This report summarizes the process of matching Bamse coordinates to Sweden DeSO. It uses the rda file containing the geographical coordinates, together with the DeSO layer and the SEI data, which were extracted directly from the SCB website.

#|echo: false
#|eval: false
#|warning: false
#|
# Use a fixed CRAN mirror so any package installation is non-interactive.
options(repos = c(CRAN = "https://cran.rstudio.com"))
## Load libraries for reading Excel files and plotting coordinates

# One-off installation commands, kept commented out for reference.
# install.packages("readxl")
# install.packages("writexl")
# install.packages("openxlsx")
# install.packages("openxlsx", lib = "C:/Users/racmur/Rlibs")
# install.packages("xlsx")
# install.packages("ggplot2")
# install.packages("sf", dependencies = TRUE)
# install.packages("sp")
# install.packages("rnaturalearth")
# install.packages("rnaturalearthdata")
# install.packages("quarto")
# install.packages("dplyr")
# install.packages("tidyverse")
# install.packages("jsonlite")
# install.packages("conflicted")
# install.packages("pkgbuild")
# install.packages("janitor")
# install.packages("pxweb")
# install.packages("DT")
# install.packages("tidyr")

# Install rmarkdown only when it is missing, so that rendering the report does
# not download and reinstall the package on every run.
if (!requireNamespace("rmarkdown", quietly = TRUE)) {
  install.packages("rmarkdown")
}
Installing package into 'C:/Users/racmur/AppData/Local/R/win-library/4.4'
(as 'lib' is unspecified)
package 'rmarkdown' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\racmur\AppData\Local\Temp\RtmpqUEJkz\downloaded_packages
library(readxl)
library(writexl)
library(openxlsx)
library(openxlsx, lib.loc = "C:/Users/racmur/Rlibs")
#library(xlsx)
library(ggplot2) 
library(sf) 
Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(sp) 
library(rnaturalearth) 
library(rnaturalearthdata) 

Attaching package: 'rnaturalearthdata'
The following object is masked from 'package:rnaturalearth':

    countries110
library(quarto) 
library(dplyr) 

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyverse) 
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.4     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
✔ readr     2.1.5     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(jsonlite)

Attaching package: 'jsonlite'

The following object is masked from 'package:purrr':

    flatten
library(conflicted) 
library(pkgbuild)
library(janitor)#for checking spaces 
library(pxweb)
pxweb 0.17.0: R tools for the PX-WEB API.
https://github.com/ropengov/pxweb
library(DT)
library(tidyr)
library(dplyr)
library(pxweb)
library(rmarkdown)
Import the Bamse coordinates file
#|echo: false
#|eval: true
#|warning: false

# Work from the project folder so relative paths resolve against it.
setwd("C:/Users/racmur/OneDrive - Karolinska Institutet/Documents/PROJECTS/Olena")

# Load the .rda file; the code below reads an object named `result` from it.
load("C:/Users/racmur/OneDrive - Karolinska Institutet/Documents/PROJECTS/Olena/BAMSE2023_coordinates_AP_2020-2022.rda")

# Optional inspection of the loaded objects:
# ls()

# Coerce `result` to a data frame and transpose it in a single expression.
# NOTE(review): data.frame() has no `drop` option, so `drop = FALSE` is stored
# as an extra column named `drop`. It is kept because later steps select
# columns by position - confirm before removing it.
my.df <- data.frame(t(as.data.frame(result)), drop = FALSE)

# Optional inspection of the resulting data frame:
# head(my.df)
# print(colnames(my.df))
# str(my.df)
Accessing DeSO file

Accessing the json file for Sweden_DeSO locally and transforming it if necessary

#|echo: false
#|eval: true
#|warning: false

# Location of the Sweden DeSO layer (JSON) on disk
Sweden_DeSO_path <- "C:/Users/racmur/OneDrive - Karolinska Institutet/Documents/PROJECTS/Olena/DeSO_Shapes/main.DeSO_2018.json"

# Read the layer quietly and reproject it to SWEREF 99 TM (EPSG:3006)
Sweden_DeSO <- st_transform(st_read(Sweden_DeSO_path, quiet = TRUE), 3006)

# Uncomment to confirm that the layer is in EPSG 3006:
# print(st_crs(Sweden_DeSO))
Visualizing the layer of Sweden DeSO
#|echo: true
#|eval: true
#|warning: false
# Draw the DeSO polygons (white fill, black outlines) with axis limits
# adjusted to show the whole of Sweden in EPSG:3006 coordinates.
ggplot() +
  geom_sf(data = Sweden_DeSO, fill = "white", color = "black") +
  theme_minimal() +
  coord_sf(
    xlim = c(220000, 900000),
    ylim = c(6000000, 8000000),
    crs = st_crs(3006)
  )

#|echo: TRUE
#|eval: false
#|warning: FALSE

# Load the Bamse coordinates .rda file again; the next statement reads the
# object named `result` from it.
load("C:/Users/racmur/OneDrive - Karolinska Institutet/Documents/PROJECTS/Olena/BAMSE2023_coordinates_AP_2020-2022.rda")

# Keep an untransposed copy of `result`.
# NOTE(review): `my.df2` is overwritten further down by a subset of `my.df`,
# so this copy only feeds the is.data.frame() check and the as.data.frame()
# call that follow - confirm that this is intended.
my.df2<-result


# Optional: list the objects that were loaded (prints them to the console)
#ls()

# Optional: display the first few rows of the data
#head(my.df2)

# Optional: display the column names
#print(colnames(my.df2))

# Report whether the loaded object is already a data frame
is.data.frame(my.df2)
[1] TRUE
# Coerce the copy of `result` to a data frame.
my.df2 <- as.data.frame(my.df2)

#str(my.df2)
#head(my.df2)

#my.df<- data.frame(t(my.df))
#drop = FALSE

# Keep only the rows where both coordinate columns are present.
# NOTE(review): the rows are taken from `my.df` (the transposed data frame
# built in the import step), not from the `my.df2` created just above, which
# is overwritten here - confirm which source is intended.
my.df2 <- my.df[!is.na(my.df$x_sv99) & !is.na(my.df$y_sv99), ]

# Make sure the coordinates are numeric
my.df2$x_sv99 <- as.numeric(my.df2$x_sv99)
my.df2$y_sv99 <- as.numeric(my.df2$y_sv99)

# Convert the data to an sf point object with an explicit CRS (EPSG:3006).
# NOTE(review): the rendered run reported "no non-missing arguments to min/max"
# warnings for this call, which may mean that no rows reached it - TODO verify
# nrow(my.df2) > 0 before relying on the downstream results.
my.df2_sf <- st_as_sf(my.df2 , coords = c("x_sv99", "y_sv99"), crs = 3006)
Warning in min(cc[[1]], na.rm = TRUE): no non-missing arguments to min;
returning Inf
Warning in min(cc[[2]], na.rm = TRUE): no non-missing arguments to min;
returning Inf
Warning in max(cc[[1]], na.rm = TRUE): no non-missing arguments to max;
returning -Inf
Warning in max(cc[[2]], na.rm = TRUE): no non-missing arguments to max;
returning -Inf
#Checking if CRS is correctly assigned and explicitly setting it if missing.
st_crs(my.df2_sf)
Coordinate Reference System:
  User input: EPSG:3006 
  wkt:
PROJCRS["SWEREF99 TM",
    BASEGEOGCRS["SWEREF99",
        DATUM["SWEREF99",
            ELLIPSOID["GRS 1980",6378137,298.257222101,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4619]],
    CONVERSION["SWEREF99 TM",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",0,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",15,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",0.9996,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",500000,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["northing (N)",north,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["easting (E)",east,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Topographic mapping (medium and small scale)."],
        AREA["Sweden - onshore and offshore."],
        BBOX[54.96,10.03,69.07,24.17]],
    ID["EPSG",3006]]
# Stamp EPSG:3006 on the current sf object (it is rebuilt just below).
my.df2_sf <- st_set_crs(my.df2_sf, 3006)

# Drop rows in which either coordinate is missing, then rebuild the sf object
# using the CRS taken from Sweden_DeSO.
my.df2 <- my.df2[!(is.na(my.df2$x_sv99) | is.na(my.df2$y_sv99)), ]
my.df2_sf <- st_as_sf(
  my.df2,
  coords = c("x_sv99", "y_sv99"),
  crs = st_crs(Sweden_DeSO)
)
Warning in min(cc[[1]], na.rm = TRUE): no non-missing arguments to min;
returning Inf
Warning in min(cc[[2]], na.rm = TRUE): no non-missing arguments to min;
returning Inf
Warning in max(cc[[1]], na.rm = TRUE): no non-missing arguments to max;
returning -Inf
Warning in max(cc[[2]], na.rm = TRUE): no non-missing arguments to max;
returning -Inf
# Reproject the points only when their CRS differs from that of the DeSO
# layer, so that the two layers line up when they are plotted together.
if (st_crs(Sweden_DeSO) != st_crs(my.df2_sf)) {
  my.df2_sf <- st_transform(my.df2_sf, crs = st_crs(Sweden_DeSO))
}
Linking Sweden DeSO with geographic coordinates and SEI from SCB database
Step 1: Plotting coordinates in the boundaries of Sweden DeSO and performing their spatial join to generate a table with DeSO id and coordinate points
Note

Note: some coordinates were outliers that fall far outside Sweden.

#|echo: true
#|eval: true
#|warning: false


# Make sure both layers are sf objects before the spatial predicate is applied
my.df2_sf <- st_as_sf(my.df2_sf)
Sweden_DeSO <- st_as_sf(Sweden_DeSO)

# For every point, list the DeSO polygons that contain it
within_swe <- st_within(my.df2_sf, Sweden_DeSO)

# A point counts as inside Sweden when it falls within at least one polygon
within_swe_logical <- lengths(within_swe) > 0

# Points inside Sweden (taken before the flag column is added below)
my.df2_sf_within_swe <- dplyr::filter(my.df2_sf, within_swe_logical)

# Bounding box of the DeSO layer
# NOTE(review): `bbox_swe` is not used in the visible part of the report.
bbox_swe <- st_bbox(Sweden_DeSO)

# Store the inside/outside flag on the full point layer
my.df2_sf$within_swe <- within_swe_logical

# Same selection as above (outliers removed), but carrying the flag column
# NOTE(review): `my.df2_sf_filtered` is not used in the visible part of the
# report.
my.df2_sf_filtered <- my.df2_sf[within_swe_logical, , drop = FALSE]

# Number of points whose flag is missing
sum(is.na(my.df2_sf$within_swe))
[1] 0
# Overlay the Bamse points that fall inside Sweden on the DeSO layer
ggplot() +
  # Sweden DeSO layer
  geom_sf(data = Sweden_DeSO, fill = "lightgray", color = "black") +
  # Bamse coordinates layer
  geom_sf(data = my.df2_sf_within_swe, color = "blue", size = 3) +
  labs(
    title = "Coordinates in Sweden-DeSO",
    x = "Easting (meters)",
    y = "Northing (meters)"
  ) +
  theme_minimal() +
  coord_sf()

Spatial join
#|echo: false
#|eval: true
#|warning: false
# Checking CRS of both objects
st_crs(my.df2_sf_within_swe)
Coordinate Reference System:
  User input: EPSG:3006 
  wkt:
PROJCRS["SWEREF99 TM",
    BASEGEOGCRS["SWEREF99",
        DATUM["SWEREF99",
            ELLIPSOID["GRS 1980",6378137,298.257222101,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4619]],
    CONVERSION["SWEREF99 TM",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",0,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",15,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",0.9996,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",500000,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["northing (N)",north,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["easting (E)",east,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Topographic mapping (medium and small scale)."],
        AREA["Sweden - onshore and offshore."],
        BBOX[54.96,10.03,69.07,24.17]],
    ID["EPSG",3006]]
st_crs(Sweden_DeSO)
Coordinate Reference System:
  User input: EPSG:3006 
  wkt:
PROJCRS["SWEREF99 TM",
    BASEGEOGCRS["SWEREF99",
        DATUM["SWEREF99",
            ELLIPSOID["GRS 1980",6378137,298.257222101,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4619]],
    CONVERSION["SWEREF99 TM",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",0,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",15,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",0.9996,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",500000,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["northing (N)",north,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["easting (E)",east,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Topographic mapping (medium and small scale)."],
        AREA["Sweden - onshore and offshore."],
        BBOX[54.96,10.03,69.07,24.17]],
    ID["EPSG",3006]]
# Checking column names of Sweden_DeSO
colnames(Sweden_DeSO)
[1] "fid"        "uuid"       "deso"       "kommun"     "lan"       
[6] "kommunnamn" "lannamn"    "version"    "geometry"  
#colnames(my.df2_sf_within_swe)

# Checking the class of both spatial objects
class(my.df2_sf_within_swe)
[1] "sf"         "data.frame"
class(Sweden_DeSO)
[1] "sf"         "data.frame"
# Repair any invalid geometries in both layers before joining them
my.df2_sf_within_swe <- my.df2_sf_within_swe %>% st_make_valid()
Sweden_DeSO <- Sweden_DeSO %>% st_make_valid()

# Left spatial join: every point keeps its row and gains the attributes of
# the DeSO polygon(s) it intersects
Bamse_coord_DeSO <- my.df2_sf_within_swe %>%
  st_join(Sweden_DeSO, join = st_intersects, left = TRUE)

# Count and report geometries that are NA after the join
na_count <- Bamse_coord_DeSO %>%
  st_geometry() %>%
  is.na() %>%
  sum()
cat("Number of NA geometries:", na_count, "\n")
Number of NA geometries: 0 
# Checking for NA values in the 'deso' field
sum(is.na(Bamse_coord_DeSO$deso))
[1] 0
# Drop the rows whose geometry is empty
Bamse_coord_DeSO <- Bamse_coord_DeSO[!st_is_empty(st_geometry(Bamse_coord_DeSO)), ]

# Repair any invalid geometries in the joined layer
Bamse_coord_DeSO <- st_make_valid(Bamse_coord_DeSO)
           
# Drop the columns whose names start with "remove_" and the columns in
# positions 7 through 161.
# NOTE(review): the positional range depends on the exact column order of the
# joined table; selecting by name would be safer - confirm which columns are
# meant to be removed.
Bamse_coord_DeSO <- Bamse_coord_DeSO %>% 
  select(-starts_with("remove_"), -7:-161)

# Checking for NA values in the 'deso' field
sum(is.na(Bamse_coord_DeSO$deso))
[1] 0
# Optional: view the 'deso' field next to the geometry
#head(Bamse_coord_DeSO[, c("deso", "geometry")])

# Convert the joined layer to a regular data frame (no check of the DeSO
# columns is performed here; use the optional calls around this line for that)
Bamse_coord_DeSO <- as.data.frame(Bamse_coord_DeSO)

# Optional: view the full resulting data frame
#View(Bamse_coord_DeSO)
Note

Step 2: Fetching SEI from SCB database

Note

SEI 1: Income (Nettoinkomst (2011 -2023))

Extracted from scb database: Inkomststruktur nettoinkomst efter region och kön. År 2011 - 2023 - Medelvärde för samtliga, tkr efter region, år, inkomstkomponent.

library(pxweb)
#|echo: TRUE
#|eval: true
#|warning: false
#|
# SCB PxWeb endpoint used for both the metadata and the data request
url1 <- "https://api.scb.se/OV0104/v1/doris/sv/ssd/START/HE/HE0110/HE0110I/Tab2InkDesoN"

# Fetch the table metadata to see which variables and values are available
pxmd1 <- pxweb_get(url1)

# Uncomment to inspect the metadata structure:
# print(pxmd1)

# Region codes are read from the first variable listed in the metadata
valid_regions <- pxmd1$variables[[1]]$values

# Keep only the codes shaped like a DeSO id
# (four digits, one capital letter, four digits)
deso_regions <- grep(
  pattern = "^(^[0-9]{4}[A-Z][0-9]{4}$)",
  x = valid_regions,
  value = TRUE
)

# Uncomment to inspect the retained codes:
# print(deso_regions)

# Build the SCB query
query <- pxweb_query(
  list(
    "Region" = deso_regions,            # DeSO region codes only
    "Inkomstkomponenter" = "240",       # specific income component
    "Kon" = c("1", "2"),                # sex codes 1 and 2
    "ContentsCode" = "000005FW",        # specific code for the dataset
    "Tid" = as.character(2011:2023)     # specific period
  )
)

# Validate the query against the metadata
pxweb_validate_query_with_metadata(query, pxmd1)

# Fetch the data from the SCB API
data1 <- pxweb_get(url = url1, query = query)
  Downloading large query (in 2 batches):

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%
# Stop early when the API call returned nothing
if (is.null(data1)) stop("Data fetching failed. Please check the query and URL.")

# Convert the fetched data to a data frame for easier handling
Nettoink_long <- data1 %>% as.data.frame()
Warning in pxweb_as_data_frame.pxweb_data(x, row.names = row.names, optional =
optional, : NAs introduced by coercion
# Rename the 'region' column to 'deso'
Nettoink_long <- rename(Nettoink_long, deso = region)

# Optional: print the first few rows of the data
# head(Nettoink_long)
SEI 2: unemployment

Extracted from statistikdatabasen Arbetsmarknad / Registerbaserad arbetsmarknadsstatistik (RAMS) / DeSo, tabeller avseende 2018-2021 / Befolkningen 20-64 år efter region, sysselsättning och kön. År 2018

#|echo: false
#|eval: true
#|warning: false
# SCB PxWeb endpoint used for both the metadata and the data request
url2 <- "https://api.scb.se/OV0104/v1/doris/sv/ssd/START/AM/AM0207/AM0207I/BefDeSoSyss"

# Fetch the metadata of the unemployment table to see the available variables
pxmd2 <- pxweb_get(url2)

# Uncomment to inspect the metadata structure:
# print(pxmd2)

# Region codes are read from the first variable listed in the metadata
valid_regions <- pxmd2$variables[[1]]$values

# Build the query for the SCB unemployment data
query <- pxweb_query(list(
  "Region" = valid_regions,        # every region code listed in the metadata
  "Sysselsattning" = c("EJFÖRV"),  # specific unemployment component of interest
  "Kon" = c("1", "2"),             # 2 genders
  "ContentsCode" = c("000004JY"),  # specific code for the dataset
  "Tid" = as.character(2018)       # specific period
))

# Validate the query against the metadata
pxweb_validate_query_with_metadata(query, pxmd2)

# Fetch the data from the SCB API
data2 <- pxweb_get(
  url = url2,
  query = query
)

# Stop when nothing was fetched. The check is made on the fetched object
# (`data2`); the URL is a constant string and can never be NULL.
if (is.null(data2)) {
  stop("Data fetching failed. Please check the query and URL2.")
}

# Convert the fetched data to a data frame for easier handling
Ejförv_long <- as.data.frame(data2)

# Rename the 'region' column to 'deso' to avoid a later mismatch
Ejförv_long <- Ejförv_long %>%
  rename(deso = region)

# Optional: print the first few rows of the data
# head(Ejförv_long)
SEI 3: Education level

The extracted indicator is the number of people with förgymnasial utbildning

From statistic database Utbildning och forskning / Befolkningens utbildning / DeSo / Befolkning 25-64 år (fr.o.m. 2023, 25–65 år) efter region och utbildningsnivå. År 2015 - 2023.

#|echo: false
#|eval: true
#|warning: false

# SCB PxWeb endpoint used for both the metadata and the data request
url3 <- "https://api.scb.se/OV0104/v1/doris/sv/ssd/START/UF/UF0506/UF0506YDeso/UtbSUNBefDesoRegso"

# Fetch the table metadata to see which variables and values are available
pxmd3 <- pxweb_get(url3)

# Uncomment to inspect the metadata structure:
# print(pxmd3)

# Region codes are read from the first variable listed in the metadata
valid_regions <- pxmd3$variables[[1]]$values

# Build the SCB query
query <- pxweb_query(list(
  "Region" = valid_regions,              # every region code listed in the metadata
  "UtbildningsNiva" = c("21"),           # education level of interest (förgymnasial utbildning)
  "ContentsCode" = c("000005MO"),        # specific code for the dataset
  "Tid" = as.character(seq(2015, 2023))  # period for which data is available
))

# Validate the query against the metadata
pxweb_validate_query_with_metadata(query, pxmd3)

# Fetch the data from the SCB API
data3 <- pxweb_get(
  url = url3,
  query = query
)

# Stop when nothing was fetched. The check runs after the request and is made
# on the fetched object (`data3`); the URL is a constant string and can never
# be NULL.
if (is.null(data3)) {
  stop("Data fetching failed. Please check the query and URL3.")
}

# Convert the fetched data to a data frame for easier handling
Utbild_long <- as.data.frame(data3)

# Rename the 'region' column to 'deso'
Utbild_long <- Utbild_long %>%
  rename(deso = region)

# Optional: print the first few rows of the data
# head(Utbild_long)
Inkomstklass (Kvartiler 1, 2, 3 and 4, andel personer, procent)

Note: This section will be relocated so that SEI of the same category are grouped together, allowing a logical flow.

Extracted from Statistikdatabasen/Hushållens ekonomi/Inkomster och skatter/DeSo/Ekonomisk standard, andel av befolkningen per inkomstklass efter region. År 2011 - 2023

#|echo: false
#|eval: true
#|warning: false
# SCB PxWeb endpoint used for both the metadata and the data request
url5 <- "https://api.scb.se/OV0104/v1/doris/sv/ssd/START/HE/HE0110/HE0110I/TabVX3InkDesoN"

# Fetch the table metadata to see which variables and values are available
pxmd5 <- pxweb_get(url5)

# Uncomment to inspect the metadata structure:
# print(pxmd5)

# Region codes are read from the first variable listed in the metadata
valid_regions <- pxmd5$variables[[1]]$values

# Build the SCB query for the income-quartile shares
query <- pxweb_query(
  list(
    "Region" = valid_regions,  # every region code listed in the metadata
    # Kvartiler 1, 2, 3 and 4, andel personer, procent
    "ContentsCode" = c("000006T4", "000006T5", "000006T6", "000006T7"),
    # selected years
    "Tid" = c("2011", "2015", "2020", "2023")
  )
)

# Validate the query against the metadata
pxweb_validate_query_with_metadata(query, pxmd5)

# Fetch the data from the SCB API
data5 <- pxweb_get(url = url5, query = query)
  Downloading large query (in 2 batches):

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%
# Stop when nothing was fetched. The check is made on the fetched object
# (`data5`); the URL is a constant string and can never be NULL. The message
# names URL5, the endpoint used by this request.
if (is.null(data5)) {
  stop("Data fetching failed. Please check the query and URL5.")
}

# Convert the fetched data to a data frame for easier handling
InkKvart_long <- as.data.frame(data5)
Warning in pxweb_as_data_frame.pxweb_data(x, row.names = row.names, optional =
optional, : NAs introduced by coercion
Warning in pxweb_as_data_frame.pxweb_data(x, row.names = row.names, optional =
optional, : NAs introduced by coercion
Warning in pxweb_as_data_frame.pxweb_data(x, row.names = row.names, optional =
optional, : NAs introduced by coercion
Warning in pxweb_as_data_frame.pxweb_data(x, row.names = row.names, optional =
optional, : NAs introduced by coercion
# Rename the 'region' column to 'deso' to avoid a later mismatch
InkKvart_long <- rename(InkKvart_long, deso = region)

# Optional: print the first few rows of the data
# head(InkKvart_long)
Step 3: Linking DeSO with geographic coordinates and SCB data

Spatial join of DeSO and Bamse coordinates

In this step, the geographical coordinates were spatially joined with the Sweden DeSO layer so that both are in the same table.

Joining the spatially joined DeSO and Bamse coordinates with SEI

Note: as a reminder, Nettoink_long was used and not the pivoted Nettoink_wide.

#|echo: false
#|eval: true
#|warning: false

#names(Bamse_coord_DeSO)
names(Nettoink_long)
[1] "deso"                         "inkomstkomponent"            
[3] "kön"                          "år"                          
[5] "Medelvärde för samtliga, tkr"
names(Ejförv_long)
[1] "deso"           "sysselsättning" "kön"            "år"            
[5] "Befolkningen"  
names(Utbild_long)
[1] "deso"            "utbildningsnivå" "år"              "Befolkning"     
names(InkKvart_long)
[1] "deso"                               "år"                                
[3] "Kvartil 1, andel personer, procent" "Kvartil 2, andel personer, procent"
[5] "Kvartil 3, andel personer, procent" "Kvartil 4, andel personer, procent"
# Attach each SEI table in turn to the coordinate/DeSO table, matching on
# `deso` (left joins, applied in the order listed).
# NOTE(review): the SEI tables are in long format (each has an `år` column and
# two of them a `kön` column), so joining on `deso` alone presumably produces
# one output row per combination of matching rows, with automatic suffixes on
# the repeated column names - confirm that this is the intended result.
sei_tables <- list(Nettoink_long, Ejförv_long, Utbild_long, InkKvart_long)

Bamse_Deso_coord_SEI <- Reduce(
  function(joined, sei) {
    left_join(joined, sei %>% select(deso, everything()), by = "deso")
  },
  sei_tables,
  Bamse_coord_DeSO
)

# Optional: save the merged result as .rda
# save(Bamse_Deso_coord_SEI, file = "Bamse_Deso_coord_SEI.rda")

# Flatten list columns to comma-separated text so the table can be written
# as plain text
data <- as.data.frame(Bamse_Deso_coord_SEI)
data[] <- lapply(data, function(column) {
  if (!is.list(column)) {
    return(column)
  }
  sapply(column, paste, collapse = ",")
})

# Write the table in CSV format into the working directory.
# NOTE(review): the file name has no ".csv" extension - confirm whether that
# is intended before changing it.
write.csv(data, "Bamse_Deso_coord_SEI", row.names = FALSE)

# Optional: save the merged result as JSON
# write_json(Bamse_Deso_coord_SEI, "Bamse_Deso_coord_SEI.json", pretty = TRUE)

# head(Bamse_Deso_coord_SEI)

Encrypting document and publishing it