This document provides the full replication code for the article:
Regional Resilience through Resilient Neighbourhoods? Measuring and assessing resilience using grid-level data in the Warsaw region
The scripts reproduce the construction of the Local Industrial Resilience Index (LIRI), a grid-level measure of economic resilience based on firm survival, firm births, and the industrial structure of local economies. The workflow follows the methodological steps described in Section 3 of the paper and includes:
library(here) # for reproducible file paths
library(dplyr) # for data manipulation (filtering, aggregating, shares, etc.)
library(tidyr) # for data reshaping (long-wide transformations)
library(sf) # for sf class and handling spatial data
library(ggplot2) # for plotting in sf class
library(units) # for consistent handling of distance units
library(stringr) # for cleaning and parsing text data (firm identifiers, sector codes)
library(purrr) # for functional programming (iterative calculations across sectors and grids)
library(BBmisc) # for scaling and normalization
library(DescTools) # for PseudoR2() to get R2 McFadden in probits
library(texreg) # for screenreg() to make synthetic table with results
# 2x2 km grid for mazowieckie with population count
grids_maz <- st_read(here("data", "grid_mazowieckie_2km", "population_mazowieckie_2km_grid_2180.shp"))
## Reading layer `population_mazowieckie_2km_grid_2180' from data source
## `/Users/monikakot/Documents/Nauka/Tandemy/LIRI R/data/grid_mazowieckie_2km/population_mazowieckie_2km_grid_2180.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 9320 features and 2 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 517613.9 ymin: 352474.6 xmax: 781613.9 ymax: 628474.6
## Projected CRS: ETRF2000-PL / CS92
# Shapefile for Mazowieckie region
voivodeships <- st_read(here("data", "voivodeships_poviats_shp", "wojewodztwa.shp"))
## Reading layer `wojewodztwa' from data source
## `/Users/monikakot/Documents/Nauka/Tandemy/LIRI R/data/voivodeships_poviats_shp/wojewodztwa.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 16 features and 29 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 171677.6 ymin: 133223.7 xmax: 861895.7 ymax: 774923.7
## Projected CRS: ETRS89 / Poland CS92
voivodeships$jpt_nazwa_ <- iconv(voivodeships$jpt_nazwa_, "latin1", "UTF-8")
voivodeships <- st_transform(voivodeships, 4326)
mazowieckie <- voivodeships %>% filter(jpt_nazwa_ == "mazowieckie")
# Firms in 2012
firms_2012 <- st_read(here("data", "regon_clean_match", "regon_2012_rematched_clean.shp"))
## Reading layer `regon_2012_rematched_clean' from data source
## `/Users/monikakot/Documents/Nauka/Tandemy/LIRI R/data/regon_clean_match/regon_2012_rematched_clean.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 989092 features and 8 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 19.26446 ymin: 51.01471 xmax: 23.11206 ymax: 53.47828
## Geodetic CRS: WGS 84
firms_2012 <- firms_2012 %>%
mutate(TECH = ifelse(is.na(TECH), "non-tech", TECH)) # adding labels to non-tech firms
nrow(firms_2012) # around 989,000 firms in 2012
## [1] 989092
# Firms in 2021
firms_2021 <- st_read(here("data", "regon_clean_match","regon_2021_rematched_clean.shp"))
## Reading layer `regon_2021_rematched_clean' from data source
## `/Users/monikakot/Documents/Nauka/Tandemy/LIRI R/data/regon_clean_match/regon_2021_rematched_clean.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1129717 features and 8 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 19.26446 ymin: 51.01471 xmax: 23.11206 ymax: 53.47828
## Geodetic CRS: WGS 84
firms_2021 <- firms_2021 %>%
mutate(TECH = ifelse(is.na(TECH), "non-tech", TECH)) # adding labels to non-tech firms
nrow(firms_2021) # around 1,130,000 firms in 2021
## [1] 1129717
# Inspecting the grids
summary(grids_maz)
## ID population geometry
## Min. : 1 Min. : 0.0 POLYGON :9320
## 1st Qu.:2331 1st Qu.: 77.0 epsg:2180 : 0
## Median :4660 Median : 148.0 +proj=tmer...: 0
## Mean :4660 Mean : 590.2
## 3rd Qu.:6990 3rd Qu.: 271.0
## Max. :9320 Max. :51587.0
# We have 9320 grids
# We have up to 51 587 people per grid
# Plot for 2x2 km² grids for mazowieckie
ggplot() +
geom_sf(data = grids_maz, crs = 4326) +
geom_sf(data = mazowieckie, fill = NA, color = "red") +
theme_minimal() +
ggtitle("2x2 km² grids") +
theme(plot.title = element_text(hjust = 0.5, size = 12))
As a first step we analyze the structure of each cell in 2012. We calculate shares of firms from specific sectors and their co-occurences to get some information about the specialization, diversity and related variety at the grid level (2x2 km)
Here we calculate:
# Changing to prjected CRS (2180 for Poland)
firms_2012 <- st_transform(firms_2012, 2180)
grids_maz <- st_transform(grids_maz, 2180)
# Checking if the geometries of grids and firms from 2012 match
st_crs(grids_maz) == st_crs(firms_2012) # TRUE - it matches
## [1] TRUE
# Checking if the grid is valid
table(st_is_valid(grids_maz)) # all TRUE
##
## TRUE
## 9320
glimpse(grids_maz)
## Rows: 9,320
## Columns: 3
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, 5, 32, 2…
## $ geometry <POLYGON [m]> POLYGON ((667613.9 352474.6..., POLYGON ((669613.9 …
nrow(grids_maz) # 9 320 grids
## [1] 9320
nrow(firms_2012) # 989 092 firms
## [1] 989092
# Assigning each firm to ONE grid (nearest grid - distance is 0 when inside)
idx <- st_nearest_feature(firms_2012, grids_maz)
# For each firm adding the ID of a grid they lay in
firms_2012_to_grids <- firms_2012 %>%
st_drop_geometry() %>% # dropping geometries for faster calculations
mutate(ID = grids_maz$ID[idx])
# All possible grid IDs, sectors and firm sizes
grid_ids <- sort(unique(grids_maz$ID))
sectors <- sort(unique(firms_2012$SEK_PKD7))
# Calculating number of firms per cell (total and by sector)
grids_baseline_2012 <- firms_2012_to_grids %>%
count(ID, SEK_PKD7, name = "n") %>% # number of firms in grids by sectors
complete(ID = grid_ids,
SEK_PKD7 = sectors,
fill = list(n = 0L)) %>% # adding 0 for empty grids and for not filled sectors
pivot_wider(names_from = SEK_PKD7,
values_from = n,
values_fill = 0,
names_prefix = "firms_") %>%
mutate(firms_total = rowSums(across(starts_with("firms_")))) %>%
mutate(population = grids_maz$population) %>%
relocate(ID, population, firms_total) %>%
glimpse()
## Rows: 9,320
## Columns: 24
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, 5, 32, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, 12, 11,…
## $ firms_A <int> 0, 4, 31, 8, 0, 0, 1, 15, 54, 0, 0, 0, 1, 0, 0, 0, 12, 10,…
## $ firms_B <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_C <int> 0, 1, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_F <int> 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3…
## $ firms_G <int> 0, 1, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 4…
## $ firms_H <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2…
## $ firms_I <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ firms_J <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_K <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_L <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_M <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_N <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_O <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_P <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_Q <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_R <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_S <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_T <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ firms_U <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# Calculating number of tech firms in each grid
tech_counts_per_grid <- firms_2012_to_grids %>%
count(ID, IF_TECH, name = "n") %>% # number of firms in grids by sectors
complete(ID = grid_ids,
IF_TECH = 0:1,
fill = list(n = 0L)) %>%
mutate(label = dplyr::case_when(
IF_TECH == 1 ~ "firms_tech",
IF_TECH == 0 ~ "firms_non_tech")) %>%
dplyr::select(ID, label, n) %>%
pivot_wider(names_from = label,
values_from = n,
values_fill = 0) %>%
glimpse()
## Rows: 9,320
## Columns: 3
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, 12, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# Calculating number of firms in different sizes in each grid
firm_sizes <- sort(unique(firms_2012$LPRAC))
size_counts_per_grid <- firms_2012_to_grids %>%
count(ID, LPRAC, name = "n") %>% # number of firms in grids by sectors
complete(ID = grid_ids,
LPRAC = firm_sizes,
fill = list(n = 0L)) %>%
mutate(label = dplyr::case_when(
LPRAC == 1 ~ "firms_size_0_9",
LPRAC == 2 ~ "firms_size_10_49",
LPRAC == 3 ~ "firms_size_50_249",
LPRAC == 4 ~ "firms_size_250_999",
LPRAC == 5 ~ "firms_size_1000")) %>%
dplyr::select(ID, label, n) %>%
pivot_wider(names_from = label,
values_from = n,
values_fill = 0) %>%
glimpse()
## Rows: 9,320
## Columns: 6
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ firms_size_0_9 <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_size_10_49 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_50_249 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
# Adding tech counts to the baseline
grids_baseline_2_2012 <- grids_baseline_2012 %>%
cbind(tech_counts_per_grid[, c(2:3)]) %>%
cbind(size_counts_per_grid[, c(2:6)]) %>%
glimpse()
## Rows: 9,320
## Columns: 31
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_A <int> 0, 4, 31, 8, 0, 0, 1, 15, 54, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_B <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_C <int> 0, 1, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_F <int> 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_G <int> 0, 1, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_H <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_I <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_J <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_K <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_L <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_M <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_N <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_O <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_P <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_Q <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_R <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_S <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_T <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_U <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_0_9 <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_size_10_49 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_50_249 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
# Checking if the number of firms in rows matches the total number of firms
dim(firms_2012)[1] == sum(grids_baseline_2_2012$firms_total) # TRUE
## [1] TRUE
sum(grids_baseline_2_2012$firms_non_tech) + sum(grids_baseline_2_2012$firms_tech) == sum(grids_baseline_2_2012$firms_total) # TRUE
## [1] TRUE
# Quick check - how many empty grids do we have?
dim(grids_baseline_2_2012 %>% filter(firms_total == 0))[1] # 712 grids are empty
## [1] 712
# 712 / 9 320 = 7.5%
Here we calculate diversity and specialization metrics at grid level, specifically:
Shannon's entropy (internal diversity): tells us how
balanced is a grid’s internal mix of sectors
Krugman dissimilarity index (external vs internal
diversity): tells us how different is a grid’s sector mix from the
region’s overall mix
Sectoral HHI (grids’ regional dominance profile): tells
us in how many sectors is this cell a regional hub (hosting a big slice
of sector activity)
sector_count_cols <- paste0("firms_", LETTERS[1:21]) # column names
K <- length(sector_count_cols) # number of sectors
# region totals and shares (used by Krugman + external HHI)
sector_totals <- colSums(grids_baseline_2_2012[, sector_count_cols])
regional_shares <- sector_totals / sum(sector_totals)
# name to match 'share_' columns later
names(regional_shares) <- gsub("^firms_", "share_", names(regional_shares))
grids_baseline_3_2012 <- grids_baseline_2_2012 %>%
# --- differentiating between empty filled grids ---
mutate(has_firms = firms_total > 0) %>%
# --- sectoral shares for Shannons entropy ---
mutate(across(all_of(sector_count_cols), # calculating sectoral shares
~ ifelse(has_firms, .x / firms_total, 0), # if there are firms in grid calculate, otherwise do not
.names = "share_{.col}")) %>%
rename_with(~ sub("^share_firms_", "share_", .x), starts_with("share_firms_")) %>%
# --- Shannon entropy (within-grid diversity) ---
mutate(H = if_else(has_firms, # Shannon entropy, NA if grid has no firms
-rowSums(across(starts_with("share_"),
~ if_else(.x > 0, .x * log(.x), 0.0))),
NA)) %>%
# --- Krugman dissimilarity index vs region ---
mutate(KDI = if_else(has_firms,
rowSums(across(starts_with("share_"), ~ abs(.x - regional_shares[cur_column()]))),
NA)) %>%
# --- External HHI (regional dominance) ---
# Sector-wise grid shares (each sector's shares sum to 1 across grids)
mutate(across(all_of(sector_count_cols),
~ {d <- sector_totals[[cur_column()]]
if (is.na(d) || d == 0) 0 else .x / d },
.names = "domshare_{.col}")) %>%
# Sum of squares over sectors
mutate(HHI = if_else(has_firms,
rowSums(across(starts_with("domshare_"), ~ .x * .x)),
NA),
HHI_z = as.numeric(scale(HHI))) %>%
glimpse()
## Rows: 9,320
## Columns: 78
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_A <int> 0, 4, 31, 8, 0, 0, 1, 15, 54, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_B <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_C <int> 0, 1, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_F <int> 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_G <int> 0, 1, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_H <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_I <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_J <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_K <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_L <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_M <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_N <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_O <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_P <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_Q <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_R <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_S <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_T <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_U <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_0_9 <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_size_10_49 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_50_249 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ has_firms <lgl> FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, T…
## $ share_A <dbl> 0.0000000, 0.5714286, 0.8611111, 1.0000000, 0.00000…
## $ share_B <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_C <dbl> 0.00000000, 0.14285714, 0.05555556, 0.00000000, 1.0…
## $ share_D <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_E <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_F <dbl> 0.00000000, 0.00000000, 0.02777778, 0.00000000, 0.0…
## $ share_G <dbl> 0.00000000, 0.14285714, 0.05555556, 0.00000000, 0.0…
## $ share_H <dbl> 0.00000000, 0.14285714, 0.00000000, 0.00000000, 0.0…
## $ share_I <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
## $ share_J <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_K <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
## $ share_L <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_M <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_N <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_O <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_P <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_Q <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_R <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_S <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_T <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_U <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, NA,…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, NA,…
## $ domshare_firms_A <dbl> 0.000000000000, 0.000015586096, 0.000120792241, 0.0…
## $ domshare_firms_B <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_C <dbl> 0.00000000000, 0.00001695145, 0.00003390290, 0.0000…
## $ domshare_firms_D <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_E <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_F <dbl> 0.00000000000, 0.00000000000, 0.00001393573, 0.0000…
## $ domshare_firms_G <dbl> 0.000000000000, 0.000005201966, 0.000010403933, 0.0…
## $ domshare_firms_H <dbl> 0.00000000000, 0.00002222420, 0.00000000000, 0.0000…
## $ domshare_firms_I <dbl> 0.00000000000, 0.00000000000, 0.00000000000, 0.0000…
## $ domshare_firms_J <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_K <dbl> 0.00000000000, 0.00000000000, 0.00000000000, 0.0000…
## $ domshare_firms_L <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_M <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_N <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_O <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_P <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_Q <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_R <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_S <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_T <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ domshare_firms_U <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.000…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622, -0.03474…
# We only keep the columns that are needed later on
grids_baseline_4_2012 <- grids_baseline_3_2012 %>%
dplyr::select(colnames(grids_baseline_3_2012)[c(1:55, 77)]) %>%
glimpse()
## Rows: 9,320
## Columns: 56
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_A <int> 0, 4, 31, 8, 0, 0, 1, 15, 54, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_B <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_C <int> 0, 1, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_F <int> 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_G <int> 0, 1, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_H <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_I <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_J <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_K <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_L <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_M <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_N <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_O <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_P <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_Q <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_R <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_S <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_T <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_U <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_0_9 <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_size_10_49 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_50_249 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ has_firms <lgl> FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, T…
## $ share_A <dbl> 0.0000000, 0.5714286, 0.8611111, 1.0000000, 0.00000…
## $ share_B <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_C <dbl> 0.00000000, 0.14285714, 0.05555556, 0.00000000, 1.0…
## $ share_D <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_E <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_F <dbl> 0.00000000, 0.00000000, 0.02777778, 0.00000000, 0.0…
## $ share_G <dbl> 0.00000000, 0.14285714, 0.05555556, 0.00000000, 0.0…
## $ share_H <dbl> 0.00000000, 0.14285714, 0.00000000, 0.00000000, 0.0…
## $ share_I <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
## $ share_J <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_K <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
## $ share_L <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_M <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_N <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_O <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_P <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_Q <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_R <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_S <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_T <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_U <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, NA,…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, NA,…
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.000…
grids_baseline_5_2012 <- grids_baseline_5_2012 %>%
mutate(HHI_n = normalize(HHI, method = "range", range = c(0, 1), margin = 2),
HHI_z = as.numeric(scale(HHI)),
H_n = normalize(H, method = "range", range = c(0, 1), margin = 2),
H_z = as.numeric(scale(H)),
KDI_n = normalize(KDI, method = "range", range = c(0, 1), margin = 2),
KDI_z = as.numeric(scale(KDI)),
RV_n = normalize(RV, method = "range", range = c(0, 1), margin = 2),
RV_z = as.numeric(scale(RV))) %>%
glimpse()
## Rows: 9,320
## Columns: 65
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_A <int> 0, 4, 31, 8, 0, 0, 1, 15, 54, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_B <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_C <int> 0, 1, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_F <int> 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_G <int> 0, 1, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_H <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_I <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_J <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_K <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_L <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_M <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_N <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_O <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_P <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_Q <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_R <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_S <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_T <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_U <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_0_9 <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_size_10_49 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_50_249 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ has_firms <lgl> FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, T…
## $ share_A <dbl> 0.0000000, 0.5714286, 0.8611111, 1.0000000, 0.00000…
## $ share_B <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_C <dbl> 0.00000000, 0.14285714, 0.05555556, 0.00000000, 1.0…
## $ share_D <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_E <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_F <dbl> 0.00000000, 0.00000000, 0.02777778, 0.00000000, 0.0…
## $ share_G <dbl> 0.00000000, 0.14285714, 0.05555556, 0.00000000, 0.0…
## $ share_H <dbl> 0.00000000, 0.14285714, 0.00000000, 0.00000000, 0.0…
## $ share_I <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
## $ share_J <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_K <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
## $ share_L <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_M <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_N <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_O <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_P <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_Q <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_R <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_S <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_T <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_U <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, NA,…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, NA,…
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.000…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000, 0.000000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.0000001919530095, 0.00000…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622, -0.03474…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.0000000, NA,…
## $ H_z <dbl> NA, 0.37914484, -0.53831394, -1.37253294, -1.372532…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.9406194, NA,…
## $ KDI_z <dbl> NA, -0.3082596, 0.3814051, 1.2593526, 2.5225016, NA…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0.00000000,…
## $ RV_z <dbl> NA, 0.04314414, -0.52595342, -0.75586755, -0.755867…
Original variables:
summary(grids_baseline_5_2012 %>% dplyr::select(H, KDI, HHI, RV))
## H KDI HHI RV
## Min. :0.0000 Min. :0.1788 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.4359 1st Qu.:0.9160 1st Qu.:0.00000 1st Qu.:0.00426
## Median :0.7730 Median :1.1468 Median :0.00000 Median :0.00871
## Mean :0.9040 Mean :1.0826 Mean :0.00006 Mean :0.01716
## 3rd Qu.:1.2511 3rd Qu.:1.3201 3rd Qu.:0.00000 3rd Qu.:0.01866
## Max. :2.5850 Max. :1.9882 Max. :0.08350 Max. :0.18251
## NA's :712 NA's :712 NA's :712 NA's :712
Normalised variables ([0,1]):
summary(grids_baseline_5_2012 %>% dplyr::select(H_n, KDI_n, HHI_n, RV_n))
## H_n KDI_n HHI_n RV_n
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.1686 1st Qu.:0.4074 1st Qu.:0.00000 1st Qu.:0.02331
## Median :0.2990 Median :0.5350 Median :0.00000 Median :0.04775
## Mean :0.3497 Mean :0.4995 Mean :0.00069 Mean :0.09400
## 3rd Qu.:0.4840 3rd Qu.:0.6308 3rd Qu.:0.00000 3rd Qu.:0.10223
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## NA's :712 NA's :712 NA's :712 NA's :712
Standardised variables (z-score):
summary(grids_baseline_5_2012 %>% dplyr::select(H_z, KDI_z, HHI_z, RV_z))
## H_z KDI_z HHI_z RV_z
## Min. :-1.3725 Min. :-2.8565 Min. :-0.03475 Min. :-0.75587
## 1st Qu.:-0.7108 1st Qu.:-0.5265 1st Qu.:-0.03474 1st Qu.:-0.56839
## Median :-0.1989 Median : 0.2030 Median :-0.03473 Median :-0.37190
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 Mean : 0.00000
## 3rd Qu.: 0.5270 3rd Qu.: 0.7508 3rd Qu.:-0.03467 3rd Qu.: 0.06615
## Max. : 2.5522 Max. : 2.8621 Max. :50.28453 Max. : 7.28532
## NA's :712 NA's :712 NA's :712 NA's :712
# How many grids have no firms in them?
dim(grids_baseline_5_2012 %>% filter(has_firms == F))[1] # 712 have no firms in them
## [1] 712
dim(grids_baseline_5_2012 %>% filter(has_firms == F))[1]/dim(grids_baseline_5_2012)[1] # 7.6%
## [1] 0.07639485
In this step we calculate the outcome in each cell, meaning how each cell change firm-wise: how many firms exited and how many entered.
surv_exit_by_grid <- firms_2012_to_grids %>%
count(ID, SURVIVOR, name = "n") %>%
complete(ID = grid_ids,
fill = list(n = 0L)) %>%
pivot_wider(names_from = SURVIVOR,
values_from = n,
values_fill = 0,
names_prefix = "n_") %>%
transmute(ID,
exits = n_0,
survivors = n_1)
surv_exit_by_grid <- firms_2012_to_grids %>%
transmute(ID,
SURVIVOR = as.integer(SURVIVOR),
IF_TECH = as.integer(dplyr::coalesce(IF_TECH, 0L))) %>%
count(ID, SURVIVOR, IF_TECH, name = "n") %>%
complete(ID = grid_ids, # 4 combinations for each grid (1/1, 1/0, 0/1, 0/0)
SURVIVOR = 0:1,
IF_TECH = 0:1,
fill = list(n = 0L)) %>%
mutate(label = dplyr::case_when(
SURVIVOR == 1 & IF_TECH == 1 ~ "survivors_tech",
SURVIVOR == 1 & IF_TECH == 0 ~ "survivors_non_tech",
SURVIVOR == 0 & IF_TECH == 1 ~ "exits_tech",
SURVIVOR == 0 & IF_TECH == 0 ~ "exits_non_tech")) %>%
dplyr::select(ID, label, n) %>%
pivot_wider(names_from = label,
values_from = n,
values_fill = 0) %>%
mutate(survivors_total = dplyr::coalesce(survivors_tech, 0L) + dplyr::coalesce(survivors_non_tech, 0L),
exits_total = dplyr::coalesce(exits_tech, 0L) + dplyr::coalesce(exits_non_tech, 0L)) %>%
glimpse()
## Rows: 9,320
## Columns: 7
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ exits_non_tech <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ exits_total <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
# Adding survivrs and exits counts to baseline features
grids_outcomes <- cbind(grids_baseline_5_2012, surv_exit_by_grid[, 2:7]) %>% glimpse()
## Rows: 9,320
## Columns: 71
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_A <int> 0, 4, 31, 8, 0, 0, 1, 15, 54, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_B <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_C <int> 0, 1, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_F <int> 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_G <int> 0, 1, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_H <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_I <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_J <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_K <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_L <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_M <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_N <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_O <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_P <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_Q <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_R <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_S <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_T <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_U <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_0_9 <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_size_10_49 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_50_249 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ has_firms <lgl> FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, T…
## $ share_A <dbl> 0.0000000, 0.5714286, 0.8611111, 1.0000000, 0.00000…
## $ share_B <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_C <dbl> 0.00000000, 0.14285714, 0.05555556, 0.00000000, 1.0…
## $ share_D <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_E <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_F <dbl> 0.00000000, 0.00000000, 0.02777778, 0.00000000, 0.0…
## $ share_G <dbl> 0.00000000, 0.14285714, 0.05555556, 0.00000000, 0.0…
## $ share_H <dbl> 0.00000000, 0.14285714, 0.00000000, 0.00000000, 0.0…
## $ share_I <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
## $ share_J <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_K <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
## $ share_L <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_M <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_N <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_O <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_P <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_Q <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_R <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_S <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_T <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_U <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, NA,…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, NA,…
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.000…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000, 0.000000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.0000001919530095, 0.00000…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622, -0.03474…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.0000000, NA,…
## $ H_z <dbl> NA, 0.37914484, -0.53831394, -1.37253294, -1.372532…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.9406194, NA,…
## $ KDI_z <dbl> NA, -0.3082596, 0.3814051, 1.2593526, 2.5225016, NA…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0.00000000,…
## $ RV_z <dbl> NA, 0.04314414, -0.52595342, -0.75586755, -0.755867…
## $ exits_non_tech <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ exits_total <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
# Quick check if the values match (survivors + exits = total firms in grid) - all TRUE
table(grids_outcomes$survivors_total + grids_outcomes$exits_total == grids_outcomes$firms_total)
##
## TRUE
## 9320
table(grids_outcomes$survivors_non_tech + grids_outcomes$survivors_tech == grids_outcomes$survivors_total)
##
## TRUE
## 9320
table(grids_outcomes$exits_non_tech + grids_outcomes$exits_tech == grids_outcomes$exits_total)
##
## TRUE
## 9320
# Changing to projected CRS for 2021 subset (2180 for Poland)
firms_2021 <- st_transform(firms_2021, 2180)
# Checking if the geometries of grids and new firms (from 2021) match
st_crs(grids_maz) == st_crs(firms_2021) # TRUE - it matches
## [1] TRUE
# Assigning each firm to ONE grid (nearest grid - distance is 0 when inside)
idx_2 <- st_nearest_feature(firms_2021, grids_maz)
# For each firm adding the ID of a grid they lay in
firms_2021_to_grids <- firms_2021 %>%
st_drop_geometry() %>% # dropping geometries for faster calculations
mutate(ID = grids_maz$ID[idx_2])
# Counting new firms per grid
new_firms_by_grid <- firms_2021_to_grids %>%
transmute(ID,
SURVIVOR = as.integer(SURVIVOR),
IF_TECH = as.integer(dplyr::coalesce(IF_TECH, 0L))) %>%
count(ID, SURVIVOR, IF_TECH, name = "n") %>%
complete(ID = grid_ids, # 4 combinations for each grid (1/1, 1/0, 0/1, 0/0)
SURVIVOR = 0:1,
IF_TECH = 0:1,
fill = list(n = 0L)) %>%
mutate(label = dplyr::case_when(
SURVIVOR == 1 & IF_TECH == 1 ~ "survivors_tech",
SURVIVOR == 1 & IF_TECH == 0 ~ "survivors_non_tech",
SURVIVOR == 0 & IF_TECH == 1 ~ "new_firms_tech",
SURVIVOR == 0 & IF_TECH == 0 ~ "new_firms_non_tech")) %>%
dplyr::select(ID, label, n) %>%
pivot_wider(names_from = label,
values_from = n,
values_fill = 0) %>%
mutate(new_firms_total = dplyr::coalesce(new_firms_tech, 0L) + dplyr::coalesce(new_firms_non_tech, 0L)) %>%
glimpse()
## Rows: 9,320
## Columns: 6
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ new_firms_non_tech <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_total <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
# Calculating firm births in each sector
sector_letters <- LETTERS[1:21]
births_grid_sector_2021 <- firms_2021_to_grids %>%
filter(SURVIVOR == 0L) %>%
transmute(ID, SEK_PKD7) %>%
count(ID, SEK_PKD7, name = "n") %>%
complete(ID = grid_ids, # 21 combinations for each grid
SEK_PKD7 = sector_letters,
fill = list(n = 0L)) %>%
pivot_wider(names_from = SEK_PKD7,
values_from = n,
names_prefix = "births_") %>%
glimpse()
## Rows: 9,320
## Columns: 22
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
## $ births_A <int> 0, 1, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1…
## $ births_B <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_C <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ births_D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_F <int> 0, 0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0…
## $ births_G <int> 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ births_H <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_I <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_J <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_K <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_L <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_M <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ births_N <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_O <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_P <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_Q <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_R <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_S <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0…
## $ births_T <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ births_U <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# Adding firm births to the dataset
grids_outcomes_2 <- grids_outcomes %>%
cbind(new_firms_by_grid[, c(2, 3, 6)],
births_grid_sector_2021[, c(2:22)]) %>%
glimpse()
## Rows: 9,320
## Columns: 95
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_A <int> 0, 4, 31, 8, 0, 0, 1, 15, 54, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_B <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_C <int> 0, 1, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_F <int> 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_G <int> 0, 1, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_H <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_I <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_J <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_K <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_L <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_M <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_N <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_O <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_P <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_Q <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_R <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_S <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_T <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_U <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_0_9 <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_size_10_49 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_50_249 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ has_firms <lgl> FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, T…
## $ share_A <dbl> 0.0000000, 0.5714286, 0.8611111, 1.0000000, 0.00000…
## $ share_B <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_C <dbl> 0.00000000, 0.14285714, 0.05555556, 0.00000000, 1.0…
## $ share_D <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_E <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_F <dbl> 0.00000000, 0.00000000, 0.02777778, 0.00000000, 0.0…
## $ share_G <dbl> 0.00000000, 0.14285714, 0.05555556, 0.00000000, 0.0…
## $ share_H <dbl> 0.00000000, 0.14285714, 0.00000000, 0.00000000, 0.0…
## $ share_I <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
## $ share_J <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_K <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
## $ share_L <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_M <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_N <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_O <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_P <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_Q <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_R <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_S <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_T <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ share_U <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, NA,…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, NA,…
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.000…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000, 0.000000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.0000001919530095, 0.00000…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622, -0.03474…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.0000000, NA,…
## $ H_z <dbl> NA, 0.37914484, -0.53831394, -1.37253294, -1.372532…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.9406194, NA,…
## $ KDI_z <dbl> NA, -0.3082596, 0.3814051, 1.2593526, 2.5225016, NA…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0.00000000,…
## $ RV_z <dbl> NA, 0.04314414, -0.52595342, -0.75586755, -0.755867…
## $ exits_non_tech <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ exits_total <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ new_firms_non_tech <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_total <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ births_A <int> 0, 1, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ births_B <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_C <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_D <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_E <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_F <int> 0, 0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ births_G <int> 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_H <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_I <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_J <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_K <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_L <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_M <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_N <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_O <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_P <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_Q <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_R <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_S <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ births_T <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ births_U <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
First we calculate grid-sector relatedness values. For each grid it’s a weighted average of how related candidate sector is to grid’s incumbent sectors k.
# Build matrices
P_inc <- as.matrix(dplyr::select(grids_outcomes_2, starts_with("share_"))) # Grids × sectoral shares
Births <- as.matrix(dplyr::select(grids_outcomes_2, starts_with("births_"))) # Grids x firm births
# Calculate relatedness matrix (similarily as for related variety calculated before, but now we also include the diagonals, as we care about relatedness of the same sector firms as well)
PMI_relatedness <- log(R)
max_relatedness <- max(PMI_relatedness)
phi_relatedness <- PMI_relatedness / max_relatedness
# R' = P_inc * phi
# Rprime tells us how well sector s “fits” the current structure of grid g,
# taking a weighted average of its relatedness to the sectors that are already present in g.
# High value ⇒ sector s is strongly connected to what’s already present in grid g.
# Low value ⇒ sector s is weakly related to the grid’s current structure.
Rprime <- P_inc %*% phi_relatedness
head(Rprime)
## firms_A firms_B firms_C firms_D firms_E firms_F
## [1,] 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## [2,] 0.01164657 0.03711688 0.03313269 0.04000872 0.03967672 0.02282728
## [3,] 0.01187857 0.01845826 0.01816916 0.01920331 0.01897797 0.01551354
## [4,] 0.01201129 0.01164896 0.01106021 0.01166859 0.01128125 0.01106004
## [5,] 0.01106021 0.07770913 0.09715810 0.08460415 0.08480694 0.04058568
## [6,] 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## firms_G firms_H firms_I firms_J firms_K firms_L
## [1,] 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## [2,] 0.02557106 0.03599143 0.03556797 0.03714234 0.03619545 0.03935129
## [3,] 0.01569806 0.01608934 0.01789354 0.01810903 0.01823532 0.01865785
## [4,] 0.01104901 0.01137164 0.01116949 0.01094269 0.01150760 0.01094467
## [5,] 0.03816067 0.05236921 0.07465614 0.07799137 0.07356317 0.08316689
## [6,] 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## firms_M firms_N firms_O firms_P firms_Q firms_R
## [1,] 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## [2,] 0.03272099 0.03229252 0.02771552 0.03530657 0.03586230 0.03718214
## [3,] 0.01688014 0.01725064 0.01664178 0.01812313 0.01802400 0.01829810
## [4,] 0.01064176 0.01133096 0.01188631 0.01137962 0.01116360 0.01118443
## [5,] 0.06769625 0.06507635 0.05066553 0.07351860 0.07499801 0.07746865
## [6,] 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## firms_S firms_T firms_U
## [1,] 0.00000000 0.00000000 0.00000000
## [2,] 0.03146209 0.04506909 0.04414351
## [3,] 0.01695410 0.02075147 0.02021155
## [4,] 0.01118418 0.01201129 0.01201129
## [5,] 0.06322600 0.09715810 0.09067907
## [6,] 0.00000000 0.00000000 0.00000000
Calculating renewal
renewal <- rowSums(Rprime * Births) / rowSums(Births)
This can be only calculated for the grids where firms were present in 2012 and some firm births occurred after, otherwise we get an NA.
Calculating reorientation
reorientation <- rowSums((Rprime < 0.02) * Births) / rowSums(Births)
Next we normalize the RR and TE measures to be able to compare them
renewal_z <- as.numeric(scale(renewal)) # z-score RR
reorientation_z <- as.numeric(scale(reorientation)) # z-score TE
summary(renewal_z)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.5357 -0.6467 -0.3723 0.0000 0.2371 8.3480 1517
summary(reorientation_z)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.26523 -1.08014 -0.06213 0.00000 1.14097 1.14097 1517
# Adding the renewal and reorientation measures to each grid and dropping the unnecessary variables
grids_outcomes_3 <- grids_outcomes_2 %>%
mutate(renewal = renewal_z,
renewal = if_else(new_firms_total > 0, renewal, NA),
reorientation = reorientation_z,
reorientation = if_else(new_firms_total > 0, reorientation, NA)) %>%
dplyr::select(colnames(grids_outcomes_2)[c(1:3,
25:26,
30:31,
54:74)],
renewal, reorientation) %>%
glimpse()
## Rows: 9,320
## Columns: 30
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, NA,…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, NA,…
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.000…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000, 0.000000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.0000001919530095, 0.00000…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622, -0.03474…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.0000000, NA,…
## $ H_z <dbl> NA, 0.37914484, -0.53831394, -1.37253294, -1.372532…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.9406194, NA,…
## $ KDI_z <dbl> NA, -0.3082596, 0.3814051, 1.2593526, 2.5225016, NA…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0.00000000,…
## $ RV_z <dbl> NA, 0.04314414, -0.52595342, -0.75586755, -0.755867…
## $ exits_non_tech <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ exits_total <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ new_firms_non_tech <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_total <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ renewal <dbl> NA, -0.003761441, -0.605257647, NA, NA, NA, NA, -0.…
## $ reorientation <dbl> NA, -0.7839933, 1.1409718, NA, NA, NA, NA, 1.140971…
# Selecting grids with firms inside them
full_grids <- grids_outcomes_3 %>% filter(firms_total > 0)
dim(full_grids)[1] # 8608 grids for modelling
## [1] 8608
# Adding measure for firms per resident
full_grids_1 <- full_grids %>%
mutate(firms_per_person = if_else(population > 0 & firms_total > 0,
firms_total/population,
0)) %>%
glimpse()
## Rows: 8,608
## Columns: 31
## $ ID <int> 2, 3, 4, 5, 7, 8, 9, 13, 17, 18, 19, 20, 21, 22, 23…
## $ population <int> 118, 251, 77, 3, 9, 104, 202, 43, 47, 46, 37, 173, …
## $ firms_total <dbl> 7, 36, 8, 1, 1, 18, 60, 1, 12, 11, 13, 45, 15, 20, …
## $ firms_non_tech <int> 7, 36, 8, 1, 1, 18, 60, 1, 12, 11, 13, 45, 15, 20, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> 1.1537419, 0.5494580, 0.0000000, 0.0000000, 0.00000…
## $ KDI <dbl> 0.9850775, 1.2032836, 1.4810614, 1.8807148, 1.48106…
## $ HHI <dbl> 0.00000000105125349, 0.00000001604261866, 0.0000000…
## $ RV <dbl> 0.018134908, 0.005218286, 0.000000000, 0.000000000,…
## $ HHI_n <dbl> 0.0000000124085269, 0.0000001919530095, 0.000000011…
## $ HHI_z <dbl> -0.03474617, -0.03473713, -0.03474622, -0.03474663,…
## $ H_n <dbl> 0.44631939, 0.21255514, 0.00000000, 0.00000000, 0.0…
## $ H_z <dbl> 0.37914484, -0.53831394, -1.37253294, -1.37253294, …
## $ KDI_n <dbl> 0.4456040, 0.5662057, 0.7197325, 0.9406194, 0.71973…
## $ KDI_z <dbl> -0.3082596, 0.3814051, 1.2593526, 2.5225016, 1.2593…
## $ RV_n <dbl> 0.09936488, 0.02859206, 0.00000000, 0.00000000, 0.0…
## $ RV_z <dbl> 0.04314414, -0.52595342, -0.75586755, -0.75586755, …
## $ exits_non_tech <int> 2, 3, 1, 0, 0, 2, 1, 0, 1, 0, 1, 3, 2, 2, 1, 0, 0, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 5, 33, 7, 1, 1, 16, 59, 1, 11, 11, 12, 42, 13, 18, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 5, 33, 7, 1, 1, 16, 59, 1, 11, 11, 12, 42, 13, 18, …
## $ exits_total <int> 2, 3, 1, 0, 0, 2, 1, 0, 1, 0, 1, 3, 2, 2, 1, 0, 0, …
## $ new_firms_non_tech <int> 5, 1, 0, 0, 0, 1, 10, 0, 3, 4, 0, 2, 1, 3, 1, 0, 0,…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_total <int> 5, 1, 0, 0, 0, 1, 10, 0, 3, 4, 0, 2, 1, 3, 1, 0, 0,…
## $ renewal <dbl> -0.003761441, -0.605257647, NA, NA, NA, -0.82482310…
## $ reorientation <dbl> -0.7839933, 1.1409718, NA, NA, NA, 1.1409718, 1.140…
## $ firms_per_person <dbl> 0.05932203, 0.14342629, 0.10389610, 0.33333333, 0.1…
We model survival at the grid level. For each grid \(g\):
The outcome is modeled as a grouped binomial: \[ y_g \sim \mathrm{Binomial}(N_g,\, p_g), \]
survival_binomial <- glm(cbind(survivors_total, exits_total) ~
firms_per_person + # number of firms per person
RV +
H +
HHI +
KDI,
family = binomial,
data = full_grids_1)
summary(survival_binomial)
##
## Call:
## glm(formula = cbind(survivors_total, exits_total) ~ firms_per_person +
## RV + H + HHI + KDI, family = binomial, data = full_grids_1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.30858 0.06813 63.24 < 0.0000000000000002 ***
## firms_per_person -0.52003 0.02544 -20.44 < 0.0000000000000002 ***
## RV 1.75969 0.25689 6.85 0.00000000000739 ***
## H -1.39084 0.02935 -47.39 < 0.0000000000000002 ***
## HHI 2.34850 0.19951 11.77 < 0.0000000000000002 ***
## KDI -0.90688 0.04154 -21.83 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 79829 on 8607 degrees of freedom
## Residual deviance: 17689 on 8602 degrees of freedom
## AIC: 42611
##
## Number of Fisher Scoring iterations: 4
# Standardized Pearson residuals per cell
z_surv <- rstandard(survival_binomial, type = "pearson")
summary(z_surv)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -30.85576 -0.68786 0.17866 -0.01226 0.80032 11.34754
RESISTANCE measure: Z_surv is a standardized Pearson residual - it tells us how much the cell over/under-performed survival relative to expected in in σ units:
full_grids_1$z_surv <- rstandard(survival_binomial, type = "pearson")
We use the negative binomial model.
library(MASS)
births_nb <- glm.nb(new_firms_total ~
firms_per_person +
RV +
H +
HHI +
KDI,
data = full_grids_1)
summary(births_nb)
##
## Call:
## glm.nb(formula = new_firms_total ~ firms_per_person + RV + H +
## HHI + KDI, data = full_grids_1, init.theta = 1.743913628,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.56281 0.17286 20.611 <0.0000000000000002 ***
## firms_per_person 2.45819 0.07422 33.118 <0.0000000000000002 ***
## RV 36.89460 0.95440 38.658 <0.0000000000000002 ***
## H -0.15064 0.07295 -2.065 0.0389 *
## HHI 51.31378 5.00582 10.251 <0.0000000000000002 ***
## KDI -2.26921 0.11044 -20.547 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.7439) family taken to be 1)
##
## Null deviance: 58960.9 on 8607 degrees of freedom
## Residual deviance: 9364.2 on 8602 degrees of freedom
## AIC: 54528
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.7439
## Std. Err.: 0.0327
##
## 2 x log-likelihood: -54513.5510
# Standardized Pearson residuals per cell
z_births <- rstandard(births_nb, type = "pearson")
summary(z_births)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.466076 -0.722199 -0.232492 0.001064 0.428026 40.002080
RECOVERY measure: Z_births is a standardized Pearson residual - it tells us how much the cell over/under-performed in new births relative to expected in in σ units:
# full_grids_1$fit_births_nb <- fitted(births_nb)
full_grids_1$z_births_nb <- rstandard(births_nb, type = "pearson")
We add the resistance and renewal measures to the full dataset (all grids from Mazowieckie are present).
grids_outcomes_4 <- grids_outcomes_3 %>%
left_join(full_grids_1 %>%
dplyr::select(ID, z_surv, z_births_nb),
by = "ID") %>%
rename(resistance = z_surv,
recovery = z_births_nb) %>%
glimpse()
## Rows: 9,320
## Columns: 32
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, NA,…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, NA,…
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.000…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000, 0.000000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.0000001919530095, 0.00000…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622, -0.03474…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.0000000, NA,…
## $ H_z <dbl> NA, 0.37914484, -0.53831394, -1.37253294, -1.372532…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.9406194, NA,…
## $ KDI_z <dbl> NA, -0.3082596, 0.3814051, 1.2593526, 2.5225016, NA…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0.00000000,…
## $ RV_z <dbl> NA, 0.04314414, -0.52595342, -0.75586755, -0.755867…
## $ exits_non_tech <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ exits_total <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ new_firms_non_tech <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_total <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ renewal <dbl> NA, -0.003761441, -0.605257647, NA, NA, NA, NA, -0.…
## $ reorientation <dbl> NA, -0.7839933, 1.1409718, NA, NA, NA, NA, 1.140971…
## $ resistance <dbl> NA, -1.10624910, 0.01685743, -0.93866384, 0.2967646…
## $ recovery <dbl> NA, -0.3573670, -0.7887458, -0.9106664, -0.8266104,…
In this step we calculate the Local Industrial Resilience Index (LIRI) which is composed of 4 components (each calculated at grid (g) level):
Resistance - standardized Pearson residual from the binomial model for survival (calculated in step 5.2)
Recovery - standardized Pearson residual from the Negative-binomial model for firm births (calculated in step 5.3)
Renewal - births-weighted average relatedness (z-scored) (calculated in step 4.2)
Reorientation - share of new firms in sectors considered unrelated to incumbentsm(z-scored) (calculated in step 4.2)
\[ LIRI(g) = \frac{\text{Resistance}(g) + \text{Recovery}(g) + 0.5 \cdot \text{Renewal}(g) + 0.5 \cdot \text{Reorientation}(g)}{3} \]
The higher the LIRI, the more resilient grid cell and more adaptive to changing conditions.
Let’s visualize the pairwise scatterplots for the LIRI components.
grids_LIRI_full <- grids_outcomes_4 %>% filter(!is.na(renewal) &
!is.na(reorientation) &
!is.na(resistance) &
!is.na(recovery)) %>% glimpse()
## Rows: 7,783
## Columns: 32
## $ ID <int> 2, 3, 8, 9, 17, 18, 20, 21, 22, 23, 27, 28, 29, 30,…
## $ population <int> 118, 251, 104, 202, 47, 46, 173, 88, 130, 47, 52, 2…
## $ firms_total <dbl> 7, 36, 18, 60, 12, 11, 45, 15, 20, 15, 12, 42, 40, …
## $ firms_non_tech <int> 7, 36, 18, 60, 12, 11, 45, 15, 20, 15, 12, 42, 40, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> 1.1537419, 0.5494580, 0.5566469, 0.4262234, 0.00000…
## $ KDI <dbl> 0.9850775, 1.2032836, 1.2248516, 1.2810614, 1.48106…
## $ HHI <dbl> 0.0000000010512535, 0.0000000160426187, 0.000000004…
## $ RV <dbl> 0.018134908, 0.005218286, 0.005998433, 0.003789328,…
## $ HHI_n <dbl> 0.000000012408527, 0.000000191953010, 0.00000005347…
## $ HHI_z <dbl> -0.03474617, -0.03473713, -0.03474410, -0.03471851,…
## $ H_n <dbl> 0.44631939, 0.21255514, 0.21533611, 0.16488242, 0.0…
## $ H_z <dbl> 0.37914484, -0.53831394, -0.52739940, -0.72541591, …
## $ KDI_n <dbl> 0.4456040, 0.5662057, 0.5781262, 0.6091932, 0.71973…
## $ KDI_z <dbl> -0.30825961, 0.38140509, 0.44957293, 0.62723039, 1.…
## $ RV_n <dbl> 0.09936488, 0.02859206, 0.03286664, 0.02076250, 0.0…
## $ RV_z <dbl> 0.04314414, -0.52595342, -0.49158069, -0.58891236, …
## $ exits_non_tech <int> 2, 3, 2, 1, 1, 0, 3, 2, 2, 1, 3, 4, 1, 2, 3, 7, 6, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 5, 33, 16, 59, 11, 11, 42, 13, 18, 14, 9, 38, 39, 1…
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 5, 33, 16, 59, 11, 11, 42, 13, 18, 14, 9, 38, 39, 1…
## $ exits_total <int> 2, 3, 2, 1, 1, 0, 3, 2, 2, 1, 3, 4, 1, 2, 3, 7, 6, …
## $ new_firms_non_tech <int> 5, 1, 1, 10, 3, 4, 2, 1, 3, 1, 1, 7, 6, 2, 2, 5, 5,…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ new_firms_total <int> 5, 1, 1, 10, 3, 4, 2, 1, 3, 1, 1, 7, 6, 2, 2, 6, 5,…
## $ renewal <dbl> -0.003761441, -0.605257647, -0.824823103, -0.700643…
## $ reorientation <dbl> -0.7839933, 1.1409718, 1.1409718, 1.1409718, 1.1409…
## $ resistance <dbl> -1.10624910, 0.01685743, -0.35396354, 1.85744612, -…
## $ recovery <dbl> -0.35736704, -0.78874575, -0.81033190, 1.46969002, …
pairs(grids_LIRI_full %>% dplyr::select(renewal, reorientation,
resistance, recovery))
summary(grids_LIRI_full %>% dplyr::select(renewal,
reorientation,
resistance,
recovery))
## renewal reorientation resistance recovery
## Min. :-0.897455 Min. :-1.265235 Min. :-30.85576 Min. :-1.4661
## 1st Qu.:-0.645123 1st Qu.:-1.085409 1st Qu.: -0.72425 1st Qu.:-0.5792
## Median :-0.371265 Median :-0.062131 Median : 0.10641 Median :-0.1299
## Mean : 0.003946 Mean :-0.002932 Mean : -0.02191 Mean : 0.1073
## 3rd Qu.: 0.240594 3rd Qu.: 1.140972 3rd Qu.: 0.83340 3rd Qu.: 0.5220
## Max. : 8.347952 Max. : 1.140972 Max. : 11.34754 Max. :40.0021
grids_outcomes_5 <- grids_outcomes_4 %>%
mutate(LIRI = if_else(!is.na(renewal) &
!is.na(reorientation) &
!is.na(resistance) &
!is.na(recovery),
(resistance + recovery + 0.5*renewal + 0.5*reorientation)/3,
NA)) %>%
glimpse()
## Rows: 9,320
## Columns: 33
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, NA,…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, NA,…
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.000…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000, 0.000000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.0000001919530095, 0.00000…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622, -0.03474…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.0000000, NA,…
## $ H_z <dbl> NA, 0.37914484, -0.53831394, -1.37253294, -1.372532…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.9406194, NA,…
## $ KDI_z <dbl> NA, -0.3082596, 0.3814051, 1.2593526, 2.5225016, NA…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0.00000000,…
## $ RV_z <dbl> NA, 0.04314414, -0.52595342, -0.75586755, -0.755867…
## $ exits_non_tech <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ exits_total <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ new_firms_non_tech <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_total <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ renewal <dbl> NA, -0.003761441, -0.605257647, NA, NA, NA, NA, -0.…
## $ reorientation <dbl> NA, -0.7839933, 1.1409718, NA, NA, NA, NA, 1.140971…
## $ resistance <dbl> NA, -1.10624910, 0.01685743, -0.93866384, 0.2967646…
## $ recovery <dbl> NA, -0.3573670, -0.7887458, -0.9106664, -0.8266104,…
## $ LIRI <dbl> NA, -0.61916450, -0.16801042, NA, NA, NA, NA, -0.33…
# Checking for how many grids we were able to calculate the LIRI
grids_outcomes_5 %>% filter(!is.na(LIRI)) %>% nrow()
## [1] 7783
(grids_outcomes_5 %>% filter(!is.na(LIRI)) %>% nrow())/grids_outcomes_5 %>% nrow()
## [1] 0.8350858
We can calculate the LIRI index for grids in which (1) there were firms present in 2012 and (2) some firm births occurred after 2012. We were able to calculate this index for 7 783 out of 9 320 grids (~ 84% of grids) - only those grids met the needed conditions.
summary(grids_outcomes_5$LIRI)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -7.24589 -0.31829 0.03781 0.02863 0.33424 7.14900 1537
We check impact of the relative location measure by set of dummy
variables that indicated if grid is within the radius of 10, 25 of 50 km
from the city of five sizes: - midsize city (dist_core_10,
dist_core_25, dist_core_50) - midsize city
(dist_midsize_10, dist_midsize_25,
dist_midsize_50) - regional city
(dist_regional_10, dist_regional_25,
dist_regional_50) - local big city
(dist_localbig_10, dist_localbig_25,
dist_localbig_50) - local small city
(dist_localsmall_10, dist_localsmall_25,
dist_localsmall_50)
First we create a table with the coordinates for the cities and their classification.
cities <- tribble(
~id, ~city, ~pop, ~lon, ~lat,
1, "Warszawa", 1729119, 21.06119, 52.23294,
2, "Radom", 217834, 21.01389, 51.41520,
3, "Płock", 122572, 19.64501, 52.53549,
4, "Siedlce", 76585, 22.14144, 52.16167,
5, "Pruszków", 59796, 20.66378, 52.17202,
6, "Legionowo", 54246, 20.90458, 52.40488,
7, "Ostrołęka", 52792, 21.51907, 53.07745,
8, "Piaseczno", 45270, 20.88196, 52.07337,
9, "Otwock", 45073, 21.21945, 52.11633,
10, "Ciechanów", 44673, 20.54715, 52.87107,
11, "Żyrardów", 41056, 20.40536, 52.05526,
12, "Mińsk Mazowiecki", 40028, 21.52169, 52.17917,
13, "Wołomin", 37418, 21.20118, 52.34264,
14, "Sochaczew", 37333, 20.10920, 52.23635,
15, "Ząbki", 32376, 21.07776, 52.29252,
16, "Mława", 30893, 20.31333, 53.13291,
17, "Grodzisk Mazowiecki", 29988, 20.59806, 52.10513,
18, "Marki", 29395, 21.04995, 52.33716,
19, "Nowy Dwór Mazowiecki", 28361, 20.42122, 52.43413,
20, "Wyszków", 27205, 21.41228, 52.59230,
21, "Piastów", 22862, 20.77894, 52.18553,
22, "Ostrów Mazowiecka", 22770, 21.85631, 52.80713,
23, "Płońsk", 22435, 20.32957, 52.62666,
24, "Kobyłka", 21132, 21.16377, 52.34008,
25, "Józefów", 20013, 21.08495, 52.12857,
26, "Sulejówek", 19385, 21.24671, 52.24465,
27, "Pionki", 19286, 21.41105, 51.47252,
28, "Pułtusk", 19229, 21.04836, 52.70321,
29, "Gostynin", 19026, 19.33104, 52.42421,
30, "Sokołów Podlaski", 18730, 22.19902, 52.41285,
31, "Sierpc", 18468, 19.63032, 52.85337,
32, "Kozienice", 18150, 21.50136, 51.58732,
33, "Zielonka", 17434, 21.07706, 52.29300,
34, "Konstancin_Jeziorna", 17371, 21.07058, 52.08389,
35, "Przasnysz", 17337, 20.84989, 53.01855,
36, "Garwolin", 17160, 21.58779, 51.89536,
37, "Łomianki", 16632, 20.85897, 52.33345,
38, "Grójec", 16430, 20.83774, 51.86819,
39, "Milanówek", 16427, 20.63528, 52.12485
) %>%
mutate(
group = case_when(
pop >= 1e6 ~ "core", # 1 city: Warszawa
pop >= 1e5 ~ "midsize", # 2 cities: Radom, Płock
pop >= 5e4 ~ "regional", # 4 cities
pop >= 3e4 ~ "localbig", # 9 cities
pop >= 1.5e4 ~ "localsmall", # 23 cities
TRUE ~ NA_character_
)
) %>%
glimpse()
## Rows: 39
## Columns: 6
## $ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1…
## $ city <chr> "Warszawa", "Radom", "Płock", "Siedlce", "Pruszków", "Legionowo"…
## $ pop <dbl> 1729119, 217834, 122572, 76585, 59796, 54246, 52792, 45270, 4507…
## $ lon <dbl> 21.06119, 21.01389, 19.64501, 22.14144, 20.66378, 20.90458, 21.5…
## $ lat <dbl> 52.23294, 51.41520, 52.53549, 52.16167, 52.17202, 52.40488, 53.0…
## $ group <chr> "core", "midsize", "midsize", "regional", "regional", "regional"…
cities.sf <- st_as_sf(cities, coords = c("lon","lat"), crs = 4326)
# st_crs(grids_outcomes_5_sf) # 2180
# Adding geometry
grids_outcomes_5_sf <- grids_outcomes_5 %>%
st_set_geometry(st_geometry(grids_maz))
# Centroids as grid geometry
grids_outcomes_5_centr <- grids_outcomes_5_sf %>%
st_centroid() %>%
glimpse()
## Rows: 9,320
## Columns: 34
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, …
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, NA,…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, NA,…
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.000…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000, 0.000000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.0000001919530095, 0.00000…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622, -0.03474…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.0000000, NA,…
## $ H_z <dbl> NA, 0.37914484, -0.53831394, -1.37253294, -1.372532…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.9406194, NA,…
## $ KDI_z <dbl> NA, -0.3082596, 0.3814051, 1.2593526, 2.5225016, NA…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0.00000000,…
## $ RV_z <dbl> NA, 0.04314414, -0.52595342, -0.75586755, -0.755867…
## $ exits_non_tech <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, …
## $ exits_total <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ new_firms_non_tech <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_total <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ renewal <dbl> NA, -0.003761441, -0.605257647, NA, NA, NA, NA, -0.…
## $ reorientation <dbl> NA, -0.7839933, 1.1409718, NA, NA, NA, NA, 1.140971…
## $ resistance <dbl> NA, -1.10624910, 0.01685743, -0.93866384, 0.2967646…
## $ recovery <dbl> NA, -0.3573670, -0.7887458, -0.9106664, -0.8266104,…
## $ LIRI <dbl> NA, -0.61916450, -0.16801042, NA, NA, NA, NA, -0.33…
## $ geometry <POINT [m]> POINT (668613.9 353474.6), POINT (670613.9 35…
# Changing projection for the cities
cities.m <- st_transform(cities.sf, 2180)
st_crs(grids_outcomes_5_centr) == st_crs(cities.m) # TRUE
## [1] TRUE
# Calculating distances
D <- st_distance(grids_outcomes_5_centr, cities.m) # distances in metres
D_km <- set_units(D, "km") |> drop_units() # distances in km + matrix
# Adding names of columns (one column for each city = new 39 columns)
city_cols <- paste0(
"dist_", str_replace_all(tolower(cities$city), "\\s+", "_"), "_km")
colnames(D_km) <- city_cols
grids_with_city_dists <- bind_cols(grids_outcomes_5_centr, as_tibble(D_km))
glimpse(grids_with_city_dists)
## Rows: 9,320
## Columns: 73
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13…
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 1…
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1,…
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1,…
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.00…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.88…
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.000000191953009…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.00…
## $ H_z <dbl> NA, 0.37914484, -0.53831394, -1.37253294,…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.94…
## $ KDI_z <dbl> NA, -0.3082596, 0.3814051, 1.2593526, 2.5…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0…
## $ RV_z <dbl> NA, 0.04314414, -0.52595342, -0.75586755,…
## $ exits_non_tech <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0,…
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1,…
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ survivors_total <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1,…
## $ exits_total <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0,…
## $ new_firms_non_tech <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ new_firms_total <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0…
## $ renewal <dbl> NA, -0.003761441, -0.605257647, NA, NA, N…
## $ reorientation <dbl> NA, -0.7839933, 1.1409718, NA, NA, NA, NA…
## $ resistance <dbl> NA, -1.10624910, 0.01685743, -0.93866384,…
## $ recovery <dbl> NA, -0.3573670, -0.7887458, -0.9106664, -…
## $ LIRI <dbl> NA, -0.61916450, -0.16801042, NA, NA, NA,…
## $ dist_warszawa_km <dbl> 136.6155, 137.0379, 137.4881, 137.9658, 1…
## $ dist_radom_km <dbl> 51.43201, 52.57096, 53.76020, 54.99648, 4…
## $ dist_płock_km <dbl> 207.3859, 208.5963, 209.8188, 211.0532, 2…
## $ dist_siedlce_km <dbl> 136.5111, 135.8476, 135.2104, 134.5999, 1…
## $ dist_pruszków_km <dbl> 137.6734, 138.4827, 139.3160, 140.1729, 1…
## $ dist_legionowo_km <dbl> 157.4892, 157.9975, 158.5294, 159.0847, 1…
## $ dist_ostrołęka_km <dbl> 228.6120, 228.6202, 228.6458, 228.6888, 2…
## $ dist_piaseczno_km <dbl> 122.2778, 122.9412, 123.6334, 124.3540, 1…
## $ dist_otwock_km <dbl> 122.2380, 122.5272, 122.8483, 123.2010, 1…
## $ dist_ciechanów_km <dbl> 213.7816, 214.3936, 215.0225, 215.6680, 2…
## $ dist_żyrardów_km <dbl> 134.0671, 135.1559, 136.2654, 137.3950, 1…
## $ dist_mińsk_mazowiecki_km <dbl> 128.8081, 128.7655, 128.7539, 128.7734, 1…
## $ dist_wołomin_km <dbl> 147.4063, 147.6735, 147.9673, 148.2874, 1…
## $ dist_sochaczew_km <dbl> 161.9775, 163.1328, 164.3042, 165.4915, 1…
## $ dist_ząbki_km <dbl> 142.9575, 143.3481, 143.7654, 144.2093, 1…
## $ dist_mława_km <dbl> 246.2451, 246.9083, 247.5860, 248.2779, 2…
## $ dist_grodzisk_mazowiecki_km <dbl> 132.6688, 133.5732, 134.5013, 135.4526, 1…
## $ dist_marki_km <dbl> 148.1662, 148.5705, 149.0006, 149.4561, 1…
## $ dist_nowy_dwór_mazowiecki_km <dbl> 170.9681, 171.8203, 172.6913, 173.5811, 1…
## $ dist_wyszków_km <dbl> 174.5070, 174.5788, 174.6735, 174.7910, 1…
## $ dist_piastów_km <dbl> 136.3263, 137.0292, 137.7576, 138.5110, 1…
## $ dist_ostrów_mazowiecka_km <dbl> 200.8290, 200.6013, 200.3934, 200.2052, 1…
## $ dist_płońsk_km <dbl> 193.0888, 193.9120, 194.7523, 195.6094, 1…
## $ dist_kobyłka_km <dbl> 147.3923, 147.6940, 148.0220, 148.3763, 1…
## $ dist_józefów_km <dbl> 124.9036, 125.3343, 125.7953, 126.2862, 1…
## $ dist_sulejówek_km <dbl> 136.2738, 136.5125, 136.7799, 137.0761, 1…
## $ dist_pionki_km <dbl> 49.95792, 50.04659, 50.21483, 50.46184, 4…
## $ dist_pułtusk_km <dbl> 188.4414, 188.7729, 189.1249, 189.4974, 1…
## $ dist_gostynin_km <dbl> 211.5990, 212.9849, 214.3805, 215.7856, 2…
## $ dist_sokołów_podlaski_km <dbl> 164.0078, 163.4229, 162.8604, 162.3206, 1…
## $ dist_sierpc_km <dbl> 237.2514, 238.3211, 239.4027, 240.4961, 2…
## $ dist_kozienice_km <dbl> 63.08335, 62.96861, 62.91727, 62.92948, 6…
## $ dist_zielonka_km <dbl> 143.0178, 143.4089, 143.8268, 144.2712, 1…
## $ dist_konstancin_jeziorna_km <dbl> 120.2063, 120.6677, 121.1603, 121.6839, 1…
## $ dist_przasnysz_km <dbl> 225.1506, 225.5550, 225.9763, 226.4146, 2…
## $ dist_garwolin_km <dbl> 97.81512, 97.64351, 97.51264, 97.42265, 9…
## $ dist_łomianki_km <dbl> 150.5134, 151.0836, 151.6780, 152.2963, 1…
## $ dist_grójec_km <dbl> 101.89043, 102.73344, 103.60820, 104.5139…
## $ dist_milanówek_km <dbl> 133.6060, 134.4669, 135.3520, 136.2606, 1…
## $ geometry <POINT [m]> POINT (668613.9 353474.6), POINT (6…
# For each grid we have the distance to each city listed
# Calculating minimal distance to each city type
groups <- c("core","midsize","regional","localbig","localsmall")
min_by_group <- map_dfc(groups, function(g) {
idx <- which(cities.m$group == g)
mins <- if (length(idx) == 1) {
as.numeric(D_km[, idx])
} else {
apply(D_km[, idx, drop = FALSE], 1, min, na.rm = TRUE)
}
tibble(!!paste0("mindist_", g, "_km") := mins)
})
grids_with_mins <- bind_cols(grids_outcomes_5_centr, min_by_group)
glimpse(grids_with_mins)
## Rows: 9,320
## Columns: 39
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 4…
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, …
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, …
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000, 0.000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.0000001919530095, 0.00…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622, -0.03…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.0000000, …
## $ H_z <dbl> NA, 0.37914484, -0.53831394, -1.37253294, -1.372…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.9406194, …
## $ KDI_z <dbl> NA, -0.3082596, 0.3814051, 1.2593526, 2.5225016,…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0.000000…
## $ RV_z <dbl> NA, 0.04314414, -0.52595342, -0.75586755, -0.755…
## $ exits_non_tech <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, …
## $ exits_total <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_non_tech <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0,…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_total <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0,…
## $ renewal <dbl> NA, -0.003761441, -0.605257647, NA, NA, NA, NA, …
## $ reorientation <dbl> NA, -0.7839933, 1.1409718, NA, NA, NA, NA, 1.140…
## $ resistance <dbl> NA, -1.10624910, 0.01685743, -0.93866384, 0.2967…
## $ recovery <dbl> NA, -0.3573670, -0.7887458, -0.9106664, -0.82661…
## $ LIRI <dbl> NA, -0.61916450, -0.16801042, NA, NA, NA, NA, -0…
## $ mindist_core_km <dbl> 136.6155, 137.0379, 137.4881, 137.9658, 134.2582…
## $ mindist_midsize_km <dbl> 51.43201, 52.57096, 53.76020, 54.99648, 48.66053…
## $ mindist_regional_km <dbl> 136.5111, 135.8476, 135.2104, 134.5999, 135.0460…
## $ mindist_localbig_km <dbl> 122.2380, 122.5272, 122.8483, 123.2010, 119.7435…
## $ mindist_localsmall_km <dbl> 49.95792, 50.04659, 50.21483, 50.46184, 47.94949…
## $ geometry <POINT [m]> POINT (668613.9 353474.6), POINT (670613.9…
# Dummies for for location within given radius
radii <- c(10, 25, 50)
mk_dummy <- function(df, g, r) {
nm <- paste0("dist_", g, "_", r)
df[[nm]] <- as.integer(df[[paste0("mindist_", g, "_km")]] <= r)
df
}
grids_outcomes_6_sf <- reduce(
.x = expand_grid(g = groups, r = radii) %>% split(.$g),
.f = function(df, idx_tbl) {
# idx_tbl carries same df with repeated g and r; loop inside
reduce2(idx_tbl$g, idx_tbl$r, mk_dummy, .init = df)
},
.init = grids_with_mins
)
st_geometry(grids_outcomes_6_sf) = st_geometry(grids_outcomes_5_sf)
glimpse(grids_outcomes_6_sf)
## Rows: 9,320
## Columns: 54
## $ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 4…
## $ firms_total <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, …
## $ firms_non_tech <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, …
## $ firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_250_999 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ firms_size_1000 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, …
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, …
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000, 0.000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.0000001919530095, 0.00…
## $ HHI_z <dbl> NA, -0.03474617, -0.03473713, -0.03474622, -0.03…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.0000000, …
## $ H_z <dbl> NA, 0.37914484, -0.53831394, -1.37253294, -1.372…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.9406194, …
## $ KDI_z <dbl> NA, -0.3082596, 0.3814051, 1.2593526, 2.5225016,…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0.000000…
## $ RV_z <dbl> NA, 0.04314414, -0.52595342, -0.75586755, -0.755…
## $ exits_non_tech <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ exits_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_non_tech <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, …
## $ survivors_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ survivors_total <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, …
## $ exits_total <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_non_tech <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0,…
## $ new_firms_tech <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ new_firms_total <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0,…
## $ renewal <dbl> NA, -0.003761441, -0.605257647, NA, NA, NA, NA, …
## $ reorientation <dbl> NA, -0.7839933, 1.1409718, NA, NA, NA, NA, 1.140…
## $ resistance <dbl> NA, -1.10624910, 0.01685743, -0.93866384, 0.2967…
## $ recovery <dbl> NA, -0.3573670, -0.7887458, -0.9106664, -0.82661…
## $ LIRI <dbl> NA, -0.61916450, -0.16801042, NA, NA, NA, NA, -0…
## $ mindist_core_km <dbl> 136.6155, 137.0379, 137.4881, 137.9658, 134.2582…
## $ mindist_midsize_km <dbl> 51.43201, 52.57096, 53.76020, 54.99648, 48.66053…
## $ mindist_regional_km <dbl> 136.5111, 135.8476, 135.2104, 134.5999, 135.0460…
## $ mindist_localbig_km <dbl> 122.2380, 122.5272, 122.8483, 123.2010, 119.7435…
## $ mindist_localsmall_km <dbl> 49.95792, 50.04659, 50.21483, 50.46184, 47.94949…
## $ geometry <POLYGON [m]> POLYGON ((667613.9 352474.6..., POLYGON …
## $ dist_core_10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_core_25 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_core_50 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_localbig_10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_localbig_25 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_localbig_50 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_localsmall_10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_localsmall_25 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_localsmall_50 <int> 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, …
## $ dist_midsize_10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_midsize_25 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_midsize_50 <int> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ dist_regional_10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_regional_25 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dist_regional_50 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
We already have the numbers of tech firms per grid in 2012 and 2021 with division of exit/survivor for 2012 and survivor/new firm for 2021. We can additionally create variables connected to presence/share of technology firms and presence of big firms:
grids_outcomes_6 <- st_drop_geometry(grids_outcomes_6_sf)
grids_outcomes_7 <- grids_outcomes_6 %>%
mutate(share_tech_12 = if_else(firms_total > 0 & firms_tech > 0,
firms_tech/firms_total, 0),
share_tech_21 = if_else((new_firms_total + survivors_total) > 0 &
(new_firms_tech + survivors_tech) > 0,
(new_firms_tech + survivors_tech)/(new_firms_total + survivors_total), 0),
if_tech_12 = if_else(firms_tech > 0, 1, 0),
if_tech_21 = if_else((new_firms_tech + survivors_tech) > 0, 1, 0),
if_big_firms = if_else((firms_size_250_999 > 0 | firms_size_1000 > 0), 1, 0))
size_counts_per_grid_2021 <- firms_2021_to_grids %>%
count(ID, LPRAC, name = "n") %>% # number of firms in grids by sectors
complete(ID = grid_ids,
LPRAC = firm_sizes,
fill = list(n = 0L)) %>%
mutate(label = dplyr::case_when(
LPRAC == 1 ~ "firms_size_0_9_2021",
LPRAC == 2 ~ "firms_size_10_49_2021",
LPRAC == 3 ~ "firms_size_50_249_2021",
LPRAC == 4 ~ "firms_size_250_999_2021",
LPRAC == 5 ~ "firms_size_1000_2021")) %>%
dplyr::select(ID, label, n) %>%
pivot_wider(names_from = label,
values_from = n,
values_fill = 0)
# Preparing the final dataset
grids_outcomes_export <- grids_outcomes_7 %>%
cbind(size_counts_per_grid_2021[, 5:6]) %>%
rename(grid_ID = ID,
# firm counts
tot_12 = firms_total,
tech_12 = firms_tech,
nontech_12 = firms_non_tech,
survivors = survivors_total,
exits = exits_total,
births = new_firms_total,
shrtech_12 = share_tech_12,
shrtech_21 = share_tech_21,
if_big_12 = if_big_firms,
# distances to cities
km_core = mindist_core_km,
km_mid = mindist_midsize_km,
km_reg = mindist_regional_km,
km_loc_big = mindist_localbig_km,
km_loc_sml = mindist_localsmall_km,
# dummies for distances to cities
if_core_10 = dist_core_10,
if_core_25 = dist_core_25,
if_core_50 = dist_core_50,
if_mid_10 = dist_midsize_10,
if_mid_25 = dist_midsize_25,
if_mid_50 = dist_midsize_50,
if_reg_10 = dist_regional_10,
if_reg_25 = dist_regional_25,
if_reg_50 = dist_regional_50,
if_lbig_10 = dist_localbig_10,
if_lbig_25 = dist_localbig_25,
if_lbig_50 = dist_localbig_50,
if_lsml_10 = dist_localsmall_10,
if_lsml_25 = dist_localsmall_25,
if_lsml_50 = dist_localsmall_50,
reorient = reorientation) %>%
mutate(tot_21 = survivors + births,
tech_21 = survivors_tech + new_firms_tech,
nontech_21 = survivors_non_tech + new_firms_non_tech,
surv_rate = if_else(tot_12 > 0, survivors/tot_12, NA),
exit_rate = if_else(tot_12 > 0, exits/tot_12, NA),
birth_rate = if_else(tot_12 > 0, births/tot_12, NA),
if_big_21 = if_else((firms_size_250_999_2021 > 0 |
firms_size_1000_2021 > 0), 1, 0)) %>%
dplyr::select(grid_ID, population,
tot_12, tech_12, nontech_12,
tot_21, tech_21, nontech_21,
survivors, surv_rate,
exits, exit_rate,
births, birth_rate,
shrtech_12, shrtech_21,
if_tech_12, if_tech_21,
if_big_12, if_big_21,
km_core, km_mid, km_reg, km_loc_big, km_loc_sml,
if_core_10, if_core_25, if_core_50,
if_mid_10, if_mid_25, if_mid_50,
if_reg_10, if_reg_25, if_reg_50,
if_lbig_10, if_lbig_25, if_lbig_50,
if_lsml_10, if_lsml_25, if_lsml_50,
KDI, H, HHI, RV,
KDI_n, H_n, HHI_n, RV_n,
renewal, reorient,
resistance, recovery,
LIRI) %>%
glimpse()
## Rows: 9,320
## Columns: 53
## $ grid_ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ population <int> 0, 118, 251, 77, 3, 0, 9, 104, 202, 18, 10, 4, 43, 5, 32, 2…
## $ tot_12 <dbl> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, 12, 11, …
## $ tech_12 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nontech_12 <int> 0, 7, 36, 8, 1, 0, 1, 18, 60, 0, 0, 0, 1, 0, 0, 0, 12, 11, …
## $ tot_21 <int> 0, 10, 34, 7, 1, 0, 1, 17, 69, 0, 0, 0, 1, 0, 0, 0, 14, 15,…
## $ tech_21 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nontech_21 <int> 0, 10, 34, 7, 1, 0, 1, 17, 69, 0, 0, 0, 1, 0, 0, 0, 14, 15,…
## $ survivors <int> 0, 5, 33, 7, 1, 0, 1, 16, 59, 0, 0, 0, 1, 0, 0, 0, 11, 11, …
## $ surv_rate <dbl> NA, 0.7142857, 0.9166667, 0.8750000, 1.0000000, NA, 1.00000…
## $ exits <int> 0, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 3,…
## $ exit_rate <dbl> NA, 0.28571429, 0.08333333, 0.12500000, 0.00000000, NA, 0.0…
## $ births <int> 0, 5, 1, 0, 0, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 2…
## $ birth_rate <dbl> NA, 0.71428571, 0.02777778, 0.00000000, 0.00000000, NA, 0.0…
## $ shrtech_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ shrtech_21 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_tech_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_tech_21 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_big_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_big_21 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ km_core <dbl> 136.6155, 137.0379, 137.4881, 137.9658, 134.2582, 134.6583,…
## $ km_mid <dbl> 51.43201, 52.57096, 53.76020, 54.99648, 48.66053, 49.78253,…
## $ km_reg <dbl> 136.5111, 135.8476, 135.2104, 134.5999, 135.0460, 134.6307,…
## $ km_loc_big <dbl> 122.2380, 122.5272, 122.8483, 123.2010, 119.7435, 120.2570,…
## $ km_loc_sml <dbl> 49.95792, 50.04659, 50.21483, 50.46184, 47.94949, 47.95853,…
## $ if_core_10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_core_25 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_core_50 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_mid_10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_mid_25 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_mid_50 <int> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
## $ if_reg_10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_reg_25 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_reg_50 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_lbig_10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_lbig_25 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_lbig_50 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_lsml_10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_lsml_25 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ if_lsml_50 <int> 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1,…
## $ KDI <dbl> NA, 0.9850775, 1.2032836, 1.4810614, 1.8807148, NA, 1.48106…
## $ H <dbl> NA, 1.1537419, 0.5494580, 0.0000000, 0.0000000, NA, 0.00000…
## $ HHI <dbl> NA, 0.00000000105125349, 0.00000001604261866, 0.00000000097…
## $ RV <dbl> NA, 0.018134908, 0.005218286, 0.000000000, 0.000000000, NA,…
## $ KDI_n <dbl> NA, 0.4456040, 0.5662057, 0.7197325, 0.9406194, NA, 0.71973…
## $ H_n <dbl> NA, 0.4463194, 0.2125551, 0.0000000, 0.0000000, NA, 0.00000…
## $ HHI_n <dbl> NA, 0.0000000124085269, 0.0000001919530095, 0.0000000114558…
## $ RV_n <dbl> NA, 0.09936488, 0.02859206, 0.00000000, 0.00000000, NA, 0.0…
## $ renewal <dbl> NA, -0.003761441, -0.605257647, NA, NA, NA, NA, -0.82482310…
## $ reorient <dbl> NA, -0.7839933, 1.1409718, NA, NA, NA, NA, 1.1409718, 1.140…
## $ resistance <dbl> NA, -1.10624910, 0.01685743, -0.93866384, 0.29676465, NA, 0…
## $ recovery <dbl> NA, -0.3573670, -0.7887458, -0.9106664, -0.8266104, NA, -0.…
## $ LIRI <dbl> NA, -0.61916450, -0.16801042, NA, NA, NA, NA, -0.33540704, …
# EPSG: 2180
grids_outcomes_export_2180 <- grids_maz %>%
dplyr::select(geometry) %>%
cbind(grids_outcomes_export)
# EPSG: 4326
grids_outcomes_export_4326 <- st_transform(grids_outcomes_export_2180, 4326)
# saving the dataset to a new object
LIRI <- grids_outcomes_export_4326
# new variable creation
LIRI$HHI <- LIRI$HHI*1000
LIRI$firms.pc.21 <- LIRI$tot_21/LIRI$population
LIRI$popul.K <- LIRI$population/1000
LIRI$below0 <- 0
LIRI$below0[LIRI$LIRI<(-0.1)] <- 1
LIRI$above0 <- 0
LIRI$above0[LIRI$LIRI>0.1] <- 1
LIRI$label <- "around 0"
LIRI$label[LIRI$LIRI<(-0.1)] <- "below 0"
LIRI$label[LIRI$LIRI>0.1] <- "above 0"
LIRI$label[is.na(LIRI$LIRI)==TRUE] <- NA
# selection of columns for the analysis
sel_col <- c("LIRI", "KDI", "H", "HHI", "RV", "surv_rate", "birth_rate", "popul.K",
"firms.pc.21", "if_tech_21", "if_big_21", "below0", "above0")
# dataset without NAs & not in sf format
sub <- LIRI[, sel_col]
sub <- st_drop_geometry(sub)
sums <- apply(sub, 1, sum)
zeros <- which(is.na(sums)==TRUE)
sub.full <- sub[-zeros, ]
table(LIRI$label)
##
## above 0 around 0 below 0
## 3466 1269 3048
ggplot() +
geom_sf(data = LIRI, aes(fill = label)) + theme_minimal() +
labs(title = "LIRI index",
subtitle = "The higher the LIRI, the more resilient grid cell and more adaptive to changing conditions",
fill = "LIRI") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 10, face = "italic"))
cor(st_drop_geometry(LIRI[ ,c("renewal", "reorient", "resistance", "recovery")]), use = "complete")
## renewal reorient resistance recovery
## renewal 1.00000000 -0.72307572 -0.05217299 -0.03787684
## reorient -0.72307572 1.00000000 0.06136415 0.09330819
## resistance -0.05217299 0.06136415 1.00000000 -0.18591643
## recovery -0.03787684 0.09330819 -0.18591643 1.00000000
# a) renewal
ggplot() +
geom_sf(data = LIRI, aes(fill = renewal), color = NA) +
scale_fill_viridis_c(option = "B") + theme_minimal() +
labs(title = "Renewal", fill = "renewal") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
# b) reorientation
ggplot() +
geom_sf(data = LIRI, aes(fill = reorient), color = NA) +
scale_fill_viridis_c(option="B") + theme_minimal() +
labs(title = "Reorientation", fill = "reorient") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
# c) resistance
ggplot() +
geom_sf(data = LIRI, aes(fill = resistance), color = NA) +
scale_fill_viridis_c(option = "B") + theme_minimal() +
labs(title = "Resistance", fill = "resist") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
# d) recovery
ggplot() +
geom_sf(data = LIRI, aes(fill = recovery), color = NA) +
scale_fill_viridis_c(option = "B") + theme_minimal() +
labs(title = "Recovery", fill = "recover") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
eq1 <- below0 ~ popul.K + if_tech_21 + if_big_21 + surv_rate + birth_rate + KDI + H + HHI + RV + firms.pc.21
probit1 <- glm(eq1, family = binomial(link = "probit"), data = sub.full)
summary(probit1)
##
## Call:
## glm(formula = eq1, family = binomial(link = "probit"), data = sub.full)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 24.85921 0.66505 37.379 < 0.0000000000000002 ***
## popul.K -0.20983 0.02165 -9.693 < 0.0000000000000002 ***
## if_tech_21 -0.53070 0.05369 -9.884 < 0.0000000000000002 ***
## if_big_21 -0.01380 0.12981 -0.106 0.9154
## surv_rate -22.45580 0.50508 -44.460 < 0.0000000000000002 ***
## birth_rate -2.97728 0.18056 -16.489 < 0.0000000000000002 ***
## KDI -2.59303 0.26997 -9.605 < 0.0000000000000002 ***
## H -1.89586 0.17421 -10.882 < 0.0000000000000002 ***
## HHI 0.05996 0.01353 4.431 0.0000093718350 ***
## RV -18.41862 2.76141 -6.670 0.0000000000256 ***
## firms.pc.21 -0.46511 0.19646 -2.367 0.0179 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 10420.9 on 7782 degrees of freedom
## Residual deviance: 5559.2 on 7772 degrees of freedom
## AIC: 5581.2
##
## Number of Fisher Scoring iterations: 15
PseudoR2(probit1, which = NULL)
## McFadden
## 0.4665327
eq2 <- above0 ~ popul.K + if_tech_21 + if_big_21 + surv_rate + birth_rate + KDI + H + HHI + RV + firms.pc.21
probit2 <- glm(eq2, family = binomial(link = "probit"), data = sub.full)
summary(probit2)
##
## Call:
## glm(formula = eq2, family = binomial(link = "probit"), data = sub.full)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -18.98769 0.55742 -34.063 < 0.0000000000000002 ***
## popul.K 0.16409 0.01689 9.716 < 0.0000000000000002 ***
## if_tech_21 0.51043 0.04748 10.750 < 0.0000000000000002 ***
## if_big_21 0.37787 0.11994 3.150 0.00163 **
## surv_rate 17.34088 0.41330 41.957 < 0.0000000000000002 ***
## birth_rate 1.64568 0.13750 11.969 < 0.0000000000000002 ***
## KDI 1.40037 0.24755 5.657 0.00000001540173368 ***
## H 1.24595 0.15535 8.020 0.00000000000000106 ***
## HHI -0.04974 0.01528 -3.256 0.00113 **
## RV 16.03802 2.40995 6.655 0.00000000002834489 ***
## firms.pc.21 0.82619 0.17142 4.820 0.00000143840319544 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 10696.3 on 7782 degrees of freedom
## Residual deviance: 7016.9 on 7772 degrees of freedom
## AIC: 7038.9
##
## Number of Fisher Scoring iterations: 25
PseudoR2(probit2, which = NULL)
## McFadden
## 0.3439844
eq3 <- LIRI ~ popul.K + if_tech_21 + if_big_21 + surv_rate + birth_rate + KDI + H + HHI + RV + firms.pc.21
model3 <- lm(eq3, data = sub.full)
summary(model3)
##
## Call:
## lm(formula = eq3, data = sub.full)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7390 -0.1994 -0.0436 0.1693 4.4062
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.636229 0.107574 -52.394 < 0.0000000000000002 ***
## popul.K 0.072391 0.002230 32.464 < 0.0000000000000002 ***
## if_tech_21 0.165763 0.012483 13.279 < 0.0000000000000002 ***
## if_big_21 0.139953 0.031459 4.449 0.000008760 ***
## surv_rate 4.430422 0.056409 78.541 < 0.0000000000000002 ***
## birth_rate 0.571915 0.022678 25.219 < 0.0000000000000002 ***
## KDI 0.928043 0.057399 16.168 < 0.0000000000000002 ***
## H 0.537478 0.037918 14.175 < 0.0000000000000002 ***
## HHI -0.033024 0.002752 -11.999 < 0.0000000000000002 ***
## RV 2.912614 0.569919 5.111 0.000000329 ***
## firms.pc.21 0.393614 0.040151 9.803 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3987 on 7772 degrees of freedom
## Multiple R-squared: 0.544, Adjusted R-squared: 0.5434
## F-statistic: 927.3 on 10 and 7772 DF, p-value: < 0.00000000000000022
screenreg(list(probit1, probit2, model3),
custom.header = list("Probit LIRI-" = 1,
"Probit LIRI+" = 2,
"OLS LIRI all" = 3 ),
digits = 3)
##
## ==========================================================
## Probit LIRI- Probit LIRI+ OLS LIRI all
## ------------- ------------- ------------
## Model 1 Model 2 Model 3
## ----------------------------------------------------------
## (Intercept) 24.859 *** -18.988 *** -5.636 ***
## (0.665) (0.557) (0.108)
## popul.K -0.210 *** 0.164 *** 0.072 ***
## (0.022) (0.017) (0.002)
## if_tech_21 -0.531 *** 0.510 *** 0.166 ***
## (0.054) (0.047) (0.012)
## if_big_21 -0.014 0.378 ** 0.140 ***
## (0.130) (0.120) (0.031)
## surv_rate -22.456 *** 17.341 *** 4.430 ***
## (0.505) (0.413) (0.056)
## birth_rate -2.977 *** 1.646 *** 0.572 ***
## (0.181) (0.137) (0.023)
## KDI -2.593 *** 1.400 *** 0.928 ***
## (0.270) (0.248) (0.057)
## H -1.896 *** 1.246 *** 0.537 ***
## (0.174) (0.155) (0.038)
## HHI 0.060 *** -0.050 ** -0.033 ***
## (0.014) (0.015) (0.003)
## RV -18.419 *** 16.038 *** 2.913 ***
## (2.761) (2.410) (0.570)
## firms.pc.21 -0.465 * 0.826 *** 0.394 ***
## (0.196) (0.171) (0.040)
## ----------------------------------------------------------
## AIC 5581.234 7038.935
## BIC 5657.791 7115.492
## Log Likelihood -2779.617 -3508.468
## Deviance 5559.234 7016.935
## Num. obs. 7783 7783 7783
## R^2 0.544
## Adj. R^2 0.543
## ==========================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05