# 2021: inter-municipal migration flows broken down by sex and age group
library(readxl)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
X2021_medjuopcinske_Mig <- read_excel(
  "C:/Users/Amra/OneDrive - Direkcija za ekonomsko planiranje/Prijava teme doktorske disertacije/data collection for BiH/!FBiHmedjuopstinskeunutrasnjemigracije/2021_medjuopcinske_Mig.xlsx"
)
X_2021 <- X2021_medjuopcinske_Mig
# Wide -> long: every column from BIHAĆ through INO is stacked into the
# name column `iz/u` (renamed to `to` at the end of this section) and the
# count column `value`.
long_2021 <- pivot_longer(
  X_2021,
  cols = "BIHAĆ":"INO",
  names_to = "iz/u",
  values_to = "value",
  # "minimal" switches off repair / uniqueness checks on the output names
  names_repair = "minimal"
) %>%
  mutate(Y = 2021) # tag every row with the year 2021
dim(long_2021)
## [1] 454608 7
# Keep Nivo == 3 rows (presumably the municipality level -- TODO confirm) and
# drop the "Ukupno"/"UKUPNO" (total) rows for sex and age.
long_2021 <- long_2021 %>%
  filter(Nivo == 3, Pol != "Ukupno", STAROST != "UKUPNO")
dim(long_2021)
## [1] 259120 7
# Go from counts to one row per person: each row is repeated `value` times
# and the `value` column itself is dropped by uncount().
long_2021_indiv <- uncount(long_2021, value)
# Show the person-level table
print(long_2021_indiv)
## # A tibble: 21,434 × 6
## Nivo `KANTON/OPĆINA` STAROST Pol `iz/u` Y
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2021
## 2 3 BOSANSKA KRUPA 0-4 M BUŽIM 2021
## 3 3 BOSANSKA KRUPA 0-4 M KALESIJA 2021
## 4 3 BOSANSKA KRUPA 0-4 M TUZLA 2021
## 5 3 BOSANSKA KRUPA 0-4 M INO 2021
## 6 3 BOSANSKA KRUPA 0-4 Ž CAZIN 2021
## 7 3 BOSANSKA KRUPA 0-4 Ž CAZIN 2021
## 8 3 BOSANSKA KRUPA 0-4 Ž KLJUČ 2021
## 9 3 BOSANSKA KRUPA 0-4 Ž REPUBLIKA SRPSKA 2021
## 10 3 BOSANSKA KRUPA 5-9 M BIHAĆ 2021
## # ℹ 21,424 more rows
dim(long_2021_indiv)
## [1] 21434 6
# Harmonise the origin / destination column names across years
long_2021_indiv <- rename(
  long_2021_indiv,
  from = `KANTON/OPĆINA`,
  to = `iz/u`
)
names(long_2021_indiv)
## [1] "Nivo" "from" "STAROST" "Pol" "to" "Y"
# 2022: inter-municipal migration flows broken down by sex and age group
library(readxl)
library(tidyr)
library(dplyr)
# The 2022 workbook is read from its "Za R" sheet
X2022_medjuopcinske_Mig <- read_excel(
  "C:/Users/Amra/OneDrive - Direkcija za ekonomsko planiranje/Prijava teme doktorske disertacije/data collection for BiH/!FBiHmedjuopstinskeunutrasnjemigracije/2022_medjuopcinske_Mig.xlsx",
  sheet = "Za R"
)
## New names:
## • `UKUPNO` -> `UKUPNO...56`
## • `UKUPNO` -> `UKUPNO...66`
X_2022 <- X2022_medjuopcinske_Mig
# Wide -> long: every column from BIHAĆ through INO is stacked into the
# name column `iz/u` (renamed to `to` at the end of this section) and the
# count column `value`.
long_2022 <- pivot_longer(
  X_2022,
  cols = "BIHAĆ":"INO",
  names_to = "iz/u",
  values_to = "value",
  # "minimal" switches off repair / uniqueness checks on the output names
  names_repair = "minimal"
) %>%
  mutate(Y = 2022) # tag every row with the year 2022
dim(long_2022)
## [1] 465696 7
# Keep Nivo == 3 rows (presumably the municipality level -- TODO confirm) and
# drop the "Ukupno"/"UKUPNO" (total) rows; note the sex column is `POL` here.
long_2022 <- long_2022 %>%
  filter(Nivo == 3, POL != "Ukupno", STAROST != "UKUPNO")
dim(long_2022)
## [1] 265440 7
# Go from counts to one row per person: each row is repeated `value` times
# and the `value` column itself is dropped by uncount().
long_2022_indiv <- uncount(long_2022, value)
# Show the person-level table
print(long_2022_indiv)
## # A tibble: 24,592 × 6
## Nivo Opcina STAROST POL `iz/u` Y
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 3 BOSANSKA KRUPA 0-4 M BOSANSKI PETROVAC 2022
## 2 3 BOSANSKA KRUPA 0-4 M BUŽIM 2022
## 3 3 BOSANSKA KRUPA 0-4 M CAZIN 2022
## 4 3 BOSANSKA KRUPA 0-4 M CAZIN 2022
## 5 3 BOSANSKA KRUPA 0-4 M CAZIN 2022
## 6 3 BOSANSKA KRUPA 0-4 M SANSKI MOST 2022
## 7 3 BOSANSKA KRUPA 0-4 M ZENICA 2022
## 8 3 BOSANSKA KRUPA 0-4 M ZENICA 2022
## 9 3 BOSANSKA KRUPA 0-4 M INO 2022
## 10 3 BOSANSKA KRUPA 0-4 Ž CAZIN 2022
## # ℹ 24,582 more rows
dim(long_2022_indiv)
## [1] 24592 6
names(long_2022_indiv)
## [1] "Nivo" "Opcina" "STAROST" "POL" "iz/u" "Y"
# Bring the 2022 column names in line with the other years
long_2022_indiv <- rename(
  long_2022_indiv,
  from = Opcina,
  to = `iz/u`,
  Pol = POL
)
# 2020: inter-municipal migration flows broken down by sex and age group
library(readxl)
library(tidyr)
library(dplyr)
X2020_medjuopcinske_Mig <- read_excel(
  "C:/Users/Amra/OneDrive - Direkcija za ekonomsko planiranje/Prijava teme doktorske disertacije/data collection for BiH/!FBiHmedjuopstinskeunutrasnjemigracije/2020_medjuopcinske_Mig.xlsx"
)
X_2020 <- X2020_medjuopcinske_Mig
# Wide -> long: every column from BIHAĆ through INO is stacked into the
# name column `iz/u` (renamed to `to` at the end of this section) and the
# count column `value`.
long_2020 <- pivot_longer(
  X_2020,
  cols = "BIHAĆ":"INO",
  names_to = "iz/u",
  values_to = "value",
  # "minimal" switches off repair / uniqueness checks on the output names
  names_repair = "minimal"
) %>%
  mutate(Y = 2020) # tag every row with the year 2020
dim(long_2020)
## [1] 459270 7
# Keep Nivo == 3 rows (presumably the municipality level -- TODO confirm) and
# drop the "Ukupno"/"UKUPNO" (total) rows for sex and age.
long_2020 <- long_2020 %>%
  filter(Nivo == 3, Pol != "Ukupno", STAROST != "UKUPNO")
dim(long_2020)
## [1] 255960 7
# Go from counts to one row per person: each row is repeated `value` times
# and the `value` column itself is dropped by uncount().
long_2020_indiv <- uncount(long_2020, value)
# Show the person-level table
print(long_2020_indiv)
## # A tibble: 19,879 × 6
## Nivo `KANTON/OPĆINA` STAROST Pol `iz/u` Y
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2020
## 2 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2020
## 3 3 BOSANSKA KRUPA 0-4 M BUŽIM 2020
## 4 3 BOSANSKA KRUPA 0-4 M CAZIN 2020
## 5 3 BOSANSKA KRUPA 0-4 M NOVI GRAD SARAJEVO 2020
## 6 3 BOSANSKA KRUPA 0-4 M INO 2020
## 7 3 BOSANSKA KRUPA 0-4 M INO 2020
## 8 3 BOSANSKA KRUPA 0-4 Ž BIHAĆ 2020
## 9 3 BOSANSKA KRUPA 0-4 Ž BIHAĆ 2020
## 10 3 BOSANSKA KRUPA 0-4 Ž BUŽIM 2020
## # ℹ 19,869 more rows
dim(long_2020_indiv)
## [1] 19879 6
names(long_2020_indiv)
## [1] "Nivo" "KANTON/OPĆINA" "STAROST" "Pol"
## [5] "iz/u" "Y"
# Harmonise the origin / destination column names across years
long_2020_indiv <- rename(
  long_2020_indiv,
  from = `KANTON/OPĆINA`,
  to = `iz/u`
)
head(long_2020_indiv)
## # A tibble: 6 × 6
## Nivo from STAROST Pol to Y
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2020
## 2 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2020
## 3 3 BOSANSKA KRUPA 0-4 M BUŽIM 2020
## 4 3 BOSANSKA KRUPA 0-4 M CAZIN 2020
## 5 3 BOSANSKA KRUPA 0-4 M NOVI GRAD SARAJEVO 2020
## 6 3 BOSANSKA KRUPA 0-4 M INO 2020
# 2019: inter-municipal migration flows broken down by sex and age group
library(readxl)
library(tidyr)
library(dplyr)
X2019_medjuopcinske_Mig <- read_excel(
  "C:/Users/Amra/OneDrive - Direkcija za ekonomsko planiranje/Prijava teme doktorske disertacije/data collection for BiH/!FBiHmedjuopstinskeunutrasnjemigracije/2019_medjuopcinske_Mig.xlsx"
)
X_2019 <- X2019_medjuopcinske_Mig
# Wide -> long: every column from BIHAĆ through INO is stacked into the
# name column `iz/u` (renamed to `to` at the end of this section) and the
# count column `value`.
long_2019 <- pivot_longer(
  X_2019,
  cols = "BIHAĆ":"INO",
  names_to = "iz/u",
  values_to = "value",
  # "minimal" switches off repair / uniqueness checks on the output names
  names_repair = "minimal"
) %>%
  mutate(Y = 2019) # tag every row with the year 2019
dim(long_2019)
## [1] 464940 7
# Keep Nivo == 3 rows (presumably the municipality level -- TODO confirm) and
# drop the "Ukupno"/"UKUPNO" (total) rows for sex and age.
long_2019 <- long_2019 %>%
  filter(Nivo == 3, Pol != "Ukupno", STAROST != "UKUPNO")
dim(long_2019)
## [1] 259120 7
# Go from counts to one row per person: each row is repeated `value` times
# and the `value` column itself is dropped by uncount().
long_2019_indiv <- uncount(long_2019, value)
# Show the person-level table
print(long_2019_indiv)
## # A tibble: 22,451 × 6
## Nivo `KANTON/OPĆINA` STAROST Pol `iz/u` Y
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2019
## 2 3 BOSANSKA KRUPA 0-4 M CAZIN 2019
## 3 3 BOSANSKA KRUPA 0-4 M INO 2019
## 4 3 BOSANSKA KRUPA 0-4 M INO 2019
## 5 3 BOSANSKA KRUPA 0-4 M INO 2019
## 6 3 BOSANSKA KRUPA 0-4 Ž MOSTAR 2019
## 7 3 BOSANSKA KRUPA 0-4 Ž INO 2019
## 8 3 BOSANSKA KRUPA 0-4 Ž INO 2019
## 9 3 BOSANSKA KRUPA 5-9 M BIHAĆ 2019
## 10 3 BOSANSKA KRUPA 5-9 M KLJUČ 2019
## # ℹ 22,441 more rows
dim(long_2019_indiv)
## [1] 22451 6
names(long_2019_indiv)
## [1] "Nivo" "KANTON/OPĆINA" "STAROST" "Pol"
## [5] "iz/u" "Y"
# Harmonise the origin / destination column names across years
long_2019_indiv <- rename(
  long_2019_indiv,
  from = `KANTON/OPĆINA`,
  to = `iz/u`
)
head(long_2019_indiv)
## # A tibble: 6 × 6
## Nivo from STAROST Pol to Y
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2019
## 2 3 BOSANSKA KRUPA 0-4 M CAZIN 2019
## 3 3 BOSANSKA KRUPA 0-4 M INO 2019
## 4 3 BOSANSKA KRUPA 0-4 M INO 2019
## 5 3 BOSANSKA KRUPA 0-4 M INO 2019
## 6 3 BOSANSKA KRUPA 0-4 Ž MOSTAR 2019
# 2018: inter-municipal migration flows broken down by sex and age group
library(readxl)
library(tidyr)
library(dplyr)
# FIX: this section previously read "2019_medjuopcinske_Mig.xlsx", so the
# "2018" records were an exact copy of 2019 (identical dims below, and a
# factor(Y)2019 coefficient of ~0 in the year-dummy model). It now reads the
# 2018 workbook, named after the pattern used for every other year.
# NOTE(review): confirm the 2018 workbook uses the same column names
# (`KANTON/OPĆINA`, `Pol`, `STAROST`, BIHAĆ ... INO) as 2019-2021.
X2018_medjuopcinske_Mig <- read_excel(
  "C:/Users/Amra/OneDrive - Direkcija za ekonomsko planiranje/Prijava teme doktorske disertacije/data collection for BiH/!FBiHmedjuopstinskeunutrasnjemigracije/2018_medjuopcinske_Mig.xlsx"
)
X_2018 <- X2018_medjuopcinske_Mig
# NOTE: every `##` output line recorded in this section was produced from the
# 2019 workbook and is stale; re-run the section to refresh it.
# Wide -> long: every column from BIHAĆ through INO is stacked into the
# name column `iz/u` (renamed to `to` at the end of this section) and the
# count column `value`.
long_2018 <- X_2018 %>%
  pivot_longer(
    cols = "BIHAĆ":"INO",
    names_to = "iz/u",
    values_to = "value",
    # "minimal" switches off repair / uniqueness checks on the output names
    names_repair = "minimal"
  )
# Tag every row with the year 2018
long_2018$Y <- 2018
dim(long_2018)
## [1] 464940 7
# Keep Nivo == 3 rows (presumably the municipality level -- TODO confirm) and
# drop the "Ukupno"/"UKUPNO" (total) rows for sex and age.
long_2018 <- long_2018 %>%
  filter(Nivo == 3) %>%
  filter(Pol != "Ukupno") %>%
  filter(STAROST != "UKUPNO")
dim(long_2018)
## [1] 259120 7
# Go from counts to one row per person: each row is repeated `value` times
# and the `value` column itself is dropped by uncount().
long_2018_indiv <- long_2018 %>%
  uncount(value)
# Show the person-level table
print(long_2018_indiv)
## # A tibble: 22,451 × 6
## Nivo `KANTON/OPĆINA` STAROST Pol `iz/u` Y
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2018
## 2 3 BOSANSKA KRUPA 0-4 M CAZIN 2018
## 3 3 BOSANSKA KRUPA 0-4 M INO 2018
## 4 3 BOSANSKA KRUPA 0-4 M INO 2018
## 5 3 BOSANSKA KRUPA 0-4 M INO 2018
## 6 3 BOSANSKA KRUPA 0-4 Ž MOSTAR 2018
## 7 3 BOSANSKA KRUPA 0-4 Ž INO 2018
## 8 3 BOSANSKA KRUPA 0-4 Ž INO 2018
## 9 3 BOSANSKA KRUPA 5-9 M BIHAĆ 2018
## 10 3 BOSANSKA KRUPA 5-9 M KLJUČ 2018
## # ℹ 22,441 more rows
dim(long_2018_indiv)
## [1] 22451 6
names(long_2018_indiv)
## [1] "Nivo" "KANTON/OPĆINA" "STAROST" "Pol"
## [5] "iz/u" "Y"
# Harmonise the origin / destination column names across years
long_2018_indiv <- long_2018_indiv %>%
  rename(from = `KANTON/OPĆINA`, to = `iz/u`)
head(long_2018_indiv)
## # A tibble: 6 × 6
## Nivo from STAROST Pol to Y
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2018
## 2 3 BOSANSKA KRUPA 0-4 M CAZIN 2018
## 3 3 BOSANSKA KRUPA 0-4 M INO 2018
## 4 3 BOSANSKA KRUPA 0-4 M INO 2018
## 5 3 BOSANSKA KRUPA 0-4 M INO 2018
## 6 3 BOSANSKA KRUPA 0-4 Ž MOSTAR 2018
# Stack the five yearly person-level tables (2018-2022) into one data set;
# bind_rows() lines the columns up by name.
sve.mig <- list(
  long_2018_indiv,
  long_2019_indiv,
  long_2020_indiv,
  long_2021_indiv,
  long_2022_indiv
) %>%
  bind_rows()
head(sve.mig)
## # A tibble: 6 × 6
## Nivo from STAROST Pol to Y
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2018
## 2 3 BOSANSKA KRUPA 0-4 M CAZIN 2018
## 3 3 BOSANSKA KRUPA 0-4 M INO 2018
## 4 3 BOSANSKA KRUPA 0-4 M INO 2018
## 5 3 BOSANSKA KRUPA 0-4 M INO 2018
## 6 3 BOSANSKA KRUPA 0-4 Ž MOSTAR 2018
#### Import the workbook that carries the ethnic-majority label
medjuop_migracijeFBiH_2018_2019 <- read_excel(
  "C:/Users/Amra/OneDrive - Direkcija za ekonomsko planiranje/Prijava teme doktorske disertacije/data collection for BiH/medjuop.migracijeFBiH_2018-2019.xlsx",
  sheet = "2018-2019",
  skip = 1
)
wide <- medjuop_migracijeFBiH_2018_2019
library(tidyr)
# Same wide -> long reshape as for the yearly files
long <- pivot_longer(
  wide,
  cols = "BIHAĆ":"INO",
  names_to = "iz/u",
  values_to = "value",
  # "minimal" switches off repair / uniqueness checks on the output names
  names_repair = "minimal"
)
#### Columns 2 and 3 (by position) become the origin / destination labels
names(long)[2:3] <- c("from", "to")
names(long)
## [1] "Majority" "from" "to" "value"
# Lookup table: one row per distinct (from, Majority) pair
join_df <- distinct(select(long, from, Majority))
# Left join on `from`: attaches the ORIGIN's majority to every migrant record
sve.mig.ethnic <- left_join(sve.mig, join_df, by = "from")
print(sve.mig.ethnic)
## # A tibble: 110,807 × 7
## Nivo from STAROST Pol to Y Majority
## <dbl> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2018 Bosniak
## 2 3 BOSANSKA KRUPA 0-4 M CAZIN 2018 Bosniak
## 3 3 BOSANSKA KRUPA 0-4 M INO 2018 Bosniak
## 4 3 BOSANSKA KRUPA 0-4 M INO 2018 Bosniak
## 5 3 BOSANSKA KRUPA 0-4 M INO 2018 Bosniak
## 6 3 BOSANSKA KRUPA 0-4 Ž MOSTAR 2018 Bosniak
## 7 3 BOSANSKA KRUPA 0-4 Ž INO 2018 Bosniak
## 8 3 BOSANSKA KRUPA 0-4 Ž INO 2018 Bosniak
## 9 3 BOSANSKA KRUPA 5-9 M BIHAĆ 2018 Bosniak
## 10 3 BOSANSKA KRUPA 5-9 M KLJUČ 2018 Bosniak
## # ℹ 110,797 more rows
# Make explicit that the joined label describes the origin
sve.mig.ethnic <- rename(sve.mig.ethnic, Majority_from = Majority)
# Inspect the result
head(sve.mig.ethnic)
## # A tibble: 6 × 7
## Nivo from STAROST Pol to Y Majority_from
## <dbl> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2018 Bosniak
## 2 3 BOSANSKA KRUPA 0-4 M CAZIN 2018 Bosniak
## 3 3 BOSANSKA KRUPA 0-4 M INO 2018 Bosniak
## 4 3 BOSANSKA KRUPA 0-4 M INO 2018 Bosniak
## 5 3 BOSANSKA KRUPA 0-4 M INO 2018 Bosniak
## 6 3 BOSANSKA KRUPA 0-4 Ž MOSTAR 2018 Bosniak
# Placeholder column for the destination's majority (filled in further down)
sve.mig.ethnic$Majority_to <- NA
print(sve.mig)
## # A tibble: 110,807 × 6
## Nivo from STAROST Pol to Y
## <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2018
## 2 3 BOSANSKA KRUPA 0-4 M CAZIN 2018
## 3 3 BOSANSKA KRUPA 0-4 M INO 2018
## 4 3 BOSANSKA KRUPA 0-4 M INO 2018
## 5 3 BOSANSKA KRUPA 0-4 M INO 2018
## 6 3 BOSANSKA KRUPA 0-4 Ž MOSTAR 2018
## 7 3 BOSANSKA KRUPA 0-4 Ž INO 2018
## 8 3 BOSANSKA KRUPA 0-4 Ž INO 2018
## 9 3 BOSANSKA KRUPA 5-9 M BIHAĆ 2018
## 10 3 BOSANSKA KRUPA 5-9 M KLJUČ 2018
## # ℹ 110,797 more rows
# Add three rows whose `from` is Republika Srpska, Brčko District and INO.
# They carry no migrant data (all other fields NA); they only supply a
# Majority_from value that the later `to` -> `from` lookup can find.
dim(sve.mig.ethnic)
## [1] 110807 8
new_rows <- data.frame(
  Nivo = rep(2, 3),
  from = c("REPUBLIKA SRPSKA", "BRČKO DISTRIKT BOSNE I HERCEGOVINE", "INO"),
  STAROST = rep(NA, 3),
  Pol = rep(NA, 3),
  to = rep(NA, 3),
  Y = rep(NA, 3),
  Majority_from = c("Serb", "None", "None"),
  Majority_to = rep(NA, 3)
)
# Append them below the existing records
sve.mig.ethnic <- rbind(sve.mig.ethnic, new_rows)
dim(sve.mig.ethnic)
## [1] 110810 8
# Fill Majority_to: for every record, look its destination (`to`) up among
# the origins (`from`) and copy that origin's Majority_from.
# match() returns the position of the FIRST row whose `from` equals `to`
# (NA when there is none), which is the value the former row-by-row loop ended
# up storing -- without the O(n^2) scan and without the "number of items to
# replace" warnings caused by assigning a multi-element which() result to a
# single cell. Destinations that never occur in `from` stay NA.
sve.mig.ethnic$Majority_to <- sve.mig.ethnic$Majority_from[
  match(sve.mig.ethnic$to, sve.mig.ethnic$from)
]
print(sve.mig.ethnic)
## # A tibble: 110,810 × 8
## Nivo from STAROST Pol to Y Majority_from Majority_to
## <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 3 BOSANSKA KRUPA 0-4 M BIHAĆ 2018 Bosniak Bosniak
## 2 3 BOSANSKA KRUPA 0-4 M CAZIN 2018 Bosniak Bosniak
## 3 3 BOSANSKA KRUPA 0-4 M INO 2018 Bosniak None
## 4 3 BOSANSKA KRUPA 0-4 M INO 2018 Bosniak None
## 5 3 BOSANSKA KRUPA 0-4 M INO 2018 Bosniak None
## 6 3 BOSANSKA KRUPA 0-4 Ž MOSTAR 2018 Bosniak None
## 7 3 BOSANSKA KRUPA 0-4 Ž INO 2018 Bosniak None
## 8 3 BOSANSKA KRUPA 0-4 Ž INO 2018 Bosniak None
## 9 3 BOSANSKA KRUPA 5-9 M BIHAĆ 2018 Bosniak Bosniak
## 10 3 BOSANSKA KRUPA 5-9 M KLJUČ 2018 Bosniak <NA>
## # ℹ 110,800 more rows
dim(sve.mig.ethnic)
## [1] 110810 8
# Sequential row identifier; derived from the actual row count so it keeps
# working when the input data (and hence the number of rows) changes.
sve.mig.ethnic$ID <- seq_len(nrow(sve.mig.ethnic))
# Switch to English column names for the modelling stage
sve.mig.ethnic <- rename(
  sve.mig.ethnic,
  Level = Nivo,
  Age.int = STAROST,
  Sex = Pol
)
str(sve.mig.ethnic)
## tibble [110,810 × 9] (S3: tbl_df/tbl/data.frame)
## $ Level : num [1:110810] 3 3 3 3 3 3 3 3 3 3 ...
## $ from : chr [1:110810] "BOSANSKA KRUPA" "BOSANSKA KRUPA" "BOSANSKA KRUPA" "BOSANSKA KRUPA" ...
## $ Age.int : chr [1:110810] "0-4" "0-4" "0-4" "0-4" ...
## $ Sex : chr [1:110810] "M" "M" "M" "M" ...
## $ to : chr [1:110810] "BIHAĆ" "CAZIN" "INO" "INO" ...
## $ Y : num [1:110810] 2018 2018 2018 2018 2018 ...
## $ Majority_from: chr [1:110810] "Bosniak" "Bosniak" "Bosniak" "Bosniak" ...
## $ Majority_to : chr [1:110810] "Bosniak" "Bosniak" "None" "None" ...
## $ ID : int [1:110810] 1 2 3 4 5 6 7 8 9 10 ...
Ako imas mikro informacije (osobe ili domacinstva) onda mozes da estimiras LOGIT model. Opcija 1: dependent variable=1 ako se osoba preselila u “group that has the same ethnic majority as the origin group” (ovdje koristim grupu jer nemas podatke po opstinama). Dvije napomene: u model dodaj kontrole za “socio-economics (najvaznije etnicku pripadnost)” i zadrzi samo observacije osoba koje su se preselile. Opcija 2: estimiraj model iz opcije 1 za razlicite etnicke grupe: bosnjake, srbe, i hrvate.
# Cross-tabulate origin majority (rows) against destination majority (columns)
table(sve.mig.ethnic$Majority_from, sve.mig.ethnic$Majority_to)
##
## Bosniak Croat None Serb
## Bosniak 64977 3518 11816 9738
## Croat 3345 3033 3799 889
## None 1805 1648 896 863
## Serb 231 33 149 1087
# Outcome for the logit models: 1 when the two labels are equal, 0 when they
# differ, NA when either label is missing.
# NOTE(review): "None" == "None" also scores 1 here -- confirm that is
# intended.
sve.mig.ethnic$same_majority <- as.numeric(
  sve.mig.ethnic$Majority_from == sve.mig.ethnic$Majority_to
)
# dplyr supplies mutate() and filter()
library(dplyr)
# Collapse the five-year age bands into broader groups via a named lookup
# vector; any label that is not listed (including NA) maps to NA.
sve.mig.ethnic <- sve.mig.ethnic %>%
  mutate(
    Age_group = unname(c(
      "0-4" = "0-19", "5-9" = "0-19", "10-14" = "0-19", "15-19" = "0-19",
      "20-24" = "20-34", "25-29" = "20-34", "30-34" = "20-34",
      "35-39" = "35-49", "40-44" = "35-49", "45-49" = "35-49",
      "50-54" = "50-64", "55-59" = "50-64", "60-64" = "50-64",
      "65-69" = "65+", "70-74" = "65+", "75-79" = "65+", "80-84" = "65+",
      "85+" = "65+"
    )[Age.int])
  )
# Keep only ages 20 and over, as instructed by Ismir. filter() also drops
# rows whose Age_group is NA, because the comparison evaluates to NA there.
sve.mig.ethnic.above19 <- filter(sve.mig.ethnic, Age_group != "0-19")
# Logit model on the 20+ sample: same_majority (1/0) regressed on the recoded
# age group, sex, year and origin municipality.
# Y enters as a single numeric term here, whereas model2a uses factor(Y).
# `from` is a character column, so the fit reports one coefficient per origin
# municipality relative to a reference level.
# Rows with a missing value in any model variable are left out of the fit
# (the summary reports them as "deleted due to missingness").
model2 <- glm(same_majority ~ Age_group + Sex + Y+ from,
family = binomial(link = "logit"), data = sve.mig.ethnic.above19)
# Coefficient table, deviances and AIC for this model
summary(model2)
##
## Call:
## glm(formula = same_majority ~ Age_group + Sex + Y + from, family = binomial(link = "logit"),
## data = sve.mig.ethnic.above19)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.773060 11.411441 -0.856 0.391762
## Age_group35-49 -0.508867 0.020129 -25.280 < 2e-16 ***
## Age_group50-64 -0.661776 0.024489 -27.024 < 2e-16 ***
## Age_group65+ -1.219250 0.027207 -44.814 < 2e-16 ***
## SexŽ 0.488369 0.016719 29.211 < 2e-16 ***
## Y 0.005457 0.005649 0.966 0.334066
## fromBIHAĆ -0.873578 0.095169 -9.179 < 2e-16 ***
## fromBOSANSKA KRUPA -0.925852 0.099983 -9.260 < 2e-16 ***
## fromBOSANSKI PETROVAC -0.136367 0.124492 -1.095 0.273347
## fromBOSANSKO GRAHOVO -0.148060 0.187772 -0.789 0.430398
## fromBREZA 0.971818 0.157487 6.171 6.80e-10 ***
## fromBUGOJNO -0.785541 0.101220 -7.761 8.44e-15 ***
## fromBUSOVAČA -3.399470 0.156530 -21.718 < 2e-16 ***
## fromBUŽIM -0.559961 0.112146 -4.993 5.94e-07 ***
## fromCAZIN -1.009569 0.094324 -10.703 < 2e-16 ***
## fromČAPLJINA -1.913066 0.113131 -16.910 < 2e-16 ***
## fromČELIĆ -1.783451 0.128609 -13.867 < 2e-16 ***
## fromČITLUK -1.059280 0.120711 -8.775 < 2e-16 ***
## fromDOBOJ ISTOK -0.963388 0.127447 -7.559 4.06e-14 ***
## fromDOBOJ JUG -0.578888 0.150086 -3.857 0.000115 ***
## fromDOBRETIĆI -2.862802 0.493675 -5.799 6.67e-09 ***
## fromDOMALJEVAC/ŠAMAC -1.800744 0.206727 -8.711 < 2e-16 ***
## fromDONJI VAKUF -0.222496 0.125290 -1.776 0.075759 .
## fromDRVAR 0.575689 0.142434 4.042 5.30e-05 ***
## fromFOČA 0.044995 0.191396 0.235 0.814139
## fromFOJNICA -1.121417 0.125039 -8.969 < 2e-16 ***
## fromGLAMOČ -2.660987 0.184809 -14.399 < 2e-16 ***
## fromGORAŽDE -0.567931 0.101017 -5.622 1.89e-08 ***
## fromGORNJI VAKUF-USKOPLJE -0.562371 0.115349 -4.875 1.09e-06 ***
## fromGRAČANICA -0.626988 0.101410 -6.183 6.30e-10 ***
## fromGRADAČAC -1.286781 0.104969 -12.259 < 2e-16 ***
## fromGRUDE -0.952303 0.139709 -6.816 9.34e-12 ***
## fromHADŽIĆI 0.273093 0.117427 2.326 0.020037 *
## fromILIDŽA 0.258437 0.090404 2.859 0.004254 **
## fromILIJAŠ 0.183944 0.109688 1.677 0.093547 .
## fromJABLANICA -1.016977 0.133053 -7.643 2.12e-14 ***
## fromJAJCE -2.163796 0.121101 -17.868 < 2e-16 ***
## fromKAKANJ 0.255589 0.110556 2.312 0.020786 *
## fromKALESIJA -0.845514 0.099051 -8.536 < 2e-16 ***
## fromKISELJAK -3.520519 0.151284 -23.271 < 2e-16 ***
## fromKLADANJ 0.113775 0.124198 0.916 0.359626
## fromKONJIC -0.601529 0.108707 -5.533 3.14e-08 ***
## fromKREŠEVO -1.806882 0.173641 -10.406 < 2e-16 ***
## fromKUPRES -2.703265 0.224256 -12.054 < 2e-16 ***
## fromLIVNO -2.770679 0.146119 -18.962 < 2e-16 ***
## fromLUKAVAC -0.017383 0.104357 -0.167 0.867708
## fromLJUBUŠKI -1.483737 0.118963 -12.472 < 2e-16 ***
## fromMAGLAJ -0.974966 0.108203 -9.011 < 2e-16 ***
## fromMOSTAR -2.793663 0.095779 -29.168 < 2e-16 ***
## fromNEUM -1.980464 0.194933 -10.160 < 2e-16 ***
## fromNOVI GRAD SARAJEVO 0.265274 0.086984 3.050 0.002291 **
## fromNOVI TRAVNIK -0.736883 0.111046 -6.636 3.23e-11 ***
## fromNOVO SARAJEVO 0.551792 0.088917 6.206 5.45e-10 ***
## fromODŽAK -3.997316 0.222748 -17.945 < 2e-16 ***
## fromOLOVO 1.080579 0.148379 7.283 3.27e-13 ***
## fromORAŠJE -3.198877 0.187849 -17.029 < 2e-16 ***
## fromPALE 1.591515 0.476553 3.340 0.000839 ***
## fromPOSUŠJE -1.226475 0.147059 -8.340 < 2e-16 ***
## fromPROZOR -3.209364 0.163825 -19.590 < 2e-16 ***
## fromRAVNO -1.794998 0.247595 -7.250 4.18e-13 ***
## fromSANSKI MOST -1.887805 0.101123 -18.668 < 2e-16 ***
## fromSAPNA -0.800534 0.120128 -6.664 2.66e-11 ***
## fromSARAJEVO CENTAR 0.546243 0.090889 6.010 1.86e-09 ***
## fromSARAJEVO STARI GRAD 1.013371 0.101700 9.964 < 2e-16 ***
## fromSREBRENIK -0.909496 0.099318 -9.157 < 2e-16 ***
## fromSTOLAC -2.331330 0.134439 -17.341 < 2e-16 ***
## fromŠIROKI BRIJEG -1.451696 0.111680 -12.999 < 2e-16 ***
## fromTEOČAK -1.291384 0.141337 -9.137 < 2e-16 ***
## fromTEŠANJ -1.342465 0.099890 -13.439 < 2e-16 ***
## fromTOMISLAVGRAD -1.962212 0.135650 -14.465 < 2e-16 ***
## fromTRAVNIK -0.812235 0.095905 -8.469 < 2e-16 ***
## fromTRNOVO 1.434286 0.209735 6.839 8.00e-12 ***
## fromTUZLA -0.736229 0.089252 -8.249 < 2e-16 ***
## fromUSORA -3.338471 0.280520 -11.901 < 2e-16 ***
## fromVAREŠ 0.120687 0.140774 0.857 0.391272
## fromVELIKA KLADUŠA -0.989105 0.099850 -9.906 < 2e-16 ***
## fromVISOKO 0.181501 0.108448 1.674 0.094205 .
## fromVITEZ -4.481120 0.183623 -24.404 < 2e-16 ***
## fromVOGOŠĆA 0.441838 0.103958 4.250 2.14e-05 ***
## fromZAVIDOVIĆI -0.586802 0.103035 -5.695 1.23e-08 ***
## fromZENICA -0.760239 0.089683 -8.477 < 2e-16 ***
## fromŽEPČE -4.160610 0.171535 -24.255 < 2e-16 ***
## fromŽIVINICE -0.551747 0.096741 -5.703 1.17e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 113551 on 86371 degrees of freedom
## Residual deviance: 92061 on 86289 degrees of freedom
## (2377 observations deleted due to missingness)
## AIC: 92227
##
## Number of Fisher Scoring iterations: 5
# Alternative specification on the same 20+ sample: the origin-municipality
# dummies are replaced by the origin's ethnic majority (Majority_from), and
# year enters as a set of dummies via factor(Y) instead of a numeric term.
model2a <- glm(same_majority ~ Age_group + Sex + Majority_from + factor(Y),
family = binomial(link = "logit"), data = sve.mig.ethnic.above19)
# Coefficient table, deviances and AIC for this model
summary(model2a)
##
## Call:
## glm(formula = same_majority ~ Age_group + Sex + Majority_from +
## factor(Y), family = binomial(link = "logit"), data = sve.mig.ethnic.above19)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.611e-01 2.180e-02 44.082 < 2e-16 ***
## Age_group35-49 -4.007e-01 1.895e-02 -21.137 < 2e-16 ***
## Age_group50-64 -4.951e-01 2.293e-02 -21.594 < 2e-16 ***
## Age_group65+ -9.916e-01 2.529e-02 -39.212 < 2e-16 ***
## SexŽ 4.019e-01 1.580e-02 25.447 < 2e-16 ***
## Majority_fromCroat -2.010e+00 2.587e-02 -77.721 < 2e-16 ***
## Majority_fromNone -2.424e+00 4.078e-02 -59.428 < 2e-16 ***
## Majority_fromSerb 3.528e-01 6.682e-02 5.280 1.29e-07 ***
## factor(Y)2019 3.188e-14 2.404e-02 0.000 1.000
## factor(Y)2020 -1.952e-01 2.440e-02 -8.000 1.25e-15 ***
## factor(Y)2021 -1.459e-02 2.421e-02 -0.603 0.547
## factor(Y)2022 2.519e-02 2.413e-02 1.044 0.297
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 113551 on 86371 degrees of freedom
## Residual deviance: 100185 on 86360 degrees of freedom
## (2377 observations deleted due to missingness)
## AIC: 100209
##
## Number of Fisher Scoring iterations: 4
# Side-by-side text table of both models ("Model 1" = model2, "Model 2" =
# model2a): coefficients with standard errors in parentheses, plus fit
# statistics. texreg is called via `::` so it is not attached.
texreg::screenreg(list(model2, model2a))
##
## =======================================================
## Model 1 Model 2
## -------------------------------------------------------
## (Intercept) -9.77 0.96 ***
## (11.41) (0.02)
## Age_group35-49 -0.51 *** -0.40 ***
## (0.02) (0.02)
## Age_group50-64 -0.66 *** -0.50 ***
## (0.02) (0.02)
## Age_group65+ -1.22 *** -0.99 ***
## (0.03) (0.03)
## SexŽ 0.49 *** 0.40 ***
## (0.02) (0.02)
## Y 0.01
## (0.01)
## fromBIHAĆ -0.87 ***
## (0.10)
## fromBOSANSKA KRUPA -0.93 ***
## (0.10)
## fromBOSANSKI PETROVAC -0.14
## (0.12)
## fromBOSANSKO GRAHOVO -0.15
## (0.19)
## fromBREZA 0.97 ***
## (0.16)
## fromBUGOJNO -0.79 ***
## (0.10)
## fromBUSOVAČA -3.40 ***
## (0.16)
## fromBUŽIM -0.56 ***
## (0.11)
## fromCAZIN -1.01 ***
## (0.09)
## fromČAPLJINA -1.91 ***
## (0.11)
## fromČELIĆ -1.78 ***
## (0.13)
## fromČITLUK -1.06 ***
## (0.12)
## fromDOBOJ ISTOK -0.96 ***
## (0.13)
## fromDOBOJ JUG -0.58 ***
## (0.15)
## fromDOBRETIĆI -2.86 ***
## (0.49)
## fromDOMALJEVAC/ŠAMAC -1.80 ***
## (0.21)
## fromDONJI VAKUF -0.22
## (0.13)
## fromDRVAR 0.58 ***
## (0.14)
## fromFOČA 0.04
## (0.19)
## fromFOJNICA -1.12 ***
## (0.13)
## fromGLAMOČ -2.66 ***
## (0.18)
## fromGORAŽDE -0.57 ***
## (0.10)
## fromGORNJI VAKUF-USKOPLJE -0.56 ***
## (0.12)
## fromGRAČANICA -0.63 ***
## (0.10)
## fromGRADAČAC -1.29 ***
## (0.10)
## fromGRUDE -0.95 ***
## (0.14)
## fromHADŽIĆI 0.27 *
## (0.12)
## fromILIDŽA 0.26 **
## (0.09)
## fromILIJAŠ 0.18
## (0.11)
## fromJABLANICA -1.02 ***
## (0.13)
## fromJAJCE -2.16 ***
## (0.12)
## fromKAKANJ 0.26 *
## (0.11)
## fromKALESIJA -0.85 ***
## (0.10)
## fromKISELJAK -3.52 ***
## (0.15)
## fromKLADANJ 0.11
## (0.12)
## fromKONJIC -0.60 ***
## (0.11)
## fromKREŠEVO -1.81 ***
## (0.17)
## fromKUPRES -2.70 ***
## (0.22)
## fromLIVNO -2.77 ***
## (0.15)
## fromLUKAVAC -0.02
## (0.10)
## fromLJUBUŠKI -1.48 ***
## (0.12)
## fromMAGLAJ -0.97 ***
## (0.11)
## fromMOSTAR -2.79 ***
## (0.10)
## fromNEUM -1.98 ***
## (0.19)
## fromNOVI GRAD SARAJEVO 0.27 **
## (0.09)
## fromNOVI TRAVNIK -0.74 ***
## (0.11)
## fromNOVO SARAJEVO 0.55 ***
## (0.09)
## fromODŽAK -4.00 ***
## (0.22)
## fromOLOVO 1.08 ***
## (0.15)
## fromORAŠJE -3.20 ***
## (0.19)
## fromPALE 1.59 ***
## (0.48)
## fromPOSUŠJE -1.23 ***
## (0.15)
## fromPROZOR -3.21 ***
## (0.16)
## fromRAVNO -1.79 ***
## (0.25)
## fromSANSKI MOST -1.89 ***
## (0.10)
## fromSAPNA -0.80 ***
## (0.12)
## fromSARAJEVO CENTAR 0.55 ***
## (0.09)
## fromSARAJEVO STARI GRAD 1.01 ***
## (0.10)
## fromSREBRENIK -0.91 ***
## (0.10)
## fromSTOLAC -2.33 ***
## (0.13)
## fromŠIROKI BRIJEG -1.45 ***
## (0.11)
## fromTEOČAK -1.29 ***
## (0.14)
## fromTEŠANJ -1.34 ***
## (0.10)
## fromTOMISLAVGRAD -1.96 ***
## (0.14)
## fromTRAVNIK -0.81 ***
## (0.10)
## fromTRNOVO 1.43 ***
## (0.21)
## fromTUZLA -0.74 ***
## (0.09)
## fromUSORA -3.34 ***
## (0.28)
## fromVAREŠ 0.12
## (0.14)
## fromVELIKA KLADUŠA -0.99 ***
## (0.10)
## fromVISOKO 0.18
## (0.11)
## fromVITEZ -4.48 ***
## (0.18)
## fromVOGOŠĆA 0.44 ***
## (0.10)
## fromZAVIDOVIĆI -0.59 ***
## (0.10)
## fromZENICA -0.76 ***
## (0.09)
## fromŽEPČE -4.16 ***
## (0.17)
## fromŽIVINICE -0.55 ***
## (0.10)
## Majority_fromCroat -2.01 ***
## (0.03)
## Majority_fromNone -2.42 ***
## (0.04)
## Majority_fromSerb 0.35 ***
## (0.07)
## factor(Y)2019 0.00
## (0.02)
## factor(Y)2020 -0.20 ***
## (0.02)
## factor(Y)2021 -0.01
## (0.02)
## factor(Y)2022 0.03
## (0.02)
## -------------------------------------------------------
## AIC 92226.82 100209.44
## BIC 93004.24 100321.83
## Log Likelihood -46030.41 -50092.72
## Deviance 92060.82 100185.44
## Num. obs. 86372 86372
## =======================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05
This is a logistic regression model (GLM) with a binomial family. It
predicts the probability that a migrant moves to a municipality with the
same ethnic majority as the municipality of origin, based on age group, sex,
the ethnic majority of the origin municipality (Majority_from), and the year
of migration (Y). The outcome variable is same_majority, which equals 1 when
the origin and the destination share the same ethnic majority and 0 otherwise.
Intercept (0.9611): The log-odds of moving to a municipality with the same ethnic majority for the reference categories (age 20-34, male, from Bosniak majority, in 2018). A positive intercept suggests a higher likelihood of such a move for the reference group.
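Age Groups: Relative to the reference group (20-34), every older group has lower odds of moving to a municipality with the same ethnic majority, and the gap widens with age (log-odds: 35-49 = -0.4007, 50-64 = -0.4951, 65+ = -0.9916).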
Sex (Female = 0.4019): Females have higher odds of moving between areas of the same ethnic majority compared to males (reference category).
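Majority_from: Relative to migrants from Bosniak-majority municipalities (reference), migrants from Croat-majority (-2.010) and "None" (-2.424) origins have much lower odds of a same-majority move, while migrants from Serb-majority origins have somewhat higher odds (0.3528).
Year (factor(Y)): Relative to 2018, only 2020 differs significantly (-0.1952); the 2021 (-0.0146) and 2022 (0.0252) coefficients are not significant. The 2019 coefficient is numerically zero (3.2e-14), which is what one would expect if the 2018 and 2019 input data were identical, so the inputs should be checked.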
Most coefficients are statistically significant at the 0.001 level, as indicated by the *** symbols, meaning the relationships observed in the model are unlikely to have occurred by chance.
Yes, from the logistic regression model, we can calculate the predicted probabilities of migration to a municipality with the same ethnic majority group for each ethnic group, holding other variables constant.
The logistic regression equation is:
\[ P(\text{same\_majority} = 1) = \frac{1}{1 + e^{-(\beta_0 + \beta_1 X_1 + \dots + \beta_k X_k)}} \]
Where: - \(\beta_0\) is the intercept. - \(\beta_1, \beta_2, \dots\) are the coefficients for the respective variables. - \(X_1, X_2, \dots\) are the values of the predictor variables (like ethnic majority).
Majority_from and Majority_to.
These probabilities reflect the likelihood of individuals from each ethnic majority group migrating to a municipality with the same ethnic majority. Would you like to explore any of these groups further or include other variables in the calculation?