Init

Packages and options.

#options
options(digits = 2,
        tibble.print_max = 30)

#minimum group size
min_n = 25

#packages
library(pacman)
p_load(kirkegaard, readxl, rms, haven, sf, tmap, ggeffects)

#default theme
theme_set(theme_bw())

#override describe so psych output prints as a plain matrix (respecting the digits option)
describe = function(...) psych::describe(...) %>% as.matrix()

Data

#all years mixed together
#we don't use these because we cannot adjust for grade inflation
#but they cover a few more countries
raw_all_years = read_excel("data/Resultater - Folkeskolens Afgangseksamen all years together.xlsx",
                           range = "A5:D151") %>% 
  set_colnames(c("country_region", "country", "GPA", "n")) %>% 
  filter(!is.na(country), country != "<ukendt>", country != "Mellemøsten, uoplyst land")

#read by year data
raw = read_excel("data/data.xlsx", range = "A5:E728") %>% 
  set_colnames(c("year", "country_region", "country", "GPA", "n"))

#countries only (remove regions and unknowns)
countries = raw %>% filter(!is.na(country), country != "<ukendt>", country != "Mellemøsten, uoplyst land")

#mutate
countries %<>% 
  mutate(
    year = str_match(year, "\\/(\\d+)")[, 2] %>% as.numeric(),
    year_fct = factor(year),
    ISO = pu_translate(country)
  )
## No exact match: Saudi Arabien
## Best fuzzy match found: Saudi Arabien -> Saudi-Arabien with distance 1.00
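A quick illustration of the year parsing above (a sketch, assuming school-year labels like "2017/2018"):

#example: keep the calendar year after the slash
str_match("2017/2018", "\\/(\\d+)")[, 2] %>% as.numeric()
## [1] 2018
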
#adjust for grade inflation before averaging across years
#grades drift upward over time, so we fit a model and regress out the year effect
#this preserves the original grade scale
GPA_inflation_adjust_fit = ols(GPA ~ year_fct, data = countries, weights = countries$n)
countries$GPA_adj = GPA_inflation_adjust_fit %>% resid() %>% add(coef(GPA_inflation_adjust_fit)[1])
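
As a sanity check (a sketch using the objects just created), regressing the adjusted GPA on year should return essentially zero year coefficients:

#sanity check (sketch): year effects should be ~0 after adjustment
sanity_fit = ols(GPA_adj ~ year_fct, data = countries, weights = countries$n)
round(coef(sanity_fit)[-1], 10)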

countries_avg = plyr::ddply(countries, c("country", "ISO"), function(dd) {
  tibble(
    n = wtd_sum(dd$n),
    n_sqrt = sqrt(n),
    GPA = wtd_mean(dd$GPA_adj, dd$n)
  )
})
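
wtd_sum and wtd_mean come from the kirkegaard package; a minimal sketch of their assumed semantics (NA-tolerant weighted sum and mean, not the package's actual source):

#assumed semantics (sketch)
wtd_sum_sketch = function(x, w = rep(1, length(x))) sum(x * w, na.rm = T)
wtd_mean_sketch = function(x, w = rep(1, length(x))) weighted.mean(x, w, na.rm = T)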

#by generation
raw_bg = read_excel("data/data generation.xlsx", range = "A5:F1107") %>% 
  set_colnames(c("year", "generation", "country_region", "country", "GPA", "n"))

#countries only (remove regions and unknowns)
countries_bg = raw_bg %>% filter(!is.na(country), country != "<ukendt>", country != "Mellemøsten, uoplyst land")

#mutate
countries_bg %<>% 
  mutate(
    year = str_match(year, "\\/(\\d+)")[, 2] %>% as.numeric(),
    year_fct = factor(year),
    ISO = pu_translate(country),
    first_gen = (generation == "Indvandrer")
  )
## No exact match: Saudi Arabien
## Best fuzzy match found: Saudi Arabien -> Saudi-Arabien with distance 1.00
#inflation-adjust as above, then average across years
GPA_inflation_adjust_bg_fit = ols(GPA ~ year_fct, data = countries_bg, weights = countries_bg$n)
countries_bg$GPA_adj = GPA_inflation_adjust_bg_fit %>% resid() %>% add(coef(GPA_inflation_adjust_bg_fit)[1])

countries_avg_bg = plyr::ddply(countries_bg, c("country", "ISO", "first_gen"), function(dd) {
  tibble(
    n = wtd_sum(dd$n),
    n_sqrt = sqrt(n),
    GPA = wtd_mean(dd$GPA_adj, dd$n)
  )
})

#national data from prior studies
#Danish immigrant and stereotype study
# dk_immigrant_pref_study = read_rds("data/dk_immigrant_pref_global_envir.rds")
#Dutch stereotype study
nl_stereotype_study = read_rds("data/dutch-stereotype_data_out.rds")
#cognitive elite study
natl_time_pref_study = read_rds("data/time_prefdata_out.rds")
#Danish GPA 2015 study https://openpsych.net/paper/36
# dk_gpa_study = read_rds("data/")

#World Bank Human Capital Index (2018)
worldbank_hci_2018 = read_excel("data/hcidataoctober2018.xlsx", sheet = 4) %>% 
  df_legalize_names() %>% 
  mutate(
    ISO = pu_translate(Country_Name),
    IQwb = HUMAN_CAPITAL_INDEX
  )
## No exact match: eSwatini
## Best fuzzy match found: eSwatini -> Eswatini with distance 2.00
#PIAAC and employment gaps
PIAAC_employment_gaps = read_excel("data/PIAAC employment.xlsx")

#spatial data
worldmap = read_sf("data/TM_WORLD_BORDERS-0.3/TM_WORLD_BORDERS-0.3.shp") %>% 
  rename(
    ISO = ISO3
  )

#brain drain
brain_drain_oecd = read_dta("data/brain drain/iabbd_8010_v1.dta") %>% 
  mutate(
    ISO = ccode_origin,
    high_fraction = high / tot
  )

#Denmark as destination, years 1995-2005
brain_drain_oecd_dk = brain_drain_oecd %>% 
  filter(destination == "Denmark")

#sum across years and sexes
brain_drain_oecd_dk_sum = brain_drain_oecd_dk %>% 
  plyr::ddply(c("origin", "ISO"), function(dd) {
    tibble(
      total = sum(dd$tot),
      low = sum(dd$low),
      med = sum(dd$med),
      high = sum(dd$high),
      high_fraction = high / total
    )
  }) %>% 
  #keep origins with total >= min_n
  filter(total >= min_n)

#merge
brain_drain_oecd_dk_sum = brain_drain_oecd_dk_sum %>% left_join(natl_time_pref_study %>% select(ISO, Mean_years_of_schooling), by = "ISO")

#model
(brain_drain_model = ols(high_fraction ~ Mean_years_of_schooling, data = brain_drain_oecd_dk_sum))
## Frequencies of Missing Values Due to Each Variable
##           high_fraction Mean_years_of_schooling 
##                       0                      10 
## 
## Linear Regression Model
##  
##  ols(formula = high_fraction ~ Mean_years_of_schooling, data = brain_drain_oecd_dk_sum)
##  
##  
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
##  Obs     156    LR chi2      9.46    R2       0.059    
##  sigma0.0816    d.f.            1    R2 adj   0.053    
##  d.f.    154    Pr(> chi2) 0.0021    g        0.023    
##  
##  Residuals
##  
##        Min        1Q    Median        3Q       Max 
##  -0.199226 -0.051772 -0.001017  0.049583  0.226157 
##  
##  
##                          Coef   S.E.   t     Pr(>|t|)
##  Intercept               0.1993 0.0188 10.59 <0.0001 
##  Mean_years_of_schooling 0.0064 0.0021  3.10 0.0023  
## 
brain_drain_oecd_dk_sum$edu_selection = resid(brain_drain_model) %>% standardize()

#check: by construction, the selection index is uncorrelated with schooling; then plot
brain_drain_oecd_dk_sum %>% select(edu_selection, high_fraction, Mean_years_of_schooling) %>% wtd.cors()
##                         edu_selection high_fraction Mean_years_of_schooling
## edu_selection                 1.0e+00          0.97                -5.2e-17
## high_fraction                 9.7e-01          1.00                 2.4e-01
## Mean_years_of_schooling      -5.2e-17          0.24                 1.0e+00
GG_scatter(brain_drain_oecd_dk_sum, "Mean_years_of_schooling", "high_fraction") +
  ylab("High education % among emigrants to Denmark")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/brain_brain_to_dk.png")
## `geom_smooth()` using formula 'y ~ x'
#selection by year
brain_drain_oecd_dk_sum_by_year = brain_drain_oecd %>% 
  filter(destination == "Denmark") %>% 
    plyr::ddply(c("origin", "ISO", "year"), function(dd) {
    tibble(
      total = sum(dd$tot),
      low = sum(dd$low),
      med = sum(dd$med),
      high = sum(dd$high),
      high_fraction = high / total
    )
  }) %>% 
  #keep origins with total >= min_n
  filter(total >= min_n)

#variance between countries over time?
lm(high_fraction ~ ISO + year, data = brain_drain_oecd_dk_sum_by_year, weights = brain_drain_oecd_dk_sum_by_year$total) %>% 
  stats::aov() %>% 
  sjstats::anova_stats()
#merge
d = left_join(countries_avg, natl_time_pref_study %>% select(ISO, NAME, IQlv, IQr, IQb, IslamPewResearch2010), by = "ISO") %>% 
  left_join(nl_stereotype_study %>% select(ISO, Muslim), by = "ISO") %>% 
  left_join(brain_drain_oecd_dk_sum %>% select(ISO, high_fraction, edu_selection)) %>% 
  left_join(worldbank_hci_2018 %>% select(ISO, IQwb), by = "ISO") %>% 
  mutate(
    NAME = pu_translate(ISO, reverse = T)
  )
## Joining, by = "ISO"
assert_that(!any(duplicated(d$ISO)))
## [1] TRUE
#merge bg
d_bg = left_join(countries_avg_bg, natl_time_pref_study %>% select(ISO, NAME, IQlv, IQr, IQb, IslamPewResearch2010), by = "ISO") %>% 
  left_join(nl_stereotype_study %>% select(ISO, Muslim), by = "ISO") %>% 
  left_join(brain_drain_oecd_dk_sum %>% select(ISO, high_fraction, edu_selection)) %>% 
  left_join(worldbank_hci_2018 %>% select(ISO, IQwb), by = "ISO") %>% 
  mutate(
    NAME = pu_translate(ISO, reverse = T)
  )
## Joining, by = "ISO"
#fill in Muslim % from the Pew 2010 estimates where missing
d$Muslim = miss_fill(d$Muslim, d$IslamPewResearch2010)
d_bg$Muslim = miss_fill(d_bg$Muslim, d_bg$IslamPewResearch2010)
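
miss_fill is assumed to fill missing values in its first argument from its second, like a coalesce; a minimal sketch of that behavior:

#assumed behavior (sketch): keep x where present, else fall back to y
miss_fill_sketch = function(x, y) ifelse(is.na(x), y, x)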

#add missing data
#Kosovo is 95.6% Muslim
#https://www.britannica.com/place/Kosovo/Religion
d$Muslim[which(d$ISO == "KSV")] = .956
d_bg$Muslim[which(d_bg$ISO == "KSV")] = .956

#Serbia and Montenegro
#Montenegro 3.3% muslim https://www.cia.gov/library/publications/the-world-factbook/geos/print_mj.html
#Serbia 3.1% https://en.wikipedia.org/wiki/Islam_in_Serbia
d$Muslim[which(d$ISO == "SRBM")] = .031
d_bg$Muslim[which(d_bg$ISO == "SRBM")] = .031

#use Serbia's IQ for Kosovo, Serbia and Montenegro, and Yugoslavia
d$IQlv[which(d$ISO %in% c("KSV", "SRBM", "YUG"))] = 90.3
d_bg$IQlv[which(d_bg$ISO %in% c("KSV", "SRBM", "YUG"))] = 90.3

#use Becker's geographic estimate for the Cayman Islands
d$IQlv[which(d$ISO %in% c("CYM"))] = 82
d_bg$IQlv[which(d_bg$ISO %in% c("CYM"))] = 82

Analyses

Descriptives

#input data
countries$n %>% describe()
##    vars   n mean   sd median trimmed mad min   max range skew kurtosis  se
## X1    1 663  609 5301     13      33  13   3 53313 53310  9.6       89 206
countries$n %>% sum()
## [1] 4e+05
countries_avg$n %>% describe()
##    vars   n mean    sd median trimmed mad min    max  range skew kurtosis   se
## X1    1 116 3483 33685     60     168  79   3 363055 363052   10      109 3128
countries_avg$n %>% sum()
## [1] 4e+05
#without Denmark
countries_avg %>% 
  filter(ISO != "DNK") %>% 
  .$n %>% describe()
##    vars   n mean  sd median trimmed mad min  max range skew kurtosis se
## X1    1 115  356 811     59     156  76   3 5677  5674  3.9       18 76

Combined generations

#main predictors
main_preds = c("IQlv", "IQr", "IQb", "IQwb", "Muslim", "edu_selection")

#unweighted
GG_scatter(d, "IQlv", "GPA", case_names = "NAME")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/IQlv GPA unwt.png")
## `geom_smooth()` using formula 'y ~ x'
#weights
GG_scatter(d, "IQlv", "GPA", case_names = "NAME", weights = "n_sqrt")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/IQlv GPA wt.png")
## `geom_smooth()` using formula 'y ~ x'
#no dk
GG_scatter(d %>% filter(ISO != "DNK"), "IQlv", "GPA", case_names = "NAME", weights = "n_sqrt")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/IQlv GPA wt noDK.png")
## `geom_smooth()` using formula 'y ~ x'
#large only, no dk
GG_scatter(d %>% filter(ISO != "DNK", n >= min_n), "IQlv", "GPA", case_names = "NAME", weights = "n_sqrt")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/IQlv GPA wt noDK large.png")
## `geom_smooth()` using formula 'y ~ x'
#correlations
d %>% 
  filter(ISO != "DNK", n >= min_n) %>% 
  {
    print(wtd.cors(select(., GPA, !!main_preds), weight = .$n_sqrt))
    print(pairwiseCount(select(., GPA, !!main_preds)))
  }
##                 GPA   IQlv    IQr    IQb   IQwb Muslim edu_selection
## GPA            1.00  0.469  0.531  0.520  0.451  -0.40         0.351
## IQlv           0.47  1.000  0.978  0.897  0.842  -0.41        -0.042
## IQr            0.53  0.978  1.000  0.902  0.858  -0.44         0.015
## IQb            0.52  0.897  0.902  1.000  0.770  -0.47        -0.013
## IQwb           0.45  0.842  0.858  0.770  1.000  -0.51        -0.086
## Muslim        -0.40 -0.407 -0.442 -0.470 -0.511   1.00        -0.136
## edu_selection  0.35 -0.042  0.015 -0.013 -0.086  -0.14         1.000
##               GPA IQlv IQr IQb IQwb Muslim edu_selection
## GPA            81   81  77  78   72     81            71
## IQlv           81   81  77  78   72     81            71
## IQr            77   77  77  77   71     77            71
## IQb            78   78  77  78   71     78            71
## IQwb           72   72  71  71   72     72            66
## Muslim         81   81  77  78   72     81            71
## edu_selection  71   71  71  71   66     71            71
#combined correlation matrix: unweighted, no n filter (upper triangle); weighted, n >= min_n (lower triangle)
combine_upperlower(
  wtd.cors(select(d %>% filter(ISO != "DNK"), GPA, !!main_preds)),
  
  #weights
  d %>% 
    filter(ISO != "DNK", n >= min_n) %>% 
    {
      wtd.cors(select(., GPA, !!main_preds), weight = .$n_sqrt)
    }
)
##                 GPA   IQlv    IQr    IQb   IQwb Muslim edu_selection
## GPA              NA  0.553  0.591  0.567  0.525  -0.24        0.2619
## IQlv           0.47     NA  0.981  0.929  0.897  -0.36       -0.0024
## IQr            0.53  0.978     NA  0.932  0.923  -0.36        0.0194
## IQb            0.52  0.897  0.902     NA  0.874  -0.43       -0.0029
## IQwb           0.45  0.842  0.858  0.770     NA  -0.39       -0.0316
## Muslim        -0.40 -0.407 -0.442 -0.470 -0.511     NA       -0.1389
## edu_selection  0.35 -0.042  0.015 -0.013 -0.086  -0.14            NA
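combine_upperlower presumably places its first argument in the upper triangle and its second in the lower, with NA on the diagonal (as the output above shows); a sketch of that operation:

#sketch of the assumed combine_upperlower behavior
combine_upperlower_sketch = function(upper, lower) {
  out = matrix(NA, nrow(upper), ncol(upper), dimnames = dimnames(upper))
  out[upper.tri(out)] = upper[upper.tri(upper)]
  out[lower.tri(out)] = lower[lower.tri(lower)]
  out
}
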
#datasets for models
model_preds = c("IQlv", "Muslim", "edu_selection")

d_std_noDK = d %>% 
  df_standardize(messages = F, exclude = c("n", "n_sqrt")) %>% 
  filter(ISO != "DNK") %>% 
  filter(!is.na(IQlv), !is.na(Muslim), !is.na(edu_selection), n >= min_n)

d_noDK = d %>% 
  filter(ISO != "DNK") %>% 
  filter(!is.na(IQlv), !is.na(Muslim), !is.na(edu_selection), n >= min_n)

#models
model_forms = list(
  GPA ~ IQlv,
  GPA ~ Muslim,
  GPA ~ IQlv + Muslim,
  GPA ~ IQlv + Muslim + edu_selection
)

#fits
model_forms %>% 
  map(~ols(., data = d_std_noDK, weights = d_std_noDK$n_sqrt)) %>% 
  summarize_models()
#unstandardized
model_forms %>% 
  map(~ols(., data = d_noDK, weights = d_noDK$n_sqrt)) %>% 
  summarize_models()

By generation

#unweighted
GG_scatter(d_bg, "IQlv", "GPA", case_names = "NAME", color = "first_gen") +
  scale_color_discrete("First generation?") +
  theme(legend.position="bottom")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/bg/IQlv GPA unwt.png")
## `geom_smooth()` using formula 'y ~ x'
#weights
GG_scatter(d_bg, "IQlv", "GPA", case_names = "NAME", weights = "n_sqrt", color = "first_gen") +
  scale_color_discrete("First generation?") +
  theme(legend.position="bottom")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/bg/IQlv GPA wt.png")
## `geom_smooth()` using formula 'y ~ x'
#weights + no dk
GG_scatter(d_bg %>% filter(ISO != "DNK"), "IQlv", "GPA", case_names = "NAME", weights = "n_sqrt", color = "first_gen") +
  scale_color_discrete("First generation?") +
  theme(legend.position="bottom")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/bg/IQlv GPA wt noDK.png")
## `geom_smooth()` using formula 'y ~ x'
#large only, no dk
GG_scatter(d_bg %>% filter(ISO != "DNK", n >= min_n), "IQlv", "GPA", case_names = "NAME", weights = "n_sqrt", color = "first_gen") +
  scale_color_discrete("First generation?") +
  scale_x_continuous("National intelligence of origin country") +
  scale_y_continuous("GPA in Denmark") +
  theme(legend.position="bottom")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/bg/IQlv GPA wt noDK large.png")
## `geom_smooth()` using formula 'y ~ x'
#Muslim
GG_scatter(d_bg %>% filter(ISO != "DNK", n >= min_n), "Muslim", "GPA", case_names = "NAME", weights = "n_sqrt", color = "first_gen") +
  scale_color_discrete("First generation?") +
  scale_x_continuous("Muslims in origin country", labels = scales::percent) +
  scale_y_continuous("GPA in Denmark") +
  theme(legend.position="bottom")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/bg/Muslim GPA wt noDK large.png")
## `geom_smooth()` using formula 'y ~ x'
#Education selection
GG_scatter(d_bg %>% filter(ISO != "DNK", n >= min_n), "edu_selection", "GPA", case_names = "NAME", weights = "n_sqrt", color = "first_gen") +
  scale_color_discrete("First generation?") +
  scale_x_continuous("Educational selection of migrants") +
  scale_y_continuous("GPA in Denmark") +
  theme(legend.position="bottom")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/bg/edu selection GPA wt noDK large.png")
## `geom_smooth()` using formula 'y ~ x'
#datasets for models (standardized and raw)
model_preds = c("IQlv", "Muslim", "first_gen", "edu_selection")

d_bg_std_noDK = d_bg %>% 
  df_standardize(messages = F, exclude = c("n", "n_sqrt")) %>% 
  filter(ISO != "DNK") %>% 
  filter(!is.na(IQlv), !is.na(Muslim), !is.na(first_gen), !is.na(edu_selection), n >= min_n)

d_bg_noDK = d_bg %>% 
  filter(ISO != "DNK") %>% 
  filter(!is.na(IQlv), !is.na(Muslim), !is.na(first_gen), !is.na(edu_selection), n >= min_n)

#models
model_forms = list(
  GPA ~ first_gen + IQlv,
  GPA ~ first_gen + Muslim,
  GPA ~ first_gen + IQlv + Muslim,
  GPA ~ first_gen + IQlv + Muslim + edu_selection
)

#fits
model_forms %>% 
  map(~ols(., data = d_bg_std_noDK, weights = d_bg_std_noDK$n_sqrt)) %>% 
  summarize_models()
#unstandardized
model_forms %>% 
  map(~ols(., data = d_bg_noDK, weights = d_bg_noDK$n_sqrt)) %>% 
  summarize_models()
#model predictions
ols(model_forms[[4]], data = d_bg_noDK %>% mutate(first_gen = factor(first_gen)), weights = d_bg_noDK$n_sqrt) %>% 
  ggeffects::ggpredict(c("IQlv", "first_gen")) %>% 
  plot()

ols(model_forms[[4]], data = d_bg_noDK %>% mutate(first_gen = factor(first_gen)), weights = d_bg_noDK$n_sqrt) %>% 
  ggeffects::ggpredict(c("Muslim", "first_gen")) %>% 
  plot()

ols(model_forms[[4]], data = d_bg_noDK %>% mutate(first_gen = factor(first_gen)), weights = d_bg_noDK$n_sqrt) %>% 
  ggeffects::ggpredict(c("edu_selection", "first_gen")) %>% 
  plot()

By included countries

Vary the minimum sample size and see how this affects results.

#compute
cor_results_by_minimum_n = map_df(5:200, function(min_n) {
  #subset
  d %>% 
    filter(ISO != "DNK", n >= min_n) ->
    d_local
  
  #compute correlations
  cors = wtd.cors(select(d_local, GPA, !!main_preds), weight = d_local$n_sqrt)
  
  #save as long form
  cors %>% 
    reshape2::melt() %>% 
    mutate(
      min_n = min_n,
      n = nrow(d_local)
      )
})

#plot
cor_results_by_minimum_n %>% 
  filter(Var2 == "GPA", Var1 != "GPA") %>% 
  ggplot(aes(min_n, value, color = Var1)) +
  geom_line() +
  scale_color_discrete("Predictor") +
  scale_y_continuous("Correlation with GPA") +
  scale_x_continuous("Minimum required sample size for GPA")

GG_save("figs/min_n_results.png")

#model results
model_results_by_minimum_n = map_df(5:200, function(min_n) {
  #subset
  #standardize inside the loop so the scaling is not affected by the earlier filtering
  d_bg %>% 
    df_standardize(messages = F, exclude = c("n", "n_sqrt")) %>% 
    filter(ISO != "DNK", n >= min_n) ->
    d_local
  
  #fit the model
  model_fit = ols(GPA ~ first_gen + IQlv + Muslim + edu_selection, data = d_local, weights = d_local$n_sqrt)

  #save as long form
  model_fit %>% 
    summary.lm() %>% 
    broom::tidy() %>% 
    mutate(
      min_n = min_n,
      n = nrow(d_local)
    )
})

#plot
model_results_by_minimum_n %>% 
  filter(term != "Intercept") %>% 
  mutate(
    p_low = p.value < .01
  ) %>% 
  ggplot(aes(min_n, estimate, color = term, linetype = p_low)) +
  geom_line() +
  scale_color_discrete("Predictor") +
  scale_linetype_discrete("p < .01") +
  scale_y_continuous("Standardized beta") +
  scale_x_continuous("Minimum required sample size for GPA")

GG_save("figs/min_n_results_model.png")

#correlations of predictors with sample size
d %>% select(!!main_preds, n) %>% wtd.cors()
##                  IQlv    IQr     IQb   IQwb Muslim edu_selection      n
## IQlv           1.0000  0.981  0.9296  0.898 -0.367       -0.0024  0.096
## IQr            0.9811  1.000  0.9323  0.924 -0.361        0.0194  0.108
## IQb            0.9296  0.932  1.0000  0.876 -0.431       -0.0029  0.106
## IQwb           0.8981  0.924  0.8759  1.000 -0.393       -0.0316  0.121
## Muslim        -0.3666 -0.361 -0.4306 -0.393  1.000       -0.1389 -0.053
## edu_selection -0.0024  0.019 -0.0029 -0.032 -0.139        1.0000 -0.321
## n              0.0958  0.108  0.1061  0.121 -0.053       -0.3206  1.000

Maps

Merge as needed and then plot. This is tricky because some origin countries no longer exist.

#GPA data
#which are we missing?
setdiff(d$ISO, worldmap$ISO)
## [1] "YUG"  "KSV"  "SRBM"
#ignore the Yugoslavia problem
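
Had the defunct codes mattered, a hypothetical workaround (not used here) would be to pool them into present-day Serbia before joining, at the cost of conflating Kosovo with Serbia proper:

#hypothetical workaround (not run): pool defunct codes into SRB, weighting by n
# d %>% 
#   mutate(ISO = ifelse(ISO %in% c("YUG", "KSV", "SRBM"), "SRB", ISO)) %>% 
#   group_by(ISO) %>% 
#   summarise(GPA = weighted.mean(GPA, n), n = sum(n))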

#prep GPA data
d_combined = d %>% 
  select(ISO, GPA, n, IQlv, Muslim, edu_selection) %>% 
  left_join(
    d_bg %>% 
      select(ISO, GPA, first_gen) %>% 
      mutate(first_gen = first_gen %>% mapvalues(c(T, F), c("GPA_1st_gen", "GPA_2nd_gen"))) %>% 
      pivot_wider(names_from = "first_gen", values_from = "GPA"),
    by = "ISO"
  )

left_join(worldmap, d_combined, by = "ISO") %>% 
  st_make_valid() ->
  GPA_map_data

#GPA
GPA_map_data %>% 
  tm_shape() +
  tm_polygons("GPA", title = "Mean GPA") ->
  GPA_map

GPA_map

tmap_save(GPA_map, filename = "figs/GPA_map.png")
## Map saved to /media/luks8tb2/Projects/Denmark GPA origin country/figs/GPA_map.png
## Resolution: 3054 by 1444 pixels
## Size: 10 by 4.8 inches (300 dpi)
#1st gen
GPA_map_data %>% 
  tm_shape() +
  tm_polygons("GPA_1st_gen", title = "Mean GPA, 1st gen.") ->
  GPA_1st_map

GPA_1st_map

tmap_save(GPA_1st_map, filename = "figs/GPA_1st_map.png")
## Map saved to /media/luks8tb2/Projects/Denmark GPA origin country/figs/GPA_1st_map.png
## Resolution: 3054 by 1444 pixels
## Size: 10 by 4.8 inches (300 dpi)
#2nd gen
GPA_map_data %>% 
  tm_shape() +
  tm_polygons("GPA_2nd_gen", title = "Mean GPA, 2nd gen.") ->
  GPA_2nd_map

GPA_2nd_map

tmap_save(GPA_2nd_map, filename = "figs/GPA_2nd_map.png")
## Map saved to /media/luks8tb2/Projects/Denmark GPA origin country/figs/GPA_2nd_map.png
## Resolution: 3054 by 1444 pixels
## Size: 10 by 4.8 inches (300 dpi)
#selection index into Denmark
left_join(worldmap, brain_drain_oecd_dk_sum, by = "ISO") %>% 
  st_make_valid() ->
  selection_map_data

selection_map_data %>% 
  tm_shape() +
  tm_polygons("edu_selection", title = "Selection index into Denmark") ->
  selection_map

selection_map
## Variable(s) "edu_selection" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(selection_map, filename = "figs/selection_map.png")
## Variable(s) "edu_selection" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Map saved to /media/luks8tb2/Projects/Denmark GPA origin country/figs/selection_map.png
## Resolution: 3054 by 1444 pixels
## Size: 10 by 4.8 inches (300 dpi)

PIAAC and employment

#original plot, axes reversed relative to the presumed causal direction
GG_scatter(PIAAC_employment_gaps, "Employment_gap", "PIAAC_gap", case_names = "Country") +
  xlab("Employment rate gap") +
  ylab("PIAAC test score gap")
## `geom_smooth()` using formula 'y ~ x'

#axes in the presumed causal direction: test score gap predicting employment gap
GG_scatter(PIAAC_employment_gaps, "PIAAC_gap", "Employment_gap", case_names = "Country") +
  ylab("Employment rate gap") +
  xlab("PIAAC test score gap")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/PIAAC_employment.png")
## `geom_smooth()` using formula 'y ~ x'

Appendix data table

#combine the variables by generation to wide format
d %>% 
  select(ISO, GPA, n, IQlv, Muslim, edu_selection) %>% 
  left_join(
    d_bg %>% 
      select(ISO, GPA, first_gen) %>% 
      mutate(first_gen = first_gen %>% mapvalues(c(T, F), c("GPA_1st_gen", "GPA_2nd_gen"))) %>% 
      pivot_wider(names_from = "first_gen", values_from = "GPA"),
    by = "ISO"
  ) %>% 
  select(ISO, IQlv, Muslim, edu_selection, n, GPA, GPA_1st_gen, GPA_2nd_gen)

Meta

#versions
write_sessioninfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=de_DE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=de_DE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggeffects_0.14.3      tmap_3.0              sf_0.9-3             
##  [4] haven_2.3.1           rms_5.1-4             SparseM_1.78         
##  [7] readxl_1.3.1          kirkegaard_2020-07-02 metafor_2.4-0        
## [10] Matrix_1.2-18         psych_1.9.12.31       magrittr_1.5         
## [13] assertthat_0.2.1      weights_1.0.1         mice_3.9.0           
## [16] gdata_2.18.0          Hmisc_4.4-0           Formula_1.2-3        
## [19] survival_3.1-12       lattice_0.20-41       forcats_0.5.0        
## [22] stringr_1.4.0         dplyr_0.8.99.9003     purrr_0.3.4          
## [25] readr_1.3.1           tidyr_1.1.0           tibble_3.0.1         
## [28] ggplot2_3.3.2         tidyverse_1.3.0       pacman_0.5.1         
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.1.8     lwgeom_0.2-4        plyr_1.8.6         
##   [4] sp_1.4-2            splines_4.0.1       crosstalk_1.1.0.1  
##   [7] leaflet_2.0.3       TH.data_1.0-10      digest_0.6.25      
##  [10] htmltools_0.5.0     fansi_0.4.1         checkmate_2.0.0    
##  [13] cluster_2.1.0       modelr_0.1.8        sandwich_2.5-1     
##  [16] jpeg_0.1-8.1        colorspace_1.4-1    blob_1.2.1         
##  [19] rvest_0.3.5         xfun_0.15           leafem_0.1.1       
##  [22] crayon_1.3.4        jsonlite_1.7.0      lme4_1.1-23        
##  [25] zoo_1.8-8           glue_1.4.1          stars_0.4-1        
##  [28] gtable_0.3.0        emmeans_1.4.6       MatrixModels_0.4-1 
##  [31] sjmisc_2.8.4        sjstats_0.18.0      abind_1.4-5        
##  [34] scales_1.1.1        mvtnorm_1.1-0       DBI_1.1.0          
##  [37] Rcpp_1.0.4.6        performance_0.4.6   xtable_1.8-4       
##  [40] viridisLite_0.3.0   htmlTable_2.0.0     tmvnsim_1.0-2      
##  [43] units_0.6-6         foreign_0.8-79      htmlwidgets_1.5.1  
##  [46] httr_1.4.1          RColorBrewer_1.1-2  acepack_1.4.1      
##  [49] ellipsis_0.3.1      pkgconfig_2.0.3     XML_3.99-0.3       
##  [52] farver_2.0.3        nnet_7.3-14         dbplyr_1.4.4       
##  [55] reshape2_1.4.4      tidyselect_1.1.0    labeling_0.3       
##  [58] rlang_0.4.6         effectsize_0.3.1    tmaptools_3.0      
##  [61] munsell_0.5.0       multilevel_2.6      cellranger_1.1.0   
##  [64] tools_4.0.1         cli_2.0.2           generics_0.0.2     
##  [67] sjlabelled_1.1.4    broom_0.5.6         evaluate_0.14      
##  [70] yaml_2.2.1          leafsync_0.1.0      knitr_1.29         
##  [73] fs_1.4.2            nlme_3.1-147        quantreg_5.55      
##  [76] xml2_1.3.2          psychometric_2.2    compiler_4.0.1     
##  [79] rstudioapi_0.11     png_0.1-7           e1071_1.7-3        
##  [82] reprex_0.3.0        statmod_1.4.34      stringi_1.4.6      
##  [85] parameters_0.7.0    nloptr_1.2.2.1      classInt_0.4-3     
##  [88] vctrs_0.3.1         stringdist_0.9.5.5  pillar_1.4.4       
##  [91] lifecycle_0.2.0     pwr_1.3-0           estimability_1.3   
##  [94] data.table_1.12.8   insight_0.8.4       raster_3.1-5       
##  [97] R6_2.4.1            latticeExtra_0.6-29 KernSmooth_2.23-17 
## [100] gridExtra_2.3       codetools_0.2-16    polspline_1.1.19   
## [103] dichromat_2.0-0     boot_1.3-25         MASS_7.3-51.6      
## [106] gtools_3.8.2        withr_2.2.0         mnormt_2.0.1       
## [109] multcomp_1.4-13     mgcv_1.8-31         bayestestR_0.6.0   
## [112] parallel_4.0.1      hms_0.5.3           grid_4.0.1         
## [115] rpart_4.1-15        minqa_1.2.4         coda_0.19-3        
## [118] class_7.3-17        rmarkdown_2.3       lubridate_1.7.9    
## [121] base64enc_0.1-3     rematch_1.0.1
#output data
d %>% write_rds("data/combined.rds", compress = "xz")
d_bg %>% write_rds("data/by_generation.rds", compress = "xz")

#upload to OSF
if (F) {
  library(osfr)
  osf_auth(read_lines("~/.config/osf_token"))
  osf_proj = osf_retrieve_node("https://osf.io/2vz8u/")
  osf_upload(
    osf_proj,
    conflicts = "overwrite", 
    path = c("notebook.Rmd", "notebook.html", "data", "figs"),
    recurse = T
    )
}