About

Data are from a recent Danish report published by the Danish statistics agency (DST). The agency collaborated with its Swedish and Norwegian counterparts to gather comparable employment rates for a select group of 11 countries of origin. These data are combined with the national IQs and Muslim percentages of the origin countries to see how predictable the employment rates are.

Initialize

#load the analysis packages through pacman
library(pacman)
#NOTE(review): %<>%, %T>%, str_replace(), str_detect() and rownames_to_column() are used further down,
#but magrittr, stringr and tibble are not listed here -- presumably they get attached via kirkegaard; confirm
p_load(readr, ggplot2, kirkegaard, dplyr, tidyr, lsr)
#print numbers with 2 significant digits and allow wide lines so the tables below are not wrapped
options(digits = 2, width = 300)

#employment rates (men, women, total) for each origin x host pair
d = readr::read_csv(file = "data/employtment_rates.csv")

## Parsed with column specification:
## cols(
##   Origin = col_character(),
##   Host = col_character(),
##   Employment_men = col_integer(),
##   Employment_women = col_integer(),
##   Employment_total = col_integer()
## )

#translate: round-trip the country names through pu_translate() and back again
#NOTE(review): presumably this normalizes the spelling of the names -- confirm against kirkegaard::pu_translate
#TRUE is spelled out because T is an ordinary variable that can be reassigned
d$Origin %<>% pu_translate() %>% pu_translate(reverse = TRUE)
d$Host %<>% pu_translate() %>% pu_translate(reverse = TRUE)

#wide version: one row per origin, one column per host
d %>% 
  select(Origin, Host, Employment_total) %>% 
  tidyr::spread(key = Host, value = Employment_total) %>% 
  #drop the origin labels and correlate the host columns with each other
  select(-Origin) %>% 
  wtd.cors() %>% 
  #summarise the correlations (MAT_half presumably keeps one triangle of the matrix -- confirm)
  MAT_half() %>% 
  averages()

## arithmetic  geometric   harmonic       mode     median    trimmed   midrange 
##       0.86       0.85       0.85       0.94       0.86       0.86       0.85

#longer version for sex regression: one row per origin x host x sex
d2 = gather(
  select(d, -Employment_total),
  key = sex, value = Employment,
  Employment_men, Employment_women
)
#fix text: strip the column-name prefix so that sex reads "men" / "women"
d2$sex = str_replace(d2$sex, "Employment_", "")

#national data
mega = read_csv("data/Megadataset_v2.0p.csv") %>% 
  mutate(
    #X1 is the unnamed first column of the file; it is used as the country key in the joins below
    iso = X1,
    #short aliases for the two origin-country predictors
    IQ = LV2012estimatedIQ,
    Muslim = IslamPewResearch2010
  )

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   EthnicHeterogenityVanhanen2012 = col_integer(),
##   EthnicConflictVanhanen2012 = col_integer(),
##   SlowTimePrefWangetal2011 = col_integer(),
##   Math00Mean = col_integer(),
##   Math00SD = col_integer(),
##   Read00Mean = col_integer(),
##   Read00SD = col_integer(),
##   Sci00Mean = col_integer(),
##   Sci00SD = col_integer(),
##   Math03Mean = col_integer(),
##   Math03SD = col_integer(),
##   Read03Mean = col_integer(),
##   Read03SD = col_integer(),
##   Sci03Mean = col_integer(),
##   Sci03SD = col_integer(),
##   PS03Mean = col_integer(),
##   PS03SD = col_integer(),
##   Read09.Native = col_integer(),
##   Read09.1g = col_integer()
##   # ... with 102 more columns
## )

## See spec(...) for full column specifications.

#keep the complete national dataset; mega itself is overwritten with the merged data below
mega_all = mega

#merge mega and employment data
#the origin names are passed through pu_translate() so they can be matched against the iso key
mega = d %>% 
  mutate(Origin = pu_translate(Origin)) %>% 
  rename(iso = Origin) %>% 
  dplyr::left_join(mega[c("iso", "Muslim", "IQ")], by = "iso")

## Warning: Column `iso` has different attributes on LHS and RHS of join

Figure of data

#grouped bar chart: employment rate by origin, one bar per host
ggplot(data = d, mapping = aes(x = Origin, y = Employment_total/100, fill = Host)) +
  geom_bar(stat = "identity", position = "dodge") +
  #rates are stored as 0-100, hence the division by 100 before percent formatting
  scale_y_continuous(name = "Employment rate (both sexes)", labels = scales::percent) +
  theme_bw() +
  #tilt the origin labels on the x axis
  theme(axis.text.x = element_text(angle = -20, hjust = 0))

GG_save("figures/employment_rate.png")

Regression

#sexs together
#additive model: total employment rate explained by origin and host
fit = lm(Employment_total ~ Origin + Host, data = d)
#kfold = FALSE: no cross-validated R2 is computed (R2-cv is NA in the output below)
#FALSE is spelled out because F is an ordinary variable that can be reassigned
MOD_summary(fit, kfold = FALSE)

## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.

## 
##     ---- Model summary ----    
## Model coefficients
##                               Beta   SE CI_lower CI_upper
## Origin: Afghanistan           0.00   NA       NA       NA
## Origin: Bosnia & Herzegovina  1.10 0.30   0.4799     1.72
## Origin: China                 0.62 0.30  -0.0015     1.23
## Origin: Denmark               2.00 0.43   1.1077     2.89
## Origin: Germany               1.35 0.30   0.7303     1.97
## Origin: Iran                  0.62 0.30  -0.0015     1.23
## Origin: Iraq                 -0.13 0.30  -0.7525     0.48
## Origin: Norway                1.74 0.43   0.8451     2.63
## Origin: Poland                1.48 0.30   0.8651     2.10
## Origin: Somalia              -0.89 0.30  -1.5036    -0.27
## Origin: Sweden                2.15 0.43   1.2600     3.05
## Origin: Syria                -1.25 0.30  -1.8694    -0.63
## Origin: Thailand              1.10 0.30   0.4799     1.72
## Origin: Turkey                0.64 0.30   0.0177     1.25
## Host: Denmark                 0.00   NA       NA       NA
## Host: Norway                  0.55 0.15   0.2288     0.87
## Host: Sweden                  0.25 0.15  -0.0705     0.57
## 
## 
## Model meta-data
##            outcome  N df   R2 R2-adj. R2-cv
## 1 Employment_total 36 20 0.92    0.87    NA
## 
## 
## Etas from analysis of variance
##         Eta Eta_partial
## Origin 0.94        0.96
## Host   0.22        0.62

#sexs apart
#same additive model on the long data, with sex as a third factor
fit_sex = lm(Employment ~ Origin + Host + sex, data = d2)
#the tee pipe prints the summary and still passes it on, so it is both shown and stored
#FALSE is spelled out because F is an ordinary variable that can be reassigned
fit_sex_sum = MOD_summary(fit_sex, kfold = FALSE) %T>% print()

## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.

## 
##     ---- Model summary ----    
## Model coefficients
##                                Beta    SE CI_lower CI_upper
## Origin: Afghanistan           0.000    NA       NA       NA
## Origin: Bosnia & Herzegovina  1.143 0.215    0.712     1.57
## Origin: China                 0.726 0.215    0.295     1.16
## Origin: Denmark               1.994 0.311    1.370     2.62
## Origin: Germany               1.397 0.215    0.966     1.83
## Origin: Iran                  0.689 0.215    0.258     1.12
## Origin: Iraq                 -0.054 0.215   -0.485     0.38
## Origin: Norway                1.761 0.311    1.138     2.38
## Origin: Poland                1.478 0.215    1.047     1.91
## Origin: Somalia              -0.744 0.215   -1.175    -0.31
## Origin: Sweden                2.095 0.311    1.472     2.72
## Origin: Syria                -1.107 0.215   -1.538    -0.68
## Origin: Thailand              1.116 0.215    0.685     1.55
## Origin: Turkey                0.635 0.215    0.204     1.07
## Host: Denmark                 0.000    NA       NA       NA
## Host: Norway                  0.505 0.112    0.280     0.73
## Host: Sweden                  0.252 0.112    0.027     0.48
## sex: men                      0.000    NA       NA       NA
## sex: women                   -0.500 0.088   -0.676    -0.32
## 
## 
## Model meta-data
##      outcome  N df   R2 R2-adj. R2-cv
## 1 Employment 72 55 0.89    0.86    NA
## 
## 
## Etas from analysis of variance
##         Eta Eta_partial
## Origin 0.89        0.94
## Host   0.20        0.52
## sex    0.25        0.61

#copy the coefficient table (first element of the model summary) to the clipboard
write_clipboard(fit_sex_sum[[1]])

##                               Beta   SE CI lower CI upper
## Origin: Afghanistan           0.00                       
## Origin: Bosnia & Herzegovina  1.14 0.22     0.71     1.57
## Origin: China                 0.73 0.22     0.29     1.16
## Origin: Denmark               1.99 0.31     1.37     2.62
## Origin: Germany               1.40 0.22     0.97     1.83
## Origin: Iran                  0.69 0.22     0.26     1.12
## Origin: Iraq                 -0.05 0.22    -0.49     0.38
## Origin: Norway                1.76 0.31     1.14     2.38
## Origin: Poland                1.48 0.22     1.05     1.91
## Origin: Somalia              -0.74 0.22    -1.17    -0.31
## Origin: Sweden                2.10 0.31     1.47     2.72
## Origin: Syria                -1.11 0.22    -1.54    -0.68
## Origin: Thailand              1.12 0.22     0.68     1.55
## Origin: Turkey                0.63 0.22     0.20     1.07
## Host: Denmark                 0.00                       
## Host: Norway                  0.50 0.11     0.28     0.73
## Host: Sweden                  0.25 0.11     0.03     0.48
## sex: men                      0.00                       
## sex: women                   -0.50 0.09    -0.68    -0.32

#missing cells: how complete is the origin x host grid?
#number of distinct origin countries
length(unique(d$Origin))

## [1] 14

#number of distinct host countries
length(unique(d$Host))

## [1] 3

#number of cells in the full origin x host grid
length(unique(d$Origin)) * length(unique(d$Host))

## [1] 42

#share of the grid cells that are present in the data (the missing share is 1 minus this)
nrow(d) / (length(unique(d$Origin)) * length(unique(d$Host)))

## [1] 0.86

## [1] 0.86

PISA data comparison

#data
#NOTE(review): the file has two columns named Germany; the second one is read as Germany_1
#and is treated as a separate host further down -- confirm what the two columns represent
pisa = readr::read_csv(file = "data/pisa_2006.csv")

## Warning: Duplicated column names deduplicated: 'Germany' => 'Germany_1' [10]

## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   Origin = col_character()
## )

## See spec(...) for full column specifications.

#long format: one row per origin x host pair, keeping only the pairs that have a score
pisa_long = na.omit(gather(pisa, key = Host, value = PISA, -Origin))

#regress
#same additive origin + host model as for the employment rates
fit_pisa = lm(PISA ~ Origin + Host, data = pisa_long)
summary(fit_pisa)

## 
## Call:
## lm(formula = PISA ~ Origin + Host, data = pisa_long)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -39.05  -2.13   0.00   2.42  45.34 
## 
## Coefficients: (22 not defined because of singularities)
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                353.07      46.78    7.55  3.3e-09 ***
## OriginArgentina             37.93      53.33    0.71  0.48107    
## OriginAustralia             94.28      34.17    2.76  0.00870 ** 
## OriginAustria              142.67      20.41    6.99  1.9e-08 ***
## OriginAzerbaijan            28.93      53.33    0.54  0.59050    
## OriginBangladesh            76.02      38.34    1.98  0.05427 .  
## OriginBelarus              125.52      46.35    2.71  0.00992 ** 
## OriginBelgium              157.04      25.46    6.17  2.7e-07 ***
## OriginBosnia Herzegovina    72.93      22.05    3.31  0.00200 ** 
## OriginBrazil                86.78      36.87    2.35  0.02357 *  
## OriginBulgaria              80.93      53.33    1.52  0.13701    
## OriginCanada               180.93      53.33    3.39  0.00157 ** 
## OriginCap Verde              7.10      32.49    0.22  0.82803    
## OriginChile                 84.93      53.33    1.59  0.11915    
## OriginChina                105.01      24.35    4.31  0.00010 ***
## OriginColombia              34.93      53.33    0.65  0.51623    
## OriginCroatia               70.33      23.89    2.94  0.00537 ** 
## OriginCzech Republic       189.68      30.18    6.29  1.9e-07 ***
## OriginDenmark              125.61      31.60    3.97  0.00029 ***
## OriginEstonia               27.04      42.77    0.63  0.53083    
## OriginFinland              153.04      42.77    3.58  0.00092 ***
## OriginFrance               107.35      19.93    5.39  3.4e-06 ***
## OriginGermany              154.47      18.30    8.44  2.0e-10 ***
## OriginGreece                44.99      23.92    1.88  0.06731 .  
## OriginHong Kong            188.93      53.33    3.54  0.00102 ** 
## OriginHungary              181.68      30.18    6.02  4.4e-07 ***
## OriginIceland              137.93      53.33    2.59  0.01345 *  
## OriginIndia                128.48      32.26    3.98  0.00028 ***
## OriginIndonesia             39.93      53.33    0.75  0.45840    
## OriginIreland              154.93      53.33    2.90  0.00596 ** 
## OriginIsrael               100.93      53.33    1.89  0.06568 .  
## OriginItaly                 59.11      19.51    3.03  0.00428 ** 
## OriginJapan                177.93      53.33    3.34  0.00184 ** 
## OriginJordan                68.93      53.33    1.29  0.20361    
## OriginKyrgyzstan           -31.07      53.33   -0.58  0.56348    
## OriginLatvia               111.52      46.35    2.41  0.02085 *  
## OriginLiechtenstein        132.66      23.32    5.69  1.3e-06 ***
## OriginLithuania            134.93      53.33    2.53  0.01545 *  
## OriginLuxembourg           113.10      32.49    3.48  0.00122 ** 
## OriginMacau                157.93      53.33    2.96  0.00513 ** 
## OriginMacedonia             44.83      23.89    1.88  0.06786 .  
## OriginMexico                56.93      53.33    1.07  0.29215    
## OriginMorocco               86.97      32.25    2.70  0.01019 *  
## OriginNetherlands          157.36      26.30    5.98  5.0e-07 ***
## OriginNew Zealand           75.78      34.17    2.22  0.03233 *  
## OriginNorway               179.08      43.70    4.10  0.00020 ***
## OriginPakistan              44.57      29.01    1.54  0.13234    
## OriginPoland               119.21      21.42    5.57  1.9e-06 ***
## OriginPortugal              72.55      20.37    3.56  0.00097 ***
## OriginQatar                 -4.07      53.33   -0.08  0.93958    
## OriginRep. of Korea         77.78      34.17    2.28  0.02827 *  
## OriginRomania               59.68      30.18    1.98  0.05491 .  
## OriginRussia               117.52      28.94    4.06  0.00022 ***
## OriginSamoa                -26.38      39.81   -0.66  0.51144    
## OriginSerbia                45.25      19.07    2.37  0.02253 *  
## OriginSlovakia             127.68      30.18    4.23  0.00013 ***
## OriginSlovenia              50.33      23.89    2.11  0.04144 *  
## OriginSouth Africa         105.93      39.16    2.71  0.00998 ** 
## OriginSpain                114.66      23.32    4.92  1.5e-05 ***
## OriginSweden               134.56      36.22    3.71  0.00062 ***
## OriginSwitzerland          140.16      23.32    6.01  4.6e-07 ***
## OriginTaiwan               178.93      53.33    3.35  0.00175 ** 
## OriginThailand              67.93      53.33    1.27  0.21011    
## OriginThe Congo             75.97      32.25    2.36  0.02346 *  
## OriginThe Philippines       76.93      39.16    1.96  0.05642 .  
## OriginTunesia               32.93      53.33    0.62  0.54042    
## OriginTurkey                39.41      17.77    2.22  0.03234 *  
## OriginUkraine               93.52      46.35    2.02  0.05039 .  
## OriginUnited Kingdom       112.28      34.17    3.29  0.00212 ** 
## OriginUnited States        135.93      39.16    3.47  0.00126 ** 
## OriginUruguay               74.93      53.33    1.40  0.16774    
## OriginVietnam               82.93      39.16    2.12  0.04044 *  
## HostAustralia               82.00      36.21    2.26  0.02903 *  
## HostAustria                 26.25      44.63    0.59  0.55963    
## HostAzerbaijan                 NA         NA      NA       NA    
## HostBelgium                 -2.04      46.49   -0.04  0.96520    
## HostBrazil                 -49.85      58.18   -0.86  0.39671    
## HostBulgaria                   NA         NA      NA       NA    
## HostCanada                     NA         NA      NA       NA    
## HostChile                      NA         NA      NA       NA    
## HostColombia                   NA         NA      NA       NA    
## HostCroatia                 69.60      54.61    1.27  0.20983    
## HostCzech Republic         -29.75      57.47   -0.52  0.60760    
## HostDenmark                 -5.20      46.37   -0.11  0.91129    
## HostEstonia                150.89      64.82    2.33  0.02507 *  
## HostFinland                 56.89      53.77    1.06  0.29634    
## HostFrance                  34.58      53.43    0.65  0.52117    
## HostGermany                 17.94      45.60    0.39  0.69608    
## HostGermany_1                8.46      52.69    0.16  0.87324    
## HostGreece                  77.93      50.22    1.55  0.12854    
## HostHong Kong                  NA         NA      NA       NA    
## HostHungary                -30.75      57.47   -0.53  0.59562    
## HostIceland                    NA         NA      NA       NA    
## HostIndonesia                  NA         NA      NA       NA    
## HostIreland                    NA         NA      NA       NA    
## HostIsrael                     NA         NA      NA       NA    
## HostItaly                   62.83      53.27    1.18  0.24517    
## HostJapan                      NA         NA      NA       NA    
## HostJordan                     NA         NA      NA       NA    
## HostKyrgyzstan                 NA         NA      NA       NA    
## HostLatvia                  25.42      56.91    0.45  0.65758    
## HostLiechtenstein           24.63      45.54    0.54  0.59159    
## HostLithuania                  NA         NA      NA       NA    
## HostLuxembourg              19.83      46.61    0.43  0.67281    
## HostMacau                      NA         NA      NA       NA    
## HostMexico                     NA         NA      NA       NA    
## HostNetherlands             28.19      47.98    0.59  0.56023    
## HostNew Zealand             98.31      39.57    2.48  0.01726 *  
## HostNorway                 -45.15      53.99   -0.84  0.40792    
## HostPoland                  25.72      53.64    0.48  0.63422    
## HostPortugal                24.15      45.54    0.53  0.59881    
## HostQatar                      NA         NA      NA       NA    
## HostRep. of Korea           91.15      48.56    1.88  0.06781 .  
## HostRomania                  5.25      57.47    0.09  0.92760    
## HostRussia                   8.42      56.91    0.15  0.88317    
## HostSerbia                  25.68      53.02    0.48  0.63074    
## HostSeychelles              46.91      42.94    1.09  0.28116    
## HostSlovakia                 7.25      57.47    0.13  0.90018    
## HostSlovenia               115.60      54.61    2.12  0.04053 *  
## HostSpain                   20.27      55.02    0.37  0.71454    
## HostSweden                  15.37      60.47    0.25  0.80066    
## HostSwitzerland             21.91      45.54    0.48  0.63314    
## HostTaiwan                     NA         NA      NA       NA    
## HostThailand                   NA         NA      NA       NA    
## HostTunesia                    NA         NA      NA       NA    
## HostTurkey                  31.53      52.27    0.60  0.54979    
## HostUnited Kingdom          49.65      48.56    1.02  0.31268    
## HostUnited States              NA         NA      NA       NA    
## HostUruguay                    NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26 on 40 degrees of freedom
## Multiple R-squared:  0.941,  Adjusted R-squared:  0.784 
## F-statistic: 6.01 on 106 and 40 DF,  p-value: 4.22e-09

#(fit_pisa_sum = MOD_summary(fit_pisa, kfold = F))
#TODO: enable NAs in output. Useful in a few cases, like this one!
#eta and partial eta computed by hand instead: square roots of the eta squared values
#(the printed column names still say eta.sq although the values are square roots)
sqrt(lsr::etaSquared(aov(fit_pisa)))

##        eta.sq eta.sq.part
## Origin   0.64        0.94
## Host     0.33        0.80

#missing cells: how complete is the origin x host grid?
#number of distinct origin countries in the wide data
length(unique(pisa$Origin))

## [1] 72

#number of distinct hosts left after the rows without a score were dropped
#(Germany and Germany_1 count as two hosts)
length(unique(pisa_long$Host))

## [1] 58

#number of cells in the full origin x host grid
length(unique(pisa$Origin)) * length(unique(pisa_long$Host))

## [1] 4176

#share of the grid cells that have a score (the missing share is 1 minus this)
nrow(pisa_long) / (length(unique(pisa$Origin)) * length(unique(pisa_long$Host)))

## [1] 0.035

## [1] 0.035

Origin country characteristics models

As in all previous studies, we calculate the correlations between the country of origin predictors – IQ and Muslim% – and the outcomes. In this case, we can do it by sex as well, and we have near perfect comparison data for the 3 host countries.

#correlations overall: outcomes and predictors, pooled over the host countries
mega %>% 
  select(Employment_men, Employment_women, Employment_total, IQ, Muslim) %>% 
  wtd.cors()

##                  Employment_men Employment_women Employment_total    IQ Muslim
## Employment_men             1.00             0.91             0.98  0.64  -0.63
## Employment_women           0.91             1.00             0.97  0.76  -0.78
## Employment_total           0.98             0.97             1.00  0.70  -0.71
## IQ                         0.64             0.76             0.70  1.00  -0.84
## Muslim                    -0.63            -0.78            -0.71 -0.84   1.00

#by country: the same correlation matrix within each host, with 95% confidence intervals
plyr::dlply(mega, "Host", function(host_rows) {
  cor_vars = c("Employment_men", "Employment_women", "Employment_total", "IQ", "Muslim")
  cor_matrix(host_rows[cor_vars], CI = .95)
})

## $Denmark
##                  Employment_men        Employment_women      Employment_total      IQ                    Muslim               
## Employment_men   "1"                   "0.92 [0.75 0.98]"    "0.98 [0.93 1.00]"    "0.71 [0.24 0.91]"    "-0.73 [-0.92 -0.28]"
## Employment_women "0.92 [0.75 0.98]"    "1"                   "0.98 [0.92 0.99]"    "0.77 [0.35 0.93]"    "-0.86 [-0.96 -0.55]"
## Employment_total "0.98 [0.93 1.00]"    "0.98 [0.92 0.99]"    "1"                   "0.74 [0.30 0.92]"    "-0.80 [-0.94 -0.43]"
## IQ               "0.71 [0.24 0.91]"    "0.77 [0.35 0.93]"    "0.74 [0.30 0.92]"    "1"                   "-0.84 [-0.95 -0.51]"
## Muslim           "-0.73 [-0.92 -0.28]" "-0.86 [-0.96 -0.55]" "-0.80 [-0.94 -0.43]" "-0.84 [-0.95 -0.51]" "1"                  
## 
## $Norway
##                  Employment_men        Employment_women      Employment_total      IQ                    Muslim               
## Employment_men   "1"                   "0.88 [0.61 0.96]"    "0.97 [0.90 0.99]"    "0.67 [0.15 0.90]"    "-0.69 [-0.90 -0.18]"
## Employment_women "0.88 [0.61 0.96]"    "1"                   "0.96 [0.87 0.99]"    "0.82 [0.46 0.95]"    "-0.85 [-0.96 -0.53]"
## Employment_total "0.97 [0.90 0.99]"    "0.96 [0.87 0.99]"    "1"                   "0.75 [0.30 0.92]"    "-0.78 [-0.94 -0.37]"
## IQ               "0.67 [0.15 0.90]"    "0.82 [0.46 0.95]"    "0.75 [0.30 0.92]"    "1"                   "-0.84 [-0.95 -0.51]"
## Muslim           "-0.69 [-0.90 -0.18]" "-0.85 [-0.96 -0.53]" "-0.78 [-0.94 -0.37]" "-0.84 [-0.95 -0.51]" "1"                  
## 
## $Sweden
##                  Employment_men       Employment_women      Employment_total      IQ                    Muslim               
## Employment_men   "1"                  "0.93 [0.77 0.98]"    "0.98 [0.94 1.00]"    "0.59 [0.03 0.87]"    "-0.50 [-0.83 0.10]" 
## Employment_women "0.93 [0.77 0.98]"   "1"                   "0.98 [0.94 1.00]"    "0.72 [0.24 0.91]"    "-0.67 [-0.90 -0.15]"
## Employment_total "0.98 [0.94 1.00]"   "0.98 [0.94 1.00]"    "1"                   "0.66 [0.14 0.89]"    "-0.58 [-0.87 -0.01]"
## IQ               "0.59 [0.03 0.87]"   "0.72 [0.24 0.91]"    "0.66 [0.14 0.89]"    "1"                   "-0.84 [-0.96 -0.52]"
## Muslim           "-0.50 [-0.83 0.10]" "-0.67 [-0.90 -0.15]" "-0.58 [-0.87 -0.01]" "-0.84 [-0.96 -0.52]" "1"                  
## 
## attr(,"split_type")
## [1] "data.frame"
## attr(,"split_labels")
##      Host
## 1 Denmark
## 2  Norway
## 3  Sweden

#plot: employment rate against origin-country IQ, coloured by host with one linear fit per host
#se = FALSE draws the fit lines without confidence bands; FALSE is spelled out because F can be reassigned
ggplot(mega, aes(IQ, Employment_total/100, color = Host, label = iso)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  ggrepel::geom_text_repel() +
  scale_y_continuous("Employed%", labels = scales::percent) +
  xlab("National IQ of home country (Lynn & Vanhanen)") +
  theme_bw()

GG_save("figures/IQ_by_country.png")

#same plot with the origin-country Muslim share on the x axis
ggplot(mega, aes(Muslim, Employment_total/100, color = Host, label = iso)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  ggrepel::geom_text_repel() +
  scale_y_continuous("Employed%", labels = scales::percent) +
  scale_x_continuous("Muslim% in home country (Pew Research)", labels = scales::percent) +
  theme_bw()

GG_save("figures/Muslim_by_country.png")

#AOV model betas - employment
#take the Beta column of the coefficient table from the sex model and keep the origin rows
aov_fit_betas = fit_sex_sum[[1]]["Beta"] %>% 
  rownames_to_column(var = "Predictor") %>% 
  filter(str_detect(Predictor, "Origin")) %>% 
  mutate(
    #strip the "Origin: " prefix and translate the country name into the iso key
    iso = pu_translate(str_replace(Predictor, "Origin: ", "")),
    #rescale the betas with standardize()
    Beta = standardize(Beta)
  ) %>% 
  #attach the national predictors from the full, unmerged national dataset
  left_join(mega_all[c("iso", "Names", "IQ", "Muslim")], by = "iso")

#plot: standardized origin betas from the employment model against the national predictors
#TRUE is spelled out because T is an ordinary variable that can be reassigned
GG_scatter(aov_fit_betas, "IQ", "Beta", case_names = "Names", repel_names = TRUE) +
  xlab("National IQ of home country (Lynn & Vanhanen)")

GG_save("figures/IQ_beta.png")

GG_scatter(aov_fit_betas, "Muslim", "Beta", case_names = "Names", repel_names = TRUE) +
  scale_x_continuous("Muslim% in home country (Pew Research)", labels = scales::percent)

GG_save("figures/Muslim_beta.png")

#AOV model betas - pisa
#due to missing data, we have to use the regular lm fit
#NOTE(review): the lm coefficient table has no row for the reference origin, so that country
#is presumably absent here, unlike the employment betas where the reference is listed with 0 -- confirm
#NOTE(review): pu_translate() matches some names fuzzily (e.g. "The Congo" -> "DR Congo"); verify these matches
aov_fit_betas_pisa = broom::tidy(fit_pisa) %>% 
  filter(str_detect(term, "Origin")) %>% 
  mutate(
    #strip the "Origin" prefix of the lm term and translate the country name into the iso key
    iso = pu_translate(str_replace(term, "Origin", "")),
    #rescale the estimates with standardize()
    Beta = standardize(estimate)
  ) %>% 
  #attach the national predictors from the full, unmerged national dataset
  left_join(mega_all[c("iso", "Names", "IQ", "Muslim")], by = "iso")

## No exact match: Cap Verde

## No exact match: Rep. of Korea

## No exact match: The Congo

## No exact match: The Philippines

## No exact match: Tunesia

## Best fuzzy match found: Cap Verde -> Cape Verde with distance 1.00

## Best fuzzy match found: Rep. of Korea -> Republic of Korea with distance 5.00

## Best fuzzy match found: The Congo -> DR Congo with distance 3.00

## Best fuzzy match found: The Philippines -> Philippines with distance 4.00

## Best fuzzy match found: Tunesia -> Tunisia with distance 1.00

#plot: standardized origin betas from the PISA model against the national predictors
#TRUE is spelled out because T is an ordinary variable that can be reassigned
GG_scatter(aov_fit_betas_pisa, "IQ", "Beta", case_names = "Names", repel_names = TRUE) +
  xlab("National IQ of home country (Lynn & Vanhanen)")

GG_save("figures/IQ_beta_pisa.png")

#the lower x limit is set below zero (presumably to leave room for the labels near 0% -- confirm)
GG_scatter(aov_fit_betas_pisa, "Muslim", "Beta", case_names = "Names", repel_names = TRUE) +
  scale_x_continuous("Muslim% in home country (Pew Research)", labels = scales::percent, limits = c(-.15, NA))

GG_save("figures/Muslim_beta_pisa.png")