This initial chunk sets the working directory, loads necessary libraries, and imports all the raw data files (.csv and .xlsx) that will be used for the subsequent National IQ (NIQ) data harmonization and analysis.
##################################################################s-factor
#setwd('~')
library('readxl')
#setwd('rfolder/bestNIQs/data')
agri <- read.csv("data/agri.csv")
basicskills <- read.csv("data/basicskills.csv")
becker <- read.csv("data/becker.csv")
becker$alpha3[becker$alpha3=='KNA.'] <- 'KNA'
calories <- read.csv("data/calories.csv")
ce2 <- read.csv("data/ce2.csv")
CIAGDP <- read.csv("data/CIAGDP.csv")
circ2 <- read.csv("data/circ2.csv")
doi_total <- read.csv("data/doi_total.csv")
GNInPPPIMF <- read.csv("data/GNInPPPIMF.csv")
GNIPPPIMF <- read.csv("data/GNIPPPIMF.csv")
HDI <- read.csv("data/HDI.csv")
health <- read.csv("data/health.csv")
ict <- read.csv("data/ict.csv")
isp <- read.csv("data/internet-speeds-by-country-2024.csv")
meanages <- read.csv("data/meanages.csv")
medianincome <- read.csv("data/medianincome.csv")
medianwealth <- read.csv("data/medianwealth.csv")
mega <- read.csv("data/mega.csv")
newdata <- read.csv("data/newdata.csv")
PIRLS2021 <- read.csv("data/PIRLS2021.csv")
pisa <- read.csv("data/pisa.csv")
oil <- read.csv('data/worldoil.csv')
SPI <- read.csv("data/SPI.csv")
techexp <- read.csv("data/techexp.csv")
testscores <- read.csv("data/testscores.csv")
timss4m <- read.csv("data/timss4thmath.csv")
timss8m <- read.csv("data/timssmath8th.csv")
timss4s <- read.csv("data/timsssci4th.csv")
timss8s <- read.csv("data/timss8thsci.csv")
WBGDP <- read.csv("data/WBGDP.csv")
allniq <- read_excel("data/allniq.xlsx")
New names:
IMFGDP <- read_excel("data/IMFGDP.xls")
hssiqs <- read.csv('data/hssiq.csv')
This section performs the crucial step of cleaning and harmonizing various international test score and estimated IQ datasets (e.g., Harmonized Test Scores, PISA, TIMSS, Rinder’s estimates) into a standardized IQ scale.
#############################
#IQ data cleaning
hts <- testscores %>% filter(Indicator == 'Harmonized Test Scores')
hts$mean = rowMeans(subset(hts, select=c(X2010, X2017, X2018, X2020)), na.rm = TRUE)
hts$wbtestscore = (hts$mean - 529.5)/100*15+100
hts$t2020 <- hts$X2020
hts <- hts %>% select(wbtestscore, Country.ISO3)
nit <- allniq %>% select(CA_totc, SAS_IQc, countrycode)
nit$RinderIQ <- as.numeric(nit$CA_totc)
Warning: NAs introduced by coercion
nit$RinderSAS <- as.numeric(nit$SAS_IQc)
Warning: NAs introduced by coercion
basicskills$bs <- (basicskills$Mean - 514.8)/100*15+100
basicskills$alpha3 <- countrycode(basicskills$Country, origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: Kosovo
basicskills$alpha3[basicskills$Country=='Kosovo'] <- 'KSV'
bs <- basicskills %>% filter(!Data.layer=='5') %>% select(bs, alpha3)
pisa$alpha3 <- countrycode(pisa$cmtry, origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: Kosovo
pisa$alpha3[pisa$cmtry=='Kosovo'] <- 'KSV'
becker$alpha3[becker$alpha3=='FRA '] <- 'FRA'
pisa$pisa2 <- (pisa$pisa-505.8)/100*15+100
timss4m$alpha3 <- countrycode(timss4m$country, origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: Kosovo
timss4m$alpha3[48] <- 'KSV'
timss4m$alpha3[timss4m$country=='England'] <- 'GBR'
timss4m$T4mIQ <- (timss4m$score-535)/100*15 + 97.3
timss4s$alpha3 <- countrycode(timss4s$country, origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: England, Kosovo
timss4s$alpha3[51] <- 'KSV'
timss4s$alpha3[12] <- 'GBR'
timss4s$T4sIQ <- (timss4s$sci4thtimss-542.333)/100*15 + 100
timss8m$alpha3 <- countrycode(timss8m$country, origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: England
timss8m$alpha3[13] <- 'GBR'
timss8m$T8mIQ <- (timss8m$math8th-520.33333)/100*15 + 100
timss8s$alpha3 <- countrycode(timss8s$country, origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: England
timss8s$alpha3[14] <- 'GBR'
timss8s$T8sIQ <- (timss8s$timss8thsci-522.33333)/100*15 + 100
PIRLS2021$alpha3 <- countrycode(PIRLS2021$country, origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: Kosovo
PIRLS2021$alpha3[50] <- 'KSV'
PIRLS2021$PRLIQ <- (PIRLS2021$score-562.33333)/100*15 + 100
t4m <- timss4m %>% select(alpha3, T4mIQ)
t4s <- timss4s %>% select(alpha3, T4sIQ)
t8m <- timss8m %>% select(alpha3, T8mIQ)
t8s <- timss8s %>% select(alpha3, T8sIQ)
p21 <- PIRLS2021 %>% select(alpha3, PRLIQ)
niqs1 <- full_join(becker, hts, by = join_by(alpha3 == Country.ISO3))
niqs2 <- full_join(niqs1, pisa, by = join_by(alpha3 == alpha3))
niqs3 <- full_join(niqs2, bs, by = join_by(alpha3 == alpha3))
niqs4 <- full_join(niqs3, nit, by = join_by(alpha3 == countrycode))
niqs5 <- full_join(niqs4, t4m, by = join_by(alpha3 == alpha3))
niqs6 <- full_join(niqs5, t4s, by = join_by(alpha3 == alpha3))
niqs7 <- full_join(niqs6, t8m, by = join_by(alpha3 == alpha3))
niqs8 <- full_join(niqs7, t8s, by = join_by(alpha3 == alpha3))
niqs9 <- full_join(niqs8, p21, by = join_by(alpha3 == alpha3))
This chunk performs final adjustments, aggregations, and regional grouping on the merged dataset.
niqs9$R[is.na(niqs9$UW) & is.na(niqs9$NW) & is.na(niqs9$QNW) & is.na(niqs9$SAS)] <- NA
onlyscores <- data.frame(niqs9 %>% select(UW, T4mIQ, T4sIQ, T8sIQ, T8mIQ, PRLIQ, QNW, NW, SAS, L.V12, R, wbtestscore, RinderIQ, RinderSAS, pisa2, bs, L.V02, alpha3))
onlyscores$SAS <- onlyscores$SAS - 1.74 + 1
onlyscores$NW <- onlyscores$NW - 1 + 1
onlyscores$UW <- onlyscores$UW + 1
onlyscores$bs <- onlyscores$bs + 1
onlyscores$pisa2 <- onlyscores$pisa2 + 1
onlyscores$wbtestscore <- onlyscores$wbtestscore + 1
onlyscores$QNW <- onlyscores$QNW - 1 + 1
onlyscores$L.V12 <- onlyscores$L.V12 - 0.84 + 1
onlyscores$L.V02 <- onlyscores$L.V02 - 0.84
onlyscores$RinderIQ <- onlyscores$RinderIQ - 0.74
onlyscores$RinderSAS <- onlyscores$RinderSAS - 0.74
onlyscores$OM = rowMeans(subset(onlyscores, select=c(RinderSAS, RinderIQ, L.V02, L.V12, QNW, wbtestscore, pisa2, bs, UW, NW, SAS, QNW)), na.rm = TRUE)
onlyscores$R <- onlyscores$R - 1.74 + 1
onlyscores$region <- countrycode(onlyscores$alpha3, origin='iso3c', destination='un.regionsub.name')
Warning: Some values were not matched unambiguously: ANT, KSV, TWN
onlyscores$region[onlyscores$alpha3=='KSV'] <- 'Southern Europe'
onlyscores$region[onlyscores$alpha3=='TWN'] <- 'Eastern Asia'
onlyscores <- onlyscores %>% filter(!is.na(region))
onlyscores$region[onlyscores$alpha3=='KNA'] <- NA
###################################
################################
byreg <- onlyscores %>%
group_by(region) %>%
summarise(RIQ = mean(RinderIQ, na.rm=T), BSD = mean(bs, na.rm=T), WBTS = mean(wbtestscore, na.rm=T), PISA = mean(pisa2, na.rm=T), RSAS = mean(RinderSAS, na.rm=T), BQNW = mean(QNW, na.rm=T), BSAS = mean(SAS, na.rm=T), LV12 = mean(L.V12, na.rm=T), LV02 = mean(L.V02, na.rm=T), BOR = mean(R, na.rm=T))
print(byreg, n=20)
Charting the relationship between mean and standard error (simple method)
##################################
longer <- onlyscores %>% select(T4mIQ, T4sIQ, T8sIQ, T8mIQ, PRLIQ, QNW, RinderSAS, pisa2, alpha3)
long_format <- longer %>%
gather(key = "Measure", value = "Value", -alpha3)
long_format$se <- 1/sqrt(350)
uniques <- unique(onlyscores$alpha3)
rs <- rep(0, length(uniques))
with_se <- data.frame(uniques, rs)
for(i in 1:length(uniques)) {
f <- long_format %>% filter(alpha3==uniques[i] & !is.na(Value))
if (nrow(f) > 1) {
metaobjn <- metafor::rma(yi=Value, sei=se, data = f)
with_se$mean[i] <- metaobjn$b
with_se$se[i] <- metaobjn$se
}
else if (nrow(f) == 1) {
with_se$mean[i] <- mean(f$Value)
with_se$se[i] <- NA
}
else {
with_se$mean[i] <- NA
with_se$se[i] <- NA
}
}
p <- GG_scatter(with_se, 'mean', 'se', case_names='uniques') +
xlab('Mean') +
ylab('Standard Error') +
theme_bw() +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
legend.position = "right",
plot.background = element_rect(fill = "white")
)
p
file_name = 'output/ropz.jpg'
ggsave(plot = p, filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
Charting the relationship between psychometric and scholastic ability
p <- GG_scatter(onlyscores, 'UW', 'bs', case_names='alpha3') +
xlab('IQ Based on Pychometric Data (From Becker)') +
ylab('Scholastic Ability Based on Basic Skills Dataset') +
theme_bw() +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
legend.position = "right",
plot.background = element_rect(fill = "white")
)
p
file_name = 'output/ropzicle.jpg'
ggsave(plot = p, filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
Calculation of meta-analytic means.
########################################
onlyscores2 <- data.frame(onlyscores %>% select(UW, QNW, NW, SAS, L.V12, R, T4mIQ, T4sIQ, T8sIQ, T8mIQ, PRLIQ, wbtestscore, RinderIQ, RinderSAS, pisa2, bs, L.V02, alpha3))
#onlyscores2 <- onlyscores2 %>% filter(!(alpha3 %in% (unique(newdata$alpha3))))
onlyscores2$nest2 = rowMeans(subset(onlyscores2, select=c(T4mIQ, T8mIQ, QNW, UW, NW, SAS, L.V02, L.V12, R)), na.rm = TRUE)
onlyscores2$nest3 = rowMeans(subset(onlyscores2, select=c(nest2, T4sIQ, T8sIQ)), na.rm = TRUE)
onlyscores2$nest4 = rowMeans(subset(onlyscores2, select=c(nest3, pisa2, RinderSAS, wbtestscore, PRLIQ)), na.rm = TRUE)
onlyscores2$NIQtemp1 = rowMeans(subset(onlyscores2, select=c(nest4, bs, RinderIQ)), na.rm = TRUE)
onlyscores2$revised <- 0
###########################
#Meta-analytic means
longer <- onlyscores %>% select(UW, QNW, NW, SAS, L.V12, R, T4mIQ, T4sIQ, T8sIQ, T8mIQ, PRLIQ, wbtestscore, RinderIQ, RinderSAS, pisa2, bs, L.V02, alpha3)
long_format <- longer %>%
gather(key = "Measure", value = "Value", -alpha3)
long_format$se <- NA
long_format$se[long_format$Measure=='UW'] <- 15/sqrt(50)
long_format$se[long_format$Measure=='NW'] <- 15/sqrt(50)
long_format$se[long_format$Measure=='QNW'] <- 15/sqrt(50)
long_format$se[long_format$Measure=='T4mIQ'] <- 15/sqrt(50)
long_format$se[long_format$Measure=='T8mIQ'] <- 15/sqrt(50)
long_format$se[long_format$Measure=='R'] <- 15/sqrt(50)
long_format$se[long_format$Measure=='SAS'] <- 15/sqrt(50)
long_format$se[long_format$Measure=='T4sIQ'] <- 15/sqrt(125)
long_format$se[long_format$Measure=='T8sIQ'] <- 15/sqrt(125)
long_format$se[long_format$Measure=='L.V02'] <- 15/sqrt(50)
long_format$se[long_format$Measure=='L.V12'] <- 15/sqrt(50)
long_format$se[long_format$Measure=='PRLIQ'] <- 15/sqrt(250)
long_format$se[long_format$Measure=='pisa2'] <- 15/sqrt(250)
long_format$se[long_format$Measure=='wbtestscore'] <- 15/sqrt(500)
long_format$se[long_format$Measure=='RinderSAS'] <- 15/sqrt(250)
long_format$se[long_format$Measure=='bs'] <- 15/sqrt(750)
long_format$se[long_format$Measure=='RinderIQ'] <- 15/sqrt(750)
uniques <- unique(onlyscores$alpha3)
mean <- rep(0, length(uniques))
with_se <- data.frame(uniques, mean)
for(i in 1:length(uniques)) {
f <- long_format %>% filter(alpha3==uniques[i] & !is.na(Value))
if (nrow(f) > 1) {
metaobjn <- metafor::rma(yi=Value, sei=se, data = f)
with_se$mean[i] <- metaobjn$b
with_se$se[i] <- metaobjn$se
}
else if (nrow(f) == 1) {
with_se$mean[i] <- mean(f$Value)
with_se$se[i] <- NA
}
else {
with_se$mean[i] <- NA
with_se$se[i] <- NA
}
}
Regression models predicting standard errors based on means and sample sizes.
long_format <- longer %>%
gather(key = "Measure", value = "Value", -alpha3)
with_se <- long_format %>% group_by(alpha3) %>% summarise(mean = mean(Value, na.rm=T), se = sd(Value, na.rm=T)/sqrt(sum(!is.na(Value))), n = sum(!is.na(Value)))
lr <- lm(data=with_se, se ~ mean)
summary(lr)
Call:
lm(formula = se ~ mean, data = with_se)
Residuals:
Min 1Q Median 3Q Max
-2.5236 -0.6971 -0.2905 0.3537 9.3262
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.429649 0.732361 10.14 < 0.0000000000000002 ***
mean -0.068579 0.008725 -7.86 0.000000000000331 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.309 on 181 degrees of freedom
(28 observations deleted due to missingness)
Multiple R-squared: 0.2545, Adjusted R-squared: 0.2504
F-statistic: 61.79 on 1 and 181 DF, p-value: 0.0000000000003313
lr <- lm(data=with_se, se ~ n)
summary(lr)
Call:
lm(formula = se ~ n, data = with_se)
Residuals:
Min 1Q Median 3Q Max
-2.9796 -0.6665 -0.0802 0.5470 8.1964
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.5552 0.2014 17.65 <0.0000000000000002 ***
n -0.1939 0.0191 -10.15 <0.0000000000000002 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.21 on 181 degrees of freedom
(28 observations deleted due to missingness)
Multiple R-squared: 0.3628, Adjusted R-squared: 0.3593
F-statistic: 103.1 on 1 and 181 DF, p-value: < 0.00000000000000022
lr <- lm(data=with_se, se ~ mean + n)
summary(lr)
Call:
lm(formula = se ~ mean + n, data = with_se)
Residuals:
Min 1Q Median 3Q Max
-2.9508 -0.6304 -0.1386 0.5679 8.3803
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.48373 0.73417 7.469 0.00000000000336 ***
mean -0.02793 0.01024 -2.728 0.00701 **
n -0.15200 0.02425 -6.269 0.00000000261139 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.189 on 180 degrees of freedom
(28 observations deleted due to missingness)
Multiple R-squared: 0.3881, Adjusted R-squared: 0.3813
F-statistic: 57.08 on 2 and 180 DF, p-value: < 0.00000000000000022
Calculation of meta-analytic means (again).
onlyscores2 <- data.frame(onlyscores %>% select(UW, QNW, NW, SAS, L.V12, R, T4mIQ, T4sIQ, T8sIQ, T8mIQ, PRLIQ, wbtestscore, RinderIQ, RinderSAS, pisa2, bs, L.V02, alpha3))
#onlyscores2 <- onlyscores2 %>% filter(!(alpha3 %in% (unique(newdata$alpha3))))
onlyscores2$nest1 = rowMeans(subset(onlyscores2, select=c(QNW, NW, UW, T4mIQ, T8mIQ, L.V02, L.V12, SAS, R)), na.rm = TRUE)
onlyscores2$nest2 = rowMeans(subset(onlyscores2, select=c(nest1, T4sIQ, T8sIQ)), na.rm = TRUE)
onlyscores2$nest3 = rowMeans(subset(onlyscores2, select=c(nest2, pisa2, wbtestscore, RinderSAS, PRLIQ)), na.rm = TRUE)
onlyscores2$NIQtemp1 = rowMeans(subset(onlyscores2, select=c(nest3, RinderIQ, bs)), na.rm = TRUE)
onlyscores2$revised <- 0
###########################
#Meta-analytic means
longer <- onlyscores %>% select(UW, QNW, NW, SAS, L.V12, R, T4mIQ, T4sIQ, T8sIQ, T8mIQ, PRLIQ, wbtestscore, RinderIQ, RinderSAS, pisa2, bs, L.V02, alpha3)
long_format <- longer %>%
gather(key = "Measure", value = "Value", -alpha3)
long_format$se <- NA
long_format$se[long_format$Measure=='UW'] <- 15/sqrt(10)
long_format$se[long_format$Measure=='NW'] <- 15/sqrt(10)
long_format$se[long_format$Measure=='QNW'] <- 15/sqrt(10)
long_format$se[long_format$Measure=='T4mIQ'] <- 15/sqrt(10)
long_format$se[long_format$Measure=='T8mIQ'] <- 15/sqrt(10)
long_format$se[long_format$Measure=='R'] <- 15/sqrt(20)
long_format$se[long_format$Measure=='SAS'] <- 15/sqrt(20)
long_format$se[long_format$Measure=='T4sIQ'] <- 15/sqrt(20)
long_format$se[long_format$Measure=='T8sIQ'] <- 15/sqrt(20)
long_format$se[long_format$Measure=='L.V02'] <- 15/sqrt(20)
long_format$se[long_format$Measure=='L.V12'] <- 15/sqrt(20)
long_format$se[long_format$Measure=='PRLIQ'] <- 15/sqrt(40)
long_format$se[long_format$Measure=='pisa2'] <- 15/sqrt(40)
long_format$se[long_format$Measure=='wbtestscore'] <- 15/sqrt(40)
long_format$se[long_format$Measure=='RinderSAS'] <- 15/sqrt(40)
long_format$se[long_format$Measure=='bs'] <- 15/sqrt(80)
long_format$se[long_format$Measure=='RinderIQ'] <- 15/sqrt(80)
long_format$se2 <- 15/sqrt(350)
uniques <- unique(onlyscores$alpha3)
mean <- rep(0, length(uniques))
with_se <- data.frame(uniques, mean)
for(i in 1:length(uniques)) {
f <- long_format %>% filter(alpha3==uniques[i] & !is.na(Value))
if (nrow(f) > 1) {
metaobjn <- metafor::rma(yi=Value, sei=se, data = f)
metaobjn2 <- metafor::rma(yi=Value, sei=se2, data = f)
with_se$mean[i] <- metaobjn$b
with_se$se[i] <- sd(f$Value)/sqrt(nrow(f %>% filter(!is.na(Value))))
}
else if (nrow(f) == 1) {
with_se$mean[i] <- mean(f$Value)
with_se$se[i] <- NA
}
else {
with_se$mean[i] <- NA
with_se$se[i] <- NA
}
}
Calculation of final means.
########################################PSY##SCH########################################PSY##############################
nd <- newdata %>% select(alpha3, NIQr)
nd <- na.omit(nd)
rop2 <- with_se %>% select(uniques, se, mean)
onlyscores4 <- full_join(onlyscores2, rop2, by = join_by(alpha3 == uniques))
onlyscores4 <- full_join(onlyscores4, nd, by = join_by(alpha3 == alpha3))
onlyscores4 <- onlyscores4 %>% select(UW, QNW, NW, SAS, L.V12, R, NIQr, NIQtemp1, wbtestscore, RinderIQ, RinderSAS, mean, se, pisa2, bs, L.V02, alpha3)
onlyscores4$seadj <- onlyscores4$se*2.32/mean(with_se$se, na.rm=T)
onlyscores4$region <- countrycode(onlyscores4$alpha3, origin='iso3c', destination='un.regionsub.name')
Warning: Some values were not matched unambiguously: KSV, TWN
onlyscores4$region[onlyscores4$alpha3=='KSV'] <- 'Southern Europe'
onlyscores4$region[onlyscores4$alpha3=='TWN'] <- 'Eastern Asia'
onlyscores4$alpha3[onlyscores4$alpha3=='KNA.'] <- NA
onlyscores4$region[onlyscores4$alpha3=='KNA'] <- 'Latin America and the Caribbean'
onlyscores4$NIQt <- (onlyscores4$NIQtemp1 + onlyscores4$mean)/2
onlyscores4$NIQ <- (onlyscores4$NIQtemp1 + onlyscores4$mean)/2
onlyscores4$revised <- 0
onlyscores4$revised[onlyscores4$alpha3 %in% unique(nd$alpha3)] <- 1
onlyscores4$NIQ[onlyscores4$revised==1] <- onlyscores4$NIQr[onlyscores4$revised==1]
tviqs <- onlyscores4 %>% select(NIQ, mean, NIQtemp1, alpha3, NIQt, NIQr)
tviqs$name <- countrycode(tviqs$alpha3, origin='iso3c', destination='country.name')
Warning: Some values were not matched unambiguously: KSV
revisedlist <- tviqs %>% select(name, NIQt, NIQr) %>% filter(!is.na(NIQr))
Europe IQ map.
library(ggplot2)
library(dplyr)
library(maps)
library(countrycode)
world_map <- map_data("world")
world_map$alpha3 <- countrycode(world_map$region, origin = "country.name", destination = "iso3c")
Warning: Some values were not matched unambiguously: Ascension Island, Azores, Barbuda, Bonaire, Canary Islands, Chagos Archipelago, Grenadines, Heard Island, Kosovo, Madeira Islands, Micronesia, Saba, Saint Martin, Siachen Glacier, Sint Eustatius, Virgin Islands
onlyb <- onlyscores4 %>% filter(!is.na(alpha3))
world_map$alpha3[world_map$region == "Kosovo"] <- "KSV"
world_map_data <- left_join(world_map, onlyb, by = c('alpha3'))
# Define IQ bins and colors
world_map_data$color_category <- cut(world_map_data$NIQ,
breaks = c(-Inf, 85, 88, 91, 94, 97, 100, Inf),
labels = c('=< 85', '85 to 88', '88 to 91', '91 to 94', '94 to 97', '97 to 100', '> 100'),
right = TRUE)
europe_bounds <- c(-25, 50, 35, 70)
# Subset data for Europe
europe_map_data <- world_map_data %>%
filter(long >= europe_bounds[1], long <= europe_bounds[2],
lat >= europe_bounds[3], lat <= europe_bounds[4])
# Plotting
p_europe <- ggplot(data = europe_map_data, aes(x = long, y = lat, group = group, fill = color_category)) +
geom_polygon(color = "black") +
scale_fill_manual(name = "IQ",
values = c("=< 85" = "orange2",
"85 to 88" = "#FFDD55",
"88 to 91" = "#FFEECC",
"91 to 94" = "#CCEEFF",
"94 to 97" = "#66CCFF",
"97 to 100" = "#4477EE",
"> 100" = "#4422AA")) +
theme_minimal() +
theme(plot.background = element_rect(fill = "white"),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank()) +
labs(title = "")
p_europe
file_name <- paste0('output/eumap.png')
ggsave(filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
Chart of Lynn 2002 and current estimates
p <- GG_scatter(onlyscores4, 'NIQ', 'L.V02', case_names='alpha3') +
geom_point() +
geom_smooth(method = 'lm', se = FALSE, color = 'blue') +
labs(x = 'National IQ (Jensen & Kirkegaard, 2024)', y = "National IQ (Lynn and Vanhannen, 2002)") +
theme_minimal() +
theme_bw() +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
legend.position = "right",
plot.background = element_rect(fill = "white")
)
p
file_name <- paste0('output/lv.jpg')
ggsave(filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
Charting relationship between standard errors and means (final data)
fit2 <- lm(data=onlyscores4, seadj ~ NIQ)
summary(fit2)
Call:
lm(formula = seadj ~ NIQ, data = onlyscores4)
Residuals:
Min 1Q Median 3Q Max
-3.4387 -0.9795 -0.3994 0.5010 12.5201
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.09143 0.99776 10.114 < 0.0000000000000002 ***
NIQ -0.09306 0.01185 -7.856 0.00000000000034 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.762 on 181 degrees of freedom
(28 observations deleted due to missingness)
Multiple R-squared: 0.2543, Adjusted R-squared: 0.2502
F-statistic: 61.72 on 1 and 181 DF, p-value: 0.0000000000003399
fit4 <- lm(data=onlyscores4, seadj ~ ns(NIQ, df=4))
summary(fit4)
Call:
lm(formula = seadj ~ ns(NIQ, df = 4), data = onlyscores4)
Residuals:
Min 1Q Median 3Q Max
-3.6244 -0.8290 -0.2513 0.4813 12.2187
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.1397 0.6741 4.658 0.00000623 ***
ns(NIQ, df = 4)1 -1.1289 0.6716 -1.681 0.09455 .
ns(NIQ, df = 4)2 -2.9789 0.7151 -4.166 0.00004831 ***
ns(NIQ, df = 4)3 -0.8624 1.6543 -0.521 0.60280
ns(NIQ, df = 4)4 -2.5542 0.9013 -2.834 0.00513 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.729 on 178 degrees of freedom
(28 observations deleted due to missingness)
Multiple R-squared: 0.2938, Adjusted R-squared: 0.2779
F-statistic: 18.51 on 4 and 178 DF, p-value: 0.0000000000009732
# passes
anova(fit4, fit2)
Analysis of Variance Table
Model 1: seadj ~ ns(NIQ, df = 4)
Model 2: seadj ~ NIQ
Res.Df RSS Df Sum of Sq F Pr(>F)
1 178 532.08
2 181 561.85 -3 -29.778 3.3207 0.0211 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
uzi3 <- seq(from=62, to=109, by=0.01)
uzi4 <- data.frame(NIQ=uzi3)
uzi4$fit = predict(fit4, uzi4, interval = "confidence")
p <- ggplot(uzi4) +
geom_point(mapping = aes(x=NIQ, y=seadj), data=onlyscores4) +
geom_line(data = uzi4, aes(x = NIQ, y = fit[, 1]), color = "green", size = 1) +
geom_ribbon(data = uzi4, aes(x = NIQ, ymin = fit[, 2], ymax = fit[, 3]), alpha = 0.35) + # Confidence interval shading
geom_text(data = onlyscores4, aes(x = NIQ, y = seadj, label = alpha3), vjust = -.66, size = 3) + # Add country labels
labs(title = "spearman's rho = -.63, n = 201") +
xlab('National IQ') +
ylab('Standard Error') +
theme_bw() +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 16),
axis.title.y = element_text(size = 16),
legend.position = "right",
plot.background = element_rect(fill = "white")
)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.
plot(p)
file_name <- paste0('output/sechart.jpg')
ggsave(plot = p, filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
Charting the relationship between WB test scores and Lynn 2002 estimates.
p <- GG_scatter(onlyscores4, 'wbtestscore', 'L.V02', case_names='alpha3') +
geom_point() +
geom_smooth(method = 'lm', se = FALSE, color = 'blue') +
labs(x = 'World Bank Test Scores', y = "National IQ (Lynn and Vanhannen, 2002)") +
theme_minimal() +
theme_bw() +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
legend.position = "right",
plot.background = element_rect(fill = "white")
)
p
file_name <- paste0('output/lv2.jpg')
ggsave(filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
Grouping the GDP data together.
HDIGDP <- HDI %>% select(iso3, gnipc_2021, gnipc_2020, gnipc_2019, gnipc_2018)
IMFGDP$alpha3 <- countrycode(IMFGDP[, 1] %>% unlist(), origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: Australia and New Zealand, Kosovo, Timor
Warning: Some strings were matched more than once, and therefore set to <NA> in the result: Australia and New Zealand,AUS,NZL
IMFGDP$alpha3[IMFGDP[, 1]=='Kosovo'] <- 'KSV'
IMFGDP$alpha3[IMFGDP[, 1]=='Timor'] <- 'TLS'
IMFGDP$alpha3[IMFGDP[, 1]=='Timor'] <- 'TLS'
IMFGDP$alpha3[IMFGDP[, 1]=='Australia and New Zealand'] <- 'NZL'
IMFGDP$gdp2022 <- as.numeric(IMFGDP[, 44] %>% unlist())
Warning: NAs introduced by coercion
IMFGDP$gdp2021 <- as.numeric(IMFGDP[, 43] %>% unlist())
Warning: NAs introduced by coercion
IMFGDP$gdp2020 <- as.numeric(IMFGDP[, 42] %>% unlist())
Warning: NAs introduced by coercion
IMFGDP$gdp2019 <- as.numeric(IMFGDP[, 41] %>% unlist())
Warning: NAs introduced by coercion
IMFGDP$gdp2018 <- as.numeric(IMFGDP[, 40] %>% unlist())
Warning: NAs introduced by coercion
CIAGDP$alpha3 <- countrycode(CIAGDP$name, origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: Kosovo, Saint Martin, Virgin Islands
CIAGDP$alpha3[CIAGDP$name=='Kosovo'] <- 'KSV'
SPI <- SPI %>% filter(spiyear > 2017 & spiyear < 2023)
average_scores <- SPI %>%
filter(spiyear %in% 2018:2022) %>% # Filter for the specific years
group_by(spicountrycode) %>% # Group by country
summarise(across(where(is.numeric), ~mean(.x, na.rm = TRUE), .names = "avg_{.col}")) # Average for each numeric column
#GDP calculations
IMFGDP$gdpimf = rowMeans(subset(IMFGDP, select=c(gdp2018, gdp2019, gdp2020, gdp2021, gdp2022)), na.rm = TRUE)
WBGDP$gdpwb = rowMeans(subset(WBGDP, select=c(X2018, X2019, X2020, X2021, X2022)), na.rm = TRUE)
HDIGDP$gdphdi = rowMeans(subset(HDIGDP, select=c(gnipc_2018, gnipc_2019, gnipc_2020, gnipc_2021)), na.rm = TRUE)
CIAGDP$gdpcia <- as.numeric(gsub("[\\$,]", "", CIAGDP$value))
imf2 <- IMFGDP %>% select(alpha3, gdpimf)
imf2 <- imf2[-135, ]
wb2 <- WBGDP %>% select(Country.Code, gdpwb)
hdi2 <- HDIGDP %>% select(iso3, gdphdi)
cia2 <- CIAGDP %>% select(alpha3, gdpcia)
spi2 <- average_scores %>% select(spicountrycode, avg_GDPpc)
GNIPPPIMF$gnipppimf = rowMeans(subset(GNIPPPIMF, select=c(X2018, X2019, X2020, X2021, X2022)), na.rm = TRUE)
GNIPPPIMF2 <- GNIPPPIMF %>% select(Country.Code, gnipppimf)
medianwealth$wealth <- as.numeric(medianwealth$Median)
Warning: NAs introduced by coercion
medianwealth$alpha3 <- countrycode(medianwealth$Location, origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: European Union, Guyana
medianwealth$alpha3[medianwealth$Location=='Guyana'] <- 'GUY'
medianincome$income <- as.numeric(medianincome$medianIncomeByCountry_medianIncome)
medianincome$alpha3 <- countrycode(medianincome$country, origin = 'country.name', destination='iso3c')
Warning: Some values were not matched unambiguously: Micronesia
medianincome$alpha3[medianincome$country=='Micronesia'] <- 'FSM'
mi2 <- medianincome %>% select(alpha3, income)
mw2 <- medianwealth %>% select(alpha3, wealth)
gdpcum <- full_join(imf2, wb2, by = join_by(alpha3 == Country.Code))
gdpcum2 <- full_join(gdpcum, hdi2, by = join_by(alpha3 == iso3))
gdpcum3 <- full_join(gdpcum2, cia2, by = join_by(alpha3 == alpha3))
Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
gdpcum4 <- full_join(gdpcum3, spi2, by = join_by(alpha3 == spicountrycode))
gdpcum5 <- full_join(gdpcum4, mi2, by = join_by(alpha3 == alpha3))
gdpcum6 <- full_join(gdpcum5, mw2, by = join_by(alpha3 == alpha3))
Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
gdpcum7 <- full_join(gdpcum6, GNIPPPIMF2, by = join_by(alpha3 == Country.Code))
gdpcum7 <- gdpcum7[-193, ]
gdpcum7 <- gdpcum7 %>% filter(!is.na(alpha3))
gdpcum7 <- gdpcum7[1:269, ]
gdpcum7$gdpspi <- gdpcum7$avg_GDPpc
gdpcum7$gnihdi <- gdpcum7$gdphdi
gdpcum7$GDP = rowMeans(subset(gdpcum7, select=c(gdpwb, gdpcia, gdpimf, gdpspi)), na.rm = TRUE)
esca <- gdpcum7 %>% select(GDP, income, wealth, gnihdi, gnipppimf, alpha3)
Regressing GDP onto NIQ.
forchart <- full_join(tviqs, esca, by = join_by(alpha3 == alpha3))
#VCT and FSM not in original
forchart <- forchart %>% filter(!alpha3=='VCT')
forchart <- forchart %>% filter(!alpha3=='FSM')
p <- GG_scatter(forchart, 'NIQ', 'GDP', case_names='alpha3') +
geom_point() +
geom_smooth(method = 'lm', se = T, color = 'orange') +
geom_smooth(se = T, color = 'blue') +
theme_bw() +
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
legend.position = "right",
plot.background = element_rect(fill = "white")
)
p
file_name <- paste0('output/niqngdp.jpg')
ggsave(filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
onlyscores4$sf <- pnorm((onlyscores4$NIQ-125)/15)
fusedtrans <- left_join(onlyscores4, esca, by='alpha3')
fusedtrans <- fusedtrans %>% filter(!alpha3=='VCT')
fusedtrans <- fusedtrans %>% filter(!alpha3=='FSM')
p2 <- GG_scatter(fusedtrans, 'sf', 'GDP', case_names='alpha3') + labs(x = "Predicted % who score above 125", y = "GDP per capita", title = "") + theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
legend.position = "right",
plot.background = element_rect(fill = "white")
) + geom_smooth()
p2
file_name <- paste0('output/dlift2.jpg')
ggsave(plot = p, filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
World IQ map.
world_map <- map_data("world")
world_map$alpha3 <- countrycode(world_map$region, origin = "country.name", destination = "iso3c")
Warning: Some values were not matched unambiguously: Ascension Island, Azores, Barbuda, Bonaire, Canary Islands, Chagos Archipelago, Grenadines, Heard Island, Kosovo, Madeira Islands, Micronesia, Saba, Saint Martin, Siachen Glacier, Sint Eustatius, Virgin Islands
onlyb <- onlyscores4 %>% filter(!is.na(alpha3))
world_map_data <- left_join(world_map, onlyb, by = c('alpha3'))
world_map_data$color_category <- cut(world_map_data$NIQ,
breaks = c(-Inf, 71, 76, 81, 86, 91, 96, 101, Inf),
labels = c('=< 71', '71 to 76', '76 to 81', '81 to 86', '86 to 91', '91 to 96', '96 to 101', '> 101'),
right = TRUE)
p <- ggplot(data = world_map_data, aes(x = long, y = lat, group = group, fill = color_category)) +
geom_polygon(color = "black") +
scale_fill_manual(name = "IQ",
values = c("=< 71" = "darkred",
"71 to 76" = "red1",
"76 to 81" = "orange",
"81 to 86" = "#FFDD55",
"86 to 91" = "#FFEECC",
"91 to 96" = "#66CCFF",
"96 to 101" = "#4477EE",
"> 101" = "#4422AA")) +
theme_minimal() +
theme(plot.background = element_rect(fill = "white"),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank()) +
labs(title = "")
p
file_name <- paste0('output/natedysssss.png')
ggsave(filename = file_name, dpi = 420)
Saving 7.29 x 4.5 in image
Regressing logGNI onto NIQ.
#################3
#S-factor data cleaning
years <- 2018:2022
variable_pattern <- paste0(".*_(", paste(years, collapse = "|"), ")")
variable_pattern
[1] ".*_(2018|2019|2020|2021|2022)"
long_hdi <- pivot_longer(HDI,
cols = matches(variable_pattern),
names_to = c("variable", "year"),
names_sep = "_",
values_drop_na = TRUE)
Warning: Expected 2 pieces. Additional pieces discarded in 92 rows [1, 22, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, ...].
terst <- long_hdi %>%
mutate(year = as.numeric(year)) %>% filter(year %in% years)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `year = as.numeric(year)`.
Caused by warning:
! NAs introduced by coercion
long_hdi <- pivot_longer(HDI,
cols = matches(variable_pattern),
names_to = c("variable", "year"),
names_sep = "_",
values_drop_na = TRUE) %>%
mutate(year = as.numeric(year)) %>% # Convert year to numeric
filter(year %in% years) # Keep only the years 2018-2022
Warning: Expected 2 pieces. Additional pieces discarded in 92 rows [1, 22, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, ...].Warning: There was 1 warning in `mutate()`.
ℹ In argument: `year = as.numeric(year)`.
Caused by warning:
! NAs introduced by coercion
average_values2 <- long_hdi %>%
group_by(iso3, variable) %>%
summarise(average = mean(value, na.rm = TRUE), .groups = 'drop')
wide_hdi <- pivot_wider(average_values2, names_from = variable, values_from = average)
wide_hdi <- data.frame(wide_hdi)
SFACTOR <- full_join(SPI, wide_hdi, by = join_by(spicountrycode == iso3))
SFACTOR <- SFACTOR %>% filter(!is.na(spicountrycode))
SFACTOR <- SFACTOR %>% filter(spiyear==2022)
SFACTOR <- full_join(SFACTOR, esca, by = join_by(spicountrycode == alpha3))
SFACTOR$GNI = rowMeans(subset(SFACTOR, select=c(gnipc, gnihdi, gnipppimf)), na.rm = TRUE)
SFACTOR$GNI[SFACTOR$spicountrycode=='MAC'] <- 92487.5
SFACTOR$alpha3 <- SFACTOR$spicountrycode
gnis <- SFACTOR %>% select(GNI, alpha3)
forchart2 <- full_join(gnis, onlyscores4, by='alpha3')
forchart2$logGNI <- log(forchart2$GNI)
p <- GG_scatter(forchart2, 'NIQ', 'logGNI', case_names='alpha3') + labs(
x = 'National IQ (Jensen & Kirkegaard, 2024)',
y = "log(GNI)"
) +
theme_minimal() + # Use a minimal theme
theme_bw() + # Base theme with white background
theme(
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
legend.position = "right",
plot.background = element_rect(fill = "white")
)
# Print the plot
print(p)
ggsave(plot = p, filename = 'output/oeyeopen243.jpg', dpi = 420)
Saving 7.29 x 4.5 in image
lr <- lm(data=forchart2, logGNI ~ NIQ)
summary(lr)
Call:
lm(formula = logGNI ~ NIQ, data = forchart2)
Residuals:
Min 1Q Median 3Q Max
-2.20404 -0.39976 0.00287 0.31940 1.91091
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.085779 0.371933 5.608 0.0000000702 ***
NIQ 0.087582 0.004451 19.678 < 0.0000000000000002 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6786 on 193 degrees of freedom
(80 observations deleted due to missingness)
Multiple R-squared: 0.6674, Adjusted R-squared: 0.6656
F-statistic: 387.2 on 1 and 193 DF, p-value: < 0.00000000000000022