About

This is an R notebook for the study:

The paper was edited in March 2017 to improve readability.

Initialize

Load packages and data.

options(digits = 2)
library(pacman)
p_load(kirkegaard)
data = read.csv("data.csv", row.names=1)
colnames(data)
##  [1] "Number.of.participants"     "IQ"                        
##  [3] "Years.of.edu"               "Higher.edu.pct."           
##  [5] "GDP.US.per.capita"          "Ethnic.Han.pct"            
##  [7] "High.ed.per.100k.2013"      "High.ed.per.100k.2012"     
##  [9] "High.ed.per.100k.2011"      "High.ed.per.100k.2010"     
## [11] "High.ed.per.100k.2009"      "High.ed.per.100k.2008"     
## [13] "Med.tech.pers.per.10k.2011" "Med.tech.pers.per.10k.2010"
## [15] "Med.tech.pers.per.10k.2009" "Med.tech.pers.per.10k.2008"
## [17] "Med.tech.pers.per.10k.2004" "life.expect.2010"          
## [19] "Pop.soc.sur.13"             "Pop.soc.sur.12"            
## [21] "Pop.soc.sur.11"             "Pop.soc.sur.09"            
## [23] "Pop.soc.sur.08"             "Pop.soc.sur.07"            
## [25] "Pop.soc.sur.06"             "Pop.soc.sur.05"            
## [27] "Pop.soc.sur.04"             "Pop.soc.sur.illit.13"      
## [29] "Pop.soc.sur.illit.12"       "Pop.soc.sur.illit.11"      
## [31] "Pop.soc.sur.illit.09"       "Pop.soc.sur.illit.08"      
## [33] "Pop.soc.sur.illit.07"       "Pop.soc.sur.illit.06"      
## [35] "Pop.soc.sur.illit.05"       "Pop.soc.sur.illit.04"      
## [37] "Pop.size.13"                "Pop.size.12"               
## [39] "Pop.size.11"                "Pop.size.10"               
## [41] "Pop.size.09"                "Pop.size.08"               
## [43] "Pop.size.07"                "Pop.size.06"               
## [45] "Pop.size.05"                "Pop.size.04"               
## [47] "Pop.size.urban.13"          "Pop.size.urban.12"         
## [49] "Pop.size.urban.11"          "Pop.size.urban.10"         
## [51] "Pop.size.urban.09"          "Pop.size.urban.08"         
## [53] "Pop.size.urban.07"          "Pop.size.urban.06"         
## [55] "Pop.size.urban.05"          "Pop.size.rural.13"         
## [57] "Pop.size.rural.12"          "Pop.size.rural.11"         
## [59] "Pop.size.rural.10"          "Pop.size.rural.09"         
## [61] "Pop.size.rural.08"          "Pop.size.rural.07"         
## [63] "Pop.size.rural.06"          "Pop.size.rural.05"         
## [65] "Internet.users.13"          "Internet.users.12"         
## [67] "Internet.users.11"          "Internet.users.10"         
## [69] "Internet.users.09"          "Internet.users.08"         
## [71] "Internet.users.07"          "Internet.users.06"         
## [73] "Internet.users.05"          "Internet.users.04"         
## [75] "Invent.patent.13"           "Invent.patent.12"          
## [77] "Invent.patent.11"           "Invent.patent.10"          
## [79] "Invent.patent.09"           "Invent.patent.08"          
## [81] "Invent.patent.07"           "Invent.patent.06"          
## [83] "Invent.patent.05"           "Invent.patent.04"          
## [85] "Sci.personnel.11"           "Sci.personnel.10"          
## [87] "Sci.personnel.09"           "Sci.personnel.08"          
## [89] "Sci.personnel.07"           "Sci.personnel.06"          
## [91] "Sci.personnel.05"           "Sci.personnel.04"

Recode data.

##Population data
Pop.size = data[,grep("Pop.size.\\d",colnames(data))] 
Pop.size.survey = data[,grep("Pop.soc.sur.\\d",colnames(data))]
Pop.size.urban = data[,grep("Pop.size.urban.\\d",colnames(data))]
Pop.size.rural = data[,grep("Pop.size.rural.\\d",colnames(data))]
Pop.size.pct.urban = Pop.size.urban/Pop.size[-10]
Pop.size.pct.urban.mean = data.frame(Pop.size.pct.urban.mean=apply(Pop.size.pct.urban,1,mean))

#illiteracy
Pop.size.survey.illit = data[,grep("Pop.soc.sur.illit.\\d",colnames(data))]
Illit.per.cap = Pop.size.survey.illit/Pop.size.survey
fa(Illit.per.cap)
## The estimated weights for the factor scores are probably incorrect.  Try a different factor extraction method.
## Factor Analysis using method =  minres
## Call: fa(r = Illit.per.cap)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                       MR1   h2     u2 com
## Pop.soc.sur.illit.13 0.96 0.93 0.0749   1
## Pop.soc.sur.illit.12 0.98 0.97 0.0344   1
## Pop.soc.sur.illit.11 0.99 0.98 0.0197   1
## Pop.soc.sur.illit.09 1.00 0.99 0.0083   1
## Pop.soc.sur.illit.08 0.99 0.99 0.0122   1
## Pop.soc.sur.illit.07 0.98 0.96 0.0372   1
## Pop.soc.sur.illit.06 0.99 0.99 0.0112   1
## Pop.soc.sur.illit.05 0.98 0.96 0.0353   1
## Pop.soc.sur.illit.04 0.98 0.96 0.0392   1
## 
##                 MR1
## SS loadings    8.73
## Proportion Var 0.97
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  34 with Chi Square of  883
## The degrees of freedom for the model are 27  and the objective function was  6.4 
## 
## The root mean square of the residuals (RMSR) is  0.01 
## The df corrected root mean square of the residuals is  0.02 
## 
## The harmonic number of observations is  31 with the empirical chi square  0.47  with prob <  1 
## The total number of observations was  31  with Likelihood Chi Square =  163  with prob <  2.4e-21 
## 
## Tucker Lewis Index of factoring reliability =  0.78
## RMSEA index =  0.45  and the 90 % confidence intervals are  0.35 0.47
## BIC =  70
## Fit based upon off diagonal values = 1
Illit.g = as.numeric(fa(Illit.per.cap)$scores)
## The estimated weights for the factor scores are probably incorrect.  Try a different factor extraction method.
Illit.per.cap.mean = data.frame(Illit.per.cap.mean=apply(Illit.per.cap,1,mean))

#high ed per 100k
High.ed.per.100k = data[,grep("High.ed.per.100k.\\d",colnames(data))]
High.ed.per.100k.mean = data.frame(High.ed.per.100k.mean=apply(High.ed.per.100k,1,mean))

#med tech personnel per 10k
Med.tech.pers.per10kcap = data[,grep("Med.tech.pers.per.10k.\\d",colnames(data))]
Med.tech.pers.per10kcap.mean = data.frame(Med.tech.pers.per10kcap.mean=apply(Med.tech.pers.per10kcap,1,mean))

#internet users
Internet.users = data[,grep("Internet.users.\\d",colnames(data))]
Internet.users.per.capita = Internet.users/Pop.size
Internet.users.per.capita.mean = data.frame(Internet.users.per.capita.mean=apply(Internet.users.per.capita,1,mean))

#Invention patents
Invention.patents = data[,grep("Invent.patent.\\d",colnames(data))]
Invention.patents.per.cap = Invention.patents/Pop.size
Invention.patents.per.cap.mean = data.frame(Invention.patents.per.cap.mean=apply(Invention.patents.per.cap,1,mean))

#scientific personenel
Scientific.personnel = data[,grep("Sci.personnel.\\d",colnames(data))]
Scientific.personnel.per.cap = Scientific.personnel/Pop.size[-c(1:2)]
Scientific.personnel.per.cap.mean = data.frame(Scientific.personnel.per.cap.mean=apply(Scientific.personnel.per.cap,1,mean))

#Yearly intercorrelations
alpha(Pop.size.pct.urban)
## 
## Reliability analysis   
## Call: alpha(x = Pop.size.pct.urban)
## 
##   raw_alpha std.alpha G6(smc) average_r  S/N     ase mean   sd
##          1         1       1      0.99 1289 0.00029  0.5 0.15
## 
##  lower alpha upper     95% confidence boundaries
## 1 1 1 
## 
##  Reliability if an item is dropped:
##                   raw_alpha std.alpha G6(smc) average_r  S/N alpha se
## Pop.size.urban.13         1         1       1      0.99 1353  0.00026
## Pop.size.urban.12         1         1       1      0.99 1241  0.00029
## Pop.size.urban.11         1         1       1      0.99 1094  0.00033
## Pop.size.urban.10         1         1       1      0.99 1050  0.00035
## Pop.size.urban.09         1         1       1      0.99 1056  0.00036
## Pop.size.urban.08         1         1       1      0.99 1058  0.00035
## Pop.size.urban.07         1         1       1      0.99 1091  0.00034
## Pop.size.urban.06         1         1       1      0.99 1162  0.00032
## Pop.size.urban.05         1         1       1      0.99 1288  0.00028
## 
##  Item statistics 
##                    n raw.r std.r r.cor r.drop mean   sd
## Pop.size.urban.13 31  0.99  0.99  0.99   0.99 0.54 0.14
## Pop.size.urban.12 31  1.00  1.00  1.00   0.99 0.53 0.14
## Pop.size.urban.11 31  1.00  1.00  1.00   1.00 0.52 0.14
## Pop.size.urban.10 31  1.00  1.00  1.00   1.00 0.51 0.15
## Pop.size.urban.09 31  1.00  1.00  1.00   1.00 0.49 0.15
## Pop.size.urban.08 31  1.00  1.00  1.00   1.00 0.48 0.15
## Pop.size.urban.07 31  1.00  1.00  1.00   1.00 0.47 0.15
## Pop.size.urban.06 31  1.00  1.00  1.00   1.00 0.46 0.15
## Pop.size.urban.05 31  0.99  0.99  0.99   0.99 0.45 0.16
alpha(Illit.per.cap)
## 
## Reliability analysis   
## Call: alpha(x = Illit.per.cap)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase  mean    sd
##       0.99         1       1      0.97 287 0.0012 0.088 0.069
## 
##  lower alpha upper     95% confidence boundaries
## 0.99 0.99 1 
## 
##  Reliability if an item is dropped:
##                      raw_alpha std.alpha G6(smc) average_r S/N alpha se
## Pop.soc.sur.illit.13      0.99         1       1      0.98 316   0.0010
## Pop.soc.sur.illit.12      0.99         1       1      0.97 259   0.0012
## Pop.soc.sur.illit.11      0.99         1       1      0.97 244   0.0013
## Pop.soc.sur.illit.09      0.99         1       1      0.97 234   0.0014
## Pop.soc.sur.illit.08      0.99         1       1      0.97 237   0.0014
## Pop.soc.sur.illit.07      0.99         1       1      0.97 263   0.0013
## Pop.soc.sur.illit.06      0.99         1       1      0.97 236   0.0014
## Pop.soc.sur.illit.05      0.99         1       1      0.97 261   0.0013
## Pop.soc.sur.illit.04      0.99         1       1      0.97 265   0.0013
## 
##  Item statistics 
##                       n raw.r std.r r.cor r.drop  mean    sd
## Pop.soc.sur.illit.13 31  0.97  0.97  0.97   0.96 0.060 0.071
## Pop.soc.sur.illit.12 31  0.98  0.98  0.99   0.98 0.060 0.060
## Pop.soc.sur.illit.11 31  0.99  0.99  0.99   0.99 0.060 0.051
## Pop.soc.sur.illit.09 31  0.99  1.00  1.00   0.99 0.083 0.069
## Pop.soc.sur.illit.08 31  0.99  0.99  0.99   0.99 0.089 0.067
## Pop.soc.sur.illit.07 31  0.98  0.98  0.98   0.98 0.096 0.069
## Pop.soc.sur.illit.06 31  0.99  0.99  0.99   0.99 0.108 0.081
## Pop.soc.sur.illit.05 31  0.99  0.98  0.98   0.98 0.124 0.082
## Pop.soc.sur.illit.04 31  0.98  0.98  0.98   0.98 0.115 0.077
alpha(High.ed.per.100k)
## 
## Reliability analysis   
## Call: alpha(x = High.ed.per.100k)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd
##       0.99         1       1      0.98 367 0.0013 2346 987
## 
##  lower alpha upper     95% confidence boundaries
## 0.99 0.99 1 
## 
##  Reliability if an item is dropped:
##                       raw_alpha std.alpha G6(smc) average_r S/N alpha se
## High.ed.per.100k.2013      0.99         1       1      0.99 381   0.0012
## High.ed.per.100k.2012      0.99         1       1      0.98 301   0.0015
## High.ed.per.100k.2011      0.99         1       1      0.98 262   0.0018
## High.ed.per.100k.2010      0.99         1       1      0.98 272   0.0019
## High.ed.per.100k.2009      0.99         1       1      0.98 301   0.0017
## High.ed.per.100k.2008      0.99         1       1      0.99 346   0.0014
## 
##  Item statistics 
##                        n raw.r std.r r.cor r.drop mean   sd
## High.ed.per.100k.2013 31  0.99  0.99  0.99   0.98 2493  856
## High.ed.per.100k.2012 31  0.99  0.99  0.99   0.99 2420  882
## High.ed.per.100k.2011 31  1.00  1.00  1.00   1.00 2338  913
## High.ed.per.100k.2010 31  1.00  1.00  1.00   1.00 2328 1046
## High.ed.per.100k.2009 31  1.00  0.99  0.99   0.99 2280 1097
## High.ed.per.100k.2008 31  0.99  0.99  0.99   0.99 2214 1167
alpha(Med.tech.pers.per10kcap)
## 
## Reliability analysis   
## Call: alpha(x = Med.tech.pers.per10kcap)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean sd
##       0.99         1       1      0.99 364 0.0011   46 20
## 
##  lower alpha upper     95% confidence boundaries
## 0.99 0.99 1 
## 
##  Reliability if an item is dropped:
##                            raw_alpha std.alpha G6(smc) average_r S/N
## Med.tech.pers.per.10k.2011      0.99         1       1      0.99 288
## Med.tech.pers.per.10k.2010      0.99         1       1      0.98 245
## Med.tech.pers.per.10k.2009      0.99         1       1      0.98 237
## Med.tech.pers.per.10k.2008      0.99         1       1      0.98 244
## Med.tech.pers.per.10k.2004      1.00         1       1      0.99 760
##                            alpha se
## Med.tech.pers.per.10k.2011  0.00140
## Med.tech.pers.per.10k.2010  0.00166
## Med.tech.pers.per.10k.2009  0.00175
## Med.tech.pers.per.10k.2008  0.00169
## Med.tech.pers.per.10k.2004  0.00047
## 
##  Item statistics 
##                             n raw.r std.r r.cor r.drop mean sd
## Med.tech.pers.per.10k.2011 31  1.00  0.99  1.00   0.99   51 22
## Med.tech.pers.per.10k.2010 31  1.00  1.00  1.00   1.00   49 21
## Med.tech.pers.per.10k.2009 31  1.00  1.00  1.00   1.00   47 21
## Med.tech.pers.per.10k.2008 31  1.00  1.00  1.00   1.00   43 20
## Med.tech.pers.per.10k.2004 31  0.98  0.98  0.98   0.98   39 16
alpha(Internet.users.per.capita)
## 
## Reliability analysis   
## Call: alpha(x = Internet.users.per.capita)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean    sd
##       0.99      0.99       1      0.94 154 0.0021 0.26 0.095
## 
##  lower alpha upper     95% confidence boundaries
## 0.98 0.99 0.99 
## 
##  Reliability if an item is dropped:
##                   raw_alpha std.alpha G6(smc) average_r S/N alpha se
## Internet.users.13      0.98      0.99       1      0.94 138   0.0025
## Internet.users.12      0.98      0.99       1      0.94 134   0.0025
## Internet.users.11      0.98      0.99       1      0.94 130   0.0026
## Internet.users.10      0.98      0.99       1      0.94 134   0.0026
## Internet.users.09      0.98      0.99       1      0.93 126   0.0027
## Internet.users.08      0.98      0.99       1      0.93 129   0.0027
## Internet.users.07      0.98      0.99       1      0.94 137   0.0024
## Internet.users.06      0.99      0.99       1      0.94 140   0.0020
## Internet.users.05      0.99      0.99       1      0.94 152   0.0018
## Internet.users.04      0.99      0.99       1      0.95 172   0.0017
## 
##  Item statistics 
##                    n raw.r std.r r.cor r.drop  mean    sd
## Internet.users.13 31  0.98  0.97  0.97   0.98 0.461 0.114
## Internet.users.12 31  0.99  0.98  0.98   0.98 0.423 0.119
## Internet.users.11 31  0.99  0.99  0.99   0.99 0.386 0.123
## Internet.users.10 31  0.99  0.98  0.98   0.98 0.347 0.108
## Internet.users.09 31  0.99  0.99  0.99   0.99 0.294 0.114
## Internet.users.08 31  0.99  0.99  0.98   0.99 0.237 0.121
## Internet.users.07 31  0.97  0.97  0.97   0.97 0.165 0.094
## Internet.users.06 31  0.96  0.97  0.97   0.95 0.110 0.064
## Internet.users.05 31  0.94  0.95  0.95   0.93 0.090 0.061
## Internet.users.04 31  0.91  0.93  0.93   0.90 0.078 0.058
alpha(Scientific.personnel.per.cap)
## 
## Reliability analysis   
## Call: alpha(x = Scientific.personnel.per.cap)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd
##       0.98      0.98    0.99      0.86  50 0.0057  1.2 0.66
## 
##  lower alpha upper     95% confidence boundaries
## 0.97 0.98 0.99 
## 
##  Reliability if an item is dropped:
##                  raw_alpha std.alpha G6(smc) average_r S/N alpha se
## Sci.personnel.11      0.98      0.98    0.99      0.89  56   0.0051
## Sci.personnel.10      0.98      0.98    0.98      0.85  41   0.0070
## Sci.personnel.09      0.98      0.98    0.98      0.87  45   0.0066
## Sci.personnel.08      0.97      0.98    0.98      0.85  40   0.0073
## Sci.personnel.07      0.97      0.98    0.98      0.86  42   0.0070
## Sci.personnel.06      0.97      0.98    0.98      0.86  42   0.0070
## Sci.personnel.05      0.98      0.98    0.98      0.87  48   0.0062
## Sci.personnel.04      0.97      0.98    0.98      0.86  42   0.0069
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean   sd
## Sci.personnel.11 31  0.87  0.87  0.84   0.83  1.3 0.77
## Sci.personnel.10 31  0.96  0.96  0.96   0.95  1.1 0.59
## Sci.personnel.09 31  0.93  0.93  0.92   0.91  1.2 0.69
## Sci.personnel.08 31  0.97  0.97  0.97   0.96  1.2 0.68
## Sci.personnel.07 31  0.95  0.95  0.95   0.94  1.2 0.76
## Sci.personnel.06 31  0.96  0.96  0.96   0.94  1.2 0.78
## Sci.personnel.05 31  0.91  0.91  0.89   0.88  1.3 0.72
## Sci.personnel.04 31  0.95  0.95  0.95   0.93  1.2 0.68
alpha(Invention.patents.per.cap)
## 
## Reliability analysis   
## Call: alpha(x = Invention.patents.per.cap)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd
##       0.97         1       1      0.96 240 0.0035  2.2 3.4
## 
##  lower alpha upper     95% confidence boundaries
## 0.96 0.97 0.98 
## 
##  Reliability if an item is dropped:
##                  raw_alpha std.alpha G6(smc) average_r S/N alpha se
## Invent.patent.13      0.97      1.00       1      0.97 258   0.0032
## Invent.patent.12      0.96      1.00       1      0.96 229   0.0044
## Invent.patent.11      0.96      1.00       1      0.96 213   0.0047
## Invent.patent.10      0.96      1.00       1      0.96 201   0.0044
## Invent.patent.09      0.96      0.99       1      0.96 197   0.0042
## Invent.patent.08      0.96      1.00       1      0.96 203   0.0040
## Invent.patent.07      0.97      1.00       1      0.96 199   0.0037
## Invent.patent.06      0.97      1.00       1      0.96 209   0.0036
## Invent.patent.05      0.97      1.00       1      0.96 219   0.0035
## Invent.patent.04      0.97      1.00       1      0.96 248   0.0034
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean  sd
## Invent.patent.13 31  0.98  0.96  0.96   0.96 5.10 6.8
## Invent.patent.12 31  0.99  0.97  0.97   0.98 3.91 5.5
## Invent.patent.11 31  0.99  0.98  0.98   0.99 3.12 4.7
## Invent.patent.10 31  1.00  0.99  0.99   0.99 2.27 3.6
## Invent.patent.09 31  0.99  1.00  1.00   0.99 1.89 3.2
## Invent.patent.08 31  0.98  0.99  0.99   0.98 1.67 3.2
## Invent.patent.07 31  0.98  0.99  0.99   0.98 1.31 2.4
## Invent.patent.06 31  0.97  0.99  0.99   0.97 1.08 2.0
## Invent.patent.05 31  0.96  0.98  0.98   0.96 0.91 1.8
## Invent.patent.04 31  0.94  0.96  0.96   0.94 0.67 1.3
##New DF
data2 = cbind(data["IQ"],
              data["Ethnic.Han.pct"]/100,
              Pop.size.pct.urban.mean,
              Illit.per.cap.mean,
              High.ed.per.100k.mean,
              Med.tech.pers.per10kcap.mean,
              Internet.users.per.capita.mean,
              Invention.patents.per.cap.mean,
              Scientific.personnel.per.cap.mean,
              data["Years.of.edu"],
              data["Higher.edu.pct."],
              data["GDP.US.per.capita"],
              data["life.expect.2010"]
              )

Analyses

Main analyses

## S factor?
S.fa = fa(data2[-c(1:2)])
data2["S"] = as.numeric(S.fa$scores)

cors = wtd.cors(data2)

S.loadings = as.numeric(S.fa$loadings)
names(S.loadings) = dimnames(S.fa$loadings)[[1]]

#MCV
fa_Jensens_method(S.fa, data2, criterion = "IQ", loading_reversing = F) +
  xlim(-.7, 1.1)
## Using Pearson correlations for the criterion-indicators relationships.

silence(ggsave("MCV.png"))

#Main plots
GG_scatter(data2, "IQ", "S")

silence(ggsave("IQ_S.png"))

GG_scatter(data2[-2, ], "IQ", "S")

silence(ggsave("IQ_S_noB.png"))

GG_scatter(data2, "Ethnic.Han.pct", "IQ") +
  xlab("Percent Han") +
  scale_x_continuous(labels = scales::percent)

silence(ggsave("Han_IQ.png"))

GG_scatter(data2[-2, ], "Ethnic.Han.pct", "IQ") +
  xlab("Percent Han") +
  scale_x_continuous(labels = scales::percent)