What

This is an R markdown document for the following study:

Packages

options(digits = 2)
library(pacman)
p_load(kirkegaard, readr, dplyr, lavaan, broom)

Data

#load
#d = readxl::read_xlsx("data/Jordan dataset.xlsx")
d = haven::read_sav("data/math4thgradenew.sav")

#recode
d %<>% mutate(
  sex = g_st %>% plyr::mapvalues(from = c(1, 2), to = c("Male", "Female")) %>% factor(),
  school_sex = g_sch %>% plyr::mapvalues(from = c(1, 2, 3), to = c("Male", "Female", "Mixed"))
)

#cognitive only
cog_items = d %>% df_subset_by_pattern("^Ans")

#item content
item_content = read_csv("data/item_content.csv")
## Parsed with column specification:
## cols(
##   Item = col_integer(),
##   Content = col_character(),
##   Level = col_character()
## )
#recode Bloom taxonomy levels
#the dataset's labels correspond to (revised) Bloom levels roughly as:
#knowledge -> remember, applying -> application, reasoning -> analysis
#inspect the levels (releveled with knowledge first; result not saved)
item_content$Level %>% fct_relevel("knowledge")
##  [1] knowledge   knowledge   knowledge   knowledge   application
##  [6] application reasoning   application application application
## [11] application knowledge   knowledge   knowledge   application
## [16] knowledge   application knowledge   knowledge   knowledge  
## [21] knowledge   reasoning   reasoning   reasoning   application
## [26] reasoning   knowledge   reasoning   application reasoning  
## [31] reasoning   knowledge   knowledge   application reasoning  
## [36] application application reasoning   reasoning   reasoning  
## Levels: knowledge application reasoning
item_content$Level_ord = item_content$Level %>% plyr::mapvalues(c("knowledge", "application", "reasoning"), c("Bloom L1: knowledge", "Bloom L2: application", "Bloom L3: analysis")) %>% ordered(levels = c("Bloom L1: knowledge", "Bloom L2: application", "Bloom L3: analysis"))
item_content$Level_ord_alt = item_content$Level_ord %>% fct_collapse("Bloom L2/3 application/analysis" = c("Bloom L2: application", "Bloom L3: analysis"))

Theory

#the mapping from pass rates to the latent (z) metric is non-linear:
#the same 5 percentage point pass rate difference implies a much larger latent gap
#near the ceiling than near the middle of the distribution
#middle of the distribution
qnorm(.5)
## [1] 0
qnorm(.55)
## [1] 0.13
qnorm(.55)-qnorm(.5)
## [1] 0.13
#near the ceiling
qnorm(.94)
## [1] 1.6
qnorm(.99)
## [1] 2.3
qnorm(.99)-qnorm(.94)
## [1] 0.77
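
The same point in the opposite direction: a fixed latent gap produces a smaller raw pass rate difference the closer an item sits to its ceiling or floor. A minimal sketch (the 0.10 d gap is an arbitrary illustrative value, not an estimate from these data):

#fixed latent gap of 0.10 d, evaluated at several overall pass rates
latent_gap = 0.10
base_rates = c(.10, .30, .50, .70, .90, .99)
data_frame(
  base_rate = base_rates,
  #pass rates if the two groups sit half the latent gap above/below the item's overall level
  higher = pnorm(qnorm(base_rates) + latent_gap/2),
  lower = pnorm(qnorm(base_rates) - latent_gap/2),
  pass_rate_gap = higher - lower
)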

Descriptive

#sex
table2(d$sex)

Scoring

#simple scoring
d$sum_score = rowSums(cog_items)

#IRT
irt_all = psych::irt.fa(cog_items)

#score
irt_all_score = scoreIrt(irt_all, cog_items)
d$irt_all = irt_all_score$theta1 %>% standardize()

#compare scoring methods
GG_scatter(d, "irt_all", "sum_score")

GG_save("figs/sum_irt_scoring.png")
d %$% cor(irt_all, sum_score) %>% print(digits = 5)
## [1] 0.99593
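
As an optional cross-check of the scoring, one could fit the same one-factor (2PL) model with a different IRT implementation and compare the person scores. A minimal sketch, assuming the mirt package is installed (it is not among the packages loaded above):

#cross-check the psych::irt.fa-based scores against mirt's 2PL EAP scores
p_load(mirt)
irt_mirt_fit = mirt(as.data.frame(cog_items), model = 1, itemtype = "2PL", verbose = FALSE)
d$irt_mirt = fscores(irt_mirt_fit)[, 1] %>% standardize()
d %$% cor(irt_mirt, irt_all)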

Ceiling problems

#how many examinees get the lowest (0) and the perfect (40/40) score
table2(d$sum_score, include_NA = F) %>% 
  #convert the score groups to integer
  mutate(Group = as.integer(Group)) %>% 
  #sort by score
  arrange(Group) %>% 
  #keep the lowest and the highest (perfect) score rows
  .[c(1, 41), ]
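
The share of examinees at the extremes can also be computed directly; a minimal sketch, assuming one point per item so that the maximum possible score equals ncol(cog_items) = 40:

#proportion at the perfect score and at the zero score
mean(d$sum_score == ncol(cog_items), na.rm = TRUE)
mean(d$sum_score == 0, na.rm = TRUE)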

Sex difference

#plot
GG_denhist(d, "irt_all", group = "sex") +
  scale_x_continuous("Mathematical ability\n(IRT score, standardized)")
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/dist_by_sex.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#summary stats
describeBy(d$irt_all, group = d$sex)
## 
##  Descriptive statistics by group 
## group: Female
##    vars     n mean   sd median trimmed mad  min max range skew kurtosis
## X1    1 15946 0.06 0.98  -0.01    0.04 1.1 -2.2 2.1   4.4 0.18    -0.88
##      se
## X1 0.01
## -------------------------------------------------------- 
## group: Male
##    vars     n  mean sd median trimmed mad  min max range skew kurtosis
## X1    1 16400 -0.06  1  -0.14    -0.1 1.2 -2.2 2.1   4.4 0.24    -0.96
##      se
## X1 0.01
SMD_matrix(d$irt_all, d$sex)
##        Female Male
## Female     NA 0.12
## Male     0.12   NA
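
The SMD above can be verified by hand as Cohen's d with a pooled standard deviation. A minimal sketch using a small helper function (not part of the packages loaded above):

#standardized mean difference by hand: (mean Female - mean Male) / pooled SD
cohen_d = function(x, g) {
  xs = split(x, g)
  m = map_dbl(xs, mean, na.rm = TRUE)
  s = map_dbl(xs, sd, na.rm = TRUE)
  n = map_dbl(xs, ~sum(!is.na(.)))
  pooled_sd = sqrt(((n[1] - 1) * s[1]^2 + (n[2] - 1) * s[2]^2) / (sum(n) - 2))
  unname((m[1] - m[2]) / pooled_sd)
}
cohen_d(d$irt_all, d$sex)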

Male advantage and item params

#male advantage and difficulty
irt_params_all = data_frame(
  item = seq_along(cog_items),
  position = as.numeric(item),
  pass_rate = cog_items %>% colMeans(),
  difficulty = irt_all$irt$difficulty %>% .[[1]],
  discrimination = irt_all$irt$discrimination %>% .[, 1],
  loading = irt_all$fa$loadings %>% as.vector()
)

#male advantage per item: gap between probit-transformed pass rates
irt_params_all$male_d = map_dbl(names(cog_items), function(item) {
  #pass rates by sex
  b_pass = cog_items[d$sex == "Male", item] %>% unlist() %>% wtd_mean()
  g_pass = cog_items[d$sex == "Female", item] %>% unlist() %>% wtd_mean()
  
  #gap on the z (probit) scale
  qnorm(b_pass) - qnorm(g_pass)
})

irt_params_all %>% write_clipboard()
##    Item Position Pass rate Difficulty Discrimination Loading Male d
## 1     1     1.00      0.60      -0.30           0.61    0.52   0.01
## 2     2     2.00      0.58      -0.27           0.78    0.62   0.01
## 3     3     3.00      0.77      -0.84           0.58    0.50  -0.20
## 4     4     4.00      0.52      -0.05           0.70    0.58  -0.14
## 5     5     5.00      0.48       0.06           0.66    0.55   0.00
## 6     6     6.00      0.35       0.46           0.58    0.50   0.10
## 7     7     7.00      0.48       0.07           0.55    0.48  -0.07
## 8     8     8.00      0.64      -0.47           0.83    0.64   0.17
## 9     9     9.00      0.79      -1.02           0.78    0.61  -0.21
## 10   10    10.00      0.28       0.61           0.30    0.28   0.00
## 11   11    11.00      0.40       0.29           0.48    0.43  -0.15
## 12   12    12.00      0.59      -0.26           0.52    0.46  -0.06
## 13   13    13.00      0.63      -0.43           0.86    0.65  -0.15
## 14   14    14.00      0.71      -0.76           0.93    0.68  -0.23
## 15   15    15.00      0.39       0.35           0.79    0.62  -0.01
## 16   16    16.00      0.40       0.27           0.53    0.47   0.04
## 17   17    17.00      0.49       0.04           0.82    0.64  -0.11
## 18   18    18.00      0.57      -0.22           0.88    0.66  -0.07
## 19   19    19.00      0.48       0.07           0.64    0.54  -0.11
## 20   20    20.00      0.28       0.64           0.49    0.44   0.02
## 21   21    21.00      0.55      -0.17           0.80    0.63   0.04
## 22   22    22.00      0.48       0.04           0.44    0.41  -0.04
## 23   23    23.00      0.63      -0.44           0.85    0.65  -0.04
## 24   24    24.00      0.58      -0.26           0.89    0.66  -0.15
## 25   25    25.00      0.48       0.07           0.75    0.60  -0.15
## 26   26    26.00      0.33       0.48           0.47    0.42   0.00
## 27   27    27.00      0.31       0.54           0.44    0.41   0.00
## 28   28    28.00      0.53      -0.09           0.77    0.61   0.01
## 29   29    29.00      0.45       0.14           0.63    0.53   0.11
## 30   30    30.00      0.38       0.38           0.72    0.58   0.00
## 31   31    31.00      0.52      -0.07           0.74    0.60  -0.09
## 32   32    32.00      0.67      -0.47           0.43    0.39  -0.24
## 33   33    33.00      0.61      -0.36           0.80    0.62  -0.16
## 34   34    34.00      0.68      -0.63           0.91    0.67  -0.13
## 35   35    35.00      0.67      -0.55           0.75    0.60  -0.10
## 36   36    36.00      0.63      -0.43           0.76    0.61  -0.18
## 37   37    37.00      0.73      -0.83           0.92    0.68  -0.26
## 38   38    38.00      0.67      -0.54           0.74    0.59  -0.26
## 39   39    39.00      0.38       0.37           0.60    0.52  -0.11
## 40   40    40.00      0.35       0.46           0.56    0.49  -0.05
#plots
GG_scatter(irt_params_all, "difficulty", "male_d", case_names = "item")

GG_save("figs/difficulty_male_advantage.png")

GG_scatter(irt_params_all, "pass_rate", "male_d", case_names = "item")

GG_save("figs/passrate_male_advantage.png")

GG_scatter(irt_params_all, "loading", "male_d", case_names = "item")

GG_save("figs/loadings_male_advantage.png")

#outlier diagnostics (fitted values, residuals, Cook's distance) for the difficulty model
lm(male_d ~ difficulty, data = irt_params_all) %>% augment()
#correlations without the outlying item (item 8)
irt_params_all[-8, ] %>% cor_matrix(CI = .95)
## Warning in summary.lm(lm(stdz(y, weight = weight) ~ stdz(x, weight =
## weight), : essentially perfect fit: summary may be unreliable
##                item                 position            
## item           "1"                  "1.00 [1.00 1.00]"  
## position       "1.00 [1.00 1.00]"   "1"                 
## pass_rate      "0.02 [-0.30 0.33]"  "0.02 [-0.30 0.33]" 
## difficulty     "-0.02 [-0.33 0.30]" "-0.02 [-0.33 0.30]"
## discrimination "0.16 [-0.16 0.45]"  "0.16 [-0.16 0.45]" 
## loading        "0.16 [-0.17 0.45]"  "0.16 [-0.17 0.45]" 
## male_d         "-0.24 [-0.52 0.08]" "-0.24 [-0.52 0.08]"
##                pass_rate             difficulty           
## item           "0.02 [-0.30 0.33]"   "-0.02 [-0.33 0.30]" 
## position       "0.02 [-0.30 0.33]"   "-0.02 [-0.33 0.30]" 
## pass_rate      "1"                   "-1.00 [-1.00 -0.99]"
## difficulty     "-1.00 [-1.00 -0.99]" "1"                  
## discrimination "0.59 [0.34 0.76]"    "-0.60 [-0.77 -0.35]"
## loading        "0.58 [0.33 0.76]"    "-0.59 [-0.76 -0.33]"
## male_d         "-0.67 [-0.81 -0.45]" "0.68 [0.47 0.82]"   
##                discrimination        loading              
## item           "0.16 [-0.16 0.45]"   "0.16 [-0.17 0.45]"  
## position       "0.16 [-0.16 0.45]"   "0.16 [-0.17 0.45]"  
## pass_rate      "0.59 [0.34 0.76]"    "0.58 [0.33 0.76]"   
## difficulty     "-0.60 [-0.77 -0.35]" "-0.59 [-0.76 -0.33]"
## discrimination "1"                   "0.99 [0.99 1.00]"   
## loading        "0.99 [0.99 1.00]"    "1"                  
## male_d         "-0.35 [-0.60 -0.04]" "-0.33 [-0.58 -0.01]"
##                male_d               
## item           "-0.24 [-0.52 0.08]" 
## position       "-0.24 [-0.52 0.08]" 
## pass_rate      "-0.67 [-0.81 -0.45]"
## difficulty     "0.68 [0.47 0.82]"   
## discrimination "-0.35 [-0.60 -0.04]"
## loading        "-0.33 [-0.58 -0.01]"
## male_d         "1"
#models
lm(male_d ~ difficulty, data = irt_params_all)
## 
## Call:
## lm(formula = male_d ~ difficulty, data = irt_params_all)
## 
## Coefficients:
## (Intercept)   difficulty  
##     -0.0603       0.1365
lm(male_d ~ difficulty, data = irt_params_all[-8, ])
## 
## Call:
## lm(formula = male_d ~ difficulty, data = irt_params_all[-8, ])
## 
## Coefficients:
## (Intercept)   difficulty  
##     -0.0663       0.1514
#predicted male advantage at difficulty -1, 0 and 1
lm(male_d ~ difficulty, data = irt_params_all) %>% predict(newdata = data.frame(difficulty = c(-1, 0, 1)))
##      1      2      3 
## -0.197 -0.060  0.076
lm(male_d ~ difficulty, data = irt_params_all[-8, ]) %>% predict(newdata = data.frame(difficulty = c(-1, 0, 1)))
##      1      2      3 
## -0.218 -0.066  0.085
GG_scatter(irt_params_all, "pass_rate", "difficulty") +
  ylim(-1, 1)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

lm(pass_rate ~ difficulty, data = irt_params_all) %>% predict(newdata = data.frame(difficulty = c(-1, 0, 1)))
##    1    2    3 
## 0.81 0.49 0.18
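
Combining the two regressions: translate the predicted latent male advantage at difficulty -1, 0 and 1 into the raw pass rate difference one would expect to observe, assuming each sex sits half the gap above/below the item's overall level (a back-of-the-envelope sketch, not part of the original analysis):

#expected raw pass rate gap implied by the predicted latent gap at each difficulty level
expected_gap = data.frame(difficulty = c(-1, 0, 1))
expected_gap$pass_rate = lm(pass_rate ~ difficulty, data = irt_params_all) %>% predict(newdata = expected_gap)
expected_gap$male_d = lm(male_d ~ difficulty, data = irt_params_all[-8, ]) %>% predict(newdata = expected_gap)
expected_gap$pass_rate_gap = pnorm(qnorm(expected_gap$pass_rate) + expected_gap$male_d / 2) -
  pnorm(qnorm(expected_gap$pass_rate) - expected_gap$male_d / 2)
expected_gap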
describe(d$irt_all) %>% as.data.frame()
#new subtests: split the items into the 20 easiest and 20 hardest
easy_items = irt_params_all %>% 
  #sort by difficulty
  arrange(difficulty) %>% 
  #take the 20 easiest item indices
  .[1:20, "item"] %>% unlist()

#create tests
cog_items_easy = cog_items[, easy_items]
cog_items_hard = cog_items[, -easy_items]

#IRT
irt_easy = irt.fa(cog_items_easy)

irt_hard = irt.fa(cog_items_hard)

#score
irt_easy_score = scoreIrt(irt_easy, cog_items_easy)
irt_hard_score = scoreIrt(irt_hard, cog_items_hard)

#save
d$irt_easy = irt_easy_score$theta1 %>% standardize()
d$irt_hard = irt_hard_score$theta1 %>% standardize()

#correlation
GG_scatter(d, "irt_easy", "irt_hard")

GG_save("figs/easy_hard.png")

#differences
SMD_matrix(d$irt_easy, d$sex)
##        Female Male
## Female     NA 0.15
## Male     0.15   NA
SMD_matrix(d$irt_hard, d$sex)
##        Female  Male
## Female     NA 0.066
## Male    0.066    NA
#expected sex difference on an item of average difficulty (difficulty = 0), i.e. without ceiling/floor compression
fit_sex_interaction_model = lm(male_d ~ difficulty, data = irt_params_all[-8, ])
predict(fit_sex_interaction_model, newdata = data.frame(difficulty = 0))
##      1 
## -0.066
#5 random half-samples of examinees (rows), to check the stability of the item parameter estimates
set.seed(1)
cog_items_random = map(1:5, ~sample_frac(cog_items, size = .5))

#fit IRTs
irt_random = map(cog_items_random, ~irt.fa(., plot = F))
#extract difficulty and loading estimates and correlate them across the 5 subsamples
map(irt_random, ~.$irt$difficulty[[1]]) %>% as.data.frame() %>% set_colnames(1:5) %>% wtd.cors() %>% MAT_half() %>% mean() %>% print(digits = 5)
## [1] 0.99971
map(irt_random, ~.$fa$loadings %>% as.vector) %>% as.data.frame() %>% set_colnames(1:5) %>% wtd.cors() %>% MAT_half() %>% mean() %>% print(digits = 5)
## [1] 0.99692

Sex bias checks

#items by sex
cog_items_male = cog_items[d$sex == "Male", ]
cog_items_female = cog_items[d$sex != "Male", ]

#IRT by sex
irt_male = irt.fa(cog_items_male)

irt_female = irt.fa(cog_items_female)

#extract IRT params by sex
irt_params = data_frame(
  item = rep(seq_along(cog_items), times = 2) %>% factor(),
  sex = rep(c("Male", "Female"), each = 40),
  difficulty = c(irt_male$tau, irt_female$tau),
  loading = c(irt_male$fa$loadings %>% as.vector(), irt_female$fa$loadings %>% as.vector())
)

#print
irt_params %>% print(n = Inf)
## # A tibble: 80 x 4
##    item  sex    difficulty loading
##    <fct> <chr>       <dbl>   <dbl>
##  1 1     Male     -0.257     0.525
##  2 2     Male     -0.215     0.622
##  3 3     Male     -0.631     0.524
##  4 4     Male      0.0255    0.554
##  5 5     Male      0.0532    0.545
##  6 6     Male      0.348     0.506
##  7 7     Male      0.0941    0.481
##  8 8     Male     -0.443     0.668
##  9 9     Male     -0.705     0.644
## 10 10    Male      0.586     0.270
## 11 11    Male      0.338     0.416
## 12 12    Male     -0.200     0.505
## 13 13    Male     -0.251     0.652
## 14 14    Male     -0.447     0.694
## 15 15    Male      0.280     0.623
## 16 16    Male      0.220     0.428
## 17 17    Male      0.0880    0.649
## 18 18    Male     -0.132     0.669
## 19 19    Male      0.115     0.528
## 20 20    Male      0.566     0.393
## 21 21    Male     -0.150     0.634
## 22 22    Male      0.0615    0.439
## 23 23    Male     -0.316     0.661
## 24 24    Male     -0.123     0.679
## 25 25    Male      0.134     0.592
## 26 26    Male      0.429     0.387
## 27 27    Male      0.493     0.395
## 28 28    Male     -0.0740    0.607
## 29 29    Male      0.0652    0.554
## 30 30    Male      0.309     0.601
## 31 31    Male     -0.0125    0.604
## 32 32    Male     -0.314     0.423
## 33 33    Male     -0.204     0.636
## 34 34    Male     -0.400     0.715
## 35 35    Male     -0.387     0.627
## 36 36    Male     -0.258     0.633
## 37 37    Male     -0.488     0.684
## 38 38    Male     -0.308     0.605
## 39 39    Male      0.371     0.524
## 40 40    Male      0.424     0.475
## 41 1     Female   -0.252     0.514
## 42 2     Female   -0.206     0.613
## 43 3     Female   -0.831     0.472
## 44 4     Female   -0.116     0.596
## 45 5     Female    0.0492    0.556
## 46 6     Female    0.451     0.508
## 47 7     Female    0.0270    0.476
## 48 8     Female   -0.276     0.626
## 49 9     Female   -0.913     0.567
## 50 10    Female    0.585     0.299
## 51 11    Female    0.190     0.443
## 52 12    Female   -0.260     0.410
## 53 13    Female   -0.402     0.649
## 54 14    Female   -0.676     0.661
## 55 15    Female    0.275     0.623
## 56 16    Female    0.264     0.520
## 57 17    Female   -0.0211    0.617
## 58 18    Female   -0.202     0.655
## 59 19    Female    0.00582   0.547
## 60 20    Female    0.582     0.501
## 61 21    Female   -0.111     0.626
## 62 22    Female    0.0182    0.367
## 63 23    Female   -0.355     0.633
## 64 24    Female   -0.271     0.646
## 65 25    Female   -0.0193    0.599
## 66 26    Female    0.433     0.464
## 67 27    Female    0.496     0.420
## 68 28    Female   -0.0681    0.613
## 69 29    Female    0.172     0.518
## 70 30    Female    0.311     0.567
## 71 31    Female   -0.102     0.585
## 72 32    Female   -0.551     0.349
## 73 33    Female   -0.366     0.603
## 74 34    Female   -0.531     0.620
## 75 35    Female   -0.491     0.567
## 76 36    Female   -0.434     0.569
## 77 37    Female   -0.748     0.660
## 78 38    Female   -0.565     0.575
## 79 39    Female    0.258     0.505
## 80 40    Female    0.370     0.501
#reorder item id by difficulty
irt_params$item = fct_reorder(irt_params$item, irt_params$difficulty)

#plot
ggplot(irt_params, aes(item, difficulty, color = sex)) +
  geom_point() +
  geom_smooth() +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 10, size = 7))
## `geom_smooth()` using method = 'loess'

GG_save("figs/difficulty_sex.png")
## `geom_smooth()` using method = 'loess'
#reorder item id by loading
irt_params$item = fct_reorder(irt_params$item, irt_params$loading)

ggplot(irt_params, aes(item, loading, color = sex)) +
  geom_point() +
  geom_smooth() +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 10, size = 7))
## `geom_smooth()` using method = 'loess'

GG_save("figs/loading_sex.png")
## `geom_smooth()` using method = 'loess'
#cross-sex correlations of item parameters
irt_params[c("sex", "difficulty", "item")] %>% 
  spread(sex, difficulty) %>% 
  cor_matrix(CI = .95)
##        Female             Male              
## Female "1"                "0.97 [0.95 0.99]"
## Male   "0.97 [0.95 0.99]" "1"
irt_params[c("sex", "loading", "item")] %>% 
  spread(sex, loading) %>% 
  cor_matrix(CI = .95)
##        Female             Male              
## Female "1"                "0.90 [0.82 0.95]"
## Male   "0.90 [0.82 0.95]" "1"
#related to male advantage?
irt_params_all$delta_difficulty = irt_params[c("sex", "difficulty", "item")] %>% 
  spread(sex, difficulty) %>% 
  mutate(delta = Male - Female) %>% 
  .[["delta"]]

irt_params_all$delta_loading = irt_params[c("sex", "loading", "item")] %>% 
  spread(sex, loading) %>% 
  mutate(delta = Male - Female) %>% 
  .[["delta"]]

#absolute sex differences in item parameters
irt_params_all$abs_delta_difficulty = abs(irt_params_all$delta_difficulty)
irt_params_all$abs_delta_loading = abs(irt_params_all$delta_loading)

#cors
cor_matrix(irt_params_all[c("male_d", "delta_difficulty", "delta_loading", "abs_delta_difficulty", "abs_delta_loading")], CI = .95) %>% write_clipboard()
##                      male_d              delta_difficulty    delta_loading       abs_delta_difficulty abs_delta_loading
## male_d                1                   0.03 [-0.28 0.34]   0.01 [-0.30 0.32]  -0.02 [-0.33 0.30]   -0.05 [-0.36 0.26]
## delta_difficulty      0.03 [-0.28 0.34]   1                   0.36 [0.05 0.60]    0.74 [0.56 0.85]     0.10 [-0.22 0.40]
## delta_loading         0.01 [-0.30 0.32]   0.36 [0.05 0.60]    1                   0.44 [0.15 0.66]     0.12 [-0.20 0.41]
## abs_delta_difficulty -0.02 [-0.33 0.30]   0.74 [0.56 0.85]    0.44 [0.15 0.66]    1                    0.11 [-0.21 0.41]
## abs_delta_loading    -0.05 [-0.36 0.26]   0.10 [-0.22 0.40]   0.12 [-0.20 0.41]   0.11 [-0.21 0.41]    1
#descriptives
describeBy(irt_params$difficulty, group = irt_params$sex)
## 
##  Descriptive statistics by group 
## group: Female
##    vars  n  mean   sd median trimmed  mad   min  max range  skew kurtosis
## X1    1 40 -0.11 0.39  -0.11    -0.1 0.44 -0.91 0.58   1.5 -0.07    -0.88
##      se
## X1 0.06
## -------------------------------------------------------- 
## group: Male
##    vars  n  mean   sd median trimmed  mad  min  max range skew kurtosis
## X1    1 40 -0.03 0.34  -0.04   -0.03 0.39 -0.7 0.59   1.3 0.07    -0.97
##      se
## X1 0.05
describeBy(irt_params$loading, group = irt_params$sex)
## 
##  Descriptive statistics by group 
## group: Female
##    vars  n mean   sd median trimmed  mad min  max range  skew kurtosis
## X1    1 40 0.55 0.09   0.57    0.56 0.09 0.3 0.66  0.36 -0.85    -0.02
##      se
## X1 0.01
## -------------------------------------------------------- 
## group: Male
##    vars  n mean   sd median trimmed  mad  min  max range  skew kurtosis
## X1    1 40 0.56 0.11    0.6    0.57 0.11 0.27 0.72  0.44 -0.67    -0.38
##      se
## X1 0.02
#plots
GG_scatter(irt_params_all, "delta_difficulty", "male_d")

GG_scatter(irt_params_all, "delta_loading", "male_d")

GG_scatter(irt_params_all, "abs_delta_difficulty", "male_d")

GG_scatter(irt_params_all, "abs_delta_loading", "male_d")
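
A complementary item-level bias check is logistic regression DIF: regress each item on the overall ability score, sex, and their interaction; a sex main effect indicates uniform DIF and an interaction indicates non-uniform DIF. A minimal sketch (not part of the original analysis), using the overall IRT score as the matching variable:

#logistic regression DIF screen per item (likelihood ratio tests)
dif_lrt = map_df(names(cog_items), function(item) {
  dat = na.omit(data.frame(y = cog_items[[item]], ability = d$irt_all, sex = d$sex))
  fit0 = glm(y ~ ability, data = dat, family = binomial)
  fit1 = glm(y ~ ability + sex, data = dat, family = binomial)
  fit2 = glm(y ~ ability * sex, data = dat, family = binomial)
  data_frame(
    item = item,
    p_uniform = anova(fit0, fit1, test = "LRT")$`Pr(>Chi)`[2],
    p_nonuniform = anova(fit1, fit2, test = "LRT")$`Pr(>Chi)`[2]
  )
})
dif_lrt %>% print(n = Inf)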

Item content and sex differences

#tables of item classification
table2(item_content$Content) %>% write_clipboard()
##           Group Count Percent
## 1        number 23.00   57.50
## 2      geometry 12.00   30.00
## 3 data analysis  5.00   12.50
## 4                0.00    0.00
table2(item_content$Level) %>% write_clipboard()
##         Group Count Percent
## 1   knowledge 15.00   37.50
## 2 application 13.00   32.50
## 3   reasoning 12.00   30.00
## 4              0.00    0.00
#merge item data
irt_params_all$Content = item_content$Content %>% factor() %>% fct_relevel("number")
irt_params_all$Level = item_content$Level
irt_params_all$Level_ord = item_content$Level_ord
irt_params_all$Level_ord_alt = item_content$Level_ord_alt

#model
lm(male_d ~ difficulty, data = irt_params_all) %>% augment()
item_content_model = lm(male_d ~ position + difficulty + loading + Content + Level, data = irt_params_all)
item_content_model %>% summary()
## 
## Call:
## lm(formula = male_d ~ position + difficulty + loading + Content + 
##     Level, data = irt_params_all)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.14450 -0.04288 -0.00971  0.03738  0.24271 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.11355    0.09965   -1.14  0.26298    
## position             -0.00214    0.00168   -1.27  0.21171    
## difficulty            0.15719    0.04010    3.92  0.00044 ***
## loading               0.20098    0.17645    1.14  0.26314    
## Contentdata analysis -0.06273    0.05848   -1.07  0.29149    
## Contentgeometry       0.01094    0.03603    0.30  0.76343    
## Levelknowledge       -0.02030    0.03280   -0.62  0.54040    
## Levelreasoning        0.00110    0.03625    0.03  0.97595    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.082 on 32 degrees of freedom
## Multiple R-squared:  0.482,  Adjusted R-squared:  0.369 
## F-statistic: 4.26 on 7 and 32 DF,  p-value: 0.00201
#level effect in particular
lm(male_d ~ Level, data = irt_params_all) %>% summary()
## 
## Call:
## lm(formula = male_d ~ Level, data = irt_params_all)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.1969 -0.0751  0.0104  0.0799  0.2297 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     -0.0630     0.0294   -2.14    0.039 *
## Levelknowledge  -0.0200     0.0402   -0.50    0.622  
## Levelreasoning  -0.0123     0.0425   -0.29    0.774  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.11 on 37 degrees of freedom
## Multiple R-squared:  0.0067, Adjusted R-squared:  -0.047 
## F-statistic: 0.125 on 2 and 37 DF,  p-value: 0.883
lm(male_d ~ position + loading + Content + Level, data = irt_params_all) %>% summary()
## 
## Call:
## lm(formula = male_d ~ position + loading + Content + Level, data = irt_params_all)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16797 -0.05696 -0.00602  0.06498  0.21167 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           0.103313   0.099309    1.04     0.31  
## position             -0.000957   0.001982   -0.48     0.63  
## loading              -0.220672   0.167585   -1.32     0.20  
## Contentdata analysis -0.118808   0.067941   -1.75     0.09 .
## Contentgeometry      -0.023247   0.041886   -0.56     0.58  
## Levelknowledge       -0.046496   0.038478   -1.21     0.24  
## Levelreasoning        0.011549   0.043312    0.27     0.79  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.099 on 33 degrees of freedom
## Multiple R-squared:  0.234,  Adjusted R-squared:  0.0944 
## F-statistic: 1.68 on 6 and 33 DF,  p-value: 0.158
GG_denhist(irt_params_all, "difficulty", "Level")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

polycor::hetcor(data.frame(irt_params_all$Level_ord, irt_params_all$difficulty))
## 
## Two-Step Estimates
## 
## Correlations/Type of Correlation:
##                           irt_params_all.Level_ord
## irt_params_all.Level_ord                         1
## irt_params_all.difficulty                    0.167
##                           irt_params_all.difficulty
## irt_params_all.Level_ord                 Polyserial
## irt_params_all.difficulty                         1
## 
## Standard Errors:
##  irt_params_all.Level_ord irt_params_all.difficulty 
##                                               0.171 
## Levels:  0.171
## 
## n = 40 
## 
## P-values for Tests of Bivariate Normality:
##  irt_params_all.Level_ord irt_params_all.difficulty 
##                                               0.176 
## Levels:  0.176
polycor::hetcor(data.frame(irt_params_all$Level_ord_alt, irt_params_all$difficulty))
## 
## Two-Step Estimates
## 
## Correlations/Type of Correlation:
##                              irt_params_all.Level_ord_alt
## irt_params_all.Level_ord_alt                            1
## irt_params_all.difficulty                           0.165
##                              irt_params_all.difficulty
## irt_params_all.Level_ord_alt                Polyserial
## irt_params_all.difficulty                            1
## 
## Standard Errors:
## irt_params_all.Level_ord_alt    irt_params_all.difficulty 
##                                                     0.197 
## Levels:  0.197
## 
## n = 40 
## 
## P-values for Tests of Bivariate Normality:
## irt_params_all.Level_ord_alt    irt_params_all.difficulty 
##                                                    0.0806 
## Levels:  0.0806
#nicer output
item_content_model %>% MOD_summary(standardize = F, kfold = F)
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
## 
##     ---- Model summary ----    
## Model coefficients
##                           Beta     SE CI_lower CI_upper
## position               -0.0021 0.0017  -0.0056   0.0013
## difficulty              0.1572 0.0401   0.0755   0.2389
## loading                 0.2010 0.1764  -0.1584   0.5604
## Content: number         0.0000     NA       NA       NA
## Content: data analysis -0.0627 0.0585  -0.1819   0.0564
## Content: geometry       0.0109 0.0360  -0.0625   0.0843
## Level: application      0.0000     NA       NA       NA
## Level: knowledge       -0.0203 0.0328  -0.0871   0.0465
## Level: reasoning        0.0011 0.0362  -0.0727   0.0749
## 
## 
## Model meta-data
##   outcome  N df   R2 R2-adj. R2-cv
## 1  male_d 40 32 0.48    0.37    NA
## 
## 
## Etas from analysis of variance
##              Eta Eta_partial
## position   0.162        0.22
## difficulty 0.499        0.57
## loading    0.145        0.20
## Content    0.191        0.26
## Level      0.087        0.12
item_content_model %>% MOD_summary(standardize = F, kfold = F) %>% .[[1]] %>% write_clipboard()
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##                         Beta   SE CI lower CI upper
## Position                0.00 0.00    -0.01     0.00
## Difficulty              0.16 0.04     0.08     0.24
## Loading                 0.20 0.18    -0.16     0.56
## Content: number         0.00                       
## Content: data analysis -0.06 0.06    -0.18     0.06
## Content: geometry       0.01 0.04    -0.06     0.08
## Level: application      0.00                       
## Level: knowledge       -0.02 0.03    -0.09     0.05
## Level: reasoning        0.00 0.04    -0.07     0.07
item_content_model %>% MOD_summary(standardize = T, kfold = F)
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
## 
##     ---- Model summary ----    
## Model coefficients
##                          Beta   SE CI_lower CI_upper
## position               -0.242 0.19    -0.63     0.14
## difficulty              0.661 0.17     0.32     1.00
## loading                 0.186 0.16    -0.15     0.52
## Content: number         0.000   NA       NA       NA
## Content: data analysis -0.605 0.56    -1.75     0.54
## Content: geometry       0.106 0.35    -0.60     0.81
## Level: application      0.000   NA       NA       NA
## Level: knowledge       -0.196 0.32    -0.84     0.45
## Level: reasoning        0.011 0.35    -0.70     0.72
## 
## 
## Model meta-data
##   outcome  N df   R2 R2-adj. R2-cv
## 1  male_d 40 32 0.48    0.37    NA
## 
## 
## Etas from analysis of variance
##              Eta Eta_partial
## position   0.162        0.22
## difficulty 0.499        0.57
## loading    0.145        0.20
## Content    0.191        0.26
## Level      0.087        0.12
#95% CI for the intercept (normal approximation)
item_content_model %>% summary() %>% 
  .[["coefficients"]] %>% 
  (function(x) {
    #convert to sensible type
    x = as.data.frame(x)

    #calc CI
    x$Estimate[1] + (x$`Std. Error`[1] * c(-1.96, 1.96))
  })
## [1] -0.309  0.082
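
The same interval (for all coefficients, and using the t rather than the normal quantile) can be obtained directly with confint():

#t-based confidence intervals for all model coefficients
confint(item_content_model)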

School type

#summary stats by sex and school type
d %>% plyr::ddply(c("sex", "school_sex"), function(x) {
  data_frame(
    mean = wtd_mean(x$irt_all),
    sd = wtd_sd(x$irt_all),
    n = nrow(x)
  )
})
#item response data subsets by school type x sex
school_sex = list(
  male_alone = cog_items %>% filter(d$school_sex == "Male", d$sex == "Male"),
  female_alone = cog_items %>% filter(d$school_sex == "Female", d$sex == "Female"),
  male_together = cog_items %>% filter(d$school_sex == "Mixed", d$sex == "Male"),
  female_together = cog_items %>% filter(d$school_sex == "Mixed", d$sex == "Female"),
  together = cog_items %>% filter(d$school_sex == "Mixed")
)

#subsample sizes
map_int(school_sex, nrow)
##      male_alone    female_alone   male_together female_together 
##            9822            5536            6531           10408 
##        together 
##           16939
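
The subsample sizes can be cross-checked against the full sex by school type cross tabulation, which also shows any cells not used above (e.g. pupils whose own sex does not match a single-sex school's type):

#full cross tabulation of sex by school type
d %>% count(school_sex, sex)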
#helper: fit an IRT model and return the item parameters
irt_function = function(cog_items) {
  #fit irt
  irt_fit = irt.fa(cog_items)

  #get irt params
  data_frame(
    item = seq_along(cog_items),
    position = as.numeric(item),
    pass_rate = cog_items %>% colMeans(),
    difficulty = irt_fit$irt$difficulty %>% .[[1]],
    discrimination = irt_fit$irt$discrimination %>% .[, 1],
    loading = irt_fit$fa$loadings %>% as.vector(),
    fit = list(irt_fit)
  )
}

#fit IRTs
school_sex_irt = map(school_sex, irt_function) %>% bind_rows(.id = "subsample")

#examine correlations
#difficulty
school_sex_irt %>% 
  select(item, subsample, difficulty) %>% 
  spread(subsample, difficulty) %>% 
  .[-1] %>% 
  wtd.cors()
##                 female_alone female_together male_alone male_together
## female_alone            1.00            1.00       0.96          0.96
## female_together         1.00            1.00       0.95          0.97
## male_alone              0.96            0.95       1.00          0.96
## male_together           0.96            0.97       0.96          1.00
## together                0.99            1.00       0.96          0.99
##                 together
## female_alone        0.99
## female_together     1.00
## male_alone          0.96
## male_together       0.99
## together            1.00
#loading
school_sex_irt %>% 
  select(item, subsample, loading) %>% 
  spread(subsample, loading) %>% 
  .[-1] %>% 
  wtd.cors()
##                 female_alone female_together male_alone male_together
## female_alone            1.00            0.96       0.81          0.90
## female_together         0.96            1.00       0.83          0.96
## male_alone              0.81            0.83       1.00          0.83
## male_together           0.90            0.96       0.83          1.00
## together                0.94            0.99       0.83          0.98
##                 together
## female_alone        0.94
## female_together     0.99
## male_alone          0.83
## male_together       0.98
## together            1.00
#regression for mixed schools
#male advantage per item, computed within mixed schools only
together = school_sex_irt %>% filter(subsample == "together")
together$male_d = map_dbl(names(cog_items), function(item) {
  #pass rates
  b_pass = cog_items[d$sex == "Male" & d$school_sex == "Mixed", item] %>% unlist() %>% wtd_mean()
  g_pass = cog_items[d$sex == "Female" & d$school_sex == "Mixed", item] %>% unlist() %>% wtd_mean()
  
  #z score gap
  qnorm(b_pass) - qnorm(g_pass)
})

#add item position and content classifications
together$position = irt_params_all$position
together$Content = irt_params_all$Content
together$Level = irt_params_all$Level

#cors
together %>% select(position, difficulty, loading, male_d) %>% wtd.cors()
##            position difficulty loading male_d
## position      1.000     -0.033    0.12 -0.152
## difficulty   -0.033      1.000   -0.46 -0.016
## loading       0.119     -0.463    1.00  0.486
## male_d       -0.152     -0.016    0.49  1.000
#model
lm(male_d ~ position + difficulty + loading + Content + Level, data = together) %>% MOD_summary(standardize = F)
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
## 
##     ---- Model summary ----    
## Model coefficients
##                           Beta     SE CI_lower CI_upper
## position               -0.0021 0.0016  -0.0053   0.0012
## difficulty              0.0494 0.0314  -0.0146   0.1134
## loading                 0.5873 0.1520   0.2777   0.8968
## Content: number         0.0000     NA       NA       NA
## Content: data analysis -0.0097 0.0556  -0.1231   0.1036
## Content: geometry       0.0242 0.0344  -0.0459   0.0943
## Level: application      0.0000     NA       NA       NA
## Level: knowledge       -0.0164 0.0311  -0.0797   0.0469
## Level: reasoning        0.0047 0.0343  -0.0651   0.0745
## 
## 
## Model meta-data
##   outcome  N df   R2 R2-adj. R2-cv
## 1  male_d 40 32 0.37    0.23    NA
## 
## 
## Etas from analysis of variance
##             Eta Eta_partial
## position   0.18        0.22
## difficulty 0.22        0.27
## loading    0.54        0.56
## Content    0.14        0.17
## Level      0.09        0.11
lm(male_d ~ difficulty + loading, data = together)  %>% MOD_summary(standardize = F)
## 
##     ---- Model summary ----    
## Model coefficients
##             Beta    SE CI_lower CI_upper
## difficulty 0.049 0.029  -0.0092     0.11
## loading    0.564 0.144   0.2712     0.86
## 
## 
## Model meta-data
##   outcome  N df   R2 R2-adj. R2-cv
## 1  male_d 40 37 0.29    0.25    NA
## 
## 
## Etas from analysis of variance
##             Eta Eta_partial
## difficulty 0.24        0.27
## loading    0.54        0.54
#plot
together %>% GG_scatter("loading", "male_d") 

together %>% GG_scatter("difficulty", "male_d")

#mixed schools, with IRT fitted separately for each sex
together_male = irt.fa(school_sex$male_together)

together_female = irt.fa(school_sex$female_together)

#join
together_item_pars = data_frame(
  male_d = together$male_d,
  male_loading = together_male$fa$loadings %>% .[, 1],
  female_loading = together_female$fa$loadings %>% .[, 1],
  male_difficulty = together_male$irt$difficulty[[1]],
  female_difficulty = together_female$irt$difficulty[[1]],
  #note: the *_alone columns reuse the mixed-school fits above, so they duplicate the previous two columns
  male_difficulty_alone = together_male$irt$difficulty[[1]],
  female_difficulty_alone = together_female$irt$difficulty[[1]],
  
  #diffs
  d_loading = male_loading - female_loading,
  d_diff = male_difficulty - female_difficulty,
  
  #abs
  abs_d_loading = abs(d_loading),
  abs_d_diff = abs(d_diff)
)

wtd.cors(together_item_pars)
##                          male_d male_loading female_loading
## male_d                   1.0000        0.463         0.4513
## male_loading             0.4628        1.000         0.9573
## female_loading           0.4513        0.957         1.0000
## male_difficulty         -0.1581       -0.554        -0.5314
## female_difficulty        0.0734       -0.437        -0.4206
## male_difficulty_alone   -0.1581       -0.554        -0.5314
## female_difficulty_alone  0.0734       -0.437        -0.4206
## d_loading                0.1053        0.287        -0.0025
## d_diff                  -0.9727       -0.594        -0.5620
## abs_d_loading            0.0084        0.059        -0.0901
## abs_d_diff               0.9693        0.577         0.5465
##                         male_difficulty female_difficulty
## male_d                           -0.158             0.073
## male_loading                     -0.554            -0.437
## female_loading                   -0.531            -0.421
## male_difficulty                   1.000             0.972
## female_difficulty                 0.972             1.000
## male_difficulty_alone             1.000             0.972
## female_difficulty_alone           0.972             1.000
## d_loading                        -0.156            -0.117
## d_diff                            0.323             0.094
## abs_d_loading                    -0.083            -0.067
## abs_d_diff                       -0.333            -0.105
##                         male_difficulty_alone female_difficulty_alone
## male_d                                 -0.158                   0.073
## male_loading                           -0.554                  -0.437
## female_loading                         -0.531                  -0.421
## male_difficulty                         1.000                   0.972
## female_difficulty                       0.972                   1.000
## male_difficulty_alone                   1.000                   0.972
## female_difficulty_alone                 0.972                   1.000
## d_loading                              -0.156                  -0.117
## d_diff                                  0.323                   0.094
## abs_d_loading                          -0.083                  -0.067
## abs_d_diff                             -0.333                  -0.105
##                         d_loading d_diff abs_d_loading abs_d_diff
## male_d                     0.1053 -0.973        0.0084      0.969
## male_loading               0.2867 -0.594        0.0588      0.577
## female_loading            -0.0025 -0.562       -0.0901      0.547
## male_difficulty           -0.1562  0.323       -0.0831     -0.333
## female_difficulty         -0.1167  0.094       -0.0674     -0.105
## male_difficulty_alone     -0.1562  0.323       -0.0831     -0.333
## female_difficulty_alone   -0.1167  0.094       -0.0674     -0.105
## d_loading                  1.0000 -0.193        0.5021      0.185
## d_diff                    -0.1931  1.000       -0.0813     -0.998
## abs_d_loading              0.5021 -0.081        1.0000      0.075
## abs_d_diff                 0.1854 -0.998        0.0753      1.000

Versions

sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 18.2
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2       broom_0.4.3        lavaan_0.5-23.1097
##  [4] kirkegaard_2018.03 psych_1.7.8        magrittr_1.5      
##  [7] assertthat_0.2.0   weights_0.85       mice_2.46.0       
## [10] gdata_2.18.0       Hmisc_4.1-1        Formula_1.2-2     
## [13] survival_2.41-3    lattice_0.20-35    forcats_0.3.0     
## [16] stringr_1.3.0      dplyr_0.7.4        purrr_0.2.4       
## [19] readr_1.1.1        tidyr_0.8.0        tibble_1.4.2      
## [22] ggplot2_2.2.1      tidyverse_1.2.1    pacman_0.4.6      
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.3.1          jsonlite_1.5        splines_3.4.3      
##  [4] modelr_0.1.1        gtools_3.5.0        multilevel_2.6     
##  [7] stats4_3.4.3        latticeExtra_0.6-28 cellranger_1.1.0   
## [10] yaml_2.1.16         pbivnorm_0.6.0      pillar_1.2.1       
## [13] backports_1.1.2     glue_1.2.0          quadprog_1.5-5     
## [16] digest_0.6.15       RColorBrewer_1.1-2  checkmate_1.8.5    
## [19] rvest_0.3.2         colorspace_1.3-2    htmltools_0.3.6    
## [22] Matrix_1.2-12       plyr_1.8.4          pkgconfig_2.0.1    
## [25] curry_0.1.1         haven_1.1.1.9000    mvtnorm_1.0-7      
## [28] scales_0.5.0        htmlTable_1.11.2    lsr_0.5            
## [31] nnet_7.3-12         lazyeval_0.2.1      cli_1.0.0          
## [34] mnormt_1.5-5        crayon_1.3.4        readxl_1.0.0       
## [37] evaluate_0.10.1     nlme_3.1-131.1      MASS_7.3-48        
## [40] xml2_1.2.0          foreign_0.8-69      tools_3.4.3        
## [43] data.table_1.10.4-3 hms_0.4.1           munsell_0.4.3      
## [46] cluster_2.0.6       compiler_3.4.3      rlang_0.2.0        
## [49] polycor_0.7-9       grid_3.4.3          rstudioapi_0.7     
## [52] htmlwidgets_1.0     labeling_0.3        base64enc_0.1-3    
## [55] rmarkdown_1.8       gtable_0.2.0        reshape2_1.4.3     
## [58] R6_2.2.2            gridExtra_2.3       lubridate_1.7.2    
## [61] psychometric_2.2    knitr_1.20          utf8_1.1.3         
## [64] bindr_0.1           rprojroot_1.3-2     stringi_1.1.7      
## [67] parallel_3.4.3      Rcpp_0.12.16        rpart_4.1-12       
## [70] acepack_1.4.1       tidyselect_0.2.4