This is an R markdown document for the following study:
options(digits = 2)
library(pacman)
p_load(kirkegaard, readr, dplyr, lavaan, broom)
#load
#d = readxl::read_xlsx("data/Jordan dataset.xlsx")
d = haven::read_sav("data/math4thgradenew.sav")
#recode
d %<>% mutate(
sex = g_st %>% plyr::mapvalues(from = c(1, 2), to = c("Male", "Female")) %>% factor(),
school_sex = g_sch %>% plyr::mapvalues(from = c(1, 2, 3), to = c("Male", "Female", "Mixed"))
)
#cognitive only
cog_items = d %>% df_subset_by_pattern("^Ans")
#item content
item_content = read_csv("data/item_content.csv")
## Parsed with column specification:
## cols(
## Item = col_integer(),
## Content = col_character(),
## Level = col_character()
## )
#recode bloom
#knowledge -> remember
#applying -> application
#reasoning -> analysis
item_content$Level %>% fct_relevel("knowledge")
## [1] knowledge knowledge knowledge knowledge application
## [6] application reasoning application application application
## [11] application knowledge knowledge knowledge application
## [16] knowledge application knowledge knowledge knowledge
## [21] knowledge reasoning reasoning reasoning application
## [26] reasoning knowledge reasoning application reasoning
## [31] reasoning knowledge knowledge application reasoning
## [36] application application reasoning reasoning reasoning
## Levels: knowledge application reasoning
item_content$Level_ord = item_content$Level %>% plyr::mapvalues(c("knowledge", "application", "reasoning"), c("Bloom L1: knowledge", "Bloom L2: application", "Bloom L3: analysis")) %>% ordered(levels = c("Bloom L1: knowledge", "Bloom L2: application", "Bloom L3: analysis"))
item_content$Level_ord_alt = item_content$Level_ord %>% fct_collapse("Bloom L2/3 application/analysis" = c("Bloom L2: application", "Bloom L3: analysis"))
#pass rate non-linear
#mid
qnorm(.5)
## [1] 0
qnorm(.55)
## [1] 0.13
qnorm(.55)-qnorm(.5)
## [1] 0.13
#high
qnorm(.94)
## [1] 1.6
qnorm(.99)
## [1] 2.3
qnorm(.99)-qnorm(.94)
## [1] 0.77
#sex
table2(d$sex)
#simple scoring
d$sum_score = rowSums(cog_items)
#IRT
irt_all = psych::irt.fa(cog_items)
#score
irt_all_score = scoreIrt(irt_all, cog_items)
d$irt_all = irt_all_score$theta1 %>% standardize()
#compare scoring methods
GG_scatter(d, "irt_all", "sum_score")
GG_save("figs/sum_irt_scoring.png")
d %$% cor(irt_all, sum_score) %>% print(digits = 5)
## [1] 0.99593
#how many with perfect score
table2(d$sum_score, include_NA = F) %>%
#mutate into int
mutate(Group = as.integer(Group)) %>%
#sort
arrange(Group) %>%
#subset
.[c(1, 41), ]
#plot
GG_denhist(d, "irt_all", group = "sex") +
scale_x_continuous("Mathematical ability\n(IRT score, standardized)")
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/dist_by_sex.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#summary stats
describeBy(d$irt_all, group = d$sex)
##
## Descriptive statistics by group
## group: Female
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 15946 0.06 0.98 -0.01 0.04 1.1 -2.2 2.1 4.4 0.18 -0.88
## se
## X1 0.01
## --------------------------------------------------------
## group: Male
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 16400 -0.06 1 -0.14 -0.1 1.2 -2.2 2.1 4.4 0.24 -0.96
## se
## X1 0.01
SMD_matrix(d$irt_all, d$sex)
## Female Male
## Female NA 0.12
## Male 0.12 NA
#male advantage and difficulty
irt_params_all = data_frame(
item = seq_along(cog_items),
position = as.numeric(item),
pass_rate = cog_items %>% colMeans(),
difficulty = irt_all$irt$difficulty %>% .[[1]],
discrimination = irt_all$irt$discrimination %>% .[, 1],
loading = irt_all$fa$loadings %>% as.vector()
)
#boy advantage
irt_params_all$male_d = map_dbl(names(cog_items), function(item) {
#pass rates
b_pass = cog_items[d$sex == "Male", item] %>% unlist() %>% wtd_mean()
g_pass = cog_items[d$sex == "Female", item] %>% unlist() %>% wtd_mean()
#z score gap
qnorm(b_pass) - qnorm(g_pass)
})
irt_params_all %>% write_clipboard()
## Item Position Pass rate Difficulty Discrimination Loading Male d
## 1 1 1.00 0.60 -0.30 0.61 0.52 0.01
## 2 2 2.00 0.58 -0.27 0.78 0.62 0.01
## 3 3 3.00 0.77 -0.84 0.58 0.50 -0.20
## 4 4 4.00 0.52 -0.05 0.70 0.58 -0.14
## 5 5 5.00 0.48 0.06 0.66 0.55 0.00
## 6 6 6.00 0.35 0.46 0.58 0.50 0.10
## 7 7 7.00 0.48 0.07 0.55 0.48 -0.07
## 8 8 8.00 0.64 -0.47 0.83 0.64 0.17
## 9 9 9.00 0.79 -1.02 0.78 0.61 -0.21
## 10 10 10.00 0.28 0.61 0.30 0.28 0.00
## 11 11 11.00 0.40 0.29 0.48 0.43 -0.15
## 12 12 12.00 0.59 -0.26 0.52 0.46 -0.06
## 13 13 13.00 0.63 -0.43 0.86 0.65 -0.15
## 14 14 14.00 0.71 -0.76 0.93 0.68 -0.23
## 15 15 15.00 0.39 0.35 0.79 0.62 -0.01
## 16 16 16.00 0.40 0.27 0.53 0.47 0.04
## 17 17 17.00 0.49 0.04 0.82 0.64 -0.11
## 18 18 18.00 0.57 -0.22 0.88 0.66 -0.07
## 19 19 19.00 0.48 0.07 0.64 0.54 -0.11
## 20 20 20.00 0.28 0.64 0.49 0.44 0.02
## 21 21 21.00 0.55 -0.17 0.80 0.63 0.04
## 22 22 22.00 0.48 0.04 0.44 0.41 -0.04
## 23 23 23.00 0.63 -0.44 0.85 0.65 -0.04
## 24 24 24.00 0.58 -0.26 0.89 0.66 -0.15
## 25 25 25.00 0.48 0.07 0.75 0.60 -0.15
## 26 26 26.00 0.33 0.48 0.47 0.42 0.00
## 27 27 27.00 0.31 0.54 0.44 0.41 0.00
## 28 28 28.00 0.53 -0.09 0.77 0.61 0.01
## 29 29 29.00 0.45 0.14 0.63 0.53 0.11
## 30 30 30.00 0.38 0.38 0.72 0.58 0.00
## 31 31 31.00 0.52 -0.07 0.74 0.60 -0.09
## 32 32 32.00 0.67 -0.47 0.43 0.39 -0.24
## 33 33 33.00 0.61 -0.36 0.80 0.62 -0.16
## 34 34 34.00 0.68 -0.63 0.91 0.67 -0.13
## 35 35 35.00 0.67 -0.55 0.75 0.60 -0.10
## 36 36 36.00 0.63 -0.43 0.76 0.61 -0.18
## 37 37 37.00 0.73 -0.83 0.92 0.68 -0.26
## 38 38 38.00 0.67 -0.54 0.74 0.59 -0.26
## 39 39 39.00 0.38 0.37 0.60 0.52 -0.11
## 40 40 40.00 0.35 0.46 0.56 0.49 -0.05
#plots
GG_scatter(irt_params_all, "difficulty", "male_d", case_names = "item")
GG_save("figs/difficulty_male_advantage.png")
GG_scatter(irt_params_all, "pass_rate", "male_d", case_names = "item")
GG_save("figs/passrate_male_advantage.png")
GG_scatter(irt_params_all, "loading", "male_d", case_names = "item")
GG_save("figs/loadings_male_advantage.png")
#outlier stats
lm(male_d ~ difficulty, data = irt_params_all) %>% augment()
#without outlier
irt_params_all[-8, ] %>% cor_matrix(CI = .95)
## Warning in summary.lm(lm(stdz(y, weight = weight) ~ stdz(x, weight =
## weight), : essentially perfect fit: summary may be unreliable
## item position
## item "1" "1.00 [1.00 1.00]"
## position "1.00 [1.00 1.00]" "1"
## pass_rate "0.02 [-0.30 0.33]" "0.02 [-0.30 0.33]"
## difficulty "-0.02 [-0.33 0.30]" "-0.02 [-0.33 0.30]"
## discrimination "0.16 [-0.16 0.45]" "0.16 [-0.16 0.45]"
## loading "0.16 [-0.17 0.45]" "0.16 [-0.17 0.45]"
## male_d "-0.24 [-0.52 0.08]" "-0.24 [-0.52 0.08]"
## pass_rate difficulty
## item "0.02 [-0.30 0.33]" "-0.02 [-0.33 0.30]"
## position "0.02 [-0.30 0.33]" "-0.02 [-0.33 0.30]"
## pass_rate "1" "-1.00 [-1.00 -0.99]"
## difficulty "-1.00 [-1.00 -0.99]" "1"
## discrimination "0.59 [0.34 0.76]" "-0.60 [-0.77 -0.35]"
## loading "0.58 [0.33 0.76]" "-0.59 [-0.76 -0.33]"
## male_d "-0.67 [-0.81 -0.45]" "0.68 [0.47 0.82]"
## discrimination loading
## item "0.16 [-0.16 0.45]" "0.16 [-0.17 0.45]"
## position "0.16 [-0.16 0.45]" "0.16 [-0.17 0.45]"
## pass_rate "0.59 [0.34 0.76]" "0.58 [0.33 0.76]"
## difficulty "-0.60 [-0.77 -0.35]" "-0.59 [-0.76 -0.33]"
## discrimination "1" "0.99 [0.99 1.00]"
## loading "0.99 [0.99 1.00]" "1"
## male_d "-0.35 [-0.60 -0.04]" "-0.33 [-0.58 -0.01]"
## male_d
## item "-0.24 [-0.52 0.08]"
## position "-0.24 [-0.52 0.08]"
## pass_rate "-0.67 [-0.81 -0.45]"
## difficulty "0.68 [0.47 0.82]"
## discrimination "-0.35 [-0.60 -0.04]"
## loading "-0.33 [-0.58 -0.01]"
## male_d "1"
#models
lm(male_d ~ difficulty, data = irt_params_all)
##
## Call:
## lm(formula = male_d ~ difficulty, data = irt_params_all)
##
## Coefficients:
## (Intercept) difficulty
## -0.0603 0.1365
lm(male_d ~ difficulty, data = irt_params_all[-8, ])
##
## Call:
## lm(formula = male_d ~ difficulty, data = irt_params_all[-8, ])
##
## Coefficients:
## (Intercept) difficulty
## -0.0663 0.1514
#predict
lm(male_d ~ difficulty, data = irt_params_all) %>% predict(newdata = data.frame(difficulty = c(-1, 0, 1)))
## 1 2 3
## -0.197 -0.060 0.076
lm(male_d ~ difficulty, data = irt_params_all[-8, ]) %>% predict(newdata = data.frame(difficulty = c(-1, 0, 1)))
## 1 2 3
## -0.218 -0.066 0.085
GG_scatter(irt_params_all, "pass_rate", "difficulty") +
ylim(-1, 1)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
lm(pass_rate ~ difficulty, data = irt_params_all) %>% predict(newdata = data.frame(difficulty = c(-1, 0, 1)))
## 1 2 3
## 0.81 0.49 0.18
describe(d$irt_all) %>% as.data.frame()
#new subtests
easy_items = irt_params_all %>%
#sort by difficulty
arrange(difficulty) %>%
#extract
.[1:20, "item"] %>% unlist()
#create tests
cog_items_easy = cog_items[, easy_items]
cog_items_hard = cog_items[, -easy_items]
#IRT
irt_easy = irt.fa(cog_items_easy)
irt_hard = irt.fa(cog_items_hard)
#score
irt_easy_score = scoreIrt(irt_easy, cog_items_easy)
irt_hard_score = scoreIrt(irt_hard, cog_items_hard)
#save
d$irt_easy = irt_easy_score$theta1 %>% standardize()
d$irt_hard = irt_hard_score$theta1 %>% standardize()
#correlation
GG_scatter(d, "irt_easy", "irt_hard")
GG_save("figs/easy_hard.png")
#differences
SMD_matrix(d$irt_easy, d$sex)
## Female Male
## Female NA 0.15
## Male 0.15 NA
SMD_matrix(d$irt_hard, d$sex)
## Female Male
## Female NA 0.066
## Male 0.066 NA
#non-skewed test sex difference
fit_sex_interaction_model = lm(male_d ~ difficulty, data = irt_params_all[-8, ])
predict(fit_sex_interaction_model, newdata = data.frame(difficulty = 0))
## 1
## -0.066
#random subtests
set.seed(1)
cog_items_random = map(1:5, ~sample_frac(cog_items, size = .5))
#fit IRTs
irt_random = map(cog_items_random, ~irt.fa(., plot = F))
#extract correlate
map(irt_random, ~.$irt$difficulty[[1]]) %>% as.data.frame() %>% set_colnames(1:5) %>% wtd.cors() %>% MAT_half() %>% mean() %>% print(digits = 5)
## [1] 0.99971
map(irt_random, ~.$fa$loadings %>% as.vector) %>% as.data.frame() %>% set_colnames(1:5) %>% wtd.cors() %>% MAT_half() %>% mean() %>% print(digits = 5)
## [1] 0.99692
#items by sex
cog_items_male = cog_items[d$sex == "Male", ]
cog_items_female = cog_items[d$sex != "Male", ]
#IRT by sex
irt_male = irt.fa(cog_items_male)
irt_female = irt.fa(cog_items_female)
#extract IRM params
irt_params = data_frame(
item = rep(seq_along(cog_items), times = 2) %>% factor(),
sex = rep(c("Male", "Female"), each = 40),
difficulty = c(irt_male$tau, irt_female$tau),
loading = c(irt_male$fa$loadings %>% as.vector(), irt_female$fa$loadings %>% as.vector())
)
#print
irt_params %>% print(n = Inf)
## # A tibble: 80 x 4
## item sex difficulty loading
## <fct> <chr> <dbl> <dbl>
## 1 1 Male -0.257 0.525
## 2 2 Male -0.215 0.622
## 3 3 Male -0.631 0.524
## 4 4 Male 0.0255 0.554
## 5 5 Male 0.0532 0.545
## 6 6 Male 0.348 0.506
## 7 7 Male 0.0941 0.481
## 8 8 Male -0.443 0.668
## 9 9 Male -0.705 0.644
## 10 10 Male 0.586 0.270
## 11 11 Male 0.338 0.416
## 12 12 Male -0.200 0.505
## 13 13 Male -0.251 0.652
## 14 14 Male -0.447 0.694
## 15 15 Male 0.280 0.623
## 16 16 Male 0.220 0.428
## 17 17 Male 0.0880 0.649
## 18 18 Male -0.132 0.669
## 19 19 Male 0.115 0.528
## 20 20 Male 0.566 0.393
## 21 21 Male -0.150 0.634
## 22 22 Male 0.0615 0.439
## 23 23 Male -0.316 0.661
## 24 24 Male -0.123 0.679
## 25 25 Male 0.134 0.592
## 26 26 Male 0.429 0.387
## 27 27 Male 0.493 0.395
## 28 28 Male -0.0740 0.607
## 29 29 Male 0.0652 0.554
## 30 30 Male 0.309 0.601
## 31 31 Male -0.0125 0.604
## 32 32 Male -0.314 0.423
## 33 33 Male -0.204 0.636
## 34 34 Male -0.400 0.715
## 35 35 Male -0.387 0.627
## 36 36 Male -0.258 0.633
## 37 37 Male -0.488 0.684
## 38 38 Male -0.308 0.605
## 39 39 Male 0.371 0.524
## 40 40 Male 0.424 0.475
## 41 1 Female -0.252 0.514
## 42 2 Female -0.206 0.613
## 43 3 Female -0.831 0.472
## 44 4 Female -0.116 0.596
## 45 5 Female 0.0492 0.556
## 46 6 Female 0.451 0.508
## 47 7 Female 0.0270 0.476
## 48 8 Female -0.276 0.626
## 49 9 Female -0.913 0.567
## 50 10 Female 0.585 0.299
## 51 11 Female 0.190 0.443
## 52 12 Female -0.260 0.410
## 53 13 Female -0.402 0.649
## 54 14 Female -0.676 0.661
## 55 15 Female 0.275 0.623
## 56 16 Female 0.264 0.520
## 57 17 Female -0.0211 0.617
## 58 18 Female -0.202 0.655
## 59 19 Female 0.00582 0.547
## 60 20 Female 0.582 0.501
## 61 21 Female -0.111 0.626
## 62 22 Female 0.0182 0.367
## 63 23 Female -0.355 0.633
## 64 24 Female -0.271 0.646
## 65 25 Female -0.0193 0.599
## 66 26 Female 0.433 0.464
## 67 27 Female 0.496 0.420
## 68 28 Female -0.0681 0.613
## 69 29 Female 0.172 0.518
## 70 30 Female 0.311 0.567
## 71 31 Female -0.102 0.585
## 72 32 Female -0.551 0.349
## 73 33 Female -0.366 0.603
## 74 34 Female -0.531 0.620
## 75 35 Female -0.491 0.567
## 76 36 Female -0.434 0.569
## 77 37 Female -0.748 0.660
## 78 38 Female -0.565 0.575
## 79 39 Female 0.258 0.505
## 80 40 Female 0.370 0.501
#reorder item id by difficulty
irt_params$item = fct_reorder(irt_params$item, irt_params$difficulty)
#plot
ggplot(irt_params, aes(item, difficulty, color = sex)) +
geom_point() +
geom_smooth() +
theme_classic() +
theme(axis.text.x = element_text(angle = 10, size = 7))
## `geom_smooth()` using method = 'loess'
GG_save("figs/difficulty_sex.png")
## `geom_smooth()` using method = 'loess'
#reorder item id by difficulty
irt_params$item = fct_reorder(irt_params$item, irt_params$loading)
ggplot(irt_params, aes(item, loading, color = sex)) +
geom_point() +
geom_smooth() +
theme_classic() +
theme(axis.text.x = element_text(angle = 10, size = 7))
## `geom_smooth()` using method = 'loess'
GG_save("figs/loading_sex.png")
## `geom_smooth()` using method = 'loess'
#statistics
irt_params[c("sex", "difficulty", "item")] %>%
spread(sex, difficulty) %>%
cor_matrix(CI = .95)
## Female Male
## Female "1" "0.97 [0.95 0.99]"
## Male "0.97 [0.95 0.99]" "1"
irt_params[c("sex", "loading", "item")] %>%
spread(sex, loading) %>%
cor_matrix(CI = .95)
## Female Male
## Female "1" "0.90 [0.82 0.95]"
## Male "0.90 [0.82 0.95]" "1"
#related to male advantage?
irt_params_all$delta_difficulty = irt_params[c("sex", "difficulty", "item")] %>%
spread(sex, difficulty) %>%
mutate(delta = Male - Female) %>%
.[["delta"]]
irt_params_all$delta_loading = irt_params[c("sex", "loading", "item")] %>%
spread(sex, loading) %>%
mutate(delta = Male - Female) %>%
.[["delta"]]
#abs
irt_params_all$abs_delta_difficulty = abs(irt_params_all$delta_difficulty)
irt_params_all$abs_delta_loading = abs(irt_params_all$delta_loading)
#cors
cor_matrix(irt_params_all[c("male_d", "delta_difficulty", "delta_loading", "abs_delta_difficulty", "abs_delta_loading")], CI = .95) %>% write_clipboard()
## X
## 1 1
## 2 0.03 [-0.28 0.34]
## 3 0.01 [-0.30 0.32]
## 4 -0.02 [-0.33 0.30]
## 5 -0.05 [-0.36 0.26]
## 6 0.03 [-0.28 0.34]
## 7 1
## 8 0.36 [0.05 0.60]
## 9 0.74 [0.56 0.85]
## 10 0.10 [-0.22 0.40]
## 11 0.01 [-0.30 0.32]
## 12 0.36 [0.05 0.60]
## 13 1
## 14 0.44 [0.15 0.66]
## 15 0.12 [-0.20 0.41]
## 16 -0.02 [-0.33 0.30]
## 17 0.74 [0.56 0.85]
## 18 0.44 [0.15 0.66]
## 19 1
## 20 0.11 [-0.21 0.41]
## 21 -0.05 [-0.36 0.26]
## 22 0.10 [-0.22 0.40]
## 23 0.12 [-0.20 0.41]
## 24 0.11 [-0.21 0.41]
## 25 1
#descriptives
describeBy(irt_params$difficulty, group = irt_params$sex)
##
## Descriptive statistics by group
## group: Female
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 40 -0.11 0.39 -0.11 -0.1 0.44 -0.91 0.58 1.5 -0.07 -0.88
## se
## X1 0.06
## --------------------------------------------------------
## group: Male
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 40 -0.03 0.34 -0.04 -0.03 0.39 -0.7 0.59 1.3 0.07 -0.97
## se
## X1 0.05
describeBy(irt_params$loading, group = irt_params$sex)
##
## Descriptive statistics by group
## group: Female
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 40 0.55 0.09 0.57 0.56 0.09 0.3 0.66 0.36 -0.85 -0.02
## se
## X1 0.01
## --------------------------------------------------------
## group: Male
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 40 0.56 0.11 0.6 0.57 0.11 0.27 0.72 0.44 -0.67 -0.38
## se
## X1 0.02
#plots
GG_scatter(irt_params_all, "delta_difficulty", "male_d")
GG_scatter(irt_params_all, "delta_loading", "male_d")
GG_scatter(irt_params_all, "abs_delta_difficulty", "male_d")
GG_scatter(irt_params_all, "abs_delta_loading", "male_d")
#tables of item classification
table2(item_content$Content) %>% write_clipboard()
## Group Count Percent
## 1 number 23.00 57.50
## 2 geometry 12.00 30.00
## 3 data analysis 5.00 12.50
## 4 0.00 0.00
table2(item_content$Level) %>% write_clipboard()
## Group Count Percent
## 1 knowledge 15.00 37.50
## 2 application 13.00 32.50
## 3 reasoning 12.00 30.00
## 4 0.00 0.00
#merge item data
irt_params_all$Content = item_content$Content %>% factor() %>% fct_relevel("number")
irt_params_all$Level = item_content$Level
irt_params_all$Level_ord = item_content$Level_ord
irt_params_all$Level_ord_alt = item_content$Level_ord_alt
#model
lm(male_d ~ difficulty, data = irt_params_all) %>% augment()
item_content_model = lm(male_d ~ position + difficulty + loading + Content + Level, data = irt_params_all)
item_content_model %>% summary()
##
## Call:
## lm(formula = male_d ~ position + difficulty + loading + Content +
## Level, data = irt_params_all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.14450 -0.04288 -0.00971 0.03738 0.24271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.11355 0.09965 -1.14 0.26298
## position -0.00214 0.00168 -1.27 0.21171
## difficulty 0.15719 0.04010 3.92 0.00044 ***
## loading 0.20098 0.17645 1.14 0.26314
## Contentdata analysis -0.06273 0.05848 -1.07 0.29149
## Contentgeometry 0.01094 0.03603 0.30 0.76343
## Levelknowledge -0.02030 0.03280 -0.62 0.54040
## Levelreasoning 0.00110 0.03625 0.03 0.97595
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.082 on 32 degrees of freedom
## Multiple R-squared: 0.482, Adjusted R-squared: 0.369
## F-statistic: 4.26 on 7 and 32 DF, p-value: 0.00201
#level effect in particular
lm(male_d ~ Level, data = irt_params_all) %>% summary()
##
## Call:
## lm(formula = male_d ~ Level, data = irt_params_all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1969 -0.0751 0.0104 0.0799 0.2297
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0630 0.0294 -2.14 0.039 *
## Levelknowledge -0.0200 0.0402 -0.50 0.622
## Levelreasoning -0.0123 0.0425 -0.29 0.774
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.11 on 37 degrees of freedom
## Multiple R-squared: 0.0067, Adjusted R-squared: -0.047
## F-statistic: 0.125 on 2 and 37 DF, p-value: 0.883
lm(male_d ~ position + loading + Content + Level, data = irt_params_all) %>% summary()
##
## Call:
## lm(formula = male_d ~ position + loading + Content + Level, data = irt_params_all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16797 -0.05696 -0.00602 0.06498 0.21167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.103313 0.099309 1.04 0.31
## position -0.000957 0.001982 -0.48 0.63
## loading -0.220672 0.167585 -1.32 0.20
## Contentdata analysis -0.118808 0.067941 -1.75 0.09 .
## Contentgeometry -0.023247 0.041886 -0.56 0.58
## Levelknowledge -0.046496 0.038478 -1.21 0.24
## Levelreasoning 0.011549 0.043312 0.27 0.79
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.099 on 33 degrees of freedom
## Multiple R-squared: 0.234, Adjusted R-squared: 0.0944
## F-statistic: 1.68 on 6 and 33 DF, p-value: 0.158
GG_denhist(irt_params_all, "difficulty", "Level")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
polycor::hetcor(data.frame(irt_params_all$Level_ord, irt_params_all$difficulty))
##
## Two-Step Estimates
##
## Correlations/Type of Correlation:
## irt_params_all.Level_ord
## irt_params_all.Level_ord 1
## irt_params_all.difficulty 0.167
## irt_params_all.difficulty
## irt_params_all.Level_ord Polyserial
## irt_params_all.difficulty 1
##
## Standard Errors:
## irt_params_all.Level_ord irt_params_all.difficulty
## 0.171
## Levels: 0.171
##
## n = 40
##
## P-values for Tests of Bivariate Normality:
## irt_params_all.Level_ord irt_params_all.difficulty
## 0.176
## Levels: 0.176
polycor::hetcor(data.frame(irt_params_all$Level_ord_alt, irt_params_all$difficulty))
##
## Two-Step Estimates
##
## Correlations/Type of Correlation:
## irt_params_all.Level_ord_alt
## irt_params_all.Level_ord_alt 1
## irt_params_all.difficulty 0.165
## irt_params_all.difficulty
## irt_params_all.Level_ord_alt Polyserial
## irt_params_all.difficulty 1
##
## Standard Errors:
## irt_params_all.Level_ord_alt irt_params_all.difficulty
## 0.197
## Levels: 0.197
##
## n = 40
##
## P-values for Tests of Bivariate Normality:
## irt_params_all.Level_ord_alt irt_params_all.difficulty
## 0.0806
## Levels: 0.0806
#nicer output
item_content_model %>% MOD_summary(standardize = F, kfold = F)
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## position -0.0021 0.0017 -0.0056 0.0013
## difficulty 0.1572 0.0401 0.0755 0.2389
## loading 0.2010 0.1764 -0.1584 0.5604
## Content: number 0.0000 NA NA NA
## Content: data analysis -0.0627 0.0585 -0.1819 0.0564
## Content: geometry 0.0109 0.0360 -0.0625 0.0843
## Level: application 0.0000 NA NA NA
## Level: knowledge -0.0203 0.0328 -0.0871 0.0465
## Level: reasoning 0.0011 0.0362 -0.0727 0.0749
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 male_d 40 32 0.48 0.37 NA
##
##
## Etas from analysis of variance
## Eta Eta_partial
## position 0.162 0.22
## difficulty 0.499 0.57
## loading 0.145 0.20
## Content 0.191 0.26
## Level 0.087 0.12
item_content_model %>% MOD_summary(standardize = F, kfold = F) %>% .[[1]] %>% write_clipboard()
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
## Beta SE CI lower CI upper
## Position 0.00 0.00 -0.01 0.00
## Difficulty 0.16 0.04 0.08 0.24
## Loading 0.20 0.18 -0.16 0.56
## Content: number 0.00
## Content: data analysis -0.06 0.06 -0.18 0.06
## Content: geometry 0.01 0.04 -0.06 0.08
## Level: application 0.00
## Level: knowledge -0.02 0.03 -0.09 0.05
## Level: reasoning 0.00 0.04 -0.07 0.07
item_content_model %>% MOD_summary(standardize = T, kfold = F)
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## position -0.242 0.19 -0.63 0.14
## difficulty 0.661 0.17 0.32 1.00
## loading 0.186 0.16 -0.15 0.52
## Content: number 0.000 NA NA NA
## Content: data analysis -0.605 0.56 -1.75 0.54
## Content: geometry 0.106 0.35 -0.60 0.81
## Level: application 0.000 NA NA NA
## Level: knowledge -0.196 0.32 -0.84 0.45
## Level: reasoning 0.011 0.35 -0.70 0.72
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 male_d 40 32 0.48 0.37 NA
##
##
## Etas from analysis of variance
## Eta Eta_partial
## position 0.162 0.22
## difficulty 0.499 0.57
## loading 0.145 0.20
## Content 0.191 0.26
## Level 0.087 0.12
#intercept CI
item_content_model %>% summary() %>%
.[["coefficients"]] %>%
(function(x) {
#convert to sensible type
x = as.data.frame(x)
#calc CI
x$Estimate[1] + (x$`Std. Error`[1] * c(-1.96, 1.96))
})
## [1] -0.309 0.082
#sum stats
d %>% plyr::ddply(c("sex", "school_sex"), function(x) {
data_frame(
mean = wtd_mean(x$irt_all),
sd = wtd_sd(x$irt_all),
n = nrow(x)
)
})
#irt by school type x sex
school_sex = list(
male_alone = cog_items %>% filter(d$school_sex == "Male", d$sex == "Male"),
female_alone = cog_items %>% filter(d$school_sex == "Female", d$sex == "Female"),
male_together = cog_items %>% filter(d$school_sex == "Mixed", d$sex == "Male"),
female_together = cog_items %>% filter(d$school_sex == "Mixed", d$sex == "Female"),
together = cog_items %>% filter(d$school_sex == "Mixed")
)
#samples
map_int(school_sex, nrow)
## male_alone female_alone male_together female_together
## 9822 5536 6531 10408
## together
## 16939
#function
irt_function = function(cog_items) {
#fit irt
irt_fit = irt.fa(cog_items)
#get irt params
data_frame(
item = seq_along(cog_items),
position = as.numeric(item),
pass_rate = cog_items %>% colMeans(),
difficulty = irt_fit$irt$difficulty %>% .[[1]],
discrimination = irt_fit$irt$discrimination %>% .[, 1],
loading = irt_fit$fa$loadings %>% as.vector(),
fit = list(irt_fit)
)
}
#fit IRTs
school_sex_irt = map(school_sex, irt_function) %>% bind_rows(.id = "subsample")
#examine correlations
#difficulty
school_sex_irt %>%
select(item, subsample, difficulty) %>%
spread(subsample, difficulty) %>%
.[-1] %>%
wtd.cors()
## female_alone female_together male_alone male_together
## female_alone 1.00 1.00 0.96 0.96
## female_together 1.00 1.00 0.95 0.97
## male_alone 0.96 0.95 1.00 0.96
## male_together 0.96 0.97 0.96 1.00
## together 0.99 1.00 0.96 0.99
## together
## female_alone 0.99
## female_together 1.00
## male_alone 0.96
## male_together 0.99
## together 1.00
#loading
school_sex_irt %>%
select(item, subsample, loading) %>%
spread(subsample, loading) %>%
.[-1] %>%
wtd.cors()
## female_alone female_together male_alone male_together
## female_alone 1.00 0.96 0.81 0.90
## female_together 0.96 1.00 0.83 0.96
## male_alone 0.81 0.83 1.00 0.83
## male_together 0.90 0.96 0.83 1.00
## together 0.94 0.99 0.83 0.98
## together
## female_alone 0.94
## female_together 0.99
## male_alone 0.83
## male_together 0.98
## together 1.00
#regression for mixed school
#derive male advantage
#boy advantage
together = school_sex_irt %>% filter(subsample == "together")
together$male_d = map_dbl(names(cog_items), function(item) {
#pass rates
b_pass = cog_items[d$sex == "Male" & d$school_sex == "Mixed", item] %>% unlist() %>% wtd_mean()
g_pass = cog_items[d$sex == "Female" & d$school_sex == "Mixed", item] %>% unlist() %>% wtd_mean()
#z score gap
qnorm(b_pass) - qnorm(g_pass)
})
#content
together$position = irt_params_all$position
together$Content = irt_params_all$Content
together$Level = irt_params_all$Level
#cors
together %>% select(position, difficulty, loading, male_d) %>% wtd.cors()
## position difficulty loading male_d
## position 1.000 -0.033 0.12 -0.152
## difficulty -0.033 1.000 -0.46 -0.016
## loading 0.119 -0.463 1.00 0.486
## male_d -0.152 -0.016 0.49 1.000
#model
lm(male_d ~ position + difficulty + loading + Content + Level, data = together) %>% MOD_summary(standardize = F)
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## position -0.0021 0.0016 -0.0053 0.0012
## difficulty 0.0494 0.0314 -0.0146 0.1134
## loading 0.5873 0.1520 0.2777 0.8968
## Content: number 0.0000 NA NA NA
## Content: data analysis -0.0097 0.0556 -0.1231 0.1036
## Content: geometry 0.0242 0.0344 -0.0459 0.0943
## Level: application 0.0000 NA NA NA
## Level: knowledge -0.0164 0.0311 -0.0797 0.0469
## Level: reasoning 0.0047 0.0343 -0.0651 0.0745
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 male_d 40 32 0.37 0.23 NA
##
##
## Etas from analysis of variance
## Eta Eta_partial
## position 0.18 0.22
## difficulty 0.22 0.27
## loading 0.54 0.56
## Content 0.14 0.17
## Level 0.09 0.11
lm(male_d ~ difficulty + loading, data = together) %>% MOD_summary(standardize = F)
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## difficulty 0.049 0.029 -0.0092 0.11
## loading 0.564 0.144 0.2712 0.86
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 male_d 40 37 0.29 0.25 NA
##
##
## Etas from analysis of variance
## Eta Eta_partial
## difficulty 0.24 0.27
## loading 0.54 0.54
#plot
together %>% GG_scatter("loading", "male_d")
together %>% GG_scatter("difficulty", "male_d")
#together, but split by sex
together_male = irt.fa(school_sex$male_together)
together_female = irt.fa(school_sex$female_together)
#join
together_item_pars = data_frame(
male_d = together$male_d,
male_loading = together_male$fa$loadings %>% .[, 1],
female_loading = together_female$fa$loadings %>% .[, 1],
male_difficulty = together_male$irt$difficulty[[1]],
female_difficulty = together_female$irt$difficulty[[1]],
male_difficulty_alone = together_male$irt$difficulty[[1]],
female_difficulty_alone = together_female$irt$difficulty[[1]],
#diffs
d_loading = male_loading - female_loading,
d_diff = male_difficulty - female_difficulty,
#abs
abs_d_loading = abs(d_loading),
abs_d_diff = abs(d_diff)
)
wtd.cors(together_item_pars)
## male_d male_loading female_loading
## male_d 1.0000 0.463 0.4513
## male_loading 0.4628 1.000 0.9573
## female_loading 0.4513 0.957 1.0000
## male_difficulty -0.1581 -0.554 -0.5314
## female_difficulty 0.0734 -0.437 -0.4206
## male_difficulty_alone -0.1581 -0.554 -0.5314
## female_difficulty_alone 0.0734 -0.437 -0.4206
## d_loading 0.1053 0.287 -0.0025
## d_diff -0.9727 -0.594 -0.5620
## abs_d_loading 0.0084 0.059 -0.0901
## abs_d_diff 0.9693 0.577 0.5465
## male_difficulty female_difficulty
## male_d -0.158 0.073
## male_loading -0.554 -0.437
## female_loading -0.531 -0.421
## male_difficulty 1.000 0.972
## female_difficulty 0.972 1.000
## male_difficulty_alone 1.000 0.972
## female_difficulty_alone 0.972 1.000
## d_loading -0.156 -0.117
## d_diff 0.323 0.094
## abs_d_loading -0.083 -0.067
## abs_d_diff -0.333 -0.105
## male_difficulty_alone female_difficulty_alone
## male_d -0.158 0.073
## male_loading -0.554 -0.437
## female_loading -0.531 -0.421
## male_difficulty 1.000 0.972
## female_difficulty 0.972 1.000
## male_difficulty_alone 1.000 0.972
## female_difficulty_alone 0.972 1.000
## d_loading -0.156 -0.117
## d_diff 0.323 0.094
## abs_d_loading -0.083 -0.067
## abs_d_diff -0.333 -0.105
## d_loading d_diff abs_d_loading abs_d_diff
## male_d 0.1053 -0.973 0.0084 0.969
## male_loading 0.2867 -0.594 0.0588 0.577
## female_loading -0.0025 -0.562 -0.0901 0.547
## male_difficulty -0.1562 0.323 -0.0831 -0.333
## female_difficulty -0.1167 0.094 -0.0674 -0.105
## male_difficulty_alone -0.1562 0.323 -0.0831 -0.333
## female_difficulty_alone -0.1167 0.094 -0.0674 -0.105
## d_loading 1.0000 -0.193 0.5021 0.185
## d_diff -0.1931 1.000 -0.0813 -0.998
## abs_d_loading 0.5021 -0.081 1.0000 0.075
## abs_d_diff 0.1854 -0.998 0.0753 1.000
sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 18.2
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bindrcpp_0.2 broom_0.4.3 lavaan_0.5-23.1097
## [4] kirkegaard_2018.03 psych_1.7.8 magrittr_1.5
## [7] assertthat_0.2.0 weights_0.85 mice_2.46.0
## [10] gdata_2.18.0 Hmisc_4.1-1 Formula_1.2-2
## [13] survival_2.41-3 lattice_0.20-35 forcats_0.3.0
## [16] stringr_1.3.0 dplyr_0.7.4 purrr_0.2.4
## [19] readr_1.1.1 tidyr_0.8.0 tibble_1.4.2
## [22] ggplot2_2.2.1 tidyverse_1.2.1 pacman_0.4.6
##
## loaded via a namespace (and not attached):
## [1] httr_1.3.1 jsonlite_1.5 splines_3.4.3
## [4] modelr_0.1.1 gtools_3.5.0 multilevel_2.6
## [7] stats4_3.4.3 latticeExtra_0.6-28 cellranger_1.1.0
## [10] yaml_2.1.16 pbivnorm_0.6.0 pillar_1.2.1
## [13] backports_1.1.2 glue_1.2.0 quadprog_1.5-5
## [16] digest_0.6.15 RColorBrewer_1.1-2 checkmate_1.8.5
## [19] rvest_0.3.2 colorspace_1.3-2 htmltools_0.3.6
## [22] Matrix_1.2-12 plyr_1.8.4 pkgconfig_2.0.1
## [25] curry_0.1.1 haven_1.1.1.9000 mvtnorm_1.0-7
## [28] scales_0.5.0 htmlTable_1.11.2 lsr_0.5
## [31] nnet_7.3-12 lazyeval_0.2.1 cli_1.0.0
## [34] mnormt_1.5-5 crayon_1.3.4 readxl_1.0.0
## [37] evaluate_0.10.1 nlme_3.1-131.1 MASS_7.3-48
## [40] xml2_1.2.0 foreign_0.8-69 tools_3.4.3
## [43] data.table_1.10.4-3 hms_0.4.1 munsell_0.4.3
## [46] cluster_2.0.6 compiler_3.4.3 rlang_0.2.0
## [49] polycor_0.7-9 grid_3.4.3 rstudioapi_0.7
## [52] htmlwidgets_1.0 labeling_0.3 base64enc_0.1-3
## [55] rmarkdown_1.8 gtable_0.2.0 reshape2_1.4.3
## [58] R6_2.2.2 gridExtra_2.3 lubridate_1.7.2
## [61] psychometric_2.2 knitr_1.20 utf8_1.1.3
## [64] bindr_0.1 rprojroot_1.3-2 stringi_1.1.7
## [67] parallel_3.4.3 Rcpp_0.12.16 rpart_4.1-12
## [70] acepack_1.4.1 tidyselect_0.2.4