library(pacman)
p_load(psych, dplyr, umx, lavaan, ggplot2, plyr, stringr, reshape, ggthemes, skedastic)
DKG <- function(n = 10000, r = 0.3, test_mean = 100, test_sd = 15, bins = 4, up_bias = 7.5){
require(egg)
set.seed(1)
true_score = rnorm(n)
est_score = true_score * r + rnorm(n) * sqrt(1- r^2)
true_score = true_score * test_sd + test_mean
est_score = est_score * test_sd + test_mean + up_bias
d = data.frame(true_score, est_score)
br_points = quantile(d$true_score, probs = seq(0, 1, length = bins + 1))
d$group = cut(d$true_score, breaks = br_points, include.lowest = T, labels = F)
f = ecdf(d$true_score)
d$p_true_score = f(d$true_score) * 100
d$p_est_score = f(d$est_score) * 100
d2 = ddply(d, .variables = .(group), summarize,
mean_est = mean(est_score),
mean_true = mean(true_score),
mean_est_p = mean(p_est_score),
mean_true_p = mean(p_true_score)) #courtesy of Emil
dl = melt(d2[1:3], id.vars = "group")
g1 <- ggplot(d, aes(est_score, true_score)) + geom_point(alpha = 0.5, color = "#DECDC3") + geom_smooth(method = "lm", se = F, color = "#2D4059", formula = "y ~ x") + labs(x = "Self-estimated score", y = "Ability") + scale_x_continuous(breaks = seq(0, 200, 5)) + theme_bw()
g2 <- ggplot(dl, aes(group, value, color = variable)) + geom_line(size = 1) + scale_color_manual("Score", values = c("#900D0D", "#D9C6A5"), labels = c("Self-estimated", "True")) + labs(x = "Quartile", y = "Ability") + theme_bw() + theme(legend.position = c(0.91, 0.22), legend.background = element_blank())
ggarrange(g1, g2, ncol = 1)}
DKN <- read.csv("DKNLSY.csv")
DKN <- DKN %>% dplyr::rename(
ID = C0000100, #Child ID code
MID = C0000200, #Mother's ID code
RACE = C0005300, #Mother-provided race of child
SEX = C0005400, #Sex
YOB = C0005700, #Year of Birth
SPS = C1506800, #SPPC: Scholastic Self-Perception Score, Raw
SPW = C1507000, #SPPC: Self-Worth Score, Raw
DIG = C1507200, #Digit Span Total Score, Raw
PIM = C1507600, #PIAT Math Total Score, Raw
PIR = C1507900, #PIAT Reading Recognition Score, Raw
PIC = C1508200, #PIAT Reading Comprehension Score, Raw
PPT = C1508600 #PPVT Total Score, Raw
) %>% mutate(
AGE = 1994 - YOB)
DKN <- umx_residualize(c("DIG", "PIM", "PIR", "PIC", "PPT"), c("AGE"), data = DKN)
## [1] "(Intercept) B = -4.727 [-4.955, -4.499], t = -40.584, p < 0.001"
## [1] "AGE B = 0.332 [0.31, 0.355], t = 29.067, p < 0.001"
## [1] "(Intercept) B = 1.036 [0.398, 1.674], t = 3.184, p = 0.001"
## [1] "AGE B = 0.884 [0.822, 0.947], t = 27.688, p < 0.001"
## [1] "(Intercept) B = 1.456 [0.765, 2.146], t = 4.134, p < 0.001"
## [1] "AGE B = 0.957 [0.889, 1.025], t = 27.695, p < 0.001"
## [1] "(Intercept) B = 0.786 [0.168, 1.404], t = 2.494, p = 0.013"
## [1] "AGE B = 0.84 [0.78, 0.901], t = 27.161, p < 0.001"
## [1] "(Intercept) B = 3.015 [2.094, 3.937], t = 6.415, p < 0.001"
## [1] "AGE B = 0.287 [0.197, 0.377], t = 6.222, p < 0.001"
DKN <- subset(DKN, SPS > 0); DKM <- subset(DKN, SEX == 1); DKW <- subset(DKN, SEX == 2)
describe(DKN)
table(DKN$RACE); table(DKN$SEX)
##
## 1 2 3
## 625 932 1137
##
## 1 2
## 1324 1370
As in other iterations, white = 3, black = 2, Hispanic = 1. Since we cannot check invariance of the target measurement (scholastic self-perception), the test will be run in both sexes separately.
Some people still believe in the Dunning-Kruger effect, and not even the steelman where it takes the form of a linear bias by level of ability. I will show an alternative and then empirically assess it in the NLSY '79 Children 1994 wave, chosen because it had the most cognitive tests at an age where reliability was feasible. I split the data by sex because measurement invariance with respect to academic self-perception (measure of self-estimated ability to be used) could not be tested. Also, I cut off individuals with intellectual disabilities (here, g 2 SDs below the mean) because their responses on the SPPC were unlikely invariant. There is range restriction for the SPPC, unfortunately.
Consider the original Dunning-Kruger graph from 1999. There was a large gap between self-estimated ability and actual ability at the bottom of the ability distribution due to overestimated abilities and a slight underestimation at the top due to underestimation. Though Dunning & Kruger (and later, just Dunning) would come to describe this as a result of a lack of metacognition in those with poor abilities making them incapable of accurate self-appraisal. Their explanation implies non-linearities and heteroskedasticity that should yield a negative result with the Glejser test (see Gignac's latest paper).
Their original graph is consistent with an alternative interpretation where individual ability is constantly related to self-estimated abilities and everyone overestimates by a fixed amount. The reason for this is that the transformation of such data into quartiles or other types of bins necessarily leads to the condition whereby the data give the appearance of the Dunning-Kruger effect. When self-estimated ability is uncorrelated with ability, the plotted level by quartile is flat; when it is perfectly correlated with ability, it cannot be distinguished from ability estimates. At any level 0 < r < 1, self-estimated ability is intermediate in slope when binned. The apparent overestimation effect for low-ability individuals and its corollary of underestimation (not always observed) for high-ability individuals occurs because constant overestimation shifts the data vertically, so the point where the lines cross moves rightward. This leads to an illusory Dunning-Kruger and may be the explanation for the original finding.
DKG(r = 0, up_bias = 0)
## Loading required package: egg
## Loading required package: gridExtra
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
DKG(r = 0.3, up_bias = 0)
DKG(r = 0.3, up_bias = 5)
DKG(r = 0.3, up_bias = 15)
MOD <- '
g =~ DIG + PIM + PIR + PIC + PPT'
MODCON <- cfa(MOD, data = DKN, group = "RACE", std.lv = T)
MODMET <- cfa(MOD, data = DKN, group = "RACE", std.lv = F, group.equal = "loadings")
MODSCA <- cfa(MOD, data = DKN, group = "RACE", std.lv = F, group.equal = c("loadings", "intercepts"))
MODSTR <- cfa(MOD, data = DKN, group = "RACE", std.lv = F, group.equal = c("loadings", "intercepts", "residuals"))
round(cbind("Configural" = fitMeasures(MODCON, FITM),
"Metric" = fitMeasures(MODMET, FITM),
"Scalar" = fitMeasures(MODSCA, FITM),
"Strict" = fitMeasures(MODSTR, FITM)), 3)
## Configural Metric Scalar Strict
## chisq 58.724 76.026 113.818 158.268
## df 15.000 23.000 31.000 41.000
## npar 45.000 37.000 29.000 19.000
## cfi 0.991 0.989 0.983 0.976
## rmsea 0.057 0.051 0.055 0.056
## rmsea.ci.lower 0.042 0.038 0.044 0.047
## rmsea.ci.upper 0.073 0.064 0.065 0.066
## aic 102488.782 102490.084 102511.876 102536.326
## bic 102754.227 102708.339 102682.940 102648.403
MODCON <- cfa(MOD, data = DKM, group = "RACE", std.lv = T)
MODMET <- cfa(MOD, data = DKM, group = "RACE", std.lv = F, group.equal = "loadings")
MODSCA <- cfa(MOD, data = DKM, group = "RACE", std.lv = F, group.equal = c("loadings", "intercepts"))
MODSTR <- cfa(MOD, data = DKM, group = "RACE", std.lv = F, group.equal = c("loadings", "intercepts", "residuals"))
round(cbind("Configural" = fitMeasures(MODCON, FITM),
"Metric" = fitMeasures(MODMET, FITM),
"Scalar" = fitMeasures(MODSCA, FITM),
"Strict" = fitMeasures(MODSTR, FITM)), 3)
## Configural Metric Scalar Strict
## chisq 29.754 52.389 73.331 115.451
## df 15.000 23.000 31.000 41.000
## npar 45.000 37.000 29.000 19.000
## cfi 0.994 0.988 0.983 0.970
## rmsea 0.047 0.054 0.056 0.064
## rmsea.ci.lower 0.021 0.035 0.039 0.050
## rmsea.ci.upper 0.072 0.073 0.072 0.078
## aic 50490.516 50497.151 50502.093 50524.213
## bic 50723.995 50689.123 50652.557 50622.792
MODCON <- cfa(MOD, data = DKW, group = "RACE", std.lv = T)
MODMET <- cfa(MOD, data = DKW, group = "RACE", std.lv = F, group.equal = "loadings")
MODSCA <- cfa(MOD, data = DKW, group = "RACE", std.lv = F, group.equal = c("loadings", "intercepts"))
MODSTR <- cfa(MOD, data = DKW, group = "RACE", std.lv = F, group.equal = c("loadings", "intercepts", "residuals"))
round(cbind("Configural" = fitMeasures(MODCON, FITM),
"Metric" = fitMeasures(MODMET, FITM),
"Scalar" = fitMeasures(MODSCA, FITM),
"Strict" = fitMeasures(MODSTR, FITM)), 3)
## Configural Metric Scalar Strict
## chisq 41.026 47.899 78.450 103.923
## df 15.000 23.000 31.000 41.000
## npar 45.000 37.000 29.000 19.000
## cfi 0.990 0.990 0.981 0.975
## rmsea 0.062 0.049 0.058 0.058
## rmsea.ci.lower 0.039 0.029 0.042 0.044
## rmsea.ci.upper 0.085 0.068 0.074 0.072
## aic 51946.046 51936.919 51951.471 51956.944
## bic 52181.062 52130.154 52102.925 52056.173
Measurement invariance is probably acceptable, but the sample is quite large. In an earlier study, I investigated this level of apparent non-invariance and it amounted to almost nothing.
MODALL <- cfa(MOD, data = DKN, std.lv = T); fitMeasures(MODALL, FITM)
## chisq df npar cfi rmsea
## 39.207 5.000 10.000 0.993 0.050
## rmsea.ci.lower rmsea.ci.upper aic bic
## 0.036 0.066 102687.684 102746.672
resid(MODALL, "cor")
## $type
## [1] "cor.bollen"
##
## $cov
## DIG PIM PIR PIC PPT
## DIG 0.000
## PIM 0.019 0.000
## PIR 0.010 -0.007 0.000
## PIC -0.030 0.005 0.003 0.000
## PPT -0.030 0.035 -0.019 0.016 0.000
MODALLM <- cfa(MOD, data = DKM, std.lv = T); fitMeasures(MODALLM, FITM)
## chisq df npar cfi rmsea
## 20.119 5.000 10.000 0.994 0.048
## rmsea.ci.lower rmsea.ci.upper aic bic
## 0.027 0.070 50597.282 50649.166
resid(MODALLM, "cor")
## $type
## [1] "cor.bollen"
##
## $cov
## DIG PIM PIR PIC PPT
## DIG 0.000
## PIM 0.014 0.000
## PIR 0.011 -0.005 0.000
## PIC -0.029 0.006 0.001 0.000
## PPT -0.058 0.023 -0.014 0.025 0.000
MODALLW <- cfa(MOD, data = DKW, std.lv = T); fitMeasures(MODALLW, FITM)
## chisq df npar cfi rmsea
## 26.246 5.000 10.000 0.992 0.056
## rmsea.ci.lower rmsea.ci.upper aic bic
## 0.036 0.078 52017.527 52069.753
resid(MODALLW, "cor")
## $type
## [1] "cor.bollen"
##
## $cov
## DIG PIM PIR PIC PPT
## DIG 0.000
## PIM 0.025 0.000
## PIR 0.008 -0.008 0.000
## PIC -0.032 0.003 0.005 0.000
## PPT -0.007 0.049 -0.028 0.005 0.000
All manifest variables are nearly independent net of g.
DKN <- cbind(DKN, lavPredict(MODALL))
DKM <- cbind(DKM, lavPredict(MODALLM))
DKW <- cbind(DKW, lavPredict(MODALLW))
DKN <- subset(DKN, scale(g) > -2); DKM <- subset(DKM, scale(g) > -2); DKW <- subset(DKW, scale(g) > -2)
TL <- lm(g ~ SPS, DKN)
summary(TL)
##
## Call:
## lm(formula = g ~ SPS, data = DKN)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.26426 -0.53804 0.01512 0.51364 2.36986
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.9879585 0.0638819 -15.46 <2e-16 ***
## SPS 0.0061828 0.0003584 17.25 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7888 on 2609 degrees of freedom
## Multiple R-squared: 0.1024, Adjusted R-squared: 0.102
## F-statistic: 297.6 on 1 and 2609 DF, p-value: < 2.2e-16
plot(TL, 1); plot(TL, 3)
cor(DKN$g, DKN$SPS)
## [1] 0.319963
TLM <- lm(g ~ SPS, DKM)
summary(TLM)
##
## Call:
## lm(formula = g ~ SPS, data = DKM)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.39041 -0.54052 0.01319 0.55044 2.35472
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.9851296 0.0938583 -10.50 <2e-16 ***
## SPS 0.0061566 0.0005324 11.56 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8124 on 1289 degrees of freedom
## Multiple R-squared: 0.094, Adjusted R-squared: 0.09329
## F-statistic: 133.7 on 1 and 1289 DF, p-value: < 2.2e-16
plot(TLM, 1); plot(TLM, 3)
cor(DKM$g, DKM$SPS)
## [1] 0.3065867
TLW <- lm(g ~ SPS, DKW)
summary(TLW)
##
## Call:
## lm(formula = g ~ SPS, data = DKW)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.24189 -0.53409 0.00684 0.49832 2.28558
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.002411 0.089281 -11.23 <2e-16 ***
## SPS 0.006163 0.000496 12.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7883 on 1331 degrees of freedom
## Multiple R-squared: 0.104, Adjusted R-squared: 0.1033
## F-statistic: 154.4 on 1 and 1331 DF, p-value: < 2.2e-16
plot(TLW, 1); plot(TLW, 3)
cor(DKW$g, DKW$SPS)
## [1] 0.3224292
ggplot(DKN, aes(x = g, y = SPS)) + geom_point() + geom_smooth(method = loess, color = "steelblue4", formula = 'y ~ x') + geom_smooth(method = lm, color = "orangered2", formula = 'y ~ x') + labs(x = "General Mental Ability", y = "Self-Estimated Ability") + theme_minimal() + theme(legend.position = "none", text = element_text(size = 12, family = "serif"), plot.title = element_text(hjust = 0.5))
ggplot(DKM, aes(x = g, y = SPS)) + geom_point() + geom_smooth(method = loess, formula = 'y ~ x', color = "blue") + geom_smooth(method = lm, formula = 'y ~ x', color = "red") + labs(x = "General Mental Ability", y = "Self-Estimated Ability") + theme_wsj() + theme(legend.position = "none")
ggplot(DKW, aes(x = g, y = SPS)) + geom_point() + geom_smooth(method = loess, formula = 'y ~ x', color = "blue") + geom_smooth(method = lm, formula = 'y ~ x', color = "red") + labs(x = "General Mental Ability", y = "Self-Estimated Ability") + theme_wsj() + theme(legend.position = "none")
glejser(TL); glejser(TLM); glejser(TLW)
The Dunning-Kruger effect is hard to distinguish from merely-statistical alternatives for which there is evidence: there is evidence of a positive relationship between real and self-estimated ability that doesn't seem to deviate from linearity and for some degree of constant overestimation, the average of which is unrelated to ability. The application of LOESS and Glejser's test to the NLSY '79 children data revealed a lack of evidence for the Dunning-Kruger effect, although range restriction may have impacted the result, though it's doubtful it did terribly much. There are at least two British datasets I am aware of which have data sufficient to test whether the Dunning-Kruger effect is present. It seems more likely Gignac is correct than Dunning. Interestingly, there was invariance by race (black/white/Hispanic) for the cognitive tests and there was rather clear local independence.