The datafile is in SPSS format, and it’s not always coded in a good way. We load the data and recode it in a more useful way.
library(pacman)
p_load(kirkegaard, haven, dplyr)
#load spss data
d = haven::read_spss("data/VESTOTAL.sav")
#convert variable names
meaningful_var_names = purrr::map2_chr(d, names(d), function(x, x_orig) {
#get label, replace illegal characters
x_label = attr(x, "label")
x_label_nice = str_replace_all(x_label, pattern = "[ \\-\\/]", replacement = "_") %>%
str_replace_all(pattern = "[\\(\\)]", replacement = "")
#if no label, use orignal
if (are_equal(x_label_nice, character())) return(x_orig)
x_label_nice
})
#set names
names(d) = meaningful_var_names
#remove duplicated variables
d = d[!duplicated(names(d))]
#get rid of labelled type
converter_labelled = function(x) {
#does object have labels?
has_labels = has_attr(x, "labels")
#if labels, convert to factor
if (has_labels) {
y = as_factor(x)
} else {
#convert to ordinary numeric
y = as.vector(x)
}
y
}
#convert each variable
d = df_colFunc(d, converter_labelled)
#convert AFQT to z scale
d$ARMED_FORCES_QUALIFICATION_TEST %<>%
as.character %>%
str_replace("AFQT", "") %>%
as.numeric %>%
{
#codes above 100 are impossible because score is a centile
#recode to NA
.[. > 100] = NA
#centiles of 0 or 100 are slightly inaccurate, so we round to a possible value
.[. == 0] = .1
.[. == 100] = 99.9
#convert centiles to z scores
qnorm(./100)
}
#basic stats
#get a g factor
A recent study found that pupil size is related to GCA. We examine this question in the present dataset.
#plot both
d %>%
select(PUPIL_SIZE_RT_MM, PUPIL_SIZE_LT_MM, ARMED_FORCES_QUALIFICATION_TEST) %>%
gather(key = eye, value = pupil_size, PUPIL_SIZE_LT_MM, PUPIL_SIZE_RT_MM) %>%
ggplot(aes(ARMED_FORCES_QUALIFICATION_TEST, pupil_size, color = eye)) +
geom_smooth() +
geom_point(alpha = .01) +
scale_color_discrete("Eye", labels = c("Left", "Right")) +
xlab("AFQT z-score") +
ylab("Pupil size (mm)") +
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).
silence(ggsave("figures/pupil_size.png"))
cor_matrix(d[c("PUPIL_SIZE_RT_MM", "PUPIL_SIZE_LT_MM", "ARMED_FORCES_QUALIFICATION_TEST")], CI = .99)
## PUPIL_SIZE_RT_MM PUPIL_SIZE_LT_MM
## PUPIL_SIZE_RT_MM "1" "0.97 [0.97 0.98]"
## PUPIL_SIZE_LT_MM "0.97 [0.97 0.98]" "1"
## ARMED_FORCES_QUALIFICATION_TEST "0.05 [0.01 0.09]" "0.05 [0.01 0.09]"
## ARMED_FORCES_QUALIFICATION_TEST
## PUPIL_SIZE_RT_MM "0.05 [0.01 0.09]"
## PUPIL_SIZE_LT_MM "0.05 [0.01 0.09]"
## ARMED_FORCES_QUALIFICATION_TEST "1"
count.pairwise(d[c("PUPIL_SIZE_RT_MM", "PUPIL_SIZE_LT_MM", "ARMED_FORCES_QUALIFICATION_TEST")])
## PUPIL_SIZE_RT_MM PUPIL_SIZE_LT_MM
## PUPIL_SIZE_RT_MM 4459 4455
## PUPIL_SIZE_LT_MM 4455 4458
## ARMED_FORCES_QUALIFICATION_TEST 4438 4437
## ARMED_FORCES_QUALIFICATION_TEST
## PUPIL_SIZE_RT_MM 4438
## PUPIL_SIZE_LT_MM 4437
## ARMED_FORCES_QUALIFICATION_TEST 4441
So, yes, pupil size and GCA are slightly related, but the association is only visible in a large dataset and the weak relationship casts doubt on theoretical interpretations.