Startup

The datafile is in SPSS format, and it’s not always coded in a good way. We load the data and recode it in a more useful way.

library(pacman)
p_load(kirkegaard, haven, dplyr)

#load spss data
d = haven::read_spss("data/VESTOTAL.sav")

#convert variable names
meaningful_var_names = purrr::map2_chr(d, names(d), function(x, x_orig) {
  #get label, replace illegal characters
  x_label = attr(x, "label")
  x_label_nice = str_replace_all(x_label, pattern = "[ \\-\\/]", replacement = "_") %>% 
    str_replace_all(pattern = "[\\(\\)]", replacement = "")
  
  #if no label, use orignal
  if (are_equal(x_label_nice, character())) return(x_orig)
  x_label_nice
})

#set names
names(d) = meaningful_var_names

#remove duplicated variables
d = d[!duplicated(names(d))]

#get rid of labelled type
converter_labelled = function(x) {
  #does object have labels?
  has_labels = has_attr(x, "labels")
  
  #if labels, convert to factor
  if (has_labels) {
    y = as_factor(x)
  } else {
    #convert to ordinary numeric
    y = as.vector(x)
  }
  
  y
}

#convert each variable
d = df_colFunc(d, converter_labelled)

#convert AFQT to z scale
d$ARMED_FORCES_QUALIFICATION_TEST %<>% 
  as.character %>% 
  str_replace("AFQT", "") %>% 
  as.numeric %>% 
  {
    #codes above 100 are impossible because score is a centile
    #recode to NA
    .[. > 100] = NA
    
    #centiles of 0 or 100 are slightly inaccurate, so we round to a possible value
    .[. == 0] = .1
    .[. == 100] = 99.9
    
    #convert centiles to z scores
    qnorm(./100)
  }

Basic descriptive statistics

#basic stats

Factor analysis of the cognitive tests

#get a g factor

Pupil size and GCA?

A recent study found that pupil size is related to GCA. We examine this question in the present dataset.

#plot both
d %>% 
  select(PUPIL_SIZE_RT_MM, PUPIL_SIZE_LT_MM, ARMED_FORCES_QUALIFICATION_TEST) %>% 
  gather(key = eye, value = pupil_size, PUPIL_SIZE_LT_MM, PUPIL_SIZE_RT_MM) %>%
  ggplot(aes(ARMED_FORCES_QUALIFICATION_TEST, pupil_size, color = eye)) + 
  geom_smooth() +
  geom_point(alpha = .01) +
  scale_color_discrete("Eye", labels = c("Left", "Right")) +
  xlab("AFQT z-score") +
  ylab("Pupil size (mm)") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).

silence(ggsave("figures/pupil_size.png"))

cor_matrix(d[c("PUPIL_SIZE_RT_MM", "PUPIL_SIZE_LT_MM", "ARMED_FORCES_QUALIFICATION_TEST")], CI = .99)
##                                 PUPIL_SIZE_RT_MM   PUPIL_SIZE_LT_MM  
## PUPIL_SIZE_RT_MM                "1"                "0.97 [0.97 0.98]"
## PUPIL_SIZE_LT_MM                "0.97 [0.97 0.98]" "1"               
## ARMED_FORCES_QUALIFICATION_TEST "0.05 [0.01 0.09]" "0.05 [0.01 0.09]"
##                                 ARMED_FORCES_QUALIFICATION_TEST
## PUPIL_SIZE_RT_MM                "0.05 [0.01 0.09]"             
## PUPIL_SIZE_LT_MM                "0.05 [0.01 0.09]"             
## ARMED_FORCES_QUALIFICATION_TEST "1"
count.pairwise(d[c("PUPIL_SIZE_RT_MM", "PUPIL_SIZE_LT_MM", "ARMED_FORCES_QUALIFICATION_TEST")])
##                                 PUPIL_SIZE_RT_MM PUPIL_SIZE_LT_MM
## PUPIL_SIZE_RT_MM                            4459             4455
## PUPIL_SIZE_LT_MM                            4455             4458
## ARMED_FORCES_QUALIFICATION_TEST             4438             4437
##                                 ARMED_FORCES_QUALIFICATION_TEST
## PUPIL_SIZE_RT_MM                                           4438
## PUPIL_SIZE_LT_MM                                           4437
## ARMED_FORCES_QUALIFICATION_TEST                            4441

So, yes, pupil size and GCA are slightly related, but the association is only visible in a large dataset and the weak relationship casts doubt on theoretical interpretations.