library(tidyverse)
library(janitor)
library(psych)
library(dplyr)
library(lavaan)
library(ggplot2)
library(semPlot)
# ---Data prep -----------------------------------------------------------
db <- read_csv("vpq_all_data.csv")
db <- clean_names(db)
# Sample description ------------------------------------------------------
### 1. Frequencies for categorical-ish variables -----------------------------
##### term
term_tab <- db %>%
tabyl(term) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 1)
##### sex
sex_tab <- db %>%
tabyl(sex) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 1)
##### genderid
genderid_tab <- db %>%
tabyl(genderid) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 1)
##### genderid_other (if it is filled in for some people)
genderid_other_tab <- db %>%
tabyl(genderid_other) %>%
adorn_totals("row")
term_tab
## term n percent
## FALL2018 2052 12.0%
## FALL2019 2071 12.1%
## FALL2020 2351 13.7%
## FALL2022 5141 30.0%
## WINTER2019 1817 10.6%
## WINTER2020 1724 10.1%
## WINTER2021 1975 11.5%
## Total 17131 100.0%
sex_tab
## sex n percent valid_percent
## 1 6057 35.4% 35.4%
## 2 10976 64.1% 64.2%
## 3 4 0.0% 0.0%
## 4 62 0.4% 0.4%
## 5 2 0.0% 0.0%
## 6 2 0.0% 0.0%
## <NA> 28 0.2% -
## Total 17131 100.0% 100.0%
genderid_tab
## genderid n percent valid_percent
## 1 8484 49.5% 49.6%
## 2 8285 48.4% 48.5%
## 3 142 0.8% 0.8%
## 4 181 1.1% 1.1%
## <NA> 39 0.2% -
## Total 17131 100.0% 100.0%
genderid_other_tab
## genderid_other n percent valid_percent
## Masculine 1 5.837371e-05 0.005681818
## agender 7 4.086160e-04 0.039772727
## bisexual 2 1.167474e-04 0.011363636
## blend 1 5.837371e-05 0.005681818
## boy 1 5.837371e-05 0.005681818
## cassgender 1 5.837371e-05 0.005681818
## demi-girl 4 2.334948e-04 0.022727273
## flexible 1 5.837371e-05 0.005681818
## gay 2 1.167474e-04 0.011363636
## gender-fluid 16 9.339793e-04 0.090909091
## gender-fluid_non-binary 2 1.167474e-04 0.011363636
## gender-queer 6 3.502423e-04 0.034090909
## gender_not_conforming 1 5.837371e-05 0.005681818
## girl 1 5.837371e-05 0.005681818
## girlflux 1 5.837371e-05 0.005681818
## he/they 1 5.837371e-05 0.005681818
## human 2 1.167474e-04 0.011363636
## lesbian 1 5.837371e-05 0.005681818
## non-binary 103 6.012492e-03 0.585227273
## non-binary_ woman-shifted 1 5.837371e-05 0.005681818
## non-binary_female 5 2.918685e-04 0.028409091
## pansexual 2 1.167474e-04 0.011363636
## she-they 3 1.751211e-04 0.017045455
## transgender_male 2 1.167474e-04 0.011363636
## transgender_non-binary 1 5.837371e-05 0.005681818
## transgender_non-binary_male 1 5.837371e-05 0.005681818
## two-spirit 4 2.334948e-04 0.022727273
## unlabeled 2 1.167474e-04 0.011363636
## woman 1 5.837371e-05 0.005681818
## <NA> 16955 9.897262e-01 NA
## Total 17131 1.000000e+00 1.000000000
# Descriptives for age --------------------------------------------------
### 1. Make a clean numeric age variable -------------------
db <- db %>%
mutate(
age_num = suppressWarnings(as.numeric(age))
)
### 2. Descriptives for age_num ----------------------------
age_desc <- db %>%
summarise(
n = sum(!is.na(age_num)),
mean = mean(age_num, na.rm = TRUE),
sd = sd(age_num, na.rm = TRUE),
min = min(age_num, na.rm = TRUE),
q1 = quantile(age_num, 0.25, na.rm = TRUE),
median = median(age_num, na.rm = TRUE),
q3 = quantile(age_num, 0.75, na.rm = TRUE),
max = max(age_num, na.rm = TRUE)
)
age_desc
## # A tibble: 1 × 8
## n mean sd min q1 median q3 max
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 17078 19.2 2.84 1 18 18 20 100
Data were obtained from 17,131 students at the University of Alberta. The mean age of the participants was M = 19.18 years (SD = 2.84). Of the total, 35.4% identified as men, 64.1% identified as women, 0.4% identified as other, and 0.2% preferred not to identify.
### clean_vpq ---------------------------------
vpq_vars <- c("oe_2", "oe_3", "ob_1", "ob_4")
# Keep only rows with *all* VPQ items present
n_before <- nrow(db)
db_clean <- db[complete.cases(db[, vpq_vars]), ]
n_after <- nrow(db_clean)
# Standardise items
db_clean <- db_clean %>%
mutate(
oe_2_z = as.numeric(scale(oe_2)),
oe_3_z = as.numeric(scale(oe_3)),
ob_1_z = as.numeric(scale(ob_1)),
ob_4_z = as.numeric(scale(ob_4))
)
# Recompute OE and OB scores in the cleaned data
db_clean <- db_clean %>%
mutate(
OE_score = rowMeans(across(c(oe_2, oe_3)), na.rm = TRUE),
OB_score = rowMeans(across(c(ob_1, ob_4)), na.rm = TRUE)
)
vpq_items <- db_clean[, vpq_vars]
Of the total, 537 participants were removed due to missing values in the VPQ responses, leaving 16,594 participants for the analyses.
We explored how the data structure aligns with the different theoretical frameworks.
The mutually exclusive framework (OE and OB perspectives are strict opposites; only one can dominate at a time) is better reflected in a one-factor structure, treating VP as a single construct.
The continuum or complementary framework (OE and OB lie on a shared dimension and generally move in opposite directions, but not in a strictly incompatible way) could be reflected either by a single construct or two highly correlated constructs.
The independent framework (OE and OB are separate systems, each with its own variability and meaning) would be better reflected by two “independent” constructs. Importantly, “independent” in this context refers to relative independence, not the absolute independence observed between uncorrelated processes.
set.seed(123)
N <- nrow(db_clean)
index_A <- sample(1:N, size = round(N/2), replace = FALSE)
db_A <- db_clean[index_A, ] # Sample for EFA
db_B <- db_clean[-index_A, ] # Sample for CFA
vpq_A <- db_A[, vpq_vars] #efa
vpq_B <- db_B[, vpq_vars] #cfa
To allow independent cross-validation of the factor structure, the full dataset was randomly split into two equal subsamples (EFA subsample n = 8,297; CFA subsample n = 8,297).
# 1) Parallel analysis ----------------------------------------
fa.parallel(
vpq_A,
fa = "fa",
fm = "ml",
n.iter = 100,
main = "Parallel Analysis (EFA sample)"
)
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
A parallel analysis using maximum likelihood extraction indicated that a two-factor solution was optimal for the Visual Perspective Questionnaire (VPQ): the observed-data eigenvalues exceeded those from random data for the first two factors only. Despite this, we still explored a one-factor solution for comparison.
# 1-factor
efa_1f <- fa(
vpq_A,
nfactors = 1,
fm = "ml",
rotate = "oblimin"
)
print(efa_1f)
## Factor Analysis using method = ml
## Call: fa(r = vpq_A, nfactors = 1, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## oe_2 -0.54 0.295 0.705 1
## oe_3 0.35 0.120 0.880 1
## ob_1 0.17 0.029 0.971 1
## ob_4 1.00 0.995 0.005 1
##
## ML1
## SS loadings 1.44
## Proportion Var 0.36
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 6 with the objective function = 0.76 with Chi Square = 6316.77
## df of the model are 2 and the objective function was 0.26
##
## The root mean square of the residuals (RMSR) is 0.18
## The df corrected root mean square of the residuals is 0.32
##
## The harmonic n.obs is 8297 with the empirical chi square 3320.63 with prob < 0
## The total n.obs was 8297 with Likelihood Chi Square = 2134.4 with prob < 0
##
## Tucker Lewis Index of factoring reliability = -0.014
## RMSEA index = 0.358 and the 90 % confidence intervals are 0.346 0.371
## BIC = 2116.35
## Fit based upon off diagonal values = 0.67
## Measures of factor score adequacy
## ML1
## Correlation of (regression) scores with factors 1.00
## Multiple R square of scores with factors 1.00
## Minimum correlation of possible factor scores 0.99
# 2-factor
efa_2f <- fa(
vpq_A,
nfactors = 2,
fm = "ml",
rotate = "oblimin"
)
print(efa_2f)
## Factor Analysis using method = ml
## Call: fa(r = vpq_A, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 h2 u2 com
## oe_2 -0.60 0.15 0.34 0.661 1.1
## oe_3 0.10 0.84 0.76 0.237 1.0
## ob_1 0.33 -0.52 0.30 0.701 1.7
## ob_4 0.96 0.07 0.96 0.036 1.0
##
## ML1 ML2
## SS loadings 1.38 0.99
## Proportion Var 0.34 0.25
## Cumulative Var 0.34 0.59
## Proportion Explained 0.58 0.42
## Cumulative Proportion 0.58 1.00
##
## With factor correlations of
## ML1 ML2
## ML1 1.00 0.23
## ML2 0.23 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 6 with the objective function = 0.76 with Chi Square = 6316.77
## df of the model are -1 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic n.obs is 8297 with the empirical chi square 0 with prob < NA
## The total n.obs was 8297 with Likelihood Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = 1.001
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 0.98 0.88
## Multiple R square of scores with factors 0.96 0.78
## Minimum correlation of possible factor scores 0.93 0.56
# Compare loadings in a tidy way. Cutoff = 0 to see all the loadings
## One-factor
print(efa_1f$loadings, cutoff = 0)
##
## Loadings:
## ML1
## oe_2 -0.543
## oe_3 0.347
## ob_1 0.170
## ob_4 0.997
##
## ML1
## SS loadings 1.439
## Proportion Var 0.360
## Two-factor
print(efa_2f$loadings, cutoff = 0)
##
## Loadings:
## ML1 ML2
## oe_2 -0.599 0.152
## oe_3 0.099 0.845
## ob_1 0.330 -0.520
## ob_4 0.963 0.070
##
## ML1 ML2
## SS loadings 1.406 1.012
## Proportion Var 0.351 0.253
## Cumulative Var 0.351 0.604
# Determine scale range to reverse OB items (for bipolar CFA)
min_scale <- min(as.matrix(vpq_items), na.rm = TRUE)
max_scale <- max(as.matrix(vpq_items), na.rm = TRUE)
db_B <- db_B %>%
mutate(
ob_1_rev = (max_scale + min_scale) - ob_1,
ob_4_rev = (max_scale + min_scale) - ob_4
)
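A quick sanity check (illustrative lines, not part of the original script) is that each reversed item should correlate exactly −1 with its original, analogous to the VVIQ reversal check used later in this document:
# Sanity check (illustrative): the reversal should flip the scale exactly
cor(db_B$ob_1, db_B$ob_1_rev, use = "complete.obs")   # expected: -1
cor(db_B$ob_4, db_B$ob_4_rev, use = "complete.obs")   # expected: -1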
# Subsets for models
## Mutually Exclusive (ME)
vpq_ME <- db_B %>% dplyr::select(oe_2, oe_3, ob_1_rev, ob_4_rev) %>% na.omit()
## Continuum or Complementary (CC)
vpq_CC <- db_B %>% dplyr::select(oe_2, oe_3, ob_1, ob_4) %>% na.omit()
### Independent (IND)
vpq_IND <- db_B %>% dplyr::select(oe_2, oe_3, ob_1, ob_4) %>% na.omit()
This model tested the mutually exclusive framework by specifying a single bipolar factor underlying all four VPQ items, with the OB items reverse-scored.
model_ME <- '
VP_bipolar =~ oe_2 + oe_3 + ob_1_rev + ob_4_rev
'
fit_ME <- cfa(
model_ME,
data = vpq_ME,
std.lv = TRUE
)
summary(fit_ME)
## lavaan 0.6.17 ended normally after 7271 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 8
##
## Number of observations 8297
##
## Model Test User Model:
##
## Test statistic 1461.835
## Degrees of freedom 2
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## VP_bipolar =~
## oe_2 0.018 NA
## oe_3 -0.016 NA
## ob_1_rev 0.009 NA
## ob_4_rev 28.558 NA
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .oe_2 1.084 NA
## .oe_3 1.234 NA
## .ob_1_rev 1.512 NA
## .ob_4_rev -814.436 NA
## VP_bipolar 1.000
semPaths(fit_ME, whatLabels = "std", layout = "tree", edge.label.cex = 0.8)
This model tested the continuum/complementary framework by specifying a single latent continuum factor, with items coded so that higher scores reflected greater engagement in an observer-like perspective.
model_CC <- '
VP_compl =~ oe_2 + oe_3 + ob_1 + ob_4
'
fit_CC <- cfa(
model_CC,
data = vpq_CC,
std.lv = TRUE
)
summary(fit_CC)
## lavaan 0.6.17 ended normally after 7271 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 8
##
## Number of observations 8297
##
## Model Test User Model:
##
## Test statistic 1461.835
## Degrees of freedom 2
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## VP_compl =~
## oe_2 0.018 NA
## oe_3 -0.016 NA
## ob_1 -0.009 NA
## ob_4 -28.558 NA
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .oe_2 1.084 NA
## .oe_3 1.234 NA
## .ob_1 1.512 NA
## .ob_4 -814.436 NA
## VP_compl 1.000
Convergence checks
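The checks below also reference fit_CC_2, fit_CC_neg, and fit_IND, whose specifications are not shown in this excerpt. A minimal sketch of plausible definitions is given here, assuming fit_CC_2 is the two-correlated-factor model and fit_IND the two-orthogonal-factor model; fit_CC_neg (presumably a variant with a constrained negative factor covariance) is not sketched.
# Sketch only: plausible specifications for the models referenced below;
# the actual definitions appear elsewhere in the full analysis
model_CC_2 <- '
  OE =~ oe_2 + oe_3
  OB =~ ob_1 + ob_4
'
fit_CC_2 <- cfa(model_CC_2, data = vpq_CC, std.lv = TRUE)   # factor covariance freely estimated

model_IND <- '
  OE =~ oe_2 + oe_3
  OB =~ ob_1 + ob_4
  OE ~~ 0*OB
'
fit_IND <- cfa(model_IND, data = vpq_IND, std.lv = TRUE)    # factor covariance fixed to 0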
lavInspect(fit_ME, "converged")
## [1] TRUE
lavInspect(fit_CC, "converged")
## [1] TRUE
lavInspect(fit_CC_2,"converged")
## [1] TRUE
lavInspect(fit_CC_neg,"converged")
## [1] FALSE
lavInspect(fit_IND, "converged")
## [1] TRUE
Latent Covariances
We examined the latent covariances of two models. For the continuum/complementary model (two correlated factors), we explored the question: “If we allow OE and OB to relate, how strongly do they actually do so?” For the independent model (two uncorrelated factors), we asked: “What does the model assume about the relationship between OE and OB?”
This is relevant because observed-scale correlations between OE and OB items can be contaminated by measurement error and/or influenced by item wording and response styles. Analyzing latent covariances removes item-specific noise and can more closely reflect the underlying psychological relationship between the perspectives.
lavInspect(fit_CC_2, "cov.lv")
## OE OB
## OE 1.000
## OB -0.113 1.000
lavInspect(fit_IND, "cor.lv")
## OE OB
## OE 1
## OB 0 1
When allowed to covary, OE and OB showed a negligible latent association (r = −.11), indicating that assuming independence between the perspectives does not conflict with the data; accordingly, the model in which OE and OB are constrained to be uncorrelated is consistent with the observed responses.
We used a simulation-based approach to evaluate how well different theoretical frameworks captured the joint distribution of OE and OB scores in the current data.
library(tidyverse)
library(MASS) # for mvrnorm
library(cowplot) # for arranging plots
# Data handling --------------------------------------------
### vpq ---------------------------------------------------------------------
# Compute OE and OB scores (and their z-scores) in the cleaned data
vpq_db <- db_clean %>%
  mutate(
    OE_score = rowMeans(across(c(oe_2, oe_3)), na.rm = TRUE),
    OB_score = rowMeans(across(c(ob_1, ob_4)), na.rm = TRUE),
    OE_z = as.numeric(scale(OE_score)),
    OB_z = as.numeric(scale(OB_score))
  )
# real data -----------------------------------------------------------
real_df <- vpq_db %>%
dplyr::mutate(
OE = OE_z,
OB = OB_z,
framework = "Real data"
) %>%
dplyr::select(OE, OB, framework)
N <- nrow(real_df)
# simulations -----------------------------------------------------------
simulate_framework <- function(N, rho, label){
Sigma <- matrix(c(1, rho, rho, 1), 2, 2)
raw <- MASS::mvrnorm(N, mu = c(0,0), Sigma = Sigma)
tibble(
OE = raw[,1],
OB = raw[,2],
framework = label
)
}
sim_independent <- simulate_framework(N, -0.30, "Independent")
sim_continuum <- simulate_framework(N, -0.60, "Continuum")
sim_mutual <- simulate_framework(N, -0.90, "Mutually exclusive")
# Combine *all* frameworks
all_df <- dplyr::bind_rows(real_df, sim_independent, sim_continuum, sim_mutual)
set.seed(123)
all_df_plot <- all_df %>%
mutate(
OE_plot = if_else(
framework == "Real data",
OE + rnorm(n(), 0, 0.35), # jitter only real
OE
),
OB_plot = if_else(
framework == "Real data",
OB + rnorm(n(), 0, 0.35),
OB
)
)
x_limits <- range(all_df_plot$OE_plot, na.rm = TRUE)
y_limits <- range(all_df_plot$OB_plot, na.rm = TRUE)
We compared the empirical association observed in the VPQ data against formalized simulations derived from the three hypothesized frameworks. Each framework makes distinct predictions about the strength and structure of the relationship between OE and OB, allowing the observed data to be evaluated relative to theoretically constrained benchmarks rather than in isolation.
Three frameworks were operationalized using bivariate normal simulations with unit variances and varying correlations between OE and OB. The independent framework was modelled with a weak negative correlation (ρ = −.30), reflecting the assumption that OE and OB are largely separable dimensions that can co-occur with limited constraint. The continuum framework was modelled with a moderate negative correlation (ρ = −.60), consistent with the view that OE and OB reflect opposing tendencies along a single dimension, such that increases in one are typically accompanied by decreases in the other. Finally, the mutually exclusive framework was modelled with a strong negative correlation (ρ = −.90), representing the theoretical position that adopting one perspective largely precludes the other.
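As a quick illustration (these check lines are not part of the original script), the correlations recovered from each simulated framework and the empirical OE–OB association can be compared directly:
# Illustrative check: recovered correlations vs. the target rho values
cor(sim_independent$OE, sim_independent$OB)   # should be close to -0.30
cor(sim_continuum$OE,   sim_continuum$OB)     # should be close to -0.60
cor(sim_mutual$OE,      sim_mutual$OB)        # should be close to -0.90
cor(real_df$OE,         real_df$OB)           # empirical OE-OB association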
# 1. Real data: OE / OB scores and z-scores-----------------------------------------
vpq_db <- vpq_db %>%
dplyr::mutate(
OE_score = rowMeans(dplyr::across(c(oe_2, oe_3)), na.rm = TRUE),
OB_score = rowMeans(dplyr::across(c(ob_1, ob_4)), na.rm = TRUE)
) %>%
dplyr::mutate(
OE_z = as.numeric(scale(OE_score)),
OB_z = as.numeric(scale(OB_score))
)
# Real data in z-space for later binding
real_df <- vpq_db %>%
dplyr::transmute(
OE = OE_z,
OB = OB_z,
framework = "Real data"
)
N <- nrow(real_df)
# 2. Simulate the three theoretical frameworks -------------------------------------------------------------------------
simulate_framework <- function(N, rho, label){
Sigma <- matrix(c(1, rho, rho, 1), 2, 2)
raw <- MASS::mvrnorm(N, mu = c(0, 0), Sigma = Sigma)
tibble(
OE = raw[, 1],
OB = raw[, 2],
framework = label
)
}
sim_independent <- simulate_framework(N, -0.30, "Independent")
sim_continuum <- simulate_framework(N, -0.60, "Continuum")
sim_mutual <- simulate_framework(N, -0.90, "Mutually exclusive")
# 3. Combine real + simulated-------------------------------------------------------------------------
quad_all <- dplyr::bind_rows(
real_df,
sim_independent,
sim_continuum,
sim_mutual
)
# EXTREME QUADRANTS
# 4. Extreme quadrant thresholds (±1 SD) in z-space -----------------------
# Since OE and OB in quad_all are z-scores, ±1 SD corresponds to ±1.
thr_OE_hi <- 1
thr_OE_lo <- -1
thr_OB_hi <- 1
thr_OB_lo <- -1
# 5. Apply extreme quadrant rules to ALL frameworks -----------------------
quad_all <- quad_all %>%
dplyr::mutate(
group = dplyr::case_when(
OE >= thr_OE_hi & OB >= thr_OB_hi ~ "High_OE_High_OB",
OE <= thr_OE_lo & OB <= thr_OB_lo ~ "Low_OE_Low_OB",
OE >= thr_OE_hi & OB <= thr_OB_lo ~ "High_OE_Low_OB",
OE <= thr_OE_lo & OB >= thr_OB_hi ~ "Low_OE_High_OB",
TRUE ~ "Medium_or_Mixed"
)
)
# 6. Summary table: counts and proportion-----------------------------------------
quad_summary <- quad_all %>%
dplyr::count(framework, group) %>%
dplyr::group_by(framework) %>%
dplyr::mutate(prop = n / sum(n)) %>%
dplyr::ungroup()
quad_summary
## # A tibble: 18 × 4
## framework group n prop
## <chr> <chr> <int> <dbl>
## 1 Continuum High_OE_High_OB 43 0.00259
## 2 Continuum High_OE_Low_OB 1199 0.0723
## 3 Continuum Low_OE_High_OB 1265 0.0762
## 4 Continuum Low_OE_Low_OB 24 0.00145
## 5 Continuum Medium_or_Mixed 14063 0.847
## 6 Independent High_OE_High_OB 151 0.00910
## 7 Independent High_OE_Low_OB 806 0.0486
## 8 Independent Low_OE_High_OB 827 0.0498
## 9 Independent Low_OE_Low_OB 191 0.0115
## 10 Independent Medium_or_Mixed 14619 0.881
## 11 Mutually exclusive High_OE_Low_OB 1860 0.112
## 12 Mutually exclusive Low_OE_High_OB 1921 0.116
## 13 Mutually exclusive Medium_or_Mixed 12813 0.772
## 14 Real data High_OE_High_OB 161 0.00970
## 15 Real data High_OE_Low_OB 805 0.0485
## 16 Real data Low_OE_High_OB 903 0.0544
## 17 Real data Low_OE_Low_OB 198 0.0119
## 18 Real data Medium_or_Mixed 14527 0.875
# 7. Jittered version for plotting (real data only) ------------------------
set.seed(123)
quad_all_plot <- quad_all %>%
dplyr::mutate(
OE_plot = dplyr::if_else(
framework == "Real data",
OE + rnorm(dplyr::n(), 0, 0.35),
OE
),
OB_plot = dplyr::if_else(
framework == "Real data",
OB + rnorm(dplyr::n(), 0, 0.35),
OB
)
)
We tested whether the joint distribution of OE and OB scores was better characterized by statistical independence or by a complementary, quasi-bipolar relation by comparing quadrant frequencies from the real data with those generated under each theoretical framework.
# Odds ratios for extreme quadrants only ----------------------------------
compute_or_extreme <- function(counts_df, quadrant_name, sim_label,
real_label = "Real data") {
real_df <- counts_df %>% dplyr::filter(framework == real_label)
sim_df <- counts_df %>% dplyr::filter(framework == sim_label)
# counts for the target quadrant
a <- real_df$n[real_df$quadrant == quadrant_name]
c <- sim_df$n[sim_df$quadrant == quadrant_name]
# handle absent quadrants
if (length(a) == 0) a <- 0
if (length(c) == 0) c <- 0
b <- sum(real_df$n) - a
d <- sum(sim_df$n) - c
# Haldane correction
a <- a + 0.5; b <- b + 0.5
c <- c + 0.5; d <- d + 0.5
OR <- (a / b) / (c / d)
tibble::tibble(
quadrant = quadrant_name,
comparison = paste(real_label, "vs", sim_label),
OR = OR
)
}
quad_profiles <- quad_summary %>%
dplyr::rename(quadrant = group)
extreme_quadrants <- c("High_OE_High_OB", "Low_OE_Low_OB")
sim_frameworks <- c("Independent", "Continuum", "Mutually exclusive")
odds_ratio_results <- purrr::map_dfr(sim_frameworks, function(sim_lab) {
purrr::map_dfr(extreme_quadrants, function(q) {
compute_or_extreme(quad_profiles, q, sim_lab)
})
})
odds_ratio_results
## # A tibble: 6 × 3
## quadrant comparison OR
## <chr> <chr> <dbl>
## 1 High_OE_High_OB Real data vs Independent 1.07
## 2 Low_OE_Low_OB Real data vs Independent 1.04
## 3 High_OE_High_OB Real data vs Continuum 3.74
## 4 Low_OE_Low_OB Real data vs Continuum 8.19
## 5 High_OE_High_OB Real data vs Mutually exclusive 326.
## 6 Low_OE_Low_OB Real data vs Mutually exclusive 402.
Each odds ratio compares how often an extreme quadrant appears in the real data relative to how often it would appear if a given theoretical framework were true (see the worked example below).
OR ≈ 1: the real data match the theoretical prediction.
OR > 1: the quadrant is more frequent in the real data than the framework predicts; the framework generates too few cases in that quadrant.
OR < 1: the quadrant is less frequent in the real data than the framework predicts; the framework overpredicts that pattern.
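As a worked example (a by-hand reproduction, not part of the original script), the “Real data vs Continuum” OR for the High_OE_High_OB quadrant can be recovered from the counts in quad_summary above:
# Worked example: Real data vs Continuum, High_OE_High_OB quadrant
a <- 161; b <- 16594 - 161    # real data: in quadrant vs not
c <- 43;  d <- 16594 - 43     # continuum simulation: in quadrant vs not
# Haldane correction, as in compute_or_extreme()
((a + 0.5) / (b + 0.5)) / ((c + 0.5) / (d + 0.5))   # ~= 3.74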
db_original <- read_csv("MT_2026.csv", show_col_types = FALSE) %>%
clean_names() %>%
mutate(
OE = rowMeans(across(c(oe_2, oe_3)), na.rm = TRUE),
OB = rowMeans(across(c(ob_1, ob_4)), na.rm = TRUE)
)
total_db_original <- nrow(db_original)
# Missing -----------------------------------------------------------------
db_complete <- na.omit(db_original)
missing_cases <- nrow(db_original) - nrow(db_complete)
# VVIQ descriptive before vviq < 32 removed ---------------------------------------------------------
ggplot(db_complete, aes(x = vviq_total)) +
geom_density(linewidth = 1) +
geom_vline(xintercept = 32, colour = "red", linewidth = 1) +
labs(
x = "VVIQ total score",
y = "Density",
title = "Density distribution of VVIQ scores"
) +
theme_minimal()
# Aphantasia cases ---------------------------------------------------------
db_aph <- db_complete %>% filter(vviq_total >= 32)
aphantasia_cases <- nrow(db_complete) - nrow(db_aph)
db <- db_aph
# Total participants after filtering
total <- nrow(db)
We explored the relationship between the VVIQ and the distribution of VPQ quadrants. A total of 1,928 participants completed online versions of the VPQ and the VVIQ. Of these, 294 were excluded due to missing data, and another 73 were excluded because they were identified as possible aphantasia cases (VVIQ scores below 32). The final sample size was 1,561.
Using the same approach as before, we calculated the quadrants for the new sample.
# Quadrants (z-score OE/OB, ±0.5 SD cutoffs) -------------------------------
quad_all <- db %>%
mutate(
OE_z = as.numeric(scale(OE)),
OB_z = as.numeric(scale(OB))
)
# --- FIXED THRESHOLD (used in BOTH datasets) ---
thr_OE_hi <- 0.5
thr_OE_lo <- -0.5
thr_OB_hi <- 0.5
thr_OB_lo <- -0.5
quad_all <- quad_all %>%
mutate(
group = case_when(
OE_z >= thr_OE_hi & OB_z >= thr_OB_hi ~ "High_OE_High_OB",
OE_z <= thr_OE_lo & OB_z <= thr_OB_lo ~ "Low_OE_Low_OB",
OE_z >= thr_OE_hi & OB_z <= thr_OB_lo ~ "High_OE_Low_OB",
OE_z <= thr_OE_lo & OB_z >= thr_OB_hi ~ "Low_OE_High_OB",
TRUE ~ "Medium_or_Mixed"
)
)
quad_counts_MT_2026 <- quad_all %>% count(group, name = "n")
quad_counts_MT_2026
## # A tibble: 5 × 2
## group n
## <chr> <int>
## 1 High_OE_High_OB 59
## 2 High_OE_Low_OB 335
## 3 Low_OE_High_OB 117
## 4 Low_OE_Low_OB 54
## 5 Medium_or_Mixed 996
# --- Prep: VVIQ z + keep 4 quadrants -----------------------------------------
quad_levels <- c(
"High_OE_High_OB",
"Low_OE_Low_OB",
"High_OE_Low_OB",
"Low_OE_High_OB"
)
db_quad4 <- quad_all %>%
mutate(vviq_z = as.numeric(scale(vviq_total))) %>%
filter(group %in% quad_levels) %>%
mutate(
extreme_consistent = as.integer(
group %in% c("High_OE_High_OB", "Low_OE_Low_OB")
),
quad4 = relevel(factor(group, levels = quad_levels),
ref = "High_OE_Low_OB")
)
The binary logistic regression examines whether imagery vividness is associated with the alignment of own-eyes and observer perspectives when participants occupy extreme regions of the OE–OB space. By collapsing extreme quadrants into consistent profiles (High OE–High OB and Low OE–Low OB) versus inconsistent profiles (High OE–Low OB and Low OE–High OB), the model tests whether higher vividness predicts a greater likelihood that both perspectives are jointly high or jointly low rather than mismatched. Thus, the regression does not assess perspective strength per se, but rather whether imagery vividness is related to the coherence between own-eyes and observer perspectives under a fixed quadrant definition.
logit_mod <- glm(
extreme_consistent ~ vviq_z,
data = db_quad4,
family = binomial(link = "logit")
)
summary(logit_mod)
##
## Call:
## glm(formula = extreme_consistent ~ vviq_z, family = binomial(link = "logit"),
## data = db_quad4)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.3866 0.1057 -13.119 <2e-16 ***
## vviq_z 0.2032 0.1098 1.851 0.0641 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 565.45 on 564 degrees of freedom
## Residual deviance: 561.98 on 563 degrees of freedom
## AIC: 565.98
##
## Number of Fisher Scoring iterations: 4
exp(cbind(OR = coef(logit_mod), confint(logit_mod)))
## OR 2.5 % 97.5 %
## (Intercept) 0.2499327 0.2022433 0.3061936
## vviq_z 1.2253445 0.9897204 1.5230866
The multinomial logistic regression extends the binary analysis by examining how imagery vividness relates to specific extreme perspective configurations rather than overall consistency. Using the High OE–Low OB quadrant as the reference category, the model estimates whether increases in vividness differentially shift the likelihood of belonging to High OE–High OB, Low OE–Low OB, or Low OE–High OB profiles. This analysis decomposes the binary consistency effect, allowing identification of which particular quadrant transitions contribute to the overall tendency toward aligned versus misaligned perspective profiles, and clarifies whether imagery vividness selectively favors certain configurations or acts more uniformly across the extreme OE–OB space.
library(nnet)
multi_mod <- multinom(
quad4 ~ vviq_z,
data = db_quad4,
trace = FALSE
)
summary(multi_mod)
## Call:
## multinom(formula = quad4 ~ vviq_z, data = db_quad4, trace = FALSE)
##
## Coefficients:
## (Intercept) vviq_z
## High_OE_High_OB -1.726333 0.2001968
## Low_OE_Low_OB -1.827197 0.3241902
## Low_OE_High_OB -1.042255 0.2139621
##
## Std. Errors:
## (Intercept) vviq_z
## High_OE_High_OB 0.1415455 0.1467977
## Low_OE_Low_OB 0.1485536 0.1547399
## Low_OE_High_OB 0.1078279 0.1118537
##
## Residual Deviance: 1231.256
## AIC: 1243.256
exp(coef(multi_mod))
## (Intercept) vviq_z
## High_OE_High_OB 0.1779357 1.221643
## Low_OE_Low_OB 0.1608638 1.382910
## Low_OE_High_OB 0.3526585 1.238576
Interpretation:
Estimate (β): the change in the log-odds of the outcome for a one-unit increase in the predictor.
Odds ratio (OR = exp(β)): converts log-odds to an interpretable scale.
OR > 1: odds increase.
OR = 1: no change.
OR < 1: odds decrease.
Intercept: the log-odds (or OR) of the outcome when the predictor is 0.
Standard error (SE): reflects uncertainty in the coefficient estimate; larger SEs indicate less precise estimates.
z value: the ratio of the estimate to its SE, used to compute the p-value.
p-value: the probability of observing an effect at least as extreme if the true effect were zero; used to assess the statistical evidence for an effect.
Confidence interval (CI): the range of plausible OR values; if the CI includes 1, the effect is not statistically distinguishable from no effect.
A short sketch showing how these quantities can be computed for the multinomial model follows.
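Because summary() for nnet::multinom does not report z or p values, the following sketch (not part of the original script) shows how the quantities described above can be derived from the coefficient and standard-error tables:
# Wald z tests, odds ratios, and approximate 95% CIs for the multinomial model (illustrative)
ms <- summary(multi_mod)
z  <- ms$coefficients / ms$standard.errors          # z value = estimate / SE
p  <- 2 * pnorm(-abs(z))                            # two-sided p-value
OR <- exp(ms$coefficients)                          # odds ratios
ci_lo <- exp(ms$coefficients - 1.96 * ms$standard.errors)
ci_hi <- exp(ms$coefficients + 1.96 * ms$standard.errors)
round(cbind(OR = OR[, "vviq_z"], lower = ci_lo[, "vviq_z"],
            upper = ci_hi[, "vviq_z"], p = p[, "vviq_z"]), 3)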
# Read raw data
db <- read_csv("opam.csv") %>%
clean_names()
db <- db %>%
mutate(
# VPQ scores
OE = rowMeans(across(c(oe_2, oe_3)), na.rm = TRUE),
OB = rowMeans(across(c(ob_1, ob_4)), na.rm = TRUE)
)
# vviq transformations
vviq_min <- min(db$vviq_total, na.rm = TRUE)
vviq_max <- max(db$vviq_total, na.rm = TRUE)
db$vviq_inv <- (vviq_max + vviq_min) - db$vviq_total
#check
summary(db$vviq_total)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16.00 50.00 57.00 56.31 64.00 80.00
summary(db$vviq_inv)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16.00 32.00 39.00 39.69 46.00 80.00
cor(db$vviq_total, db$vviq_inv, use = "complete.obs") # Should be -1
## [1] -1
# vviq_descriptive ----------------------------------------------------------
ggplot(db, aes(x = vviq_inv)) +
geom_density(linewidth = 1) +
geom_vline(xintercept = 32, colour = "red", linewidth = 1) +
labs(
x = "VVIQ total score",
y = "Density",
title = "Density distribution of VVIQ scores"
) +
theme_minimal()
## Aphantasia cases
db <- db %>%
filter(vviq_inv >= 32)
# 1) Compute cuts from the CURRENT db (raw OE/OB)
mu_OE <- mean(db$OE, na.rm = TRUE)
sd_OE <- sd(db$OE, na.rm = TRUE)
mu_OB <- mean(db$OB, na.rm = TRUE)
sd_OB <- sd(db$OB, na.rm = TRUE)
k_sd <- 0.5 # keep as before to increase representation
high_OE_cut <- mu_OE + k_sd * sd_OE
low_OE_cut <- mu_OE - k_sd * sd_OE
high_OB_cut <- mu_OB + k_sd * sd_OB
low_OB_cut <- mu_OB - k_sd * sd_OB
# 2) Assign quadrants + mixed using those cuts
db_quartiles <- db %>%
mutate(
VP_quartile = case_when(
OE >= high_OE_cut & OB >= high_OB_cut ~ "High_OE_High_OB",
OE <= low_OE_cut & OB <= low_OB_cut ~ "Low_OE_Low_OB",
OE >= high_OE_cut & OB <= low_OB_cut ~ "High_OE_Low_OB",
OE <= low_OE_cut & OB >= high_OB_cut ~ "Low_OE_High_OB",
TRUE ~ "Mixed"
),
VP_quartile = factor(
VP_quartile,
levels = c(
"High_OE_High_OB",
"Low_OE_Low_OB",
"High_OE_Low_OB",
"Low_OE_High_OB",
"Mixed"
)
)
)
# 3) Counts + proportions
vp_counts <- db_quartiles %>%
count(VP_quartile, name = "n") %>%
mutate(prop = n / sum(n))
vp_counts
## # A tibble: 5 × 3
## VP_quartile n prop
## <fct> <int> <dbl>
## 1 High_OE_High_OB 11 0.0373
## 2 Low_OE_Low_OB 3 0.0102
## 3 High_OE_Low_OB 62 0.210
## 4 Low_OE_High_OB 63 0.214
## 5 Mixed 156 0.529
# keep the cut values handy for plotting
cuts <- tibble(
k_sd = k_sd,
low_OE_cut = low_OE_cut,
high_OE_cut = high_OE_cut,
low_OB_cut = low_OB_cut,
high_OB_cut = high_OB_cut
)
cuts
## # A tibble: 1 × 5
## k_sd low_OE_cut high_OE_cut low_OB_cut high_OB_cut
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.5 2.93 3.85 1.90 2.93
quad_levels <- c(
"High_OE_High_OB",
"Low_OE_Low_OB",
"High_OE_Low_OB",
"Low_OE_High_OB"
)
db_quad4 <- db_quartiles %>%
mutate(vviq_z = as.numeric(scale(vviq_inv))) %>%
filter(VP_quartile %in% quad_levels) %>%
mutate(
extreme_consistent = as.integer(VP_quartile %in% c("High_OE_High_OB", "Low_OE_Low_OB")),
quad4 = relevel(factor(VP_quartile, levels = quad_levels), ref = "High_OE_Low_OB")
)
logit_mod <- glm(
extreme_consistent ~ vviq_z,
data = db_quad4,
family = binomial(link = "logit")
)
summary(logit_mod)
##
## Call:
## glm(formula = extreme_consistent ~ vviq_z, family = binomial(link = "logit"),
## data = db_quad4)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3344 0.3112 -7.502 6.31e-14 ***
## vviq_z 0.4730 0.2235 2.116 0.0343 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 90.812 on 138 degrees of freedom
## Residual deviance: 86.612 on 137 degrees of freedom
## AIC: 90.612
##
## Number of Fisher Scoring iterations: 5
# odds ratios and CIs
exp(cbind(OR = coef(logit_mod), confint(logit_mod)))
## OR 2.5 % 97.5 %
## (Intercept) 0.09687267 0.04950845 0.1697893
## vviq_z 1.60486922 1.02194654 2.5106769
multi_mod <- nnet::multinom(
quad4 ~ vviq_z,
data = db_quad4
)
## # weights: 12 (6 variable)
## initial value 192.694916
## iter 10 value 135.544047
## final value 135.486234
## converged
summary(multi_mod)
## Call:
## nnet::multinom(formula = quad4 ~ vviq_z, data = db_quad4)
##
## Coefficients:
## (Intercept) vviq_z
## High_OE_High_OB -2.00200529 0.61997136
## Low_OE_Low_OB -3.17841144 -0.65103326
## Low_OE_High_OB 0.01642012 -0.03949422
##
## Std. Errors:
## (Intercept) vviq_z
## High_OE_High_OB 0.3845583 0.2572050
## Low_OE_Low_OB 0.6875981 0.8111332
## Low_OE_High_OB 0.1789328 0.1816985
##
## Residual Deviance: 270.9725
## AIC: 282.9725
# odds ratios for multinomial model
exp(coef(multi_mod))
## (Intercept) vviq_z
## High_OE_High_OB 0.13506417 1.8588748
## Low_OE_Low_OB 0.04165177 0.5215066
## Low_OE_High_OB 1.01655567 0.9612755