Scenario 1: 40 vs 400

Simulate data with 40 items on factor 1 and 400 on factor 2.

d1 <- NULL
d2 <- NULL
total = tibble(value=integer(), item=character(), Part=character(), task=character())
for(i in 1:1000){
  df1 <- NULL
  df2 <- NULL
  df3 <- NULL
  df4 <- NULL
  df5 <- NULL
  df6 <- NULL
  df7 <- NULL
  d1 <- rbeta(1, 2, 2)
  d2 <- rbeta(1, 2, 2)
  for(j in 1:40){
    df1[j] = rbinom(1,1, d1)
    df2[j] = paste("A", j)
  }
  df1 <- tibble(value = df1)
  df2 <- tibble(item = df2)
  df3 = bind_cols(df1, df2)
  for(j in 1:400){
    df4[j] = rbinom(1,1, d2)
    df5[j] = paste("B", j)
  }
  df4 = tibble(value=df4)
  df5 = tibble(item = df5)
  df6 = bind_cols(df4, df5) 
  df7 <- bind_rows(df3, df6)
  df7$Part = paste("Part", i)
  df7$task = c(rep("A", 40), rep("B", 400))
  total <- bind_rows(total, df7)
}

Sanity check: are the two factors indeed uncorrelated.

total %>%
  group_by(Part, task) %>%
  mutate(
    total = sum(value)
  ) %>%
  slice(1) %>%
  ungroup() %>%
  dplyr::select(-c("value", "item")) %>%
  pivot_wider(id_cols=Part, names_from = task, values_from = total) %>%
  ggplot(aes(A, B)) + geom_point()

Make wide data

ttl1 <- total %>%
  dplyr::select(-"task") %>%
  pivot_wider(id_cols=Part, names_from=item, values_from=value) %>%
  dplyr::select(-"Part")

Conf detect with correct factor structure.

dtct <- c(rep(1, 40), rep(2, 400))

ttl2 <- data.frame(ttl1)
fscores = rowSums(ttl2)

#conf1 <- conf.detect(ttl2, fscores,  dtct)

#write_rds(conf1, "conf1.rds")
conf1 <- read_rds("conf1.rds")

conf1$detect

Detect suggests uni-dimesnionality

Exploratory detect– clustering looks right but DETECT index small.

#expl1 <- expl.detect(ttl2, fscores, nclusters=2, use_sum_score=TRUE, seed=1)

#write_rds(expl1, "expl1.rds")
expl1 <- read_rds("expl1.rds")

expl1$detect.unweighted

##     DETECT.val  ASSI.val RATIO.val MADCOV100.val MCOV100.val
## Cl2 0.05352761 0.0534065 0.1028308     0.5205407 -0.06159096

graphics::plot(expl1$clusterfit, main = paste("Cluster Dendogram with ", 
        2, " Clusters", sep = ""))
stats::rect.hclust(expl1$clusterfit, k = 2, border = "red")

Scenario 2: 400 vs 400

Simulate data with 40 items on factor 1 and 400 on factor 2.

d1 <- NULL
d2 <- NULL
total = tibble(value=integer(), item=character(), Part=character(), task=character())
for(i in 1:1000){
  df1 <- NULL
  df2 <- NULL
  df3 <- NULL
  df4 <- NULL
  df5 <- NULL
  df6 <- NULL
  df7 <- NULL
  d1 <- rbeta(1, 2, 2)
  d2 <- rbeta(1, 2, 2)
  for(j in 1:400){
    df1[j] = rbinom(1,1, d1)
    df2[j] = paste("A", j)
  }
  df1 <- tibble(value = df1)
  df2 <- tibble(item = df2)
  df3 = bind_cols(df1, df2)
  for(j in 1:400){
    df4[j] = rbinom(1,1, d2)
    df5[j] = paste("B", j)
  }
  df4 = tibble(value=df4)
  df5 = tibble(item = df5)
  df6 = bind_cols(df4, df5) 
  df7 <- bind_rows(df3, df6)
  df7$Part = paste("Part", i)
  df7$task = c(rep("A", 400), rep("B", 400))
  total <- bind_rows(total, df7)
}

Make wide data

ttl1 <- total %>%
  dplyr::select(-"task") %>%
  pivot_wider(id_cols=Part, names_from=item, values_from=value) %>%
  dplyr::select(-"Part")

Conf detect with correct factor structure.

dtct <- c(rep(1, 400), rep(2, 400))

ttl2 <- data.frame(ttl1)
fscores = rowSums(ttl2)

#conf2 <- conf.detect(ttl2, fscores,  dtct)

#write_rds(conf2, "conf2.rds")
conf2 <- read_rds("conf2.rds")

conf2$detect

400 by 400 seems to give sensible results.

Scenario 3: 40 vs 40

d1 <- NULL
d2 <- NULL
total = tibble(value=integer(), item=character(), Part=character(), task=character())
for(i in 1:1000){
  df1 <- NULL
  df2 <- NULL
  df3 <- NULL
  df4 <- NULL
  df5 <- NULL
  df6 <- NULL
  df7 <- NULL
  d1 <- rbeta(1, 2, 2)
  d2 <- rbeta(1, 2, 2)
  for(j in 1:40){
    df1[j] = rbinom(1,1, d1)
    df2[j] = paste("A", j)
  }
  df1 <- tibble(value = df1)
  df2 <- tibble(item = df2)
  df3 = bind_cols(df1, df2)
  for(j in 1:40){
    df4[j] = rbinom(1,1, d2)
    df5[j] = paste("B", j)
  }
  df4 = tibble(value=df4)
  df5 = tibble(item = df5)
  df6 = bind_cols(df4, df5) 
  df7 <- bind_rows(df3, df6)
  df7$Part = paste("Part", i)
  df7$task = c(rep("A", 40), rep("B", 40))
  total <- bind_rows(total, df7)
}

Make wide data

ttl1 <- total %>%
  dplyr::select(-"task") %>%
  pivot_wider(id_cols=Part, names_from=item, values_from=value) %>%
  dplyr::select(-"Part")

Conf detect with correct factor structure.

dtct <- c(rep(1, 40), rep(2, 40))

ttl2 <- data.frame(ttl1)
fscores = rowSums(ttl2)

conf3 <- conf.detect(ttl2, fscores,  dtct)

## -----------------------------------------------------------
## Confirmatory DETECT Analysis 
## Conditioning on 1 Score
## Bandwidth Scale: 1.1 
## Pairwise Estimation of Conditional Covariances
## ...........................................................
## Nonparametric ICC estimation 
##  5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 
## 55% 60% 65% 70% 75% 80% 85% 90% 95% 
## ...........................................................
## Nonparametric Estimation of conditional covariances 
##  5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 
## 55% 60% 65% 70% 75% 80% 85% 90% 95% 
## -----------------------------------------------------------
##           unweighted weighted
## DETECT         2.543    2.543
## ASSI           1.000    1.000
## RATIO          1.000    1.000
## MADCOV100      2.543    2.543
## MCOV100       -0.251   -0.251

Works ok – gives similar value to 400 vs 400

Scenario 4: 4 vs 40

d1 <- NULL
d2 <- NULL
total = tibble(value=integer(), item=character(), Part=character(), task=character())
for(i in 1:1000){
  df1 <- NULL
  df2 <- NULL
  df3 <- NULL
  df4 <- NULL
  df5 <- NULL
  df6 <- NULL
  df7 <- NULL
  d1 <- rbeta(1, 2, 2)
  d2 <- rbeta(1, 2, 2)
  for(j in 1:4){
    df1[j] = rbinom(1,1, d1)
    df2[j] = paste("A", j)
  }
  df1 <- tibble(value = df1)
  df2 <- tibble(item = df2)
  df3 = bind_cols(df1, df2)
  for(j in 1:40){
    df4[j] = rbinom(1,1, d2)
    df5[j] = paste("B", j)
  }
  df4 = tibble(value=df4)
  df5 = tibble(item = df5)
  df6 = bind_cols(df4, df5) 
  df7 <- bind_rows(df3, df6)
  df7$Part = paste("Part", i)
  df7$task = c(rep("A", 4), rep("B", 40))
  total <- bind_rows(total, df7)
}

Make wide data

ttl1 <- total %>%
  dplyr::select(-"task") %>%
  pivot_wider(id_cols=Part, names_from=item, values_from=value) %>%
  dplyr::select(-"Part")

Conf detect with correct factor structure.

dtct <- c(rep(1, 4), rep(2, 40))

ttl2 <- data.frame(ttl1)
fscores = rowSums(ttl2)

conf3 <- conf.detect(ttl2, fscores,  dtct)

## -----------------------------------------------------------
## Confirmatory DETECT Analysis 
## Conditioning on 1 Score
## Bandwidth Scale: 1.1 
## Pairwise Estimation of Conditional Covariances
## ...........................................................
## Nonparametric ICC estimation 
##  5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 
## 55% 60% 65% 70% 75% 80% 85% 90% 95% 
## ...........................................................
## Nonparametric Estimation of conditional covariances 
##  5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 
## 55% 60% 65% 70% 75% 80% 85% 90% 95% 
## -----------------------------------------------------------
##           unweighted weighted
## DETECT        -0.091   -0.091
## ASSI          -0.195   -0.195
## RATIO         -0.137   -0.137
## MADCOV100      0.667    0.667
## MCOV100       -0.414   -0.414

Doesn’t detect multidimensionality.

So I think the issue is the imbalance in cluster sizes not the ratio of items to participants, though that may not help.

R Notebook

Scenario 1: 40 vs 400

Scenario 2: 400 vs 400

Scenario 3: 40 vs 40

Scenario 4: 4 vs 40