Simulate data with 40 items on factor 1 and 400 on factor 2.
d1 <- NULL
d2 <- NULL
total = tibble(value=integer(), item=character(), Part=character(), task=character())
for(i in 1:1000){
df1 <- NULL
df2 <- NULL
df3 <- NULL
df4 <- NULL
df5 <- NULL
df6 <- NULL
df7 <- NULL
d1 <- rbeta(1, 2, 2)
d2 <- rbeta(1, 2, 2)
for(j in 1:40){
df1[j] = rbinom(1,1, d1)
df2[j] = paste("A", j)
}
df1 <- tibble(value = df1)
df2 <- tibble(item = df2)
df3 = bind_cols(df1, df2)
for(j in 1:400){
df4[j] = rbinom(1,1, d2)
df5[j] = paste("B", j)
}
df4 = tibble(value=df4)
df5 = tibble(item = df5)
df6 = bind_cols(df4, df5)
df7 <- bind_rows(df3, df6)
df7$Part = paste("Part", i)
df7$task = c(rep("A", 40), rep("B", 400))
total <- bind_rows(total, df7)
}
Sanity check: are the two factors indeed uncorrelated.
total %>%
group_by(Part, task) %>%
mutate(
total = sum(value)
) %>%
slice(1) %>%
ungroup() %>%
dplyr::select(-c("value", "item")) %>%
pivot_wider(id_cols=Part, names_from = task, values_from = total) %>%
ggplot(aes(A, B)) + geom_point()
Make wide data
ttl1 <- total %>%
dplyr::select(-"task") %>%
pivot_wider(id_cols=Part, names_from=item, values_from=value) %>%
dplyr::select(-"Part")
Conf detect with correct factor structure.
dtct <- c(rep(1, 40), rep(2, 400))
ttl2 <- data.frame(ttl1)
fscores = rowSums(ttl2)
#conf1 <- conf.detect(ttl2, fscores, dtct)
#write_rds(conf1, "conf1.rds")
conf1 <- read_rds("conf1.rds")
conf1$detect
Detect suggests uni-dimesnionality
Exploratory detect– clustering looks right but DETECT index small.
#expl1 <- expl.detect(ttl2, fscores, nclusters=2, use_sum_score=TRUE, seed=1)
#write_rds(expl1, "expl1.rds")
expl1 <- read_rds("expl1.rds")
expl1$detect.unweighted
## DETECT.val ASSI.val RATIO.val MADCOV100.val MCOV100.val
## Cl2 0.05352761 0.0534065 0.1028308 0.5205407 -0.06159096
graphics::plot(expl1$clusterfit, main = paste("Cluster Dendogram with ",
2, " Clusters", sep = ""))
stats::rect.hclust(expl1$clusterfit, k = 2, border = "red")
Simulate data with 40 items on factor 1 and 400 on factor 2.
d1 <- NULL
d2 <- NULL
total = tibble(value=integer(), item=character(), Part=character(), task=character())
for(i in 1:1000){
df1 <- NULL
df2 <- NULL
df3 <- NULL
df4 <- NULL
df5 <- NULL
df6 <- NULL
df7 <- NULL
d1 <- rbeta(1, 2, 2)
d2 <- rbeta(1, 2, 2)
for(j in 1:400){
df1[j] = rbinom(1,1, d1)
df2[j] = paste("A", j)
}
df1 <- tibble(value = df1)
df2 <- tibble(item = df2)
df3 = bind_cols(df1, df2)
for(j in 1:400){
df4[j] = rbinom(1,1, d2)
df5[j] = paste("B", j)
}
df4 = tibble(value=df4)
df5 = tibble(item = df5)
df6 = bind_cols(df4, df5)
df7 <- bind_rows(df3, df6)
df7$Part = paste("Part", i)
df7$task = c(rep("A", 400), rep("B", 400))
total <- bind_rows(total, df7)
}
Make wide data
ttl1 <- total %>%
dplyr::select(-"task") %>%
pivot_wider(id_cols=Part, names_from=item, values_from=value) %>%
dplyr::select(-"Part")
Conf detect with correct factor structure.
dtct <- c(rep(1, 400), rep(2, 400))
ttl2 <- data.frame(ttl1)
fscores = rowSums(ttl2)
#conf2 <- conf.detect(ttl2, fscores, dtct)
#write_rds(conf2, "conf2.rds")
conf2 <- read_rds("conf2.rds")
conf2$detect
400 by 400 seems to give sensible results.
d1 <- NULL
d2 <- NULL
total = tibble(value=integer(), item=character(), Part=character(), task=character())
for(i in 1:1000){
df1 <- NULL
df2 <- NULL
df3 <- NULL
df4 <- NULL
df5 <- NULL
df6 <- NULL
df7 <- NULL
d1 <- rbeta(1, 2, 2)
d2 <- rbeta(1, 2, 2)
for(j in 1:40){
df1[j] = rbinom(1,1, d1)
df2[j] = paste("A", j)
}
df1 <- tibble(value = df1)
df2 <- tibble(item = df2)
df3 = bind_cols(df1, df2)
for(j in 1:40){
df4[j] = rbinom(1,1, d2)
df5[j] = paste("B", j)
}
df4 = tibble(value=df4)
df5 = tibble(item = df5)
df6 = bind_cols(df4, df5)
df7 <- bind_rows(df3, df6)
df7$Part = paste("Part", i)
df7$task = c(rep("A", 40), rep("B", 40))
total <- bind_rows(total, df7)
}
Make wide data
ttl1 <- total %>%
dplyr::select(-"task") %>%
pivot_wider(id_cols=Part, names_from=item, values_from=value) %>%
dplyr::select(-"Part")
Conf detect with correct factor structure.
dtct <- c(rep(1, 40), rep(2, 40))
ttl2 <- data.frame(ttl1)
fscores = rowSums(ttl2)
conf3 <- conf.detect(ttl2, fscores, dtct)
## -----------------------------------------------------------
## Confirmatory DETECT Analysis
## Conditioning on 1 Score
## Bandwidth Scale: 1.1
## Pairwise Estimation of Conditional Covariances
## ...........................................................
## Nonparametric ICC estimation
## 5% 10% 15% 20% 25% 30% 35% 40% 45% 50%
## 55% 60% 65% 70% 75% 80% 85% 90% 95%
## ...........................................................
## Nonparametric Estimation of conditional covariances
## 5% 10% 15% 20% 25% 30% 35% 40% 45% 50%
## 55% 60% 65% 70% 75% 80% 85% 90% 95%
## -----------------------------------------------------------
## unweighted weighted
## DETECT 2.543 2.543
## ASSI 1.000 1.000
## RATIO 1.000 1.000
## MADCOV100 2.543 2.543
## MCOV100 -0.251 -0.251
Works ok – gives similar value to 400 vs 400
d1 <- NULL
d2 <- NULL
total = tibble(value=integer(), item=character(), Part=character(), task=character())
for(i in 1:1000){
df1 <- NULL
df2 <- NULL
df3 <- NULL
df4 <- NULL
df5 <- NULL
df6 <- NULL
df7 <- NULL
d1 <- rbeta(1, 2, 2)
d2 <- rbeta(1, 2, 2)
for(j in 1:4){
df1[j] = rbinom(1,1, d1)
df2[j] = paste("A", j)
}
df1 <- tibble(value = df1)
df2 <- tibble(item = df2)
df3 = bind_cols(df1, df2)
for(j in 1:40){
df4[j] = rbinom(1,1, d2)
df5[j] = paste("B", j)
}
df4 = tibble(value=df4)
df5 = tibble(item = df5)
df6 = bind_cols(df4, df5)
df7 <- bind_rows(df3, df6)
df7$Part = paste("Part", i)
df7$task = c(rep("A", 4), rep("B", 40))
total <- bind_rows(total, df7)
}
Make wide data
ttl1 <- total %>%
dplyr::select(-"task") %>%
pivot_wider(id_cols=Part, names_from=item, values_from=value) %>%
dplyr::select(-"Part")
Conf detect with correct factor structure.
dtct <- c(rep(1, 4), rep(2, 40))
ttl2 <- data.frame(ttl1)
fscores = rowSums(ttl2)
conf3 <- conf.detect(ttl2, fscores, dtct)
## -----------------------------------------------------------
## Confirmatory DETECT Analysis
## Conditioning on 1 Score
## Bandwidth Scale: 1.1
## Pairwise Estimation of Conditional Covariances
## ...........................................................
## Nonparametric ICC estimation
## 5% 10% 15% 20% 25% 30% 35% 40% 45% 50%
## 55% 60% 65% 70% 75% 80% 85% 90% 95%
## ...........................................................
## Nonparametric Estimation of conditional covariances
## 5% 10% 15% 20% 25% 30% 35% 40% 45% 50%
## 55% 60% 65% 70% 75% 80% 85% 90% 95%
## -----------------------------------------------------------
## unweighted weighted
## DETECT -0.091 -0.091
## ASSI -0.195 -0.195
## RATIO -0.137 -0.137
## MADCOV100 0.667 0.667
## MCOV100 -0.414 -0.414
Doesn’t detect multidimensionality.
So I think the issue is the imbalance in cluster sizes not the ratio of items to participants, though that may not help.