Databases

## Warning in
## read.spss("/Volumes/mrlab/data/ram_translation_task/ram_crossnotation/kent_datafiles/11-20-21
## KSU intervention data.sav", :
## /Volumes/mrlab/data/ram_translation_task/ram_crossnotation/kent_datafiles/11-20-21
## KSU intervention data.sav: Very long string record(s) found (record type 7,
## subtype 14), each will be imported in consecutive separate variables
## [1] 183

Group Level Analyses

Within and Cross Notation Magnitude Comparison Skills

kent_dataset_both = kent_dataset[c("ResponseId","id", "precrossmean","prewithinmean")]
nrow(kent_dataset)
## [1] 183
kent_dataset_both_gather = gather(kent_dataset_both, "cross","accuracy", -c(ResponseId, id))

summarySE(kent_dataset_both_gather, "accuracy", "cross")
##           cross   N  accuracy        sd          se         ci
## 1  precrossmean 183 0.8665756 0.1472588 0.010885681 0.02147836
## 2 prewithinmean 183 0.9074074 0.1314340 0.009715884 0.01917026
t.test(accuracy ~  cross, kent_dataset_both_gather, paired =T)
## 
##  Paired t-test
## 
## data:  accuracy by cross
## t = -5.733, df = 182, p-value = 4.035e-08
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.05488461 -0.02677902
## sample estimates:
## mean difference 
##     -0.04083182
cohensD(accuracy ~  cross, data = kent_dataset_both_gather, method = "paired")
## Warning in cohensD(accuracy ~ cross, data = kent_dataset_both_gather, method =
## "paired"): calculating paired samples Cohen's d using formula input. Results
## will be incorrect if cases do not appear in the same order for both levels of
## the grouping factor
## [1] 0.4237952
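
Because gather() stacks the two accuracy columns in the same participant order, the cases here are aligned and the cohensD() warning above is benign; for a paired design the same value also follows from |t|/sqrt(n) = 5.733/sqrt(183) ≈ 0.42. As an order-robust check, a minimal sketch (object names illustrative; assumes tidyr is loaded) computes d directly from matched difference scores:

both_wide = spread(kent_dataset_both_gather[c("ResponseId","cross","accuracy")], key = cross, value = accuracy) # back to one row per participant
diff_scores = both_wide$precrossmean - both_wide$prewithinmean
abs(mean(diff_scores)) / sd(diff_scores) # paired Cohen's d on explicitly matched rows; should reproduce 0.424
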
#########################################################
# kent_clusterclass_within = kent_clusterclass
# names(kent_clusterclass_within)[1] = "ResponseId"
# kent_dataset_both_gather_cluster = kent_dataset_both_gather %>%
#   left_join(kent_clusterclass_within, by = "ResponseId")
# 
# ezANOVA(kent_dataset_both_gather_cluster, dv = .(accuracy), wid = .(ResponseId), within  = .c(cross), between = .(cluster))
# 
# summarySE(kent_dataset_both_gather_cluster, "accuracy", c("cluster","cross"))
# 
# kent_dataset_both_gather_cluster_high = subset(kent_dataset_both_gather_cluster, cluster == "High Performing")
# t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_high, paired = T)
# cohensD(accuracy ~  cross, data = kent_dataset_both_gather_cluster_high, method = "paired")
# 
# kent_dataset_both_gather_cluster_biased = subset(kent_dataset_both_gather_cluster, cluster == "Percentage Bias")
# t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_biased, paired = T)
# cohensD(accuracy ~  cross, data = kent_dataset_both_gather_cluster_biased, method = "paired")
#########################################################

Within-Notation Magnitude Comparison

kent_dataset_within = kent_dataset[c("ResponseId","prefvfmean", "predvdmean","prepvpmean")]
nrow(kent_dataset_within)
## [1] 183
kent_dataset_within_gather = gather(kent_dataset_within, "within","accuracy", -c(ResponseId))
ezANOVA(kent_dataset_within_gather, dv = .(accuracy), wid = .(ResponseId), within  = .c(within))
## Warning: Converting "ResponseId" to factor for ANOVA.
## Warning: Converting "within" to factor for ANOVA.
## $ANOVA
##   Effect DFn DFd       F            p p<.05        ges
## 2 within   2 364 35.5594 7.831115e-15     * 0.08799465
## 
## $`Mauchly's Test for Sphericity`
##   Effect         W         p p<.05
## 2 within 0.9967719 0.7463059      
## 
## $`Sphericity Corrections`
##   Effect       GGe        p[GG] p[GG]<.05      HFe        p[HF] p[HF]<.05
## 2 within 0.9967822 8.582718e-15         * 1.007804 7.831115e-15         *
summarySE(kent_dataset_within_gather, "accuracy", "within")
##       within   N  accuracy         sd          se          ci
## 1 predvdmean 183 0.8834244 0.21768216 0.016091528 0.031749938
## 2 prefvfmean 183 0.8524590 0.22530341 0.016654907 0.032861532
## 3 prepvpmean 183 0.9863388 0.06510358 0.004812595 0.009495654
agg_all_participants8_accuracy_gather_within_nofvf = subset(kent_dataset_within_gather, within != "prefvfmean")
t.test(accuracy ~ within, agg_all_participants8_accuracy_gather_within_nofvf, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by within
## t = -6.3674, df = 182, p-value = 1.523e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.13480484 -0.07102394
## sample estimates:
## mean difference 
##      -0.1029144
cohensD(accuracy ~  within, data = agg_all_participants8_accuracy_gather_within_nofvf, method = "paired")
## Warning in cohensD(accuracy ~ within, data =
## agg_all_participants8_accuracy_gather_within_nofvf, : calculating paired
## samples Cohen's d using formula input. Results will be incorrect if cases do
## not appear in the same order for both levels of the grouping factor
## [1] 0.4706906
agg_all_participants8_accuracy_gather_within_nodvd = subset(kent_dataset_within_gather, within != "predvdmean")
t.test(accuracy ~ within, agg_all_participants8_accuracy_gather_within_nodvd, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by within
## t = -7.8935, df = 182, p-value = 2.665e-13
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.1673448 -0.1004148
## sample estimates:
## mean difference 
##      -0.1338798
cohensD(accuracy ~  within, data = agg_all_participants8_accuracy_gather_within_nodvd, method = "paired")
## Warning in cohensD(accuracy ~ within, data =
## agg_all_participants8_accuracy_gather_within_nodvd, : calculating paired
## samples Cohen's d using formula input. Results will be incorrect if cases do
## not appear in the same order for both levels of the grouping factor
## [1] 0.5835042
agg_all_participants8_accuracy_gather_within_nopvp = subset(kent_dataset_within_gather, within != "prepvpmean")
t.test(accuracy ~ within, agg_all_participants8_accuracy_gather_within_nopvp, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by within
## t = 1.8507, df = 182, p-value = 0.06584
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.002048583  0.063979366
## sample estimates:
## mean difference 
##      0.03096539
cohensD(accuracy ~  within, data = agg_all_participants8_accuracy_gather_within_nopvp, method = "paired")
## Warning in cohensD(accuracy ~ within, data =
## agg_all_participants8_accuracy_gather_within_nopvp, : calculating paired
## samples Cohen's d using formula input. Results will be incorrect if cases do
## not appear in the same order for both levels of the grouping factor
## [1] 0.136804

Cross-Notation Magnitude Comparison

kent_dataset_between = kent_dataset[c("ResponseId","prefgtpmean","prepgtfmean","predgtpmean","prepgtdmean","prefgtdmean","predgtfmean")]
names(kent_dataset_between) = c("id","fp_fg_prefgtpmean","fp_pg_prepgtfmean","dp_dg_predgtpmean","dp_pg_prepgtdmean","df_fg_prefgtdmean","df_dg_predgtfmean")
kent_dataset_between_gather = gather(kent_dataset_between, key = "type", value = "accuracy", -id)
kent_dataset_between_gather = separate(kent_dataset_between_gather, col = "type", into = c("components","greater","type"), sep = "_", remove = F)
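
The renamed columns encode the notation pair and the larger notation ahead of the original variable name, so separate() yields, e.g., components = "fp", greater = "fg", type = "prefgtpmean". A one-line illustration of the naming scheme (just an illustrative string, not part of the pipeline):

strsplit("fp_fg_prefgtpmean", "_")[[1]] # "fp" (notation pair), "fg" (larger notation), "prefgtpmean" (original column)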

Pairwise Comparisons

Fractions vs. Percentages

kent_dataset_between_gather_fp = subset(kent_dataset_between_gather, components =="fp")
kent_dataset_between_gather_fp$greater = as.factor(as.character(kent_dataset_between_gather_fp$greater))
kent_dataset_between_gather_fp$greater <- factor(kent_dataset_between_gather_fp$greater, levels=c("pg","fg"))
kent_dataset_between_gather_fp$comparison = "Percent vs. Fraction \nComparisons"
summarySE(kent_dataset_between_gather_fp, "accuracy", "greater")
##   greater   N  accuracy        sd          se         ci
## 1      pg 183 0.9408015 0.1341508 0.009916709 0.01956650
## 2      fg 183 0.7477231 0.2984790 0.022064203 0.04353453
t.test(accuracy  ~ greater, kent_dataset_between_gather_fp, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by greater
## t = 9.2324, df = 182, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.1518148 0.2343418
## sample estimates:
## mean difference 
##       0.1930783
cohensD(accuracy ~  greater, data = kent_dataset_between_gather_fp, method = "paired")
## Warning in cohensD(accuracy ~ greater, data = kent_dataset_between_gather_fp, :
## calculating paired samples Cohen's d using formula input. Results will be
## incorrect if cases do not appear in the same order for both levels of the
## grouping factor
## [1] 0.6824759

Decimals vs. Percentages

kent_dataset_between_gather_dp = subset(kent_dataset_between_gather, components =="dp")
kent_dataset_between_gather_dp$greater = as.factor(as.character(kent_dataset_between_gather_dp$greater))
kent_dataset_between_gather_dp$greater <- factor(kent_dataset_between_gather_dp$greater, levels=c("pg","dg"))
kent_dataset_between_gather_dp$comparison = "Percent vs. Decimal \nComparisons"
summarySE(kent_dataset_between_gather_dp, "accuracy", "greater")
##   greater   N  accuracy         sd          se         ci
## 1      pg 183 0.9672131 0.08987003 0.006643384 0.01310796
## 2      dg 183 0.8542805 0.22850489 0.016891567 0.03332848
t.test(accuracy  ~ greater, kent_dataset_between_gather_dp, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by greater
## t = 7.0012, df = 182, p-value = 4.728e-11
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.08110564 0.14475957
## sample estimates:
## mean difference 
##       0.1129326
cohensD(accuracy ~  greater, data = kent_dataset_between_gather_dp, method = "paired")
## Warning in cohensD(accuracy ~ greater, data = kent_dataset_between_gather_dp, :
## calculating paired samples Cohen's d using formula input. Results will be
## incorrect if cases do not appear in the same order for both levels of the
## grouping factor
## [1] 0.5175403

Decimals vs. Fractions

kent_dataset_between_gather_df = subset(kent_dataset_between_gather, components =="df")
kent_dataset_between_gather_df$greater = as.factor(as.character(kent_dataset_between_gather_df$greater))
kent_dataset_between_gather_df$greater <- factor(kent_dataset_between_gather_df$greater, levels=c("dg","fg"))
kent_dataset_between_gather_df$comparison = "Decimal vs. Fraction \nComparisons"
summarySE(kent_dataset_between_gather_df, "accuracy", "greater")
##   greater   N  accuracy        sd         se         ci
## 1      dg 183 0.8734062 0.2130584 0.01574973 0.03107554
## 2      fg 183 0.8160291 0.2366804 0.01749592 0.03452092
t.test(accuracy  ~ greater, kent_dataset_between_gather_df, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by greater
## t = 2.9001, df = 182, p-value = 0.004189
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.01834119 0.09641290
## sample estimates:
## mean difference 
##      0.05737705
cohensD(accuracy ~  greater, data = kent_dataset_between_gather_df, method = "paired")
## Warning in cohensD(accuracy ~ greater, data = kent_dataset_between_gather_df, :
## calculating paired samples Cohen's d using formula input. Results will be
## incorrect if cases do not appear in the same order for both levels of the
## grouping factor
## [1] 0.2143851

Cluster Analyses

Determining the number of clusters

kent_dataset_between_gather_fp = subset(kent_dataset_between_gather, components =="fp")

kent_dataset_between_gather_fp_spread = spread(data = kent_dataset_between_gather_fp[c("id","greater","accuracy")], value = accuracy, key = greater)

library(parameters)
set.seed(240)
kent_n_clust <- n_clusters(kent_dataset_between_gather_fp_spread[-c(1)],
                      package = c("easystats", "NbClust", "mclust"),
                      standardize = FALSE)
kent_n_clust
## # Method Agreement Procedure:
## 
## The choice of 2 clusters is supported by 6 (20.00%) methods out of 30 (Elbow, Gap_Maechler2012, CCC, Duda, Pseudot2, Beale).
kmax = 10 # the maximum number of clusters we will examine; you can change this
totwss = rep(0,kmax) # will be filled with total sum of within group sum squares
kmfit = list() # create an empty list
for (i in 1:kmax){
  kclus = kmeans(kent_dataset_between_gather_fp_spread[-1],centers=i,iter.max=20)
  totwss[i] = kclus$tot.withinss
  kmfit[[i]] = kclus
}

kmeansAIC = function(fit){
  m = ncol(fit$centers)   # number of variables (columns of the cluster centers)
  n = length(fit$cluster) # number of observations (not used in the formula below)
  k = nrow(fit$centers)   # number of clusters
  D = fit$tot.withinss    # total within-cluster sum of squares
  return(D + 2*m*k)       # AIC-style criterion: misfit plus 2 x (number of estimated center coordinates)
}

aic=sapply(kmfit,kmeansAIC)
#mult.fig(1,main="Simulated data with two clusters")
plot(seq(1,kmax),aic,xlab="Number of clusters",ylab="AIC",pch=20,cex=2)

n = nrow(kent_dataset_between_gather_fp_spread[-1])
rsq = 1-(totwss*(n-1))/(totwss[1]*(n-seq(1,kmax))) # adjusted R-squared: within-cluster variance relative to total, penalized for the number of clusters
cbind(aic,rsq)
##            aic       rsq
##  [1,] 23.48968 0.0000000
##  [2,] 14.90831 0.6435815
##  [3,] 16.31418 0.7761833
##  [4,] 20.18407 0.7817206
##  [5,] 22.37684 0.8753054
##  [6,] 26.36845 0.8750439
##  [7,] 29.83697 0.9025334
##  [8,] 33.09456 0.9415924
##  [9,] 37.24925 0.9329548
## [10,] 41.09879 0.9406890
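
The AIC criterion is smallest at two clusters (14.91), in line with the method-agreement procedure above. For comparison, an analogous BIC-style criterion can be computed the same way; this is a sketch under the same approximation (kmeansBIC is not from any package, it simply swaps the penalty 2 for log(n)):

kmeansBIC = function(fit){
  m = ncol(fit$centers)   # number of variables
  n = length(fit$cluster) # number of observations
  k = nrow(fit$centers)   # number of clusters
  D = fit$tot.withinss    # total within-cluster sum of squares
  return(D + log(n)*m*k)  # heavier penalty than AIC for larger samples
}
bic = sapply(kmfit, kmeansBIC)
plot(seq(1,kmax), bic, xlab = "Number of clusters", ylab = "BIC", pch = 20, cex = 2)
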
set.seed(240)
kent_kmeans.re <- kmeans(kent_dataset_between_gather_fp_spread[-c(1)], centers = 2, nstart = 30)
kent_kmeans.re
## K-means clustering with 2 clusters of sizes 140, 43
## 
## Cluster means:
##          fg        pg
## 1 0.8916667 0.9607143
## 2 0.2790698 0.8759690
## 
## Clustering vector:
##   [1] 1 1 2 1 1 1 2 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 1 2 2 1 2 1 1 1 1 2 1 1
##  [38] 1 2 2 2 1 1 1 2 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 2 1
##  [75] 1 1 2 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 2 1 1 2 1 2 1 1 1
## [112] 1 1 1 1 1 2 1 1 1 2 1 2 1 1 2 1 2 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 2 1 2 1 1
## [149] 1 1 1 1 1 1 1 1 2 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 2
## 
## Within cluster sum of squares by cluster:
## [1] 4.140873 2.767442
##  (between_SS / total_SS =  64.6 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# kent_kmeans.re <- kmeans(kent_dataset_between_gather_fp_spread[-c(1)], centers = 4, nstart = 30)
# kent_kmeans.re

kent_clusterclass = as.data.frame(kent_kmeans.re$cluster)
names(kent_clusterclass) ="cluster"
kent_clusterclass = cbind(kent_dataset_between_gather_fp_spread[1],kent_clusterclass)

kent_clusterclass$cluster = as.factor(as.character(kent_clusterclass$cluster))
levels(kent_clusterclass$cluster)[levels(kent_clusterclass$cluster ) == "1"]  <- "High Performing"
levels(kent_clusterclass$cluster)[levels(kent_clusterclass$cluster ) == "2"]  <- "Percentage Bias"

kent_clusterclass$cluster <- factor(kent_clusterclass$cluster, levels=c("High Performing","Percentage Bias"))
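
A quick sanity check, using the objects above: the relabeled cluster sizes should match the k-means sizes reported earlier (140 high performing, 43 percentage bias).

table(kent_clusterclass$cluster) # expected: High Performing 140, Percentage Bias 43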

Cross-Notation Magnitude Comparison

Fractions vs. Percentages

kent_dataset_between_gather_fp_cluster = kent_dataset_between_gather_fp %>%
  left_join(kent_clusterclass, by = "id")

kent_dataset_between_gather_fp_cluster$greater = as.factor(as.character(kent_dataset_between_gather_fp_cluster$greater))
kent_dataset_between_gather_fp_cluster$greater <- factor(kent_dataset_between_gather_fp_cluster$greater, levels=c("pg","fg"))

# kent_dataset_between_gather_fp_cluster$cluster = as.factor(as.character(kent_dataset_between_gather_fp_cluster$cluster))
# levels(kent_dataset_between_gather_fp_cluster$cluster)[levels(kent_dataset_between_gather_fp_cluster$cluster ) == "1"]  <- "No Bias"
# levels(kent_dataset_between_gather_fp_cluster$cluster)[levels(kent_dataset_between_gather_fp_cluster$cluster ) == "2"]  <- "Percentage Bias"
# kent_dataset_between_gather_fp_cluster$cluster <- factor(kent_dataset_between_gather_fp_cluster$cluster, levels=c("Percentage Bias", "No Bias","Fraction Bias"))

kent_dataset_between_gather_fp_cluster$comparison = "Percent vs. Fraction \nComparisons"


kent_dataset_between_gather_fp_cluster_fp_pb = subset(kent_dataset_between_gather_fp_cluster, cluster == "Percentage Bias")
t.test(accuracy  ~ greater, kent_dataset_between_gather_fp_cluster_fp_pb, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by greater
## t = 16.76, df = 42, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.5250266 0.6687719
## sample estimates:
## mean difference 
##       0.5968992
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_fp_cluster_fp_pb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_fp_cluster_fp_pb, : calculating paired samples
## Cohen's d using formula input. Results will be incorrect if cases do not appear
## in the same order for both levels of the grouping factor
## [1] 2.555888
kent_dataset_between_gather_fp_cluster_fp_nb = subset(kent_dataset_between_gather_fp_cluster, cluster == "High Performing")
t.test(accuracy  ~ greater, kent_dataset_between_gather_fp_cluster_fp_nb, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by greater
## t = 5.4616, df = 139, p-value = 2.112e-07
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.04405127 0.09404397
## sample estimates:
## mean difference 
##      0.06904762
cohensD(accuracy ~  greater, data = kent_dataset_between_gather_fp_cluster_fp_nb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_fp_cluster_fp_nb, : calculating paired samples
## Cohen's d using formula input. Results will be incorrect if cases do not appear
## in the same order for both levels of the grouping factor
## [1] 0.4615872

Decimals vs. Percentages

kent_dataset_between_gather_dp_cluster = kent_dataset_between_gather_dp %>%
  left_join(kent_clusterclass, by = "id")

kent_dataset_between_gather_dp_cluster$greater = as.factor(as.character(kent_dataset_between_gather_dp_cluster$greater))
kent_dataset_between_gather_dp_cluster$greater <- factor(kent_dataset_between_gather_dp_cluster$greater, levels=c("pg","dg"))
kent_dataset_between_gather_dp_cluster$comparison = "Percent vs. Decimal \nComparisons"
summarySE(kent_dataset_between_gather_dp_cluster, "accuracy", c("greater","cluster"))
##   greater         cluster   N  accuracy         sd          se         ci
## 1      pg High Performing 140 0.9726190 0.09076175 0.007670768 0.01516647
## 2      pg Percentage Bias  43 0.9496124 0.08556445 0.013048456 0.02633285
## 3      dg High Performing 140 0.8964286 0.19015314 0.016070873 0.03177497
## 4      dg Percentage Bias  43 0.7170543 0.28529870 0.043507644 0.08780198
# kent_dataset_between_gather_dp_cluster$cluster = as.factor(as.character(kent_dataset_between_gather_dp_cluster$cluster))
# levels(kent_dataset_between_gather_dp_cluster$cluster)[levels(kent_dataset_between_gather_dp_cluster$cluster ) == "1"]  <- "No Bias"
# levels(kent_dataset_between_gather_dp_cluster$cluster)[levels(kent_dataset_between_gather_dp_cluster$cluster ) == "2"]  <- "Percentage Bias"
# 
# kent_dataset_between_gather_dp_cluster$cluster <- factor(kent_dataset_between_gather_dp_cluster$cluster, levels=c("Percentage Bias", "No Bias","Fraction Bias"))

kent_dataset_between_gather_dp_cluster_pb = subset(kent_dataset_between_gather_dp_cluster, cluster == "Percentage Bias")
t.test(accuracy  ~ greater, kent_dataset_between_gather_dp_cluster_pb, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by greater
## t = 5.547, df = 42, p-value = 1.776e-06
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.1479500 0.3171662
## sample estimates:
## mean difference 
##       0.2325581
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_dp_cluster_pb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_dp_cluster_pb, : calculating paired samples Cohen's
## d using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.8459099
kent_dataset_between_gather_dp_cluster_nb = subset(kent_dataset_between_gather_dp_cluster, cluster == "High Performing")
t.test(accuracy  ~ greater, kent_dataset_between_gather_dp_cluster_nb, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by greater
## t = 4.9092, df = 139, p-value = 2.527e-06
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.0455048 0.1068762
## sample estimates:
## mean difference 
##      0.07619048
cohensD(accuracy ~  greater, data = kent_dataset_between_gather_dp_cluster_nb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_dp_cluster_nb, : calculating paired samples Cohen's
## d using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.4149031

Decimals vs. Fractions

kent_dataset_between_gather_df_cluster = kent_dataset_between_gather_df %>%
  left_join(kent_clusterclass, by = "id")

kent_dataset_between_gather_df_cluster$greater = as.factor(as.character(kent_dataset_between_gather_df_cluster$greater))
kent_dataset_between_gather_df_cluster$greater <- factor(kent_dataset_between_gather_df_cluster$greater, levels=c("dg","fg"))
kent_dataset_between_gather_df_cluster$comparison = "Decimal vs. Fraction \nComparisons"
summarySE(kent_dataset_between_gather_df_cluster, "accuracy", c("greater","cluster"))
##   greater         cluster   N  accuracy        sd         se         ci
## 1      dg High Performing 140 0.9107143 0.1836772 0.01552355 0.03069282
## 2      dg Percentage Bias  43 0.7519380 0.2555523 0.03897137 0.07864740
## 3      fg High Performing 140 0.9107143 0.1474683 0.01246334 0.02464224
## 4      fg Percentage Bias  43 0.5077519 0.2087804 0.03183872 0.06425313
kent_dataset_between_gather_df_cluster_pb = subset(kent_dataset_between_gather_df_cluster, cluster == "Percentage Bias")
t.test(accuracy  ~ greater, kent_dataset_between_gather_df_cluster_pb, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by greater
## t = 4.7379, df = 42, p-value = 2.481e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.1401768 0.3481953
## sample estimates:
## mean difference 
##        0.244186
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_df_cluster_pb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_df_cluster_pb, : calculating paired samples Cohen's
## d using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.722526
kent_dataset_between_gather_df_cluster_nb = subset(kent_dataset_between_gather_df_cluster, cluster == "High Performing")
t.test(accuracy  ~ greater, kent_dataset_between_gather_df_cluster_nb, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by greater
## t = -9.2057e-16, df = 139, p-value = 1
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.03551217  0.03551217
## sample estimates:
## mean difference 
##   -1.653439e-17
cohensD(accuracy ~  greater, data = kent_dataset_between_gather_df_cluster_nb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_df_cluster_nb, : calculating paired samples Cohen's
## d using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0

Figure 4

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Figure 4. Percent correct on (A) percent vs. fraction comparisons, (B) percent vs. decimal comparisons, and (C) decimal vs. fraction cross-notation comparisons, based on the two-cluster model: the ‘percentage bias’ cluster (n = 43) and the ‘high performing’ cluster (n = 140). Most participants exhibited a bias to select percentages as larger than fractions and decimals, with the percentage-bias profile showing a more pronounced difference in performance than the high-performing profile. The percentage-bias cluster also demonstrated a bias to select decimals as larger than fractions, whereas the high-performing cluster showed no bias on decimal-fraction comparisons. Gray lines represent individual participants’ average scores in each condition. Thicker gray lines indicate more participants with the same scores. Error bars represent ± 1 Standard Error. Note. ***p < .001.

Within vs Cross Cluster Comparisons (all)

kent_clusterclass_within = kent_clusterclass
names(kent_clusterclass_within)[1] = "ResponseId"
kent_dataset_both_gather_cluster = kent_dataset_both_gather %>%
  left_join(kent_clusterclass_within, by = "ResponseId")

ezANOVA(kent_dataset_both_gather_cluster, dv = .(accuracy), wid = .(ResponseId), within  = .c(cross), between = .(cluster))
## Warning: Converting "ResponseId" to factor for ANOVA.
## Warning: Converting "cross" to factor for ANOVA.
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## $ANOVA
##          Effect DFn DFd         F            p p<.05        ges
## 2       cluster   1 181 127.47113 1.016463e-22     * 0.37113728
## 3         cross   1 181  38.97358 2.973561e-09     * 0.03370609
## 4 cluster:cross   1 181  34.81347 1.749430e-08     * 0.03021696
summarySE(kent_dataset_both_gather_cluster, "accuracy", c("cluster","cross"))
##           cluster         cross   N  accuracy         sd          se         ci
## 1 High Performing  precrossmean 140 0.9238095 0.10042384 0.008487364 0.01678103
## 2 High Performing prewithinmean 140 0.9432540 0.09736466 0.008228815 0.01626983
## 3 Percentage Bias  precrossmean  43 0.6802326 0.11884031 0.018122977 0.03657365
## 4 Percentage Bias prewithinmean  43 0.7906977 0.15891923 0.024234955 0.04890812
kent_dataset_both_gather_cluster_high = subset(kent_dataset_both_gather_cluster, cluster == "High Performing")
t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_high, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by cross
## t = -2.9861, df = 139, p-value = 0.003341
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.032319286 -0.006569603
## sample estimates:
## mean difference 
##     -0.01944444
cohensD(accuracy ~  cross, data = kent_dataset_both_gather_cluster_high, method = "paired")
## Warning in cohensD(accuracy ~ cross, data =
## kent_dataset_both_gather_cluster_high, : calculating paired samples Cohen's d
## using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.2523686
kent_dataset_both_gather_cluster_biased = subset(kent_dataset_both_gather_cluster, cluster == "Percentage Bias")
t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_biased, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by cross
## t = -6.1023, df = 42, p-value = 2.83e-07
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.14699670 -0.07393354
## sample estimates:
## mean difference 
##      -0.1104651
cohensD(accuracy ~  cross, data = kent_dataset_both_gather_cluster_biased, method = "paired")
## Warning in cohensD(accuracy ~ cross, data =
## kent_dataset_both_gather_cluster_biased, : calculating paired samples Cohen's d
## using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.930596

Within vs Cross Cluster Comparisons (no fp)

kent_dataset_within2 = kent_dataset[c("ResponseId","prefvfmean", "predvdmean","prepvpmean")]
kent_dataset_within2$within = (kent_dataset_within2$prefvfmean + kent_dataset_within2$predvdmean + kent_dataset_within2$prepvpmean)/3

kent_dataset_between2 = kent_dataset[c("ResponseId","predgtpmean","prepgtdmean","prefgtdmean","predgtfmean")]
kent_dataset_between2$cross = (kent_dataset_between2$predgtpmean + kent_dataset_between2$prepgtdmean + kent_dataset_between2$prefgtdmean + kent_dataset_between2$predgtfmean)/4

kent_dataset_within_between = kent_dataset_between2[c("ResponseId","cross")] %>%
  left_join(kent_dataset_within2[c("ResponseId","within")], by = "ResponseId")

kent_dataset_within_between_gather = gather(data = kent_dataset_within_between, key = "cross","accuracy", -ResponseId)
kent_dataset_within_between_gather$id = kent_dataset_within_between_gather$ResponseId
kent_dataset_within_between_gather_cluster = kent_dataset_within_between_gather %>%
  left_join(kent_clusterclass, by = "id")

ezANOVA(kent_dataset_within_between_gather_cluster, dv = .(accuracy), wid = .(ResponseId), within  = .c(cross), between = .(cluster))
## Warning: Converting "ResponseId" to factor for ANOVA.
## Warning: Converting "cross" to factor for ANOVA.
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## $ANOVA
##          Effect DFn DFd         F            p p<.05         ges
## 2       cluster   1 181 86.932545 3.942022e-17     * 0.285886430
## 3         cross   1 181 18.063233 3.416846e-05     * 0.016341474
## 4 cluster:cross   1 181  5.457946 2.057454e-02     * 0.004994661
summarySE(kent_dataset_within_between_gather_cluster, "accuracy", c("cluster","cross"))
##           cluster  cross   N  accuracy         sd          se         ci
## 1 High Performing  cross 140 0.9226190 0.11249072 0.009507201 0.01879743
## 2 High Performing within 140 0.9432540 0.09736466 0.008228815 0.01626983
## 3 Percentage Bias  cross  43 0.7315891 0.13027949 0.019867436 0.04009411
## 4 Percentage Bias within  43 0.7906977 0.15891923 0.024234955 0.04890812
kent_dataset_both_gather_cluster_high = subset(kent_dataset_within_between_gather_cluster, cluster == "High Performing")
t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_high, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by cross
## t = -2.9981, df = 139, p-value = 0.00322
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.03424338 -0.00702646
## sample estimates:
## mean difference 
##     -0.02063492
cohensD(accuracy ~  cross, data = kent_dataset_both_gather_cluster_high, method = "paired")
## Warning in cohensD(accuracy ~ cross, data =
## kent_dataset_both_gather_cluster_high, : calculating paired samples Cohen's d
## using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.2533818
kent_dataset_both_gather_cluster_biased = subset(kent_dataset_within_between_gather_cluster, cluster == "Percentage Bias")
t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_biased, paired = T)
## 
##  Paired t-test
## 
## data:  accuracy by cross
## t = -3.0176, df = 42, p-value = 0.004316
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.09863852 -0.01957853
## sample estimates:
## mean difference 
##     -0.05910853
cohensD(accuracy ~  cross, data = kent_dataset_both_gather_cluster_biased, method = "paired")
## Warning in cohensD(accuracy ~ cross, data =
## kent_dataset_both_gather_cluster_biased, : calculating paired samples Cohen's d
## using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.4601801

Math Measures

Databases

Fraction Arithmetic Calculation

t.test(pre_arithmetic_mean ~ cluster, kent_dataset_intervention_generamath_measures,var.equal =T)
## 
##  Two Sample t-test
## 
## data:  pre_arithmetic_mean by cluster
## t = 3.0007, df = 181, p-value = 0.003074
## alternative hypothesis: true difference in means between group High Performing and group Percentage Bias is not equal to 0
## 95 percent confidence interval:
##  0.05897153 0.28546368
## sample estimates:
## mean in group High Performing mean in group Percentage Bias 
##                     0.7303571                     0.5581395

Fraction Arithmetic Estimation

t.test(preluremean ~ cluster, kent_dataset_intervention_generamath_measures,var.equal =T)
## 
##  Two Sample t-test
## 
## data:  preluremean by cluster
## t = 3.6728, df = 181, p-value = 0.0003157
## alternative hypothesis: true difference in means between group High Performing and group Percentage Bias is not equal to 0
## 95 percent confidence interval:
##  0.08119165 0.26970536
## sample estimates:
## mean in group High Performing mean in group Percentage Bias 
##                     0.4576190                     0.2821705
kent_dataset_intervention_generamath_measures_pb = subset(kent_dataset_intervention_generamath_measures, cluster =="Percentage Bias")
t.test(kent_dataset_intervention_generamath_measures_pb$preluremean, mu = .33)
## 
##  One Sample t-test
## 
## data:  kent_dataset_intervention_generamath_measures_pb$preluremean
## t = -1.426, df = 42, p-value = 0.1613
## alternative hypothesis: true mean is not equal to 0.33
## 95 percent confidence interval:
##  0.2144798 0.3498613
## sample estimates:
## mean of x 
## 0.2821705
kent_dataset_intervention_generamath_measures_hf = subset(kent_dataset_intervention_generamath_measures, cluster =="High Performing")
t.test(kent_dataset_intervention_generamath_measures_hf$preluremean, mu = .33)
## 
##  One Sample t-test
## 
## data:  kent_dataset_intervention_generamath_measures_hf$preluremean
## t = 5.2372, df = 139, p-value = 5.902e-07
## alternative hypothesis: true mean is not equal to 0.33
## 95 percent confidence interval:
##  0.4094395 0.5057986
## sample estimates:
## mean of x 
##  0.457619
### Ordinal (proportional odds) logistic regression via polr (exploratory, commented out)
# kent_dataset_intervention_generamath_measures$preluremean_factor = as.factor(kent_dataset_intervention_generamath_measures$preluremean)
# m <- polr(preluremean_factor ~ as.numeric(cluster), data = kent_dataset_intervention_generamath_measures, Hess=TRUE)
# summary(m)
# 
# (ctable <- coef(summary(m)))
# 
# p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
# 
# ## combined table
# (ctable <- cbind(ctable, "p value" = p))
# exp(coef(m))
# (ci <- confint(m)) #
# 
# exp(cbind(OR = coef(m), ci))

#model <- polr(response ~ predictor1 + predictor2, data = your_data)
############

Number Line Estimation

t.test(PRE_PAE_indiv ~ cluster, kent_dataset_intervention_generamath_measures,var.equal =T)
## 
##  Two Sample t-test
## 
## data:  PRE_PAE_indiv by cluster
## t = -4.8415, df = 181, p-value = 2.753e-06
## alternative hypothesis: true difference in means between group High Performing and group Percentage Bias is not equal to 0
## 95 percent confidence interval:
##  -0.10717140 -0.04510925
## sample estimates:
## mean in group High Performing mean in group Percentage Bias 
##                     0.1296439                     0.2057842

Number Line Confidence

t.test(preconfidmean ~ cluster, kent_dataset_intervention_generamath_measures,var.equal =T)
## 
##  Two Sample t-test
## 
## data:  preconfidmean by cluster
## t = 4.3706, df = 181, p-value = 2.085e-05
## alternative hypothesis: true difference in means between group High Performing and group Percentage Bias is not equal to 0
## 95 percent confidence interval:
##  11.06789 29.28609
## sample estimates:
## mean in group High Performing mean in group Percentage Bias 
##                      65.41653                      45.23953

ACT

t.test(Q1741_MathACT ~ cluster, kent_dataset_intervention_generamath_measures,var.equal =T)
## 
##  Two Sample t-test
## 
## data:  Q1741_MathACT by cluster
## t = 3.2411, df = 114, p-value = 0.001561
## alternative hypothesis: true difference in means between group High Performing and group Percentage Bias is not equal to 0
## 95 percent confidence interval:
##  1.490139 6.175356
## sample estimates:
## mean in group High Performing mean in group Percentage Bias 
##                      22.75275                      18.92000
summarySE(kent_dataset_intervention_generamath_measures, "Q1741_MathACT", "cluster", na.rm =T)
##           cluster  N Q1741_MathACT       sd        se       ci
## 1 High Performing 91      22.75275 5.552464 0.5820564 1.156357
## 2 Percentage Bias 25      18.92000 3.828838 0.7657676 1.580467

Figure 5

## Warning in geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.25, :
## Ignoring unknown parameters: `shape`
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
## Warning: Removed 67 rows containing missing values or values outside the scale range
## (`stat_bindot()`).
## Warning: Removed 67 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 67 rows containing missing values or values outside the scale range
## (`geom_line()`).

Figure 5. Descriptive statistics for measures (B-F), collected in Study 2 only, by cluster: percentage bias (or biased) and high performing. Note. For the SAT scores from Study 1, the ‘biased’ cluster depicted here combines the three biased clusters from Study 1, with dots in red (strong percentage bias), yellow (moderate percentage bias), and blue (fraction bias). Error bars represent ± 1 Standard Error. The dashed line in Part C represents chance performance for that task. Note. *p < .05, **p < .01, ***p < .001; exact test statistics appear in the Supplemental Materials.

Study 2 Supplementary Analyses

Cross-Notation Comparison Accuracy (Overall)

Figure 2

Figure 2. Percent correct for cross-notation magnitude comparison. (A.) percent vs. fraction comparisons (e.g., 2/5 vs. 25%), (B.) percent vs. decimal comparisons (e.g., 40% vs. .25), and (C.) decimal vs. fraction comparisons (e.g., .40 vs. 1/4). Gray lines represent individual participants’ average scores in each of the conditions. Thicker gray lines indicate more participants with the same scores. Error bars represent ± 1 Standard Error. Note. ***p < .001.

Within Notation Magnitude Comparison

kent_dataset_within_gather_cluster = kent_dataset_within_gather
names(kent_dataset_within_gather_cluster)[1] = "id"
kent_dataset_within_gather_cluster = kent_dataset_within_gather_cluster %>%
  left_join(kent_clusterclass, by = "id")

kent_dataset_within_gather_cluster$within = as.factor(as.character(kent_dataset_within_gather_cluster$within))


kent_dataset_within_gather_cluster$within <- factor(kent_dataset_within_gather_cluster$within, levels=c("prepvpmean", "prefvfmean","predvdmean"))
levels(kent_dataset_within_gather_cluster$within)[levels(kent_dataset_within_gather_cluster$within ) == "prepvpmean"]  <- "Percent to \nPercent"
levels(kent_dataset_within_gather_cluster$within)[levels(kent_dataset_within_gather_cluster$within ) == "prefvfmean"]  <- "Fraction to \nFraction"
levels(kent_dataset_within_gather_cluster$within)[levels(kent_dataset_within_gather_cluster$within ) == "predvdmean"]  <- "Decimal to \nDecimal"


# kent_dataset_within_gather_cluster$cluster = as.factor(as.character(kent_dataset_within_gather_cluster$cluster))
# levels(kent_dataset_within_gather_cluster$cluster)[levels(kent_dataset_within_gather_cluster$cluster ) == "1"]  <- "No Bias"
# levels(kent_dataset_within_gather_cluster$cluster)[levels(kent_dataset_within_gather_cluster$cluster ) == "2"]  <- "Percentage Bias"
# 
# kent_dataset_within_gather_cluster$cluster <- factor(kent_dataset_within_gather_cluster$cluster, levels=c("Percentage Bias", "No Bias","Fraction Bias"))

summarySE(kent_dataset_within_gather_cluster, "accuracy", c("within","cluster"))
##                   within         cluster   N  accuracy         sd          se
## 1   Percent to \nPercent High Performing 140 0.9857143 0.06775406 0.005726264
## 2   Percent to \nPercent Percentage Bias  43 0.9883721 0.05628899 0.008583991
## 3 Fraction to \nFraction High Performing 140 0.9142857 0.16903085 0.014285714
## 4 Fraction to \nFraction Percentage Bias  43 0.6511628 0.26680045 0.040686687
## 5   Decimal to \nDecimal High Performing 140 0.9297619 0.16002080 0.013524226
## 6   Decimal to \nDecimal Percentage Bias  43 0.7325581 0.30021935 0.045783022
##           ci
## 1 0.01132184
## 2 0.01732320
## 3 0.02824540
## 4 0.08210906
## 5 0.02673980
## 6 0.09239388
ezANOVA(subset(kent_dataset_within_gather_cluster, cluster=="High Performing"), dv = .(accuracy), wid = .(id), within  = .c(within))
## Warning: Converting "id" to factor for ANOVA.
## $ANOVA
##   Effect DFn DFd        F            p p<.05        ges
## 2 within   2 278 13.03602 3.876991e-06     * 0.04616537
## 
## $`Mauchly's Test for Sphericity`
##   Effect         W         p p<.05
## 2 within 0.9912508 0.5453375      
## 
## $`Sphericity Corrections`
##   Effect       GGe       p[GG] p[GG]<.05      HFe        p[HF] p[HF]<.05
## 2 within 0.9913267 4.20382e-06         * 1.005608 3.876991e-06         *
pairwise.t.test(subset(kent_dataset_within_gather_cluster, cluster=="High Performing")$accuracy, subset(kent_dataset_within_gather_cluster, cluster=="High Performing")$within, p.adj = "none", paired = T)
## 
##  Pairwise comparisons using paired t tests 
## 
## data:  subset(kent_dataset_within_gather_cluster, cluster == "High Performing")$accuracy and subset(kent_dataset_within_gather_cluster, cluster == "High Performing")$within 
## 
##                        Percent to \nPercent Fraction to \nFraction
## Fraction to \nFraction 3.3e-06              -                     
## Decimal to \nDecimal   0.00012              0.31323               
## 
## P value adjustment method: none
ezANOVA(subset(kent_dataset_within_gather_cluster, cluster=="Percentage Bias"), dv = .(accuracy), wid = .(id), within  = .c(within))
## Warning: Converting "id" to factor for ANOVA.
## $ANOVA
##   Effect DFn DFd        F            p p<.05       ges
## 2 within   2  84 30.01456 1.461559e-10     * 0.2782127
## 
## $`Mauchly's Test for Sphericity`
##   Effect         W         p p<.05
## 2 within 0.9351018 0.2527011      
## 
## $`Sphericity Corrections`
##   Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
## 2 within 0.9390569 4.701758e-10         * 0.9814954 2.083737e-10         *
pairwise.t.test(subset(kent_dataset_within_gather_cluster, cluster=="Percentage Bias")$accuracy, subset(kent_dataset_within_gather_cluster, cluster=="Percentage Bias")$within, p.adj = "none", paired = T)
## 
##  Pairwise comparisons using paired t tests 
## 
## data:  subset(kent_dataset_within_gather_cluster, cluster == "Percentage Bias")$accuracy and subset(kent_dataset_within_gather_cluster, cluster == "Percentage Bias")$within 
## 
##                        Percent to \nPercent Fraction to \nFraction
## Fraction to \nFraction 2.6e-10              -                     
## Decimal to \nDecimal   7.8e-07              0.12                  
## 
## P value adjustment method: none
graph_kent_within_cluster = ggplot(kent_dataset_within_gather_cluster, aes(x = interaction(within), y = accuracy)) +
  geom_bar(stat = "identity", data = summarySE(kent_dataset_within_gather_cluster, "accuracy", c("within","cluster")),
           fill = NA, aes(color = as.factor(within)), size = 1, width = 0.55) +
  stat_summary(fun.data = data_summary, geom = "errorbar",
               position = position_dodge(width = 0.10), width = .05, colour = "black", size =0.5)+
  scale_y_continuous(breaks=seq(0, 1, .25), limits=c(0,1.3),trans = shift_trans(0), expand = c(0,0))+
  scale_color_manual(values = c("#1b7837","#e08214","#40004b"))+
  #scale_color_manual(values = c("#1b7837","#40004b"))+
  geom_line(aes(group = interaction (id)),
            alpha = 0.15,
            size = .25, colour = "#737373") +
  geom_hline(yintercept = .5, linetype = 2, size = .5)+
  ylab("Accuracy")+
  facet_grid(.~cluster)+
  #scale_x_discrete(labels=c("dg" = "Decimal \n> \nFraction", "fg" = "Fraction \n> \nDecimal"))+
  stat_summary(fun.data = data_summary, geom = "errorbar",
               position = position_dodge(width = 0.10), width = 0.001, colour = "black", size =.5)+
  theme_bw()+
  theme(legend.position="none",
        axis.title.x=element_blank(),
        axis.text.x =  element_text(size=9),
        #axis.title.x =  element_text(size = size_text),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "white", colour = "grey50"),
        strip.background =element_rect(fill="#f0f0f0"),
        strip.text = element_text(size = size_textb),
        axis.text.y =  element_text(size=size_text),
        axis.title.y =  element_text(size=size_text),
        legend.text=element_text(size=size_text))
graph_kent_within_cluster

Figure 3. Within-Notation comparison accuracy based on the two-cluster model: (A) ‘high performing’ cluster (n=140) and (B) ‘percentages-are-larger bias’ cluster (n=43). Gray lines represent individual participants’ average scores in each of the conditions. Thicker gray lines indicate more participants with the same scores. Error bars represent ± 1 Standard Error. Note. *p<.05, **p<.01, ***p<.001

Cluster Analyses - All trial types

Determining the number of clusters (all trials)

kent_dataset_between_gather_spread = (kent_dataset_between_gather)

kent_dataset_between_gather_spread = spread(data = kent_dataset_between_gather_spread[c("id","type","accuracy")], value = accuracy, key = type)

library(parameters)
set.seed(240)
kent_n_clust2 <- n_clusters(kent_dataset_between_gather_spread[-c(1)],
                      package = c("easystats", "NbClust", "mclust"),
                      standardize = FALSE)
kent_n_clust2
## # Method Agreement Procedure:
## 
## The choice of 2 clusters is supported by 10 (35.71%) methods out of 28 (Elbow, Silhouette, Ch, CCC, DB, Duda, Pseudot2, Beale, Ratkowsky, Mcclain).
set.seed(240)
kent_kmeans.re2 <- kmeans(kent_dataset_between_gather_spread[-c(1)], centers = 2, nstart = 30)
kent_kmeans.re2
## K-means clustering with 2 clusters of sizes 130, 53
## 
## Cluster means:
##   predgtfmean predgtpmean prefgtdmean prefgtpmean prepgtdmean prepgtfmean
## 1   0.9487179   0.9371795   0.9320513   0.9000000   0.9858974   0.9782051
## 2   0.6886792   0.6509434   0.5314465   0.3742138   0.9213836   0.8490566
## 
## Clustering vector:
##   [1] 1 1 2 1 1 1 2 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 1 2 2 1 2 2 1 1 1 2 1 1
##  [38] 1 2 2 2 1 1 1 2 1 2 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 2 1
##  [75] 1 1 2 1 1 1 2 1 2 1 1 2 2 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 2 2 1 2 1 2 1 1 2
## [112] 1 2 1 1 1 2 1 2 1 2 1 2 1 1 2 1 2 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 2 1 2 1 1
## [149] 2 1 1 1 1 1 1 1 2 2 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 2 2
## 
## Within cluster sum of squares by cluster:
## [1]  9.046154 17.007338
##  (between_SS / total_SS =  46.7 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
kent_clusterclass2 = as.data.frame(kent_kmeans.re2$cluster)
names(kent_clusterclass2) ="cluster"
kent_clusterclass2 = cbind(kent_dataset_between_gather_spread[1],kent_clusterclass2)

kent_clusterclass2$cluster = as.factor(as.character(kent_clusterclass2$cluster))
levels(kent_clusterclass2$cluster)[levels(kent_clusterclass2$cluster ) == "1"]  <- "High Performing"
levels(kent_clusterclass2$cluster)[levels(kent_clusterclass2$cluster ) == "2"]  <- "Percentage Bias"

kent_clusterclass2$cluster <- factor(kent_clusterclass2$cluster, levels=c("High Performing","Percentage Bias"))
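
To see how this six-trial-type solution lines up with the earlier fraction-percent-only solution, the two cluster assignments can be cross-tabulated after matching on id (a sketch; cluster_compare is an illustrative name):

cluster_compare = merge(kent_clusterclass, kent_clusterclass2, by = "id",
                        suffixes = c("_fp_only", "_all_trials"))
table(cluster_compare$cluster_fp_only, cluster_compare$cluster_all_trials)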

kent_dataset_between_gather_fp_cluster = kent_dataset_between_gather_fp %>%
  left_join(kent_clusterclass2, by = "id")
kent_dataset_between_gather_fp_cluster$greater = as.factor(as.character(kent_dataset_between_gather_fp_cluster$greater))
kent_dataset_between_gather_fp_cluster$greater <- factor(kent_dataset_between_gather_fp_cluster$greater, levels=c("pg","fg"))
kent_dataset_between_gather_fp_cluster$comparison = "Percent vs. Fraction \nComparisons"

kent_dataset_between_gather_dp_cluster = kent_dataset_between_gather_dp %>%
  left_join(kent_clusterclass2, by = "id")

kent_dataset_between_gather_dp_cluster$greater = as.factor(as.character(kent_dataset_between_gather_dp_cluster$greater))
kent_dataset_between_gather_dp_cluster$greater <- factor(kent_dataset_between_gather_dp_cluster$greater, levels=c("pg","dg"))
kent_dataset_between_gather_dp_cluster$comparison = "Percent vs. Decimal \nComparisons"
summarySE(kent_dataset_between_gather_dp_cluster, "accuracy", c("greater","cluster"))
##   greater         cluster   N  accuracy         sd         se          ci
## 1      pg High Performing 130 0.9858974 0.04656419 0.00408395 0.008080194
## 2      pg Percentage Bias  53 0.9213836 0.14098165 0.01936532 0.038859356
## 3      dg High Performing 130 0.9371795 0.11787924 0.01033869 0.020455357
## 4      dg Percentage Bias  53 0.6509434 0.29825732 0.04096879 0.082209898

Figure