## Warning in
## read.spss("/Volumes/mrlab/data/ram_translation_task/ram_crossnotation/kent_datafiles/11-20-21
## KSU intervention data.sav", :
## /Volumes/mrlab/data/ram_translation_task/ram_crossnotation/kent_datafiles/11-20-21
## KSU intervention data.sav: Very long string record(s) found (record type 7,
## subtype 14), each will be imported in consecutive separate variables
## [1] 183
kent_dataset_both = kent_dataset[c("ResponseId","id", "precrossmean","prewithinmean")]
nrow(kent_dataset)
## [1] 183
kent_dataset_both_gather = gather(kent_dataset_both, "cross","accuracy", -c(ResponseId, id))
summarySE(kent_dataset_both_gather, "accuracy", "cross")
## cross N accuracy sd se ci
## 1 precrossmean 183 0.8665756 0.1472588 0.010885681 0.02147836
## 2 prewithinmean 183 0.9074074 0.1314340 0.009715884 0.01917026
t.test(accuracy ~ cross, kent_dataset_both_gather, paired =T)
##
## Paired t-test
##
## data: accuracy by cross
## t = -5.733, df = 182, p-value = 4.035e-08
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.05488461 -0.02677902
## sample estimates:
## mean difference
## -0.04083182
cohensD(accuracy ~ cross, data = kent_dataset_both_gather, method = "paired")
## Warning in cohensD(accuracy ~ cross, data = kent_dataset_both_gather, method =
## "paired"): calculating paired samples Cohen's d using formula input. Results
## will be incorrect if cases do not appear in the same order for both levels of
## the grouping factor
## [1] 0.4237952
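The warning above is generic to formula input with method = "paired". As a sanity check (a minimal sketch, assuming kent_dataset_both from above is still in memory), the paired d can be computed directly from the wide columns, which does not depend on row order:
# Paired Cohen's d = |mean difference| / SD of the per-participant differences;
# should match the magnitude reported above (and |t| / sqrt(n)).
diffs = kent_dataset_both$precrossmean - kent_dataset_both$prewithinmean
abs(mean(diffs)) / sd(diffs)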
#########################################################
# kent_clusterclass_within = kent_clusterclass
# names(kent_clusterclass_within)[1] = "ResponseId"
# kent_dataset_both_gather_cluster = kent_dataset_both_gather %>%
# left_join(kent_clusterclass_within, by = "ResponseId")
#
# ezANOVA(kent_dataset_both_gather_cluster, dv = .(accuracy), wid = .(ResponseId), within = .c(cross), between = .(cluster))
#
# summarySE(kent_dataset_both_gather_cluster, "accuracy", c("cluster","cross"))
#
# kent_dataset_both_gather_cluster_high = subset(kent_dataset_both_gather_cluster, cluster == "High Performing")
# t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_high, paired = T)
# cohensD(accuracy ~ cross, data = kent_dataset_both_gather_cluster_high, method = "paired")
#
# kent_dataset_both_gather_cluster_biased = subset(kent_dataset_both_gather_cluster, cluster == "Percentage Bias")
# t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_biased, paired = T)
# cohensD(accuracy ~ cross, data = kent_dataset_both_gather_cluster_biased, method = "paired")
#########################################################
kent_dataset_within = kent_dataset[c("ResponseId","prefvfmean", "predvdmean","prepvpmean")]
nrow(kent_dataset_within)
## [1] 183
kent_dataset_within_gather = gather(kent_dataset_within, "within","accuracy", -c(ResponseId))
ezANOVA(kent_dataset_within_gather, dv = .(accuracy), wid = .(ResponseId), within = .c(within))
## Warning: Converting "ResponseId" to factor for ANOVA.
## Warning: Converting "within" to factor for ANOVA.
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 2 within 2 364 35.5594 7.831115e-15 * 0.08799465
##
## $`Mauchly's Test for Sphericity`
## Effect W p p<.05
## 2 within 0.9967719 0.7463059
##
## $`Sphericity Corrections`
## Effect GGe p[GG] p[GG]<.05 HFe p[HF] p[HF]<.05
## 2 within 0.9967822 8.582718e-15 * 1.007804 7.831115e-15 *
summarySE(kent_dataset_within_gather, "accuracy", "within")
## within N accuracy sd se ci
## 1 predvdmean 183 0.8834244 0.21768216 0.016091528 0.031749938
## 2 prefvfmean 183 0.8524590 0.22530341 0.016654907 0.032861532
## 3 prepvpmean 183 0.9863388 0.06510358 0.004812595 0.009495654
agg_all_participants8_accuracy_gather_within_nofvf = subset(kent_dataset_within_gather, within != "prefvfmean")
t.test(accuracy ~ within, agg_all_participants8_accuracy_gather_within_nofvf, paired = T)
##
## Paired t-test
##
## data: accuracy by within
## t = -6.3674, df = 182, p-value = 1.523e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.13480484 -0.07102394
## sample estimates:
## mean difference
## -0.1029144
cohensD(accuracy ~ within, data = agg_all_participants8_accuracy_gather_within_nofvf, method = "paired")
## Warning in cohensD(accuracy ~ within, data =
## agg_all_participants8_accuracy_gather_within_nofvf, : calculating paired
## samples Cohen's d using formula input. Results will be incorrect if cases do
## not appear in the same order for both levels of the grouping factor
## [1] 0.4706906
agg_all_participants8_accuracy_gather_within_nodvd = subset(kent_dataset_within_gather, within != "predvdmean")
t.test(accuracy ~ within, agg_all_participants8_accuracy_gather_within_nodvd, paired = T)
##
## Paired t-test
##
## data: accuracy by within
## t = -7.8935, df = 182, p-value = 2.665e-13
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.1673448 -0.1004148
## sample estimates:
## mean difference
## -0.1338798
cohensD(accuracy ~ within, data = agg_all_participants8_accuracy_gather_within_nodvd, method = "paired")
## Warning in cohensD(accuracy ~ within, data =
## agg_all_participants8_accuracy_gather_within_nodvd, : calculating paired
## samples Cohen's d using formula input. Results will be incorrect if cases do
## not appear in the same order for both levels of the grouping factor
## [1] 0.5835042
agg_all_participants8_accuracy_gather_within_nopvp = subset(kent_dataset_within_gather, within != "prepvpmean")
t.test(accuracy ~ within, agg_all_participants8_accuracy_gather_within_nopvp, paired = T)
##
## Paired t-test
##
## data: accuracy by within
## t = 1.8507, df = 182, p-value = 0.06584
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.002048583 0.063979366
## sample estimates:
## mean difference
## 0.03096539
cohensD(accuracy ~ within, data = agg_all_participants8_accuracy_gather_within_nopvp, method = "paired")
## Warning in cohensD(accuracy ~ within, data =
## agg_all_participants8_accuracy_gather_within_nopvp, : calculating paired
## samples Cohen's d using formula input. Results will be incorrect if cases do
## not appear in the same order for both levels of the grouping factor
## [1] 0.136804
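The three pairwise contrasts above can also be obtained in a single call, as is done later for the cluster-level analyses; a sketch with no p-value adjustment, mirroring the uncorrected tests reported above:
# Paired pairwise comparisons across the three within-notation conditions;
# relies on each condition block being in the same participant order.
pairwise.t.test(kent_dataset_within_gather$accuracy,
                kent_dataset_within_gather$within,
                paired = TRUE, p.adj = "none")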
kent_dataset_between = kent_dataset[c("ResponseId","prefgtpmean","prepgtfmean","predgtpmean","prepgtdmean","prefgtdmean","predgtfmean")]
names(kent_dataset_between) = c("id","fp_fg_prefgtpmean","fp_pg_prepgtfmean","dp_dg_predgtpmean","dp_pg_prepgtdmean","df_fg_prefgtdmean","df_dg_predgtfmean")
kent_dataset_between_gather = gather(kent_dataset_between, key = "type", value = "accuracy", -id)
kent_dataset_between_gather = separate(kent_dataset_between_gather, col = "type", into = c("components","greater","type"), sep = "_", remove = F)
kent_dataset_between_gather_fp = subset(kent_dataset_between_gather, components =="fp")
kent_dataset_between_gather_fp$greater = as.factor(as.character(kent_dataset_between_gather_fp$greater))
kent_dataset_between_gather_fp$greater <- factor(kent_dataset_between_gather_fp$greater, levels=c("pg","fg"))
kent_dataset_between_gather_fp$comparison = "Percent vs. Fraction \nComparisons"
summarySE(kent_dataset_between_gather_fp, "accuracy", "greater")
## greater N accuracy sd se ci
## 1 pg 183 0.9408015 0.1341508 0.009916709 0.01956650
## 2 fg 183 0.7477231 0.2984790 0.022064203 0.04353453
t.test(accuracy ~ greater, kent_dataset_between_gather_fp, paired = T)
##
## Paired t-test
##
## data: accuracy by greater
## t = 9.2324, df = 182, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.1518148 0.2343418
## sample estimates:
## mean difference
## 0.1930783
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_fp, method = "paired")
## Warning in cohensD(accuracy ~ greater, data = kent_dataset_between_gather_fp, :
## calculating paired samples Cohen's d using formula input. Results will be
## incorrect if cases do not appear in the same order for both levels of the
## grouping factor
## [1] 0.6824759
kent_dataset_between_gather_dp = subset(kent_dataset_between_gather, components =="dp")
kent_dataset_between_gather_dp$greater = as.factor(as.character(kent_dataset_between_gather_dp$greater))
kent_dataset_between_gather_dp$greater <- factor(kent_dataset_between_gather_dp$greater, levels=c("pg","dg"))
kent_dataset_between_gather_dp$comparison = "Percent vs. Decimal \nComparisons"
summarySE(kent_dataset_between_gather_dp, "accuracy", "greater")
## greater N accuracy sd se ci
## 1 pg 183 0.9672131 0.08987003 0.006643384 0.01310796
## 2 dg 183 0.8542805 0.22850489 0.016891567 0.03332848
t.test(accuracy ~ greater, kent_dataset_between_gather_dp, paired = T)
##
## Paired t-test
##
## data: accuracy by greater
## t = 7.0012, df = 182, p-value = 4.728e-11
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.08110564 0.14475957
## sample estimates:
## mean difference
## 0.1129326
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_dp, method = "paired")
## Warning in cohensD(accuracy ~ greater, data = kent_dataset_between_gather_dp, :
## calculating paired samples Cohen's d using formula input. Results will be
## incorrect if cases do not appear in the same order for both levels of the
## grouping factor
## [1] 0.5175403
kent_dataset_between_gather_df = subset(kent_dataset_between_gather, components =="df")
kent_dataset_between_gather_df$greater = as.factor(as.character(kent_dataset_between_gather_df$greater))
kent_dataset_between_gather_df$greater <- factor(kent_dataset_between_gather_df$greater, levels=c("dg","fg"))
kent_dataset_between_gather_df$comparison = "Decimal vs. Fraction \nComparisons"
summarySE(kent_dataset_between_gather_df, "accuracy", "greater")
## greater N accuracy sd se ci
## 1 dg 183 0.8734062 0.2130584 0.01574973 0.03107554
## 2 fg 183 0.8160291 0.2366804 0.01749592 0.03452092
t.test(accuracy ~ greater, kent_dataset_between_gather_df, paired = T)
##
## Paired t-test
##
## data: accuracy by greater
## t = 2.9001, df = 182, p-value = 0.004189
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.01834119 0.09641290
## sample estimates:
## mean difference
## 0.05737705
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_df, method = "paired")
## Warning in cohensD(accuracy ~ greater, data = kent_dataset_between_gather_df, :
## calculating paired samples Cohen's d using formula input. Results will be
## incorrect if cases do not appear in the same order for both levels of the
## grouping factor
## [1] 0.2143851
kent_dataset_between_gather_fp = subset(kent_dataset_between_gather, components =="fp")
kent_dataset_between_gather_fp_spread = spread(data = kent_dataset_between_gather_fp[c("id","greater","accuracy")], value = accuracy, key = greater)
library(parameters)
set.seed(240)
kent_n_clust <- n_clusters(kent_dataset_between_gather_fp_spread[-c(1)],
package = c("easystats", "NbClust", "mclust"),
standardize = FALSE)
kent_n_clust
## # Method Agreement Procedure:
##
## The choice of 2 clusters is supported by 6 (20.00%) methods out of 30 (Elbow, Gap_Maechler2012, CCC, Duda, Pseudot2, Beale).
kmax = 10 # the maximum number of clusters to examine; adjust as needed
totwss = rep(0,kmax) # will hold the total within-group sum of squares for each k
kmfit = list() # create an empty list to store the fitted k-means solutions
for (i in 1:kmax){
  kclus = kmeans(kent_dataset_between_gather_fp_spread[-1], centers = i, iter.max = 20)
  totwss[i] = kclus$tot.withinss
  kmfit[[i]] = kclus
}
kmeansAIC = function(fit){
  m = ncol(fit$centers)    # number of clustering variables
  n = length(fit$cluster)  # number of observations (not used in the criterion below)
  k = nrow(fit$centers)    # number of clusters
  D = fit$tot.withinss     # total within-cluster sum of squares
  return(D + 2*m*k)        # AIC-style criterion: fit term plus a 2-per-parameter penalty (m*k center coordinates)
}
aic=sapply(kmfit,kmeansAIC)
#mult.fig(1,main="Simulated data with two clusters")
plot(seq(1,kmax),aic,xlab="Number of clusters",ylab="AIC",pch=20,cex=2)
n = nrow(kent_dataset_between_gather_fp_spread[-1])
rsq = 1-(totwss*(n-1))/(totwss[1]*(n-seq(1,kmax))) # adjusted R-squared for each k, relative to the one-cluster (total SS) solution
cbind(aic,rsq)
## aic rsq
## [1,] 23.48968 0.0000000
## [2,] 14.90831 0.6435815
## [3,] 16.31418 0.7761833
## [4,] 20.18407 0.7817206
## [5,] 22.37684 0.8753054
## [6,] 26.36845 0.8750439
## [7,] 29.83697 0.9025334
## [8,] 33.09456 0.9415924
## [9,] 37.24925 0.9329548
## [10,] 41.09879 0.9406890
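The same loop output also supports a BIC-style criterion, which penalizes additional clusters more heavily (log(n) rather than 2 per parameter). This sketch is added for illustration and is not part of the original model-selection procedure:
# BIC-style companion to kmeansAIC(); heavier penalty per center coordinate.
kmeansBIC = function(fit){
  m = ncol(fit$centers)    # number of clustering variables
  k = nrow(fit$centers)    # number of clusters
  n = length(fit$cluster)  # number of observations
  fit$tot.withinss + log(n)*m*k
}
bic = sapply(kmfit, kmeansBIC)
plot(seq(1,kmax), bic, xlab = "Number of clusters", ylab = "BIC", pch = 20, cex = 2)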
set.seed(240)
kent_kmeans.re <- kmeans(kent_dataset_between_gather_fp_spread[-c(1)], centers = 2, nstart = 30)
kent_kmeans.re
## K-means clustering with 2 clusters of sizes 140, 43
##
## Cluster means:
## fg pg
## 1 0.8916667 0.9607143
## 2 0.2790698 0.8759690
##
## Clustering vector:
## [1] 1 1 2 1 1 1 2 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 1 2 2 1 2 1 1 1 1 2 1 1
## [38] 1 2 2 2 1 1 1 2 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 2 1
## [75] 1 1 2 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 2 1 1 2 1 2 1 1 1
## [112] 1 1 1 1 1 2 1 1 1 2 1 2 1 1 2 1 2 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 2 1 2 1 1
## [149] 1 1 1 1 1 1 1 1 2 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 2
##
## Within cluster sum of squares by cluster:
## [1] 4.140873 2.767442
## (between_SS / total_SS = 64.6 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# kent_kmeans.re <- kmeans(kent_dataset_between_gather_fp_spread[-c(1)], centers = 4, nstart = 30)
# kent_kmeans.re
kent_clusterclass = as.data.frame(kent_kmeans.re$cluster)
names(kent_clusterclass) ="cluster"
kent_clusterclass = cbind(kent_dataset_between_gather_fp_spread[1],kent_clusterclass)
kent_clusterclass$cluster = as.factor(as.character(kent_clusterclass$cluster))
levels(kent_clusterclass$cluster)[levels(kent_clusterclass$cluster ) == "1"] <- "High Performing"
levels(kent_clusterclass$cluster)[levels(kent_clusterclass$cluster ) == "2"] <- "Percentage Bias"
kent_clusterclass$cluster <- factor(kent_clusterclass$cluster, levels=c("High Performing","Percentage Bias"))
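A quick check (sketch) that the relabeling preserved the cluster sizes reported by kmeans() above:
# Should show 140 High Performing and 43 Percentage Bias.
table(kent_clusterclass$cluster)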
kent_dataset_between_gather_fp_cluster = kent_dataset_between_gather_fp %>%
left_join(kent_clusterclass, by = "id")
kent_dataset_between_gather_fp_cluster$greater = as.factor(as.character(kent_dataset_between_gather_fp_cluster$greater))
kent_dataset_between_gather_fp_cluster$greater <- factor(kent_dataset_between_gather_fp_cluster$greater, levels=c("pg","fg"))
# kent_dataset_between_gather_fp_cluster$cluster = as.factor(as.character(kent_dataset_between_gather_fp_cluster$cluster))
# levels(kent_dataset_between_gather_fp_cluster$cluster)[levels(kent_dataset_between_gather_fp_cluster$cluster ) == "1"] <- "No Bias"
# levels(kent_dataset_between_gather_fp_cluster$cluster)[levels(kent_dataset_between_gather_fp_cluster$cluster ) == "2"] <- "Percentage Bias"
# kent_dataset_between_gather_fp_cluster$cluster <- factor(kent_dataset_between_gather_fp_cluster$cluster, levels=c("Percentage Bias", "No Bias","Fraction Bias"))
kent_dataset_between_gather_fp_cluster$comparison = "Percent vs. Fraction \nComparisons"
kent_dataset_between_gather_fp_cluster_fp_pb = subset(kent_dataset_between_gather_fp_cluster, cluster == "Percentage Bias")
t.test(accuracy ~ greater, kent_dataset_between_gather_fp_cluster_fp_pb, paired = T)
##
## Paired t-test
##
## data: accuracy by greater
## t = 16.76, df = 42, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.5250266 0.6687719
## sample estimates:
## mean difference
## 0.5968992
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_fp_cluster_fp_pb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_fp_cluster_fp_pb, : calculating paired samples
## Cohen's d using formula input. Results will be incorrect if cases do not appear
## in the same order for both levels of the grouping factor
## [1] 2.555888
kent_dataset_between_gather_fp_cluster_fp_nb = subset(kent_dataset_between_gather_fp_cluster, cluster == "High Performing")
t.test(accuracy ~ greater, kent_dataset_between_gather_fp_cluster_fp_nb, paired = T)
##
## Paired t-test
##
## data: accuracy by greater
## t = 5.4616, df = 139, p-value = 2.112e-07
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.04405127 0.09404397
## sample estimates:
## mean difference
## 0.06904762
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_fp_cluster_fp_nb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_fp_cluster_fp_nb, : calculating paired samples
## Cohen's d using formula input. Results will be incorrect if cases do not appear
## in the same order for both levels of the grouping factor
## [1] 0.4615872
kent_dataset_between_gather_dp_cluster = kent_dataset_between_gather_dp %>%
left_join(kent_clusterclass, by = "id")
kent_dataset_between_gather_dp_cluster$greater = as.factor(as.character(kent_dataset_between_gather_dp_cluster$greater))
kent_dataset_between_gather_dp_cluster$greater <- factor(kent_dataset_between_gather_dp_cluster$greater, levels=c("pg","dg"))
kent_dataset_between_gather_dp_cluster$comparison = "Percent vs. Decimal \nComparisons"
summarySE(kent_dataset_between_gather_dp_cluster, "accuracy", c("greater","cluster"))
## greater cluster N accuracy sd se ci
## 1 pg High Performing 140 0.9726190 0.09076175 0.007670768 0.01516647
## 2 pg Percentage Bias 43 0.9496124 0.08556445 0.013048456 0.02633285
## 3 dg High Performing 140 0.8964286 0.19015314 0.016070873 0.03177497
## 4 dg Percentage Bias 43 0.7170543 0.28529870 0.043507644 0.08780198
# kent_dataset_between_gather_dp_cluster$cluster = as.factor(as.character(kent_dataset_between_gather_dp_cluster$cluster))
# levels(kent_dataset_between_gather_dp_cluster$cluster)[levels(kent_dataset_between_gather_dp_cluster$cluster ) == "1"] <- "No Bias"
# levels(kent_dataset_between_gather_dp_cluster$cluster)[levels(kent_dataset_between_gather_dp_cluster$cluster ) == "2"] <- "Percentage Bias"
#
# kent_dataset_between_gather_dp_cluster$cluster <- factor(kent_dataset_between_gather_dp_cluster$cluster, levels=c("Percentage Bias", "No Bias","Fraction Bias"))
kent_dataset_between_gather_dp_cluster_pb = subset(kent_dataset_between_gather_dp_cluster, cluster == "Percentage Bias")
t.test(accuracy ~ greater, kent_dataset_between_gather_dp_cluster_pb, paired = T)
##
## Paired t-test
##
## data: accuracy by greater
## t = 5.547, df = 42, p-value = 1.776e-06
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.1479500 0.3171662
## sample estimates:
## mean difference
## 0.2325581
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_dp_cluster_pb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_dp_cluster_pb, : calculating paired samples Cohen's
## d using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.8459099
kent_dataset_between_gather_dp_cluster_nb = subset(kent_dataset_between_gather_dp_cluster, cluster == "High Performing")
t.test(accuracy ~ greater, kent_dataset_between_gather_dp_cluster_nb, paired = T)
##
## Paired t-test
##
## data: accuracy by greater
## t = 4.9092, df = 139, p-value = 2.527e-06
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.0455048 0.1068762
## sample estimates:
## mean difference
## 0.07619048
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_dp_cluster_nb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_dp_cluster_nb, : calculating paired samples Cohen's
## d using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.4149031
kent_dataset_between_gather_df_cluster = kent_dataset_between_gather_df %>%
left_join(kent_clusterclass, by = "id")
kent_dataset_between_gather_df_cluster$greater = as.factor(as.character(kent_dataset_between_gather_df_cluster$greater))
kent_dataset_between_gather_df_cluster$greater <- factor(kent_dataset_between_gather_df_cluster$greater, levels=c("dg","fg"))
kent_dataset_between_gather_df_cluster$comparison = "Decimal vs. Fraction \nComparisons"
summarySE(kent_dataset_between_gather_df_cluster, "accuracy", c("greater","cluster"))
## greater cluster N accuracy sd se ci
## 1 dg High Performing 140 0.9107143 0.1836772 0.01552355 0.03069282
## 2 dg Percentage Bias 43 0.7519380 0.2555523 0.03897137 0.07864740
## 3 fg High Performing 140 0.9107143 0.1474683 0.01246334 0.02464224
## 4 fg Percentage Bias 43 0.5077519 0.2087804 0.03183872 0.06425313
kent_dataset_between_gather_df_cluster_pb = subset(kent_dataset_between_gather_df_cluster, cluster == "Percentage Bias")
t.test(accuracy ~ greater, kent_dataset_between_gather_df_cluster_pb, paired = T)
##
## Paired t-test
##
## data: accuracy by greater
## t = 4.7379, df = 42, p-value = 2.481e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.1401768 0.3481953
## sample estimates:
## mean difference
## 0.244186
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_df_cluster_pb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_df_cluster_pb, : calculating paired samples Cohen's
## d using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.722526
kent_dataset_between_gather_df_cluster_nb = subset(kent_dataset_between_gather_df_cluster, cluster == "High Performing")
t.test(accuracy ~ greater, kent_dataset_between_gather_df_cluster_nb, paired = T)
##
## Paired t-test
##
## data: accuracy by greater
## t = -9.2057e-16, df = 139, p-value = 1
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.03551217 0.03551217
## sample estimates:
## mean difference
## -1.653439e-17
cohensD(accuracy ~ greater, data = kent_dataset_between_gather_df_cluster_nb, method = "paired")
## Warning in cohensD(accuracy ~ greater, data =
## kent_dataset_between_gather_df_cluster_nb, : calculating paired samples Cohen's
## d using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0
Figure 4. Percent correct on (A) percent vs. fraction comparisons, (B) percent vs. decimal comparisons, and (C) decimal vs. fraction cross-notation comparisons, based on the two-cluster model: ‘percentage bias’ cluster (n = 43) and ‘high performing’ cluster (n = 140). Most participants exhibited a bias to select percentages as larger than fractions and decimals, with the percentage-bias profile showing a more pronounced difference in performance than the high-performing profile. The percentage-bias cluster also showed a bias to select decimals as larger than fractions, whereas the high-performing cluster showed no bias on the decimal vs. fraction comparisons. Gray lines represent individual participants’ average scores in each condition; thicker gray lines indicate more participants with the same scores. Error bars represent ± 1 Standard Error. Note. ***p < .001.
kent_clusterclass_within = kent_clusterclass
names(kent_clusterclass_within)[1] = "ResponseId"
kent_dataset_both_gather_cluster = kent_dataset_both_gather %>%
left_join(kent_clusterclass_within, by = "ResponseId")
ezANOVA(kent_dataset_both_gather_cluster, dv = .(accuracy), wid = .(ResponseId), within = .c(cross), between = .(cluster))
## Warning: Converting "ResponseId" to factor for ANOVA.
## Warning: Converting "cross" to factor for ANOVA.
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 2 cluster 1 181 127.47113 1.016463e-22 * 0.37113728
## 3 cross 1 181 38.97358 2.973561e-09 * 0.03370609
## 4 cluster:cross 1 181 34.81347 1.749430e-08 * 0.03021696
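The unbalanced-design warning refers to ezANOVA()'s type argument (Type II sums of squares by default). A hedged sketch of the same model with Type III sums of squares, given the unequal cluster sizes (140 vs. 43):
# Same mixed ANOVA requesting Type III sums of squares, for comparison with the
# default output above.
ezANOVA(kent_dataset_both_gather_cluster, dv = .(accuracy), wid = .(ResponseId),
        within = .c(cross), between = .(cluster), type = 3)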
summarySE(kent_dataset_both_gather_cluster, "accuracy", c("cluster","cross"))
## cluster cross N accuracy sd se ci
## 1 High Performing precrossmean 140 0.9238095 0.10042384 0.008487364 0.01678103
## 2 High Performing prewithinmean 140 0.9432540 0.09736466 0.008228815 0.01626983
## 3 Percentage Bias precrossmean 43 0.6802326 0.11884031 0.018122977 0.03657365
## 4 Percentage Bias prewithinmean 43 0.7906977 0.15891923 0.024234955 0.04890812
kent_dataset_both_gather_cluster_high = subset(kent_dataset_both_gather_cluster, cluster == "High Performing")
t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_high, paired = T)
##
## Paired t-test
##
## data: accuracy by cross
## t = -2.9861, df = 139, p-value = 0.003341
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.032319286 -0.006569603
## sample estimates:
## mean difference
## -0.01944444
cohensD(accuracy ~ cross, data = kent_dataset_both_gather_cluster_high, method = "paired")
## Warning in cohensD(accuracy ~ cross, data =
## kent_dataset_both_gather_cluster_high, : calculating paired samples Cohen's d
## using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.2523686
kent_dataset_both_gather_cluster_biased = subset(kent_dataset_both_gather_cluster, cluster == "Percentage Bias")
t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_biased, paired = T)
##
## Paired t-test
##
## data: accuracy by cross
## t = -6.1023, df = 42, p-value = 2.83e-07
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.14699670 -0.07393354
## sample estimates:
## mean difference
## -0.1104651
cohensD(accuracy ~ cross, data = kent_dataset_both_gather_cluster_biased, method = "paired")
## Warning in cohensD(accuracy ~ cross, data =
## kent_dataset_both_gather_cluster_biased, : calculating paired samples Cohen's d
## using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.930596
kent_dataset_within2 = kent_dataset[c("ResponseId","prefvfmean", "predvdmean","prepvpmean")]
kent_dataset_within2$within = (kent_dataset_within2$prefvfmean + kent_dataset_within2$predvdmean + kent_dataset_within2$prepvpmean)/3
kent_dataset_between2 = kent_dataset[c("ResponseId","predgtpmean","prepgtdmean","prefgtdmean","predgtfmean")]
kent_dataset_between2$cross = (kent_dataset_between2$predgtpmean + kent_dataset_between2$prepgtdmean + kent_dataset_between2$prefgtdmean + kent_dataset_between2$predgtfmean)/4
kent_dataset_within_between = kent_dataset_between2[c("ResponseId","cross")] %>%
left_join(kent_dataset_within2[c("ResponseId","within")], by = "ResponseId")
kent_dataset_within_between_gather = gather(data = kent_dataset_within_between, key = "cross","accuracy", -ResponseId)
kent_dataset_within_between_gather$id = kent_dataset_within_between_gather$ResponseId
kent_dataset_within_between_gather_cluster = kent_dataset_within_between_gather %>%
left_join(kent_clusterclass, by = "id")
ezANOVA(kent_dataset_within_between_gather_cluster, dv = .(accuracy), wid = .(ResponseId), within = .c(cross), between = .(cluster))
## Warning: Converting "ResponseId" to factor for ANOVA.
## Warning: Converting "cross" to factor for ANOVA.
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 2 cluster 1 181 86.932545 3.942022e-17 * 0.285886430
## 3 cross 1 181 18.063233 3.416846e-05 * 0.016341474
## 4 cluster:cross 1 181 5.457946 2.057454e-02 * 0.004994661
summarySE(kent_dataset_within_between_gather_cluster, "accuracy", c("cluster","cross"))
## cluster cross N accuracy sd se ci
## 1 High Performing cross 140 0.9226190 0.11249072 0.009507201 0.01879743
## 2 High Performing within 140 0.9432540 0.09736466 0.008228815 0.01626983
## 3 Percentage Bias cross 43 0.7315891 0.13027949 0.019867436 0.04009411
## 4 Percentage Bias within 43 0.7906977 0.15891923 0.024234955 0.04890812
kent_dataset_both_gather_cluster_high = subset(kent_dataset_within_between_gather_cluster, cluster == "High Performing")
t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_high, paired = T)
##
## Paired t-test
##
## data: accuracy by cross
## t = -2.9981, df = 139, p-value = 0.00322
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.03424338 -0.00702646
## sample estimates:
## mean difference
## -0.02063492
cohensD(accuracy ~ cross, data = kent_dataset_both_gather_cluster_high, method = "paired")
## Warning in cohensD(accuracy ~ cross, data =
## kent_dataset_both_gather_cluster_high, : calculating paired samples Cohen's d
## using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.2533818
kent_dataset_both_gather_cluster_biased = subset(kent_dataset_within_between_gather_cluster, cluster == "Percentage Bias")
t.test(accuracy ~ cross, kent_dataset_both_gather_cluster_biased, paired = T)
##
## Paired t-test
##
## data: accuracy by cross
## t = -3.0176, df = 42, p-value = 0.004316
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.09863852 -0.01957853
## sample estimates:
## mean difference
## -0.05910853
cohensD(accuracy ~ cross, data = kent_dataset_both_gather_cluster_biased, method = "paired")
## Warning in cohensD(accuracy ~ cross, data =
## kent_dataset_both_gather_cluster_biased, : calculating paired samples Cohen's d
## using formula input. Results will be incorrect if cases do not appear in the
## same order for both levels of the grouping factor
## [1] 0.4601801
t.test(pre_arithmetic_mean ~ cluster, kent_dataset_intervention_generamath_measures,var.equal =T)
##
## Two Sample t-test
##
## data: pre_arithmetic_mean by cluster
## t = 3.0007, df = 181, p-value = 0.003074
## alternative hypothesis: true difference in means between group High Performing and group Percentage Bias is not equal to 0
## 95 percent confidence interval:
## 0.05897153 0.28546368
## sample estimates:
## mean in group High Performing mean in group Percentage Bias
## 0.7303571 0.5581395
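For consistency with the paired analyses above, an effect size can accompany this between-cluster comparison as well; a sketch using the same cohensD() helper with its pooled-SD default:
# Cohen's d for the cluster difference in pre-test arithmetic accuracy.
cohensD(pre_arithmetic_mean ~ cluster, data = kent_dataset_intervention_generamath_measures)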
t.test(preluremean ~ cluster, kent_dataset_intervention_generamath_measures,var.equal =T)
##
## Two Sample t-test
##
## data: preluremean by cluster
## t = 3.6728, df = 181, p-value = 0.0003157
## alternative hypothesis: true difference in means between group High Performing and group Percentage Bias is not equal to 0
## 95 percent confidence interval:
## 0.08119165 0.26970536
## sample estimates:
## mean in group High Performing mean in group Percentage Bias
## 0.4576190 0.2821705
kent_dataset_intervention_generamath_measures_pb = subset(kent_dataset_intervention_generamath_measures, cluster =="Percentage Bias")
t.test(kent_dataset_intervention_generamath_measures_pb$preluremean, mu = .33)
##
## One Sample t-test
##
## data: kent_dataset_intervention_generamath_measures_pb$preluremean
## t = -1.426, df = 42, p-value = 0.1613
## alternative hypothesis: true mean is not equal to 0.33
## 95 percent confidence interval:
## 0.2144798 0.3498613
## sample estimates:
## mean of x
## 0.2821705
kent_dataset_intervention_generamath_measures_hf = subset(kent_dataset_intervention_generamath_measures, cluster =="High Performing")
t.test(kent_dataset_intervention_generamath_measures_hf$preluremean, mu = .33)
##
## One Sample t-test
##
## data: kent_dataset_intervention_generamath_measures_hf$preluremean
## t = 5.2372, df = 139, p-value = 5.902e-07
## alternative hypothesis: true mean is not equal to 0.33
## 95 percent confidence interval:
## 0.4094395 0.5057986
## sample estimates:
## mean of x
## 0.457619
### Ordinal (proportional odds) logistic regression (exploratory; not run)
# kent_dataset_intervention_generamath_measures$preluremean_factor = as.factor(kent_dataset_intervention_generamath_measures$preluremean)
# m <- polr(preluremean_factor ~ as.numeric(cluster), data = kent_dataset_intervention_generamath_measures, Hess=TRUE)
# summary(m)
#
# (ctable <- coef(summary(m)))
#
# p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
#
# ## combined table
# (ctable <- cbind(ctable, "p value" = p))
# exp(coef(m))
# (ci <- confint(m)) #
#
# exp(cbind(OR = coef(m), ci))
#model <- polr(response ~ predictor1 + predictor2, data = your_data)
############
t.test(PRE_PAE_indiv ~ cluster, kent_dataset_intervention_generamath_measures,var.equal =T)
##
## Two Sample t-test
##
## data: PRE_PAE_indiv by cluster
## t = -4.8415, df = 181, p-value = 2.753e-06
## alternative hypothesis: true difference in means between group High Performing and group Percentage Bias is not equal to 0
## 95 percent confidence interval:
## -0.10717140 -0.04510925
## sample estimates:
## mean in group High Performing mean in group Percentage Bias
## 0.1296439 0.2057842
t.test(preconfidmean ~ cluster, kent_dataset_intervention_generamath_measures,var.equal =T)
##
## Two Sample t-test
##
## data: preconfidmean by cluster
## t = 4.3706, df = 181, p-value = 2.085e-05
## alternative hypothesis: true difference in means between group High Performing and group Percentage Bias is not equal to 0
## 95 percent confidence interval:
## 11.06789 29.28609
## sample estimates:
## mean in group High Performing mean in group Percentage Bias
## 65.41653 45.23953
t.test(Q1741_MathACT ~ cluster, kent_dataset_intervention_generamath_measures,var.equal =T)
##
## Two Sample t-test
##
## data: Q1741_MathACT by cluster
## t = 3.2411, df = 114, p-value = 0.001561
## alternative hypothesis: true difference in means between group High Performing and group Percentage Bias is not equal to 0
## 95 percent confidence interval:
## 1.490139 6.175356
## sample estimates:
## mean in group High Performing mean in group Percentage Bias
## 22.75275 18.92000
summarySE(kent_dataset_intervention_generamath_measures, "Q1741_MathACT", "cluster", na.rm =T)
## cluster N Q1741_MathACT sd se ci
## 1 High Performing 91 22.75275 5.552464 0.5820564 1.156357
## 2 Percentage Bias 25 18.92000 3.828838 0.7657676 1.580467
Figure 5. Descriptive statistics for measures (B-F)
collected in Study 2 only, by cluster: ‘percentage bias’ (biased) and
‘high performing’. For the SAT scores from Study 1, the ‘biased’
cluster depicted here includes the three biased clusters from Study 1,
with dots in red (strong percentage bias), yellow (moderate percentage
bias), and blue (fraction bias). Error bars represent ± 1 Standard
Error. The dashed line in Panel C represents chance performance for that
task. Note. *p < .05, **p < .01, ***p < .001; exact
test statistics appear in the Supplemental Materials.
Figure 2. Percent correct for cross-notation
magnitude comparisons: (A) percent vs. fraction comparisons (e.g., 2/5
vs. 25%), (B) percent vs. decimal comparisons (e.g., 40% vs. .25), and
(C) decimal vs. fraction comparisons (e.g., .40 vs. 1/4). Gray lines
represent individual participants’ average scores in each condition;
thicker gray lines indicate more participants with the same
scores. Error bars represent ± 1 Standard Error. Note.
***p < .001.
kent_dataset_within_gather_cluster = kent_dataset_within_gather
names(kent_dataset_within_gather_cluster)[1] = "id"
kent_dataset_within_gather_cluster = kent_dataset_within_gather_cluster %>%
left_join(kent_clusterclass, by = "id")
kent_dataset_within_gather_cluster$within = as.factor(as.character(kent_dataset_within_gather_cluster$within))
kent_dataset_within_gather_cluster$within <- factor(kent_dataset_within_gather_cluster$within, levels=c("prepvpmean", "prefvfmean","predvdmean"))
levels(kent_dataset_within_gather_cluster$within)[levels(kent_dataset_within_gather_cluster$within ) == "prepvpmean"] <- "Percent to \nPercent"
levels(kent_dataset_within_gather_cluster$within)[levels(kent_dataset_within_gather_cluster$within ) == "prefvfmean"] <- "Fraction to \nFraction"
levels(kent_dataset_within_gather_cluster$within)[levels(kent_dataset_within_gather_cluster$within ) == "predvdmean"] <- "Decimal to \nDecimal"
# kent_dataset_within_gather_cluster$cluster = as.factor(as.character(kent_dataset_within_gather_cluster$cluster))
# levels(kent_dataset_within_gather_cluster$cluster)[levels(kent_dataset_within_gather_cluster$cluster ) == "1"] <- "No Bias"
# levels(kent_dataset_within_gather_cluster$cluster)[levels(kent_dataset_within_gather_cluster$cluster ) == "2"] <- "Percentage Bias"
#
# kent_dataset_within_gather_cluster$cluster <- factor(kent_dataset_within_gather_cluster$cluster, levels=c("Percentage Bias", "No Bias","Fraction Bias"))
summarySE(kent_dataset_within_gather_cluster, "accuracy", c("within","cluster"))
## within cluster N accuracy sd se
## 1 Percent to \nPercent High Performing 140 0.9857143 0.06775406 0.005726264
## 2 Percent to \nPercent Percentage Bias 43 0.9883721 0.05628899 0.008583991
## 3 Fraction to \nFraction High Performing 140 0.9142857 0.16903085 0.014285714
## 4 Fraction to \nFraction Percentage Bias 43 0.6511628 0.26680045 0.040686687
## 5 Decimal to \nDecimal High Performing 140 0.9297619 0.16002080 0.013524226
## 6 Decimal to \nDecimal Percentage Bias 43 0.7325581 0.30021935 0.045783022
## ci
## 1 0.01132184
## 2 0.01732320
## 3 0.02824540
## 4 0.08210906
## 5 0.02673980
## 6 0.09239388
ezANOVA(subset(kent_dataset_within_gather_cluster, cluster=="High Performing"), dv = .(accuracy), wid = .(id), within = .c(within))
## Warning: Converting "id" to factor for ANOVA.
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 2 within 2 278 13.03602 3.876991e-06 * 0.04616537
##
## $`Mauchly's Test for Sphericity`
## Effect W p p<.05
## 2 within 0.9912508 0.5453375
##
## $`Sphericity Corrections`
## Effect GGe p[GG] p[GG]<.05 HFe p[HF] p[HF]<.05
## 2 within 0.9913267 4.20382e-06 * 1.005608 3.876991e-06 *
pairwise.t.test(subset(kent_dataset_within_gather_cluster, cluster=="High Performing")$accuracy, subset(kent_dataset_within_gather_cluster, cluster=="High Performing")$within, p.adj = "none", paired = T)
##
## Pairwise comparisons using paired t tests
##
## data: subset(kent_dataset_within_gather_cluster, cluster == "High Performing")$accuracy and subset(kent_dataset_within_gather_cluster, cluster == "High Performing")$within
##
## Percent to \nPercent Fraction to \nFraction
## Fraction to \nFraction 3.3e-06 -
## Decimal to \nDecimal 0.00012 0.31323
##
## P value adjustment method: none
ezANOVA(subset(kent_dataset_within_gather_cluster, cluster=="Percentage Bias"), dv = .(accuracy), wid = .(id), within = .c(within))
## Warning: Converting "id" to factor for ANOVA.
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 2 within 2 84 30.01456 1.461559e-10 * 0.2782127
##
## $`Mauchly's Test for Sphericity`
## Effect W p p<.05
## 2 within 0.9351018 0.2527011
##
## $`Sphericity Corrections`
## Effect GGe p[GG] p[GG]<.05 HFe p[HF] p[HF]<.05
## 2 within 0.9390569 4.701758e-10 * 0.9814954 2.083737e-10 *
pairwise.t.test(subset(kent_dataset_within_gather_cluster, cluster=="Percentage Bias")$accuracy, subset(kent_dataset_within_gather_cluster, cluster=="Percentage Bias")$within, p.adj = "none", paired = T)
##
## Pairwise comparisons using paired t tests
##
## data: subset(kent_dataset_within_gather_cluster, cluster == "Percentage Bias")$accuracy and subset(kent_dataset_within_gather_cluster, cluster == "Percentage Bias")$within
##
## Percent to \nPercent Fraction to \nFraction
## Fraction to \nFraction 2.6e-10 -
## Decimal to \nDecimal 7.8e-07 0.12
##
## P value adjustment method: none
graph_kent_within_cluster = ggplot(kent_dataset_within_gather_cluster, aes(x = interaction(within), y = accuracy)) +
  geom_bar(stat = "identity", data = summarySE(kent_dataset_within_gather_cluster, "accuracy", c("within","cluster")),
           fill = NA, aes(color = as.factor(within)), linewidth = 1, width = 0.55) +
  stat_summary(fun.data = data_summary, geom = "errorbar",
               position = position_dodge(width = 0.10), width = 0.05, colour = "black", linewidth = 0.5) +
  scale_y_continuous(breaks = seq(0, 1, .25), limits = c(0, 1.3), trans = shift_trans(0), expand = c(0, 0)) +
  scale_color_manual(values = c("#1b7837","#e08214","#40004b")) +
  #scale_color_manual(values = c("#1b7837","#40004b"))+
  geom_line(aes(group = interaction(id)),
            alpha = 0.15,
            linewidth = 0.25, colour = "#737373") +
  geom_hline(yintercept = .5, linetype = 2, linewidth = 0.5) +
  ylab("Accuracy") +
  facet_grid(. ~ cluster) +
  #scale_x_discrete(labels=c("dg" = "Decimal \n> \nFraction", "fg" = "Fraction \n> \nDecimal"))+
  stat_summary(fun.data = data_summary, geom = "errorbar",
               position = position_dodge(width = 0.10), width = 0.001, colour = "black", linewidth = 0.5) +
  theme_bw() +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        axis.text.x = element_text(size = 9),
        #axis.title.x = element_text(size = size_text),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "white", colour = "grey50"),
        strip.background = element_rect(fill = "#f0f0f0"),
        strip.text = element_text(size = size_textb),
        axis.text.y = element_text(size = size_text),
        axis.title.y = element_text(size = size_text),
        legend.text = element_text(size = size_text))
graph_kent_within_cluster
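If a standalone image file of this panel is needed (a sketch; the file name is illustrative):
# ggsave() infers the graphics device from the file extension.
ggsave("figure3_within_notation_by_cluster.png", graph_kent_within_cluster,
       width = 7, height = 4, dpi = 300)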
Figure 3. Within-notation comparison accuracy based
on the two-cluster model: (A) ‘high performing’ cluster (n = 140) and (B)
‘percentage bias’ cluster (n = 43). Gray lines represent
individual participants’ average scores in each condition;
thicker gray lines indicate more participants with the same scores.
Error bars represent ± 1 Standard Error. Note.
*p < .05, **p < .01, ***p < .001.
kent_dataset_between_gather_spread = kent_dataset_between_gather
kent_dataset_between_gather_spread = spread(data = kent_dataset_between_gather_spread[c("id","type","accuracy")], value = accuracy, key = type)
library(parameters)
set.seed(240)
kent_n_clust2 <- n_clusters(kent_dataset_between_gather_spread[-c(1)],
package = c("easystats", "NbClust", "mclust"),
standardize = FALSE)
kent_n_clust2
## # Method Agreement Procedure:
##
## The choice of 2 clusters is supported by 10 (35.71%) methods out of 28 (Elbow, Silhouette, Ch, CCC, DB, Duda, Pseudot2, Beale, Ratkowsky, Mcclain).
set.seed(240)
kent_kmeans.re2 <- kmeans(kent_dataset_between_gather_spread[-c(1)], centers = 2, nstart = 30)
kent_kmeans.re2
## K-means clustering with 2 clusters of sizes 130, 53
##
## Cluster means:
## predgtfmean predgtpmean prefgtdmean prefgtpmean prepgtdmean prepgtfmean
## 1 0.9487179 0.9371795 0.9320513 0.9000000 0.9858974 0.9782051
## 2 0.6886792 0.6509434 0.5314465 0.3742138 0.9213836 0.8490566
##
## Clustering vector:
## [1] 1 1 2 1 1 1 2 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 1 2 2 1 2 2 1 1 1 2 1 1
## [38] 1 2 2 2 1 1 1 2 1 2 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 2 1
## [75] 1 1 2 1 1 1 2 1 2 1 1 2 2 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 2 2 1 2 1 2 1 1 2
## [112] 1 2 1 1 1 2 1 2 1 2 1 2 1 1 2 1 2 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 2 1 2 1 1
## [149] 2 1 1 1 1 1 1 1 2 2 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 2 2
##
## Within cluster sum of squares by cluster:
## [1] 9.046154 17.007338
## (between_SS / total_SS = 46.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
kent_clusterclass2 = as.data.frame(kent_kmeans.re2$cluster)
names(kent_clusterclass2) ="cluster"
kent_clusterclass2 = cbind(kent_dataset_between_gather_spread[1],kent_clusterclass2)
kent_clusterclass2$cluster = as.factor(as.character(kent_clusterclass2$cluster))
levels(kent_clusterclass2$cluster)[levels(kent_clusterclass2$cluster ) == "1"] <- "High Performing"
levels(kent_clusterclass2$cluster)[levels(kent_clusterclass2$cluster ) == "2"] <- "Percentage Bias"
kent_clusterclass2$cluster <- factor(kent_clusterclass2$cluster, levels=c("High Performing","Percentage Bias"))
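Because this six-variable solution parallels the earlier two-variable one, a quick cross-tabulation (a sketch, assuming both kent_clusterclass objects are in memory) shows how many participants keep the same label across the two solutions:
# Align the two cluster assignments by participant id before tabulating.
cluster_compare = merge(kent_clusterclass, kent_clusterclass2,
                        by = "id", suffixes = c("_2var", "_6var"))
table(two_variable = cluster_compare$cluster_2var,
      six_variable = cluster_compare$cluster_6var)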
kent_dataset_between_gather_fp_cluster = kent_dataset_between_gather_fp %>%
left_join(kent_clusterclass2, by = "id")
kent_dataset_between_gather_fp_cluster$greater = as.factor(as.character(kent_dataset_between_gather_fp_cluster$greater))
kent_dataset_between_gather_fp_cluster$greater <- factor(kent_dataset_between_gather_fp_cluster$greater, levels=c("pg","fg"))
kent_dataset_between_gather_fp_cluster$comparison = "Percent vs. Fraction \nComparisons"
kent_dataset_between_gather_dp_cluster = kent_dataset_between_gather_dp %>%
left_join(kent_clusterclass2, by = "id")
kent_dataset_between_gather_dp_cluster$greater = as.factor(as.character(kent_dataset_between_gather_dp_cluster$greater))
kent_dataset_between_gather_dp_cluster$greater <- factor(kent_dataset_between_gather_dp_cluster$greater, levels=c("pg","dg"))
kent_dataset_between_gather_dp_cluster$comparison = "Percent vs. Decimal \nComparisons"
summarySE(kent_dataset_between_gather_dp_cluster, "accuracy", c("greater","cluster"))
## greater cluster N accuracy sd se ci
## 1 pg High Performing 130 0.9858974 0.04656419 0.00408395 0.008080194
## 2 pg Percentage Bias 53 0.9213836 0.14098165 0.01936532 0.038859356
## 3 dg High Performing 130 0.9371795 0.11787924 0.01033869 0.020455357
## 4 dg Percentage Bias 53 0.6509434 0.29825732 0.04096879 0.082209898