Data Set-Up

data <- read_xlsx("~/Desktop/UTK/Tanner Lab/Fall 2024 - RA/R/Data.xlsx")
# One line for each trial, with latency and speakers that "won" and "lost"

realized <- read_xlsx("~/Desktop/UTK/Tanner Lab/Fall 2024 - RA/R/TC_Realized_Values.xlsx")
# each speaker's realized CR, CV, and scores

format2 <- read_xlsx("~/Desktop/UTK/Tanner Lab/Fall 2024 - RA/R/TC_NSF_Format2.xlsx")
# One line for each speaker, with their associated data + realized CR and CV. Labeled as whether they "won" or "lost", and later in this script "chosen" v "not chosen"
final <- final %>% 
  mutate(best = pmax(W_Score, L_Score, L2_Score, L3_Score, L4_Score, L5_Score, L6_Score, L7_Score,
                     na.rm = TRUE))

final <- final %>%
  mutate(correct = case_when(best == W_Score ~ 'Correct',
                             best != W_Score ~ 'Incorrect'))

# write.csv(final,"09102024_NSF_Data_Scores.csv",row.names=FALSE)
final <- as.data.frame(final)
final_type2 <- as.data.frame(final_type2)
final$NumStimuli <- as.factor(final$NumStimuli)

dat2 <- final %>% filter(NumStimuli == '2')
dat4 <- final %>% filter(NumStimuli == '4')
dat8 <- final %>% filter(NumStimuli == '8')

final_type2 <- final_type2 %>% mutate(Chosen = case_when(Choice == 'W' ~ 'chosen',
                                                         Choice != 'W' ~ 'not_chosen'))

dat2_type2 <- final_type2 %>% filter(NumStimuli == '2')
dat4_type2 <- final_type2 %>% filter(NumStimuli == '4')
dat8_type2 <- final_type2 %>% filter(NumStimuli == '8')

Latencies x Num of Choices

ggplot(final,aes(x=correct,y=Latency))+
  geom_boxplot()+
  theme_classic()
## Warning: Removed 43 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

final.table <- final %>% group_by(NumStimuli) %>%
  summarize(n = n(),
            n.na = sum(is.na(Latency)),
            n.respond = sum(!is.na(Latency)),
            perc = sum(is.na(Latency))/sum(Latency, na.rm = TRUE),
            Latency = mean(Latency, na.rm = TRUE),
            )

final.table <- as.data.frame(final.table)

ggplot(final.table, aes(x = NumStimuli, y = n.na))+
  geom_bar(stat = "identity")+
  theme_classic()

no.choice.data <- final[is.na(final$Latency), ]

# significantly different in amount of NAs
prop.test(x=c(6,4,33),n=c(508,508,508))
## 
##  3-sample test for equality of proportions without continuity correction
## 
## data:  c(6, 4, 33) out of c(508, 508, 508)
## X-squared = 37.667, df = 2, p-value = 6.616e-09
## alternative hypothesis: two.sided
## sample estimates:
##      prop 1      prop 2      prop 3 
## 0.011811024 0.007874016 0.064960630
# not signif
prop.test(x=c(6,4),n=c(508,508))
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(6, 4) out of c(508, 508)
## X-squared = 0.10099, df = 1, p-value = 0.7506
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.01016958  0.01804360
## sample estimates:
##      prop 1      prop 2 
## 0.011811024 0.007874016
# v significant
prop.test(x=c(4,33),n=c(508,508))
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(4, 33) out of c(508, 508)
## X-squared = 21.99, df = 1, p-value = 2.741e-06
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.08182332 -0.03234991
## sample estimates:
##      prop 1      prop 2 
## 0.007874016 0.064960630
# v significant
prop.test(x=c(6,33),n=c(508,508))
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(6, 33) out of c(508, 508)
## X-squared = 18.025, df = 1, p-value = 2.18e-05
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.07851846 -0.02778075
## sample estimates:
##     prop 1     prop 2 
## 0.01181102 0.06496063
latencies <- final$Latency
latencies <- latencies[!is.na(latencies)]
plotdist(latencies)

lnorm <- fitdist(latencies, "lnorm")
denscomp(list(lnorm))

final <- final %>% mutate(LogLatency = log(Latency))
ggplot(final,aes(x=NumStimuli,y=LogLatency))+
  geom_boxplot()+
  theme_classic()
## Warning: Removed 43 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

latency <- lmer(LogLatency ~ NumStimuli+(1|FemaleID),final)
summary(latency)
## Linear mixed model fit by REML ['lmerMod']
## Formula: LogLatency ~ NumStimuli + (1 | FemaleID)
##    Data: final
## 
## REML criterion at convergence: 2330.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6081 -0.6537 -0.1198  0.5984  3.5947 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  FemaleID (Intercept) 0.1139   0.3375  
##  Residual             0.2384   0.4883  
## Number of obs: 1481, groups:  FemaleID, 127
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  4.31888    0.03706 116.549
## NumStimuli4  0.19658    0.03081   6.381
## NumStimuli8  0.34966    0.03138  11.143
## 
## Correlation of Fixed Effects:
##             (Intr) NmStm4
## NumStimuli4 -0.417       
## NumStimuli8 -0.410  0.493

% Correct x Num of Choices

Overview Graph

final %>%
  count(NumStimuli, correct) %>%       
  group_by(NumStimuli) %>%
  mutate(pct= prop.table(n) * 100) %>%
  ggplot() + aes(NumStimuli, pct, fill=correct) +
  geom_bar(stat="identity") +
  ylab("% Correct Identifications") +
  geom_text(aes(label=paste0(sprintf("%1.1f", pct),"%")),
            position=position_stack(vjust=0.5)) +
  ggtitle("# of Choices + Correct or Not ") +
  theme_classic()

Analysis

# make a factor
final$correct <- factor(final$correct)

# get rid of NAs / make a binary response
final_na <- subset(final, (!is.na(final[,7])))

final_na <- final_na %>% mutate(Correct = case_when(correct == 'Correct' ~ 1,
                                                     correct == 'Incorrect' ~ 0))

final_na$Correct <- factor(final_na$Correct)

# plot 1: just straight up loess smoothing
ggplot(final_na, aes(x = W_Score, y = Correct, group = NumStimuli, color = NumStimuli))+
  geom_jitter(alpha = 0.3)+
  theme_classic()+
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# test 1: linear model with binomial response variable
linear.model <- glmer(Correct~W_Score*NumStimuli+(1|FemaleID),
                 data = final_na,
                 family = "binomial"
                 )
Anova(linear.model)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: Correct
##                      Chisq Df Pr(>Chisq)    
## W_Score            222.736  1  < 2.2e-16 ***
## NumStimuli         195.552  2  < 2.2e-16 ***
## W_Score:NumStimuli  23.315  2  8.655e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(linear.model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Correct ~ W_Score * NumStimuli + (1 | FemaleID)
##    Data: final_na
## 
##      AIC      BIC   logLik deviance df.resid 
##   1078.7   1115.8   -532.3   1064.7     1474 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.6610 -0.3403 -0.0391  0.3754  4.9936 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  FemaleID (Intercept) 0.6306   0.7941  
## Number of obs: 1481, groups:  FemaleID, 127
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           0.7048     0.1425   4.946 7.58e-07 ***
## W_Score               1.2824     0.1278  10.035  < 2e-16 ***
## NumStimuli4          -2.3384     0.2147 -10.892  < 2e-16 ***
## NumStimuli8          -5.5720     0.5109 -10.905  < 2e-16 ***
## W_Score:NumStimuli4   0.2722     0.1800   1.513     0.13    
## W_Score:NumStimuli8   1.5379     0.3193   4.816 1.46e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) W_Scor NmStm4 NmStm8 W_S:NS4
## W_Score      0.100                             
## NumStimuli4 -0.519 -0.150                      
## NumStimuli8 -0.233 -0.105  0.209               
## W_Scr:NmSt4 -0.048 -0.593 -0.341  0.006        
## W_Scr:NmSt8 -0.020 -0.314 -0.005 -0.811  0.244
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
emmeans(linear.model, pairwise ~ W_Score |NumStimuli)
## $emmeans
## NumStimuli = 2:
##  W_Score emmean    SE  df asymp.LCL asymp.UCL
##    0.388   1.20 0.156 Inf     0.898     1.508
## 
## NumStimuli = 4:
##  W_Score emmean    SE  df asymp.LCL asymp.UCL
##    0.388  -1.03 0.158 Inf    -1.339    -0.721
## 
## NumStimuli = 8:
##  W_Score emmean    SE  df asymp.LCL asymp.UCL
##    0.388  -3.77 0.393 Inf    -4.541    -3.003
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95 
## 
## $contrasts
## NumStimuli = 2:
##  contrast  estimate SE df z.ratio p.value
##  (nothing)   nonEst NA NA      NA      NA
## 
## NumStimuli = 4:
##  contrast  estimate SE df z.ratio p.value
##  (nothing)   nonEst NA NA      NA      NA
## 
## NumStimuli = 8:
##  contrast  estimate SE df z.ratio p.value
##  (nothing)   nonEst NA NA      NA      NA
## 
## Note: contrasts are still on the logit scale
emmeans(linear.model, pairwise ~ NumStimuli |W_Score)
## $emmeans
## W_Score = 0.388:
##  NumStimuli emmean    SE  df asymp.LCL asymp.UCL
##  2            1.20 0.156 Inf     0.898     1.508
##  4           -1.03 0.158 Inf    -1.339    -0.721
##  8           -3.77 0.393 Inf    -4.541    -3.003
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95 
## 
## $contrasts
## W_Score = 0.388:
##  contrast                  estimate    SE  df z.ratio p.value
##  NumStimuli2 - NumStimuli4     2.23 0.202 Inf  11.062  <.0001
##  NumStimuli2 - NumStimuli8     4.97 0.417 Inf  11.935  <.0001
##  NumStimuli4 - NumStimuli8     2.74 0.405 Inf   6.764  <.0001
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 3 estimates
# plot of linear model
# install.packages("sjPlot"), potentially introduce more points
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
plot_model(linear.model,
  type = "pred",
  terms = c("W_Score", "NumStimuli")
) +
  labs(y = "Prob(correct)")+
  theme_classic()
## Data were 'prettified'. Consider using `terms="W_Score [all]"` to get
##   smooth plots.

# test 2: chi square for distribution - just shows that distributions are not equal, so not super helpful
chi_square <- data.frame(final$correct,final$NumStimuli)
chi_square <- table(final$correct,final$NumStimuli, useNA="ifany")
chisq.test(chi_square)
## 
##  Pearson's Chi-squared test
## 
## data:  chi_square
## X-squared = 277.05, df = 4, p-value < 2.2e-16

Histograms across choice tests

ggplot(subset(dat2_type2, !is.na(Score)), aes(x = Chosen, y = Score))+
  geom_boxplot()+
  theme_classic()+
  labs(title = "2 choice score")

ggplot(subset(dat2_type2, !is.na(RealizedCR)), aes(x = Chosen, y = RealizedCR))+
  geom_boxplot()+
  theme_classic()+
  labs(title = "2 choice CR")

ggplot(subset(dat2_type2, !is.na(RealizedCVw)), aes(x = Chosen, y = RealizedCVw))+
  geom_boxplot()+
  theme_classic()+
  labs(title = "2 choice CV")

ggplot(subset(dat4_type2, !is.na(Score)), aes(x = Choice, y = Score))+
  geom_boxplot()+
  theme_classic()+
  labs(title = "4 choice score")

ggplot(subset(dat4_type2, !is.na(Score)), aes(x = Chosen, y = Score))+
  geom_boxplot()+
  theme_classic()+
  labs(title = "4 choice score collapsed")

ggplot(subset(dat8_type2, !is.na(Score)), aes(x = Choice, y = Score))+
  geom_boxplot()+
  theme_classic()+
  labs(title = "8 choice score")

ggplot(subset(dat8_type2, !is.na(Score)), aes(x = Chosen, y = Score))+
  geom_boxplot()+
  theme_classic()+
  labs(title = "8 choice score collapsed")

Z-Scores

ggplot(realized, aes(x = CR_Z, y = CV_Z, color = Score))+
  geom_point()+
  theme_classic()+
  xlim(-3,4.5)+
  ylim(-5.5,2.5)

ggplot(realized, aes(x = CR_Z, y = CV_Z, color = Score))+
  geom_point()+
  theme_classic()+
  scale_color_gradient(low = "dodgerblue", high = "firebrick2")+
  xlim(-3,4.5)+
  ylim(-5.5,2.5)

2 Choice Test Analysis

dat2 <- dat2 %>% mutate(CRdif = W_CR - L_CR)
dat2 <- dat2 %>% mutate(CVdif = W_CVw - L_CVw)
dat2 <- dat2 %>% mutate(scoredif = W_Score - L_Score)

dat2_na <- subset(dat2, (!is.na(dat2[,7])))

ggplot(data = dat2_na, aes(x = CRdif, y = Latency))+
  geom_point()+
  theme_classic()+
  labs(title = "2 choice test, dif in CRs")

ggplot(data = dat2_na, aes(x = CRdif, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "2 choice test, dif in CRs")

ggplot(data = dat2_na, aes(x = CRdif, y = Latency, color = correct))+
  stat_smooth(geom = "smooth", method = "loess", se = FALSE)+
  geom_point(alpha = 0.5)+
  theme_classic()+
  labs(title = "2 choice test, dif in CRs")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = dat2_na, aes(x = CVdif, y = Latency))+
  geom_point()+
  theme_classic()+
  labs(title = "2 choice test, dif in CVs")

ggplot(data = dat2_na, aes(x = CVdif, y = Latency, color = correct))+
  geom_point(alpha = 0.3)+
  theme_classic()+
  labs(title = "2 choice test, dif in CVs")

ggplot(data = dat2_na, aes(x = CVdif, y = Latency, color = correct))+
  stat_smooth(geom = "smooth", method = "loess", se = FALSE)+
  geom_point(alpha = 0.3)+
  theme_classic()+
  labs(title = "2 choice test, dif in CVs")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = dat2_na, aes(x = scoredif, y = Latency))+
  geom_point()+
  theme_classic()+
  labs(title = "2 choice test, dif in scores")

ggplot(data = dat2_na, aes(x = scoredif, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "2 choice test, dif in scores")

linear.regression <- lm(Latency~CRdif*CVdif, dat2_na)
summary(linear.regression)
## 
## Call:
## lm(formula = Latency ~ CRdif * CVdif, data = dat2_na)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -86.33 -37.25 -17.45  14.89 501.47 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  91.6701     3.1335  29.254   <2e-16 ***
## CRdif        -0.7354     0.7497  -0.981   0.3271    
## CVdif         6.7813    17.2500   0.393   0.6944    
## CRdif:CVdif   6.2546     3.3232   1.882   0.0604 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 65.62 on 498 degrees of freedom
## Multiple R-squared:  0.01426,    Adjusted R-squared:  0.008324 
## F-statistic: 2.402 on 3 and 498 DF,  p-value: 0.06694
plot_ly(dat2_na, x = ~CRdif, y = ~CVdif, z = ~(Latency),
        size = 1,
        type = "scatter3d",
        mode = "markers")
ggplot(dat2_na, aes(x = CRdif, y = CVdif, color = log(Latency)))+
  geom_point()+
  theme_classic()+
  scale_color_gradient(low = "dodgerblue", high = "firebrick2")

4 Choice Test Analysis

dat4_type2_not_chosen <- dat4_type2 %>% filter(Chosen == "not_chosen")

dat4_type3 <- dat4_type2_not_chosen %>% group_by(Trial_ID) %>%
  summarize(maxscore = max(Score),
            secondscore = nth(Score, 2, order_by = Score),
            worstscore = nth(Score, 1, order_by = Score),,
            maxCR = max(RealizedCR),
            secondCR = nth(RealizedCR, 2, order_by = RealizedCR),
            worstCR = nth(RealizedCR, 1, order_by = RealizedCR),
            maxCV = max(RealizedCVw),
            secondCV = nth(RealizedCVw, 2, order_by = RealizedCVw),
            worstCV = nth(RealizedCVw, 1, order_by = RealizedCVw),
            )

dat4_type3 <- as.data.frame(dat4_type3)

dat4_type_1_3 <- left_join(dat4,dat4_type3,
                  by = join_by(Trial_ID == Trial_ID)
                  )

dat4_type_1_3 <- dat4_type_1_3 %>% mutate(CRdif12 = W_CR-secondCR) %>% mutate(CRdif14 = W_CR-worstCR)
dat4_type_1_3 <- dat4_type_1_3 %>% mutate(CVdif12 = W_CVw-secondCV) %>% mutate(CVdif14 = W_CVw-worstCV)
dat4_type_1_3 <- dat4_type_1_3 %>% mutate(scoredif12 = W_Score-secondscore) %>% mutate(scoredif14 = W_Score-worstscore)

dat4_type_1_3_na <- subset(dat4_type_1_3, (!is.na(dat4_type_1_3[,7])))

ggplot(data = dat4_type_1_3_na, aes(x = CRdif12, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "4 choice test, dif in CRs between best and second best")

ggplot(data = dat4_type_1_3_na, aes(x = CRdif14, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "4 choice test, dif in CRs between best and worst")

ggplot(data = dat4_type_1_3_na, aes(x = CVdif12, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "4 choice test, dif in CVs between best and second best")

ggplot(data = dat4_type_1_3_na, aes(x = CVdif14, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "4 choice test, dif in CVs between best and worst")

ggplot(data = dat4_type_1_3_na, aes(x = scoredif12, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "4 choice test, dif in scores between best and second best")

ggplot(data = dat4_type_1_3_na, aes(x = scoredif14, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "4 choice test, dif in scores between best and worst")

8 Choice Test Analysis

dat8_type2_not_chosen <- dat8_type2 %>% filter(Chosen == "not_chosen")

dat8_type3 <- dat8_type2_not_chosen %>% group_by(Trial_ID) %>%
  summarize(maxscore = max(Score),
            secondscore = nth(Score, 6, order_by = Score),
            thirdscore = nth(Score, 5, order_by = Score),
            fourthscore = nth(Score, 4, order_by = Score),
            fifthscore = nth(Score, 3, order_by = Score),
            sixthscore = nth(Score, 2, order_by = Score),
            worstscore = nth(Score, 1, order_by = Score),
            maxCR = max(RealizedCR),
            secondCR = nth(RealizedCR, 6, order_by = RealizedCR),
            thirdCR = nth(RealizedCR, 5, order_by = RealizedCR),
            fourthCR = nth(RealizedCR, 4, order_by = RealizedCR),
            fifthCR = nth(RealizedCR, 3, order_by = RealizedCR),
            sixthCR = nth(RealizedCR, 2, order_by = RealizedCR),
            worstCR = nth(RealizedCR, 1, order_by = RealizedCR),
            maxCV = max(RealizedCVw),
            secondCV = nth(RealizedCVw, 6, order_by = RealizedCVw),
            thirdCV = nth(RealizedCVw, 5, order_by = RealizedCVw),
            fourthCV = nth(RealizedCVw, 4, order_by = RealizedCVw),
            fifthCV = nth(RealizedCVw, 3, order_by = RealizedCVw),
            sixthCV = nth(RealizedCVw, 2, order_by = RealizedCVw),
            worstCV = nth(RealizedCVw, 1, order_by = RealizedCVw),
            )

dat8_type3 <- as.data.frame(dat8_type3)

dat8_type_1_3 <- left_join(dat8,dat8_type3,
                  by = join_by(Trial_ID == Trial_ID)
                  )

dat8_type_1_3 <- dat8_type_1_3 %>% mutate(CRdif12 = W_CR-secondCR) %>% mutate(CRdif14 = W_CR-worstCR)
dat8_type_1_3 <- dat8_type_1_3 %>% mutate(CVdif12 = W_CVw-secondCV) %>% mutate(CVdif14 = W_CVw-worstCV)
dat8_type_1_3 <- dat8_type_1_3 %>% mutate(scoredif12 = W_Score-secondscore) %>% mutate(scoredif14 = W_Score-worstscore)

dat8_type_1_3_na <- subset(dat8_type_1_3, (!is.na(dat8_type_1_3[,7])))
ggplot(data = dat8_type_1_3_na, aes(x = CRdif12, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "8 choice test, dif in CRs between best and second best")

ggplot(data = dat8_type_1_3_na, aes(x = CRdif14, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "8 choice test, dif in CRs between best and worst")

ggplot(data = dat8_type_1_3, aes(x = CVdif12, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "8 choice test, dif in CVs between best and second best")
## Warning: Removed 33 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(data = dat8_type_1_3_na, aes(x = CVdif14, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "8 choice test, dif in CVs between best and worst")

ggplot(data = dat8_type_1_3_na, aes(x = scoredif12, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "8 choice test, dif in scores between best and second best")

ggplot(data = dat8_type_1_3_na, aes(x = scoredif14, y = Latency, color = correct))+
  geom_point()+
  theme_classic()+
  labs(title = "8 choice test, dif in scores between best and worst")

Combining the dif choice tests

First, make dat2_na the same so that one column has everything no matter what I chose

dat2_na <- dat2_na %>% mutate(CRdif12 = CRdif) %>% mutate(CRdif14 = CRdif) %>% mutate(CVdif12 = CVdif) %>% mutate(CVdif14 = CVdif) %>% mutate(scoredif12 = scoredif) %>% mutate(scoredif14 = scoredif) %>% mutate(maxCR = W_CR) %>% mutate(maxCV = W_CVw) %>% mutate(best = W_Score)
full.cleaned.data <- bind_rows(dat8_type_1_3_na, dat4_type_1_3_na, dat2_na, .id = NULL)

# write.csv(full.cleaned.data,"09172024_NSF_Data_Scores_Best_Worst.csv",row.names=FALSE)

Choice Overload Questions

Score Dif by num stimuli

ggplot(data = full.cleaned.data, aes(x = scoredif12, y = Latency))+
  geom_point()+
  theme_classic()

ggplot(full.cleaned.data, aes(scoredif12, fill = NumStimuli))+
  geom_histogram(bins = 50, position="identity", alpha = 0.5,
                 mapping = aes(y = after_stat(density)))+
  theme_classic()

ggplot(data = full.cleaned.data, mapping = aes(x = scoredif12, y = Latency, color = NumStimuli))+
  geom_point(alpha = 0.3)+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y ~ x + I(x^2), se = FALSE)

ggplot(data = full.cleaned.data, aes(x = CRdif12, y = Latency))+
  geom_point()+
  theme_classic()

CR dif by num stimuli

ggplot(full.cleaned.data, aes(CRdif12, fill = NumStimuli))+
  geom_histogram(bins = 50, position="identity", alpha = 0.5,
                 mapping = aes(y = after_stat(density)))+
  theme_classic()

ggplot(data = full.cleaned.data, mapping = aes(x = CRdif12, y = Latency, color = NumStimuli))+
  geom_point(alpha = 0.3)+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y ~ x + I(x^2), se = FALSE)

ggplot(data = full.cleaned.data, aes(x = CVdif12, y = Latency))+
  geom_point()+
  theme_classic()

CV dif by num stimuli

ggplot(full.cleaned.data, aes(CVdif12, fill = NumStimuli))+
  geom_histogram(bins = 50, position="identity", alpha = 0.5,
                 mapping = aes(y = after_stat(density)))+
  theme_classic()

ggplot(data = full.cleaned.data, mapping = aes(x = CVdif12, y = Latency, color = NumStimuli))+
  geom_point(alpha = 0.3)+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y ~ x + I(x^2), se = FALSE)

close v not

full.cleaned.data <- full.cleaned.data %>% 
  mutate(absCRdif = abs(CRdif12)) %>%
  mutate(absCVdif = abs(CVdif12)) %>%
  mutate(absScoredif = abs(scoredif12)) %>%
  mutate(CRclose = case_when(absCRdif <= 3.09349763 ~ 'Close',
                             absCRdif >= 3.09349763 ~ 'Far')) %>%
  mutate(CVclose = case_when(absCVdif <= 0.12313909 ~ 'Close',
                             absCVdif >= 0.12313909 ~ 'Far')) %>%
  mutate(Scoreclose = case_when(absScoredif <= 1 ~ 'Close',
                                absScoredif >= 1 ~ 'Far'))
ggplot(full.cleaned.data, aes(x = absCRdif, y = Latency))+
  geom_point()+
  theme_classic()

ggplot(full.cleaned.data, aes(x = absCVdif, y = Latency, color = CRclose))+
  geom_point()+
  theme_classic()

ggplot(full.cleaned.data, aes(x = absCVdif, y = Latency, color = CRclose))+
  geom_point(alpha = 0.3)+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y ~ poly(x, 2), se = FALSE)

ggplot(full.cleaned.data, aes(x = absScoredif, y = Latency, color = CRclose))+
  geom_point()+
  theme_classic()

ggplot(full.cleaned.data, aes(x = absScoredif, y = Latency, color = CRclose))+
  geom_point(alpha = 0.3)+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y ~ poly(x, 2), se = FALSE)

ggplot(full.cleaned.data, aes(x = absCVdif, y = Latency))+
  geom_point()+
  theme_classic()

ggplot(full.cleaned.data, aes(x = absCRdif, y = Latency, color = CVclose))+
  geom_point()+
  theme_classic()

ggplot(full.cleaned.data, aes(x = absCRdif, y = Latency, color = CVclose))+
  geom_point(alpha = 0.3)+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y ~ poly(x, 2), se = FALSE)

ggplot(full.cleaned.data, aes(x = absScoredif, y = Latency, color = CVclose))+
  geom_point()+
  theme_classic()

ggplot(full.cleaned.data, aes(x = absScoredif, y = Latency, color = CVclose))+
  geom_point(alpha = 0.3)+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y ~ poly(x, 2), se = FALSE)

ggplot(full.cleaned.data, aes(x = absScoredif, y = Latency))+
  geom_point()+
  theme_classic()

ggplot(full.cleaned.data, aes(best, fill = NumStimuli))+
  geom_histogram(bins = 50, position="identity", alpha = 0.5,
                 mapping = aes(y = stat(density)))+
  theme_classic()
## Warning: `stat(density)` was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggplot(full.cleaned.data, aes(maxCR, fill = NumStimuli))+
  geom_histogram(bins = 50, position="identity", alpha = 0.5,
                 mapping = aes(y = stat(density)))+
  theme_classic()

ggplot(full.cleaned.data, aes(maxCV, fill = NumStimuli))+
  geom_histogram(bins = 50, position="identity", alpha = 0.5,
                 mapping = aes(y = stat(density)))+
  theme_classic()

ggplot(full.cleaned.data, aes(x = best, y = Latency, color = absScoredif))+
  geom_point()+
  theme_classic()

ggplot(full.cleaned.data, aes(x = best, y = absScoredif, color = correct))+
  geom_point()+
  theme_classic()+ 
  facet_grid(NumStimuli ~ .)

ggplot(full.cleaned.data, aes(x = best, y = absScoredif, color = correct))+
  geom_point(alpha = 0.3)+
  theme_classic()+ 
  stat_smooth(method = 'lm', se = FALSE)+
  facet_grid(NumStimuli ~ .)
## `geom_smooth()` using formula = 'y ~ x'

ggplot(full.cleaned.data, aes(x = best, y = absScoredif, color = correct))+
  geom_point()+
  theme_classic()

full.cleaned.data$SetOrder <- as.factor(full.cleaned.data$SetOrder)

Set Order

four_eight_data <- full.cleaned.data %>% filter(NumStimuli != 2)
four_data <- full.cleaned.data %>% filter(NumStimuli == 4)
eight_data <- full.cleaned.data %>% filter(NumStimuli == 8)

Four Choice

four_data <- four_data %>% mutate(type = case_when(Choice_ID == '4.1a' | Choice_ID == '4.1b' ~ 'Set_1',
                                                   Choice_ID == '4.2a' | Choice_ID == '4.2b' ~ 'Set_2',))

ggplot(four_data, aes(x = SetOrder, y = Latency))+
  geom_boxplot()+
  facet_grid(.~type)+
  theme_classic()

linear.four <- lm(data = four_data, Latency ~ SetOrder + type)
summary(linear.four)
## 
## Call:
## lm(formula = Latency ~ SetOrder + type, data = four_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -100.18  -45.25  -15.13   26.90  317.85 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  126.698     10.224  12.392  < 2e-16 ***
## SetOrder2     -8.553     14.043  -0.609  0.54280    
## SetOrder3    -45.867     14.582  -3.145  0.00176 ** 
## SetOrder4     -4.443     14.204  -0.313  0.75459    
## SetOrder5    -20.244     14.207  -1.425  0.15481    
## SetOrder6    -22.620     14.120  -1.602  0.10980    
## SetOrder7    -36.761     14.385  -2.556  0.01090 *  
## SetOrder8      5.517     14.303   0.386  0.69988    
## SetOrder9    -21.677     14.207  -1.526  0.12771    
## SetOrder10   -26.259     14.292  -1.837  0.06677 .  
## SetOrder11   -24.894     15.163  -1.642  0.10128    
## SetOrder12   -16.453     14.501  -1.135  0.25710    
## typeSet_2     -1.521      6.069  -0.251  0.80220    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 67.64 on 491 degrees of freedom
## Multiple R-squared:  0.04324,    Adjusted R-squared:  0.01986 
## F-statistic: 1.849 on 12 and 491 DF,  p-value: 0.03839

Eight Choice

eight_data$SetOrder <- as.numeric(eight_data$SetOrder)
eight_data <- eight_data %>% mutate(Half = case_when(SetOrder <= 6 ~ 'First',
                                                     SetOrder >= 7 ~ 'Second'))
eight_data$SetOrder <- as.factor(eight_data$SetOrder)

ggplot(eight_data, aes(x = SetOrder, y = Latency))+
  geom_boxplot()+
  theme_classic()

eight_data %>% group_by(Half) %>%
  summarize(mean.lat = mean(Latency),
            correct = sum(correct == 'Correct'))
## # A tibble: 2 × 3
##   Half   mean.lat correct
##   <chr>     <dbl>   <int>
## 1 First      145       56
## 2 Second     115.      42
linear.eight <- lm(data = eight_data, Latency ~ SetOrder)
Anova(linear.eight)
## Anova Table (Type II tests)
## 
## Response: Latency
##            Sum Sq  Df F value    Pr(>F)    
## SetOrder   357584  11  3.6235 6.185e-05 ***
## Residuals 4153726 463                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(linear.eight)
## 
## Call:
## lm(formula = Latency ~ SetOrder, data = eight_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -169.75  -57.03  -26.35   29.52  457.65 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   204.75      15.79  12.970  < 2e-16 ***
## SetOrder2     -62.47      22.33  -2.798 0.005352 ** 
## SetOrder3     -54.45      21.40  -2.545 0.011263 *  
## SetOrder4     -76.40      20.79  -3.675 0.000266 ***
## SetOrder5     -59.22      22.65  -2.614 0.009228 ** 
## SetOrder6     -94.24      21.40  -4.404 1.32e-05 ***
## SetOrder7     -68.12      23.41  -2.909 0.003799 ** 
## SetOrder8     -80.30      21.29  -3.772 0.000183 ***
## SetOrder9    -100.25      21.51  -4.660 4.14e-06 ***
## SetOrder10   -117.64      22.03  -5.340 1.46e-07 ***
## SetOrder11    -86.72      21.89  -3.962 8.62e-05 ***
## SetOrder12    -83.48      21.63  -3.859 0.000130 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 94.72 on 463 degrees of freedom
## Multiple R-squared:  0.07926,    Adjusted R-squared:  0.05739 
## F-statistic: 3.623 on 11 and 463 DF,  p-value: 6.185e-05
emmeans(linear.eight, pairwise ~ SetOrder)
## $emmeans
##  SetOrder emmean   SE  df lower.CL upper.CL
##  1         204.8 15.8 463    173.7      236
##  2         142.3 15.8 463    111.3      173
##  3         150.3 14.4 463    121.9      179
##  4         128.3 13.5 463    101.8      155
##  5         145.5 16.2 463    113.6      177
##  6         110.5 14.4 463     82.1      139
##  7         136.6 17.3 463    102.7      171
##  8         124.5 14.3 463     96.4      153
##  9         104.5 14.6 463     75.8      133
##  10         87.1 15.4 463     56.9      117
##  11        118.0 15.2 463     88.2      148
##  12        121.3 14.8 463     92.2      150
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                estimate   SE  df t.ratio p.value
##  SetOrder1 - SetOrder2      62.47 22.3 463   2.798  0.1841
##  SetOrder1 - SetOrder3      54.45 21.4 463   2.545  0.3150
##  SetOrder1 - SetOrder4      76.40 20.8 463   3.675  0.0140
##  SetOrder1 - SetOrder5      59.22 22.7 463   2.614  0.2746
##  SetOrder1 - SetOrder6      94.24 21.4 463   4.404  0.0008
##  SetOrder1 - SetOrder7      68.12 23.4 463   2.909  0.1411
##  SetOrder1 - SetOrder8      80.30 21.3 463   3.772  0.0099
##  SetOrder1 - SetOrder9     100.25 21.5 463   4.660  0.0003
##  SetOrder1 - SetOrder10    117.64 22.0 463   5.340  <.0001
##  SetOrder1 - SetOrder11     86.72 21.9 463   3.962  0.0049
##  SetOrder1 - SetOrder12     83.48 21.6 463   3.859  0.0072
##  SetOrder2 - SetOrder3      -8.02 21.4 463  -0.375  1.0000
##  SetOrder2 - SetOrder4      13.93 20.8 463   0.670  0.9999
##  SetOrder2 - SetOrder5      -3.25 22.7 463  -0.144  1.0000
##  SetOrder2 - SetOrder6      31.77 21.4 463   1.485  0.9443
##  SetOrder2 - SetOrder7       5.64 23.4 463   0.241  1.0000
##  SetOrder2 - SetOrder8      17.82 21.3 463   0.837  0.9995
##  SetOrder2 - SetOrder9      37.78 21.5 463   1.756  0.8407
##  SetOrder2 - SetOrder10     55.17 22.0 463   2.505  0.3397
##  SetOrder2 - SetOrder11     24.25 21.9 463   1.108  0.9943
##  SetOrder2 - SetOrder12     21.01 21.6 463   0.971  0.9982
##  SetOrder3 - SetOrder4      21.96 19.8 463   1.109  0.9942
##  SetOrder3 - SetOrder5       4.77 21.7 463   0.220  1.0000
##  SetOrder3 - SetOrder6      39.79 20.4 463   1.948  0.7282
##  SetOrder3 - SetOrder7      13.67 22.5 463   0.607  1.0000
##  SetOrder3 - SetOrder8      25.85 20.3 463   1.273  0.9821
##  SetOrder3 - SetOrder9      45.80 20.5 463   2.229  0.5294
##  SetOrder3 - SetOrder10     63.20 21.1 463   2.997  0.1128
##  SetOrder3 - SetOrder11     32.28 20.9 463   1.541  0.9283
##  SetOrder3 - SetOrder12     29.03 20.7 463   1.404  0.9624
##  SetOrder4 - SetOrder5     -17.18 21.1 463  -0.813  0.9997
##  SetOrder4 - SetOrder6      17.84 19.8 463   0.901  0.9991
##  SetOrder4 - SetOrder7      -8.29 22.0 463  -0.377  1.0000
##  SetOrder4 - SetOrder8       3.89 19.7 463   0.198  1.0000
##  SetOrder4 - SetOrder9      23.85 19.9 463   1.197  0.9890
##  SetOrder4 - SetOrder10     41.24 20.5 463   2.014  0.6835
##  SetOrder4 - SetOrder11     10.32 20.3 463   0.508  1.0000
##  SetOrder4 - SetOrder12      7.08 20.0 463   0.353  1.0000
##  SetOrder5 - SetOrder6      35.02 21.7 463   1.611  0.9046
##  SetOrder5 - SetOrder7       8.90 23.7 463   0.375  1.0000
##  SetOrder5 - SetOrder8      21.07 21.6 463   0.974  0.9981
##  SetOrder5 - SetOrder9      41.03 21.9 463   1.878  0.7726
##  SetOrder5 - SetOrder10     58.42 22.4 463   2.613  0.2754
##  SetOrder5 - SetOrder11     27.50 22.2 463   1.238  0.9857
##  SetOrder5 - SetOrder12     24.26 22.0 463   1.104  0.9944
##  SetOrder6 - SetOrder7     -26.12 22.5 463  -1.159  0.9916
##  SetOrder6 - SetOrder8     -13.94 20.3 463  -0.686  0.9999
##  SetOrder6 - SetOrder9       6.01 20.5 463   0.293  1.0000
##  SetOrder6 - SetOrder10     23.41 21.1 463   1.110  0.9942
##  SetOrder6 - SetOrder11     -7.51 20.9 463  -0.359  1.0000
##  SetOrder6 - SetOrder12    -10.76 20.7 463  -0.520  1.0000
##  SetOrder7 - SetOrder8      12.18 22.4 463   0.543  1.0000
##  SetOrder7 - SetOrder9      32.13 22.6 463   1.419  0.9594
##  SetOrder7 - SetOrder10     49.53 23.1 463   2.141  0.5934
##  SetOrder7 - SetOrder11     18.61 23.0 463   0.809  0.9997
##  SetOrder7 - SetOrder12     15.37 22.8 463   0.675  0.9999
##  SetOrder8 - SetOrder9      19.95 20.4 463   0.977  0.9981
##  SetOrder8 - SetOrder10     37.35 21.0 463   1.781  0.8279
##  SetOrder8 - SetOrder11      6.43 20.8 463   0.309  1.0000
##  SetOrder8 - SetOrder12      3.19 20.6 463   0.155  1.0000
##  SetOrder9 - SetOrder10     17.39 21.2 463   0.820  0.9996
##  SetOrder9 - SetOrder11    -13.53 21.1 463  -0.642  1.0000
##  SetOrder9 - SetOrder12    -16.77 20.8 463  -0.806  0.9997
##  SetOrder10 - SetOrder11   -30.92 21.6 463  -1.432  0.9567
##  SetOrder10 - SetOrder12   -34.16 21.3 463  -1.602  0.9080
##  SetOrder11 - SetOrder12    -3.24 21.2 463  -0.153  1.0000
## 
## P value adjustment: tukey method for comparing a family of 12 estimates
eight_data %>%
  count(SetOrder, correct) %>%       
  group_by(SetOrder) %>%
  mutate(pct= prop.table(n) * 100) %>%
  ggplot() + aes(SetOrder, pct, fill=correct) +
  geom_bar(stat="identity") +
  ylab("% Correct Identifications") +
  geom_text(aes(label=paste0(sprintf("%1.1f", pct),"%")),
            position=position_stack(vjust=0.5)) +
  theme_classic()

Categorization / Correct

full.cleaned.data <- full.cleaned.data %>% mutate(Correct = case_when(correct == 'Correct' ~ 1,
                                                     correct == 'Incorrect' ~ 0))

full.cleaned.data$Correct <- factor(full.cleaned.data$Correct)

correct.model <- glmer(Correct~best*absScoredif+(1|FemaleID),
                 data = full.cleaned.data,
                 family = "binomial"
                 )
Anova(correct.model)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: Correct
##                   Chisq Df Pr(>Chisq)    
## best              4.152  1    0.04159 *  
## absScoredif      49.227  1  2.280e-12 ***
## best:absScoredif 32.517  1  1.181e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(correct.model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Correct ~ best * absScoredif + (1 | FemaleID)
##    Data: full.cleaned.data
## 
##      AIC      BIC   logLik deviance df.resid 
##   1899.3   1925.8   -944.7   1889.3     1476 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.3716 -0.7977 -0.5803  1.0780  2.1279 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  FemaleID (Intercept) 0.06942  0.2635  
## Number of obs: 1481, groups:  FemaleID, 127
## 
## Fixed effects:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -0.47033    0.12337  -3.812 0.000138 ***
## best             -0.41140    0.07055  -5.831 5.51e-09 ***
## absScoredif       0.14799    0.06684   2.214 0.026826 *  
## best:absScoredif  0.17866    0.03133   5.702 1.18e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) best   absScr
## best        -0.581              
## absScoredif -0.775  0.398       
## bst:bsScrdf  0.516 -0.816 -0.598
full.cleaned.data <- full.cleaned.data %>% 
  mutate(absScoredifClass = case_when(absScoredif <=1 ~ '<1',
                                      absScoredif >=1 & absScoredif <=2 ~ '1-2',
                                      absScoredif >=2 ~ '2+'))

full.cleaned.data <- full.cleaned.data %>% mutate(bestClass = case_when(best <= -2 ~ '<-2',
                               best >= -2 & best <= -1 ~ '-2 to -1',
                               best >= -1 & best <= 0~ '-1 to 0',
                               best >= 0 & best <= 1~ '0 to 1',
                               best >= 1 & best <= 2~ '1 to 2',
                               best >= 2 ~ '>2',))

full.cleaned.data$bestClass <- factor(full.cleaned.data$bestClass, 
                                      levels = c("<-2", "-2 to -1", "-1 to 0", "0 to 1", "1 to 2", ">2"))

less.than.one <- full.cleaned.data %>% filter(absScoredifClass == '<1')
one.to.two <- full.cleaned.data %>% filter(absScoredifClass == '1-2')
more.than.2 <- full.cleaned.data %>% filter(absScoredifClass == '2+')

ggplot(full.cleaned.data, aes(x = best, y = Correct))+
  geom_jitter(alpha = 0.3)+
  geom_boxplot(outliers = FALSE)+
  facet_grid(absScoredifClass~.)+
  theme_classic()

full.cleaned.data %>%
  count(bestClass, correct) %>%       
  group_by(bestClass) %>%
  mutate(pct= prop.table(n) * 100) %>%
  ggplot() + aes(bestClass, pct, fill=correct) +
  geom_bar(stat="identity") +
  ylab("% Correct Identifications") +
  geom_text(aes(label=paste0(sprintf("%1.1f", pct),"%")),
            position=position_stack(vjust=0.5)) +
  theme_classic()

less.than.one %>%
  count(bestClass, correct) %>%       
  group_by(bestClass) %>%
  mutate(pct= prop.table(n) * 100) %>%
  ggplot() + aes(bestClass, pct, fill=correct) +
  geom_bar(stat="identity") +
  ylab("% Correct Identifications") +
  geom_text(aes(label=paste0(sprintf("%1.1f", pct),"%")),
            position=position_stack(vjust=0.5)) +
  theme_classic()+
  labs(title = "When AbsScoreDif < 1")

one.to.two %>%
  count(bestClass, correct) %>%       
  group_by(bestClass) %>%
  mutate(pct= prop.table(n) * 100) %>%
  ggplot() + aes(bestClass, pct, fill=correct) +
  geom_bar(stat="identity") +
  ylab("% Correct Identifications") +
  geom_text(aes(label=paste0(sprintf("%1.1f", pct),"%")),
            position=position_stack(vjust=0.5)) +
  theme_classic()+
  labs(title = "When AbsScoreDif between 1 and 2")

more.than.2 %>%
  count(bestClass, correct) %>%       
  group_by(bestClass) %>%
  mutate(pct= prop.table(n) * 100) %>%
  ggplot() + aes(bestClass, pct, fill=correct) +
  geom_bar(stat="identity") +
  ylab("% Correct Identifications") +
  geom_text(aes(label=paste0(sprintf("%1.1f", pct),"%")),
            position=position_stack(vjust=0.5)) +
  theme_classic()+
  labs(title = "When AbsScoreDif is >2")

full.cleaned.data %>%
  count(absScoredifClass, correct) %>%       
  group_by(absScoredifClass) %>%
  mutate(pct= prop.table(n) * 100) %>%
  ggplot() + aes(absScoredifClass, pct, fill=correct) +
  geom_bar(stat="identity") +
  ylab("% Correct Identifications") +
  geom_text(aes(label=paste0(sprintf("%1.1f", pct),"%")),
            position=position_stack(vjust=0.5)) +
  theme_classic()

Doing Histograms

ggplot(final_type2, aes(RealizedCR, fill = Chosen))+
  geom_histogram(bins = 50, position="identity", alpha = 0.5,
                 mapping = aes(y = stat(density)))+
  theme_classic()+
  labs(title = "Realized CR Histogram, scaled for different sample sizes")
## Warning: Removed 292 rows containing non-finite outside the scale range
## (`stat_bin()`).

ggplot(final_type2, aes(RealizedCVw, fill = Chosen))+
  geom_histogram(bins = 50, position="identity", alpha = 0.5,
                 mapping = aes(y = stat(density)))+
  theme_classic()+
  labs(title = "Realized CV Histogram, scaled for different sample sizes")
## Warning: Removed 292 rows containing non-finite outside the scale range
## (`stat_bin()`).

ggplot(final_type2, aes(Score, fill = Chosen))+
  geom_histogram(bins = 50, position="identity", alpha = 0.5,
                 mapping = aes(y = stat(density)))+
  theme_classic()+
  labs(title = "Realized Score Histogram, scaled for different sample sizes")
## Warning: Removed 292 rows containing non-finite outside the scale range
## (`stat_bin()`).

Females who didn’t choose

NEED TO WRITE OUT THE NA’S AND INPUT IDENTITIES

Investigate factors influencing the lack of decision

no.choice.data <- final[is.na(final$Latency), ]

# write.csv(no.choice.data,"10102024_no_choice_females.csv",row.names=FALSE)

no.choice <- read_csv("~/Desktop/UTK/Tanner Lab/Fall 2024 - RA/R/10102024_no_choice_females_edited.csv")
## Rows: 43 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): FemaleID, Choice_ID, W_Identity, L_Identity, L2_Identity, L3_Ident...
## dbl (12): StimulusSet, SetOrder, Trial_ID, NumStimuli, W_Speaker, L_Speaker,...
## lgl  (1): Latency
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no.choice.identities <- read_xlsx("~/Desktop/UTK/Tanner Lab/Fall 2024 - RA/R/101024_no_choice_speaker_identities.xlsx")

nochoice1 <- left_join(no.choice,no.choice.identities[ , c("Identity", "CR", "CVw", "CR_Z", "CV_Z", "Score")],
                  by = join_by(W_Identity == Identity)
                  )
nochoice1 <- nochoice1 %>% rename(W_CR = CR,
                             W_CVw = CVw,
                             W_CR_Z = CR_Z,
                             W_CV_Z = CV_Z,
                             W_Score = Score)

nochoice2 <- left_join(nochoice1, no.choice.identities[ , c("Identity", "CR", "CVw", "CR_Z", "CV_Z", "Score")],
                  by = join_by(L_Identity == Identity)
                  )

nochoice2 <- nochoice2 %>% rename(L_CR = CR,
                             L_CVw = CVw,
                             L_CR_Z = CR_Z,
                             L_CV_Z = CV_Z,
                             L_Score = Score)


nochoice3 <- left_join(nochoice2, no.choice.identities[ , c("Identity", "CR", "CVw", "CR_Z", "CV_Z", "Score")],
                  by = join_by(L2_Identity == Identity)
                  )

nochoice3 <- nochoice3 %>% rename(L2_CR = CR,
                             L2_CVw = CVw,
                             L2_CR_Z = CR_Z,
                             L2_CV_Z = CV_Z,
                             L2_Score = Score)

nochoice4 <- left_join(nochoice3, no.choice.identities[ , c("Identity", "CR", "CVw", "CR_Z", "CV_Z", "Score")],
                  by = join_by(L3_Identity == Identity)
                  )

nochoice4 <- nochoice4 %>% rename(L3_CR = CR,
                             L3_CVw = CVw,
                             L3_CR_Z = CR_Z,
                             L3_CV_Z = CV_Z,
                             L3_Score = Score)

nochoice5 <- left_join(nochoice4, no.choice.identities[ , c("Identity", "CR", "CVw", "CR_Z", "CV_Z", "Score")],
                  by = join_by(L4_Identity == Identity)
                  )

nochoice5 <- nochoice5 %>% rename(L4_CR = CR,
                             L4_CVw = CVw,
                             L4_CR_Z = CR_Z,
                             L4_CV_Z = CV_Z,
                             L4_Score = Score)

nochoice6 <- left_join(nochoice5, no.choice.identities[ , c("Identity", "CR", "CVw", "CR_Z", "CV_Z", "Score")],
                  by = join_by(L5_Identity == Identity)
                  )

nochoice6 <- nochoice6 %>% rename(L5_CR = CR,
                             L5_CVw = CVw,
                             L5_CR_Z = CR_Z,
                             L5_CV_Z = CV_Z,
                             L5_Score = Score)

nochoice7 <- left_join(nochoice6, no.choice.identities[ , c("Identity", "CR", "CVw", "CR_Z", "CV_Z", "Score")],
                  by = join_by(L6_Identity == Identity)
                  )

nochoice7 <- nochoice7 %>% rename(L6_CR = CR,
                             L6_CVw = CVw,
                             L6_CR_Z = CR_Z,
                             L6_CV_Z = CV_Z,
                             L6_Score = Score)

nochoicefinal <- left_join(nochoice7, no.choice.identities[ , c("Identity", "CR", "CVw", "CR_Z", "CV_Z", "Score")],
                  by = join_by(L7_Identity == Identity)
                  )

nochoicefinal <- nochoicefinal %>% rename(L7_CR = CR,
                             L7_CVw = CVw,
                             L7_CR_Z = CR_Z,
                             L7_CV_Z = CV_Z,
                             L7_Score = Score)
nochoiceidentities.test <- no.choice.identities %>% group_by(Trial_ID) %>%
  summarize(maxscore = max(Score),
            secondscore = nth(Score, 6, order_by = Score),
            thirdscore = nth(Score, 5, order_by = Score),
            fourthscore = nth(Score, 4, order_by = Score),
            fifthscore = nth(Score, 3, order_by = Score),
            sixthscore = nth(Score, 2, order_by = Score),
            worstscore = nth(Score, 1, order_by = Score),
            maxCR = max(CR),
            secondCR = nth(CR, 6, order_by = CR),
            thirdCR = nth(CR, 5, order_by = CR),
            fourthCR = nth(CR, 4, order_by = CR),
            fifthCR = nth(CR, 3, order_by = CR),
            sixthCR = nth(CR, 2, order_by = CR),
            worstCR = nth(CR, 1, order_by = CR),
            maxCV = max(CVw),
            secondCV = nth(CVw, 6, order_by = CVw),
            thirdCV = nth(CVw, 5, order_by = CVw),
            fourthCV = nth(CVw, 4, order_by = CVw),
            fifthCV = nth(CVw, 3, order_by = CVw),
            sixthCV = nth(CVw, 2, order_by = CVw),
            worstCV = nth(CVw, 1, order_by = CVw),
            )

nochoiceidentities.test <- as.data.frame(nochoiceidentities.test)

nochoicefinalfinal <- left_join(nochoicefinal,nochoiceidentities.test,
                  by = join_by(Trial_ID == Trial_ID)
                  )

nochoicefinalfinal <- nochoicefinalfinal %>% mutate(CRdif12 = W_CR-secondCR)
nochoicefinalfinal <- nochoicefinalfinal %>% mutate(CVdif12 = W_CVw-secondCV) 
nochoicefinalfinal <- nochoicefinalfinal %>% mutate(scoredif12 = W_Score-secondscore)
ggplot(nochoicefinalfinal, aes(x = CRdif12, y = CVdif12))+
  geom_point(data = full.cleaned.data)+
  geom_point(color = "red", size = 2)+
  geom_point(color = "dodgerblue", size = 1)+
  theme_classic()