library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Point the R session at the folder that holds the course data.
# NOTE(review): this path is machine-specific and changes the working
# directory for the rest of the session; consider a project-relative path.
setwd(dir = "~/Dropbox/psychology/PSYC3361/data")

# Load the normative sample from the Excel workbook in that folder
normative <- read_excel(path = "normative.xlsx")
# Figure 2: histogram of overall accuracy in the normative sample, overlaid
# with a normal curve and two dotted reference lines (chance, mean + 2 SD).
# Despite its name, `fig2_data` holds the finished ggplot object, not a data
# frame.

# Summary statistics are computed once and reused by the normal curve and the
# mean + 2 SD reference line. `na.rm = TRUE` keeps them defined if a score is
# missing; ggplot2 likewise drops non-finite values before binning.
overall_scores <- normative$`Overall (%)`
overall_mean <- mean(overall_scores, na.rm = TRUE)
overall_sd <- sd(overall_scores, na.rm = TRUE)
cutoff_2sd <- overall_mean + 2 * overall_sd

fig2_data <- normative %>%
  select(`Overall (%)`) %>% # keep only the overall accuracy column
  ggplot(mapping = aes(x = `Overall (%)`)) +
  # Histogram on the density scale so the normal curve shares its y axis
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    binwidth = 2,
    fill = "red",
    color = "white"
  ) +
  coord_cartesian(xlim = c(40, 100)) +
  scale_y_continuous(limits = c(0, 0.15)) +
  labs(
    x = "UNSW Face Test Score (percent correct)",
    y = "Proportion of respondents"
  ) +
  # Normal density using the sample mean and SD
  stat_function(
    fun = dnorm,
    args = list(mean = overall_mean, sd = overall_sd),
    color = "black",
    linewidth = 1
  ) +
  # Reference line for chance accuracy. The label uses annotate() so it is
  # drawn once; geom_text() would redraw it for every row of the plot data.
  geom_vline(xintercept = 50, color = "black", linetype = "dotted") +
  annotate(
    "text",
    x = 50, y = 0, label = "CHANCE",
    color = "black", size = 3, angle = 90, vjust = -0.5, hjust = -6
  ) +
  # Threshold line for mean + 2 SD, labelled the same way
  geom_vline(xintercept = cutoff_2sd, color = "black", linetype = "dotted") +
  annotate(
    "text",
    x = cutoff_2sd, y = 0, label = "MEAN +2SD",
    color = "black", size = 3, angle = 90, vjust = -0.5, hjust = -4.2
  ) +
  # Participant count; nrow() counts every row of `normative`
  annotate(
    "text",
    x = 90, y = 0.10, label = paste("N =", nrow(normative)),
    color = "red", size = 6
  ) +
  theme_minimal()

# Render the figure
print(fig2_data)