knitr::opts_chunk$set(message = FALSE, warning = FALSE, results='asis')
library(readxl)
library(tidyverse)

# Set personal working directory
setwd("/Users/jessicahenderson/Desktop/psychology/PSYC3361/face_lab/group_data")

# Read excel file into R
  normative <- read_excel("normative.xlsx")

This is my code for figure 2

More comments below the graph!

I have made notes where adjustments have been made and more comments below the histogram.

#creating assigning the normative data set to the name fig2_data
  fig2_data <- normative %>% 
  select(`Overall (%)`) %>% # selecting overall normative accuracy column
  
# plott starts here
  ggplot(data = ., mapping = aes(x = `Overall (%)`)) +
  geom_histogram(
    mapping = aes(y = after_stat(count/sum(count))),   #changed from after_stat(density) to after_stat(count/sum(count))
    binwidth = 2,
    fill = "red", 
    color = "white", 
    closed = "left"   # we have to add in closed left
    ) + 

# coord_cartesian function specifically adjusts the visible portion of the plot's x and y axes while maintaining the original data range.
  coord_cartesian(xlim = c(40, 100)) +  
  scale_y_continuous(limits = c(0, 0.20)) +
  labs(
    x = "UNSW Face Test Score (percent correct)",
    y = "Proportion of respondents") +
  
# normative distribution line
  stat_function(
    fun = dnorm, 
    args = list(
      mean = mean(normative$`Overall (%)`), 
      sd = sd(normative$`Overall (%)`)), 
    color= "black", lwd = 1) +

#reference line for Chance accuracy 
  geom_vline(
    xintercept = 50, color = "black", linetype = "dotted") +
  
  geom_text(
    aes(x = 50, y = 0, label = "CHANCE"), 
    color = "black", size = 3, angle = 90, vjust = -0.5, hjust = -6) + 

#threshold line for Mean + 2 SD
  geom_vline(
    xintercept = 
     mean(normative$`Overall (%)`) + 
     2 * sd(normative$`Overall (%)`), 
    color = "black", linetype = "dotted") +
  
  geom_text(aes(
    x = mean(normative$`Overall (%)`) + 
    2 * sd(normative$`Overall (%)`), y = 0, label = "MEAN +2SD"), 
    color = "black", size = 3, angle = 90, vjust = -0.5, hjust = -4.2) +  
  
#adding N=290 label for participant count 
  geom_text(
    x = 90, y = 0.10, label = paste("N =", nrow(normative)), 
    color = "red", size = 6) +
  theme_minimal() 

print(fig2_data)

Why is the plot like that?

As you can see the graph height is now where it needs to be - (happy)! However the normative distribution is not - (sad) !

To create this Figure we have to first create a frequency table to plot the histogram. Even though we fixed the bin height we can’t fix the normative distribution without a frequency table.

James said he actually didn’t plot the raw data - he plotted a frequency table he made from the raw data. The frequency table shows where the bins are and so even though we have specified binwidth = 2 it looks like the binwidth = 2.5. James confirmed that he used binwidth = 2. so it should be 45-75 but if you look at the frequency table I have made below from the data we are using it shows 45 to 47.5 James showed me his freqency table there should be 5 participants between 45-75 giving a frequency of 5/290 = 0.0172

library(gt)

# Create frequency table
freq_table <- table(normative$`Overall (%)`)

# Convert frequency table to data frame
freq_df <- data.frame("Overall (%)" = as.numeric(names(freq_table)), "Frequency" = as.numeric(freq_table))

# Create gt table object
gt_table <- gt(freq_df, caption = "Frequency Table")

# Display the gt table
gt_table
Frequency Table
Overall.... Frequency
45.00000 1
45.83333 2
46.66667 2
47.50000 1
48.33333 6
49.16667 8
50.00000 3
50.83333 5
51.66667 12
52.50000 8
53.33333 8
54.16667 15
55.00000 17
55.83333 13
56.66667 10
57.50000 9
58.33333 16
59.16667 15
60.00000 23
60.83333 17
61.66667 13
62.50000 18
63.33333 11
64.16667 14
65.00000 7
65.83333 4
66.66667 5
67.50000 6
68.33333 7
69.16667 6
70.00000 1
70.83333 2
71.66667 2
72.50000 2
75.00000 1