# Document-wide chunk defaults: message and warning output are switched off
# and the results option is set to "asis" for every chunk.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  results = "asis"
)

library(readxl)
library(tidyverse)

# Folder that holds the group data. The file is read through an explicit path
# instead of calling setwd(), so the session's working directory is left
# untouched. Adjust this one line to point at your own copy of the data.
data_dir <- "/Users/jessicahenderson/Desktop/psychology/PSYC3361/face_lab/group_data"

# Read excel file into R
normative <- read_excel(file.path(data_dir, "normative.xlsx"))

This is my code for figure 2

More comments below the graph!

I have made notes where adjustments have been made and more comments below the histogram.

# Build Figure 2: a histogram of overall normative accuracy with a normal
# curve and two reference lines. Note that, despite its name, fig2_data holds
# the ggplot object rather than a data frame.

# Summary statistics shared by the normal curve and the Mean + 2 SD marker,
# computed once here instead of being repeated inside every layer.
overall_mean <- mean(normative$`Overall (%)`)
overall_sd <- sd(normative$`Overall (%)`)
overall_cutoff <- overall_mean + 2 * overall_sd

fig2_data <- normative %>%
  select(`Overall (%)`) %>% # selecting overall normative accuracy column

  # plot starts here
  ggplot(data = ., mapping = aes(x = `Overall (%)`)) +
  geom_histogram(
    # bar height = proportion of respondents in each bin
    # (changed from after_stat(density) to after_stat(count/sum(count)))
    mapping = aes(y = after_stat(count / sum(count))),
    binwidth = 2,
    fill = "red",
    color = "white",
    closed = "left" # we have to add in closed left
  ) +

  # coord_cartesian() adjusts the visible portion of the plot's x axis while
  # maintaining the original data range.
  coord_cartesian(xlim = c(40, 100)) +
  scale_y_continuous(limits = c(0, 0.20)) +
  labs(
    x = "UNSW Face Test Score (percent correct)",
    y = "Proportion of respondents"
  ) +

  # normative distribution line
  # dnorm() returns a density, whereas the bars are proportions per 2-wide
  # bin, so this curve is not on the same vertical scale as the bars.
  stat_function(
    fun = dnorm,
    args = list(mean = overall_mean, sd = overall_sd),
    color = "black", lwd = 1
  ) +

  # reference line for chance accuracy
  geom_vline(xintercept = 50, color = "black", linetype = "dotted") +

  # annotate() draws the label exactly once; geom_text() with constant
  # aesthetics would redraw it for every row of the data.
  annotate(
    "text",
    x = 50, y = 0, label = "CHANCE",
    color = "black", size = 3, angle = 90, vjust = -0.5, hjust = -6
  ) +

  # threshold line for Mean + 2 SD
  geom_vline(
    xintercept = overall_cutoff, color = "black", linetype = "dotted"
  ) +

  annotate(
    "text",
    x = overall_cutoff, y = 0, label = "MEAN +2SD",
    color = "black", size = 3, angle = 90, vjust = -0.5, hjust = -4.2
  ) +

  # adding the "N = <number of rows>" label for participant count
  annotate(
    "text",
    x = 90, y = 0.10, label = paste("N =", nrow(normative)),
    color = "red", size = 6
  ) +
  theme_minimal()

print(fig2_data)

Why is the plot like that?

As you can see the graph height is now where it needs to be - (happy)! However the normative distribution is not - (sad) !

To create this Figure we have to first create a frequency table to plot the histogram. Even though we fixed the bin height we can’t fix the normative distribution without a frequency table.

James said he didn’t actually plot the raw data – he plotted a frequency table he made from the raw data. The frequency table shows where the bins are, and so even though we have specified binwidth = 2, it looks like the binwidth = 2.5. James confirmed that he used binwidth = 2, so the first bin should be 45-47, but if you look at the frequency table I have made below from the data we are using, it shows 45 to 47.5. James showed me his frequency table: there should be 5 participants between 45 and 47, giving a frequency of 5/290 = 0.0172.

library(gt)

# Create frequency table: one entry per distinct score with its count
freq_table <- table(normative$`Overall (%)`)

# Convert frequency table to data frame.
# check.names = FALSE keeps the column header as "Overall (%)"; with the
# default (TRUE), data.frame() rewrites it to the syntactic name
# "Overall....", which is what then appears in the rendered table.
freq_df <- data.frame(
  "Overall (%)" = as.numeric(names(freq_table)),
  "Frequency" = as.numeric(freq_table),
  check.names = FALSE
)

# Create gt table object
gt_table <- gt(freq_df, caption = "Frequency Table")

# Display the gt table
gt_table

Frequency Table
Overall....	Frequency
45.00000	1
45.83333	2
46.66667	2
47.50000	1
48.33333	6
49.16667	8
50.00000	3
50.83333	5
51.66667	12
52.50000	8
53.33333	8
54.16667	15
55.00000	17
55.83333	13
56.66667	10
57.50000	9
58.33333	16
59.16667	15
60.00000	23
60.83333	17
61.66667	13
62.50000	18
63.33333	11
64.16667	14
65.00000	7
65.83333	4
66.66667	5
67.50000	6
68.33333	7
69.16667	6
70.00000	1
70.83333	2
71.66667	2
72.50000	2
75.00000	1

Ta Da : A graph for all

When I spoke to James he suggested multiplying the density by a scaling factor of 2. But that seemed too easy, so I thought it was cheating. However, ChatGPT says this:

No, multiplying the probability density function (PDF) by a scaling factor of 2 is not considered cheating. It is a common practice to adjust the scale of the PDF to ensure that the area under the curve integrates to 1, which is a property of a valid probability density function.

The area under the curve represents the probability, and it should sum up to 1 for a valid probability distribution. By scaling the PDF, you are adjusting its height to ensure that the total area under the curve is equal to 1. This scaling factor does not affect the shape or characteristics of the distribution, but only ensures that it meets the criteria of a proper probability density function.

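Therefore, multiplying the PDF by 2 in this context is a valid technique to normalize the density and ensure the correct interpretation of probabilities.

Note: the explanation quoted above is not quite right. The normal density from dnorm() already integrates to 1; the factor of 2 is the histogram's bin width. The bars show the proportion of respondents in each 2-point bin, and density × bin width approximates that proportion, so multiplying by 2 puts the curve on the same scale as the bars.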

library(ggplot2)

# Rename "Overall (%)" to "Overall".
# This modifies `normative` itself, so code that refers to `Overall (%)`
# no longer works on it after this point.
colnames(normative)[colnames(normative) == "Overall (%)"] <- "Overall"

# Create frequency table
freq_table <- table(normative$Overall)

# Convert frequency table to data frame
freq_df <- data.frame(
  "Overall" = as.numeric(names(freq_table)),
  "Frequency" = as.numeric(freq_table)
)

# Bin width used by the histogram AND by the curve scaling below; keeping it
# in one place guarantees the two can never drift apart.
bin_width <- 2

# Summary statistics shared by the curve and the Mean + 2 SD marker
overall_mean <- mean(normative$Overall)
overall_sd <- sd(normative$Overall)
overall_cutoff <- overall_mean + 2 * overall_sd

# Plot the histogram and adjusted normal distribution
ggplot(normative, aes(x = Overall)) +
  geom_histogram(
    # bar height = proportion of respondents in each bin
    aes(y = after_stat(count / sum(count))),
    binwidth = bin_width,
    fill = "red",
    color = "white",
    closed = "left"
  ) +
  # Density x bin width approximates the proportion of respondents expected
  # in a bin, which puts the curve on the same scale as the bars.
  stat_function(
    fun = function(x) bin_width * dnorm(x, mean = overall_mean, sd = overall_sd),
    color = "black", lwd = 1
  ) +
  labs(
    x = "Overall",
    y = "Proportion of respondents",
    title = "Histogram with Normal Distribution"
  ) +
  # reference line for chance accuracy
  geom_vline(xintercept = 50, color = "black", linetype = "dotted") +
  # annotate() draws each label exactly once; geom_text() with constant
  # aesthetics would redraw it for every row of `normative`.
  annotate(
    "text",
    x = 50, y = 0, label = "CHANCE",
    color = "black", size = 3, angle = 90, vjust = -0.5, hjust = -4
  ) +
  # threshold line for Mean + 2 SD
  geom_vline(
    xintercept = overall_cutoff, color = "black", linetype = "dotted"
  ) +
  annotate(
    "text",
    x = overall_cutoff, y = 0, label = "MEAN + 2SD",
    color = "black", size = 3, angle = 90, vjust = -0.5, hjust = -3.2
  ) +
  # participant count label
  annotate(
    "text",
    x = 90, y = 0.10, label = paste("N =", nrow(normative)),
    color = "red", size = 6
  ) +
  coord_cartesian(xlim = c(40, 100)) +
  scale_y_continuous(limits = c(0, 0.20)) +
  theme_minimal()

Figure 2 Face Lab Verification

Jessica Henderson

29th June 2023

This is my code for figure 2

More comments below the graph!

Why is the plot like that?

As you can see the graph height is now where it needs to be - (happy)! However the normative distribution is not - (sad) !

Ta Da : A graph for all

The End