knitr::opts_chunk$set(message = FALSE, warning = FALSE, results='asis')
library(readxl)
library(tidyverse)
# Set personal working directory
setwd("/Users/jessicahenderson/Desktop/psychology/PSYC3361/face_lab/group_data")
# Read excel file into R
normative <- read_excel("normative.xlsx")
I have made notes where adjustments have been made and more comments below the histogram.
#creating assigning the normative data set to the name fig2_data
fig2_data <- normative %>%
select(`Overall (%)`) %>% # selecting overall normative accuracy column
# plott starts here
ggplot(data = ., mapping = aes(x = `Overall (%)`)) +
geom_histogram(
mapping = aes(y = after_stat(count/sum(count))), #changed from after_stat(density) to after_stat(count/sum(count))
binwidth = 2,
fill = "red",
color = "white",
closed = "left" # we have to add in closed left
) +
# coord_cartesian function specifically adjusts the visible portion of the plot's x and y axes while maintaining the original data range.
coord_cartesian(xlim = c(40, 100)) +
scale_y_continuous(limits = c(0, 0.20)) +
labs(
x = "UNSW Face Test Score (percent correct)",
y = "Proportion of respondents") +
# normative distribution line
stat_function(
fun = dnorm,
args = list(
mean = mean(normative$`Overall (%)`),
sd = sd(normative$`Overall (%)`)),
color= "black", lwd = 1) +
#reference line for Chance accuracy
geom_vline(
xintercept = 50, color = "black", linetype = "dotted") +
geom_text(
aes(x = 50, y = 0, label = "CHANCE"),
color = "black", size = 3, angle = 90, vjust = -0.5, hjust = -6) +
#threshold line for Mean + 2 SD
geom_vline(
xintercept =
mean(normative$`Overall (%)`) +
2 * sd(normative$`Overall (%)`),
color = "black", linetype = "dotted") +
geom_text(aes(
x = mean(normative$`Overall (%)`) +
2 * sd(normative$`Overall (%)`), y = 0, label = "MEAN +2SD"),
color = "black", size = 3, angle = 90, vjust = -0.5, hjust = -4.2) +
#adding N=290 label for participant count
geom_text(
x = 90, y = 0.10, label = paste("N =", nrow(normative)),
color = "red", size = 6) +
theme_minimal()
print(fig2_data)
To create this Figure we have to first create a frequency table to plot the histogram. Even though we fixed the bin height we can’t fix the normative distribution without a frequency table.
James said he actually didn’t plot the raw data - he plotted a frequency table he made from the raw data. The frequency table shows where the bins are and so even though we have specified binwidth = 2 it looks like the binwidth = 2.5. James confirmed that he used binwidth = 2. so it should be 45-75 but if you look at the frequency table I have made below from the data we are using it shows 45 to 47.5 James showed me his freqency table there should be 5 participants between 45-75 giving a frequency of 5/290 = 0.0172
library(gt)
# Create frequency table
freq_table <- table(normative$`Overall (%)`)
# Convert frequency table to data frame
freq_df <- data.frame("Overall (%)" = as.numeric(names(freq_table)), "Frequency" = as.numeric(freq_table))
# Create gt table object
gt_table <- gt(freq_df, caption = "Frequency Table")
# Display the gt table
gt_table
| Overall.... | Frequency |
|---|---|
| 45.00000 | 1 |
| 45.83333 | 2 |
| 46.66667 | 2 |
| 47.50000 | 1 |
| 48.33333 | 6 |
| 49.16667 | 8 |
| 50.00000 | 3 |
| 50.83333 | 5 |
| 51.66667 | 12 |
| 52.50000 | 8 |
| 53.33333 | 8 |
| 54.16667 | 15 |
| 55.00000 | 17 |
| 55.83333 | 13 |
| 56.66667 | 10 |
| 57.50000 | 9 |
| 58.33333 | 16 |
| 59.16667 | 15 |
| 60.00000 | 23 |
| 60.83333 | 17 |
| 61.66667 | 13 |
| 62.50000 | 18 |
| 63.33333 | 11 |
| 64.16667 | 14 |
| 65.00000 | 7 |
| 65.83333 | 4 |
| 66.66667 | 5 |
| 67.50000 | 6 |
| 68.33333 | 7 |
| 69.16667 | 6 |
| 70.00000 | 1 |
| 70.83333 | 2 |
| 71.66667 | 2 |
| 72.50000 | 2 |
| 75.00000 | 1 |