StatsFinalProject

library(readxl)

## Warning: package 'readxl' was built under R version 4.4.2

# Load the replication data. The responses were entered into a spreadsheet by
# hand, as was done for pilot A. For pilot B we had instead concatenated the
# CSV files downloaded from OSF in R, but that format proved very difficult to
# combine, clean, and analyze, so manual entry was used here.
workbook_path <- "C:\\Users\\Owner\\OneDrive\\Documents\\R\\Stat201A\\stats_replication_data.xlsx"
data <- read_excel(path = workbook_path)

# Confirmatory test: compare the proportion of participants who responded "2" to the distorted question between the two conditions.
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Two-sided pooled two-proportion z-test of the share of "2" responses to the
# distorted question in the Brush vs. Arial condition.
#
# Only rows whose Knowledge response contains "Noah" are analysed. The same
# restriction is applied to the numerators and the denominators so that each
# proportion is computed within a single population (previously the numerators
# counted every row of the condition, restricted or not).
passed_check <- grepl("Noah", data$Knowledge)
is_brush <- data$Condition == "Brush"
is_arial <- data$Condition == "Arial"
said_two <- data$Distorted == "2.0"

# Denominators; these are reused by the later distorted-question tests.
# na.rm = TRUE makes a row with a missing value count as a non-member instead
# of turning the whole sum into NA.
brush_total <- sum(is_brush & passed_check, na.rm = TRUE)
arial_total <- sum(is_arial & passed_check, na.rm = TRUE)

# Fail early rather than silently producing NaN from a division by zero.
stopifnot(brush_total > 0, arial_total > 0)

# Numerators: "2" responses among the same restricted rows.
brush_count <- sum(said_two & is_brush & passed_check, na.rm = TRUE)
arial_count <- sum(said_two & is_arial & passed_check, na.rm = TRUE)

p1 <- brush_count / brush_total
p2 <- arial_count / arial_total

# Pooled proportion across both conditions, used for the standard error.
p <- (brush_count + arial_count) / (brush_total + arial_total)

z <- (p1 - p2) / sqrt(p * (1 - p) * (1 / brush_total + 1 / arial_total))
z

# Recorded output of the original run. It is unchanged only if no row lacking
# "Noah" in Knowledge answered "2.0"; re-run to refresh.
## [1] -2.047379

# Two-sided p-value from the standard normal distribution.
p_value <- 2 * (1 - pnorm(abs(z)))
p_value

## [1] 0.04062091

# As in the original paper, I will now conduct a z-test of the proportions of
# participants who responded "Can't say" to the distorted question in the two
# conditions.
#
# The denominators (brush_total, arial_total) only count rows whose Knowledge
# response contains "Noah", so the numerators are restricted the same way;
# otherwise the two parts of each proportion describe different populations.
knows_noah <- grepl("Noah", data$Knowledge)
cant_say <- data$Distorted == "Can't say"

# na.rm = TRUE keeps a single missing cell from turning the count into NA.
brush_count2 <- sum(
  cant_say & data$Condition == "Brush" & knows_noah,
  na.rm = TRUE
)
arial_count2 <- sum(
  cant_say & data$Condition == "Arial" & knows_noah,
  na.rm = TRUE
)

p1 <- brush_count2 / brush_total
p2 <- arial_count2 / arial_total

# Pooled proportion across both conditions, used for the standard error.
p <- (brush_count2 + arial_count2) / (brush_total + arial_total)

z <- (p1 - p2) / sqrt(p * (1 - p) * (1 / brush_total + 1 / arial_total))
z

# Recorded output of the original run. It is unchanged only if no row lacking
# "Noah" in Knowledge answered "Can't say"; re-run to refresh.
## [1] 1.001026

# Two-sided p-value from the standard normal distribution.
p_value <- 2 * (1 - pnorm(abs(z)))
p_value

## [1] 0.316814

# The previous test compared the proportion of participants in each condition
# who correctly noticed the distortion, counting only "Can't say" responses.
# However, some participants gave other responses showing that they noticed it
# (for example, several wrote that Moses did not build an ark). This test
# therefore compares the proportion who responded "Can't say" OR gave such an
# alternative response. These counts were tallied by hand.
# NOTE(review): the denominators used here only count rows whose Knowledge
# contains "Noah" - confirm the manual tallies were restricted the same way.

brush_count3 <- 7
arial_count3 <- 1

# Per-condition proportions.
p1 <- brush_count3 / brush_total
p2 <- arial_count3 / arial_total

# Pooled proportion across both conditions.
noticed_overall <- brush_count3 + arial_count3
analysed_overall <- brush_total + arial_total
p <- noticed_overall / analysed_overall

# Standard error of the difference under the pooled proportion.
std_error <- sqrt(p * (1 - p) * (1 / brush_total + 1 / arial_total))
z <- (p1 - p2) / std_error
z

## [1] 2.250274

# Two-sided p-value: twice the upper-tail area beyond |z|.
upper_tail <- 1 - pnorm(abs(z))
p_value <- 2 * upper_tail
p_value

## [1] 0.02443154

# Control question, test 1: z-test comparing the proportion of people who
# responded "Switzerland" in the two conditions. Every row of a condition
# counts toward its denominator here.

in_brush <- data$Condition == "Brush"
in_arial <- data$Condition == "Arial"
said_switzerland <- data$Control == "Switzerland"

# Denominators; these are reused by the later control-question tests.
brush_total2 <- sum(in_brush)
arial_total2 <- sum(in_arial)

# Numerators: "Switzerland" responses within each condition.
brush_count4 <- sum(said_switzerland & in_brush)
arial_count4 <- sum(said_switzerland & in_arial)

p1 <- brush_count4 / brush_total2
p2 <- arial_count4 / arial_total2

# Pooled proportion across both conditions.
p <- (brush_count4 + arial_count4) / (brush_total2 + arial_total2)

# Standard error of the difference under the pooled proportion.
std_error <- sqrt(p * (1 - p) * (1 / brush_total2 + 1 / arial_total2))
z <- (p1 - p2) / std_error
z

## [1] 0.5563486

# Two-sided p-value: twice the upper-tail area beyond |z|.
upper_tail <- 1 - pnorm(abs(z))
p_value <- 2 * upper_tail
p_value

## [1] 0.5779725

# Control question, test 2: z-test comparing the proportion of people who
# named a country or region other than Switzerland in the two conditions.
# These counts were tallied by hand.

brush_count5 <- 7
arial_count5 <- 10

# Per-condition proportions (denominators: all rows of each condition).
p1 <- brush_count5 / brush_total2
p2 <- arial_count5 / arial_total2

# Pooled proportion across both conditions.
other_overall <- brush_count5 + arial_count5
sample_overall <- brush_total2 + arial_total2
p <- other_overall / sample_overall

# Standard error of the difference under the pooled proportion.
std_error <- sqrt(p * (1 - p) * (1 / brush_total2 + 1 / arial_total2))
z <- (p1 - p2) / std_error
z

## [1] -0.8868791

# Two-sided p-value: twice the upper-tail area beyond |z|.
upper_tail <- 1 - pnorm(abs(z))
p_value <- 2 * upper_tail
p_value

## [1] 0.375144

# Control question, test 3: z-test comparing the proportion of people who
# responded that they didn't know the answer in the two conditions.

# Spellings of the "don't know" response that appear in the hand-entered data.
# The same set is applied to BOTH conditions; previously the second spelling
# was only counted for Brush, so an Arial row using it would have been missed.
dont_know_labels <- c("Don't know", "I don't Know")

# %in% never yields NA, so a blank Control cell simply counts as "no".
gave_dont_know <- data$Control %in% dont_know_labels

brush_count6 <- sum(gave_dont_know & data$Condition == "Brush", na.rm = TRUE)
arial_count6 <- sum(gave_dont_know & data$Condition == "Arial", na.rm = TRUE)

p1 <- brush_count6 / brush_total2
p2 <- arial_count6 / arial_total2

# Pooled proportion across both conditions, used for the standard error.
p <- (brush_count6 + arial_count6) / (brush_total2 + arial_total2)

z <- (p1 - p2) / sqrt(p * (1 - p) * (1 / brush_total2 + 1 / arial_total2))
z

# Recorded output of the original run. It is unchanged as long as no Arial row
# has the Control response "I don't Know"; re-run to refresh.
## [1] 0

# Two-sided p-value from the standard normal distribution.
p_value <- 2 * (1 - pnorm(abs(z)))
p_value

## [1] 1

StatsFinalProject

2024-12-03