library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5     ✔ purrr   1.0.1
## ✔ tibble  3.2.1     ✔ dplyr   1.1.1
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.0.2     ✔ forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Read in the raw data exported from Qualtrics. Both empty strings and the
# literal "NA" are read as missing values.
prefrosh <- read.csv(
  "~/Downloads/Stanford Communities Project - Prefrosh 2022-2023_September 28, 2022_10.26.csv",
  header = TRUE,
  na.strings = c("", "NA")
)

# Remove unnecessary rows by position: rows 1 and 2, plus row 548, which is my
# test row (dogun@stanford.edu).
# NOTE(review): rows 1-2 are presumably the extra Qualtrics header rows --
# confirm. These indices are tied to this specific export and must be
# re-checked if the file is re-downloaded.
prefrosh <- prefrosh[-c(1, 2, 548), ]

# Remove duplicate submissions: keep only the first row for each
# RecipientEmail, retaining all other columns.
# NOTE(review): distinct() treats NA as an ordinary value, so all rows with a
# missing RecipientEmail would collapse into a single row -- confirm that is
# acceptable for this export.
prefrosh <- prefrosh %>%
  distinct(RecipientEmail, .keep_all = TRUE)

For the presentation: (1) an overview of what’s included in the prefrosh survey, (2) some sample demographics (gender, political ideology, URM status, etc.), (3) some temporal trends (e.g., how does this class rate in happiness, stress, and depression compared to past prefrosh samples?), and (4) some hypothesis testing (e.g., for the intervention perception questions, we can look at whether the difference in other vs. self ratings predicts their social behaviors, like ERQ).

Demographic Data Cleaning

Visualisations

# Race

# Drop respondents whose race is "Unknown" or the free-text "Other" option.
# Rows with a missing Race are dropped as well, because filter() discards rows
# where a condition evaluates to NA.
prefrosh_race <- filter(
  prefrosh,
  Race != "Unknown",
  Race != "Other. Please specify:"
)

# Bar chart of respondent counts per race category (white bars, black outline)
p <- ggplot(data = prefrosh_race, mapping = aes(x = Race)) +
  geom_bar(fill = "white", color = "black")

# Display the chart with small, 45-degree, right-aligned x-axis labels
p + theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7))

# Gender

# Drop respondents whose gender is "Unknown". Rows with a missing Gender are
# dropped as well, because filter() discards rows where the condition is NA.
prefrosh_gender <- filter(prefrosh, Gender != "Unknown")

# Bar chart of respondent counts per gender category (white bars, black outline)
p <- ggplot(data = prefrosh_gender, mapping = aes(x = Gender)) +
  geom_bar(fill = "white", color = "black")

# Display the chart with small, 45-degree, right-aligned x-axis labels
p + theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7))

Political Ideology

# Bar chart of general political ideology, with bars ordered by the factor
# levels below (1 = Extremely Liberal ... 7 = Extremely Conservative).
#
# Any response that is not one of the listed levels (including empty strings
# and missing values) becomes NA when converted to a factor and is dropped
# before plotting.
#
# NOTE(review): the level "Moderateley Conservative\n(5)" is spelled with an
# extra "e". It must match the label in the data exactly, otherwise those
# responses are silently dropped -- confirm against the raw export.
prefrosh %>%
  select(PoliIdeology_general) %>%
  mutate(
    PoliIdeology_general = factor(
      PoliIdeology_general,
      levels = c(
        "Extremely Liberal\n(1)",
        "Liberal\n(2)",
        "Moderately Liberal\n(3)",
        "Moderate\n(4)",
        "Moderateley Conservative\n(5)",
        "Conservative\n(6)",
        "Extremely Conservative\n(7)"
      )
    )
  ) %>%
  drop_na(PoliIdeology_general) %>%
  ggplot(mapping = aes(x = PoliIdeology_general)) +
  geom_bar() +
  # Minimal look: no grid, background, ticks, or axis titles; grey axis lines
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(
      color = "black",
      face = "bold",
      size = 7,
      angle = 0,
      vjust = -0.2
    ),
    axis.text.y = element_text(color = "black"),
    axis.line = element_line(color = "grey66"),
    axis.ticks = element_blank(),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

Family Income

# Bar chart of family income, with bars ordered by the income brackets listed
# below (lowest to highest).
#
# Any response that is not one of the listed brackets (including empty strings
# and missing values) becomes NA when converted to a factor and is dropped
# before plotting.
prefrosh %>%
  select(familyincome) %>%
  mutate(
    familyincome = factor(
      familyincome,
      levels = c(
        "$0-$20,000",
        "$20,001-$40,000",
        "$40,001-$60,000",
        "$60,001-$80,000",
        "$80,001-$100,000",
        "$100,001-$120,000",
        "$120,001-$140,000",
        "$140,001-$160,000",
        "$160,001-$180,000",
        "$180,001-$200,000",
        "$200,001-$250,000",
        "$250,001-$300,000",
        "$300,001-$350,000",
        "$350,001-$400,000",
        "$400,001-$450,000",
        "$450,001-$500,000",
        "$500,001+"
      )
    )
  ) %>%
  drop_na(familyincome) %>%
  ggplot(mapping = aes(x = familyincome)) +
  geom_bar() +
  # Minimal look: no grid, background, ticks, or axis titles; grey axis lines.
  # The x-axis labels are rotated (angle = 300).
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(
      color = "black",
      face = "bold",
      size = 11,
      angle = 300,
      vjust = -0.2
    ),
    axis.text.y = element_text(color = "black"),
    axis.line = element_line(color = "grey66"),
    axis.ticks = element_blank(),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )