rm(list =ls())knitr::opts_chunk$set(warning =FALSE, message =FALSE) Mypackages <-c("lme4","tidyverse","effects","ggplot2","psych","MASS","Rmisc","lmerTest","ggthemes", "knitr","lsmeans","pastecs","sjstats","car","ordinal","Rcpp","corrplot", "ggpubr", "EnvStats","easyStats", "cowplot","see","datawizard", "ggcorrplot", "lavaan", "qualtRics", "effsize")# install.packages(Mypackages) #you must remove the # in this comment if you need to install the packages! lapply(Mypackages, require,character.only =TRUE)options(knitr.kable.NA ='—')set.seed(1)
Load Data
Code
# read in data filessetwd("~/Desktop")data_raw <-read_survey("/Users/mtrenfield17/Desktop/Research/Boston College Research/SISC Lab Research/IS Project/Honduras/IS Honduras Pilot 1.csv")
Functions
Code
plot_fn <-function(data, iv, dv, coln =NULL, rown =NULL, facet_var =NULL, facet_var2 =NULL, x_label ="", y_label ="", title ="", x_text_size =13, y_text_size =13, x_title_size =13, y_title_size =13, plot_title_size =16, facet_text_size =12, x_levels =NULL, x_labels =NULL) {# Reorder the x variable if x_levels is providedif (!is.null(x_levels)) { data[[deparse(substitute(iv))]] <-factor(data[[deparse(substitute(iv))]], levels = x_levels) }# Rename the x variable if x_labels is providedif (!is.null(x_labels)) { data[[deparse(substitute(iv))]] <-factor(data[[deparse(substitute(iv))]], labels = x_labels) }# Create the base plot part1 <-ggplot(data, aes(x = {{iv}}, y = {{dv}}, fill = {{iv}})) +geom_violin(alpha =0.3, scale ="count") +stat_summary(fun ="mean", geom ="point", size =3, color ="black") +stat_summary(fun.data = mean_cl_normal, geom ="errorbar", width =0.2,size =1.5, color ="black") +theme_classic() +xlab(x_label) +ylab(y_label) +ggtitle(title) +theme(panel.background =element_rect(fill ="transparent"), legend.position ="right", plot.title =element_text(face ="bold", hjust =0.5, size = plot_title_size), plot.subtitle =element_text(hjust =0.5),panel.grid.major.y =element_line(color='grey75'), axis.text.x =element_text(face ="plain", size = x_text_size, color ="black"),axis.text.y =element_text(face ="plain", size = y_text_size, color ="black"),axis.title.x =element_text(face ="plain", size = x_title_size, color ="black"), axis.title.y =element_text(face ="plain", size = y_title_size, color ="black", margin =margin(t =0, r =10, b =0, l =0)),panel.border =element_rect(color ="black", fill =NA, size =1),strip.text =element_text(size = facet_text_size) # Adjust the facet text size )# Check if a facet_var (row) and facet_var2 (column) are providedif (!is.null(facet_var) &!is.null(facet_var2)) {# If both row and column variables are provided, use facet_grid part1 <- part1 +facet_grid(as.formula(paste(facet_var, "~", facet_var2))) } elseif (!is.null(facet_var)) {# If only one facet variable is provided, facet by rows part1 <- part1 +facet_wrap(as.formula(paste("~", facet_var)), ncol =if (!is.null(coln)) coln elseNULL, nrow =if (!is.null(rown)) rown elseNULL, scales ="free", as.table =TRUE) }# Final plot adjustmentsggpar(part1, legend ="none")}
Reshaping data
Demographics
Code
# making conservative, liberal, and moderate group data_raw <- data_raw %>%mutate(political_group =ifelse(pol <4, "Conservative",ifelse(pol >4, "Liberal", "Moderate")))# making a column for white vs non-whitedata_raw$White <-ifelse(grepl("White", data_raw$race_TEXT), "White", "Non-White")# making a column for URM vs non-URMurm_groups <-c("Black", "Hispanic or Latino/a/x", "American Indian and Native Alaskan", "Pacific Islander or Native Hawaiian", "Middle Eastern and North African")data_raw$URM <-ifelse(grepl(paste(urm_groups, collapse="|"), data_raw$race_TEXT), "URM", "Non-URM")# reordering demosdata_raw <- data_raw %>%mutate(Gender_TEXT =factor(gen_TEXT, levels =c("Man", "Woman", "I identify as:")),pol_TEXT =factor(pol_TEXT, levels =c("Very Liberal", "Liberal", "Somewhat Liberal", "Moderate","Somewhat Conservative", "Conservative", "Very Conservative")),edu_TEXT =factor(edu_TEXT, levels =c("Some schooling, but no high school diploma or degree", "High school diploma or GED", "Some college, Technical degree, or Associates degree", "Bachelor's degree", "Graduate degree (Masters, PhD, etc)")),inc_TEXT =factor(inc_TEXT, levels =c("Less than $25,000", "$25,000 - $49,999", "$50,000 - $74,999", "$75,000 - $99,999", "$100,000 - $149,999", "$150,000 - $199,999","More than $200,000")),political_group =factor(political_group, levels =c("Liberal", "Moderate", "Conservative")),White =factor(White, levels =c("White", "Non-White")),URM =factor(URM, levels =c("Non-URM", "URM")) )
# Subset your data frame to include only the demographic columnsdemo_data <- data[, c("gen_TEXT", "race_TEXT", "inc_TEXT", "edu_TEXT", "pol_TEXT", "pid_TEXT", "area_TEXT", "political_group", "White", "URM", "priorKnowledge_TEXT")]# Agemean(data$age, na.rm=TRUE)
[1] 41.19217
Code
sd(data$age, na.rm=TRUE)
[1] 12.23968
Code
# Loop through each demographic column and calculate frequency countsfreq_tables <-list()for (col innames(demo_data)) { { freq_table <-as.data.frame(table(demo_data[[col]])) freq_table$Percent <-round(freq_table$Freq /sum(freq_table$Freq) *100, 2) freq_tables[[col]] <- freq_table }}# Print the frequency tablesfor (i inseq_along(freq_tables)) {if (!is.null(freq_tables[[i]])) {cat("\nTable of frequencies for", names(freq_tables)[i], ":\n")print(freq_tables[[i]]) }}
Table of frequencies for gen_TEXT :
Var1 Freq Percent
1 I identify as: 6 1.07
2 Man 277 49.29
3 Woman 279 49.64
Table of frequencies for race_TEXT :
Var1
1 American Indian and Native Alaskan
2 American Indian and Native Alaskan,Black
3 American Indian and Native Alaskan,Black,East Asian,South Asian,Southeast Asian,Pacific Islander or Native Hawaiian,Hispanic or Latino/a/x,Middle Eastern and North African,White
4 American Indian and Native Alaskan,Black,White
5 American Indian and Native Alaskan,Hispanic or Latino/a/x,White
6 American Indian and Native Alaskan,White
7 Black
8 Black,Hispanic or Latino/a/x
9 Black,Hispanic or Latino/a/x,White
10 Black,Southeast Asian
11 Black,White
12 East Asian
13 East Asian,White
14 Hispanic or Latino/a/x
15 Hispanic or Latino/a/x,White
16 Middle Eastern and North African
17 Middle Eastern and North African,White
18 Pacific Islander or Native Hawaiian,White
19 South Asian
20 South Asian,Southeast Asian
21 South Asian,White
22 Southeast Asian
23 White
Freq Percent
1 1 0.18
2 1 0.18
3 1 0.18
4 2 0.36
5 1 0.18
6 3 0.53
7 54 9.61
8 1 0.18
9 3 0.53
10 1 0.18
11 5 0.89
12 12 2.14
13 3 0.53
14 26 4.63
15 12 2.14
16 1 0.18
17 1 0.18
18 1 0.18
19 3 0.53
20 1 0.18
21 1 0.18
22 10 1.78
23 418 74.38
Table of frequencies for inc_TEXT :
Var1 Freq Percent
1 Less than $25,000 63 11.21
2 $25,000 - $49,999 119 21.17
3 $50,000 - $74,999 121 21.53
4 $75,000 - $99,999 86 15.30
5 $100,000 - $149,999 96 17.08
6 $150,000 - $199,999 48 8.54
7 More than $200,000 29 5.16
Table of frequencies for edu_TEXT :
Var1 Freq Percent
1 Some schooling, but no high school diploma or degree 3 0.53
2 High school diploma or GED 83 14.77
3 Some college, Technical degree, or Associates degree 160 28.47
4 Bachelor's degree 217 38.61
5 Graduate degree (Masters, PhD, etc) 99 17.62
Table of frequencies for pol_TEXT :
Var1 Freq Percent
1 Very Liberal 74 13.17
2 Liberal 87 15.48
3 Somewhat Liberal 73 12.99
4 Moderate 128 22.78
5 Somewhat Conservative 59 10.50
6 Conservative 97 17.26
7 Very Conservative 44 7.83
Table of frequencies for pid_TEXT :
Var1 Freq Percent
1 Democrat 205 36.48
2 Independent / Other 186 33.10
3 Republican 171 30.43
Table of frequencies for area_TEXT :
Var1 Freq Percent
1 Rural 108 19.22
2 Suburban 316 56.23
3 Urban 138 24.56
Table of frequencies for political_group :
Var1 Freq Percent
1 Liberal 234 41.64
2 Moderate 128 22.78
3 Conservative 200 35.59
Table of frequencies for White :
Var1 Freq Percent
1 White 451 80.25
2 Non-White 111 19.75
Table of frequencies for URM :
Var1 Freq Percent
1 Non-URM 448 79.72
2 URM 114 20.28
Table of frequencies for priorKnowledge_TEXT :
Var1 Freq Percent
1 Knowledgeable 7 1.25
2 Not at all knowledgeable 377 67.08
3 Slightly knowledgeable 140 24.91
4 Somewhat knowledgeable 34 6.05
5 Very knowledgeable 4 0.71
Demographic Plot
Code
# List of demographic columns to plotdemographic_columns <-c("gen_TEXT", "race_TEXT", "inc_TEXT", "edu_TEXT", "pol_TEXT", "pid_TEXT", "area_TEXT", "political_group", "White", "URM", "priorKnowledge_TEXT") # Use column names as strings# Function to create percent plotcreate_percent_plot <-function(data, column) {# Calculate the frequency and percentage for each category freq_table <- data %>%group_by(across(all_of(column))) %>% dplyr::summarise(Freq =n()) %>%mutate(Percent = Freq /sum(Freq) *100)# Create the plot p <-ggplot(freq_table, aes_string(x = column, y ="Percent", fill = column)) +geom_bar(stat ="identity", position ="dodge") +scale_y_continuous(labels = scales::percent_format(scale =1)) +labs(x = column, y ="Percentage", title =paste("Distribution of", column)) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))return(p)}# Loop through demographic columns and plotlapply(demographic_columns, function(col) create_percent_plot(demo_data, col))
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
[[10]]
[[11]]
Code
ggplot(data, aes(x = age)) +geom_histogram(binwidth =5, color ="black", alpha =0.7) +labs(title ="Age", x =" ", y ="Distribution") +theme_minimal()
Call:
lm(formula = donation ~ condition, data = data)
Residuals:
Min 1Q Median 3Q Max
-4.8235 -3.0934 0.1765 1.9066 5.9275
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.09341 0.25692 15.933 <2e-16 ***
conditionindividualScope 0.73012 0.36090 2.023 0.0435 *
conditionpopulationScope -0.02087 0.35813 -0.058 0.9536
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.466 on 559 degrees of freedom
Multiple R-squared: 0.0101, Adjusted R-squared: 0.00656
F-statistic: 2.852 on 2 and 559 DF, p-value: 0.05856
Code
mod_donation_indiv<-lm(donation ~ condition, data = data_I)summary(mod_donation_indiv)
Call:
lm(formula = donation ~ condition, data = data_I)
Residuals:
Min 1Q Median 3Q Max
-4.8235 -3.0934 0.1765 1.9066 5.9275
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.8235 0.2535 19.030 <2e-16 ***
conditionmergedScope -0.7301 0.3609 -2.023 0.0435 *
conditionpopulationScope -0.7510 0.3557 -2.112 0.0352 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.466 on 559 degrees of freedom
Multiple R-squared: 0.0101, Adjusted R-squared: 0.00656
F-statistic: 2.852 on 2 and 559 DF, p-value: 0.05856
Code
# Load necessary packagesif (!requireNamespace("emmeans", quietly =TRUE)) install.packages("emmeans")if (!requireNamespace("effsize", quietly =TRUE)) install.packages("effsize")library(emmeans)library(effsize)# Fit the linear model with all three conditionsmod_donation <-lm(donation ~ condition, data = data)# Get pairwise comparisons of the means for condition levelspairwise_emmeans <-emmeans(mod_donation, pairwise ~ condition)# Calculate Cohen's d for each pairwise comparisonpairwise_d <-eff_size(pairwise_emmeans, sigma =sigma(mod_donation), edf =df.residual(mod_donation))# Display the pairwise Cohen's d resultssummary(pairwise_d)