knitr::opts_chunk$set(warning =FALSE, message =FALSE) Mypackages <-c("lme4","tidyverse","effects","ggplot2","psych","MASS","Rmisc","lmerTest","ggthemes", "knitr","lsmeans","pastecs","sjstats","car","ordinal","Rcpp","corrplot", "ggpubr", "EnvStats","easyStats", "cowplot","see","datawizard", "ggcorrplot", "lavaan", "qualtRics", "effsize")# install.packages(Mypackages) #you must remove the # in this comment if you need to install the packages! lapply(Mypackages, require,character.only =TRUE)options(knitr.kable.NA ='—')set.seed(1)
Load Data
Code
# read in data filessetwd("~/Desktop")data_raw <-read_survey("/Users/mtrenfield17/Desktop/Research/Boston College Research/SISC Lab Research/IS Project/Gambling/Pilot 2/IS Gambling_PILOT 2.csv")
Functions
Code
plot_fn <-function(data, iv, dv, coln =NULL, rown =NULL, facet_var =NULL, facet_var2 =NULL, x_label ="", y_label ="", title ="", x_text_size =13, y_text_size =13, x_title_size =13, y_title_size =13, plot_title_size =16, facet_text_size =12, x_levels =NULL, x_labels =NULL) {# Reorder the x variable if x_levels is providedif (!is.null(x_levels)) { data[[deparse(substitute(iv))]] <-factor(data[[deparse(substitute(iv))]], levels = x_levels) }# Rename the x variable if x_labels is providedif (!is.null(x_labels)) { data[[deparse(substitute(iv))]] <-factor(data[[deparse(substitute(iv))]], labels = x_labels) }# Create the base plot part1 <-ggplot(data, aes(x = {{iv}}, y = {{dv}}, fill = {{iv}})) +geom_violin(alpha =0.3, scale ="count") +stat_summary(fun ="mean", geom ="point", size =3, color ="black") +stat_summary(fun.data = mean_cl_normal, geom ="errorbar", width =0.2,size =1.5, color ="black") +theme_classic() +xlab(x_label) +ylab(y_label) +ggtitle(title) +theme(panel.background =element_rect(fill ="transparent"), legend.position ="right", plot.title =element_text(face ="bold", hjust =0.5, size = plot_title_size), plot.subtitle =element_text(hjust =0.5),panel.grid.major.y =element_line(color='grey75'), axis.text.x =element_text(face ="plain", size = x_text_size, color ="black"),axis.text.y =element_text(face ="plain", size = y_text_size, color ="black"),axis.title.x =element_text(face ="plain", size = x_title_size, color ="black"), axis.title.y =element_text(face ="plain", size = y_title_size, color ="black", margin =margin(t =0, r =10, b =0, l =0)),panel.border =element_rect(color ="black", fill =NA, size =1),strip.text =element_text(size = facet_text_size) # Adjust the facet text size )# Check if a facet_var (row) and facet_var2 (column) are providedif (!is.null(facet_var) &!is.null(facet_var2)) {# If both row and column variables are provided, use facet_grid part1 <- part1 +facet_grid(as.formula(paste(facet_var, "~", facet_var2))) } elseif (!is.null(facet_var)) {# If only one facet variable is provided, facet by rows part1 <- part1 +facet_wrap(as.formula(paste("~", facet_var)), ncol =if (!is.null(coln)) coln elseNULL, nrow =if (!is.null(rown)) rown elseNULL, scales ="free", as.table =TRUE) }# Final plot adjustmentsggpar(part1, legend ="none")}
Reshaping data
Code
# making conservative, liberal, and moderate group data_raw <- data_raw %>%mutate(political_group =ifelse(pol <4, "Conservative",ifelse(pol >4, "Liberal", "Moderate")))# making a column for white vs non-whitedata_raw$White <-ifelse(grepl("White", data_raw$race_TEXT), "White", "Non-White")# making a column for URM vs non-URMurm_groups <-c("Black", "Hispanic or Latino/a/x", "American Indian and Native Alaskan", "Pacific Islander or Native Hawaiian", "Middle Eastern and North African")data_raw$URM <-ifelse(grepl(paste(urm_groups, collapse="|"), data_raw$race_TEXT), "URM", "Non-URM")#### filtering people who failed the attn check ####data <- data_raw %>%filter(attentionCheck ==3)## changing condition to factor and reordering ##data$condition <-as.factor(data$condition)data$condition <-factor(data$condition, levels =c("individualScope", "populationScope", "mergedScope"))# changing numeric DVs to numericdata <- data %>%mutate_at(vars(policyDV, donation, helpDV_1:blame_2, age, pol, pid, edu, inc, gambling, charityRealYN, `Duration (in seconds)`), as.numeric)## renaming matrix variablesnames(data)[names(data) =='helpDV_1'] <-'usGovHelp'names(data)[names(data) =='helpDV_2'] <-'industryHelp'names(data)[names(data) =='helpDV_3'] <-'gamblerHelp'names(data)[names(data) =='helpDV_4'] <-'individualHelp'names(data)[names(data) =='helpDV_5'] <-'youHelp'names(data)[names(data) =='preventDV_1'] <-'usGovPrevent'names(data)[names(data) =='preventDV_2'] <-'industryPrevent'names(data)[names(data) =='preventDV_3'] <-'gamblerPrevent'names(data)[names(data) =='preventDV_4'] <-'individualPrevent'names(data)[names(data) =='preventDV_5'] <-'youPrevent'names(data)[names(data) =='blame_1'] <-'industryBlame'names(data)[names(data) =='blame_2'] <-'individualBlame'# making a gambling binary variabledata <- data %>%mutate(gambling_binary =ifelse(gambling >1, "Never gambled",ifelse(gambling ==1, "Gambled", "Not Found")))# reordering demosdata <- data %>%mutate(Gender_TEXT =factor(gen_TEXT, levels =c("Man", "Woman", "I identify as:")),pol_TEXT =factor(pol_TEXT, levels =c("Very Liberal", "Liberal", "Somewhat Liberal", "Moderate","Somewhat Conservative", "Conservative", "Very Conservative")),edu_TEXT =factor(edu_TEXT, levels =c("Some schooling, but no high school diploma or degree", "High school diploma or GED", "Some college, Technical degree, or Associates degree", "Bachelor's degree", "Graduate degree (Masters, PhD, etc)")),inc_TEXT =factor(inc_TEXT, levels =c("less than $25,000", "$25,000 - $49,999", "$50,000 - $74,999", "$75,000 - $99,999", "$100,000 - $149,999", "$150,000 - $199,999","more than $200,000")),political_group =factor(political_group, levels =c("Liberal", "Moderate", "Conservative")),White =factor(White, levels =c("White", "Non-White")),URM =factor(URM, levels =c("Non-URM", "URM")),gambling_TEXT =factor(gambling_TEXT, levels =c("Not at all", "A few times a year", "Once a month", "A few times a month", "Once a week or more")),gambling_binary =factor(gambling_binary, levels =c("Never gambled", "Gambled", "Not Found")), )data$condition <-relevel(data$condition, ref ="mergedScope")data_I <- datadata_I$condition <-relevel(data_I$condition, ref ="individualScope")
Data Quality
Charity Belief
Code
plot_fn(data, iv = condition, dv = charityRealYN, x_label ="Condition", y_label ="Charity Doubt", title ="Extent Participants Doubted the Charity was real", x_levels =c("individualScope", "populationScope", "mergedScope"),x_labels =c("Individual \n\ Frame", "Structural \n\ Frame", "Combined \n\ Frame"))
# Subset your data frame to include only the demographic columnsdemo_data <- data[, c("gambling_TEXT", "gen_TEXT", "race_TEXT", "inc_TEXT", "edu_TEXT", "pol_TEXT", "pid_TEXT", "area_TEXT", "political_group", "White", "URM", "gambling_binary")]# Agemean(data$age, na.rm=TRUE)
[1] 42.07846
Code
sd(data$age, na.rm=TRUE)
[1] 14.52027
Code
# Loop through each demographic column and calculate frequency countsfreq_tables <-list()for (col innames(demo_data)) { { freq_table <-as.data.frame(table(demo_data[[col]])) freq_table$Percent <-round(freq_table$Freq /sum(freq_table$Freq) *100, 2) freq_tables[[col]] <- freq_table }}# Print the frequency tablesfor (i inseq_along(freq_tables)) {if (!is.null(freq_tables[[i]])) {cat("\nTable of frequencies for", names(freq_tables)[i], ":\n")print(freq_tables[[i]]) }}
Table of frequencies for gambling_TEXT :
Var1 Freq Percent
1 Not at all 337 56.45
2 A few times a year 157 26.30
3 Once a month 25 4.19
4 A few times a month 36 6.03
5 Once a week or more 42 7.04
Table of frequencies for gen_TEXT :
Var1 Freq Percent
1 I identify as: 3 0.50
2 Man 298 49.75
3 Woman 298 49.75
Table of frequencies for race_TEXT :
Var1 Freq Percent
1 American Indian and Native Alaskan 1 0.17
2 American Indian and Native Alaskan,Black,Hispanic or Latino/a/x 1 0.17
3 American Indian and Native Alaskan,Hispanic or Latino/a/x,White 1 0.17
4 American Indian and Native Alaskan,White 3 0.50
5 Black 68 11.35
6 Black,East Asian,White 1 0.17
7 Black,Hispanic or Latino/a/x 2 0.33
8 Black,White 8 1.34
9 East Asian 20 3.34
10 East Asian,White 3 0.50
11 Hispanic or Latino/a/x 24 4.01
12 Hispanic or Latino/a/x,White 14 2.34
13 Middle Eastern and North African 2 0.33
14 Middle Eastern and North African,White 1 0.17
15 Pacific Islander or Native Hawaiian 3 0.50
16 South Asian 7 1.17
17 South Asian,Hispanic or Latino/a/x 1 0.17
18 South Asian,White 2 0.33
19 Southeast Asian 13 2.17
20 Southeast Asian,Middle Eastern and North African,White 1 0.17
21 Southeast Asian,White 2 0.33
22 White 421 70.28
Table of frequencies for inc_TEXT :
Var1 Freq Percent
1 less than $25,000 0 0.00
2 $25,000 - $49,999 138 28.05
3 $50,000 - $74,999 123 25.00
4 $75,000 - $99,999 92 18.70
5 $100,000 - $149,999 109 22.15
6 $150,000 - $199,999 30 6.10
7 more than $200,000 0 0.00
Table of frequencies for edu_TEXT :
Var1 Freq Percent
1 Some schooling, but no high school diploma or degree 6 1.00
2 High school diploma or GED 90 15.03
3 Some college, Technical degree, or Associates degree 216 36.06
4 Bachelor's degree 213 35.56
5 Graduate degree (Masters, PhD, etc) 74 12.35
Table of frequencies for pol_TEXT :
Var1 Freq Percent
1 Very Liberal 66 11.04
2 Liberal 105 17.56
3 Somewhat Liberal 74 12.37
4 Moderate 133 22.24
5 Somewhat Conservative 69 11.54
6 Conservative 108 18.06
7 Very Conservative 43 7.19
Table of frequencies for pid_TEXT :
Var1 Freq Percent
1 Democrat 218 36.39
2 Independent / Other 192 32.05
3 Republican 189 31.55
Table of frequencies for area_TEXT :
Var1 Freq Percent
1 Rural 118 19.70
2 Suburban 323 53.92
3 Urban 158 26.38
Table of frequencies for political_group :
Var1 Freq Percent
1 Liberal 245 40.97
2 Moderate 133 22.24
3 Conservative 220 36.79
Table of frequencies for White :
Var1 Freq Percent
1 White 457 76.29
2 Non-White 142 23.71
Table of frequencies for URM :
Var1 Freq Percent
1 Non-URM 468 78.13
2 URM 131 21.87
Table of frequencies for gambling_binary :
Var1 Freq Percent
1 Never gambled 260 43.55
2 Gambled 337 56.45
3 Not Found 0 0.00
Demographic Plot
Code
# List of demographic columns to plotdemographic_columns <-c("gambling_TEXT", "gambling_binary", "gen_TEXT", "race_TEXT", "inc_TEXT", "edu_TEXT", "pol_TEXT", "pid_TEXT", "area_TEXT", "political_group", "White", "URM")# Function to create percent plotcreate_percent_plot <-function(data, column) {# Calculate the frequency and percentage for each category freq_table <- data %>%group_by(across(all_of(column))) %>% dplyr::summarise(Freq =n()) %>%mutate(Percent = Freq /sum(Freq) *100)# Create the plot p <-ggplot(freq_table, aes_string(x = column, y ="Percent", fill = column)) +geom_bar(stat ="identity", position ="dodge") +scale_y_continuous(labels = scales::percent_format(scale =1)) +labs(x = column, y ="Percentage", title =paste("Distribution of", column)) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))return(p)}# Loop through demographic columns and plotplots <-lapply(demographic_columns, function(col) create_percent_plot(demo_data, col))# Display the plotsprint(plots)