knitr::opts_chunk$set(warning =FALSE, message =FALSE) Mypackages <-c("lme4","tidyverse","effects","ggplot2","psych","MASS","Rmisc","lmerTest","ggthemes", "knitr","lsmeans","pastecs","sjstats","car","ordinal","Rcpp","corrplot", "ggpubr", "EnvStats","easyStats", "cowplot","see","datawizard", "ggcorrplot", "lavaan", "qualtRics")# install.packages(Mypackages) #you must remove the # in this comment if you need to install the packages! lapply(Mypackages, require,character.only =TRUE)options(knitr.kable.NA ='—')set.seed(1)
Load Data
remove participant who shows up twice (first time they accidentally said no to the consent)
Code
# read in data filessetwd("~/Desktop")data_raw <-read_survey("/Users/mtrenfield17/Desktop/Research/Boston College Research/SISC Lab Research/IS Project/Gambling/IS_Gambling_N200.csv")# Remove rows where 'condition' is NAdata <- data_raw %>%filter(!is.na(condition))
Functions
Code
plot_fn <-function(data, iv, dv, coln =NULL, rown =NULL, facet_var =NULL, facet_var2 =NULL, x_label ="", y_label ="", title ="", x_text_size =13, y_text_size =13, x_title_size =13, y_title_size =13, plot_title_size =16, facet_text_size =12) {# Create the base plot part1 <-ggplot(data, aes(x = {{iv}}, y = {{dv}}, fill = {{iv}})) +geom_violin(alpha =0.3, scale ="count") +stat_summary(fun ="mean", geom ="point", size =3, color ="black") +stat_summary(fun.data = mean_cl_normal, geom ="errorbar", width =0.2,size =1.5, color ="black") +theme_classic() +xlab(x_label) +ylab(y_label) +ggtitle(title) +theme(panel.background =element_rect(fill ="transparent"), legend.position ="right", plot.title =element_text(face ="bold", hjust =0.5, size = plot_title_size), plot.subtitle =element_text(hjust =0.5),panel.grid.major.y =element_line(color='grey75'), axis.text.x =element_text(face ="plain", size = x_text_size, color ="black"),axis.text.y =element_text(face ="plain", size = y_text_size, color ="black"),axis.title.x =element_text(face ="plain", size = x_title_size, color ="black"), axis.title.y =element_text(face ="plain", size = y_title_size, color ="black", margin =margin(t =0, r =10, b =0, l =0)),panel.border =element_rect(color ="black", fill =NA, size =1),strip.text =element_text(size = facet_text_size) # Adjust the facet text size )# Check if a facet_var (row) and facet_var2 (column) are providedif (!is.null(facet_var) &!is.null(facet_var2)) {# If both row and column variables are provided, use facet_grid part1 <- part1 +facet_grid(as.formula(paste(facet_var, "~", facet_var2))) } elseif (!is.null(facet_var)) {# If only one facet variable is provided, facet by rows part1 <- part1 +facet_wrap(as.formula(paste("~", facet_var)), ncol =if (!is.null(coln)) coln elseNULL, nrow =if (!is.null(rown)) rown elseNULL, scales ="free", as.table =TRUE) }# Final plot adjustmentsggpar(part1, legend ="none")}
Reshaping data
Code
# making conservative, liberal, and moderate group data <- data %>%mutate(political_group =ifelse(pol <4, "Conservative",ifelse(pol >4, "Liberal", "Moderate")))# making a column for white vs non-whitedata$White <-ifelse(grepl("White", data$race_TEXT), "White", "Non-White")# making a column for URM vs non-URMurm_groups <-c("Black", "Hispanic or Latino/a/x", "American Indian and Native Alaskan", "Pacific Islander or Native Hawaiian", "Middle Eastern and North African")data$URM <-ifelse(grepl(paste(urm_groups, collapse="|"), data$race_TEXT), "URM", "Non-URM")## changing condition to factor and reordering ##data$condition <-as.factor(data$condition)data$condition <-factor(data$condition, levels =c("individualScope", "populationScope"))# changing numeric DVs to numericdata <- data %>%mutate_at(vars(policyDV, donation, helpDV_1:blame_2, age, pol, pid, edu, inc, gambling, charityRealYN, `Duration (in seconds)`), as.numeric)## renaming matrix variablesnames(data)[names(data) =='helpDV_1'] <-'usGovHelp'names(data)[names(data) =='helpDV_2'] <-'industryHelp'names(data)[names(data) =='helpDV_3'] <-'gamblerHelp'names(data)[names(data) =='helpDV_4'] <-'individualHelp'names(data)[names(data) =='helpDV_5'] <-'youHelp'names(data)[names(data) =='preventDV_1'] <-'usGovPrevent'names(data)[names(data) =='preventDV_2'] <-'industryPrevent'names(data)[names(data) =='preventDV_3'] <-'gamblerPrevent'names(data)[names(data) =='preventDV_4'] <-'individualPrevent'names(data)[names(data) =='preventDV_5'] <-'youPrevent'names(data)[names(data) =='blame_1'] <-'industryBlame'names(data)[names(data) =='blame_2'] <-'individualBlame'# making a gambling binary variabledata <- data %>%mutate(gambling_binary =ifelse(gambling >1, "Never gamble",ifelse(gambling ==1, "Gamble", "Not Found")))# reordering demosdata <- data %>%mutate(Gender_TEXT =factor(gen_TEXT, levels =c("Man", "Woman", "I identify as:")),pol_TEXT =factor(pol_TEXT, levels =c("Very Liberal", "Liberal", "Somewhat Liberal", "Moderate","Somewhat Conservative", "Conservative", "Very Conservative")),edu_TEXT =factor(edu_TEXT, levels =c("Some schooling, but no high school diploma or degree", "High school diploma or GED", "Some college, Technical degree, or Associates degree", "Bachelor's degree", "Graduate degree (Masters, PhD, etc)")),inc_TEXT =factor(inc_TEXT, levels =c("less than $25,000", "$25,000 - $49,999", "$50,000 - $74,999", "$75,000 - $99,999", "$100,000 - $149,999", "$150,000 - $199,999","more than $200,000")),political_group =factor(political_group, levels =c("Liberal", "Moderate", "Conservative")),White =factor(White, levels =c("White", "Non-White")),URM =factor(URM, levels =c("Non-URM", "URM")),gambling_TEXT =factor(gambling_TEXT, levels =c("Not at all", "A few times a year", "Once a month", "A few times a month", "Once a week or more")),gambling_binary =factor(gambling_binary, levels =c("Never gamble", "Gamble", "Not Found")), )
Data Quality
Charity Belief
Code
ggplot(data, aes(x = condition, y = charityRealYN)) +geom_violin() +geom_boxplot(width=0.1, fill="white") +theme_minimal() +labs(y ="Not at all to Extremely", x ="Condition", title ="Extent Participants Doubted the Charity was real")
Demographics
Code
# Subset your data frame to include only the demographic columnsdemo_data <- data[, c("gambling_TEXT", "gen_TEXT", "race_TEXT", "inc_TEXT", "edu_TEXT", "pol_TEXT", "pid_TEXT", "area_TEXT", "political_group", "White", "URM", "gambling_binary")]# Agemean(data$age, na.rm=TRUE)
[1] 37.67539
Code
sd(data$age, na.rm=TRUE)
[1] 11.61892
Code
# Loop through each demographic column and calculate frequency countsfreq_tables <-list()for (col innames(demo_data)) { { freq_table <-as.data.frame(table(demo_data[[col]])) freq_table$Percent <-round(freq_table$Freq /sum(freq_table$Freq) *100, 2) freq_tables[[col]] <- freq_table }}# Print the frequency tablesfor (i inseq_along(freq_tables)) {if (!is.null(freq_tables[[i]])) {cat("\nTable of frequencies for", names(freq_tables)[i], ":\n")print(freq_tables[[i]]) }}
Table of frequencies for gambling_TEXT :
Var1 Freq Percent
1 Not at all 101 53.16
2 A few times a year 63 33.16
3 Once a month 7 3.68
4 A few times a month 11 5.79
5 Once a week or more 8 4.21
Table of frequencies for gen_TEXT :
Var1 Freq Percent
1 I identify as: 1 0.52
2 Man 97 50.79
3 Woman 93 48.69
Table of frequencies for race_TEXT :
Var1 Freq Percent
1 American Indian and Native Alaskan 1 0.52
2 American Indian and Native Alaskan,White 2 1.05
3 Black 22 11.52
4 East Asian 6 3.14
5 East Asian,Southeast Asian 1 0.52
6 East Asian,White 1 0.52
7 Hispanic or Latino/a/x 8 4.19
8 Hispanic or Latino/a/x,White 3 1.57
9 Middle Eastern and North African 2 1.05
10 Middle Eastern and North African,White 3 1.57
11 South Asian 4 2.09
12 Southeast Asian 3 1.57
13 Southeast Asian,White 1 0.52
14 White 134 70.16
Table of frequencies for inc_TEXT :
Var1 Freq Percent
1 less than $25,000 28 14.66
2 $25,000 - $49,999 36 18.85
3 $50,000 - $74,999 50 26.18
4 $75,000 - $99,999 32 16.75
5 $100,000 - $149,999 20 10.47
6 $150,000 - $199,999 13 6.81
7 more than $200,000 12 6.28
Table of frequencies for edu_TEXT :
Var1 Freq Percent
1 Some schooling, but no high school diploma or degree 2 1.05
2 High school diploma or GED 32 16.75
3 Some college, Technical degree, or Associates degree 59 30.89
4 Bachelor's degree 71 37.17
5 Graduate degree (Masters, PhD, etc) 27 14.14
Table of frequencies for pol_TEXT :
Var1 Freq Percent
1 Very Liberal 32 16.75
2 Liberal 36 18.85
3 Somewhat Liberal 28 14.66
4 Moderate 44 23.04
5 Somewhat Conservative 24 12.57
6 Conservative 19 9.95
7 Very Conservative 8 4.19
Table of frequencies for pid_TEXT :
Var1 Freq Percent
1 Democrat 85 44.50
2 Independent / Other 63 32.98
3 Republican 43 22.51
Table of frequencies for area_TEXT :
Var1 Freq Percent
1 Rural 35 18.32
2 Suburban 101 52.88
3 Urban 55 28.80
Table of frequencies for political_group :
Var1 Freq Percent
1 Liberal 96 50.26
2 Moderate 44 23.04
3 Conservative 51 26.70
Table of frequencies for White :
Var1 Freq Percent
1 White 144 75.39
2 Non-White 47 24.61
Table of frequencies for URM :
Var1 Freq Percent
1 Non-URM 150 78.53
2 URM 41 21.47
Table of frequencies for gambling_binary :
Var1 Freq Percent
1 Never gamble 89 46.84
2 Gamble 101 53.16
3 Not Found 0 0.00
Demographic Plot
Code
# List of demographic columns to plotdemographic_columns <-c("gambling_TEXT", "gambling_binary", "gen_TEXT", "race_TEXT", "inc_TEXT", "edu_TEXT", "pol_TEXT", "pid_TEXT", "area_TEXT", "political_group", "White", "URM")# Function to create percent plotcreate_percent_plot <-function(data, column) {# Calculate the frequency and percentage for each category freq_table <- data %>%group_by(across(all_of(column))) %>% dplyr::summarise(Freq =n()) %>%mutate(Percent = Freq /sum(Freq) *100)# Create the plot p <-ggplot(freq_table, aes_string(x = column, y ="Percent", fill = column)) +geom_bar(stat ="identity", position ="dodge") +scale_y_continuous(labels = scales::percent_format(scale =1)) +labs(x = column, y ="Percentage", title =paste("Distribution of", column)) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))return(p)}# Loop through demographic columns and plotplots <-lapply(demographic_columns, function(col) create_percent_plot(demo_data, col))# Display the plotsprint(plots)