library(afex) # to run the ANOVA and plot results
library(psych) # for the describe() command
library(ggplot2) # to visualize our results
library(expss) # for the cross_cases() command
library(car) # for the leveneTest() command
library(emmeans) # for posthoc tests
library(effsize) # for the cohen.d() command
library(apaTables) # to create our correlation table
library(kableExtra) # to create our correlation table
library(sjPlot) # to visualize our results
AI Experiment Analysis
Loading Libraries
Importing Data
# Import the AI results dataset into the data frame `d`.
# header = TRUE: the first row of the CSV holds the column names.
d <- read.csv(file = "Data/final_results.csv", header = TRUE)
State Your Hypotheses & Chosen Tests
H1: I predict that participants who receive positive feedback (high self-efficacy condition) will report higher perceived social support than participants who receive negative feedback (low self-efficacy condition). *T-test
H2: Trans female participants will report lower levels of perceived social support compared to cisgender female participants. *T-test
Check Your Variables
This is just basic variable checking that is used across all HW assignments.
# Summary statistics (n, mean, sd, skew, kurtosis, ...) for every variable in d
psych::describe(d)
vars n mean sd median trimmed mad min max range skew
id 1 100 50.50 29.01 50.5 50.50 37.06 1.0 100.0 99.0 0.00
identity* 2 101 51.00 29.30 51.0 51.00 37.06 1.0 101.0 100.0 0.00
consent* 3 101 34.49 15.67 35.0 35.16 16.31 1.0 62.0 61.0 -0.35
age 4 100 34.09 10.42 29.0 32.20 4.45 22.0 88.0 66.0 2.20
race 5 100 3.05 0.22 3.0 3.00 0.00 3.0 4.0 1.0 4.07
gender 6 100 1.92 0.99 2.0 1.77 1.48 1.0 4.0 3.0 1.08
manip_out* 7 101 51.00 29.30 51.0 51.00 37.06 1.0 101.0 100.0 0.00
manip_out2* 8 101 51.00 29.30 51.0 51.00 37.06 1.0 101.0 100.0 0.00
survey1 9 100 4.89 0.50 5.0 4.93 0.00 3.0 6.0 3.0 -1.05
survey2 10 100 2.67 0.23 2.6 2.64 0.15 2.3 3.4 1.1 1.02
ai_manip* 11 101 51.00 29.30 51.0 51.00 37.06 1.0 101.0 100.0 0.00
condition 12 100 1.50 0.50 1.5 1.50 0.74 1.0 2.0 1.0 0.00
kurtosis se
id -1.24 2.90
identity* -1.24 2.92
consent* -0.78 1.56
age 6.41 1.04
race 14.69 0.02
gender 0.18 0.10
manip_out* -1.24 2.92
manip_out2* -1.24 2.92
survey1 2.08 0.05
survey2 0.55 0.02
ai_manip* -1.24 2.92
condition -2.02 0.05
#
# Same summary statistics, split by the levels of the IV `condition`,
# so skew and kurtosis can be inspected within each group
psych::describeBy(d, group = "condition")
Descriptive statistics by group
condition: 1
vars n mean sd median trimmed mad min max range skew
id 1 50 25.50 14.58 25.5 25.50 18.53 1.0 50.0 49 0.00
identity 2 50 46.10 29.43 46.0 44.92 37.81 4.0 100.0 96 0.20
consent 3 50 34.30 15.35 37.0 35.00 14.83 3.0 60.0 57 -0.41
age 4 50 35.00 11.69 29.0 32.90 4.45 22.0 88.0 66 2.28
race 5 50 3.04 0.20 3.0 3.00 0.00 3.0 4.0 1 4.55
gender 6 50 2.04 1.03 2.0 1.93 0.74 1.0 4.0 3 0.91
manip_out 7 50 55.24 31.72 58.5 56.15 43.74 2.0 101.0 99 -0.17
manip_out2 8 50 26.96 15.01 26.5 26.95 19.27 2.0 52.0 50 0.01
survey1 9 50 4.95 0.44 5.0 4.97 0.00 4.0 6.0 2 -0.53
survey2 10 50 2.67 0.25 2.6 2.64 0.15 2.3 3.3 1 0.86
ai_manip 11 50 55.12 29.17 56.0 55.75 31.13 5.0 101.0 96 -0.18
condition 12 50 1.00 0.00 1.0 1.00 0.00 1.0 1.0 0 NaN
kurtosis se
id -1.27 2.06
identity -1.28 4.16
consent -0.90 2.17
age 6.60 1.65
race 19.13 0.03
gender -0.30 0.15
manip_out -1.37 4.49
manip_out2 -1.30 2.12
survey1 1.18 0.06
survey2 -0.24 0.03
ai_manip -1.15 4.12
condition NaN 0.00
------------------------------------------------------------
condition: 2
vars n mean sd median trimmed mad min max range skew
id 1 50 75.50 14.58 75.5 75.50 18.53 51.0 100.0 49.0 0.00
identity 2 50 56.90 27.83 54.5 57.77 35.58 2.0 101.0 99.0 -0.16
consent 3 50 35.34 15.56 33.0 36.08 17.79 2.0 62.0 60.0 -0.27
age 4 50 33.18 9.00 29.0 31.50 4.45 22.0 62.0 40.0 1.63
race 5 50 3.06 0.24 3.0 3.00 0.00 3.0 4.0 1.0 3.59
gender 6 50 1.80 0.95 2.0 1.62 1.48 1.0 4.0 3.0 1.24
manip_out 7 50 47.76 25.80 47.0 47.48 32.62 5.0 99.0 94.0 0.10
manip_out2 8 50 76.04 15.69 76.5 76.50 18.53 29.0 101.0 72.0 -0.39
survey1 9 50 4.84 0.55 5.0 4.89 0.00 3.0 6.0 3.0 -1.20
survey2 10 50 2.67 0.22 2.6 2.64 0.15 2.3 3.4 1.1 1.19
ai_manip 11 50 47.88 28.69 40.5 47.40 34.84 2.0 100.0 98.0 0.17
condition 12 50 2.00 0.00 2.0 2.00 0.00 2.0 2.0 0.0 NaN
kurtosis se
id -1.27 2.06
identity -1.16 3.94
consent -0.78 2.20
age 1.94 1.27
race 11.15 0.03
gender 0.71 0.13
manip_out -1.15 3.65
manip_out2 -0.20 2.22
survey1 1.70 0.08
survey2 1.50 0.03
ai_manip -1.30 4.06
condition NaN 0.00
# Summary statistics split by the levels of the IV `gender`
psych::describeBy(d, group = "gender")
Descriptive statistics by group
gender: 1
vars n mean sd median trimmed mad min max range skew
id 1 38 55.97 31.00 57.5 57.00 39.29 1.0 99.0 98.0 -0.25
identity 2 38 58.13 27.31 56.0 58.97 39.29 3.0 100.0 97.0 -0.17
consent 3 38 37.26 12.31 38.5 38.16 12.60 2.0 56.0 54.0 -0.74
age 4 38 29.87 6.68 28.0 28.88 1.48 22.0 50.0 28.0 1.75
race 5 38 3.00 0.00 3.0 3.00 0.00 3.0 3.0 0.0 NaN
gender 6 38 1.00 0.00 1.0 1.00 0.00 1.0 1.0 0.0 NaN
manip_out 7 38 46.11 16.34 48.5 47.16 14.83 9.0 73.0 64.0 -0.61
manip_out2 8 38 56.82 30.11 61.5 57.91 34.10 4.0 101.0 97.0 -0.35
survey1 9 38 4.78 0.58 5.0 4.81 0.00 3.0 6.0 3.0 -1.05
survey2 10 38 2.68 0.23 2.6 2.66 0.15 2.3 3.2 0.9 0.77
ai_manip 11 38 45.79 26.26 41.5 44.94 33.36 2.0 100.0 98.0 0.30
condition 12 38 1.58 0.50 2.0 1.59 0.00 1.0 2.0 1.0 -0.31
kurtosis se
id -1.33 5.03
identity -1.21 4.43
consent 0.38 2.00
age 2.59 1.08
race NaN 0.00
gender NaN 0.00
manip_out -0.34 2.65
manip_out2 -1.29 4.88
survey1 1.17 0.09
survey2 -0.17 0.04
ai_manip -1.07 4.26
condition -1.95 0.08
------------------------------------------------------------
gender: 2
vars n mean sd median trimmed mad min max range skew
id 1 47 48.91 26.12 47.0 48.54 31.13 7.0 96.0 89 0.13
identity 2 47 44.53 29.59 41.0 43.33 38.55 2.0 101.0 99 0.24
consent 3 47 34.32 17.25 32.0 34.46 22.24 3.0 62.0 59 -0.05
age 4 47 37.79 9.91 34.0 36.56 7.41 27.0 62.0 35 1.00
race 5 47 3.11 0.31 3.0 3.03 0.00 3.0 4.0 1 2.47
gender 6 47 2.00 0.00 2.0 2.00 0.00 2.0 2.0 0 NaN
manip_out 7 47 51.47 35.25 61.0 51.69 47.44 2.0 101.0 99 -0.06
manip_out2 8 47 46.83 29.36 46.0 45.87 37.06 2.0 100.0 98 0.22
survey1 9 47 4.95 0.45 5.0 4.97 0.00 4.0 6.0 2 -0.55
survey2 10 47 2.64 0.21 2.6 2.63 0.15 2.3 3.3 1 0.89
ai_manip 11 47 55.40 29.54 57.0 56.33 40.03 3.0 98.0 95 -0.22
condition 12 47 1.47 0.50 1.0 1.46 0.00 1.0 2.0 1 0.12
kurtosis se
id -1.18 3.81
identity -1.32 4.32
consent -1.32 2.52
age -0.15 1.45
race 4.20 0.05
gender NaN 0.00
manip_out -1.75 5.14
manip_out2 -1.24 4.28
survey1 1.11 0.07
survey2 0.47 0.03
ai_manip -1.28 4.31
condition -2.03 0.07
------------------------------------------------------------
gender: 4
vars n mean sd median trimmed mad min max range skew
id 1 15 41.60 31.57 38.0 40.08 40.03 3.0 100.0 97.0 0.25
identity 2 15 56.53 28.18 56.0 57.15 23.72 9.0 96.0 87.0 -0.07
consent 3 15 30.20 15.98 33.0 30.62 11.86 4.0 51.0 47.0 -0.46
age 4 15 33.20 15.21 29.0 29.38 0.00 28.0 88.0 60.0 3.10
race 5 15 3.00 0.00 3.0 3.00 0.00 3.0 3.0 0.0 NaN
gender 6 15 4.00 0.00 4.0 4.00 0.00 4.0 4.0 0.0 NaN
manip_out 7 15 65.27 29.71 72.0 66.38 37.06 16.0 100.0 84.0 -0.37
manip_out2 8 15 52.67 23.94 47.0 52.54 17.79 10.0 97.0 87.0 0.43
survey1 9 15 5.01 0.37 5.0 5.05 0.00 4.0 5.5 1.5 -0.96
survey2 10 15 2.73 0.31 2.6 2.71 0.15 2.4 3.4 1.0 0.94
ai_manip 11 15 53.73 33.42 61.0 53.92 48.93 4.0 101.0 97.0 -0.14
condition 12 15 1.40 0.51 1.0 1.38 0.00 1.0 2.0 1.0 0.37
kurtosis se
id -1.38 8.15
identity -1.10 7.28
consent -1.22 4.13
age 8.24 3.93
race NaN 0.00
gender NaN 0.00
manip_out -1.56 7.67
manip_out2 -0.71 6.18
survey1 1.42 0.10
survey2 -0.59 0.08
ai_manip -1.46 8.63
condition -1.98 0.13
#
# Examine the continuous variables: one histogram per survey score,
# then a scatterplot of survey1 (x-axis) against survey2 (y-axis)
hist(d$survey1)
hist(d$survey2)
plot(d$survey1, d$survey2)
#
# Use table() and cross_cases() to examine categorical variables
# (the cross_cases() code may not be needed).
# Counts of participants at each level of gender:
table(d$gender)
1 2 4
38 47 15
# cross_cases(d, IV1, IV2)
#
# Boxplot of a continuous variable (survey1) across the levels of a
# categorical variable (gender)
boxplot(d$survey1~d$gender)
#
# Template: convert any categorical variables to factors
#d$gender <- as.factor(d$gender)
Check Your Assumptions
t-Test Assumptions
- Data values must be independent (independent t-test only) (confirmed by data report)
- Data obtained via a random sample (confirmed by data report)
- IV must have two levels (will check below)
- Dependent variable must be normally distributed (will check below. if issues, note and proceed)
- Variances of the two groups must be approximately equal, aka ‘homogeneity of variance’. Lacking this makes our results inaccurate (will check below - this really only applies to Student’s t-test, but we’ll check it anyway)
Checking IV levels
# Preview the levels and counts of the IV `condition`;
# useNA = "always" adds a column counting missing values
table(d$condition, useNA = "always")
1 2 <NA>
50 50 1
#
# Note: the table() output shows exactly how the levels of your variable are
# written. When recoding, spell the levels exactly as they appear.
#
# Template: drop a level from your variable.
# subset() keeps only the rows matching the condition, so this example would
# remove every participant whose gender is coded "1".
# d <- subset(d, gender != "1")
#
# table(d$gender, useNA = "always")
#
# Template: combine levels.
# Wherever a participant is coded 'BAD' on iv, the recoded variable iv_rc is
# set to 'GOOD'.
# d$iv_rc[d$iv == "BAD"] <- "GOOD"
#
# table(d$iv, useNA = "always")
#
# Check your variable types
str(d)
'data.frame': 101 obs. of 12 variables:
$ id : int 1 2 3 4 5 6 7 8 9 10 ...
$ identity : chr "I’m 26, a Black cisgender male from Atlanta. I’m passionate about graphic design but often feel overwhelmed by "| __truncated__ "I’m 30, a Black cisgender male living in Atlanta. I’m passionate about music and work as a sound engineer. Whil"| __truncated__ "I’m a 29-year-old Black trans woman from Atlanta. I’m passionate and creative, working as a graphic designer. I"| __truncated__ "I'm a 32-year-old Black trans woman living in Atlanta. While I find joy in my art and community, I often strugg"| __truncated__ ...
$ consent : chr "I understand the instructions. I'm ready to participate in the study and respond to the questions and writing task." "I understand the instructions. I'm ready to participate in the study and complete the writing task as required." "I understand the instructions clearly. I'm ready to answer any questions and complete the writing task as required." "I understand the instructions. I'm ready to participate in the study and complete the writing task." ...
$ age : int 26 30 29 32 29 28 39 48 45 29 ...
$ race : int 3 3 3 3 3 3 3 3 3 3 ...
$ gender : int 1 1 4 4 4 1 2 2 2 1 ...
$ manip_out : chr "Hey everyone, my name is [Your Name], and I’m really excited to be here with all of you. I’m 26 years old and o"| __truncated__ "Hey everyone, I’m really glad to be here and have the chance to introduce myself to you all. My name is [Your N"| __truncated__ "Hi everyone! I’m so excited to be here and to have the chance to get to know all of you. My name is [Your Name]"| __truncated__ "Hello everyone, my name is [Your Name], and I’m really excited to be here with all of you today. I’m a 32-year-"| __truncated__ ...
$ manip_out2: chr "I appreciate the positive feedback on my introduction. It boosts my confidence and motivates me to embrace my i"| __truncated__ "I appreciate the positive feedback on my introduction; it reinforces my confidence in expressing myself. While "| __truncated__ "I feel validated and encouraged by the positive feedback on my introduction. It reassures me that my voice matt"| __truncated__ "I'm truly grateful for the positive feedback on my introduction. It reassures me that my journey and struggles "| __truncated__ ...
$ survey1 : num 5 5 5.5 5 5 5 5 5.25 5 5 ...
$ survey2 : num 2.6 2.5 2.7 3.2 2.5 2.6 3 2.5 2.5 2.3 ...
$ ai_manip : chr "I focused on expressing my passion for graphic design and my struggles with anxiety and debt to foster connecti"| __truncated__ "I answered the questions by expressing my passion for music and my experiences with isolation and anxiety. Shar"| __truncated__ "I answered the questions reflecting my passion for creativity and the importance of supportive friendships. Pos"| __truncated__ "I answered your questions by reflecting on my journey as a Black trans woman and how my art and community contr"| __truncated__ ...
$ condition : int 1 1 1 1 1 1 1 1 1 1 ...
#
# Make sure the IV is recognized as a factor by R.
# If you created a recoded (_rc) variable, convert that one instead.
d$condition <- as.factor(d$condition)
Testing Homogeneity of Variance with Levene’s Test
We can test whether the variances of our two groups are equal using Levene’s test. The null hypothesis is that the variance between the two groups is equal, which is the result we want. So when running Levene’s test we’re hoping for a non-significant result!
# Levene's test (car package) for homogeneity of variance.
# Formula interface is DV ~ IV: survey1 is the DV, condition is the IV.
car::leveneTest(survey1 ~ condition, data = d)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 1 0.6688 0.4154
98
Issues with My Data
We did not drop or combine participants for this analysis. We also confirmed homogeneity of variance using Levene’s test (p = 0.415) and that our dependent variable is approximately normally distributed within each condition (skew and kurtosis between -2 & +2).
# Run Your Analysis
## Run a t-Test
# Run the t-test with the formula DV ~ IV. Here the data frame is named
# alongside each variable (d$...) rather than passed through a separate
# `data` argument as in leveneTest(). The result is stored in t_output.
t_output <- t.test(d$survey1 ~ d$condition)
View Test Output
# Print the stored t-test result
t_output
Welch Two Sample t-test
data: d$survey1 by d$condition
t = 1.0588, df = 93.591, p-value = 0.2924
alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
95 percent confidence interval:
-0.09191094 0.30191094
sample estimates:
mean in group 1 mean in group 2
4.947 4.842
Calculate Cohen’s d
# Compute Cohen's d with the same DV ~ IV formula and store it in d_output.
# The call is namespaced because psych also exports a cohen.d() function;
# effsize::cohen.d is the one this analysis intends to use.
d_output <- effsize::cohen.d(d$survey1 ~ d$condition)
View Effect Size
- Trivial: < .2
- Small: between .2 and .5
- Medium: between .5 and .8
- Large: > .8
# Print the stored effect size (estimate and 95% confidence interval)
d_output
Cohen's d
d estimate: 0.2117626 (small)
95 percent confidence interval:
lower upper
-0.1862417 0.6097669
t-Test Assumptions
- Data values must be independent (independent t-test only) (confirmed by data report)
- Data obtained via a random sample (confirmed by data report)
- IV must have two levels (will check below)
- Dependent variable must be normally distributed (will check below. if issues, note and proceed)
- Variances of the two groups must be approximately equal, aka ‘homogeneity of variance’. Lacking this makes our results inaccurate (will check below - this really only applies to Student’s t-test, but we’ll check it anyway)
Checking IV levels
# Preview the levels and counts of the IV `gender`;
# useNA = "always" adds a column counting missing values
table(d$gender, useNA = "always")
1 2 4 <NA>
38 47 15 1
#
# Note: the table() output shows exactly how the levels of your variable are
# written. When recoding, spell the levels exactly as they appear.
#
# Drop a level from the IV so that only two groups remain for the t-test:
# subset() keeps only the rows where gender is not coded 1. Rows where
# gender is NA are dropped as well, because subset() treats an NA condition
# as FALSE.
d <- subset(d, gender != "1")
#
# Confirm which levels remain
table(d$gender, useNA = "always")
2 4 <NA>
47 15 0
#
# Template: combine levels.
# Wherever a participant is coded 'BAD' on iv, the recoded variable iv_rc is
# set to 'GOOD'.
# d$iv_rc[d$iv == "BAD"] <- "GOOD"
#
# table(d$iv, useNA = "always")
#
# Check your variable types
str(d)
'data.frame': 62 obs. of 12 variables:
$ id : int 3 4 5 7 8 9 11 12 13 15 ...
$ identity : chr "I’m a 29-year-old Black trans woman from Atlanta. I’m passionate and creative, working as a graphic designer. I"| __truncated__ "I'm a 32-year-old Black trans woman living in Atlanta. While I find joy in my art and community, I often strugg"| __truncated__ "I’m 29 years old, a Black trans woman navigating life in Atlanta. I cherish my close-knit group of friends but "| __truncated__ "I'm 39, a Black cisgender female from Atlanta. I’m a single mom, navigating work and raising my two kids. I’m p"| __truncated__ ...
$ consent : chr "I understand the instructions clearly. I'm ready to answer any questions and complete the writing task as required." "I understand the instructions. I'm ready to participate in the study and complete the writing task." "I understand the instructions. I'm ready to participate in the study and respond to the questions and writing tasks." "I understand the instructions. I will respond to the questions and complete the writing task as required." ...
$ age : int 29 32 29 39 48 45 29 29 28 43 ...
$ race : int 3 3 3 3 3 3 3 3 3 3 ...
$ gender : int 4 4 4 2 2 2 4 4 2 2 ...
$ manip_out : chr "Hi everyone! I’m so excited to be here and to have the chance to get to know all of you. My name is [Your Name]"| __truncated__ "Hello everyone, my name is [Your Name], and I’m really excited to be here with all of you today. I’m a 32-year-"| __truncated__ "Hi everyone! My name is [Your Name], and I’m really excited to be here today with all of you. I’m a 29-year-old"| __truncated__ "Hi everyone, my name is [Your Name], and I’m really excited to be here with all of you today. I’m a 39-year-old"| __truncated__ ...
$ manip_out2: chr "I feel validated and encouraged by the positive feedback on my introduction. It reassures me that my voice matt"| __truncated__ "I'm truly grateful for the positive feedback on my introduction. It reassures me that my journey and struggles "| __truncated__ "I feel truly grateful for the positive feedback. It affirms my journey of self-expression and highlights the im"| __truncated__ "I appreciate the positive feedback and recognition of my communication skills. It’s reassuring to know that my "| __truncated__ ...
$ survey1 : num 5.5 5 5 5 5.25 5 5 4 5.1 5 ...
$ survey2 : num 2.7 3.2 2.5 3 2.5 2.5 2.5 2.4 3.3 2.8 ...
$ ai_manip : chr "I answered the questions reflecting my passion for creativity and the importance of supportive friendships. Pos"| __truncated__ "I answered your questions by reflecting on my journey as a Black trans woman and how my art and community contr"| __truncated__ "I answered based on my experiences and the desire to connect with others who understand the challenges I face. "| __truncated__ "I answered the questions based on my experiences as a single mom and my desire for validation and support. The "| __truncated__ ...
$ condition : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
#
# Make sure the IV is recognized as a factor by R.
# If you created a recoded (_rc) variable, convert that one instead.
d$gender <- as.factor(d$gender)
Testing Homogeneity of Variance with Levene’s Test
We can test whether the variances of our two groups are equal using Levene’s test. The null hypothesis is that the variance between the two groups is equal, which is the result we want. So when running Levene’s test we’re hoping for a non-significant result!
# Levene's test (car package) for homogeneity of variance.
# Formula interface is DV ~ IV: survey1 is the DV, gender is the IV.
car::leveneTest(survey1 ~ gender, data = d)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 1 0.1176 0.7329
60
Issues with My Data
Describe any issues and why they’re problematic here.
We dropped cisgender male participants (gender = 1) from our analysis so that the IV has only two levels. We also confirmed homogeneity of variance using Levene’s test (p = 0.733) and that our dependent variable is approximately normally distributed within each group (skew and kurtosis between -2 & +2).
Run Your Analysis
Run a t-Test
# Run the t-test with the formula DV ~ IV. Here the data frame is named
# alongside each variable (d$...) rather than passed through a separate
# `data` argument as in leveneTest(). The result is stored in t_output.
t_output <- t.test(d$survey1 ~ d$gender)
### View Test Output
# Print the stored t-test result
t_output
Welch Two Sample t-test
data: d$survey1 by d$gender
t = -0.45347, df = 28.324, p-value = 0.6537
alternative hypothesis: true difference in means between group 2 and group 4 is not equal to 0
95 percent confidence interval:
-0.2890428 0.1842201
sample estimates:
mean in group 2 mean in group 4
4.954255 5.006667
Calculate Cohen’s d
# Compute Cohen's d with the same DV ~ IV formula and store it in d_output.
# The call is namespaced because psych also exports a cohen.d() function;
# effsize::cohen.d is the one this analysis intends to use.
d_output <- effsize::cohen.d(d$survey1 ~ d$gender)
View Effect Size
- Trivial: < .2
- Small: between .2 and .5
- Medium: between .5 and .8
- Large: > .8
# Print the stored effect size (estimate and 95% confidence interval)
d_output
Cohen's d
d estimate: -0.1216705 (negligible)
95 percent confidence interval:
lower upper
-0.7152659 0.4719249
Write Up Results
t-Test
We tested two hypotheses related to perceived social support. First, we hypothesized that participants who received positive feedback (high self-efficacy condition) would report significantly higher perceived social support than those who received negative feedback (low self-efficacy condition). A Welch's independent-samples t-test revealed no significant difference in perceived social support between conditions, t(93.59) = 1.06, p = .292, d = 0.21, 95% CI [-0.19, 0.61] (refer to Figure 1). Our effect size was small according to Cohen (1988).
Second, we hypothesized that trans female participants would report significantly lower levels of perceived social support compared to cisgender female participants. This hypothesis was also tested using a Welch's independent-samples t-test. The results showed no significant difference in perceived social support, t(28.32) = -0.45, p = .654, d = -0.12, 95% CI [-0.72, 0.47] (refer to Figure 2). The effect size was negligible according to Cohen (1988).
References
Cohen, J. (1988). Statistical Power Analysis for the Behavioral Sciences. New York, NY: Routledge Academic.