AI Experiment Analysis

Loading Libraries

library(afex) # to run the ANOVA and plot results
library(psych) # for the describe() command
library(ggplot2) # to visualize our results
library(expss) # for the cross_cases() command
library(car) # for the leveneTest() command
library(emmeans) # for posthoc tests
library(effsize) # for the cohen.d() command
library(apaTables) # to create our correlation table
library(kableExtra) # to create our correlation table
library(sjPlot) # to visualize our results

Importing Data

# # import your AI results dataset (the path is relative to the working directory)
# # header = TRUE: the first row of the CSV holds the column names
d <- read.csv(file = "Data/Final_Results4.csv", header = TRUE)

State Your Hypotheses & Chosen Tests

H1: I predict that people who spend more time on social media will perceive more stress compared to people who spend less time on social media.

To test this hypothesis, we will be running an independent samples t-test.

H2: I predict that people who spend more time on social media will have a lower satisfaction with life compared to people who spend less time on social media.

To test this hypothesis, we will be running an independent samples t-test.

Check Your Variables

This is just basic variable checking that is used across all HW assignments.

# # to view stats for all variables (describe() comes from the psych package)
# # rows marked with * in the output (identity, consent, manip_out, ai_manip) are
# # text columns, so the numbers printed for them are not meaningful
describe(d)
           vars   n  mean    sd median trimmed   mad   min   max range  skew
id            1 100 50.50 29.01  50.50   50.50 37.06  1.00 100.0 99.00  0.00
identity*     2 100 50.50 29.01  50.50   50.50 37.06  1.00 100.0 99.00  0.00
consent*      3 100  8.11  1.87   8.00    8.30  0.00  1.00  11.0 10.00 -1.37
age           4 100 32.54 11.80  29.00   30.23  7.41 19.00  80.0 61.00  2.04
race          5 100  3.83  1.23   3.00    3.69  0.00  2.00   6.0  4.00  0.96
gender        6 100  1.90  0.30   2.00    2.00  0.00  1.00   2.0  1.00 -2.63
manip_out*    7 100 50.50 29.01  50.50   50.50 37.06  1.00 100.0 99.00  0.00
swb           8 100  3.21  0.26   3.17    3.16  0.25  2.83   4.0  1.17  1.36
stress        9 100  3.54  0.17   3.60    3.57  0.15  3.00   3.8  0.80 -1.12
ai_manip*    10 100 50.50 29.01  50.50   50.50 37.06  1.00 100.0 99.00  0.00
condition    11 100  1.50  0.50   1.50    1.50  0.74  1.00   2.0  1.00  0.00
           kurtosis   se
id            -1.24 2.90
identity*     -1.24 2.90
consent*       3.49 0.19
age            4.37 1.18
race          -0.66 0.12
gender         4.95 0.03
manip_out*    -1.24 2.90
swb            1.18 0.03
stress         0.60 0.02
ai_manip*     -1.24 2.90
condition     -2.02 0.05
# 
# # we'll use the describeBy() command to view skew and kurtosis across our IVs
# # this prints one descriptive table per level of condition (groups 1 and 2);
# # condition is constant within each group, so its skew and kurtosis print as NaN
describeBy(d, group = d$condition)

 Descriptive statistics by group 
group: 1
          vars  n  mean    sd median trimmed   mad   min    max range  skew
id           1 50 25.50 14.58  25.50   25.50 18.53  1.00  50.00  49.0  0.00
identity     2 50 43.84 27.24  41.50   42.83 31.88  1.00 100.00  99.0  0.23
consent      3 50  7.80  1.86   8.00    8.05  0.00  2.00  11.00   9.0 -1.39
age          4 50 31.94 12.56  29.00   29.25  7.41 19.00  80.00  61.0  2.04
race         5 50  3.98  1.27   3.00    3.85  0.00  3.00   6.00   3.0  0.80
gender       6 50  1.92  0.27   2.00    2.00  0.00  1.00   2.00   1.0 -3.00
manip_out    7 50 74.94 15.38  75.50   75.17 18.53 48.00 100.00  52.0 -0.11
swb          8 50  3.18  0.21   3.17    3.15  0.25  2.83   3.83   1.0  1.13
stress       9 50  3.55  0.16   3.60    3.57  0.15  3.10   3.70   0.6 -1.10
ai_manip    10 50 47.94 31.00  50.50   47.30 42.25  1.00  98.00  97.0  0.11
condition   11 50  1.00  0.00   1.00    1.00  0.00  1.00   1.00   0.0   NaN
          kurtosis   se
id           -1.27 2.06
identity     -1.05 3.85
consent       2.60 0.26
age           3.90 1.78
race         -1.14 0.18
gender        7.17 0.04
manip_out    -1.21 2.18
swb           0.68 0.03
stress        0.24 0.02
ai_manip     -1.50 4.38
condition      NaN 0.00
------------------------------------------------------------ 
group: 2
          vars  n  mean    sd median trimmed   mad   min   max range  skew
id           1 50 75.50 14.58  75.50   75.50 18.53 51.00 100.0 49.00  0.00
identity     2 50 57.16 29.46  60.50   58.30 37.81  2.00  99.0 97.00 -0.28
consent      3 50  8.42  1.85   8.00    8.55  0.00  1.00  11.0 10.00 -1.42
age          4 50 33.14 11.09  29.50   31.23  6.67 21.00  79.0 58.00  2.02
race         5 50  3.68  1.19   3.00    3.52  0.00  2.00   6.0  4.00  1.13
gender       6 50  1.88  0.33   2.00    1.98  0.00  1.00   2.0  1.00 -2.27
manip_out    7 50 26.06 15.65  25.50   25.50 18.53  1.00  61.0 60.00  0.26
swb          8 50  3.23  0.29   3.17    3.18  0.25  2.83   4.0  1.17  1.27
stress       9 50  3.54  0.18   3.60    3.57  0.15  3.00   3.8  0.80 -1.09
ai_manip    10 50 53.06 26.95  51.00   53.75 25.20  2.00 100.0 98.00 -0.08
condition   11 50  2.00  0.00   2.00    2.00  0.00  2.00   2.0  0.00   NaN
          kurtosis   se
id           -1.27 2.06
identity     -1.26 4.17
consent       4.54 0.26
age           4.71 1.57
race         -0.13 0.17
gender        3.21 0.05
manip_out    -0.83 2.21
swb           0.52 0.04
stress        0.67 0.03
ai_manip     -0.91 3.81
condition      NaN 0.00
# 
# # also use histograms and scatterplots to examine your continuous variables
# # one histogram per dependent variable to check the normality assumption
# # (gender is a 1/2 code, not a continuous variable, so it does not get a histogram)
hist(d$swb)

hist(d$stress)

# # scatterplot of the two dependent variables against each other
plot(d$swb, d$stress)

# 
# # and table() and cross_cases() to examine your categorical variables
# # you may not need the cross_cases code
# # counts per level of condition; the output shows 50 participants in each group
table(d$condition)

 1  2 
50 50 
table(d$race)

 2  3  4  6 
 2 57 19 22 
table(d$age)

19 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 38 40 42 44 45 46 49 54 57 
 1  4  6  3  7  6  2  6  9  7  9  4  5  1  8  2  1  2  2  2  1  1  1  1  2  1 
60 62 66 79 80 
 1  2  1  1  1 
# cross_cases(d, gender, age)
# 
# # boxplots of each dependent variable split by condition
boxplot(d$swb ~ d$condition)

boxplot(d$stress ~ d$condition)

#
# # store the grouping variable as a factor so R treats it as categorical
d[["condition"]] <- as.factor(d[["condition"]])

Check Your Assumptions

t-Test1 Assumptions

  • Data values must be independent (independent t-test only) (confirmed by data report)
  • Data obtained via a random sample (confirmed by data report)
  • IV must have two levels (will check below)
  • Dependent variable must be normally distributed (will check below. if issues, note and proceed)
  • Variances of the two groups must be approximately equal, aka ‘homogeneity of variance’. Lacking this makes our results inaccurate (will check below - this really only applies to Student’s t-test, but we’ll check it anyway)

Checking IV levels

# # preview the levels and counts for your IV
# # useNA = "always" adds an <NA> column, so any missing condition values would be counted
table(d$condition, useNA = "always")

   1    2 <NA> 
  50   50    0 
# 
# # note that the table() output shows you exactly how the levels of your variable are written. when recoding, make sure you are spelling them exactly as they appear
# 
# # to drop levels from your variable
# # this subsets the data and says that any participant who is coded as 'BAD' should be removed
# d <- subset(d, condition != "BAD")
# 
# table(d$iv, useNA = "always")
# 
# # to combine levels
# # this says that where any participant is coded as 'BAD' it should be replaced by 'GOOD'
# d$iv_rc[d$iv == "BAD"] <- "GOOD"
# 
# table(d$iv, useNA = "always")
# 
# # check your variable types
str(d)
'data.frame':   100 obs. of  11 variables:
 $ id       : int  1 2 3 4 5 6 7 8 9 10 ...
 $ identity : chr  "I’m a 31-year-old Black student at Indiana University, pursuing a degree in psychology. I often feel isolated a"| __truncated__ "I'm a 24-year-old White university student from Indiana. I often feel overwhelmed balancing my studies in psych"| __truncated__ "I’m Jamal, a 24-year-old Black man from Indianapolis. I’m pursuing a degree in social work to help those in nee"| __truncated__ "I'm a 54-year-old Black woman from Indiana, studying social work. I often feel overwhelmed balancing school, wo"| __truncated__ ...
 $ consent  : chr  "I understand the instructions and conditions for the study." "I understand the instructions and conditions for the study." "I understand the instructions and conditions for the study." "I understand these instructions and conditions." ...
 $ age      : int  31 24 24 54 62 24 30 34 32 28 ...
 $ race     : int  3 6 3 3 3 3 6 4 3 3 ...
 $ gender   : int  1 2 1 2 2 1 1 2 2 2 ...
 $ manip_out: chr  "During the seven days with the flip phone, I'll rely on my laptop for essential tasks like managing assignments"| __truncated__ "Using a flip phone for seven days feels daunting, especially since I rely on my smartphone for organization and"| __truncated__ "Using a flip phone for a week will be a significant change for me. I’ll rely on my tablet and laptop for screen"| __truncated__ "Using a flip phone for seven days will be challenging, given my reliance on my smartphone for studies and stayi"| __truncated__ ...
 $ swb      : num  3.17 3 3.5 3.5 3 ...
 $ stress   : num  3.6 3.6 3.3 3.2 3.7 3.7 3.2 3.2 3.7 3.5 ...
 $ ai_manip : chr  "I answered the questions based on my desire to manage anxiety and isolation while finding healthier coping mech"| __truncated__ "I answered the questions based on my reliance on technology for organization and social interaction, reflecting"| __truncated__ "I answered the questions based on my struggles with balancing school and work while feeling lonely. I aimed to "| __truncated__ "I answered based on my experiences balancing school, work, and parenting, highlighting my reliance on technolog"| __truncated__ ...
 $ condition: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
# 
# # make sure that your IV is recognized as a factor by R
# # if you created a new _rc variable make sure to use that one instead
# d$iv <- as.factor(d$iv)

Testing Homogeneity of Variance with Levene’s Test

We can test whether the variances of our two groups are equal using Levene’s test. The null hypothesis is that the variance between the two groups is equal, which is the result we want. So when running Levene’s test we’re hoping for a non-significant result!

# # Levene's test (car package) for equal variance of swb across the two conditions
# # same formula layout as the t-test: DV ~ IV, with the dataframe passed through data =
car::leveneTest(swb ~ condition, data = d)
Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group  1  1.1889 0.2782
      98               

t-Test2 Assumptions

  • Data values must be independent (independent t-test only) (confirmed by data report)
  • Data obtained via a random sample (confirmed by data report)
  • IV must have two levels (will check below)
  • Dependent variable must be normally distributed (will check below. if issues, note and proceed)
  • Variances of the two groups must be approximately equal, aka ‘homogeneity of variance’. Lacking this makes our results inaccurate (will check below - this really only applies to Student’s t-test, but we’ll check it anyway)

Checking IV levels

# # preview the levels and counts for your IV
# # useNA = "always" adds an <NA> column, so any missing condition values would be counted
table(d$condition, useNA = "always")

   1    2 <NA> 
  50   50    0 
# 
# # note that the table() output shows you exactly how the levels of your variable are written. when recoding, make sure you are spelling them exactly as they appear
# 
# # to drop levels from your variable
# # this subsets the data and says that any participant who is coded as 'BAD' should be removed
# d <- subset(d, condition != "BAD")
# 
# table(d$iv, useNA = "always")
# 
# # to combine levels
# # this says that where any participant is coded as 'BAD' it should be replaced by 'GOOD'
# d$iv_rc[d$iv == "BAD"] <- "GOOD"
# 
# table(d$iv, useNA = "always")
# 
# # check your variable types
str(d)
'data.frame':   100 obs. of  11 variables:
 $ id       : int  1 2 3 4 5 6 7 8 9 10 ...
 $ identity : chr  "I’m a 31-year-old Black student at Indiana University, pursuing a degree in psychology. I often feel isolated a"| __truncated__ "I'm a 24-year-old White university student from Indiana. I often feel overwhelmed balancing my studies in psych"| __truncated__ "I’m Jamal, a 24-year-old Black man from Indianapolis. I’m pursuing a degree in social work to help those in nee"| __truncated__ "I'm a 54-year-old Black woman from Indiana, studying social work. I often feel overwhelmed balancing school, wo"| __truncated__ ...
 $ consent  : chr  "I understand the instructions and conditions for the study." "I understand the instructions and conditions for the study." "I understand the instructions and conditions for the study." "I understand these instructions and conditions." ...
 $ age      : int  31 24 24 54 62 24 30 34 32 28 ...
 $ race     : int  3 6 3 3 3 3 6 4 3 3 ...
 $ gender   : int  1 2 1 2 2 1 1 2 2 2 ...
 $ manip_out: chr  "During the seven days with the flip phone, I'll rely on my laptop for essential tasks like managing assignments"| __truncated__ "Using a flip phone for seven days feels daunting, especially since I rely on my smartphone for organization and"| __truncated__ "Using a flip phone for a week will be a significant change for me. I’ll rely on my tablet and laptop for screen"| __truncated__ "Using a flip phone for seven days will be challenging, given my reliance on my smartphone for studies and stayi"| __truncated__ ...
 $ swb      : num  3.17 3 3.5 3.5 3 ...
 $ stress   : num  3.6 3.6 3.3 3.2 3.7 3.7 3.2 3.2 3.7 3.5 ...
 $ ai_manip : chr  "I answered the questions based on my desire to manage anxiety and isolation while finding healthier coping mech"| __truncated__ "I answered the questions based on my reliance on technology for organization and social interaction, reflecting"| __truncated__ "I answered the questions based on my struggles with balancing school and work while feeling lonely. I aimed to "| __truncated__ "I answered based on my experiences balancing school, work, and parenting, highlighting my reliance on technolog"| __truncated__ ...
 $ condition: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
# 
# # make sure that your IV is recognized as a factor by R
# # if you created a new _rc variable make sure to use that one instead
# d$iv <- as.factor(d$iv)

Testing Homogeneity of Variance with Levene’s Test

We can test whether the variances of our two groups are equal using Levene’s test. The null hypothesis is that the variance between the two groups is equal, which is the result we want. So when running Levene’s test we’re hoping for a non-significant result!

# # Levene's test (car package) for equal variance of stress across the two conditions
# # same formula layout as the t-test: DV ~ IV, with the dataframe passed through data =
car::leveneTest(stress ~ condition, data = d)
Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group  1  0.6003 0.4403
      98               

Issues with My Data

The study used participants generated by AI to get an idea of how this study would play out with the general public. It is not an ideal set of participants because there is no variety in the majors of these students.

There were no issues in our data within the two t-tests used. No participants were dropped from this study. However, the scatterplot of social well-being against stress does not show the pattern of a “good” plot, and no red line can be accurately drawn through the points.

Run Your Analysis

Run a t-Test1

# # very simple! we specify the dataframe alongside the variables instead of having a separate argument for the dataframe like we did for leveneTest()
# # compares mean swb between the two levels of condition and saves the result
# # var.equal is not set, so the printed result is labelled "Welch Two Sample t-test"
t_output1 <- t.test(d$swb~d$condition)

View Test Output

t_output1

    Welch Two Sample t-test

data:  d$swb by d$condition
t = -0.97807, df = 89.833, p-value = 0.3307
alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
95 percent confidence interval:
 -0.15156379  0.05156379
sample estimates:
mean in group 1 mean in group 2 
       3.183333        3.233333 

Calculate Cohen’s d

# # once again, we use our formula to calculate cohen's d
# # cohen.d() comes from the effsize package; same y~x formula as the t-test (swb by condition)
d_output1 <- cohen.d(d$swb~d$condition)

View Effect Size

  • Trivial: < .2
  • Small: between .2 and .5
  • Medium: between .5 and .8
  • Large: > .8
d_output1

Cohen's d

d estimate: -0.1956135 (negligible)
95 percent confidence interval:
    lower     upper 
-0.593455  0.202228 

Run a t-Test2

# # very simple! we specify the dataframe alongside the variables instead of having a separate argument for the dataframe like we did for leveneTest()
# # compares mean stress between the two levels of condition and saves the result
# # var.equal is not set, so the printed result is labelled "Welch Two Sample t-test"
t_output2 <- t.test(d$stress~d$condition)

View Test Output

t_output2

    Welch Two Sample t-test

data:  d$stress by d$condition
t = 0.11695, df = 97.215, p-value = 0.9071
alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
95 percent confidence interval:
 -0.06388016  0.07188016
sample estimates:
mean in group 1 mean in group 2 
          3.546           3.542 

Calculate Cohen’s d

# # once again, we use our formula to calculate cohen's d
# # cohen.d() comes from the effsize package; same y~x formula as the t-test (stress by condition)
d_output2 <- cohen.d(d$stress~d$condition)

View Effect Size

  • Trivial: < .2
  • Small: between .2 and .5
  • Medium: between .5 and .8
  • Large: > .8
d_output2

Cohen's d

d estimate: 0.02339025 (negligible)
95 percent confidence interval:
     lower      upper 
-0.3735168  0.4202973 

Write Up Results

t-Test1

We tested our hypothesis that people who spend more time on social media will have a lower satisfaction with life compared to people who spend less time on social media using an independent samples t-test. Our data met all the assumptions of a t-test. However, we did not find a significant difference, t(89.83) = -0.98, p = 0.33, d = -0.20, 95% CI [-0.59, 0.20]. Our effect size was trivial according to Cohen (1988).

t-Test 2

We tested our hypothesis that people who spend more time on social media will perceive more stress compared to people who spend less time on social media using an independent samples t-test. Our data met all the assumptions of a t-test. However, we did not find a significant difference, t(97.22) = 0.12, p = 0.91, d = 0.02, 95% CI [-0.37, 0.42]. Our effect size was trivial according to Cohen (1988).

References

Cohen J. (1988). Statistical Power Analysis for the Behavioral Sciences. New York, NY: Routledge Academic.