# Step 0: Save this file to your workspace. When you do, it should prompt
# you to install any packages that you need. If it doesn't, make a small change
# to the file (e.g., add a comment) and save again. If that still doesn't work,
# please email me right away!
library(psych)
library(DT)
library(ggpubr)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Loading required package: magrittr
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(effsize)
##
## Attaching package: 'effsize'
## The following object is masked from 'package:psych':
##
## cohen.d
# Step 1: Load the .csv data file(s)
# In this example, the data has already been cleaned and organized. You
# can use R to clean and organize data, but there's a pretty steep learning
# curve, so we'll do all data cleaning in Excel/Google Sheets.
# Read both datasets; header = TRUE treats the first row as column names.
# stringsAsFactors = TRUE is written out because R 4.0.0 changed the default
# to FALSE. With it, categorical columns such as gender load as factors,
# which is what the str() output shown below reports.
data <- read.csv(
  file = "socsupport.csv", header = TRUE, stringsAsFactors = TRUE
)
data2 <- read.csv(
  file = "socsupport2.csv", header = TRUE, stringsAsFactors = TRUE
)
# Preview the first six rows of the main dataset.
head(data)
## X gender age country marital livewith employment
## 1 1 male 21-24 australia other partner employed part-time
## 2 2 female 21-24 australia single partner parental support
## 3 3 male 21-24 australia single residences employed part-time
## 4 4 male 18-20 australia single parents employed part-time
## 5 5 female 21-24 australia single friends employed part-time
## 6 6 female 21-24 australia single friends govt assistance
## firstyr enrolment emotional emotionalsat tangible tangiblesat affect
## 1 other full-time 22 23 17 18 15
## 2 other full-time 21 20 12 10 10
## 3 other full-time 21 18 16 16 15
## 4 first year full-time 19 19 20 17 11
## 5 other full-time 16 19 11 15 6
## 6 other full-time 20 17 16 15 12
## affectsat psi psisat esupport psupport supsources BDI
## 1 15 12 13 13 11 13 5
## 2 6 9 6 12 7 10 8
## 3 15 13 12 14 13 14 16
## 4 11 13 12 15 15 15 0
## 5 10 11 12 9 7 9 9
## 6 14 12 11 13 12 13 0
# Show the structure of the main dataset: one line per column with its type
# and the first few values.
str(data)
## 'data.frame': 95 obs. of 21 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ gender : Factor w/ 2 levels "female","male": 2 1 2 2 1 1 1 1 1 2 ...
## $ age : Factor w/ 5 levels "18-20","21-24",..: 2 2 2 1 2 2 3 3 1 5 ...
## $ country : Factor w/ 2 levels "australia","other": 1 1 1 1 1 1 1 1 1 1 ...
## $ marital : Factor w/ 3 levels "married","other",..: 2 3 3 3 3 3 1 1 3 2 ...
## $ livewith : Factor w/ 6 levels "alone","friends",..: 5 5 6 4 2 2 5 5 4 1 ...
## $ employment : Factor w/ 5 levels "employed fulltime",..: 2 5 2 2 2 3 2 2 2 2 ...
## $ firstyr : Factor w/ 2 levels "first year","other": 2 2 2 1 2 2 2 2 2 2 ...
## $ enrolment : Factor w/ 3 levels "","full-time",..: 2 2 2 2 2 2 2 3 2 2 ...
## $ emotional : int 22 21 21 19 16 20 20 20 24 13 ...
## $ emotionalsat: int 23 20 18 19 19 17 23 20 25 18 ...
## $ tangible : int 17 12 16 20 11 16 20 16 20 6 ...
## $ tangiblesat : int 18 10 16 17 15 15 20 16 20 14 ...
## $ affect : int 15 10 15 11 6 12 14 12 15 6 ...
## $ affectsat : int 15 6 15 11 10 14 15 12 15 12 ...
## $ psi : int 12 9 13 13 11 12 15 12 12 6 ...
## $ psisat : int 13 6 12 12 12 11 15 12 15 11 ...
## $ esupport : int 13 12 14 15 9 13 15 13 16 10 ...
## $ psupport : int 11 7 13 15 7 12 10 11 NA 8 ...
## $ supsources : int 13 10 14 15 9 13 13 11 NA 9 ...
## $ BDI : int 5 8 16 0 9 0 1 14 12 20 ...
# Preview the first six rows of the second dataset.
head(data2)
## X gender emotional
## 1 1 female 17.77
## 2 2 female 15.72
## 3 3 male 20.86
## 4 4 female 16.92
## 5 5 male 18.77
## 6 6 male 14.49
# Show the structure of the second dataset: one line per column with its type
# and the first few values.
str(data2)
## 'data.frame': 100 obs. of 3 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ gender : Factor w/ 2 levels "female","male": 1 1 2 1 2 2 2 2 2 2 ...
## $ emotional: num 17.8 15.7 20.9 16.9 18.8 ...
# Once you've loaded the data, it should show up to the right in the
# environment window. If you click on it, it will open it in a
# viewer-friendly spreadsheet format. Note that you can't edit the data
# in this window the way you can in a spreadsheet.
#############
# Step 2: Calculate descriptives
# Use the 'describe' function in the psych package to calculate descriptive
# statistics for the file, and then use the 'datatable' function in the DT
# package to display the statistics in a friendly way. You can use the popout
# button to display the table in a new tab/window.
# Descriptive statistics for every column of the main dataset, displayed with
# DT::datatable() and with columns 1 to 13 rounded to two decimal places.
data_descriptives <- describe(data)
formatRound(datatable(data_descriptives), columns = 1:13, digits = 2)
# The same table for the second dataset.
data2_descriptives <- describe(data2)
formatRound(datatable(data2_descriptives), columns = 1:13, digits = 2)
# Variables with an asterisk (*) are categorical variables (called 'factors')
# in R. To calculate the values displayed in the table, R turns the levels
# of the variable into integer codes (1, 2, ...) in alphabetical order. For
# instance, the variable gender is coded so that female = 1 and male = 2, so
# the mean and SD shown for it are not meaningful. To view categorical
# variables correctly, use the 'table' command below, modifying as necessary.
# Count how many cases fall into each level of gender.
table(data$gender) #table() is the command; inside the parentheses, you enter
##
## female male
## 71 24
# what you want to be displayed. In this case, this is the
# 'gender' variable from the dataframe 'data', which is
# written as data$gender
#############
# Step 3: Check for outliers and homogeneity of variance.
# Checking for univariate outliers and dropping them from the dataframe. The
# original dataset is saved as 'olddata'.
# Outlier screen for 'emotional'. Column 1 of outliercheckz holds the case ID
# (X) and column 2 the z-score of the variable being screened.
outliercheckz <- data.frame(data$X, scale(data$emotional, center = TRUE, scale = TRUE)) #converting to z-score
# which() skips comparisons that are NA, so a case with a missing score is
# not reported as if it were an outlier.
outlier_rows <- which(abs(outliercheckz[[2]]) >= 3)
# Prints the flagged case IDs first, followed by their z-scores.
unlist(outliercheckz[outlier_rows, ], use.names = FALSE) #display cases with |z| >= 3
## [1] 36.000000 -3.558855
olddata <- data #backup original dataframe
data <- subset(data, X != 36) #filter out indicated cases
# Same screen for 'emotionalsat', run on the data with case 36 removed.
outliercheckz <- data.frame(data$X, scale(data$emotionalsat, center = TRUE, scale = TRUE)) #converting to z-score
outlier_rows <- which(abs(outliercheckz[[2]]) >= 3)
# numeric(0) means no case was flagged.
unlist(outliercheckz[outlier_rows, ], use.names = FALSE) #display cases with |z| >= 3
## numeric(0)
# Levene's test for homogeneity of variance across the gender groups. A
# non-significant result (Pr(>F) above .05) is the basis for using
# var.equal in the t-tests in Step 4.
# Emotional support by gender, main dataset:
leveneTest(emotional ~ gender, data=data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.4613 0.4987
## 92
# Emotional support by gender, second dataset:
leveneTest(emotional ~ gender, data=data2)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0015 0.9693
## 98
# Emotional support satisfaction by gender, main dataset:
leveneTest(emotionalsat ~ gender, data=data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.3051 0.582
## 91
#############
# Step 4: Run your t-test
# Independent samples t-test with binary x
# Compare mean emotional support between the two gender groups.
# var.equal = TRUE requests the pooled-variance (Student) t-test; TRUE is
# spelled out because T is an ordinary variable that can be reassigned.
t.test(emotional ~ gender, data = data, var.equal = TRUE)
##
## Two Sample t-test
##
## data: emotional by gender
## t = 2.6955, df = 92, p-value = 0.008357
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.5727707 3.7799544
## sample estimates:
## mean in group female mean in group male
## 19.91549 17.73913
# Effect size (Cohen's d) for the same comparison.
cohen.d(emotional ~ gender, data = data)
##
## Cohen's d
##
## d estimate: 0.6467052 (medium)
## 95 percent confidence interval:
## lower upper
## 0.1610781 1.1323323
# Independent samples t-test with continuous x
# Compare mean emotional support between the two datasets, passing each
# sample as its own vector instead of using a grouping variable.
# var.equal = TRUE requests the pooled-variance (Student) t-test; TRUE is
# spelled out because T is an ordinary variable that can be reassigned.
t.test(data$emotional, data2$emotional, var.equal = TRUE)
##
## Two Sample t-test
##
## data: data$emotional and data2$emotional
## t = 7.5279, df = 192, p-value = 1.954e-12
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.626111 4.490846
## sample estimates:
## mean of x mean of y
## 19.38298 15.82450
# Effect size (Cohen's d) for the same comparison.
cohen.d(data$emotional, data2$emotional)
##
## Cohen's d
##
## d estimate: 1.081455 (large)
## 95 percent confidence interval:
## lower upper
## 0.7781122 1.3847976
# Paired samples t-test with continuous x
# A paired test compares the two scores given by the same participant, so
# only rows where both scores are present can be used.
paired_rows <- complete.cases(data$emotional, data$emotionalsat)
esa <- data$emotional[paired_rows]
ess <- data$emotionalsat[paired_rows]
# paired = TRUE is what makes this a paired-samples test; without it,
# t.test() runs an independent two-sample test. var.equal is not needed
# for a paired test.
t.test(esa, ess, paired = TRUE)
# Effect size for paired data. The incomplete pair was dropped above, so
# the estimate is no longer NA.
cohen.d(esa, ess, paired = TRUE)
# NOTE: the output previously pasted here came from an independent
# two-sample test (df = 185) and an NA effect size. Re-run the lines above
# to obtain the paired-samples output.
#############
# Step 5: Take notes of your results so you can pull them up easily later
# Example: Independent samples t-test with binary x
# This analysis compared men and women's emotional support availability (ESA)
# There were 71 women and 23 men
# ESA M = 19.24, SD = 3.72 (before the outlier was removed; M = 19.38 after)
# One outlier was detected and removed
# Equal variance was confirmed
# T-test: t(92) = 2.70, p = .008, d = .65
# Women reported significantly higher ESA (M = 19.92) than men (M = 17.74)
#############
# Step 6: Visualize your results
# Boxplot of emotional support for each gender with the individual points
# jittered on top; colour and point shape both vary by gender.
# Replace the placeholder labels with your own.
ggboxplot(
  data = data,
  x = "gender",
  y = "emotional",
  color = "gender",
  shape = "gender",
  add = "jitter",
  title = "(Plot Title)",
  xlab = "(Predictor Variable)",
  ylab = "(Outcome Variable)"
)

# Density curves of emotional support, one per gender, with each group's
# mean marked. Replace the placeholder labels with your own.
ggdensity(
  data = data,
  x = "emotional",
  color = "gender",
  add = "mean",
  title = "(Plot Title)",
  xlab = "(Predictor Variable)",
  ylab = "(Outcome Variable)"
)

# Dot plot of emotional support for each gender (dots binned in steps of
# 0.5) with the group mean and its confidence interval added.
# Replace the placeholder labels with your own.
ggdotplot(
  data,
  x = "gender",
  y = "emotional",
  add = "mean_ci",
  binwidth = .5,
  title = "(Plot Title)",
  xlab = "(Predictor Variable)",
  ylab = "(Outcome Variable)"
)

# Scatter plot of emotional support availability against satisfaction, with
# the distribution of each variable drawn in the margins.
# Replace the placeholder title with your own.
ggscatterhist(
  data = data,
  x = "emotional",
  y = "emotionalsat",
  title = "(Plot Title)",
  xlab = "Emotional Support Availability",
  ylab = "Emotional Support Satisfaction"
)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_density).
