# Step 0: Save this file to your workspace. When you do, it should prompt
# you to install any packages that you need. If it doesn't, make a small change
# to the file (e.g., add a comment) and save again. If that still doesn't work,
# please email me right away!

library(psych)
library(DT)
library(ggpubr)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Loading required package: magrittr
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
library(effsize)
## 
## Attaching package: 'effsize'
## The following object is masked from 'package:psych':
## 
##     cohen.d
# Step 1: Load the .csv data file(s)

# In this example, the data has already been cleaned and organized. You
# can use R to clean and organize data, but there's a pretty steep learning
# curve, so we'll do all data cleaning in Excel/Google Sheets.

# Read both example files. stringsAsFactors = TRUE is spelled out so that text
# columns (gender, age, ...) are loaded as factors on every version of R:
# since R 4.0.0 read.csv() returns them as character vectors by default, but
# the later steps (the asterisked rows from describe(), leveneTest(), and the
# formula version of cohen.d()) expect factors, as shown in the str() output.
data <- read.csv(
  file = "socsupport.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
data2 <- read.csv(
  file = "socsupport2.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

# head() shows the first six rows; str() lists every column with its type.
head(data)
##   X gender   age   country marital   livewith         employment
## 1 1   male 21-24 australia   other    partner employed part-time
## 2 2 female 21-24 australia  single    partner   parental support
## 3 3   male 21-24 australia  single residences employed part-time
## 4 4   male 18-20 australia  single    parents employed part-time
## 5 5 female 21-24 australia  single    friends employed part-time
## 6 6 female 21-24 australia  single    friends    govt assistance
##      firstyr enrolment emotional emotionalsat tangible tangiblesat affect
## 1      other full-time        22           23       17          18     15
## 2      other full-time        21           20       12          10     10
## 3      other full-time        21           18       16          16     15
## 4 first year full-time        19           19       20          17     11
## 5      other full-time        16           19       11          15      6
## 6      other full-time        20           17       16          15     12
##   affectsat psi psisat esupport psupport supsources BDI
## 1        15  12     13       13       11         13   5
## 2         6   9      6       12        7         10   8
## 3        15  13     12       14       13         14  16
## 4        11  13     12       15       15         15   0
## 5        10  11     12        9        7          9   9
## 6        14  12     11       13       12         13   0
str(data)
## 'data.frame':    95 obs. of  21 variables:
##  $ X           : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ gender      : Factor w/ 2 levels "female","male": 2 1 2 2 1 1 1 1 1 2 ...
##  $ age         : Factor w/ 5 levels "18-20","21-24",..: 2 2 2 1 2 2 3 3 1 5 ...
##  $ country     : Factor w/ 2 levels "australia","other": 1 1 1 1 1 1 1 1 1 1 ...
##  $ marital     : Factor w/ 3 levels "married","other",..: 2 3 3 3 3 3 1 1 3 2 ...
##  $ livewith    : Factor w/ 6 levels "alone","friends",..: 5 5 6 4 2 2 5 5 4 1 ...
##  $ employment  : Factor w/ 5 levels "employed fulltime",..: 2 5 2 2 2 3 2 2 2 2 ...
##  $ firstyr     : Factor w/ 2 levels "first year","other": 2 2 2 1 2 2 2 2 2 2 ...
##  $ enrolment   : Factor w/ 3 levels "","full-time",..: 2 2 2 2 2 2 2 3 2 2 ...
##  $ emotional   : int  22 21 21 19 16 20 20 20 24 13 ...
##  $ emotionalsat: int  23 20 18 19 19 17 23 20 25 18 ...
##  $ tangible    : int  17 12 16 20 11 16 20 16 20 6 ...
##  $ tangiblesat : int  18 10 16 17 15 15 20 16 20 14 ...
##  $ affect      : int  15 10 15 11 6 12 14 12 15 6 ...
##  $ affectsat   : int  15 6 15 11 10 14 15 12 15 12 ...
##  $ psi         : int  12 9 13 13 11 12 15 12 12 6 ...
##  $ psisat      : int  13 6 12 12 12 11 15 12 15 11 ...
##  $ esupport    : int  13 12 14 15 9 13 15 13 16 10 ...
##  $ psupport    : int  11 7 13 15 7 12 10 11 NA 8 ...
##  $ supsources  : int  13 10 14 15 9 13 13 11 NA 9 ...
##  $ BDI         : int  5 8 16 0 9 0 1 14 12 20 ...
head(data2)
##   X gender emotional
## 1 1 female     17.77
## 2 2 female     15.72
## 3 3   male     20.86
## 4 4 female     16.92
## 5 5   male     18.77
## 6 6   male     14.49
str(data2)
## 'data.frame':    100 obs. of  3 variables:
##  $ X        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ gender   : Factor w/ 2 levels "female","male": 1 1 2 1 2 2 2 2 2 2 ...
##  $ emotional: num  17.8 15.7 20.9 16.9 18.8 ...
# Once you've loaded the data, it should show up to the right in the
# environment window. If you click on it, it will open it in a 
# viewer-friendly spreadsheet format. Note that you can't edit the data
# in this window the way you can in a spreadsheet.

#############

# Step 2: Calculate descriptives

# Use the 'describe' function in the psych package to calculate descriptive
# statistics for the file, and then use the 'datatable' function in the DT
# package to display the statistics in a friendly way. You can use the popout
# button to display the table in a new tab/window.

# Compute descriptive statistics for each dataframe, then show them as an
# interactive table with columns 1 through 13 rounded to 2 decimal places.
data_descriptives <- describe(data)
formatRound(datatable(data_descriptives), columns = 1:13, digits = 2)

data2_descriptives <- describe(data2)
formatRound(datatable(data2_descriptives), columns = 1:13, digits = 2)
# Variables with an asterisk (*) are categorical variables (called 'factors')
# in R. To calculate the values displayed in the table, R replaces each level
# of the variable with its integer code, numbered from 1 in the order the
# levels are listed. For instance, the variable gender is recoded so that
# female = 1 and male = 2, which means its mean and SD are not meaningful.
# To view categorical variables correctly, use the 'table' command below,
# modifying as necessary.

# table() is the command; inside the parentheses, you enter what you want to
# be displayed. In this case, this is the 'gender' variable from the
# dataframe 'data', which is written as data$gender. The output lists each
# level of the variable with the number of cases in it.
table(data$gender)
## 
## female   male 
##     71     24

#############

# Step 3: Check for outliers and homogeneity of variance.

# Checking for univariate outliers and dropping them from the dataframe. The
# original dataset is saved as 'olddata'.

# Convert 'emotional' to z-scores, keeping each case's ID (X) next to its score.
outliercheckz <- data.frame(
  X = data$X,
  z = as.numeric(scale(data$emotional, center = TRUE, scale = TRUE))
)
# Display cases with z-scores of 3 or more in either direction. which() keeps
# only the rows where the comparison is TRUE, so a missing score is skipped
# instead of being printed as NA.
outliercheckz[which(abs(outliercheckz$z) >= 3), ]
# Result: case X = 36 is flagged (z = -3.558855).
olddata <- data # backup original dataframe
data <- subset(data, X != 36) # filter out indicated cases

# Repeat the check for 'emotionalsat' on the filtered data.
outliercheckz <- data.frame(
  X = data$X,
  z = as.numeric(scale(data$emotionalsat, center = TRUE, scale = TRUE))
)
outliercheckz[which(abs(outliercheckz$z) >= 3), ]
# Result: no cases are flagged. (One 'emotionalsat' value is missing, so its
# z-score is NA; it is neither flagged nor removed.)

# Levene's test for each comparison that will be made between gender groups.
# A non-significant result (Pr(>F) above .05) gives no evidence that the
# group variances differ.
leveneTest(emotional ~ gender, data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  0.4613 0.4987
##       92
leveneTest(emotional ~ gender, data = data2)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  0.0015 0.9693
##       98
leveneTest(emotionalsat ~ gender, data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  0.3051  0.582
##       91
#############

# Step 4: Run your t-test

# Independent samples t-test with binary x
# Compares the mean 'emotional' score of the two gender groups within 'data'.
# var.equal = TRUE requests the pooled-variance test (reported as
# "Two Sample t-test") instead of R's default Welch test.
t.test(emotional ~ gender, data = data, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  emotional by gender
## t = 2.6955, df = 92, p-value = 0.008357
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.5727707 3.7799544
## sample estimates:
## mean in group female   mean in group male 
##             19.91549             17.73913
# Effect size for the same comparison. Because effsize was loaded after
# psych, this is effsize's cohen.d (see the masking message at load time).
cohen.d(emotional ~ gender, data = data)
## 
## Cohen's d
## 
## d estimate: 0.6467052 (medium)
## 95 percent confidence interval:
##     lower     upper 
## 0.1610781 1.1323323
# Independent samples t-test with continuous x
# Compares 'emotional' between two separate samples: every case in 'data'
# against every case in 'data2'.
t.test(data$emotional, data2$emotional, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  data$emotional and data2$emotional
## t = 7.5279, df = 192, p-value = 1.954e-12
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  2.626111 4.490846
## sample estimates:
## mean of x mean of y 
##  19.38298  15.82450
cohen.d(data$emotional, data2$emotional)
## 
## Cohen's d
## 
## d estimate: 1.081455 (large)
## 95 percent confidence interval:
##     lower     upper 
## 0.7781122 1.3847976
# Paired samples t-test with continuous x
# 'emotional' and 'emotionalsat' are two scores from the same participants
# (the same rows of 'data'), so the test has to be run on the within-person
# differences. That requires paired = TRUE; without it, t.test() treats the
# two columns as independent groups.
#
# Keep only participants who have both scores. A paired comparison needs
# complete pairs, and cohen.d() returns NA when its inputs contain missing
# values.
paired_scores <- na.omit(data[, c("emotional", "emotionalsat")])

t.test(paired_scores$emotional, paired_scores$emotionalsat, paired = TRUE)

# Effect size for the paired comparison, computed on the same complete pairs.
cohen.d(paired_scores$emotional, paired_scores$emotionalsat, paired = TRUE)

# Output is not recorded here: run the two calls above to see the paired
# t statistic, its degrees of freedom (number of complete pairs minus 1),
# and the effect size.
#############

# Step 5: Take notes of your results so you can pull them up easily later

# Example: Independent samples t-test with binary x
# This analysis compared men's and women's emotional support availability (ESA)
# There were 71 women and 23 men
# ESA M = 19.24, SD = 3.72 (full sample, before the outlier was removed)
# One outlier was detected and removed
# Equal variance was assumed (Levene's test was not significant, p = .499)
# T-test: t(92) = 2.70, p = .008, d = .65
# Women reported significantly higher ESA (M = 19.91) than men (M = 17.74)

#############

# Step 6: Visualize your results

# The labels in parentheses are placeholders: replace them with text that
# describes your own variables before using a plot.

# Boxplot of 'emotional' for each gender, with the individual cases jittered
# on top; colour and point shape both follow gender.
ggboxplot(
  data,
  x = "gender",
  y = "emotional",
  color = "gender",
  shape = "gender",
  add = "jitter",
  xlab = "(Predictor Variable)",
  ylab = "(Outcome Variable)",
  title = "(Plot Title)"
)

# Density curve of 'emotional' for each gender, with each group's mean marked.
ggdensity(
  data,
  x = "emotional",
  color = "gender",
  add = "mean",
  xlab = "(Predictor Variable)",
  ylab = "(Outcome Variable)",
  title = "(Plot Title)"
)

# Dot plot of 'emotional' for each gender, with the mean and its confidence
# interval added.
ggdotplot(
  data = data,
  x = "gender",
  y = "emotional",
  binwidth = 0.5,
  add = "mean_ci",
  xlab = "(Predictor Variable)",
  ylab = "(Outcome Variable)",
  title = "(Plot Title)"
)

# Scatter plot of the two continuous measures with marginal distributions.
# The warnings below are expected: one case has no 'emotionalsat' score and
# is left out of the plot.
ggscatterhist(
  data,
  x = "emotional",
  y = "emotionalsat",
  xlab = "Emotional Support Availability",
  ylab = "Emotional Support Satisfaction",
  title = "(Plot Title)"
)
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_density).