Basic Statistics

Load Libraries

# if you haven't run this code before, you'll need to install the packages below first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right

library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
## 
## To select columns from data: columns(mtcars, mpg, vs:carb)
## 
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
## 
##     sort_by

Import Data

 # import our data for the lab
 # for the homework, you will import the mydata.csv that we created in the Data Prep Lab

# read the prepared dataset into d2; the first row of the file holds the column names
# (TRUE is spelled out because T is an ordinary variable that can be reassigned)
d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)

Univariate Plots: Histograms & Tables

# table() shows what the levels of this variable are
# and how many participants are in each level
table(d2$sexual_orientation)
## 
##               Asexual                    Bi           Gay/Lesbian 
##                    38                   179                    58 
## Heterosexual/Straight    I use another term     Prefer not to say 
##                   927                    35                   100
# number of participants in each level of relationship_status
table(d2$relationship_status)
## 
##   In a relationship/married and cohabiting 
##                                        282 
## In a relationship/married but living apart 
##                                        106 
##                          Prefer not to say 
##                                        104 
##                Single, divorced or widowed 
##                                         44 
##                      Single, never married 
##                                        801
# hist() creates a histogram of the variable;
# one call for each of the four continuous variables
hist(d2$big5_open)

hist(d2$pswq)

hist(d2$mfq_26)

hist(d2$rse)

Univariate Normality

We examined the skew and kurtosis of our continuous variables, and all values were within the accepted range (-2 to +2).

# describe() reports skew and kurtosis for every column of d2; we use them to
# check univariate normality against the accepted range of -2 to +2
# NOTE(review): the rows marked with * in the output are the two categorical
# variables - presumably their skew/kurtosis should not be interpreted; confirm
describe(d2)
##                      vars    n mean   sd median trimmed  mad min max range
## sexual_orientation*     1 1337 3.78 1.05   4.00    3.79 0.00   1   6     5
## relationship_status*    2 1337 3.73 1.68   5.00    3.91 0.00   1   5     4
## big5_open               3 1337 5.23 1.12   5.33    5.31 0.99   1   7     6
## pswq                    4 1337 2.75 0.79   2.79    2.75 0.95   1   5     4
## mfq_26                  5 1337 4.28 0.70   4.35    4.31 0.67   1   6     5
## rse                     6 1337 2.61 0.72   2.70    2.62 0.74   1   4     3
##                       skew kurtosis   se
## sexual_orientation*  -0.44     0.99 0.03
## relationship_status* -0.74    -1.24 0.05
## big5_open            -0.69     0.33 0.03
## pswq                  0.01    -0.77 0.02
## mfq_26               -0.55     0.96 0.02
## rse                  -0.17    -0.72 0.02

Bivariate Plots

Crosstabs

# crosstab of two categorical variables from d2:
# the first variable forms the rows, the second forms the columns
# (for the homework, swap in your own categorical variable names)
cross_cases(
  d2,
  sexual_orientation,
  relationship_status
)
| sexual_orientation (rows) / relationship_status (columns) | In a relationship/married and cohabiting | In a relationship/married but living apart | Prefer not to say | Single, divorced or widowed | Single, never married |
|---|---|---|---|---|---|
| Asexual | 6 |  | 4 | 2 | 26 |
| Bi | 4 | 28 | 19 |  | 128 |
| Gay/Lesbian |  | 8 | 4 |  | 46 |
| Heterosexual/Straight | 265 | 62 | 37 | 40 | 523 |
| I use another term | 2 | 6 | 3 |  | 24 |
| Prefer not to say | 5 | 2 | 37 | 2 | 54 |
| #Total cases | 282 | 106 | 104 | 44 | 801 |

Scatterplots

# scatterplot of two continuous variables:
# big5_open on the x-axis, pswq on the y-axis
plot(
  x = d2$big5_open,
  y = d2$pswq,
  main = "Scatterplot of big5_open and pswq",
  xlab = "big5_open",
  ylab = "pswq"
)

# scatterplot of mfq_26 (x-axis) against rse (y-axis)
# the y argument must be the actual column (d2$rse) so that the plotted data
# match the title and the y-axis label
plot(d2$mfq_26, d2$rse,
     main = "Scatterplot of mfq_26 and rse",
     xlab = "mfq_26",
     ylab = "rse")

Boxplots

 #boxplots use ONE CATEGORICAL AND ONE CONTINUOUS variable
 #make sure that you enter them in the right order!!!
 #continuous variable goes BEFORE the tilde~
 #categorical variable goes AFTER the tilde!
# distribution of rse within each relationship_status group
boxplot(
  rse ~ relationship_status,
  data = d2,
  main = "Boxplot of relationship_status and rse",
  xlab = "relationship_status",
  ylab = "rse"
)

# distribution of pswq within each sexual_orientation group
boxplot(
  pswq ~ sexual_orientation,
  data = d2,
  main = "Boxplot of sexual_orientation and pswq",
  xlab = "sexual_orientation",
  ylab = "pswq"
)