Basic Statistics

Load Libraries

# if you haven't run this code before, you'll need to download the below packages first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right

library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
## 
## To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
## 
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
## 
##     sort_by

Import Data

# import our data for the lab
# for the homework, you will import the mydata.csv that we created in the Data Prep Lab
# header = TRUE tells read.csv() that the first row of the file holds the column names
# (spell out TRUE: T is an ordinary variable that can be reassigned)

d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)

Univariate Plots: Histograms & Tables

# frequency table: number of cases in each sleep_hours category
table(d2[["sleep_hours"]])
## 
##  1 < 5 hours  2 5-6 hours  3 7-8 hours 4 8-10 hours 5 > 10 hours 
##           39          109          122           61           19
# frequency table: number of cases at each distinct exercise value
table(d2[["exercise"]])
## 
##    0  0.2 0.25  0.3 0.45  0.5  0.6  0.7 0.72 0.75  0.8    1  1.1  1.2  1.3 1.45 
##   32    2    2    1    1   19    2    3    1    2    3   69    1    2    2    2 
##  1.5    2  2.1  2.5    3  3.2  3.5    4  4.5    5  5.5    6  6.5    7  7.3  7.5 
##   24   46    1   22   30    1    9   18    3   13    4   10    2    6    1    1 
##    8    9   10   11   12   14   16 
##    2    2    5    1    2    1    2
# one histogram per numeric variable, each drawn as its own plot
hist(d2$brs) # the hist command creates a histogram for the variable

hist(d2$phq)

hist(d2$gad)

hist(d2$mfq_26)

Univariate Normality

We analyzed the skew and kurtosis of our categorical and continuous variables, and most were within the accepted range (-2 to +2). However, the variable exercise was outside of the accepted range (skew = 2.21, kurtosis = 6.50). For this analysis we will use it anyway, but outside of this class this would be bad practice.

# descriptive statistics (including skew and kurtosis) for every column of d2
psych::describe(d2)
##              vars   n mean   sd median trimmed  mad min   max range  skew
## sleep_hours*    1 350 2.75 1.04   3.00    2.74 1.48 1.0  5.00  4.00  0.21
## exercise        2 350 2.50 2.53   2.00    2.07 1.48 0.0 16.00 16.00  2.21
## brs             3 350 2.74 0.88   2.83    2.73 0.99 1.0  5.00  4.00  0.07
## phq             4 350 2.53 0.89   2.56    2.53 0.99 1.0  4.00  3.00  0.02
## gad             5 350 2.51 0.93   2.57    2.50 1.27 1.0  4.00  3.00  0.02
## mfq_26          6 350 4.13 0.70   4.15    4.14 0.67 1.8  5.75  3.95 -0.17
##              kurtosis   se
## sleep_hours*    -0.50 0.06
## exercise         6.50 0.14
## brs             -0.67 0.05
## phq             -1.09 0.05
## gad             -1.17 0.05
## mfq_26           0.16 0.04

Bivariate Plots

Crosstabs

# crosstab of case counts: sleep_hours in the rows, exercise in the columns
# update the two variables after d2 to our categorical variables
expss::cross_cases(d2, sleep_hours, exercise)
 exercise 
 0   0.2   0.25   0.3   0.45   0.5   0.6   0.7   0.72   0.75   0.8   1   1.1   1.2   1.3   1.45   1.5   2   2.1   2.5   3   3.2   3.5   4   4.5   5   5.5   6   6.5   7   7.3   7.5   8   9   10   11   12   14   16 
 sleep_hours 
   1 < 5 hours  6 1 1 9 4 3 5 1 5 1 1 1 1
   2 5-6 hours  5 1 1 7 1 1 1 2 21 1 2 1 5 14 8 12 1 3 4 4 1 6 2 1 1 1 1 1
   3 7-8 hours  13 1 7 1 1 23 1 11 20 6 9 4 6 1 7 3 2 1 1 2 1 1
   4 8-10 hours  6 1 3 1 1 1 12 1 8 5 1 4 3 1 2 1 2 1 1 1 1 2 1 1
   5 > 10 hours  2 1 1 1 4 1 3 1 1 1 1 1 1
   #Total cases  32 2 2 1 1 19 2 3 1 2 3 69 1 2 2 2 24 46 1 22 30 1 9 18 3 13 4 10 2 6 1 1 2 2 5 1 2 1 2

Scatterplots

# scatterplots: the first variable goes on the x-axis, the second on the y-axis
# anxiety (gad) against depression (phq)
with(d2, plot(
  x = gad,
  y = phq,
  main = "Scatterplot of Anxiety and Depression",
  xlab = "Anxiety",
  ylab = "Depression"
))

# anxiety (gad) against mental flexibility (mfq_26)
with(d2, plot(
  x = gad,
  y = mfq_26,
  main = "Scatterplot of Anxiety and Mental Flexibility",
  xlab = "Anxiety",
  ylab = "Mental Flexibility"
))

Boxplots

# boxplots use one CATEGORICAL and one CONTINUOUS variable
# make sure you enter them in the right order!
# the formula has the form outcome ~ group:
#   the continuous variable goes BEFORE the tilde (~)
#   the categorical (grouping) variable goes AFTER the tilde (~)
# NOTE(review): gad and phq look like continuous scores in the describe()
# output, so one box is drawn per distinct score -- confirm whether a
# categorical grouping variable was intended here
boxplot(exercise ~ gad, data = d2,
        main = "Boxplot of Anxiety and Exercise",
        xlab = "Anxiety",
        ylab = "Exercise")

boxplot(exercise ~ phq, data = d2,
        main = "Boxplot of Depression and Exercise",
        xlab = "Depression",
        ylab = "Exercise")