Basic Statistics

Load Libraries

# if you haven't run this code before, you'll need to download the below packages first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right

library(psych) # for the describe() command
## Warning: package 'psych' was built under R version 4.2.3
library(expss) # for the cross_cases() command
## Warning: package 'expss' was built under R version 4.2.3
## Loading required package: maditr
## Warning: package 'maditr' was built under R version 4.2.3
## 
## To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
## 
## Use 'expss_output_rnotebook()' to display tables inside R Notebooks.
##  To return to the console output, use 'expss_output_default()'.

Import Data

#import our data for the lab
#for the homework, you will import the mydata.csv that we created in the Data Prep Lab

# read the prepared dataset into d2; the first row of the file holds the column names
# header is spelled out as TRUE because T is an ordinary variable that can be reassigned
d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)

Univariate Plots: Histograms & Tables

# frequency table for gender: lists each level of the variable and the number of participants in it
table(d2[["gender"]])
## 
##             female I use another term               male  Prefer not to say 
##                969                 29                188                 21
# frequency table for mhealth: lists each level of the variable and the number of participants in it
table(d2[["mhealth"]])
## 
##              anxiety disorder                       bipolar 
##                           122                             5 
##                    depression              eating disorders 
##                            29                            26 
##                    none or NA obsessive compulsive disorder 
##                           938                            26 
##                         other                          ptsd 
##                            37                            24
# one histogram per continuous variable, to eyeball the shape of each distribution
# note: with no main/xlab given, the title and x-axis label are taken from the argument text (e.g. "d2$big5_neu")
hist(d2$big5_neu) #the hist command creates a histogram of the variable

hist(d2$mfq_26)

hist(d2$mfq_state)

hist(d2$pas_covid)

Univariate Normality

We analyzed the skew and kurtosis of our continuous variables, and all were within the accepted range (-2 to +2). (True for the lab!! May not be true for the HW!!)

We analyzed the skew and kurtosis of our … and most were within the accepted range (-2/+2). However, some variables (list them in parentheses) were outside of the accepted range. For this analysis, we will use them anyway, but outside of this class this is bad practice.

# descriptive statistics for every column of d2
# read the skew and kurtosis columns to check univariate normality (accepted range: -2/+2)
psych::describe(d2)
##           vars    n mean   sd median trimmed  mad min max range  skew kurtosis
## gender*      1 1207 1.39 0.81   1.00    1.21 0.00 1.0   4   3.0  1.74     1.41
## mhealth*     2 1207 4.66 1.41   5.00    4.89 0.00 1.0   8   7.0 -1.41     2.60
## big5_neu     3 1207 4.36 1.52   4.67    4.40 1.48 1.0   7   6.0 -0.28    -0.78
## mfq_26       4 1207 4.32 0.67   4.35    4.34 0.67 1.8   6   4.2 -0.34     0.22
## mfq_state    5 1207 4.11 0.98   4.25    4.17 0.93 1.0   6   5.0 -0.57     0.25
## pas_covid    6 1207 3.23 0.68   3.22    3.24 0.66 1.0   5   4.0 -0.18    -0.02
##             se
## gender*   0.02
## mhealth*  0.04
## big5_neu  0.04
## mfq_26    0.02
## mfq_state 0.03
## pas_covid 0.02

Bivariate Plots

Crosstabs

# crosstab of two categorical variables: gender forms the rows, mhealth forms the columns
# for the homework, replace gender and mhealth with your own categorical variable names
cross_cases(data = d2, cell_vars = gender, col_vars = mhealth)
 mhealth 
 anxiety disorder   bipolar   depression   eating disorders   none or NA   obsessive compulsive disorder   other   ptsd 
 gender 
   I use another term  5 1 1 14 5 3
   Prefer not to say  2 2 16 1
   female  96 2 26 24 753 23 27 18
   male  19 2 2 155 3 4 3
   #Total cases  122 5 29 26 938 26 37 24

Scatterplots

# scatterplot of two continuous variables: big5_neu on the x-axis, mfq_26 on the y-axis
plot(
  x = d2[["big5_neu"]],
  y = d2[["mfq_26"]],
  xlab = "big5_neu",
  ylab = "mfq_26",
  main = "Scatterplot of Neuroticism and Mental Flexibility"
)

# scatterplot of two continuous variables: mfq_state on the x-axis, pas_covid on the y-axis
# the title names the variables actually plotted; swap in descriptive labels for your own variables
plot(d2$mfq_state, d2$pas_covid,
     main = "Scatterplot of mfq_state and pas_covid",
     xlab = "mfq_state",
     ylab = "pas_covid")

Boxplots

#boxplots use ONE CATEGORICAL and ONE CONTINUOUS variable
#make sure that you enter them in the right order!!
#continuous variable goes BEFORE the tilde ~
#categorical variable goes AFTER the tilde !

# distribution of big5_neu (continuous, left of the tilde) within each level of gender (categorical, right of the tilde)
boxplot(
  big5_neu ~ gender,
  data = d2,
  xlab = "gender",
  ylab = "big5_neu",
  main = "Boxplot of Gender and Neuroticism"
)

# distribution of pas_covid (continuous, left of the tilde) within each level of mhealth (categorical, right of the tilde)
boxplot(
  pas_covid ~ mhealth,
  data = d2,
  xlab = "mhealth",
  ylab = "pas_covid",
  main = "Boxplot of Mental Health and Pandemic Anxiety Scale"
)