# if you haven't run this code before, you'll need to download the below packages first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right
library(psych) # for the describe() command
## Warning: package 'psych' was built under R version 4.2.3
library(expss) # for the cross_cases() command
## Warning: package 'expss' was built under R version 4.2.3
## Loading required package: maditr
## Warning: package 'maditr' was built under R version 4.2.3
##
## To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
##
## Use 'expss_output_rnotebook()' to display tables inside R Notebooks.
## To return to the console output, use 'expss_output_default()'.
##Import Data
# Import the data for the lab into the data frame d2.
# For the homework, import the mydata.csv file created in the Data Prep Lab.
# header = TRUE: the first row of the file holds the column names.
# (Spelled out as TRUE because T is an ordinary variable that can be reassigned.)
d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)
# Frequency table: shows the levels of gender and how many participants fall in each level.
table(d2[["gender"]])
##
## female I use another term male Prefer not to say
## 969 29 188 21
# Frequency table: shows the levels of mhealth and how many participants fall in each level.
table(d2[["mhealth"]])
##
## anxiety disorder bipolar
## 122 5
## depression eating disorders
## 29 26
## none or NA obsessive compulsive disorder
## 938 26
## other ptsd
## 37 24
# Draw one histogram per continuous variable, in this order:
# big5_neu, mfq_26, mfq_state, pas_covid.
# The title and x-axis label are written out explicitly so each plot carries
# the same labels that a direct hist(d2$<name>) call produces by default.
invisible(lapply(
  c("big5_neu", "mfq_26", "mfq_state", "pas_covid"),
  function(var_name) {
    var_label <- paste0("d2$", var_name)
    hist(d2[[var_name]],
         main = paste("Histogram of", var_label),
         xlab = var_label)
  }
))
We analyzed the skew and kurtosis of our continuous variables and all were within the accepted range (-2/+2). (True for the lab!! May not be true for the HW!!)
We analyzed the skew and kurtosis of our continuous variables and most were within the accepted range (-2/+2). However, some variables (list them in parentheses) were outside of the accepted range. For this analysis, we will use them anyway, but outside of this class this is bad practice.
# Descriptive statistics for every column of d2.
# Used here to check univariate normality: skew and kurtosis should fall within -2/+2.
psych::describe(d2)
## vars n mean sd median trimmed mad min max range skew kurtosis
## gender* 1 1207 1.39 0.81 1.00 1.21 0.00 1.0 4 3.0 1.74 1.41
## mhealth* 2 1207 4.66 1.41 5.00 4.89 0.00 1.0 8 7.0 -1.41 2.60
## big5_neu 3 1207 4.36 1.52 4.67 4.40 1.48 1.0 7 6.0 -0.28 -0.78
## mfq_26 4 1207 4.32 0.67 4.35 4.34 0.67 1.8 6 4.2 -0.34 0.22
## mfq_state 5 1207 4.11 0.98 4.25 4.17 0.93 1.0 6 5.0 -0.57 0.25
## pas_covid 6 1207 3.23 0.68 3.22 3.24 0.66 1.0 5 4.0 -0.18 -0.02
## se
## gender* 0.02
## mhealth* 0.04
## big5_neu 0.04
## mfq_26 0.02
## mfq_state 0.03
## pas_covid 0.02
# Cross-tabulate two categorical variables: gender in the rows, mhealth in the columns.
# For the homework, replace gender and mhealth with your own categorical variable names.
cross_cases(data = d2, cell_vars = gender, col_vars = mhealth)
|  mhealth | ||||||||
|---|---|---|---|---|---|---|---|---|
|  anxiety disorder |  bipolar |  depression |  eating disorders |  none or NA |  obsessive compulsive disorder |  other |  ptsd | |
|  gender | ||||||||
|    I use another term | 5 | 1 | 1 | 14 | 5 | 3 | ||
|    Prefer not to say | 2 | 2 | 16 | 1 | ||||
|    female | 96 | 2 | 26 | 24 | 753 | 23 | 27 | 18 |
|    male | 19 | 2 | 2 | 155 | 3 | 4 | 3 | |
|    #Total cases | 122 | 5 | 29 | 26 | 938 | 26 | 37 | 24 |
# Scatterplot of two continuous variables:
# big5_neu on the x-axis, mfq_26 on the y-axis.
with(d2, plot(
  x = big5_neu,
  y = mfq_26,
  main = "Scatterplot of Neuroticism and Mental Flexibility",
  xlab = "big5_neu",
  ylab = "mfq_26"
))
# Scatterplot of two continuous variables:
# mfq_state on the x-axis, pas_covid on the y-axis.
# The title names the variables actually plotted (it previously still read
# "Variable10 and Variable11" from the lab template).
plot(d2$mfq_state, d2$pas_covid,
     main = "Scatterplot of mfq_state and pas_covid",
     xlab = "mfq_state",
     ylab = "pas_covid")
#boxplots use ONE CATEGORICAL and ONE CONTINUOUS variable
#make sure that you enter them in the right order!!
#continuous variable goes BEFORE the tilde ~
#categorical variable goes AFTER the tilde !
# Boxplot of big5_neu (continuous, left of the tilde) split by gender
# (categorical, right of the tilde): one box per gender level.
boxplot(
  big5_neu ~ gender,
  data = d2,
  main = "Boxplot of Gender and Neuroticism",
  xlab = "gender",
  ylab = "big5_neu"
)
# Boxplot of pas_covid (continuous, left of the tilde) split by mhealth
# (categorical, right of the tilde): one box per mhealth level.
boxplot(
  pas_covid ~ mhealth,
  data = d2,
  main = "Boxplot of Mental Health and Pandemic Anxiety Scale",
  xlab = "mhealth",
  ylab = "pas_covid"
)