# if you haven't run this code before, you'll need to download the below packages first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right
library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
##
## To select rows from data: rows(mtcars, am==0)
##
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
##
## sort_by
# Import our data for the lab
# for the homework, you will import the mydata.csv file that we created in the data prep lab
# header = TRUE tells read.csv that the first row of the file holds the column names
# (written as TRUE rather than T, because T is an ordinary variable that can be reassigned)
d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)
# table() is for categorical variables and hist() is for continuous variables;
# in the homework, change "variablenumber" to the variable name in your data.
# table() shows what the levels of a variable are, and how many participants are in each level.
table(d2[["gender"]])
##
## female I use another term male Prefer not to say
## 285 1 39 4
table(d2[["income"]])
##
## 1 low 2 middle 3 high prefer not to say
## 38 157 86 48
# one histogram per continuous variable, to eyeball the shape of each distribution
hist(d2$covid_neg) # the hist command creates a histogram of the variable
hist(d2$pswq)
hist(d2$iou)
hist(d2$edeq12)
# skew measures how asymmetric (lopsided) the distribution is - we want it between -2 and +2 (above +2 is positively skewed and below -2 is negatively skewed)
# kurtosis measures the height of the distribution (how flat or how peaked it is) - again, we want it between -2 and +2
We analyzed the skew and kurtosis of our continuous variables, and most were within the accepted range (-2 to +2). However, one variable (covid_neg; skew = 4.16, kurtosis = 17.21) was outside of the accepted range. For this analysis, we will use it anyway, but outside of this class this would be bad practice.
# check univariate normality: read the skew and kurtosis columns (acceptable range is -2 to +2)
psych::describe(x = d2)
## vars n mean sd median trimmed mad min max range skew kurtosis
## gender* 1 329 1.28 0.72 1.00 1.09 0.00 1.00 4.00 3.00 2.28 3.57
## income* 2 329 2.44 0.88 2.00 2.42 1.48 1.00 4.00 3.00 0.32 -0.64
## covid_neg 3 329 0.20 0.78 0.00 0.00 0.00 0.00 5.00 5.00 4.16 17.21
## pswq 4 329 2.88 0.88 2.88 2.86 1.02 1.12 4.94 3.81 0.18 -0.82
## iou 5 329 2.19 0.69 2.07 2.13 0.71 1.04 4.44 3.41 0.71 0.13
## edeq12 6 329 1.69 0.55 1.58 1.63 0.49 1.00 3.75 2.75 0.99 0.74
## se
## gender* 0.04
## income* 0.05
## covid_neg 0.04
## pswq 0.05
## iou 0.04
## edeq12 0.03
# cross-tabulate two categorical variables (counts of participants in each combination)
cross_cases(d2, gender, income) # update variable 2 and variable 3 with your categorical variable names
| gender / income | 1 low | 2 middle | 3 high | prefer not to say |
|---|---|---|---|---|
| I use another term |  | 1 |  |  |
| Prefer not to say |  | 2 | 1 | 1 |
| female | 34 | 139 | 71 | 41 |
| male | 4 | 15 | 14 | 6 |
| #Total cases | 38 | 157 | 86 | 48 |
# scatterplots put TWO CONTINUOUS variables together
# the first variable goes on the x-axis and the second on the y-axis
plot(
  x = d2[["covid_neg"]],
  y = d2[["pswq"]],
  xlab = "covid_neg",
  ylab = "pswq",
  main = "Scatterplot of covid_neg and pswq"
)
plot(
  x = d2[["iou"]],
  y = d2[["edeq12"]],
  xlab = "iou",
  ylab = "edeq12",
  main = "Scatterplot of iou and edeq12"
)
# boxplots pair ONE CONTINUOUS variable with ONE CATEGORICAL variable
# the formula reads continuous ~ categorical, so the order matters:
#   the continuous variable goes BEFORE the tilde ~ (it is plotted on y)
#   the categorical variable goes AFTER the tilde ~ (it is plotted on x)
boxplot(
  edeq12 ~ income,
  data = d2,
  xlab = "income",
  ylab = "edeq12",
  main = "Boxplot of income and edeq12"
)
boxplot(
  iou ~ gender,
  data = d2,
  xlab = "gender",
  ylab = "iou",
  main = "Boxplot of gender and iou"
)
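# bold line is the median (not the mean), the box spans the first to third quartiles (the middle 50% of the data), the whiskers reach the most extreme values within 1.5 box-lengths of the box, and any points beyond the whiskers are outliers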