# if you haven't run this code before, you'll need to download the below packages first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right
library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
##
## To get total summary skip 'by' argument: take_all(mtcars, mean)
##
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
##
## sort_by
# Import our data for the lab
# For the homework, you will import the mydata.csv that we created in the Data Prep Lab
# Read the prepared dataset into d2; the first row of the CSV supplies the
# column names. TRUE is spelled out because T is an ordinary variable that can
# be reassigned, whereas TRUE is a reserved word.
d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)
table(d2$race_rc) # the table command shows us what the levels of this variable are and how many participants are in each level
##
## asian black hispanic multiracial nativeamer other
## 208 241 284 293 12 97
## white
## 2011
# Count how many participants fall into each level of marriage5
table(d2[["marriage5"]])
##
## are currently divorced from one another
## 733
## are currently married to one another
## 2121
## never married each other and are not together
## 245
## never married each other but are currently together
## 47
# Draw one histogram per continuous variable to inspect each distribution.
# The title and x-axis label are built from the "d2$<name>" text so that every
# figure is labelled exactly as a direct hist(d2$<name>) call labels it.
invisible(lapply(
  c("belong", "efficacy", "socmeduse", "npi"),
  function(var_name) {
    var_label <- paste0("d2$", var_name)
    hist(
      d2[[var_name]],
      main = paste("Histogram of", var_label),
      xlab = var_label
    )
  }
))
We analyzed the skew and kurtosis of our continuous variables, and all were within the accepted range (-2 to +2).
# Summary statistics for every column of d2. The skew and kurtosis columns are
# the ones inspected for univariate normality (accepted range: -2 to +2).
psych::describe(x = d2)
## vars n mean sd median trimmed mad min max range skew
## race_rc* 1 3146 5.54 2.12 7.00 5.88 0.00 1.0 7 6.0 -0.99
## marriage5* 2 3146 1.87 0.60 2.00 1.83 0.00 1.0 4 3.0 0.47
## belong 3 3146 3.23 0.61 3.30 3.25 0.59 1.3 5 3.7 -0.26
## efficacy 4 3146 3.13 0.45 3.10 3.13 0.44 1.1 4 2.9 -0.24
## socmeduse 5 3146 34.45 8.55 35.00 34.72 7.41 11.0 55 44.0 -0.31
## npi 6 3146 0.28 0.31 0.15 0.24 0.23 0.0 1 1.0 0.94
## kurtosis se
## race_rc* -0.66 0.04
## marriage5* 1.49 0.01
## belong -0.13 0.01
## efficacy 0.45 0.01
## socmeduse 0.27 0.15
## npi -0.69 0.01
# Cross-tabulate two categorical variables: race_rc forms the rows and
# marriage5 forms the columns. For your own data, swap in the names of your
# categorical variables.
expss::cross_cases(data = d2, cell_vars = race_rc, col_vars = marriage5)
| | marriage5 | | | |
|---|---|---|---|---|
| | are currently divorced from one another | are currently married to one another | never married each other and are not together | never married each other but are currently together |
| race_rc | | | | |
| asian | 25 | 175 | 7 | 1 |
| black | 65 | 102 | 66 | 8 |
| hispanic | 65 | 171 | 38 | 10 |
| multiracial | 80 | 176 | 34 | 3 |
| nativeamer | 3 | 8 | 1 | |
| other | 23 | 61 | 10 | 3 |
| white | 472 | 1428 | 90 | 21 |
| #Total cases | 733 | 2121 | 245 | 47 |
# Scatterplot with efficacy on the x-axis and socmeduse on the y-axis
with(
  d2,
  plot(
    x = efficacy,
    y = socmeduse,
    main = "Scatterplot of efficacy and socmeduse",
    xlab = "efficacy",
    ylab = "socmeduse"
  )
)
# Scatterplot with socmeduse on the x-axis and npi on the y-axis
with(
  d2,
  plot(
    x = socmeduse,
    y = npi,
    main = "Scatterplot of socmeduse and npi",
    xlab = "socmeduse",
    ylab = "npi"
  )
)
# A boxplot pairs ONE CONTINUOUS variable with ONE CATEGORICAL variable.
# The order inside the formula matters:
#   the continuous variable goes on the LEFT of the tilde (~)
#   the categorical variable goes on the RIGHT of the tilde (~)
boxplot(
  belong ~ race_rc,
  data = d2,
  main = "Boxplot of race_rc and belong",
  xlab = "race_rc",
  ylab = "belong"
)
# Distribution of efficacy (continuous) within each race_rc group (categorical)
boxplot(
  efficacy ~ race_rc,
  data = d2,
  main = "Boxplot of race_rc and efficacy",
  xlab = "race_rc",
  ylab = "efficacy"
)