Load Libraries
library(psych) # for the describe() command
library(naniar) # for the gg_miss_upset() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
##
## To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)
##
## Use 'expss_output_rnotebook()' to display tables inside R Notebooks.
## To return to the console output, use 'expss_output_default()'.
##
## Attaching package: 'expss'
## The following object is masked from 'package:naniar':
##
## is_na
Import Data
# Import the file you created in the last lab. The first row of the CSV is
# read as column names; TRUE is spelled out because the shorthand T is an
# ordinary variable that can be reassigned.
d <- read.csv(file = "final.csv", header = TRUE)

# Subset to only your selected variables if necessary. If the file already
# contains just these columns, you can skip this step.
d <- subset(d, select = c(id, lotr, hope, coninc, race, goodlife_tri))
Check Data
Univariate
Normality
# Frequency count of each race category (missing values are not shown here)
table(d[["race"]])
##
## aian asian black hispanic multi nhpi other white
## 45 63 417 101 234 19 11 1953
# Descriptive statistics (n, mean, sd, skew, kurtosis, ...) for every column
psych::describe(d)
## vars n mean sd median trimmed mad min max
## id* 1 2867 1434.00 827.78 1434.00 1434.00 1063.02 1.00 2867.00
## lotr 2 1441 2.56 0.64 2.67 2.58 0.49 0.00 4.00
## hope 3 1426 5.11 1.22 5.33 5.23 0.99 0.00 7.00
## coninc 4 2867 0.00 1.00 -0.26 -0.16 0.81 -1.05 2.89
## race* 5 2843 6.62 2.16 8.00 6.96 0.00 1.00 8.00
## goodlife_tri* 6 1658 2.44 0.61 3.00 2.50 0.00 1.00 3.00
## range skew kurtosis se
## id* 2866.00 0.00 -1.20 15.46
## lotr 4.00 -0.40 0.60 0.02
## hope 7.00 -1.06 1.39 0.03
## coninc 3.93 1.40 1.61 0.02
## race* 7.00 -1.10 -0.45 0.04
## goodlife_tri* 2.00 -0.59 -0.58 0.01
Histograms
# Histograms of the three continuous variables, one plot per variable.
# The d$<name> form is kept so the automatic plot titles stay the same.
hist(x = d$lotr)

hist(x = d$hope)

hist(x = d$coninc)

# Frequency tables for the categorical variables (the ID variable is skipped).
# useNA = "always" adds an <NA> column so missing values are counted too.
table(d[["race"]], useNA = "always")
##
## aian asian black hispanic multi nhpi other white
## 45 63 417 101 234 19 11 1953
## <NA>
## 24
# Counts per goodlife_tri level, including the number of missing values
table(d[["goodlife_tri"]], useNA = "always")
##
## 0 1 2 <NA>
## 102 724 832 1209
Missing Data
# Use the gg_miss_upset() command (naniar) to visualize which combinations of
# variables are missing together. nsets is the number of variables to include
# in the plot, so it is given as a number rather than the string "6".
gg_miss_upset(d, nsets = 6)

# Build d2: a copy of d that keeps only the rows with no missing values in
# any column (complete cases/observations)
d2 <- stats::na.omit(object = d)
Crosstabs & Scatterplots
Crosstabs
# Race category counts among the complete cases only
table(d2[["race"]])
##
## aian asian black hispanic multi nhpi other white
## 20 29 156 39 100 6 4 829
# Crosstab of the two categorical variables with cross_cases() (expss):
# race categories form the rows, goodlife_tri levels form the columns
expss::cross_cases(
  data = d2,
  cell_vars = race,
  col_vars = goodlife_tri
)
|                 | goodlife_tri |     |     |
|                 |            0 |   1 |   2 |
| --------------- | ------------ | --- | --- |
| race            |              |     |     |
|    aian         |            2 |   8 |  10 |
|    asian        |            1 |   7 |  21 |
|    black        |           13 |  51 |  92 |
|    hispanic     |            2 |  17 |  20 |
|    multi        |            6 |  49 |  45 |
|    nhpi         |              |   5 |   1 |
|    other        |            1 |   2 |   1 |
|    white        |           42 | 377 | 410 |
|    #Total cases |           67 | 516 | 600 |
Scatterplots
# Scatterplots for each pair of continuous variables in the complete-case
# data. Columns are looked up inside d2 via with(); titles and axis labels
# are all set explicitly.
with(d2, plot(
  x = coninc,
  y = lotr,
  main = "scatterplot of income (standardized) and life orientation test score",
  xlab = "income (standardized)",
  ylab = "LOTR score"
))

with(d2, plot(
  x = coninc,
  y = hope,
  main = "scatterplot of income (standardized) and hope scale score",
  xlab = "income (standardized)",
  ylab = "HOPE score"
))

with(d2, plot(
  x = lotr,
  y = hope,
  main = "scatterplot of life orientation test score and hope scale score",
  xlab = "LOTR score",
  ylab = "HOPE score"
))

Boxplots
# Boxplots of a continuous variable (hope) split by each categorical variable.
# The formula reads outcome ~ grouping variable; columns come from d2.
boxplot(
  hope ~ goodlife_tri,
  data = d2,
  main = "hope score by good life estimate",
  xlab = "good life estimate",
  ylab = "HOPE score"
)

boxplot(
  hope ~ race,
  data = d2,
  main = "hope score by race/ethnicity",
  xlab = "race/ethnicity",
  ylab = "HOPE score"
)
