1 Load Libraries

library(psych) # for the describe() command
library(naniar) # for the gg_miss_upset() command
library(expss) # for the cross_cases() command

## Loading required package: maditr

## 
## To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)

## 
## Use 'expss_output_rnotebook()' to display tables inside R Notebooks.
##  To return to the console output, use 'expss_output_default()'.

## 
## Attaching package: 'expss'

## The following object is masked from 'package:naniar':
## 
##     is_na

2 Import Data

# import the file you created in last lab
# (header is spelled out as TRUE because the shorthand T is an ordinary
# variable that can be reassigned)
d <- read.csv(file = "final.csv", header = TRUE)
# subset to only your selected variables if necessary. If not necessary, you can skip this step
d <- subset(d, select = c(id, lotr, hope, coninc, race, goodlife_tri))

3 Check Data

3.1 Formatting

# preview the first rows to confirm the import and column selection worked
head(d)

##   id     lotr     hope      coninc  race goodlife_tri
## 1  1       NA       NA  2.88748724 white           NA
## 2  2 2.833333 4.333333 -0.08526274 white            1
## 3  3 2.000000       NA  0.39370676 white            2
## 4  4       NA       NA  2.88748724 white           NA
## 5  5 3.000000 4.833333  2.88748724 white            1
## 6  6 3.000000 6.000000  0.13245067 white            2

# check the storage type of each column before converting anything
str(d)

## 'data.frame':    2867 obs. of  6 variables:
##  $ id          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ lotr        : num  NA 2.83 2 NA 3 ...
##  $ hope        : num  NA 4.33 NA NA 4.83 ...
##  $ coninc      : num  2.8875 -0.0853 0.3937 2.8875 2.8875 ...
##  $ race        : chr  "white" "white" "white" "white" ...
##  $ goodlife_tri: int  NA 1 2 NA 1 2 NA 2 NA NA ...

# use as.factor() command to make sure your categorical variables are recognized as such
# (all three categorical columns are converted in a single pass)
d[c("id", "race", "goodlife_tri")] <- lapply(
  d[c("id", "race", "goodlife_tri")],
  as.factor
)

3.2 Univariate Normality

# frequency count for each race category (table() leaves out NA by default)
table(d$race)

## 
##     aian    asian    black hispanic    multi     nhpi    other    white 
##       45       63      417      101      234       19       11     1953

# summary statistics (including skew and kurtosis) for every column
# NOTE(review): rows marked with * in the output are the factor columns; their
# statistics appear to be computed on the level codes, so read them with care
describe(d)

##               vars    n    mean     sd  median trimmed     mad   min     max
## id*              1 2867 1434.00 827.78 1434.00 1434.00 1063.02  1.00 2867.00
## lotr             2 1441    2.56   0.64    2.67    2.58    0.49  0.00    4.00
## hope             3 1426    5.11   1.22    5.33    5.23    0.99  0.00    7.00
## coninc           4 2867    0.00   1.00   -0.26   -0.16    0.81 -1.05    2.89
## race*            5 2843    6.62   2.16    8.00    6.96    0.00  1.00    8.00
## goodlife_tri*    6 1658    2.44   0.61    3.00    2.50    0.00  1.00    3.00
##                 range  skew kurtosis    se
## id*           2866.00  0.00    -1.20 15.46
## lotr             4.00 -0.40     0.60  0.02
## hope             7.00 -1.06     1.39  0.03
## coninc           3.93  1.40     1.61  0.02
## race*            7.00 -1.10    -0.45  0.04
## goodlife_tri*    2.00 -0.59    -0.58  0.01

3.3 Histograms

# use the hist() command to create a histogram for your continuous variables
# (one plot per variable: optimism score, hope score, standardized income)
hist(d$lotr)

hist(d$hope)

hist(d$coninc)

# use the table() command to create a table for your categorical variables (other than your ID variable)
# useNA = "always" adds an <NA> column so missing values are counted too
table(d$race, useNA = "always")

## 
##     aian    asian    black hispanic    multi     nhpi    other    white 
##       45       63      417      101      234       19       11     1953 
##     <NA> 
##       24

# same kind of frequency table for goodlife_tri, including the <NA> count
table(d$goodlife_tri, useNA = "always")

## 
##    0    1    2 <NA> 
##  102  724  832 1209

4 Missing Data

# use the gg_miss_upset() command to visualize your missing data
# (nsets is a count of variables to display, so it is passed as a number
# rather than as the string "6")
gg_miss_upset(d, nsets = 6)

# create a new dataframe with only your complete cases/observations
# (na.omit() drops every row that has an NA in any column)
d2 <- na.omit(d)

5 Crosstabs & Scatterplots

5.1 Crosstabs

# re-check the size of each race group now that incomplete rows are removed
table(d2$race)

## 
##     aian    asian    black hispanic    multi     nhpi    other    white 
##       20       29      156       39      100        6        4      829

# use the cross_cases() command to create a crosstab of your categorical variables
# (race categories form the rows and goodlife_tri levels form the columns)
cross_cases(d2, race, goodlife_tri)

	goodlife_tri
	0	1	2
race
aian	2	8	10
asian	1	7	21
black	13	51	92
hispanic	2	17	20
multi	6	49	45
nhpi		5	1
other	1	2	1
white	42	377	410
#Total cases	67	516	600

5.2 Scatterplots

# use the plot() command to create scatterplots of your continuous variables

# income (x) against life orientation score (y)
plot(
  x = d2$coninc,
  y = d2$lotr,
  main = "scatterplot of income (standardized) and life orientation test score",
  xlab = "income (standardized)",
  ylab = "LOTR score"
)

# income (x) against hope score (y)
plot(
  x = d2$coninc,
  y = d2$hope,
  main = "scatterplot of income (standardized) and hope scale score",
  xlab = "income (standardized)",
  ylab = "HOPE score"
)

# life orientation score (x) against hope score (y)
plot(
  x = d2$lotr,
  y = d2$hope,
  main = "scatterplot of life orientation test score and hope scale score",
  xlab = "LOTR score",
  ylab = "HOPE score"
)

5.3 Boxplots

# use the boxplot() command to create boxplots of your continuous and categorical variables
# (formula reads outcome ~ grouping variable: one box per group level)

# hope score split by good life estimate
boxplot(
  hope ~ goodlife_tri,
  data = d2,
  main = "hope score by good life estimate",
  xlab = "good life estimate",
  ylab = "HOPE score"
)

# hope score split by race/ethnicity
boxplot(
  hope ~ race,
  data = d2,
  main = "hope score by race/ethnicity",
  xlab = "race/ethnicity",
  ylab = "HOPE score"
)

Basic Statistics

Heather Perkins

2023-05-25

1 Load Libraries

2 Import Data

3 Check Data

3.1 Formatting

3.2 Univariate Normality

3.3 Histograms

4 Missing Data

5 Crosstabs & Scatterplots

5.1 Crosstabs

5.2 Scatterplots

5.3 Boxplots