Basic Statistics

Load Libraries

# if you haven't run this code before, you'll need to download the below packages first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right

library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
## 
## To drop variable use NULL: let(mtcars, am = NULL) %>% head()
## 
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
## 
##     sort_by

Import Data

# Import data for the lab
# for the hw, import the mydata.csv that you created in the data prep lab
# header = TRUE tells read.csv() that the first row of the file holds the column names
# (TRUE is spelled out because T is an ordinary variable that can be reassigned)

d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)

Univariate Plots: Histograms & Tables

table(d2[["race_rc"]]) # table() lists each level of the variable and the number of participants in that level
## 
##       asian       black    hispanic multiracial  nativeamer       other 
##         201         226         271         281          12          94 
##       white 
##        1962
table(d2[["marriage5"]]) # participant counts for each level of marriage5
## 
##             are currently divorced from one another 
##                                                 709 
##                are currently married to one another 
##                                                2061 
##       never married each other and are not together 
##                                                 233 
## never married each other but are currently together 
##                                                  44
table(d2[["moa_role"]]) # participant counts for each distinct moa_role score
## 
##                1 1.16666666666667 1.33333333333333              1.5 
##                5               13               20               48 
## 1.66666666666667 1.83333333333333                2 2.16666666666667 
##               86              112              146              159 
## 2.33333333333333              2.5 2.66666666666667 2.83333333333333 
##              179              194              202              197 
##                3 3.16666666666667 3.33333333333333              3.5 
##              235              233              232              251 
## 3.66666666666667 3.83333333333333                4 
##              211              206              318
table(d2[["moa_safety"]]) # participant counts for each distinct moa_safety score
## 
##    1 1.25  1.5 1.75    2 2.25  2.5 2.75    3 3.25  3.5 3.75    4 
##   10   16   22   71   77  128  258  293  367  414  464  392  535
table(d2[["belong"]]) # participant counts for each distinct belong score
## 
## 1.3 1.4 1.5 1.6 1.7 1.8 1.9   2 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9   3 3.1 3.2 
##   2   5   7   8  14  17  26  30  30  49  52  78  75 133 103 144 165 185 171 208 
## 3.3 3.4 3.5 3.6 3.7 3.8 3.9   4 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9   5 
## 209 174 184 185 173 155 104 104  86  64  40  25  21  15   3   1   1   1
table(d2[["npi"]]) # participant counts for each distinct npi score
## 
##                  0 0.0769230769230769  0.153846153846154  0.230769230769231 
##                674                695                481                251 
##  0.307692307692308  0.384615384615385  0.615384615384615  0.692307692307692 
##                 95                 24                159                229 
##  0.769230769230769  0.846153846153846  0.923076923076923                  1 
##                175                146                 83                 35
# One histogram per numeric variable, to look at the shape of each distribution.
# No title or axis label is passed, so hist() builds its defaults from the
# expression given as the argument (e.g. "Histogram of d2$moa_role").
hist(d2$moa_role) #the hist command creates a histogram of the variable

hist(d2$moa_safety)

hist(d2$belong)

hist(d2$npi)

Univariate Normality

We analyzed the skew and kurtosis of our continuous variables and all were within the accepted range (-2/+2).

In this dataset no variables fell outside of the accepted range (-2/+2), so no exceptions need to be reported. Had any variables been outside of the accepted range, we would have used them anyway for this analysis, but outside of this class this is bad practice.

psych::describe(d2) # descriptive statistics for every column; read the skew and kurtosis columns to check univariate normality
##            vars    n mean   sd median trimmed  mad min max range  skew kurtosis
## race_rc*      1 3047 5.56 2.11   7.00    5.91 0.00 1.0   7   6.0 -1.01    -0.61
## marriage5*    2 3047 1.87 0.59   2.00    1.83 0.00 1.0   4   3.0  0.46     1.50
## moa_role      3 3047 2.96 0.72   3.00    3.00 0.74 1.0   4   3.0 -0.32    -0.85
## moa_safety    4 3047 3.20 0.64   3.25    3.26 0.74 1.0   4   3.0 -0.71     0.04
## belong        5 3047 3.23 0.61   3.30    3.25 0.59 1.3   5   3.7 -0.27    -0.12
## npi           6 3047 0.28 0.31   0.15    0.24 0.23 0.0   1   1.0  0.94    -0.69
##              se
## race_rc*   0.04
## marriage5* 0.01
## moa_role   0.01
## moa_safety 0.01
## belong     0.01
## npi        0.01

Bivariate Plots

Crosstabs

expss::cross_cases(data = d2, cell_vars = race_rc, col_vars = marriage5) # crosstab of two categorical variables: race_rc in the rows, marriage5 in the columns
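| race_rc      | marriage5: are currently divorced from one another | marriage5: are currently married to one another | marriage5: never married each other and are not together | marriage5: never married each other but are currently together |
|--------------|-----:|-----:|-----:|-----:|
| asian        |   23 |  169 |    8 |    1 |
| black        |   63 |   93 |   62 |    8 |
| hispanic     |   62 |  165 |   36 |    8 |
| multiracial  |   76 |  170 |   32 |    3 |
| nativeamer   |    3 |    8 |      |    1 |
| other        |   23 |   58 |   10 |    3 |
| white        |  459 | 1398 |   85 |   20 |
| #Total cases |  709 | 2061 |  233 |   44 |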

Scatterplots

# Scatterplots of two numeric variables each: x is drawn on the horizontal
# axis and y on the vertical axis; the title and axis labels are set explicitly.
with(d2, plot(x = moa_role, y = npi,
              main = "Scatterplot of moa_role and npi",
              xlab = "moa_role",
              ylab = "npi"))

with(d2, plot(x = moa_safety, y = belong,
              main = "Scatterplot of moa_safety and belong",
              xlab = "moa_safety",
              ylab = "belong"))

Boxplots

# boxplots use one CATEGORICAL and one CONTINUOUS variable
# make sure to enter them in the RIGHT ORDER in the formula:
# CONTINUOUS (the variable being summarised) GOES BEFORE THE TILDE ~
# CATEGORICAL (the grouping variable, one box per level) GOES AFTER THE TILDE ~
boxplot(belong ~ marriage5, data = d2,
        main = "Boxplot of marriage5 and belong",
        xlab = "marriage5",
        ylab = "belong")

boxplot(npi ~ race_rc, data = d2,
        main = "Boxplot of race_rc and npi",
        xlab = "race_rc",
        ylab = "npi")