Basic Statistics

Load Libraries

# if you haven't run this code before, you'll need to download the below packages first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right

library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
## 
## To get total summary skip 'by' argument: take_all(mtcars, mean)
## 
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
## 
##     sort_by

Import Data

# Import our data for the lab.
# For the homework, you will import the mydata.csv that we created in the
# Data Prep Lab.
#
# The path is relative, so it is resolved against the current working
# directory. header = TRUE tells read.csv() that the first row of the file
# holds the column names; it is spelled out as TRUE because T is an ordinary
# variable that can be reassigned, while TRUE is a reserved word.

d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)

Univariate Plots: Histograms & Tables

# Frequency table for race_rc: lists every level of the variable together
# with the number of participants who fall in that level.
table(d2[["race_rc"]])
## 
##       asian       black    hispanic multiracial  nativeamer       other 
##         208         241         284         293          12          97 
##       white 
##        2011
# Frequency table for marriage5: one count of participants per level.
table(d2[["marriage5"]])
## 
##             are currently divorced from one another 
##                                                 733 
##                are currently married to one another 
##                                                2121 
##       never married each other and are not together 
##                                                 245 
## never married each other but are currently together 
##                                                  47
# Draw one histogram per numeric variable to inspect the shape of its
# distribution. No title or axis label is supplied, so hist() builds both from
# the argument exactly as it is written here (e.g. "Histogram of d2$belong").
hist(d2$belong) # the hist command creates a histogram of the variable

hist(d2$efficacy)

hist(d2$socmeduse)

hist(d2$npi)

Univariate Normality

We analyzed the skew and kurtosis of our continuous variables, and all values fell within the accepted range (between -2 and +2).

# Univariate normality check: read the skew and kurtosis columns and confirm
# each value lies between -2 and +2.
# Rows whose name ends in * (race_rc, marriage5) are the categorical
# variables; only the un-starred rows are relevant to this check.
psych::describe(d2)
##            vars    n  mean   sd median trimmed  mad  min max range  skew
## race_rc*      1 3146  5.54 2.12   7.00    5.88 0.00  1.0   7   6.0 -0.99
## marriage5*    2 3146  1.87 0.60   2.00    1.83 0.00  1.0   4   3.0  0.47
## belong        3 3146  3.23 0.61   3.30    3.25 0.59  1.3   5   3.7 -0.26
## efficacy      4 3146  3.13 0.45   3.10    3.13 0.44  1.1   4   2.9 -0.24
## socmeduse     5 3146 34.45 8.55  35.00   34.72 7.41 11.0  55  44.0 -0.31
## npi           6 3146  0.28 0.31   0.15    0.24 0.23  0.0   1   1.0  0.94
##            kurtosis   se
## race_rc*      -0.66 0.04
## marriage5*     1.49 0.01
## belong        -0.13 0.01
## efficacy       0.45 0.01
## socmeduse      0.27 0.15
## npi           -0.69 0.01

Bivariate Plots

Crosstabs

# Crosstab of case counts for two categorical variables: the levels of the
# first variable form the rows and the levels of the second form the columns.
# For the homework, replace race_rc and marriage5 with your own categorical
# variable names.
cross_cases(
  d2,
  race_rc,   # row variable
  marriage5  # column variable
)
 marriage5 
 are currently divorced from one another   are currently married to one another   never married each other and are not together   never married each other but are currently together 
 race_rc 
   asian  25 175 7 1
   black  65 102 66 8
   hispanic  65 171 38 10
   multiracial  80 176 34 3
   nativeamer  3 8 1
   other  23 61 10 3
   white  472 1428 90 21
   #Total cases  733 2121 245 47

Scatterplots

# Scatterplots take two numeric variables: x is drawn on the horizontal axis
# and y on the vertical axis. Titles and axis labels are set explicitly.

# efficacy (x) against socmeduse (y)
plot(
  x = d2$efficacy,
  y = d2$socmeduse,
  xlab = "efficacy",
  ylab = "socmeduse",
  main = "Scatterplot of efficacy and socmeduse"
)

# socmeduse (x) against npi (y)
plot(
  x = d2$socmeduse,
  y = d2$npi,
  xlab = "socmeduse",
  ylab = "npi",
  main = "Scatterplot of socmeduse and npi"
)

Boxplots

# Boxplots need exactly one continuous and one categorical variable, written
# as a formula:  continuous ~ categorical
#   - left of the tilde  (~): the continuous variable (plotted on the y axis)
#   - right of the tilde (~): the categorical variable (one box per level)
# Swapping the two sides produces the wrong plot, so double-check the order.

# belong across the levels of race_rc
boxplot(
  belong ~ race_rc,
  data = d2,
  xlab = "race_rc",
  ylab = "belong",
  main = "Boxplot of race_rc and belong"
)

# efficacy across the levels of race_rc
boxplot(
  efficacy ~ race_rc,
  data = d2,
  xlab = "race_rc",
  ylab = "efficacy",
  main = "Boxplot of race_rc and efficacy"
)