Basic Statistics

Load Libraries

# if you haven't run this code before, you'll need to download the below packages first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right

library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
## 
## To select rows from data: rows(mtcars, am==0)
## 
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
## 
##     sort_by
## 
## Use 'expss_output_rnotebook()' to display tables inside R Notebooks.
##  To return to the console output, use 'expss_output_default()'.

Import Data

# import our data for the lab
# for the homework, you will import the mydata.csv that we created in the Data Prep Lab

# read the prepared dataset; header = TRUE treats the first row as column names
# (TRUE is spelled out because the shorthand T is an ordinary variable that can be reassigned)
d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)

Univariate Plots: Histograms & Tables

# the table command shows us what the levels of this variable are,
# and how many participants are in each level
table(d2[["age"]])
## 
## 1 between 18 and 25 2 between 26 and 35 3 between 36 and 45           4 over 45 
##                1928                 110                  37                  17
# same frequency count, this time for the income variable
table(d2[["income"]])
## 
##          1 low       2 middle         3 high rather not say 
##            582            604            339            567
hist(d2$moa_independence) # the hist command creates a histogram of the variable

# repeat the histogram for swb
hist(d2$swb)

# ... for efficacy
hist(d2$efficacy)

# ... and for stress
hist(d2$stress)

Univariate Normality

We analyzed the skew and kurtosis of our continuous variables and most were within the accepted range (-2/+2). However, one variable (moa_independence, kurtosis = 2.75) was outside of the accepted range. For this analysis, we will use it anyway, but outside of this class this is bad practice.

Note that age and income are categorical variables (they are marked with * in the describe() output below), so their skew and kurtosis values are not interpreted here.

# check univariate normality: skew and kurtosis should fall within -2/+2
psych::describe(d2)
##                  vars    n mean   sd median trimmed  mad min max range  skew
## age*                1 2092 1.11 0.43   1.00    1.00 0.00 1.0 4.0   3.0  4.44
## income*             2 2092 2.43 1.16   2.00    2.41 1.48 1.0 4.0   3.0  0.17
## moa_independence    3 2092 3.54 0.47   3.67    3.61 0.49 1.0 4.0   3.0 -1.49
## swb                 4 2092 4.43 1.33   4.50    4.49 1.48 1.0 7.0   6.0 -0.36
## efficacy            5 2092 3.11 0.44   3.10    3.12 0.44 1.2 4.0   2.8 -0.20
## stress              6 2092 3.07 0.60   3.10    3.07 0.59 1.3 4.6   3.3 -0.02
##                  kurtosis   se
## age*                21.31 0.01
## income*             -1.43 0.03
## moa_independence     2.75 0.01
## swb                 -0.49 0.03
## efficacy             0.39 0.01
## stress              -0.15 0.01

Bivariate Plots

Crosstabs

# crosstab of two categorical variables (age in rows, income in columns);
# replace age and income with your own categorical variable names
expss::cross_cases(d2, age, income)
 income 
 1 low   2 middle   3 high   rather not say 
 age 
   1 between 18 and 25  514 545 322 547
   2 between 26 and 35  52 36 5 17
   3 between 36 and 45  11 15 9 2
   4 over 45  5 8 3 1
   #Total cases  582 604 339 567

Scatterplots

# scatterplot with stress on the x-axis and efficacy on the y-axis
plot(
  x = d2[["stress"]],
  y = d2[["efficacy"]],
  main = "Scatterplot of stress and efficacy",
  xlab = "stress",
  ylab = "efficacy"
)

# scatterplot with moa_independence on the x-axis and swb on the y-axis
plot(
  x = d2[["moa_independence"]],
  y = d2[["swb"]],
  main = "Scatterplot of moa_independence and swb",
  xlab = "moa_independence",
  ylab = "swb"
)

Boxplots

# box plots use one categorical and one continuous variable
# make sure that you enter them in the right order!
# continuous variable goes before the tilde
# categorical variable goes after the tilde

# formula reads outcome ~ group: stress is plotted separately for each income level
boxplot(
  stress ~ income,
  data = d2,
  main = "Boxplot of stress and income",
  xlab = "income",
  ylab = "stress"
)

# formula reads outcome ~ group: swb is plotted separately for each age level
boxplot(
  swb ~ age,
  data = d2,
  main = "Boxplot of swb and age",
  xlab = "age",
  ylab = "swb"
)