Basic Statistics

Load Libraries

# if you haven't run this code before, you'll need to download the below packages first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right

library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
## 
## To select columns from data: columns(mtcars, mpg, vs:carb)
## 
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
## 
##     sort_by
## 
## Use 'expss_output_viewer()' to display tables in the RStudio Viewer.
##  To return to the console output, use 'expss_output_default()'.

Import Data

# Import our data for the lab.
# For the homework, you will import the mydata.csv that we created in the
# Data Prep Lab.

# header = TRUE tells read.csv() that the first row holds the column names.
# Spell out TRUE rather than T: T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)

Univariate Plots: Histograms & Tables

# table() lists each level of the variable and how many participants fall in it
table(d2[["sibling"]])
## 
## at least one sibling           only child 
##                 2861                  304
# frequency count for each level of marriage5
table(d2[["marriage5"]])
## 
##             are currently divorced from one another 
##                                                 736 
##                are currently married to one another 
##                                                2132 
##       never married each other and are not together 
##                                                 250 
## never married each other but are currently together 
##                                                  47
hist(d2$efficacy) # hist() draws a histogram of a single variable; one call per continuous variable

hist(d2$support)

hist(d2$swb)

hist(d2$idea)

Univariate Normality

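We analyzed the skew and kurtosis of our continuous variables. Skew was within the accepted range (-2/+2) for all of them, and efficacy, support, and swb were also within range for kurtosis; however, the kurtosis of idea (4.25) fell outside the accepted range.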

# univariate normality check: skew and kurtosis should each fall within -2/+2
describe(x = d2)
##            vars    n mean   sd median trimmed  mad min max range  skew kurtosis
## sibling*      1 3165 1.10 0.29   1.00    1.00 0.00   1   2     1  2.74     5.51
## marriage5*    2 3165 1.88 0.60   2.00    1.83 0.00   1   4     3  0.47     1.47
## efficacy      3 3165 3.12 0.45   3.10    3.13 0.44   1   4     3 -0.29     0.63
## support       4 3165 5.53 1.14   5.75    5.66 0.99   0   7     7 -1.12     1.49
## swb           5 3165 4.47 1.32   4.67    4.53 1.48   1   7     6 -0.36    -0.45
## idea          6 3165 3.57 0.38   3.62    3.62 0.37   1   4     3 -1.51     4.25
##              se
## sibling*   0.01
## marriage5* 0.01
## efficacy   0.01
## support    0.02
## swb        0.02
## idea       0.01

Bivariate Plots

Crosstabs

# crosstab of two categorical variables: sibling in the rows, marriage5 in the columns
# (replace the second and third arguments with your own categorical variable names)
cross_cases(d2, cell_vars = sibling, col_vars = marriage5)
 marriage5 
 are currently divorced from one another   are currently married to one another   never married each other and are not together   never married each other but are currently together 
 sibling 
   at least one sibling  670 1942 210 39
   only child  66 190 40 8
   #Total cases  736 2132 250 47

Scatterplots

# support on the x-axis against swb on the y-axis
with(d2, plot(
  x = support, y = swb,
  main = "Scatterplot of support and swb",
  xlab = "support", ylab = "swb"
))

# support on the x-axis against efficacy on the y-axis
with(d2, plot(
  x = support, y = efficacy,
  main = "Scatterplot of support and efficacy",
  xlab = "support", ylab = "efficacy"
))

Boxplots

# One categorical and one continuous variable.
# Enter them in the correct order!
# The continuous variable goes BEFORE the ~ (it is plotted on the y-axis).
# The categorical variable goes AFTER the ~ (it defines the groups/boxes).

# support (continuous) split by sibling (categorical)
boxplot(support ~ sibling,
        data = d2,
        main = "Boxplot of sibling and support",
        xlab = "sibling",
        ylab = "support")

# swb (continuous) split by marriage5 (categorical)
boxplot(swb ~ marriage5,
        data = d2,
        main = "Boxplot of marriage5 and swb",
        xlab = "marriage5",
        ylab = "swb")