Basic Statistics

Load Libraries

# if you haven't run this code before, you'll need to download the below packages first
# instructions on how to do this are included in the video
# but as a reminder, you use the packages tab to the right

library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
## 
## To aggregate all non-grouping columns: take_all(mtcars, mean, by = am)
## 
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
## 
##     sort_by

Import Data

# Import the data for the lab.
# For the homework, you will import the mydata.csv that we created in the Data Prep Lab.

# header = TRUE: the first row of the file holds the column names.
# (Spell out TRUE rather than T -- T is an ordinary variable that can be reassigned.)
d2 <- read.csv(file = "Data/mydata.csv", header = TRUE)

Univariate Plots: Histograms & Tables

# table() lists the levels of this variable and counts how many participants fall in each level
table(d2[["gender"]])
## 
##    f    m   nb 
## 2298  781   54
# Same frequency count for the sibling variable
table(d2[["sibling"]])
## 
## at least one sibling           only child 
##                 2832                  301
# hist() creates a histogram of the variable.
# One histogram is drawn for each of the four continuous variables in d2.
hist(d2$moa_maturity)

hist(d2$support)

hist(d2$socmeduse)

hist(d2$stress)

Univariate Normality

We analyzed the skew and kurtosis of our variables, and most were within the accepted range (-2 to +2). However, one variable (sibling; skew = 2.74, kurtosis = 5.51) was outside of the accepted range. For this analysis, we will use it anyway, but outside of this class this would be bad practice.

# Descriptive statistics for every column of d2 (n, mean, sd, median, skew, kurtosis, ...)
psych::describe(d2)
##              vars    n  mean   sd median trimmed  mad  min  max range  skew
## gender*         1 3133  1.28 0.49   1.00    1.21 0.00  1.0  3.0   2.0  1.40
## sibling*        2 3133  1.10 0.29   1.00    1.00 0.00  1.0  2.0   1.0  2.74
## moa_maturity    3 3133  3.59 0.43   3.67    3.65 0.49  1.0  4.0   3.0 -1.20
## support         4 3133  5.54 1.13   5.75    5.66 0.99  0.0  7.0   7.0 -1.10
## socmeduse       5 3133 34.48 8.56  35.00   34.75 7.41 11.0 55.0  44.0 -0.31
## stress          6 3133  3.05 0.60   3.00    3.05 0.59  1.3  4.7   3.4  0.03
##              kurtosis   se
## gender*          0.89 0.01
## sibling*         5.51 0.01
## moa_maturity     1.87 0.01
## support          1.45 0.02
## socmeduse        0.26 0.15
## stress          -0.17 0.01
# We use describe() to check univariate normality: skew and kurtosis should each fall between -2 and +2.

Bivariate Plots

Crosstabs

# Cross-tabulate gender by sibling.
# For the homework, replace the 2nd and 3rd arguments with your own categorical variable names.
expss::cross_cases(d2, gender, sibling)
 sibling 
 at least one sibling   only child 
 gender 
   f  2083 215
   m  700 81
   nb  49 5
   #Total cases  2832 301

Scatterplots

# Scatterplot with maturity on the x-axis and support on the y-axis
plot(x = d2$moa_maturity,
     y = d2$support,
     xlab = "Maturity",
     ylab = "Support",
     main = "Scatterplot of Maturity & Support")

# Scatterplot with social media use on the x-axis and stress on the y-axis
plot(x = d2$socmeduse,
     y = d2$stress,
     xlab = "Social Media Use",
     ylab = "Stress",
     main = "Scatterplot of Social Media Use & Stress")

Boxplots

# Box plots use ONE CONTINUOUS and ONE CATEGORICAL variable.
# Make sure you enter them in the right order!
# The formula is written as continuous ~ categorical:
#   - the CONTINUOUS variable goes BEFORE the tilde (~); it is plotted on the y-axis
#   - the CATEGORICAL (grouping) variable goes AFTER the tilde; one box is drawn per level

# Social media use (continuous) split by gender (categorical)
boxplot(socmeduse ~ gender, data = d2,
        main = "Boxplot of Social Media Use & Gender",
        xlab = "Gender",
        ylab = "Social Media Use")

# Stress (continuous) split by sibling status (categorical)
boxplot(stress ~ sibling, data = d2,
        main = "Boxplot of Stress & Siblings",
        xlab = "Siblings",
        ylab = "Stress")