Basic Statistics Lab

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# Read the lab dataset from the project's Data folder; the first row of the
# CSV supplies the column names. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# List the column names to confirm which variables were loaded.
names(d)
[1] "age"      "gender"   "swb"      "efficacy" "support"  "stress"  

Univariate Plots: Histograms & Tables

# Frequency count for each age bracket. TODO: update for HW.
table(d[["age"]])

1 between 18 and 25 2 between 26 and 35 3 between 36 and 45           4 over 45 
               1991                 116                  38                  18 
# Frequency count for each gender category.
table(d[["gender"]])

   f    m   nb 
1585  547   31 
# Histograms of the four numeric measures, one plot per variable, used to
# eyeball the shape of each distribution before checking skew and kurtosis.

# swb
hist(d$swb)

# efficacy
hist(d$efficacy)

# support
hist(d$support)

# stress
hist(d$stress)

Univariate Normality

Check skew and kurtosis. The acceptable range is -2 to +2; if a variable's skew or kurtosis falls outside this range, I need to mention it in my write-up.

# Descriptive statistics (including skew and kurtosis) for every column of d.
psych::describe(d)
         vars    n mean   sd median trimmed  mad min max range  skew kurtosis
age*        1 2163 1.11 0.43   1.00    1.00 0.00 1.0 4.0   3.0  4.41    21.10
gender*     2 2163 1.28 0.48   1.00    1.21 0.00 1.0 3.0   2.0  1.35     0.69
swb         3 2163 4.43 1.33   4.50    4.49 1.48 1.0 7.0   6.0 -0.35    -0.49
efficacy    4 2163 3.11 0.44   3.10    3.12 0.44 1.2 4.0   2.8 -0.19     0.36
support     5 2163 5.53 1.14   5.75    5.65 0.99 0.0 7.0   7.0 -1.09     1.34
stress      6 2163 3.07 0.60   3.10    3.07 0.59 1.3 4.6   3.3 -0.01    -0.15
           se
age*     0.01
gender*  0.01
swb      0.03
efficacy 0.01
support  0.02
stress   0.01

Bivariate Plots

Crosstabs

# Cross-tabulate the two categorical variables: gender in rows, age in columns.
expss::cross_cases(d, gender, age)
 age 
 1 between 18 and 25   2 between 26 and 35   3 between 36 and 45   4 over 45 
 gender 
   f  1475 70 28 12
   m  486 46 9 6
   nb  30 1
   #Total cases  1991 116 38 18

Scatterplots

# Pairwise scatterplots of the four numeric measures (six pairs in total).
# In every call the first variable of the pair is drawn on the x-axis and the
# second on the y-axis; columns are looked up inside d via with().

# swb vs. efficacy
with(d, plot(x = swb, y = efficacy,
             xlab = "Satisfaction with Life",
             ylab = "Efficacy",
             main = "Scatterplot of Satisfaction with Life and Efficacy"))

# swb vs. support
with(d, plot(x = swb, y = support,
             xlab = "Satisfaction with Life",
             ylab = "Support",
             main = "Scatterplot of Satisfaction with Life and Support"))

# swb vs. stress
with(d, plot(x = swb, y = stress,
             xlab = "Satisfaction with Life",
             ylab = "Stress",
             main = "Scatterplot of Satisfaction with Life and Stress"))

# efficacy vs. support
with(d, plot(x = efficacy, y = support,
             xlab = "Efficacy",
             ylab = "Support",
             main = "Scatterplot of Efficacy and Support"))

# efficacy vs. stress
with(d, plot(x = efficacy, y = stress,
             xlab = "Efficacy",
             ylab = "Stress",
             main = "Scatterplot of Efficacy and Stress"))

# support vs. stress
with(d, plot(x = support, y = stress,
             xlab = "Support",
             ylab = "Stress",
             main = "Scatterplot of Support and Stress"))

Boxplots

# Boxplots of support across the levels of each categorical variable.
# With a formula of the form `support ~ group`, the grouping variable is drawn
# along the x-axis and support along the y-axis, so the axis labels are given
# in that order (they were previously swapped).

# support by gender
boxplot(support ~ gender, data = d,
        main = "Boxplot of Support and Gender Identity",
        xlab = "Gender Identity",
        ylab = "Support")

# support by age bracket
boxplot(support ~ age, data = d,
        main = "Boxplot of Support and Age",
        xlab = "Age",
        ylab = "Support")

Write-Up

We reviewed plots and descriptive statistics for our six chosen variables. All four of our continuous variables (satisfaction with life, efficacy, support, and stress) had skew and kurtosis within the accepted range (-2 to +2).