Basic Statistics Lab

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# WILL NEED TO UPDATE THIS FOR HW!! USE MYDATA
# Read the lab dataset from disk. header = TRUE tells read.csv() that the
# first row of the file holds the column names. TRUE is spelled out because
# the shorthand T is an ordinary variable that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# Print the column names to confirm which variables were loaded.
names(d)
[1] "exercise_cat" "sleep_hours"  "gad"          "pas_covid"    "iou"         
[6] "rse"         

Univariate Plots: Histograms & Tables

# Frequency count for each exercise category. useNA = "ifany" adds an <NA>
# column only when missing values are present, so they are never silently
# dropped from the counts; with complete data the output is unchanged.
table(d$exercise_cat, useNA = "ifany")

1 less than 1 hour        2 1-2 hours        3 2-5 hours        4 5-8 hours 
               185                519                244                 47 
    5 over 8 hours 
                44 
# Frequency count for each sleep-hours category. useNA = "ifany" adds an <NA>
# column only when missing values are present, so they are never silently
# dropped from the counts; with complete data the output is unchanged.
table(d$sleep_hours, useNA = "ifany")

 1 < 5 hours  2 5-6 hours  3 7-8 hours 4 8-10 hours 5 > 10 hours 
          70          278          401          246           44 
# Histograms of the four continuous scale scores. Explicit titles and x-axis
# labels replace the defaults (which would display the raw expression, e.g.
# "d$gad") so these figures read like the labelled scatterplots and boxplots
# elsewhere in this lab.
hist(d$gad,
     main = "Histogram of General Anxiety Disorder",
     xlab = "General Anxiety Disorder")

hist(d$pas_covid,
     main = "Histogram of Pandemic Anxiety Scale",
     xlab = "Pandemic Anxiety Scale")

hist(d$iou,
     main = "Histogram of Intolerance of Uncertainty",
     xlab = "Intolerance of Uncertainty")

hist(d$rse,
     main = "Histogram of Self-esteem",
     xlab = "Self-esteem")

Univariate Normality

Check skew and kurtosis. Both values should fall between -2 and +2.

# Descriptive statistics for every column of d; the printed table includes
# the skew and kurtosis columns used for the normality check.
psych::describe(d)
              vars    n mean   sd median trimmed  mad  min max range  skew
exercise_cat*    1 1039 2.27 0.95   2.00    2.18 1.48 1.00   5  4.00  0.94
sleep_hours*     2 1039 2.92 0.97   3.00    2.93 1.48 1.00   5  4.00  0.00
gad              3 1039 2.00 0.90   1.71    1.90 0.85 1.00   4  3.00  0.76
pas_covid        4 1039 3.21 0.68   3.22    3.22 0.66 1.22   5  3.78 -0.19
iou              5 1039 2.54 0.89   2.41    2.48 0.93 1.00   5  4.00  0.53
rse              6 1039 2.66 0.72   2.70    2.68 0.74 1.00   4  3.00 -0.28
              kurtosis   se
exercise_cat*     1.04 0.03
sleep_hours*     -0.47 0.03
gad              -0.56 0.03
pas_covid        -0.03 0.02
iou              -0.54 0.03
rse              -0.65 0.02

Bivariate Plots

Crosstabs

# Cross-tabulate case counts: exercise category in the rows, sleep hours in
# the columns.
expss::cross_cases(
  d,
  exercise_cat,
  sleep_hours
)
 sleep_hours 
 1 < 5 hours   2 5-6 hours   3 7-8 hours   4 8-10 hours   5 > 10 hours 
 exercise_cat 
   1 less than 1 hour  12 44 66 50 13
   2 1-2 hours  29 129 215 123 23
   3 2-5 hours  24 78 83 53 6
   4 5-8 hours  3 14 20 9 1
   5 over 8 hours  2 13 17 11 1
   #Total cases  70 278 401 246 44

Scatterplots

 # Scatterplots for every pair of the four continuous variables
 # (gad, pas_covid, iou, rse); the first variable is drawn on the x-axis.
 plot(
   x = d[["gad"]],
   y = d[["pas_covid"]],
   xlab = "General Anxiety Disorder",
   ylab = "Pandemic Anxiety Scale",
   main = "Scatterplot of General Anxiety Disorder and Pandemic Anxiety Scale"
 )

plot(
  x = d[["gad"]],
  y = d[["iou"]],
  xlab = "General Anxiety Disorder",
  ylab = "Intolerance of Uncertainty",
  main = "Scatterplot of General Anxiety Disorder and Intolerance of Uncertainty"
)

plot(
  x = d[["gad"]],
  y = d[["rse"]],
  xlab = "General Anxiety Disorder",
  ylab = "Self-esteem",
  main = "Scatterplot of General Anxiety Disorder and Self-esteem"
)

plot(
  x = d[["pas_covid"]],
  y = d[["iou"]],
  xlab = "Pandemic Anxiety Scale",
  ylab = "Intolerance of Uncertainty",
  main = "Scatterplot of Pandemic Anxiety Scale and Intolerance of Uncertainty"
)

plot(
  x = d[["pas_covid"]],
  y = d[["rse"]],
  xlab = "Pandemic Anxiety Scale ",
  ylab = "Self-esteem",
  main = "Scatterplot of Pandemic Anxiety Scale and Rosenberg Self-esteem Inventory"
)

plot(
  x = d[["iou"]],
  y = d[["rse"]],
  xlab = "Intolerance of Uncertainty",
  ylab = "Self-esteem",
  main = "Intolerance of Uncertainty and Self-esteem"
)

Boxplots

# Remember the formula order: the continuous variable comes first and the
# categorical (grouping) variable second, i.e. CONTINUOUS ~ CATEGORICAL.
boxplot(
  pas_covid ~ exercise_cat,
  data = d,
  xlab = "Hours of exercise per day",
  ylab = "Pandemic Anxiety Scale",
  main = "Boxplot of Pandemic Anxiety Scale and Hours of Exercise per day"
)

boxplot(
  pas_covid ~ sleep_hours,
  data = d,
  xlab = "Hours of Sleep",
  ylab = "Pandemic Anxiety Scale",
  main = "Boxplot of Pandemic Anxiety Scale and Hours of Sleep"
)

Write-Up

If skew and kurtosis are good: We reviewed plots and descriptive statistics for our six chosen variables (sleep_hours, exercise_cat, gad, pas_covid, iou, rse). All four of our continuous variables (gad, pas_covid, iou, rse) had skew and kurtosis values within the accepted range (between -2 and +2).