# remember, you might need to install packages
library(psych) # for the describe() command
library(expss) # for the cross_cases() command
Basic Statistics Lab
Load Libraries
Load Data
# WILL NEED TO UPDATE THIS FOR HW!! USE MYDATA
# Read the lab dataset into `d`; header = TRUE takes the column names from
# the first row of the CSV. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
names(d)
[1] "exercise_cat" "sleep_hours" "gad" "pas_covid" "iou"
[6] "rse"
Univariate Plots: Histograms & Tables
# Frequency table: number of rows at each level of exercise_cat
table(d$exercise_cat)
1 less than 1 hour 2 1-2 hours 3 2-5 hours 4 5-8 hours
185 519 244 47
5 over 8 hours
44
# Frequency table: number of rows at each level of sleep_hours
table(d$sleep_hours)
1 < 5 hours 2 5-6 hours 3 7-8 hours 4 8-10 hours 5 > 10 hours
70 278 401 246 44
# Histograms of the four continuous variables. Each call is its own
# statement; several calls run together on one line are a parse error in R.
hist(d$gad)
hist(d$pas_covid)
hist(d$iou)
hist(d$rse)
Univariate Normality
Check skew and kurtosis. Both should fall within the accepted range (between -2 and +2).
describe(d)
vars n mean sd median trimmed mad min max range skew
exercise_cat* 1 1039 2.27 0.95 2.00 2.18 1.48 1.00 5 4.00 0.94
sleep_hours* 2 1039 2.92 0.97 3.00 2.93 1.48 1.00 5 4.00 0.00
gad 3 1039 2.00 0.90 1.71 1.90 0.85 1.00 4 3.00 0.76
pas_covid 4 1039 3.21 0.68 3.22 3.22 0.66 1.22 5 3.78 -0.19
iou 5 1039 2.54 0.89 2.41 2.48 0.93 1.00 5 4.00 0.53
rse 6 1039 2.66 0.72 2.70 2.68 0.74 1.00 4 3.00 -0.28
kurtosis se
exercise_cat* 1.04 0.03
sleep_hours* -0.47 0.03
gad -0.56 0.03
pas_covid -0.03 0.02
iou -0.54 0.03
rse -0.65 0.02
Bivariate Plots
Crosstabs
cross_cases(d, exercise_cat, sleep_hours)

| | sleep_hours | | | | |
|---|---|---|---|---|---|
| | 1 < 5 hours | 2 5-6 hours | 3 7-8 hours | 4 8-10 hours | 5 > 10 hours |
| exercise_cat | | | | | |
| 1 less than 1 hour | 12 | 44 | 66 | 50 | 13 |
| 2 1-2 hours | 29 | 129 | 215 | 123 | 23 |
| 3 2-5 hours | 24 | 78 | 83 | 53 | 6 |
| 4 5-8 hours | 3 | 14 | 20 | 9 | 1 |
| 5 over 8 hours | 2 | 13 | 17 | 11 | 1 |
| #Total cases | 70 | 278 | 401 | 246 | 44 |
Scatterplots
# Pairwise scatterplots for the four continuous variables (gad, pas_covid,
# iou, rse): one plot per pair, six in total. The first variable of each pair
# goes on the x-axis and the second on the y-axis.
# Each call is a separate statement; calls run together as `...)plot(...`
# are a parse error in R.
plot(d$gad, d$pas_covid,
     main = "Scatterplot of General Anxiety Disorder and Pandemic Anxiety Scale",
     xlab = "General Anxiety Disorder",
     ylab = "Pandemic Anxiety Scale")

plot(d$gad, d$iou,
     main = "Scatterplot of General Anxiety Disorder and Intolerance of Uncertainty",
     xlab = "General Anxiety Disorder",
     ylab = "Intolerance of Uncertainty")

plot(d$gad, d$rse,
     main = "Scatterplot of General Anxiety Disorder and Self-esteem",
     xlab = "General Anxiety Disorder",
     ylab = "Self-esteem")

plot(d$pas_covid, d$iou,
     main = "Scatterplot of Pandemic Anxiety Scale and Intolerance of Uncertainty",
     xlab = "Pandemic Anxiety Scale",
     ylab = "Intolerance of Uncertainty")

plot(d$pas_covid, d$rse,
     main = "Scatterplot of Pandemic Anxiety Scale and Rosenberg Self-esteem Inventory",
     xlab = "Pandemic Anxiety Scale ",
     ylab = "Self-esteem")

plot(d$iou, d$rse,
     main = "Intolerance of Uncertainty and Self-esteem",
     xlab = "Intolerance of Uncertainty",
     ylab = "Self-esteem")
Boxplots
# Remember that the continuous variable comes first: CONTINUOUS ~ CATEGORICAL
# The formula is passed as the first argument and `data` second, the
# conventional order for boxplot()'s formula interface. Each call is its own
# statement so the chunk parses.

# Pandemic anxiety scores grouped by exercise category
boxplot(pas_covid ~ exercise_cat, data = d,
        main = "Boxplot of Pandemic Anxiety Scale and Hours of Exercise per day",
        xlab = "Hours of exercise per day",
        ylab = "Pandemic Anxiety Scale")

# Pandemic anxiety scores grouped by sleep category
boxplot(pas_covid ~ sleep_hours, data = d,
        main = "Boxplot of Pandemic Anxiety Scale and Hours of Sleep",
        xlab = "Hours of Sleep",
        ylab = "Pandemic Anxiety Scale")
Write-Up
If skew and kurtosis are good: We reviewed plots and descriptive statistics for our six chosen variables (sleep_hours, exercise_cat, gad, pas_covid, iou, rse). All four of our continuous variables (gad, pas_covid, iou, rse) had skew and kurtosis within the accepted range (-2 to +2).