# remember, you might need to install packages
library(psych) # for the describe() command
library(expss) # for the cross_cases() command
Basic Statistics Lab
Load Libraries
Load Data
# WILL NEED TO UPDATE THIS FOR THE HW!!! USE MYDATA
# Read the lab dataset into `d`; the path is relative to the working directory.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# List the column names to confirm which variables were loaded.
names(d)
[1] "gender" "age" "big5_ext" "pswq" "covid_pos" "covid_neg"
Univariate Plots: Histograms & Tables
# Frequency count of each gender category.
table(d$gender)
female I use another term male Prefer not to say
1028 28 199 17
# Frequency count of each age group.
table(d$age)
1 under 18 2 between 18 and 25 3 between 26 and 35 4 between 36 and 45
837 75 12 120
5 over 45
228
# One histogram per continuous variable, to inspect the shape of each
# distribution before checking skew and kurtosis numerically.
hist(d$big5_ext)
hist(d$pswq)
hist(d$covid_pos)
hist(d$covid_neg)
Univariate Normality
Check skew and kurtosis.
# Descriptive statistics (including skew and kurtosis) for every column of d.
# Variables flagged with * in the output (gender, age) are categorical and were
# converted to numeric codes by describe(); only the unflagged, continuous
# variables are relevant for the skew/kurtosis check.
describe(d)
vars n mean sd median trimmed mad min max range skew
gender* 1 1272 1.38 0.79 1.00 1.20 0.00 1.00 4.00 3.00 1.76
age* 2 1272 2.08 1.63 1.00 1.85 0.00 1.00 5.00 4.00 1.00
big5_ext 3 1272 4.37 1.45 4.33 4.41 1.48 1.00 7.00 6.00 -0.24
pswq 4 1272 -0.02 1.00 0.02 -0.02 1.17 -2.25 2.38 4.63 -0.08
covid_pos 5 1272 2.04 3.37 0.00 1.31 0.00 0.00 15.00 15.00 1.58
covid_neg 6 1272 1.20 1.87 0.00 0.84 0.00 0.00 8.00 8.00 1.31
kurtosis se
gender* 1.42 0.02
age* -0.83 0.05
big5_ext -0.78 0.04
pswq -0.92 0.03
covid_pos 1.48 0.09
covid_neg 0.50 0.05
Bivariate Plots
Crosstabs
# Cross-tabulate case counts with gender as rows and age group as columns.
cross_cases(d, gender, age)
| gender / age | 1 under 18 | 2 between 18 and 25 | 3 between 26 and 35 | 4 between 36 and 45 | 5 over 45 |
|---|---|---|---|---|---|
| I use another term | 24 | 3 | | 1 | |
| Prefer not to say | 14 | | | 1 | 2 |
| female | 646 | 66 | 10 | 109 | 197 |
| male | 153 | 6 | 2 | 9 | 29 |
| #Total cases | 837 | 75 | 12 | 120 | 228 |
Scatterplots
# Extraversion against each of the other continuous variables.
plot(
  x = d$big5_ext, y = d$pswq,
  main = "Scatterplot of Extraversion and Worry",
  xlab = "Extraversion", ylab = "Worry"
)
plot(
  x = d$big5_ext, y = d$covid_pos,
  main = "Scatterplot of Extraversion and Covid Positive",
  xlab = "Extraversion", ylab = "Covid Positive"
)
plot(
  x = d$big5_ext, y = d$covid_neg,
  main = "Scatterplot of Extraversion and Covid Negative",
  xlab = "Extraversion", ylab = "Covid Negative"
)

# Worry against the two Covid variables.
plot(
  x = d$pswq, y = d$covid_pos,
  main = "Scatterplot of Worry and Covid Positive",
  xlab = "Worry", ylab = "Covid Positive"
)
plot(
  x = d$pswq, y = d$covid_neg,
  main = "Scatterplot of Worry and Covid Negative",
  xlab = "Worry", ylab = "Covid Negative"
)

# The two Covid variables against each other.
plot(
  x = d$covid_pos, y = d$covid_neg,
  main = "Scatterplot of Covid Positive and Covid Negative",
  xlab = "Covid Positive", ylab = "Covid Negative"
)
Boxplots
# Formula interface: the continuous variable goes on the left of ~ and the
# categorical (grouping) variable on the right, i.e. CONTINUOUS ~ CATEGORICAL.
boxplot(
  big5_ext ~ gender,
  data = d,
  main = "Boxplot of Extraversion and Gender",
  xlab = "Gender", ylab = "Extraversion"
)
boxplot(
  pswq ~ age,
  data = d,
  main = "Boxplot of Worry and Age",
  xlab = "Age", ylab = "Worry"
)
Write-Up
If skew and kurtosis are good: We reviewed plots and descriptive statistics for our six chosen variables. All four of our continuous variables had skew and kurtosis values within the accepted range (between -2 and +2).