Basic Statistics Lab

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# WILL NEED TO UPDATE THIS FOR THE HW!!! USE MYDATA
# Read the lab data set from the CSV file (first row holds the column names)
# and print the column names to confirm what was loaded.
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
names(d)
[1] "gender"    "age"       "big5_ext"  "pswq"      "covid_pos" "covid_neg"

Univariate Plots: Histograms & Tables

# Frequency count of each gender category.
table(d[["gender"]])

            female I use another term               male  Prefer not to say 
              1028                 28                199                 17 
# Frequency count of each age bracket.
table(d[["age"]])

         1 under 18 2 between 18 and 25 3 between 26 and 35 4 between 36 and 45 
                837                  75                  12                 120 
          5 over 45 
                228 
# One histogram per continuous variable, to eyeball each distribution's shape.
# No title or axis label is passed, so each plot uses hist()'s defaults.

# Extraversion
hist(d$big5_ext)

# Worry
hist(d$pswq)

# Covid Positive
hist(d$covid_pos)

# Covid Negative
hist(d$covid_neg)

Univariate Normality

Check skew and kurtosis.

# Descriptive statistics (including skew and kurtosis) for every column of d.
# NOTE(review): the rows printed with a trailing * (gender, age) are the
# categorical columns; their numeric summaries are presumably computed on
# recoded category codes -- confirm before interpreting them.
psych::describe(x = d)
          vars    n  mean   sd median trimmed  mad   min   max range  skew
gender*      1 1272  1.38 0.79   1.00    1.20 0.00  1.00  4.00  3.00  1.76
age*         2 1272  2.08 1.63   1.00    1.85 0.00  1.00  5.00  4.00  1.00
big5_ext     3 1272  4.37 1.45   4.33    4.41 1.48  1.00  7.00  6.00 -0.24
pswq         4 1272 -0.02 1.00   0.02   -0.02 1.17 -2.25  2.38  4.63 -0.08
covid_pos    5 1272  2.04 3.37   0.00    1.31 0.00  0.00 15.00 15.00  1.58
covid_neg    6 1272  1.20 1.87   0.00    0.84 0.00  0.00  8.00  8.00  1.31
          kurtosis   se
gender*       1.42 0.02
age*         -0.83 0.05
big5_ext     -0.78 0.04
pswq         -0.92 0.03
covid_pos     1.48 0.09
covid_neg     0.50 0.05

Bivariate Plots

Crosstabs

# Cross-tabulate case counts: gender categories in rows, age brackets in columns.
cross_cases(data = d, cell_vars = gender, col_vars = age)
 age 
 1 under 18   2 between 18 and 25   3 between 26 and 35   4 between 36 and 45   5 over 45 
 gender 
   I use another term  24 3 1
   Prefer not to say  14 1 2
   female  646 66 10 109 197
   male  153 6 2 9 29
   #Total cases  837 75 12 120 228

Scatterplots

# Scatterplots for every pairing of the four continuous variables.
# The first variable goes on the x-axis, the second on the y-axis.

# Extraversion vs. Worry
plot(
  x = d[["big5_ext"]],
  y = d[["pswq"]],
  main = "Scatterplot of Extraversion and Worry",
  xlab = "Extraversion",
  ylab = "Worry"
)

# Extraversion vs. Covid Positive
plot(
  x = d[["big5_ext"]],
  y = d[["covid_pos"]],
  main = "Scatterplot of Extraversion and Covid Positive",
  xlab = "Extraversion",
  ylab = "Covid Positive"
)

# Extraversion vs. Covid Negative
plot(
  x = d[["big5_ext"]],
  y = d[["covid_neg"]],
  main = "Scatterplot of Extraversion and Covid Negative",
  xlab = "Extraversion",
  ylab = "Covid Negative"
)

# Worry vs. Covid Positive
plot(
  x = d[["pswq"]],
  y = d[["covid_pos"]],
  main = "Scatterplot of Worry and Covid Positive",
  xlab = "Worry",
  ylab = "Covid Positive"
)

# Worry vs. Covid Negative
plot(
  x = d[["pswq"]],
  y = d[["covid_neg"]],
  main = "Scatterplot of Worry and Covid Negative",
  xlab = "Worry",
  ylab = "Covid Negative"
)

# Covid Positive vs. Covid Negative
plot(
  x = d[["covid_pos"]],
  y = d[["covid_neg"]],
  main = "Scatterplot of Covid Positive and Covid Negative",
  xlab = "Covid Positive",
  ylab = "Covid Negative"
)

Boxplots

# remember that continuous variable comes first, CONTINUOUS~CATEGORICAL
# Each call draws one box per category of the variable on the right of the ~.

# Extraversion by gender
boxplot(
  big5_ext ~ gender,
  data = d,
  main = "Boxplot of Extraversion and Gender",
  xlab = "Gender",
  ylab = "Extraversion"
)

# Worry by age bracket
boxplot(
  pswq ~ age,
  data = d,
  main = "Boxplot of Worry and Age",
  xlab = "Age",
  ylab = "Worry"
)

Write-Up

If skew and kurtosis are good: We reviewed plots and descriptive statistics for our six chosen variables. All four of our continuous variables had skew and kurtosis within the accepted range (between -2 and +2).