Basic Statistics Lab

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# WILL NEED TO UPDATE THIS FOR THE HW!! USE MYDATA
# Read the lab dataset from the project's Data/ folder. header = TRUE takes
# the column names from the first row of the file. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)

# Print the column names to confirm the expected variables were imported.
names(d)
[1] "gender"    "mhealth"   "big5_open" "pswq"      "mfq_state" "brs"      

Univariate Plots: Histograms & Tables

 # Frequency table of gender identity: one count per response category.
 table(d[["gender"]])

            female I use another term               male  Prefer not to say 
               319                 23                 56                 10 
# Frequency table of mental health diagnosis: one count per category.
table(d[["mhealth"]])

             anxiety disorder                       bipolar 
                           52                             3 
                   depression              eating disorders 
                           12                            21 
                   none or NA obsessive compulsive disorder 
                          273                            15 
                        other                          ptsd 
                           22                            10 
# Histograms of the four numeric scale scores, drawn to inspect the shape of
# each distribution. No `main` or `xlab` is supplied, so every plot keeps
# hist()'s default title and axis label.
hist(d$big5_open)

hist(d$pswq)

hist(d$mfq_state)

hist(d$brs)

Univariate Normality

Check skew and kurtosis.

# describe() comes from psych (loaded above); its printed summary includes
# the skew and kurtosis columns used for the normality check.
# NOTE(review): the rows flagged with "*" (gender, mhealth) appear to be
# categorical columns summarised via numeric codes, so their skew/kurtosis
# should not be interpreted -- confirm against the psych documentation.
describe(d)
          vars   n mean   sd median trimmed  mad   min  max range  skew
gender*      1 408 1.40 0.81   1.00    1.23 0.00  1.00 4.00  3.00  1.75
mhealth*     2 408 4.58 1.60   5.00    4.71 0.00  1.00 8.00  7.00 -1.01
big5_open    3 408 5.29 1.12   5.33    5.38 0.99  1.00 7.00  6.00 -0.78
pswq         4 408 0.27 0.95   0.38    0.32 0.98 -2.25 2.02  4.27 -0.45
mfq_state    5 408 3.73 1.01   3.75    3.76 1.11  1.00 6.00  5.00 -0.29
brs          6 408 2.69 0.88   2.67    2.69 0.99  1.00 5.00  4.00  0.07
          kurtosis   se
gender*       1.60 0.04
mhealth*      1.04 0.08
big5_open     0.61 0.06
pswq         -0.63 0.05
mfq_state    -0.28 0.05
brs          -0.68 0.04

Bivariate Plots

Crosstabs

# Cross-tabulate gender (rows) by mental health diagnosis (columns).
# cross_cases() comes from expss and reports the number of cases per cell.
cross_cases(d, gender, mhealth)
 mhealth 
 anxiety disorder   bipolar   depression   eating disorders   none or NA   obsessive compulsive disorder   other   ptsd 
 gender 
   I use another term  4 1 14 3 1
   Prefer not to say  1 2 6 1
   female  38 1 10 19 210 14 18 9
   male  9 1 2 43 1
   #Total cases  52 3 12 21 273 15 22 10

Scatterplots

# Pairwise scatterplots for every combination of the four continuous
# measures. In each call the first variable is drawn on the x-axis and the
# second on the y-axis.

# Openness vs. worry
plot(
  x = d$big5_open,
  y = d$pswq,
  xlab = "Openness",
  ylab = "Worry",
  main = "Scatterplot of Openness and Worry"
)

# Openness vs. mental flexibility (state)
plot(
  x = d$big5_open,
  y = d$mfq_state,
  xlab = "Openness",
  ylab = "Mental Flexibility (State)",
  main = "Scatterplot of Openness and Mental Flexibility (State)"
)

# Openness vs. resilience
plot(
  x = d$big5_open,
  y = d$brs,
  xlab = "Openness",
  ylab = "Resilience",
  main = "Scatterplot of Openness and Resilience"
)

# Worry vs. mental flexibility (state)
plot(
  x = d$pswq,
  y = d$mfq_state,
  xlab = "Worry",
  ylab = "Mental Flexibility (State)",
  main = "Scatterplot of Worry and Mental Flexibility (State)"
)

# Worry vs. resilience
plot(
  x = d$pswq,
  y = d$brs,
  xlab = "Worry",
  ylab = "Resilience",
  main = "Scatterplot of Worry and Resilience"
)

# Mental flexibility (state) vs. resilience
plot(
  x = d$mfq_state,
  y = d$brs,
  xlab = "Mental Flexibility (State)",
  ylab = "Resilience",
  main = "Scatterplot of Mental Flexibility (State) and Resilience"
)

Boxplots

# Boxplots take a formula of the form CONTINUOUS ~ CATEGORICAL: the
# continuous outcome goes on the left and is split into one box per level of
# the categorical variable on the right.

# Worry by gender identity
boxplot(
  pswq ~ gender,
  data = d,
  xlab = "Gender Identity",
  ylab = "Worry",
  main = "Boxplot of Worry and Gender Identity"
)

# Worry by mental health diagnosis
boxplot(
  pswq ~ mhealth,
  data = d,
  xlab = "Mental Health Diagnosis",
  ylab = "Worry",
  main = "Boxplot of Worry and Mental Health Diagnosis"
)

Write-Up

We reviewed plots and descriptive statistics for our six chosen variables. All four of our continuous variables (openness, worry, state mental flexibility, and resilience) had skew and kurtosis within the accepted range (-2 to +2).