Basic Statistics HW

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# Read the dataset; header = TRUE tells read.csv() that the first line of the
# file holds the column names. TRUE is spelled out because the shorthand T is
# an ordinary variable that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# Print the column names to confirm which variables were loaded.
names(d)
[1] "sexual_orientation" "mhealth"            "support"           
[4] "swemws"             "rse"                "iou"               

Univariate Plots: Histograms & Tables

# Frequency count for each category of the mhealth column.
table(d[["mhealth"]])

             anxiety disorder                       bipolar 
                          117                             5 
                   depression              eating disorders 
                           29                            29 
                   none or NA obsessive compulsive disorder 
                          883                            22 
                        other                          ptsd 
                           34                            21 
# Frequency count for each category of the sexual_orientation column.
table(d[["sexual_orientation"]])

              Asexual                    Bi           Gay/Lesbian 
                   31                   144                    49 
Heterosexual/Straight    I use another term     Prefer not to say 
                  805                    34                    77 
# Histograms of the four continuous variables. Each one is given an explicit
# title and x-axis label so the plots do not fall back to the default
# "Histogram of d$..." heading.
hist(d$support,
     main = "Histogram of Social Support",
     xlab = "Social Support")

hist(d$swemws,
     main = "Histogram of Mental Well-Being",
     xlab = "Mental Well-Being")

hist(d$rse,
     main = "Histogram of Self-Esteem",
     xlab = "Self-Esteem")

hist(d$iou,
     main = "Histogram of Intolerance of Uncertainty",
     xlab = "Intolerance of Uncertainty")

Univariate Normality

Check skew and kurtosis.

# Descriptive statistics (including skew and kurtosis) for every column of d.
psych::describe(d)
                    vars    n mean   sd median trimmed  mad min max range  skew
sexual_orientation*    1 1140 3.79 1.02   4.00    3.81 0.00   1   6     5 -0.50
mhealth*               2 1140 4.63 1.41   5.00    4.88 0.00   1   8     7 -1.41
support                3 1140 3.57 0.96   3.67    3.62 0.99   1   5     4 -0.43
swemws                 4 1140 3.14 0.85   3.14    3.17 0.85   1   5     4 -0.26
rse                    5 1140 2.63 0.72   2.70    2.65 0.74   1   4     3 -0.24
iou                    6 1140 2.56 0.91   2.41    2.50 0.99   1   5     4  0.51
                    kurtosis   se
sexual_orientation*     1.21 0.03
mhealth*                2.52 0.04
support                -0.57 0.03
swemws                 -0.29 0.03
rse                    -0.74 0.02
iou                    -0.62 0.03

Bivariate Plots

Crosstabs

# Cross-tabulate case counts: mhealth in the rows, sexual_orientation in the
# columns.
cross_cases(
  data = d,
  cell_vars = mhealth,
  col_vars = sexual_orientation
)
 sexual_orientation 
 Asexual   Bi   Gay/Lesbian   Heterosexual/Straight   I use another term   Prefer not to say 
 mhealth 
   anxiety disorder  3 24 11 69 3 7
   bipolar  2 1 1 1
   depression  6 21 2
   eating disorders  9 1 15 1 3
   none or NA  24 80 30 663 27 59
   obsessive compulsive disorder  2 4 2 13 1
   other  2 13 3 10 6
   ptsd  6 1 13 1
   #Total cases  31 144 49 805 34 77

Scatterplots

# Scatterplot of social support (x-axis) against mental well-being (y-axis).
# The y-axis label is capitalised "Well-Being" to agree with the plot title.
plot(
  x = d$support,
  y = d$swemws,
  main = "Scatterplot of Social Support and Mental Well-Being",
  xlab = "Social Support",
  ylab = "Mental Well-Being"
)

# Scatterplot of social support (x-axis) against self-esteem (y-axis).
plot(
  x = d$support,
  y = d$rse,
  main = "Scatterplot of Social Support and Self-Esteem",
  xlab = "Social Support",
  ylab = "Self-Esteem"
)

# Scatterplot of social support (x-axis) against intolerance of uncertainty
# (y-axis).
plot(
  x = d$support,
  y = d$iou,
  main = "Scatterplot of Social Support and Intolerance of Uncertainty",
  xlab = "Social Support",
  ylab = "Intolerance of Uncertainty"
)

# Scatterplot of mental well-being (x-axis) against self-esteem (y-axis).
plot(
  x = d$swemws,
  y = d$rse,
  main = "Scatterplot of Mental Well-Being and Self-Esteem",
  xlab = "Mental Well-Being",
  ylab = "Self-Esteem"
)

# Scatterplot of mental well-being (x-axis) against intolerance of uncertainty
# (y-axis).
plot(
  x = d$swemws,
  y = d$iou,
  main = "Scatterplot of Mental Well-Being and Intolerance of Uncertainty",
  xlab = "Mental Well-Being",
  ylab = "Intolerance of Uncertainty"
)

# Scatterplot of self-esteem (x-axis) against intolerance of uncertainty
# (y-axis).
plot(
  x = d$rse,
  y = d$iou,
  main = "Scatterplot of Self-Esteem and Intolerance of Uncertainty",
  xlab = "Self-Esteem",
  ylab = "Intolerance of Uncertainty"
)

Boxplots

# Remember: in the formula the continuous variable comes first,
# i.e. CONTINUOUS ~ CATEGORICAL.
# Boxplot of social support within each sexual orientation group.
boxplot(
  support ~ sexual_orientation,
  data = d,
  main = "Boxplot of Social Support and Sexual Orientation",
  xlab = "Sexual Orientation",
  ylab = "Social Support"
)

# Boxplot of self-esteem within each mental health diagnosis group
# (formula form: continuous outcome ~ categorical grouping variable).
boxplot(
  rse ~ mhealth,
  data = d,
  main = "Boxplot of Self-Esteem and Mental Health Diagnosis",
  xlab = "Mental Health Diagnosis",
  ylab = "Self-Esteem"
)

Write-Up

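We reviewed plots and descriptive statistics for our six chosen variables (two categorical and four continuous). All four of our continuous variables (social support, mental well-being, self-esteem, and intolerance of uncertainty) had skew and kurtosis values within the accepted range of -2 to +2.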