Basic Statistics Lab

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# NOTE: update the file path below for the homework -- use your own data (mydata).
# Read the lab data set. TRUE is spelled out because T is an ordinary variable
# that can be reassigned, whereas TRUE is a reserved word.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# List the column names to confirm the expected variables were loaded.
names(d)
[1] "gender"   "mhealth"  "iou"      "big5_neu" "big5_con" "mfq_26"  

Univariate Plots: Histograms & Tables

# Frequency count of each mental health category.
table(d[["mhealth"]])

             anxiety disorder                       bipolar 
                          148                             7 
                   depression              eating disorders 
                           34                            33 
                   none or NA obsessive compulsive disorder 
                         1040                            32 
                        other                          ptsd 
                           39                            21 
# Frequency count of each gender category.
table(d[["gender"]])

            female I use another term               male  Prefer not to say 
              1070                 36                223                 25 
# Histograms of the four continuous variables. Explicit titles and axis labels
# replace the defaults (which print the raw expression, e.g. "d$big5_neu") and
# use the same variable names as the scatterplots later in this lab.
hist(d$big5_neu,
     main = "Histogram of Neuroticism",
     xlab = "Neuroticism")

hist(d$big5_con,
     main = "Histogram of Conscientiousness",
     xlab = "Conscientiousness")

hist(d$iou,
     main = "Histogram of Intolerance of Uncertainty",
     xlab = "Intolerance of Uncertainty")

hist(d$mfq_26,
     main = "Histogram of Mental Flexibility",
     xlab = "Mental Flexibility")

Univariate Normality

Check skew and kurtosis.

# Cutoff used in this lab: skew should fall between -2 and +2.
# NOTE(review): rows flagged with * in the output (gender, mhealth) are the
# non-numeric columns -- presumably recoded to numbers by describe(), so their
# skew/kurtosis are not meaningful; confirm against the psych documentation.
psych::describe(d)
         vars    n mean   sd median trimmed  mad min max range  skew kurtosis
gender*     1 1354 1.41 0.83   1.00    1.24 0.00   1   4     3  1.65     1.08
mhealth*    2 1354 4.60 1.43   5.00    4.84 0.00   1   8     7 -1.43     2.26
iou         3 1354 2.58 0.92   2.44    2.52 0.99   1   5     4  0.48    -0.56
big5_neu    4 1354 4.38 1.52   4.67    4.43 1.48   1   7     6 -0.29    -0.78
big5_con    5 1354 4.80 1.19   4.67    4.83 1.48   1   7     6 -0.25    -0.30
mfq_26      6 1354 4.29 0.69   4.35    4.32 0.67   1   6     5 -0.47     0.68
           se
gender*  0.02
mhealth* 0.04
iou      0.02
big5_neu 0.04
big5_con 0.03
mfq_26   0.02

Bivariate Plots

Crosstabs

# Cross-tabulate case counts: mhealth categories as rows, gender as columns.
cross_cases(d, mhealth, gender)
 gender 
 female   I use another term   male   Prefer not to say 
 mhealth 
   anxiety disorder  115 8 23 2
   bipolar  3 1 3
   depression  31 1 2
   eating disorders  31 2
   none or NA  817 20 184 19
   obsessive compulsive disorder  27 4 1
   other  29 4 5 1
   ptsd  17 2 2
   #Total cases  1070 36 223 25

Scatterplots

# Scatterplots for every pair of the four continuous variables.
# The first variable named in each title goes on the x-axis, the second on the y-axis.

# Neuroticism vs. Conscientiousness
plot(x = d[["big5_neu"]], y = d[["big5_con"]],
     xlab = "Neuroticism",
     ylab = "Conscientiousness",
     main = "Scatterplot of Neuroticism and Conscientiousness")

# Neuroticism vs. Intolerance of Uncertainty
plot(x = d[["big5_neu"]], y = d[["iou"]],
     xlab = "Neuroticism",
     ylab = "Intolerance of Uncertainty",
     main = "Scatterplot of Neuroticism and Intolerance of Uncertainty")

# Neuroticism vs. Mental Flexibility
plot(x = d[["big5_neu"]], y = d[["mfq_26"]],
     xlab = "Neuroticism",
     ylab = "Mental Flexibility",
     main = "Scatterplot of Neuroticism and Mental Flexibility")

# Conscientiousness vs. Intolerance of Uncertainty
plot(x = d[["big5_con"]], y = d[["iou"]],
     xlab = "Conscientiousness",
     ylab = "Intolerance of Uncertainty",
     main = "Scatterplot of Conscientiousness and Intolerance of Uncertainty")

# Conscientiousness vs. Mental Flexibility
plot(x = d[["big5_con"]], y = d[["mfq_26"]],
     xlab = "Conscientiousness",
     ylab = "Mental Flexibility",
     main = "Scatterplot of Conscientiousness and Mental Flexibility")

# Intolerance of Uncertainty vs. Mental Flexibility
plot(x = d[["iou"]], y = d[["mfq_26"]],
     xlab = "Intolerance of Uncertainty",
     ylab = "Mental Flexibility",
     main = "Scatterplot of Intolerance of Uncertainty and Mental Flexibility")

Boxplots

# Reminder: in the formula the continuous variable comes first,
# i.e. continuous ~ categorical.
boxplot(iou ~ gender,
        data = d,
        ylab = "Intolerance of Uncertainty",
        xlab = "Gender",
        main = "Boxplot of Intolerance of Uncertainty and Gender")

# Distribution of neuroticism within each mental health category.
boxplot(big5_neu ~ mhealth,
        data = d,
        ylab = "Neuroticism",
        xlab = "Mental Health Diagnosis",
        main = "Boxplot of Neuroticism and Mental Health diagnosis")

Write-Up

We reviewed plots and descriptive statistics for our six chosen variables. All four of our continuous variables (intolerance of uncertainty, neuroticism, conscientiousness, and mental flexibility) had skew and kurtosis within the accepted range (between -2 and +2).