Basic Statistics Lab

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# will need to update for homework, use mydata
# Read the lab data set from a CSV whose first row holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# List the column names to confirm the expected variables were loaded.
names(d)
[1] "employment" "gender"     "big5_neu"   "big5_agr"   "big5_open" 
[6] "big5_ext"  

Univariate Plots: Histograms & Tables

# Frequency count for each employment category.
table(d[["employment"]])

1 high school equivalent     2 college/university               3 employed 
                    1147                       33                      435 
            4 unemployed                5 retired        prefer not to say 
                      79                        4                       24 
# Frequency count for each gender category.
table(d[["gender"]])

            female I use another term               male  Prefer not to say 
              1360                 46                287                 29 
# Histogram of each Big Five scale score, one plot per variable.
# No title or axis labels are supplied, so hist() uses its defaults, which
# are derived from the expression passed in (e.g. "d$big5_neu").
hist(d$big5_neu)

hist(d$big5_agr)

hist(d$big5_open)

hist(d$big5_ext)

Univariate Normality

Check skew and kurtosis.

# Descriptive statistics for every column of d, including skew and kurtosis.
# Rows marked with * in the output are non-numeric columns that describe()
# converted to numeric codes, so their statistics are not interpretable.
psych::describe(d)
            vars    n mean   sd median trimmed  mad min max range  skew
employment*    1 1722 1.74 1.13   1.00    1.56 0.00   1   6     5  1.33
gender*        2 1722 1.41 0.82   1.00    1.24 0.00   1   4     3  1.64
big5_neu       3 1722 4.40 1.51   4.67    4.46 1.48   1   7     6 -0.31
big5_agr       4 1722 4.98 1.11   5.00    5.02 0.99   1   7     6 -0.39
big5_open      5 1722 5.20 1.14   5.33    5.29 0.99   1   7     6 -0.72
big5_ext       6 1722 4.35 1.45   4.33    4.40 1.48   1   7     6 -0.25
            kurtosis   se
employment*     1.20 0.03
gender*         1.03 0.02
big5_neu       -0.73 0.04
big5_agr       -0.02 0.03
big5_open       0.43 0.03
big5_ext       -0.76 0.03

Bivariate Plots

Crosstabs

# Cross-tabulate case counts: big5_neu values as rows, employment as columns.
# NOTE(review): big5_neu is a continuous scale score, so this yields one row
# per distinct score; a crosstab of two categorical variables (e.g. gender
# by employment) may be what the lab intends -- confirm before changing.
expss::cross_cases(d, big5_neu, employment)
 employment 
 1 high school equivalent   2 college/university   3 employed   4 unemployed   5 retired   prefer not to say 
 big5_neu 
   1  18 10 1 1
   1.333333333  7 10 3 1
   1.666666667  25 1 17 5
   2  37 36 7
   2.333333333  29 27 3 2
   2.666666667  50 24 6 2
   3  37 1 26 3 1
   3.333333333  47 1 39 2 1
   3.666666667  59 1 33 3 1
   4  68 1 45 3 1
   4.333333333  81 3 34 9 1 2
   4.666666667  99 5 33 7
   5  104 6 35 8 4
   5.333333333  117 2 19 6 4
   5.666666667  103 3 19 4 2
   6  75 1 13 2 1
   6.333333333  72 2 4 4
   6.666666667  80 5 4 1 1
   7  39 1 7 3 2
   #Total cases  1147 33 435 79 4 24

Scatterplots

 # Pairwise scatterplots of the four Big Five scale scores.
 # In every call, x is drawn on the horizontal axis and y on the vertical
 # axis; the title and axis labels are set explicitly to match.
 plot(
   x = d[["big5_ext"]],
   y = d[["big5_agr"]],
   main = "Scatterplot of Extroversion and Agreeableness",
   xlab = "Extroversion",
   ylab = "Agreeableness"
 )

 plot(
   x = d[["big5_neu"]],
   y = d[["big5_agr"]],
   main = "Scatterplot of Neuroticism and Agreeableness",
   xlab = "Neuroticism",
   ylab = "Agreeableness"
 )

 plot(
   x = d[["big5_open"]],
   y = d[["big5_agr"]],
   main = "Scatterplot of Openness and Agreeableness",
   xlab = "Openness",
   ylab = "Agreeableness"
 )

 plot(
   x = d[["big5_neu"]],
   y = d[["big5_open"]],
   main = "Scatterplot of Neuroticism and Openness",
   xlab = "Neuroticism",
   ylab = "Openness"
 )

 plot(
   x = d[["big5_neu"]],
   y = d[["big5_ext"]],
   main = "Scatterplot of Neuroticism and Extroversion",
   xlab = "Neuroticism",
   ylab = "Extroversion"
 )

 plot(
   x = d[["big5_ext"]],
   y = d[["big5_open"]],
   main = "Scatterplot of Extroversion and Openness",
   xlab = "Extroversion",
   ylab = "Openness"
 )

Boxplots

# remember that the continuous variable comes first: continuous ~ categorical
# With this formula, boxplot() draws one box per category along the x-axis
# and the continuous score on the y-axis, so xlab names the categorical
# variable and ylab names the continuous one.
boxplot(big5_ext ~ gender,
        data = d,
        main = "Boxplot of Extroversion and Gender",
        xlab = "Gender",
        ylab = "Extroversion")

boxplot(big5_neu ~ employment,
        data = d,
        main = "Boxplot of Neuroticism and Employment",
        xlab = "Employment",
        ylab = "Neuroticism")

Write-Up

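We reviewed plots and descriptive statistics for our six chosen variables. All four of our continuous variables (neuroticism, agreeableness, openness, and extroversion) had skew and kurtosis within the accepted range (-2/+2), with every value below 1 in absolute value. Only the two categorical variables, employment and gender, had skew and kurtosis values above 1; because these are category codes rather than scores, their skew and kurtosis are not interpreted.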