# remember, you might need to install packages
library(psych) # for the describe() command
library(expss) # for the cross_cases() command
Basic Statistics Lab
Load Libraries
Load Data
# Read the lab data into `d`; header = TRUE takes column names from the
# first row of the CSV.
# Will need to update for homework: use mydata.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
names(d)
[1] "employment" "gender" "big5_neu" "big5_agr" "big5_open"
[6] "big5_ext"
Univariate Plots: Histograms & Tables
# Count how many rows fall into each value of the employment column
table(d$employment)
1 high school equivalent 2 college/university 3 employed
1147 33 435
4 unemployed 5 retired prefer not to say
79 4 24
# Count how many rows fall into each value of the gender column
table(d$gender)
female I use another term male Prefer not to say
1360 46 287 29
# One histogram per big5_* variable, each drawn as its own plot
hist(d$big5_neu)
hist(d$big5_agr)
hist(d$big5_open)
hist(d$big5_ext)
Univariate Normality
Check skew and kurtosis.
describe(d)
vars n mean sd median trimmed mad min max range skew
employment* 1 1722 1.74 1.13 1.00 1.56 0.00 1 6 5 1.33
gender* 2 1722 1.41 0.82 1.00 1.24 0.00 1 4 3 1.64
big5_neu 3 1722 4.40 1.51 4.67 4.46 1.48 1 7 6 -0.31
big5_agr 4 1722 4.98 1.11 5.00 5.02 0.99 1 7 6 -0.39
big5_open 5 1722 5.20 1.14 5.33 5.29 0.99 1 7 6 -0.72
big5_ext 6 1722 4.35 1.45 4.33 4.40 1.48 1 7 6 -0.25
kurtosis se
employment* 1.20 0.03
gender* 1.03 0.02
big5_neu -0.73 0.04
big5_agr -0.02 0.03
big5_open 0.43 0.03
big5_ext -0.76 0.03
Bivariate Plots
Crosstabs
cross_cases(d, big5_neu, employment)
| employment | ||||||
|---|---|---|---|---|---|---|
| 1 high school equivalent | 2 college/university | 3 employed | 4 unemployed | 5 retired | prefer not to say | |
| big5_neu | ||||||
| 1 | 18 | 10 | 1 | 1 | ||
| 1.333333333 | 7 | 10 | 3 | 1 | ||
| 1.666666667 | 25 | 1 | 17 | 5 | ||
| 2 | 37 | 36 | 7 | |||
| 2.333333333 | 29 | 27 | 3 | 2 | ||
| 2.666666667 | 50 | 24 | 6 | 2 | ||
| 3 | 37 | 1 | 26 | 3 | 1 | |
| 3.333333333 | 47 | 1 | 39 | 2 | 1 | |
| 3.666666667 | 59 | 1 | 33 | 3 | 1 | |
| 4 | 68 | 1 | 45 | 3 | 1 | |
| 4.333333333 | 81 | 3 | 34 | 9 | 1 | 2 |
| 4.666666667 | 99 | 5 | 33 | 7 | ||
| 5 | 104 | 6 | 35 | 8 | 4 | |
| 5.333333333 | 117 | 2 | 19 | 6 | 4 | |
| 5.666666667 | 103 | 3 | 19 | 4 | 2 | |
| 6 | 75 | 1 | 13 | 2 | 1 | |
| 6.333333333 | 72 | 2 | 4 | 4 | ||
| 6.666666667 | 80 | 5 | 4 | 1 | 1 | |
| 7 | 39 | 1 | 7 | 3 | 2 | |
| #Total cases | 1147 | 33 | 435 | 79 | 4 | 24 |
Scatterplots
# Scatterplots for each pair of big5_* variables.
# plot(x, y): the first argument goes on the x-axis and the second on the
# y-axis, so xlab labels the first variable and ylab the second.
plot(d$big5_ext, d$big5_agr,
     main = "Scatterplot of Extroversion and Agreeableness",
     xlab = "Extroversion",
     ylab = "Agreeableness")

plot(d$big5_neu, d$big5_agr,
     main = "Scatterplot of Neuroticism and Agreeableness",
     xlab = "Neuroticism",
     ylab = "Agreeableness")

plot(d$big5_open, d$big5_agr,
     main = "Scatterplot of Openness and Agreeableness",
     xlab = "Openness",
     ylab = "Agreeableness")

plot(d$big5_neu, d$big5_open,
     main = "Scatterplot of Neuroticism and Openness",
     xlab = "Neuroticism",
     ylab = "Openness")

plot(d$big5_neu, d$big5_ext,
     main = "Scatterplot of Neuroticism and Extroversion",
     xlab = "Neuroticism",
     ylab = "Extroversion")

plot(d$big5_ext, d$big5_open,
     main = "Scatterplot of Extroversion and Openness",
     xlab = "Extroversion",
     ylab = "Openness")
Boxplots
# Remember that the continuous variable comes first in the formula:
# continuous ~ categorical.
# The categorical (grouping) variable is drawn along the x-axis and the
# continuous variable along the y-axis, so xlab names the grouping variable
# and ylab names the continuous one.
boxplot(big5_ext ~ gender, data = d,
        main = "Boxplot of Extroversion and Gender",
        xlab = "Gender",
        ylab = "Extroversion")

boxplot(big5_neu ~ employment, data = d,
        main = "Boxplot of Neuroticism and Employment",
        xlab = "Employment",
        ylab = "Neuroticism")
Write-Up
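We reviewed plots and descriptive statistics for our six chosen variables. All four of our continuous variables (neuroticism, agreeableness, openness, and extroversion) had skew and kurtosis within the accepted range of -2 to +2. The two categorical variables, employment and gender, were the only ones whose skew and kurtosis exceeded 1, and they also remained within that range.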