# remember, you might need to install packages
library(psych) # for the describe() command
library(expss) # for the cross_cases() command
Basic Statistics HW
Load Libraries
Load Data
# Read the dataset into the data frame `d`; the first row of the CSV supplies
# the column names.
# WILL NEED TO UPDATE THIS FOR THE HOMEWORK!! USE MYDATA
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
names(d)
[1] "income" "edu" "swb" "efficacy" "exploit" "stress"
Univariate Plots: Histograms & Tables
# for categorical variables: count how many rows fall in each income level
table(d$income)
1 low 2 middle 3 high rather not say
879 880 535 854
# for categorical variables: count how many rows fall in each education level
table(d$edu)
1 High school diploma or less, and NO COLLEGE
58
2 Currently in college
2548
3 Completed some college, but no longer in college
34
4 Complete 2 year College degree
179
5 Completed Bachelors Degree
135
6 Currently in graduate education
134
7 Completed some graduate degree
60
# for continuous variables:
# draw one histogram per variable; each call sits on its own line because
# several calls run together on a single line do not parse
hist(d$swb)
hist(d$efficacy)
hist(d$exploit)
hist(d$stress)

Univariate Normality
Check skew and kurtosis.
describe(d)
vars n mean sd median trimmed mad min max range skew kurtosis
income* 1 3148 2.43 1.16 2.00 2.42 1.48 1.0 4.0 3.0 0.15 -1.43
edu* 2 3148 2.50 1.25 2.00 2.17 0.00 1.0 7.0 6.0 2.21 3.78
swb 3 3148 4.47 1.32 4.67 4.53 1.48 1.0 7.0 6.0 -0.36 -0.45
efficacy 4 3148 3.13 0.45 3.10 3.13 0.44 1.1 4.0 2.9 -0.24 0.45
exploit 5 3148 2.38 1.37 2.00 2.21 1.48 1.0 7.0 6.0 0.94 0.35
stress 6 3148 3.05 0.60 3.00 3.05 0.59 1.3 4.7 3.4 0.03 -0.17
se
income* 0.02
edu* 0.02
swb 0.02
efficacy 0.01
exploit 0.02
stress 0.01
Bivariate Plots
Crosstabs
# (Dataframe, variable, variable)
# for categorical variables
cross_cases(d, income, edu)

| | edu | | | | | | |
|---|---|---|---|---|---|---|---|
| | 1 High school diploma or less, and NO COLLEGE | 2 Currently in college | 3 Completed some college, but no longer in college | 4 Complete 2 year College degree | 5 Completed Bachelors Degree | 6 Currently in graduate education | 7 Completed some graduate degree |
| income | | | | | | | |
| 1 low | 20 | 633 | 14 | 74 | 55 | 65 | 18 |
| 2 middle | 10 | 727 | 14 | 44 | 37 | 25 | 23 |
| 3 high | 7 | 470 | 2 | 24 | 16 | 8 | 8 |
| rather not say | 21 | 718 | 4 | 37 | 27 | 36 | 11 |
| #Total cases | 58 | 2548 | 34 | 179 | 135 | 134 | 60 |
Scatterplots
# for continuous variables; all pairs
# One scatterplot per pair of the four continuous variables (six pairs in
# total). Each plot() call is a separate statement: the first variable goes on
# the x-axis, the second on the y-axis.
plot(d$swb, d$efficacy,
     main = "Scatterplot of Subjective Well-Being and Efficacy",
     xlab = "Subjective Well-Being",
     ylab = "Efficacy")

plot(d$swb, d$exploit,
     main = "Scatterplot of Subjective Well-Being and Exploitativeness",
     xlab = "Subjective Well-Being",
     ylab = "Exploitativeness")

plot(d$swb, d$stress,
     main = "Scatterplot of Subjective Well-Being and Stress",
     xlab = "Subjective Well-Being",
     ylab = "Stress")

plot(d$efficacy, d$exploit,
     main = "Scatterplot of Efficacy and Exploitativeness",
     xlab = "Efficacy",
     ylab = "Exploitativeness")

plot(d$efficacy, d$stress,
     main = "Scatterplot of Efficacy and Stress",
     xlab = "Efficacy",
     ylab = "Stress")

plot(d$exploit, d$stress,
     main = "Scatterplot of Exploitativeness and Stress",
     xlab = "Exploitativeness",
     ylab = "Stress")

Boxplots
# remember that continuous variable comes first, continuous~categorical
# Each boxplot() call is a separate statement. The formula is given first and
# `data = d` tells boxplot() where to look up the variables named in it.
boxplot(efficacy ~ edu, data = d,
        main = "Boxplot of Efficacy and Education",
        xlab = "Education",
        ylab = "Efficacy")

boxplot(exploit ~ income, data = d,
        main = "Boxplot of Exploitativeness and Income",
        xlab = "Income",
        ylab = "Exploitativeness")

# Extra:
boxplot(swb ~ income, data = d,
        main = "Boxplot of Subjective Well-Being and Income",
        xlab = "Income",
        ylab = "Subjective Well-Being")

Write-Up
We reviewed plots and descriptive statistics for our six chosen variables. All four of our continuous variables had skew and kurtosis within the accepted range (-2/+2).