# remember, you might need to install packages
library(psych) # for the describe() command
library(expss) # for the cross_cases() command
Basic Statistics Lab
Load Libraries
Load Data
# Read the lab data set into `d`; header = TRUE takes the column names from
# the first row of the CSV. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
names(d)
[1] "age" "education" "edeq12" "brs" "pss" "rse"
Univariate Plots: Histograms & Tables
# Frequency counts for each age category. TODO: update for HW
table(d[["age"]])
1 under 18 2 between 18 and 25 4 between 36 and 45 5 over 45
273 28 9 13
# Frequency counts for each education category.
table(d[["education"]])
1 equivalent to not completing high school
77
2 equivalent to high school completion
145
3 equivalent to vocational/technical program completion
3
4 equivalent to AP/IB completion
56
5 undergraduate degree
10
6 graduate degree or higher
6
prefer not to say
26
# CONTINUOUS VARS NEED HISTOS
# One histogram per continuous variable. Each call sits on its own line:
# R cannot parse several calls run together without a newline or `;`.
hist(d$edeq12)
hist(d$brs)
hist(d$pss)
hist(d$rse)
Univariate Normality
Check skew and kurtosis. The cutoffs are +2 and -2. If skew or kurtosis is above +2 or below -2, I need to mention it in my write-up!
describe(d)
vars n mean sd median trimmed mad min max range skew kurtosis
age* 1 323 1.26 0.70 1.00 1.07 0.00 1 4 3 2.86 7.47
education* 2 323 2.69 1.75 2.00 2.39 1.48 1 7 6 1.23 0.56
edeq12 3 323 2.10 0.79 2.00 2.06 0.99 1 4 3 0.33 -0.97
brs 4 323 2.67 0.87 2.67 2.67 0.99 1 5 4 0.11 -0.64
pss 5 323 3.46 0.91 3.50 3.50 1.11 1 5 4 -0.41 -0.61
rse 6 323 2.25 0.68 2.10 2.22 0.74 1 4 3 0.35 -0.62
se
age* 0.04
education* 0.10
edeq12 0.04
brs 0.05
pss 0.05
rse 0.04
Bivariate Plots
Crosstabs
Comparing the two categorical variables.
cross_cases(d, age, education)
| | education | | | | | | |
|---|---|---|---|---|---|---|---|
| | 1 equivalent to not completing high school | 2 equivalent to high school completion | 3 equivalent to vocational/technical program completion | 4 equivalent to AP/IB completion | 5 undergraduate degree | 6 graduate degree or higher | prefer not to say |
| age | | | | | | | |
| 1 under 18 | 77 | 142 | 2 | 27 | | | 25 |
| 2 between 18 and 25 | | 2 | 1 | 25 | | | |
| 4 between 36 and 45 | | 1 | | 2 | 4 | 2 | |
| 5 over 45 | | | | 2 | 6 | 4 | 1 |
| #Total cases | 77 | 145 | 3 | 56 | 10 | 6 | 26 |
Scatterplots
# Scatterplots for each pair of the continuous variables edeq12, brs, pss
# and rse. The first variable named in each call goes on the x-axis and the
# second on the y-axis. Each call is a separate statement; calls run
# together without a newline cannot be parsed by R.
plot(d$edeq12, d$brs,
     main = "Scatterplot of Eating Disorder Symptoms and Level of Resilience",
     xlab = "Eating Disorder Symptoms",
     ylab = "Level of Resilience")

plot(d$edeq12, d$pss,
     main = "Scatterplot of Eating Disorder Symptoms and Stress",
     xlab = "Eating Disorder Symptoms",
     ylab = "Stress")

plot(d$edeq12, d$rse,
     main = "Scatterplot of Eating Disorder Symptoms and Self-Esteem",
     xlab = "Eating Disorder Symptoms",
     ylab = "Self-Esteem")

plot(d$brs, d$pss,
     main = "Scatterplot of Level of Resilience and Stress",
     xlab = "Level of Resilience",
     ylab = "Stress")

plot(d$brs, d$rse,
     main = "Scatterplot of Level of Resilience and Self-Esteem",
     xlab = "Level of Resilience",
     ylab = "Self-Esteem")

plot(d$pss, d$rse,
     main = "Scatterplot of Stress and Self-Esteem",
     xlab = "Stress",
     ylab = "Self-Esteem")
Boxplots
#CATEGORICAL = X AND CONTINUOUS = Y
# In `y ~ x` the left-hand side is the continuous response drawn on the
# y-axis and the right-hand side is the grouping variable on the x-axis.
boxplot(edeq12 ~ education, data = d,
        main = "Boxplot of Education Level of Eating Disorder Symptoms",
        xlab = "Education",
        ylab = "Eating Disorder Symptoms")

# education is the grouping (x) variable and brs the response (y), so the
# x-axis is labelled with education and the y-axis with resilience.
boxplot(brs ~ education, data = d,
        main = "Boxplot of Resilience Level and Education Level",
        xlab = "Education Level",
        ylab = "Resilience Level")
Write-Up
The most important thing that I’ve done during this homework is begin comparing the data between my six different variables using charts such as histograms, scatterplots, boxplots, and tables. All four of my continuous variables had acceptable skew and kurtosis, as they were within the range of (-2, 2).