Basic Statistics Lab

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# Read the lab data set; the first row of the CSV holds the column names.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)

# List the column names to confirm which variables were loaded.
names(d)
[1] "age"       "education" "edeq12"    "brs"       "pss"       "rse"      

Univariate Plots: Histograms & Tables

# Frequency count for each age category. UPDATE FOR HW
table(d[["age"]])

         1 under 18 2 between 18 and 25 4 between 36 and 45           5 over 45 
                273                  28                   9                  13 
# Frequency count for each education category.
table(d[["education"]])

             1 equivalent to not completing high school 
                                                     77 
                 2 equivalent to high school completion 
                                                    145 
3 equivalent to vocational/technical program completion 
                                                      3 
                       4 equivalent to AP/IB completion 
                                                     56 
                                 5 undergraduate degree 
                                                     10 
                            6 graduate degree or higher 
                                                      6 
                                      prefer not to say 
                                                     26 
# Continuous variables need histograms.
# Titles and x-axis labels are set explicitly, using the same variable labels
# as the scatterplots, instead of the default "Histogram of d$edeq12" text.
hist(d$edeq12,
     main = "Histogram of Eating Disorder Symptoms",
     xlab = "Eating Disorder Symptoms")

hist(d$brs,
     main = "Histogram of Level of Resilience",
     xlab = "Level of Resilience")

hist(d$pss,
     main = "Histogram of Stress",
     xlab = "Stress")

hist(d$rse,
     main = "Histogram of Self-Esteem",
     xlab = "Self-Esteem")

Univariate Normality

Check skew and kurtosis. The cutoffs are +2 and -2. If skew or kurtosis falls outside this range (above +2 or below -2), I need to mention it in my write-up!

# Descriptive statistics (including skew and kurtosis) for every column.
# Rows flagged with * in the output are the categorical variables, which are
# summarised via numeric codes; only the unflagged rows are continuous.
psych::describe(d)
           vars   n mean   sd median trimmed  mad min max range  skew kurtosis
age*          1 323 1.26 0.70   1.00    1.07 0.00   1   4     3  2.86     7.47
education*    2 323 2.69 1.75   2.00    2.39 1.48   1   7     6  1.23     0.56
edeq12        3 323 2.10 0.79   2.00    2.06 0.99   1   4     3  0.33    -0.97
brs           4 323 2.67 0.87   2.67    2.67 0.99   1   5     4  0.11    -0.64
pss           5 323 3.46 0.91   3.50    3.50 1.11   1   5     4 -0.41    -0.61
rse           6 323 2.25 0.68   2.10    2.22 0.74   1   4     3  0.35    -0.62
             se
age*       0.04
education* 0.10
edeq12     0.04
brs        0.05
pss        0.05
rse        0.04

Bivariate Plots

Crosstabs

Compare the two categorical variables.

# Cross-tabulate case counts: age categories in rows, education in columns.
expss::cross_cases(d, age, education)
 education 
 1 equivalent to not completing high school   2 equivalent to high school completion   3 equivalent to vocational/technical program completion   4 equivalent to AP/IB completion   5 undergraduate degree   6 graduate degree or higher   prefer not to say 
 age 
   1 under 18  77 142 2 27 25
   2 between 18 and 25  2 1 25
   4 between 36 and 45  1 2 4 2
   5 over 45  2 6 4 1
   #Total cases  77 145 3 56 10 6 26

Scatterplots

# Scatterplots for every pair of continuous variables.

# Display label for each variable, reused for axis labels and plot titles.
var_labels <- c(
  edeq12 = "Eating Disorder Symptoms",
  brs    = "Level of Resilience",
  pss    = "Stress",
  rse    = "Self-Esteem"
)

# combn() returns one pair per column, in the order
# edeq12-brs, edeq12-pss, edeq12-rse, brs-pss, brs-rse, pss-rse.
var_pairs <- combn(names(var_labels), 2)

for (i in seq_len(ncol(var_pairs))) {
  x_var <- var_pairs[1, i]
  y_var <- var_pairs[2, i]
  x_label <- var_labels[[x_var]]
  y_label <- var_labels[[y_var]]

  plot(d[[x_var]], d[[y_var]],
       main = paste("Scatterplot of", x_label, "and", y_label),
       xlab = x_label,
       ylab = y_label)
}

Boxplots

# In the formula y ~ x, the categorical variable goes on the x-axis (right of ~)
# and the continuous variable goes on the y-axis (left of ~).
# Eating disorder symptoms (y) across education levels (x).
boxplot(edeq12 ~ education, data = d,
        main = "Boxplot of Eating Disorder Symptoms by Education Level",
        xlab = "Education",
        ylab = "Eating Disorder Symptoms")

# Resilience (y, continuous) across education levels (x, categorical).
# The axis labels follow the formula: education is on the x-axis and
# resilience is on the y-axis.
boxplot(brs ~ education, data = d,
        main = "Boxplot of Resilience Level and Education Level",
        xlab = "Education Level",
        ylab = "Resilience Level")

Write-Up

The most important thing that I’ve done during this homework is begin comparing the data across my six variables using histograms, scatterplots, boxplots, and tables. All four of my continuous variables had acceptable skew and kurtosis, as their values were all within the range of -2 to +2.