Basic Statistics Homework

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# Read the dataset; the first row of the CSV holds the column names.
# TRUE is spelled out because the shorthand T is an ordinary variable
# that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# List the column names to confirm which variables were loaded.
names(d)
[1] "income"     "edu"        "swb"        "moa_safety" "stress"    
[6] "pipwd"     

Univariate Plots: Histograms & Tables

# Frequency count for each income category
table(d[["income"]])

         1 low       2 middle         3 high rather not say 
           479            434            243            436 
# Frequency count for each education category
table(d[["edu"]])

     1 High school diploma or less, and NO COLLEGE 
                                                33 
                            2 Currently in college 
                                              1306 
3 Completed some college, but no longer in college 
                                                17 
                  4 Complete 2 year College degree 
                                                95 
                      5 Completed Bachelors Degree 
                                                62 
                 6 Currently in graduate education 
                                                52 
                  7 Completed some graduate degree 
                                                27 
# Histograms of the four continuous variables. Explicit titles and x-axis
# labels replace the defaults, which would otherwise display the raw
# expression (e.g. "d$swb"). The wording follows the variable labels used
# for the scatterplots in this file.
hist(d$swb,
     main = "Histogram of Satisfaction with Life Scale",
     xlab = "Satisfaction with Life Scale")

hist(d$moa_safety,
     main = "Histogram of Safety",
     xlab = "Safety")

hist(d$stress,
     main = "Histogram of Stress",
     xlab = "Stress")

hist(d$pipwd,
     main = "Histogram of Positive Identity as a Person With a Disability",
     xlab = "Positive Identity as a Person With a Disability")

Univariate Normality

Check skew and kurtosis. Values between -2 and +2 are considered acceptable.

# Descriptive statistics (including skew and kurtosis) for every column of d
psych::describe(d)
           vars    n mean   sd median trimmed  mad  min max range  skew
income*       1 1592 2.40 1.18   2.00    2.37 1.48 1.00 4.0  3.00  0.19
edu*          2 1592 2.44 1.17   2.00    2.13 0.00 1.00 7.0  6.00  2.38
swb           3 1592 4.33 1.35   4.50    4.38 1.48 1.00 7.0  6.00 -0.32
moa_safety    4 1592 3.21 0.64   3.25    3.27 0.74 1.00 4.0  3.00 -0.72
stress        5 1592 3.12 0.61   3.10    3.12 0.59 1.40 4.7  3.30  0.02
pipwd         6 1592 2.93 0.56   3.00    2.93 0.40 1.13 5.0  3.87  0.12
           kurtosis   se
income*       -1.46 0.03
edu*           4.80 0.03
swb           -0.50 0.03
moa_safety     0.04 0.02
stress        -0.20 0.02
pipwd          1.33 0.01

Bivariate Plots

Crosstabs

# Cross-tabulate income (rows) against education (columns)
expss::cross_cases(d, cell_vars = income, col_vars = edu)
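|                | edu: 1 High school diploma or less, and NO COLLEGE | edu: 2 Currently in college | edu: 3 Completed some college, but no longer in college | edu: 4 Complete 2 year College degree | edu: 5 Completed Bachelors Degree | edu: 6 Currently in graduate education | edu: 7 Completed some graduate degree |
|----------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| income         |      |      |      |      |      |      |      |
| 1 low          |   16 |  352 |   12 |   41 |   24 |   25 |    9 |
| 2 middle       |    3 |  366 |    3 |   28 |   16 |    8 |   10 |
| 3 high         |    3 |  212 |      |   10 |    9 |    6 |    3 |
| rather not say |   11 |  376 |    2 |   16 |   13 |   13 |    5 |
| #Total cases   |   33 | 1306 |   17 |   95 |   62 |   52 |   27 |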

Scatterplots

# Pairwise scatterplots for the four continuous variables. Each call names
# its x and y columns explicitly and supplies its own axis labels and title.
with(d, plot(x = swb, y = moa_safety,
             xlab = "Satisfaction with Life Scale",
             ylab = "Safety",
             main = "Scatterplot of Satisfaction with Life Scale and Safety"))

with(d, plot(x = swb, y = stress,
             xlab = "Satisfaction with Life Scale",
             ylab = "Stress",
             main = "Scatterplot of Satisfaction with Life Scale and Stress"))

with(d, plot(x = swb, y = pipwd,
             xlab = "Satisfaction with Life Scale",
             ylab = "Positive Identity as a Person With a Disability",
             main = "Scatterplot of Satisfaction with Life Scale and Positive Identity as a Person With a Disability"))

with(d, plot(x = moa_safety, y = stress,
             xlab = "Safety",
             ylab = "Stress",
             main = "Scatterplot of Safety and Stress"))

with(d, plot(x = moa_safety, y = pipwd,
             xlab = "Safety",
             ylab = "Positive Identity as a Person With a Disability",
             main = "Scatterplot of Safety and Positive Identity as a Person With a Disability"))

with(d, plot(x = stress, y = pipwd,
             xlab = "Stress",
             ylab = "Positive Identity as a Person With a Disability",
             main = "Scatterplot of Stress and Positive Identity as a Person With a Disability"))

Boxplots

# Stress scores grouped by education category
boxplot(stress ~ edu,
        data = d,
        xlab = "Education",
        ylab = "Stress",
        main = "Boxplot of Stress and Education")

# Stress scores grouped by income category
boxplot(stress ~ income,
        data = d,
        xlab = "Income",
        ylab = "Stress",
        main = "Boxplot of Stress and Income")

Write-Up

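We reviewed plots and descriptive statistics for our six chosen variables: two categorical variables (income and education) and four continuous variables (satisfaction with life, safety, stress, and positive identity as a person with a disability). All four of our continuous variables had skew and kurtosis within the accepted range (-2 to +2).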