Basic Statistics HW

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# Load the homework dataset (mydata.csv). Update the path below if your copy
# of the file is not stored in the Data/ folder.
# header = TRUE tells read.csv() that the first row holds the column names;
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# Print the column names to confirm the expected variables were read in.
names(d)
[1] "income"   "edu"      "swb"      "efficacy" "exploit"  "stress"  

Univariate Plots: Histograms & Tables

# for categorical variables: count the cases in each income category
table(d[["income"]])

         1 low       2 middle         3 high rather not say 
           879            880            535            854 
# for categorical variables: count the cases in each education category
table(d[["edu"]])

     1 High school diploma or less, and NO COLLEGE 
                                                58 
                            2 Currently in college 
                                              2548 
3 Completed some college, but no longer in college 
                                                34 
                  4 Complete 2 year College degree 
                                               179 
                      5 Completed Bachelors Degree 
                                               135 
                 6 Currently in graduate education 
                                               134 
                  7 Completed some graduate degree 
                                                60 
# for continuous variables:
# Explicit titles and x-axis labels replace the defaults (e.g. "Histogram of
# d$swb") so the histograms are labelled the same way as the scatterplots
# and boxplots in this report.
hist(d$swb,
     main = "Histogram of Subjective Well-Being",
     xlab = "Subjective Well-Being")

hist(d$efficacy,
     main = "Histogram of Efficacy",
     xlab = "Efficacy")

hist(d$exploit,
     main = "Histogram of Exploitativeness",
     xlab = "Exploitativeness")

hist(d$stress,
     main = "Histogram of Stress",
     xlab = "Stress")

Univariate Normality

Check skew and kurtosis.

# Descriptive statistics (including skew and kurtosis) for every column of d.
# Rows printed with a trailing * (income, edu) are non-numeric columns that
# describe() converted to numeric codes, so read their values with care.
psych::describe(x = d)
         vars    n mean   sd median trimmed  mad min max range  skew kurtosis
income*     1 3148 2.43 1.16   2.00    2.42 1.48 1.0 4.0   3.0  0.15    -1.43
edu*        2 3148 2.50 1.25   2.00    2.17 0.00 1.0 7.0   6.0  2.21     3.78
swb         3 3148 4.47 1.32   4.67    4.53 1.48 1.0 7.0   6.0 -0.36    -0.45
efficacy    4 3148 3.13 0.45   3.10    3.13 0.44 1.1 4.0   2.9 -0.24     0.45
exploit     5 3148 2.38 1.37   2.00    2.21 1.48 1.0 7.0   6.0  0.94     0.35
stress      6 3148 3.05 0.60   3.00    3.05 0.59 1.3 4.7   3.4  0.03    -0.17
           se
income*  0.02
edu*     0.02
swb      0.02
efficacy 0.01
exploit  0.02
stress   0.01

Bivariate Plots

Crosstabs

# for categorical variables
# Arguments: the data frame, then the variable shown in the rows (income),
# then the variable shown in the columns (edu).
expss::cross_cases(data = d, cell_vars = income, col_vars = edu)
| income \ edu | 1 High school diploma or less, and NO COLLEGE | 2 Currently in college | 3 Completed some college, but no longer in college | 4 Complete 2 year College degree | 5 Completed Bachelors Degree | 6 Currently in graduate education | 7 Completed some graduate degree |
|---|---|---|---|---|---|---|---|
| 1 low | 20 | 633 | 14 | 74 | 55 | 65 | 18 |
| 2 middle | 10 | 727 | 14 | 44 | 37 | 25 | 23 |
| 3 high | 7 | 470 | 2 | 24 | 16 | 8 | 8 |
| rather not say | 21 | 718 | 4 | 37 | 27 | 36 | 11 |
| #Total cases | 58 | 2548 | 34 | 179 | 135 | 134 | 60 |

Scatterplots

# for continuous variables; all pairs
# Display label for each continuous column, keyed by its column name in d.
scatter_labels <- c(swb = "Subjective Well-Being",
                    efficacy = "Efficacy",
                    exploit = "Exploitativeness",
                    stress = "Stress")

# Every pair of column names, one pair per matrix column
# (row 1 = x variable, row 2 = y variable), in the order listed above.
scatter_pairs <- combn(names(scatter_labels), 2)

# Draw one scatterplot per pair, building the title from the two labels.
for (k in seq_len(ncol(scatter_pairs))) {
  x_name <- scatter_pairs[1, k]
  y_name <- scatter_pairs[2, k]
  plot(d[[x_name]], d[[y_name]],
       main = paste0("Scatterplot of ", scatter_labels[[x_name]],
                     " and ", scatter_labels[[y_name]]),
       xlab = scatter_labels[[x_name]],
       ylab = scatter_labels[[y_name]])
}

Boxplots

# The formula reads outcome ~ group: the continuous variable goes on the left
# of ~ and the categorical variable on the right (continuous ~ categorical).
boxplot(efficacy ~ edu, data = d,
        main = "Boxplot of Efficacy and Education",
        xlab = "Education",
        ylab = "Efficacy")

boxplot(exploit ~ income, data = d,
        main = "Boxplot of Exploitativeness and Income",
        xlab = "Income",
        ylab = "Exploitativeness")

# Extra:

boxplot(swb ~ income, data = d,
        main = "Boxplot of Subjective Well-Being and Income",
        xlab = "Income",
        ylab = "Subjective Well-Being")

Write-Up

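We reviewed plots and descriptive statistics for our six chosen variables: two categorical (income and education) and four continuous (subjective well-being, efficacy, exploitativeness, and stress). All four continuous variables had skew and kurtosis values within the accepted range of -2 to +2.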