Basic Statistics Lab

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# WILL NEED TO UPDATE THIS FOR THE HW!!! USE MYDATA
# Read the lab dataset into the data frame `d`. The path is relative to the
# current working directory. header = TRUE treats the first row of the file
# as the column names (TRUE is spelled out because T is an ordinary variable
# that can be reassigned).
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# Print the column names to confirm which variables were loaded
names(d)
[1] "big5_con"            "big5_neu"            "isolation"          
[4] "support"             "relationship_status" "gender"             

Univariate Plots: Histograms & Tables

# Count how many rows fall into each relationship_status category
table(d$relationship_status)

  In a relationship/married and cohabiting 
                                       288 
In a relationship/married but living apart 
                                        99 
                         Prefer not to say 
                                        91 
               Single, divorced or widowed 
                                        42 
                     Single, never married 
                                       742 
# Count how many rows fall into each gender category
table(d$gender)

            female I use another term               male  Prefer not to say 
              1011                 31                199                 21 
# Histogram of each of the four numeric variables, one plot per call.
# No main/xlab is supplied, so hist() falls back to its default title and
# x-axis label, which are built from the expression passed in (e.g. d$isolation).
hist(d$isolation)

hist(d$support)

hist(d$big5_con)

hist(d$big5_neu)

Univariate Normality

Check skew and kurtosis. Scores should be between -2 and +2.

# Descriptive statistics for every column of d. The skew and kurtosis columns
# are the ones to compare against the -2/+2 guideline.
# Rows flagged with * in the output are non-numeric columns that describe()
# converted to numeric codes, so their skew/kurtosis should not be interpreted.
describe(d)
                     vars    n mean   sd median trimmed  mad min max range
big5_con                1 1262 4.83 1.20   5.00    4.87 1.48   1 7.0   6.0
big5_neu                2 1262 4.38 1.51   4.67    4.43 1.48   1 7.0   6.0
isolation               3 1262 2.15 0.84   2.00    2.12 1.11   1 3.5   2.5
support                 4 1262 3.57 0.95   3.67    3.62 0.99   1 5.0   4.0
relationship_status*    5 1262 3.67 1.71   5.00    3.84 0.00   1 5.0   4.0
gender*                 6 1262 1.39 0.81   1.00    1.22 0.00   1 4.0   3.0
                      skew kurtosis   se
big5_con             -0.27    -0.30 0.03
big5_neu             -0.30    -0.76 0.04
isolation             0.16    -1.29 0.02
support              -0.43    -0.56 0.03
relationship_status* -0.68    -1.35 0.05
gender*               1.73     1.34 0.02

Bivariate Plots

Crosstabs

# Cross-tabulate the two categorical variables: relationship_status supplies
# the rows and gender supplies the columns; cells are case counts.
cross_cases(d, relationship_status, gender)
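 |                     |                                            | gender |                    |      |                   |
 |                     |                                            | female | I use another term | male | Prefer not to say |
 | ------------------- | ------------------------------------------ | ------ | ------------------ | ---- | ----------------- |
 | relationship_status |   In a relationship/married and cohabiting |    250 |                  1 |   36 |                 1 |
 |                     | In a relationship/married but living apart |     81 |                  2 |   15 |                 1 |
 |                     |                          Prefer not to say |     61 |                  4 |   15 |                11 |
 |                     |                Single, divorced or widowed |     37 |                    |    5 |                   |
 |                     |                      Single, never married |    582 |                 24 |  128 |                 8 |
 |                     |                               #Total cases |   1011 |                 31 |  199 |                21 |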

Scatterplots

# Scatterplots for every pair of the four continuous variables.
# The axis labels are looked up from one table of display names and each
# title is assembled from the two axis labels, so title and axes always agree.
# local() keeps the helper variables out of the global environment.
local({
  display_names <- c(
    isolation = "UCLA Loneliness Scale (Adult)",
    support   = "Social Support Measure",
    big5_con  = "Conscientiousness",
    big5_neu  = "Neuroticism"
  )

  # combn() yields the pairs in the order 1-2, 1-3, 1-4, 2-3, 2-4, 3-4;
  # the first member of each pair goes on the x-axis, the second on the y-axis.
  for (pair in combn(names(display_names), 2, simplify = FALSE)) {
    x_label <- display_names[[pair[1]]]
    y_label <- display_names[[pair[2]]]

    plot(d[[pair[1]]], d[[pair[2]]],
         main = paste("Scatterplot of", x_label, "and", y_label),
         xlab = x_label,
         ylab = y_label)
  }
})

Boxplots

# Remember that the continuous variable comes first: CONTINUOUS ~ CATEGORICAL
# Distribution of loneliness scores within each relationship-status category
boxplot(isolation ~ relationship_status, data = d,
        main = "Boxplot of UCLA Loneliness Scale (Adult) and Relationship Status",
        xlab = "Relationship Status",
        ylab = "UCLA Loneliness Scale (Adult)")

# Distribution of loneliness scores within each gender category.
# The title names the grouping variable the same way the x-axis label does.
boxplot(isolation ~ gender, data = d,
        main = "Boxplot of UCLA Loneliness Scale (Adult) and Gender Identity",
        xlab = "Gender Identity",
        ylab = "UCLA Loneliness Scale (Adult)")

Write-Up

If skew and kurtosis are good: We reviewed plots and descriptive statistics for our six chosen variables. All four of our continuous variables had skew and kurtosis within the accepted range (-2 to +2).