Basic Statistics HW

Load Libraries

# remember, you might need to install packages

library(psych) # for the describe() command
library(expss) # for the cross_cases() command

Load Data

# Read the homework data set; header = TRUE tells read.csv() to use the
# first row of the file as the column names. TRUE is spelled out because
# the shorthand T is an ordinary variable that can be reassigned.
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
# Print the column names to confirm which variables were loaded.
names(d)
[1] "swb"        "gender"     "race_rc"    "socmeduse"  "moa_safety"
[6] "exploit"   

Univariate Plots: Histograms & Tables

# Frequency count of each gender category.
table(d[["gender"]])

   f    m   nb 
2278  777   54 
# Frequency count of each race category.
table(d[["race_rc"]])

      asian       black    hispanic multiracial  nativeamer       other 
        204         239         279         286          12          94 
      white 
       1995 
# Histograms of the four continuous variables. Explicit titles and x-axis
# labels replace the default "Histogram of d$..." text so these plots use
# the same construct names as the scatterplots and boxplots in this file.
hist(d$exploit,
     main = "Histogram of Interpersonal Exploitativeness Scale",
     xlab = "Interpersonal Exploitativeness Scale")

hist(d$moa_safety,
     main = "Histogram of Safety",
     xlab = "Safety")

hist(d$socmeduse,
     main = "Histogram of Social Media Use",
     xlab = "Social Media Use")

hist(d$swb,
     main = "Histogram of Satisfaction With Life Scale",
     xlab = "Satisfaction With Life Scale")

Univariate Normality

Check skew and kurtosis. The accepted range is -2 to +2; if skew or kurtosis for any variable falls outside this range, I need to mention it in my writeup!!

# Descriptive statistics (n, mean, sd, skew, kurtosis, ...) for every
# column of d, using describe() from the psych package.
psych::describe(x = d)
           vars    n  mean   sd median trimmed  mad min max range  skew
swb           1 3109  4.48 1.32   4.67    4.53 1.48   1   7     6 -0.37
gender*       2 3109  1.28 0.49   1.00    1.21 0.00   1   3     2  1.39
race_rc*      3 3109  5.55 2.12   7.00    5.89 0.00   1   7     6 -1.00
socmeduse     4 3109 34.46 8.58  35.00   34.73 7.41  11  55    44 -0.31
moa_safety    5 3109  3.20 0.64   3.25    3.26 0.74   1   4     3 -0.71
exploit       6 3109  2.39 1.37   2.00    2.21 1.48   1   7     6  0.94
           kurtosis   se
swb           -0.45 0.02
gender*        0.88 0.01
race_rc*      -0.65 0.04
socmeduse      0.26 0.15
moa_safety     0.04 0.01
exploit        0.36 0.02

Bivariate Plots

Crosstabs

# Cross-tabulate case counts: gender in the rows, race_rc in the columns.
expss::cross_cases(d, gender, race_rc)
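 |        |              | race_rc |       |          |             |            |       |       |
 |        |              |   asian | black | hispanic | multiracial | nativeamer | other | white |
 | ------ | ------------ | ------- | ----- | -------- | ----------- | ---------- | ----- | ----- |
 | gender |            f |     147 |   176 |      202 |         217 |         11 |    69 |  1456 |
 |        |            m |      56 |    61 |       75 |          59 |          1 |    24 |   501 |
 |        |           nb |       1 |     2 |        2 |          10 |            |     1 |    38 |
 |        | #Total cases |     204 |   239 |      279 |         286 |         12 |    94 |  1995 |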

Scatterplots

# Pairwise scatterplots of the four continuous variables. Each call names
# the x and y columns explicitly and evaluates them inside d via with().

# Satisfaction with life (x) vs. social media use (y)
with(d, plot(
  x = swb,
  y = socmeduse,
  xlab = "Satisfaction With Life Scale",
  ylab = "Social Media Use",
  main = "Scatterplot of Satisfaction With Life Scale and Social Media Use"
))

# Social media use (x) vs. safety (y)
with(d, plot(
  x = socmeduse,
  y = moa_safety,
  xlab = "Social Media Use",
  ylab = "Safety",
  main = "Scatterplot of Social Media Use and Safety"
))

# Safety (x) vs. interpersonal exploitativeness (y)
with(d, plot(
  x = moa_safety,
  y = exploit,
  xlab = "Safety",
  ylab = "Interpersonal Exploitativeness Scale",
  main = "Scatterplot of Safety and Interpersonal Exploitativeness Scale"
))

# Interpersonal exploitativeness (x) vs. satisfaction with life (y)
with(d, plot(
  x = exploit,
  y = swb,
  xlab = "Interpersonal Exploitativeness Scale",
  ylab = "Satisfaction With Life Scale",
  main = "Scatterplot of Interpersonal Exploitativeness Scale and Satisfaction With Life Scale"
))

# Interpersonal exploitativeness (x) vs. social media use (y)
with(d, plot(
  x = exploit,
  y = socmeduse,
  xlab = "Interpersonal Exploitativeness Scale",
  ylab = "Social Media Use",
  main = "Scatterplot of Interpersonal Exploitativeness Scale and Social Media Use"
))

# Satisfaction with life (x) vs. safety (y)
with(d, plot(
  x = swb,
  y = moa_safety,
  xlab = "Satisfaction With Life Scale",
  ylab = "Safety",
  main = "Scatterplot of Satisfaction With Life Scale and Safety"
))

Boxplots

# Boxplots of a continuous outcome split by race category.
# With the formula outcome ~ group, boxplot() draws one box per group along
# the x-axis and puts the outcome's scale on the y-axis, so xlab names the
# grouping variable (Race) and ylab names the outcome.

# Satisfaction with life by race
boxplot(swb ~ race_rc, data = d,
        main = "Boxplot of Satisfaction With Life Scale and Race",
        xlab = "Race",
        ylab = "Satisfaction With Life Scale")

# Social media use by race
boxplot(socmeduse ~ race_rc, data = d,
        main = "Boxplot of Social Media Use and Race",
        xlab = "Race",
        ylab = "Social Media Use")

Write-Up

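We reviewed plots and descriptive statistics for our six chosen variables: two categorical (gender and race) and four continuous (satisfaction with life, social media use, safety, and interpersonal exploitativeness). All four continuous variables had skew and kurtosis within the accepted range (-2 to +2).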

I hope I did this correctly! Let me know in the submission comments if I made any mistakes. Thanks Professor!