# remember, you might need to install packages
library(psych) # for the describe() command
library(expss) # for the cross_cases() command
Basic Statistics Lab
Load Libraries
Load Data
# Read the lab dataset from Data/eammi2_data_final.csv into the data frame d.
# header = TRUE tells read.csv() that the first row holds the column names;
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
d <- read.csv(file = "Data/eammi2_data_final.csv", header = TRUE)
# List the column names available in d (printed output follows).
names(d)
[1] "ResponseId" "gender" "race_rc" "age"
[5] "income" "edu" "sibling" "party_rc"
[9] "disability" "marriage5" "phys_sym" "pipwd"
[13] "moa_independence" "moa_role" "moa_safety" "moa_maturity"
[17] "idea" "swb" "mindful" "belong"
[21] "efficacy" "support" "socmeduse" "usdream"
[25] "npi" "exploit" "stress"
Univariate Plots: Histograms & Tables
# Count how many respondents fall in each age category.
# UPDATE FOR HW!
table(d[["age"]])
1 between 18 and 25 2 between 26 and 35 3 between 36 and 45 4 over 45
1997 116 38 18
# Count how many respondents fall in each gender category.
table(d[["gender"]])
f m nb
2332 792 54
# Draw one histogram per variable: stress, npi, exploit, and socmeduse.
# Each call is its own statement so the line parses as R.
hist(d$stress)
hist(d$npi)
hist(d$exploit)
hist(d$socmeduse)
Univariate Normality
Check skew and kurtosis. The cutoffs are -2 to +2; if skew or kurtosis falls outside this range for a variable, I need to mention it in my write-up!
# Print descriptive statistics for every column of d; the skew and kurtosis
# columns of the output are the ones checked against the -2 to +2 cutoffs.
describe(d)
vars n mean sd median trimmed mad min max
ResponseId* 1 3182 1591.50 918.71 1591.50 1591.50 1179.41 1.00 3182.0
gender* 2 3178 1.28 0.49 1.00 1.21 0.00 1.00 3.0
race_rc* 3 3173 5.53 2.13 7.00 5.88 0.00 1.00 7.0
age* 4 2169 1.11 0.43 1.00 1.00 0.00 1.00 4.0
income* 5 3157 2.44 1.16 2.00 2.42 1.48 1.00 4.0
edu* 6 3174 2.51 1.25 2.00 2.18 0.00 1.00 7.0
sibling* 7 3182 1.10 0.29 1.00 1.00 0.00 1.00 2.0
party_rc* 8 3165 2.46 1.01 2.00 2.45 0.00 1.00 4.0
disability* 9 864 3.71 1.70 5.00 3.78 1.48 1.00 6.0
marriage5* 10 3172 1.88 0.60 2.00 1.83 0.00 1.00 4.0
phys_sym* 11 3174 2.26 0.86 3.00 2.32 0.00 1.00 3.0
pipwd 12 1624 2.93 0.56 3.00 2.93 0.40 1.13 5.0
moa_independence 13 3107 3.54 0.47 3.67 3.61 0.49 1.00 4.0
moa_role 14 3111 2.97 0.72 3.00 3.00 0.74 1.00 4.0
moa_safety 15 3123 3.20 0.64 3.25 3.26 0.74 1.00 4.0
moa_maturity 16 3146 3.59 0.43 3.67 3.65 0.49 1.00 4.0
idea 17 3177 3.57 0.38 3.62 3.62 0.37 1.00 4.0
swb 18 3178 4.47 1.32 4.67 4.53 1.48 1.00 7.0
mindful 19 3173 3.71 0.84 3.73 3.71 0.79 1.13 6.0
belong 20 3175 3.23 0.60 3.30 3.25 0.59 1.30 5.0
efficacy 21 3176 3.13 0.45 3.10 3.13 0.44 1.00 4.0
support 22 3182 5.53 1.14 5.75 5.65 0.99 0.00 7.0
socmeduse 23 3175 34.45 8.58 35.00 34.72 7.41 11.00 55.0
usdream* 24 3171 2.39 1.55 2.00 2.24 1.48 1.00 5.0
npi 25 3167 0.28 0.31 0.15 0.24 0.23 0.00 1.0
exploit 26 3177 2.39 1.37 2.00 2.21 1.48 1.00 7.0
stress 27 3176 3.05 0.60 3.00 3.05 0.59 1.30 4.7
range skew kurtosis se
ResponseId* 3181.00 0.00 -1.20 16.29
gender* 2.00 1.40 0.88 0.01
race_rc* 6.00 -0.98 -0.68 0.04
age* 3.00 4.42 21.17 0.01
income* 3.00 0.14 -1.44 0.02
edu* 6.00 2.18 3.66 0.02
sibling* 1.00 2.74 5.53 0.01
party_rc* 3.00 0.42 -1.04 0.02
disability* 5.00 -0.44 -1.35 0.06
marriage5* 3.00 0.47 1.48 0.01
phys_sym* 2.00 -0.52 -1.46 0.02
pipwd 3.87 0.12 1.34 0.01
moa_independence 3.00 -1.44 2.53 0.01
moa_role 3.00 -0.33 -0.84 0.01
moa_safety 3.00 -0.71 0.03 0.01
moa_maturity 3.00 -1.20 1.87 0.01
idea 3.00 -1.54 4.42 0.01
swb 6.00 -0.36 -0.46 0.02
mindful 4.87 -0.06 -0.13 0.01
belong 3.70 -0.26 -0.12 0.01
efficacy 3.00 -0.29 0.63 0.01
support 7.00 -1.14 1.61 0.02
socmeduse 44.00 -0.31 0.26 0.15
usdream* 4.00 0.62 -1.13 0.03
npi 1.00 0.94 -0.69 0.01
exploit 6.00 0.95 0.37 0.02
stress 3.40 0.04 -0.17 0.01
Bivariate Plots
Crosstabs
# Cross-tabulate age (rows) against gender (columns); counts are shown in the
# table that follows.
cross_cases(d, age, gender)
| gender | |||
|---|---|---|---|
| | f | m | nb |
| age | |||
| 1 between 18 and 25 | 1481 | 486 | 30 |
| 2 between 26 and 35 | 70 | 46 | |
| 3 between 36 and 45 | 28 | 9 | 1 |
| 4 over 45 | 12 | 6 | |
| #Total cases | 1591 | 547 | 31 |
Scatterplots
# Scatterplots for every pairing of the four variables stress, npi, exploit,
# and socmeduse. The first argument goes on the x-axis and the second on the
# y-axis; main/xlab/ylab set the title and axis labels.

# stress (x) vs. npi (y)
plot(d$stress, d$npi,
     main = "Scatterplot of [Perceived Stress Questionnaire and Narcissistic Personality Inventory]",
     xlab = "Perceived Stress Questionnaire",
     ylab = "Narcissistic Personality Inventory")

# stress (x) vs. exploit (y)
plot(d$stress, d$exploit,
     main = "Scatterplot of [Perceived Stress Questionnaire and Interpersonal Exploitativeness Scale]",
     xlab = "Perceived Stress Questionnaire",
     ylab = "Interpersonal Exploitativeness Scale")

# stress (x) vs. socmeduse (y)
plot(d$stress, d$socmeduse,
     main = "Scatterplot of [Perceived Stress Questionnaire and Social Media Use]",
     xlab = "Perceived Stress Questionnaire",
     ylab = "Social Media Use")

# npi (x) vs. exploit (y)
plot(d$npi, d$exploit,
     main = "Scatterplot of [Narcissistic Personality Inventory and Interpersonal Exploitativeness Scale]",
     xlab = "Narcissistic Personality Inventory",
     ylab = "Interpersonal Exploitativeness Scale")

# npi (x) vs. socmeduse (y)
plot(d$npi, d$socmeduse,
     main = "Scatterplot of [Narcissistic Personality Inventory and Social Media Use]",
     xlab = "Narcissistic Personality Inventory",
     ylab = "Social Media Use")

# exploit (x) vs. socmeduse (y)
plot(d$exploit, d$socmeduse,
     main = "Scatterplot of [Interpersonal Exploitativeness Scale and Social Media Use]",
     xlab = "Interpersonal Exploitativeness Scale",
     ylab = "Social Media Use")
Boxplots
# Boxplots of socmeduse split by a grouping variable. The formula
# socmeduse ~ group puts socmeduse on the y-axis with one box per level of
# the grouping variable; data = d tells boxplot() where to find both columns.

# socmeduse by age category
boxplot(socmeduse ~ age, data = d,
        main = "Boxplot of [Social media use and age]",
        xlab = "age",
        ylab = "social media use")

# socmeduse by gender category
boxplot(socmeduse ~ gender, data = d,
        main = "Boxplot of [Social media use and gender]",
        xlab = "gender",
        ylab = "social media use")
Write-Up
The most important things I learned here were getting comfortable with viewing the data in different formats and being able to understand the data in graph or table form. Throughout this lab I had problems with pulling up describe(d) and cross_cases(). When I ran them inside the lab, they both showed up as errors, but when I rendered the lab the output showed up, so I am just unable to view it within the lab itself.
As for findings within the lab, the boxplots showed me that social media use does not differ much across gender or age groups. I wanted to look at this because social media use will be a big focus of my lab, and I was curious how it looks demographically.