# if you haven't run this code before, you'll need to download the below packages first
# you should see a prompt near the top of the page (in a yellow bar within the RStudio window)
# you can also use the packages tab to the right
library(naniar) # for the gg_miss_upset() command
Data Prep
Load Libraries
Import Data
# for the HW, you'll import the CSV file of your chosen dataset
# read.csv() loads the file into a dataframe; header = TRUE treats the first
# row of the file as the column names
df <- read.csv(file = "Data/eammi2_data_final.csv", header = TRUE)
Viewing Data
# these are commands useful for viewing a dataframe
# you can also click the object in the environment tab to view it in a new window
# names() lists the column names of the dataframe
names(df)
[1] "age" "gender" "swb" "efficacy" "support" "stress"
# head() prints the first few rows of the dataframe (six are shown in the output)
head(df)
age gender swb efficacy support stress
1 1 between 18 and 25 f 4.333333 3.4 6.000000 3.3
2 1 between 18 and 25 m 4.166667 3.4 6.750000 3.3
3 1 between 18 and 25 m 1.833333 2.2 5.166667 4.0
4 1 between 18 and 25 f 5.166667 2.8 5.583333 3.2
5 1 between 18 and 25 m 3.666667 3.0 6.000000 3.1
6 1 between 18 and 25 f 4.000000 2.4 4.500000 3.5
# str() reports the dataframe's dimensions plus each column's type and first few values
str(df)
'data.frame': 2163 obs. of 6 variables:
$ age : chr "1 between 18 and 25" "1 between 18 and 25" "1 between 18 and 25" "1 between 18 and 25" ...
$ gender : chr "f" "m" "m" "f" ...
$ swb : num 4.33 4.17 1.83 5.17 3.67 ...
$ efficacy: num 3.4 3.4 2.2 2.8 3 2.4 2.3 3 3 3.7 ...
$ support : num 6 6.75 5.17 5.58 6 ...
$ stress : num 3.3 3.3 4 3.2 3.1 3.5 3.3 2.4 2.9 2.7 ...
Subsetting Data
# use the codebook you created in the codebook activity to get the names of your variables (first column)
# enter this list of names in the select = c() argument to subset those columns from the dataframe
# the result is stored in a new dataframe, d, so the imported df is left unchanged
d <- subset(df, select = c(age, gender, swb, efficacy, support, stress))
Missing Data
# use the gg_miss_upset() command for a visualization of your missing data
# gg_miss_upset(d, nsets = 6)
# use the na.omit() command to create a new dataframe in which any participants with missing data are dropped from the dataframe
# d2 <- na.omit(d)
# use a bit of math to see what percentage of participants had missing data
# math will go here
# 2163
# 2163
## Exporting Data
# # last step is to export the data after you've dropped NAs
# write.csv(d2, file="Data/mydata.csv", row.names = F)
# MAKE SURE TO RENAME TO MYDATA FOR THE HOMEWORK
# DON'T FORGET!!!!!
Write-Up
We selected six variables from the EAMMI2 dataset to focus on in our analysis: age, gender, satisfaction with life, efficacy, support, and stress. Participants with missing data in these six variables (0%) were dropped from our analysis, leaving us a final sample of n = 2163.
``