# if you haven't used a given package before, you'll need to download it first
# after download is finished, insert a "#" before the install function so that the file will Knit later
# then run the library function calling that package
#install.packages("psych")
#install.packages("expss") #new package
library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
##
## To modify variables or add new variables:
## let(mtcars, new_var = 42, new_var2 = new_var*hp) %>% head()
# Load the project data set from "projectdata.csv" (path is relative to the
# current working directory) into the data frame d2
d2 <- read.csv(file = "projectdata.csv")
# Print psych's descriptive-statistics table for every column of d2
psych::describe(d2)
## vars n mean sd median trimmed mad min max
## ResponseID* 1 3092 1546.50 892.73 1546.50 1546.50 1146.05 1.0 3092
## gender* 2 3092 1.28 0.49 1.00 1.21 0.00 1.0 3
## sibling* 3 3092 1.10 0.30 1.00 1.00 0.00 1.0 2
## moa_independence 4 3092 3.54 0.46 3.67 3.61 0.49 1.0 4
## swb 5 3092 4.47 1.32 4.67 4.53 1.48 1.0 7
## belong 6 3092 3.23 0.61 3.30 3.25 0.59 1.3 5
## socmeduse 7 3092 34.42 8.60 35.00 34.70 7.41 11.0 55
## range skew kurtosis se
## ResponseID* 3091.0 0.00 -1.20 16.05
## gender* 2.0 1.40 0.90 0.01
## sibling* 1.0 2.73 5.44 0.01
## moa_independence 3.0 -1.44 2.52 0.01
## swb 6.0 -0.37 -0.45 0.02
## belong 3.7 -0.26 -0.13 0.01
## socmeduse 44.0 -0.31 0.26 0.15
# Note: for the HW, you will import "projectdata.csv" that you created and exported in the Data Prep Lab
Frequency tables are used to summarize individual categorical variables. Histograms are used to visualize the distribution of individual continuous variables.
# Frequency counts for the two categorical variables in d2
# (each column is extracted by its exact name with [[ ]])
table(d2[["gender"]])
##
## f m nb
## 2270 769 53
table(d2[["sibling"]])
##
## at least one sibling only child
## 2793 299
# use histograms to visualize continuous data (4 variables)
# Each call below draws one separate histogram, one per continuous column of d2.
# NOTE: hist() builds its default title and x-axis label from the expression
# passed to it, so the plots are labelled "d2$<column name>".
hist(d2$moa_independence)
hist(d2$swb)
hist(d2$belong)
hist(d2$socmeduse)
# Re-print the descriptive statistics so the skew and kurtosis columns are
# directly above the write-up that interprets them
psych::describe(d2)
## vars n mean sd median trimmed mad min max
## ResponseID* 1 3092 1546.50 892.73 1546.50 1546.50 1146.05 1.0 3092
## gender* 2 3092 1.28 0.49 1.00 1.21 0.00 1.0 3
## sibling* 3 3092 1.10 0.30 1.00 1.00 0.00 1.0 2
## moa_independence 4 3092 3.54 0.46 3.67 3.61 0.49 1.0 4
## swb 5 3092 4.47 1.32 4.67 4.53 1.48 1.0 7
## belong 6 3092 3.23 0.61 3.30 3.25 0.59 1.3 5
## socmeduse 7 3092 34.42 8.60 35.00 34.70 7.41 11.0 55
## range skew kurtosis se
## ResponseID* 3091.0 0.00 -1.20 16.05
## gender* 2.0 1.40 0.90 0.01
## sibling* 1.0 2.73 5.44 0.01
## moa_independence 3.0 -1.44 2.52 0.01
## swb 6.0 -0.37 -0.45 0.02
## belong 3.7 -0.26 -0.13 0.01
## socmeduse 44.0 -0.31 0.26 0.15
## For the required write-up, choose one of the options below, then paste it and edit it based on your output.
## OPTION 1
# We analyzed the skew and kurtosis of our continuous variables and all were within the accepted range (-2/+2).
## OPTION 2
# We analyzed the skew and kurtosis of our continuous variables and (#) were within the accepted range (-2/+2). However, (#) variables (list variable name(s) here) were outside of the accepted range. For this analysis, we will use them anyway, but outside of this class this is bad practice.