# If you haven't run this code before, you'll need to install the packages
# below first. Instructions on how to do this are included in the video;
# as a reminder, use the Packages tab on the right.
library(psych) # for the describe() command
library(expss) # for the cross_cases() command
## Loading required package: maditr
##
## To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)
##
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
##
## sort_by
# Import the data for the lab. The first row of the CSV holds the column names.
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
d2 <- read.csv(file = "Data/fakedata.csv", header = TRUE)
# table() lists the levels of a categorical variable and how many participants
# fall in each level. Replace the variable name in this call and in the next
# table() call with your own categorical variables.
table(d2[["variable2"]])
##
## level a level b level c level d level e level f
## 36 261 379 247 53 4
# Level counts for the second categorical variable.
table(d2[["variable3"]])
##
## level a level b
## 220 760
# hist() draws a histogram; use it for continuous variables only.
# Each plot gets an explicit title and x-axis label instead of the default
# text built from the raw expression (e.g. "Histogram of d2$variable5").
# Replace the names in continuous_vars with your own continuous variables.
continuous_vars <- c("variable5", "variable8", "variable10", "variable11")
for (var_name in continuous_vars) {
  hist(
    d2[[var_name]],
    main = paste("Histogram of", var_name),
    xlab = var_name
  )
}
We analyzed the skew and kurtosis of our continuous variables, and all were within the accepted range (-2 to +2). (Note: unlike in the lab, this may not be true for the homework data.)
We analyzed the skew and kurtosis of our continuous variables, and most were within the accepted range (-2 to +2). However, some variables (list them in parentheses) were outside the accepted range. For this analysis we will use them anyway, but outside of this class this is bad practice.
# Descriptive statistics for every column of d2. Use the skew and kurtosis
# columns to check univariate normality; the accepted range is -2 to +2.
psych::describe(x = d2)
## vars n mean sd median trimmed mad min max range
## id* 1 1000 500.50 288.82 500.50 500.50 370.65 1.00 1000.00 999.00
## variable1* 2 980 2.01 0.66 2.00 2.02 0.00 1.00 3.00 2.00
## variable2* 3 980 3.03 0.96 3.00 3.01 1.48 1.00 6.00 5.00
## variable3* 4 980 1.78 0.42 2.00 1.84 0.00 1.00 2.00 1.00
## variable4* 5 666 2.54 0.60 3.00 2.61 0.00 1.00 3.00 2.00
## variable5 6 980 2.52 0.49 2.50 2.51 0.50 1.09 4.15 3.06
## variable6 7 980 2.99 0.73 2.98 2.99 0.74 0.80 4.97 4.18
## variable7 8 980 1.63 0.40 1.59 1.60 0.42 1.00 3.44 2.44
## variable8 9 980 3.88 0.65 3.96 3.92 0.68 1.36 5.00 3.63
## variable9 10 980 1.28 0.19 1.25 1.27 0.19 1.00 2.15 1.15
## variable10 11 980 4.87 0.97 4.90 4.89 0.96 1.04 6.98 5.94
## variable11 12 980 1.81 0.61 1.70 1.75 0.64 1.00 3.99 2.99
## variable12 13 980 4.15 1.89 4.13 4.11 2.02 0.17 9.91 9.74
## skew kurtosis se
## id* 0.00 -1.20 9.13
## variable1* -0.01 -0.74 0.02
## variable2* 0.16 -0.30 0.03
## variable3* -1.32 -0.26 0.01
## variable4* -0.91 -0.18 0.02
## variable5 0.12 -0.06 0.02
## variable6 -0.07 -0.18 0.02
## variable7 0.70 0.35 0.01
## variable8 -0.59 0.08 0.02
## variable9 0.74 0.38 0.01
## variable10 -0.33 0.18 0.03
## variable11 0.82 0.13 0.02
## variable12 0.19 -0.45 0.06
# Cross-tabulate two categorical variables (case counts per combination).
# Replace variable2 and variable3 with your own categorical variables; the
# scatterplots that follow use continuous variables instead.
expss::cross_cases(
  data = d2,
  cell_vars = variable2,
  col_vars = variable3
)
| | variable3: level a | variable3: level b |
|---|---|---|
| **variable2** | | |
| level a | 5 | 29 |
| level b | 65 | 192 |
| level c | 84 | 289 |
| level d | 48 | 193 |
| level e | 16 | 37 |
| level f | 1 | 2 |
| #Total cases | 219 | 742 |
# Scatterplot of two continuous variables: variable5 on the x-axis and
# variable8 on the y-axis.
with(
  d2,
  plot(
    x = variable5,
    y = variable8,
    main = "Scatterplot of Variable5 and Variable8",
    xlab = "Variable5",
    ylab = "Variable8"
  )
)
# Scatterplot of two continuous variables: variable10 on the x-axis and
# variable11 on the y-axis.
with(
  d2,
  plot(
    x = variable10,
    y = variable11,
    main = "Scatterplot of Variable10 and Variable11",
    xlab = "Variable10",
    ylab = "Variable11"
  )
)
# A boxplot pairs one continuous variable with one categorical variable.
# The order in the formula matters: continuous ~ categorical.
boxplot(
  variable5 ~ variable2,
  data = d2,
  main = "Boxplot of Variable2 and Variable5",
  xlab = "Variable2",
  ylab = "Variable5"
)
# Boxplot of the continuous variable11 split by the levels of the
# categorical variable3 (formula order: continuous ~ categorical).
boxplot(
  variable11 ~ variable3,
  data = d2,
  main = "Boxplot of Variable3 and Variable11",
  xlab = "Variable3",
  ylab = "Variable11"
)