library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
[37m── [1mAttaching packages[22m ───────────────────────────────── tidyverse 1.3.0 ──[39m
[37m[32m✓[37m [34mggplot2[37m 3.3.2 [32m✓[37m [34mpurrr [37m 0.3.4
[32m✓[37m [34mtibble [37m 3.0.3 [32m✓[37m [34mdplyr [37m 1.0.0
[32m✓[37m [34mtidyr [37m 1.1.0 [32m✓[37m [34mstringr[37m 1.4.0
[32m✓[37m [34mreadr [37m 1.3.1 [32m✓[37m [34mforcats[37m 0.5.0[39m
package ‘ggplot2’ was built under R version 3.6.2package ‘tibble’ was built under R version 3.6.2package ‘tidyr’ was built under R version 3.6.2package ‘purrr’ was built under R version 3.6.2package ‘dplyr’ was built under R version 3.6.2[37m── [1mConflicts[22m ──────────────────────────────────── tidyverse_conflicts() ──
[31mx[37m [34mdplyr[37m::[32mfilter()[37m masks [34mstats[37m::filter()
[31mx[37m [34mdplyr[37m::[32mlag()[37m masks [34mstats[37m::lag()[39m
library(psych)
Attaching package: ‘psych’
The following objects are masked from ‘package:ggplot2’:
%+%, alpha
library(haven)
package ‘haven’ was built under R version 3.6.2
videogames <- read_dta("VideoGames.dta")
glimpse(videogames)
Rows: 442
Columns: 4
$ ID [3m[38;5;246m<dbl>[39m[23m 69, 55, 7, 96, 130, 124, 72, 139, 102, 179, 171, 196…
$ aggression [3m[38;5;246m<dbl>[39m[23m 13, 38, 30, 23, 25, 46, 41, 22, 35, 23, 32, 34, 35, …
$ vidgames [3m[38;5;246m<dbl>[39m[23m 16, 12, 32, 10, 11, 29, 23, 15, 20, 20, 27, 27, 29, …
$ CaUnTs [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4…
describe and table are a great way to get a quick summary of the variables found in a dataset.describe(videogames, fast = TRUE)
aggression as our outcome, and vidgames and CaUnTs.str(videogames$aggression)
num [1:442] 13 38 30 23 25 46 41 22 35 23 ...
- attr(*, "label")= chr "Agression"
- attr(*, "format.stata")= chr "%12.0g"
table(videogames$aggression)
9 10 11 13 14 15 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
1 1 1 3 3 5 1 4 1 5 5 7 6 9 6 8 5 8 8 11 7 11 19 11
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
13 18 10 17 12 14 14 15 13 17 9 10 15 10 15 8 8 10 6 6 9 5 7 4
59 60 61 62 63 64 65 66 67 69 72 73 82
3 2 1 5 2 3 3 3 1 2 2 3 1
str(videogames$vidgames)
num [1:442] 16 12 32 10 11 29 23 15 20 20 ...
- attr(*, "label")= chr "Video Games (Hours per week)"
- attr(*, "format.stata")= chr "%12.0g"
table(videogames$vidgames)
1 2 3 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
1 2 1 1 5 5 5 8 8 24 10 15 11 18 22 27 30 20 25 28 24 18 27 13
28 29 30 31 32 33 34 35 36 37 38
13 21 13 7 8 7 8 4 4 5 4
str(videogames$CaUnTs)
num [1:442] 0 0 0 1 1 1 2 3 3 3 ...
- attr(*, "label")= chr "Callous Unemotional Traits"
- attr(*, "format.stata")= chr "%12.0g"
table(videogames$CaUnTs)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
3 3 1 6 10 6 11 14 17 18 19 14 14 20 18 19 12 11 16 18 11 20 12 11
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 41 42 43
11 14 11 9 11 5 16 8 9 12 6 6 2 7 2 6 1 1 1
CaUnTs. In this process, we are also making a new dataset. Remember, this is a great way to keep your original data set as is if you ever need to go back!videogames.clean <- videogames %>%
rename(.,
callous = CaUnTs)
glimpse(videogames.clean)
Rows: 442
Columns: 4
$ ID [3m[38;5;246m<dbl>[39m[23m 69, 55, 7, 96, 130, 124, 72, 139, 102, 179, 171, 196…
$ aggression [3m[38;5;246m<dbl>[39m[23m 13, 38, 30, 23, 25, 46, 41, 22, 35, 23, 32, 34, 35, …
$ vidgames [3m[38;5;246m<dbl>[39m[23m 16, 12, 32, 10, 11, 29, 23, 15, 20, 20, 27, 27, 29, …
$ callous [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4…
linearmodel1 <- lm(aggression ~ vidgames + callous, data = videogames.clean)
summary(linearmodel1)
Call:
lm(formula = aggression ~ vidgames + callous, data = videogames.clean)
Residuals:
Min 1Q Median 3Q Max
-27.952 -6.696 -0.168 7.022 32.499
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 21.76433 1.80731 12.042 < 2e-16 ***
vidgames 0.18769 0.06940 2.705 0.00711 **
callous 0.76312 0.05024 15.191 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 10.13 on 439 degrees of freedom
Multiple R-squared: 0.3559, Adjusted R-squared: 0.353
F-statistic: 121.3 on 2 and 439 DF, p-value: < 2.2e-16
linearmodel2 <- lm(aggression ~ vidgames + callous + vidgames*callous, data = videogames.clean)
summary(linearmodel2)
Call:
lm(formula = aggression ~ vidgames + callous + vidgames * callous,
data = videogames.clean)
Residuals:
Min 1Q Median 3Q Max
-29.7144 -6.9087 -0.1923 6.9036 29.2290
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 33.120233 3.427254 9.664 < 2e-16 ***
vidgames -0.333597 0.150826 -2.212 0.027495 *
callous 0.168949 0.161049 1.049 0.294731
vidgames:callous 0.027062 0.006981 3.877 0.000122 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9.976 on 438 degrees of freedom
Multiple R-squared: 0.3773, Adjusted R-squared: 0.373
F-statistic: 88.46 on 3 and 438 DF, p-value: < 2.2e-16
interplot to get predictions of aggressive behavior at different values of vidgames and callous…library(interplot)
Loading required package: abind
Loading required package: arm
Loading required package: MASS
Attaching package: ‘MASS’
The following object is masked from ‘package:dplyr’:
select
Loading required package: Matrix
Attaching package: ‘Matrix’
The following objects are masked from ‘package:tidyr’:
expand, pack, unpack
Loading required package: lme4
package ‘lme4’ was built under R version 3.6.2Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'data.table':
method from
print.data.table
arm (Version 1.11-2, built: 2020-7-27)
Working directory is /Volumes/GoogleDrive/My Drive/2020_2021_VCU/Multivariate Statistics/Course Materials/Week 3
Attaching package: ‘arm’
The following objects are masked from ‘package:psych’:
logit, rescale, sim
interplot(linearmodel2, var1 = "vidgames", var2 = "callous", hist = TRUE)
#Part 3: Continuous X / Dummy Moderator (0 1) ## Sometimes, we want to conduct a moderator analysis when one of our variables is categorical. Here, we create a dummy variable for people with high and low levels of callous. Note: if you have this as continuous measure originally, you would leave it as continuous. We are just creating this categorical variable for demonstration purposes.
ggplot(data = videogames.clean, mapping = aes(x = callous)) + geom_bar() +
labs(title = "Distribution of CaUnTs Personality Scores",
x = "Callous Unemotional Traits")
case_when to create a new variable called high_callous that is equal to 1 for all values of 20 or greater, and 0 for all values less than 20.videogames.clean <- videogames.clean %>%
mutate(.,
high_callous = case_when(
callous >= 20 ~ 1,
callous < 20 ~ 0
))
table(videogames.clean$high_callous)
0 1
250 192
library(expss)
package ‘expss’ was built under R version 3.6.2Registered S3 methods overwritten by 'expss':
method from
[.labelled Hmisc
as.data.frame.labelled base
print.labelled Hmisc
Use 'expss_output_viewer()' to display tables in the RStudio Viewer.
To return to the console output, use 'expss_output_default()'.
Attaching package: ‘expss’
The following object is masked from ‘package:lme4’:
dummy
The following objects are masked from ‘package:haven’:
is.labelled, read_spss
The following objects are masked from ‘package:stringr’:
fixed, regex
The following objects are masked from ‘package:dplyr’:
between, compute, contains, first, last, na_if, recode, vars
The following objects are masked from ‘package:purrr’:
keep, modify, modify_if, transpose, when
The following objects are masked from ‘package:tidyr’:
contains, nest
The following object is masked from ‘package:ggplot2’:
vars
val_lab(videogames.clean$high_callous) = num_lab("
1 High Callous
0 Low Callous
")
str(videogames.clean$high_callous)
Class 'labelled' num [1:442] 0 0 0 0 0 0 0 0 0 0 ...
.. .. VALUE LABELS [1:2]: 0=Low Callous, 1=High Callous
linearmodel3 <- lm(aggression ~ vidgames + high_callous + vidgames*high_callous, data = videogames.clean)
summary(linearmodel3)
Call:
lm(formula = aggression ~ vidgames + high_callous + vidgames *
high_callous, data = videogames.clean)
Residuals:
Min 1Q Median 3Q Max
-30.9825 -6.3774 -0.0474 6.8175 30.8649
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 35.85328 2.23308 16.056 < 2e-16 ***
vidgames -0.06529 0.09792 -0.667 0.505
high_callous -1.02189 3.31738 -0.308 0.758
vidgames:high_callous 0.63694 0.14453 4.407 1.32e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 10.53 on 438 degrees of freedom
Multiple R-squared: 0.3064, Adjusted R-squared: 0.3016
F-statistic: 64.49 on 3 and 438 DF, p-value: < 2.2e-16
interplot(linearmodel3, var1 = "vidgames", var2 = "high_callous")
vidgames.ggplot(data = videogames.clean, mapping = aes(x = vidgames)) + geom_bar() +
labs(title = "Distribution of Hours of Video Games Played",
x = "Video Games (Hours Per Week)")
vidgames into high and low categories.videogames.clean <- videogames.clean %>%
mutate(.,
high_vidgame = case_when(
vidgames >= 22 ~ 1,
vidgames < 22 ~ 0
))
##Code for labelling the new dummy variable:
val_lab(videogames.clean$high_vidgame) = num_lab("
1 High Video Games
0 Low Video Games")
str(videogames.clean$high_vidgame)
Class 'labelled' num [1:442] 0 0 1 0 0 1 1 0 0 0 ...
.. .. VALUE LABELS [1:2]: 0=Low Video Games, 1=High Video Games
high_callous as a dichotomous (0 1) moderator of high_vidgame…linearmodel4 <- lm(aggression ~ high_vidgame + high_callous + high_vidgame*high_callous, data = videogames.clean)
summary(linearmodel4)
Call:
lm(formula = aggression ~ high_vidgame + high_callous + high_vidgame *
high_callous, data = videogames.clean)
Residuals:
Min 1Q Median 3Q Max
-27.7745 -7.2507 -0.4094 6.5122 31.2255
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 35.488 0.954 37.200 < 2e-16 ***
high_vidgame -2.078 1.338 -1.553 0.121
high_callous 8.034 1.468 5.475 7.4e-08 ***
high_vidgame:high_callous 9.331 2.033 4.590 5.8e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 10.58 on 438 degrees of freedom
Multiple R-squared: 0.2997, Adjusted R-squared: 0.2949
F-statistic: 62.47 on 3 and 438 DF, p-value: < 2.2e-16
ggplot(data = videogames.clean, mapping = aes(x = as_factor(high_callous), y = aggression, color = as_factor(high_vidgame))) +
geom_boxplot() +
labs(title = "Distribution of Aggression Scores, by Video Games Played and Personality Trait",
y = "Aggression Score",
x = "Hours of Video Games Played",
caption = "N = 442.")
callous and vidgames by subtracting their respective means from each score.videogames.clean <- videogames.clean %>%
mutate(.,
vidgames_cent = vidgames - mean(vidgames),
callous_cent = callous - mean(callous))
describe(videogames.clean)
linearmodel5 <- lm(aggression ~ vidgames_cent + callous_cent + vidgames_cent*callous_cent, data = videogames.clean)
summary(linearmodel5)
Call:
lm(formula = aggression ~ vidgames_cent + callous_cent + vidgames_cent *
callous_cent, data = videogames.clean)
Residuals:
Min 1Q Median 3Q Max
-29.7144 -6.9087 -0.1923 6.9036 29.2290
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 39.967108 0.475057 84.131 < 2e-16 ***
vidgames_cent 0.169625 0.068473 2.477 0.013616 *
callous_cent 0.760093 0.049458 15.368 < 2e-16 ***
vidgames_cent:callous_cent 0.027062 0.006981 3.877 0.000122 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9.976 on 438 degrees of freedom
Multiple R-squared: 0.3773, Adjusted R-squared: 0.373
F-statistic: 88.46 on 3 and 438 DF, p-value: < 2.2e-16
interplot(linearmodel5, var1 = "vidgames_cent", var2 = "callous_cent", hist = TRUE)
library(modelsummary)
package ‘modelsummary’ was built under R version 3.6.2
Attaching package: ‘modelsummary’
The following object is masked from ‘package:psych’:
SD
models <- list(linearmodel1,
linearmodel2,
linearmodel3,
linearmodel4,
linearmodel5)
modelsummary(models, output = "default")
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | |
|---|---|---|---|---|---|
| (Intercept) | 21.764 | 33.120 | 35.853 | 35.488 | 39.967 |
| (1.807) | (3.427) | (2.233) | (0.954) | (0.475) | |
| callous | 0.763 | 0.169 | |||
| (0.050) | (0.161) | ||||
| vidgames | 0.188 | -0.334 | -0.065 | ||
| (0.069) | (0.151) | (0.098) | |||
| vidgames × callous | 0.027 | ||||
| (0.007) | |||||
| high_callous | -1.022 | 8.034 | |||
| (3.317) | (1.468) | ||||
| vidgames × high_callous | 0.637 | ||||
| (0.145) | |||||
| high_vidgame | -2.078 | ||||
| (1.338) | |||||
| high_vidgame × high_callous | 9.331 | ||||
| (2.033) | |||||
| callous_cent | 0.760 | ||||
| (0.049) | |||||
| vidgames_cent | 0.170 | ||||
| (0.068) | |||||
| vidgames_cent × callous_cent | 0.027 | ||||
| (0.007) | |||||
| Num.Obs. | 442 | 442 | 442 | 442 | 442 |
| R2 | 0.356 | 0.377 | 0.306 | 0.300 | 0.377 |
| R2 Adj. | 0.353 | 0.373 | 0.302 | 0.295 | 0.373 |
| AIC | 3306.6 | 3293.7 | 3341.4 | 3345.6 | 3293.7 |
| BIC | 3323.0 | 3314.2 | 3361.8 | 3366.1 | 3314.2 |
| Log.Lik. | -1649.311 | -1641.856 | -1665.691 | -1667.822 | -1641.856 |
| F | 121.298 | 88.459 | 64.488 | 62.468 | 88.459 |