library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
[37m── [1mAttaching packages[22m ─────────────────────────────── tidyverse 1.3.0 ──[39m
[37m[32m✓[37m [34mggplot2[37m 3.3.2 [32m✓[37m [34mpurrr [37m 0.3.4
[32m✓[37m [34mtibble [37m 3.0.3 [32m✓[37m [34mdplyr [37m 1.0.0
[32m✓[37m [34mtidyr [37m 1.1.0 [32m✓[37m [34mstringr[37m 1.4.0
[32m✓[37m [34mreadr [37m 1.3.1 [32m✓[37m [34mforcats[37m 0.5.0[39m
package ‘ggplot2’ was built under R version 3.6.2package ‘tibble’ was built under R version 3.6.2package ‘tidyr’ was built under R version 3.6.2package ‘purrr’ was built under R version 3.6.2package ‘dplyr’ was built under R version 3.6.2[37m── [1mConflicts[22m ────────────────────────────────── tidyverse_conflicts() ──
[31mx[37m [34mdplyr[37m::[32mfilter()[37m masks [34mstats[37m::filter()
[31mx[37m [34mdplyr[37m::[32mlag()[37m masks [34mstats[37m::lag()[39m
library(psych)
Attaching package: ‘psych’
The following objects are masked from ‘package:ggplot2’:
%+%, alpha
library(dplyr)
library(ggplot2)
library(haven)
# Read the Stata file; the imported columns carry their Stata variable labels.
videogames <- read_dta("VideoGames.dta")
# View() opens the interactive data viewer, so only call it when a viewer can
# actually be shown (skipped when the script is run in batch or knitted).
if (interactive()) {
  View(videogames)
}
# describe and table are a great way to get a quick summary of the variables
# found in a dataset.
describe(videogames, fast = TRUE)
# aggression as our outcome, and vidgames and CaUnTs.
# Inspect the structure (type, length, attributes) of the outcome variable.
str(videogames$aggression)
num [1:442] 13 38 30 23 25 46 41 22 35 23 ...
- attr(*, "label")= chr "Agression"
- attr(*, "format.stata")= chr "%12.0g"
# Frequency table: how many rows have each distinct aggression score.
table(videogames$aggression)
9 10 11 13 14 15 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
1 1 1 3 3 5 1 4 1 5 5 7 6 9 6 8 5 8 8 11 7 11 19 11 13 18 10 17 12 14
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 69 72 73
14 15 13 17 9 10 15 10 15 8 8 10 6 6 9 5 7 4 3 2 1 5 2 3 3 3 1 2 2 3
82
1
# Inspect the structure (type, length, attributes) of vidgames.
str(videogames$vidgames)
num [1:442] 16 12 32 10 11 29 23 15 20 20 ...
- attr(*, "label")= chr "Video Games (Hours per week)"
- attr(*, "format.stata")= chr "%12.0g"
# Frequency table: how many rows have each distinct vidgames value.
table(videogames$vidgames)
1 2 3 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
1 2 1 1 5 5 5 8 8 24 10 15 11 18 22 27 30 20 25 28 24 18 27 13 13 21 13 7 8 7
34 35 36 37 38
8 4 4 5 4
# Inspect the structure (type, length, attributes) of CaUnTs.
str(videogames$CaUnTs)
num [1:442] 0 0 0 1 1 1 2 3 3 3 ...
- attr(*, "label")= chr "Callous Unemotional Traits"
- attr(*, "format.stata")= chr "%12.0g"
# Frequency table: how many rows have each distinct CaUnTs score.
table(videogames$CaUnTs)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
3 3 1 6 10 6 11 14 17 18 19 14 14 20 18 19 12 11 16 18 11 20 12 11 11 14 11 9 11 5
30 31 32 33 34 35 36 37 38 39 41 42 43
16 8 9 12 6 6 2 7 2 6 1 1 1
# CaUnTs. In this process, we are also making a new dataset. Remember, this is
# a great way to keep your original data set as is if you ever need to go back!
# The pipe already supplies the data frame as the first argument, so no
# explicit `.` placeholder is needed.
videogames.clean <- videogames %>%
  rename(callous = CaUnTs)
# Compact column-by-column preview to confirm the rename took effect.
glimpse(videogames.clean)
Rows: 442
Columns: 4
$ ID [3m[38;5;246m<dbl>[39m[23m 69, 55, 7, 96, 130, 124, 72, 139, 102, 179, 171, 196, 23, 111, 20, 1…
$ aggression [3m[38;5;246m<dbl>[39m[23m 13, 38, 30, 23, 25, 46, 41, 22, 35, 23, 32, 34, 35, 33, 21, 38, 26, …
$ vidgames [3m[38;5;246m<dbl>[39m[23m 16, 12, 32, 10, 11, 29, 23, 15, 20, 20, 27, 27, 29, 13, 15, 15, 15, …
$ callous [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…
# Model 1: main effects only -- aggression regressed on vidgames and callous.
linearmodel1 <- lm(aggression ~ vidgames + callous, data = videogames.clean)
# Print coefficients, standard errors, and fit statistics.
summary(linearmodel1)
Call:
lm(formula = aggression ~ vidgames + callous, data = videogames.clean)
Residuals:
Min 1Q Median 3Q Max
-27.952 -6.696 -0.168 7.022 32.499
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 21.76433 1.80731 12.042 < 2e-16 ***
vidgames 0.18769 0.06940 2.705 0.00711 **
callous 0.76312 0.05024 15.191 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 10.13 on 439 degrees of freedom
Multiple R-squared: 0.3559, Adjusted R-squared: 0.353
F-statistic: 121.3 on 2 and 439 DF, p-value: < 2.2e-16
# Model 2: adds the vidgames-by-callous interaction (the moderation term) to
# the main effects; it appears as `vidgames:callous` in the summary.
linearmodel2 <- lm(aggression ~ vidgames + callous + vidgames*callous, data = videogames.clean)
# Print coefficients, standard errors, and fit statistics.
summary(linearmodel2)
Call:
lm(formula = aggression ~ vidgames + callous + vidgames * callous,
data = videogames.clean)
Residuals:
Min 1Q Median 3Q Max
-29.7144 -6.9087 -0.1923 6.9036 29.2290
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 33.120233 3.427254 9.664 < 2e-16 ***
vidgames -0.333597 0.150826 -2.212 0.027495 *
callous 0.168949 0.161049 1.049 0.294731
vidgames:callous 0.027062 0.006981 3.877 0.000122 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9.976 on 438 degrees of freedom
Multiple R-squared: 0.3773, Adjusted R-squared: 0.373
F-statistic: 88.46 on 3 and 438 DF, p-value: < 2.2e-16
interplot to get predictions of aggressive behavior at different values of vidgames and callous…

# Part 3: Continuous X / Dummy Moderator (0 1)

## Sometimes, we want to conduct a moderator analysis when one of our variables is categorical. Here, we create a dummy variable for people with high and low levels of callous. Note: if you have this as a continuous measure originally, you would leave it as continuous. We are just creating this categorical variable for demonstration purposes.
# Bar chart of how many respondents have each callous score.
ggplot(videogames.clean, aes(x = callous)) +
  geom_bar() +
  labs(
    x = "Callous Unemotional Traits",
    title = "Distribution of CaUnTs Personality Scores"
  )
# case_when to create a new variable called high_callous that is equal to 1
# for all values of 20 or greater, and 0 for all values less than 20.
# Rows matching neither condition (i.e. a missing callous score) get NA.
videogames.clean <- videogames.clean %>%
  mutate(
    high_callous = case_when(
      callous >= 20 ~ 1,
      callous < 20 ~ 0
    )
  )
# Count how many respondents fall in the low (0) and high (1) callous groups.
table(videogames.clean$high_callous)
0 1
250 192
library(expss)
# Attach value labels to the 0/1 dummy so that it is reported as
# Low Callous / High Callous.
val_lab(videogames.clean$high_callous) <- num_lab("
1 High Callous
0 Low Callous
")
# Confirm the labels are now attached to the variable.
str(videogames.clean$high_callous)
Class 'labelled' num [1:442] 0 0 0 0 0 0 0 0 0 0 ...
.. .. VALUE LABELS [1:2]: 0=Low Callous, 1=High Callous
# Model 3: continuous predictor (vidgames) moderated by the 0/1 dummy
# high_callous; the interaction appears as `vidgames:high_callous`.
linearmodel3 <- lm(aggression ~ vidgames + high_callous + vidgames*high_callous, data = videogames.clean)
# Print coefficients, standard errors, and fit statistics.
summary(linearmodel3)
Call:
lm(formula = aggression ~ vidgames + high_callous + vidgames *
high_callous, data = videogames.clean)
Residuals:
Min 1Q Median 3Q Max
-30.9825 -6.3774 -0.0474 6.8175 30.8649
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 35.85328 2.23308 16.056 < 2e-16 ***
vidgames -0.06529 0.09792 -0.667 0.505
high_callous -1.02189 3.31738 -0.308 0.758
vidgames:high_callous 0.63694 0.14453 4.407 1.32e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 10.53 on 438 degrees of freedom
Multiple R-squared: 0.3064, Adjusted R-squared: 0.3016
F-statistic: 64.49 on 3 and 438 DF, p-value: < 2.2e-16
# Plot the estimated effect of vidgames at each level of high_callous.
# Namespace-qualified so the call works even if interplot is not attached.
interplot::interplot(linearmodel3, var1 = "vidgames", var2 = "high_callous")
# vidgames.
# Bar chart of how many respondents report each number of weekly hours.
ggplot(data = videogames.clean, mapping = aes(x = vidgames)) + geom_bar() +
  labs(title = "Distribution of Hours of Video Games Played",
       x = "Video Games (Hours Per Week)")
# vidgames into high and low categories.
# 1 = 22 or more hours per week, 0 = fewer than 22; a missing value stays NA.
videogames.clean <- videogames.clean %>%
  mutate(
    high_vidgame = case_when(
      vidgames >= 22 ~ 1,
      vidgames < 22 ~ 0
    )
  )
# Code for labelling the new dummy variable: attach value labels so the 0/1
# codes are reported as Low Video Games / High Video Games.
val_lab(videogames.clean$high_vidgame) <- num_lab("
1 High Video Games
0 Low Video Games")
# Confirm the labels are now attached to the variable.
str(videogames.clean$high_vidgame)
Class 'labelled' num [1:442] 0 0 1 0 0 1 1 0 0 0 ...
.. .. VALUE LABELS [1:2]: 0=Low Video Games, 1=High Video Games
# high_callous as a dichotomous (0 1) moderator of high_vidgame…
# Model 4: both the predictor and the moderator are 0/1 dummies; the
# interaction appears as `high_vidgame:high_callous` in the summary.
linearmodel4 <- lm(
  aggression ~ high_vidgame + high_callous + high_vidgame * high_callous,
  data = videogames.clean
)
summary(linearmodel4)
Call:
lm(formula = aggression ~ high_vidgame + high_callous + high_vidgame *
high_callous, data = videogames.clean)
Residuals:
Min 1Q Median 3Q Max
-27.7745 -7.2507 -0.4094 6.5122 31.2255
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 35.488 0.954 37.200 < 2e-16 ***
high_vidgame -2.078 1.338 -1.553 0.121
high_callous 8.034 1.468 5.475 7.4e-08 ***
high_vidgame:high_callous 9.331 2.033 4.590 5.8e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 10.58 on 438 degrees of freedom
Multiple R-squared: 0.2997, Adjusted R-squared: 0.2949
F-statistic: 62.47 on 3 and 438 DF, p-value: < 2.2e-16
# Plot the estimated effect of high_vidgame at each level of high_callous.
# Namespace-qualified so the call works even if interplot is not attached.
interplot::interplot(
  linearmodel4,
  var1 = "high_vidgame",
  var2 = "high_callous"
)
# callous and vidgames by subtracting their respective means from each score.
# After centering, 0 on each new variable corresponds to the sample mean.
videogames.clean <- videogames.clean %>%
  mutate(
    vidgames_cent = vidgames - mean(vidgames),
    callous_cent = callous - mean(callous)
  )
# Summary statistics for every column, including the new centered variables.
describe(videogames.clean)
# Model 5: the same interaction model as linearmodel2, but fitted on the
# mean-centered versions of vidgames and callous.
linearmodel5 <- lm(aggression ~ vidgames_cent + callous_cent + vidgames_cent*callous_cent, data = videogames.clean)
# Print coefficients, standard errors, and fit statistics.
summary(linearmodel5)
Call:
lm(formula = aggression ~ vidgames_cent + callous_cent + vidgames_cent *
callous_cent, data = videogames.clean)
Residuals:
Min 1Q Median 3Q Max
-29.7144 -6.9087 -0.1923 6.9036 29.2290
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 39.967108 0.475057 84.131 < 2e-16 ***
vidgames_cent 0.169625 0.068473 2.477 0.013616 *
callous_cent 0.760093 0.049458 15.368 < 2e-16 ***
vidgames_cent:callous_cent 0.027062 0.006981 3.877 0.000122 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9.976 on 438 degrees of freedom
Multiple R-squared: 0.3773, Adjusted R-squared: 0.373
F-statistic: 88.46 on 3 and 438 DF, p-value: < 2.2e-16
# Plot the estimated effect of vidgames_cent across values of callous_cent,
# passing hist = TRUE as in the original call.
# Namespace-qualified so the call works even if interplot is not attached.
interplot::interplot(
  linearmodel5,
  var1 = "vidgames_cent",
  var2 = "callous_cent",
  hist = TRUE
)
library(modelsummary)
# Gather the five fitted models (unnamed, so the table columns are labelled
# Model 1 ... Model 5) and print them side by side.
models <- list(
  linearmodel1, linearmodel2, linearmodel3, linearmodel4, linearmodel5
)
modelsummary(models, output = "default")
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | |
|---|---|---|---|---|---|
| (Intercept) | 21.764 | 33.120 | 35.853 | 35.488 | 39.967 |
| (1.807) | (3.427) | (2.233) | (0.954) | (0.475) | |
| callous | 0.763 | 0.169 | |||
| (0.050) | (0.161) | ||||
| vidgames | 0.188 | -0.334 | -0.065 | ||
| (0.069) | (0.151) | (0.098) | |||
| vidgames × callous | 0.027 | ||||
| (0.007) | |||||
| high_callous | -1.022 | 8.034 | |||
| (3.317) | (1.468) | ||||
| vidgames × high_callous | 0.637 | ||||
| (0.145) | |||||
| high_vidgame | -2.078 | ||||
| (1.338) | |||||
| high_vidgame × high_callous | 9.331 | ||||
| (2.033) | |||||
| callous_cent | 0.760 | ||||
| (0.049) | |||||
| vidgames_cent | 0.170 | ||||
| (0.068) | |||||
| vidgames_cent × callous_cent | 0.027 | ||||
| (0.007) | |||||
| Num.Obs. | 442 | 442 | 442 | 442 | 442 |
| R2 | 0.356 | 0.377 | 0.306 | 0.300 | 0.377 |
| R2 Adj. | 0.353 | 0.373 | 0.302 | 0.295 | 0.373 |
| AIC | 3306.6 | 3293.7 | 3341.4 | 3345.6 | 3293.7 |
| BIC | 3323.0 | 3314.2 | 3361.8 | 3366.1 | 3314.2 |
| Log.Lik. | -1649.311 | -1641.856 | -1665.691 | -1667.822 | -1641.856 |
| F | 121.298 | 88.459 | 64.488 | 62.468 | 88.459 |