Homework 2

Load packages

# Add additional packages you need
library(tidyverse)

## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'

## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'

## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'hms'

library(psych)
library(modelsummary)

## Warning: package 'modelsummary' was built under R version 4.1.2

library(interactions)
library(gridExtra)

#install.packages("sjPlot")
library(sjPlot)

Multiple Regression Analysis

# Run regression using lm()

salary_regression = lm(salary ~ sex + time, data = salary_dat)
summary(salary_regression)

## 
## Call:
## lm(formula = salary ~ sex + time, data = salary_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15139.0  -5351.9   -464.2   4289.7  24117.0 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  46349.0     2237.8  20.712  < 2e-16 ***
## sex          -1493.8     2043.6  -0.731    0.468    
## time          1342.7      238.8   5.623 5.41e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7801 on 59 degrees of freedom
## Multiple R-squared:  0.3752, Adjusted R-squared:  0.354 
## F-statistic: 17.71 on 2 and 59 DF,  p-value: 9.429e-07

Table of coefficients

# Summarize your regression coefficients in a table. 

msummary(salary_regression)

	Model 1
(Intercept)	46348.999
	(2237.773)
sex	−1493.783
	(2043.559)
time	1342.687
	(238.793)
Num.Obs.	62
R2	0.375
R2 Adj.	0.354
AIC	1292.2
BIC	1300.7
Log.Lik.	−642.081
F	17.715
RMSE	7801.04

Regression slopes of [Predictor 1] and [Predictor 2] predicting salary

# Insert code for plotting the slope. You can use the 
# `sjPlot::plot_model(..., show.data = TRUE)` function.

sjPlot::plot_model(salary_regression, type = "pred", show.data = TRUE)

## $sex

## 
## $time

Interpretation

Based on the model, for two people with the same level of time, the person with one unit higher on sex is expected to report a higher salary by $1,342.70.

Simulation

[The only change you need to change should be m3 in the following code.]

# Simulate 100 data sets based on your model results by modifying the following
# codes
your_model <- salary_regression  # change `m3` to the object name of your regression model
# Set the seed so that you always get the same simulated data set 
set.seed(1331)
# No need to change the following
num_simulations <- 1000
# Simulate new salary variables
simulated_salary100 <- simulate(your_model, nsim = num_simulations)
# Placeholders for storing simulation results
simulated_slopes <- matrix(nrow = num_simulations, ncol = 2, 
                           dimnames = list(NULL, 
                                           c("slopes_x1", "slopes_x2")))
# Run regression 100 times, each time using a different simulated salary outcome
for (i in 1:num_simulations) {
  reg_model_sim <- update(your_model, 
                          simulated_salary100[ , i] ~ .)
  # Store slope results
  simulated_slopes[i, ] <- coef(reg_model_sim)[2:3]
}
# Plot the sampling distributions
as_tibble(simulated_slopes) %>% 
  pivot_longer(1:2, names_to = "predictor", 
               values_to = "slopes", 
               names_prefix = "slopes_") %>% 
  ggplot(aes(x = slopes)) + 
  geom_histogram() + 
  facet_wrap(~ predictor, scales = "free_x")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The histograms both had very messy distributions at 100 permutations, but when changing it from 100 to 1,000 allowed the histograms to better converge to a normal distribution.

Interaction plot

# Include interaction in lm()

salary_dat <- salary_dat %>%
  mutate(time_c = time - mean(time),
          sex_c = sex - mean(sex))
cor(select(salary_dat, time, time_c, sex, sex_c))

##              time     time_c        sex      sex_c
## time    1.0000000  1.0000000 -0.2095909 -0.2095909
## time_c  1.0000000  1.0000000 -0.2095909 -0.2095909
## sex    -0.2095909 -0.2095909  1.0000000  1.0000000
## sex_c  -0.2095909 -0.2095909  1.0000000  1.0000000

mra3 <- lm(salary ~ time * sex, salary_dat)
mra3_c <- lm(salary ~ time_c * sex_c, salary_dat)

summary(mra3)

## 
## Call:
## lm(formula = salary ~ time * sex, data = salary_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14396.5  -5596.2    -47.5   4513.7  23037.0 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  45150.0     2406.3  18.763  < 2e-16 ***
## time          1501.0      266.5   5.632 5.46e-07 ***
## sex           3215.3     4137.0   0.777    0.440    
## time:sex      -765.9      586.1  -1.307    0.196    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7755 on 58 degrees of freedom
## Multiple R-squared:  0.3931, Adjusted R-squared:  0.3617 
## F-statistic: 12.52 on 3 and 58 DF,  p-value: 2.034e-06

summary(mra3_c)

## 
## Call:
## lm(formula = salary ~ time_c * sex_c, data = salary_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14396.5  -5596.2    -47.5   4513.7  23037.0 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   54478.0     1018.2  53.505  < 2e-16 ***
## time_c         1167.5      272.6   4.283 7.03e-05 ***
## sex_c         -1985.2     2065.9  -0.961    0.341    
## time_c:sex_c   -765.9      586.1  -1.307    0.196    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7755 on 58 degrees of freedom
## Multiple R-squared:  0.3931, Adjusted R-squared:  0.3617 
## F-statistic: 12.52 on 3 and 58 DF,  p-value: 2.034e-06

# Interaction plot

interact_plot(mra3_c, pred=time_c, modx = sex_c)

interact_plot(mra3_c, pred=sex_c, modx = time_c)

Homework 2

Kamryn Mattingly

2022-04-11