# Packages required by this analysis; they are attached in the order listed.
packages <- c(
  "tidyverse", "Lock5Data", "modelsummary", "effects", "survey", "car",
  "interactions", "kableExtra", "flextable", "scales", "sjPlot", "sjmisc",
  "rockchalk", "nnet", "ggeffects", "carData"
)

# Install only the packages that are missing from the local library.
new_packages <- packages[!(packages %in% rownames(installed.packages()))]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# library() returns the current search path, so an unwrapped lapply() at top
# level auto-prints one character vector per package. invisible() keeps the
# attach side effect and suppresses that printed list.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
##
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
##
## Attaching package: 'car'
##
##
## The following object is masked from 'package:dplyr':
##
## recode
##
##
## The following object is masked from 'package:purrr':
##
## some
##
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## Attaching package: 'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
##
##
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
##
##
## Attaching package: 'sjmisc'
##
##
## The following object is masked from 'package:purrr':
##
## is_empty
##
##
## The following object is masked from 'package:tidyr':
##
## replace_na
##
##
## The following object is masked from 'package:tibble':
##
## add_case
##
##
##
## Attaching package: 'rockchalk'
##
##
## The following object is masked from 'package:dplyr':
##
## summarize
##
##
##
## Attaching package: 'ggeffects'
##
##
## The following object is masked from 'package:interactions':
##
## johnson_neyman
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "Lock5Data" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "Lock5Data" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "effects" "carData" "modelsummary" "Lock5Data" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[5]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "Lock5Data" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
##
## [[6]]
## [1] "car" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "Lock5Data" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "interactions" "car" "survey" "survival" "Matrix"
## [6] "grid" "effects" "carData" "modelsummary" "Lock5Data"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "kableExtra" "interactions" "car" "survey" "survival"
## [6] "Matrix" "grid" "effects" "carData" "modelsummary"
## [11] "Lock5Data" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "flextable" "kableExtra" "interactions" "car" "survey"
## [6] "survival" "Matrix" "grid" "effects" "carData"
## [11] "modelsummary" "Lock5Data" "lubridate" "forcats" "stringr"
## [16] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [21] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "scales" "flextable" "kableExtra" "interactions" "car"
## [6] "survey" "survival" "Matrix" "grid" "effects"
## [11] "carData" "modelsummary" "Lock5Data" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
##
## [[11]]
## [1] "sjPlot" "scales" "flextable" "kableExtra" "interactions"
## [6] "car" "survey" "survival" "Matrix" "grid"
## [11] "effects" "carData" "modelsummary" "Lock5Data" "lubridate"
## [16] "forcats" "stringr" "dplyr" "purrr" "readr"
## [21] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [26] "graphics" "grDevices" "utils" "datasets" "methods"
## [31] "base"
##
## [[12]]
## [1] "sjmisc" "sjPlot" "scales" "flextable" "kableExtra"
## [6] "interactions" "car" "survey" "survival" "Matrix"
## [11] "grid" "effects" "carData" "modelsummary" "Lock5Data"
## [16] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [21] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [26] "stats" "graphics" "grDevices" "utils" "datasets"
## [31] "methods" "base"
##
## [[13]]
## [1] "rockchalk" "sjmisc" "sjPlot" "scales" "flextable"
## [6] "kableExtra" "interactions" "car" "survey" "survival"
## [11] "Matrix" "grid" "effects" "carData" "modelsummary"
## [16] "Lock5Data" "lubridate" "forcats" "stringr" "dplyr"
## [21] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [26] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [31] "datasets" "methods" "base"
##
## [[14]]
## [1] "nnet" "rockchalk" "sjmisc" "sjPlot" "scales"
## [6] "flextable" "kableExtra" "interactions" "car" "survey"
## [11] "survival" "Matrix" "grid" "effects" "carData"
## [16] "modelsummary" "Lock5Data" "lubridate" "forcats" "stringr"
## [21] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [26] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [31] "utils" "datasets" "methods" "base"
##
## [[15]]
## [1] "ggeffects" "nnet" "rockchalk" "sjmisc" "sjPlot"
## [6] "scales" "flextable" "kableExtra" "interactions" "car"
## [11] "survey" "survival" "Matrix" "grid" "effects"
## [16] "carData" "modelsummary" "Lock5Data" "lubridate" "forcats"
## [21] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [26] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [31] "grDevices" "utils" "datasets" "methods" "base"
##
## [[16]]
## [1] "ggeffects" "nnet" "rockchalk" "sjmisc" "sjPlot"
## [6] "scales" "flextable" "kableExtra" "interactions" "car"
## [11] "survey" "survival" "Matrix" "grid" "effects"
## [16] "carData" "modelsummary" "Lock5Data" "lubridate" "forcats"
## [21] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [26] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [31] "grDevices" "utils" "datasets" "methods" "base"
# Load the USStates data set and work on a copy named States.
data(USStates)
States <- USStates

# Clinton: 1 when Elect2016 is "D", 0 otherwise.
# The label is compared directly rather than using
# abs(as.numeric(Elect2016) - 2): that form depends on the internal order of
# the factor levels and yields NA if the column is stored as character.
States$Clinton <- as.numeric(States$Elect2016 == "D")
table(States$Clinton)
##
## 0 1
## 30 20
# Preview the identifier, the new indicator and two variables used later.
head(States[c("State", "Clinton", "College", "ClintonVote")])
## State Clinton College ClintonVote
## 1 Alabama 0 26.0 34.36
## 2 Alaska 0 26.5 36.55
## 3 Arizona 0 27.4 45.13
## 4 Arkansas 0 24.7 33.65
## 5 California 1 34.5 61.73
## 6 Colorado 1 39.6 48.16
TASK 1
# Model 1: linear regression of ClintonVote on College, HouseholdIncome and
# NonWhite.
m1 <- lm(
  ClintonVote ~ College + HouseholdIncome + NonWhite,
  data = States
)
summary(m1)
##
## Call:
## lm(formula = ClintonVote ~ College + HouseholdIncome + NonWhite,
## data = States)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.697 -3.605 -1.036 4.495 13.686
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.99082 6.01413 0.165 0.870
## College 1.12498 0.22514 4.997 8.88e-06 ***
## HouseholdIncome -0.07554 0.15345 -0.492 0.625
## NonWhite 0.43123 0.08246 5.230 4.05e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.622 on 46 degrees of freedom
## Multiple R-squared: 0.6155, Adjusted R-squared: 0.5904
## F-statistic: 24.55 on 3 and 46 DF, p-value: 1.237e-09
# Regression table for m1: show standard errors and test statistics, hide
# confidence intervals.
tab_model(
  m1,
  show.se = TRUE,
  show.stat = TRUE,
  show.ci = FALSE
)
|   | ClintonVote | |||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Statistic | p |
| (Intercept) | 0.99 | 6.01 | 0.16 | 0.870 |
| College | 1.12 | 0.23 | 5.00 | <0.001 |
| HouseholdIncome | -0.08 | 0.15 | -0.49 | 0.625 |
| NonWhite | 0.43 | 0.08 | 5.23 | <0.001 |
| Observations | 50 | |||
| R2 / R2 adjusted | 0.616 / 0.590 | |||
TASK 2
# Model 2: Model 1 plus Region entered as a categorical predictor.
m2 <- lm(
  ClintonVote ~ College + HouseholdIncome + NonWhite + Region,
  data = States
)
summary(m2)
##
## Call:
## lm(formula = ClintonVote ~ College + HouseholdIncome + NonWhite +
## Region, data = States)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3217 -4.1695 0.0435 4.2880 11.2477
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.80589 7.15877 2.068 0.0447 *
## College 1.18862 0.25668 4.631 3.36e-05 ***
## HouseholdIncome -0.42863 0.17107 -2.506 0.0161 *
## NonWhite 0.52461 0.08175 6.417 9.03e-08 ***
## RegionNE 8.03052 2.72062 2.952 0.0051 **
## RegionS -3.40381 2.74204 -1.241 0.2212
## RegionW 5.65798 2.77249 2.041 0.0474 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.82 on 43 degrees of freedom
## Multiple R-squared: 0.7224, Adjusted R-squared: 0.6836
## F-statistic: 18.65 on 6 and 43 DF, p-value: 1.545e-10
# Compare the two linear models by AIC.
AIC(m1, m2)
## df AIC
## m1 5 336.7667
## m2 8 326.4857
# Compare the two linear models by BIC.
BIC(m1, m2)
## df BIC
## m1 5 346.3268
## m2 8 341.7819
# Side-by-side table of both linear models; the list names become the column
# headers, with standard errors shown under each estimate.
m3 <- list(M1 = m1, M2 = m2)
modelsummary(
  m3,
  statistic = "std.error",
  fmt = 2
)
| M1 | M2 | |
|---|---|---|
| (Intercept) | 0.99 | 14.81 |
| (6.01) | (7.16) | |
| College | 1.12 | 1.19 |
| (0.23) | (0.26) | |
| HouseholdIncome | -0.08 | -0.43 |
| (0.15) | (0.17) | |
| NonWhite | 0.43 | 0.52 |
| (0.08) | (0.08) | |
| RegionNE | 8.03 | |
| (2.72) | ||
| RegionS | -3.40 | |
| (2.74) | ||
| RegionW | 5.66 | |
| (2.77) | ||
| Num.Obs. | 50 | 50 |
| R2 | 0.616 | 0.722 |
| R2 Adj. | 0.590 | 0.684 |
| AIC | 336.8 | 326.5 |
| BIC | 346.3 | 341.8 |
| Log.Lik. | -163.383 | -155.243 |
| RMSE | 6.35 | 5.40 |
TASK 3
# Interaction model: College and NonWhite main effects plus their product term.
m.interact <- lm(
  ClintonVote ~ College + NonWhite + College:NonWhite,
  data = States
)
summary(m.interact)
##
## Call:
## lm(formula = ClintonVote ~ College + NonWhite + College:NonWhite,
## data = States)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.446 -3.543 -0.295 3.936 13.831
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.22574 13.26349 -1.148 0.256927
## College 1.50058 0.39913 3.760 0.000479 ***
## NonWhite 0.99725 0.47769 2.088 0.042397 *
## College:NonWhite -0.01802 0.01457 -1.236 0.222664
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.532 on 46 degrees of freedom
## Multiple R-squared: 0.6259, Adjusted R-squared: 0.6015
## F-statistic: 25.66 on 3 and 46 DF, p-value: 6.636e-10
# Effect display for the College x NonWhite interaction in m.interact.
# The effects package is already attached by the package-loading step at the
# top of the script, so it is not loaded again here.
#
# Both predictors are evaluated inside their observed ranges only. A fixed
# 0-100 grid asks the linear fit for predictions at values such as 0% or 100%
# college educated, and 11 NonWhite levels produce 11 panels that are too
# small to read.
college_grid <- seq(
  min(States$College, na.rm = TRUE),
  max(States$College, na.rm = TRUE),
  length.out = 20
)

# Low / typical / high NonWhite values taken from the data.
nonwhite_levels <- unname(round(
  quantile(States$NonWhite, probs = c(0.10, 0.50, 0.90), na.rm = TRUE),
  1
))

interaction_plot <- effect(
  "College*NonWhite",
  m.interact,
  xlevels = list(College = college_grid, NonWhite = nonwhite_levels)
)
plot(
  interaction_plot,
  main = "Interaction Effect of College and NonWhite on ClintonVote",
  xlab = "% College Educated",
  ylab = "% Clinton Vote"
)
TASK 4
# ClintonWin: 1 when ClintonVote is above 50, 0 otherwise.
# NOTE(review): this is a majority threshold, not the electoral outcome.
# Colorado has Elect2016 = "D" and Clinton = 1 but ClintonVote = 48.16, so
# ClintonWin = 0. Confirm this is the intended definition.
States$ClintonWin <- as.numeric(States$ClintonVote > 50)
head(States)
## State HouseholdIncome Region Population EighthGradeMath HighSchool
## 1 Alabama 46.472 S 4.875 268.7 87.1
## 2 Alaska 76.114 W 0.740 274.3 92.8
## 3 Arizona 53.510 W 7.016 279.9 87.1
## 4 Arkansas 43.813 S 3.004 274.4 89.1
## 5 California 67.169 W 39.537 275.6 87.4
## 6 Colorado 65.458 W 5.607 284.7 91.5
## College IQ GSP Vegetables Fruit Smokers PhysicalActivity Obese NonWhite
## 1 26.0 95.7 40.279 80.7 55.1 20.9 42.8 36.2 31.6
## 2 26.5 99.0 70.936 81.0 63.1 21.0 58.3 29.5 34.7
## 3 27.4 97.4 43.096 79.2 62.8 15.6 52.7 29.5 22.5
## 4 24.7 97.5 38.467 80.7 55.3 22.3 45.4 37.1 22.7
## 5 34.5 95.5 67.698 78.6 67.5 11.3 57.5 25.8 39.4
## 6 39.6 101.6 59.057 82.6 67.0 14.6 58.7 23.0 15.8
## HeavyDrinkers Electoral ClintonVote Elect2016 TwoParents StudentSpending
## 1 5.45 9 34.36 R 60.9 9.236
## 2 7.33 3 36.55 R 71.5 17.510
## 3 5.57 11 45.13 R 62.7 7.613
## 4 5.32 6 33.65 R 63.3 9.846
## 5 5.95 55 61.73 D 66.8 11.495
## 6 7.30 9 48.16 D 71.9 9.575
## Insured Clinton ClintonWin
## 1 83.7 0 0
## 2 80.2 0 0
## 3 83.4 0 0
## 4 84.1 0 0
## 5 85.2 1 1
## 6 87.2 1 0
# Logistic regression of ClintonWin on College, HouseholdIncome and Region.
# NOTE(review): the RegionS estimate (about -16.3 with a standard error near
# 2421, after 18 Fisher scoring iterations) looks like (quasi-)complete
# separation, presumably because no state in region S has ClintonWin = 1.
# Verify before interpreting that coefficient.
m.logit <- glm(
  ClintonWin ~ College + HouseholdIncome + Region,
  family = binomial(link = "logit"),
  data = States
)
summary(m.logit)
##
## Call:
## glm(formula = ClintonWin ~ College + HouseholdIncome + Region,
## family = binomial(link = "logit"), data = States)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -17.16240 6.31603 -2.717 0.00658 **
## College 0.28494 0.14633 1.947 0.05150 .
## HouseholdIncome 0.07628 0.07376 1.034 0.30110
## RegionNE 2.17592 1.40578 1.548 0.12166
## RegionS -16.31016 2420.64651 -0.007 0.99462
## RegionW 2.58895 1.62430 1.594 0.11096
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 57.306 on 49 degrees of freedom
## Residual deviance: 25.780 on 44 degrees of freedom
## AIC: 37.78
##
## Number of Fisher Scoring iterations: 18
# Coefficient table for the logit model (log-odds scale), with standard errors
# shown under each estimate.
modelsummary(
  m.logit,
  statistic = "std.error",
  fmt = 2
)
| (1) | |
|---|---|
| (Intercept) | -17.16 |
| (6.32) | |
| College | 0.28 |
| (0.15) | |
| HouseholdIncome | 0.08 |
| (0.07) | |
| RegionNE | 2.18 |
| (1.41) | |
| RegionS | -16.31 |
| (2420.65) | |
| RegionW | 2.59 |
| (1.62) | |
| Num.Obs. | 50 |
| AIC | 37.8 |
| BIC | 49.3 |
| Log.Lik. | -12.890 |
| RMSE | 0.29 |
# Table for the logit model, reported as odds ratios in the rendered output;
# show standard errors and test statistics, hide confidence intervals.
tab_model(
  m.logit,
  show.se = TRUE,
  show.stat = TRUE,
  show.ci = FALSE
)
|   | ClintonWin | |||
|---|---|---|---|---|
| Predictors | Odds Ratios | std. Error | Statistic | p |
| (Intercept) | 0.00 | 0.00 | -2.72 | 0.007 |
| College | 1.33 | 0.19 | 1.95 | 0.052 |
| HouseholdIncome | 1.08 | 0.08 | 1.03 | 0.301 |
| Region [NE] | 8.81 | 12.39 | 1.55 | 0.122 |
| Region [S] | 0.00 | 0.00 | -0.01 | 0.995 |
| Region [W] | 13.32 | 21.63 | 1.59 | 0.111 |
| Observations | 50 | |||
| R2 Tjur | 0.564 | |||
TASK 5
# Effect displays for every term in the logit model.
effect_plot <- allEffects(m.logit)
plot(effect_plot)

# Effect plot of the same model for the three predictors, in the order given
# in `terms`.
plot_model(
  m.logit,
  type = "eff",
  terms = c("College", "HouseholdIncome", "Region")
)