# Packages required by this analysis; they are attached in the order listed.
packages <- c(
  "tidyverse", "Lock5Data", "modelsummary", "effects", "survey", "car",
  "interactions", "kableExtra", "flextable", "scales", "sjPlot", "sjmisc",
  "rockchalk", "nnet", "ggeffects", "carData"
)

# Install only the packages that are not already present in a local library.
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package by name.
# NOTE(review): at top level the list returned by lapply() is auto-printed
# (one snapshot of the attached packages per element); wrap the call in
# invisible() if that console output is not wanted.
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
## 
## Loading required package: carData
## 
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## 
## Loading required package: grid
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Loading required package: survival
## 
## 
## Attaching package: 'survey'
## 
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## 
## Attaching package: 'car'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## 
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
## 
## 
## Attaching package: 'sjmisc'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_empty
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     replace_na
## 
## 
## The following object is masked from 'package:tibble':
## 
##     add_case
## 
## 
## 
## Attaching package: 'rockchalk'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     summarize
## 
## 
## 
## Attaching package: 'ggeffects'
## 
## 
## The following object is masked from 'package:interactions':
## 
##     johnson_neyman
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "Lock5Data" "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "modelsummary" "Lock5Data"    "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "effects"      "carData"      "modelsummary" "Lock5Data"    "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[5]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "Lock5Data"    "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[6]]
##  [1] "car"          "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "Lock5Data"    "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "interactions" "car"          "survey"       "survival"     "Matrix"      
##  [6] "grid"         "effects"      "carData"      "modelsummary" "Lock5Data"   
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[8]]
##  [1] "kableExtra"   "interactions" "car"          "survey"       "survival"    
##  [6] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [11] "Lock5Data"    "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "flextable"    "kableExtra"   "interactions" "car"          "survey"      
##  [6] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [11] "modelsummary" "Lock5Data"    "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "scales"       "flextable"    "kableExtra"   "interactions" "car"         
##  [6] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [11] "carData"      "modelsummary" "Lock5Data"    "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "sjPlot"       "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "car"          "survey"       "survival"     "Matrix"       "grid"        
## [11] "effects"      "carData"      "modelsummary" "Lock5Data"    "lubridate"   
## [16] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [21] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [26] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [31] "base"        
## 
## [[12]]
##  [1] "sjmisc"       "sjPlot"       "scales"       "flextable"    "kableExtra"  
##  [6] "interactions" "car"          "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "Lock5Data"   
## [16] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [21] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [26] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [31] "methods"      "base"        
## 
## [[13]]
##  [1] "rockchalk"    "sjmisc"       "sjPlot"       "scales"       "flextable"   
##  [6] "kableExtra"   "interactions" "car"          "survey"       "survival"    
## [11] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [16] "Lock5Data"    "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[14]]
##  [1] "nnet"         "rockchalk"    "sjmisc"       "sjPlot"       "scales"      
##  [6] "flextable"    "kableExtra"   "interactions" "car"          "survey"      
## [11] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [16] "modelsummary" "Lock5Data"    "lubridate"    "forcats"      "stringr"     
## [21] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [26] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [31] "utils"        "datasets"     "methods"      "base"        
## 
## [[15]]
##  [1] "ggeffects"    "nnet"         "rockchalk"    "sjmisc"       "sjPlot"      
##  [6] "scales"       "flextable"    "kableExtra"   "interactions" "car"         
## [11] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [16] "carData"      "modelsummary" "Lock5Data"    "lubridate"    "forcats"     
## [21] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [26] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [31] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[16]]
##  [1] "ggeffects"    "nnet"         "rockchalk"    "sjmisc"       "sjPlot"      
##  [6] "scales"       "flextable"    "kableExtra"   "interactions" "car"         
## [11] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [16] "carData"      "modelsummary" "Lock5Data"    "lubridate"    "forcats"     
## [21] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [26] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [31] "grDevices"    "utils"        "datasets"     "methods"      "base"
# Load the USStates data set from Lock5Data and work on a copy named States.
data("USStates", package = "Lock5Data")
States <- USStates

# Indicator for the 2016 result: 1 when Elect2016 is "D", 0 otherwise.
# Comparing against the label (rather than doing arithmetic on the factor's
# internal integer codes) does not depend on how the levels happen to be
# ordered and also works if the column is stored as character.
States$Clinton <- as.integer(States$Elect2016 == "D")

# Frequency of each outcome as a quick sanity check on the recoding.
table(States$Clinton)
## 
##  0  1 
## 30 20
# Preview the first rows of the state name, the new indicator, and the
# College and ClintonVote columns.
head(
  States[, c("State", "Clinton", "College", "ClintonVote")]
)
##        State Clinton College ClintonVote
## 1    Alabama       0    26.0       34.36
## 2     Alaska       0    26.5       36.55
## 3    Arizona       0    27.4       45.13
## 4   Arkansas       0    24.7       33.65
## 5 California       1    34.5       61.73
## 6   Colorado       1    39.6       48.16

TASK 1

# Model 1: linear regression of ClintonVote on College, HouseholdIncome,
# and NonWhite, followed by the standard coefficient summary.
m1 <- lm(
  ClintonVote ~ College + HouseholdIncome + NonWhite,
  data = States
)
summary(m1)
## 
## Call:
## lm(formula = ClintonVote ~ College + HouseholdIncome + NonWhite, 
##     data = States)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.697  -3.605  -1.036   4.495  13.686 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.99082    6.01413   0.165    0.870    
## College          1.12498    0.22514   4.997 8.88e-06 ***
## HouseholdIncome -0.07554    0.15345  -0.492    0.625    
## NonWhite         0.43123    0.08246   5.230 4.05e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.622 on 46 degrees of freedom
## Multiple R-squared:  0.6155, Adjusted R-squared:  0.5904 
## F-statistic: 24.55 on 3 and 46 DF,  p-value: 1.237e-09
# Formatted regression table for m1: standard errors and test statistics are
# shown, confidence intervals are hidden.
tab_model(
  m1,
  show.ci = FALSE,
  show.se = TRUE,
  show.stat = TRUE
)
  ClintonVote
Predictors Estimates std. Error Statistic p
(Intercept) 0.99 6.01 0.16 0.870
College 1.12 0.23 5.00 <0.001
HouseholdIncome -0.08 0.15 -0.49 0.625
NonWhite 0.43 0.08 5.23 <0.001
Observations 50
R2 / R2 adjusted 0.616 / 0.590

TASK 2

# Model 2: the Model 1 predictors plus Region, followed by the standard
# coefficient summary.
m2 <- lm(
  ClintonVote ~ College + HouseholdIncome + NonWhite + Region,
  data = States
)
summary(m2)
## 
## Call:
## lm(formula = ClintonVote ~ College + HouseholdIncome + NonWhite + 
##     Region, data = States)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3217  -4.1695   0.0435   4.2880  11.2477 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     14.80589    7.15877   2.068   0.0447 *  
## College          1.18862    0.25668   4.631 3.36e-05 ***
## HouseholdIncome -0.42863    0.17107  -2.506   0.0161 *  
## NonWhite         0.52461    0.08175   6.417 9.03e-08 ***
## RegionNE         8.03052    2.72062   2.952   0.0051 ** 
## RegionS         -3.40381    2.74204  -1.241   0.2212    
## RegionW          5.65798    2.77249   2.041   0.0474 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.82 on 43 degrees of freedom
## Multiple R-squared:  0.7224, Adjusted R-squared:  0.6836 
## F-statistic: 18.65 on 6 and 43 DF,  p-value: 1.545e-10
# Akaike information criterion for both linear models, side by side.
AIC(m1, m2)
##    df      AIC
## m1  5 336.7667
## m2  8 326.4857
# Bayesian information criterion for both linear models, side by side.
BIC(m1, m2)
##    df      BIC
## m1  5 346.3268
## m2  8 341.7819
# Collect both linear models under the column labels used in the table,
# then print them side by side with standard errors under each estimate
# and two decimal places.
m3 <- list(M1 = m1, M2 = m2)
modelsummary(
  m3,
  fmt = 2,
  statistic = "std.error"
)
tinytable_3hzwy30m1745ym7kard9
M1 M2
(Intercept) 0.99 14.81
(6.01) (7.16)
College 1.12 1.19
(0.23) (0.26)
HouseholdIncome -0.08 -0.43
(0.15) (0.17)
NonWhite 0.43 0.52
(0.08) (0.08)
RegionNE 8.03
(2.72)
RegionS -3.40
(2.74)
RegionW 5.66
(2.77)
Num.Obs. 50 50
R2 0.616 0.722
R2 Adj. 0.590 0.684
AIC 336.8 326.5
BIC 346.3 341.8
Log.Lik. -163.383 -155.243
RMSE 6.35 5.40

TASK 3

# Interaction model: main effects of College and NonWhite plus their
# product term, followed by the standard coefficient summary.
m.interact <- lm(
  ClintonVote ~ College + NonWhite + College:NonWhite,
  data = States
)
summary(m.interact)
## 
## Call:
## lm(formula = ClintonVote ~ College + NonWhite + College:NonWhite, 
##     data = States)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.446  -3.543  -0.295   3.936  13.831 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -15.22574   13.26349  -1.148 0.256927    
## College            1.50058    0.39913   3.760 0.000479 ***
## NonWhite           0.99725    0.47769   2.088 0.042397 *  
## College:NonWhite  -0.01802    0.01457  -1.236 0.222664    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.532 on 46 degrees of freedom
## Multiple R-squared:  0.6259, Adjusted R-squared:  0.6015 
## F-statistic: 25.66 on 3 and 46 DF,  p-value: 6.636e-10
library(effects)

# Evaluate the College x NonWhite effect only where there is data.
# A fixed 0-100 grid for both predictors extrapolates far outside the observed
# states (with the fitted coefficients, College = 100 and NonWhite = 0 gives a
# predicted vote share above 100) and draws one panel per NonWhite value.
# Here College spans its observed range and NonWhite is held at its quartiles,
# so the plot has three interpretable panels.
college_grid <- seq(
  min(States$College, na.rm = TRUE),
  max(States$College, na.rm = TRUE),
  length.out = 25
)
nonwhite_levels <- unname(round(
  quantile(States$NonWhite, probs = c(0.25, 0.50, 0.75), na.rm = TRUE),
  1
))

interaction_plot <- effect(
  "College*NonWhite",
  m.interact,
  xlevels = list(College = college_grid, NonWhite = nonwhite_levels)
)
plot(
  interaction_plot,
  main = "Interaction Effect of College and NonWhite on ClintonVote",
  xlab = "% College Educated",
  ylab = "% Clinton Vote"
)

TASK 4

# Binary outcome: 1 when ClintonVote is above 50, otherwise 0.
# NOTE(review): this is a majority threshold, not "carried the state". In the
# recorded output Colorado has ClintonVote 48.16 with Clinton == 1 but
# ClintonWin == 0 -- confirm that > 50 is the intended definition.
States$ClintonWin <- as.numeric(States$ClintonVote > 50)

# Inspect the first rows, including the newly added column.
head(States)
##        State HouseholdIncome Region Population EighthGradeMath HighSchool
## 1    Alabama          46.472      S      4.875           268.7       87.1
## 2     Alaska          76.114      W      0.740           274.3       92.8
## 3    Arizona          53.510      W      7.016           279.9       87.1
## 4   Arkansas          43.813      S      3.004           274.4       89.1
## 5 California          67.169      W     39.537           275.6       87.4
## 6   Colorado          65.458      W      5.607           284.7       91.5
##   College    IQ    GSP Vegetables Fruit Smokers PhysicalActivity Obese NonWhite
## 1    26.0  95.7 40.279       80.7  55.1    20.9             42.8  36.2     31.6
## 2    26.5  99.0 70.936       81.0  63.1    21.0             58.3  29.5     34.7
## 3    27.4  97.4 43.096       79.2  62.8    15.6             52.7  29.5     22.5
## 4    24.7  97.5 38.467       80.7  55.3    22.3             45.4  37.1     22.7
## 5    34.5  95.5 67.698       78.6  67.5    11.3             57.5  25.8     39.4
## 6    39.6 101.6 59.057       82.6  67.0    14.6             58.7  23.0     15.8
##   HeavyDrinkers Electoral ClintonVote Elect2016 TwoParents StudentSpending
## 1          5.45         9       34.36         R       60.9           9.236
## 2          7.33         3       36.55         R       71.5          17.510
## 3          5.57        11       45.13         R       62.7           7.613
## 4          5.32         6       33.65         R       63.3           9.846
## 5          5.95        55       61.73         D       66.8          11.495
## 6          7.30         9       48.16         D       71.9           9.575
##   Insured Clinton ClintonWin
## 1    83.7       0          0
## 2    80.2       0          0
## 3    83.4       0          0
## 4    84.1       0          0
## 5    85.2       1          1
## 6    87.2       1          0
# Logistic regression of ClintonWin on College, HouseholdIncome, and Region.
# NOTE(review): in the recorded fit the RegionS coefficient is about -16 with
# a standard error in the thousands, and Fisher scoring took 18 iterations.
# That is the usual signature of (quasi-)complete separation for that level;
# verify, and treat the Region estimates and their p-values with caution.
m.logit <- glm(
  ClintonWin ~ College + HouseholdIncome + Region,
  data = States,
  family = binomial(link = "logit")
)
summary(m.logit)
## 
## Call:
## glm(formula = ClintonWin ~ College + HouseholdIncome + Region, 
##     family = binomial(link = "logit"), data = States)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      -17.16240    6.31603  -2.717  0.00658 **
## College            0.28494    0.14633   1.947  0.05150 . 
## HouseholdIncome    0.07628    0.07376   1.034  0.30110   
## RegionNE           2.17592    1.40578   1.548  0.12166   
## RegionS          -16.31016 2420.64651  -0.007  0.99462   
## RegionW            2.58895    1.62430   1.594  0.11096   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 57.306  on 49  degrees of freedom
## Residual deviance: 25.780  on 44  degrees of freedom
## AIC: 37.78
## 
## Number of Fisher Scoring iterations: 18
# Table of the logit fit with standard errors under each estimate and two
# decimal places; the estimates match summary(m.logit), i.e. they are on the
# log-odds scale.
modelsummary(
  m.logit,
  fmt = 2,
  statistic = "std.error"
)
tinytable_rgzfz09iz5iewse0g6a3
(1)
(Intercept) -17.16
(6.32)
College 0.28
(0.15)
HouseholdIncome 0.08
(0.07)
RegionNE 2.18
(1.41)
RegionS -16.31
(2420.65)
RegionW 2.59
(1.62)
Num.Obs. 50
AIC 37.8
BIC 49.3
Log.Lik. -12.890
RMSE 0.29
# Formatted table of the logit fit; the recorded output reports the estimates
# as odds ratios. Standard errors and test statistics are shown, confidence
# intervals are hidden.
tab_model(
  m.logit,
  show.ci = FALSE,
  show.se = TRUE,
  show.stat = TRUE
)
  ClintonWin
Predictors Odds Ratios std. Error Statistic p
(Intercept) 0.00 0.00 -2.72 0.007
College 1.33 0.19 1.95 0.052
HouseholdIncome 1.08 0.08 1.03 0.301
Region [NE] 8.81 12.39 1.55 0.122
Region [S] 0.00 0.00 -0.01 0.995
Region [W] 13.32 21.63 1.59 0.111
Observations 50
R2 Tjur 0.564

TASK 5

# Compute the effect displays for every term in the logit model and draw
# them in one figure.
effect_plot <- allEffects(mod = m.logit)
plot(effect_plot)

# Effect plot of the logit model for College, HouseholdIncome, and Region.
plot_model(
  m.logit,
  type = "eff",
  terms = c("College", "HouseholdIncome", "Region")
)