AER

Author

WD

remove(list = ls())
# Backward Selection on PSID1976 from the AER package
# ---------------------------------------------------

# 1. Setup ----------------------------------------------------------------
rm(list = ls())

# 1a. Install & load necessary packages
if (!requireNamespace("AER", quietly = TRUE)) install.packages("AER")
library(AER)        # PSID1976 data
Loading required package: car
Loading required package: carData
Loading required package: lmtest
Loading required package: zoo

Attaching package: 'zoo'
The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
Loading required package: sandwich
Loading required package: survival
if (!requireNamespace("visdat", quietly = TRUE)) install.packages("visdat")
library(visdat)     # for quick data overview

if (!requireNamespace("stargazer", quietly = TRUE)) install.packages("stargazer")
library(stargazer)  # for formatted model output

Please cite as: 
 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
if (!requireNamespace("MASS", quietly = TRUE)) install.packages("MASS")
library(MASS)       # for stepAIC()

# 2. Load & Preview ------------------------------------------------------
data("PSID1976", package = "AER")
df <- PSID1976
vis_dat(df[1:500, ])    # glimpse first 500 rows

# 3. Select & Clean ------------------------------------------------------
# 3a. Pick 12 key variables
keep <- c(
  "hours",      # annual hours worked (wife)
  "wage",       # wife's hourly wage
  "fincome",    # family income
  "education",  # wife's years of schooling
  "age",        # wife's age
  "experience", # wife's labor‐market experience
  "repwage",    # reported wage
  "hhours",     # husband's hours worked
  "hwage",      # husband's wage
  "youngkids",  # children < 6
  "oldkids",    # children 6–18
  "city"        # lives in SMSA
)

# 3b. Subset & drop NAs
df <- df[, keep]
df <- na.omit(df)

# 3c. Factorize
df$city <- factor(df$city)

# 4. Fit Full Model ------------------------------------------------------
full_mod <- lm(hours ~ ., data = df)
stargazer(full_mod,
          type  = "text",
          title = "Full Model: Annual Hours Worked")

Full Model: Annual Hours Worked
===============================================
                        Dependent variable:    
                    ---------------------------
                               hours           
-----------------------------------------------
wage                           1.352           
                              (9.552)          
                                               
fincome                      0.026***          
                              (0.003)          
                                               
education                    -19.176*          
                             (11.507)          
                                               
age                         -22.684***         
                              (3.643)          
                                               
experience                   26.772***         
                              (3.341)          
                                               
repwage                     150.349***         
                             (13.333)          
                                               
hhours                       -0.200***         
                              (0.046)          
                                               
hwage                       -70.154***         
                              (9.615)          
                                               
youngkids                   -292.199***        
                             (49.758)          
                                               
oldkids                       -31.129          
                             (19.302)          
                                               
cityyes                       -54.671          
                             (50.747)          
                                               
Constant                   1,905.897***        
                             (243.275)         
                                               
-----------------------------------------------
Observations                    753            
R2                             0.495           
Adjusted R2                    0.488           
Residual Std. Error     623.556 (df = 741)     
F Statistic          66.119*** (df = 11; 741)  
===============================================
Note:               *p<0.1; **p<0.05; ***p<0.01
# 5. Backward Selection --------------------------------------------------
red_mod <- stepAIC(full_mod,
                   direction = "backward",
                   trace     = FALSE)

# 6. Display Reduced Model -----------------------------------------------
stargazer(red_mod,
          type  = "text",
          title = "Reduced Model after Backward Selection")

Reduced Model after Backward Selection
===============================================
                        Dependent variable:    
                    ---------------------------
                               hours           
-----------------------------------------------
fincome                      0.026***          
                              (0.003)          
                                               
education                    -19.811*          
                             (11.269)          
                                               
age                         -23.079***         
                              (3.623)          
                                               
experience                   26.878***         
                              (3.336)          
                                               
repwage                     150.500***         
                             (11.269)          
                                               
hhours                       -0.199***         
                              (0.046)          
                                               
hwage                       -72.141***         
                              (9.416)          
                                               
youngkids                   -292.365***        
                             (49.594)          
                                               
oldkids                       -31.611          
                             (19.283)          
                                               
Constant                   1,908.230***        
                             (243.059)         
                                               
-----------------------------------------------
Observations                    753            
R2                             0.495           
Adjusted R2                    0.488           
Residual Std. Error     623.215 (df = 743)     
F Statistic           80.768*** (df = 9; 743)  
===============================================
Note:               *p<0.1; **p<0.05; ***p<0.01
# 7. Compare Full vs. Reduced -------------------------------------------
stargazer(full_mod, red_mod,
          type          = "text",
          column.labels = c("Full Model", "Reduced Model"))

====================================================================
                                  Dependent variable:               
                    ------------------------------------------------
                                         hours                      
                           Full Model             Reduced Model     
                              (1)                      (2)          
--------------------------------------------------------------------
wage                         1.352                                  
                            (9.552)                                 
                                                                    
fincome                     0.026***                0.026***        
                            (0.003)                  (0.003)        
                                                                    
education                   -19.176*                -19.811*        
                            (11.507)                (11.269)        
                                                                    
age                        -22.684***              -23.079***       
                            (3.643)                  (3.623)        
                                                                    
experience                 26.772***                26.878***       
                            (3.341)                  (3.336)        
                                                                    
repwage                    150.349***              150.500***       
                            (13.333)                (11.269)        
                                                                    
hhours                     -0.200***                -0.199***       
                            (0.046)                  (0.046)        
                                                                    
hwage                      -70.154***              -72.141***       
                            (9.615)                  (9.416)        
                                                                    
youngkids                 -292.199***              -292.365***      
                            (49.758)                (49.594)        
                                                                    
oldkids                     -31.129                  -31.611        
                            (19.302)                (19.283)        
                                                                    
cityyes                     -54.671                                 
                            (50.747)                                
                                                                    
Constant                  1,905.897***            1,908.230***      
                           (243.275)                (243.059)       
                                                                    
--------------------------------------------------------------------
Observations                  753                      753          
R2                           0.495                    0.495         
Adjusted R2                  0.488                    0.488         
Residual Std. Error    623.556 (df = 741)      623.215 (df = 743)   
F Statistic         66.119*** (df = 11; 741) 80.768*** (df = 9; 743)
====================================================================
Note:                                    *p<0.1; **p<0.05; ***p<0.01
# Backward Selection Analysis Script
# ---------------------------------

# 1. Setup -----------------------------------------------------------------
remove(list = ls())

library(visdat)
library(stargazer)
library(dplyr)

Attaching package: 'dplyr'
The following object is masked from 'package:MASS':

    select
The following object is masked from 'package:car':

    recode
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(MASS)

# 2. Import & Preview Movies Data ------------------------------------------
movies_metadata <- read.csv("movies_metadata.csv", stringsAsFactors = FALSE)
vis_dat(movies_metadata[1:10000,])

# 3. Clean Movies Data -----------------------------------------------------
df <- movies_metadata
df$budget <- as.numeric(df$budget)           # coerce to numeric (introduces NAs)
Warning: NAs introduced by coercion
df$overview <- NULL                          # drop overview column
df$original_language_english <- if_else(
  df$original_language == "en", 1, 0
)
df$original_language <- NULL
df$title <- NULL

# 4. Regression on Movies Data ---------------------------------------------
reg1 <- lm(vote_average ~ vote_count + budget, data = df)
stargazer(reg1, type = "text")

===============================================
                        Dependent variable:    
                    ---------------------------
                           vote_average        
-----------------------------------------------
vote_count                   0.001***          
                             (0.00002)         
                                               
budget                       -0.000***         
                              (0.000)          
                                               
Constant                     5.568***          
                              (0.009)          
                                               
-----------------------------------------------
Observations                  45,460           
R2                             0.015           
Adjusted R2                    0.015           
Residual Std. Error     1.909 (df = 45457)     
F Statistic         357.097*** (df = 2; 45457) 
===============================================
Note:               *p<0.1; **p<0.05; ***p<0.01
# 5. Backward Selection on mtcars ------------------------------------------
# 5a. Visualize mtcars
vis_dat(mtcars)

# 5b. Full model
reg1_mtc <- lm(mpg ~ ., data = mtcars)
stargazer(reg1_mtc, type = "text")

===============================================
                        Dependent variable:    
                    ---------------------------
                                mpg            
-----------------------------------------------
cyl                           -0.111           
                              (1.045)          
                                               
disp                           0.013           
                              (0.018)          
                                               
hp                            -0.021           
                              (0.022)          
                                               
drat                           0.787           
                              (1.635)          
                                               
wt                            -3.715*          
                              (1.894)          
                                               
qsec                           0.821           
                              (0.731)          
                                               
vs                             0.318           
                              (2.105)          
                                               
am                             2.520           
                              (2.057)          
                                               
gear                           0.655           
                              (1.493)          
                                               
carb                          -0.199           
                              (0.829)          
                                               
Constant                      12.303           
                             (18.718)          
                                               
-----------------------------------------------
Observations                    32             
R2                             0.869           
Adjusted R2                    0.807           
Residual Std. Error       2.650 (df = 21)      
F Statistic           13.932*** (df = 10; 21)  
===============================================
Note:               *p<0.1; **p<0.05; ***p<0.01
# 5c. Perform backward stepwise AIC
stepAIC(
  object    = reg1_mtc,
  direction = "backward"
)
Start:  AIC=70.9
mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb

       Df Sum of Sq    RSS    AIC
- cyl   1    0.0799 147.57 68.915
- vs    1    0.1601 147.66 68.932
- carb  1    0.4067 147.90 68.986
- gear  1    1.3531 148.85 69.190
- drat  1    1.6270 149.12 69.249
- disp  1    3.9167 151.41 69.736
- hp    1    6.8399 154.33 70.348
- qsec  1    8.8641 156.36 70.765
<none>              147.49 70.898
- am    1   10.5467 158.04 71.108
- wt    1   27.0144 174.51 74.280

Step:  AIC=68.92
mpg ~ disp + hp + drat + wt + qsec + vs + am + gear + carb

       Df Sum of Sq    RSS    AIC
- vs    1    0.2685 147.84 66.973
- carb  1    0.5201 148.09 67.028
- gear  1    1.8211 149.40 67.308
- drat  1    1.9826 149.56 67.342
- disp  1    3.9009 151.47 67.750
- hp    1    7.3632 154.94 68.473
<none>              147.57 68.915
- qsec  1   10.0933 157.67 69.032
- am    1   11.8359 159.41 69.384
- wt    1   27.0280 174.60 72.297

Step:  AIC=66.97
mpg ~ disp + hp + drat + wt + qsec + am + gear + carb

       Df Sum of Sq    RSS    AIC
- carb  1    0.6855 148.53 65.121
- gear  1    2.1437 149.99 65.434
- drat  1    2.2139 150.06 65.449
- disp  1    3.6467 151.49 65.753
- hp    1    7.1060 154.95 66.475
<none>              147.84 66.973
- am    1   11.5694 159.41 67.384
- qsec  1   15.6830 163.53 68.200
- wt    1   27.3799 175.22 70.410

Step:  AIC=65.12
mpg ~ disp + hp + drat + wt + qsec + am + gear

       Df Sum of Sq    RSS    AIC
- gear  1     1.565 150.09 63.457
- drat  1     1.932 150.46 63.535
<none>              148.53 65.121
- disp  1    10.110 158.64 65.229
- am    1    12.323 160.85 65.672
- hp    1    14.826 163.35 66.166
- qsec  1    26.408 174.94 68.358
- wt    1    69.127 217.66 75.350

Step:  AIC=63.46
mpg ~ disp + hp + drat + wt + qsec + am

       Df Sum of Sq    RSS    AIC
- drat  1     3.345 153.44 62.162
- disp  1     8.545 158.64 63.229
<none>              150.09 63.457
- hp    1    13.285 163.38 64.171
- am    1    20.036 170.13 65.466
- qsec  1    25.574 175.67 66.491
- wt    1    67.572 217.66 73.351

Step:  AIC=62.16
mpg ~ disp + hp + wt + qsec + am

       Df Sum of Sq    RSS    AIC
- disp  1     6.629 160.07 61.515
<none>              153.44 62.162
- hp    1    12.572 166.01 62.682
- qsec  1    26.470 179.91 65.255
- am    1    32.198 185.63 66.258
- wt    1    69.043 222.48 72.051

Step:  AIC=61.52
mpg ~ hp + wt + qsec + am

       Df Sum of Sq    RSS    AIC
- hp    1     9.219 169.29 61.307
<none>              160.07 61.515
- qsec  1    20.225 180.29 63.323
- am    1    25.993 186.06 64.331
- wt    1    78.494 238.56 72.284

Step:  AIC=61.31
mpg ~ wt + qsec + am

       Df Sum of Sq    RSS    AIC
<none>              169.29 61.307
- am    1    26.178 195.46 63.908
- qsec  1   109.034 278.32 75.217
- wt    1   183.347 352.63 82.790

Call:
lm(formula = mpg ~ wt + qsec + am, data = mtcars)

Coefficients:
(Intercept)           wt         qsec           am  
      9.618       -3.917        1.226        2.936  
# 5d. Final reduced model (from stepAIC output)
reg2 <- lm(mpg ~ wt + qsec + am, data = mtcars)

# 6. Compare Full vs. Reduced Model ----------------------------------------
stargazer(
  reg1_mtc,
  reg2,
  type = "text",
  column.labels = c("Full Model", "Reduced Model")
)

==================================================================
                                 Dependent variable:              
                    ----------------------------------------------
                                         mpg                      
                          Full Model            Reduced Model     
                              (1)                    (2)          
------------------------------------------------------------------
cyl                         -0.111                                
                            (1.045)                               
                                                                  
disp                         0.013                                
                            (0.018)                               
                                                                  
hp                          -0.021                                
                            (0.022)                               
                                                                  
drat                         0.787                                
                            (1.635)                               
                                                                  
wt                          -3.715*               -3.917***       
                            (1.894)                (0.711)        
                                                                  
qsec                         0.821                 1.226***       
                            (0.731)                (0.289)        
                                                                  
vs                           0.318                                
                            (2.105)                               
                                                                  
am                           2.520                 2.936**        
                            (2.057)                (1.411)        
                                                                  
gear                         0.655                                
                            (1.493)                               
                                                                  
carb                        -0.199                                
                            (0.829)                               
                                                                  
Constant                    12.303                  9.618         
                           (18.718)                (6.960)        
                                                                  
------------------------------------------------------------------
Observations                  32                      32          
R2                           0.869                  0.850         
Adjusted R2                  0.807                  0.834         
Residual Std. Error     2.650 (df = 21)        2.459 (df = 28)    
F Statistic         13.932*** (df = 10; 21) 52.750*** (df = 3; 28)
==================================================================
Note:                                  *p<0.1; **p<0.05; ***p<0.01