Backward Selection

Setup

# Clear every object from the current workspace before the analysis starts.
rm(list = ls())

library(visdat)
library(stargazer)

Please cite as: 
 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(MASS)

Attaching package: 'MASS'
The following object is masked from 'package:dplyr':

    select

Data source (Kaggle, "The Movies Dataset", file movies_metadata.csv): https://www.kaggle.com/datasets/rounakbanik/the-movies-dataset/data?select=movies_metadata.csv

Import Data

# Read the movies metadata CSV from the working directory.
movies_metadata <- read.csv(file = "movies_metadata.csv")

# Visualise only the first 10,000 rows of the import with visdat.
vis_dat(movies_metadata[seq_len(10000), ])

Clean Data

# Work on a copy; movies_metadata itself is used again later on.
df <- movies_metadata

# Coerce budget to numeric. Entries that are not valid numbers become NA,
# which R reports with a coercion warning.
df[["budget"]] <- as.numeric(df[["budget"]])
Warning: NAs introduced by coercion
df[["overview"]] <- NULL  # drop the overview column from df

# Frequency of each original_language code, most common first.
# TRUE is spelled out because the shorthand T is an ordinary variable that can
# be reassigned, which would silently change the sort order.
sort(x = table(df$original_language), decreasing = TRUE)

   en    fr    it    ja    de    es    ru    hi    ko    zh    sv    pt    cn 
32269  2438  1529  1350  1080   994   826   508   444   409   384   316   313 
   fi    nl    da    pl    tr    cs    el    no    fa    hu    ta    th    he 
  297   248   225   219   150   130   113   106   101   100    78    76    67 
   sr    ro    te    ar    ml    xx    bn    hr    mr    et    is    tl    id 
   63    57    45    39    36    33    29    29    25    24    24    23    20 
   ka    lv    sl    uk    bs    ca          ab    bg    vi    lt    sk    ur 
   18    18    17    16    14    12    11    10    10    10     9     9     8 
   nb    mk    ms    sh    sq    wo    bm    eu    kk    kn    ku    ky    af 
    6     5     5     5     5     5     3     3     3     3     3     3     2 
   am    bo    iu    lo    mn    ne    pa    ps 104.0  68.0  82.0    ay    cy 
    2     2     2     2     2     2     2     2     1     1     1     1     1 
   eo    fy    gl    hy    jv    la    lb    mt    qu    rw    si    sm    tg 
    1     1     1     1     1     1     1     1     1     1     1     1     1 
   uz    zu 
    1     1 
# Dummy variable: 1 when the original language code is "en", 0 otherwise
# (a missing code would stay NA).
df$original_language_english <- as.numeric(df$original_language == "en")

# Drop the raw language code and the title from df.
df[["original_language"]] <- NULL
df[["title"]] <- NULL

# Peek at the first few "video is not NA" flags (TRUE = value present).
head(x = !is.na(df$video))
[1] TRUE TRUE TRUE TRUE TRUE TRUE
# Count how often each distinct value of video occurs.
table(df[["video"]])

      False  True 
    6 45367    93 
# Keep only the rows whose video value is not NA.
# NOTE(review): the tabulations printed before and after this step both show
# 6 blank video entries, so those blanks look like empty strings rather than
# NA and are not removed here -- confirm whether they should be dropped too.
df <- df[which(!is.na(df$video)), ]
table(df[["video"]])

      False  True 
    6 45367    93 
# Show the column types and first values of the cleaned data frame.
str(object = df)
'data.frame':   45466 obs. of  22 variables:
 $ adult                    : chr  "False" "False" "False" "False" ...
 $ belongs_to_collection    : chr  "{'id': 10194, 'name': 'Toy Story Collection', 'poster_path': '/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg', 'backdrop_path"| __truncated__ "" "{'id': 119050, 'name': 'Grumpy Old Men Collection', 'poster_path': '/nLvUdqgPgm3F85NMCii9gVFUcet.jpg', 'backdro"| __truncated__ "" ...
 $ budget                   : num  3.0e+07 6.5e+07 0.0 1.6e+07 0.0 6.0e+07 5.8e+07 0.0 3.5e+07 5.8e+07 ...
 $ genres                   : chr  "[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]" "[{'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 10751, 'name': 'Family'}]" "[{'id': 10749, 'name': 'Romance'}, {'id': 35, 'name': 'Comedy'}]" "[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}]" ...
 $ homepage                 : chr  "http://toystory.disney.com/toy-story" "" "" "" ...
 $ id                       : chr  "862" "8844" "15602" "31357" ...
 $ imdb_id                  : chr  "tt0114709" "tt0113497" "tt0113228" "tt0114885" ...
 $ original_title           : chr  "Toy Story" "Jumanji" "Grumpier Old Men" "Waiting to Exhale" ...
 $ popularity               : chr  "21.946943" "17.015539" "11.7129" "3.859495" ...
 $ poster_path              : chr  "/rhIRbceoE9lR4veEXuwCC2wARtG.jpg" "/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg" "/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg" "/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg" ...
 $ production_companies     : chr  "[{'name': 'Pixar Animation Studios', 'id': 3}]" "[{'name': 'TriStar Pictures', 'id': 559}, {'name': 'Teitler Film', 'id': 2550}, {'name': 'Interscope Communicat"| __truncated__ "[{'name': 'Warner Bros.', 'id': 6194}, {'name': 'Lancaster Gate', 'id': 19464}]" "[{'name': 'Twentieth Century Fox Film Corporation', 'id': 306}]" ...
 $ production_countries     : chr  "[{'iso_3166_1': 'US', 'name': 'United States of America'}]" "[{'iso_3166_1': 'US', 'name': 'United States of America'}]" "[{'iso_3166_1': 'US', 'name': 'United States of America'}]" "[{'iso_3166_1': 'US', 'name': 'United States of America'}]" ...
 $ release_date             : chr  "1995-10-30" "1995-12-15" "1995-12-22" "1995-12-22" ...
 $ revenue                  : num  3.74e+08 2.63e+08 0.00 8.15e+07 7.66e+07 ...
 $ runtime                  : num  81 104 101 127 106 170 127 97 106 130 ...
 $ spoken_languages         : chr  "[{'iso_639_1': 'en', 'name': 'English'}]" "[{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'fr', 'name': 'Français'}]" "[{'iso_639_1': 'en', 'name': 'English'}]" "[{'iso_639_1': 'en', 'name': 'English'}]" ...
 $ status                   : chr  "Released" "Released" "Released" "Released" ...
 $ tagline                  : chr  "" "Roll the dice and unleash the excitement!" "Still Yelling. Still Fighting. Still Ready for Love." "Friends are the people who let you be yourself... and never let you forget it." ...
 $ video                    : chr  "False" "False" "False" "False" ...
 $ vote_average             : num  7.7 6.9 6.5 6.1 5.7 7.7 6.2 5.4 5.5 6.6 ...
 $ vote_count               : int  5415 2413 92 34 173 1886 141 45 174 1194 ...
 $ original_language_english: num  1 1 1 1 1 1 1 1 1 1 ...

Run Regression

# Model 1: rating explained by vote count, budget, the video flag and the
# English-language dummy, all taken from the cleaned data frame df.
reg1 <- lm(
  formula = vote_average ~ vote_count + budget + video +
    original_language_english,
  data = df
)


# Model 2 uses the full original_language variable, which was dropped from df,
# so it is fitted on the raw import. Budget is converted to numeric inside a
# copy of that same data frame instead of pulling df$budget into the formula:
# every term then comes from one data set, and the fit no longer relies on df
# and movies_metadata having the same row order and row count.
# suppressWarnings() wraps only the as.numeric() call, i.e. it hides just the
# "NAs introduced by coercion" warning for non-numeric budget strings.
movies_lang <- transform(
  movies_metadata,
  budget = suppressWarnings(as.numeric(budget))
)
reg2 <- lm(
  formula = vote_average ~ vote_count + budget + video + original_language,
  data = movies_lang
)


# Print both models side by side as a plain-text table.
stargazer(reg1, reg2, type = "text")

===============================================================================
                                           Dependent variable:                 
                          -----------------------------------------------------
                                              vote_average                     
                                     (1)                        (2)            
-------------------------------------------------------------------------------
vote_count                         0.001***                   0.001***         
                                  (0.00002)                  (0.00002)         
                                                                               
budget                              -0.000                                     
                                   (0.000)                                     
                                                                               
budget                                                         -0.000          
                                                              (0.000)          
                                                                               
videoTrue                         -0.729***                  -0.736***         
                                   (0.197)                    (0.196)          
                                                                               
original_language_english         -0.493***                                    
                                   (0.020)                                     
                                                                               
original_languageab                                           1.914**          
                                                              (0.825)          
                                                                               
original_languageaf                                           3.021**          
                                                              (1.450)          
                                                                               
original_languageam                                           4.295***         
                                                              (1.450)          
                                                                               
original_languagear                                           3.657***         
                                                              (0.644)          
                                                                               
original_languageay                                            2.846           
                                                              (1.971)          
                                                                               
original_languagebg                                           2.964***         
                                                              (0.824)          
                                                                               
original_languagebm                                           3.107**          
                                                              (1.229)          
                                                                               
original_languagebn                                           3.979***         
                                                              (0.668)          
                                                                               
original_languagebo                                           3.192**          
                                                              (1.450)          
                                                                               
original_languagebs                                           2.118***         
                                                              (0.760)          
                                                                               
original_languageca                                           3.577***         
                                                              (0.788)          
                                                                               
original_languagecn                                           3.019***         
                                                              (0.579)          
                                                                               
original_languagecs                                           3.121***         
                                                              (0.592)          
                                                                               
original_languagecy                                            3.346*          
                                                              (1.971)          
                                                                               
original_languageda                                           2.915***         
                                                              (0.583)          
                                                                               
original_languagede                                           2.697***         
                                                              (0.572)          
                                                                               
original_languageel                                           3.216***         
                                                              (0.596)          
                                                                               
original_languageen                                           2.268***         
                                                              (0.569)          
                                                                               
original_languageeo                                            1.144           
                                                              (1.971)          
                                                                               
original_languagees                                           2.526***         
                                                              (0.572)          
                                                                               
original_languageet                                           2.367***         
                                                              (0.687)          
                                                                               
original_languageeu                                            0.743           
                                                              (1.229)          
                                                                               
original_languagefa                                           3.052***         
                                                              (0.599)          
                                                                               
original_languagefi                                           1.871***         
                                                              (0.579)          
                                                                               
original_languagefr                                           2.878***         
                                                              (0.570)          
                                                                               
original_languagefy                                            -3.153          
                                                              (1.971)          
                                                                               
original_languagegl                                            3.745*          
                                                              (1.971)          
                                                                               
original_languagehe                                           2.753***         
                                                              (0.614)          
                                                                               
original_languagehi                                           2.615***         
                                                              (0.575)          
                                                                               
original_languagehr                                           2.521***         
                                                              (0.668)          
                                                                               
original_languagehu                                           2.726***         
                                                              (0.599)          
                                                                               
original_languagehy                                            1.847           
                                                              (1.971)          
                                                                               
original_languageid                                           3.374***         
                                                              (0.708)          
                                                                               
original_languageis                                           2.769***         
                                                              (0.687)          
                                                                               
original_languageit                                           2.288***         
                                                              (0.571)          
                                                                               
original_languageiu                                           3.496**          
                                                              (1.450)          
                                                                               
original_languageja                                           3.098***         
                                                              (0.571)          
                                                                               
original_languagejv                                            1.847           
                                                              (1.971)          
                                                                               
original_languageka                                           2.660***         
                                                              (0.722)          
                                                                               
original_languagekk                                            2.279*          
                                                              (1.229)          
                                                                               
original_languagekn                                           3.810***         
                                                              (1.229)          
                                                                               
original_languageko                                           3.395***         
                                                              (0.576)          
                                                                               
original_languageku                                           3.438***         
                                                              (1.229)          
                                                                               
original_languageky                                            -0.054          
                                                              (1.229)          
                                                                               
original_languagela                                           4.839**          
                                                              (1.971)          
                                                                               
original_languagelb                                            -3.153          
                                                              (1.971)          
                                                                               
original_languagelo                                           3.389**          
                                                              (1.450)          
                                                                               
original_languagelt                                           2.565***         
                                                              (0.848)          
                                                                               
original_languagelv                                            1.230*          
                                                              (0.722)          
                                                                               
original_languagemk                                           3.904***         
                                                              (1.018)          
                                                                               
original_languageml                                           3.227***         
                                                              (0.650)          
                                                                               
original_languagemn                                           4.380***         
                                                              (1.450)          
                                                                               
original_languagemr                                           2.406***         
                                                              (0.683)          
                                                                               
original_languagems                                            0.583           
                                                              (1.018)          
                                                                               
original_languagemt                                            -3.153          
                                                              (1.971)          
                                                                               
original_languagenb                                           2.706***         
                                                              (0.958)          
                                                                               
original_languagene                                           2.894**          
                                                              (1.450)          
                                                                               
original_languagenl                                           2.428***         
                                                              (0.581)          
                                                                               
original_languageno                                           2.879***         
                                                              (0.598)          
                                                                               
original_languagepa                                           4.194***         
                                                              (1.450)          
                                                                               
original_languagepl                                           2.791***         
                                                              (0.583)          
                                                                               
original_languageps                                           2.988**          
                                                              (1.450)          
                                                                               
original_languagept                                           2.693***         
                                                              (0.579)          
                                                                               
original_languagequ                                            1.847           
                                                              (1.971)          
                                                                               
original_languagero                                           2.997***         
                                                              (0.621)          
                                                                               
original_languageru                                           2.545***         
                                                              (0.573)          
                                                                               
original_languagerw                                            3.545*          
                                                              (1.971)          
                                                                               
original_languagesh                                           2.438**          
                                                              (1.018)          
                                                                               
original_languagesi                                            0.245           
                                                              (1.971)          
                                                                               
original_languagesk                                           3.200***         
                                                              (0.848)          
                                                                               
original_languagesl                                           2.587***         
                                                              (0.730)          
                                                                               
original_languagesm                                            3.546*          
                                                              (1.971)          
                                                                               
original_languagesq                                           3.924***         
                                                              (1.018)          
                                                                               
original_languagesr                                           3.289***         
                                                              (0.617)          
                                                                               
original_languagesv                                           2.579***         
                                                              (0.577)          
                                                                               
original_languageta                                           3.536***         
                                                              (0.608)          
                                                                               
original_languagete                                           3.059***         
                                                              (0.635)          
                                                                               
original_languagetg                                            3.146           
                                                              (1.971)          
                                                                               
original_languageth                                           2.926***         
                                                              (0.609)          
                                                                               
original_languagetl                                           3.495***         
                                                              (0.692)          
                                                                               
original_languagetr                                           3.377***         
                                                              (0.589)          
                                                                               
original_languageuk                                           3.302***         
                                                              (0.739)          
                                                                               
original_languageur                                           3.321***         
                                                              (0.877)          
                                                                               
original_languageuz                                            -3.153          
                                                              (1.971)          
                                                                               
original_languagevi                                           2.722***         
                                                              (0.824)          
                                                                               
original_languagewo                                           3.365***         
                                                              (1.018)          
                                                                               
original_languagexx                                           2.688***         
                                                              (0.657)          
                                                                               
original_languagezh                                           3.201***         
                                                              (0.576)          
                                                                               
original_languagezu                                           5.144***         
                                                              (1.971)          
                                                                               
Constant                           5.913***                   3.153***         
                                   (0.017)                    (0.569)          
                                                                               
-------------------------------------------------------------------------------
Observations                        45,460                     45,460          
R2                                  0.029                      0.040           
Adjusted R2                         0.029                      0.039           
Residual Std. Error           1.896 (df = 45455)         1.887 (df = 45367)    
F Statistic               340.862*** (df = 4; 45455) 20.796*** (df = 92; 45367)
===============================================================================
Note:                                               *p<0.1; **p<0.05; ***p<0.01

stepAIC

# Overview plot of the built-in mtcars data.
vis_dat(mtcars)

# Full model: mpg regressed on every other column of mtcars.
# NOTE: this reuses the name reg1 and therefore replaces the movie model.
reg1 <- lm(formula = mpg ~ ., data = mtcars)

# Plain-text regression table for the full model.
stargazer(reg1, type = "text")

===============================================
                        Dependent variable:    
                    ---------------------------
                                mpg            
-----------------------------------------------
cyl                           -0.111           
                              (1.045)          
                                               
disp                           0.013           
                              (0.018)          
                                               
hp                            -0.021           
                              (0.022)          
                                               
drat                           0.787           
                              (1.635)          
                                               
wt                            -3.715*          
                              (1.894)          
                                               
qsec                           0.821           
                              (0.731)          
                                               
vs                             0.318           
                              (2.105)          
                                               
am                             2.520           
                              (2.057)          
                                               
gear                           0.655           
                              (1.493)          
                                               
carb                          -0.199           
                              (0.829)          
                                               
Constant                      12.303           
                             (18.718)          
                                               
-----------------------------------------------
Observations                    32             
R2                             0.869           
Adjusted R2                    0.807           
Residual Std. Error       2.650 (df = 21)      
F Statistic           13.932*** (df = 10; 21)  
===============================================
Note:               *p<0.1; **p<0.05; ***p<0.01
# Backward elimination by AIC, starting from the full model reg1.
# The per-step trace and the final selected model are printed.
stepAIC(reg1, direction = "backward")
Start:  AIC=70.9
mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb

       Df Sum of Sq    RSS    AIC
- cyl   1    0.0799 147.57 68.915
- vs    1    0.1601 147.66 68.932
- carb  1    0.4067 147.90 68.986
- gear  1    1.3531 148.85 69.190
- drat  1    1.6270 149.12 69.249
- disp  1    3.9167 151.41 69.736
- hp    1    6.8399 154.33 70.348
- qsec  1    8.8641 156.36 70.765
<none>              147.49 70.898
- am    1   10.5467 158.04 71.108
- wt    1   27.0144 174.51 74.280

Step:  AIC=68.92
mpg ~ disp + hp + drat + wt + qsec + vs + am + gear + carb

       Df Sum of Sq    RSS    AIC
- vs    1    0.2685 147.84 66.973
- carb  1    0.5201 148.09 67.028
- gear  1    1.8211 149.40 67.308
- drat  1    1.9826 149.56 67.342
- disp  1    3.9009 151.47 67.750
- hp    1    7.3632 154.94 68.473
<none>              147.57 68.915
- qsec  1   10.0933 157.67 69.032
- am    1   11.8359 159.41 69.384
- wt    1   27.0280 174.60 72.297

Step:  AIC=66.97
mpg ~ disp + hp + drat + wt + qsec + am + gear + carb

       Df Sum of Sq    RSS    AIC
- carb  1    0.6855 148.53 65.121
- gear  1    2.1437 149.99 65.434
- drat  1    2.2139 150.06 65.449
- disp  1    3.6467 151.49 65.753
- hp    1    7.1060 154.95 66.475
<none>              147.84 66.973
- am    1   11.5694 159.41 67.384
- qsec  1   15.6830 163.53 68.200
- wt    1   27.3799 175.22 70.410

Step:  AIC=65.12
mpg ~ disp + hp + drat + wt + qsec + am + gear

       Df Sum of Sq    RSS    AIC
- gear  1     1.565 150.09 63.457
- drat  1     1.932 150.46 63.535
<none>              148.53 65.121
- disp  1    10.110 158.64 65.229
- am    1    12.323 160.85 65.672
- hp    1    14.826 163.35 66.166
- qsec  1    26.408 174.94 68.358
- wt    1    69.127 217.66 75.350

Step:  AIC=63.46
mpg ~ disp + hp + drat + wt + qsec + am

       Df Sum of Sq    RSS    AIC
- drat  1     3.345 153.44 62.162
- disp  1     8.545 158.64 63.229
<none>              150.09 63.457
- hp    1    13.285 163.38 64.171
- am    1    20.036 170.13 65.466
- qsec  1    25.574 175.67 66.491
- wt    1    67.572 217.66 73.351

Step:  AIC=62.16
mpg ~ disp + hp + wt + qsec + am

       Df Sum of Sq    RSS    AIC
- disp  1     6.629 160.07 61.515
<none>              153.44 62.162
- hp    1    12.572 166.01 62.682
- qsec  1    26.470 179.91 65.255
- am    1    32.198 185.63 66.258
- wt    1    69.043 222.48 72.051

Step:  AIC=61.52
mpg ~ hp + wt + qsec + am

       Df Sum of Sq    RSS    AIC
- hp    1     9.219 169.29 61.307
<none>              160.07 61.515
- qsec  1    20.225 180.29 63.323
- am    1    25.993 186.06 64.331
- wt    1    78.494 238.56 72.284

Step:  AIC=61.31
mpg ~ wt + qsec + am

       Df Sum of Sq    RSS    AIC
<none>              169.29 61.307
- am    1    26.178 195.46 63.908
- qsec  1   109.034 278.32 75.217
- wt    1   183.347 352.63 82.790

Call:
lm(formula = mpg ~ wt + qsec + am, data = mtcars)

Coefficients:
(Intercept)           wt         qsec           am  
      9.618       -3.917        1.226        2.936  
# Refit the specification that the backward selection ended on
# (mpg ~ wt + qsec + am) as its own model object.
reg2 <- lm(mpg ~ wt + qsec + am, data = mtcars)

# Show the full model (1) and the reduced model (2) side by side.
stargazer(reg1, reg2, type = "text")

==================================================================
                                 Dependent variable:              
                    ----------------------------------------------
                                         mpg                      
                              (1)                    (2)          
------------------------------------------------------------------
cyl                         -0.111                                
                            (1.045)                               
                                                                  
disp                         0.013                                
                            (0.018)                               
                                                                  
hp                          -0.021                                
                            (0.022)                               
                                                                  
drat                         0.787                                
                            (1.635)                               
                                                                  
wt                          -3.715*               -3.917***       
                            (1.894)                (0.711)        
                                                                  
qsec                         0.821                 1.226***       
                            (0.731)                (0.289)        
                                                                  
vs                           0.318                                
                            (2.105)                               
                                                                  
am                           2.520                 2.936**        
                            (2.057)                (1.411)        
                                                                  
gear                         0.655                                
                            (1.493)                               
                                                                  
carb                        -0.199                                
                            (0.829)                               
                                                                  
Constant                    12.303                  9.618         
                           (18.718)                (6.960)        
                                                                  
------------------------------------------------------------------
Observations                  32                      32          
R2                           0.869                  0.850         
Adjusted R2                  0.807                  0.834         
Residual Std. Error     2.650 (df = 21)        2.459 (df = 28)    
F Statistic         13.932*** (df = 10; 21) 52.750*** (df = 3; 28)
==================================================================
Note:                                  *p<0.1; **p<0.05; ***p<0.01