# Day 8

# Run this script in a fresh R session (e.g. restart R) rather than clearing the
# global environment here. Wiping the workspace with rm()/remove() deletes
# whatever objects the caller already had when the script is sourced, yet it
# leaves attached packages and options() untouched, so it does not actually
# provide a clean starting state.

library(visdat)
library(stargazer)


#> Please cite as:
#>
#>  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
#>
#>  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

# Show the index of data sets that ship with the base "datasets" package.
library(help = "datasets")

# Boston is provided by MASS, not by "datasets", and MASS has not been attached
# yet at this point in the script. A bare `?Boston` therefore fails with
#   No documentation for 'Boston' in specified packages and libraries:
#   you could try '??Boston'
# Naming the package explicitly finds the help page regardless of what is
# currently attached.
help("Boston", package = "MASS")

library(MASS)       
library(stargazer)
library(visdat)      

# Load the Boston data set into the workspace (MASS is attached just above).
data("Boston")

# Work on a copy so the packaged data set itself is never modified.
clean_df <- Boston

# Visual overview of the columns of the data frame (visdat).
vis_dat(clean_df)

# Summary-statistics table for every column.
# No covariate.labels override is passed: stargazer labels each row with the
# column name by default. A hand-written label vector is applied by position,
# so a vector that is shorter than the 14 columns, or that skips a column such
# as indus or chas, attaches the wrong name to every row after the gap.
stargazer(clean_df, type = "text")
#>
#> =============================================
#> Statistic  N   Mean   St. Dev.  Min     Max
#> ---------------------------------------------
#> crim      506  3.614   8.602   0.006  88.976
#> zn        506 11.364   23.322  0.000  100.000
#> indus     506 11.137   6.860   0.460  27.740
#> chas      506  0.069   0.254     0       1
#> nox       506  0.555   0.116   0.385   0.871
#> rm        506  6.285   0.703   3.561   8.780
#> age       506 68.575   28.149  2.900  100.000
#> dis       506  3.795   2.106   1.130  12.126
#> rad       506  9.549   8.707     1      24
#> tax       506 408.237 168.537   187     711
#> ptratio   506 18.456   2.165   12.600 22.000
#> black     506 356.674  91.295  0.320  396.900
#> lstat     506 12.653   7.141   1.730  37.970
#> medv      506 22.533   9.197   5.000  50.000
#> ---------------------------------------------

# Start again from an untouched copy of the data.
clean_df <- Boston

# No column conversion or removal is needed here: Boston has no CONT, INTG or
# CFMG columns (see the column listing printed by str() / names()), so the
# former steps that targeted those names never changed the data frame.
# NOTE(review): those names look like leftovers from a different data set
# (possibly USJudgeRatings) -- confirm nothing else relies on them.

# Inspect column types and the first few values of each column.
str(clean_df)

#> 'data.frame':   506 obs. of  14 variables:
#>  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
#>  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
#>  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
#>  $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
#>  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
#>  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
#>  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
#>  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
#>  $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
#>  $ tax    : num  296 242 242 222 222 222 311 311 311 311 ...
#>  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
#>  $ black  : num  397 397 393 395 397 ...
#>  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
#>  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...

# Keep only complete rows. na.omit() is called directly instead of through
# `%>%`: the visible script never attaches magrittr or dplyr itself, so the
# pipe would only resolve if some other attached package happens to re-export
# it. The direct call has no such dependency and returns the same result.
clean_df <- na.omit(Boston)

# List the columns that are available as regression variables.
names(clean_df)

#>  [1] "crim"    "zn"      "indus"   "chas"    "nox"     "rm"      "age"    
#>  [8] "dis"     "rad"     "tax"     "ptratio" "black"   "lstat"   "medv"

# Full linear model: crim regressed on every other column of clean_df.
fullreg <- lm(crim ~ ., data = clean_df)

# Backward selection by AIC starting from the full model. The selected model is
# stored so it can be reused later (summary(), predict(), ...) instead of being
# lost after auto-printing; stepAIC() still prints its trace while it runs.
step_model <- stepAIC(fullreg, direction = "backward")

# Print the selected model (call and coefficients), as the bare call did.
step_model

#> Start:  AIC=1898.56
#> crim ~ zn + indus + chas + nox + rm + age + dis + rad + tax + 
#>     ptratio + black + lstat + medv
#>
#>           Df Sum of Sq   RSS    AIC
#> - age      1      0.27 20400 1896.6
#> - chas     1     16.71 20417 1897.0
#> - rm       1     20.43 20420 1897.1
#> - tax      1     22.29 20422 1897.1
#> - indus    1     24.30 20424 1897.2
#> <none>                 20400 1898.6
#> - ptratio  1     87.65 20488 1898.7
#> - lstat    1    115.18 20515 1899.4
#> - nox      1    158.47 20558 1900.5
#> - black    1    174.58 20574 1900.9
#> - zn       1    237.70 20638 1902.4
#> - medv     1    447.85 20848 1907.5
#> - dis      1    508.77 20909 1909.0
#> - rad      1   1850.44 22250 1940.5

#> Step:  AIC=1896.56
#> crim ~ zn + indus + chas + nox + rm + dis + rad + tax + ptratio + 
#>     black + lstat + medv
#>
#>           Df Sum of Sq   RSS    AIC
#> - chas     1     16.54 20417 1895.0
#> - rm       1     22.14 20422 1895.1
#> - tax      1     22.16 20422 1895.1
#> - indus    1     24.30 20424 1895.2
#> <none>                 20400 1896.6
#> - ptratio  1     87.41 20488 1896.7
#> - lstat    1    131.43 20532 1897.8
#> - nox      1    166.37 20567 1898.7
#> - black    1    174.40 20575 1898.9
#> - zn       1    239.21 20639 1900.5
#> - medv     1    447.81 20848 1905.5
#> - dis      1    559.06 20959 1908.2
#> - rad      1   1857.98 22258 1938.7

#> Step:  AIC=1894.97
#> crim ~ zn + indus + nox + rm + dis + rad + tax + ptratio + black + 
#>     lstat + medv
#>
#>           Df Sum of Sq   RSS    AIC
#> - tax      1     18.81 20436 1893.4
#> - rm       1     22.76 20440 1893.5
#> - indus    1     28.82 20446 1893.7
#> <none>                 20417 1895.0
#> - ptratio  1     84.57 20501 1895.1
#> - lstat    1    129.63 20546 1896.2
#> - nox      1    175.96 20593 1897.3
#> - black    1    178.37 20595 1897.4
#> - zn       1    241.26 20658 1898.9
#> - medv     1    483.38 20900 1904.8
#> - dis      1    563.37 20980 1906.8
#> - rad      1   1842.82 22260 1936.7

#> Step:  AIC=1893.44
#> crim ~ zn + indus + nox + rm + dis + rad + ptratio + black + 
#>     lstat + medv
#>
#>           Df Sum of Sq   RSS    AIC
#> - rm       1      23.0 20459 1892.0
#> - indus    1      64.4 20500 1893.0
#> <none>                 20436 1893.4
#> - ptratio  1      87.4 20523 1893.6
#> - lstat    1     137.9 20574 1894.8
#> - black    1     178.1 20614 1895.8
#> - nox      1     181.9 20617 1895.9
#> - zn       1     222.9 20658 1896.9
#> - medv     1     465.3 20901 1902.8
#> - dis      1     556.9 20992 1905.0
#> - rad      1    4693.4 25129 1996.0

#> Step:  AIC=1892.01
#> crim ~ zn + indus + nox + dis + rad + ptratio + black + lstat + 
#>     medv
#>
#>           Df Sum of Sq   RSS    AIC
#> - indus    1      74.0 20533 1891.8
#> <none>                 20459 1892.0
#> - ptratio  1      88.2 20547 1892.2
#> - lstat    1     118.9 20577 1892.9
#> - nox      1     176.9 20636 1894.4
#> - black    1     202.4 20661 1895.0
#> - zn       1     233.9 20692 1895.8
#> - medv     1     458.7 20917 1901.2
#> - dis      1     572.2 21031 1904.0
#> - rad      1    4811.3 25270 1996.9

#> Step:  AIC=1891.83
#> crim ~ zn + nox + dis + rad + ptratio + black + lstat + medv
#>
#>           Df Sum of Sq   RSS    AIC
#> <none>                 20533 1891.8
#> - lstat    1     104.7 20637 1892.4
#> - ptratio  1     119.0 20652 1892.8
#> - black    1     198.4 20731 1894.7
#> - zn       1     239.6 20772 1895.7
#> - nox      1     296.6 20829 1897.1
#> - medv     1     430.2 20963 1900.3
#> - dis      1     507.8 21040 1902.2
#> - rad      1    4739.5 25272 1994.9


#> Call:
#> lm(formula = crim ~ zn + nox + dis + rad + ptratio + black + 
#>     lstat + medv, data = clean_df)
#>
#> Coefficients:
#> (Intercept)           zn          nox          dis          rad      ptratio  
#>   19.683128     0.043293   -12.753708    -0.918318     0.532617    -0.310541  
#>       black        lstat         medv  
#>   -0.007922     0.110173    -0.174207