Step 0

pacman::p_load(tidyverse, haven, magrittr, GGally,
               foreign, car, lmtest, sandwich, stargazer,
               install = TRUE, update = TRUE
               )
## 
##   There are binary versions available but the source versions are later:
##             binary source needs_compilation
## textshaping  0.2.1  0.3.0              TRUE
## usethis      2.0.0  2.0.1             FALSE
## 
##   Binaries will be installed
## package 'textshaping' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\n8atk\AppData\Local\Temp\RtmpUFSFPU\downloaded_packages
## installing the source package 'usethis'

Step 1

# foreign::read.dta preserves democrary as factored
cdata <- read_dta("G:/My Drive/homework/Tommy K/countries.dta")

cdata %<>% select(free_trade, avg_school, democracy)
class(cdata$democracy) # dbl_lbl
## [1] "haven_labelled" "vctrs_vctr"     "double"
cdata %<>% mutate(democracy = as_factor(democracy))
# cdata %<>% filter(complete.cases(.))

Step 2(a)

cdata %>% class
## [1] "tbl_df"     "tbl"        "data.frame"
cdata %>% glimpse()
## Rows: 193
## Columns: 3
## $ free_trade <dbl> 85.1, 86.0, 74.3, 73.3, 72.2, NA, NA, 61.7, 79.1, 42.2, ...
## $ avg_school <dbl> 12.123364, 12.677570, NA, NA, 9.752500, NA, NA, 10.56048...
## $ democracy  <fct> 1. Democracy, 1. Democracy, 1. Democracy, 1. Democracy, ...
cdata %>% summary()
##    free_trade      avg_school               democracy  
##  Min.   : 0.00   Min.   : 1.810   0. Dictatorship: 74  
##  1st Qu.:67.25   1st Qu.: 6.098   1. Democracy   :118  
##  Median :75.90   Median : 8.645   NA's           :  1  
##  Mean   :74.09   Mean   : 8.142                        
##  3rd Qu.:85.00   3rd Qu.:10.146                        
##  Max.   :90.00   Max.   :13.090                        
##  NA's   :15      NA's   :50
cdata %>% slice_sample(n = 10)
## # A tibble: 10 x 3
##    free_trade avg_school democracy      
##         <dbl>      <dbl> <fct>          
##  1       44.5       6.02 1. Democracy   
##  2       NA        NA    1. Democracy   
##  3       74.9       7.69 0. Dictatorship
##  4       83.2      NA    0. Dictatorship
##  5       85         8.93 1. Democracy   
##  6       80.5      10.5  1. Democracy   
##  7       87.5      10.4  1. Democracy   
##  8       76         8.48 0. Dictatorship
##  9       64.3       4.60 0. Dictatorship
## 10       42.2      NA    1. Democracy
cdata %>%
  mutate(democracy = unclass(democracy) - 1) %>% # stargazer doesn't process factors
  as.data.frame() %>% # stargazer data.frame to be the only class
  stargazer(type = "text", # type = "html"
            title = "Summary Statistics",     
            nobs = TRUE,                      
            mean.sd = TRUE,                   
            median = TRUE,                    
            min.max = TRUE,                   
            iqr = TRUE,                       
            covariate.labels = c("Free Trade (countries)",
                                "Average Schooling (years for ages >15)",
                                "Democracy (countries, categorical)"
                                )
            )
## 
## Summary Statistics
## ================================================================================================
## Statistic                               N   Mean  St. Dev.  Min  Pctl(25) Median Pctl(75)  Max  
## ------------------------------------------------------------------------------------------------
## Free Trade (countries)                 178 74.088  13.205  0.000  67.250  75.900  85.000  90.000
## Average Schooling (years for ages >15) 143 8.142   2.637   1.810  6.098   8.645   10.146  13.090
## Democracy (countries, categorical)     192 0.615   0.488   0.000  0.000   1.000   1.000   1.000 
## ------------------------------------------------------------------------------------------------
# cdata %>% ggplot(aes(free_trade)) + geom_histogram()
# cdata %>% ggplot(aes(avg_school)) + geom_histogram()
# cdata %>% ggplot(aes(democracy)) + geom_histogram(stat = "count")

cdata %>%
  mutate(democracy = unclass(democracy) - 1) %>%
  gather() %>%
  ggplot(aes(value)) +
  geom_histogram() +
  facet_wrap(vars(key), scales = "free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Step 2(b)

# cdata %>% ggpairs()
cdata %>% filter(complete.cases(.)) %>% ggpairs(axisLabels = "none")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Step 3

https://api.rpubs.com/andrewheiss/purr_stargazer

model.bivar <- cdata %>% lm(free_trade ~ avg_school, data = .)
summary(model.bivar)
## 
## Call:
## lm(formula = free_trade ~ avg_school, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.312  -3.796   2.073   6.254  19.341 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  57.5223     2.5796   22.30  < 2e-16 ***
## avg_school    2.2859     0.2992    7.64 3.39e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.167 on 137 degrees of freedom
##   (54 observations deleted due to missingness)
## Multiple R-squared:  0.2988, Adjusted R-squared:  0.2936 
## F-statistic: 58.37 on 1 and 137 DF,  p-value: 3.39e-12
model.multi <- cdata %>% lm(free_trade ~ ., data = .)
summary(model.multi)
## 
## Call:
## lm(formula = free_trade ~ ., data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.445  -4.631   2.320   5.580  18.811 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             57.034      2.506  22.759  < 2e-16 ***
## avg_school               1.936      0.311   6.225 5.59e-09 ***
## democracy1. Democracy    5.252      1.684   3.118  0.00222 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.888 on 136 degrees of freedom
##   (54 observations deleted due to missingness)
## Multiple R-squared:  0.3455, Adjusted R-squared:  0.3359 
## F-statistic:  35.9 on 2 and 136 DF,  p-value: 3.019e-13
stargazer(type = "text",
          model.bivar,
          model.multi,
          column.labels = c("Bivariate", "Multivariate")
          )
## 
## =====================================================================
##                                     Dependent variable:              
##                       -----------------------------------------------
##                                         free_trade                   
##                              Bivariate             Multivariate      
##                                 (1)                     (2)          
## ---------------------------------------------------------------------
## avg_school                   2.286***                1.936***        
##                               (0.299)                 (0.311)        
##                                                                      
## democracy1. Democracy                                5.252***        
##                                                       (1.684)        
##                                                                      
## Constant                     57.522***               57.034***       
##                               (2.580)                 (2.506)        
##                                                                      
## ---------------------------------------------------------------------
## Observations                    139                     139          
## R2                             0.299                   0.346         
## Adjusted R2                    0.294                   0.336         
## Residual Std. Error      9.167 (df = 137)        8.888 (df = 136)    
## F Statistic           58.366*** (df = 1; 137) 35.903*** (df = 2; 136)
## =====================================================================
## Note:                                     *p<0.1; **p<0.05; ***p<0.01

Step 4

resids.bivar <-
  tibble(Fitted = model.bivar$fitted.values, # $fitted
         Residuals = model.bivar$residuals) # $resid

resids.bivar %>%
  ggplot(aes(Fitted, Residuals)) +
  geom_point() +
  geom_hline(yintercept = mean(model.bivar$residuals), col = "red") +
  labs(title = "Bivaraite Model")

resids.bivar %>%
  ggplot(aes(Fitted)) +
  geom_histogram() +
  labs(title = "Bivaraite Model")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

resids.multi <-
  tibble(Fitted = model.multi$fitted.values,
         Residuals = model.multi$residuals)

resids.multi %>%
  ggplot(aes(Fitted, Residuals)) +
  geom_point() +
  geom_hline(yintercept = mean(model.multi$residuals), col = "red") +
  labs(title = "Multivaraite Model")

resids.bivar %>%
  ggplot(aes(Fitted)) +
  geom_histogram() +
  labs(title = "Multivariate Model")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Violation Testing (Bonus)

cdata %>%
  ggplot(aes(sample = free_trade)) +
  geom_qq() +
  geom_qq_line(col = "red") +
  labs(title = "Dependant Variable (Free Trade) QQ Plot")
## Warning: Removed 15 rows containing non-finite values (stat_qq).
## Warning: Removed 15 rows containing non-finite values (stat_qq_line).

sqrt(vif(model.multi)) > 2 
## avg_school  democracy 
##      FALSE      FALSE
ncvTest(model.bivar)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 2.676806, Df = 1, p = 0.10182
ncvTest(model.multi)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 5.998279, Df = 1, p = 0.01432
coeftest(model.bivar, vcov=vcovHC(model.bivar, type="HC1")) # coef(summary(model.bivar))
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  57.5223     2.5603 22.4668 < 2.2e-16 ***
## avg_school    2.2859     0.2774  8.2402 1.233e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(model.multi, vcov=vcovHC(model.multi, type="HC1"))
## 
## t test of coefficients:
## 
##                       Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)           57.03418    2.49301 22.8777 < 2.2e-16 ***
## avg_school             1.93618    0.32134  6.0253 1.493e-08 ***
## democracy1. Democracy  5.25178    1.96556  2.6719  0.008464 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bgtest(model.bivar)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model.bivar
## LM test = 1.4805, df = 1, p-value = 0.2237
bgtest(model.multi)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model.multi
## LM test = 3.7936, df = 1, p-value = 0.05145