pacman::p_load(tidyverse, haven, magrittr, GGally,
foreign, car, lmtest, sandwich, stargazer,
install = TRUE, update = TRUE
)
##
## There are binary versions available but the source versions are later:
## binary source needs_compilation
## textshaping 0.2.1 0.3.0 TRUE
## usethis 2.0.0 2.0.1 FALSE
##
## Binaries will be installed
## package 'textshaping' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\n8atk\AppData\Local\Temp\RtmpUFSFPU\downloaded_packages
## installing the source package 'usethis'
# foreign::read.dta preserves democrary as factored
cdata <- read_dta("G:/My Drive/homework/Tommy K/countries.dta")
cdata %<>% select(free_trade, avg_school, democracy)
class(cdata$democracy) # dbl_lbl
## [1] "haven_labelled" "vctrs_vctr" "double"
cdata %<>% mutate(democracy = as_factor(democracy))
# cdata %<>% filter(complete.cases(.))
cdata %>% class
## [1] "tbl_df" "tbl" "data.frame"
cdata %>% glimpse()
## Rows: 193
## Columns: 3
## $ free_trade <dbl> 85.1, 86.0, 74.3, 73.3, 72.2, NA, NA, 61.7, 79.1, 42.2, ...
## $ avg_school <dbl> 12.123364, 12.677570, NA, NA, 9.752500, NA, NA, 10.56048...
## $ democracy <fct> 1. Democracy, 1. Democracy, 1. Democracy, 1. Democracy, ...
cdata %>% summary()
## free_trade avg_school democracy
## Min. : 0.00 Min. : 1.810 0. Dictatorship: 74
## 1st Qu.:67.25 1st Qu.: 6.098 1. Democracy :118
## Median :75.90 Median : 8.645 NA's : 1
## Mean :74.09 Mean : 8.142
## 3rd Qu.:85.00 3rd Qu.:10.146
## Max. :90.00 Max. :13.090
## NA's :15 NA's :50
cdata %>% slice_sample(n = 10)
## # A tibble: 10 x 3
## free_trade avg_school democracy
## <dbl> <dbl> <fct>
## 1 44.5 6.02 1. Democracy
## 2 NA NA 1. Democracy
## 3 74.9 7.69 0. Dictatorship
## 4 83.2 NA 0. Dictatorship
## 5 85 8.93 1. Democracy
## 6 80.5 10.5 1. Democracy
## 7 87.5 10.4 1. Democracy
## 8 76 8.48 0. Dictatorship
## 9 64.3 4.60 0. Dictatorship
## 10 42.2 NA 1. Democracy
cdata %>%
mutate(democracy = unclass(democracy) - 1) %>% # stargazer doesn't process factors
as.data.frame() %>% # stargazer data.frame to be the only class
stargazer(type = "text", # type = "html"
title = "Summary Statistics",
nobs = TRUE,
mean.sd = TRUE,
median = TRUE,
min.max = TRUE,
iqr = TRUE,
covariate.labels = c("Free Trade (countries)",
"Average Schooling (years for ages >15)",
"Democracy (countries, categorical)"
)
)
##
## Summary Statistics
## ================================================================================================
## Statistic N Mean St. Dev. Min Pctl(25) Median Pctl(75) Max
## ------------------------------------------------------------------------------------------------
## Free Trade (countries) 178 74.088 13.205 0.000 67.250 75.900 85.000 90.000
## Average Schooling (years for ages >15) 143 8.142 2.637 1.810 6.098 8.645 10.146 13.090
## Democracy (countries, categorical) 192 0.615 0.488 0.000 0.000 1.000 1.000 1.000
## ------------------------------------------------------------------------------------------------
# cdata %>% ggplot(aes(free_trade)) + geom_histogram()
# cdata %>% ggplot(aes(avg_school)) + geom_histogram()
# cdata %>% ggplot(aes(democracy)) + geom_histogram(stat = "count")
cdata %>%
mutate(democracy = unclass(democracy) - 1) %>%
gather() %>%
ggplot(aes(value)) +
geom_histogram() +
facet_wrap(vars(key), scales = "free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# cdata %>% ggpairs()
cdata %>% filter(complete.cases(.)) %>% ggpairs(axisLabels = "none")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
https://api.rpubs.com/andrewheiss/purr_stargazer
model.bivar <- cdata %>% lm(free_trade ~ avg_school, data = .)
summary(model.bivar)
##
## Call:
## lm(formula = free_trade ~ avg_school, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.312 -3.796 2.073 6.254 19.341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.5223 2.5796 22.30 < 2e-16 ***
## avg_school 2.2859 0.2992 7.64 3.39e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.167 on 137 degrees of freedom
## (54 observations deleted due to missingness)
## Multiple R-squared: 0.2988, Adjusted R-squared: 0.2936
## F-statistic: 58.37 on 1 and 137 DF, p-value: 3.39e-12
model.multi <- cdata %>% lm(free_trade ~ ., data = .)
summary(model.multi)
##
## Call:
## lm(formula = free_trade ~ ., data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.445 -4.631 2.320 5.580 18.811
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.034 2.506 22.759 < 2e-16 ***
## avg_school 1.936 0.311 6.225 5.59e-09 ***
## democracy1. Democracy 5.252 1.684 3.118 0.00222 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.888 on 136 degrees of freedom
## (54 observations deleted due to missingness)
## Multiple R-squared: 0.3455, Adjusted R-squared: 0.3359
## F-statistic: 35.9 on 2 and 136 DF, p-value: 3.019e-13
stargazer(type = "text",
model.bivar,
model.multi,
column.labels = c("Bivariate", "Multivariate")
)
##
## =====================================================================
## Dependent variable:
## -----------------------------------------------
## free_trade
## Bivariate Multivariate
## (1) (2)
## ---------------------------------------------------------------------
## avg_school 2.286*** 1.936***
## (0.299) (0.311)
##
## democracy1. Democracy 5.252***
## (1.684)
##
## Constant 57.522*** 57.034***
## (2.580) (2.506)
##
## ---------------------------------------------------------------------
## Observations 139 139
## R2 0.299 0.346
## Adjusted R2 0.294 0.336
## Residual Std. Error 9.167 (df = 137) 8.888 (df = 136)
## F Statistic 58.366*** (df = 1; 137) 35.903*** (df = 2; 136)
## =====================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
resids.bivar <-
tibble(Fitted = model.bivar$fitted.values, # $fitted
Residuals = model.bivar$residuals) # $resid
resids.bivar %>%
ggplot(aes(Fitted, Residuals)) +
geom_point() +
geom_hline(yintercept = mean(model.bivar$residuals), col = "red") +
labs(title = "Bivaraite Model")
resids.bivar %>%
ggplot(aes(Fitted)) +
geom_histogram() +
labs(title = "Bivaraite Model")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
resids.multi <-
tibble(Fitted = model.multi$fitted.values,
Residuals = model.multi$residuals)
resids.multi %>%
ggplot(aes(Fitted, Residuals)) +
geom_point() +
geom_hline(yintercept = mean(model.multi$residuals), col = "red") +
labs(title = "Multivaraite Model")
resids.bivar %>%
ggplot(aes(Fitted)) +
geom_histogram() +
labs(title = "Multivariate Model")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
cdata %>%
ggplot(aes(sample = free_trade)) +
geom_qq() +
geom_qq_line(col = "red") +
labs(title = "Dependant Variable (Free Trade) QQ Plot")
## Warning: Removed 15 rows containing non-finite values (stat_qq).
## Warning: Removed 15 rows containing non-finite values (stat_qq_line).
sqrt(vif(model.multi)) > 2
## avg_school democracy
## FALSE FALSE
ncvTest(model.bivar)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 2.676806, Df = 1, p = 0.10182
ncvTest(model.multi)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 5.998279, Df = 1, p = 0.01432
coeftest(model.bivar, vcov=vcovHC(model.bivar, type="HC1")) # coef(summary(model.bivar))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.5223 2.5603 22.4668 < 2.2e-16 ***
## avg_school 2.2859 0.2774 8.2402 1.233e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(model.multi, vcov=vcovHC(model.multi, type="HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.03418 2.49301 22.8777 < 2.2e-16 ***
## avg_school 1.93618 0.32134 6.0253 1.493e-08 ***
## democracy1. Democracy 5.25178 1.96556 2.6719 0.008464 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bgtest(model.bivar)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: model.bivar
## LM test = 1.4805, df = 1, p-value = 0.2237
bgtest(model.multi)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: model.multi
## LM test = 3.7936, df = 1, p-value = 0.05145