rm(list=ls(all=TRUE))
pacman::p_load(tidyverse, foreign, car, stargazer,
naniar, olsrr, haven, magrittr, GGally, DT, corrplot,
install = FALSE, update = FALSE) # lmtest, sandwich, reshape
setwd("G:/My Drive/homework")
cdata <- read_dta("Tommy K/countries.dta")
cdata %>% dim()
## [1] 193 58
# Count NAs for continuous variables
cdata %>%
select(!where(is.labelled)) %>%
select(!where(is_character)) %>%
select(!c(civil_lib, polity2)) %>%
lapply(., is.na) %>%
lapply(., sum) %>%
as.matrix()
## [,1]
## avg_school 50
## bornyear 1
## co2 7
## conf_church 138
## conf_gov 139
## conf_unions 138
## debt 124
## dist_mag 76
## durable 29
## eff_part 81
## ethnic_frac 6
## free_bus 13
## free_corrupt 13
## free_finance 15
## free_fiscal 16
## free_govt 15
## free_index 16
## free_invest 15
## free_labor 14
## free_monet 15
## free_propright 16
## free_trade 15
## gdp 51
## gdp_pc 51
## gend_equal 47
## gini 108
## globalize 12
## gov_eff 2
## health_exp 6
## hiv 46
## inf_mort 2
## inflation 6
## mil_exp 42
## oil_prod 23
## pop 51
## post_mater 139
## proportion_dem 0
## prot_area 5
## trade 19
## unemp 72
## urban_pop 2
## women_parlmnt 4
## years_office 19
# DV: globalize
# IV: bornyear, women_parlmnt, trade (can be discontinuous)
# ex: gdp, inflation, pop, unemp
cdata %<>% select(globalize, bornyear, women_parlmnt, trade)
cdata %>% glimpse()
## Rows: 193
## Columns: 4
## $ globalize <dbl> 81.25179, 78.26860, 40.06293, 40.03246, 60.77024, 41....
## $ bornyear <dbl> 1920, 1920, 1978, 1979, 1962, 1983, 1981, 1909, 1820,...
## $ women_parlmnt <dbl> 27.3, 33.6, 14.3, 21.7, 13.3, 6.7, 10.5, 43.2, 4.1, 1...
## $ trade <dbl> 44.86351, 54.37358, 87.52569, 86.31783, 86.67049, 75....
cdata %>% summary()
## globalize bornyear women_parlmnt trade
## Min. :22.81 Min. :1624 Min. : 0.00 Min. : 22.12
## 1st Qu.:45.54 1st Qu.:1919 1st Qu.: 9.10 1st Qu.: 57.59
## Median :54.71 Median :1960 Median :15.60 Median : 78.61
## Mean :57.09 Mean :1937 Mean :17.37 Mean : 86.13
## 3rd Qu.:68.02 3rd Qu.:1976 3rd Qu.:23.50 3rd Qu.:104.64
## Max. :92.61 Max. :2006 Max. :56.30 Max. :421.57
## NA's :12 NA's :1 NA's :4 NA's :19
cdata %>% filter(!complete.cases(.)) %>% nrow()
## [1] 22
cdata %>% slice_sample(n = 10)
## # A tibble: 10 x 4
## globalize bornyear women_parlmnt trade
## <dbl> <dbl> <dbl> <dbl>
## 1 NA 1990 3 NA
## 2 86.6 1816 27.4 63.5
## 3 59.3 1956 27.6 93.0
## 4 NA 1948 15.6 NA
## 5 52.5 1966 30 NA
## 6 48.2 1975 39.2 68.1
## 7 43.1 1960 12.5 81.6
## 8 NA 1968 NA NA
## 9 64.2 1920 44.5 55.7
## 10 55.0 1990 26.9 110.
https://stackoverflow.com/a/61501002
cdata %>% ggpairs()
to_string <- as_labeller(c(`globalize` = "Global Index", # ' also works instead of `
`bornyear` = "Independence Year",
`trade` = "Trade % of GDP EE-6",
`women_parlmnt` = "Women % Parliment"
)
)
cdata %>%
gather() %>%
ggplot(aes(value)) +
geom_histogram(aes(y = stat(density))) +
geom_density(col = "red") +
facet_wrap(vars(key),
scales = "free",
labeller = to_string
)
cdata %>%
gather() %>%
ggplot(aes(y = value)) +
geom_boxplot() +
facet_wrap(vars(key),
scales = "free",
labeller = to_string
)
to_string <- as_labeller(c(`bornyear` = "Independence Year",
`trade` = "Trade % of GDP EE-6",
`women_parlmnt` = "Women % Parliment"
)
)
cdata %>%
gather(IV, value, -globalize) %>%
ggplot(aes(value, globalize)) +
geom_point() +
# geom_smooth(method = loess) +
facet_wrap(vars(IV),
scales = "free",
labeller = to_string
) +
ggtitle("Global Index by Our Indicators")
https://cran.r-project.org/web/packages/naniar/vignettes/naniar-visualisation.html https://www.wikiwand.com/en/Missing_data#/Types
cdata %>% vis_miss()
cdata %>% gg_miss_upset()
to_string <- as_labeller(c(`bornyear` = "Independence Year",
`trade` = "Trade % of GDP EE-6",
`women_parlmnt` = "Women % Parliment"
)
)
cdata %>%
gather(IV, value, -globalize) %>%
ggplot(aes(value, globalize)) +
geom_miss_point() +
facet_wrap(vars(IV),
scales = "free",
labeller = to_string
) +
ggtitle("Global Index by Our IV Indicators")
model.fit <-
cdata %>%
lm(globalize ~ ., data = .)
summary(model.fit)
##
## Call:
## lm(formula = globalize ~ ., data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.769 -10.170 -0.050 9.672 35.035
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 251.33282 35.38133 7.104 3.35e-11 ***
## bornyear -0.10753 0.01827 -5.886 2.11e-08 ***
## women_parlmnt 0.26786 0.10076 2.658 0.00862 **
## trade 0.11528 0.02404 4.796 3.57e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.17 on 167 degrees of freedom
## (22 observations deleted due to missingness)
## Multiple R-squared: 0.2719, Adjusted R-squared: 0.2588
## F-statistic: 20.79 on 3 and 167 DF, p-value: 1.706e-11
stargazer(model.fit, type = "text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## globalize
## -----------------------------------------------
## bornyear -0.108***
## (0.018)
##
## women_parlmnt 0.268***
## (0.101)
##
## trade 0.115***
## (0.024)
##
## Constant 251.333***
## (35.381)
##
## -----------------------------------------------
## Observations 171
## R2 0.272
## Adjusted R2 0.259
## Residual Std. Error 14.167 (df = 167)
## F Statistic 20.788*** (df = 3; 167)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
model.fit %>%
influencePlot(.,
id.method="identify",
main="Influence Plot",
sub="Circle size is proportial to Cook's Distance"
)
## StudRes Hat CookD
## 34 -1.5709919 0.09435629 0.06372361
## 115 -0.5617155 0.32569323 0.03825672
## 135 -2.4326624 0.01164814 0.01693730
## 174 2.5255068 0.01028752 0.01605731
## 181 -2.0912677 0.20338750 0.27362204
model.fit %>% leveragePlots()
ols_plot_resid_stud(model.fit)
plot(model.fit, which = 4)
ols_plot_dfbetas(model.fit)
model.fit <-
cdata %>%
slice(-c(34, 115, 135, 174, 181)) %>%
lm(globalize ~ ., data = .)
summary(model.fit)
##
## Call:
## lm(formula = globalize ~ ., data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.879 -9.310 -0.138 9.450 31.480
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 287.00446 38.15911 7.521 3.52e-12 ***
## bornyear -0.12671 0.01970 -6.432 1.35e-09 ***
## women_parlmnt 0.28924 0.10375 2.788 0.00594 **
## trade 0.13182 0.02825 4.667 6.37e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.54 on 162 degrees of freedom
## (22 observations deleted due to missingness)
## Multiple R-squared: 0.3111, Adjusted R-squared: 0.2983
## F-statistic: 24.38 on 3 and 162 DF, p-value: 4.496e-13
model.fit %>%
influencePlot(.,
id.method="identify",
main="Influence Plot",
sub="Circle size is proportial to Cook's Distance"
)
## StudRes Hat CookD
## 9 -2.28261259 0.05234699 7.013000e-02
## 33 -1.98217955 0.08913622 9.441579e-02
## 72 -2.45595552 0.01729790 2.574356e-02
## 127 -2.56685483 0.01771259 2.871153e-02
## 182 0.03872247 0.19669014 9.235292e-05
model.fit %>% leveragePlots()
ols_plot_resid_stud(model.fit)
plot(model.fit, which = 4)
ols_plot_dfbetas(model.fit)