Install and load packages

install.packages("tidyverse", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
install.packages("psych", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
install.packages("knitr",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages
library("knitr")
install.packages("kableExtra",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages
library("kableExtra")
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
install.packages("apaTables", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages
library(apaTables)
install.packages("robustbase",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages
library(robustbase)
install.packages("plm", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages
library(plm)
## 
## Attaching package: 'plm'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
install.packages("modelsummary", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages
library("modelsummary")
## 
## Attaching package: 'modelsummary'
## 
## The following object is masked from 'package:psych':
## 
##     SD
install.packages("AER",repos = 
"https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages
library("AER")
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: sandwich
## Loading required package: survival
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:robustbase':
## 
##     heart

Load data

# Read the raw panel from "pd1.csv" (path is relative to the working directory).
dt <- readr::read_csv(file = "pd1.csv")
## Rows: 360 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): country
## dbl (21): year, proportion of urban population living in slums, gdp_per_capi...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Filtering the data to keep only the numeric variables

# Build the three working data sets from the raw panel `dt`:
#   dt2   - numeric variables only, used for the summary table and the plots
#   df    - all columns (including the panel index), used for the regressions
#   dtcor - the ten variables shown in the correlation table

# Drop the first two columns of `dt`, keeping the 20 numeric variables.
dt1 <- dt[, -(1:2)]

# NOTE(review): dt2 drops rows 310-312 while df drops rows 307-312, so the two
# data sets do not cover the same observations - confirm this is intended.
dt2 <- dt1[-(310:312), ]

# Log-transform columns 2, 5 and 6 of dt2 (GDP per capita, refugees number and
# total population, going by the order of the summary table).
dt2[, c(2, 5, 6)] <- log(dt2[, c(2, 5, 6)])

df <- dt[-(307:312), ]

# Same three variables as above; their positions are shifted by the two leading
# columns that `df` keeps.
df[, c(4, 7, 8)] <- log(df[, c(4, 7, 8)])

# Select the correlation-table variables by name. Selecting by name fails
# loudly if a column is renamed or missing, instead of silently picking a
# neighbouring column the way chained positional deletions would.
cor_vars <- c(
  "proportion of urban population living in slums",
  "gdp_per_capita_US_dollars",
  "unemployment rate",
  "percent urban",
  "phones",
  "internet",
  "healthcare spending as percent of GDP",
  "infant mortality rate",
  "HDI",
  "government effectiveness"
)
dtcor <- dt2[, cor_vars]

Creating a summary statistics table

# Descriptive statistics for every column of dt2, rendered as an HTML table.

# Strongly prefer fixed notation over scientific notation when printing.
options(scipen = 999)

summary_table1 <- psych::describe(dt2)

# Build the table step by step: base kable -> footnote -> font size.
summary_kable <- knitr::kable(
  summary_table1,
  format = "html",
  caption = "Summary Statistics country-level Panel Dataset",
  digits = 3
)
summary_kable <- kableExtra::footnote(
  summary_kable,
  general = "N = 60 countries and 360 observations "
)
table1 <- kableExtra::kable_styling(summary_kable, font_size = 10)

table1
Summary Statistics country-level Panel Dataset
vars n mean sd median trimmed mad min max range skew kurtosis se
proportion of urban population living in slums 1 310 53.207 22.884 54.650 53.252 27.502 3.300 98.900 95.600 -0.059 -0.923 1.300
gdp_per_capita_US_dollars 2 346 7.942 0.894 7.884 7.941 1.011 6.073 9.719 3.646 0.040 -1.025 0.048
gdp growth 3 343 4.193 4.346 4.500 4.217 2.669 -6.900 35.200 42.100 1.085 8.143 0.235
unemployment rate 4 295 8.218 6.111 7.400 7.272 4.448 0.600 37.300 36.700 1.873 4.524 0.356
refugees number 5 320 9.464 2.874 9.790 9.601 2.990 0.000 14.996 14.996 -0.495 -0.099 0.161
total population 6 133 17.497 1.475 17.301 17.402 1.686 14.885 20.999 6.115 0.531 -0.139 0.128
percent urban 7 357 40.536 19.385 37.100 39.262 18.236 2.500 90.800 88.300 0.567 -0.393 1.026
urbangrowth 8 356 3.618 1.796 3.500 3.499 1.483 -0.800 14.500 15.300 1.907 8.936 0.095
popdensity 9 357 102.239 154.570 53.100 70.689 59.007 1.400 1148.500 1147.100 4.021 20.793 8.181
income share of poorest 20 percent 10 75 45.255 9.283 45.700 45.220 9.933 28.600 63.100 34.500 0.004 -1.057 1.072
gini 11 75 5.289 2.239 5.000 5.218 2.372 0.800 9.400 8.600 0.284 -0.998 0.259
income share of richest 10% 12 75 35.775 7.046 36.100 35.582 9.192 23.600 51.700 28.100 0.247 -0.971 0.814
urban poverty rate 13 60 27.185 14.602 28.350 26.460 18.829 1.000 61.500 60.500 0.236 -0.832 1.885
phones 14 253 26.596 28.894 15.800 22.033 22.387 0.100 131.100 131.000 1.181 0.634 1.817
internet 15 158 4.222 8.518 0.700 2.317 0.890 0.100 69.400 69.300 4.247 23.897 0.678
healthcare spending as percent of GDP 16 287 6.136 7.278 5.100 5.335 1.483 0.800 80.200 79.400 8.888 82.079 0.430
infant mortality rate 17 354 63.136 32.456 59.400 61.476 38.177 12.900 158.000 145.100 0.396 -0.734 1.725
HDI 18 104 0.529 0.130 0.500 0.533 0.148 0.200 0.800 0.600 -0.174 -0.538 0.013
government effectiveness 19 294 -0.599 0.575 -0.550 -0.570 0.556 -2.340 0.880 3.220 -0.476 0.163 0.034
political stability 20 297 -0.740 0.846 -0.600 -0.697 0.860 -3.320 1.020 4.340 -0.472 -0.201 0.049
Note:
N = 60 countries and 360 observations

Creating a correlation table to identify our independent variables

# Correlation table (means, SDs, pairwise correlations with confidence
# intervals) for the variables in dtcor; it is printed and also written to
# "cortable6.doc". `table.number` is not supplied, so the package default is
# used.
apa.cor.table(dtcor, filename = "cortable6.doc")
## 
## 
## Means, standard deviations, and correlations with confidence intervals
##  
## 
##   Variable                                          M     SD    1           
##   1. proportion of urban population living in slums 53.21 22.88             
##                                                                             
##   2. gdp_per_capita_US_dollars                      7.94  0.89  -.77**      
##                                                                 [-.81, -.72]
##                                                                             
##   3. unemployment rate                              8.22  6.11  -.33**      
##                                                                 [-.43, -.22]
##                                                                             
##   4. percent urban                                  40.54 19.39 -.63**      
##                                                                 [-.69, -.56]
##                                                                             
##   5. phones                                         26.60 28.89 -.49**      
##                                                                 [-.58, -.38]
##                                                                             
##   6. internet                                       4.22  8.52  -.46**      
##                                                                 [-.58, -.32]
##                                                                             
##   7. healthcare spending as percent of GDP          6.14  7.28  -.11        
##                                                                 [-.23, .02] 
##                                                                             
##   8. infant mortality rate                          63.14 32.46 .75**       
##                                                                 [.69, .79]  
##                                                                             
##   9. HDI                                            0.53  0.13  -.73**      
##                                                                 [-.81, -.63]
##                                                                             
##   10. government effectiveness                      -0.60 0.57  -.54**      
##                                                                 [-.62, -.45]
##                                                                             
##   2            3            4            5            6            7           
##                                                                                
##                                                                                
##                                                                                
##                                                                                
##                                                                                
##   .29**                                                                        
##   [.18, .39]                                                                   
##                                                                                
##   .73**        .18**                                                           
##   [.68, .78]   [.07, .29]                                                      
##                                                                                
##   .48**        .09          .44**                                              
##   [.38, .57]   [-.03, .21]  [.34, .54]                                         
##                                                                                
##   .55**        .31**        .51**        .56**                                 
##   [.43, .65]   [.16, .44]   [.38, .62]   [.45, .66]                            
##                                                                                
##   -.03         .23**        -.15**       .21**        .05                      
##   [-.14, .09]  [.12, .34]   [-.26, -.04] [.09, .33]   [-.11, .21]              
##                                                                                
##   -.69**       -.12*        -.60**       -.49**       -.40**       -.13*       
##   [-.74, -.63] [-.23, -.00] [-.66, -.53] [-.58, -.39] [-.52, -.26] [-.24, -.01]
##                                                                                
##   .85**        .29*         .78**        .58**        .50**        -.21        
##   [.79, .90]   [.07, .48]   [.70, .85]   [.41, .72]   [.23, .69]   [-.42, .01] 
##                                                                                
##   .51**        .17**        .27**        .27**        .44**        -.02        
##   [.42, .59]   [.06, .28]   [.16, .38]   [.15, .38]   [.31, .56]   [-.13, .10] 
##                                                                                
##   8            9         
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##   -.88**                 
##   [-.92, -.83]           
##                          
##   -.54**       .55**     
##   [-.62, -.46] [.38, .69]
##                          
## 
## Note. M and SD are used to represent mean and standard deviation, respectively.
## Values in square brackets indicate the 95% confidence interval.
## The confidence interval is a plausible range of population correlations 
## that could have caused the sample correlation (Cumming, 2014).
##  * indicates p < .05. ** indicates p < .01.
## 

Create scatter plots for our most important variables

# Scatter plots of slum prevalence (y) against three key explanatory variables.
# NOTE(review): every subtitle states "N = 360 observations", but dt2 has had
# rows removed and each pair of variables has missing values - confirm the N
# that should be reported.

# Scatter plot 1: slum prevalence against the infant mortality rate.
plot(
  x = dt2$`infant mortality rate`,
  y = dt2$`proportion of urban population living in slums`,
  xlim = c(0, 150),
  xlab = "Infant Mortality Rate",
  ylab = "Urban Pop Living Slums",
  main = "Prevalence of Slums on Infant Mortality Rate",
  sub = "N = 360 observations",
  frame.plot = TRUE,
  col = "pink"
)

# Scatter plot 2: slum prevalence against GDP per capita.
# NOTE(review): this dt2 column was log-transformed earlier in the script, so
# the axis appears to show log GDP per capita - confirm and label accordingly.
plot(
  x = dt2$gdp_per_capita_US_dollars,
  y = dt2$`proportion of urban population living in slums`,
  main = "Prevalence of Slums on GDP per capita",
  xlab = "GDP per capita (US Dollar)",
  ylab = "Urban Pop living in Slums",
  sub = "N = 360 observations",
  frame.plot = TRUE,
  col = "pink"
)

# Scatter plot 3: slum prevalence against the Human Development Index.
plot(
  x = dt2$HDI,
  y = dt2$`proportion of urban population living in slums`,
  main = "Prevalence of Slums on HDI",
  xlab = "HDI",
  ylab = "Urban Pop living in Slums",
  sub = "N = 360 observations",
  frame.plot = TRUE,
  col = "pink"
)

Creating the Pooled-OLS Model

# Pooled model: slum prevalence regressed on seven country-level covariates,
# ignoring the panel structure.
# NOTE(review): the object is called "Pooled OLS" but it is fitted with lmrob(),
# not lm() - confirm that a robust fit is what should be reported as OLS.
# NOTE(review): the terms are written as df$<column> and no `data` argument is
# given, so the coefficient labels carry the "df$" prefix.
PooledOLS <- lmrob(
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate`
)

Creating the FE model

# Fixed-effects ("within") panel model with the same specification as the
# pooled model, indexed by country and year.
# NOTE(review): the formula terms are written as df$<column>, so they are taken
# from the global `df` rather than looked up through `data`. Presumably this
# relies on `df` having the same row order as the panel frame plm builds from
# it - verify, especially given the NA-index warning this call emits.
Fe_model <- plm(
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate`,
  data = df,
  index = c("country", "year"),
  model = "within"
)
## Warning in pdata.frame(data, index): at least one NA in at least one index dimension in resulting pdata.frame
##  to find out which, use, e.g., table(index(your_pdataframe), useNA = "ifany")

Addressing the simultaneity issue by adding an instrumental variable (IV).

# IV regression with a single instrument.
# Left of `|`: the structural regressors. Right of `|`: the instrument set.
# The infant mortality rate is the only regressor missing from the right-hand
# part, and `internet` is the only variable added there, so `internet`
# instruments the infant mortality rate.
IVreg <- ivreg(
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` |
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` +
    df$internet,
  data = df
)

Testing for relevance

# IV regression with two instruments.
# Same structural equation as the single-instrument model; the instrument set
# (right of `|`) additionally contains healthcare spending, so the infant
# mortality rate is instrumented by `internet` and healthcare spending.
IVreg2 <- ivreg(
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` |
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` +
    df$internet +
    df$`healthcare spending as percent of GDP`,
  data = df
)

Performing the F test

# Single-instrument IV model: summary using `sandwich` as the covariance
# estimator, with the IV diagnostic tests included.
summary(
  IVreg,
  vcov. = sandwich,
  diagnostics = TRUE
)
## 
## Call:
## ivreg(formula = df$`proportion of urban population living in slums` ~ 
##     df$`infant mortality rate` + df$gdp_per_capita_US_dollars + 
##         df$HDI + df$`percent urban` + df$`government effectiveness` + 
##         df$phones + df$`unemployment rate` | df$gdp_per_capita_US_dollars + 
##         df$HDI + df$`percent urban` + df$`government effectiveness` + 
##         df$phones + df$`unemployment rate` + df$internet, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -165.724  -56.999   -1.409   17.078  227.552 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     940.4844  1900.6891   0.495    0.624
## df$`infant mortality rate`       -5.8616    13.4253  -0.437    0.665
## df$gdp_per_capita_US_dollars      7.8538    57.1528   0.137    0.892
## df$HDI                        -1271.5351  2737.3745  -0.465    0.645
## df$`percent urban`                0.1127     0.9390   0.120    0.905
## df$`government effectiveness`   -10.1106    43.6639  -0.232    0.818
## df$phones                        -0.4597     1.6208  -0.284    0.778
## df$`unemployment rate`            3.9628     9.5194   0.416    0.680
## 
## Diagnostic tests:
##                  df1 df2 statistic p-value   
## Weak instruments   1  33     0.237 0.62950   
## Wu-Hausman         1  32     9.541 0.00413 **
## Sargan             0  NA        NA      NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 86.87 on 33 degrees of freedom
## Multiple R-Squared: -17.4,   Adjusted R-squared: -21.3 
## Wald test: 0.6065 on 7 and 33 DF,  p-value: 0.7462
# Two-instrument IV model: summary using `sandwich` as the covariance
# estimator, with the IV diagnostic tests included.
summary(
  IVreg2,
  vcov. = sandwich,
  diagnostics = TRUE
)
## 
## Call:
## ivreg(formula = df$`proportion of urban population living in slums` ~ 
##     df$`infant mortality rate` + df$gdp_per_capita_US_dollars + 
##         df$HDI + df$`percent urban` + df$`government effectiveness` + 
##         df$phones + df$`unemployment rate` | df$gdp_per_capita_US_dollars + 
##         df$HDI + df$`percent urban` + df$`government effectiveness` + 
##         df$phones + df$`unemployment rate` + df$internet + df$`healthcare spending as percent of GDP`, 
##     data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.8814  -5.6410   0.7004   5.6938  19.7651 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                    200.04952  134.15168   1.491  0.14570   
## df$`infant mortality rate`      -0.13699    0.92289  -0.148  0.88293   
## df$gdp_per_capita_US_dollars   -11.97501    4.39196  -2.727  0.01030 * 
## df$HDI                        -112.85270  198.11560  -0.570  0.57291   
## df$`percent urban`               0.13591    0.11065   1.228  0.22831   
## df$`government effectiveness`  -11.46071    4.09092  -2.802  0.00856 **
## df$phones                        0.09181    0.17636   0.521  0.60624   
## df$`unemployment rate`          -0.15817    0.70936  -0.223  0.82497   
## 
## Diagnostic tests:
##                  df1 df2 statistic p-value   
## Weak instruments   2  31     0.293 0.74840   
## Wu-Hausman         1  31     0.533 0.47086   
## Sargan             1  NA     8.200 0.00419 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.06 on 32 degrees of freedom
## Multiple R-Squared: 0.7547,  Adjusted R-squared: 0.701 
## Wald test:  51.1 on 7 and 32 DF,  p-value: 0.000000000000001395

Creating an IV + Fixed Effects panel model (1 instrument)

# Within (fixed-effects) panel IV model with a single instrument: `internet`
# is the only variable in the instrument part (right of `|`) that is not a
# structural regressor, and the infant mortality rate is the only regressor
# absent from it.
# NOTE(review): this model sets effect = "twoways", whereas the two-instrument
# panel IV model does not set `effect` at all - confirm which is intended.
# NOTE(review): IV_FE1 is not included in either exported table in the visible
# script - confirm whether it is still needed.
IV_FE1 <- plm(
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` |
    df$internet +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate`,
  data = df,
  effect = "twoways",
  index = c("country", "year"),
  model = "within"
)
## Warning in pdata.frame(data, index): at least one NA in at least one index dimension in resulting pdata.frame
##  to find out which, use, e.g., table(index(your_pdataframe), useNA = "ifany")

IV + Fixed Effects + 2 instruments

# Within (fixed-effects) panel IV model with two instruments: `internet` and
# healthcare spending appear only in the instrument part (right of `|`), and
# the infant mortality rate is the only regressor absent from it.
# No `effect` argument is given, so plm's default effect applies.
IV_FE2 <- plm(
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` |
    df$internet +
    df$`healthcare spending as percent of GDP` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate`,
  data = df,
  index = c("country", "year"),
  model = "within"
)
## Warning in pdata.frame(data, index): at least one NA in at least one index dimension in resulting pdata.frame
##  to find out which, use, e.g., table(index(your_pdataframe), useNA = "ifany")

Creating a Side By Side Table

# Side-by-side regression tables exported to Word:
#   table3.5.docx - all four models
#   table3.4.docx - pooled and fixed-effects models only
SBST3 <- list(
  "Pooled-OLS" = PooledOLS,
  "Fixed-Effects" = Fe_model,
  "IV model" = IVreg2,
  "IV+FE" = IV_FE2
)

# The two-model table reuses the first two entries instead of listing them again.
SBST4 <- SBST3[c("Pooled-OLS", "Fixed-Effects")]

# The model formulas spell their terms as df$`column name`, so the raw
# coefficient labels carry a "df$" prefix and backticks. Strip both so the
# exported tables show plain variable names.
clean_term_labels <- function(term) {
  term <- gsub("df$", "", term, fixed = TRUE)
  gsub("`", "", term, fixed = TRUE)
}

modelsummary(
  SBST3,
  fmt = 5,
  stars = TRUE,
  coef_rename = clean_term_labels,
  output = "table3.5.docx"
)

modelsummary(
  SBST4,
  fmt = 5,
  stars = TRUE,
  coef_rename = clean_term_labels,
  output = "table3.4.docx"
)