Install packages

# Install tidyverse only when it is missing, so re-running the document does
# not download the package again on every run.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse", repos = "https://cloud.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages

# Attach the tidyverse (supplies read_csv() and the %>% pipe used below).
library("tidyverse")

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Install psych only when it is missing, so re-running the document does not
# download the package again on every run.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych", repos = "https://cloud.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages

# Attach psych (supplies describe() for the summary statistics table).
library("psych")

## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

# Install knitr only when it is missing, so re-running the document does not
# download the package again on every run.
if (!requireNamespace("knitr", quietly = TRUE)) {
  install.packages("knitr", repos = "https://cloud.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages

# Attach knitr (supplies kable() for the summary statistics table).
library("knitr")

# Install kableExtra only when it is missing, so re-running the document does
# not download the package again on every run.
if (!requireNamespace("kableExtra", quietly = TRUE)) {
  install.packages("kableExtra", repos = "https://cloud.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages

# Attach kableExtra (supplies footnote() and kable_styling() used on table1).
library(kableExtra)

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

# Install apaTables only when it is missing, so re-running the document does
# not download the package again on every run.
if (!requireNamespace("apaTables", quietly = TRUE)) {
  install.packages("apaTables", repos = "https://cloud.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages

# Attach apaTables (supplies apa.cor.table() for the correlation table).
library(apaTables)

# Install robustbase only when it is missing, so re-running the document does
# not download the package again on every run.
if (!requireNamespace("robustbase", quietly = TRUE)) {
  install.packages("robustbase", repos = "https://cloud.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages

# Attach robustbase (supplies lmrob() used for the pooled model).
library(robustbase)

# Install plm only when it is missing, so re-running the document does not
# download the package again on every run.
if (!requireNamespace("plm", quietly = TRUE)) {
  install.packages("plm", repos = "https://cloud.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages

# Attach plm (supplies plm() for the fixed-effects panel models).
library("plm")

## 
## Attaching package: 'plm'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead

# Install modelsummary only when it is missing, so re-running the document
# does not download the package again on every run.
if (!requireNamespace("modelsummary", quietly = TRUE)) {
  install.packages("modelsummary", repos = "https://cloud.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages

# Attach modelsummary (supplies modelsummary() for the side-by-side tables).
library(modelsummary)

## 
## Attaching package: 'modelsummary'
## 
## The following object is masked from 'package:psych':
## 
##     SD

# Install AER only when it is missing, so re-running the document does not
# download the package again on every run.
if (!requireNamespace("AER", quietly = TRUE)) {
  install.packages("AER", repos = "https://cloud.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdHOJb7/downloaded_packages

# Attach AER (supplies ivreg() for the instrumental-variable models).
library(AER)

## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: sandwich
## Loading required package: survival
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:robustbase':
## 
##     heart

Download data

# Read the panel dataset from the working directory into a tibble.
dt <- read_csv(file = "pd1.csv")

## Rows: 360 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): country
## dbl (21): year, proportion of urban population living in slums, gdp_per_capi...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Filtering the data to keep only numeric variables

# Numeric-only copy of the data: drop the first two columns
# (presumably country and year, per the column specification printed above).
dt1 <- dt[, -(1:2)]

# Remove rows 310-312, then log-transform columns 2, 5 and 6 of the result.
# NOTE(review): dt2 drops rows 310-312 while df below drops rows 307-312;
# confirm that the two different row ranges are intentional.
dt2 <- dt1[-(310:312), ]
dt2[, c(2, 5, 6)] <- log(dt2[, c(2, 5, 6)])

# Modelling data: keeps the two leading columns (needed as the panel index),
# so the same three variables sit two positions further right (4, 7, 8).
df <- dt[-(307:312), ]
df[, c(4, 7, 8)] <- log(df[, c(4, 7, 8)])

# Columns of dt2 kept for the correlation table. This single selection is
# equivalent to the successive drops [,-3], [,-4], [,-4], [,-5:-10], [,-11].
dtcor <- dt2[, c(1, 2, 4, 7, 14:19)]

Creating a summary statistics table

# Strongly penalise scientific notation so numbers print in fixed notation.
options(scipen = 999)

# Descriptive statistics for every column of dt2.
summary_table1 <- describe(dt2)

# Render the statistics as an HTML table, attach the sample-size footnote and
# shrink the font; the nesting order mirrors kable -> footnote -> styling.
# NOTE(review): the footnote states 360 observations, but dt2 had three rows
# removed earlier — confirm the intended N.
table1 <- kable_styling(
  footnote(
    knitr::kable(
      summary_table1,
      "html",
      caption = "Summary Statistics country-level Panel Dataset",
      digits = 3
    ),
    general = "N = 60 countries and 360 observations "
  ),
  font_size = 10
)
table1

Summary Statistics country-level Panel Dataset
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
proportion of urban population living in slums	1	310	53.207	22.884	54.650	53.252	27.502	3.300	98.900	95.600	-0.059	-0.923	1.300
gdp_per_capita_US_dollars	2	346	7.942	0.894	7.884	7.941	1.011	6.073	9.719	3.646	0.040	-1.025	0.048
gdp growth	3	343	4.193	4.346	4.500	4.217	2.669	-6.900	35.200	42.100	1.085	8.143	0.235
unemployment rate	4	295	8.218	6.111	7.400	7.272	4.448	0.600	37.300	36.700	1.873	4.524	0.356
refugees number	5	320	9.464	2.874	9.790	9.601	2.990	0.000	14.996	14.996	-0.495	-0.099	0.161
total population	6	133	17.497	1.475	17.301	17.402	1.686	14.885	20.999	6.115	0.531	-0.139	0.128
percent urban	7	357	40.536	19.385	37.100	39.262	18.236	2.500	90.800	88.300	0.567	-0.393	1.026
urbangrowth	8	356	3.618	1.796	3.500	3.499	1.483	-0.800	14.500	15.300	1.907	8.936	0.095
popdensity	9	357	102.239	154.570	53.100	70.689	59.007	1.400	1148.500	1147.100	4.021	20.793	8.181
income share of poorest 20 percent	10	75	45.255	9.283	45.700	45.220	9.933	28.600	63.100	34.500	0.004	-1.057	1.072
gini	11	75	5.289	2.239	5.000	5.218	2.372	0.800	9.400	8.600	0.284	-0.998	0.259
income share of richest 10%	12	75	35.775	7.046	36.100	35.582	9.192	23.600	51.700	28.100	0.247	-0.971	0.814
urban poverty rate	13	60	27.185	14.602	28.350	26.460	18.829	1.000	61.500	60.500	0.236	-0.832	1.885
phones	14	253	26.596	28.894	15.800	22.033	22.387	0.100	131.100	131.000	1.181	0.634	1.817
internet	15	158	4.222	8.518	0.700	2.317	0.890	0.100	69.400	69.300	4.247	23.897	0.678
healthcare spending as percent of GDP	16	287	6.136	7.278	5.100	5.335	1.483	0.800	80.200	79.400	8.888	82.079	0.430
infant mortality rate	17	354	63.136	32.456	59.400	61.476	38.177	12.900	158.000	145.100	0.396	-0.734	1.725
HDI	18	104	0.529	0.130	0.500	0.533	0.148	0.200	0.800	0.600	-0.174	-0.538	0.013
government effectiveness	19	294	-0.599	0.575	-0.550	-0.570	0.556	-2.340	0.880	3.220	-0.476	0.163	0.034
political stability	20	297	-0.740	0.846	-0.600	-0.697	0.860	-3.320	1.020	4.340	-0.472	-0.201	0.049
Note:
N = 60 countries and 360 observations

Creating a correlation table to identify our independent variables

# Print the APA-style correlation table for the candidate variables and also
# write it to "cortable6.doc". The original call ended with a dangling empty
# `table.number = ` argument; it is dropped here so the function's default
# table number applies explicitly rather than through an empty argument.
apa.cor.table(dtcor,
              filename = "cortable6.doc")

## 
## 
## Means, standard deviations, and correlations with confidence intervals
##  
## 
##   Variable                                          M     SD    1           
##   1. proportion of urban population living in slums 53.21 22.88             
##                                                                             
##   2. gdp_per_capita_US_dollars                      7.94  0.89  -.77**      
##                                                                 [-.81, -.72]
##                                                                             
##   3. unemployment rate                              8.22  6.11  -.33**      
##                                                                 [-.43, -.22]
##                                                                             
##   4. percent urban                                  40.54 19.39 -.63**      
##                                                                 [-.69, -.56]
##                                                                             
##   5. phones                                         26.60 28.89 -.49**      
##                                                                 [-.58, -.38]
##                                                                             
##   6. internet                                       4.22  8.52  -.46**      
##                                                                 [-.58, -.32]
##                                                                             
##   7. healthcare spending as percent of GDP          6.14  7.28  -.11        
##                                                                 [-.23, .02] 
##                                                                             
##   8. infant mortality rate                          63.14 32.46 .75**       
##                                                                 [.69, .79]  
##                                                                             
##   9. HDI                                            0.53  0.13  -.73**      
##                                                                 [-.81, -.63]
##                                                                             
##   10. government effectiveness                      -0.60 0.57  -.54**      
##                                                                 [-.62, -.45]
##                                                                             
##   2            3            4            5            6            7           
##                                                                                
##                                                                                
##                                                                                
##                                                                                
##                                                                                
##   .29**                                                                        
##   [.18, .39]                                                                   
##                                                                                
##   .73**        .18**                                                           
##   [.68, .78]   [.07, .29]                                                      
##                                                                                
##   .48**        .09          .44**                                              
##   [.38, .57]   [-.03, .21]  [.34, .54]                                         
##                                                                                
##   .55**        .31**        .51**        .56**                                 
##   [.43, .65]   [.16, .44]   [.38, .62]   [.45, .66]                            
##                                                                                
##   -.03         .23**        -.15**       .21**        .05                      
##   [-.14, .09]  [.12, .34]   [-.26, -.04] [.09, .33]   [-.11, .21]              
##                                                                                
##   -.69**       -.12*        -.60**       -.49**       -.40**       -.13*       
##   [-.74, -.63] [-.23, -.00] [-.66, -.53] [-.58, -.39] [-.52, -.26] [-.24, -.01]
##                                                                                
##   .85**        .29*         .78**        .58**        .50**        -.21        
##   [.79, .90]   [.07, .48]   [.70, .85]   [.41, .72]   [.23, .69]   [-.42, .01] 
##                                                                                
##   .51**        .17**        .27**        .27**        .44**        -.02        
##   [.42, .59]   [.06, .28]   [.16, .38]   [.15, .38]   [.31, .56]   [-.13, .10] 
##                                                                                
##   8            9         
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##                          
##   -.88**                 
##   [-.92, -.83]           
##                          
##   -.54**       .55**     
##   [-.62, -.46] [.38, .69]
##                          
## 
## Note. M and SD are used to represent mean and standard deviation, respectively.
## Values in square brackets indicate the 95% confidence interval.
## The confidence interval is a plausible range of population correlations 
## that could have caused the sample correlation (Cumming, 2014).
##  * indicates p < .05. ** indicates p < .01.
##

Create scatter plots for our most important variables

# Scatter plot 1: slum prevalence against the infant mortality rate.
# `frame.plot` is spelled out in full instead of relying on partial matching
# of `frame`, and the title typo ("Moratlity") is corrected.
# NOTE(review): the subtitle hard-codes N = 360, but dt2 had rows removed and
# the plotted columns contain missing values — confirm the intended N.
plot(x = dt2$`infant mortality rate`,
     y = dt2$`proportion of urban population living in slums`,
     xlim = c(0, 150),
     xlab = "Infant Mortality Rate",
     ylab = "Urban Pop Living Slums",
     main = "Prevalence of Slums on Infant Mortality Rate",
     sub = "N = 360 observations",
     frame.plot = TRUE,
     col = "pink"
     )

# Scatter plot 2: slum prevalence against (log) GDP per capita.

plot(x = dt2$gdp_per_capita_US_dollars,
     y = dt2$`proportion of urban population living in slums`,
     main = "Prevalence of Slums on GDP per capita",
     xlab = "GDP per capita (US Dollar)",
     ylab = "Urban Pop living in Slums",
     sub = "N = 360 observations",
     frame.plot = TRUE,
     col = "pink")

# Scatter plot 3: slum prevalence against HDI.

plot(x = dt2$HDI,
     y = dt2$`proportion of urban population living in slums`,
     main = "Prevalence of Slums on HDI",
     xlab = "HDI",
     ylab = "Urban Pop living in Slums",
     sub = "N = 360 observations",
     frame.plot = TRUE,
     col = "pink")

Creating the Pooled-OLS Model

# Baseline specification: slum prevalence regressed on seven country-level
# covariates taken from `df`, fitted with lmrob().
# NOTE(review): lmrob() is robustbase's robust-regression fitter rather than
# lm(); confirm that the "Pooled-OLS" label used for this model is intended.
# NOTE(review): terms are written as df$... with no `data` argument;
# presumably bare column names with data = df were intended — confirm.
PooledOLS <- lmrob(
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate`
)

Creating the FE model

# Within (fixed-effects) panel model with the same specification as the
# pooled model, indexed by country and year.
# NOTE(review): the formula refers to df$... columns even though data = df is
# supplied, and plm() builds its own indexed copy of `data` (see the
# pdata.frame warning it emits); presumably bare column names were intended —
# confirm.
Fe_model <- plm(
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate`,
  data = df,
  index = c("country", "year"),
  model = "within"
)

## Warning in pdata.frame(data, index): at least one NA in at least one index dimension in resulting pdata.frame
##  to find out which, use, e.g., table(index(your_pdataframe), useNA = "ifany")

Addressing the simultaneity issue by adding an instrumental variable (IV).

# IV regression with one excluded instrument. Left of `|` is the structural
# equation; right of `|` is the instrument set. Infant mortality rate is the
# only regressor missing from the instrument set, and internet is the only
# instrument that is not also a regressor.
# NOTE(review): terms are written as df$... although data = df is supplied;
# presumably bare column names were intended — confirm.
IVreg <- ivreg(
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` |
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` +
    df$internet,
  data = df
)

Testing for relevance

# Same structural equation as IVreg, but with two excluded instruments:
# internet and healthcare spending as percent of GDP. Infant mortality rate
# is again the only regressor absent from the instrument set.
# NOTE(review): terms are written as df$... although data = df is supplied;
# presumably bare column names were intended — confirm.
IVreg2 <- ivreg(
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` |
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` +
    df$internet +
    df$`healthcare spending as percent of GDP`,
  data = df
)

Performing the F test

# Summarise the one-instrument IV model using the sandwich covariance
# estimator, and request the diagnostic tests (weak instruments, Wu-Hausman,
# Sargan) that appear in the printed output.
summary(
  IVreg,
  vcov. = sandwich,
  diagnostics = TRUE
)

## 
## Call:
## ivreg(formula = df$`proportion of urban population living in slums` ~ 
##     df$`infant mortality rate` + df$gdp_per_capita_US_dollars + 
##         df$HDI + df$`percent urban` + df$`government effectiveness` + 
##         df$phones + df$`unemployment rate` | df$gdp_per_capita_US_dollars + 
##         df$HDI + df$`percent urban` + df$`government effectiveness` + 
##         df$phones + df$`unemployment rate` + df$internet, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -165.724  -56.999   -1.409   17.078  227.552 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     940.4844  1900.6891   0.495    0.624
## df$`infant mortality rate`       -5.8616    13.4253  -0.437    0.665
## df$gdp_per_capita_US_dollars      7.8538    57.1528   0.137    0.892
## df$HDI                        -1271.5351  2737.3745  -0.465    0.645
## df$`percent urban`                0.1127     0.9390   0.120    0.905
## df$`government effectiveness`   -10.1106    43.6639  -0.232    0.818
## df$phones                        -0.4597     1.6208  -0.284    0.778
## df$`unemployment rate`            3.9628     9.5194   0.416    0.680
## 
## Diagnostic tests:
##                  df1 df2 statistic p-value   
## Weak instruments   1  33     0.237 0.62950   
## Wu-Hausman         1  32     9.541 0.00413 **
## Sargan             0  NA        NA      NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 86.87 on 33 degrees of freedom
## Multiple R-Squared: -17.4,   Adjusted R-squared: -21.3 
## Wald test: 0.6065 on 7 and 33 DF,  p-value: 0.7462

# Summarise the two-instrument IV model using the sandwich covariance
# estimator, and request the diagnostic tests (weak instruments, Wu-Hausman,
# Sargan) that appear in the printed output.
summary(
  IVreg2,
  vcov. = sandwich,
  diagnostics = TRUE
)

## 
## Call:
## ivreg(formula = df$`proportion of urban population living in slums` ~ 
##     df$`infant mortality rate` + df$gdp_per_capita_US_dollars + 
##         df$HDI + df$`percent urban` + df$`government effectiveness` + 
##         df$phones + df$`unemployment rate` | df$gdp_per_capita_US_dollars + 
##         df$HDI + df$`percent urban` + df$`government effectiveness` + 
##         df$phones + df$`unemployment rate` + df$internet + df$`healthcare spending as percent of GDP`, 
##     data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.8814  -5.6410   0.7004   5.6938  19.7651 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                    200.04952  134.15168   1.491  0.14570   
## df$`infant mortality rate`      -0.13699    0.92289  -0.148  0.88293   
## df$gdp_per_capita_US_dollars   -11.97501    4.39196  -2.727  0.01030 * 
## df$HDI                        -112.85270  198.11560  -0.570  0.57291   
## df$`percent urban`               0.13591    0.11065   1.228  0.22831   
## df$`government effectiveness`  -11.46071    4.09092  -2.802  0.00856 **
## df$phones                        0.09181    0.17636   0.521  0.60624   
## df$`unemployment rate`          -0.15817    0.70936  -0.223  0.82497   
## 
## Diagnostic tests:
##                  df1 df2 statistic p-value   
## Weak instruments   2  31     0.293 0.74840   
## Wu-Hausman         1  31     0.533 0.47086   
## Sargan             1  NA     8.200 0.00419 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.06 on 32 degrees of freedom
## Multiple R-Squared: 0.7547,  Adjusted R-squared: 0.701 
## Wald test:  51.1 on 7 and 32 DF,  p-value: 0.000000000000001395

Creating a Fixed Effects + IV model on the panel data

# Fixed-effects IV model with one excluded instrument (internet), estimated
# with two-way effects on the country/year panel.
# NOTE(review): this model sets effect = "twoways" while IV_FE2 does not, and
# IV_FE1 is not used in the summary tables below — confirm both are intended.
# NOTE(review): terms are written as df$... although data = df is supplied;
# presumably bare column names were intended — confirm.
IV_FE1 <- plm(
  data = df,
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` |
    df$internet +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate`,
  effect = "twoways",
  index = c("country", "year"),
  model = "within"
)

## Warning in pdata.frame(data, index): at least one NA in at least one index dimension in resulting pdata.frame
##  to find out which, use, e.g., table(index(your_pdataframe), useNA = "ifany")

IV + Fixed Effects + 2 instruments

# Fixed-effects IV model with two excluded instruments (internet and
# healthcare spending as percent of GDP) on the country/year panel.
# No `effect` argument is given, so plm()'s default effect applies.
# NOTE(review): terms are written as df$... although data = df is supplied;
# presumably bare column names were intended — confirm.
IV_FE2 <- plm(
  data = df,
  df$`proportion of urban population living in slums` ~
    df$`infant mortality rate` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate` |
    df$internet +
    df$`healthcare spending as percent of GDP` +
    df$gdp_per_capita_US_dollars +
    df$HDI +
    df$`percent urban` +
    df$`government effectiveness` +
    df$phones +
    df$`unemployment rate`,
  index = c("country", "year"),
  model = "within"
)

## Warning in pdata.frame(data, index): at least one NA in at least one index dimension in resulting pdata.frame
##  to find out which, use, e.g., table(index(your_pdataframe), useNA = "ifany")

Creating a Side By Side Table

# Named list of the four fitted models; the names become the column headers
# of the side-by-side table.
SBST3 <- list(
  `Pooled-OLS` = PooledOLS,
  `Fixed-Effects` = Fe_model,
  `IV model` = IVreg2,
  `IV+FE` = IV_FE2
)

# Write the comparison table to a Word document with significance stars and
# five-decimal formatting.
modelsummary(
  SBST3,
  output = "table3.5.docx",
  stars = TRUE,
  fmt = 5
)

# Reduced comparison: only the pooled and fixed-effects models.
SBST4 <- list(
  `Pooled-OLS` = PooledOLS,
  `Fixed-Effects` = Fe_model
)

# Write the two-model table to a Word document with significance stars and
# five-decimal formatting.
modelsummary(
  SBST4,
  output = "table3.4.docx",
  stars = TRUE,
  fmt = 5
)

Advanced Econometrics

Diego De Armas

2023-07-24

Install packages

Download data

Filtering the data to keep only numeric variables

Creating a summary statistics table

Creating a correlation table to identify our independent variables

Create scatter plots for our most important variables

Creating the Pooled-OLS Model

Creating the FE model

Addressing the simultaneity issue by adding an instrumental variable (IV).

Testing for relevance

Performing the F test

Creating a Fixed Effects + IV model on the panel data

IV + Fixed Effects + 2 instruments

Creating a Side By Side Table