Exemplo baseado nos dados do Atlas do Desenvolvimento Humano: relação da pobreza com a renda e com a desigualdade de renda.
\[y_i = \beta_1 + \beta_2 gini_i + \beta_3 renda_i + u_i \] \[\ln y_i = \beta_1 + \beta_2 \ln gini_i + \beta_3 \ln renda_i + u_i \]
Obs.: Para visualização das referências, ver ementa da disciplina.
# Import the data set from the Stata file
library(haven)
exemp5 <- read_dta("D:/dados/exemp5.dta")
# Open the spreadsheet-style viewer only in interactive sessions; View() has
# nothing useful to show (and may fail) when the script is run in batch mode
# or rendered to a report.
if (interactive()) {
  View(exemp5)
}
# Initial descriptive statistics
summary(exemp5)
## cod Espacialidades gini renda
## Min. :1100015 Length:5565 Min. :0.2800 Min. : 96.25
## 1st Qu.:2512101 Class :character 1st Qu.:0.4500 1st Qu.: 281.12
## Median :3146206 Mode :character Median :0.4900 Median : 467.65
## Mean :3253053 Mean :0.4944 Mean : 493.61
## 3rd Qu.:4119004 3rd Qu.:0.5400 3rd Qu.: 650.62
## Max. :5300108 Max. :0.8000 Max. :2043.74
##
## ext_pob pob regioes
## Min. : 0.02 Min. : 0.19 Min. :1.000
## 1st Qu.: 1.72 1st Qu.: 7.03 1st Qu.:2.000
## Median : 6.60 Median :18.15 Median :3.000
## Mean :11.47 Mean :23.21 Mean :2.897
## 3rd Qu.:19.22 3rd Qu.:38.52 3rd Qu.:4.000
## Max. :69.67 Max. :78.59 Max. :5.000
## NA's :64 NA's :2
# Drop every row that has a missing value in any column
exemp5 <- na.omit(exemp5)
# Create the natural logs of the variables
exemp5$lnpob <- log(exemp5$pob)
# The log of extreme poverty must be taken from ext_pob, not pob; using pob
# here would make ln_ext_pob an exact duplicate of lnpob.
exemp5$ln_ext_pob <- log(exemp5$ext_pob)
exemp5$lnrenda <- log(exemp5$renda)
exemp5$lngini <- log(exemp5$gini)
# Attach region names to the numeric region codes 1-5
# NOTE(review): regions look nominal rather than ranked, so an unordered
# factor() may be more appropriate; ordered() is kept so downstream plots and
# comparisons behave exactly as before -- confirm intent.
exemp5$regioes <- ordered(
  exemp5$regioes,
  levels = c(1, 2, 3, 4, 5),
  labels = c("Norte", "Nordeste", "Sudeste", "Sul", "Centro-Oeste")
)
# Descriptive statistics after cleaning and transformation
summary(exemp5)
## cod Espacialidades gini renda
## Min. :1100015 Length:5501 Min. :0.2800 Min. : 96.25
## 1st Qu.:2510709 Class :character 1st Qu.:0.4500 1st Qu.: 279.49
## Median :3144201 Mode :character Median :0.4900 Median : 462.15
## Mean :3244441 Mean :0.4954 Mean : 489.28
## 3rd Qu.:4117206 3rd Qu.:0.5400 3rd Qu.: 643.93
## Max. :5300108 Max. :0.8000 Max. :2043.74
## ext_pob pob regioes lnpob
## Min. : 0.02 Min. : 0.34 Norte : 449 Min. :-1.079
## 1st Qu.: 1.72 1st Qu.: 7.21 Nordeste :1793 1st Qu.: 1.975
## Median : 6.60 Median :18.70 Sudeste :1649 Median : 2.929
## Mean :11.47 Mean :23.46 Sul :1144 Mean : 2.748
## 3rd Qu.:19.22 3rd Qu.:38.66 Centro-Oeste: 466 3rd Qu.: 3.655
## Max. :69.67 Max. :78.59 Max. : 4.364
## ln_ext_pob lnrenda lngini
## Min. :-1.079 Min. :4.567 Min. :-1.2730
## 1st Qu.: 1.975 1st Qu.:5.633 1st Qu.:-0.7985
## Median : 2.929 Median :6.136 Median :-0.7133
## Mean : 2.748 Mean :6.072 Mean :-0.7110
## 3rd Qu.: 3.655 3rd Qu.:6.468 3rd Qu.:-0.6162
## Max. : 4.364 Max. :7.623 Max. :-0.2231
# Gráficos de dispersão
library(ggplot2)
# Poverty vs. income
g1 <- ggplot(data = exemp5, mapping = aes(x = renda, y = pob)) +
  geom_point()
g1
# Same scatter plot, highlighting the regions by colour
g2 <- ggplot(
  data = exemp5,
  mapping = aes(x = renda, y = pob, label = Espacialidades, colour = regioes)
) +
  geom_point()
g2
# Poverty vs. income inequality (Gini), coloured by region
g3 <- ggplot(
  data = exemp5,
  mapping = aes(x = gini, y = pob, label = Espacialidades, colour = regioes)
) +
  geom_point()
g3
# Gráficos dinâmicos
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive (plotly) versions of the region-coloured scatter plots g2 and g3
ggplotly(g2)
ggplotly(g3)
# Histograms (base graphics) of poverty, extreme poverty, Gini and income
hist(exemp5$pob, col="green")
hist(exemp5$ext_pob, col="green")
hist (exemp5$gini, col="blue")
hist(exemp5$renda, col="yellow")
# Histogram of income by region, with ggplot and ggplotly
# Region is mapped to `fill`: for bars, `colour` only changes the outline,
# which leaves the regions nearly indistinguishable.
g4 <- ggplot(data = exemp5, mapping = aes(renda, fill = regioes)) +
  geom_histogram()
g4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplotly(g4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Descriptive statistics for every column of exemp5
summary(exemp5)
## cod Espacialidades gini renda
## Min. :1100015 Length:5501 Min. :0.2800 Min. : 96.25
## 1st Qu.:2510709 Class :character 1st Qu.:0.4500 1st Qu.: 279.49
## Median :3144201 Mode :character Median :0.4900 Median : 462.15
## Mean :3244441 Mean :0.4954 Mean : 489.28
## 3rd Qu.:4117206 3rd Qu.:0.5400 3rd Qu.: 643.93
## Max. :5300108 Max. :0.8000 Max. :2043.74
## ext_pob pob regioes lnpob
## Min. : 0.02 Min. : 0.34 Norte : 449 Min. :-1.079
## 1st Qu.: 1.72 1st Qu.: 7.21 Nordeste :1793 1st Qu.: 1.975
## Median : 6.60 Median :18.70 Sudeste :1649 Median : 2.929
## Mean :11.47 Mean :23.46 Sul :1144 Mean : 2.748
## 3rd Qu.:19.22 3rd Qu.:38.66 Centro-Oeste: 466 3rd Qu.: 3.655
## Max. :69.67 Max. :78.59 Max. : 4.364
## ln_ext_pob lnrenda lngini
## Min. :-1.079 Min. :4.567 Min. :-1.2730
## 1st Qu.: 1.975 1st Qu.:5.633 1st Qu.:-0.7985
## Median : 2.929 Median :6.136 Median :-0.7133
## Mean : 2.748 Mean :6.072 Mean :-0.7110
## 3rd Qu.: 3.655 3rd Qu.:6.468 3rd Qu.:-0.6162
## Max. : 4.364 Max. :7.623 Max. :-0.2231
# Correlation matrix of poverty, extreme poverty, income and Gini
# (columns are bound without names, so the result is indexed by position)
X <- cbind(
  exemp5[["pob"]],
  exemp5[["ext_pob"]],
  exemp5[["renda"]],
  exemp5[["gini"]]
)
cor(X)
## [,1] [,2] [,3] [,4]
## [1,] 1.0000000 0.9642128 -0.8525796 0.5929508
## [2,] 0.9642128 1.0000000 -0.7707564 0.6092281
## [3,] -0.8525796 -0.7707564 1.0000000 -0.2621389
## [4,] 0.5929508 0.6092281 -0.2621389 1.0000000
# Outra forma, correlação
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the four variables of interest, then correlate them; the result
# carries the column names, unlike the positional matrix version
dados_corr <- dplyr::select(exemp5, pob, ext_pob, renda, gini)
cor(dados_corr)
## pob ext_pob renda gini
## pob 1.0000000 0.9642128 -0.8525796 0.5929508
## ext_pob 0.9642128 1.0000000 -0.7707564 0.6092281
## renda -0.8525796 -0.7707564 1.0000000 -0.2621389
## gini 0.5929508 0.6092281 -0.2621389 1.0000000
# Adicionando valor-p
library("Hmisc")
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following object is masked from 'package:plotly':
##
## subplot
## The following objects are masked from 'package:base':
##
## format.pval, units
# Correlations together with their p-values (rcorr requires a matrix)
cor_p <- Hmisc::rcorr(as.matrix(dados_corr))
cor_p
## pob ext_pob renda gini
## pob 1.00 0.96 -0.85 0.59
## ext_pob 0.96 1.00 -0.77 0.61
## renda -0.85 -0.77 1.00 -0.26
## gini 0.59 0.61 -0.26 1.00
##
## n= 5501
##
##
## P
## pob ext_pob renda gini
## pob 0 0 0
## ext_pob 0 0 0
## renda 0 0 0
## gini 0 0 0
# Level-level (lin-lin) model: poverty on Gini and income
reg_pob <- lm(formula = pob ~ gini + renda, data = exemp5)
summary(reg_pob)
##
## Call:
## lm(formula = pob ~ gini + renda, data = exemp5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.293 -4.750 -1.114 4.006 58.797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.997e+00 7.461e-01 -4.016 5.99e-05 ***
## gini 1.084e+02 1.358e+00 79.802 < 2e-16 ***
## renda -5.569e-02 3.698e-04 -150.582 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.358 on 5498 degrees of freedom
## Multiple R-squared: 0.8735, Adjusted R-squared: 0.8734
## F-statistic: 1.898e+04 on 2 and 5498 DF, p-value: < 2.2e-16
# Log-log model for poverty: coefficients are elasticities
reg_lnpob <- lm(
  formula = log(pob) ~ log(gini) + log(renda),
  data = exemp5
)
summary(reg_lnpob)
##
## Call:
## lm(formula = log(pob) ~ log(gini) + log(renda), data = exemp5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.12894 -0.15420 0.07106 0.21224 0.98363
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.269182 0.051651 276.26 <2e-16 ***
## log(gini) 2.388454 0.033994 70.26 <2e-16 ***
## log(renda) -1.617649 0.009027 -179.21 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3125 on 5498 degrees of freedom
## Multiple R-squared: 0.9053, Adjusted R-squared: 0.9053
## F-statistic: 2.628e+04 on 2 and 5498 DF, p-value: < 2.2e-16
# Store the predicted outcome and the residuals of the log-log poverty model
# as new columns, then re-summarise the data
exemp5$ln_pob_estim <- fitted(reg_lnpob)
exemp5$res1 <- resid(reg_lnpob)
summary(exemp5)
## cod Espacialidades gini renda
## Min. :1100015 Length:5501 Min. :0.2800 Min. : 96.25
## 1st Qu.:2510709 Class :character 1st Qu.:0.4500 1st Qu.: 279.49
## Median :3144201 Mode :character Median :0.4900 Median : 462.15
## Mean :3244441 Mean :0.4954 Mean : 489.28
## 3rd Qu.:4117206 3rd Qu.:0.5400 3rd Qu.: 643.93
## Max. :5300108 Max. :0.8000 Max. :2043.74
## ext_pob pob regioes lnpob
## Min. : 0.02 Min. : 0.34 Norte : 449 Min. :-1.079
## 1st Qu.: 1.72 1st Qu.: 7.21 Nordeste :1793 1st Qu.: 1.975
## Median : 6.60 Median :18.70 Sudeste :1649 Median : 2.929
## Mean :11.47 Mean :23.46 Sul :1144 Mean : 2.748
## 3rd Qu.:19.22 3rd Qu.:38.66 Centro-Oeste: 466 3rd Qu.: 3.655
## Max. :69.67 Max. :78.59 Max. : 4.364
## ln_ext_pob lnrenda lngini ln_pob_estim
## Min. :-1.079 Min. :4.567 Min. :-1.2730 Min. :0.4038
## 1st Qu.: 1.975 1st Qu.:5.633 1st Qu.:-0.7985 1st Qu.:1.9497
## Median : 2.929 Median :6.136 Median :-0.7133 Median :2.6337
## Mean : 2.748 Mean :6.072 Mean :-0.7110 Mean :2.7481
## 3rd Qu.: 3.655 3rd Qu.:6.468 3rd Qu.:-0.6162 3rd Qu.:3.5505
## Max. : 4.364 Max. :7.623 Max. :-0.2231 Max. :5.8892
## res1
## Min. :-2.12894
## 1st Qu.:-0.15420
## Median : 0.07106
## Mean : 0.00000
## 3rd Qu.: 0.21224
## Max. : 0.98363
# Log-log model for extreme poverty
reg_ln_ext_pob <- lm(
  formula = log(ext_pob) ~ log(gini) + log(renda),
  data = exemp5
)
summary(reg_ln_ext_pob)
##
## Call:
## lm(formula = log(ext_pob) ~ log(gini) + log(renda), data = exemp5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2424 -0.2231 0.0656 0.2969 1.9771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.62264 0.08637 204.03 <2e-16 ***
## log(gini) 3.71846 0.05685 65.41 <2e-16 ***
## log(renda) -2.19081 0.01509 -145.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5226 on 5498 degrees of freedom
## Multiple R-squared: 0.8696, Adjusted R-squared: 0.8696
## F-statistic: 1.833e+04 on 2 and 5498 DF, p-value: < 2.2e-16
# Organizando os resultados em tabelas
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
stargazer(
  reg_pob,
  reg_lnpob,
  reg_ln_ext_pob,
  type = "text"
)
##
## =========================================================================
## Dependent variable:
## -----------------------------------------
## pob log(pob) log(ext_pob)
## (1) (2) (3)
## -------------------------------------------------------------------------
## gini 108.382***
## (1.358)
##
## renda -0.056***
## (0.0004)
##
## log(gini) 2.388*** 3.718***
## (0.034) (0.057)
##
## log(renda) -1.618*** -2.191***
## (0.009) (0.015)
##
## Constant -2.997*** 14.269*** 17.623***
## (0.746) (0.052) (0.086)
##
## -------------------------------------------------------------------------
## Observations 5,501 5,501 5,501
## R2 0.873 0.905 0.870
## Adjusted R2 0.873 0.905 0.870
## Residual Std. Error (df = 5498) 6.358 0.313 0.523
## F Statistic (df = 2; 5498) 18,975.680*** 26,283.210*** 18,334.530***
## =========================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Regression based on a subsample: log-log poverty model restricted to the
# "Nordeste" region. The subset() call is kept inline because it is echoed
# verbatim in the model's Call.
reg_lnpob_NE <- lm(
  formula = log(pob) ~ log(gini) + log(renda),
  data = subset(exemp5, regioes == "Nordeste")
)
summary(reg_lnpob_NE)
##
## Call:
## lm(formula = log(pob) ~ log(gini) + log(renda), data = subset(exemp5,
## regioes == "Nordeste"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65413 -0.03148 0.01552 0.05140 0.37028
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.609759 0.041764 230.09 <2e-16 ***
## log(gini) 1.042872 0.021562 48.37 <2e-16 ***
## log(renda) -0.940743 0.007073 -133.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08538 on 1790 degrees of freedom
## Multiple R-squared: 0.9185, Adjusted R-squared: 0.9184
## F-statistic: 1.008e+04 on 2 and 1790 DF, p-value: < 2.2e-16