INTRODUÇÃO

Exemplo baseado nos dados do Atlas do Desenvolvimento Humano: relação da pobreza com a renda e com a desigualdade de renda.

Modelos econométricos

\[y_i = \beta_1 + \beta_2 \, gini_i + \beta_3 \, renda_i + u_i \] \[\ln y_i = \beta_1 + \beta_2 \ln gini_i + \beta_3 \ln renda_i + u_i \]

Obs.: Para visualização das referências, ver ementa da disciplina.

TRATAMENTO DA AMOSTRA

# Import the Stata data set used by every later step
library(haven)
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this file exists; consider a project-relative path.
exemp5 <- read_dta("D:/dados/exemp5.dta")
# Open the spreadsheet-style viewer only in interactive sessions, so that
# rendering the document or running it in batch mode does not depend on a
# viewer window being available.
if (interactive()) {
  View(exemp5)
}

# Initial descriptive statistics of the raw data, before any cleaning
summary(exemp5)

##       cod          Espacialidades          gini            renda        
##  Min.   :1100015   Length:5565        Min.   :0.2800   Min.   :  96.25  
##  1st Qu.:2512101   Class :character   1st Qu.:0.4500   1st Qu.: 281.12  
##  Median :3146206   Mode  :character   Median :0.4900   Median : 467.65  
##  Mean   :3253053                      Mean   :0.4944   Mean   : 493.61  
##  3rd Qu.:4119004                      3rd Qu.:0.5400   3rd Qu.: 650.62  
##  Max.   :5300108                      Max.   :0.8000   Max.   :2043.74  
##                                                                         
##     ext_pob           pob           regioes     
##  Min.   : 0.02   Min.   : 0.19   Min.   :1.000  
##  1st Qu.: 1.72   1st Qu.: 7.03   1st Qu.:2.000  
##  Median : 6.60   Median :18.15   Median :3.000  
##  Mean   :11.47   Mean   :23.21   Mean   :2.897  
##  3rd Qu.:19.22   3rd Qu.:38.52   3rd Qu.:4.000  
##  Max.   :69.67   Max.   :78.59   Max.   :5.000  
##  NA's   :64      NA's   :2

# Drop every row that has a missing value in any column
exemp5 <- na.omit(exemp5)

# Natural logs of the poverty, income and inequality variables
exemp5$lnpob <- log(exemp5$pob)
# Fix: the log of extreme poverty must be taken from ext_pob; it was
# previously computed from pob, which made it a copy of lnpob.
exemp5$ln_ext_pob <- log(exemp5$ext_pob)
exemp5$lnrenda <- log(exemp5$renda)
exemp5$lngini <- log(exemp5$gini)

# Replace the numeric region codes 1-5 with their names.
# NOTE(review): the result is an *ordered* factor although the regions look
# nominal -- confirm that the ordering is intended.
exemp5$regioes <- factor(
  exemp5$regioes,
  levels = c(1, 2, 3, 4, 5),
  labels = c("Norte", "Nordeste", "Sudeste", "Sul", "Centro-Oeste"),
  ordered = TRUE
)

ESTATÍSTICAS INICIAIS

Gráficos

# Descriptive statistics after dropping missing values and adding the log
# variables and the labelled region factor
summary(exemp5)

##       cod          Espacialidades          gini            renda        
##  Min.   :1100015   Length:5501        Min.   :0.2800   Min.   :  96.25  
##  1st Qu.:2510709   Class :character   1st Qu.:0.4500   1st Qu.: 279.49  
##  Median :3144201   Mode  :character   Median :0.4900   Median : 462.15  
##  Mean   :3244441                      Mean   :0.4954   Mean   : 489.28  
##  3rd Qu.:4117206                      3rd Qu.:0.5400   3rd Qu.: 643.93  
##  Max.   :5300108                      Max.   :0.8000   Max.   :2043.74  
##     ext_pob           pob                regioes         lnpob       
##  Min.   : 0.02   Min.   : 0.34   Norte       : 449   Min.   :-1.079  
##  1st Qu.: 1.72   1st Qu.: 7.21   Nordeste    :1793   1st Qu.: 1.975  
##  Median : 6.60   Median :18.70   Sudeste     :1649   Median : 2.929  
##  Mean   :11.47   Mean   :23.46   Sul         :1144   Mean   : 2.748  
##  3rd Qu.:19.22   3rd Qu.:38.66   Centro-Oeste: 466   3rd Qu.: 3.655  
##  Max.   :69.67   Max.   :78.59                       Max.   : 4.364  
##    ln_ext_pob        lnrenda          lngini       
##  Min.   :-1.079   Min.   :4.567   Min.   :-1.2730  
##  1st Qu.: 1.975   1st Qu.:5.633   1st Qu.:-0.7985  
##  Median : 2.929   Median :6.136   Median :-0.7133  
##  Mean   : 2.748   Mean   :6.072   Mean   :-0.7110  
##  3rd Qu.: 3.655   3rd Qu.:6.468   3rd Qu.:-0.6162  
##  Max.   : 4.364   Max.   :7.623   Max.   :-0.2231

# Gráficos de dispersão 

library(ggplot2)

# Poverty against income
g1 <- ggplot(data = exemp5, mapping = aes(x = renda, y = pob)) +
  geom_point()
g1

# Same scatter, coloured by region. `label` is not drawn by geom_point() but
# stays in the mapping (presumably for the ggplotly tooltips -- TODO confirm).
g2 <- ggplot(
  data = exemp5,
  mapping = aes(x = renda, y = pob, label = Espacialidades, colour = regioes)
) +
  geom_point()
g2

# Poverty against income inequality (Gini), coloured by region
g3 <- ggplot(
  data = exemp5,
  mapping = aes(x = gini, y = pob, label = Espacialidades, colour = regioes)
) +
  geom_point()
g3

# Gráficos dinâmicos
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Interactive (plotly) versions of the two region-coloured scatter plots
ggplotly(p = g2)

ggplotly(p = g3)

# Base-graphics histograms: poverty, extreme poverty, Gini index and income

hist(x = exemp5$pob, col = "green")

hist(x = exemp5$ext_pob, col = "green")

hist(x = exemp5$gini, col = "blue")

hist(x = exemp5$renda, col = "yellow")

# Income histogram built with ggplot2; `colour` maps region to the bar
# outlines. NOTE(review): if filled bars per region were intended, `fill`
# would be the aesthetic to map -- confirm.
g4 <- ggplot(data = exemp5, mapping = aes(x = renda, colour = regioes)) +
  geom_histogram()

g4

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Interactive (plotly) version of the income histogram
ggplotly(p = g4)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Sumário Estatístico

# Summary statistics of the working data set (the data has not changed since
# the previous summary() call, so the table is the same)
summary(exemp5)

##       cod          Espacialidades          gini            renda        
##  Min.   :1100015   Length:5501        Min.   :0.2800   Min.   :  96.25  
##  1st Qu.:2510709   Class :character   1st Qu.:0.4500   1st Qu.: 279.49  
##  Median :3144201   Mode  :character   Median :0.4900   Median : 462.15  
##  Mean   :3244441                      Mean   :0.4954   Mean   : 489.28  
##  3rd Qu.:4117206                      3rd Qu.:0.5400   3rd Qu.: 643.93  
##  Max.   :5300108                      Max.   :0.8000   Max.   :2043.74  
##     ext_pob           pob                regioes         lnpob       
##  Min.   : 0.02   Min.   : 0.34   Norte       : 449   Min.   :-1.079  
##  1st Qu.: 1.72   1st Qu.: 7.21   Nordeste    :1793   1st Qu.: 1.975  
##  Median : 6.60   Median :18.70   Sudeste     :1649   Median : 2.929  
##  Mean   :11.47   Mean   :23.46   Sul         :1144   Mean   : 2.748  
##  3rd Qu.:19.22   3rd Qu.:38.66   Centro-Oeste: 466   3rd Qu.: 3.655  
##  Max.   :69.67   Max.   :78.59                       Max.   : 4.364  
##    ln_ext_pob        lnrenda          lngini       
##  Min.   :-1.079   Min.   :4.567   Min.   :-1.2730  
##  1st Qu.: 1.975   1st Qu.:5.633   1st Qu.:-0.7985  
##  Median : 2.929   Median :6.136   Median :-0.7133  
##  Mean   : 2.748   Mean   :6.072   Mean   :-0.7110  
##  3rd Qu.: 3.655   3rd Qu.:6.468   3rd Qu.:-0.6162  
##  Max.   : 4.364   Max.   :7.623   Max.   :-0.2231

# Pairwise correlations between poverty, extreme poverty, income and Gini

# Name the columns explicitly: cbind() on `exemp5$...` expressions yields an
# unnamed matrix, which makes cor() print anonymous [,1]..[,4] headers.
X <- cbind(
  pob = exemp5$pob,
  ext_pob = exemp5$ext_pob,
  renda = exemp5$renda,
  gini = exemp5$gini
)
cor(X)

##            [,1]       [,2]       [,3]       [,4]
## [1,]  1.0000000  0.9642128 -0.8525796  0.5929508
## [2,]  0.9642128  1.0000000 -0.7707564  0.6092281
## [3,] -0.8525796 -0.7707564  1.0000000 -0.2621389
## [4,]  0.5929508  0.6092281 -0.2621389  1.0000000

# Outra forma, correlação
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Keep only the four variables of interest, then correlate them
dados_corr <- dplyr::select(exemp5, pob, ext_pob, renda, gini)
cor(x = dados_corr)

##                pob    ext_pob      renda       gini
## pob      1.0000000  0.9642128 -0.8525796  0.5929508
## ext_pob  0.9642128  1.0000000 -0.7707564  0.6092281
## renda   -0.8525796 -0.7707564  1.0000000 -0.2621389
## gini     0.5929508  0.6092281 -0.2621389  1.0000000

# Adicionando valor-p
library("Hmisc")

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following object is masked from 'package:plotly':
## 
##     subplot

## The following objects are masked from 'package:base':
## 
##     format.pval, units

# Correlation matrix together with the number of observations and p-values
cor_p <- rcorr(x = as.matrix(dados_corr))
cor_p

##           pob ext_pob renda  gini
## pob      1.00    0.96 -0.85  0.59
## ext_pob  0.96    1.00 -0.77  0.61
## renda   -0.85   -0.77  1.00 -0.26
## gini     0.59    0.61 -0.26  1.00
## 
## n= 5501 
## 
## 
## P
##         pob ext_pob renda gini
## pob          0       0     0  
## ext_pob  0           0     0  
## renda    0   0             0  
## gini     0   0       0

REGRESSÕES

# Level-level (lin-lin) linear model: poverty on the Gini index and income
reg_pob <- lm(formula = pob ~ gini + renda, data = exemp5)
summary(reg_pob)

## 
## Call:
## lm(formula = pob ~ gini + renda, data = exemp5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.293  -4.750  -1.114   4.006  58.797 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -2.997e+00  7.461e-01   -4.016 5.99e-05 ***
## gini         1.084e+02  1.358e+00   79.802  < 2e-16 ***
## renda       -5.569e-02  3.698e-04 -150.582  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.358 on 5498 degrees of freedom
## Multiple R-squared:  0.8735, Adjusted R-squared:  0.8734 
## F-statistic: 1.898e+04 on 2 and 5498 DF,  p-value: < 2.2e-16

# Log-log linear model for poverty; the logs are taken inside the formula
reg_lnpob <- lm(
  formula = log(pob) ~ log(gini) + log(renda),
  data = exemp5
)
summary(reg_lnpob)

## 
## Call:
## lm(formula = log(pob) ~ log(gini) + log(renda), data = exemp5)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.12894 -0.15420  0.07106  0.21224  0.98363 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14.269182   0.051651  276.26   <2e-16 ***
## log(gini)    2.388454   0.033994   70.26   <2e-16 ***
## log(renda)  -1.617649   0.009027 -179.21   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3125 on 5498 degrees of freedom
## Multiple R-squared:  0.9053, Adjusted R-squared:  0.9053 
## F-statistic: 2.628e+04 on 2 and 5498 DF,  p-value: < 2.2e-16

# Store the fitted values and the residuals of the log-log poverty model as
# new columns, then summarise the enlarged data set
exemp5$ln_pob_estim <- fitted(reg_lnpob)
exemp5$res1 <- resid(reg_lnpob)
summary(exemp5)

##       cod          Espacialidades          gini            renda        
##  Min.   :1100015   Length:5501        Min.   :0.2800   Min.   :  96.25  
##  1st Qu.:2510709   Class :character   1st Qu.:0.4500   1st Qu.: 279.49  
##  Median :3144201   Mode  :character   Median :0.4900   Median : 462.15  
##  Mean   :3244441                      Mean   :0.4954   Mean   : 489.28  
##  3rd Qu.:4117206                      3rd Qu.:0.5400   3rd Qu.: 643.93  
##  Max.   :5300108                      Max.   :0.8000   Max.   :2043.74  
##     ext_pob           pob                regioes         lnpob       
##  Min.   : 0.02   Min.   : 0.34   Norte       : 449   Min.   :-1.079  
##  1st Qu.: 1.72   1st Qu.: 7.21   Nordeste    :1793   1st Qu.: 1.975  
##  Median : 6.60   Median :18.70   Sudeste     :1649   Median : 2.929  
##  Mean   :11.47   Mean   :23.46   Sul         :1144   Mean   : 2.748  
##  3rd Qu.:19.22   3rd Qu.:38.66   Centro-Oeste: 466   3rd Qu.: 3.655  
##  Max.   :69.67   Max.   :78.59                       Max.   : 4.364  
##    ln_ext_pob        lnrenda          lngini         ln_pob_estim   
##  Min.   :-1.079   Min.   :4.567   Min.   :-1.2730   Min.   :0.4038  
##  1st Qu.: 1.975   1st Qu.:5.633   1st Qu.:-0.7985   1st Qu.:1.9497  
##  Median : 2.929   Median :6.136   Median :-0.7133   Median :2.6337  
##  Mean   : 2.748   Mean   :6.072   Mean   :-0.7110   Mean   :2.7481  
##  3rd Qu.: 3.655   3rd Qu.:6.468   3rd Qu.:-0.6162   3rd Qu.:3.5505  
##  Max.   : 4.364   Max.   :7.623   Max.   :-0.2231   Max.   :5.8892  
##       res1         
##  Min.   :-2.12894  
##  1st Qu.:-0.15420  
##  Median : 0.07106  
##  Mean   : 0.00000  
##  3rd Qu.: 0.21224  
##  Max.   : 0.98363

# Log-log linear model for extreme poverty; logs are taken inside the formula
reg_ln_ext_pob <- lm(
  formula = log(ext_pob) ~ log(gini) + log(renda),
  data = exemp5
)
summary(reg_ln_ext_pob)

## 
## Call:
## lm(formula = log(ext_pob) ~ log(gini) + log(renda), data = exemp5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2424 -0.2231  0.0656  0.2969  1.9771 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.62264    0.08637  204.03   <2e-16 ***
## log(gini)    3.71846    0.05685   65.41   <2e-16 ***
## log(renda)  -2.19081    0.01509 -145.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5226 on 5498 degrees of freedom
## Multiple R-squared:  0.8696, Adjusted R-squared:  0.8696 
## F-statistic: 1.833e+04 on 2 and 5498 DF,  p-value: < 2.2e-16

# Organizando os resultados em tabelas
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

# Side-by-side plain-text table of the three fitted models
stargazer(reg_pob, reg_lnpob, reg_ln_ext_pob, type = "text")

## 
## =========================================================================
##                                            Dependent variable:           
##                                 -----------------------------------------
##                                      pob        log(pob)    log(ext_pob) 
##                                      (1)           (2)           (3)     
## -------------------------------------------------------------------------
## gini                             108.382***                              
##                                    (1.358)                               
##                                                                          
## renda                             -0.056***                              
##                                   (0.0004)                               
##                                                                          
## log(gini)                                       2.388***      3.718***   
##                                                  (0.034)       (0.057)   
##                                                                          
## log(renda)                                      -1.618***     -2.191***  
##                                                  (0.009)       (0.015)   
##                                                                          
## Constant                          -2.997***     14.269***     17.623***  
##                                    (0.746)       (0.052)       (0.086)   
##                                                                          
## -------------------------------------------------------------------------
## Observations                        5,501         5,501         5,501    
## R2                                  0.873         0.905         0.870    
## Adjusted R2                         0.873         0.905         0.870    
## Residual Std. Error (df = 5498)     6.358         0.313         0.523    
## F Statistic (df = 2; 5498)      18,975.680*** 26,283.210*** 18,334.530***
## =========================================================================
## Note:                                         *p<0.1; **p<0.05; ***p<0.01

# Log-log poverty model estimated on a subsample: only the observations whose
# region is "Nordeste". The subset is built inline in the `data` argument.
reg_lnpob_NE <- lm(
  formula = log(pob) ~ log(gini) + log(renda),
  data = subset(exemp5, regioes == "Nordeste")
)
summary(reg_lnpob_NE)

## 
## Call:
## lm(formula = log(pob) ~ log(gini) + log(renda), data = subset(exemp5, 
##     regioes == "Nordeste"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65413 -0.03148  0.01552  0.05140  0.37028 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.609759   0.041764  230.09   <2e-16 ***
## log(gini)    1.042872   0.021562   48.37   <2e-16 ***
## log(renda)  -0.940743   0.007073 -133.01   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08538 on 1790 degrees of freedom
## Multiple R-squared:  0.9185, Adjusted R-squared:  0.9184 
## F-statistic: 1.008e+04 on 2 and 1790 DF,  p-value: < 2.2e-16

MÉTODOS QUANTITATIVOS

INTRODUÇÃO A MODELOS DE REGRESSÃO SIMPLES E MÚLTIPLA