Introdução

Apresentaremos alguns gráficos e estimações baseadas na primeira parte da disciplina de econometria.

Gráfico de Dispersão

Exemplo da relação entre pobreza e renda utilizando dados dos municípios de Alagoas do ano 2010:

# Load the course dataset (.RData) directly from its URL. The connection is
# closed explicitly after loading so that no connection handle is left behind
# once the objects are in the workspace.
aula <- url("https://sites.google.com/site/andersonmoreiraadossantos/base-de-dados/aula.RData")
load(aula)
close(aula)
library("scatterD3")

library(ggplot2)

# Scatter plot of poverty (y axis) against income (x axis), one point per row
# of `pobreza`.
ggplot(data = pobreza, mapping = aes(x = renda, y = pobreza)) +
  geom_point()

# Same scatter plot, now labelling the observation whose poverty value is
# 47.02 with its `mun` value. The match uses a small numeric tolerance instead
# of `==`, because exact equality on doubles can silently fail and leave the
# point unlabelled.
ggplot(pobreza, aes(x = renda, y = pobreza, label = mun)) +
  geom_point() +
  geom_text(
    aes(label = ifelse(abs(pobreza - 47.02) < 1e-8, as.character(mun), "")),
    hjust = 0, vjust = 0
  )

# Labelled scatter plot again, with the colour aesthetic mapped to `meso`.
ggplot(pobreza, aes(x = renda, y = pobreza, label = mun, colour = meso)) +
  geom_point() +
  geom_text(
    aes(label = ifelse(abs(pobreza - 47.02) < 1e-8, as.character(mun), "")),
    hjust = 0, vjust = 0
  )

Histogramas e dispersão com o comando plot

Exemplos de histogramas das variáveis renda, pobreza e índice de Gini.

# Histogram of the poverty variable, drawn with green bars.
hist(pobreza$pobreza, col="green")

# Histogram of the Gini index, drawn with blue bars.
hist (pobreza$gini, col="blue")

# Histogram of income, drawn with yellow bars.
hist(pobreza$renda, col="yellow")

# Descriptive statistics for every column of `pobreza`.
summary(pobreza)
##      mun                 gini            renda          pobreza     
##  Length:102         Min.   :0.4200   Min.   :151.6   Min.   :15.57  
##  Class :character   1st Qu.:0.4900   1st Qu.:204.6   1st Qu.:40.60  
##  Mode  :character   Median :0.5300   Median :233.4   Median :46.94  
##                     Mean   :0.5344   Mean   :251.2   Mean   :45.94  
##                     3rd Qu.:0.5675   3rd Qu.:269.0   3rd Qu.:51.48  
##                     Max.   :0.6700   Max.   :792.5   Max.   :67.57  
##     micro               meso          
##  Length:102         Length:102        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# Poverty against income, with a fitted linear-model smoother in blue.
ggplot(data = pobreza, mapping = aes(x = renda, y = pobreza)) +
  geom_point() +
  stat_smooth(method = "lm", col = "blue")

# Poverty against the Gini index, with a fitted linear-model smoother in green.
ggplot(data = pobreza, mapping = aes(x = gini, y = pobreza)) +
  geom_point() +
  stat_smooth(method = "lm", col = "green")

Modelo de regressão - MQO: lin-lin e log-log

Estimações baseadas nos dados dos gráficos que já foram observados.

## Lin-lin model: OLS of poverty on the Gini index and income, in levels.
reg_pob <- lm(formula = pobreza ~ gini + renda, data = pobreza)
summary(reg_pob)
## 
## Call:
## lm(formula = pobreza ~ gini + renda, data = pobreza)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.5786 -1.7780 -0.4004  1.8718 18.8251 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 23.288127   3.574381   6.515 3.05e-09 ***
## gini        92.797415   6.535587  14.199  < 2e-16 ***
## renda       -0.107257   0.004177 -25.677  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.309 on 99 degrees of freedom
## Multiple R-squared:  0.8904, Adjusted R-squared:  0.8882 
## F-statistic:   402 on 2 and 99 DF,  p-value: < 2.2e-16
## Log-log model: the logs are taken inside the formula itself.
reg_lnpob <- lm(
  formula = log(pobreza) ~ log(gini) + log(renda),
  data = pobreza
)
summary(reg_lnpob)
## 
## Call:
## lm(formula = log(pobreza) ~ log(gini) + log(renda), data = pobreza)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.153560 -0.039385  0.003969  0.033929  0.117157 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.24945    0.13559   68.22   <2e-16 ***
## log(gini)    0.77194    0.06272   12.31   <2e-16 ***
## log(renda)  -0.90347    0.02357  -38.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05867 on 99 degrees of freedom
## Multiple R-squared:  0.9423, Adjusted R-squared:  0.9412 
## F-statistic:   809 on 2 and 99 DF,  p-value: < 2.2e-16
## Log-log model again, this time storing the logged variables as new columns
## of `pobreza` before fitting.

pobreza[["lnpobreza"]] <- log(pobreza[["pobreza"]])
pobreza[["lngini"]] <- log(pobreza[["gini"]])
pobreza[["lnrenda"]] <- log(pobreza[["renda"]])
reg_logpobreza2 <- lm(formula = lnpobreza ~ lngini + lnrenda, data = pobreza)
summary(reg_logpobreza2)
## 
## Call:
## lm(formula = lnpobreza ~ lngini + lnrenda, data = pobreza)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.153560 -0.039385  0.003969  0.033929  0.117157 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.24945    0.13559   68.22   <2e-16 ***
## lngini       0.77194    0.06272   12.31   <2e-16 ***
## lnrenda     -0.90347    0.02357  -38.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05867 on 99 degrees of freedom
## Multiple R-squared:  0.9423, Adjusted R-squared:  0.9412 
## F-statistic:   809 on 2 and 99 DF,  p-value: < 2.2e-16
## Organizando os resultados em tabelas
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2. http://CRAN.R-project.org/package=stargazer
# Side-by-side plain-text table of the lin-lin and log-log fits.
stargazer(reg_pob, reg_lnpob, type = "text")
## 
## ==========================================================
##                                   Dependent variable:     
##                               ----------------------------
##                                  pobreza     log(pobreza) 
##                                    (1)            (2)     
## ----------------------------------------------------------
## gini                            92.797***                 
##                                  (6.536)                  
##                                                           
## renda                           -0.107***                 
##                                  (0.004)                  
##                                                           
## log(gini)                                      0.772***   
##                                                 (0.063)   
##                                                           
## log(renda)                                     -0.903***  
##                                                 (0.024)   
##                                                           
## Constant                        23.288***      9.249***   
##                                  (3.574)        (0.136)   
##                                                           
## ----------------------------------------------------------
## Observations                       102            102     
## R2                                0.890          0.942    
## Adjusted R2                       0.888          0.941    
## Residual Std. Error (df = 99)     3.309          0.059    
## F Statistic (df = 2; 99)        402.031***    809.036***  
## ==========================================================
## Note:                          *p<0.1; **p<0.05; ***p<0.01

Problema de Heterocedasticidade

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Breusch-Pagan test: first show the fitted log-log model being tested.
print(reg_lnpob)
## 
## Call:
## lm(formula = log(pobreza) ~ log(gini) + log(renda), data = pobreza)
## 
## Coefficients:
## (Intercept)    log(gini)   log(renda)  
##      9.2495       0.7719      -0.9035
# Studentized Breusch-Pagan test for heteroscedasticity in the log-log model.
lmtest::bptest(reg_lnpob)
## 
##  studentized Breusch-Pagan test
## 
## data:  reg_lnpob
## BP = 14.067, df = 2, p-value = 0.0008819
# Robust inference: coefficient t tests for the log-log model using the
# heteroscedasticity-corrected covariance matrix from car::hccm. The argument
# is spelled out in full (`vcov.`) rather than relying on partial matching of
# `vcov`.
library(car)
coeftest(reg_lnpob, vcov. = hccm)
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  9.249452   0.228803  40.425 < 2.2e-16 ***
## log(gini)    0.771944   0.074498  10.362 < 2.2e-16 ***
## log(renda)  -0.903467   0.042561 -21.227 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# GLS (weighted least squares) assuming Var(u) = sigma^2 * log(renda), hence
# weights of 1 / log(renda). The argument is written as `weights`, its full
# name in lm(), instead of the partially matched `weight`.

mqg_lnpob <- lm(
  log(pobreza) ~ log(gini) + log(renda),
  data = pobreza,
  weights = 1 / log(renda)
)
summary(mqg_lnpob)
## 
## Call:
## lm(formula = log(pobreza) ~ log(gini) + log(renda), data = pobreza, 
##     weights = 1/log(renda))
## 
## Weighted Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.065061 -0.016610  0.002243  0.015028  0.048804 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.19400    0.13640   67.40   <2e-16 ***
## log(gini)    0.76488    0.06193   12.35   <2e-16 ***
## log(renda)  -0.89418    0.02401  -37.25   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02483 on 99 degrees of freedom
## Multiple R-squared:  0.9405, Adjusted R-squared:  0.9393 
## F-statistic: 782.5 on 2 and 99 DF,  p-value: < 2.2e-16

Modelos de Equação salarial, exemplo do Wooldridge

library(car)


# Different intercepts: log-wage equation in which the `mulher` term
# (presumably a 0/1 indicator -- confirm against the data) shifts the intercept.

reg_lnw <- lm(
  formula = log(w) ~ educ + exper + I(exper^2) + mulher,
  data = salario
)
summary(reg_lnw)
## 
## Call:
## lm(formula = log(w) ~ educ + exper + I(exper^2) + mulher, data = salario)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.80836 -0.27728 -0.01813  0.25637  1.23896 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.3904831  0.1022096   3.820 0.000149 ***
## educ         0.0841361  0.0069568  12.094  < 2e-16 ***
## exper        0.0389100  0.0048235   8.067 5.00e-15 ***
## I(exper^2)  -0.0006860  0.0001074  -6.389 3.71e-10 ***
## mulher      -0.3371868  0.0363214  -9.283  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4134 on 521 degrees of freedom
## Multiple R-squared:  0.3996, Adjusted R-squared:  0.395 
## F-statistic: 86.69 on 4 and 521 DF,  p-value: < 2.2e-16
# Joint F test of H0: beta_mulher = 0 and beta_educ = 0 in reg_lnw.
# NOTE(review): the earlier wording of this comment referred to the
# educ*mulher interaction, but reg_lnw has no such term and the hypotheses
# below restrict `mulher` and `educ`; testing the interaction would need
# reg_lnw2 -- confirm which test was intended.

library(car)
myH0 <- c("mulher=0", "educ=0")

linearHypothesis(model = reg_lnw, hypothesis.matrix = myH0)
## Linear hypothesis test
## 
## Hypothesis:
## mulher = 0
## educ = 0
## 
## Model 1: restricted model
## Model 2: log(w) ~ educ + exper + I(exper^2) + mulher
## 
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1    523 132.903                                  
## 2    521  89.059  2    43.845 128.25 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model with different intercepts and different slopes: adds the
# mulher * educ interaction to the log-wage equation.

reg_lnw2 <- lm(
  formula = log(w) ~ educ + exper + I(exper^2) + mulher + I(mulher * educ),
  data = salario
)
summary(reg_lnw2)
## 
## Call:
## lm(formula = log(w) ~ educ + exper + I(exper^2) + mulher + I(mulher * 
##     educ), data = salario)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.80847 -0.27707 -0.01697  0.25645  1.24113 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.3873644  0.1227335   3.156  0.00169 ** 
## educ              0.0843801  0.0087540   9.639  < 2e-16 ***
## exper             0.0389047  0.0048295   8.056 5.44e-15 ***
## I(exper^2)       -0.0006858  0.0001076  -6.377 4.00e-10 ***
## mulher           -0.3294333  0.1724333  -1.910  0.05662 .  
## I(mulher * educ) -0.0006201  0.0134809  -0.046  0.96333    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4138 on 520 degrees of freedom
## Multiple R-squared:  0.3996, Adjusted R-squared:  0.3938 
## F-statistic: 69.22 on 5 and 520 DF,  p-value: < 2.2e-16