José Gabriel Usiña Mogro A00831435
Lorena Villarreal Vega A01720802
Ximena Solís Islas A00831371
if(!require('WDI')){
install.packages("WDI")
}
library(WDI)
if(!require('wbstats')){
install.packages("wbstats")
}
library(wbstats)
if(!require('gplots')){
install.packages("gplots")
}
library(gplots)
if(!require('plm')){
install.packages("plm")
}
library(plm)
library(tidyverse)
library(dplyr)
library(tidyr)
library(zoo)
# Download the NY.GDP.PCAP.CD series for three countries (2013-2023) and
# keep only the country, date and indicator columns.
gdp_data <- wb_data(
  indicator = "NY.GDP.PCAP.CD",
  country = c("MX", "EC", "CA"),
  start_date = 2013,
  end_date = 2023
)
panel <- gdp_data %>%
  select(country, date, NY.GDP.PCAP.CD)
# Download four World Bank indicators for MX, ES and BR (2007-2017).
education <- wb_data(
  country = c("MX", "ES", "BR"),
  indicator = c(
    "NY.GDP.PCAP.CD",
    "SL.TLF.TOTL.IN",
    "SE.ADT.LITR.ZS",
    "SE.SEC.NENR"
  ),
  start_date = 2007,
  end_date = 2017
)

# Carry the last observation forward *within each country*. Filling the whole
# data frame at once would let the final value of one country spill into the
# leading gaps of the next one. Rows that still have no value (gaps at the
# start of a country's series) are dropped, as the previous na.locf() default
# (na.rm = TRUE) also removed incomplete rows.
# NOTE: any model output pasted after this chunk should be regenerated, since
# it was produced with the ungrouped fill.
education <- education %>%
  arrange(country, date) %>%
  group_by(country) %>%
  fill(
    NY.GDP.PCAP.CD, SL.TLF.TOTL.IN, SE.ADT.LITR.ZS, SE.SEC.NENR,
    .direction = "down"
  ) %>%
  ungroup() %>%
  drop_na(NY.GDP.PCAP.CD, SL.TLF.TOTL.IN, SE.ADT.LITR.ZS, SE.SEC.NENR)

# Declare the panel structure: individual = country, time = date.
education <- pdata.frame(education, index = c("country", "date"))
# Group means of NY.GDP.PCAP.CD by year (red bars).
plotmeans(
  NY.GDP.PCAP.CD ~ date,
  data = education,
  xlab = "Año",
  barcol = "red",
  main = "Heterogeneidad entre años"
)

# Group means of NY.GDP.PCAP.CD by country (blue bars).
plotmeans(
  NY.GDP.PCAP.CD ~ country,
  data = education,
  xlab = "País",
  barcol = "blue",
  main = "Heterogeneidad entre países"
)
En relación con los años, la línea no tiene muchos picos; es prácticamente plana. En cuanto a los países, notamos picos grandes entre país y país.
Están desfasados, todos tienen diferente largo.
La heterogeneidad se refiere a la “existencia de efectos latentes no observables específicos de cada agente/individuo” (De la Rosa). Son dimensiones características de cada persona, empresa, ciudad, país o unidad que no pueden ser observadas por el econometrista (Bonhomme y Manresa).
En las gráficas podemos observar heterogeneidad entre países, lo cual, si bien no es deseable, es lo esperado de los datos panel puesto que los países son diferentes entre sí. Actualmente, existen diversas técnicas para disminuir el sesgo de la heterogeneidad en modelos predictivos.
De la Rosa, Carlos. “Introducción a modelos de datos panel.” Universidad de Valladolid, Junio 2016.
Bonhomme, Stéphane, and Manresa, Elena. “Grouped Patterns of Heterogeneity in Panel Data.” Econometrica 83, 3 (May 2015): 1147–1184 © 2015 The Econometric Society
# Pooled OLS on the education panel: no individual or time effects.
pooled <- plm(
  formula = NY.GDP.PCAP.CD ~ SL.TLF.TOTL.IN + SE.ADT.LITR.ZS + SE.SEC.NENR,
  data = education,
  model = "pooling"
)
summary(pooled)
## Pooling Model
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SL.TLF.TOTL.IN + SE.ADT.LITR.ZS +
## SE.SEC.NENR, data = education, model = "pooling")
##
## Balanced Panel: n = 3, T = 11, N = 33
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -7328.91 -1651.22 541.96 1744.09 6534.24
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 8.7965e+04 9.1377e+04 0.9627 0.343676
## SL.TLF.TOTL.IN -2.2229e-04 6.9658e-05 -3.1912 0.003394 **
## SE.ADT.LITR.ZS -1.4369e+03 1.0642e+03 -1.3502 0.187401
## SE.SEC.NENR 9.4465e+02 1.8291e+02 5.1646 1.605e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2985500000
## Residual Sum of Squares: 372650000
## R-Squared: 0.87518
## Adj. R-Squared: 0.86227
## F-statistic: 67.7805 on 3 and 29 DF, p-value: 3.259e-13
# Fixed-effects (within) estimator with the same regressors as `pooled`.
within <- plm(
  formula = NY.GDP.PCAP.CD ~ SL.TLF.TOTL.IN + SE.ADT.LITR.ZS + SE.SEC.NENR,
  data = education,
  model = "within"
)
summary(within)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SL.TLF.TOTL.IN + SE.ADT.LITR.ZS +
## SE.SEC.NENR, data = education, model = "within")
##
## Balanced Panel: n = 3, T = 11, N = 33
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -4005.00 -1548.50 -102.92 1321.57 5256.50
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## SL.TLF.TOTL.IN 2.6126e-04 3.9866e-04 0.6553 0.5178
## SE.ADT.LITR.ZS -7.7397e+02 1.4753e+03 -0.5246 0.6041
## SE.SEC.NENR -5.3986e+01 1.7789e+02 -0.3035 0.7639
##
## Total Sum of Squares: 129880000
## Residual Sum of Squares: 127630000
## R-Squared: 0.017321
## Adj. R-Squared: -0.16466
## F-statistic: 0.158636 on 3 and 27 DF, p-value: 0.92319
# F test for individual effects: fixed-effects fit against the pooled fit.
pFtest(x = within, z = pooled)
##
## F test for individual effects
##
## data: NY.GDP.PCAP.CD ~ SL.TLF.TOTL.IN + SE.ADT.LITR.ZS + SE.SEC.NENR
## F = 25.915, df1 = 2, df2 = 27, p-value = 5.225e-07
## alternative hypothesis: significant effects
# Random-effects model using the Wallace-Hussain variance estimator.
# NOTE(review): this formula leaves out SL.TLF.TOTL.IN, which the pooled,
# within, amemiya and nerlove models on this data all include, so fit
# statistics are not directly comparable across them. Confirm the omission is
# intentional.
walhus <- plm(
  formula = NY.GDP.PCAP.CD ~ SE.ADT.LITR.ZS + SE.SEC.NENR,
  data = education,
  model = "random",
  random.method = "walhus"
)
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SE.ADT.LITR.ZS + SE.SEC.NENR,
## data = education, model = "random", random.method = "walhus")
##
## Balanced Panel: n = 3, T = 11, N = 33
##
## Effects:
## var std.dev share
## idiosyncratic 1.432e+07 3.784e+03 0.939
## individual 9.358e+05 9.674e+02 0.061
## theta: 0.2372
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -7715.66 -2968.88 554.57 3025.89 8235.17
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -192550.40 36858.45 -5.2240 1.751e-07 ***
## SE.ADT.LITR.ZS 1890.82 487.73 3.8768 0.0001059 ***
## SE.SEC.NENR 378.34 159.89 2.3663 0.0179687 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1791400000
## Residual Sum of Squares: 457960000
## R-Squared: 0.74435
## Adj. R-Squared: 0.7273
## Chisq: 87.3466 on 2 DF, p-value: < 2.22e-16
# Random-effects model using the Amemiya variance estimator.
amemiya <- plm(
  formula = NY.GDP.PCAP.CD ~ SL.TLF.TOTL.IN + SE.ADT.LITR.ZS + SE.SEC.NENR,
  data = education,
  model = "random",
  random.method = "amemiya"
)
summary(amemiya)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SL.TLF.TOTL.IN + SE.ADT.LITR.ZS +
## SE.SEC.NENR, data = education, model = "random", random.method = "amemiya")
##
## Balanced Panel: n = 3, T = 11, N = 33
##
## Effects:
## var std.dev share
## idiosyncratic 4254488 2063 0.012
## individual 353359254 18798 0.988
## theta: 0.9669
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3867.93 -1354.39 -210.04 1068.01 5952.32
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 3.8539e+03 8.2772e+04 0.0466 0.9629
## SL.TLF.TOTL.IN -3.4781e-05 2.3228e-04 -0.1497 0.8810
## SE.ADT.LITR.ZS 1.8101e+02 1.0222e+03 0.1771 0.8594
## SE.SEC.NENR -2.6007e+01 1.7298e+02 -0.1503 0.8805
##
## Total Sum of Squares: 133010000
## Residual Sum of Squares: 132730000
## R-Squared: 0.0020698
## Adj. R-Squared: -0.10116
## Chisq: 0.06015 on 3 DF, p-value: 0.99615
# Random-effects model using the Nerlove variance estimator.
nerlove <- plm(
  formula = NY.GDP.PCAP.CD ~ SL.TLF.TOTL.IN + SE.ADT.LITR.ZS + SE.SEC.NENR,
  data = education,
  model = "random",
  random.method = "nerlove"
)
summary(nerlove)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SL.TLF.TOTL.IN + SE.ADT.LITR.ZS +
## SE.SEC.NENR, data = education, model = "random", random.method = "nerlove")
##
## Balanced Panel: n = 3, T = 11, N = 33
##
## Effects:
## var std.dev share
## idiosyncratic 3867716 1967 0.007
## individual 530619038 23035 0.993
## theta: 0.9743
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3843.01 -1377.66 -170.89 1165.24 5874.98
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.7545e+04 8.9844e+04 0.1953 0.8452
## SL.TLF.TOTL.IN 1.9364e-05 2.6756e-04 0.0724 0.9423
## SE.ADT.LITR.ZS 8.4901e+00 1.1083e+03 0.0077 0.9939
## SE.SEC.NENR -3.2044e+01 1.7265e+02 -0.1856 0.8528
##
## Total Sum of Squares: 131780000
## Residual Sum of Squares: 131570000
## R-Squared: 0.0015528
## Adj. R-Squared: -0.10173
## Chisq: 0.0451018 on 3 DF, p-value: 0.99749
# Hausman test: random-effects (Nerlove) fit against the fixed-effects fit.
phtest(x = nerlove, x2 = within)
##
## Hausman Test
##
## data: NY.GDP.PCAP.CD ~ SL.TLF.TOTL.IN + SE.ADT.LITR.ZS + SE.SEC.NENR
## chisq = 0.67689, df = 3, p-value = 0.8786
## alternative hypothesis: one model is inconsistent
El modelo seleccionado fue el modelo de efectos aleatorios (Walhus), puesto que tiene mayor R² y menor error. Además, su p-value es significativo, lo que nos indica que el modelo es significativo para predecir la variable endógena.
library(dplyr)
library(magrittr)
library(readxl)
library(plm)
library(tidyr)
library(gplots)
# Load the patent data set from the first worksheet of the Excel file.
df <- read_excel("PATENT 3.xls", sheet = "Sheet1")

# Keep the modelling columns and, within each firm (cusip), carry the last
# observed value forward in time for the listed columns.
panel <- df %>%
  dplyr::select(
    cusip, patentsg, merger, employ, return,
    patents, stckpr, rnd, rndstck, sales, year
  ) %>%
  arrange(year) %>%
  group_by(cusip) %>%
  fill(employ, return, stckpr, rndstck, sales) %>%
  ungroup() %>%
  arrange(cusip)

# Remaining gaps are replaced by zero.
# NOTE(review): zero-imputation treats "missing" as "none"; confirm this is
# the intended handling for these variables.
panel[is.na(panel)] <- 0

# Coerce every column to numeric. lapply() always yields one vector per
# column, whereas sapply() may simplify to a matrix or a plain vector
# depending on the input size.
panel[] <- lapply(panel, as.numeric)

# Declare the panel structure: individual = cusip, time = year.
pdata <- pdata.frame(panel, index = c("cusip", "year"))
# Pooled OLS for patents granted.
# NOTE(review): this specification uses `sales` and omits `patents`, while
# the `within` model fitted on the same data uses `patents` and omits `sales`.
# Confirm the difference is intended before comparing the two fits.
pooled <- plm(
  formula = patentsg ~ employ + stckpr + rnd + rndstck + sales,
  data = pdata,
  model = "pooling"
)
summary(pooled)
## Pooling Model
##
## Call:
## plm(formula = patentsg ~ employ + stckpr + rnd + rndstck + sales,
## data = pdata, model = "pooling")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -344.1260 -10.6748 1.3910 8.0125 540.2245
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -1.0738e+01 1.4238e+00 -7.5416 6.691e-14 ***
## employ 1.4576e+00 4.5105e-02 32.3167 < 2.2e-16 ***
## stckpr 7.6388e-01 4.4577e-02 17.1363 < 2.2e-16 ***
## rnd -5.5617e-01 6.5975e-02 -8.4299 < 2.2e-16 ***
## rndstck 8.4726e-02 1.2141e-02 6.9787 3.904e-12 ***
## sales -2.6208e-03 5.0678e-04 -5.1715 2.527e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 14168000
## Residual Sum of Squares: 5433700
## R-Squared: 0.61648
## Adj. R-Squared: 0.61563
## F-statistic: 724.622 on 5 and 2254 DF, p-value: < 2.22e-16
# Fixed-effects (within) estimator for patents granted.
within <- plm(
  formula = patentsg ~ employ + patents + stckpr + rnd + rndstck,
  data = pdata,
  model = "within"
)
summary(within)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = patentsg ~ employ + patents + stckpr + rnd + rndstck,
## data = pdata, model = "within")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -226.86121 -1.91659 -0.33548 1.51974 258.24199
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## employ -0.1863311 0.0685649 -2.7176 0.006632 **
## patents 0.0735127 0.0187631 3.9179 9.227e-05 ***
## stckpr 0.0248818 0.0276181 0.9009 0.367736
## rnd -0.0846721 0.0321723 -2.6318 0.008557 **
## rndstck -0.0130047 0.0057733 -2.2526 0.024394 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 715640
## Residual Sum of Squares: 588660
## R-Squared: 0.17743
## Adj. R-Squared: 0.08419
## F-statistic: 87.5337 on 5 and 2029 DF, p-value: < 2.22e-16
# F test for individual effects. The test is only meaningful when the pooled
# and fixed-effects fits share the same regressors, but the `pooled` and
# `within` objects above were estimated with different formulas. The formula
# interface fits both models from one specification (the one used by `within`).
# NOTE: re-run to refresh the statistics printed after this call; they came
# from comparing the two differently specified models.
pFtest(patentsg ~ employ + patents + stckpr + rnd + rndstck, data = pdata)
##
## F test for individual effects
##
## data: patentsg ~ employ + patents + stckpr + rnd + rndstck
## F = 74.222, df1 = 225, df2 = 2029, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Random-effects model using the Wallace-Hussain variance estimator.
walhus <- plm(
  formula = patentsg ~ employ + patents + stckpr + rnd + rndstck,
  data = pdata,
  model = "random",
  random.method = "walhus"
)
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = patentsg ~ employ + patents + stckpr + rnd + rndstck,
## data = pdata, model = "random", random.method = "walhus")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 597.076 24.435 0.956
## individual 27.492 5.243 0.044
## theta: 0.1725
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -260.84846 -4.12285 -0.39777 1.89895 613.20670
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -1.7695669 0.8314818 -2.1282 0.03332 *
## employ 0.2464792 0.0285117 8.6448 < 2.2e-16 ***
## patents 0.8745414 0.0129288 67.6429 < 2.2e-16 ***
## stckpr 0.1693922 0.0257784 6.5711 4.995e-11 ***
## rnd -0.1832516 0.0349067 -5.2498 1.523e-07 ***
## rndstck 0.0381972 0.0063317 6.0327 1.613e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 9926700
## Residual Sum of Squares: 1334800
## R-Squared: 0.86554
## Adj. R-Squared: 0.86524
## Chisq: 14508.8 on 5 DF, p-value: < 2.22e-16
# Random-effects model using the Amemiya variance estimator.
# NOTE(review): uses `sales` where the walhus and within models use `rndstck`;
# confirm the change of regressor is intended.
amemiya <- plm(
  formula = patentsg ~ employ + patents + stckpr + rnd + sales,
  data = pdata,
  model = "random",
  random.method = "amemiya"
)
summary(amemiya)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = patentsg ~ employ + patents + stckpr + rnd + sales,
## data = pdata, model = "random", random.method = "amemiya")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 287.95 16.97 0.035
## individual 7838.47 88.54 0.965
## theta: 0.9395
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -187.47285 -3.03765 -1.55182 0.65098 272.61083
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 23.06322292 5.98017150 3.8566 0.0001150 ***
## employ 0.20753304 0.05504136 3.7705 0.0001629 ***
## patents 0.12913613 0.01801175 7.1695 7.524e-13 ***
## stckpr 0.08789749 0.02598965 3.3820 0.0007196 ***
## rnd -0.10090554 0.01240146 -8.1366 4.066e-16 ***
## sales -0.00146459 0.00034993 -4.1854 2.846e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 764870
## Residual Sum of Squares: 637690
## R-Squared: 0.16629
## Adj. R-Squared: 0.16444
## Chisq: 449.564 on 5 DF, p-value: < 2.22e-16
# Random-effects model using the Nerlove variance estimator.
# NOTE(review): uses `sales` where the walhus and within models use `rndstck`;
# confirm the change of regressor is intended.
nerlove <- plm(
  formula = patentsg ~ employ + patents + stckpr + rnd + sales,
  data = pdata,
  model = "random",
  random.method = "nerlove"
)
summary(nerlove)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = patentsg ~ employ + patents + stckpr + rnd + sales,
## data = pdata, model = "random", random.method = "nerlove")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 259.16 16.10 0.032
## individual 7902.23 88.89 0.968
## theta: 0.9428
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -188.47497 -3.00366 -1.52701 0.66022 272.35959
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 23.72242825 6.29330012 3.7695 0.0001636 ***
## employ 0.18587233 0.05527759 3.3625 0.0007723 ***
## patents 0.12374063 0.01799719 6.8756 6.175e-12 ***
## stckpr 0.08458818 0.02592117 3.2633 0.0011013 **
## rnd -0.10303073 0.01237765 -8.3239 < 2.2e-16 ***
## sales -0.00145991 0.00034891 -4.1842 2.861e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 759610
## Residual Sum of Squares: 633110
## R-Squared: 0.16654
## Adj. R-Squared: 0.16469
## Chisq: 450.374 on 5 DF, p-value: < 2.22e-16
# Hausman test of random vs fixed effects. Both fits must use the same
# regressors: `nerlove` was estimated with `sales`, while the `within` object
# above uses `rndstck`, so only their common coefficients would be compared.
# Fit the fixed-effects counterpart of the `nerlove` specification and test
# against that instead.
# NOTE: re-run to refresh the statistics printed after this call.
within_sales <- plm(
  patentsg ~ employ + patents + stckpr + rnd + sales,
  data = pdata,
  model = "within"
)
phtest(nerlove, within_sales)
##
## Hausman Test
##
## data: patentsg ~ employ + patents + stckpr + rnd + sales
## chisq = 93.587, df = 4, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Group means of patents granted by year, taken from the panel data.
plotmeans(
  patentsg ~ year,
  data = pdata,
  barcol = "blue",
  xlab = "Año",
  ylab = "Promedio de patentes otorgadas",
  main = "Promedio de patentes otorgadas por año"
)

# Group means of patents granted by `sic`, taken from the raw data frame `df`
# (the `panel` selection does not include `sic`).
plotmeans(
  patentsg ~ sic,
  data = df,
  barcol = "blue",
  xlab = "Industria",
  ylab = "Promedio de patentes otorgadas",
  main = "Promedio de patentes otorgadas por industria"
)
El modelo seleccionado fue el de efectos aleatorios (Walhus), ya que tiene la mayor R². Su p-value es significativo, por lo cual concluimos que el modelo en sí es significativo para predecir el número de patentes otorgadas a una empresa.