#install.packages("WDI")
library(WDI)
#install.packages("wbstats")
library(wbstats)
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("plm") # Para realizar modelos lineal para datos de panel
library(plm)
##
## Adjuntando el paquete: 'plm'
##
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
#install.packages("gplots")
library(gplots)
##
## Adjuntando el paquete: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
#install.packages("readxl")
library(readxl)
library(lmtest)
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
patentes <- read_excel("c:\\Users\\Chema\\Downloads\\PATENT 3.xls")
sum(is.na(patentes))
## [1] 191
sapply(patentes, function(x) sum (is.na(x))) # NA por variable
## cusip merger employ return patents patentsg stckpr rnd
## 0 0 21 8 0 0 2 0
## rndeflt rndstck sales sic year
## 0 157 3 0 0
patentes$employ[is.na(patentes$employ)] <- mean(patentes$employ, na.rm=TRUE)
patentes$return[is.na(patentes$return)] <- mean(patentes$return, na.rm=TRUE)
patentes$stckpr[is.na(patentes$stckpr)] <- mean(patentes$stckpr, na.rm=TRUE)
patentes$sales[is.na(patentes$sales)] <- mean(patentes$sales, na.rm=TRUE)
patentes$rndstck[is.na(patentes$rndstck)] <- mean(patentes$rndstck, na.rm=TRUE)
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.242 1st Qu.: 5.139
## Median :501116 Median :0.0000 Median : 3.893 Median : 7.601
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 16.034 3rd Qu.: 10.473
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.1253 Min. : 1.222 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.5882 1st Qu.: 53.204 1st Qu.:2890
## Median : 1.4764 Median : 16.2341 Median : 174.283 Median :3531
## Mean : 19.7238 Mean : 163.8234 Mean : 1219.601 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 119.1048 3rd Qu.: 743.422 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.3516 Max. :44224.000 Max. :9997
## year
## Min. :2012
## 1st Qu.:2014
## Median :2016
## Mean :2016
## 3rd Qu.:2019
## Max. :2021
sum(is.na(patentes))
## [1] 0
boxplot(patentes$cusip, horizontal=TRUE)
boxplot(patentes$employ, horizontal=TRUE)
boxplot(patentes$return, horizontal=TRUE)
boxplot(patentes$patents, horizontal=TRUE)
boxplot(patentes$patentsg, horizontal=TRUE)
boxplot(patentes$stckpr, horizontal=TRUE)
boxplot(patentes$rnd, horizontal=TRUE)
boxplot(patentes$rndeflt, horizontal=TRUE)
boxplot(patentes$rndstck, horizontal=TRUE)
boxplot(patentes$sales, horizontal=TRUE)
boxplot(patentes$sic, horizontal=TRUE)
boxplot(patentes$year, horizontal=TRUE)
## Generar Conjuntos de Datos Panel
panel_patentes <- pdata.frame(patentes, index=c("cusip", "year"))
plotmeans(patents ~ cusip, main="Prueba de Heterogeneidad", data=panel_patentes)
# Como la linea sale quebrada, sube y baja hay mucha Heterogeneidad por lo tanto hay que ajustar
# Modelo 1. Regresion Agrupada (Pooled) Si es que tenemos Nula heterogeneidad.
pooled_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_patentes, model="pooling") # Si tienes mas variables usamos simbolo de "+"
summary(pooled_patentes)
## Pooling Model
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patentes, model = "pooling")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -320.36212 -10.01555 0.94472 7.40861 433.86316
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -4.1831e-01 5.2757e+00 -0.0793 0.93681
## merger -1.1612e+01 7.2433e+00 -1.6031 0.10905
## employ 1.3683e+00 4.1969e-02 32.6040 < 2.2e-16 ***
## return -4.3505e-03 1.8155e-01 -0.0240 0.98088
## stckpr 6.5137e-01 4.3139e-02 15.0994 < 2.2e-16 ***
## rnd -1.3853e-01 1.6106e-02 -8.6007 < 2.2e-16 ***
## sales -3.2049e-03 4.6962e-04 -6.8246 1.13e-11 ***
## sic -2.6894e-03 1.4820e-03 -1.8146 0.06972 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 10998000
## Residual Sum of Squares: 4600300
## R-Squared: 0.58173
## Adj. R-Squared: 0.58043
## F-statistic: 447.437 on 7 and 2252 DF, p-value: < 2.22e-16
# Modelo 2. Efectos Fijos
within_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_patentes, model="within") # Si tienes mas variables usamos simbolo de "+"
summary(within_patentes)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patentes, model = "within")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -497.22898 -1.64569 -0.19669 1.64341 184.49423
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## merger 3.30904770 4.16313684 0.7948 0.42680
## employ 0.11963128 0.07052503 1.6963 0.08998 .
## return -0.07056694 0.10867769 -0.6493 0.51620
## stckpr -0.01107952 0.03242512 -0.3417 0.73262
## rnd -0.19889614 0.01443066 -13.7829 < 2.2e-16 ***
## sales -0.00309052 0.00041525 -7.4426 1.451e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1091400
## Residual Sum of Squares: 819280
## R-Squared: 0.24935
## Adj. R-Squared: 0.16385
## F-statistic: 112.278 on 6 and 2028 DF, p-value: < 2.22e-16
# Prueba F
pFtest(within_patentes, pooled_patentes)
##
## F test for individual effects
##
## data: patents ~ merger + employ + return + stckpr + rnd + sales + sic
## F = 41.782, df1 = 224, df2 = 2028, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Modelo 3
# Metodo Walhus
walhus_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_patentes, model="random", random.method = "walhus")
summary(walhus_patentes)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patentes, model = "random", random.method = "walhus")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 555.26 23.56 0.273
## individual 1480.26 38.47 0.727
## theta: 0.8099
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -433.72438 -3.89667 -1.76198 0.78484 211.91016
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 11.84397257 12.78087032 0.9267 0.3541
## merger 4.47647107 4.51685216 0.9911 0.3217
## employ 1.10525428 0.04853786 22.7710 < 2.2e-16 ***
## return -0.12920955 0.11762230 -1.0985 0.2720
## stckpr 0.17097726 0.03355374 5.0956 3.476e-07 ***
## rnd -0.14575073 0.01469317 -9.9196 < 2.2e-16 ***
## sales -0.00393738 0.00042854 -9.1880 < 2.2e-16 ***
## sic -0.00107515 0.00376075 -0.2859 0.7750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1449600
## Residual Sum of Squares: 1098300
## R-Squared: 0.24236
## Adj. R-Squared: 0.24
## Chisq: 720.388 on 7 DF, p-value: < 2.22e-16
# Metodo Amemiya
amemiya_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_patentes, model="random", random.method = "amemiya")
summary(amemiya_patentes)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patentes, model = "random", random.method = "amemiya")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 402.79 20.07 0.051
## individual 7483.44 86.51 0.949
## theta: 0.9268
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -454.59697 -2.99704 -1.65272 0.59741 193.17353
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.58107091 29.77947247 0.2882 0.7732
## merger 3.91351453 4.11354681 0.9514 0.3414
## employ 0.49060426 0.06153621 7.9726 1.554e-15 ***
## return -0.09427795 0.10733800 -0.8783 0.3798
## stckpr 0.04660332 0.03163610 1.4731 0.1407
## rnd -0.17995961 0.01406835 -12.7918 < 2.2e-16 ***
## sales -0.00342554 0.00040647 -8.4275 < 2.2e-16 ***
## sic 0.00425278 0.00877425 0.4847 0.6279
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1144500
## Residual Sum of Squares: 891720
## R-Squared: 0.22085
## Adj. R-Squared: 0.21842
## Chisq: 638.312 on 7 DF, p-value: < 2.22e-16
# Metodo Nerlove
nerlove_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_patentes, model="random", random.method = "nerlove")
summary(nerlove_patentes)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patentes, model = "random", random.method = "nerlove")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 362.51 19.04 0.046
## individual 7557.16 86.93 0.954
## theta: 0.9309
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -455.94828 -2.93752 -1.60035 0.62863 192.36375
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.38498937 31.41700295 0.2669 0.7896
## merger 3.86675065 4.09938561 0.9433 0.3456
## employ 0.46018862 0.06203371 7.4184 1.186e-13 ***
## return -0.09236163 0.10697310 -0.8634 0.3879
## stckpr 0.04167663 0.03156299 1.3204 0.1867
## rnd -0.18153379 0.01403810 -12.9315 < 2.2e-16 ***
## sales -0.00339833 0.00040545 -8.3816 < 2.2e-16 ***
## sic 0.00451640 0.00925634 0.4879 0.6256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1138700
## Residual Sum of Squares: 885220
## R-Squared: 0.22262
## Adj. R-Squared: 0.22021
## Chisq: 644.925 on 7 DF, p-value: < 2.22e-16
phtest(walhus_patentes, within_patentes)
##
## Hausman Test
##
## data: patents ~ merger + employ + return + stckpr + rnd + sales + sic
## chisq = 352.48, df = 6, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Si el p-value es < 0.05 usamos Efectos Fijos
install.packages("lmtest")
## Warning: package 'lmtest' is in use and will not be installed
library(lmtest)
# Prueba de heterocedasticidad
bptest(within_patentes)
##
## studentized Breusch-Pagan test
##
## data: within_patentes
## BP = 1447.6, df = 7, p-value < 2.2e-16
# Prueba de Autocorrelacion Serial
pwartest(within_patentes)
##
## Wooldridge's test for serial correlation in FE panels
##
## data: within_patentes
## F = 104.29, df1 = 1, df2 = 2032, p-value < 2.2e-16
## alternative hypothesis: serial correlation
# Si el p-value es menor a 0.05 hay autocorrelacion serial en los errores (problema detectado)
# Modelo De Correccion con Errores Estandar Robustos
coeficientes_corregidos <- coeftest(within_patentes, vcov=vcovHC(within_patentes, type = "HC0"))
solo_coeficientes <- coeficientes_corregidos[,1]
datos_de_prueba <- data.frame(merger=0, employ=10, return=6, stckpr=48, rnd=3, sales=344)
prediccion <- sum(solo_coeficientes*datos_de_prueba)
prediccion
## [1] -1.418735