Si considero que se debe buscar tratar de mejor aun y cuando estes haciendo las cosas bien, el mundo esta en constante cambio y si no te puedes adaptar va a ser todo lo contrario a bien. # Cargar librerías
#install.packages("WDI")
library(WDI)
#install.packages("wbstats")
library(wbstats)
#install.packages("tidyverse")
library(tidyverse)
#install.packages("plm") # Paquete para realizar modelos lineales para datos de panel
library(plm)
#install.packages("gplots")
library(gplots)
# install.packages("readxl")
library(readxl)
# install.packages("lmtest")
library(lmtest)
# Obtenr información de varios países
gdp <- wb_data(country=c("MX","US","CA"), indicator=c("NY.GDP.PCAP.CD","SM.POP.NETM"), start_date=1950, end_date=2025)
# Generar conjunto de datos de panel
panel_1 <- select(gdp, country, date, NY.GDP.PCAP.CD, SM.POP.NETM)
panel_2 <- subset(panel_1, date== 1960 | date == 1970 | date == 1980 | date == 1990 | date == 2000 | date == 2010 | date == 2020)
panel_1 <- pdata.frame(panel_1, index = c("country", "date"))
plotmeans(`NY.GDP.PCAP.CD` ~ country,
main = "Prueba de heterogeneidad entre países para el PIB",
data = panel_1)
# Modelo 1. Regresión agrupada (pooled)
pooled <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model="pooling")
summary(pooled)
## Pooling Model
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "pooling")
##
## Balanced Panel: n = 3, T = 65, N = 195
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -25773.31 -9718.04 -722.86 4194.39 44495.44
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.2394e+04 1.2015e+03 10.315 < 2.2e-16 ***
## SM.POP.NETM 2.2781e-02 1.8087e-03 12.596 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 7.6255e+10
## Residual Sum of Squares: 4.1852e+10
## R-Squared: 0.45116
## Adj. R-Squared: 0.44831
## F-statistic: 158.649 on 1 and 193 DF, p-value: < 2.22e-16
# Modelo 2. Efectos Fijos (within)
# Cuando las diferencias no observadas son constantes en el tiempo
within <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model="within")
summary(within)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "within")
##
## Balanced Panel: n = 3, T = 65, N = 195
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -24585.3 -8566.5 -1457.9 4442.4 61433.0
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## SM.POP.NETM 0.039755 0.003437 11.567 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.4746e+10
## Residual Sum of Squares: 3.2194e+10
## R-Squared: 0.41193
## Adj. R-Squared: 0.4027
## F-statistic: 133.793 on 1 and 191 DF, p-value: < 2.22e-16
# Prueba
pFtest(within, pooled)
##
## F test for individual effects
##
## data: NY.GDP.PCAP.CD ~ SM.POP.NETM
## F = 28.649, df1 = 2, df2 = 191, p-value = 1.315e-11
## alternative hypothesis: significant effects
# Si p-value < 0.05 se prefiere el modelo de efectos fijos
# Modelo 3. Efectos aleatorios
# Cuando las diferencias no observadas son aleatorias
# Método Walhus
walhus <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model="random", random.method = "walhus")
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random",
## random.method = "walhus")
##
## Balanced Panel: n = 3, T = 65, N = 195
##
## Effects:
## var std.dev share
## idiosyncratic 189090235 13751 0.881
## individual 25535033 5053 0.119
## theta: 0.6802
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -22598.9 -8395.6 -1903.1 5148.2 53261.7
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.8597e+03 3.1427e+03 2.8192 0.004815 **
## SM.POP.NETM 3.3881e-02 3.0190e-03 11.2224 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.6945e+10
## Residual Sum of Squares: 3.4459e+10
## R-Squared: 0.39487
## Adj. R-Squared: 0.39174
## Chisq: 125.942 on 1 DF, p-value: < 2.22e-16
# Metodo amemiya
amemiya <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model="random", random.method = "amemiya")
summary(amemiya)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random",
## random.method = "amemiya")
##
## Balanced Panel: n = 3, T = 65, N = 195
##
## Effects:
## var std.dev share
## idiosyncratic 167677524 12949 0.536
## individual 144897816 12037 0.464
## theta: 0.8677
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -23174.0 -8660.7 -1786.9 4997.4 58405.0
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 7.4022e+03 7.1246e+03 1.039 0.2988
## SM.POP.NETM 3.8459e-02 3.3396e-03 11.516 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.5122e+10
## Residual Sum of Squares: 3.2672e+10
## R-Squared: 0.40728
## Adj. R-Squared: 0.40421
## Chisq: 132.62 on 1 DF, p-value: < 2.22e-16
# Metodo nerlove
nerlove <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model="random", random.method = "nerlove")
summary(nerlove)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random",
## random.method = "nerlove")
##
## Balanced Panel: n = 3, T = 65, N = 195
##
## Effects:
## var std.dev share
## idiosyncratic 165097870 12849 0.427
## individual 221216206 14873 0.573
## theta: 0.8935
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -23465.1 -8634.0 -1413.4 4866.4 59087.2
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 7.2630e+03 8.7890e+03 0.8264 0.4086
## SM.POP.NETM 3.8896e-02 3.3669e-03 11.5524 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.499e+10
## Residual Sum of Squares: 3.251e+10
## R-Squared: 0.40881
## Adj. R-Squared: 0.40574
## Chisq: 133.459 on 1 DF, p-value: < 2.22e-16
# Comparar la R^2 ajustada de los 3 métodos y elegir el que tenga el mayor.
phtest(walhus, within)
##
## Hausman Test
##
## data: NY.GDP.PCAP.CD ~ SM.POP.NETM
## chisq = 12.79, df = 1, p-value = 0.0003485
## alternative hypothesis: one model is inconsistent
# Si el p-value es < 0.05, usamos Efectos Fijos (within)
# Obtener información de varios países
gdp <- wb_data(country=c("AFG","ARG","MX"), indicator=c("SP.DYN.IMRT.IN","NE.EXP.GNFS.ZS"), start_date=1950, end_date=2025)
# Generar conjunto de datos de panel
panel_1 <- select(gdp, country, date, SP.DYN.IMRT.IN, NE.EXP.GNFS.ZS)
panel_2 <- subset(panel_1, date== 1960 | date == 1970 | date == 1980 | date == 1990 | date == 2000 | date == 2010 | date == 2020)
panel_1 <- pdata.frame(panel_1, index = c("country", "date"))
plotmeans(`SP.DYN.IMRT.IN` ~ country,
main = "Prueba de heterogeneidad entre países para dueda externa",
data = panel_1)
## Paso 2. Ejercicio 3. Prueba de
Heterogeneidad
# Modelo 1. Regresión agrupada (pooled)
pooled <- plm(SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS, data = panel_1, model="pooling")
summary(pooled)
## Pooling Model
##
## Call:
## plm(formula = SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS, data = panel_1,
## model = "pooling")
##
## Unbalanced Panel: n = 3, T = 4-64, N = 123
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -34.49982 -17.87668 0.50125 11.56466 57.38111
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 61.13671 3.86505 15.8178 < 2.2e-16 ***
## NE.EXP.GNFS.ZS -1.55403 0.20574 -7.5533 8.911e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 80821
## Residual Sum of Squares: 54924
## R-Squared: 0.32043
## Adj. R-Squared: 0.31481
## F-statistic: 57.0526 on 1 and 121 DF, p-value: 8.9112e-12
# Modelo 2. Efectos Fijos (within)
# Cuando las diferencias no observadas son constantes en el tiempo
within <- plm(SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS, data = panel_1, model="within")
summary(within)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS, data = panel_1,
## model = "within")
##
## Unbalanced Panel: n = 3, T = 4-64, N = 123
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -29.8324 -10.0982 -3.1939 10.1605 35.5304
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## NE.EXP.GNFS.ZS -2.27706 0.13957 -16.315 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 68478
## Residual Sum of Squares: 21157
## R-Squared: 0.69105
## Adj. R-Squared: 0.68326
## F-statistic: 266.173 on 1 and 119 DF, p-value: < 2.22e-16
# Prueba
pFtest(within, pooled)
##
## F test for individual effects
##
## data: SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS
## F = 94.966, df1 = 2, df2 = 119, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Si p-value < 0.05 se prefiere el modelo de efectos fijos
# Modelo 3. Efectos aleatorios
# Cuando las diferencias no observadas son aleatorias
# Método Walhus
walhus <- plm(SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS, data = panel_1, model="random", random.method = "walhus")
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS, data = panel_1,
## model = "random", random.method = "walhus")
##
## Unbalanced Panel: n = 3, T = 4-64, N = 123
##
## Effects:
## var std.dev share
## idiosyncratic 180.66 13.44 0.235
## individual 588.48 24.26 0.765
## theta:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7330 0.9255 0.9309 0.9221 0.9309 0.9309
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -28.903 -9.993 -2.827 -0.209 10.259 36.472
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 76.08400 14.22103 5.3501 8.79e-08 ***
## NE.EXP.GNFS.ZS -2.27300 0.13916 -16.3341 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 69043
## Residual Sum of Squares: 21404
## R-Squared: 0.69006
## Adj. R-Squared: 0.6875
## Chisq: 266.804 on 1 DF, p-value: < 2.22e-16
# Metodo amemiya
amemiya <- plm(SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS, data = panel_1, model="random", random.method = "amemiya")
summary(amemiya)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS, data = panel_1,
## model = "random", random.method = "amemiya")
##
## Unbalanced Panel: n = 3, T = 4-64, N = 123
##
## Effects:
## var std.dev share
## idiosyncratic 177.79 13.33 0.229
## individual 600.02 24.50 0.771
## theta:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7374 0.9268 0.9321 0.9234 0.9321 0.9321
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -28.920 -10.006 -2.843 -0.206 10.267 36.455
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 76.09401 14.45995 5.2624 1.422e-07 ***
## NE.EXP.GNFS.ZS -2.27314 0.13913 -16.3381 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 69025
## Residual Sum of Squares: 21396
## R-Squared: 0.6901
## Adj. R-Squared: 0.68754
## Chisq: 266.935 on 1 DF, p-value: < 2.22e-16
# Metodo nerlove
nerlove <- plm(SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS, data = panel_1, model="random", random.method = "nerlove")
summary(nerlove)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS, data = panel_1,
## model = "random", random.method = "nerlove")
##
## Unbalanced Panel: n = 3, T = 4-64, N = 123
##
## Effects:
## var std.dev share
## idiosyncratic 172.00 13.12 0.264
## individual 480.16 21.91 0.736
## theta:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7133 0.9196 0.9254 0.9159 0.9254 0.9254
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -28.823 -10.023 -2.752 -0.223 10.219 36.554
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 76.03644 13.23131 5.7467 9.1e-09 ***
## NE.EXP.GNFS.ZS -2.27233 0.13928 -16.3150 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 69128
## Residual Sum of Squares: 21445
## R-Squared: 0.68987
## Adj. R-Squared: 0.68731
## Chisq: 266.18 on 1 DF, p-value: < 2.22e-16
# Comparar la R^2 ajustada de los 3 métodos y elegir el que tenga el mayor.
phtest(walhus, within)
##
## Hausman Test
##
## data: SP.DYN.IMRT.IN ~ NE.EXP.GNFS.ZS
## chisq = 0.14309, df = 1, p-value = 0.7052
## alternative hypothesis: one model is inconsistent
# Si el p-value es < 0.05, usamos Efectos Fijos (within)
patentes <- read_excel("C:\\Users\\macha\\Downloads\\PATENT 3.xls")
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.227 1st Qu.: 5.128
## Median :501116 Median :0.0000 Median : 3.842 Median : 7.585
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 15.442 3rd Qu.: 10.501
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## NA's :21 NA's :8
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## NA's :2
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.125 Min. : 1.22 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.152 1st Qu.: 52.99 1st Qu.:2890
## Median : 1.4764 Median : 13.353 Median : 174.06 Median :3531
## Mean : 19.7238 Mean : 163.823 Mean : 1219.60 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 74.563 3rd Qu.: 728.96 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.352 Max. :44224.00 Max. :9997
## NA's :157 NA's :3
## year
## Min. :2012
## 1st Qu.:2014
## Median :2016
## Mean :2016
## 3rd Qu.:2019
## Max. :2021
##
str(patentes)
## tibble [2,260 × 13] (S3: tbl_df/tbl/data.frame)
## $ cusip : num [1:2260] 800 800 800 800 800 800 800 800 800 800 ...
## $ merger : num [1:2260] 0 0 0 0 0 0 0 0 0 0 ...
## $ employ : num [1:2260] 9.85 12.32 12.2 11.84 12.99 ...
## $ return : num [1:2260] 5.82 5.69 4.42 5.28 4.91 ...
## $ patents : num [1:2260] 22 34 31 32 40 60 57 77 38 5 ...
## $ patentsg: num [1:2260] 24 32 30 34 28 33 53 47 64 70 ...
## $ stckpr : num [1:2260] 47.6 57.9 33 38.5 35.1 ...
## $ rnd : num [1:2260] 2.56 3.1 3.27 3.24 3.78 ...
## $ rndeflt : num [1:2260] 2.56 2.91 2.8 2.52 2.78 ...
## $ rndstck : num [1:2260] 16.2 17.4 19.6 21.9 23.1 ...
## $ sales : num [1:2260] 344 436 535 567 631 ...
## $ sic : num [1:2260] 3740 3740 3740 3740 3740 3740 3740 3740 3740 3740 ...
## $ year : num [1:2260] 2012 2013 2014 2015 2016 ...
sum(is.na(patentes))
## [1] 191
sapply(patentes, function(x) sum (is.na(x))) #NA´s por variable
## cusip merger employ return patents patentsg stckpr rnd
## 0 0 21 8 0 0 2 0
## rndeflt rndstck sales sic year
## 0 157 3 0 0
patentes$employ[is.na(patentes$employ)] <- mean(patentes$employ,
na.rm=TRUE)
patentes$return[is.na(patentes$return)] <- mean(patentes$return,
na.rm=TRUE)
patentes$stckpr[is.na(patentes$stckpr)] <- mean(patentes$stckpr,
na.rm=TRUE)
patentes$rndstck[is.na(patentes$rndstck)] <- mean(patentes$rndstck,
na.rm=TRUE)
patentes$sales[is.na(patentes$sales)] <- mean(patentes$sales,
na.rm=TRUE)
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.242 1st Qu.: 5.139
## Median :501116 Median :0.0000 Median : 3.893 Median : 7.601
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 16.034 3rd Qu.: 10.473
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.125 Min. : 1.22 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.588 1st Qu.: 53.20 1st Qu.:2890
## Median : 1.4764 Median : 16.234 Median : 174.28 Median :3531
## Mean : 19.7238 Mean : 163.823 Mean : 1219.60 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 119.105 3rd Qu.: 743.42 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.352 Max. :44224.00 Max. :9997
## year
## Min. :2012
## 1st Qu.:2014
## Median :2016
## Mean :2016
## 3rd Qu.:2019
## Max. :2021
sum(is.na(patentes)) # NA´s en la base de datos
## [1] 0
boxplot(patentes$cusip, horizontal=TRUE)
boxplot(patentes$merger, horizontal=TRUE)
boxplot(patentes$employ, horizontal=TRUE)
boxplot(patentes$return, horizontal=TRUE)
boxplot(patentes$patents, horizontal=TRUE)
boxplot(patentes$patentsg, horizontal=TRUE)
boxplot(patentes$stckpr, horizontal=TRUE)
boxplot(patentes$rnd, horizontal=TRUE)
boxplot(patentes$rndeflt, horizontal=TRUE)
boxplot(patentes$rndstck, horizontal=TRUE)
boxplot(patentes$sales, horizontal=TRUE)
boxplot(patentes$sic, horizontal=TRUE)
boxplot(patentes$year, horizontal=TRUE)
patentes$year <- patentes$year - 40
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.242 1st Qu.: 5.139
## Median :501116 Median :0.0000 Median : 3.893 Median : 7.601
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 16.034 3rd Qu.: 10.473
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.125 Min. : 1.22 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.588 1st Qu.: 53.20 1st Qu.:2890
## Median : 1.4764 Median : 16.234 Median : 174.28 Median :3531
## Mean : 19.7238 Mean : 163.823 Mean : 1219.60 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 119.105 3rd Qu.: 743.42 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.352 Max. :44224.00 Max. :9997
## year
## Min. :1972
## 1st Qu.:1974
## Median :1976
## Mean :1976
## 3rd Qu.:1979
## Max. :1981
panel_patentes <- pdata.frame(patentes, index = c("cusip", "year"))
plotmeans(`patents` ~ cusip,
main = "Prueba de heterogeneidad entre empresas para su patentes",
data = panel_patentes)
## Paso 3. Prueba de Efectos fijos y
aleatorios
# Modelo 1. Regresión agrupada (pooled)
pooled_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales+ sic, data = panel_patentes, model="pooling")
summary(pooled_patentes)
## Pooling Model
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patentes, model = "pooling")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -320.36212 -10.01555 0.94472 7.40861 433.86316
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -4.1831e-01 5.2757e+00 -0.0793 0.93681
## merger -1.1612e+01 7.2433e+00 -1.6031 0.10905
## employ 1.3683e+00 4.1969e-02 32.6040 < 2.2e-16 ***
## return -4.3505e-03 1.8155e-01 -0.0240 0.98088
## stckpr 6.5137e-01 4.3139e-02 15.0994 < 2.2e-16 ***
## rnd -1.3853e-01 1.6106e-02 -8.6007 < 2.2e-16 ***
## sales -3.2049e-03 4.6962e-04 -6.8246 1.13e-11 ***
## sic -2.6894e-03 1.4820e-03 -1.8146 0.06972 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 10998000
## Residual Sum of Squares: 4600300
## R-Squared: 0.58173
## Adj. R-Squared: 0.58043
## F-statistic: 447.437 on 7 and 2252 DF, p-value: < 2.22e-16
# Modelo 2. Efectos Fijos (within)
# Cuando las diferencias no observadas son constantes en el tiempo
within_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales+ sic, data = panel_patentes, model="within")
summary(within_patentes)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patentes, model = "within")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -497.22898 -1.64569 -0.19669 1.64341 184.49423
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## merger 3.30904770 4.16313684 0.7948 0.42680
## employ 0.11963128 0.07052503 1.6963 0.08998 .
## return -0.07056694 0.10867769 -0.6493 0.51620
## stckpr -0.01107952 0.03242512 -0.3417 0.73262
## rnd -0.19889614 0.01443066 -13.7829 < 2.2e-16 ***
## sales -0.00309052 0.00041525 -7.4426 1.451e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1091400
## Residual Sum of Squares: 819280
## R-Squared: 0.24935
## Adj. R-Squared: 0.16385
## F-statistic: 112.278 on 6 and 2028 DF, p-value: < 2.22e-16
# Prueba
pFtest(within_patentes, pooled_patentes)
##
## F test for individual effects
##
## data: patents ~ merger + employ + return + stckpr + rnd + sales + sic
## F = 41.782, df1 = 224, df2 = 2028, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Si p-value < 0.05 se prefiere el modelo de efectos fijos
# Modelo 3. Efectos aleatorios
# Cuando las diferencias no observadas son aleatorias
# Método Walhus
walhus_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales+ sic, data = panel_patentes, model="random", random.method = "walhus")
summary(walhus_patentes)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patentes, model = "random", random.method = "walhus")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 555.26 23.56 0.273
## individual 1480.26 38.47 0.727
## theta: 0.8099
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -433.72438 -3.89667 -1.76198 0.78484 211.91016
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 11.84397257 12.78087032 0.9267 0.3541
## merger 4.47647107 4.51685216 0.9911 0.3217
## employ 1.10525428 0.04853786 22.7710 < 2.2e-16 ***
## return -0.12920955 0.11762230 -1.0985 0.2720
## stckpr 0.17097726 0.03355374 5.0956 3.476e-07 ***
## rnd -0.14575073 0.01469317 -9.9196 < 2.2e-16 ***
## sales -0.00393738 0.00042854 -9.1880 < 2.2e-16 ***
## sic -0.00107515 0.00376075 -0.2859 0.7750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1449600
## Residual Sum of Squares: 1098300
## R-Squared: 0.24236
## Adj. R-Squared: 0.24
## Chisq: 720.388 on 7 DF, p-value: < 2.22e-16
# Metodo amemiya
amemiya_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales+ sic, data = panel_patentes, model="random", random.method = "amemiya")
summary(amemiya_patentes)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patentes, model = "random", random.method = "amemiya")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 402.79 20.07 0.051
## individual 7483.44 86.51 0.949
## theta: 0.9268
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -454.59697 -2.99704 -1.65272 0.59741 193.17353
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.58107091 29.77947247 0.2882 0.7732
## merger 3.91351453 4.11354681 0.9514 0.3414
## employ 0.49060426 0.06153621 7.9726 1.554e-15 ***
## return -0.09427795 0.10733800 -0.8783 0.3798
## stckpr 0.04660332 0.03163610 1.4731 0.1407
## rnd -0.17995961 0.01406835 -12.7918 < 2.2e-16 ***
## sales -0.00342554 0.00040647 -8.4275 < 2.2e-16 ***
## sic 0.00425278 0.00877425 0.4847 0.6279
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1144500
## Residual Sum of Squares: 891720
## R-Squared: 0.22085
## Adj. R-Squared: 0.21842
## Chisq: 638.312 on 7 DF, p-value: < 2.22e-16
# Metodo nerlove
nerlove_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales+ sic, data = panel_patentes, model="random", random.method = "nerlove")
summary(nerlove_patentes)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patentes, model = "random", random.method = "nerlove")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 362.51 19.04 0.046
## individual 7557.16 86.93 0.954
## theta: 0.9309
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -455.94828 -2.93752 -1.60035 0.62863 192.36375
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.38498937 31.41700295 0.2669 0.7896
## merger 3.86675065 4.09938561 0.9433 0.3456
## employ 0.46018862 0.06203371 7.4184 1.186e-13 ***
## return -0.09236163 0.10697310 -0.8634 0.3879
## stckpr 0.04167663 0.03156299 1.3204 0.1867
## rnd -0.18153379 0.01403810 -12.9315 < 2.2e-16 ***
## sales -0.00339833 0.00040545 -8.3816 < 2.2e-16 ***
## sic 0.00451640 0.00925634 0.4879 0.6256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1138700
## Residual Sum of Squares: 885220
## R-Squared: 0.22262
## Adj. R-Squared: 0.22021
## Chisq: 644.925 on 7 DF, p-value: < 2.22e-16
# Comparar la R^2 ajustada de los 3 métodos y elegir el que tenga el mayor.
phtest(walhus_patentes, within_patentes)
##
## Hausman Test
##
## data: patents ~ merger + employ + return + stckpr + rnd + sales + sic
## chisq = 352.48, df = 6, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Si el p-value es < 0.05, usamos Efectos Fijos (within)
# Por lo tanto nos quedmaos con el modelo de efectos fijos (within)
# Prueba de Heterocedasticidad
bptest(within_patentes)
##
## studentized Breusch-Pagan test
##
## data: within_patentes
## BP = 1447.6, df = 7, p-value < 2.2e-16
# Si el p-value es < 0.05, hay heterocedasticidad en los residuos (problema detectado)
# Prueba de Autocorrelación Serial
pwartest(within_patentes)
##
## Wooldridge's test for serial correlation in FE panels
##
## data: within_patentes
## F = 104.29, df1 = 1, df2 = 2032, p-value < 2.2e-16
## alternative hypothesis: serial correlation
# Si el p-value es < 0.05, hay autocorrelación serial en los errores (problema detectado)
# Modelo de corrección con Errores Estandar Robustos
coeficientes_corregidos <- coeftest(within_patentes, vcov=vcovHC(within_patentes, type = "HC0"))
solo_coeficientes <- coeficientes_corregidos[,1]
datos_de_prueba <- data.frame(merger = 0, employ = 10, return =6, stckpr =48, rnd=3, sales=344, sic=3740)
prediccion <- sum(solo_coeficientes*datos_de_prueba)
prediccion
## [1] 12374.42
En conclusión este ejercicio nos permite generar pronósticos en bases de datos