Pregunta 1: ¿Debe una empresa que lo hace bien buscar ser sobresaliente? ¿Sí o no, y por qué?
Sí, porque una empresa debe impulsarse constantemente para mejorar y no quedarse atrás en un mundo cambiante. Como dijo Heráclito: “Lo único constante es el cambio”.
# install.packages("WDI")
# install.packages("wbstats")
# install.packages("tidyverse")
#install.packages("plm")
#install.packages("gplots")
library(plm)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks plm::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks plm::lag(), stats::lag()
## ✖ dplyr::lead() masks plm::lead()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(WDI)
library(wbstats)
library(gplots)
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
# library(readxl)
# Download GDP per capita (NY.GDP.PCAP.CD) and net migration (SM.POP.NETM)
# for Mexico, the United States and Canada from the World Bank API.
gdp <- wb_data(
  country = c("MX", "US", "CA"),
  indicator = c("NY.GDP.PCAP.CD", "SM.POP.NETM"),
  start_date = 1950,
  end_date = 2025
)
# Build the panel data set: keep the identifiers and both indicators, retain
# one observation per decade (1960, 1970, ..., 2020) and index the result by
# country and year.
panel_1 <- gdp %>%
  select(country, date, NY.GDP.PCAP.CD, SM.POP.NETM) %>%
  subset(date %in% seq(1960, 2020, by = 10)) %>%
  pdata.frame(index = c("country", "date"))
# Heterogeneity check: plot the per-country mean of each variable.
# - A nearly horizontal line means little or no heterogeneity, so there are
#   no systematic differences to adjust for.
# - A broken line (going up and down) means strong heterogeneity that has to
#   be handled; this is the pattern that motivates the fixed- or
#   random-effects models.
plotmeans(
  NY.GDP.PCAP.CD ~ country,
  data = panel_1,
  main = "Prueba de heterogenidad entre paises para el PIB"
)
plotmeans(
  SM.POP.NETM ~ country,
  data = panel_1,
  main = "Prueba de heterogenidad entre paises para la migracion"
)
# Both plots show a broken line, so the panel models below are estimated.
# Model 1. Pooled regression: assumes there is no unobserved heterogeneity
# across countries (the flat-line case) and serves as the baseline that the
# fixed- and random-effects models are tested against.
pooled <- plm(
  NY.GDP.PCAP.CD ~ SM.POP.NETM,
  data = panel_1,
  model = "pooling"
)
summary(pooled)
## Pooling Model
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "pooling")
##
## Balanced Panel: n = 3, T = 7, N = 21
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -21506.0 -10924.8 -3728.9 5274.5 45389.3
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.2873e+04 4.2134e+03 3.0553 0.006511 **
## SM.POP.NETM 1.8616e-02 7.2324e-03 2.5740 0.018588 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 7259500000
## Residual Sum of Squares: 5382600000
## R-Squared: 0.25855
## Adj. R-Squared: 0.21952
## F-statistic: 6.62533 on 1 and 19 DF, p-value: 0.018588
# Model 2. Fixed effects (within estimator): used when the unobserved
# differences between countries are constant over time.
within <- plm(
  NY.GDP.PCAP.CD ~ SM.POP.NETM,
  data = panel_1,
  model = "within"
)
summary(within)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "within")
##
## Balanced Panel: n = 3, T = 7, N = 21
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -20886.56 -9903.27 -403.03 3407.39 44059.72
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## SM.POP.NETM 0.013921 0.014345 0.9705 0.3454
##
## Total Sum of Squares: 5256100000
## Residual Sum of Squares: 4980200000
## R-Squared: 0.052492
## Adj. R-Squared: -0.11471
## F-statistic: 0.94181 on 1 and 17 DF, p-value: 0.34542
# F test for individual effects: fixed-effects (within) model versus the
# pooled model.
pFtest(x = within, z = pooled)
##
## F test for individual effects
##
## data: NY.GDP.PCAP.CD ~ SM.POP.NETM
## F = 0.68685, df1 = 2, df2 = 17, p-value = 0.5166
## alternative hypothesis: significant effects
# Si el p-value es menor a 0.05 se prefiere el modelo de efectos fijos; aquí p-value = 0.5166, por lo que no se rechaza el modelo agrupado (pooled).
# Model 3. Random effects: used when the unobserved differences between
# countries are treated as random.
# Variance components estimated with the Wallace-Hussain method.
walhus <- plm(
  NY.GDP.PCAP.CD ~ SM.POP.NETM,
  data = panel_1,
  model = "random",
  random.method = "walhus"
)
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random",
## random.method = "walhus")
##
## Balanced Panel: n = 3, T = 7, N = 21
##
## Effects:
## var std.dev share
## idiosyncratic 278418900 16686 1
## individual 0 0 0
## theta: 0
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -21506.0 -10924.8 -3728.9 5274.5 45389.3
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.2873e+04 4.2134e+03 3.0553 0.002248 **
## SM.POP.NETM 1.8616e-02 7.2324e-03 2.5740 0.010054 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 7259500000
## Residual Sum of Squares: 5382600000
## R-Squared: 0.25855
## Adj. R-Squared: 0.21952
## Chisq: 6.62533 on 1 DF, p-value: 0.010054
# Random effects with the Amemiya variance-components method.
amemiya <- plm(
  NY.GDP.PCAP.CD ~ SM.POP.NETM,
  data = panel_1,
  model = "random",
  random.method = "amemiya"
)
summary(amemiya)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random",
## random.method = "amemiya")
##
## Balanced Panel: n = 3, T = 7, N = 21
##
## Effects:
## var std.dev share
## idiosyncratic 276675480 16634 1
## individual 0 0 0
## theta: 0
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -21506.0 -10924.8 -3728.9 5274.5 45389.3
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.2873e+04 4.2134e+03 3.0553 0.002248 **
## SM.POP.NETM 1.8616e-02 7.2324e-03 2.5740 0.010054 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 7259500000
## Residual Sum of Squares: 5382600000
## R-Squared: 0.25855
## Adj. R-Squared: 0.21952
## Chisq: 6.62533 on 1 DF, p-value: 0.010054
# Random effects with the Nerlove variance-components method.
nerlove <- plm(
  NY.GDP.PCAP.CD ~ SM.POP.NETM,
  data = panel_1,
  model = "random",
  random.method = "nerlove"
)
summary(nerlove)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random",
## random.method = "nerlove")
##
## Balanced Panel: n = 3, T = 7, N = 21
##
## Effects:
## var std.dev share
## idiosyncratic 237150411 15400 0.864
## individual 37271843 6105 0.136
## theta: 0.31
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -20850.0 -9773.4 -2826.2 3450.7 45608.0
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.3174e+04 5.8290e+03 2.2601 0.02382 *
## SM.POP.NETM 1.7563e-02 9.0595e-03 1.9386 0.05255 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 6.21e+09
## Residual Sum of Squares: 5184500000
## R-Squared: 0.16513
## Adj. R-Squared: 0.12119
## Chisq: 3.75814 on 1 DF, p-value: 0.052551
# Compare the adjusted R-squared of the three random-effects fits and keep the
# highest one. The first two (Wallace-Hussain and Amemiya) tie at
# Adj. R-Squared = 0.21952.
# Hausman test: random effects (Wallace-Hussain) versus fixed effects.
phtest(x = walhus, x2 = within)
##
## Hausman Test
##
## data: NY.GDP.PCAP.CD ~ SM.POP.NETM
## chisq = 0.14364, df = 1, p-value = 0.7047
## alternative hypothesis: one model is inconsistent
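# p-value = 0.7047 (> 0.05): no se rechaza la hipótesis nula de la prueba de Hausman, por lo que no hace falta usar efectos fijos.
# Como además la prueba F no rechazó el modelo agrupado (p-value = 0.5166), al final nos quedamos con el modelo agrupado (pooled).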
El entorno de negocios en el que las organizaciones se desarrollan es cada vez más dinámico, por lo que las empresas enfrentan constantemente el reto de mantenerse al día y superar los nuevos retos que el ambiente presenta. La innovación es una de las mejores formas que las empresas tienen para conseguirlo. De acuerdo con el artículo “Innovation in business: What it is and why is so important”, publicado en el Harvard Business Review, la innovación presenta tres grandes ventajas para las empresas: les permite adaptarse, promueve el crecimiento y además les ayuda a diferenciarse de su competencia generando ventajas competitivas.
Se proporcionan datos sobre patentes de distintas empresas.
# install.packages("readxl")
library(readxl)
#install.packages("lmtest")
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(plm)
library(tidyverse)
library(WDI)
library(wbstats)
library(gplots)
# Load the patents workbook. The location defaults to the original download
# path but can be overridden through the PATENTES_XLS environment variable,
# so the script also runs on machines where the file lives elsewhere.
ruta_patentes <- Sys.getenv(
  "PATENTES_XLS",
  unset = "/Users/sebastianespi/Downloads/PATENT 3.xls"
)
if (!file.exists(ruta_patentes)) {
  stop(
    "Patent data file not found: ", ruta_patentes,
    " (set the PATENTES_XLS environment variable to its location)",
    call. = FALSE
  )
}
patentes <- read_excel(ruta_patentes)
# Descriptive statistics for every column.
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.227 1st Qu.: 5.128
## Median :501116 Median :0.0000 Median : 3.842 Median : 7.585
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 15.442 3rd Qu.: 10.501
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## NA's :21 NA's :8
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## NA's :2
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.125 Min. : 1.22 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.152 1st Qu.: 52.99 1st Qu.:2890
## Median : 1.4764 Median : 13.353 Median : 174.06 Median :3531
## Mean : 19.7238 Mean : 163.823 Mean : 1219.60 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 74.563 3rd Qu.: 728.96 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.352 Max. :44224.00 Max. :9997
## NA's :157 NA's :3
## year
## Min. :2012
## 1st Qu.:2014
## Median :2016
## Mean :2016
## 3rd Qu.:2019
## Max. :2021
##
# Inspect the structure (column names and types) of the imported data.
str(patentes)
## tibble [2,260 × 13] (S3: tbl_df/tbl/data.frame)
## $ cusip : num [1:2260] 800 800 800 800 800 800 800 800 800 800 ...
## $ merger : num [1:2260] 0 0 0 0 0 0 0 0 0 0 ...
## $ employ : num [1:2260] 9.85 12.32 12.2 11.84 12.99 ...
## $ return : num [1:2260] 5.82 5.69 4.42 5.28 4.91 ...
## $ patents : num [1:2260] 22 34 31 32 40 60 57 77 38 5 ...
## $ patentsg: num [1:2260] 24 32 30 34 28 33 53 47 64 70 ...
## $ stckpr : num [1:2260] 47.6 57.9 33 38.5 35.1 ...
## $ rnd : num [1:2260] 2.56 3.1 3.27 3.24 3.78 ...
## $ rndeflt : num [1:2260] 2.56 2.91 2.8 2.52 2.78 ...
## $ rndstck : num [1:2260] 16.2 17.4 19.6 21.9 23.1 ...
## $ sales : num [1:2260] 344 436 535 567 631 ...
## $ sic : num [1:2260] 3740 3740 3740 3740 3740 3740 3740 3740 3740 3740 ...
## $ year : num [1:2260] 2012 2013 2014 2015 2016 ...
# Total number of missing values in the data set.
sum(is.na(patentes))
## [1] 191
# Missing values per column.
vapply(patentes, function(x) sum(is.na(x)), integer(1))
## cusip merger employ return patents patentsg stckpr rnd
## 0 0 21 8 0 0 2 0
## rndeflt rndstck sales sic year
## 0 157 3 0 0
# Mean imputation: in each of the five columns that contain missing values,
# replace every NA with the mean of the observed values of that column.
columnas_con_na <- c("employ", "return", "stckpr", "rndstck", "sales")
patentes[columnas_con_na] <- lapply(
  patentes[columnas_con_na],
  function(x) replace(x, is.na(x), mean(x, na.rm = TRUE))
)
# Descriptive statistics after imputation.
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.242 1st Qu.: 5.139
## Median :501116 Median :0.0000 Median : 3.893 Median : 7.601
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 16.034 3rd Qu.: 10.473
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.125 Min. : 1.22 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.588 1st Qu.: 53.20 1st Qu.:2890
## Median : 1.4764 Median : 16.234 Median : 174.28 Median :3531
## Mean : 19.7238 Mean : 163.823 Mean : 1219.60 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 119.105 3rd Qu.: 743.42 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.352 Max. :44224.00 Max. :9997
## year
## Min. :2012
## 1st Qu.:2014
## Median :2016
## Mean :2016
## 3rd Qu.:2019
## Max. :2021
sum(is.na(patentes)) # total NAs left in the data set after the imputation
## [1] 0
# Draw one horizontal boxplot per column, in column order (cusip, merger,
# employ, ..., sic, year), to inspect the distribution of every variable.
# invisible() suppresses printing of the statistics returned by boxplot().
invisible(lapply(patentes, boxplot, horizontal = TRUE))
# Shift every year back by 40, so the 2012-2021 range reported by the earlier
# summaries becomes 1972-1981.
# NOTE(review): the reason for this offset is not stated in the script — confirm.
patentes[["year"]] <- patentes[["year"]] - 40
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.242 1st Qu.: 5.139
## Median :501116 Median :0.0000 Median : 3.893 Median : 7.601
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 16.034 3rd Qu.: 10.473
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.125 Min. : 1.22 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.588 1st Qu.: 53.20 1st Qu.:2890
## Median : 1.4764 Median : 16.234 Median : 174.28 Median :3531
## Mean : 19.7238 Mean : 163.823 Mean : 1219.60 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 119.105 3rd Qu.: 743.42 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.352 Max. :44224.00 Max. :9997
## year
## Min. :1972
## 1st Qu.:1974
## Median :1976
## Mean :1976
## 3rd Qu.:1979
## Max. :1981
# Build the panel data set, indexed by firm identifier (cusip) and year.
panel_patente <- pdata.frame(patentes, index = c("cusip", "year"))
# Heterogeneity check: mean number of patents per firm.
plotmeans(
  patents ~ cusip,
  data = panel_patente,
  main = "Prueba de heterogenidad entre empresas para sus patentes"
)
# The line is broken (it goes up and down), so there is strong heterogeneity
# between firms that has to be accounted for.
# Model 1. Pooled regression: ignores differences between firms and serves as
# the baseline that the fixed- and random-effects models are tested against.
pooled_patente <- plm(
  patents ~ merger + employ + return + stckpr + rnd + sales + sic,
  data = panel_patente,
  model = "pooling"
)
summary(pooled_patente)
## Pooling Model
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patente, model = "pooling")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -320.36212 -10.01555 0.94472 7.40861 433.86316
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -4.1831e-01 5.2757e+00 -0.0793 0.93681
## merger -1.1612e+01 7.2433e+00 -1.6031 0.10905
## employ 1.3683e+00 4.1969e-02 32.6040 < 2.2e-16 ***
## return -4.3505e-03 1.8155e-01 -0.0240 0.98088
## stckpr 6.5137e-01 4.3139e-02 15.0994 < 2.2e-16 ***
## rnd -1.3853e-01 1.6106e-02 -8.6007 < 2.2e-16 ***
## sales -3.2049e-03 4.6962e-04 -6.8246 1.13e-11 ***
## sic -2.6894e-03 1.4820e-03 -1.8146 0.06972 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 10998000
## Residual Sum of Squares: 4600300
## R-Squared: 0.58173
## Adj. R-Squared: 0.58043
## F-statistic: 447.437 on 7 and 2252 DF, p-value: < 2.22e-16
# Model 2. Fixed effects (within estimator): used when the unobserved
# differences between firms are constant over time.
# NOTE(review): `sic` is absent from the recorded coefficient table of this
# model, presumably because it does not vary within a firm — confirm.
within_patentes <- plm(
  patents ~ merger + employ + return + stckpr + rnd + sales + sic,
  data = panel_patente,
  model = "within"
)
summary(within_patentes)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patente, model = "within")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -497.22898 -1.64569 -0.19669 1.64341 184.49423
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## merger 3.30904770 4.16313684 0.7948 0.42680
## employ 0.11963128 0.07052503 1.6963 0.08998 .
## return -0.07056694 0.10867769 -0.6493 0.51620
## stckpr -0.01107952 0.03242512 -0.3417 0.73262
## rnd -0.19889614 0.01443066 -13.7829 < 2.2e-16 ***
## sales -0.00309052 0.00041525 -7.4426 1.451e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1091400
## Residual Sum of Squares: 819280
## R-Squared: 0.24935
## Adj. R-Squared: 0.16385
## F-statistic: 112.278 on 6 and 2028 DF, p-value: < 2.22e-16
# F test for individual effects: fixed-effects (within) model versus the
# pooled model.
pFtest(x = within_patentes, z = pooled_patente)
##
## F test for individual effects
##
## data: patents ~ merger + employ + return + stckpr + rnd + sales + sic
## F = 41.782, df1 = 224, df2 = 2028, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Si el p-value es menor a 0.05 se prefiere el modelo de efectos fijos; aquí p-value < 2.2e-16, por lo que se prefiere efectos fijos sobre el modelo agrupado (pooled).
# Model 3. Random effects: used when the unobserved differences between firms
# are treated as random.
# Variance components estimated with the Wallace-Hussain method.
walhus_patentes <- plm(
  patents ~ merger + employ + return + stckpr + rnd + sales + sic,
  data = panel_patente,
  model = "random",
  random.method = "walhus"
)
summary(walhus_patentes)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patente, model = "random", random.method = "walhus")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 555.26 23.56 0.273
## individual 1480.26 38.47 0.727
## theta: 0.8099
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -433.72438 -3.89667 -1.76198 0.78484 211.91016
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 11.84397257 12.78087032 0.9267 0.3541
## merger 4.47647107 4.51685216 0.9911 0.3217
## employ 1.10525428 0.04853786 22.7710 < 2.2e-16 ***
## return -0.12920955 0.11762230 -1.0985 0.2720
## stckpr 0.17097726 0.03355374 5.0956 3.476e-07 ***
## rnd -0.14575073 0.01469317 -9.9196 < 2.2e-16 ***
## sales -0.00393738 0.00042854 -9.1880 < 2.2e-16 ***
## sic -0.00107515 0.00376075 -0.2859 0.7750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1449600
## Residual Sum of Squares: 1098300
## R-Squared: 0.24236
## Adj. R-Squared: 0.24
## Chisq: 720.388 on 7 DF, p-value: < 2.22e-16
# Random effects with the Amemiya variance-components method.
amemiya_patentes <- plm(
  patents ~ merger + employ + return + stckpr + rnd + sales + sic,
  data = panel_patente,
  model = "random",
  random.method = "amemiya"
)
summary(amemiya_patentes)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patente, model = "random", random.method = "amemiya")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 402.79 20.07 0.051
## individual 7483.44 86.51 0.949
## theta: 0.9268
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -454.59697 -2.99704 -1.65272 0.59741 193.17353
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.58107091 29.77947247 0.2882 0.7732
## merger 3.91351453 4.11354681 0.9514 0.3414
## employ 0.49060426 0.06153621 7.9726 1.554e-15 ***
## return -0.09427795 0.10733800 -0.8783 0.3798
## stckpr 0.04660332 0.03163610 1.4731 0.1407
## rnd -0.17995961 0.01406835 -12.7918 < 2.2e-16 ***
## sales -0.00342554 0.00040647 -8.4275 < 2.2e-16 ***
## sic 0.00425278 0.00877425 0.4847 0.6279
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1144500
## Residual Sum of Squares: 891720
## R-Squared: 0.22085
## Adj. R-Squared: 0.21842
## Chisq: 638.312 on 7 DF, p-value: < 2.22e-16
# Random effects with the Nerlove variance-components method.
nerlove_patentes <- plm(
  patents ~ merger + employ + return + stckpr + rnd + sales + sic,
  data = panel_patente,
  model = "random",
  random.method = "nerlove"
)
summary(nerlove_patentes)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_patente, model = "random", random.method = "nerlove")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 362.51 19.04 0.046
## individual 7557.16 86.93 0.954
## theta: 0.9309
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -455.94828 -2.93752 -1.60035 0.62863 192.36375
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.38498937 31.41700295 0.2669 0.7896
## merger 3.86675065 4.09938561 0.9433 0.3456
## employ 0.46018862 0.06203371 7.4184 1.186e-13 ***
## return -0.09236163 0.10697310 -0.8634 0.3879
## stckpr 0.04167663 0.03156299 1.3204 0.1867
## rnd -0.18153379 0.01403810 -12.9315 < 2.2e-16 ***
## sales -0.00339833 0.00040545 -8.3816 < 2.2e-16 ***
## sic 0.00451640 0.00925634 0.4879 0.6256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1138700
## Residual Sum of Squares: 885220
## R-Squared: 0.22262
## Adj. R-Squared: 0.22021
## Chisq: 644.925 on 7 DF, p-value: < 2.22e-16
# Compare the adjusted R-squared of the three random-effects fits and keep the
# highest one (Wallace-Hussain here).
# Hausman test: random effects (Wallace-Hussain) versus fixed effects.
phtest(x = walhus_patentes, x2 = within_patentes)
##
## Hausman Test
##
## data: patents ~ merger + employ + return + stckpr + rnd + sales + sic
## chisq = 352.48, df = 6, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Si el P-VALUE es < 0.05, usamos efectos fijos
# Al final por lo tanto nos quedamos con el modelo de efectos fijos (within)
# Heteroskedasticity test (studentized Breusch-Pagan).
# NOTE(review): the recorded output reports df = 7, which matches the seven
# regressors of the formula rather than the six kept by the within estimator;
# this suggests the test is evaluated on the untransformed specification —
# confirm against the lmtest/plm documentation.
bptest(formula = within_patentes, studentize = TRUE)
##
## studentized Breusch-Pagan test
##
## data: within_patentes
## BP = 1447.6, df = 7, p-value < 2.2e-16
# Si el p-value es < 0.05, hay heterocedasticidad en los residuos (problema detectado).
# Serial-correlation test (Wooldridge's test for fixed-effects panels).
pwartest(x = within_patentes)
##
## Wooldridge's test for serial correlation in FE panels
##
## data: within_patentes
## F = 104.29, df1 = 1, df2 = 2032, p-value < 2.2e-16
## alternative hypothesis: serial correlation
# Si el p value < 0.05 , hay autocorrelacion serial en los errores (problema detectado)
# Correction model: re-test the fixed-effects coefficients with robust
# (heteroskedasticity-consistent, type HC0) standard errors.
coeficientes_corregidos <- coeftest(
  within_patentes,
  vcov = vcovHC(within_patentes, type = "HC0")
)
# Keep only the point estimates, selected by column name instead of position.
solo_coeficientes <- coeficientes_corregidos[, "Estimate"]
# Regressor values of the observation to predict.
datos_de_prueba <- data.frame(
  merger = 0, employ = 10, return = 6, stckpr = 48, rnd = 3, sales = 344
)
# Pair each coefficient with its regressor by name. The original multiplied
# the coefficient vector by the data frame and relied on both happening to be
# in the same order; a reordered or missing column would have produced a
# wrong number silently.
faltantes <- setdiff(names(solo_coeficientes), names(datos_de_prueba))
if (length(faltantes) > 0) {
  stop(
    "datos_de_prueba is missing regressors: ",
    paste(faltantes, collapse = ", "),
    call. = FALSE
  )
}
valores_de_prueba <- unlist(datos_de_prueba[1, names(solo_coeficientes)])
# NOTE(review): this sum uses only the slope coefficients. The within model
# reports no intercept, so no firm-specific effect is included — confirm
# whether one should be added (e.g. from plm::fixef()) before reading the
# value as a patent count.
prediccion <- sum(solo_coeficientes * valores_de_prueba)
prediccion
## [1] -1.418735
Este ejercicio muestra cómo, mediante un análisis de datos en panel, es posible identificar la heterogeneidad entre empresas, seleccionar el modelo econométrico más adecuado (efectos fijos), corregir problemas como heterocedasticidad y autocorrelación, y generar pronósticos más precisos para variables clave como las patentes, apoyando la toma de decisiones basadas en evidencia.
# Download GDP per capita (NY.GDP.PCAP.CD) and the crop production index
# (AG.PRD.CROP.XD) for France, Germany and Poland from the World Bank API.
gdp2 <- wb_data(
  country = c("FR", "DE", "PL"),
  indicator = c("NY.GDP.PCAP.CD", "AG.PRD.CROP.XD"),
  start_date = 1950,
  end_date = 2010
)
# Open the downloaded table in the data viewer for a visual check.
view(gdp2)
# Build the panel data set: keep the identifiers and both indicators, retain
# one observation per decade (1960, 1970, ..., 2010) and index the result by
# country and year.
# NOTE(review): the model summaries further down report an unbalanced panel
# with N = 13, so some of the selected country-decade rows presumably have
# missing values — confirm.
panel_2 <- gdp2 %>%
  select(country, date, AG.PRD.CROP.XD, NY.GDP.PCAP.CD) %>%
  subset(date %in% seq(1960, 2010, by = 10)) %>%
  pdata.frame(index = c("country", "date"))
# Heterogeneity check: per-country mean of each variable.
plotmeans(
  AG.PRD.CROP.XD ~ country,
  data = panel_2,
  main = "Prueba de heterogenidad entre paises para la producción de alimentos "
)
plotmeans(
  NY.GDP.PCAP.CD ~ country,
  data = panel_2,
  main = "Prueba de heterogenidad entre paises para GDP per capita"
)
# Both plots came out as broken lines, so we can continue with the next step.
# Both plots show a broken line, so the panel models below are estimated.
# Model 1. Pooled regression: assumes there is no unobserved heterogeneity
# across countries (the flat-line case) and serves as the baseline that the
# fixed- and random-effects models are tested against.
pooled1 <- plm(
  AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD,
  data = panel_2,
  model = "pooling"
)
summary(pooled1)
## Pooling Model
##
## Call:
## plm(formula = AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD, data = panel_2,
## model = "pooling")
##
## Unbalanced Panel: n = 3, T = 3-5, N = 13
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -14.9546 -7.9547 -2.2826 4.9992 31.0521
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 9.7223e+01 5.7289e+00 16.9706 3.088e-09 ***
## NY.GDP.PCAP.CD -7.7812e-05 2.6674e-04 -0.2917 0.7759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1725.9
## Residual Sum of Squares: 1712.6
## R-Squared: 0.0076768
## Adj. R-Squared: -0.082534
## F-statistic: 0.0850985 on 1 and 11 DF, p-value: 0.77593
# Model 2. Fixed effects (within estimator): used when the unobserved
# differences between countries are constant over time.
within1 <- plm(
  AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD,
  data = panel_2,
  model = "within"
)
summary(within1)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD, data = panel_2,
## model = "within")
##
## Unbalanced Panel: n = 3, T = 3-5, N = 13
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -18.7930 -3.9563 -1.2098 3.2086 22.7493
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## NY.GDP.PCAP.CD 0.00015617 0.00028046 0.5568 0.5912
##
## Total Sum of Squares: 1265.7
## Residual Sum of Squares: 1223.6
## R-Squared: 0.033305
## Adj. R-Squared: -0.28893
## F-statistic: 0.310068 on 1 and 9 DF, p-value: 0.59121
# F test for individual effects: fixed-effects (within) model versus the
# pooled model.
pFtest(x = within1, z = pooled1)
##
## F test for individual effects
##
## data: AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD
## F = 1.7985, df1 = 2, df2 = 9, p-value = 0.2202
## alternative hypothesis: significant effects
# Si el p-value es menor a 0.05 se prefiere el modelo de efectos fijos.
## p-value = 0.2202 (> 0.05): no se rechaza el modelo agrupado (pooled).
# Model 3. Random effects: used when the unobserved differences between
# countries are treated as random.
# Variance components estimated with the Wallace-Hussain method.
walhus1 <- plm(
  AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD,
  data = panel_2,
  model = "random",
  random.method = "walhus"
)
summary(walhus1)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD, data = panel_2,
## model = "random", random.method = "walhus")
##
## Unbalanced Panel: n = 3, T = 3-5, N = 13
##
## Effects:
## var std.dev share
## idiosyncratic 141.902 11.912 0.879
## individual 19.618 4.429 0.121
## theta:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1593 0.2311 0.2311 0.2145 0.2311 0.2311
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -13.739 -6.434 -3.017 -0.174 3.844 30.077
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 9.6552e+01 6.1002e+00 15.8276 <2e-16 ***
## NY.GDP.PCAP.CD -1.3275e-05 2.6472e-04 -0.0501 0.96
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2030.5
## Residual Sum of Squares: 1572
## R-Squared: 0.3514
## Adj. R-Squared: 0.29243
## Chisq: 0.00251477 on 1 DF, p-value: 0.96
# Random effects with the Amemiya variance-components method.
amemiya1 <- plm(
  AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD,
  data = panel_2,
  model = "random",
  random.method = "amemiya"
)
summary(amemiya1)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD, data = panel_2,
## model = "random", random.method = "amemiya")
##
## Unbalanced Panel: n = 3, T = 3-5, N = 13
##
## Effects:
## var std.dev share
## idiosyncratic 135.955 11.660 0.793
## individual 35.544 5.962 0.207
## theta:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2514 0.3416 0.3416 0.3208 0.3416 0.3416
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -13.044 -5.425 -3.332 -0.231 3.117 29.443
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 9.6207e+01 6.4342e+00 14.9524 <2e-16 ***
## NY.GDP.PCAP.CD 2.0118e-05 2.6326e-04 0.0764 0.9391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2063.1
## Residual Sum of Squares: 1501.6
## R-Squared: 0.38487
## Adj. R-Squared: 0.32895
## Chisq: 0.00584005 on 1 DF, p-value: 0.93908
# Random effects with the Nerlove variance-components method.
nerlove1 <- plm(
  AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD,
  data = panel_2,
  model = "random",
  random.method = "nerlove"
)
summary(nerlove1)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD, data = panel_2,
## model = "random", random.method = "nerlove")
##
## Unbalanced Panel: n = 3, T = 3-5, N = 13
##
## Effects:
## var std.dev share
## idiosyncratic 94.123 9.702 0.573
## individual 70.251 8.382 0.427
## theta:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4444 0.5403 0.5403 0.5182 0.5403 0.5403
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -12.750 -4.351 -3.428 -0.268 2.392 27.962
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 9.5594e+01 7.6603e+00 12.4791 <2e-16 ***
## NY.GDP.PCAP.CD 7.9534e-05 2.5983e-04 0.3061 0.7595
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1888.6
## Residual Sum of Squares: 1378.8
## R-Squared: 0.34578
## Adj. R-Squared: 0.28631
## Chisq: 0.093698 on 1 DF, p-value: 0.75953
# Compare the adjusted R-squared of the three random-effects fits and keep the
# highest one.
# Hausman test: random effects (Wallace-Hussain) versus fixed effects.
phtest(x = walhus1, x2 = within1)
##
## Hausman Test
##
## data: AG.PRD.CROP.XD ~ NY.GDP.PCAP.CD
## chisq = 3.3462, df = 1, p-value = 0.06736
## alternative hypothesis: one model is inconsistent
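# p-value = 0.06736 (> 0.05): no se rechaza la hipótesis nula de la prueba de Hausman, por lo que no hace falta usar efectos fijos.
# Como además la prueba F no rechazó el modelo agrupado (p-value = 0.2202), al final nos quedamos con el modelo agrupado (pooled).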