Reflexión sobre Negocios Si una empresa lo esta haciendo bien siempre debe buscar mejorarse a si misma y buscar ser los mejores, pero deben ser siempre cautelosos con estos cambios
#install.packages("WDI")
library(WDI)
#install.packages("wbstats")
library(wbstats)
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("plm") # Paquete para realizar modelos lineales para datos de panel
library(plm)
##
## Attaching package: 'plm'
##
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
#install.packages("gplots")
library(gplots)
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
#install.packages("readxl")
library(readxl)
#install.packages("lmtest")
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
#obtener informacion del varios paises
gdp <- wb_data(country = c("MX", "US", "CA"), indicator = c ("NY.GDP.PCAP.CD", "SM.POP.NETM"),
start_date = 1950, end_date = 2025)
#generar conjunto de datos de panel
panel_1 <- select(gdp, country, date, NY.GDP.PCAP.CD, SM.POP.NETM)
panel_1 <- subset(panel_1, date == 1960 | date == 1970 | date == 1980 | date == 1990 | date == 2000 |
date == 2010 | date == 2020 )
panel_1 <- pdata.frame(panel_1, index = c("country","date"))
plotmeans(NY.GDP.PCAP.CD ~ country, main = "Prueba de Heterogeneidad entre paises para el PIB", data = panel_1)
" Si la linea sale casi horizontal, hay poca o nula Hetogeneidad, por lo que no hay diferencias sistematicas que ajustar"
## [1] " Si la linea sale casi horizontal, hay poca o nula Hetogeneidad, por lo que no hay diferencias sistematicas que ajustar"
"Si la linea sale quebrada, sube y baja, hay mucha heterogeneidad, por lo que hay que ajustar. "
## [1] "Si la linea sale quebrada, sube y baja, hay mucha heterogeneidad, por lo que hay que ajustar. "
plotmeans(SM.POP.NETM ~ country, main = "Prueba de Heterogeneidad entre paises para la migracion neta", data = panel_1)
#Modelo 1. Regresion agrupada (pooled) Asume que no hay heterogeniadad
pooled <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "pooling" ) #+Si hay mas variables se separan con simbolo de +
summary(pooled)
## Pooling Model
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "pooling")
##
## Balanced Panel: n = 3, T = 7, N = 21
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -21506.0 -10924.8 -3728.9 5274.5 45389.3
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.2873e+04 4.2134e+03 3.0553 0.006511 **
## SM.POP.NETM 1.8616e-02 7.2324e-03 2.5740 0.018588 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 7259500000
## Residual Sum of Squares: 5382600000
## R-Squared: 0.25855
## Adj. R-Squared: 0.21952
## F-statistic: 6.62533 on 1 and 19 DF, p-value: 0.018588
#Modelo 2. Efectos fijos (within) Cuando las diferencias no observadas son constantes en el tiempo
within <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "within" )
summary(within)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "within")
##
## Balanced Panel: n = 3, T = 7, N = 21
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -20886.56 -9903.27 -403.03 3407.39 44059.72
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## SM.POP.NETM 0.013921 0.014345 0.9705 0.3454
##
## Total Sum of Squares: 5256100000
## Residual Sum of Squares: 4980200000
## R-Squared: 0.052492
## Adj. R-Squared: -0.11471
## F-statistic: 0.94181 on 1 and 17 DF, p-value: 0.34542
#Prueba
pFtest(within, pooled)
##
## F test for individual effects
##
## data: NY.GDP.PCAP.CD ~ SM.POP.NETM
## F = 0.68685, df1 = 2, df2 = 17, p-value = 0.5166
## alternative hypothesis: significant effects
" Si el P value es <0.05 se prefiere el modelo de efectos fijos"
## [1] " Si el P value es <0.05 se prefiere el modelo de efectos fijos"
#Modelo 3. Efectos aleatorios - Metodo walhus. Cuando las diferencias no obsevadas son aleatorias
walhus <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random", random.method = "walhus" )
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random",
## random.method = "walhus")
##
## Balanced Panel: n = 3, T = 7, N = 21
##
## Effects:
## var std.dev share
## idiosyncratic 278418900 16686 1
## individual 0 0 0
## theta: 0
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -21506.0 -10924.8 -3728.9 5274.5 45389.3
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.2873e+04 4.2134e+03 3.0553 0.002248 **
## SM.POP.NETM 1.8616e-02 7.2324e-03 2.5740 0.010054 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 7259500000
## Residual Sum of Squares: 5382600000
## R-Squared: 0.25855
## Adj. R-Squared: 0.21952
## Chisq: 6.62533 on 1 DF, p-value: 0.010054
#Modelo 4. Efectos aleatorios - Metodo amemiya.
amemiya <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random", random.method = "amemiya" )
summary(amemiya)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random",
## random.method = "amemiya")
##
## Balanced Panel: n = 3, T = 7, N = 21
##
## Effects:
## var std.dev share
## idiosyncratic 276675480 16634 1
## individual 0 0 0
## theta: 0
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -21506.0 -10924.8 -3728.9 5274.5 45389.3
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.2873e+04 4.2134e+03 3.0553 0.002248 **
## SM.POP.NETM 1.8616e-02 7.2324e-03 2.5740 0.010054 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 7259500000
## Residual Sum of Squares: 5382600000
## R-Squared: 0.25855
## Adj. R-Squared: 0.21952
## Chisq: 6.62533 on 1 DF, p-value: 0.010054
#Modelo 5. Efectos aleatorios - Metodo nerlove.
nerlove <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random", random.method = "nerlove" )
summary(nerlove)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random",
## random.method = "nerlove")
##
## Balanced Panel: n = 3, T = 7, N = 21
##
## Effects:
## var std.dev share
## idiosyncratic 237150411 15400 0.864
## individual 37271843 6105 0.136
## theta: 0.31
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -20850.0 -9773.4 -2826.2 3450.7 45608.0
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.3174e+04 5.8290e+03 2.2601 0.02382 *
## SM.POP.NETM 1.7563e-02 9.0595e-03 1.9386 0.05255 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 6.21e+09
## Residual Sum of Squares: 5184500000
## R-Squared: 0.16513
## Adj. R-Squared: 0.12119
## Chisq: 3.75814 on 1 DF, p-value: 0.052551
# Comparar la r2 ajustada de los 3 metodos y elegir el que tenga el mayor.
phtest(walhus, within)
##
## Hausman Test
##
## data: NY.GDP.PCAP.CD ~ SM.POP.NETM
## chisq = 0.14364, df = 1, p-value = 0.7047
## alternative hypothesis: one model is inconsistent
#Si el p-value es <0.05, usamos efectos fijos (within)
#Por lo tanto nos quedamos con el modelo agrupado (pooled)
# obtener información de varios países
unemployment <- wb_data(country = c("BR", "DE", "JP"),
indicator = c("SL.UEM.TOTL.ZS","SL.TLF.ACTI.ZS","NY.GDP.PCAP.CD"),
start_date = 1950, end_date = 2025)
# Generar conjunto de datos de panel con las variables necesarias
panel_2 <- select(unemployment, country, date, SL.UEM.TOTL.ZS, NY.GDP.PCAP.CD)
# Filtrar solo ciertos años
panel_2 <- subset(panel_2, date == 1960 | date == 1970 | date == 1980 |
date == 1990 | date == 2000 | date == 2010 | date == 2020)
# Convertir a pdata.frame
panel_2 <- pdata.frame(panel_2, index = c("country", "date"))
# Ejercicio 1. generar Prueba de Heterogeneidad para ejercicio 1
plotmeans(SL.UEM.TOTL.ZS ~ country, main = "Prueba de Desempleo Total (% de la fuerza laboral)", data = panel_2)
plotmeans(NY.GDP.PCAP.CD ~ country, main = "Prueba de PIB per cápita (US$)", data = panel_2)
#Modelo 1. Regresion agrupada (pooled) Asume que no hay heterogeniadad
pooled1 <- plm(SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, model = "pooling" )
summary(pooled1)
## Pooling Model
##
## Call:
## plm(formula = SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2,
## model = "pooling")
##
## Balanced Panel: n = 3, T = 3, N = 9
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -2.40313 -0.69640 -0.12529 0.75800 2.69294
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.2247e+01 1.2149e+00 10.0807 2.029e-05 ***
## NY.GDP.PCAP.CD -1.7575e-04 3.6505e-05 -4.8145 0.001934 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 98.553
## Residual Sum of Squares: 22.859
## R-Squared: 0.76806
## Adj. R-Squared: 0.73492
## F-statistic: 23.1798 on 1 and 7 DF, p-value: 0.0019336
#Modelo 2. Efectos fijos (within) Cuando las diferencias no observadas son constantes en el tiempo
within1 <- plm(SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, model = "within" )
summary(within1)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2,
## model = "within")
##
## Balanced Panel: n = 3, T = 3, N = 9
##
## Residuals:
## Brazil-2000 Brazil-2010 Brazil-2020 Germany-2000 Germany-2010 Germany-2020
## -0.61876 -2.02843 2.64719 -0.29882 1.38636 -1.08755
## Japan-2000 Japan-2010 Japan-2020
## 0.22105 1.37791 -1.59897
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## NY.GDP.PCAP.CD -0.00013845 0.00010410 -1.33 0.241
##
## Total Sum of Squares: 25.997
## Residual Sum of Squares: 19.203
## R-Squared: 0.26133
## Adj. R-Squared: -0.18188
## F-statistic: 1.76888 on 1 and 5 DF, p-value: 0.24096
#Prueba
pFtest(within1, pooled1)
##
## F test for individual effects
##
## data: SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD
## F = 0.47588, df1 = 2, df2 = 5, p-value = 0.6469
## alternative hypothesis: significant effects
" Como el P value es <0.05 se prefiere el modelo de efectos fijos"
## [1] " Como el P value es <0.05 se prefiere el modelo de efectos fijos"
#Modelo 3. Efectos aleatorios - Metodo walhus. Cuando las diferencias no obsevadas son aleatorias
walhus1 <- plm(SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, model = "random", random.method = "walhus" )
summary(walhus1)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2,
## model = "random", random.method = "walhus")
##
## Balanced Panel: n = 3, T = 3, N = 9
##
## Effects:
## var std.dev share
## idiosyncratic 3.283 1.812 1
## individual 0.000 0.000 0
## theta: 0
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -2.40313 -0.69640 -0.12529 0.75800 2.69294
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.2247e+01 1.2149e+00 10.0807 < 2.2e-16 ***
## NY.GDP.PCAP.CD -1.7575e-04 3.6505e-05 -4.8145 1.475e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 98.553
## Residual Sum of Squares: 22.859
## R-Squared: 0.76806
## Adj. R-Squared: 0.73492
## Chisq: 23.1798 on 1 DF, p-value: 1.4754e-06
#Modelo 4. Efectos aleatorios - Metodo amemiya.
amemiya1 <- plm(SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, model = "random", random.method = "amemiya" )
summary(amemiya1)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2,
## model = "random", random.method = "amemiya")
##
## Balanced Panel: n = 3, T = 3, N = 9
##
## Effects:
## var std.dev share
## idiosyncratic 3.201 1.789 1
## individual 0.000 0.000 0
## theta: 0
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -2.40313 -0.69640 -0.12529 0.75800 2.69294
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.2247e+01 1.2149e+00 10.0807 < 2.2e-16 ***
## NY.GDP.PCAP.CD -1.7575e-04 3.6505e-05 -4.8145 1.475e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 98.553
## Residual Sum of Squares: 22.859
## R-Squared: 0.76806
## Adj. R-Squared: 0.73492
## Chisq: 23.1798 on 1 DF, p-value: 1.4754e-06
#Modelo 5. Efectos aleatorios - Metodo nerlove.
nerlove1 <- plm(SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, model = "random", random.method = "nerlove" )
summary(nerlove1)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2,
## model = "random", random.method = "nerlove")
##
## Balanced Panel: n = 3, T = 3, N = 9
##
## Effects:
## var std.dev share
## idiosyncratic 2.134 1.461 0.644
## individual 1.177 1.085 0.356
## theta: 0.3863
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -2.15745 -0.59767 -0.36330 0.96808 2.76783
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.2039e+01 1.7468e+00 6.8922 5.493e-12 ***
## NY.GDP.PCAP.CD -1.6855e-04 5.1005e-05 -3.3045 0.0009513 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 53.319
## Residual Sum of Squares: 20.828
## R-Squared: 0.60937
## Adj. R-Squared: 0.55357
## Chisq: 10.92 on 1 DF, p-value: 0.00095134
# Comparar la r2 ajustada de los 3 metodos y elegir el que tenga el mayor.
phtest(walhus1, within1)
##
## Hausman Test
##
## data: SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD
## chisq = 0.1464, df = 1, p-value = 0.702
## alternative hypothesis: one model is inconsistent
#Si el p-value es <0.05, usamos efectos fijos (within)
#Por lo tanto nos quedamos con el modelo agrupado (pooled)
El entorno de negocios en el que las organizaciones se desarrollan es cada vez más dinámico por lo que las empresas enfrentan constantemente el reto de mantenerse al día y superar los nuevos retos que el ambiente presenta. La innovación es una de las mejores formas que las empresas tienen para conseguirlo. De acuerdo con el artículo “Innovation in business: What it is and why is so important” ´publicado en el Harvard Business Review la innovación presenta tres grandes ventajas para las empresas: les permite adaptarse, promueve el crecimiento y además les ayuda a diferenciarse de su competencia generando ventajas competitivas.
patentes <- read_excel("/Users/sebastianfajardo/Downloads/PATENT 3.xls")
## Entender la base de datos
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.227 1st Qu.: 5.128
## Median :501116 Median :0.0000 Median : 3.842 Median : 7.585
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 15.442 3rd Qu.: 10.501
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## NA's :21 NA's :8
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## NA's :2
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.1253 Min. : 1.222 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.1520 1st Qu.: 52.995 1st Qu.:2890
## Median : 1.4764 Median : 13.3532 Median : 174.065 Median :3531
## Mean : 19.7238 Mean : 163.8234 Mean : 1219.601 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 74.5625 3rd Qu.: 728.964 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.3516 Max. :44224.000 Max. :9997
## NA's :157 NA's :3
## year
## Min. :2012
## 1st Qu.:2014
## Median :2016
## Mean :2016
## 3rd Qu.:2019
## Max. :2021
##
str(patentes) #Na en la base de datos
## tibble [2,260 × 13] (S3: tbl_df/tbl/data.frame)
## $ cusip : num [1:2260] 800 800 800 800 800 800 800 800 800 800 ...
## $ merger : num [1:2260] 0 0 0 0 0 0 0 0 0 0 ...
## $ employ : num [1:2260] 9.85 12.32 12.2 11.84 12.99 ...
## $ return : num [1:2260] 5.82 5.69 4.42 5.28 4.91 ...
## $ patents : num [1:2260] 22 34 31 32 40 60 57 77 38 5 ...
## $ patentsg: num [1:2260] 24 32 30 34 28 33 53 47 64 70 ...
## $ stckpr : num [1:2260] 47.6 57.9 33 38.5 35.1 ...
## $ rnd : num [1:2260] 2.56 3.1 3.27 3.24 3.78 ...
## $ rndeflt : num [1:2260] 2.56 2.91 2.8 2.52 2.78 ...
## $ rndstck : num [1:2260] 16.2 17.4 19.6 21.9 23.1 ...
## $ sales : num [1:2260] 344 436 535 567 631 ...
## $ sic : num [1:2260] 3740 3740 3740 3740 3740 3740 3740 3740 3740 3740 ...
## $ year : num [1:2260] 2012 2013 2014 2015 2016 ...
sapply(patentes, function(x) sum(is.na(x))) #NA por variable
## cusip merger employ return patents patentsg stckpr rnd
## 0 0 21 8 0 0 2 0
## rndeflt rndstck sales sic year
## 0 157 3 0 0
patentes$employ[is.na(patentes$employ)] <- mean(patentes$employ, na.rm = TRUE)
patentes$return[is.na(patentes$return)] <- mean(patentes$return, na.rm = TRUE)
patentes$stckpr[is.na(patentes$stckpr)] <- mean(patentes$stckpr, na.rm = TRUE)
patentes$rndstck[is.na(patentes$rndstck)] <- mean(patentes$rndstck, na.rm = TRUE)
patentes$sales[is.na(patentes$sales)] <- mean(patentes$sales, na.rm = TRUE)
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.242 1st Qu.: 5.139
## Median :501116 Median :0.0000 Median : 3.893 Median : 7.601
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 16.034 3rd Qu.: 10.473
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.1253 Min. : 1.222 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.5882 1st Qu.: 53.204 1st Qu.:2890
## Median : 1.4764 Median : 16.2341 Median : 174.283 Median :3531
## Mean : 19.7238 Mean : 163.8234 Mean : 1219.601 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 119.1048 3rd Qu.: 743.422 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.3516 Max. :44224.000 Max. :9997
## year
## Min. :2012
## 1st Qu.:2014
## Median :2016
## Mean :2016
## 3rd Qu.:2019
## Max. :2021
sum(is.na(patentes))#Na en la base de datos
## [1] 0
boxplot(patentes$cusip, horizontal = TRUE)
boxplot(patentes$merger, horizontal = TRUE)
boxplot(patentes$employ, horizontal = TRUE)
boxplot(patentes$return, horizontal = TRUE)
boxplot(patentes$patents, horizontal = TRUE)
boxplot(patentes$patentsg, horizontal = TRUE)
boxplot(patentes$stckpr, horizontal = TRUE)
boxplot(patentes$rnd, horizontal = TRUE)
boxplot(patentes$rndeflt, horizontal = TRUE)
boxplot(patentes$rndstck, horizontal = TRUE)
boxplot(patentes$sales, horizontal = TRUE)
boxplot(patentes$sic, horizontal = TRUE)
boxplot(patentes$year, horizontal = TRUE)
patentes$year <- patentes$year -40
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.242 1st Qu.: 5.139
## Median :501116 Median :0.0000 Median : 3.893 Median : 7.601
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 16.034 3rd Qu.: 10.473
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.1253 Min. : 1.222 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.5882 1st Qu.: 53.204 1st Qu.:2890
## Median : 1.4764 Median : 16.2341 Median : 174.283 Median :3531
## Mean : 19.7238 Mean : 163.8234 Mean : 1219.601 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 119.1048 3rd Qu.: 743.422 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.3516 Max. :44224.000 Max. :9997
## year
## Min. :1972
## 1st Qu.:1974
## Median :1976
## Mean :1976
## 3rd Qu.:1979
## Max. :1981
#generar conjunto de datos de panel
panel_pantentes <- pdata.frame(patentes, index = c("cusip","year"))
plotmeans(patents ~ cusip , main = "Prueba de Heterogeneidad entre paises para el PIB", data = panel_pantentes)
## Paso 3. Pruebas de efectos fijos y
aleatorios
#Modelo 1. Regresion agrupada (pooled) Asume que no hay heterogeniadad
pooled_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_pantentes, model = "pooling" ) #+Si hay mas variables se separan con simbolo de +
summary(pooled_patentes)
## Pooling Model
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_pantentes, model = "pooling")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -320.36212 -10.01555 0.94472 7.40861 433.86316
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -4.1831e-01 5.2757e+00 -0.0793 0.93681
## merger -1.1612e+01 7.2433e+00 -1.6031 0.10905
## employ 1.3683e+00 4.1969e-02 32.6040 < 2.2e-16 ***
## return -4.3505e-03 1.8155e-01 -0.0240 0.98088
## stckpr 6.5137e-01 4.3139e-02 15.0994 < 2.2e-16 ***
## rnd -1.3853e-01 1.6106e-02 -8.6007 < 2.2e-16 ***
## sales -3.2049e-03 4.6962e-04 -6.8246 1.13e-11 ***
## sic -2.6894e-03 1.4820e-03 -1.8146 0.06972 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 10998000
## Residual Sum of Squares: 4600300
## R-Squared: 0.58173
## Adj. R-Squared: 0.58043
## F-statistic: 447.437 on 7 and 2252 DF, p-value: < 2.22e-16
#Modelo 2. Efectos fijos (within) Cuando las diferencias no observadas son constantes en el tiempo
within_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_pantentes, model = "within" )
summary(within_patentes)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_pantentes, model = "within")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -497.22898 -1.64569 -0.19669 1.64341 184.49423
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## merger 3.30904770 4.16313684 0.7948 0.42680
## employ 0.11963128 0.07052503 1.6963 0.08998 .
## return -0.07056694 0.10867769 -0.6493 0.51620
## stckpr -0.01107952 0.03242512 -0.3417 0.73262
## rnd -0.19889614 0.01443066 -13.7829 < 2.2e-16 ***
## sales -0.00309052 0.00041525 -7.4426 1.451e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1091400
## Residual Sum of Squares: 819280
## R-Squared: 0.24935
## Adj. R-Squared: 0.16385
## F-statistic: 112.278 on 6 and 2028 DF, p-value: < 2.22e-16
#Prueba
pFtest(within_patentes, pooled_patentes)
##
## F test for individual effects
##
## data: patents ~ merger + employ + return + stckpr + rnd + sales + sic
## F = 41.782, df1 = 224, df2 = 2028, p-value < 2.2e-16
## alternative hypothesis: significant effects
" Si el P value es <0.05 se avanza a los siguientes modelos"
## [1] " Si el P value es <0.05 se avanza a los siguientes modelos"
#Modelo 3. Efectos aleatorios - Metodo walhus. Cuando las diferencias no obsevadas son aleatorias
walhus_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_pantentes, model = "random", random.method = "walhus" )
summary(walhus_patentes)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_pantentes, model = "random", random.method = "walhus")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 555.26 23.56 0.273
## individual 1480.26 38.47 0.727
## theta: 0.8099
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -433.72438 -3.89667 -1.76198 0.78484 211.91016
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 11.84397257 12.78087032 0.9267 0.3541
## merger 4.47647107 4.51685216 0.9911 0.3217
## employ 1.10525428 0.04853786 22.7710 < 2.2e-16 ***
## return -0.12920955 0.11762230 -1.0985 0.2720
## stckpr 0.17097726 0.03355374 5.0956 3.476e-07 ***
## rnd -0.14575073 0.01469317 -9.9196 < 2.2e-16 ***
## sales -0.00393738 0.00042854 -9.1880 < 2.2e-16 ***
## sic -0.00107515 0.00376075 -0.2859 0.7750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1449600
## Residual Sum of Squares: 1098300
## R-Squared: 0.24236
## Adj. R-Squared: 0.24
## Chisq: 720.388 on 7 DF, p-value: < 2.22e-16
#Modelo 4. Efectos aleatorios - Metodo amemiya.
amemiya_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_pantentes, model = "random", random.method = "amemiya" )
summary(amemiya_patentes)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_pantentes, model = "random", random.method = "amemiya")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 402.79 20.07 0.051
## individual 7483.44 86.51 0.949
## theta: 0.9268
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -454.59697 -2.99704 -1.65272 0.59741 193.17353
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.58107091 29.77947247 0.2882 0.7732
## merger 3.91351453 4.11354681 0.9514 0.3414
## employ 0.49060426 0.06153621 7.9726 1.554e-15 ***
## return -0.09427795 0.10733800 -0.8783 0.3798
## stckpr 0.04660332 0.03163610 1.4731 0.1407
## rnd -0.17995961 0.01406835 -12.7918 < 2.2e-16 ***
## sales -0.00342554 0.00040647 -8.4275 < 2.2e-16 ***
## sic 0.00425278 0.00877425 0.4847 0.6279
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1144500
## Residual Sum of Squares: 891720
## R-Squared: 0.22085
## Adj. R-Squared: 0.21842
## Chisq: 638.312 on 7 DF, p-value: < 2.22e-16
#Modelo 5. Efectos aleatorios - Metodo nerlove.
nerlove_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_pantentes, model = "random", random.method = "nerlove" )
summary(nerlove_patentes)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd +
## sales + sic, data = panel_pantentes, model = "random", random.method = "nerlove")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 362.51 19.04 0.046
## individual 7557.16 86.93 0.954
## theta: 0.9309
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -455.94828 -2.93752 -1.60035 0.62863 192.36375
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.38498937 31.41700295 0.2669 0.7896
## merger 3.86675065 4.09938561 0.9433 0.3456
## employ 0.46018862 0.06203371 7.4184 1.186e-13 ***
## return -0.09236163 0.10697310 -0.8634 0.3879
## stckpr 0.04167663 0.03156299 1.3204 0.1867
## rnd -0.18153379 0.01403810 -12.9315 < 2.2e-16 ***
## sales -0.00339833 0.00040545 -8.3816 < 2.2e-16 ***
## sic 0.00451640 0.00925634 0.4879 0.6256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1138700
## Residual Sum of Squares: 885220
## R-Squared: 0.22262
## Adj. R-Squared: 0.22021
## Chisq: 644.925 on 7 DF, p-value: < 2.22e-16
# Comparar la r2 ajustada de los 3 metodos y elegir el que tenga el mayor.
phtest(walhus_patentes, within_patentes)
##
## Hausman Test
##
## data: patents ~ merger + employ + return + stckpr + rnd + sales + sic
## chisq = 352.48, df = 6, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
#Si el p-value es <0.05, usamos efectos fijos (within)
#Por lo tanto nos quedamos con el modelo de efectos fijos (within)
#prueba de heterocedasticidad
bptest(within_patentes)
##
## studentized Breusch-Pagan test
##
## data: within_patentes
## BP = 1447.6, df = 7, p-value < 2.2e-16
# Si el p-value <0.05, hay heterocedasticidad en los residuos (problema detectado)
#Prueba de autorcorrelacion serial
pwartest(within_patentes)
##
## Wooldridge's test for serial correlation in FE panels
##
## data: within_patentes
## F = 104.29, df1 = 1, df2 = 2032, p-value < 2.2e-16
## alternative hypothesis: serial correlation
# Si el p-value <0.05, hay autocorrelacion serial en los errores (problema detectado)
# Modelo de correcion
coeficientes_corregidos <- coeftest(within_patentes, vcov =vcovHC(within_patentes, type = "HC0"))
solo_coeficientes <- coeficientes_corregidos[,1]
datos_de_prueba <- data.frame(merger = 0, employ = 10, return =6, stckpr =48, rnd =3, sales =344)
prediccion <- sum(solo_coeficientes*datos_de_prueba)
prediccion
## [1] -1.418735
En conclusion este ejercicio permite generar pronosticos en bases de datos con panel, tomando en cuenta los tratamientos para distintos efectos en los datos y sus errores.