Enlace de shiny

Pregunta negocio

Reflexión sobre Negocios Si una empresa lo esta haciendo bien siempre debe buscar mejorarse a si misma y buscar ser los mejores, pero deben ser siempre cautelosos con estos cambios

Instalar paquetes y llamar librerias

#install.packages("WDI")
library(WDI)
#install.packages("wbstats")
library(wbstats)
#install.packages("tidyverse")
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

#install.packages("plm") # Paquete para realizar modelos lineales para datos de panel
library(plm)

## 
## Attaching package: 'plm'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead

#install.packages("gplots")
library(gplots)

## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess

#install.packages("readxl")
library(readxl)
#install.packages("lmtest")
library(lmtest)

## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

Paso 1. generar conjunto de datos de panel

#obtener informacion del varios paises
gdp <- wb_data(country = c("MX", "US", "CA"), indicator = c ("NY.GDP.PCAP.CD", "SM.POP.NETM"),
               start_date = 1950, end_date = 2025)

#generar conjunto de datos de panel
panel_1 <- select(gdp, country, date, NY.GDP.PCAP.CD, SM.POP.NETM)
panel_1 <- subset(panel_1, date == 1960 | date == 1970 | date == 1980 | date == 1990 | date == 2000 | 
                    date == 2010 | date == 2020 )
panel_1 <- pdata.frame(panel_1, index = c("country","date"))

Paso 2. Prueba de heterogeneidad

plotmeans(NY.GDP.PCAP.CD ~ country, main = "Prueba de Heterogeneidad entre paises para el PIB", data = panel_1)

" Si la linea sale casi horizontal, hay poca o nula Hetogeneidad, por lo que no hay diferencias sistematicas que ajustar"

## [1] " Si la linea sale casi horizontal, hay poca o nula Hetogeneidad, por lo que no hay diferencias sistematicas que ajustar"

"Si la linea sale quebrada, sube y baja, hay mucha heterogeneidad, por lo que hay que ajustar. "

## [1] "Si la linea sale quebrada, sube y baja, hay mucha heterogeneidad, por lo que hay que ajustar. "

plotmeans(SM.POP.NETM ~ country, main = "Prueba de Heterogeneidad entre paises para la migracion neta", data = panel_1)

Paso 3. Pruebas de efectos fijos y aleatorios

#Modelo 1. Regresion agrupada (pooled) Asume que no hay heterogeniadad
pooled <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "pooling" ) #+Si hay mas variables se separan con simbolo de +
summary(pooled)

## Pooling Model
## 
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "pooling")
## 
## Balanced Panel: n = 3, T = 7, N = 21
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -21506.0 -10924.8  -3728.9   5274.5  45389.3 
## 
## Coefficients:
##               Estimate Std. Error t-value Pr(>|t|)   
## (Intercept) 1.2873e+04 4.2134e+03  3.0553 0.006511 **
## SM.POP.NETM 1.8616e-02 7.2324e-03  2.5740 0.018588 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    7259500000
## Residual Sum of Squares: 5382600000
## R-Squared:      0.25855
## Adj. R-Squared: 0.21952
## F-statistic: 6.62533 on 1 and 19 DF, p-value: 0.018588

#Modelo 2. Efectos fijos (within) Cuando las diferencias no observadas son constantes en el tiempo
within <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "within" )
summary(within)

## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "within")
## 
## Balanced Panel: n = 3, T = 7, N = 21
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -20886.56  -9903.27   -403.03   3407.39  44059.72 
## 
## Coefficients:
##             Estimate Std. Error t-value Pr(>|t|)
## SM.POP.NETM 0.013921   0.014345  0.9705   0.3454
## 
## Total Sum of Squares:    5256100000
## Residual Sum of Squares: 4980200000
## R-Squared:      0.052492
## Adj. R-Squared: -0.11471
## F-statistic: 0.94181 on 1 and 17 DF, p-value: 0.34542

#Prueba
pFtest(within, pooled)

## 
##  F test for individual effects
## 
## data:  NY.GDP.PCAP.CD ~ SM.POP.NETM
## F = 0.68685, df1 = 2, df2 = 17, p-value = 0.5166
## alternative hypothesis: significant effects

" Si el P value es <0.05 se prefiere el modelo de efectos fijos"

## [1] " Si el P value es <0.05 se prefiere el modelo de efectos fijos"

#Modelo 3. Efectos aleatorios - Metodo walhus. Cuando las diferencias no obsevadas son aleatorias
walhus <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random", random.method = "walhus" )
summary(walhus)

## Oneway (individual) effect Random Effect Model 
##    (Wallace-Hussain's transformation)
## 
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random", 
##     random.method = "walhus")
## 
## Balanced Panel: n = 3, T = 7, N = 21
## 
## Effects:
##                     var   std.dev share
## idiosyncratic 278418900     16686     1
## individual            0         0     0
## theta: 0
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -21506.0 -10924.8  -3728.9   5274.5  45389.3 
## 
## Coefficients:
##               Estimate Std. Error z-value Pr(>|z|)   
## (Intercept) 1.2873e+04 4.2134e+03  3.0553 0.002248 **
## SM.POP.NETM 1.8616e-02 7.2324e-03  2.5740 0.010054 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    7259500000
## Residual Sum of Squares: 5382600000
## R-Squared:      0.25855
## Adj. R-Squared: 0.21952
## Chisq: 6.62533 on 1 DF, p-value: 0.010054

#Modelo 4. Efectos aleatorios - Metodo amemiya.
amemiya <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random", random.method = "amemiya" )
summary(amemiya)

## Oneway (individual) effect Random Effect Model 
##    (Amemiya's transformation)
## 
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random", 
##     random.method = "amemiya")
## 
## Balanced Panel: n = 3, T = 7, N = 21
## 
## Effects:
##                     var   std.dev share
## idiosyncratic 276675480     16634     1
## individual            0         0     0
## theta: 0
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -21506.0 -10924.8  -3728.9   5274.5  45389.3 
## 
## Coefficients:
##               Estimate Std. Error z-value Pr(>|z|)   
## (Intercept) 1.2873e+04 4.2134e+03  3.0553 0.002248 **
## SM.POP.NETM 1.8616e-02 7.2324e-03  2.5740 0.010054 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    7259500000
## Residual Sum of Squares: 5382600000
## R-Squared:      0.25855
## Adj. R-Squared: 0.21952
## Chisq: 6.62533 on 1 DF, p-value: 0.010054

#Modelo 5. Efectos aleatorios - Metodo nerlove.
nerlove <- plm(NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random", random.method = "nerlove" )
summary(nerlove)

## Oneway (individual) effect Random Effect Model 
##    (Nerlove's transformation)
## 
## Call:
## plm(formula = NY.GDP.PCAP.CD ~ SM.POP.NETM, data = panel_1, model = "random", 
##     random.method = "nerlove")
## 
## Balanced Panel: n = 3, T = 7, N = 21
## 
## Effects:
##                     var   std.dev share
## idiosyncratic 237150411     15400 0.864
## individual     37271843      6105 0.136
## theta: 0.31
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -20850.0  -9773.4  -2826.2   3450.7  45608.0 
## 
## Coefficients:
##               Estimate Std. Error z-value Pr(>|z|)  
## (Intercept) 1.3174e+04 5.8290e+03  2.2601  0.02382 *
## SM.POP.NETM 1.7563e-02 9.0595e-03  1.9386  0.05255 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    6.21e+09
## Residual Sum of Squares: 5184500000
## R-Squared:      0.16513
## Adj. R-Squared: 0.12119
## Chisq: 3.75814 on 1 DF, p-value: 0.052551

# Comparar la r2 ajustada de los 3 metodos y elegir el que tenga el mayor.
phtest(walhus, within)

## 
##  Hausman Test
## 
## data:  NY.GDP.PCAP.CD ~ SM.POP.NETM
## chisq = 0.14364, df = 1, p-value = 0.7047
## alternative hypothesis: one model is inconsistent

#Si el p-value es <0.05, usamos efectos fijos (within)

#Por lo tanto nos quedamos con el modelo agrupado (pooled)

Ejercicio 1. generar conjunto de datos de panel 2

# obtener información de varios países
unemployment <- wb_data(country = c("BR", "DE", "JP"), 
                        indicator = c("SL.UEM.TOTL.ZS","SL.TLF.ACTI.ZS","NY.GDP.PCAP.CD"), 
                        start_date = 1950, end_date = 2025)

# Generar conjunto de datos de panel con las variables necesarias
panel_2 <- select(unemployment, country, date, SL.UEM.TOTL.ZS, NY.GDP.PCAP.CD)

# Filtrar solo ciertos años
panel_2 <- subset(panel_2, date == 1960 | date == 1970 | date == 1980 | 
                        date == 1990 | date == 2000 | date == 2010 | date == 2020)

# Convertir a pdata.frame
panel_2 <- pdata.frame(panel_2, index = c("country", "date"))

# Ejercicio 1. generar Prueba de Heterogeneidad para ejercicio 1

plotmeans(SL.UEM.TOTL.ZS ~ country, main = "Prueba de Desempleo Total (% de la fuerza laboral)", data = panel_2)

plotmeans(NY.GDP.PCAP.CD ~ country, main = "Prueba de PIB per cápita (US$)", data = panel_2)

#Modelo 1. Regresion agrupada (pooled) Asume que no hay heterogeniadad
pooled1 <- plm(SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, model = "pooling" ) 
summary(pooled1)

## Pooling Model
## 
## Call:
## plm(formula = SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, 
##     model = "pooling")
## 
## Balanced Panel: n = 3, T = 3, N = 9
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -2.40313 -0.69640 -0.12529  0.75800  2.69294 
## 
## Coefficients:
##                   Estimate  Std. Error t-value  Pr(>|t|)    
## (Intercept)     1.2247e+01  1.2149e+00 10.0807 2.029e-05 ***
## NY.GDP.PCAP.CD -1.7575e-04  3.6505e-05 -4.8145  0.001934 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    98.553
## Residual Sum of Squares: 22.859
## R-Squared:      0.76806
## Adj. R-Squared: 0.73492
## F-statistic: 23.1798 on 1 and 7 DF, p-value: 0.0019336

#Modelo 2. Efectos fijos (within) Cuando las diferencias no observadas son constantes en el tiempo
within1 <- plm(SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, model = "within" )
summary(within1)

## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, 
##     model = "within")
## 
## Balanced Panel: n = 3, T = 3, N = 9
## 
## Residuals:
##  Brazil-2000  Brazil-2010  Brazil-2020 Germany-2000 Germany-2010 Germany-2020 
##     -0.61876     -2.02843      2.64719     -0.29882      1.38636     -1.08755 
##   Japan-2000   Japan-2010   Japan-2020 
##      0.22105      1.37791     -1.59897 
## 
## Coefficients:
##                   Estimate  Std. Error t-value Pr(>|t|)
## NY.GDP.PCAP.CD -0.00013845  0.00010410   -1.33    0.241
## 
## Total Sum of Squares:    25.997
## Residual Sum of Squares: 19.203
## R-Squared:      0.26133
## Adj. R-Squared: -0.18188
## F-statistic: 1.76888 on 1 and 5 DF, p-value: 0.24096

#Prueba
pFtest(within1, pooled1)

## 
##  F test for individual effects
## 
## data:  SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD
## F = 0.47588, df1 = 2, df2 = 5, p-value = 0.6469
## alternative hypothesis: significant effects

" Como el P value es <0.05 se prefiere el modelo de efectos fijos"

## [1] " Como el P value es <0.05 se prefiere el modelo de efectos fijos"

#Modelo 3. Efectos aleatorios - Metodo walhus. Cuando las diferencias no obsevadas son aleatorias
walhus1 <- plm(SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, model = "random", random.method = "walhus" )
summary(walhus1)

## Oneway (individual) effect Random Effect Model 
##    (Wallace-Hussain's transformation)
## 
## Call:
## plm(formula = SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, 
##     model = "random", random.method = "walhus")
## 
## Balanced Panel: n = 3, T = 3, N = 9
## 
## Effects:
##                 var std.dev share
## idiosyncratic 3.283   1.812     1
## individual    0.000   0.000     0
## theta: 0
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -2.40313 -0.69640 -0.12529  0.75800  2.69294 
## 
## Coefficients:
##                   Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept)     1.2247e+01  1.2149e+00 10.0807 < 2.2e-16 ***
## NY.GDP.PCAP.CD -1.7575e-04  3.6505e-05 -4.8145 1.475e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    98.553
## Residual Sum of Squares: 22.859
## R-Squared:      0.76806
## Adj. R-Squared: 0.73492
## Chisq: 23.1798 on 1 DF, p-value: 1.4754e-06

#Modelo 4. Efectos aleatorios - Metodo amemiya.
amemiya1 <- plm(SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, model = "random", random.method = "amemiya" )
summary(amemiya1)

## Oneway (individual) effect Random Effect Model 
##    (Amemiya's transformation)
## 
## Call:
## plm(formula = SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, 
##     model = "random", random.method = "amemiya")
## 
## Balanced Panel: n = 3, T = 3, N = 9
## 
## Effects:
##                 var std.dev share
## idiosyncratic 3.201   1.789     1
## individual    0.000   0.000     0
## theta: 0
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -2.40313 -0.69640 -0.12529  0.75800  2.69294 
## 
## Coefficients:
##                   Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept)     1.2247e+01  1.2149e+00 10.0807 < 2.2e-16 ***
## NY.GDP.PCAP.CD -1.7575e-04  3.6505e-05 -4.8145 1.475e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    98.553
## Residual Sum of Squares: 22.859
## R-Squared:      0.76806
## Adj. R-Squared: 0.73492
## Chisq: 23.1798 on 1 DF, p-value: 1.4754e-06

#Modelo 5. Efectos aleatorios - Metodo nerlove.
nerlove1 <- plm(SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, model = "random", random.method = "nerlove" )
summary(nerlove1)

## Oneway (individual) effect Random Effect Model 
##    (Nerlove's transformation)
## 
## Call:
## plm(formula = SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD, data = panel_2, 
##     model = "random", random.method = "nerlove")
## 
## Balanced Panel: n = 3, T = 3, N = 9
## 
## Effects:
##                 var std.dev share
## idiosyncratic 2.134   1.461 0.644
## individual    1.177   1.085 0.356
## theta: 0.3863
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -2.15745 -0.59767 -0.36330  0.96808  2.76783 
## 
## Coefficients:
##                   Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept)     1.2039e+01  1.7468e+00  6.8922 5.493e-12 ***
## NY.GDP.PCAP.CD -1.6855e-04  5.1005e-05 -3.3045 0.0009513 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    53.319
## Residual Sum of Squares: 20.828
## R-Squared:      0.60937
## Adj. R-Squared: 0.55357
## Chisq: 10.92 on 1 DF, p-value: 0.00095134

# Comparar la r2 ajustada de los 3 metodos y elegir el que tenga el mayor.
phtest(walhus1, within1)

## 
##  Hausman Test
## 
## data:  SL.UEM.TOTL.ZS ~ NY.GDP.PCAP.CD
## chisq = 0.1464, df = 1, p-value = 0.702
## alternative hypothesis: one model is inconsistent

#Si el p-value es <0.05, usamos efectos fijos (within)

#Por lo tanto nos quedamos con el modelo agrupado (pooled)

Actividad 1. Patentes

Contexto. Patentes

El entorno de negocios en el que las organizaciones se desarrollan es cada vez más dinámico por lo que las empresas enfrentan constantemente el reto de mantenerse al día y superar los nuevos retos que el ambiente presenta. La innovación es una de las mejores formas que las empresas tienen para conseguirlo. De acuerdo con el artículo “Innovation in business: What it is and why is so important” ´publicado en el Harvard Business Review la innovación presenta tres grandes ventajas para las empresas: les permite adaptarse, promueve el crecimiento y además les ayuda a diferenciarse de su competencia generando ventajas competitivas.

Importart base

patentes <- read_excel("/Users/sebastianfajardo/Downloads/PATENT 3.xls")

## Entender la base de datos

summary(patentes)

##      cusip            merger           employ            return       
##  Min.   :   800   Min.   :0.0000   Min.   :  0.085   Min.   :-73.022  
##  1st Qu.:368514   1st Qu.:0.0000   1st Qu.:  1.227   1st Qu.:  5.128  
##  Median :501116   Median :0.0000   Median :  3.842   Median :  7.585  
##  Mean   :514536   Mean   :0.0177   Mean   : 18.826   Mean   :  8.003  
##  3rd Qu.:754688   3rd Qu.:0.0000   3rd Qu.: 15.442   3rd Qu.: 10.501  
##  Max.   :878555   Max.   :1.0000   Max.   :506.531   Max.   : 48.675  
##                                    NA's   :21        NA's   :8        
##     patents         patentsg           stckpr              rnd           
##  Min.   :  0.0   Min.   :   0.00   Min.   :  0.1875   Min.   :   0.0000  
##  1st Qu.:  1.0   1st Qu.:   1.00   1st Qu.:  7.6250   1st Qu.:   0.6847  
##  Median :  3.0   Median :   4.00   Median : 16.5000   Median :   2.1456  
##  Mean   : 22.9   Mean   :  27.14   Mean   : 22.6270   Mean   :  29.3398  
##  3rd Qu.: 15.0   3rd Qu.:  19.00   3rd Qu.: 29.2500   3rd Qu.:  11.9168  
##  Max.   :906.0   Max.   :1063.00   Max.   :402.0000   Max.   :1719.3535  
##                                    NA's   :2                             
##     rndeflt             rndstck              sales                sic      
##  Min.   :   0.0000   Min.   :   0.1253   Min.   :    1.222   Min.   :2000  
##  1st Qu.:   0.4788   1st Qu.:   5.1520   1st Qu.:   52.995   1st Qu.:2890  
##  Median :   1.4764   Median :  13.3532   Median :  174.065   Median :3531  
##  Mean   :  19.7238   Mean   : 163.8234   Mean   : 1219.601   Mean   :3333  
##  3rd Qu.:   8.7527   3rd Qu.:  74.5625   3rd Qu.:  728.964   3rd Qu.:3661  
##  Max.   :1000.7876   Max.   :9755.3516   Max.   :44224.000   Max.   :9997  
##                      NA's   :157         NA's   :3                         
##       year     
##  Min.   :2012  
##  1st Qu.:2014  
##  Median :2016  
##  Mean   :2016  
##  3rd Qu.:2019  
##  Max.   :2021  
##

str(patentes) #Na en la base de datos

## tibble [2,260 × 13] (S3: tbl_df/tbl/data.frame)
##  $ cusip   : num [1:2260] 800 800 800 800 800 800 800 800 800 800 ...
##  $ merger  : num [1:2260] 0 0 0 0 0 0 0 0 0 0 ...
##  $ employ  : num [1:2260] 9.85 12.32 12.2 11.84 12.99 ...
##  $ return  : num [1:2260] 5.82 5.69 4.42 5.28 4.91 ...
##  $ patents : num [1:2260] 22 34 31 32 40 60 57 77 38 5 ...
##  $ patentsg: num [1:2260] 24 32 30 34 28 33 53 47 64 70 ...
##  $ stckpr  : num [1:2260] 47.6 57.9 33 38.5 35.1 ...
##  $ rnd     : num [1:2260] 2.56 3.1 3.27 3.24 3.78 ...
##  $ rndeflt : num [1:2260] 2.56 2.91 2.8 2.52 2.78 ...
##  $ rndstck : num [1:2260] 16.2 17.4 19.6 21.9 23.1 ...
##  $ sales   : num [1:2260] 344 436 535 567 631 ...
##  $ sic     : num [1:2260] 3740 3740 3740 3740 3740 3740 3740 3740 3740 3740 ...
##  $ year    : num [1:2260] 2012 2013 2014 2015 2016 ...

sapply(patentes, function(x) sum(is.na(x))) #NA por variable

##    cusip   merger   employ   return  patents patentsg   stckpr      rnd 
##        0        0       21        8        0        0        2        0 
##  rndeflt  rndstck    sales      sic     year 
##        0      157        3        0        0

patentes$employ[is.na(patentes$employ)] <- mean(patentes$employ, na.rm = TRUE)
patentes$return[is.na(patentes$return)] <- mean(patentes$return, na.rm = TRUE)
patentes$stckpr[is.na(patentes$stckpr)] <- mean(patentes$stckpr, na.rm = TRUE)
patentes$rndstck[is.na(patentes$rndstck)] <- mean(patentes$rndstck, na.rm = TRUE)
patentes$sales[is.na(patentes$sales)] <- mean(patentes$sales, na.rm = TRUE)
summary(patentes)

##      cusip            merger           employ            return       
##  Min.   :   800   Min.   :0.0000   Min.   :  0.085   Min.   :-73.022  
##  1st Qu.:368514   1st Qu.:0.0000   1st Qu.:  1.242   1st Qu.:  5.139  
##  Median :501116   Median :0.0000   Median :  3.893   Median :  7.601  
##  Mean   :514536   Mean   :0.0177   Mean   : 18.826   Mean   :  8.003  
##  3rd Qu.:754688   3rd Qu.:0.0000   3rd Qu.: 16.034   3rd Qu.: 10.473  
##  Max.   :878555   Max.   :1.0000   Max.   :506.531   Max.   : 48.675  
##     patents         patentsg           stckpr              rnd           
##  Min.   :  0.0   Min.   :   0.00   Min.   :  0.1875   Min.   :   0.0000  
##  1st Qu.:  1.0   1st Qu.:   1.00   1st Qu.:  7.6250   1st Qu.:   0.6847  
##  Median :  3.0   Median :   4.00   Median : 16.5000   Median :   2.1456  
##  Mean   : 22.9   Mean   :  27.14   Mean   : 22.6270   Mean   :  29.3398  
##  3rd Qu.: 15.0   3rd Qu.:  19.00   3rd Qu.: 29.2500   3rd Qu.:  11.9168  
##  Max.   :906.0   Max.   :1063.00   Max.   :402.0000   Max.   :1719.3535  
##     rndeflt             rndstck              sales                sic      
##  Min.   :   0.0000   Min.   :   0.1253   Min.   :    1.222   Min.   :2000  
##  1st Qu.:   0.4788   1st Qu.:   5.5882   1st Qu.:   53.204   1st Qu.:2890  
##  Median :   1.4764   Median :  16.2341   Median :  174.283   Median :3531  
##  Mean   :  19.7238   Mean   : 163.8234   Mean   : 1219.601   Mean   :3333  
##  3rd Qu.:   8.7527   3rd Qu.: 119.1048   3rd Qu.:  743.422   3rd Qu.:3661  
##  Max.   :1000.7876   Max.   :9755.3516   Max.   :44224.000   Max.   :9997  
##       year     
##  Min.   :2012  
##  1st Qu.:2014  
##  Median :2016  
##  Mean   :2016  
##  3rd Qu.:2019  
##  Max.   :2021

sum(is.na(patentes))#Na en la base de datos

## [1] 0

boxplot(patentes$cusip, horizontal = TRUE)

boxplot(patentes$merger, horizontal = TRUE)

boxplot(patentes$employ, horizontal = TRUE)

boxplot(patentes$return, horizontal = TRUE)

boxplot(patentes$patents, horizontal = TRUE)

boxplot(patentes$patentsg, horizontal = TRUE)

boxplot(patentes$stckpr, horizontal = TRUE)

boxplot(patentes$rnd, horizontal = TRUE)

boxplot(patentes$rndeflt, horizontal = TRUE)

boxplot(patentes$rndstck, horizontal = TRUE)

boxplot(patentes$sales, horizontal = TRUE)

boxplot(patentes$sic, horizontal = TRUE)

boxplot(patentes$year, horizontal = TRUE)

patentes$year <- patentes$year -40
summary(patentes)

##      cusip            merger           employ            return       
##  Min.   :   800   Min.   :0.0000   Min.   :  0.085   Min.   :-73.022  
##  1st Qu.:368514   1st Qu.:0.0000   1st Qu.:  1.242   1st Qu.:  5.139  
##  Median :501116   Median :0.0000   Median :  3.893   Median :  7.601  
##  Mean   :514536   Mean   :0.0177   Mean   : 18.826   Mean   :  8.003  
##  3rd Qu.:754688   3rd Qu.:0.0000   3rd Qu.: 16.034   3rd Qu.: 10.473  
##  Max.   :878555   Max.   :1.0000   Max.   :506.531   Max.   : 48.675  
##     patents         patentsg           stckpr              rnd           
##  Min.   :  0.0   Min.   :   0.00   Min.   :  0.1875   Min.   :   0.0000  
##  1st Qu.:  1.0   1st Qu.:   1.00   1st Qu.:  7.6250   1st Qu.:   0.6847  
##  Median :  3.0   Median :   4.00   Median : 16.5000   Median :   2.1456  
##  Mean   : 22.9   Mean   :  27.14   Mean   : 22.6270   Mean   :  29.3398  
##  3rd Qu.: 15.0   3rd Qu.:  19.00   3rd Qu.: 29.2500   3rd Qu.:  11.9168  
##  Max.   :906.0   Max.   :1063.00   Max.   :402.0000   Max.   :1719.3535  
##     rndeflt             rndstck              sales                sic      
##  Min.   :   0.0000   Min.   :   0.1253   Min.   :    1.222   Min.   :2000  
##  1st Qu.:   0.4788   1st Qu.:   5.5882   1st Qu.:   53.204   1st Qu.:2890  
##  Median :   1.4764   Median :  16.2341   Median :  174.283   Median :3531  
##  Mean   :  19.7238   Mean   : 163.8234   Mean   : 1219.601   Mean   :3333  
##  3rd Qu.:   8.7527   3rd Qu.: 119.1048   3rd Qu.:  743.422   3rd Qu.:3661  
##  Max.   :1000.7876   Max.   :9755.3516   Max.   :44224.000   Max.   :9997  
##       year     
##  Min.   :1972  
##  1st Qu.:1974  
##  Median :1976  
##  Mean   :1976  
##  3rd Qu.:1979  
##  Max.   :1981

Paso 1. generar conjunto de datos de panel

#generar conjunto de datos de panel
panel_pantentes <- pdata.frame(patentes, index = c("cusip","year"))

Paso 2. Prueba de heterogeneidad

plotmeans(patents ~ cusip , main = "Prueba de Heterogeneidad entre paises para el PIB", data = panel_pantentes)

## Paso 3. Pruebas de efectos fijos y aleatorios

#Modelo 1. Regresion agrupada (pooled) Asume que no hay heterogeniadad
pooled_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_pantentes, model = "pooling" ) #+Si hay mas variables se separan con simbolo de +
summary(pooled_patentes)

## Pooling Model
## 
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd + 
##     sales + sic, data = panel_pantentes, model = "pooling")
## 
## Balanced Panel: n = 226, T = 10, N = 2260
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -320.36212  -10.01555    0.94472    7.40861  433.86316 
## 
## Coefficients:
##                Estimate  Std. Error t-value  Pr(>|t|)    
## (Intercept) -4.1831e-01  5.2757e+00 -0.0793   0.93681    
## merger      -1.1612e+01  7.2433e+00 -1.6031   0.10905    
## employ       1.3683e+00  4.1969e-02 32.6040 < 2.2e-16 ***
## return      -4.3505e-03  1.8155e-01 -0.0240   0.98088    
## stckpr       6.5137e-01  4.3139e-02 15.0994 < 2.2e-16 ***
## rnd         -1.3853e-01  1.6106e-02 -8.6007 < 2.2e-16 ***
## sales       -3.2049e-03  4.6962e-04 -6.8246  1.13e-11 ***
## sic         -2.6894e-03  1.4820e-03 -1.8146   0.06972 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    10998000
## Residual Sum of Squares: 4600300
## R-Squared:      0.58173
## Adj. R-Squared: 0.58043
## F-statistic: 447.437 on 7 and 2252 DF, p-value: < 2.22e-16

#Modelo 2. Efectos fijos (within) Cuando las diferencias no observadas son constantes en el tiempo
within_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_pantentes, model = "within" )
summary(within_patentes)

## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd + 
##     sales + sic, data = panel_pantentes, model = "within")
## 
## Balanced Panel: n = 226, T = 10, N = 2260
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -497.22898   -1.64569   -0.19669    1.64341  184.49423 
## 
## Coefficients:
##           Estimate  Std. Error  t-value  Pr(>|t|)    
## merger  3.30904770  4.16313684   0.7948   0.42680    
## employ  0.11963128  0.07052503   1.6963   0.08998 .  
## return -0.07056694  0.10867769  -0.6493   0.51620    
## stckpr -0.01107952  0.03242512  -0.3417   0.73262    
## rnd    -0.19889614  0.01443066 -13.7829 < 2.2e-16 ***
## sales  -0.00309052  0.00041525  -7.4426 1.451e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1091400
## Residual Sum of Squares: 819280
## R-Squared:      0.24935
## Adj. R-Squared: 0.16385
## F-statistic: 112.278 on 6 and 2028 DF, p-value: < 2.22e-16

#Prueba
pFtest(within_patentes, pooled_patentes)

## 
##  F test for individual effects
## 
## data:  patents ~ merger + employ + return + stckpr + rnd + sales + sic
## F = 41.782, df1 = 224, df2 = 2028, p-value < 2.2e-16
## alternative hypothesis: significant effects

" Si el P value es <0.05 se avanza a los siguientes modelos"

## [1] " Si el P value es <0.05 se avanza a los siguientes modelos"

#Modelo 3. Efectos aleatorios - Metodo walhus. Cuando las diferencias no obsevadas son aleatorias
walhus_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_pantentes, model = "random", random.method = "walhus" )
summary(walhus_patentes)

## Oneway (individual) effect Random Effect Model 
##    (Wallace-Hussain's transformation)
## 
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd + 
##     sales + sic, data = panel_pantentes, model = "random", random.method = "walhus")
## 
## Balanced Panel: n = 226, T = 10, N = 2260
## 
## Effects:
##                   var std.dev share
## idiosyncratic  555.26   23.56 0.273
## individual    1480.26   38.47 0.727
## theta: 0.8099
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -433.72438   -3.89667   -1.76198    0.78484  211.91016 
## 
## Coefficients:
##                Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept) 11.84397257 12.78087032  0.9267    0.3541    
## merger       4.47647107  4.51685216  0.9911    0.3217    
## employ       1.10525428  0.04853786 22.7710 < 2.2e-16 ***
## return      -0.12920955  0.11762230 -1.0985    0.2720    
## stckpr       0.17097726  0.03355374  5.0956 3.476e-07 ***
## rnd         -0.14575073  0.01469317 -9.9196 < 2.2e-16 ***
## sales       -0.00393738  0.00042854 -9.1880 < 2.2e-16 ***
## sic         -0.00107515  0.00376075 -0.2859    0.7750    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1449600
## Residual Sum of Squares: 1098300
## R-Squared:      0.24236
## Adj. R-Squared: 0.24
## Chisq: 720.388 on 7 DF, p-value: < 2.22e-16

#Modelo 4. Efectos aleatorios - Metodo amemiya.
amemiya_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_pantentes, model = "random", random.method = "amemiya" )
summary(amemiya_patentes)

## Oneway (individual) effect Random Effect Model 
##    (Amemiya's transformation)
## 
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd + 
##     sales + sic, data = panel_pantentes, model = "random", random.method = "amemiya")
## 
## Balanced Panel: n = 226, T = 10, N = 2260
## 
## Effects:
##                   var std.dev share
## idiosyncratic  402.79   20.07 0.051
## individual    7483.44   86.51 0.949
## theta: 0.9268
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -454.59697   -2.99704   -1.65272    0.59741  193.17353 
## 
## Coefficients:
##                Estimate  Std. Error  z-value  Pr(>|z|)    
## (Intercept)  8.58107091 29.77947247   0.2882    0.7732    
## merger       3.91351453  4.11354681   0.9514    0.3414    
## employ       0.49060426  0.06153621   7.9726 1.554e-15 ***
## return      -0.09427795  0.10733800  -0.8783    0.3798    
## stckpr       0.04660332  0.03163610   1.4731    0.1407    
## rnd         -0.17995961  0.01406835 -12.7918 < 2.2e-16 ***
## sales       -0.00342554  0.00040647  -8.4275 < 2.2e-16 ***
## sic          0.00425278  0.00877425   0.4847    0.6279    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1144500
## Residual Sum of Squares: 891720
## R-Squared:      0.22085
## Adj. R-Squared: 0.21842
## Chisq: 638.312 on 7 DF, p-value: < 2.22e-16

#Modelo 5. Efectos aleatorios - Metodo nerlove.
nerlove_patentes <- plm(patents ~ merger + employ + return + stckpr + rnd + sales + sic, data = panel_pantentes, model = "random", random.method = "nerlove" )
summary(nerlove_patentes)

## Oneway (individual) effect Random Effect Model 
##    (Nerlove's transformation)
## 
## Call:
## plm(formula = patents ~ merger + employ + return + stckpr + rnd + 
##     sales + sic, data = panel_pantentes, model = "random", random.method = "nerlove")
## 
## Balanced Panel: n = 226, T = 10, N = 2260
## 
## Effects:
##                   var std.dev share
## idiosyncratic  362.51   19.04 0.046
## individual    7557.16   86.93 0.954
## theta: 0.9309
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -455.94828   -2.93752   -1.60035    0.62863  192.36375 
## 
## Coefficients:
##                Estimate  Std. Error  z-value  Pr(>|z|)    
## (Intercept)  8.38498937 31.41700295   0.2669    0.7896    
## merger       3.86675065  4.09938561   0.9433    0.3456    
## employ       0.46018862  0.06203371   7.4184 1.186e-13 ***
## return      -0.09236163  0.10697310  -0.8634    0.3879    
## stckpr       0.04167663  0.03156299   1.3204    0.1867    
## rnd         -0.18153379  0.01403810 -12.9315 < 2.2e-16 ***
## sales       -0.00339833  0.00040545  -8.3816 < 2.2e-16 ***
## sic          0.00451640  0.00925634   0.4879    0.6256    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1138700
## Residual Sum of Squares: 885220
## R-Squared:      0.22262
## Adj. R-Squared: 0.22021
## Chisq: 644.925 on 7 DF, p-value: < 2.22e-16

# Comparar la r2 ajustada de los 3 metodos y elegir el que tenga el mayor.
phtest(walhus_patentes, within_patentes)

## 
##  Hausman Test
## 
## data:  patents ~ merger + employ + return + stckpr + rnd + sales + sic
## chisq = 352.48, df = 6, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent

#Si el p-value es <0.05, usamos efectos fijos (within)

#Por lo tanto nos quedamos con el modelo de efectos fijos (within)

Paso 4. Pruebas de heterocedasticidad y autocorrelacion serial

#prueba de heterocedasticidad
bptest(within_patentes)

## 
##  studentized Breusch-Pagan test
## 
## data:  within_patentes
## BP = 1447.6, df = 7, p-value < 2.2e-16

# Si el p-value <0.05, hay heterocedasticidad en los residuos (problema detectado)

#Prueba de autorcorrelacion serial
pwartest(within_patentes)

## 
##  Wooldridge's test for serial correlation in FE panels
## 
## data:  within_patentes
## F = 104.29, df1 = 1, df2 = 2032, p-value < 2.2e-16
## alternative hypothesis: serial correlation

# Si el p-value <0.05, hay autocorrelacion serial en los errores (problema detectado)

# Modelo de correcion
coeficientes_corregidos <- coeftest(within_patentes, vcov =vcovHC(within_patentes, type = "HC0"))
solo_coeficientes <- coeficientes_corregidos[,1]

Paso 5. Generar pronosticos y Evaluar Modelo

datos_de_prueba <- data.frame(merger = 0, employ = 10, return =6, stckpr =48, rnd =3,  sales =344)
prediccion <- sum(solo_coeficientes*datos_de_prueba)
prediccion

## [1] -1.418735

Paso 6. Conclusiones

En conclusion este ejercicio permite generar pronosticos en bases de datos con panel, tomando en cuenta los tratamientos para distintos efectos en los datos y sus errores.

actividad 1

Sebastián Fajardo- A01412035

2025-08-12

Enlace de shiny

Pregunta negocio

Instalar paquetes y llamar librerias

Paso 1. generar conjunto de datos de panel

Paso 2. Prueba de heterogeneidad

Paso 3. Pruebas de efectos fijos y aleatorios

Ejercicio 1. generar conjunto de datos de panel 2

Actividad 1. Patentes

Contexto. Patentes

Importart base

Paso 1. generar conjunto de datos de panel

Paso 2. Prueba de heterogeneidad

Paso 4. Pruebas de heterocedasticidad y autocorrelacion serial

Paso 5. Generar pronosticos y Evaluar Modelo

Paso 6. Conclusiones