library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(plm)
##
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
library(magrittr)
#Importar base de datos
PATENT3 <- read_excel("C:\\Users\\USER\\Documents\\PATENT3.xlsx")
# Llamar columnas con datos faltantes
columns_to_fill <- c("return", "stckpr", "rndstck", "sales")
# Función para llenar filas faltantes con el promedio de cada empresa
fill_missing_with_mean <- function(df, col) {
df %>%
group_by(cusip) %>%
mutate_at(vars({{col}}), ~ifelse(is.na(.), mean(., na.rm = TRUE), .)) %>%
ungroup()
}
# Aplicar la funcion a cada columna
patent_data <- PATENT3
for (col in columns_to_fill) {
patent_data <- fill_missing_with_mean(patent_data, col)
}
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col)
##
## # Now:
## data %>% select(all_of(col))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Borrar columna "rndstck"
patent_data <- patent_data %>% select(-rndstck)
patent_data <- select(patent_data, cusip, return, stckpr, sales,patentsg,year,employ)
patent_data <- pdata.frame(patent_data, index = c("cusip","year"))
plotmeans(patentsg ~ year, data = patent_data,
main = "Heterogeneidad entre años",
xlab = "Año", ylab = "Patentes otorgados",
barcol="red")
Esta gráfica demuestra que la variable sí es constante, sin embargo hay un pequeño declive.
pooled <- plm(patentsg ~ employ + stckpr+ sales, data = patent_data, model = "pooling")
summary(pooled)
## Pooling Model
##
## Call:
## plm(formula = patentsg ~ employ + stckpr + sales, data = patent_data,
## model = "pooling")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -406.482940 -10.966768 -0.098362 5.842271 578.682163
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -7.78741274 1.40859456 -5.5285 3.603e-08 ***
## employ 1.26647834 0.03709282 34.1435 < 2.2e-16 ***
## stckpr 0.70187341 0.04488889 15.6358 < 2.2e-16 ***
## sales -0.00375855 0.00048955 -7.6775 2.398e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 14168000
## Residual Sum of Squares: 5650800
## R-Squared: 0.60115
## Adj. R-Squared: 0.60062
## F-statistic: 1133.43 on 3 and 2256 DF, p-value: < 2.22e-16
within <- plm(patentsg ~ employ + stckpr+ sales, data = patent_data, model = "within")
summary(within)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = patentsg ~ employ + stckpr + sales, data = patent_data,
## model = "within")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -226.71716 -2.13000 -0.21152 1.79633 277.43654
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## employ -0.07572994 0.06258381 -1.2101 0.2264
## stckpr 0.11341500 0.02678179 4.2348 2.39e-05 ***
## sales -0.00410898 0.00028362 -14.4875 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 715640
## Residual Sum of Squares: 627170
## R-Squared: 0.12362
## Adj. R-Squared: 0.02524
## F-statistic: 95.4979 on 3 and 2031 DF, p-value: < 2.22e-16
En este modelo podemos observar que la variable de empleados no tiene un impacto significativo en las patentes otorgadas a la empresa.
pFtest(within,pooled)
##
## F test for individual effects
##
## data: patentsg ~ employ + stckpr + sales
## F = 72.305, df1 = 225, df2 = 2031, p-value < 2.2e-16
## alternative hypothesis: significant effects
Modelos significantes. Esto nos indica que hay un model mejor que el otro, el modelo que interpreta mejor nuestra variables es el modelo agrupado con el método Pooled, esto es debido a que la R cuadrada ajustada es la mayor.
walhus <- plm(patentsg ~ employ + stckpr+ sales, data = patent_data, model = "random")
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = patentsg ~ employ + stckpr + sales, data = patent_data,
## model = "random")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 308.80 17.57 0.129
## individual 2083.70 45.65 0.871
## theta: 0.8792
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -156.15473 -4.05094 -1.98937 0.59603 307.93315
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 15.47009112 3.44207613 4.4944 6.976e-06 ***
## employ 0.64863883 0.04964448 13.0657 < 2.2e-16 ***
## stckpr 0.20422464 0.02791323 7.3164 2.547e-13 ***
## sales -0.00415004 0.00029919 -13.8708 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 912080
## Residual Sum of Squares: 795610
## R-Squared: 0.1277
## Adj. R-Squared: 0.12654
## Chisq: 330.262 on 3 DF, p-value: < 2.22e-16
amemiya <- plm(patentsg ~ employ + stckpr + sales, data = patent_data, model="random", random.method="amemiya")
summary(amemiya)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = patentsg ~ employ + stckpr + sales, data = patent_data,
## model = "random", random.method = "amemiya")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 308.34 17.56 0.038
## individual 7820.88 88.44 0.962
## theta: 0.9373
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -179.29139 -3.34909 -1.59897 0.80985 296.28233
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 24.53728282 5.97776184 4.1048 4.047e-05 ***
## employ 0.22863283 0.05599839 4.0828 4.449e-05 ***
## stckpr 0.14889909 0.02636281 5.6481 1.623e-08 ***
## sales -0.00413280 0.00028058 -14.7294 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 768460
## Residual Sum of Squares: 686640
## R-Squared: 0.10647
## Adj. R-Squared: 0.10528
## Chisq: 268.825 on 3 DF, p-value: < 2.22e-16
nerlove <- plm(patentsg ~ employ + stckpr + sales, data = patent_data, model="random", random.method="nerlove")
summary(nerlove)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = patentsg ~ employ + stckpr + sales, data = patent_data,
## model = "random", random.method = "nerlove")
##
## Balanced Panel: n = 226, T = 10, N = 2260
##
## Effects:
## var std.dev share
## idiosyncratic 277.51 16.66 0.034
## individual 7886.61 88.81 0.966
## theta: 0.9408
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -181.19093 -3.30313 -1.55289 0.85044 295.40484
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 25.10085616 6.28899815 3.9912 6.573e-05 ***
## employ 0.20216827 0.05632923 3.5890 0.0003319 ***
## stckpr 0.14571591 0.02627692 5.5454 2.933e-08 ***
## sales -0.00413097 0.00027955 -14.7775 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 762800
## Residual Sum of Squares: 681070
## R-Squared: 0.10716
## Adj. R-Squared: 0.10597
## Chisq: 270.756 on 3 DF, p-value: < 2.22e-16
Aquí podemos ver que todas las variables son significativas con el coeficiente de patentes otorgadas.
phtest(walhus,within)
##
## Hausman Test
##
## data: patentsg ~ employ + stckpr + sales
## chisq = 354.51, df = 3, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
El mejor modelo fue el de efectos Aleatorios por el método Walhus, esto es debido a que la R cuadrada ajustada es la mayor.