Este documento integra los resultados de:
# Instalar paquetes y llamar librerías
#install.packages("WDI")
library(WDI)
#install.packages("wbstats")
library(wbstats)
#install.packages("tidyverse")
library(ggplot2)
#install.packages("gplots")
library(gplots)
##
## ---------------------
## gplots 3.3.0 loaded:
## * Use citation('gplots') for citation info.
## * Homepage: https://talgalili.github.io/gplots/
## * Report issues: https://github.com/talgalili/gplots/issues
## * Ask questions: https://stackoverflow.com/questions/tagged/gplots
## * Suppress this message with: suppressPackageStartupMessages(library(gplots))
## ---------------------
##
## Adjuntando el paquete: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
#install.packages("plm")
library(plm)
# Obtener la información de 1 país
PIB_MEX <- wb_data(country = "MX", indicator = "NY.GDP.PCAP.CD",
start_date=1900, end_date=2025)
summary(PIB_MEX)
## iso2c iso3c country date
## Length:65 Length:65 Length:65 Min. :1960
## Class :character Class :character Class :character 1st Qu.:1976
## Mode :character Mode :character Mode :character Median :1992
## Mean :1992
## 3rd Qu.:2008
## Max. :2024
## NY.GDP.PCAP.CD unit obs_status footnote
## Min. : 355.1 Length:65 Length:65 Length:65
## 1st Qu.: 1465.5 Class :character Class :character Class :character
## Median : 4183.9 Mode :character Mode :character Mode :character
## Mean : 5238.3
## 3rd Qu.: 9097.9
## Max. :14185.8
## last_updated
## Min. :2026-01-28
## 1st Qu.:2026-01-28
## Median :2026-01-28
## Mean :2026-01-28
## 3rd Qu.:2026-01-28
## Max. :2026-01-28
ggplot(PIB_MEX, aes(x= date, y=NY.GDP.PCAP.CD)) +
geom_point () +
geom_line() +
labs(title="PIB per Capita en México (Current USD$)", x = "Año",
y = "Valor")
# Obtener la información de varios paises
PIB_PANEL <- wb_data(country = c("MX","US","CA"), indicator = "NY.GDP.PCAP.CD",
start_date=1900, end_date=2025)
summary(PIB_PANEL)
## iso2c iso3c country date
## Length:195 Length:195 Length:195 Min. :1960
## Class :character Class :character Class :character 1st Qu.:1976
## Mode :character Mode :character Mode :character Median :1992
## Mean :1992
## 3rd Qu.:2008
## Max. :2024
## NY.GDP.PCAP.CD unit obs_status footnote
## Min. : 355.1 Length:195 Length:195 Length:195
## 1st Qu.: 4136.1 Class :character Class :character Class :character
## Median :10664.5 Mode :character Mode :character Mode :character
## Mean :19606.2
## 3rd Qu.:30713.4
## Max. :84534.0
## last_updated
## Min. :2026-01-28
## 1st Qu.:2026-01-28
## Median :2026-01-28
## Mean :2026-01-28
## 3rd Qu.:2026-01-28
## Max. :2026-01-28
ggplot(PIB_PANEL, aes(x= date, y=NY.GDP.PCAP.CD, color =iso3c)) +
geom_point () +
geom_line() +
labs(title="PIB per Capita en Norteamérica (Current USD$)", x = "Año",
y = "Valor")
# Obtener la información de varios indicadores en varios paises
MEGAPIB<- wb_data(country = c("MX","US","CA"), indicator = c("NY.GDP.PCAP.CD",
"SP.DYN.LE00.IN"), start_date=1900, end_date=2025)
summary(MEGAPIB)
## iso2c iso3c country date
## Length:195 Length:195 Length:195 Min. :1960
## Class :character Class :character Class :character 1st Qu.:1976
## Mode :character Mode :character Mode :character Median :1992
## Mean :1992
## 3rd Qu.:2008
## Max. :2024
##
## NY.GDP.PCAP.CD SP.DYN.LE00.IN
## Min. : 355.1 Min. :53.57
## 1st Qu.: 4136.1 1st Qu.:70.78
## Median :10664.5 Median :74.24
## Mean :19606.2 Mean :73.19
## 3rd Qu.:30713.4 3rd Qu.:77.51
## Max. :84534.0 Max. :82.16
## NA's :3
# Heterogeneidad
# Variación entre individuos
plotmeans(NY.GDP.PCAP.CD ~ country, main = "Heterogeneidad entre países", xlab = "País", ylab = "PIB per Cápita", data=MEGAPIB)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
# Interpretación:
# Alta Heterogeneidad: Si los puntos (medias) están muy separados entre países.
# Baja Heterogeneidad: Si los puntos (medias) están cerca uno de otros.
# En este caso, EUA y Canadá tienen un PIB per Cápita mayor que México, mostrando
# alta heterogeneidad entre países.
options(
repos = c(CRAN = "https://cloud.r-project.org"),
timeout = 900
)
# Efficient: download + clean + save (panel country-year) in one go
# install.packages(c("WDI", "dplyr", "writexl"), quiet = TRUE)
library(WDI)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:plm':
##
## between, lag, lead
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(writexl)
library(tidyr)
library(plm)
paises <- c("CHL", "PER", "COL", "MEX", "USA", "DEU", "BRA")
start_year <- 2014
end_year <- 2022
indicator_map <- c(
"NY.GDP.PCAP.KD" = "gdp_pc",
"BM.GSR.ROYL.CD" = "ip_pay",
"BX.GSR.ROYL.CD" = "ip_rec",
"TX.VAL.TECH.MF.ZS" = "ht_x_mfg_pct",
"TX.VAL.TECH.CD" = "ht_x_usd",
"IP.PAT.NRES" = "pat_nres",
"IP.PAT.RESD" = "pat_res",
"GB.XPD.RSDV.GD.ZS" = "rd_gdp_pct",
"SP.POP.SCIE.RD.P6" = "researchers_pm",
"IP.JRN.ARTC.SC" = "sci_articles",
"SP.POP.TECH.RD.P6" = "techs_pm"
)
# --- safe download with 1 retry ---
pull_wdi <- function() {
WDI(
country = paises,
indicator = names(indicator_map),
start = start_year,
end = end_year
)
}
panel_raw <- tryCatch(pull_wdi(), error = function(e) NULL)
if (is.null(panel_raw) || nrow(panel_raw) == 0) {
Sys.sleep(2)
panel_raw <- pull_wdi()
}
# Identify what actually came back (API may drop some)
requested <- names(indicator_map)
present <- intersect(requested, names(panel_raw))
missing <- setdiff(requested, present)
message("Indicators downloaded: ", paste(present, collapse = ", "))
## Indicators downloaded: NY.GDP.PCAP.KD, BM.GSR.ROYL.CD, BX.GSR.ROYL.CD, TX.VAL.TECH.MF.ZS, TX.VAL.TECH.CD, IP.PAT.NRES, IP.PAT.RESD, GB.XPD.RSDV.GD.ZS, SP.POP.SCIE.RD.P6, IP.JRN.ARTC.SC, SP.POP.TECH.RD.P6
if (length(missing) > 0) message("Indicators missing (API/server): ", paste(missing, collapse = ", "))
# Build panel WITHOUT crashing if some indicators are missing
panel <- panel_raw |>
select(iso3c, year, any_of(requested)) |>
rename(country = iso3c) |>
rename_with(~ indicator_map[.x], any_of(requested)) |>
arrange(country, year)
panel
## country year gdp_pc ip_pay ip_rec ht_x_mfg_pct ht_x_usd
## 1 BRA 2014 9338.342 6147326282 375097938 12.371169 8794690902
## 2 BRA 2015 8936.196 5517267029 581080520 14.485900 9433128684
## 3 BRA 2016 8577.843 5315108648 650833690 16.000164 10375539028
## 4 BRA 2017 8628.252 5402128504 642157301 14.311889 10715201949
## 5 BRA 2018 8722.335 5124101633 825475487 14.744242 11063190965
## 6 BRA 2019 8771.440 5246219108 641114074 14.066805 9392109980
## 7 BRA 2020 8435.010 4062060898 634291803 11.350094 5944951723
## 8 BRA 2021 8799.228 5222174071 705261778 9.001554 6350128610
## 9 BRA 2022 9032.084 7299729724 745138505 9.111346 7707210423
## 10 CHL 2014 13285.954 1614627539 40769469 7.091794 731680359
## 11 CHL 2015 13433.920 1635419969 41950301 6.722531 580229395
## 12 CHL 2016 13505.098 1730596607 38437446 8.424380 720871030
## 13 CHL 2017 13473.346 1709057253 50489332 7.106096 634869020
## 14 CHL 2018 13763.007 2034376888 46185212 6.911411 667789266
## 15 CHL 2019 13630.594 1878506475 45131485 7.632882 672148298
## 16 CHL 2020 12679.024 1511563046 39645102 15.975058 1368060499
## 17 CHL 2021 14051.471 1689634052 67186686 12.441775 1264185464
## 18 CHL 2022 14283.149 1475578872 65404498 6.936727 1400540500
## 19 COL 2014 6121.840 1401046313 116382418 8.119835 762816334
## 20 COL 2015 6248.515 1345633688 101196816 9.849346 823794429
## 21 COL 2016 6316.071 1257276813 99122533 10.185796 766933461
## 22 COL 2017 6309.677 1275576188 134001691 8.962363 692527611
## 23 COL 2018 6353.546 1412618281 143866520 7.221393 603548552
## 24 COL 2019 6439.964 1334981000 127065631 9.048456 753154204
## 25 COL 2020 5891.955 1122727656 123030174 9.913552 689647169
## 26 COL 2021 6457.169 1358180421 133092287 8.209150 735066988
## 27 COL 2022 6856.726 1651368252 169634637 8.401826 885893553
## 28 DEU 2014 41602.466 10729008199 23486335059 17.209715 215660926664
## 29 DEU 2015 41929.755 10117945574 24082893580 17.824375 199465124309
## 30 DEU 2016 42516.934 11295106926 28726730974 18.082701 205114885619
## 31 DEU 2017 43543.481 14364977912 31302945582 15.850579 195727088512
## 32 DEU 2018 43905.855 16367799982 36595706878 15.747434 209762237943
## 33 DEU 2019 44235.266 16959534532 37525236325 16.386941 208177564289
## 34 DEU 2020 42372.873 17265370702 37950983696 15.503196 182393711293
## 35 DEU 2021 44011.019 21774153930 59721878306 15.385105 211942359477
## 36 DEU 2022 44817.132 21276617907 53172744429 17.498151 245975238590
## 37 MEX 2014 9862.481 3834926121 349306590 20.222100 61566553721
## 38 MEX 2015 10021.239 3947789516 398759507 19.572702 60222704580
## 39 MEX 2016 10100.502 4299087599 522082664 20.654112 62489596572
## 40 MEX 2017 10193.773 4647755230 458086880 21.168097 69637771602
## 41 MEX 2018 10296.869 5073984251 509830134 20.891746 74827258646
## 42 MEX 2019 10159.445 5257637099 801349603 20.406858 75166404372
## 43 MEX 2020 9234.644 4240004176 576540631 21.507425 71010208559
## 44 MEX 2021 9728.057 5129414066 558064827 19.811341 74981570074
## 45 MEX 2022 10013.248 4883738882 585046980 20.534369 91195069526
## 46 PER 2014 6103.919 465593560 6000000 4.252186 206108972
## 47 PER 2015 6231.712 421403538 13684929 5.243404 302441321
## 48 PER 2016 6392.255 379133896 16881634 4.799713 186687352
## 49 PER 2017 6457.419 306382719 26290389 5.122323 211714389
## 50 PER 2018 6593.144 349222679 26182714 4.680200 210905058
## 51 PER 2019 6626.261 419229522 30291840 4.075004 179192646
## 52 PER 2020 5831.830 459604253 25098048 4.794834 172740806
## 53 PER 2021 6547.847 505899553 38013260 4.461645 222582995
## 54 PER 2022 6667.517 521794008 39533790 4.589466 263430270
## 55 USA 2014 55394.451 37562000000 116380000000 20.484398 176029439000
## 56 USA 2015 56572.919 35178000000 111151000000 21.390448 175321672223
## 57 USA 2016 57151.471 41974000000 112981000000 22.419294 173983447240
## 58 USA 2017 58151.702 44406000000 118147000000 19.265970 154602417969
## 59 USA 2018 59526.666 43291000000 115499000000 18.484270 153893519793
## 60 USA 2019 60750.990 43280000000 126166000000 18.685394 154028528002
## 61 USA 2020 59194.667 45483000000 123642000000 19.493976 141612133157
## 62 USA 2021 62680.250 48199000000 142803000000 19.901461 169281833346
## 63 USA 2022 63886.132 59417000000 152710000000 20.578983 191876058536
## pat_nres pat_res rd_gdp_pct researchers_pm sci_articles techs_pm
## 1 25683 4659 1.26971 903.20079 52367.03 969.9022
## 2 25578 4641 1.37093 NA 53504.24 NA
## 3 22810 5200 1.28637 NA 55719.12 NA
## 4 20178 5480 1.11750 NA 59143.03 NA
## 5 19877 4980 1.16769 NA 63130.11 NA
## 6 19932 5464 1.21096 NA 66506.17 NA
## 7 19058 5280 1.14526 NA 70490.47 NA
## 8 19566 4666 NA NA 73727.34 NA
## 9 NA NA NA NA 67030.59 NA
## 10 2653 452 0.37668 426.80476 5871.42 313.6781
## 11 2831 443 0.38296 455.29183 6161.44 284.7413
## 12 2521 386 0.37103 495.36438 6774.99 296.6805
## 13 2469 425 0.35679 494.62971 6921.66 303.4670
## 14 2694 406 0.36916 523.65457 7572.42 NA
## 15 2799 438 0.34243 507.31033 8012.18 NA
## 16 2433 372 0.33525 515.32026 9032.26 NA
## 17 2680 402 0.36041 638.83802 9990.88 NA
## 18 NA NA NA NA 9057.77 NA
## 19 1898 260 0.30317 59.05478 4993.33 NA
## 20 1921 321 0.36542 70.66931 5383.50 NA
## 21 1658 545 0.27051 91.26030 6417.19 NA
## 22 1777 595 0.26109 90.24729 6701.16 NA
## 23 1808 415 0.31233 NA 7531.98 NA
## 24 1735 422 0.32201 NA 8752.82 NA
## 25 1752 369 0.28940 NA 9599.78 NA
## 26 1855 432 NA NA 10300.11 NA
## 27 NA NA NA NA 9683.40 NA
## 28 17811 48154 2.87784 4336.01915 108118.23 1883.1957
## 29 19509 47384 2.93379 4755.29531 108479.28 1909.8538
## 30 19419 48480 2.94039 4839.84645 110725.54 1945.7051
## 31 19927 47785 3.04710 5058.39528 111426.61 2006.6533
## 32 21281 46617 3.11011 5209.21430 111308.13 NA
## 33 20802 46632 3.16701 5398.63462 113491.66 NA
## 34 19845 42260 3.13136 5390.05249 112179.45 NA
## 35 18747 39822 3.12882 5520.57631 119603.62 NA
## 36 NA NA 3.13236 5787.44181 113976.30 NA
## 37 14889 1246 0.41962 262.92059 14167.62 115.6411
## 38 16707 1364 0.41477 284.58114 14570.29 120.9261
## 39 16103 1310 0.37601 319.55110 15194.65 140.2975
## 40 15850 1334 0.31955 273.56869 16177.94 NA
## 41 14869 1555 0.29816 278.43618 17182.13 NA
## 42 14636 1305 0.27606 282.18884 18991.67 NA
## 43 13180 1132 0.29191 281.59287 20363.28 NA
## 44 15044 1117 0.27378 273.14814 21753.88 NA
## 45 NA NA 0.25782 273.66785 21027.10 NA
## 46 1204 83 0.10805 NA 745.61 NA
## 47 1182 67 0.11702 NA 899.29 NA
## 48 1091 72 0.12008 NA 1071.41 NA
## 49 1119 100 0.12085 NA 1389.87 NA
## 50 1133 89 0.12683 NA 1700.86 NA
## 51 1122 137 0.15696 NA 2332.04 NA
## 52 1142 125 0.17229 NA 2892.98 NA
## 53 1141 94 0.13752 NA 4064.96 NA
## 54 NA NA 0.16178 NA 4584.96 NA
## 55 293706 285096 2.70881 3824.01193 434412.33 NA
## 56 301075 288335 2.77328 3858.04036 436908.38 NA
## 57 310244 295327 2.83676 3809.11997 437546.04 NA
## 58 313052 293904 2.88357 3931.60035 440417.99 NA
## 59 312046 285095 2.99045 4226.79136 447164.41 NA
## 60 336340 285113 3.14704 4265.92083 451480.17 NA
## 61 327586 269586 3.42467 4464.09495 457586.90 NA
## 62 329229 262244 3.48313 4825.18040 472448.44 NA
## 63 NA NA 3.58623 NA 457335.25 NA
# 1) Identify NA patterns + omit rows with ANY NA in model vars
vars <- c(
"gdp_pc","ip_pay","ip_rec","ht_x_mfg_pct","ht_x_usd",
"pat_nres","pat_res","rd_gdp_pct","researchers_pm",
"sci_articles","techs_pm"
)
# 1) NA count + NA % per variable (safe)
na_report <- panel %>%
summarise(across(all_of(vars),
list(na_count = ~sum(is.na(.)),
na_pct = ~mean(is.na(.))*100))) %>%
pivot_longer(
cols = everything(),
names_to = c("variable","metric"),
names_pattern = "^(.*)_(na_count|na_pct)$",
values_to = "value"
) %>%
pivot_wider(names_from = metric, values_from = value) %>%
arrange(desc(na_pct))
na_report
## # A tibble: 11 × 3
## variable na_count na_pct
## <chr> <dbl> <dbl>
## 1 techs_pm 51 81.0
## 2 researchers_pm 24 38.1
## 3 pat_nres 7 11.1
## 4 pat_res 7 11.1
## 5 rd_gdp_pct 5 7.94
## 6 gdp_pc 0 0
## 7 ip_pay 0 0
## 8 ip_rec 0 0
## 9 ht_x_mfg_pct 0 0
## 10 ht_x_usd 0 0
## 11 sci_articles 0 0
panel_clean <- panel %>%
# 1. Drop specified columns
select(-techs_pm, -researchers_pm, -pat_nres, -pat_res) %>%
# 2. Median imputation for rd_gdp_pct
mutate(
rd_gdp_pct = ifelse(
is.na(rd_gdp_pct),
median(rd_gdp_pct, na.rm = TRUE),
rd_gdp_pct
)
)
panel_clean
## country year gdp_pc ip_pay ip_rec ht_x_mfg_pct ht_x_usd
## 1 BRA 2014 9338.342 6147326282 375097938 12.371169 8794690902
## 2 BRA 2015 8936.196 5517267029 581080520 14.485900 9433128684
## 3 BRA 2016 8577.843 5315108648 650833690 16.000164 10375539028
## 4 BRA 2017 8628.252 5402128504 642157301 14.311889 10715201949
## 5 BRA 2018 8722.335 5124101633 825475487 14.744242 11063190965
## 6 BRA 2019 8771.440 5246219108 641114074 14.066805 9392109980
## 7 BRA 2020 8435.010 4062060898 634291803 11.350094 5944951723
## 8 BRA 2021 8799.228 5222174071 705261778 9.001554 6350128610
## 9 BRA 2022 9032.084 7299729724 745138505 9.111346 7707210423
## 10 CHL 2014 13285.954 1614627539 40769469 7.091794 731680359
## 11 CHL 2015 13433.920 1635419969 41950301 6.722531 580229395
## 12 CHL 2016 13505.098 1730596607 38437446 8.424380 720871030
## 13 CHL 2017 13473.346 1709057253 50489332 7.106096 634869020
## 14 CHL 2018 13763.007 2034376888 46185212 6.911411 667789266
## 15 CHL 2019 13630.594 1878506475 45131485 7.632882 672148298
## 16 CHL 2020 12679.024 1511563046 39645102 15.975058 1368060499
## 17 CHL 2021 14051.471 1689634052 67186686 12.441775 1264185464
## 18 CHL 2022 14283.149 1475578872 65404498 6.936727 1400540500
## 19 COL 2014 6121.840 1401046313 116382418 8.119835 762816334
## 20 COL 2015 6248.515 1345633688 101196816 9.849346 823794429
## 21 COL 2016 6316.071 1257276813 99122533 10.185796 766933461
## 22 COL 2017 6309.677 1275576188 134001691 8.962363 692527611
## 23 COL 2018 6353.546 1412618281 143866520 7.221393 603548552
## 24 COL 2019 6439.964 1334981000 127065631 9.048456 753154204
## 25 COL 2020 5891.955 1122727656 123030174 9.913552 689647169
## 26 COL 2021 6457.169 1358180421 133092287 8.209150 735066988
## 27 COL 2022 6856.726 1651368252 169634637 8.401826 885893553
## 28 DEU 2014 41602.466 10729008199 23486335059 17.209715 215660926664
## 29 DEU 2015 41929.755 10117945574 24082893580 17.824375 199465124309
## 30 DEU 2016 42516.934 11295106926 28726730974 18.082701 205114885619
## 31 DEU 2017 43543.481 14364977912 31302945582 15.850579 195727088512
## 32 DEU 2018 43905.855 16367799982 36595706878 15.747434 209762237943
## 33 DEU 2019 44235.266 16959534532 37525236325 16.386941 208177564289
## 34 DEU 2020 42372.873 17265370702 37950983696 15.503196 182393711293
## 35 DEU 2021 44011.019 21774153930 59721878306 15.385105 211942359477
## 36 DEU 2022 44817.132 21276617907 53172744429 17.498151 245975238590
## 37 MEX 2014 9862.481 3834926121 349306590 20.222100 61566553721
## 38 MEX 2015 10021.239 3947789516 398759507 19.572702 60222704580
## 39 MEX 2016 10100.502 4299087599 522082664 20.654112 62489596572
## 40 MEX 2017 10193.773 4647755230 458086880 21.168097 69637771602
## 41 MEX 2018 10296.869 5073984251 509830134 20.891746 74827258646
## 42 MEX 2019 10159.445 5257637099 801349603 20.406858 75166404372
## 43 MEX 2020 9234.644 4240004176 576540631 21.507425 71010208559
## 44 MEX 2021 9728.057 5129414066 558064827 19.811341 74981570074
## 45 MEX 2022 10013.248 4883738882 585046980 20.534369 91195069526
## 46 PER 2014 6103.919 465593560 6000000 4.252186 206108972
## 47 PER 2015 6231.712 421403538 13684929 5.243404 302441321
## 48 PER 2016 6392.255 379133896 16881634 4.799713 186687352
## 49 PER 2017 6457.419 306382719 26290389 5.122323 211714389
## 50 PER 2018 6593.144 349222679 26182714 4.680200 210905058
## 51 PER 2019 6626.261 419229522 30291840 4.075004 179192646
## 52 PER 2020 5831.830 459604253 25098048 4.794834 172740806
## 53 PER 2021 6547.847 505899553 38013260 4.461645 222582995
## 54 PER 2022 6667.517 521794008 39533790 4.589466 263430270
## 55 USA 2014 55394.451 37562000000 116380000000 20.484398 176029439000
## 56 USA 2015 56572.919 35178000000 111151000000 21.390448 175321672223
## 57 USA 2016 57151.471 41974000000 112981000000 22.419294 173983447240
## 58 USA 2017 58151.702 44406000000 118147000000 19.265970 154602417969
## 59 USA 2018 59526.666 43291000000 115499000000 18.484270 153893519793
## 60 USA 2019 60750.990 43280000000 126166000000 18.685394 154028528002
## 61 USA 2020 59194.667 45483000000 123642000000 19.493976 141612133157
## 62 USA 2021 62680.250 48199000000 142803000000 19.901461 169281833346
## 63 USA 2022 63886.132 59417000000 152710000000 20.578983 191876058536
## rd_gdp_pct sci_articles
## 1 1.269710 52367.03
## 2 1.370930 53504.24
## 3 1.286370 55719.12
## 4 1.117500 59143.03
## 5 1.167690 63130.11
## 6 1.210960 66506.17
## 7 1.145260 70490.47
## 8 0.376345 73727.34
## 9 0.376345 67030.59
## 10 0.376680 5871.42
## 11 0.382960 6161.44
## 12 0.371030 6774.99
## 13 0.356790 6921.66
## 14 0.369160 7572.42
## 15 0.342430 8012.18
## 16 0.335250 9032.26
## 17 0.360410 9990.88
## 18 0.376345 9057.77
## 19 0.303170 4993.33
## 20 0.365420 5383.50
## 21 0.270510 6417.19
## 22 0.261090 6701.16
## 23 0.312330 7531.98
## 24 0.322010 8752.82
## 25 0.289400 9599.78
## 26 0.376345 10300.11
## 27 0.376345 9683.40
## 28 2.877840 108118.23
## 29 2.933790 108479.28
## 30 2.940390 110725.54
## 31 3.047100 111426.61
## 32 3.110110 111308.13
## 33 3.167010 113491.66
## 34 3.131360 112179.45
## 35 3.128820 119603.62
## 36 3.132360 113976.30
## 37 0.419620 14167.62
## 38 0.414770 14570.29
## 39 0.376010 15194.65
## 40 0.319550 16177.94
## 41 0.298160 17182.13
## 42 0.276060 18991.67
## 43 0.291910 20363.28
## 44 0.273780 21753.88
## 45 0.257820 21027.10
## 46 0.108050 745.61
## 47 0.117020 899.29
## 48 0.120080 1071.41
## 49 0.120850 1389.87
## 50 0.126830 1700.86
## 51 0.156960 2332.04
## 52 0.172290 2892.98
## 53 0.137520 4064.96
## 54 0.161780 4584.96
## 55 2.708810 434412.33
## 56 2.773280 436908.38
## 57 2.836760 437546.04
## 58 2.883570 440417.99
## 59 2.990450 447164.41
## 60 3.147040 451480.17
## 61 3.424670 457586.90
## 62 3.483130 472448.44
## 63 3.586230 457335.25
vars1 <- c(
"gdp_pc",
"ip_pay",
"ip_rec",
"ht_x_mfg_pct",
"ht_x_usd",
"rd_gdp_pct",
"sci_articles"
)
# 3) Create log variables where allowed (adds ln_* columns)
panel_log <- panel_clean %>%
# Create log variables only where valid
mutate(across(
all_of(vars1),
~ if (all(. > 0, na.rm = TRUE)) log(.) else NA_real_,
.names = "ln_{.col}"
)) %>%
# Keep identifiers + transformed OR original (if no log possible)
select(country, year,
any_of(paste0("ln_", vars1)),
all_of(vars1)) %>%
# Drop original variables that were successfully log-transformed
{
log_vars <- paste0("ln_", vars1)
orig_to_drop <- vars1[log_vars %in% names(.)]
select(., -any_of(orig_to_drop))
}
panel_log
## country year ln_gdp_pc ln_ip_pay ln_ip_rec ln_ht_x_mfg_pct ln_ht_x_usd
## 1 BRA 2014 9.141884 22.53928 19.74270 2.515369 22.89741
## 2 BRA 2015 9.097865 22.43115 20.18040 2.673176 22.96749
## 3 BRA 2016 9.056938 22.39382 20.29376 2.772599 23.06272
## 4 BRA 2017 9.062797 22.41006 20.28034 2.661091 23.09493
## 5 BRA 2018 9.073642 22.35722 20.53147 2.690853 23.12689
## 6 BRA 2019 9.079256 22.38077 20.27872 2.643818 22.96314
## 7 BRA 2020 9.040146 22.12496 20.26802 2.429226 22.50581
## 8 BRA 2021 9.082419 22.37618 20.37408 2.197397 22.57174
## 9 BRA 2022 9.108538 22.71110 20.42908 2.209520 22.76542
## 10 CHL 2014 9.494463 21.20237 17.52344 1.958938 20.41085
## 11 CHL 2015 9.505538 21.21517 17.55200 1.905465 20.17893
## 12 CHL 2016 9.510823 21.27173 17.46454 2.131130 20.39597
## 13 CHL 2017 9.508469 21.25921 17.73727 1.960953 20.26893
## 14 CHL 2018 9.529740 21.43346 17.64817 1.933174 20.31948
## 15 CHL 2019 9.520072 21.35374 17.62509 2.032465 20.32599
## 16 CHL 2020 9.447704 21.13641 17.49548 2.771029 21.03666
## 17 CHL 2021 9.550482 21.24778 18.02299 2.521060 20.95769
## 18 CHL 2022 9.566836 21.11232 17.99610 1.936830 21.06012
## 19 COL 2014 8.719618 21.06049 18.57239 2.094310 20.45253
## 20 COL 2015 8.740099 21.02013 18.43258 2.287405 20.52943
## 21 COL 2016 8.750853 20.95221 18.41187 2.320994 20.45791
## 22 COL 2017 8.749840 20.96666 18.71336 2.193034 20.35586
## 23 COL 2018 8.756768 21.06871 18.78440 1.977048 20.21834
## 24 COL 2019 8.770278 21.01218 18.66021 2.202594 20.43978
## 25 COL 2020 8.681343 20.83903 18.62794 2.293903 20.35169
## 26 COL 2021 8.772946 21.02941 18.70655 2.105249 20.41547
## 27 COL 2022 8.832985 21.22487 18.94916 2.128449 20.60211
## 28 DEU 2014 10.635915 23.09622 23.87968 2.845474 26.09697
## 29 DEU 2015 10.643751 23.03758 23.90477 2.880567 26.01891
## 30 DEU 2016 10.657658 23.14764 24.08109 2.894956 26.04684
## 31 DEU 2017 10.681515 23.38806 24.16698 2.763206 25.99999
## 32 DEU 2018 10.689803 23.51858 24.32320 2.756677 26.06924
## 33 DEU 2019 10.697278 23.55410 24.34828 2.796485 26.06166
## 34 DEU 2020 10.654264 23.57197 24.35956 2.741046 25.92943
## 35 DEU 2021 10.692195 23.80399 24.81296 2.733400 26.07958
## 36 DEU 2022 10.710346 23.78087 24.69681 2.862095 26.22850
## 37 MEX 2014 9.196493 22.06742 19.67146 3.006776 24.84338
## 38 MEX 2015 9.212462 22.09642 19.80387 2.974136 24.82132
## 39 MEX 2016 9.220340 22.18167 20.07334 3.027914 24.85827
## 40 MEX 2017 9.229532 22.25965 19.94257 3.052495 24.96657
## 41 MEX 2018 9.239595 22.34739 20.04959 3.039354 25.03845
## 42 MEX 2019 9.226159 22.38295 20.50181 3.015871 25.04297
## 43 MEX 2020 9.130717 22.16783 20.17256 3.068398 24.98609
## 44 MEX 2021 9.182769 22.35826 20.13999 2.986255 25.04051
## 45 MEX 2022 9.211664 22.30918 20.18720 3.022100 25.23627
## 46 PER 2014 8.716686 19.95882 15.60727 1.447433 19.14392
## 47 PER 2015 8.737406 19.85910 16.43181 1.656971 19.52740
## 48 PER 2016 8.762842 19.75340 16.64174 1.568556 19.04495
## 49 PER 2017 8.772985 19.54035 17.08471 1.633608 19.17075
## 50 PER 2018 8.793786 19.67122 17.08061 1.543341 19.16692
## 51 PER 2019 8.798796 19.85393 17.22639 1.404872 19.00397
## 52 PER 2020 8.671086 19.94588 17.03830 1.567539 18.96730
## 53 PER 2021 8.786892 20.04185 17.45345 1.495518 19.22081
## 54 PER 2022 8.805003 20.07278 17.49267 1.523764 19.38930
## 55 USA 2014 10.922235 24.34926 25.48013 3.019664 25.89392
## 56 USA 2015 10.943286 24.28369 25.43416 3.062944 25.88989
## 57 USA 2016 10.953460 24.46032 25.45049 3.109922 25.88223
## 58 USA 2017 10.970810 24.51664 25.49520 2.958340 25.76412
## 59 USA 2018 10.994180 24.49121 25.47253 2.916920 25.75953
## 60 USA 2019 11.014539 24.49096 25.56086 2.927742 25.76040
## 61 USA 2020 10.988587 24.54060 25.54066 2.970106 25.67636
## 62 USA 2021 11.045802 24.59860 25.68473 2.990793 25.85483
## 63 USA 2022 11.064858 24.80785 25.75181 3.024270 25.98012
## ln_rd_gdp_pct ln_sci_articles
## 1 0.2387885 10.866032
## 2 0.3154893 10.887516
## 3 0.2518243 10.928079
## 4 0.1110940 10.987714
## 5 0.1550274 11.052953
## 6 0.1914134 11.105050
## 7 0.1356317 11.163233
## 8 -0.9772490 11.208129
## 9 -0.9772490 11.112904
## 10 -0.9763593 8.677852
## 11 -0.9598247 8.726066
## 12 -0.9914724 8.820993
## 13 -1.0306079 8.842411
## 14 -0.9965251 8.932268
## 15 -1.0716880 8.988718
## 16 -1.0928788 9.108558
## 17 -1.0205130 9.209428
## 18 -0.9772490 9.111378
## 19 -1.1934616 8.515858
## 20 -1.0067079 8.591094
## 21 -1.3074462 8.766736
## 22 -1.3428901 8.810036
## 23 -1.1636950 8.926913
## 24 -1.1331727 9.077131
## 25 -1.2399455 9.169495
## 26 -0.9772490 9.239910
## 27 -0.9772490 9.178168
## 28 1.0570400 11.590981
## 29 1.0762951 11.594314
## 30 1.0785422 11.614810
## 31 1.1141903 11.621121
## 32 1.1346581 11.620058
## 33 1.1527879 11.639485
## 34 1.1414674 11.627855
## 35 1.1406559 11.691938
## 36 1.1417867 11.643746
## 37 -0.8684057 9.558714
## 38 -0.8800311 9.586740
## 39 -0.9781395 9.628699
## 40 -1.1408415 9.691404
## 41 -1.2101250 9.751625
## 42 -1.2871370 9.851756
## 43 -1.2313097 9.921489
## 44 -1.2954304 9.987547
## 45 -1.3554936 9.953567
## 46 -2.2251612 6.614203
## 47 -2.1454104 6.801606
## 48 -2.1195971 6.976731
## 49 -2.1132052 7.236965
## 50 -2.0649077 7.438889
## 51 -1.8517643 7.754499
## 52 -1.7585762 7.970042
## 53 -1.9839859 8.310159
## 54 -1.8215179 8.430537
## 55 0.9965094 12.981749
## 56 1.0200307 12.987479
## 57 1.0426626 12.988937
## 58 1.0590291 12.995480
## 59 1.0954239 13.010682
## 60 1.1464623 13.020287
## 61 1.2310051 13.033722
## 62 1.2479313 13.065684
## 63 1.2771015 13.033172
# 1) Make sure it's a plain data.frame (not grouped/tibble issues)
panel_log_df <- as.data.frame(panel_log)
# 2) Define indices + build regressors explicitly
id_vars <- c("country", "year")
y <- "ln_gdp_pc"
x_vars <- setdiff(names(panel_log_df), c(id_vars, y))
f_pool <- as.formula(paste(y, "~", paste(x_vars, collapse = " + ")))
# Modelo de Mínimos Cuadrados Ordinarios Agrupados /
# Pooled Ordinary Least Squares Model (Pooled OLS)
modelo_pool <- plm(
formula = f_pool,
data = panel_log_df,
index = id_vars,
model = "pooling"
)
summary(modelo_pool) # n = países; T= años; N= registros
## Pooling Model
##
## Call:
## plm(formula = f_pool, data = panel_log_df, model = "pooling",
## index = id_vars)
##
## Balanced Panel: n = 7, T = 9, N = 63
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.569493 -0.208640 -0.034578 0.201041 0.862127
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -1.854461 2.156392 -0.8600 0.3934650
## ln_ip_pay 0.632639 0.153949 4.1094 0.0001307 ***
## ln_ip_rec 0.192183 0.058352 3.2935 0.0017185 **
## ln_ht_x_mfg_pct -0.417738 0.217754 -1.9184 0.0601645 .
## ln_ht_x_usd -0.029553 0.062151 -0.4755 0.6362843
## ln_rd_gdp_pct 0.286145 0.100623 2.8437 0.0062135 **
## ln_sci_articles -0.461330 0.108157 -4.2654 7.754e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 44.421
## Residual Sum of Squares: 4.9508
## R-Squared: 0.88855
## Adj. R-Squared: 0.87661
## F-statistic: 74.4088 on 6 and 56 DF, p-value: < 2.22e-16
# Prueba Breusch–Pagan
# Compara Pooled vs Aleatorios
# Si p-value < 0.05, Pooled NO es adecuado, probar Aleatorios
# Si p-value > 0.05, usar Pooled
plmtest(modelo_pool, type = "bp")
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: f_pool
## chisq = 122.03, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Paso 9. Modelo de Efectos Fijos / Fixed Effects Model
model_fe <- plm(
formula = f_pool,
data = panel_log_df,
index = id_vars,
model = "within"
)
summary(model_fe)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = f_pool, data = panel_log_df, model = "within",
## index = id_vars)
##
## Balanced Panel: n = 7, T = 9, N = 63
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.080318 -0.016568 0.002416 0.019367 0.055303
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## ln_ip_pay 0.054511 0.027754 1.9641 0.055094 .
## ln_ip_rec 0.040076 0.020989 1.9093 0.061965 .
## ln_ht_x_mfg_pct -0.115875 0.038719 -2.9928 0.004287 **
## ln_ht_x_usd 0.072781 0.029502 2.4670 0.017096 *
## ln_rd_gdp_pct 0.037730 0.018612 2.0272 0.047988 *
## ln_sci_articles -0.022108 0.020811 -1.0623 0.293195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.078037
## Residual Sum of Squares: 0.045102
## R-Squared: 0.42204
## Adj. R-Squared: 0.28333
## F-statistic: 6.08527 on 6 and 50 DF, p-value: 7.7446e-05
# Paso 10. Prueba F
# Compara Fijos vs Pooled
# Si p-value < 0.05, usar Efectos Fijos
# Si p-value > 0.05, usar Pooled
pFtest(model_fe, modelo_pool)
##
## F test for individual effects
##
## data: f_pool
## F = 906.41, df1 = 6, df2 = 50, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Paso 11. Modelo de Efectos Aleatorios / Random Effects Model
model_re <- plm( formula = f_pool, data = panel_log_df, index = id_vars, model
= "random", random.method = "walhus")
summary(model_re)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = f_pool, data = panel_log_df, model = "random",
## random.method = "walhus", index = id_vars)
##
## Balanced Panel: n = 7, T = 9, N = 63
##
## Effects:
## var std.dev share
## idiosyncratic 0.02390 0.15460 0.304
## individual 0.05468 0.23385 0.696
## theta: 0.7848
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.1555495 -0.0638582 -0.0069229 0.0746218 0.1880467
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 3.653764 1.018218 3.5884 0.0003327 ***
## ln_ip_pay 0.123748 0.070639 1.7518 0.0798001 .
## ln_ip_rec 0.168751 0.046810 3.6051 0.0003121 ***
## ln_ht_x_mfg_pct -0.141134 0.101154 -1.3952 0.1629416
## ln_ht_x_usd 0.050677 0.049308 1.0278 0.3040595
## ln_rd_gdp_pct 0.125060 0.049469 2.5280 0.0114700 *
## ln_sci_articles -0.101994 0.052972 -1.9254 0.0541752 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2.1316
## Residual Sum of Squares: 0.45272
## R-Squared: 0.78762
## Adj. R-Squared: 0.76486
## Chisq: 207.677 on 6 DF, p-value: < 2.22e-16
# Paso 12. Prueba de Hausman
# Compara Fijos vs Aleatorios
# Si p-value < 0.05, usar Efectos Fijos
# Si p-value > 0.05, usar Efectos Aleatorios
phtest(model_fe, model_re)
##
## Hausman Test
##
## data: f_pool
## chisq = 38.332, df = 6, p-value = 9.675e-07
## alternative hypothesis: one model is inconsistent
# Paso 14. Prueba de Heterocedasticidad
# Evalúa si la varianza de los errores es constante.
# install.packages("lmtest")
library(lmtest)
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(model_fe)
##
## studentized Breusch-Pagan test
##
## data: model_fe
## BP = 23.95, df = 6, p-value = 0.0005334
# Interpretación:
# Si p-value < 0.05 → Existe heterocedasticidad (problema detectado) SI
# Si p-value > 0.05 → No hay evidencia de heterocedasticidad
# Paso 15. Prueba de Autocorrelación Serial
# Evalúa si los errores están correlacionados en el tiempo dentro de cada país. SI
# Prueba de Wooldridge (más apropiada para Efectos Fijos)
pwartest(model_fe)
##
## Wooldridge's test for serial correlation in FE panels
##
## data: model_fe
## F = 11.71, df1 = 1, df2 = 54, p-value = 0.001192
## alternative hypothesis: serial correlation
# Prueba Breusch-Godfrey para panel (más apropiada para Efectos Aleatorios)
# pbgtest(model_re)
# Interpretación:
# Si p-value < 0.05 → Existe autocorrelación serial (problema detectado)
# Si p-value > 0.05 → No hay evidencia de autocorrelación
# Paso 16. Corrección con Errores Estándar Robustos Clusterizados
# Corrige heterocedasticidad y autocorrelación dentro de cada país
modelo_robusto <- coeftest(model_fe,
vcov = vcovHC(model_fe,
method = "arellano",
type = "HC1",
cluster = "group"))
print(modelo_robusto)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## ln_ip_pay 0.054511 0.046775 1.1654 0.2493934
## ln_ip_rec 0.040076 0.038159 1.0502 0.2986629
## ln_ht_x_mfg_pct -0.115875 0.031738 -3.6510 0.0006244 ***
## ln_ht_x_usd 0.072781 0.012320 5.9075 3.047e-07 ***
## ln_rd_gdp_pct 0.037730 0.016704 2.2587 0.0282951 *
## ln_sci_articles -0.022108 0.030281 -0.7301 0.4687409
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interpretación:
# Los coeficientes NO cambian.
# Cambian los errores estándar, estadísticos t y p-values.
# Si una variable sigue siendo significativa después de la corrección,
# el resultado es estadísticamente más confiable.
El modelo de efectos fijos muestra que las exportaciones de alta tecnología en valor absoluto tienen un efecto positivo y altamente significativo sobre el PIB per cápita (β = 0.0728, p < 0.001), lo que implica que un aumento del 1% en estas exportaciones se asocia con un incremento de 0.073% en el PIB per cápita. Asimismo, el gasto en I+D presenta un efecto positivo y significativo (β = 0.0377, p = 0.028), confirmando el papel de la inversión en innovación en el crecimiento económico. En contraste, el porcentaje de exportaciones de alta tecnología dentro del total manufacturero muestra un efecto negativo significativo (β = −0.1159, p < 0.001), sugiriendo que una mayor proporción relativa no necesariamente se traduce en mayor ingreso per cápita. Finalmente, los pagos y recibos por propiedad intelectual y los artículos científicos no presentan efectos estadísticamente significativos (p > 0.24), lo que indica ausencia de evidencia de impacto directo en este modelo.
# Instalar paquetes y llamar librerías
# install.packages("readxl")
library(readxl)
# install.packages("plm")
library(plm)
# install.packages("dplyr")
library(dplyr)
# install.packages("glmmTMB")
library(fixest)
library(lmtest)
library(sandwich)
library(glmmTMB)
##
## Adjuntando el paquete: 'glmmTMB'
## The following objects are masked from 'package:sandwich':
##
## meatHC, sandwich
# Load dataset
patentes <- read_excel("C:\\Users\\almai\\Downloads\\patentes.xls")
str(patentes)
## tibble [2,260 × 13] (S3: tbl_df/tbl/data.frame)
## $ cusip : num [1:2260] 800 800 800 800 800 800 800 800 800 800 ...
## $ merger : num [1:2260] 0 0 0 0 0 0 0 0 0 0 ...
## $ employ : num [1:2260] 9.85 12.32 12.2 11.84 12.99 ...
## $ return : num [1:2260] 5.82 5.69 4.42 5.28 4.91 ...
## $ patents : num [1:2260] 22 34 31 32 40 60 57 77 38 5 ...
## $ patentsg: num [1:2260] 24 32 30 34 28 33 53 47 64 70 ...
## $ stckpr : num [1:2260] 47.6 57.9 33 38.5 35.1 ...
## $ rnd : num [1:2260] 2.56 3.1 3.27 3.24 3.78 ...
## $ rndeflt : num [1:2260] 2.56 2.91 2.8 2.52 2.78 ...
## $ rndstck : num [1:2260] 16.2 17.4 19.6 21.9 23.1 ...
## $ sales : num [1:2260] 344 436 535 567 631 ...
## $ sic : num [1:2260] 3740 3740 3740 3740 3740 3740 3740 3740 3740 3740 ...
## $ year : num [1:2260] 2012 2013 2014 2015 2016 ...
summary(patentes)
## cusip merger employ return
## Min. : 800 Min. :0.0000 Min. : 0.085 Min. :-73.022
## 1st Qu.:368514 1st Qu.:0.0000 1st Qu.: 1.227 1st Qu.: 5.128
## Median :501116 Median :0.0000 Median : 3.842 Median : 7.585
## Mean :514536 Mean :0.0177 Mean : 18.826 Mean : 8.003
## 3rd Qu.:754688 3rd Qu.:0.0000 3rd Qu.: 15.442 3rd Qu.: 10.501
## Max. :878555 Max. :1.0000 Max. :506.531 Max. : 48.675
## NA's :21 NA's :8
## patents patentsg stckpr rnd
## Min. : 0.0 Min. : 0.00 Min. : 0.1875 Min. : 0.0000
## 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 7.6250 1st Qu.: 0.6847
## Median : 3.0 Median : 4.00 Median : 16.5000 Median : 2.1456
## Mean : 22.9 Mean : 27.14 Mean : 22.6270 Mean : 29.3398
## 3rd Qu.: 15.0 3rd Qu.: 19.00 3rd Qu.: 29.2500 3rd Qu.: 11.9168
## Max. :906.0 Max. :1063.00 Max. :402.0000 Max. :1719.3535
## NA's :2
## rndeflt rndstck sales sic
## Min. : 0.0000 Min. : 0.1253 Min. : 1.222 Min. :2000
## 1st Qu.: 0.4788 1st Qu.: 5.1520 1st Qu.: 52.995 1st Qu.:2890
## Median : 1.4764 Median : 13.3532 Median : 174.065 Median :3531
## Mean : 19.7238 Mean : 163.8234 Mean : 1219.601 Mean :3333
## 3rd Qu.: 8.7527 3rd Qu.: 74.5625 3rd Qu.: 728.964 3rd Qu.:3661
## Max. :1000.7876 Max. :9755.3516 Max. :44224.000 Max. :9997
## NA's :157 NA's :3
## year
## Min. :2012
## 1st Qu.:2014
## Median :2016
## Mean :2016
## 3rd Qu.:2019
## Max. :2021
##
## 1) Basic type fixes + NA handling + panel keys
patentes_panel <- patentes %>%
mutate(
# Panel identifiers
cusip = as.character(cusip),
year = as.integer(year),
# Binary / categorical
merger = as.integer(merger),
merger = ifelse(is.na(merger), 0L, merger),
merger = factor(merger, levels = c(0, 1), labels = c("No", "Yes")),
sic = as.character(sic),
sic = factor(sic)
) %>%
# Drop rows missing panel keys
filter(!is.na(cusip), !is.na(year)) %>%
arrange(cusip, year)
## 2) Ensure dependent variables are usable Poisson counts
## (Poisson GLM expects non-negative integers; we coerce safely.)
to_count <- function(x) {
x_num <- suppressWarnings(as.numeric(x))
# Treat missing as 0 for the DV (change if you prefer dropping instead)
x_num[is.na(x_num)] <- 0
# Enforce non-negativity + integer count
x_num <- pmax(x_num, 0)
as.integer(round(x_num))
}
patentes_panel <- patentes_panel %>%
mutate(
patents_count = to_count(patents),
patentsg_count = to_count(patentsg)
)
# after you run the mutate() that creates ln_employ, return_z, ln_sales, etc.
PanelPatentes <- pdata.frame(patentes_panel, index = c("cusip", "year"), drop.index = FALSE)
dv <- "patentsg_count"
## Common RHS (adapt as needed)
rhs <- ~ merger + employ + return + stckpr + rndeflt + sales + rndstck
## 5) Pooled OLS
f_pool <- as.formula(paste(dv, paste(deparse(rhs), collapse = ""), sep = " "))
glm_pool <- glm(f_pool, family = poisson(link = "log"), data = PanelPatentes)
summary(glm_pool)
##
## Call:
## glm(formula = f_pool, family = poisson(link = "log"), data = PanelPatentes)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.434e+00 9.690e-03 251.223 < 2e-16 ***
## mergerYes 2.308e-01 3.065e-02 7.530 5.07e-14 ***
## employ 1.196e-02 6.569e-05 182.047 < 2e-16 ***
## return 1.551e-02 9.235e-04 16.797 < 2e-16 ***
## stckpr 1.071e-02 7.877e-05 135.917 < 2e-16 ***
## rndeflt -5.725e-03 7.250e-05 -78.966 < 2e-16 ***
## sales 1.193e-05 1.197e-06 9.965 < 2e-16 ***
## rndstck 2.515e-04 7.776e-06 32.345 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 173916 on 2082 degrees of freedom
## Residual deviance: 79247 on 2075 degrees of freedom
## (177 observations deleted due to missingness)
## AIC: 86268
##
## Number of Fisher Scoring iterations: 6
## Robust SE (cluster by firm = cusip) for pooled Poisson
pool_robust <- coeftest(glm_pool, vcov. = vcovCL(glm_pool, cluster = PanelPatentes$cusip, type = "HC1"))
print(pool_robust)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.4343e+00 1.3914e-01 17.4952 < 2.2e-16 ***
## mergerYes 2.3079e-01 2.6392e-01 0.8745 0.381865
## employ 1.1958e-02 1.2344e-03 9.6870 < 2.2e-16 ***
## return 1.5511e-02 1.2916e-02 1.2009 0.229788
## stckpr 1.0706e-02 2.4440e-03 4.3806 1.183e-05 ***
## rndeflt -5.7247e-03 1.4742e-03 -3.8834 0.000103 ***
## sales 1.1930e-05 3.5101e-05 0.3399 0.733954
## rndstck 2.5153e-04 1.2461e-04 2.0184 0.043546 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 8) LR test pooled vs Random Effects
# Interpretation:
# p-value < 0.05 -> random intercept is useful; pooled is likely too restrictive
# p-value > 0.05 -> pooled may be adequate
f_re <- as.formula(paste0(dv, " ", paste(deparse(rhs), collapse = ""), " + (1|cusip)"))
re_pois <- glmmTMB(f_re, family = poisson(link = "log"), data = PanelPatentes) # Random-effects Poisson
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, :
## NA/NaN function evaluation
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, :
## NA/NaN function evaluation
lr_pool_vs_re <- lrtest(glm_pool, re_pois)
## Warning in modelUpdate(objects[[i - 1]], objects[[i]]): original model was of
## class "glm", updated model is of class "glmmTMB"
print(lr_pool_vs_re)
## Likelihood ratio test
##
## Model 1: patentsg_count ~ merger + employ + return + stckpr + rndeflt +
## sales + rndstck
## Model 2: patentsg_count ~ merger + employ + return + stckpr + rndeflt +
## sales + rndstck + (1 | cusip)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -43126
## 2 9 -7336 1 71579 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The likelihood ratio test strongly rejects the pooled Poisson model in favor of the random-effects Poisson model (p < 0.001)
# Unobserved firm-level heterogeneity is highly significant and that modeling firm-specific effects is necessary
Hay heterogeneidad no observada por firma gigantesca. El modelo pooled es demasiado restrictivo. Un intercepto aleatorio por firma mejora muchísimo el ajuste.
## 9) Fixed Effects Poisson (firm FE + year FE)
fe_pois <- fepois(
as.formula(paste0(dv, " ", paste(deparse(rhs), collapse = ""), " | cusip + year")),
data = PanelPatentes
)
## NOTE: 177 observations removed because of NA values (RHS: 177).
summary(fe_pois)
## Poisson estimation, Dep. Var.: patentsg_count
## Observations: 2,083
## Fixed-effects: cusip: 215, year: 10
## Standard-errors: IID
## Estimate Std. Error z value Pr(>|z|)
## mergerYes -0.00517113 0.04390760 -0.117773 0.9062476
## employ 0.00087310 0.00047568 1.835470 0.0664360 .
## return -0.00393315 0.00234700 -1.675820 0.0937735 .
## stckpr 0.00029310 0.00018019 1.626602 0.1038217
## rndeflt -0.00062926 0.00024272 -2.592489 0.0095284 **
## sales 0.00000248 0.00000204 1.216852 0.2236604
## rndstck -0.00000186 0.00001189 -0.156536 0.8756109
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -6,261.9 Adj. Pseudo R2: 0.928236
## BIC: 14,288.9 Squared Cor.: 0.969274
En coeficientes: - rndeflt sale significativo negativo: p = 0.0095 (**). - el resto sale no significativo (al 5%); employ y return quedan marginales (10% aprox.).
En FE, los coeficientes se identifican solo con variación within-firma. Si muchas variables cambian poco dentro de cada firma, o si el efecto real opera con rezagos >1, los coeficientes salen no significativos.
## 10) Use Likelyhood ratio test (nested models) to compare pooled Poisson vs FE Poisson
# Interpretation:
# p-value < 0.05 -> FE improves fit; prefer FE over pooled
# p-value > 0.05 -> pooled may be sufficient
pool_tmb <- glmmTMB(
patentsg_count ~ merger + sic + employ + return + stckpr + rndeflt + rndstck + sales,
family = poisson(link="log"),
data = PanelPatentes
)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, :
## NA/NaN function evaluation
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, :
## NA/NaN function evaluation
## Warning in finalizeTMB(TMBStruc, obj, fit, h, data.tmb.old): Model convergence
## problem; function evaluation limit reached without convergence (9). See
## vignette('troubleshooting'), help('diagnose')
re_tmb <- glmmTMB(
patentsg_count ~ merger + sic + employ + return + stckpr + rndeflt + rndstck + sales + (1|cusip),
family = poisson(link="log"),
data = PanelPatentes
)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, :
## NA/NaN function evaluation
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, :
## NA/NaN function evaluation
lmtest::lrtest(pool_tmb, re_tmb)
## Likelihood ratio test
##
## Model 1: patentsg_count ~ merger + sic + employ + return + stckpr + rndeflt +
## rndstck + sales
## Model 2: patentsg_count ~ merger + sic + employ + return + stckpr + rndeflt +
## rndstck + sales + (1 | cusip)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 91 -23589.3
## 2 92 -7261.8 1 32655 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# pooled Poisson is not adequate. There is massive firm-level heterogeneity → you need a panel structure (p < 2.2e-16)
## 11) Random Effects Poisson (already fitted as re_pois)
summary(re_pois)
## Family: poisson ( log )
## Formula:
## patentsg_count ~ merger + employ + return + stckpr + rndeflt +
## sales + rndstck + (1 | cusip)
## Data: PanelPatentes
##
## AIC BIC logLik -2*log(L) df.resid
## 14690.8 14741.6 -7336.4 14672.8 2074
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## cusip (Intercept) 3.375 1.837
## Number of obs: 2083, groups: cusip, 215
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.750e+00 1.283e-01 13.639 < 2e-16 ***
## mergerYes 1.538e-01 4.043e-02 3.805 0.000142 ***
## employ 1.444e-03 7.895e-04 1.829 0.067439 .
## return -5.747e-03 2.380e-03 -2.415 0.015748 *
## stckpr 3.369e-04 1.741e-04 1.935 0.053011 .
## rndeflt -7.960e-04 4.970e-04 -1.602 0.109207
## sales -1.298e-05 1.879e-05 -0.691 0.489629
## rndstck -3.025e-05 3.199e-05 -0.945 0.344439
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Bajo la suposición RE (que el efecto firma no se correlaciona con X), se ve efecto de merger y return. Pero esta interpretación no es confiable si RE es inconsistente.
## 12) Hausman analog (FE vs RE)
# Interpretation (rule-of-thumb):
# p-value < 0.05 -> prefer FE (RE likely inconsistent)
# p-value > 0.05 -> RE may be acceptable
## FE (fixest) coefficients and robust vcov
b_fe <- coef(fe_pois)
V_fe <- vcov(fe_pois, cluster = "cusip")
## RE (glmmTMB) fixed effects and vcov for the conditional model
b_re <- fixef(re_pois)$cond
V_re <- vcov(re_pois)$cond
V_re <- as.matrix(V_re)
## Keep only common slope coefficients (exclude intercept if FE has none)
common_names <- intersect(names(b_fe), names(b_re))
common_names <- setdiff(common_names, "(Intercept)")
b_diff <- b_fe[common_names] - b_re[common_names]
V_fe_c <- V_fe[common_names, common_names, drop = FALSE]
V_re_c <- V_re[common_names, common_names, drop = FALSE]
## Hausman-style statistic (may fail if V_fe - V_re not invertible)
V_diff <- V_fe_c - V_re_c
hausman_stat <- tryCatch(
as.numeric(t(b_diff) %*% solve(V_diff) %*% b_diff),
error = function(e) NA_real_
)
hausman_df <- length(common_names)
hausman_p <- if (!is.na(hausman_stat)) pchisq(hausman_stat, df = hausman_df, lower.tail = FALSE) else NA_real_
cat("\nApprox Poisson Hausman-style check\n")
##
## Approx Poisson Hausman-style check
cat("stat =", hausman_stat, " df =", hausman_df, " p-value =", hausman_p, "\n")
## stat = 43.28148 df = 7 p-value = 2.943096e-07
if (is.na(hausman_stat)) {
cat("\nNOTE: V_fe - V_re was not invertible (common in practice).\n",
"In that case, decide via coefficient stability + theory: FE is safer if firm effects correlate with regressors.\n", sep = "")
}
# rejects the null hypothesis that the random-effects estimator is consistent (p = 1.278684e-07)
Rechazas con mucha fuerza la hipótesis de consistencia de RE. Implica que los efectos no observados de firma sí están correlacionados con tus regresores.
# Cluster-robust inference for FE Poisson
fe_robust_firm <- coeftest(fe_pois, vcov = vcov(fe_pois, cluster = "cusip"))
print(fe_robust_firm)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## mergerYes -5.1711e-03 1.2995e-01 -0.0398 0.9683
## employ 8.7310e-04 1.2361e-03 0.7063 0.4801
## return -3.9331e-03 7.8813e-03 -0.4990 0.6178
## stckpr 2.9310e-04 4.7652e-04 0.6151 0.5386
## rndeflt -6.2926e-04 6.8007e-04 -0.9253 0.3549
## sales 2.4785e-06 8.0202e-06 0.3090 0.7573
## rndstck -1.8618e-06 4.0314e-05 -0.0462 0.9632
# Two-way clustering (firm + year) if desired
fe_robust_2way <- coeftest(fe_pois, vcov = vcov(fe_pois, cluster = c("cusip", "year")))
print(fe_robust_2way)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## mergerYes -5.1711e-03 1.3541e-01 -0.0382 0.9695
## employ 8.7310e-04 1.3380e-03 0.6525 0.5141
## return -3.9331e-03 7.8249e-03 -0.5026 0.6153
## stckpr 2.9310e-04 5.5431e-04 0.5288 0.5970
## rndeflt -6.2926e-04 7.1852e-04 -0.8758 0.3813
## sales 2.4785e-06 7.6846e-06 0.3225 0.7471
## rndstck -1.8618e-06 5.0956e-05 -0.0365 0.9709
# Interpretation
# Main conclusion: None of the regressors are statistically significant after proper correction for: firm fixed effects
En el modelo pooled Poisson se observan asociaciones estadísticamente significativas entre varias covariables y el número de patentes otorgadas. Sin embargo, pruebas de razón de verosimilitud indican una heterogeneidad no observada sustancial a nivel firma, y la prueba tipo Hausman rechaza fuertemente la consistencia del estimador de efectos aleatorios. Por lo tanto, se privilegia el modelo Poisson con efectos fijos por firma y año. Al emplear errores estándar robustos clusterizados (firma y firma+año), la evidencia estadística para la mayoría de los regresores desaparece, sugiriendo que la variación explicativa relevante se concentra principalmente entre firmas y que las covariables presentan señal limitada en la variación within-firma, además de potencial sobredispersión.
Entonces, employ, sales y stckpr se mueven poco dentro de firma relativo a diferencias estructurales entre firmas. No obstante, el uso del modelo Poisson no es correcto debido a la variable endógena que tiene sobredispersión enorme (y con ceros). Hay posibilidad de después emplear modelos como Negative Binomial FE y Poisson QMLE (con inclusión de rezagos)