# =======================================
# 📘 PANEL DATA ANALYSIS – Viviana
# Estadísticas descriptivas, correlación y regresiones de panel
# =======================================
# Cargar librerías
library(plm)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
##
## between, lag, lead
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(readr)
# ========================
# 1️⃣ CARGA Y LIMPIEZA DE DATOS
# ========================
# Leer CSV
data <- read_csv("test_data.csv")
## Rows: 75 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (7): FirmID, YEAR, SP, ROA, TDR, GROW, SIZE
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Variables clave
variables <- c("ROA", "SP", "TDR", "SIZE", "GROW")
# Eliminar columnas vacías
data <- data[, !grepl("Unnamed", names(data))]
# Revisar NA y ceros
cat("\n--- NA Check ---\n")
##
## --- NA Check ---
print(sapply(data[, variables], function(x) sum(is.na(x))))
## ROA SP TDR SIZE GROW
## 0 0 0 0 0
cat("\n--- Zero/Negative Check ---\n")
##
## --- Zero/Negative Check ---
cat("SP (<=0):", sum(data$SP <= 0, na.rm = TRUE), "\n")
## SP (<=0): 0
cat("TDR (<=0):", sum(data$TDR <= 0, na.rm = TRUE), "\n")
## TDR (<=0): 15
# Transformaciones logarítmicas seguras
data <- data %>%
mutate(
lnSP = log(ifelse(SP > 0, SP, 1)), # log seguro
lnTDR = log1p(TDR) # log1p maneja ceros
)
variables_final <- c("ROA", "lnSP", "lnTDR", "SIZE", "GROW")
# Eliminar duplicados
data <- data %>%
distinct(FirmID, YEAR, .keep_all = TRUE)
# Convertir en panel
pdata <- pdata.frame(data, index = c("FirmID", "YEAR"))
# ========================
# 2️⃣ ESTADÍSTICAS DESCRIPTIVAS
# ========================
cat("\n===== 📈 Descriptive Statistics =====\n")
##
## ===== 📈 Descriptive Statistics =====
desc_stats <- describe(pdata[, variables_final])
print(desc_stats)
## vars n mean sd median trimmed mad min max range
## ROA 1 75 -0.04 0.11 -0.04 -0.04 0.09 -0.38 0.17 0.55
## lnSP 2 75 4.41 0.83 4.38 4.45 0.93 1.86 5.86 4.00
## lnTDR 3 75 0.24 0.21 0.24 0.22 0.16 0.00 0.98 0.98
## SIZE 4 75 4580.05 3526.26 3803.90 4133.76 3017.09 -111.40 16847.40 16958.80
## GROW 5 75 0.33 0.21 0.27 0.31 0.17 0.00 1.06 1.06
## skew kurtosis se
## ROA -0.30 0.29 0.01
## lnSP -0.42 -0.20 0.10
## lnTDR 1.35 2.54 0.02
## SIZE 1.23 1.53 407.18
## GROW 1.10 1.17 0.02
# ========================
# 3️⃣ MATRIZ DE CORRELACIÓN
# ========================
cat("\n===== 🔗 Correlation Matrix =====\n")
##
## ===== 🔗 Correlation Matrix =====
cor_matrix <- cor(pdata[, variables_final])
print(cor_matrix)
## ROA lnSP lnTDR SIZE GROW
## ROA 1.00000000 0.30402531 -0.09996249 0.2613324 -0.39280084
## lnSP 0.30402531 1.00000000 0.15344932 0.1847300 0.02938467
## lnTDR -0.09996249 0.15344932 1.00000000 -0.1416913 0.02466471
## SIZE 0.26133238 0.18473004 -0.14169131 1.0000000 -0.14580365
## GROW -0.39280084 0.02938467 0.02466471 -0.1458037 1.00000000
# ========================
# 4️⃣ REGRESIONES DE PANEL (EFECTO FIJO) SIN LAG
# ========================
model_roa <- plm(ROA ~ lnTDR + SIZE + GROW, data = pdata, model = "within")
summary(model_roa)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = ROA ~ lnTDR + SIZE + GROW, data = pdata, model = "within")
##
## Balanced Panel: n = 15, T = 5, N = 75
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.20649547 -0.01300580 0.00061975 0.01300579 0.14866893
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## lnTDR 3.2576e-01 1.1808e-01 2.7589 0.0077831 **
## SIZE 1.9532e-05 5.5236e-06 3.5360 0.0008146 ***
## GROW -8.7846e-02 4.5040e-02 -1.9504 0.0560507 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.19589
## Residual Sum of Squares: 0.14803
## R-Squared: 0.24429
## Adj. R-Squared: 0.018901
## F-statistic: 6.14188 on 3 and 57 DF, p-value: 0.001081
model_lntdr1 <- plm(lnTDR ~ ROA + lnSP + SIZE + GROW, data = pdata, model = "within")
summary(model_lntdr1)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = lnTDR ~ ROA + lnSP + SIZE + GROW, data = pdata,
## model = "within")
##
## Balanced Panel: n = 15, T = 5, N = 75
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.1192850 -0.0228558 -0.0047737 0.0244617 0.1528075
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## ROA 3.4531e-01 1.3288e-01 2.5986 0.01194 *
## lnSP 1.4512e-02 1.7427e-02 0.8327 0.40854
## SIZE -3.3327e-05 4.8900e-06 -6.8154 6.983e-09 ***
## GROW 6.7788e-02 4.8515e-02 1.3973 0.16785
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.31664
## Residual Sum of Squares: 0.16232
## R-Squared: 0.48735
## Adj. R-Squared: 0.32257
## F-statistic: 13.3092 on 4 and 56 DF, p-value: 1.0981e-07
model_lnsp <- plm(lnSP ~ lnTDR + SIZE + GROW, data = pdata, model = "within")
summary(model_lnsp)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = lnSP ~ lnTDR + SIZE + GROW, data = pdata, model = "within")
##
## Balanced Panel: n = 15, T = 5, N = 75
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.206008 -0.138078 0.043657 0.226893 1.108498
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## lnTDR 1.1099e+00 9.4727e-01 1.1717 0.24618
## SIZE 1.0107e-04 4.4313e-05 2.2808 0.02632 *
## GROW -3.3081e-01 3.6133e-01 -0.9155 0.36377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 10.704
## Residual Sum of Squares: 9.5276
## R-Squared: 0.10988
## Adj. R-Squared: -0.15559
## F-statistic: 2.34552 on 3 and 57 DF, p-value: 0.082384
model_lntdr2 <- plm(lnTDR ~ lnSP + SIZE + GROW, data = pdata, model = "within")
summary(model_lntdr2)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = lnTDR ~ lnSP + SIZE + GROW, data = pdata, model = "within")
##
## Balanced Panel: n = 15, T = 5, N = 75
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.1325112 -0.0297014 -0.0055332 0.0243262 0.1607501
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## lnSP 2.1191e-02 1.8085e-02 1.1717 0.2462
## SIZE -3.0286e-05 4.9817e-06 -6.0795 1.065e-07 ***
## GROW 4.3601e-02 4.9959e-02 0.8727 0.3865
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.31664
## Residual Sum of Squares: 0.1819
## R-Squared: 0.42554
## Adj. R-Squared: 0.25421
## F-statistic: 14.0744 on 3 and 57 DF, p-value: 5.6052e-07
# ========================
# 4️⃣ REGRESIONES DE PANEL (EFECTO FIJO) CON LAG -1
# ========================
model_roa_lag <- plm(
ROA ~ lag(lnTDR,1) + lag(SIZE,1) + lag(GROW,1),
data = pdata, model = "within"
)
summary(model_roa_lag)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = ROA ~ lag(lnTDR, 1) + lag(SIZE, 1) + lag(GROW,
## 1), data = pdata, model = "within")
##
## Unbalanced Panel: n = 15, T = 4-5, N = 74
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.246313 -0.012511 -0.002149 0.015097 0.176181
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## lag(lnTDR, 1) -9.3162e-02 5.4325e-02 -1.7149 0.09189 .
## lag(SIZE, 1) 1.0955e-06 3.7846e-06 0.2895 0.77329
## lag(GROW, 1) -3.0869e-02 4.6697e-02 -0.6611 0.51129
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.19587
## Residual Sum of Squares: 0.17936
## R-Squared: 0.084315
## Adj. R-Squared: -0.19366
## F-statistic: 1.7188 on 3 and 56 DF, p-value: 0.17356
model_lntdr1_lag <- plm(
lnTDR ~ lag(ROA,1) + lag(lnSP,1) + lag(SIZE,1) + lag(GROW,1),
data = pdata, model = "within"
)
summary(model_lntdr1_lag)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = lnTDR ~ lag(ROA, 1) + lag(lnSP, 1) + lag(SIZE,
## 1) + lag(GROW, 1), data = pdata, model = "within")
##
## Unbalanced Panel: n = 15, T = 4-5, N = 74
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.167383 -0.040176 -0.005186 0.031590 0.151803
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## lag(ROA, 1) 1.5755e-01 1.2096e-01 1.3025 0.1982
## lag(lnSP, 1) 1.6811e-04 1.5898e-02 0.0106 0.9916
## lag(SIZE, 1) 4.6426e-06 4.3188e-06 1.0750 0.2871
## lag(GROW, 1) -4.1264e-02 6.1814e-02 -0.6676 0.5072
##
## Total Sum of Squares: 0.31007
## Residual Sum of Squares: 0.27716
## R-Squared: 0.10615
## Adj. R-Squared: -0.18639
## F-statistic: 1.63284 on 4 and 55 DF, p-value: 0.17906
model_lnsp_lag <- plm(
lnSP ~ lag(lnTDR,1) + lag(SIZE,1) + lag(GROW,1),
data = pdata, model = "within"
)
summary(model_lnsp_lag)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = lnSP ~ lag(lnTDR, 1) + lag(SIZE, 1) + lag(GROW,
## 1), data = pdata, model = "within")
##
## Unbalanced Panel: n = 15, T = 4-5, N = 74
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.136240 -0.241733 0.062743 0.215606 1.221575
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## lag(lnTDR, 1) -2.9267e-01 4.0479e-01 -0.7230 0.4727
## lag(SIZE, 1) 9.7956e-06 2.8200e-05 0.3474 0.7296
## lag(GROW, 1) -3.6626e-01 3.4795e-01 -1.0526 0.2970
##
## Total Sum of Squares: 10.478
## Residual Sum of Squares: 9.9581
## R-Squared: 0.049599
## Adj. R-Squared: -0.23892
## F-statistic: 0.974172 on 3 and 56 DF, p-value: 0.41147
model_lntdr2_lag <- plm(
lnTDR ~ lag(ROA,1) + lag(lnSP,1) + lag(SIZE,1) + lag(GROW,1),
data = pdata, model = "within"
)
summary(model_lntdr2_lag)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = lnTDR ~ lag(ROA, 1) + lag(lnSP, 1) + lag(SIZE,
## 1) + lag(GROW, 1), data = pdata, model = "within")
##
## Unbalanced Panel: n = 15, T = 4-5, N = 74
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.167383 -0.040176 -0.005186 0.031590 0.151803
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## lag(ROA, 1) 1.5755e-01 1.2096e-01 1.3025 0.1982
## lag(lnSP, 1) 1.6811e-04 1.5898e-02 0.0106 0.9916
## lag(SIZE, 1) 4.6426e-06 4.3188e-06 1.0750 0.2871
## lag(GROW, 1) -4.1264e-02 6.1814e-02 -0.6676 0.5072
##
## Total Sum of Squares: 0.31007
## Residual Sum of Squares: 0.27716
## R-Squared: 0.10615
## Adj. R-Squared: -0.18639
## F-statistic: 1.63284 on 4 and 55 DF, p-value: 0.17906
# ========================
# 5️⃣ TABLAS SIMPLES PARA COPY/PASTE
# ========================
# Descriptivos
write.csv(desc_stats, "descriptive_stats.csv", row.names = FALSE)
# Correlación
write.csv(cor_matrix, "correlation_matrix.csv")
# Función para extraer coeficientes
coefs <- function(mod) {
data.frame(
Variable = rownames(summary(mod)$coefficients),
Estimate = summary(mod)$coefficients[,1],
StdError = summary(mod)$coefficients[,2],
tValue = summary(mod)$coefficients[,3],
row.names = NULL
)
}
# ==========================
# Exportar regresiones SIN LAG
# ==========================
write.csv(coefs(model_roa), "reg_roa.csv", row.names = FALSE)
write.csv(coefs(model_lnsp), "reg_lnsp.csv", row.names = FALSE)
write.csv(coefs(model_lntdr1), "reg_lntdr1.csv", row.names = FALSE)
write.csv(coefs(model_lntdr2), "reg_lntdr2.csv", row.names = FALSE)
# ==========================
# Exportar regresiones CON LAG (-1)
# ==========================
write.csv(coefs(model_roa_lag), "reg_roa_lag.csv", row.names = FALSE)
write.csv(coefs(model_lnsp_lag), "reg_lnsp_lag.csv", row.names = FALSE)
write.csv(coefs(model_lntdr1_lag), "reg_lntdr1_lag.csv", row.names = FALSE)
write.csv(coefs(model_lntdr2_lag), "reg_lntdr2_lag.csv", row.names = FALSE)
cat("\n✅ Tablas generadas: descriptivos, correlación y 8 regresiones de panel.\n")
##
## ✅ Tablas generadas: descriptivos, correlación y 8 regresiones de panel.