# =======================================
# 📘 PANEL DATA ANALYSIS – Viviana
# Estadísticas descriptivas, correlación y regresiones de panel
# =======================================
# Cargar librerías
library(plm)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
## 
##     between, lag, lead
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(readr)
# ========================
# 1️⃣ CARGA Y LIMPIEZA DE DATOS
# ========================

# Leer CSV
data <- read_csv("test_data.csv")
## Rows: 75 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (7): FirmID, YEAR, SP, ROA, TDR, GROW, SIZE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Variables clave
variables <- c("ROA", "SP", "TDR", "SIZE", "GROW")

# Eliminar columnas vacías
data <- data[, !grepl("Unnamed", names(data))]

# Revisar NA y ceros
cat("\n--- NA Check ---\n")
## 
## --- NA Check ---
print(sapply(data[, variables], function(x) sum(is.na(x))))
##  ROA   SP  TDR SIZE GROW 
##    0    0    0    0    0
cat("\n--- Zero/Negative Check ---\n")
## 
## --- Zero/Negative Check ---
cat("SP (<=0):", sum(data$SP <= 0, na.rm = TRUE), "\n")
## SP (<=0): 0
cat("TDR (<=0):", sum(data$TDR <= 0, na.rm = TRUE), "\n")
## TDR (<=0): 15
# Transformaciones logarítmicas seguras
data <- data %>%
  mutate(
    lnSP = log(ifelse(SP > 0, SP, 1)),   # log seguro
    lnTDR = log1p(TDR)                    # log1p maneja ceros
  )

variables_final <- c("ROA", "lnSP", "lnTDR", "SIZE", "GROW")

# Eliminar duplicados
data <- data %>%
  distinct(FirmID, YEAR, .keep_all = TRUE)

# Convertir en panel
pdata <- pdata.frame(data, index = c("FirmID", "YEAR"))
# ========================
# 2️⃣ ESTADÍSTICAS DESCRIPTIVAS
# ========================

cat("\n===== 📈 Descriptive Statistics =====\n")
## 
## ===== 📈 Descriptive Statistics =====
desc_stats <- describe(pdata[, variables_final])
print(desc_stats)
##       vars  n    mean      sd  median trimmed     mad     min      max    range
## ROA      1 75   -0.04    0.11   -0.04   -0.04    0.09   -0.38     0.17     0.55
## lnSP     2 75    4.41    0.83    4.38    4.45    0.93    1.86     5.86     4.00
## lnTDR    3 75    0.24    0.21    0.24    0.22    0.16    0.00     0.98     0.98
## SIZE     4 75 4580.05 3526.26 3803.90 4133.76 3017.09 -111.40 16847.40 16958.80
## GROW     5 75    0.33    0.21    0.27    0.31    0.17    0.00     1.06     1.06
##        skew kurtosis     se
## ROA   -0.30     0.29   0.01
## lnSP  -0.42    -0.20   0.10
## lnTDR  1.35     2.54   0.02
## SIZE   1.23     1.53 407.18
## GROW   1.10     1.17   0.02
# ========================
# 3️⃣ MATRIZ DE CORRELACIÓN
# ========================

cat("\n===== 🔗 Correlation Matrix =====\n")
## 
## ===== 🔗 Correlation Matrix =====
cor_matrix <- cor(pdata[, variables_final])
print(cor_matrix)
##               ROA       lnSP       lnTDR       SIZE        GROW
## ROA    1.00000000 0.30402531 -0.09996249  0.2613324 -0.39280084
## lnSP   0.30402531 1.00000000  0.15344932  0.1847300  0.02938467
## lnTDR -0.09996249 0.15344932  1.00000000 -0.1416913  0.02466471
## SIZE   0.26133238 0.18473004 -0.14169131  1.0000000 -0.14580365
## GROW  -0.39280084 0.02938467  0.02466471 -0.1458037  1.00000000
# ========================
# 4️⃣ REGRESIONES DE PANEL (EFECTO FIJO) SIN LAG
# ========================

model_roa <- plm(ROA ~ lnTDR + SIZE + GROW, data = pdata, model = "within")
summary(model_roa)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = ROA ~ lnTDR + SIZE + GROW, data = pdata, model = "within")
## 
## Balanced Panel: n = 15, T = 5, N = 75
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -0.20649547 -0.01300580  0.00061975  0.01300579  0.14866893 
## 
## Coefficients:
##          Estimate  Std. Error t-value  Pr(>|t|)    
## lnTDR  3.2576e-01  1.1808e-01  2.7589 0.0077831 ** 
## SIZE   1.9532e-05  5.5236e-06  3.5360 0.0008146 ***
## GROW  -8.7846e-02  4.5040e-02 -1.9504 0.0560507 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    0.19589
## Residual Sum of Squares: 0.14803
## R-Squared:      0.24429
## Adj. R-Squared: 0.018901
## F-statistic: 6.14188 on 3 and 57 DF, p-value: 0.001081
model_lntdr1 <- plm(lnTDR ~ ROA + lnSP + SIZE + GROW, data = pdata, model = "within")
summary(model_lntdr1)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = lnTDR ~ ROA + lnSP + SIZE + GROW, data = pdata, 
##     model = "within")
## 
## Balanced Panel: n = 15, T = 5, N = 75
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -0.1192850 -0.0228558 -0.0047737  0.0244617  0.1528075 
## 
## Coefficients:
##         Estimate  Std. Error t-value  Pr(>|t|)    
## ROA   3.4531e-01  1.3288e-01  2.5986   0.01194 *  
## lnSP  1.4512e-02  1.7427e-02  0.8327   0.40854    
## SIZE -3.3327e-05  4.8900e-06 -6.8154 6.983e-09 ***
## GROW  6.7788e-02  4.8515e-02  1.3973   0.16785    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    0.31664
## Residual Sum of Squares: 0.16232
## R-Squared:      0.48735
## Adj. R-Squared: 0.32257
## F-statistic: 13.3092 on 4 and 56 DF, p-value: 1.0981e-07
model_lnsp <- plm(lnSP ~ lnTDR + SIZE + GROW, data = pdata, model = "within")
summary(model_lnsp)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = lnSP ~ lnTDR + SIZE + GROW, data = pdata, model = "within")
## 
## Balanced Panel: n = 15, T = 5, N = 75
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -1.206008 -0.138078  0.043657  0.226893  1.108498 
## 
## Coefficients:
##          Estimate  Std. Error t-value Pr(>|t|)  
## lnTDR  1.1099e+00  9.4727e-01  1.1717  0.24618  
## SIZE   1.0107e-04  4.4313e-05  2.2808  0.02632 *
## GROW  -3.3081e-01  3.6133e-01 -0.9155  0.36377  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    10.704
## Residual Sum of Squares: 9.5276
## R-Squared:      0.10988
## Adj. R-Squared: -0.15559
## F-statistic: 2.34552 on 3 and 57 DF, p-value: 0.082384
model_lntdr2 <- plm(lnTDR ~ lnSP + SIZE + GROW, data = pdata, model = "within")
summary(model_lntdr2)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = lnTDR ~ lnSP + SIZE + GROW, data = pdata, model = "within")
## 
## Balanced Panel: n = 15, T = 5, N = 75
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -0.1325112 -0.0297014 -0.0055332  0.0243262  0.1607501 
## 
## Coefficients:
##         Estimate  Std. Error t-value  Pr(>|t|)    
## lnSP  2.1191e-02  1.8085e-02  1.1717    0.2462    
## SIZE -3.0286e-05  4.9817e-06 -6.0795 1.065e-07 ***
## GROW  4.3601e-02  4.9959e-02  0.8727    0.3865    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    0.31664
## Residual Sum of Squares: 0.1819
## R-Squared:      0.42554
## Adj. R-Squared: 0.25421
## F-statistic: 14.0744 on 3 and 57 DF, p-value: 5.6052e-07
# ========================
# 4️⃣ REGRESIONES DE PANEL (EFECTO FIJO) CON LAG -1
# ========================

model_roa_lag <- plm(
  ROA ~ lag(lnTDR,1) + lag(SIZE,1) + lag(GROW,1),
  data = pdata, model = "within"
)
summary(model_roa_lag)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = ROA ~ lag(lnTDR, 1) + lag(SIZE, 1) + lag(GROW, 
##     1), data = pdata, model = "within")
## 
## Unbalanced Panel: n = 15, T = 4-5, N = 74
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.246313 -0.012511 -0.002149  0.015097  0.176181 
## 
## Coefficients:
##                  Estimate  Std. Error t-value Pr(>|t|)  
## lag(lnTDR, 1) -9.3162e-02  5.4325e-02 -1.7149  0.09189 .
## lag(SIZE, 1)   1.0955e-06  3.7846e-06  0.2895  0.77329  
## lag(GROW, 1)  -3.0869e-02  4.6697e-02 -0.6611  0.51129  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    0.19587
## Residual Sum of Squares: 0.17936
## R-Squared:      0.084315
## Adj. R-Squared: -0.19366
## F-statistic: 1.7188 on 3 and 56 DF, p-value: 0.17356
model_lntdr1_lag <- plm(
  lnTDR ~ lag(ROA,1) + lag(lnSP,1) + lag(SIZE,1) + lag(GROW,1),
  data = pdata, model = "within"
)
summary(model_lntdr1_lag)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = lnTDR ~ lag(ROA, 1) + lag(lnSP, 1) + lag(SIZE, 
##     1) + lag(GROW, 1), data = pdata, model = "within")
## 
## Unbalanced Panel: n = 15, T = 4-5, N = 74
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.167383 -0.040176 -0.005186  0.031590  0.151803 
## 
## Coefficients:
##                 Estimate  Std. Error t-value Pr(>|t|)
## lag(ROA, 1)   1.5755e-01  1.2096e-01  1.3025   0.1982
## lag(lnSP, 1)  1.6811e-04  1.5898e-02  0.0106   0.9916
## lag(SIZE, 1)  4.6426e-06  4.3188e-06  1.0750   0.2871
## lag(GROW, 1) -4.1264e-02  6.1814e-02 -0.6676   0.5072
## 
## Total Sum of Squares:    0.31007
## Residual Sum of Squares: 0.27716
## R-Squared:      0.10615
## Adj. R-Squared: -0.18639
## F-statistic: 1.63284 on 4 and 55 DF, p-value: 0.17906
model_lnsp_lag <- plm(
  lnSP ~ lag(lnTDR,1) + lag(SIZE,1) + lag(GROW,1),
  data = pdata, model = "within"
)
summary(model_lnsp_lag)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = lnSP ~ lag(lnTDR, 1) + lag(SIZE, 1) + lag(GROW, 
##     1), data = pdata, model = "within")
## 
## Unbalanced Panel: n = 15, T = 4-5, N = 74
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -1.136240 -0.241733  0.062743  0.215606  1.221575 
## 
## Coefficients:
##                  Estimate  Std. Error t-value Pr(>|t|)
## lag(lnTDR, 1) -2.9267e-01  4.0479e-01 -0.7230   0.4727
## lag(SIZE, 1)   9.7956e-06  2.8200e-05  0.3474   0.7296
## lag(GROW, 1)  -3.6626e-01  3.4795e-01 -1.0526   0.2970
## 
## Total Sum of Squares:    10.478
## Residual Sum of Squares: 9.9581
## R-Squared:      0.049599
## Adj. R-Squared: -0.23892
## F-statistic: 0.974172 on 3 and 56 DF, p-value: 0.41147
model_lntdr2_lag <- plm(
  lnTDR ~ lag(ROA,1) + lag(lnSP,1) + lag(SIZE,1) + lag(GROW,1),
  data = pdata, model = "within"
)
summary(model_lntdr2_lag)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = lnTDR ~ lag(ROA, 1) + lag(lnSP, 1) + lag(SIZE, 
##     1) + lag(GROW, 1), data = pdata, model = "within")
## 
## Unbalanced Panel: n = 15, T = 4-5, N = 74
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.167383 -0.040176 -0.005186  0.031590  0.151803 
## 
## Coefficients:
##                 Estimate  Std. Error t-value Pr(>|t|)
## lag(ROA, 1)   1.5755e-01  1.2096e-01  1.3025   0.1982
## lag(lnSP, 1)  1.6811e-04  1.5898e-02  0.0106   0.9916
## lag(SIZE, 1)  4.6426e-06  4.3188e-06  1.0750   0.2871
## lag(GROW, 1) -4.1264e-02  6.1814e-02 -0.6676   0.5072
## 
## Total Sum of Squares:    0.31007
## Residual Sum of Squares: 0.27716
## R-Squared:      0.10615
## Adj. R-Squared: -0.18639
## F-statistic: 1.63284 on 4 and 55 DF, p-value: 0.17906
# ========================
# 5️⃣ TABLAS SIMPLES PARA COPY/PASTE
# ========================

# Descriptivos
write.csv(desc_stats, "descriptive_stats.csv", row.names = FALSE)

# Correlación
write.csv(cor_matrix, "correlation_matrix.csv")

# Función para extraer coeficientes
coefs <- function(mod) {
  data.frame(
    Variable = rownames(summary(mod)$coefficients),
    Estimate = summary(mod)$coefficients[,1],
    StdError = summary(mod)$coefficients[,2],
    tValue = summary(mod)$coefficients[,3],
    row.names = NULL
  )
}

# ==========================
# Exportar regresiones SIN LAG
# ==========================
write.csv(coefs(model_roa),     "reg_roa.csv", row.names = FALSE)
write.csv(coefs(model_lnsp),    "reg_lnsp.csv", row.names = FALSE)
write.csv(coefs(model_lntdr1),  "reg_lntdr1.csv", row.names = FALSE)
write.csv(coefs(model_lntdr2),  "reg_lntdr2.csv", row.names = FALSE)

# ==========================
# Exportar regresiones CON LAG (-1)
# ==========================
write.csv(coefs(model_roa_lag),     "reg_roa_lag.csv", row.names = FALSE)
write.csv(coefs(model_lnsp_lag),    "reg_lnsp_lag.csv", row.names = FALSE)
write.csv(coefs(model_lntdr1_lag),  "reg_lntdr1_lag.csv", row.names = FALSE)
write.csv(coefs(model_lntdr2_lag),  "reg_lntdr2_lag.csv", row.names = FALSE)

cat("\n✅ Tablas generadas: descriptivos, correlación y 8 regresiones de panel.\n")
## 
## ✅ Tablas generadas: descriptivos, correlación y 8 regresiones de panel.