import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.sandwich_covariance import cov_hc0, cov_hc1Regresion
Regresion
Alberto Medina Gutiérrez PhDc, MAF, ISC
Antes de cualquier bloque de Python, agrega este bloque en R para decirle a reticulate que use Conda:
# library(reticulate)
# Indica que quieres usar el entorno base de Conda
# use_condaenv("base", required = TRUE)
# Verifica que esté bien
# py_config()
Asegúrarte de que reticulate esté usando el entorno de Conda correcto. Esto implica poner esto al inicio de cualquier archivo Quarto que use Python:
library(reticulate)
use_condaenv("base", required = TRUE)
Siempre inicia por cargar tus librerías
Estas son algunas de las librerías más utilizadas en Python. Si el reticulate es el correcto, podemos incluir las librerías.
# Leer los datos
capm = pd.read_csv("capm.csv")
# Convertir columna Date a datetime con el formato correcto
capm['Date'] = pd.to_datetime(capm['Date'], format="%d/%m/%y")
capm.head()| Date | SANDP | FORD | GE | MICROSOFT | ORACLE | USTB3M | |
|---|---|---|---|---|---|---|---|
| 0 | 2002-01-01 | 1130.20 | 15.30 | 37.15 | 31.855 | 17.26 | 0.140000 |
| 1 | 2002-02-01 | 1106.73 | 14.88 | 38.50 | 29.170 | 16.62 | 0.146667 |
| 2 | 2002-03-01 | 1147.39 | 16.49 | 37.40 | 30.155 | 12.80 | 0.152500 |
| 3 | 2002-04-01 | 1076.92 | 16.00 | 31.55 | 26.130 | 10.04 | 0.145833 |
| 4 | 2002-05-01 | 1067.14 | 17.65 | 31.14 | 25.455 | 7.92 | 0.146667 |
# Calcular rendimientos logarítmicos (diferencias de log precios)
capm['rsandp'] = np.log(capm['SANDP']).diff() * 100 # SP500
capm['rford'] = np.log(capm['FORD']).diff() * 100 # Ford
capm['rge'] = np.log(capm['GE']).diff() * 100 # General Electric
capm['rmsoft'] = np.log(capm['MICROSOFT']).diff() * 100 # Microsoft
capm['roracle'] = np.log(capm['ORACLE']).diff() * 100 # Oracle
# Convertir la tasa anual de los TBills a mensual
capm['USTB3M'] = capm['USTB3M'] / 12
# Calcular rendimientos en exceso
capm['ersandp'] = capm['rsandp'] - capm['USTB3M']
capm['erford'] = capm['rford'] - capm['USTB3M']
capm['erge'] = capm['rge'] - capm['USTB3M']
capm['ermsoft'] = capm['rmsoft'] - capm['USTB3M']
capm['eroracle'] = capm['roracle'] - capm['USTB3M']
capm.head()| Date | SANDP | FORD | GE | MICROSOFT | ORACLE | USTB3M | rsandp | rford | rge | rmsoft | roracle | ersandp | erford | erge | ermsoft | eroracle | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2002-01-01 | 1130.20 | 15.30 | 37.15 | 31.855 | 17.26 | 0.011667 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 2002-02-01 | 1106.73 | 14.88 | 38.50 | 29.170 | 16.62 | 0.012222 | -2.098489 | -2.783480 | 3.569447 | -8.805357 | -3.778490 | -2.110711 | -2.795702 | 3.557225 | -8.817579 | -3.790712 |
| 2 | 2002-03-01 | 1147.39 | 16.49 | 37.40 | 30.155 | 12.80 | 0.012708 | 3.608008 | 10.273611 | -2.898754 | 3.320996 | -26.116162 | 3.595299 | 10.260902 | -2.911462 | 3.308288 | -26.128870 |
| 3 | 2002-04-01 | 1076.92 | 16.00 | 31.55 | 26.130 | 10.04 | 0.012153 | -6.338468 | -3.016541 | -17.009712 | -14.326667 | -24.286806 | -6.350621 | -3.028694 | -17.021864 | -14.338820 | -24.298958 |
| 4 | 2002-05-01 | 1067.14 | 17.65 | 31.14 | 25.455 | 7.92 | 0.012222 | -0.912294 | 9.814706 | -1.308042 | -2.617189 | -23.718591 | -0.924516 | 9.802484 | -1.320264 | -2.629411 | -23.730813 |
Nos centraremos en el CAPM para Ford
plt.figure(figsize=(12, 6))
plt.plot(capm['Date'], capm['ersandp'], label='SP500', color='blue')
plt.plot(capm['Date'], capm['erford'], label='Ford', color='black')
plt.ylim(-100, 100)
plt.ylabel('')
plt.legend(loc='upper right')
plt.grid(True)
plt.title("Excess Returns: S&P 500 vs Ford")
plt.show()Y ahora Oracle
plt.figure(figsize=(12, 4))
plt.plot(capm['Date'], capm['eroracle'], label='Oracle', color='green')
plt.grid(True)
plt.title("Excess Returns: Oracle")
plt.show()OLS
# Agrega constante para el modelo
capm['const'] = 1
# Limpieza: eliminar filas con NaNs o infinitos en las variables usadas en la regresión
capm_clean = capm[['erford', 'ersandp', 'const', 'Date']].replace([np.inf, -np.inf], np.nan).dropna()
# Estima el modelo OLS
lm_capm = sm.OLS(capm_clean['erford'], capm_clean[['const', 'ersandp']]).fit()
print(lm_capm.summary()) OLS Regression Results
==============================================================================
Dep. Variable: erford R-squared: 0.335
Model: OLS Adj. R-squared: 0.332
Method: Least Squares F-statistic: 96.40
Date: Fri, 16 May 2025 Prob (F-statistic): 1.11e-18
Time: 16:24:58 Log-Likelihood: -735.39
No. Observations: 193 AIC: 1475.
Df Residuals: 191 BIC: 1481.
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const -1.0384 0.795 -1.306 0.193 -2.607 0.530
ersandp 1.8857 0.192 9.818 0.000 1.507 2.265
==============================================================================
Omnibus: 71.513 Durbin-Watson: 2.515
Prob(Omnibus): 0.000 Jarque-Bera (JB): 680.219
Skew: 1.080 Prob(JB): 1.96e-148
Kurtosis: 11.940 Cond. No. 4.17
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
import seaborn as sns
plt.figure(figsize=(8, 5))
sns.regplot(x='ersandp', y='erford', data=capm_clean, scatter_kws={'s': 10}, line_kws={'color': 'red'})
plt.xlabel("ersandp")
plt.ylabel("erford")
plt.title("Scatter plot con línea ajustada")
plt.grid(True)
plt.show()# Heteroskedasticity
# Gráfico de residuos
plt.figure(figsize=(12, 5))
plt.plot(capm.loc[capm_clean.index, 'Date'], lm_capm.resid, linestyle='-', color='black')
plt.ylabel("Residuos")
plt.title("Residuos a lo largo del tiempo")
plt.grid(True)
plt.show()# Prueba de Breusch-Pagan
bp_test = het_breuschpagan(lm_capm.resid, lm_capm.model.exog)
labels = ['LM stat', 'LM p-value', 'F stat', 'F p-value']
print("Breusch-Pagan Test:")
print(dict(zip(labels, bp_test)))Breusch-Pagan Test:
{'LM stat': 1.8268297529998414, 'LM p-value': 0.1765024586424296, 'F stat': 1.8251749571979783, 'F p-value': 0.17829673269979113}
# Coeficientes con errores estándar robustos
robust_cov = lm_capm.get_robustcov_results(cov_type='HC1').cov_params()
robust_se = np.sqrt(np.diag(robust_cov))# Mostrar tabla tipo stargazer simple
print("\n--- Tabla de Coeficientes ---")
coef = lm_capm.params
print(f"{'Variable':<10}{'(OLS)':>15}{'(Robusto)':>15}")
for i, var in enumerate(coef.index):
print(f"{var:<10}{coef.iloc[i]:>15.4f}{coef.iloc[i]:>15.4f}")
print(f"{'':<10}{lm_capm.bse.iloc[i]:>15.4f}{robust_se[i]:>15.4f}")
--- Tabla de Coeficientes ---
Variable (OLS) (Robusto)
const -1.0384 -1.0384
0.7954 0.8394
ersandp 1.8857 1.8857
0.1921 0.4242