EJERCICIO 3.8 - ANÁLISIS MULTIVARIANTE (DANIEL PEÑA)

import numpy as np
import pandas as pd

# Load the tab-separated data file invest.txt into a DataFrame and preview
# its first rows.
data = pd.read_csv("invest.txt", sep="\t")
print(data.head())
       V1      V2     V3     V4      V5      V6      V7      V8
0  815319  379851  88663  58104  255864  440134  111015  162288
1  162103   90332  35158  29802   59630   92725    6409   34349
2  105856   78811  13978  16758   55634  308926   32039   40538
3  118935   76186  13818  11253   49938  120065    9984   35792
4   91099   85037  11740  20337   41233   66087   11304   22093
# 1. Compute the variance-covariance matrix S of the columns of `data`.
S = data.cov()
print(S)
              V1            V2  ...            V7            V8
V1  3.054136e+10  1.431111e+10  ...  4.118953e+09  6.147014e+09
V2  1.431111e+10  6.838918e+09  ...  1.940370e+09  2.918681e+09
V3  3.342091e+09  1.577437e+09  ...  4.403024e+08  6.731871e+08
V4  2.141635e+09  1.040640e+09  ...  2.832687e+08  4.369565e+08
V5  9.575286e+09  4.539247e+09  ...  1.300812e+09  1.952632e+09
V6  1.686343e+10  8.261552e+09  ...  2.489172e+09  3.639317e+09
V7  4.118953e+09  1.940370e+09  ...  5.855858e+08  8.417088e+08
V8  6.147014e+09  2.918681e+09  ...  8.417088e+08  1.261930e+09

[8 rows x 8 columns]
# 2. Invert the variance-covariance matrix (S^-1). The result prints as a
# plain NumPy array, so the V1..V8 labels of S are not carried over.
S_inv = np.linalg.inv(S)
print(S_inv)
[[ 2.88955904e-08 -1.91721689e-09 -6.50343900e-08  1.11274868e-08
  -6.69484243e-09  1.19748487e-08 -7.38143663e-08 -8.04207203e-08]
 [-1.91721689e-09  7.88000198e-08  1.45868081e-07 -2.97069922e-07
   1.31558645e-07  2.14652743e-08 -7.17869380e-08 -3.65454361e-07]
 [-6.50343900e-08  1.45868081e-07  5.81944081e-07 -7.46661017e-07
   3.45939808e-07  2.73148380e-08 -4.65000386e-10 -6.86237146e-07]
 [ 1.11274868e-08 -2.97069922e-07 -7.46661017e-07  1.43407073e-06
  -7.01053426e-07 -1.01344539e-07  3.41629620e-07  1.68380155e-06]
 [-6.69484243e-09  1.31558645e-07  3.45939808e-07 -7.01053426e-07
   4.31904074e-07  5.21584262e-08 -1.77312048e-07 -9.13919717e-07]
 [ 1.19748487e-08  2.14652743e-08  2.73148380e-08 -1.01344539e-07
   5.21584262e-08  1.36189627e-08 -5.94949001e-08 -1.67756775e-07]
 [-7.38143663e-08 -7.17869380e-08 -4.65000386e-10  3.41629620e-07
  -1.77312048e-07 -5.94949001e-08  3.35881757e-07  6.29455244e-07]
 [-8.04207203e-08 -3.65454361e-07 -6.86237146e-07  1.68380155e-06
  -9.13919717e-07 -1.67756775e-07  6.29455244e-07  2.49891971e-06]]
# 3. Extract the main-diagonal elements of S and of S^-1.
S_diag = np.diag(S)
S_inv_diag = np.diag(S_inv)

# Only the diagonal of S is printed here.
print(S_diag)
[3.05413570e+10 6.83891776e+09 3.82030296e+08 1.74968598e+08
 3.04211675e+09 1.24047908e+10 5.85585827e+08 1.26192951e+09]
# 4. Multiply the two diagonals element-wise: entry i is S_ii * (S^-1)_ii.
products_diag = S_diag * S_inv_diag
print(products_diag)
[ 882.51054228  538.90685517  222.3202696   250.91734542 1313.90261862
  168.94038382  196.68759639 3153.46052912]
# 5. Take the reciprocal of each product: 1 / (S_ii * (S^-1)_ii).
inversos = 1 / products_diag
print(inversos)
[0.00113313 0.00185561 0.00449802 0.00398538 0.00076109 0.00591925
 0.0050842  0.00031711]
# 6. Compute the squared multiple correlation coefficient (R^2) of each
# variable: R^2_i = 1 - 1 / (S_ii * (S^-1)_ii).
R_squared = 1 - inversos
print(R_squared)
[0.99886687 0.99814439 0.99550198 0.99601462 0.99923891 0.99408075
 0.9949158  0.99968289]
# Gather the intermediate quantities and the final R^2 into a single table,
# one row per variable, attaching the numeric columns one at a time.
results = pd.DataFrame({"Variable": data.columns})
results["S_ii * S^-1_ii"] = products_diag
results["1 / (S_ii * S^-1_ii)"] = inversos
results["R^2"] = R_squared

# Header line announcing the results table.
print("Resultados de los Coeficientes de Correlación Múltiple al Cuadrado:")
Resultados de los Coeficientes de Correlación Múltiple al Cuadrado:
# Display the full results table.
print(results)
  Variable  S_ii * S^-1_ii  1 / (S_ii * S^-1_ii)       R^2
0       V1      882.510542              0.001133  0.998867
1       V2      538.906855              0.001856  0.998144
2       V3      222.320270              0.004498  0.995502
3       V4      250.917345              0.003985  0.996015
4       V5     1313.902619              0.000761  0.999239
5       V6      168.940384              0.005919  0.994081
6       V7      196.687596              0.005084  0.994916
7       V8     3153.460529              0.000317  0.999683
# Identify the most predictable variable: the row of `results` whose R^2
# is the largest.
most_predictable = results.loc[results['R^2'].idxmax()]
print("\nVariable más previsible por las demás:")

Variable más previsible por las demás:
# Display the selected row (variable name together with its statistics).
print(most_predictable)
Variable                         V8
S_ii * S^-1_ii          3153.460529
1 / (S_ii * S^-1_ii)       0.000317
R^2                        0.999683
Name: 7, dtype: object
# Same analysis repeated in R: load invest.txt from the home directory and
# preview its first rows.
data <- read.delim("~/invest.txt")
print(head(data))
      V1     V2    V3    V4     V5     V6     V7     V8
1 815319 379851 88663 58104 255864 440134 111015 162288
2 162103  90332 35158 29802  59630  92725   6409  34349
3 105856  78811 13978 16758  55634 308926  32039  40538
4 118935  76186 13818 11253  49938 120065   9984  35792
5  91099  85037 11740 20337  41233  66087  11304  22093
6  72722  49459 14041 16722  23139 101900  12034  14645
# 1. Compute the variance-covariance matrix S of the columns of `data`.
S <- cov(data)
print(S)
            V1          V2         V3         V4         V5          V6
V1 30541357013 14311108050 3342091072 2141634823 9575286094 16863434733
V2 14311108050  6838917760 1577436520 1040639806 4539247232  8261552240
V3  3342091072  1577436520  382030296  249789181 1052122713  1853953075
V4  2141634823  1040639806  249789181  174968598  689052435  1258638505
V5  9575286094  4539247232 1052122713  689052435 3042116752  5527674127
V6 16863434733  8261552240 1853953075 1258638505 5527674127 12404790846
V7  4118953393  1940369947  440302389  283268659 1300812059  2489171521
V8  6147014285  2918680978  673187147  436956491 1952631573  3639316558
           V7         V8
V1 4118953393 6147014285
V2 1940369947 2918680978
V3  440302389  673187147
V4  283268659  436956491
V5 1300812059 1952631573
V6 2489171521 3639316558
V7  585585827  841708761
V8  841708761 1261929511
# 2. Compute the inverse of the variance-covariance matrix (S^-1).
S_inv <- solve(S)
print(S_inv)
              V1            V2            V3            V4            V5
V1  2.889559e-08 -1.917217e-09 -6.503439e-08  1.112749e-08 -6.694842e-09
V2 -1.917217e-09  7.880002e-08  1.458681e-07 -2.970699e-07  1.315586e-07
V3 -6.503439e-08  1.458681e-07  5.819441e-07 -7.466610e-07  3.459398e-07
V4  1.112749e-08 -2.970699e-07 -7.466610e-07  1.434071e-06 -7.010534e-07
V5 -6.694842e-09  1.315586e-07  3.459398e-07 -7.010534e-07  4.319041e-07
V6  1.197485e-08  2.146527e-08  2.731484e-08 -1.013445e-07  5.215843e-08
V7 -7.381437e-08 -7.178694e-08 -4.650004e-10  3.416296e-07 -1.773120e-07
V8 -8.042072e-08 -3.654544e-07 -6.862371e-07  1.683802e-06 -9.139197e-07
              V6            V7            V8
V1  1.197485e-08 -7.381437e-08 -8.042072e-08
V2  2.146527e-08 -7.178694e-08 -3.654544e-07
V3  2.731484e-08 -4.650004e-10 -6.862371e-07
V4 -1.013445e-07  3.416296e-07  1.683802e-06
V5  5.215843e-08 -1.773120e-07 -9.139197e-07
V6  1.361896e-08 -5.949490e-08 -1.677568e-07
V7 -5.949490e-08  3.358818e-07  6.294552e-07
V8 -1.677568e-07  6.294552e-07  2.498920e-06
# 3. Extract the main-diagonal elements of S and of S^-1.
S_diag <- diag(S)
S_inv_diag <- diag(S_inv)
# 4. Multiply the two diagonals element-wise: entry i is S_ii * (S^-1)_ii.
products_diag <- S_diag * S_inv_diag
print(products_diag)
       V1        V2        V3        V4        V5        V6        V7        V8 
 882.5105  538.9069  222.3203  250.9173 1313.9026  168.9404  196.6876 3153.4605 
# 5. Take the reciprocal of each product: 1 / (S_ii * (S^-1)_ii).
inverses <- 1 / products_diag
print(inverses)
          V1           V2           V3           V4           V5           V6 
0.0011331309 0.0018556082 0.0044980154 0.0039853761 0.0007610914 0.0059192478 
          V7           V8 
0.0050842047 0.0003171119 
# 6. Compute the squared multiple correlation coefficient (R^2) of each
# variable: R^2_i = 1 - 1 / (S_ii * (S^-1)_ii).
R_squared <- 1 - inverses
print(R_squared )
       V1        V2        V3        V4        V5        V6        V7        V8 
0.9988669 0.9981444 0.9955020 0.9960146 0.9992389 0.9940808 0.9949158 0.9996829 
# Build a data frame collecting, for each variable, the diagonal product,
# its reciprocal and the resulting R^2.
results <- data.frame(
  Variable = colnames(data),
  S_ii_times_S_inv_ii = products_diag,
  One_over_S_ii_times_S_inv_ii = inverses,
  R_squared = R_squared
)

# Print the header line announcing the results table.
cat("Resultados de los Coeficientes de Correlación Múltiple al Cuadrado:\n")
Resultados de los Coeficientes de Correlación Múltiple al Cuadrado:
# Display the full results table.
print(results)
   Variable S_ii_times_S_inv_ii One_over_S_ii_times_S_inv_ii R_squared
V1       V1            882.5105                 0.0011331309 0.9988669
V2       V2            538.9069                 0.0018556082 0.9981444
V3       V3            222.3203                 0.0044980154 0.9955020
V4       V4            250.9173                 0.0039853761 0.9960146
V5       V5           1313.9026                 0.0007610914 0.9992389
V6       V6            168.9404                 0.0059192478 0.9940808
V7       V7            196.6876                 0.0050842047 0.9949158
V8       V8           3153.4605                 0.0003171119 0.9996829
# Identify the most predictable variable: the row of `results` whose
# R_squared is the largest.
most_predictable <- results[which.max(results$R_squared), ]
cat("\nVariable más previsible por las demás:\n")

Variable más previsible por las demás:
# Display the selected row (variable name together with its statistics).
print(most_predictable)
   Variable S_ii_times_S_inv_ii One_over_S_ii_times_S_inv_ii R_squared
V8       V8            3153.461                 0.0003171119 0.9996829