# ============================================================
# Cvičenie 10 – Multikolinearita v regresných modeloch
# Dáta: EuStockMarkets (stock indexy)
# Model: FTSE ~ DAX + SMI + CAC
# ============================================================
# Balík na VIF
# Ak ho ešte nemáš: install.packages("car")
library(car)
## Loading required package: carData
# 1. Load the data
# EuStockMarkets ships with the base "datasets" package as a multivariate
# time series; convert it to a data frame so lm() can use the columns by name.
data(list = "EuStockMarkets", package = "datasets")
stocks <- as.data.frame(EuStockMarkets)
# Preview of the first six rows
head(stocks, n = 6L)
## DAX SMI CAC FTSE
## 1 1628.75 1678.1 1772.8 2443.6
## 2 1613.63 1688.5 1750.5 2460.2
## 3 1606.51 1678.6 1718.0 2448.2
## 4 1621.04 1684.1 1708.1 2470.4
## 5 1618.16 1686.6 1723.1 2484.7
## 6 1610.61 1671.6 1714.3 2466.8
# 2. Baseline regression model
# FTSE_t = β0 + β1*DAX_t + β2*SMI_t + β3*CAC_t + u_t
# Ordinary least squares of FTSE on the three other indices.
model <- lm(formula = FTSE ~ DAX + SMI + CAC, data = stocks)
summary(object = model)
##
## Call:
## lm(formula = FTSE ~ DAX + SMI + CAC, data = stocks)
##
## Residuals:
## Min 1Q Median 3Q Max
## -534.61 -76.61 12.18 84.13 386.73
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1988.54565 18.75930 106.003 <2e-16 ***
## DAX -0.02123 0.02578 -0.823 0.41
## SMI 0.70758 0.01347 52.541 <2e-16 ***
## CAC -0.34029 0.01988 -17.120 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 121.5 on 1856 degrees of freedom
## Multiple R-squared: 0.9845, Adjusted R-squared: 0.9845
## F-statistic: 3.941e+04 on 3 and 1856 DF, p-value: < 2.2e-16
# 3. Correlation matrix of the explanatory variables
# Keep only the three regressors, then print their pairwise Pearson
# correlations rounded to three decimals.
xvars <- stocks[c("DAX", "SMI", "CAC")]
round(cor(xvars, method = "pearson"), digits = 3)
## DAX SMI CAC
## DAX 1.000 0.991 0.966
## SMI 0.991 1.000 0.947
## CAC 0.966 0.947 1.000
# Scatterplot matrix – visual check of the pairwise relationships
# between the regressors.
pairs(x = xvars, main = "Scatterplotová matica – DAX, SMI, CAC")

# 4. VIF – Variance Inflation Factor of each regressor in the baseline model
car::vif(model)
## DAX SMI CAC
## 98.46480 63.14756 16.75008
# 5. Condition number of the original model
# Design matrix without the intercept column; drop = FALSE keeps it a matrix
# even when the model has a single predictor.
X <- model.matrix(model)[, -1, drop = FALSE]
# The singular values of X are the square roots of the eigenvalues of X'X, so
# max(sv) / min(sv) equals sqrt(lambda_max / lambda_min). Decomposing X
# directly avoids forming X'X, which squares the condition number and
# therefore loses precision.
sv <- svd(X, nu = 0, nv = 0)$d
condition_number <- max(sv) / min(sv)
condition_number # the higher the value, the stronger the multicollinearity
## [1] 53.55796
# 6. Remedies for multicollinearity
# 6.1 Refit the model leaving out one regressor at a time
# Without DAX
model_no_DAX <- lm(formula = FTSE ~ SMI + CAC, data = stocks)
summary(object = model_no_DAX)
##
## Call:
## lm(formula = FTSE ~ SMI + CAC, data = stocks)
##
## Residuals:
## Min 1Q Median 3Q Max
## -533.97 -76.18 12.70 84.55 383.04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.993e+03 1.796e+01 110.98 <2e-16 ***
## SMI 6.974e-01 5.266e-03 132.42 <2e-16 ***
## CAC -3.509e-01 1.509e-02 -23.25 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 121.5 on 1857 degrees of freedom
## Multiple R-squared: 0.9845, Adjusted R-squared: 0.9845
## F-statistic: 5.913e+04 on 2 and 1857 DF, p-value: < 2.2e-16
# Without SMI
model_no_SMI <- lm(formula = FTSE ~ DAX + CAC, data = stocks)
summary(object = model_no_SMI)
##
## Call:
## lm(formula = FTSE ~ DAX + CAC, data = stocks)
##
## Residuals:
## Min 1Q Median 3Q Max
## -645.35 -136.66 50.85 140.69 489.10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1961.84473 29.56711 66.35 <2e-16 ***
## DAX 1.22542 0.01590 77.09 <2e-16 ***
## CAC -0.67210 0.02972 -22.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 191.6 on 1857 degrees of freedom
## Multiple R-squared: 0.9616, Adjusted R-squared: 0.9615
## F-statistic: 2.323e+04 on 2 and 1857 DF, p-value: < 2.2e-16
# Without CAC
model_no_CAC <- lm(formula = FTSE ~ DAX + SMI, data = stocks)
summary(object = model_no_CAC)
##
## Call:
## lm(formula = FTSE ~ DAX + SMI, data = stocks)
##
## Residuals:
## Min 1Q Median 3Q Max
## -589.54 -76.07 -2.32 81.31 428.57
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1709.91642 10.03651 170.37 <2e-16 ***
## DAX -0.30843 0.02106 -14.65 <2e-16 ***
## SMI 0.78083 0.01374 56.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 130.7 on 1857 degrees of freedom
## Multiple R-squared: 0.9821, Adjusted R-squared: 0.9821
## F-statistic: 5.096e+04 on 2 and 1857 DF, p-value: < 2.2e-16
# 7. Z-score standardisation of the regressors (centring + scaling)
# scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes; as.numeric() strips them so the data frame
# stores plain numeric columns instead of embedded matrices.
stocks$DAX_c <- as.numeric(scale(stocks$DAX, center = TRUE, scale = TRUE))
stocks$SMI_c <- as.numeric(scale(stocks$SMI, center = TRUE, scale = TRUE))
stocks$CAC_c <- as.numeric(scale(stocks$CAC, center = TRUE, scale = TRUE))
# Same specification as the baseline model, fitted on the standardised columns
model_centered <- lm(FTSE ~ DAX_c + SMI_c + CAC_c, data = stocks)
summary(model_centered)
##
## Call:
## lm(formula = FTSE ~ DAX_c + SMI_c + CAC_c, data = stocks)
##
## Residuals:
## Min 1Q Median 3Q Max
## -534.61 -76.61 12.18 84.13 386.73
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3565.643 2.818 1265.485 <2e-16 ***
## DAX_c -23.028 27.966 -0.823 0.41
## SMI_c 1176.716 22.396 52.541 <2e-16 ***
## CAC_c -197.476 11.535 -17.120 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 121.5 on 1856 degrees of freedom
## Multiple R-squared: 0.9845, Adjusted R-squared: 0.9845
## F-statistic: 3.941e+04 on 3 and 1856 DF, p-value: < 2.2e-16
# VIF after standardisation
car::vif(model_centered)
## DAX_c SMI_c CAC_c
## 98.46480 63.14756 16.75008
# Condition number after standardisation
# Design matrix without the intercept column; drop = FALSE keeps it a matrix
# even when the model has a single predictor.
Xc <- model.matrix(model_centered)[, -1, drop = FALSE]
# The singular values of Xc are the square roots of the eigenvalues of
# Xc'Xc, so their max/min ratio equals sqrt(lambda_max / lambda_min) without
# forming the cross-product (which would square the condition number).
sv_c <- svd(Xc, nu = 0, nv = 0)$d
condition_number_scaled <- max(sv_c) / min(sv_c)
condition_number_scaled
## [1] 21.71815
# 8. Simple rescaling – DAX divided by 1000
# NOTE(review): the original comment gives the aim as making the orders of
# magnitude similar, but the preview rows show DAX, SMI and CAC all at roughly
# 1.6–1.8 thousand, so this division puts DAX on a much smaller scale than the
# other two regressors – confirm the intent.
stocks[["DAX_1000"]] <- stocks[["DAX"]] / 1000
model_DAX_1000 <- lm(formula = FTSE ~ DAX_1000 + SMI + CAC, data = stocks)
summary(object = model_DAX_1000)
##
## Call:
## lm(formula = FTSE ~ DAX_1000 + SMI + CAC, data = stocks)
##
## Residuals:
## Min 1Q Median 3Q Max
## -534.61 -76.61 12.18 84.13 386.73
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1988.54565 18.75930 106.003 <2e-16 ***
## DAX_1000 -21.22824 25.78049 -0.823 0.41
## SMI 0.70758 0.01347 52.541 <2e-16 ***
## CAC -0.34029 0.01988 -17.120 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 121.5 on 1856 degrees of freedom
## Multiple R-squared: 0.9845, Adjusted R-squared: 0.9845
## F-statistic: 3.941e+04 on 3 and 1856 DF, p-value: < 2.2e-16
# VIF after rescaling DAX
car::vif(model_DAX_1000)
## DAX_1000 SMI CAC
## 98.46480 63.14756 16.75008
# Condition number after rescaling DAX
# Design matrix without the intercept column; drop = FALSE keeps it a matrix
# even when the model has a single predictor.
Xr <- model.matrix(model_DAX_1000)[, -1, drop = FALSE]
# The singular values of Xr are the square roots of the eigenvalues of
# Xr'Xr, so their max/min ratio equals sqrt(lambda_max / lambda_min). With a
# badly scaled column the cross-product is far worse conditioned than Xr
# itself, so decomposing Xr directly is the more accurate route.
sv_r <- svd(Xr, nu = 0, nv = 0)$d
condition_number_rescaled <- max(sv_r) / min(sv_r)
condition_number_rescaled
## [1] 38442.15