Import súboru

# import the dataset and create a data.frame udaje
udaje_svet <- read.csv("udaje/Life-Expectancy-Data-Updated.csv",header=TRUE,sep=",",dec=".",check.names = TRUE)
head(udaje_svet)
 udaje_svet <- udaje_svet[-992,]
# z databázy udaje_svet si vyberieme len tie pozorovania, ktoré sa týkajú Abánska 
udaje <- subset(udaje_svet, Country == "Albania")
# vyrovnanie priebehu očakávanej dĺžky dožitia v čase
model <- lm(Life_expectancy ~ Alcohol_consumption+Adult_mortality+Incidents_HIV,data = udaje)
library(broom)
library(knitr)
library(kableExtra)

# koeficienty regresie
tidy(model) %>%
  kable(digits = 3, caption = "Odhadnuté koeficienty regresie") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
Odhadnuté koeficienty regresie
term estimate std.error statistic p.value
(Intercept) 105.254 3.678 28.614 0.000
Alcohol_consumption -0.018 0.202 -0.091 0.929
Adult_mortality -0.339 0.042 -8.006 0.000
Incidents_HIV -44.297 31.242 -1.418 0.184

# kvalita vyrovnania
glance(model) %>%
  kable(digits = 3, caption = "Ukazovatele kvality vyrovnania") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
Ukazovatele kvality vyrovnania
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
0.953 0.941 0.299 74.885 0 3 -0.847 11.694 15.234 0.983 11 15
NA
# výber premenných
X <- udaje[, c("Alcohol_consumption", "Adult_mortality", "Incidents_HIV")]

# výpočet korelačnej matice
cor_matrix <- cor(X, use = "complete.obs")

# zaokrúhlenie
round(cor_matrix, 4)
                    Alcohol_consumption Adult_mortality Incidents_HIV
Alcohol_consumption              1.0000          0.0106        0.4130
Adult_mortality                  0.0106          1.0000       -0.8066
Incidents_HIV                    0.4130         -0.8066        1.0000
library(knitr)
round(cor_matrix, 4) %>%
  kable(caption = "Korelačná matica")
Korelačná matica
Alcohol_consumption Adult_mortality Incidents_HIV
Alcohol_consumption 1.0000 0.0106 0.4130
Adult_mortality 0.0106 1.0000 -0.8066
Incidents_HIV 0.4130 -0.8066 1.0000
cor.test (udaje$Adult_mortality, udaje$Alcohol_consumption)

    Pearson's product-moment correlation

data:  udaje$Adult_mortality and udaje$Alcohol_consumption
t = 0.038131, df = 13, p-value = 0.9702
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.5044203  0.5200209
sample estimates:
      cor 
0.0105751 
cor.test (udaje$Alcohol_consumption, udaje$Incidents_HIV)

    Pearson's product-moment correlation

data:  udaje$Alcohol_consumption and udaje$Incidents_HIV
t = 1.6351, df = 13, p-value = 0.126
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.1258822  0.7636995
sample estimates:
      cor 
0.4130138 
cor.test (udaje$Adult_mortality, udaje$Incidents_HIV )

    Pearson's product-moment correlation

data:  udaje$Adult_mortality and udaje$Incidents_HIV
t = -4.9197, df = 13, p-value = 0.0002801
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.9332439 -0.5015452
sample estimates:
       cor 
-0.8065793 

Multicollinearity detection and testing

if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")}

library(corrplot)

corrplot(cor_matrix, method = "number", type = "upper")

Variance Inflation Factor

Pre premennú \(x_j\) definujeme

\[ VIF_j = \frac{1}{1-R_j^2}, \]

kde \(R_j^2\) je koeficient determinácie z pomocnej regresie, v ktorej je \(x_j\) vysvetľovaná ostatnými regresormi.

Ak je \(R_j^2\) blízko jednej, potom je \(VIF_j\) veľký a premenná \(x_j\) je silno lineárne vysvetliteľná ostatnými premennými.

install.packages(“car”) # run once if not installed

library(car)
# Variance Inflation Factors
vif_values <- vif(model)

vif_values
Alcohol_consumption     Adult_mortality       Incidents_HIV 
           2.035214            4.830860            5.823734 

Interpretation:

Číslo podmienenosti

Číslo podmienenosti je založené na vlastných číslach matice \(X'X\). Ak sú vlastné čísla veľmi rozdielne, matica je zle podmienená.

\[ \kappa = \sqrt{\frac{\lambda_{\max}}{\lambda_{\min}}}. \]

X_scaled <- scale(X)
eigen_values <- eigen(cor(X_scaled))$values

condition_number <- sqrt(max(eigen_values) / min(eigen_values))
eigen_values
[1] 1.90191483 1.00857992 0.08950525
condition_number
[1] 4.609685

Interpretácia čísla podmienenosti: - \(\kappa \approx 1\): žiadna multikolinearita, - \(\kappa > 10\): mierna multikolinearita, - \(\kappa > 30\): silná multikolinearita.

Malá p-hodnota znamená, že zamietame hypotézu \(R=I_k\), teda medzi vysvetľujúcimi premennými existuje štatisticky významná korelačná štruktúra.

Eliminácia problému multikolinearity

Multikolinearita nie je porušením exogenity. Nie je teda automaticky dôvodom na zamietnutie OLS modelu. Problém je hlavne inferenčný: veľké štandardné chyby a nestabilné individuálne koeficienty.

Možné riešenia:

  1. získať viac dát,
  2. odstrániť alebo spojiť silno korelované premenné,
  3. transformovať premenné,
  4. použiť teoretické obmedzenia,
  5. použiť regularizačné metódy, napríklad ridge regresiu.

Odstránenie jednej z korelovaných premenných

Ak sú dve premenné takmer rovnaké a ekonomická teória nevyžaduje obe, môžeme jednu z nich vynechať.

model_reduced <- lm(Life_expectancy ~ Alcohol_consumption + Incidents_HIV, data = udaje)

summary(model)

Call:
lm(formula = Life_expectancy ~ Alcohol_consumption + Adult_mortality + 
    Incidents_HIV, data = udaje)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.36499 -0.18121 -0.05457  0.22523  0.48082 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         105.25422    3.67841  28.614 1.11e-11 ***
Alcohol_consumption  -0.01843    0.20206  -0.091    0.929    
Adult_mortality      -0.33881    0.04232  -8.006 6.49e-06 ***
Incidents_HIV       -44.29739   31.24154  -1.418    0.184    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.299 on 11 degrees of freedom
Multiple R-squared:  0.9533,    Adjusted R-squared:  0.9406 
F-statistic: 74.89 on 3 and 11 DF,  p-value: 1.324e-07
summary(model_reduced)

Call:
lm(formula = Life_expectancy ~ Alcohol_consumption + Incidents_HIV, 
    data = udaje)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.23159 -0.42878  0.07813  0.48214  0.94301 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)          76.3137     1.7037  44.793 9.99e-15 ***
Alcohol_consumption  -1.0512     0.3891  -2.702   0.0192 *  
Incidents_HIV       178.4343    35.5604   5.018   0.0003 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.7479 on 12 degrees of freedom
Multiple R-squared:  0.6813,    Adjusted R-squared:  0.6282 
F-statistic: 12.83 on 2 and 12 DF,  p-value: 0.001047
LS0tCnRpdGxlOiAiTXVsdGlrb2xpbmVhcml0YSB2IGxpbmXDoXJuZWogcmVncmVzaWkiCmF1dGhvcjogIk5hdMOhbGlhIEt1bm92w6EiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZGVwdGg6IDMKLS0tCiMjICBJbXBvcnQgc8O6Ym9ydQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFLCB3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRSkKc2V0LnNlZWQoMTIzKQpgYGAKCmBgYHtyfQojIGltcG9ydCB0aGUgZGF0YXNldCBhbmQgY3JlYXRlIGEgZGF0YS5mcmFtZSB1ZGFqZQp1ZGFqZV9zdmV0IDwtIHJlYWQuY3N2KCJ1ZGFqZS9MaWZlLUV4cGVjdGFuY3ktRGF0YS1VcGRhdGVkLmNzdiIsaGVhZGVyPVRSVUUsc2VwPSIsIixkZWM9Ii4iLGNoZWNrLm5hbWVzID0gVFJVRSkKaGVhZCh1ZGFqZV9zdmV0KQogdWRhamVfc3ZldCA8LSB1ZGFqZV9zdmV0Wy05OTIsXQpgYGAKYGBge3J9CiMgeiBkYXRhYsOhenkgdWRhamVfc3ZldCBzaSB2eWJlcmllbWUgbGVuIHRpZSBwb3pvcm92YW5pYSwga3RvcsOpIHNhIHTDvWthasO6IEFiw6Fuc2thIAp1ZGFqZSA8LSBzdWJzZXQodWRhamVfc3ZldCwgQ291bnRyeSA9PSAiQWxiYW5pYSIpCmBgYAoKYGBge3J9CiMgdnlyb3ZuYW5pZSBwcmllYmVodSBvxI1ha8OhdmFuZWogZMS6xb5reSBkb8W+aXRpYSB2IMSNYXNlCm1vZGVsIDwtIGxtKExpZmVfZXhwZWN0YW5jeSB+IEFsY29ob2xfY29uc3VtcHRpb24rQWR1bHRfbW9ydGFsaXR5K0luY2lkZW50c19ISVYsZGF0YSA9IHVkYWplKQpsaWJyYXJ5KGJyb29tKQpsaWJyYXJ5KGtuaXRyKQpsaWJyYXJ5KGthYmxlRXh0cmEpCgojIGtvZWZpY2llbnR5IHJlZ3Jlc2llCnRpZHkobW9kZWwpICU+JQogIGthYmxlKGRpZ2l0cyA9IDMsIGNhcHRpb24gPSAiT2RoYWRudXTDqSBrb2VmaWNpZW50eSByZWdyZXNpZSIpICU+JQogIGthYmxlX3N0eWxpbmcoYm9vdHN0cmFwX29wdGlvbnMgPSAic3RyaXBlZCIsIGZ1bGxfd2lkdGggPSBGQUxTRSkKCiMga3ZhbGl0YSB2eXJvdm5hbmlhCmdsYW5jZShtb2RlbCkgJT4lCiAga2FibGUoZGlnaXRzID0gMywgY2FwdGlvbiA9ICJVa2F6b3ZhdGVsZSBrdmFsaXR5IHZ5cm92bmFuaWEiKSAlPiUKICBrYWJsZV9zdHlsaW5nKGJvb3RzdHJhcF9vcHRpb25zID0gInN0cmlwZWQiLCBmdWxsX3dpZHRoID0gRkFMU0UpCgpgYGAKCmBgYHtyfQojIHbDvWJlciBwcmVtZW5uw71jaApYIDwtIHVkYWplWywgYygiQWxjb2hvbF9jb25zdW1wdGlvbiIsICJBZHVsdF9tb3J0YWxpdHkiLCAiSW5jaWRlbnRzX0hJViIpXQoKIyB2w71wb8SNZXQga29yZWxhxI1uZWogbWF0aWNlCmNvcl9tYXRyaXggPC0gY29yKFgsIHVzZSA9ICJjb21wbGV0ZS5vYnMiKQoKIyB6YW9rcsO6aGxlbmllCnJvdW5kKGNvcl9tYXRyaXgsIDQpCmBgYAoKYGBge3J9CmxpYnJhcnkoa25pdHIpCnJvdW5kKGNvcl9tYXRyaXgsIDQpICU+JQogIGthYmxlKGNhcHRpb24gPSAiS29yZWxhxI1uw6EgbWF0aWNhIikKYGBgCgpgYGB7cn0KY29yLnRlc3QgKHVkYWplJEFkdWx0X21vcnRhbGl0eSwgdWRhamUkQWxjb2hvbF9jb25zdW1wdGlvbikKY29yLnRlc3QgKHVkYWplJEFsY29ob2xfY29uc3VtcHRpb24sIHVkYWplJEluY2lkZW50c19ISVYpCmNvci50ZXN0ICh1ZGFqZSRBZHVsdF9tb3J0YWxpdHksIHVkYWplJEluY2lkZW50c19ISVYgKQpgYGAKCiMgTXVsdGljb2xsaW5lYXJpdHkgZGV0ZWN0aW9uIGFuZCB0ZXN0aW5nCgpgYGB7ciBjb3JyZWxhdGlvbi1wbG90fQppZiAoIXJlcXVpcmVOYW1lc3BhY2UoImNvcnJwbG90IiwgcXVpZXRseSA9IFRSVUUpKSB7CiAgaW5zdGFsbC5wYWNrYWdlcygiY29ycnBsb3QiKX0KCmxpYnJhcnkoY29ycnBsb3QpCgpjb3JycGxvdChjb3JfbWF0cml4LCBtZXRob2QgPSAibnVtYmVyIiwgdHlwZSA9ICJ1cHBlciIpCmBgYAoKIyMgIFZhcmlhbmNlIEluZmxhdGlvbiBGYWN0b3IKClByZSBwcmVtZW5uw7ogXCh4X2pcKSBkZWZpbnVqZW1lCgpcWwpWSUZfaiA9IFxmcmFjezF9ezEtUl9qXjJ9LApcXQoKa2RlIFwoUl9qXjJcKSBqZSBrb2VmaWNpZW50IGRldGVybWluw6FjaWUgeiBwb21vY25laiByZWdyZXNpZSwgdiBrdG9yZWogamUgXCh4X2pcKSB2eXN2ZXTEvm92YW7DoSBvc3RhdG7DvW1pIHJlZ3Jlc29ybWkuCgpBayBqZSBcKFJfal4yXCkgYmzDrXprbyBqZWRuZWosIHBvdG9tIGplIFwoVklGX2pcKSB2ZcS+a8O9IGEgcHJlbWVubsOhIFwoeF9qXCkgamUgc2lsbm8gbGluZcOhcm5lIHZ5c3ZldGxpdGXEvm7DoSBvc3RhdG7DvW1pIHByZW1lbm7DvW1pLgoKIyBpbnN0YWxsLnBhY2thZ2VzKCJjYXIiKSAgIyBydW4gb25jZSBpZiBub3QgaW5zdGFsbGVkCgoKYGBge3J9CmxpYnJhcnkoY2FyKQojIFZhcmlhbmNlIEluZmxhdGlvbiBGYWN0b3JzCnZpZl92YWx1ZXMgPC0gdmlmKG1vZGVsKQoKdmlmX3ZhbHVlcwpgYGAKCioqSW50ZXJwcmV0YXRpb246KioKCi0gXChWSUZfaiA9IDFcKTogxb5pYWRuYSBtdWx0aWtvbGluZWFyaXRhLAotIFwoMSA8IFZJRl9qIDwgNVwpOiBtaWVybmEgbXVsdGlrb2xpbmVhcml0YQotIFwoVklGX2ogXGdlcSA1XCk6IHNpbG7DoSBtdWx0aWtvbGluZWFyaXRhIChuaWVrdG9yw60gYXV0b3JpIHBvdcW+w612YWrDuiBwcsOtc25lasWhw60gcHJhaCBcKFZJRl9qIFxnZXEgMTBcKSkuCgojIyAgxIzDrXNsbyBwb2RtaWVuZW5vc3RpCgrEjMOtc2xvIHBvZG1pZW5lbm9zdGkgamUgemFsb8W+ZW7DqSBuYSB2bGFzdG7DvWNoIMSNw61zbGFjaCBtYXRpY2UgXChYJ1hcKS4gQWsgc8O6IHZsYXN0bsOpIMSNw61zbGEgdmXEvm1pIHJvemRpZWxuZSwgbWF0aWNhIGplIHpsZSBwb2RtaWVuZW7DoS4KClxbClxrYXBwYSA9IFxzcXJ0e1xmcmFje1xsYW1iZGFfe1xtYXh9fXtcbGFtYmRhX3tcbWlufX19LgpcXQoKYGBge3IgY29uZGl0aW9uLW51bWJlcn0KWF9zY2FsZWQgPC0gc2NhbGUoWCkKZWlnZW5fdmFsdWVzIDwtIGVpZ2VuKGNvcihYX3NjYWxlZCkpJHZhbHVlcwoKY29uZGl0aW9uX251bWJlciA8LSBzcXJ0KG1heChlaWdlbl92YWx1ZXMpIC8gbWluKGVpZ2VuX3ZhbHVlcykpCmVpZ2VuX3ZhbHVlcwpjb25kaXRpb25fbnVtYmVyCmBgYAogIAoqKkludGVycHJldMOhY2lhIMSNw61zbGEgcG9kbWllbmVub3N0aToqKiAKLSBcKFxrYXBwYSBcYXBwcm94IDFcKTogxb5pYWRuYSBtdWx0aWtvbGluZWFyaXRhLAotIFwoXGthcHBhID4gMTBcKTogbWllcm5hIG11bHRpa29saW5lYXJpdGEsCi0gXChca2FwcGEgPiAzMFwpOiBzaWxuw6EgbXVsdGlrb2xpbmVhcml0YS4KCiAgCk1hbMOhIHAtaG9kbm90YSB6bmFtZW7DoSwgxb5lIHphbWlldGFtZSBoeXBvdMOpenUgXChSPUlfa1wpLCB0ZWRhIG1lZHppIHZ5c3ZldMS+dWrDumNpbWkgcHJlbWVubsO9bWkgZXhpc3R1amUgxaF0YXRpc3RpY2t5IHbDvXpuYW1uw6Ega29yZWxhxI1uw6EgxaF0cnVrdMO6cmEuCgoKIyAgRWxpbWluw6FjaWEgcHJvYmzDqW11IG11bHRpa29saW5lYXJpdHkKCk11bHRpa29saW5lYXJpdGEgbmllIGplIHBvcnXFoWVuw61tIGV4b2dlbml0eS4gTmllIGplIHRlZGEgYXV0b21hdGlja3kgZMO0dm9kb20gbmEgemFtaWV0bnV0aWUgT0xTIG1vZGVsdS4gUHJvYmzDqW0gamUgaGxhdm5lIGluZmVyZW7EjW7DvTogdmXEvmvDqSDFoXRhbmRhcmRuw6kgY2h5YnkgYSBuZXN0YWJpbG7DqSBpbmRpdmlkdcOhbG5lIGtvZWZpY2llbnR5LgoKTW/Fvm7DqSByaWXFoWVuaWE6CgoxLiB6w61za2HFpSB2aWFjIGTDoXQsCjIuIG9kc3Ryw6FuacWlIGFsZWJvIHNwb2ppxaUgc2lsbm8ga29yZWxvdmFuw6kgcHJlbWVubsOpLAozLiB0cmFuc2Zvcm1vdmHFpSBwcmVtZW5uw6ksCjQuIHBvdcW+acWlIHRlb3JldGlja8OpIG9ibWVkemVuaWEsCjUuIHBvdcW+acWlIHJlZ3VsYXJpemHEjW7DqSBtZXTDs2R5LCBuYXByw61rbGFkIHJpZGdlIHJlZ3Jlc2l1LgoKIyMgIE9kc3Ryw6FuZW5pZSBqZWRuZWogeiBrb3JlbG92YW7DvWNoIHByZW1lbm7DvWNoCgpBayBzw7ogZHZlIHByZW1lbm7DqSB0YWttZXIgcm92bmFrw6kgYSBla29ub21pY2vDoSB0ZcOzcmlhIG5ldnnFvmFkdWplIG9iZSwgbcO0xb5lbWUgamVkbnUgeiBuaWNoIHZ5bmVjaGHFpS4KCmBgYHtyIHJlZHVjZWQtbW9kZWx9Cm1vZGVsX3JlZHVjZWQgPC0gbG0oTGlmZV9leHBlY3RhbmN5IH4gQWxjb2hvbF9jb25zdW1wdGlvbiArIEluY2lkZW50c19ISVYsIGRhdGEgPSB1ZGFqZSkKCnN1bW1hcnkobW9kZWwpCnN1bW1hcnkobW9kZWxfcmVkdWNlZCkKYGBgCg==