rm(list=ls())
library(lmtest) # podpora regresie
library(outliers) # analyza odlahlych hodnot (outliers)
library(gptstudio)
library(kableExtra)
library(knitr)
library(dplyr)
library(broom)
library(corrplot)
library(sandwich)
library(lmtest)
# import the dataset and create a data.frame udaje
#
udaje_svet <- read.csv("udaje/udaje.csv",header=TRUE,sep=",",dec=".",check.names = TRUE)
head(udaje_svet)
## Country Region Year Infant_deaths Under_five_deaths
## 1 Turkiye Middle East 2015 11.1 13.0
## 2 Spain European Union 2015 2.7 3.3
## 3 India Asia 2007 51.5 67.9
## 4 Guyana South America 2006 32.8 40.5
## 5 Israel Middle East 2012 3.4 4.3
## 6 Costa Rica Central America and Caribbean 2006 9.8 11.2
## Adult_mortality Alcohol_consumption Hepatitis_B Measles BMI Polio Diphtheria
## 1 105.8240 1.32 97 65 27.8 97 97
## 2 57.9025 10.35 97 94 26.0 97 97
## 3 201.0765 1.57 60 35 21.2 67 64
## 4 222.1965 5.68 93 74 25.3 92 93
## 5 57.9510 2.89 97 89 27.0 94 94
## 6 95.2200 4.19 88 86 26.4 89 89
## Incidents_HIV GDP_per_capita Population_mln Thinness_ten_nineteen_years
## 1 0.08 11006 78.53 4.9
## 2 0.09 25742 46.44 0.6
## 3 0.13 1076 1183.21 27.1
## 4 0.79 4146 0.75 5.7
## 5 0.08 33995 7.91 1.2
## 6 0.16 9110 4.35 2.0
## Thinness_five_nine_years Schooling Economy_status_Developed
## 1 4.8 7.8 0
## 2 0.5 9.7 1
## 3 28.0 5.0 0
## 4 5.5 7.9 0
## 5 1.1 12.8 1
## 6 1.9 7.9 0
## Economy_status_Developing Life_expectancy
## 1 1 76.5
## 2 0 82.8
## 3 1 65.4
## 4 1 67.0
## 5 0 81.7
## 6 1 78.2
udaje_portugal <- udaje_svet[udaje_svet$Country == "Portugal",]
udaje_portugal
## Country Region Year Infant_deaths Under_five_deaths
## 310 Portugal European Union 2013 3.0 3.7
## 346 Portugal European Union 2007 3.3 4.2
## 734 Portugal European Union 2014 3.0 3.6
## 874 Portugal European Union 2015 3.0 3.6
## 927 Portugal European Union 2011 3.0 3.7
## 945 Portugal European Union 2004 3.9 5.0
## 1337 Portugal European Union 2002 4.7 6.1
## 1443 Portugal European Union 2008 3.2 4.0
## 1678 Portugal European Union 2001 5.1 6.6
## 1812 Portugal European Union 2012 3.0 3.7
## 1933 Portugal European Union 2000 5.5 7.2
## 2086 Portugal European Union 2005 3.6 4.6
## 2329 Portugal European Union 2010 3.1 3.8
## 2490 Portugal European Union 2006 3.4 4.4
## 2701 Portugal European Union 2003 4.3 5.5
## 2781 Portugal European Union 2009 3.1 3.9
## Adult_mortality Alcohol_consumption Hepatitis_B Measles BMI Polio
## 310 78.8295 9.52 98 96 25.7 98
## 346 91.6555 11.54 97 95 25.8 96
## 734 76.9805 10.20 98 96 25.7 98
## 874 73.2830 10.34 98 95 25.7 98
## 927 83.8420 10.86 97 96 25.8 97
## 945 98.0330 12.26 94 79 25.7 95
## 1337 106.0975 12.50 82 64 25.6 96
## 1443 89.2665 11.47 97 95 25.8 97
## 1678 108.3190 12.63 70 56 25.6 96
## 1812 80.7525 9.76 98 96 25.8 98
## 1933 109.6325 12.77 58 48 25.5 96
## 2086 99.1470 11.97 94 87 25.8 93
## 2329 86.1460 11.26 97 95 25.8 97
## 2490 94.1855 11.81 97 95 25.8 97
## 2701 103.6955 12.22 94 72 25.7 96
## 2781 87.8230 11.50 96 95 25.8 96
## Diphtheria Incidents_HIV GDP_per_capita Population_mln
## 310 98 0.14 18585 10.46
## 346 97 0.18 19951 10.54
## 734 98 0.13 18833 10.40
## 874 98 0.12 19250 10.36
## 927 97 0.15 19365 10.56
## 945 95 0.19 19110 10.48
## 1337 98 0.21 19068 10.42
## 1443 97 0.17 19986 10.56
## 1678 97 0.22 19026 10.36
## 1812 98 0.14 18655 10.51
## 1933 96 0.22 18795 10.29
## 2086 93 0.19 19224 10.50
## 2329 98 0.16 19670 10.57
## 2490 97 0.18 19501 10.52
## 2701 99 0.20 18819 10.46
## 2781 96 0.16 19343 10.57
## Thinness_ten_nineteen_years Thinness_five_nine_years Schooling
## 310 0.7 0.5 8.6
## 346 0.7 0.5 7.7
## 734 0.7 0.5 8.9
## 874 0.7 0.5 9.1
## 927 0.7 0.5 8.3
## 945 0.7 0.6 7.4
## 1337 0.8 0.6 7.0
## 1443 0.7 0.5 7.8
## 1678 0.8 0.6 6.9
## 1812 0.7 0.5 8.5
## 1933 0.8 0.6 6.8
## 2086 0.7 0.6 7.5
## 2329 0.7 0.5 8.1
## 2490 0.7 0.5 7.6
## 2701 0.7 0.6 7.2
## 2781 0.7 0.5 7.9
## Economy_status_Developed Economy_status_Developing Life_expectancy
## 310 1 0 80.7
## 346 1 0 78.3
## 734 1 0 81.1
## 874 1 0 81.1
## 927 1 0 80.5
## 945 1 0 77.7
## 1337 1 0 77.1
## 1443 1 0 78.5
## 1678 1 0 76.8
## 1812 1 0 80.4
## 1933 1 0 76.3
## 2086 1 0 78.1
## 2329 1 0 79.0
## 2490 1 0 78.4
## 2701 1 0 77.2
## 2781 1 0 78.7
udaje_portugal$y <- udaje_portugal$Life_expectancy
udaje_portugal$x <- udaje_portugal$Alcohol_consumption
#
fit_klasicky <- lm(y ~ x, data = udaje_portugal)
summary(fit_klasicky)
##
## Call:
## lm(formula = y ~ x, data = udaje_portugal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.82921 -0.28194 -0.00681 0.21178 0.94241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.5365 1.4875 64.23 < 2e-16 ***
## x -1.4714 0.1298 -11.33 1.94e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5134 on 14 degrees of freedom
## Multiple R-squared: 0.9017, Adjusted R-squared: 0.8947
## F-statistic: 128.4 on 1 and 14 DF, p-value: 1.945e-08
dwtest(fit_klasicky)
##
## Durbin-Watson test
##
## data: fit_klasicky
## DW = 0.86825, p-value = 0.003709
## alternative hypothesis: true autocorrelation is greater than 0
udaje_portugal$y_lag <- c(NA, head(udaje_portugal$y, -1)) # posunie to o jedno obdobie dozadu
udaje2<- na.omit(udaje_portugal) # vynecha sa prve pozorovanie (NA)
fit <- lm(y ~ x + y_lag, data = udaje2)
summary(fit)
##
## Call:
## lm(formula = y ~ x + y_lag, data = udaje2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86525 -0.14861 0.04537 0.23437 0.50393
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 87.90521 5.61918 15.644 2.40e-09 ***
## x -1.62121 0.12421 -13.052 1.89e-08 ***
## y_lag 0.11957 0.06968 1.716 0.112
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4266 on 12 degrees of freedom
## Multiple R-squared: 0.9347, Adjusted R-squared: 0.9239
## F-statistic: 85.92 on 2 and 12 DF, p-value: 7.733e-08
Odhadované parametre sú:
coef_fit <- coef(fit)
c_hat <- coef_fit["(Intercept)"]
beta_hat <- coef_fit["x"]
lambda_hat <- coef_fit["y_lag"]
alpha_hat <- c_hat / (1 - lambda_hat)
long_run_hat <- beta_hat / (1 - lambda_hat)
estimates <- data.frame(
parameter = c("c", "beta", "lambda", "alpha = c/(1-lambda)", "dlhodobý efekt beta/(1-lambda)"),
odhad = c(c_hat, beta_hat, lambda_hat, alpha_hat, long_run_hat)
)
estimates
## parameter odhad
## 1 c 87.9052068
## 2 beta -1.6212140
## 3 lambda 0.1195715
## 4 alpha = c/(1-lambda) 99.8436610
## 5 dlhodobý efekt beta/(1-lambda) -1.8413920
Pre dynamické modely časových radov je rozumné ukázať aj HAC
štandardné chyby. Balíky sandwich a lmtest nie
sú súčasťou úplne základnej inštalácie R, preto ich načítame len vtedy,
ak sú dostupné.
nw_vcov <- sandwich::NeweyWest(fit, lag = 4, prewhite = FALSE, adjust = TRUE)
lmtest::coeftest(fit, vcov. = nw_vcov)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 87.905207 4.914019 17.889 5.109e-10 ***
## x -1.621214 0.151512 -10.700 1.716e-07 ***
## y_lag 0.119571 0.058932 2.029 0.06525 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
res <- residuals(fit)
plot(res, type = "l", main = "Rezíduá z transformovaného modelu",
xlab = "čas", ylab = "rezíduum")
abline(h = 0, lty = 2)
dwtest(fit)
##
## Durbin-Watson test
##
## data: fit
## DW = 1.6923, p-value = 0.2565
## alternative hypothesis: true autocorrelation is greater than 0
Ak rezíduá vykazujú autokoreláciu, klasické OLS štandardné chyby môžu byť zavádzajúce. To je jeden z dôvodov, prečo sme vyššie uviedli HAC štandardné chyby.
Koyckova transformácia je užitočná, keď chceme odhadnúť distribuované oneskorenia bez veľkého počtu parametrov. Jej hlavné výhody sú:
Jej hlavné obmedzenia sú: