V tomto cvičení analyzujem ekonomické ukazovatele z datasetu
economics.csv, ktorý obsahuje časové rady ekonomických
premenných v USA (napr. nezamestnanosť, osobné príjmy, spotreba a
pod.).
Cieľom je ukázať, ako v R realizovať ekonometrickú analýzu – testovanie
stacionarity, modelovanie a diagnostiku chybných štruktúr.
library(zoo)
library(tseries)
library(lmtest)
library(sandwich)
library(car)
library(ggplot2)
rm(list=ls())
# Nastavenie pracovného adresára (uprav podľa seba)
#setwd("C:/Users/TvojeMeno/Documents/R/Cvicenie6")
# Načítanie údajov
data <- read.csv("economics.csv", header = TRUE, sep = ",", dec = ".", stringsAsFactors = FALSE)
head(data)
econ <- data[, c("date", "pce", "unemploy", "uempmed", "psavert")]
econ$date <- as.Date(econ$date)
str(econ)
'data.frame': 574 obs. of 5 variables:
$ date : Date, format: "1967-07-01" "1967-08-01" "1967-09-01" ...
$ pce : num 507 510 516 512 517 ...
$ unemploy: int 2944 2945 2958 3143 3066 3018 2878 3001 2877 2709 ...
$ uempmed : num 4.5 4.7 4.6 4.9 4.7 4.8 5.1 4.5 4.1 4.6 ...
$ psavert : num 12.6 12.6 11.9 12.9 12.8 11.8 11.7 12.3 11.7 12.3 ...
summary(econ)
date pce unemploy uempmed psavert
Min. :1967-07-01 Min. : 506.7 Min. : 2685 Min. : 4.000 Min. : 2.200
1st Qu.:1979-06-08 1st Qu.: 1578.3 1st Qu.: 6284 1st Qu.: 6.000 1st Qu.: 6.400
Median :1991-05-16 Median : 3936.8 Median : 7494 Median : 7.500 Median : 8.400
Mean :1991-05-17 Mean : 4820.1 Mean : 7771 Mean : 8.609 Mean : 8.567
3rd Qu.:2003-04-23 3rd Qu.: 7626.3 3rd Qu.: 8686 3rd Qu.: 9.100 3rd Qu.:11.100
Max. :2015-04-01 Max. :12193.8 Max. :15352 Max. :25.200 Max. :17.300
par(mfrow=c(2,2))
plot(econ$date, econ$pce, type="l", main="Osobná spotreba (PCE)", xlab="Rok", ylab="Hodnota")
plot(econ$date, econ$unemploy, type="l", main="Počet nezamestnaných", xlab="Rok", ylab="Osoby (tis.)")
plot(econ$date, econ$uempmed, type="l", main="Medián dĺžky nezamestnanosti", xlab="Rok", ylab="Týždne")
plot(econ$date, econ$psavert, type="l", main="Miera úspor", xlab="Rok", ylab="%")
par(mfrow=c(1,1))
# — Testovanie stacionarity (ADF test) —
adf.test(econ$pce)
adf.test(econ$unemploy)
adf.test(econ$uempmed)
adf.test(econ$psavert)
econ$dpce <- c(NA, diff(econ$pce))
econ$dunemploy <- c(NA, diff(econ$unemploy))
econ$duempmed <- c(NA, diff(econ$uempmed))
econ$dpsavert <- c(NA, diff(econ$psavert))
econ_diff <- na.omit(econ)
# --- Korelačná analýza ---
cor(econ_diff[, c("dpce","dunemploy","duempmed","dpsavert")])
dpce dunemploy duempmed dpsavert
dpce 1.00000000 -0.12532393 -0.09258617 -0.33342108
dunemploy -0.12532393 1.00000000 0.05470010 0.03306587
duempmed -0.09258617 0.05470010 1.00000000 0.02612497
dpsavert -0.33342108 0.03306587 0.02612497 1.00000000
### Interpretácia:
Silne záporná korelácia → pohyby opačným smerom Silne kladná korelácia → spoločné trendy
# --- Lineárna regresia ---
model <- lm(dpce ~ dunemploy + duempmed + dpsavert, data=econ_diff)
summary(model)
Call:
lm(formula = dpce ~ dunemploy + duempmed + dpsavert, data = econ_diff)
Residuals:
Min 1Q Median 3Q Max
-145.446 -13.111 -3.575 10.969 141.143
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 20.471343 1.034995 19.779 < 2e-16 ***
dunemploy -0.013512 0.004802 -2.814 0.00507 **
duempmed -3.661019 1.838406 -1.991 0.04691 *
dpsavert -11.613811 1.386562 -8.376 4.29e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.74 on 569 degrees of freedom
Multiple R-squared: 0.1303, Adjusted R-squared: 0.1257
F-statistic: 28.42 on 3 and 569 DF, p-value: < 2.2e-16
### Interpretácia:
Koeficienty – smer vplyvu (kladný/záporný) Pravdepodobnosť (Pr(>|t|)) – ak je < 0.05 → premenná je štatisticky významná R² – vysvetľuje, aký podiel variability spotreby vysvetľujú nezamestnanosť a úspory
# --- Diagnostika modelu ---
par(mfrow=c(2,2))
plot(model)
par(mfrow=c(1,1))
jarque.bera.test(residuals(model))
Jarque Bera Test
data: residuals(model)
X-squared = 760.52, df = 2, p-value < 2.2e-16
bptest(model)
studentized Breusch-Pagan test
data: model
BP = 22.168, df = 3, p-value = 6.019e-05
### Interpretácia:
Q-Q graf → ak body ležia pri čiare, rezíduá sú normálne rozdelené Scale-Location → ak je červená čiara rovná, variancia je konštantná Breusch–Pagan test → test heteroskedasticity p-value < 0.05 → heteroskedasticita prítomná p-value > 0.05 → homoskedasticita
# --- Model s logaritmickou transformáciou ---
model_log <- lm(log(pce) ~ unemploy + uempmed + psavert, data=econ)
summary(model_log)
Call:
lm(formula = log(pce) ~ unemploy + uempmed + psavert, data = econ)
Residuals:
Min 1Q Median 3Q Max
-0.98908 -0.17923 0.01342 0.17903 1.28578
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.997e+00 6.351e-02 141.665 < 2e-16 ***
unemploy 1.247e-04 9.584e-06 13.009 < 2e-16 ***
uempmed 1.707e-02 6.200e-03 2.754 0.00608 **
psavert -2.333e-01 4.465e-03 -52.252 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2991 on 570 degrees of freedom
Multiple R-squared: 0.9004, Adjusted R-squared: 0.8999
F-statistic: 1717 on 3 and 570 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model_log)
par(mfrow=c(1,1))
bptest(model_log)
studentized Breusch-Pagan test
data: model_log
BP = 53.641, df = 3, p-value = 1.339e-11
# — Robustné štandardné chyby (Whiteova korekcia) —
coeftest(model_log, vcov = vcovHC(model_log))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.9965e+00 6.8275e-02 131.7685 < 2e-16 ***
unemploy 1.2469e-04 1.1474e-05 10.8666 < 2e-16 ***
uempmed 1.7075e-02 7.4795e-03 2.2828 0.02281 *
psavert -2.3330e-01 4.1485e-03 -56.2372 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1