Úvod

V tomto cvičení analyzujem ekonomické ukazovatele z datasetu economics.csv, ktorý obsahuje časové rady ekonomických premenných v USA (napr. nezamestnanosť, osobné príjmy, spotreba a pod.).
Cieľom je ukázať, ako v R realizovať ekonometrickú analýzu – testovanie stacionarity, modelovanie a diagnostiku rezíduí (normalita, heteroskedasticita).


Príprava prostredia

library(zoo)
library(tseries)
library(lmtest)
library(sandwich)
library(car)
library(ggplot2)
# NOTE: the former `rm(list = ls())` call was removed on purpose. It only
# deletes objects from the global environment (attached packages and options
# stay as they are), so it does not give a clean session, and it silently
# wipes the workspace of anyone who runs this code interactively.

# Set the working directory (adjust to your own machine); left commented out.
#setwd("C:/Users/TvojeMeno/Documents/R/Cvicenie6")

# Load the data: comma-separated file with a header row and "." as the
# decimal mark; text columns are kept as character, not converted to factors.
data <- read.csv(
  "economics.csv",
  header = TRUE,
  sep = ",",
  dec = ".",
  stringsAsFactors = FALSE
)
# Show the first rows as a quick sanity check of the import.
head(data)

Popis a výber premenných

Popis pôvodných premenných

  • date – dátum pozorovania (časová rada)
  • pce – osobná spotreba (personal consumption expenditures)
  • pop – populácia
  • psavert – miera úspor
  • uempmed – mediánová dĺžka nezamestnanosti (v týždňoch)
  • unemploy – počet nezamestnaných
# Keep only the date and the four series used in the analysis, convert the
# date column with as.Date(), and print the structure of the result.
keep_cols <- c("date", "pce", "unemploy", "uempmed", "psavert")
econ <- data[keep_cols]
econ[["date"]] <- as.Date(econ[["date"]])
str(econ)
'data.frame':   574 obs. of  5 variables:
 $ date    : Date, format: "1967-07-01" "1967-08-01" "1967-09-01" ...
 $ pce     : num  507 510 516 512 517 ...
 $ unemploy: int  2944 2945 2958 3143 3066 3018 2878 3001 2877 2709 ...
 $ uempmed : num  4.5 4.7 4.6 4.9 4.7 4.8 5.1 4.5 4.1 4.6 ...
 $ psavert : num  12.6 12.6 11.9 12.9 12.8 11.8 11.7 12.3 11.7 12.3 ...
# Per-column descriptive statistics (min, quartiles, mean, max) of econ.
summary(econ)
      date                 pce             unemploy        uempmed          psavert      
 Min.   :1967-07-01   Min.   :  506.7   Min.   : 2685   Min.   : 4.000   Min.   : 2.200  
 1st Qu.:1979-06-08   1st Qu.: 1578.3   1st Qu.: 6284   1st Qu.: 6.000   1st Qu.: 6.400  
 Median :1991-05-16   Median : 3936.8   Median : 7494   Median : 7.500   Median : 8.400  
 Mean   :1991-05-17   Mean   : 4820.1   Mean   : 7771   Mean   : 8.609   Mean   : 8.567  
 3rd Qu.:2003-04-23   3rd Qu.: 7626.3   3rd Qu.: 8686   3rd Qu.: 9.100   3rd Qu.:11.100  
 Max.   :2015-04-01   Max.   :12193.8   Max.   :15352   Max.   :25.200   Max.   :17.300  

— Vizualizácia časových radov —

# Draw the four series as line charts on one 2 x 2 page. Each entry pairs a
# column of `econ` with its panel title and y-axis label.
panels <- list(
  list(column = "pce", title = "Osobná spotreba (PCE)", y_label = "Hodnota"),
  list(column = "unemploy", title = "Počet nezamestnaných", y_label = "Osoby (tis.)"),
  list(column = "uempmed", title = "Medián dĺžky nezamestnanosti", y_label = "Týždne"),
  list(column = "psavert", title = "Miera úspor", y_label = "%")
)

par(mfrow = c(2, 2))
for (panel in panels) {
  plot(
    econ$date,
    econ[[panel[["column"]]]],
    type = "l",
    main = panel[["title"]],
    xlab = "Rok",
    ylab = panel[["y_label"]]
  )
}
# Back to a single-panel layout for the plots that follow.
par(mfrow = c(1, 1))

Testovanie stacionarity (ADF test)

# Augmented Dickey-Fuller test on each series in levels. The null hypothesis
# is a unit root, so a small p-value is evidence in favour of stationarity.
adf.test(econ$pce)
adf.test(econ$unemploy)
adf.test(econ$uempmed)
adf.test(econ$psavert)
# First differences of each series, stored as d<name>; the leading NA keeps
# every new column the same length as `econ`.
level_cols <- c("pce", "unemploy", "uempmed", "psavert")
econ[paste0("d", level_cols)] <- lapply(
  econ[level_cols],
  function(series) c(NA, diff(series))
)

# Drop every row that contains an NA, which includes the first row (its
# differences are undefined).
econ_diff <- na.omit(econ)

Korelačná analýza

# Pairwise correlation matrix of the differenced series.
diff_cols <- c("dpce", "dunemploy", "duempmed", "dpsavert")
cor(econ_diff[diff_cols])
                 dpce   dunemploy    duempmed    dpsavert
dpce       1.00000000 -0.12532393 -0.09258617 -0.33342108
dunemploy -0.12532393  1.00000000  0.05470010  0.03306587
duempmed  -0.09258617  0.05470010  1.00000000  0.02612497
dpsavert  -0.33342108  0.03306587  0.02612497  1.00000000

Interpretácia:

  • Silne záporná korelácia → pohyby opačným smerom
  • Silne kladná korelácia → spoločné trendy

Lineárna regresia

# OLS regression of the change in consumption (dpce) on the changes in the
# number of unemployed, the median unemployment duration and the savings
# rate, all taken from the differenced data in econ_diff.
model <- lm(dpce ~ dunemploy + duempmed + dpsavert, data=econ_diff)
# Print coefficients, standard errors, t-tests and overall fit statistics.
summary(model)

Call:
lm(formula = dpce ~ dunemploy + duempmed + dpsavert, data = econ_diff)

Residuals:
     Min       1Q   Median       3Q      Max 
-145.446  -13.111   -3.575   10.969  141.143 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  20.471343   1.034995  19.779  < 2e-16 ***
dunemploy    -0.013512   0.004802  -2.814  0.00507 ** 
duempmed     -3.661019   1.838406  -1.991  0.04691 *  
dpsavert    -11.613811   1.386562  -8.376 4.29e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 24.74 on 569 degrees of freedom
Multiple R-squared:  0.1303,    Adjusted R-squared:  0.1257 
F-statistic: 28.42 on 3 and 569 DF,  p-value: < 2.2e-16

Interpretácia:

  • Koeficienty – smer vplyvu (kladný/záporný)
  • Pravdepodobnosť (Pr(>|t|)) – ak je < 0.05 → premenná je štatisticky významná
  • R² – udáva, aký podiel variability zmien spotreby vysvetľujú premenné zahrnuté v modeli (zmeny nezamestnanosti a miery úspor)

Diagnostika modelu

# Diagnostic plots produced by plot() for the fitted lm object, arranged on
# one 2 x 2 page.
par(mfrow=c(2,2))
plot(model)
# Restore the single-panel layout.
par(mfrow=c(1,1))


# Jarque-Bera test of normality applied to the residuals of `model`.
jarque.bera.test(residuals(model))

    Jarque Bera Test

data:  residuals(model)
X-squared = 760.52, df = 2, p-value < 2.2e-16
# Studentized Breusch-Pagan test for heteroskedasticity in `model`.
bptest(model)

    studentized Breusch-Pagan test

data:  model
BP = 22.168, df = 3, p-value = 6.019e-05

Interpretácia:

  • Q-Q graf → ak body ležia pri čiare, rezíduá sú normálne rozdelené
  • Scale-Location → ak je červená čiara rovná, variancia je konštantná
  • Breusch–Pagan test → test heteroskedasticity
  • p-value < 0.05 → heteroskedasticita prítomná
  • p-value > 0.05 → homoskedasticita

Model s logaritmickou transformáciou

# OLS regression of log(pce) on the unemployment count, median unemployment
# duration and savings rate, fitted on `econ`.
# NOTE(review): the variables enter in levels here, not as the differences
# built earlier -- confirm this is intended given the stationarity tests.
model_log <- lm(log(pce) ~ unemploy + uempmed + psavert, data=econ)
# Print coefficients, standard errors, t-tests and overall fit statistics.
summary(model_log)

Call:
lm(formula = log(pce) ~ unemploy + uempmed + psavert, data = econ)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.98908 -0.17923  0.01342  0.17903  1.28578 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  8.997e+00  6.351e-02 141.665  < 2e-16 ***
unemploy     1.247e-04  9.584e-06  13.009  < 2e-16 ***
uempmed      1.707e-02  6.200e-03   2.754  0.00608 ** 
psavert     -2.333e-01  4.465e-03 -52.252  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2991 on 570 degrees of freedom
Multiple R-squared:  0.9004,    Adjusted R-squared:  0.8999 
F-statistic:  1717 on 3 and 570 DF,  p-value: < 2.2e-16
# Diagnostic plots for the log model, arranged on one 2 x 2 page.
par(mfrow=c(2,2))
plot(model_log)
# Restore the single-panel layout.
par(mfrow=c(1,1))


# Studentized Breusch-Pagan test for heteroskedasticity in `model_log`.
bptest(model_log)

    studentized Breusch-Pagan test

data:  model_log
BP = 53.641, df = 3, p-value = 1.339e-11

Robustné štandardné chyby (Whiteova korekcia)

# Coefficient t-tests with a heteroskedasticity-consistent covariance matrix.
# "HC3" is what vcovHC() uses by default for lm objects; it is spelled out
# here so the estimator in use is visible (White's original estimator is HC0).
robust_vcov <- vcovHC(model_log, type = "HC3")
coeftest(model_log, vcov = robust_vcov)

t test of coefficients:

               Estimate  Std. Error  t value Pr(>|t|)    
(Intercept)  8.9965e+00  6.8275e-02 131.7685  < 2e-16 ***
unemploy     1.2469e-04  1.1474e-05  10.8666  < 2e-16 ***
uempmed      1.7075e-02  7.4795e-03   2.2828  0.02281 *  
psavert     -2.3330e-01  4.1485e-03 -56.2372  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Záver

Hlavné zistenia

  • Premenné unemploy, uempmed a psavert významne ovplyvňujú osobnú spotrebu.
  • Niektoré premenné sú nestacionárne → vhodné je pracovať s ich diferenciami.
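  • Logaritmická transformácia heteroskedasticitu neodstránila – Breusch–Pagan test zamieta homoskedasticitu aj pre model s log(pce) (BP = 53.641, p < 0.001; v diferencovanom modeli BP = 22.168), preto sú koeficienty vyhodnotené s robustnými štandardnými chybami.
  • Diagnostické testy teda ukázali porušenie základných predpokladov (nenormalita rezíduí v diferencovanom modeli, heteroskedasticita v oboch modeloch); závery o významnosti premenných sa preto opierajú o robustné, nie o klasické štandardné chyby.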
LS0tCnRpdGxlOiAiSGV0ZXJvc2tlZGFzdGljaXRhIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKYXV0aG9yOiAiQWxpY2EgVHZyZMOhIgotLS0KCjxzdHlsZT4KLyogUnXFvm92w6kgcG96YWRpZSBwcmUgY2Vsw70gZG9rdW1lbnQgKi8KYm9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjZmZlNmYwOwogICAgZm9udC1mYW1pbHk6IEFyaWFsLCBzYW5zLXNlcmlmOwogICAgbGluZS1oZWlnaHQ6IDEuNjsKfQoKLyogUnXFvm92w6kgbmFkcGlzeSAqLwpoMSwgaDIsIGgzLCBoNCB7CiAgICBjb2xvcjogI2ZmNjliNDsKfQoKLyogQ2l0w6F0eSAqLwpibG9ja3F1b3RlIHsKICAgIGJvcmRlci1sZWZ0OiA0cHggc29saWQgI2ZmNjliNDsKICAgIGJhY2tncm91bmQtY29sb3I6ICNmZmYwZjU7CiAgICBwYWRkaW5nOiAxMHB4IDE1cHg7CiAgICBtYXJnaW46IDEwcHggMDsKICAgIGZvbnQtc3R5bGU6IGl0YWxpYzsKfQoKLyogWnbDvXJhem5lbmllIGvEvsO6xI1vdsO9Y2ggc2xvdiAqLwpzcGFuLmhpZ2hsaWdodCB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjZmZiNmMxOwogICAgZm9udC13ZWlnaHQ6IGJvbGQ7CiAgICBwYWRkaW5nOiAycHggNHB4OwogICAgYm9yZGVyLXJhZGl1czogM3B4Owp9Cjwvc3R5bGU+CgoKIyDDmnZvZAoKViB0b210byBjdmnEjWVuw60gYW5hbHl6dWplbSBla29ub21pY2vDqSB1a2F6b3ZhdGVsZSB6IGRhdGFzZXR1IGBlY29ub21pY3MuY3N2YCwga3RvcsO9IG9ic2FodWplIMSNYXNvdsOpIHJhZHkgZWtvbm9taWNrw71jaCBwcmVtZW5uw71jaCB2IFVTQSAobmFwci4gbmV6YW1lc3RuYW5vc8WlLCBvc29ibsOpIHByw61qbXksIHNwb3RyZWJhIGEgcG9kLikuICAKQ2llxL5vbSBqZSB1a8OhemHFpSwgYWtvIHYgUiByZWFsaXpvdmHFpSBla29ub21ldHJpY2vDuiBhbmFsw716dSDigJMgdGVzdG92YW5pZSBzdGFjaW9uYXJpdHksIG1vZGVsb3ZhbmllIGEgZGlhZ25vc3Rpa3UgY2h5Ym7DvWNoIMWhdHJ1a3TDunIuCgotLS0KCiMjIFByw61wcmF2YSBwcm9zdHJlZGlhCgpgYGB7ciBzZXR1cCwgbWVzc2FnZT1GQUxTRX0KbGlicmFyeSh6b28pCmxpYnJhcnkodHNlcmllcykKbGlicmFyeShsbXRlc3QpCmxpYnJhcnkoc2FuZHdpY2gpCmxpYnJhcnkoY2FyKQpsaWJyYXJ5KGdncGxvdDIpCnJtKGxpc3Q9bHMoKSkKCiMgTmFzdGF2ZW5pZSBwcmFjb3Zuw6lobyBhZHJlc8OhcmEgKHVwcmF2IHBvZMS+YSBzZWJhKQojc2V0d2QoIkM6L1VzZXJzL1R2b2plTWVuby9Eb2N1bWVudHMvUi9DdmljZW5pZTYiKQoKIyBOYcSNw610YW5pZSDDumRham92CmRhdGEgPC0gcmVhZC5jc3YoImVjb25vbWljcy5jc3YiLCBoZWFkZXIgPSBUUlVFLCBzZXAgPSAiLCIsIGRlYyA9ICIuIiwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQpoZWFkKGRhdGEpCmBgYAogIAojICoqUG9waXMgYSB2w71iZXIgcHJlbWVubsO9Y2gqKgoKCiMjIyAqKlBvcGlzIHDDtHZvZG7DvWNoIHByZW1lbm7DvWNoKioKCi0gKipkYXRlKiog4oCTIGTDoXR1bSBwb3pvcm92YW5p
YSAqKMSNYXNvdsOhIHJhZGEpKiAgCi0gKipwY2UqKiDigJMgb3NvYm7DoSBzcG90cmViYSAqKHBlcnNvbmFsIGNvbnN1bXB0aW9uIGV4cGVuZGl0dXJlcykqICAKLSAqKnBvcCoqIOKAkyBwb3B1bMOhY2lhICAKLSAqKnBzYXZlcnQqKiDigJMgbWllcmEgw7pzcG9yICAKLSAqKnVlbXBtZWQqKiDigJMgbWVkacOhbm92w6EgZMS6xb5rYSBuZXphbWVzdG5hbm9zdGkgKih2IHTDvcW+ZMWIb2NoKSogIAotICoqdW5lbXBsb3kqKiDigJMgcG/EjWV0IG5lemFtZXN0bmFuw71jaCAgCiAgCmBgYHtyfQplY29uIDwtIGRhdGFbLCBjKCJkYXRlIiwgInBjZSIsICJ1bmVtcGxveSIsICJ1ZW1wbWVkIiwgInBzYXZlcnQiKV0KZWNvbiRkYXRlIDwtIGFzLkRhdGUoZWNvbiRkYXRlKQpzdHIoZWNvbikKc3VtbWFyeShlY29uKQpgYGAKICAKIyAtLS0gVml6dWFsaXrDoWNpYSDEjWFzb3bDvWNoIHJhZG92IC0tLQogIApgYGB7cn0KcGFyKG1mcm93PWMoMiwyKSkKcGxvdChlY29uJGRhdGUsIGVjb24kcGNlLCB0eXBlPSJsIiwgbWFpbj0iT3NvYm7DoSBzcG90cmViYSAoUENFKSIsIHhsYWI9IlJvayIsIHlsYWI9IkhvZG5vdGEiKQpwbG90KGVjb24kZGF0ZSwgZWNvbiR1bmVtcGxveSwgdHlwZT0ibCIsIG1haW49IlBvxI1ldCBuZXphbWVzdG5hbsO9Y2giLCB4bGFiPSJSb2siLCB5bGFiPSJPc29ieSAodGlzLikiKQpwbG90KGVjb24kZGF0ZSwgZWNvbiR1ZW1wbWVkLCB0eXBlPSJsIiwgbWFpbj0iTWVkacOhbiBkxLrFvmt5IG5lemFtZXN0bmFub3N0aSIsIHhsYWI9IlJvayIsIHlsYWI9IlTDvcW+ZG5lIikKcGxvdChlY29uJGRhdGUsIGVjb24kcHNhdmVydCwgdHlwZT0ibCIsIG1haW49Ik1pZXJhIMO6c3BvciIsIHhsYWI9IlJvayIsIHlsYWI9IiUiKQpwYXIobWZyb3c9YygxLDEpKQpgYGAKIyBUZXN0b3ZhbmllIHN0YWNpb25hcml0eSAoQURGIHRlc3QpCiAgCmBgYHtyfQphZGYudGVzdChlY29uJHBjZSkKYWRmLnRlc3QoZWNvbiR1bmVtcGxveSkKYWRmLnRlc3QoZWNvbiR1ZW1wbWVkKQphZGYudGVzdChlY29uJHBzYXZlcnQpCmBgYApgYGB7cn0KZWNvbiRkcGNlIDwtIGMoTkEsIGRpZmYoZWNvbiRwY2UpKQplY29uJGR1bmVtcGxveSA8LSBjKE5BLCBkaWZmKGVjb24kdW5lbXBsb3kpKQplY29uJGR1ZW1wbWVkIDwtIGMoTkEsIGRpZmYoZWNvbiR1ZW1wbWVkKSkKZWNvbiRkcHNhdmVydCA8LSBjKE5BLCBkaWZmKGVjb24kcHNhdmVydCkpCgplY29uX2RpZmYgPC0gbmEub21pdChlY29uKQpgYGAKICAKIyBLb3JlbGHEjW7DoSBhbmFsw716YQpgYGB7cn0KY29yKGVjb25fZGlmZlssIGMoImRwY2UiLCJkdW5lbXBsb3kiLCJkdWVtcG1lZCIsImRwc2F2ZXJ0IildKQpgYGAKICAKIyMjIEludGVycHJldMOhY2lhOgoKLSBTaWxuZSB6w6Fwb3Juw6Ega29yZWzDoWNpYSDihpIgcG9oeWJ5IG9wYcSNbsO9bSBzbWVyb20KLSBTaWxuZSBrbGFkbsOhIGtvcmVsw6FjaWEg4oaSIHNwb2xvxI1uw6kgdHJlbmR5CgojIExpbmXDoXJuYSByZWdyZXNpYQpgYGB7cn0KbW9k
ZWwgPC0gbG0oZHBjZSB+IGR1bmVtcGxveSArIGR1ZW1wbWVkICsgZHBzYXZlcnQsIGRhdGE9ZWNvbl9kaWZmKQpzdW1tYXJ5KG1vZGVsKQpgYGAKIyMjIEludGVycHJldMOhY2lhOgoKLSBLb2VmaWNpZW50eSDigJMgc21lciB2cGx5dnUgKGtsYWRuw70vesOhcG9ybsO9KQotIFByYXZkZXBvZG9ibm9zxaUgKFByKD58dHwpKSDigJMgYWsgamUgPCAwLjA1IOKGkiBwcmVtZW5uw6EgamUgxaF0YXRpc3RpY2t5IHbDvXpuYW1uw6EKLSBSwrIg4oCTIHZ5c3ZldMS+dWplLCBha8O9IHBvZGllbCB2YXJpYWJpbGl0eSBzcG90cmVieSB2eXN2ZXTEvnVqw7ogbmV6YW1lc3RuYW5vc8WlIGEgw7pzcG9yeQogIAojIERpYWdub3N0aWthIG1vZGVsdQogIApgYGB7cn0KcGFyKG1mcm93PWMoMiwyKSkKcGxvdChtb2RlbCkKcGFyKG1mcm93PWMoMSwxKSkKCmphcnF1ZS5iZXJhLnRlc3QocmVzaWR1YWxzKG1vZGVsKSkKYnB0ZXN0KG1vZGVsKQpgYGAKIyMjIEludGVycHJldMOhY2lhOgoKLSBRLVEgZ3JhZiDihpIgYWsgYm9keSBsZcW+aWEgcHJpIMSNaWFyZSwgcmV6w61kdcOhIHPDuiBub3Jtw6FsbmUgcm96ZGVsZW7DqQotIFNjYWxlLUxvY2F0aW9uIOKGkiBhayBqZSDEjWVydmVuw6EgxI1pYXJhIHJvdm7DoSwgdmFyaWFuY2lhIGplIGtvbsWhdGFudG7DoQotIEJyZXVzY2jigJNQYWdhbiB0ZXN0IOKGkiB0ZXN0IGhldGVyb3NrZWRhc3RpY2l0eQotIHAtdmFsdWUgPCAwLjA1IOKGkiBoZXRlcm9za2VkYXN0aWNpdGEgcHLDrXRvbW7DoQotIHAtdmFsdWUgPiAwLjA1IOKGkiBob21vc2tlZGFzdGljaXRhCiAgCiMgTW9kZWwgcyBsb2dhcml0bWlja291IHRyYW5zZm9ybcOhY2lvdQogIApgYGB7cn0KbW9kZWxfbG9nIDwtIGxtKGxvZyhwY2UpIH4gdW5lbXBsb3kgKyB1ZW1wbWVkICsgcHNhdmVydCwgZGF0YT1lY29uKQpzdW1tYXJ5KG1vZGVsX2xvZykKCnBhcihtZnJvdz1jKDIsMikpCnBsb3QobW9kZWxfbG9nKQpwYXIobWZyb3c9YygxLDEpKQoKYnB0ZXN0KG1vZGVsX2xvZykKYGBgCiMgUm9idXN0bsOpIMWhdGFuZGFyZG7DqSBjaHlieSAoV2hpdGVvdmEga29yZWtjaWEpCiAgCmBgYHtyfQpjb2VmdGVzdChtb2RlbF9sb2csIHZjb3YgPSB2Y292SEMobW9kZWxfbG9nKSkKYGBgCiAgCiMgKipaw6F2ZXIqKgoKIyMjICoqSGxhdm7DqSB6aXN0ZW5pYSoqCgotIFByZW1lbm7DqSAqKnVuZW1wbG95KiosICoqdWVtcG1lZCoqIGEgKipwc2F2ZXJ0KiogdsO9em5hbW5lIG92cGx5dsWIdWrDuiBvc29ibsO6IHNwb3RyZWJ1LiAgCi0gTmlla3RvcsOpIHByZW1lbm5lIHPDuiAqKm5lc3RhY2lvbsOhcm5lKiog4oaSIHZob2Ruw6kgamUgcHJhY292YcWlIHMgaWNoICoqZGlmZXJlbmNpYW1pKiouICAKLSBQbyAqKmxvZ2FyaXRtaWNrZWogdHJhbnNmb3Jtw6FjaWkqKiBzYSBtb2RlbCBzcHLDoXZhIGxlcMWhaWUgKHpuw63FvmVuw6EgaGV0ZXJvc2tlZGFzdGljaXRhKS4gIAotIERpYWdub3N0aWNrw6kgdGVzdHkgcG90dnJkaWxpLCDFvmUgKiptb2RlbCBwbyB0cmFu
c2Zvcm3DoWNpaSBqZSBzcG/EvmFobGl2w70qKiBhIHNwxLrFiGEgesOha2xhZG7DqSBla29ub21ldHJpY2vDqSBwcmVkcG9rbGFkeS4K