V tomto cvičení použijeme databázu U.S. Economic Time Series (1967–2015).
Cieľom je analyzovať spotrebu domácností (PCE) ako
funkciu vybraných ekonomických ukazovateľov —
konkrétne nezamestnanosti (unemploy), miery úspor
(psavert) a populácie (pop).
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(lmtest)
library(sandwich)
library(car)
## Loading required package: carData
rm(list = ls())
udaje <- read.csv("economics.csv", header = TRUE, sep = ",", dec = ".", stringsAsFactors = FALSE)
if("date" %in% names(udaje)) {
possible_dates <- try(as.Date(udaje$date), silent = TRUE)
if(!inherits(possible_dates, "try-error") && !all(is.na(possible_dates))) {
udaje$date <- possible_dates
udaje$Year <- as.numeric(format(udaje$date, "%Y"))
} else {
udaje$Year <- as.numeric(substr(udaje$date, nchar(udaje$date)-3, nchar(udaje$date)))
}
}
tail(udaje)
summary(udaje)
## X date pce pop
## Min. : 1.0 Min. :1967-07-01 Min. : 506.7 Min. :198712
## 1st Qu.:144.2 1st Qu.:1979-06-08 1st Qu.: 1578.3 1st Qu.:224896
## Median :287.5 Median :1991-05-16 Median : 3936.8 Median :253060
## Mean :287.5 Mean :1991-05-17 Mean : 4820.1 Mean :257160
## 3rd Qu.:430.8 3rd Qu.:2003-04-23 3rd Qu.: 7626.3 3rd Qu.:290291
## Max. :574.0 Max. :2015-04-01 Max. :12193.8 Max. :320402
## psavert uempmed unemploy Year
## Min. : 2.200 Min. : 4.000 Min. : 2685 Min. :1967
## 1st Qu.: 6.400 1st Qu.: 6.000 1st Qu.: 6284 1st Qu.:1979
## Median : 8.400 Median : 7.500 Median : 7494 Median :1991
## Mean : 8.567 Mean : 8.609 Mean : 7771 Mean :1991
## 3rd Qu.:11.100 3rd Qu.: 9.100 3rd Qu.: 8686 3rd Qu.:2003
## Max. :17.300 Max. :25.200 Max. :15352 Max. :2015
Zameriame sa na tieto premenné: pce — osobné výdavky spotrebiteľov (dependent variable) unemploy — nezamestnaní psavert — miera úspor pop — populácia uempmed — medián dĺžky nezamestnanosti
udaje.2014 <- udaje[udaje$Year == 2014, c("pce","unemploy","psavert","pop","uempmed")]
summary(udaje.2014)
## pce unemploy psavert pop
## Min. :11512 Min. : 8717 Min. :7.100 Min. :317594
## 1st Qu.:11688 1st Qu.: 9219 1st Qu.:7.275 1st Qu.:318046
## Median :11839 Median : 9604 Median :7.400 Median :318563
## Mean :11824 Mean : 9602 Mean :7.350 Mean :318619
## 3rd Qu.:11974 3rd Qu.: 9945 3rd Qu.:7.400 3rd Qu.:319182
## Max. :12062 Max. :10380 Max. :7.600 Max. :319746
## uempmed
## Min. :12.90
## 1st Qu.:13.07
## Median :13.70
## Mean :14.18
## 3rd Qu.:15.47
## Max. :15.90
column_medians <- sapply(udaje.2014, median, na.rm = TRUE)
udaje_imputed <- udaje.2014
for (col in names(udaje_imputed)) {
udaje_imputed[[col]][is.na(udaje_imputed[[col]])] <- column_medians[col]
}
udaje.2014 <- udaje_imputed
par(mfrow = c(2, 2))
par(mar = c(4, 4, 2, 1))
for (col in names(udaje.2014)) {
boxplot(udaje.2014[[col]], main = paste("Premenná:", col), col = "lightblue")
}
mtext("Boxploty premenných (rok 2014)", outer = TRUE, cex = 1.2, font = 2)
par(mfrow = c(1, 1))
Môj model: \[ PCE_i = \beta_0 + \beta_1 unemploy_i + \beta_2 psavert_i + \beta_3 pop_i + \varepsilon_i \]
if (nrow(udaje.2014) > ncol(udaje.2014)) {
model <- lm(pce ~ unemploy + psavert + pop, data = udaje.2014)
summary(model)
} else {
cat("⚠️ Príliš málo pozorovaní pre odhad modelu!\n")
}
##
## Call:
## lm(formula = pce ~ unemploy + psavert + pop, data = udaje.2014)
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.121 -22.129 4.932 19.109 48.048
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.107e+04 1.593e+04 -4.462 0.002106 **
## unemploy 8.848e-04 6.534e-02 0.014 0.989528
## psavert -5.511e+00 8.591e+01 -0.064 0.950424
## pop 2.603e-01 4.808e-02 5.413 0.000636 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.35 on 8 degrees of freedom
## Multiple R-squared: 0.972, Adjusted R-squared: 0.9615
## F-statistic: 92.5 on 3 and 8 DF, p-value: 1.5e-06
par(mfrow = c(2, 2))
plot(model)
par(mfrow = c(1, 1))
residuals_model <- residuals(model)
jb_test <- jarque.bera.test(residuals_model)
jb_test
##
## Jarque Bera Test
##
## data: residuals_model
## X-squared = 0.6942, df = 2, p-value = 0.7067
outlier_test <- outlierTest(model)
outlier_test
## No Studentized residuals with Bonferroni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferroni p
## 570 -2.668789 0.032056 0.38467
bptest(model) # Breusch-Pagan test
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 0.32444, df = 3, p-value = 0.9554
coeftest(model, vcov = vcovHC(model, type = "HC3")) # robustné chyby
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.1074e+04 2.1153e+04 -3.3600 0.009931 **
## unemploy 8.8478e-04 7.7104e-02 0.0115 0.991125
## psavert -5.5113e+00 1.5869e+02 -0.0347 0.973146
## pop 2.6028e-01 6.3376e-02 4.1069 0.003405 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Ak sa ukáže, že rezíduá nie sú normálne alebo existuje heteroskedasticita, transformujeme niektoré premenné logaritmicky:
\[ \log(PCE_i) = \beta_0 + \beta_1 unemploy_i + \beta_2 psavert_i + \beta_3 \log(pop_i) + \varepsilon_i \]
model2 <- lm(log(pce) ~ +1 + unemploy + psavert + log(pop), data = udaje.2014)
summary(model2)
##
## Call:
## lm(formula = log(pce) ~ +1 + unemploy + psavert + log(pop), data = udaje.2014)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0046410 -0.0019290 0.0004409 0.0016685 0.0041598
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.960e+01 1.697e+01 -4.691 0.001560 **
## unemploy 9.162e-08 5.693e-06 0.016 0.987553
## psavert -1.379e-04 7.485e-03 -0.018 0.985753
## log(pop) 7.021e+00 1.335e+00 5.259 0.000765 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.003254 on 8 degrees of freedom
## Multiple R-squared: 0.9704, Adjusted R-squared: 0.9593
## F-statistic: 87.39 on 3 and 8 DF, p-value: 1.87e-06
par(mfrow = c(2, 2))
plot(model2)
par(mfrow = c(1, 1))
jb_test2 <- jarque.bera.test(residuals(model2))
jb_test2
##
## Jarque Bera Test
##
## data: residuals(model2)
## X-squared = 0.71052, df = 2, p-value = 0.701
bptest(model2)
##
## studentized Breusch-Pagan test
##
## data: model2
## BP = 0.33263, df = 3, p-value = 0.9538
coeftest(model2, vcov = vcovHC(model2, type = "HC3"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9596e+01 2.2421e+01 -3.5501 0.007508 **
## unemploy 9.1621e-08 6.7364e-06 0.0136 0.989481
## psavert -1.3789e-04 1.3837e-02 -0.0100 0.992293
## log(pop) 7.0215e+00 1.7629e+00 3.9830 0.004045 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
log(pce) a
log(pop)):