Logaritmine transformacija

NR_Log_Data <- lm(gpm ~ log(vdu) + log(nedarbas), data = Duomenys)
summary(NR_Log_Data)
## 
## Call:
## lm(formula = gpm ~ log(vdu) + log(nedarbas), data = Duomenys)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -143565  -33338    -596   34276  133792 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1054215     318905  -3.306  0.00253 ** 
## log(vdu)        238478      35853   6.651 2.71e-07 ***
## log(nedarbas)  -116833      58198  -2.008  0.05409 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 56380 on 29 degrees of freedom
## Multiple R-squared:  0.7234, Adjusted R-squared:  0.7043 
## F-statistic: 37.92 on 2 and 29 DF,  p-value: 8.079e-09
print("Rezultatuose matomas tik vienas is dvieju kintamuju, kuris yra statistiskai reiksmingas ir turi tiegiama rysy su GPM: (log(vdu)) (p < 0.05). Determinacijos koeficientas rodo, jog modelis paaiskina (72.34)% GPM variacijos, taigi modelio tinkamumas tik vidutinis")
## [1] "Rezultatuose matomas tik vienas is dvieju kintamuju, kuris yra statistiskai reiksmingas ir turi tiegiama rysy su GPM: (log(vdu)) (p < 0.05). Determinacijos koeficientas rodo, jog modelis paaiskina (72.34)% GPM variacijos, taigi modelio tinkamumas tik vidutinis"

Laipsnine transformacija

NR_Log_Data_2 <- lm(gpm ~ vdu + I(vdu^2)+ nedarbas + I(nedarbas^2), data = Duomenys)
summary(NR_Log_Data_2)
## 
## Call:
## lm(formula = gpm ~ vdu + I(vdu^2) + nedarbas + I(nedarbas^2), 
##     data = Duomenys)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -111880  -28824    8440   30504   89087 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    1.100e+06  3.627e+05   3.034  0.00529 **
## vdu           -7.642e+02  4.157e+02  -1.838  0.07707 . 
## I(vdu^2)       4.297e-01  1.806e-01   2.379  0.02469 * 
## nedarbas      -8.533e+04  5.084e+04  -1.678  0.10479   
## I(nedarbas^2)  3.168e+03  2.874e+03   1.102  0.28017   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 51390 on 27 degrees of freedom
## Multiple R-squared:  0.786,  Adjusted R-squared:  0.7543 
## F-statistic:  24.8 on 4 and 27 DF,  p-value: 1.058e-08
print("Tik laipsninis vidutinio darbo uzmokescio kintamasis turi statistiskai reiskminga itaka priklausomam kinamajam, palyginus su ankstesniu modeliu, sio determinacijos koeficientas rodo didesni variacijos paaiskinamuma - 78.6 %")
## [1] "Tik laipsninis vidutinio darbo uzmokescio kintamasis turi statistiskai reiskminga itaka priklausomam kinamajam, palyginus su ankstesniu modeliu, sio determinacijos koeficientas rodo didesni variacijos paaiskinamuma - 78.6 %"

Polinomine transformacija

NR_Log_Data_3 <- lm(gpm ~ poly(vdu,2,raw = T)+poly(nedarbas,2,raw = T), data = Duomenys)
summary(NR_Log_Data_3)
## 
## Call:
## lm(formula = gpm ~ poly(vdu, 2, raw = T) + poly(nedarbas, 2, 
##     raw = T), data = Duomenys)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -111880  -28824    8440   30504   89087 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  1.100e+06  3.627e+05   3.034  0.00529 **
## poly(vdu, 2, raw = T)1      -7.642e+02  4.157e+02  -1.838  0.07707 . 
## poly(vdu, 2, raw = T)2       4.297e-01  1.806e-01   2.379  0.02469 * 
## poly(nedarbas, 2, raw = T)1 -8.533e+04  5.084e+04  -1.678  0.10479   
## poly(nedarbas, 2, raw = T)2  3.168e+03  2.874e+03   1.102  0.28017   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 51390 on 27 degrees of freedom
## Multiple R-squared:  0.786,  Adjusted R-squared:  0.7543 
## F-statistic:  24.8 on 4 and 27 DF,  p-value: 1.058e-08
print("Is gautu regresijos rezultatu naudojant polinominius kintamuosius, galima isskirti tik viena, tai butu antrasis vidutinio darbo uzmokescio polinominis kintamasis, kuris yra statistiskai reiksmingas ir gali tureti rysi su gpm kintamuoju. Taip pat determinacijos koeficientas nurodo 76.8 % variacijos paaiskinauma")
## [1] "Is gautu regresijos rezultatu naudojant polinominius kintamuosius, galima isskirti tik viena, tai butu antrasis vidutinio darbo uzmokescio polinominis kintamasis, kuris yra statistiskai reiksmingas ir gali tureti rysi su gpm kintamuoju. Taip pat determinacijos koeficientas nurodo 76.8 % variacijos paaiskinauma"

Splainai su mazgais pagal kvartilius

knots_vdu <- quantile(Duomenys$vdu, p = c(0.25,0.5,0.75))
knots_nedarbas <- quantile(Duomenys$vdu, p = c(0.25,0.5,0.75))

NR_Log_Data_4 <- lm(gpm ~ bs(vdu, knots = knots_vdu) + bs(nedarbas, knots = knots_nedarbas), data = Duomenys)
summary(NR_Log_Data_4)
## 
## Call:
## lm(formula = gpm ~ bs(vdu, knots = knots_vdu) + bs(nedarbas, 
##     knots = knots_nedarbas), data = Duomenys)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -86401 -34672   1578  29650  91716 
## 
## Coefficients: (3 not defined because of singularities)
##                                       Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                             251316     166762   1.507  0.14603   
## bs(vdu, knots = knots_vdu)1              66984     189952   0.353  0.72772   
## bs(vdu, knots = knots_vdu)2             183518     145846   1.258  0.22147   
## bs(vdu, knots = knots_vdu)3             172325     203348   0.847  0.40588   
## bs(vdu, knots = knots_vdu)4             184847     159015   1.162  0.25751   
## bs(vdu, knots = knots_vdu)5             308479     158866   1.942  0.06508 . 
## bs(vdu, knots = knots_vdu)6             477448     159902   2.986  0.00681 **
## bs(nedarbas, knots = knots_nedarbas)1  -282867     145151  -1.949  0.06419 . 
## bs(nedarbas, knots = knots_nedarbas)2    33815     123228   0.274  0.78633   
## bs(nedarbas, knots = knots_nedarbas)3   -62811     178964  -0.351  0.72895   
## bs(nedarbas, knots = knots_nedarbas)4       NA         NA      NA       NA   
## bs(nedarbas, knots = knots_nedarbas)5       NA         NA      NA       NA   
## bs(nedarbas, knots = knots_nedarbas)6       NA         NA      NA       NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47450 on 22 degrees of freedom
## Multiple R-squared:  0.8514, Adjusted R-squared:  0.7906 
## F-statistic:    14 on 9 and 22 DF,  p-value: 3.381e-07

Grafikas smooth su kubiniu splainu

ggplot(Duomenys, aes(nedarbas, gpm, color = vdu)) +
  geom_point() +
  stat_smooth(method = lm, formula = y ~ splines::bs(x, df = 3))

Grafikas smooth su netiesine israiska

ggplot(Duomenys, aes(nedarbas, gpm, color = vdu)) +
  geom_point() +
  stat_smooth(method = lm, formula = y ~ x)

Grafikas smooth su 5-ojo laipsnio polinomu

ggplot(Duomenys, aes(nedarbas, gpm, color = vdu)) +
  geom_point() +
  stat_smooth(method = lm, formula = y ~ poly(x,5,raw = T))

Smooth grafikas

ggplot(Duomenys, aes(nedarbas, gpm, color = vdu)) +
  geom_point() +
  stat_smooth()

Grafikas smooth apibendrintu adytiviu modeliu

modelis <- gam(gpm ~ s(nedarbas)+s(vdu),data = Duomenys)
summary(modelis)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## gpm ~ s(nedarbas) + s(vdu)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   349430       8293   42.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##               edf Ref.df      F  p-value    
## s(nedarbas) 2.358  2.925  1.604    0.236    
## s(vdu)      3.015  3.710 20.294 1.15e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.795   Deviance explained = 83.1%
## GCV = 2.748e+09  Scale est. = 2.2007e+09  n = 32
ggplot(Duomenys, aes(nedarbas, gpm, color = vdu)) +
  geom_point() +
  stat_smooth(method = gam,formula = y ~ s(x))

Laiko eilutes

gpm_ts <- ts(Duomenys$gpm, start = c(2014, 1), frequency = 4)
vdu_ts <- ts(Duomenys$vdu, start = c(2014, 1), frequency = 4)
nedarbas_ts <- ts(Duomenys$nedarbas, start = c(2014, 1), frequency = 4)

autoplot(gpm_ts) + xlab("Metai") + ylab("gpm")

autoplot(vdu_ts) + xlab("Metai") + ylab("vdu")

autoplot(nedarbas_ts) + xlab("Metai") + ylab("nedarbas")

Duomenys_ts <- ts(Duomenys[, 2:4], start = c(2014, 1), frequency = 4)

GPM Prognoze

Prognoze <- window(gpm_ts,start = 2014)
h <- 10


fit.lin <- tslm(Prognoze ~ trend)

fcasts.lin <- forecast(fit.lin, h = h)

fit.exp <- tslm(Prognoze ~ trend, lambda = 0)
fcasts.exp <- forecast(fit.exp, h = h)

t <- time(Prognoze)
t.break1 <- 2016-02-01
t.break2 <- 2018-02-01
tb1 <- ts(pmax(0, t - t.break1), start = 2014)
tb2 <- ts(pmax(0, t - t.break2), start = 2014)

fit.pw <- tslm(Prognoze ~ t + tb1 + tb2)
t.new <- t[length(t)] + seq(h)
tb1.new <- tb1[length(tb1)] + seq(h)
tb2.new <- tb2[length(tb2)] + seq(h)

newdata <- cbind(t = t.new, tb1 = tb1.new, tb2 = tb2.new) %>%
  as.data.frame()
fcasts.pw <- forecast(fit.pw, newdata = newdata)

fit.spline <- tslm(Prognoze ~ t + I(t^2) + I(t^3) +
                     I(tb1^3) + I(tb2^3))
fcasts.spl <- forecast(fit.spline, newdata = newdata)

autoplot(Prognoze) +
  autolayer(fitted(fit.lin), series = "Linijine") +
  autolayer(fitted(fit.exp), series = "Eksponentine") +
  autolayer(fitted(fit.pw), series = "Dalinis") +
  autolayer(fitted(fit.spline), series = "Kubinis splainas") +
  autolayer(fcasts.pw, series = "Dalinis") +
  autolayer(fcasts.lin, series = "Linijinis", PI = FALSE) +
  autolayer(fcasts.exp, series = "Eksponentine", PI = FALSE) +
  autolayer(fcasts.spl, series = "Kubinis splainas", PI = FALSE) +
  xlab("Metai") + ylab("") +
  ggtitle("GPM") +
  guides(colour = guide_legend(title = ""))

Prognozavimas su kintamuoju Vidutinis darbo uzmokestis

Prognoze <- window(vdu_ts,start = 2014)
h <- 10


fit.lin <- tslm(Prognoze ~ trend)

fcasts.lin <- forecast(fit.lin, h = h)

fit.exp <- tslm(Prognoze ~ trend, lambda = 0)
fcasts.exp <- forecast(fit.exp, h = h)

t <- time(Prognoze)
t.break1 <- 2016-02-01
t.break2 <- 2018-02-01
tb1 <- ts(pmax(0, t - t.break1), start = 2014)
tb2 <- ts(pmax(0, t - t.break2), start = 2014)

fit.pw <- tslm(Prognoze ~ t + tb1 + tb2)
t.new <- t[length(t)] + seq(h)
tb1.new <- tb1[length(tb1)] + seq(h)
tb2.new <- tb2[length(tb2)] + seq(h)

newdata <- cbind(t = t.new, tb1 = tb1.new, tb2 = tb2.new) %>%
  as.data.frame()
fcasts.pw <- forecast(fit.pw, newdata = newdata)

fit.spline <- tslm(Prognoze ~ t + I(t^2) + I(t^3) +
                     I(tb1^3) + I(tb2^3))
fcasts.spl <- forecast(fit.spline, newdata = newdata)

autoplot(Prognoze) +
  autolayer(fitted(fit.lin), series = "Linijine") +
  autolayer(fitted(fit.exp), series = "Eksponentine") +
  autolayer(fitted(fit.pw), series = "Dalinis") +
  autolayer(fitted(fit.spline), series = "Kubinis splainas") +
  autolayer(fcasts.pw, series = "Dalinis") +
  autolayer(fcasts.lin, series = "Linijinis", PI = FALSE) +
  autolayer(fcasts.exp, series = "Eksponentine", PI = FALSE) +
  autolayer(fcasts.spl, series = "Kubinis splainas", PI = FALSE) +
  xlab("Metai") + ylab("") +
  ggtitle("Vidutinis darbo uzmokestis") +
  guides(colour = guide_legend(title = ""))

Prognozavimas su kintamuoju Nedarbas

Prognoze <- window(nedarbas_ts,start = 2014)
h <- 10


fit.lin <- tslm(Prognoze ~ trend)

fcasts.lin <- forecast(fit.lin, h = h)

fit.exp <- tslm(Prognoze ~ trend, lambda = 0)
fcasts.exp <- forecast(fit.exp, h = h)

t <- time(Prognoze)
t.break1 <- 2016-02-01
t.break2 <- 2018-02-01
tb1 <- ts(pmax(0, t - t.break1), start = 2014)
tb2 <- ts(pmax(0, t - t.break2), start = 2014)

fit.pw <- tslm(Prognoze ~ t + tb1 + tb2)
t.new <- t[length(t)] + seq(h)
tb1.new <- tb1[length(tb1)] + seq(h)
tb2.new <- tb2[length(tb2)] + seq(h)

newdata <- cbind(t = t.new, tb1 = tb1.new, tb2 = tb2.new) %>%
  as.data.frame()
fcasts.pw <- forecast(fit.pw, newdata = newdata)

fit.spline <- tslm(Prognoze ~ t + I(t^2) + I(t^3) +
                     I(tb1^3) + I(tb2^3))
fcasts.spl <- forecast(fit.spline, newdata = newdata)

autoplot(Prognoze) +
  autolayer(fitted(fit.lin), series = "Linijine") +
  autolayer(fitted(fit.exp), series = "Eksponentine") +
  autolayer(fitted(fit.pw), series = "Dalinis") +
  autolayer(fitted(fit.spline), series = "Kubinis splainas") +
  autolayer(fcasts.pw, series = "Dalinis") +
  autolayer(fcasts.lin, series = "Linijinis", PI = FALSE) +
  autolayer(fcasts.exp, series = "Eksponentine", PI = FALSE) +
  autolayer(fcasts.spl, series = "Kubinis splainas", PI = FALSE) +
  xlab("Metai") + ylab("") +
  ggtitle("Nedarbas") +
  guides(colour = guide_legend(title = ""))