Будет совсем маленькая-маленькая лекция с кучей пакетов:
library(xts)
library(lubridate)
library(plm)
library(forecast)
library(corrplot)
Скачаем данные по распространению гриппа в России и точки зрения Google.
flu <- read.csv('http://www.google.org/flutrends/about/data/flu/ru/data.txt', skip = 10)
head(flu)
## Date Russia
## 1 2004-10-03 150
## 2 2004-10-10 168
## 3 2004-10-17 242
## 4 2004-10-24 284
## 5 2004-10-31 311
## 6 2004-11-07 356
plot(flu$Russia, type='l')
Формат даты:
somedate <- as.Date('2015-5-5')
day(somedate)
## [1] 5
week(somedate)
## [1] 18
weekdays(somedate)
## [1] "вторник"
Ряд дат:
str(flu)
## 'data.frame': 567 obs. of 2 variables:
## $ Date : Factor w/ 567 levels "2004-10-03","2004-10-10",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Russia: int 150 168 242 284 311 356 339 354 366 383 ...
flu$Date <- as.Date(flu$Date)
str(flu)
## 'data.frame': 567 obs. of 2 variables:
## $ Date : Date, format: "2004-10-03" "2004-10-10" ...
## $ Russia: int 150 168 242 284 311 356 339 354 366 383 ...
Теперь графики должны стать лучше:
plot(flu$Date, flu$Russia, type='l')
Создадим временной ряд (сразу с помощью xts):
flu$Date[1]
## [1] "2004-10-03"
TS <- ts(flu$Russia, frequency = 52, start = c(2004,10,3))
str(TS)
## Time-Series [1:567] from 2004 to 2015: 150 168 242 284 311 356 339 354 366 383 ...
Клевые штуки:
dec <- decompose(TS)
plot(dec)
plot(TS)
plot(TS - dec$seasonal)
Экспоненциальное сглаживание Holt-Winters:
hw <- HoltWinters(TS, beta = FALSE, gamma = FALSE)
plot(hw)
И просто Holt:
hw <- HoltWinters(TS, gamma = FALSE)
plot(hw)
Автокорреляции:
acf(TS, lag.max = 20)
Box.test(TS, lag=20, type="Ljung-Box")
##
## Box-Ljung test
##
## data: TS
## X-squared = 2111.4, df = 20, p-value < 2.2e-16
acf(TS, lag.max = 120)
acf(TS - dec$seasonal, lag.max = 100)
Приращения:
plot(diff(TS))
hist(diff(TS), breaks = 30)
ARIMA:
model <- arima(TS, order = c(2,1,1))
summary(model)
##
## Call:
## arima(x = TS, order = c(2, 1, 1))
##
## Coefficients:
## ar1 ar2 ma1
## 1.4481 -0.5292 -0.9904
## s.e. 0.0356 0.0356 0.0053
##
## sigma^2 estimated as 5723: log likelihood = -3252.67, aic = 6513.34
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.518758 75.58158 48.60771 -1.022063 10.32957 0.8804674
## ACF1
## Training set 0.01739417
forec <- forecast.Arima(model, h = 10)
plot(forec, xlim=c(2012,2015.5))
Есть еще отличная команда, но она работает очень долго:
#auto.arima(TS, max.p = 3, max.q = 3)
Теперь данные по всему миру:
flu <- read.csv('http://www.google.org/flutrends/about/data/flu/data.txt', skip = 10)
flu <- na.omit(flu)
plot(flu$Russia, type='l')
lines(flu$Brazil, type='l', col=2)
lines(flu$Germany, type='l', col=3)
Старая знакомая:
Dates <- flu$Date
flu$Date <- NULL
corrplot(cor(flu))
corrplot(cor(flu), order = "AOE")
data("Grunfeld")
panel <- pdata.frame(Grunfeld, index = c("firm", "year"), row.names = TRUE)
head(Grunfeld)
## firm year inv value capital
## 1 1 1935 317.6 3078.5 2.8
## 2 1 1936 391.8 4661.7 52.6
## 3 1 1937 410.6 5387.1 156.9
## 4 1 1938 257.7 2792.2 209.2
## 5 1 1939 330.8 4313.2 203.4
## 6 1 1940 461.2 4643.9 207.2
Три разные модели:
m.pooled <- plm(value ~ inv + capital, data = panel, model = "pooling")
m.re <- plm(value ~ inv + capital, data = panel, model = "random")
m.fe <- plm(value ~ inv + capital, data = panel, model = "within")
И отчеты:
summary(m.pooled)
## Oneway (individual) effect Pooling Model
##
## Call:
## plm(formula = value ~ inv + capital, data = panel, model = "pooling")
##
## Balanced Panel: n=10, T=20, N=200
##
## Residuals :
## Min. 1st Qu. Median 3rd Qu. Max.
## -2010.0 -340.0 -184.0 76.7 2710.0
##
## Coefficients :
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 410.81557 64.14190 6.4048 1.082e-09 ***
## inv 5.75981 0.29086 19.8026 < 2.2e-16 ***
## capital -0.61527 0.20950 -2.9369 0.00371 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 343840000
## Residual Sum of Squares: 87514000
## R-Squared : 0.74548
## Adj. R-Squared : 0.7343
## F-statistic: 288.5 on 2 and 197 DF, p-value: < 2.22e-16
summary(m.fe)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = value ~ inv + capital, data = panel, model = "within")
##
## Balanced Panel: n=10, T=20, N=200
##
## Residuals :
## Min. 1st Qu. Median 3rd Qu. Max.
## -808.00 -88.60 -7.23 76.20 1370.00
##
## Coefficients :
## Estimate Std. Error t-value Pr(>|t|)
## inv 2.85617 0.30751 9.2879 < 2.2e-16 ***
## capital -0.50787 0.14037 -3.6182 0.0003812 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 23078000
## Residual Sum of Squares: 13577000
## R-Squared : 0.41169
## Adj. R-Squared : 0.38699
## F-statistic: 65.7798 on 2 and 188 DF, p-value: < 2.22e-16
summary(m.re)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = value ~ inv + capital, data = panel, model = "random")
##
## Balanced Panel: n=10, T=20, N=200
##
## Effects:
## var std.dev share
## idiosyncratic 72217.6 268.7 0.195
## individual 298685.7 546.5 0.805
## theta: 0.8907
##
## Residuals :
## Min. 1st Qu. Median 3rd Qu. Max.
## -614.0 -121.0 -59.6 80.6 1610.0
##
## Coefficients :
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 786.90480 182.17147 4.3196 2.477e-05 ***
## inv 3.11343 0.30761 10.1212 < 2.2e-16 ***
## capital -0.57842 0.14247 -4.0599 7.079e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 26909000
## Residual Sum of Squares: 15280000
## R-Squared : 0.43217
## Adj. R-Squared : 0.42569
## F-statistic: 74.9683 on 2 and 197 DF, p-value: < 2.22e-16
Тестирование:
pFtest(m.fe, m.pooled)
##
## F test for individual effects
##
## data: value ~ inv + capital
## F = 113.76, df1 = 9, df2 = 188, p-value < 2.2e-16
## alternative hypothesis: significant effects
phtest(m.fe, m.re)
##
## Hausman Test
##
## data: value ~ inv + capital
## chisq = 2366.7, df = 2, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
plmtest(m.re, type = "bp")
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: value ~ inv + capital
## chisq = 772.32, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
Домашней работы сегодня не будет. А остальные я проверю до конца недели. На этот раз точно-точно проверю :)