plot(discoveries,main="Time Series of Number of Major Scientific Discoveries In A Year")
stripchart(discoveries, method = "stack", offset=.5, at=.15,pch=19,
main="Number of Discoveries Dotplot",
xlab="Number of Major Scientific Discoveries in a Year",
ylab="Frequency")
par(mfcol = c(2,1 ))
acf(discoveries, main="ACF of Number of Major Scientific Discoveries in a Year")
acf(discoveries, type="partial", main="PACF of Number of Major Scientific Discoveries in a Year")
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
auto.arima(discoveries, d=0, approximation=FALSE)
## Series: discoveries
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.8353 -0.6243 3.0208
## s.e. 0.1379 0.1948 0.4728
##
## sigma^2 estimated as 4.538: log likelihood=-216.1
## AIC=440.2 AICc=440.62 BIC=450.62
# parameters
phi=c(.7, .2)
beta=0.5
sigma=3
m=10000
set.seed(5)
Simulated.Arima=arima.sim(n=m,list(order = c(2,1,1), ar = phi, ma=beta))
plot(Simulated.Arima, ylab=' ',main='Simulated time series from ARIMA(2,1,1) process', col='blue', lwd=2)
acf(Simulated.Arima)
Diff.Simulated.Arima=diff(Simulated.Arima)
plot(Diff.Simulated.Arima)
acf(Diff.Simulated.Arima)
pacf(Diff.Simulated.Arima)
library(astsa)
##
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
##
## gas
sarima(Simulated.Arima,2,1,1,0,0,0) # astsa not installed
## initial value 1.092704
## iter 2 value 0.655083
## iter 3 value 0.576329
## iter 4 value 0.250793
## iter 5 value 0.124855
## iter 6 value 0.033738
## iter 7 value 0.013225
## iter 8 value 0.012554
## iter 9 value 0.012517
## iter 10 value 0.012292
## iter 11 value 0.012267
## iter 12 value 0.012258
## iter 13 value 0.012170
## iter 14 value 0.012069
## iter 15 value 0.011860
## iter 16 value 0.011703
## iter 17 value 0.011609
## iter 18 value 0.011601
## iter 19 value 0.011601
## iter 20 value 0.011601
## iter 20 value 0.011601
## iter 20 value 0.011601
## final value 0.011601
## converged
## initial value 0.011653
## iter 2 value 0.011653
## iter 3 value 0.011653
## iter 3 value 0.011653
## iter 3 value 0.011653
## final value 0.011653
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ar2 ma1 constant
## 0.6876 0.204 0.5002 0.0280
## s.e. 0.0334 0.032 0.0301 0.1398
##
## sigma^2 estimated as 1.023: log likelihood = -14305.92, aic = 28621.83
##
## $degrees_of_freedom
## [1] 9996
##
## $ttable
## Estimate SE t.value p.value
## ar1 0.6876 0.0334 20.5786 0.0000
## ar2 0.2040 0.0320 6.3817 0.0000
## ma1 0.5002 0.0301 16.6139 0.0000
## constant 0.0280 0.1398 0.2001 0.8414
##
## $AIC
## [1] 2.862183
##
## $AICc
## [1] 2.862183
##
## $BIC
## [1] 2.865788
library(forecast)
auto.arima(Simulated.Arima) # forecast not installed
## Series: Simulated.Arima
## ARIMA(4,2,0)
##
## Coefficients:
## ar1 ar2 ar3 ar4
## 0.2279 -0.1633 0.0337 -0.0707
## s.e. 0.0100 0.0102 0.0102 0.0100
##
## sigma^2 estimated as 1.064: log likelihood=-14495.62
## AIC=29001.24 AICc=29001.25 BIC=29037.29
fit1<-arima(Diff.Simulated.Arima, order=c(4,0,0))
fit1
##
## Call:
## arima(x = Diff.Simulated.Arima, order = c(4, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 ar4 intercept
## 1.1862 -0.3761 0.1733 -0.0581 0.0280
## s.e. 0.0100 0.0154 0.0154 0.0100 0.1353
##
## sigma^2 estimated as 1.025: log likelihood = -14313.1, aic = 28638.2
fit2<-arima(Diff.Simulated.Arima, order=c(2,0,1))
fit2
##
## Call:
## arima(x = Diff.Simulated.Arima, order = c(2, 0, 1))
##
## Coefficients:
## ar1 ar2 ma1 intercept
## 0.6876 0.204 0.5002 0.0280
## s.e. 0.0334 0.032 0.0301 0.1398
##
## sigma^2 estimated as 1.023: log likelihood = -14305.92, aic = 28621.83
fit3<-arima(Simulated.Arima, order=c(2,1,1))
fit3
##
## Call:
## arima(x = Simulated.Arima, order = c(2, 1, 1))
##
## Coefficients:
## ar1 ar2 ma1
## 0.6876 0.2039 0.5001
## s.e. 0.0334 0.0320 0.0301
##
## sigma^2 estimated as 1.023: log likelihood = -14305.93, aic = 28619.85
The following time series is taken from Time Series Data Library (TSDL) TSDL was created by Rob Hyndman, Professor of Statistics at Monash University, Australia.
Link: https://datamarket.com/data/list/?q=cat:fwy%20provider:tsdl
library(astsa)
# read data to R variable
birth.data<-read.csv("daily-total-female-births-in-cal.csv")
# pull out number of births column
number_of_births<-birth.data$Daily.total.female.births.in.California..1959
# use date format for dates
birth.data$Date <- as.Date(birth.data$Date, "%m/%d/%Y")
plot.ts(number_of_births,main='Daily total female births in california, 1959', ylab = 'Number of births')
# Test for correlation
Box.test(number_of_births, lag = log(length(number_of_births)))
##
## Box-Pierce test
##
## data: number_of_births
## X-squared = 36.391, df = 5.8999, p-value = 2.088e-06
# Plot the differenced data
plot.ts(diff(number_of_births), main='Differenced series', ylab = '')
# Test for correlation in the differenced data
Box.test(diff(number_of_births), lag = log(length(diff(number_of_births))))
##
## Box-Pierce test
##
## data: diff(number_of_births)
## X-squared = 78.094, df = 5.8972, p-value = 7.661e-15
# acf and pacf of the differenced data
acf(diff(number_of_births), main='ACF of differenced data', 50)
pacf(diff(number_of_births), main='PACF of differenced data', 50)
# Fit various ARIMA models
model1<-arima(number_of_births, order=c(0,1,1))
SSE1<-sum(model1$residuals^2)
model1.test<-Box.test(model1$residuals, lag = log(length(model1$residuals)))
model2<-arima(number_of_births, order=c(0,1,2))
SSE2<-sum(model2$residuals^2)
model2.test<-Box.test(model2$residuals, lag = log(length(model2$residuals)))
model3<-arima(number_of_births, order=c(7,1,1))
SSE3<-sum(model3$residuals^2)
model3.test<-Box.test(model3$residuals, lag = log(length(model3$residuals)))
model4<-arima(number_of_births, order=c(7,1,2))
SSE4<-sum(model4$residuals^2)
model4.test<-Box.test(model4$residuals, lag = log(length(model4$residuals)))
df<-data.frame(row.names=c('AIC', 'SSE', 'p-value'), c(model1$aic, SSE1, model1.test$p.value),
c(model2$aic, SSE2, model2.test$p.value), c(model3$aic, SSE3, model3.test$p.value),
c(model4$aic, SSE4, model4.test$p.value))
colnames(df)<-c('Arima(0,1,1)','Arima(0,1,2)', 'Arima(7,1,1)', 'Arima(7,1,2)')
format(df, scientific=FALSE)
## Arima(0,1,1) Arima(0,1,2) Arima(7,1,1) Arima(7,1,2)
## AIC 2462.2207021 2459.5705306 2464.8827225 2466.6664136
## SSE 18148.4561632 17914.6513437 17584.3902548 17574.0578120
## p-value 0.5333604 0.9859227 0.9999899 0.9999929
# Fit a SARIMA model
sarima(number_of_births, 0,1,2,0,0,0)
## initial value 2.216721
## iter 2 value 2.047518
## iter 3 value 1.974780
## iter 4 value 1.966955
## iter 5 value 1.958906
## iter 6 value 1.952299
## iter 7 value 1.951439
## iter 8 value 1.950801
## iter 9 value 1.950797
## iter 10 value 1.950650
## iter 11 value 1.950646
## iter 12 value 1.950638
## iter 13 value 1.950635
## iter 13 value 1.950635
## iter 13 value 1.950635
## final value 1.950635
## converged
## initial value 1.950708
## iter 2 value 1.950564
## iter 3 value 1.950290
## iter 4 value 1.950196
## iter 5 value 1.950185
## iter 6 value 1.950185
## iter 7 value 1.950185
## iter 7 value 1.950185
## iter 7 value 1.950185
## final value 1.950185
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ma1 ma2 constant
## -0.8511 -0.1113 0.015
## s.e. 0.0496 0.0502 0.015
##
## sigma^2 estimated as 49.08: log likelihood = -1226.36, aic = 2460.72
##
## $degrees_of_freedom
## [1] 361
##
## $ttable
## Estimate SE t.value p.value
## ma1 -0.8511 0.0496 -17.1448 0.0000
## ma2 -0.1113 0.0502 -2.2164 0.0273
## constant 0.0150 0.0150 1.0007 0.3176
##
## $AIC
## [1] 6.760225
##
## $AICc
## [1] 6.760408
##
## $BIC
## [1] 6.803051