library(tidyverse)
I want to display common forms for time series data. These data are used in Hyndman’s text. For example,
Data on the age at death of successive kings of England, starting with William the Conqueror (original source: Hipel and McLeod, 1994). This is not what we normally think of as time series data because the index is not time; it is the order of succession.
# Read the numeric values with base R's scan(), ignoring the first 3 lines
# of the remote file.
kings_url <- "http://robjhyndman.com/tsdldata/misc/kings.dat"
kings <- scan(file = kings_url, skip = 3)
kings
## [1] 60 43 67 50 56 42 50 65 68 43 65 34 47 34 49 41 13 35 53 56 16 43 69 59 48
## [26] 59 86 55 68 51 33 49 67 77 81 67 71 81 68 70 77 56
# Wrap the plain numeric vector in a "ts" object so that it carries
# time-series metadata (start, end, frequency).
kings.TS <- ts(data = kings)
# Printing the object shows that metadata followed by the values.
kings.TS
## Time Series:
## Start = 1
## End = 42
## Frequency = 1
## [1] 60 43 67 50 56 42 50 65 68 43 65 34 47 34 49 41 13 35 53 56 16 43 69 59 48
## [26] 59 86 55 68 51 33 49 67 77 81 67 71 81 68 70 77 56
# A ts object is the sequence of numbers with some metadata on top of it.
# The index here is a position in the sequence rather than a time, but the
# object is still treated as a time series.
plot(
  kings.TS,
  xlab = "King [starting with WtC]",
  ylab = "Age at Death"
)
Let’s look at some data on the number of births per month in New York City, from January 1946 to December 1959 (originally collected by Newton). It is a sequence with a known starting point: January 1946.
# Read the births values from the remote file with base R's scan().
births_url <- "http://robjhyndman.com/tsdldata/data/nybirths.dat"
births <- scan(file = births_url)
births # At this point it is just a vector of numbers, with no dates attached
## [1] 26.663 23.598 26.931 24.740 25.806 24.364 24.477 23.901 23.175 23.227
## [11] 21.672 21.870 21.439 21.089 23.709 21.669 21.752 20.761 23.479 23.824
## [21] 23.105 23.110 21.759 22.073 21.937 20.035 23.590 21.672 22.222 22.123
## [31] 23.950 23.504 22.238 23.142 21.059 21.573 21.548 20.000 22.424 20.615
## [41] 21.761 22.874 24.104 23.748 23.262 22.907 21.519 22.025 22.604 20.894
## [51] 24.677 23.673 25.320 23.583 24.671 24.454 24.122 24.252 22.084 22.991
## [61] 23.287 23.049 25.076 24.037 24.430 24.667 26.451 25.618 25.014 25.110
## [71] 22.964 23.981 23.798 22.270 24.775 22.646 23.988 24.737 26.276 25.816
## [81] 25.210 25.199 23.162 24.707 24.364 22.644 25.565 24.062 25.431 24.635
## [91] 27.009 26.606 26.268 26.462 25.246 25.180 24.657 23.304 26.982 26.199
## [101] 27.210 26.122 26.706 26.878 26.152 26.379 24.712 25.688 24.990 24.239
## [111] 26.721 23.475 24.767 26.219 28.361 28.599 27.914 27.784 25.693 26.881
## [121] 26.217 24.218 27.914 26.975 28.527 27.139 28.982 28.169 28.056 29.136
## [131] 26.291 26.987 26.589 24.848 27.543 26.896 28.878 27.390 28.065 28.141
## [141] 29.048 28.484 26.634 27.735 27.132 24.924 28.963 26.589 27.931 28.009
## [151] 29.229 28.759 28.405 27.945 25.912 26.619 26.076 25.286 27.660 25.951
## [161] 26.398 25.565 28.865 30.000 29.261 29.012 26.992 27.897
# Current calendar date in the session's local time zone.
# Sys.Date() is used rather than as.Date(Sys.time()) because the POSIXct
# method of as.Date() converts in UTC by default, which can report a
# neighbouring day depending on the local time zone and time of day.
Sys.Date()
## [1] "2021-02-03"
Incorporating the starting point can be done in base R by declaring the time frequency and the start point.
# Monthly series: 12 observations per year, first observation in period 1
# (January) of 1946.
births.TS <- ts(
  data = births,
  start = c(1946, 1), # c(year, period within the year) of the first value
  frequency = 12      # number of observations per year
)
births.TS
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 1946 26.663 23.598 26.931 24.740 25.806 24.364 24.477 23.901 23.175 23.227
## 1947 21.439 21.089 23.709 21.669 21.752 20.761 23.479 23.824 23.105 23.110
## 1948 21.937 20.035 23.590 21.672 22.222 22.123 23.950 23.504 22.238 23.142
## 1949 21.548 20.000 22.424 20.615 21.761 22.874 24.104 23.748 23.262 22.907
## 1950 22.604 20.894 24.677 23.673 25.320 23.583 24.671 24.454 24.122 24.252
## 1951 23.287 23.049 25.076 24.037 24.430 24.667 26.451 25.618 25.014 25.110
## 1952 23.798 22.270 24.775 22.646 23.988 24.737 26.276 25.816 25.210 25.199
## 1953 24.364 22.644 25.565 24.062 25.431 24.635 27.009 26.606 26.268 26.462
## 1954 24.657 23.304 26.982 26.199 27.210 26.122 26.706 26.878 26.152 26.379
## 1955 24.990 24.239 26.721 23.475 24.767 26.219 28.361 28.599 27.914 27.784
## 1956 26.217 24.218 27.914 26.975 28.527 27.139 28.982 28.169 28.056 29.136
## 1957 26.589 24.848 27.543 26.896 28.878 27.390 28.065 28.141 29.048 28.484
## 1958 27.132 24.924 28.963 26.589 27.931 28.009 29.229 28.759 28.405 27.945
## 1959 26.076 25.286 27.660 25.951 26.398 25.565 28.865 30.000 29.261 29.012
## Nov Dec
## 1946 21.672 21.870
## 1947 21.759 22.073
## 1948 21.059 21.573
## 1949 21.519 22.025
## 1950 22.084 22.991
## 1951 22.964 23.981
## 1952 23.162 24.707
## 1953 25.246 25.180
## 1954 24.712 25.688
## 1955 25.693 26.881
## 1956 26.291 26.987
## 1957 26.634 27.735
## 1958 25.912 26.619
## 1959 26.992 27.897
# Same values declared with 52 periods per year (weekly indexing).
# With frequency = 52, start = c(1946, 1) means period 1 of 52 in 1946, not
# the first month. The 168 values therefore only span 1946 to period 12 of
# 1949 instead of 1946-1959.
# NOTE(review): presumably a deliberate illustration of what happens when the
# declared frequency does not match the data - confirm.
W.births.TS <- ts(
  data = births,
  start = c(1946, 1), # c(year, period within the year) of the first value
  frequency = 52      # number of observations per year
)
W.births.TS
## Time Series:
## Start = c(1946, 1)
## End = c(1949, 12)
## Frequency = 52
## [1] 26.663 23.598 26.931 24.740 25.806 24.364 24.477 23.901 23.175 23.227
## [11] 21.672 21.870 21.439 21.089 23.709 21.669 21.752 20.761 23.479 23.824
## [21] 23.105 23.110 21.759 22.073 21.937 20.035 23.590 21.672 22.222 22.123
## [31] 23.950 23.504 22.238 23.142 21.059 21.573 21.548 20.000 22.424 20.615
## [41] 21.761 22.874 24.104 23.748 23.262 22.907 21.519 22.025 22.604 20.894
## [51] 24.677 23.673 25.320 23.583 24.671 24.454 24.122 24.252 22.084 22.991
## [61] 23.287 23.049 25.076 24.037 24.430 24.667 26.451 25.618 25.014 25.110
## [71] 22.964 23.981 23.798 22.270 24.775 22.646 23.988 24.737 26.276 25.816
## [81] 25.210 25.199 23.162 24.707 24.364 22.644 25.565 24.062 25.431 24.635
## [91] 27.009 26.606 26.268 26.462 25.246 25.180 24.657 23.304 26.982 26.199
## [101] 27.210 26.122 26.706 26.878 26.152 26.379 24.712 25.688 24.990 24.239
## [111] 26.721 23.475 24.767 26.219 28.361 28.599 27.914 27.784 25.693 26.881
## [121] 26.217 24.218 27.914 26.975 28.527 27.139 28.982 28.169 28.056 29.136
## [131] 26.291 26.987 26.589 24.848 27.543 26.896 28.878 27.390 28.065 28.141
## [141] 29.048 28.484 26.634 27.735 27.132 24.924 28.963 26.589 27.931 28.009
## [151] 29.229 28.759 28.405 27.945 25.912 26.619 26.076 25.286 27.660 25.951
## [161] 26.398 25.565 28.865 30.000 29.261 29.012 26.992 27.897
# Line plot of the series indexed with 52 periods per year; bty = "n" drops
# the box drawn around the plotting region.
plot(
  W.births.TS,
  ylab = "Births",
  bty = "n"
)
# Same values declared with 4 periods per year (quarterly indexing).
# With frequency = 4, start = c(1946, 1) means the first quarter of 1946, not
# the first month. The 168 values are therefore stretched over 42 years
# (1946 Qtr1 to 1987 Qtr4).
# NOTE(review): presumably a deliberate illustration of what happens when the
# declared frequency does not match the data - confirm.
Q.births.TS <- ts(
  data = births,
  start = c(1946, 1), # c(year, period within the year) of the first value
  frequency = 4       # number of observations per year
)
Q.births.TS
## Qtr1 Qtr2 Qtr3 Qtr4
## 1946 26.663 23.598 26.931 24.740
## 1947 25.806 24.364 24.477 23.901
## 1948 23.175 23.227 21.672 21.870
## 1949 21.439 21.089 23.709 21.669
## 1950 21.752 20.761 23.479 23.824
## 1951 23.105 23.110 21.759 22.073
## 1952 21.937 20.035 23.590 21.672
## 1953 22.222 22.123 23.950 23.504
## 1954 22.238 23.142 21.059 21.573
## 1955 21.548 20.000 22.424 20.615
## 1956 21.761 22.874 24.104 23.748
## 1957 23.262 22.907 21.519 22.025
## 1958 22.604 20.894 24.677 23.673
## 1959 25.320 23.583 24.671 24.454
## 1960 24.122 24.252 22.084 22.991
## 1961 23.287 23.049 25.076 24.037
## 1962 24.430 24.667 26.451 25.618
## 1963 25.014 25.110 22.964 23.981
## 1964 23.798 22.270 24.775 22.646
## 1965 23.988 24.737 26.276 25.816
## 1966 25.210 25.199 23.162 24.707
## 1967 24.364 22.644 25.565 24.062
## 1968 25.431 24.635 27.009 26.606
## 1969 26.268 26.462 25.246 25.180
## 1970 24.657 23.304 26.982 26.199
## 1971 27.210 26.122 26.706 26.878
## 1972 26.152 26.379 24.712 25.688
## 1973 24.990 24.239 26.721 23.475
## 1974 24.767 26.219 28.361 28.599
## 1975 27.914 27.784 25.693 26.881
## 1976 26.217 24.218 27.914 26.975
## 1977 28.527 27.139 28.982 28.169
## 1978 28.056 29.136 26.291 26.987
## 1979 26.589 24.848 27.543 26.896
## 1980 28.878 27.390 28.065 28.141
## 1981 29.048 28.484 26.634 27.735
## 1982 27.132 24.924 28.963 26.589
## 1983 27.931 28.009 29.229 28.759
## 1984 28.405 27.945 25.912 26.619
## 1985 26.076 25.286 27.660 25.951
## 1986 26.398 25.565 28.865 30.000
## 1987 29.261 29.012 26.992 27.897
# Line plot of the series indexed with 4 periods per year; bty = "n" drops
# the box drawn around the plotting region.
plot(
  Q.births.TS,
  ylab = "Births",
  bty = "n"
)
# Same plot for the series declared with 12 periods per year starting in 1946.
plot(
  births.TS,
  ylab = "Births",
  bty = "n"
)
# Build a plain data frame for ggplot2 from the monthly series.
# data.frame() keeps the "ts" class on columns created from ts objects, and
# ggplot2 then reports that it does not know how to pick a scale for type ts.
# as.numeric() strips that class so both columns are ordinary numeric vectors.
ggp.birth <- data.frame(
  births = as.numeric(births.TS),
  time = as.numeric(time(births.TS)) # fractional years, e.g. 1946.000, 1946.083
)
# Line chart of births against time.
ggplot(ggp.birth) +
  aes(x = time, y = births) +
  geom_line() +
  theme_minimal() +
  labs(x = "Time", y = "Births")
ggfortify
A handy reference for ggfortify
and the integration of ts objects can be found in the vignette.
#install.packages("ggfortify")
library(ggfortify)
# With ggfortify loaded, autoplot() is called directly on the ts object, so
# no intermediate data frame is built by hand.
autoplot(births.TS) +
  labs(x = "Time", y = "Births") +
  theme_dark()