Goal

To give examples of date sequence management using seq.Date and the package {xts}.

Method

We will define several date sequences and operate with them to give useful tricks.

Date sequence definition

Use seq.Date(from, to, by, length.out, along.with) to define sequences of dates.

The by parameter can be “day”, “week”, “month”, “quarter” or “year” with a positive or negative integer preceding it and a space. Also a number that is taken as days.

We define first a sequence of 5 dates starting on 2000-01-01 every 10 days

tseq1 <- seq(from = as.Date('2000-01-01'), by = 10, len = 5)
tseq1
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
dateline(tseq1)

Same as tseq1 but every 2 weeks until 2000-02-28

(tseq2 <- seq.Date(as.Date("2000-01-01"), as.Date("2000-02-28"), by = "2 weeks"))
## [1] "2000-01-01" "2000-01-15" "2000-01-29" "2000-02-12" "2000-02-26"
dateline(tseq2)

Third sequence is the first days of the first 5 years of 21st century

tseq3 <- seq.Date(from = as.Date('2000-01-01'), by = 'years', len = 5)
tseq3
## [1] "2000-01-01" "2001-01-01" "2002-01-01" "2003-01-01" "2004-01-01"
dateline(tseq3)

Last day of every quarter in 2002. Questions about last day in a period are more subtle as days in each period are different. The trick consists in computing first days of next quarters and substract 1 day

tseq4 <- seq.Date(from = as.Date('2002-03-31'),
  to = as.Date("2002-12-31"), by = "quarter")
tseq4
## [1] "2002-03-31" "2002-07-01" "2002-10-01" "2002-12-31"
tseq4 <- seq.Date(from = as.Date('2002-04-01'),
  to = as.Date("2003-01-01"), by = "quarter") - 1
tseq4
## [1] "2002-03-31" "2002-06-30" "2002-09-30" "2002-12-31"
dateline(tseq4)

Last days of the first 5 month of 2004.

tseq5 <- seq.Date(from = as.Date('2004-01-31'),
  by = "1 months", len = 5)
tseq5
## [1] "2004-01-31" "2004-03-02" "2004-03-31" "2004-05-01" "2004-05-31"
tseq5 <- seq.Date(from = as.Date('2004-01-28'),
  by = "1 months", len = 5)
tseq5
## [1] "2004-01-28" "2004-02-28" "2004-03-28" "2004-04-28" "2004-05-28"
tseq5 <- seq.Date(from = as.Date('2004-02-01'),
  by = "1 months", len = 5) - 1
tseq5
## [1] "2004-01-31" "2004-02-29" "2004-03-31" "2004-04-30" "2004-05-31"
dateline(tseq5)

Note that in this example, “month” period does not work as expected for the last day of the month (from 29 to 31) as different month have different days. therefore, finding the last day of every month requires some care

Last day of February of for years 2002 to 2007

tseq6 <- seq.Date(from = as.Date('2001-02-28'), to = as.Date("2007-03-01"),
  by = "1 year")
tseq6
## [1] "2001-02-28" "2002-02-28" "2003-02-28" "2004-02-28" "2005-02-28"
## [6] "2006-02-28" "2007-02-28"
tseq6 <- seq.Date(from = as.Date('2001-03-01'), to = as.Date("2007-03-01"),
  by = "1 year") - 1
tseq6
## [1] "2001-02-28" "2002-02-28" "2003-02-28" "2004-02-29" "2005-02-28"
## [6] "2006-02-28" "2007-02-28"
dateline(tseq6)

Find the second saturday of the first 6 months of 2004

tseq7 <- seq.Date(from = as.Date('2004-01-01'), to = as.Date('2004-06-30'), by = 1)
tseq7a <- tseq7[weekdays(tseq7) == "sábado"]
tseq7a <- tseq7[format(tseq7, "%u") == 6]
tseq7b <- tapply(tseq7a, format(tseq7a, "%Y-%m"), '[', 2)
tseq7c <- as.Date(tseq7b, origin = "1970-01-01")
tseq7c
##      2004-01      2004-02      2004-03      2004-04      2004-05 
## "2004-01-10" "2004-02-14" "2004-03-13" "2004-04-10" "2004-05-08" 
##      2004-06 
## "2004-06-12"
dateline(tseq7c)

Find the weekend days of August 2014

tseq8 <- seq.Date(as.Date("2014-08-01"), as.Date("2014-08-31"), by = 1)
tseq8a <- tseq8[format(tseq8, "%u") > 5]
tseq8a
##  [1] "2014-08-02" "2014-08-03" "2014-08-09" "2014-08-10" "2014-08-16"
##  [6] "2014-08-17" "2014-08-23" "2014-08-24" "2014-08-30" "2014-08-31"
dateline(tseq8a)

Last Sunday of August of years 2001 to 2007

tseq9 <- seq.Date(as.Date("2001-01-01"), as.Date("2007-12-31"), by = 1)
tseq9a <- tseq9[format(tseq9, "%m") == "08" & format(tseq9, "%u") == 7]
tseq9b <- rev(tseq9a)
tseq9c <- tapply(tseq9b, format(tseq9b, "%Y"), '[', 1)
tseq9d <- as.Date(tseq9c, origin = "1970-01-01")
tseq9d
##         2001         2002         2003         2004         2005 
## "2001-08-26" "2002-08-25" "2003-08-31" "2004-08-29" "2005-08-28" 
##         2006         2007 
## "2006-08-27" "2007-08-26"
dateline(tseq9d)

Find the number of days of each of the first 6 months of 2004 of the first week of the month (it includes any day of the month)

tseq10 <- seq.Date(from = as.Date("2004-01-01"), to = as.Date("2004-06-30"), by = 1)
tseq10b <- split(tseq10, format(tseq10, "%m"))
tseq10c <- lapply(tseq10b, 
  function(seq) tapply(seq, format(seq, "%W"), length))
tseq10d <- lapply(tseq10c, '[', 1)
tseq10e <- unlist(tseq10d)
names(tseq10e) <- unique(format(tseq10, "%Y-%m"))
tseq10e
## 2004-01 2004-02 2004-03 2004-04 2004-05 2004-06 
##       4       1       7       4       2       6

Find the number of days of each of the first 6 months of 2004 of the last week of the month (it includes any day of the month)

tseq11 <- seq.Date(from = as.Date("2004-01-01"), to = as.Date("2004-06-30"), by = 1)
tseq11b <- split(tseq10, format(tseq11, "%m"))
tseq11c <- lapply(tseq11b, 
  function(seq) tapply(seq, format(seq, "%W"), length))
tseq11d <- lapply(tseq11c, rev)
tseq11e <- lapply(tseq11d, '[', 1)
tseq11f <- unlist(tseq11e)
names(tseq11f) <- unique(format(tseq11, "%Y-%m"))
tseq11f
## 2004-01 2004-02 2004-03 2004-04 2004-05 2004-06 
##       6       7       3       5       1       3

Using xts package

tseq1
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
xts1 <- xts(rep(0, length(tseq1)), order.by = tseq1)
index(xts1)
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
index(xts1) == tseq1
## [1] TRUE TRUE TRUE TRUE TRUE

Last day of every quarter in 2002

year_2002_days <- seq.Date(as.Date("2002-01-01"), as.Date("2002-12-31"), by ="days")
n_2002 <- length(year_2002_days)
xts4 <- xts(rep(0, n_2002), year_2002_days)
ep4 <- endpoints(xts4, on = "quarters", k = 1)
index(xts4[ep4])
## [1] "2002-03-31" "2002-06-30" "2002-09-30" "2002-12-31"

Equivalently

index(
  do.call(
    c,
      lapply(split(xts4, "quarters"), last, "1 day")
  )
)
## [1] "2002-03-31" "2002-06-30" "2002-09-30" "2002-12-31"

Last day of February of for years 2002 to 2007

year_2002_2007_days <- seq.Date(as.Date("2002-01-01"), as.Date("2007-12-31"), by ="days")
feb_2002_2007_days <- year_2002_2007_days[format(year_2002_2007_days, "%m") == "02"]
ep6 <- endpoints(feb_2002_2007_days, on = "months")
feb_2002_2007_days[ep6]
## [1] "2002-02-28" "2003-02-28" "2004-02-29" "2005-02-28" "2006-02-28"
## [6] "2007-02-28"

Find the second saturday of the first 6 months of 2004

do.call(
  c,
  lapply(split(tseq7a, format(tseq7a, "%m")),'[', 2)
)
##           01           02           03           04           05 
## "2004-01-10" "2004-02-14" "2004-03-13" "2004-04-10" "2004-05-08" 
##           06 
## "2004-06-12"
xts7 <- xts(1:length(tseq7a), tseq7a)
index(xts7)[apply.monthly(xts7, first, "2 weeks")[, 2]]
## [1] "2004-01-10" "2004-02-14" "2004-03-13" "2004-04-10" "2004-05-08"
## [6] "2004-06-12"

Find the fifth saturday of the first 6 months of 2004

do.call(
  c,
  lapply(split(tseq7a, format(tseq7a, "%m")),'[', 5)
)
##           01           02           03           04           05 
## "2004-01-31"           NA           NA           NA "2004-05-29" 
##           06 
##           NA

Equivalently

do.call(
  c,
  lapply(split(xts7, "months"), function(x) index(x)[5])
)
## [1] "2004-01-31" NA           NA           NA           "2004-05-29"
## [6] NA
xts7 <- xts(rep(0, length(tseq7a)), tseq7a)
index(apply.monthly(xts7, last)[apply.monthly(xts7, length) == 5])
## [1] "2004-01-31 UTC" "2004-05-29 UTC"

Plot of date sequences

tseq1

tseq1
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
summary(tseq1)
##         Min.      1st Qu.       Median         Mean      3rd Qu. 
## "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-21" "2000-01-31" 
##         Max. 
## "2000-02-10"
dateline(tseq1)

n1 <- length(tseq1)
xts1 <- xts(x = rnorm(n1), order.by = tseq1)
colnames(xts1) <- "random"
plot(xts1, type = "b", pch = 16)

dygraph(xts1)
tclass(xts1)
## [1] "Date"
tclass(convertIndex(xts1, "POSIXct"))
## [1] "POSIXct" "POSIXt"
tzone(xts1)
## [1] "UTC"
index(xts1)
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
tformat(xts1) <- "%Y%m%d"
index(xts1)
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
xts1
##              random
## 20000101 -0.6788673
## 20000111  0.8846896
## 20000121  0.9879566
## 20000131 -0.1163698
## 20000210  0.6781643
str(xts1)
## An 'xts' object on 2000-01-01/2000-02-10 containing:
##   Data: num [1:5, 1] -0.679 0.885 0.988 -0.116 0.678
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "random"
##   Indexed by objects of class: [Date] TZ: UTC
##   xts Attributes:  
##  NULL
colnames(xts1) <- "rnorm"
xts1
##               rnorm
## 20000101 -0.6788673
## 20000111  0.8846896
## 20000121  0.9879566
## 20000131 -0.1163698
## 20000210  0.6781643
str(xts1)
## An 'xts' object on 2000-01-01/2000-02-10 containing:
##   Data: num [1:5, 1] -0.679 0.885 0.988 -0.116 0.678
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "rnorm"
##   Indexed by objects of class: [Date] TZ: UTC
##   xts Attributes:  
##  NULL
xtsAttributes(xts1)
## NULL

Save an xts object

write.zoo(xts1, file = "xts1.csv", col.names = T, sep = "\t", dec = ".")

Read the xts object back. Double check you are able to reproduce original data. There are multiple methods to do so.

table1 <- read.table("xts1.csv", sep = "\t", dec = ".", stringsAsFactors = F, header = T)
m1 <- as.matrix(table1[, -1])
colnames(m1) <- colnames(table1)[-1]
xts1_read <- xts(m1, order.by = as.Date(table1$Index))

Replace values in the xts object

xts1[3] <- NA
index(xts1)
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
xts1["2000-02-10"] <- NA
dygraph(xts1)
plot(xts1, type = "b", pch = 16)

dygraph(na.omit(xts1))

Add a data point between existing ones

xts1_extra <- xts(-1, order.by = as.Date("2000-01-15"))
colnames(xts1_extra) <- "rnorm"
merge(xts1, xts1_extra)
##               rnorm rnorm.1
## 20000101 -0.6788673      NA
## 20000111  0.8846896      NA
## 20000115         NA      -1
## 20000121         NA      NA
## 20000131 -0.1163698      NA
## 20000210         NA      NA
rbind(xts1, xts1_extra)
##               rnorm
## 20000101 -0.6788673
## 20000111  0.8846896
## 20000115 -1.0000000
## 20000121         NA
## 20000131 -0.1163698
## 20000210         NA
xts1 + xts1_extra
## Data:
## numeric(0)
## 
## Index:
## Date of length 0
xts1 + merge(xts1_extra, index(xts1), fill = 0)
##               rnorm
## 20000101 -0.6788673
## 20000111  0.8846896
## 20000121         NA
## 20000131 -0.1163698
## 20000210         NA

If the date is already in the original xts index, you have to decide how to merge the values that correspond to the same date.

xts1_extra2 <- xts(-1, order.by = as.Date("2000-01-11"))
colnames(xts1_extra2) <- "rnorm"
rbind(xts1, xts1_extra2)
##               rnorm
## 20000101 -0.6788673
## 20000111  0.8846896
## 20000111 -1.0000000
## 20000121         NA
## 20000131 -0.1163698
## 20000210         NA
merge(xts1, xts1_extra2)
##               rnorm rnorm.1
## 20000101 -0.6788673      NA
## 20000111  0.8846896      -1
## 20000121         NA      NA
## 20000131 -0.1163698      NA
## 20000210         NA      NA

For instance, let us assume we want to take the minimum

xts1_merged <- merge(xts1, xts1_extra2)
xts1_merged
##               rnorm rnorm.1
## 20000101 -0.6788673      NA
## 20000111  0.8846896      -1
## 20000121         NA      NA
## 20000131 -0.1163698      NA
## 20000210         NA      NA
min_na <- function(x){
  min_value <- ifelse(sum(is.na(x)) == length(x),
    NA,
    min(x, na.rm = T))
  return(min_value)
}
min_na(c(NA, NA))
## [1] NA
min_na(c(1, -0.5))
## [1] -0.5
min_na(c(-1, NA))
## [1] -1
xts1_merged <- xts(apply(coredata(xts1_merged), 1, min_na), order.by = index(xts1_merged))
colnames(xts1_merged) <- "rnorm"
xts1_merged
##                 rnorm
## 2000-01-01 -0.6788673
## 2000-01-11 -1.0000000
## 2000-01-21         NA
## 2000-01-31 -0.1163698
## 2000-02-10         NA