To give examples of date sequence management using seq.Date and the package {xts}.
We will define several date sequences and operate with them to give useful tricks.
Use seq.Date(from, to, by, length.out, along.with) to define sequences of dates.
The by parameter can be “day”, “week”, “month”, “quarter” or “year” with a positive or negative integer preceding it and a space. Also a number that is taken as days.
We define first a sequence of 5 dates starting on 2000-01-01 every 10 days
tseq1 <- seq(from = as.Date('2000-01-01'), by = 10, len = 5)
tseq1
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
dateline(tseq1)
Same as tseq1 but every 2 weeks until 2000-02-28
(tseq2 <- seq.Date(as.Date("2000-01-01"), as.Date("2000-02-28"), by = "2 weeks"))
## [1] "2000-01-01" "2000-01-15" "2000-01-29" "2000-02-12" "2000-02-26"
dateline(tseq2)
Third sequence is the first days of the first 5 years of 21st century
tseq3 <- seq.Date(from = as.Date('2000-01-01'), by = 'years', len = 5)
tseq3
## [1] "2000-01-01" "2001-01-01" "2002-01-01" "2003-01-01" "2004-01-01"
dateline(tseq3)
Last day of every quarter in 2002. Questions about last day in a period are more subtle as days in each period are different. The trick consists in computing first days of next quarters and substract 1 day
tseq4 <- seq.Date(from = as.Date('2002-03-31'),
to = as.Date("2002-12-31"), by = "quarter")
tseq4
## [1] "2002-03-31" "2002-07-01" "2002-10-01" "2002-12-31"
tseq4 <- seq.Date(from = as.Date('2002-04-01'),
to = as.Date("2003-01-01"), by = "quarter") - 1
tseq4
## [1] "2002-03-31" "2002-06-30" "2002-09-30" "2002-12-31"
dateline(tseq4)
Last days of the first 5 month of 2004.
tseq5 <- seq.Date(from = as.Date('2004-01-31'),
by = "1 months", len = 5)
tseq5
## [1] "2004-01-31" "2004-03-02" "2004-03-31" "2004-05-01" "2004-05-31"
tseq5 <- seq.Date(from = as.Date('2004-01-28'),
by = "1 months", len = 5)
tseq5
## [1] "2004-01-28" "2004-02-28" "2004-03-28" "2004-04-28" "2004-05-28"
tseq5 <- seq.Date(from = as.Date('2004-02-01'),
by = "1 months", len = 5) - 1
tseq5
## [1] "2004-01-31" "2004-02-29" "2004-03-31" "2004-04-30" "2004-05-31"
dateline(tseq5)
Note that in this example, “month” period does not work as expected for the last day of the month (from 29 to 31) as different month have different days. therefore, finding the last day of every month requires some care
Last day of February of for years 2002 to 2007
tseq6 <- seq.Date(from = as.Date('2001-02-28'), to = as.Date("2007-03-01"),
by = "1 year")
tseq6
## [1] "2001-02-28" "2002-02-28" "2003-02-28" "2004-02-28" "2005-02-28"
## [6] "2006-02-28" "2007-02-28"
tseq6 <- seq.Date(from = as.Date('2001-03-01'), to = as.Date("2007-03-01"),
by = "1 year") - 1
tseq6
## [1] "2001-02-28" "2002-02-28" "2003-02-28" "2004-02-29" "2005-02-28"
## [6] "2006-02-28" "2007-02-28"
dateline(tseq6)
Find the second saturday of the first 6 months of 2004
tseq7 <- seq.Date(from = as.Date('2004-01-01'), to = as.Date('2004-06-30'), by = 1)
tseq7a <- tseq7[weekdays(tseq7) == "sábado"]
tseq7a <- tseq7[format(tseq7, "%u") == 6]
tseq7b <- tapply(tseq7a, format(tseq7a, "%Y-%m"), '[', 2)
tseq7c <- as.Date(tseq7b, origin = "1970-01-01")
tseq7c
## 2004-01 2004-02 2004-03 2004-04 2004-05
## "2004-01-10" "2004-02-14" "2004-03-13" "2004-04-10" "2004-05-08"
## 2004-06
## "2004-06-12"
dateline(tseq7c)
Find the weekend days of August 2014
tseq8 <- seq.Date(as.Date("2014-08-01"), as.Date("2014-08-31"), by = 1)
tseq8a <- tseq8[format(tseq8, "%u") > 5]
tseq8a
## [1] "2014-08-02" "2014-08-03" "2014-08-09" "2014-08-10" "2014-08-16"
## [6] "2014-08-17" "2014-08-23" "2014-08-24" "2014-08-30" "2014-08-31"
dateline(tseq8a)
Last Sunday of August of years 2001 to 2007
tseq9 <- seq.Date(as.Date("2001-01-01"), as.Date("2007-12-31"), by = 1)
tseq9a <- tseq9[format(tseq9, "%m") == "08" & format(tseq9, "%u") == 7]
tseq9b <- rev(tseq9a)
tseq9c <- tapply(tseq9b, format(tseq9b, "%Y"), '[', 1)
tseq9d <- as.Date(tseq9c, origin = "1970-01-01")
tseq9d
## 2001 2002 2003 2004 2005
## "2001-08-26" "2002-08-25" "2003-08-31" "2004-08-29" "2005-08-28"
## 2006 2007
## "2006-08-27" "2007-08-26"
dateline(tseq9d)
Find the number of days of each of the first 6 months of 2004 of the first week of the month (it includes any day of the month)
tseq10 <- seq.Date(from = as.Date("2004-01-01"), to = as.Date("2004-06-30"), by = 1)
tseq10b <- split(tseq10, format(tseq10, "%m"))
tseq10c <- lapply(tseq10b,
function(seq) tapply(seq, format(seq, "%W"), length))
tseq10d <- lapply(tseq10c, '[', 1)
tseq10e <- unlist(tseq10d)
names(tseq10e) <- unique(format(tseq10, "%Y-%m"))
tseq10e
## 2004-01 2004-02 2004-03 2004-04 2004-05 2004-06
## 4 1 7 4 2 6
Find the number of days of each of the first 6 months of 2004 of the last week of the month (it includes any day of the month)
tseq11 <- seq.Date(from = as.Date("2004-01-01"), to = as.Date("2004-06-30"), by = 1)
tseq11b <- split(tseq10, format(tseq11, "%m"))
tseq11c <- lapply(tseq11b,
function(seq) tapply(seq, format(seq, "%W"), length))
tseq11d <- lapply(tseq11c, rev)
tseq11e <- lapply(tseq11d, '[', 1)
tseq11f <- unlist(tseq11e)
names(tseq11f) <- unique(format(tseq11, "%Y-%m"))
tseq11f
## 2004-01 2004-02 2004-03 2004-04 2004-05 2004-06
## 6 7 3 5 1 3
xts packagetseq1
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
xts1 <- xts(rep(0, length(tseq1)), order.by = tseq1)
index(xts1)
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
index(xts1) == tseq1
## [1] TRUE TRUE TRUE TRUE TRUE
Last day of every quarter in 2002
year_2002_days <- seq.Date(as.Date("2002-01-01"), as.Date("2002-12-31"), by ="days")
n_2002 <- length(year_2002_days)
xts4 <- xts(rep(0, n_2002), year_2002_days)
ep4 <- endpoints(xts4, on = "quarters", k = 1)
index(xts4[ep4])
## [1] "2002-03-31" "2002-06-30" "2002-09-30" "2002-12-31"
Equivalently
index(
do.call(
c,
lapply(split(xts4, "quarters"), last, "1 day")
)
)
## [1] "2002-03-31" "2002-06-30" "2002-09-30" "2002-12-31"
Last day of February of for years 2002 to 2007
year_2002_2007_days <- seq.Date(as.Date("2002-01-01"), as.Date("2007-12-31"), by ="days")
feb_2002_2007_days <- year_2002_2007_days[format(year_2002_2007_days, "%m") == "02"]
ep6 <- endpoints(feb_2002_2007_days, on = "months")
feb_2002_2007_days[ep6]
## [1] "2002-02-28" "2003-02-28" "2004-02-29" "2005-02-28" "2006-02-28"
## [6] "2007-02-28"
Find the second saturday of the first 6 months of 2004
do.call(
c,
lapply(split(tseq7a, format(tseq7a, "%m")),'[', 2)
)
## 01 02 03 04 05
## "2004-01-10" "2004-02-14" "2004-03-13" "2004-04-10" "2004-05-08"
## 06
## "2004-06-12"
xts7 <- xts(1:length(tseq7a), tseq7a)
index(xts7)[apply.monthly(xts7, first, "2 weeks")[, 2]]
## [1] "2004-01-10" "2004-02-14" "2004-03-13" "2004-04-10" "2004-05-08"
## [6] "2004-06-12"
Find the fifth saturday of the first 6 months of 2004
do.call(
c,
lapply(split(tseq7a, format(tseq7a, "%m")),'[', 5)
)
## 01 02 03 04 05
## "2004-01-31" NA NA NA "2004-05-29"
## 06
## NA
Equivalently
do.call(
c,
lapply(split(xts7, "months"), function(x) index(x)[5])
)
## [1] "2004-01-31" NA NA NA "2004-05-29"
## [6] NA
xts7 <- xts(rep(0, length(tseq7a)), tseq7a)
index(apply.monthly(xts7, last)[apply.monthly(xts7, length) == 5])
## [1] "2004-01-31 UTC" "2004-05-29 UTC"
tseq1
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
summary(tseq1)
## Min. 1st Qu. Median Mean 3rd Qu.
## "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-21" "2000-01-31"
## Max.
## "2000-02-10"
dateline(tseq1)
n1 <- length(tseq1)
xts1 <- xts(x = rnorm(n1), order.by = tseq1)
colnames(xts1) <- "random"
plot(xts1, type = "b", pch = 16)
dygraph(xts1)
tclass(xts1)
## [1] "Date"
tclass(convertIndex(xts1, "POSIXct"))
## [1] "POSIXct" "POSIXt"
tzone(xts1)
## [1] "UTC"
index(xts1)
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
tformat(xts1) <- "%Y%m%d"
index(xts1)
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
xts1
## random
## 20000101 -0.6788673
## 20000111 0.8846896
## 20000121 0.9879566
## 20000131 -0.1163698
## 20000210 0.6781643
str(xts1)
## An 'xts' object on 2000-01-01/2000-02-10 containing:
## Data: num [1:5, 1] -0.679 0.885 0.988 -0.116 0.678
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "random"
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## NULL
colnames(xts1) <- "rnorm"
xts1
## rnorm
## 20000101 -0.6788673
## 20000111 0.8846896
## 20000121 0.9879566
## 20000131 -0.1163698
## 20000210 0.6781643
str(xts1)
## An 'xts' object on 2000-01-01/2000-02-10 containing:
## Data: num [1:5, 1] -0.679 0.885 0.988 -0.116 0.678
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "rnorm"
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## NULL
xtsAttributes(xts1)
## NULL
Save an xts object
write.zoo(xts1, file = "xts1.csv", col.names = T, sep = "\t", dec = ".")
Read the xts object back. Double check you are able to reproduce original data. There are multiple methods to do so.
table1 <- read.table("xts1.csv", sep = "\t", dec = ".", stringsAsFactors = F, header = T)
m1 <- as.matrix(table1[, -1])
colnames(m1) <- colnames(table1)[-1]
xts1_read <- xts(m1, order.by = as.Date(table1$Index))
Replace values in the xts object
xts1[3] <- NA
index(xts1)
## [1] "2000-01-01" "2000-01-11" "2000-01-21" "2000-01-31" "2000-02-10"
xts1["2000-02-10"] <- NA
dygraph(xts1)
plot(xts1, type = "b", pch = 16)
dygraph(na.omit(xts1))
Add a data point between existing ones
xts1_extra <- xts(-1, order.by = as.Date("2000-01-15"))
colnames(xts1_extra) <- "rnorm"
merge(xts1, xts1_extra)
## rnorm rnorm.1
## 20000101 -0.6788673 NA
## 20000111 0.8846896 NA
## 20000115 NA -1
## 20000121 NA NA
## 20000131 -0.1163698 NA
## 20000210 NA NA
rbind(xts1, xts1_extra)
## rnorm
## 20000101 -0.6788673
## 20000111 0.8846896
## 20000115 -1.0000000
## 20000121 NA
## 20000131 -0.1163698
## 20000210 NA
xts1 + xts1_extra
## Data:
## numeric(0)
##
## Index:
## Date of length 0
xts1 + merge(xts1_extra, index(xts1), fill = 0)
## rnorm
## 20000101 -0.6788673
## 20000111 0.8846896
## 20000121 NA
## 20000131 -0.1163698
## 20000210 NA
If the date is already in the original xts index, you have to decide how to merge the values that correspond to the same date.
xts1_extra2 <- xts(-1, order.by = as.Date("2000-01-11"))
colnames(xts1_extra2) <- "rnorm"
rbind(xts1, xts1_extra2)
## rnorm
## 20000101 -0.6788673
## 20000111 0.8846896
## 20000111 -1.0000000
## 20000121 NA
## 20000131 -0.1163698
## 20000210 NA
merge(xts1, xts1_extra2)
## rnorm rnorm.1
## 20000101 -0.6788673 NA
## 20000111 0.8846896 -1
## 20000121 NA NA
## 20000131 -0.1163698 NA
## 20000210 NA NA
For instance, let us assume we want to take the minimum
xts1_merged <- merge(xts1, xts1_extra2)
xts1_merged
## rnorm rnorm.1
## 20000101 -0.6788673 NA
## 20000111 0.8846896 -1
## 20000121 NA NA
## 20000131 -0.1163698 NA
## 20000210 NA NA
min_na <- function(x){
min_value <- ifelse(sum(is.na(x)) == length(x),
NA,
min(x, na.rm = T))
return(min_value)
}
min_na(c(NA, NA))
## [1] NA
min_na(c(1, -0.5))
## [1] -0.5
min_na(c(-1, NA))
## [1] -1
xts1_merged <- xts(apply(coredata(xts1_merged), 1, min_na), order.by = index(xts1_merged))
colnames(xts1_merged) <- "rnorm"
xts1_merged
## rnorm
## 2000-01-01 -0.6788673
## 2000-01-11 -1.0000000
## 2000-01-21 NA
## 2000-01-31 -0.1163698
## 2000-02-10 NA