These functions read a file without a TS structure and then define the TS object.
# Read from a raw csv file
raw.data <- read.table("data.us.csv", sep = ",", header = TRUE)
# When you have the dates in the original csv file:
# use the date column only to build the time index and keep it out of the
# data itself (xts stores its data as one matrix, so a text column would turn
# every value into a character string).
xts.data <- xts(raw.data[, names(raw.data) != "date", drop = FALSE],
                order.by = as.Date(raw.data$date, format = "%m/%d/%Y"))
# When you don't have the dates in the original csv file but know the starting
# date: build an index with one date every 3 months, one per row of the data.
date <- seq(as.Date("1960/3/1"), by = "3 month", length.out = nrow(raw.data))
# raw.data[, -1] drops the first column (NOTE: presumably a non-data column --
# confirm against the csv). Quarterly data has 4 observations per year, so the
# frequency is 4 (the same value used when building the `ts` object).
xts.data <- xts(raw.data[, -1], order.by = date, frequency = 4)
This function reads the file and declares the TS structure from the beginning.
# Read the csv straight into a time series object.
# Note that this is a TS with a zoo structure: column 1 holds the dates,
# written as month/day/year.
ts.data <- read.zoo("data.us.csv", index.column = 1, sep = ",", header = TRUE, format = "%m/%d/%Y")
# Or build a base-R `ts` object from columns 2 to 4 of the raw data:
# 4 observations per year, starting in the first quarter of 1960.
ts.data <- ts(raw.data[, 2:4], frequency = 4, start = c(1960, 1))
# One can convert the TS-zoo into a xts...
xts.data <- as.xts(ts.data)
There are two main ways to get data into R: load it from an Excel or csv file, or download it from an online source. There are R packages that fetch the data directly from the web in a predefined format. The table below shows the most popular sources and the packages that one can use.
| Sources | R-Package | Web Pages |
|---|---|---|
| Yahoo, FRED, Google, Oanda | quantmod | Link |
| International Monetary Fund (IMF)<sup>1</sup> | IMFData or imfr | Link |
| World Bank’s WDI | WDI | Link |
| OECD<sup>2</sup> | rsdmx | Link |
| Penn World Tables | pwt | Link |
| International Labor Organization (ILO) | rsdmx | Link |
One can look up a series code on these web pages and then use the getSymbols function to download the series directly into R.
# Download each series from FRED and give its single column a readable label.
# getSymbols() creates an object named after the requested code.

# Real GDP
getSymbols("GDPC1", src = "FRED")
colnames(GDPC1) <- "US Real GDP"

# Core PCE price index
getSymbols("PCEPILFE", src = "FRED")
colnames(PCEPILFE) <- "Core PCE"

# Federal funds rate
getSymbols("FEDFUNDS", src = "FRED")
colnames(FEDFUNDS) <- "FED Rate"
# Federal funds rate, monthly data from January 1980 to March 1980
FEDFUNDS["1980-01-01/1980-03-01"]
## FED Rate
## 1980-01-01 13.82
## 1980-02-01 14.13
## 1980-03-01 17.19
# Real GDP, quarterly data, for 2006
GDPC1["2006"]
## US Real GDP
## 2006-01-01 14546.12
## 2006-04-01 14589.58
## 2006-07-01 14602.63
## 2006-10-01 14716.93
# End-of-period (December) values of the core PCE series from 2000 to 2005.
# Take the 2000/2005 window first and build the December mask from that same
# window, so the logical index lines up with the rows it selects. Building the
# mask from the window but applying it to the full series selects the wrong
# rows (it returned the Decembers of 1959-1964 instead of 2000-2005).
pce.window <- PCEPILFE["2000/2005"]
pce.window[format(index(pce.window), "%m") == "12"]
## Core PCE
## (one row per year is expected, dated 2000-12-01 through 2005-12-01)
# Set missings into the series: blank out every 2001 observation
gdp.miss <- GDPC1["2000/2002"]
gdp.miss["2001"] <- NA
# Identify the NAs
gdp.miss[is.na(gdp.miss)]
# Show numbers without NAs
na.omit(gdp.miss)
# Fill missings with the last observation carried forward (second column) or
# with the next non-missing observation carried backward (third column)
cbind(gdp.miss, na.locf(gdp.miss), na.locf(gdp.miss, fromLast = TRUE))
## US.Real.GDP US.Real.GDP.1 US.Real.GDP.2
## 2000-01-01 12359.09 12359.09 12359.09
## 2000-04-01 12592.53 12592.53 12592.53
## 2000-07-01 12607.68 12607.68 12607.68
## 2000-10-01 12679.34 12679.34 12679.34
## 2001-01-01 NA 12679.34 12822.26
## 2001-04-01 NA 12679.34 12822.26
## 2001-07-01 NA 12679.34 12822.26
## 2001-10-01 NA 12679.34 12822.26
## 2002-01-01 12822.26 12822.26 12822.26
## 2002-04-01 12893.00 12893.00 12893.00
## 2002-07-01 12955.77 12955.77 12955.77
## 2002-10-01 12964.02 12964.02 12964.02
# Fill missing values with linear interpolation (second column) and with a
# cubic spline (third column)
cbind(gdp.miss, na.approx(gdp.miss), na.spline(gdp.miss, method = "fmm"))
## US.Real.GDP US.Real.GDP.1 US.Real.GDP.2
## 2000-01-01 12359.09 12359.09 12359.09
## 2000-04-01 12592.53 12592.53 12592.53
## 2000-07-01 12607.68 12607.68 12607.68
## 2000-10-01 12679.34 12679.34 12679.34
## 2001-01-01 NA 12708.11 12736.04
## 2001-04-01 NA 12736.26 12758.66
## 2001-07-01 NA 12764.71 12767.68
## 2001-10-01 NA 12793.49 12782.28
## 2002-01-01 12822.26 12822.26 12822.26
## 2002-04-01 12893.00 12893.00 12893.00
## 2002-07-01 12955.77 12955.77 12955.77
## 2002-10-01 12964.02 12964.02 12964.02
| Transformation | Command |
|---|---|
| Logarithm | log(y) |
| Lag: \(L^{n} y_{t} = y_{t-n}\) | lag(y,n) |
| Difference: \(\Delta y_{t} = y_{t} - y_{t-1}\) | diff(y) |
| Moving average: \(\bar{y}^{n}_{t} = \frac{1}{n} \sum^{n-1}_{i=0} y_{t-i}\) | rollapply(y, n, FUN = mean) |
| Cumulative sum: \(y^{s}_{t} = \sum^{t}_{i=0} y_{i}\) | cumsum(y) |
# Transformations of GDP, each stored as a new column of xts.gdp.
# The log series is computed once and reused for the derived columns.
lgdp <- log(xts.gdp$gdp)
xts.gdp$lgdp <- lgdp
# Log GDP lagged by one period
xts.gdp$lgdp_1 <- lag(lgdp, k = 1)
# Change in log GDP from one observation to the next
xts.gdp$dlgdp <- diff(lgdp)
# Moving average of log GDP over a 5-observation window
xts.gdp$mov.avg5_lgdp <- rollapply(lgdp, width = 5, FUN = mean)
# Running total of log GDP
xts.gdp$cu.sum_lgdp <- cumsum(lgdp)
# Check the frequency of the series, then get the index used to group it by year
periodicity(xts.gdp)
## Quarterly periodicity from 1947-03-01 to 2018-03-01
years <- endpoints(xts.gdp, on = "years")
# Helper: collapse xts.gdp year by year with the summary function `f`
by.year <- function(f) period.apply(xts.gdp, INDEX = years, FUN = f)
# First and last observation of each year
xts.gdp.a.firs <- by.year(first)
xts.gdp.a.last <- by.year(last)
# Yearly average
xts.gdp.a.mean <- by.year(mean)
# Yearly total
xts.gdp.a.sum <- by.year(sum)
# Yearly minimum and maximum
xts.gdp.a.min <- by.year(min)
xts.gdp.a.max <- by.year(max)
# Quarterly series next to every yearly aggregate, for 2000 and 2001
cbind(
  xts.gdp["2000/2001"],
  xts.gdp.a.firs["2000/2001"],
  xts.gdp.a.last["2000/2001"],
  xts.gdp.a.mean["2000/2001"],
  xts.gdp.a.sum["2000/2001"],
  xts.gdp.a.min["2000/2001"],
  xts.gdp.a.max["2000/2001"]
)
## QRT.GDP FOP.GDP EOP.GDP AVG.GDP SUM.GDP MIN.GDP MAX.GDP
## 2000-03-01 12359.09 NA NA NA NA NA NA
## 2000-06-01 12592.53 NA NA NA NA NA NA
## 2000-09-01 12607.68 NA NA NA NA NA NA
## 2000-12-01 12679.34 12359.09 12679.34 12559.66 50238.64 12359.09 12679.34
## 2001-03-01 12643.28 NA NA NA NA NA NA
## 2001-06-01 12710.30 NA NA NA NA NA NA
## 2001-09-01 12670.11 NA NA NA NA NA NA
## 2001-12-01 12705.27 12643.28 12705.27 12682.24 50728.96 12643.28 12710.30
# Quarterly averages of the monthly series
quarts <- endpoints(xts.inf, on = "quarters")
xts.inf.q.avg <- period.apply(xts.inf, quarts, mean)
# Attach the quarterly averages to the monthly data; the left join keeps every
# monthly row, so months without a quarterly value show NA
xts.inf <- merge(xts.inf, xts.inf.q.avg, join = "left")
colnames(xts.inf) <- c("EOP.inf", "AVG.inf")
xts.inf["2001"]
## EOP.inf AVG.inf
## 2001-01-01 85.683 NA
## 2001-02-01 85.814 NA
## 2001-03-01 85.898 85.79833
## 2001-04-01 86.044 NA
## 2001-05-01 86.074 NA
## 2001-06-01 86.278 86.13200
## 2001-07-01 86.503 NA
## 2001-08-01 86.554 NA
## 2001-09-01 86.078 86.37833
## 2001-10-01 86.694 NA
## 2001-11-01 86.869 NA
## 2001-12-01 86.888 86.81700
# Combine GDP and inflation for 2001; the inner join keeps only the dates
# present in both series
gdp.2001 <- xts.gdp["2001"]
inf.2001 <- xts.inf["2001"]
merge(gdp.2001, inf.2001, join = "inner")
## QRT.GDP EOP.inf AVG.inf
## 2001-03-01 12643.28 85.898 85.79833
## 2001-06-01 12710.30 86.278 86.13200
## 2001-09-01 12670.11 86.078 86.37833
## 2001-12-01 12705.27 86.888 86.81700
# Styling shared by both base-graphics figures: one entry per series
series.col <- c("blue", "red")
series.lty <- c(1, 1)
series.lwd <- c(2, 2)

# Figure 1 (zoo TS structure): the first two series, one panel each
plot(ts.data[, 1:2],
     plot.type = "multiple",
     col = series.col, lty = series.lty, lwd = series.lwd,
     main = "",
     ylab = c("FED Rate", "Inflation"),
     xlab = "Date")
legend("topright",
       legend = c("FED Rate", "Inflation"),
       col = series.col, lty = series.lty, lwd = series.lwd)

# Figure 2 (zoo TS structure): the same two series on one set of axes,
# with the vertical axis fixed to 0-20
plot(ts.data[, 1:2],
     plot.type = "single",
     ylim = c(0, 20),
     col = series.col, lty = series.lty, lwd = series.lwd,
     ylab = "Percentage points",
     xlab = "Date")
legend("topright",
       legend = c("Fed Rate", "Inflation"),
       col = series.col, lty = series.lty, lwd = series.lwd)
# Same figure with ggplot2, assembled in stages.

# Lines: one layer per series; the constant colour label creates the legend key
fig <- ggplot() +
  geom_line(data = xts.data, aes(x = Index, y = ffr, color = "Fed Rate"),
            linetype = 1, size = 1) +
  geom_line(data = xts.data, aes(x = Index, y = infl, color = "Inflation"),
            linetype = 1, size = 1)

# Scales: legend colours, 0-20 vertical axis in steps of 5, a tick per decade
fig <- fig +
  scale_color_manual(labels = c("Fed Rate", "Inflation"),
                     breaks = c("Fed Rate", "Inflation"),
                     values = c("Fed Rate" = "red", "Inflation" = "blue")) +
  scale_y_continuous(limits = c(0, 20), breaks = seq(0, 20, by = 5)) +
  scale_x_date(limits = as.Date(c("1960-03-01", "2018-03-01")),
               date_breaks = "10 years", date_labels = "%Y")

# Theme: base theme first, then overrides (legend inside the panel, faint
# dotted horizontal grid lines, no vertical grid lines)
fig <- fig +
  theme_hc() +
  theme(legend.position = c(0.82, 0.85),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "transparent"),
        panel.grid.major.y = element_line(size = 0.1, colour = "grey", linetype = 3),
        panel.grid.major.x = element_line(colour = "transparent"),
        panel.grid.minor.x = element_line(colour = "transparent"))

# Titles and caption; axis and legend titles are left blank
fig <- fig +
  labs(x = "", y = "", color = "",
       title = "Federal Funds Rate and PCE Inflation",
       subtitle = "Percentage points",
       caption = "Source: U.S. Bureau of Economic Analysis.")

# Display the figure
fig