library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(readxl)

#install.packages("plotly")
library("plotly")
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library("dplyr")
library("tidyr")
library("purrr")
library("quantmod")
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Version 0.4-0 included new data defaults. See ?getSymbols.
library("magrittr")
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library("scales")
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library("xts")

### Visualising government bond markets ########################################
gc()                                             # Garbage collection
##           used (Mb) gc trigger  (Mb) limit (Mb) max used (Mb)
## Ncells  982974 52.5    2013118 107.6         NA  1270021 67.9
## Vcells 1645825 12.6    8388608  64.0      16384  2224991 17.0
assign("last.warning", NULL, envir = baseenv())  # Reset list of past warnings

library("pdfetch")   # To obtain data from St. Louis FED
library("xts")       # Extended time series
library("reshape2")  # To reshape data sets
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library("ggplot2")   # For plotting
library("ggthemes")  # Plot themes
library("plotly")    # For 3D plot

### US government bond markets #################################################
gbu <- pdfetch_FRED(c("DGS30", "DGS20", "DGS10", "DGS7", "DGS5", "DGS3",
                      "DGS2", "DGS1", "DGS6MO", "DGS3MO", "DGS1MO"))
names(gbu) <- c("30Y", "20Y", "10Y", "7Y", "5Y", "3Y", "2Y", "1Y",
                "6M", "3M", "1M")


# Static plot
u <- ggplot(melt(fortify(gbu), id = "Index"),
            aes(x = Index, y = value, color = variable)) +
  geom_line(size = 1) +
  scale_color_gdocs("", breaks = names(gbu)) +
  labs(title = "US Treasury yields",
       y = "Percent", x = "Date", caption = "Source: St. Louis FRED")
u
## Warning: This manual palette can handle a maximum of 10 values. You have
## supplied 11.
## Warning: Removed 35296 row(s) containing missing values (geom_path).

ggsave("ustreasuries-yields.png", u, width = 8, height = 4.5, dpi = 300)
## Warning: This manual palette can handle a maximum of 10 values. You have
## supplied 11.

## Warning: Removed 35296 row(s) containing missing values (geom_path).
## 3D plot
p <- plot_ly(z = ~gbu, y = index(gbu), x = names(gbu),
             colors = gdocs_pal()(3)) %>%
  add_surface() %>%
  layout(title = "US Treasury yields<br />Source: St. Louis FRED",
         scene = list(
           xaxis = list(title = "Maturity", gridcolor = "rgb(255, 255, 255)",
                        zerolinecolor = "rgb(255, 255, 255)",
                        showbackground = TRUE,
                        backgroundcolor = "rgb(240, 240,240)"),
           yaxis = list(title = "Date", gridcolor = "rgb(255, 255, 255)",
                        zerolinecolor = "rgb(255, 255, 255)",
                        showbackground = TRUE,
                        backgroundcolor = "rgb(230, 230,230)"),
           zaxis = list(title = "Percent", gridcolor = "rgb(255, 255, 255)",
                        zerolinecolor = "rgb(255, 255, 255)",
                        showbackground = TRUE,
                        backgroundcolor = "rgb(220, 220,220)")
         ))
p
### Japan government bond market ###
source.jgb <- "http://www.mof.go.jp/jgbs/reference/interest_rate/data/jgbcm_all.csv"
jgb1 <- read.csv("jgbcm_all.csv",skip=1)
colnames(jgb1) <- c("Date","1","2","3","4","5","6","7","8","9","10",
                       "15","20","25","30","40")
jgb1$Date <- as.character(jgb1$Date)

# a function to transform Japanese calendar to Western calendar 
ToChristianEra <- function(x)
{
  era  <- substr(x, 1, 1)
  year <- as.numeric(substr(x, 2, nchar(x)))
  if(era == "H"){
    year <- year + 1988
  }else if(era == "S"){
    year <- year + 1925
  }
  as.character(year)
}

# calendar transformation (Japanese to Western)
jgb.day <- strsplit(jgb1[,1],"\\.")
warn.old <- getOption("warn")
options(warn = -1)
jgb.day <- lapply(jgb.day, function(x)c(ToChristianEra(x[1]), x[2:length(x)]))
jgb1[, 1] <- as.Date(sapply(jgb.day, function(x)Reduce(function(y,z)paste(y,z, sep="-"),x)))
jgb1[, -1] <- apply(jgb1[, -1], 2, as.numeric)
options(warn = warn.old)
rm(jgb.day)

# create a xts object
jgb.xts <- as.xts(read.zoo(jgb1))

# 3D plot
jgb.xts["2000::"] %>%
  data.matrix() %>% 
  t() %>%
  plot_ly(
    x=as.Date(index(jgb.xts["2000::"])),
    y=c(1,2,3,4,5,6,7,8,9,10,15,20,25,30,40),
    z=.,
    type="surface"
  ) %>%
  plotly::layout(
    scene=list(
      xaxis=list(title="date"),
      yaxis=list(title="term"),
      zaxis=list(title="yield")
    )
  )
###Nelson and Siegel###
# create dataset
dataset <- jgb1 %>%
  tidyr::gather(key = "maturity", value = "irate",
                "1","2","3","4","5","6","7","8","9","10","15","20","25","30","40")

head(dataset)
##         Date maturity  irate
## 1 1974-09-24        1 10.327
## 2 1974-09-25        1 10.333
## 3 1974-09-26        1 10.340
## 4 1974-09-27        1 10.347
## 5 1974-09-28        1 10.354
## 6 1974-09-30        1 10.368
# define objective function
obj.func <- function(lambda,dataset,time_end,time_start){
  ff1 <- function(m,lambda){
    ((1-exp(-m*lambda))/(m*lambda))
    }
  ff2 <- function(m,lambda){
    ((1-exp(-m*lambda))/(m*lambda)-exp(-m*lambda))
    }
  dataset <- dataset %>%
    mutate(ff1 = ff1(as.numeric(dataset$maturity)*12,lambda), ff2 = ff2(as.numeric(dataset$maturity)*12,lambda))
  results <- lm(0.01*irate ~ ff1 + ff2,data = dataset,subset = (Date>time_start & time_end>Date))
  return(summary(results)$r.squared)
}

optimize(obj.func,interval = c(1/10,1/200),dataset=dataset,time_start="1992-01-01",time_end="2018-09-28",maximum = TRUE)
## $maximum
## [1] 0.06396283
## 
## $objective
## [1] 0.1468459
#$maximum
#[1] 0.06396283

#$objective
#[1] 0.1468459

# 1992-01-01~2009-12-31で推計
optimize(obj.func,interval = c(1/10,1/200),dataset=dataset,time_end="2010-01-01",time_start="1992-01-01",maximum = TRUE)
## $maximum
## [1] 0.06885455
## 
## $objective
## [1] 0.1891524
# 2010-01-01~2018-09-28で推計
optimize(obj.func,interval = c(1/10,1/200),dataset=dataset,time_end="2018-09-28",time_start="2010-01-01",maximum = TRUE)
## $maximum
## [1] 0.02323009
## 
## $objective
## [1] 0.6573314
lambda = 0.02323009
#dataset <- dataset %>%
#  mutate(ff1 = ff1(as.numeric(dataset$maturity)*12,lambda), ff2 = #ff2(as.numeric(dataset$maturity)*12,lambda))

#head(dataset)


#results <- lm(0.01*irate ~ f1 + f2,data = dataset,subset = (Date>"2010-01-01" & #"2018-09-28">Date))
#summary(results)

#maturity <- 1:600
#plot(maturity,results$coefficients[1]+results$coefficients[2]*f1(maturity,lambda)+resu#lts$coefficients[3]*f2(maturity,lambda),type = "l",xlab = "Maturity",ylab = "Yield")

#Firring US treasury rates#

library(quantmod)
# data name collected
symbols.name <- c("10-Year Treasury Constant Maturity Rate","Effective Federal Funds Rate","
Consumer Price Index for All Urban Consumers: All Items","Civilian Unemployment Rate","3-Month Treasury Bill: Secondary Market Rate","Industrial Production Index","
10-Year Breakeven Inflation Rate","Trade Weighted U.S. Dollar Index: Broad, Goods","
Smoothed U.S. Recession Probabilities","Moody's Seasoned Baa Corporate Bond Yield","5-Year, 5-Year Forward Inflation Expectation Rate","Personal Consumption Expenditures")

# Collect economic data
symbols <- c("GS10","FEDFUNDS","CPIAUCSL","UNRATE","TB3MS","INDPRO","T10YIEM","TWEXBMTH","RECPROUSM156N","BAA","T5YIFRM","PCE")
getSymbols(symbols, from = '1980-01-01', src = "FRED", auto.assign = TRUE)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
## 
## This message is shown once per session and may be disabled by setting 
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
##  [1] "GS10"          "FEDFUNDS"      "CPIAUCSL"      "UNRATE"       
##  [5] "TB3MS"         "INDPRO"        "T10YIEM"       "TWEXBMTH"     
##  [9] "RECPROUSM156N" "BAA"           "T5YIFRM"       "PCE"
macro_indicator <- merge(GS10,FEDFUNDS,CPIAUCSL,UNRATE,TB3MS,INDPRO,T10YIEM,TWEXBMTH,RECPROUSM156N,BAA,T5YIFRM,PCE)
rm(GS10,FEDFUNDS,CPIAUCSL,UNRATE,TB3MS,INDPRO,T10YIEM,TWEXBMTH,RECPROUSM156N,BAA,T5YIFRM,PCE,USEPUINDXD)
## Warning in rm(GS10, FEDFUNDS, CPIAUCSL, UNRATE, TB3MS, INDPRO, T10YIEM, : object
## 'USEPUINDXD' not found
# make dataset
traindata <- na.omit(merge(macro_indicator["2003-01-01::2015-12-31"][,1],stats::lag(macro_indicator["2003-01-01::2015-12-31"][,-1],1)))
testdata  <- na.omit(merge(macro_indicator["2016-01-01::"][,1],stats::lag(macro_indicator["2016-01-01::"][,-1],1)))

# fitting OLS
trial1 <- lm(GS10~.,data = traindata)
summary(trial1)
## 
## Call:
## lm(formula = GS10 ~ ., data = traindata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76226 -0.21361  0.00073  0.21627  0.70430 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   14.3999898  4.3542473   3.307 0.001192 ** 
## FEDFUNDS      -0.1948179  0.1435694  -1.357 0.176932    
## CPIAUCSL      -0.0706252  0.0207560  -3.403 0.000866 ***
## UNRATE        -0.2086999  0.0795652  -2.623 0.009661 ** 
## TB3MS          0.2902683  0.1412132   2.056 0.041647 *  
## INDPRO        -0.0647136  0.0260471  -2.484 0.014128 *  
## T10YIEM        1.1535646  0.1770493   6.515 1.15e-09 ***
## TWEXBMTH      -0.0316560  0.0118119  -2.680 0.008226 ** 
## RECPROUSM156N -0.0098034  0.0020821  -4.708 5.84e-06 ***
## BAA            0.7778495  0.0867024   8.971 1.50e-15 ***
## T5YIFRM       -0.4558682  0.1898630  -2.401 0.017634 *  
## PCE            0.0009139  0.0002473   3.696 0.000312 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2982 on 143 degrees of freedom
## Multiple R-squared:  0.9203, Adjusted R-squared:  0.9141 
## F-statistic:   150 on 11 and 143 DF,  p-value: < 2.2e-16
est.OLS.Y <- predict(trial1,testdata[,-1])
Y <- as.matrix(testdata[,1])
mse.OLS <- sum((Y - est.OLS.Y)^2) / length(Y)
mse.OLS
## [1] 0.1446454
#[1] 0.1446454
# fitting lasso regression
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.0-2
trial2 <- cv.glmnet(as.matrix(traindata[,-1]),as.matrix(traindata[,1]),family="gaussian",alpha=1)
plot(trial2)

trial2$lambda.min
## [1] 0.001463052
#[1] 0.001214651

coef(trial2,s=trial2$lambda.min)
## 12 x 1 sparse Matrix of class "dgCMatrix"
##                           1
## (Intercept)    7.6144128826
## FEDFUNDS       .           
## CPIAUCSL      -0.0331481731
## UNRATE        -0.1649025233
## TB3MS          0.1250401791
## INDPRO        -0.0519294876
## T10YIEM        1.1982549442
## TWEXBMTH      -0.0121919410
## RECPROUSM156N -0.0089627572
## BAA            0.7381479249
## T5YIFRM       -0.4695926777
## PCE            0.0004393386
est.lasso.Y <- predict(trial2, newx = as.matrix(testdata[,-1]), s = trial2$lambda.min, type = 'response')
mse.lasso <- sum((Y - est.lasso.Y)^2) / length(Y)
mse.lasso
## [1] 0.1044043
#[1] 0.1127012

library(tidyverse)

ggplot(gather(data.frame(actual=Y[,1],lasso_prediction=est.lasso.Y[,1],OLS_prediction=est.OLS.Y,date=as.POSIXct(rownames(Y))),key=data,value=rate,-date),aes(x=date,y=rate, colour=data)) +
  geom_line(size=1.5) +
  scale_x_datetime(breaks = "6 month",date_labels = "%Y-%m") +
  scale_y_continuous(breaks=c(1,1.5,2,2.5,3,3.5),limits = c(1.25,3.5))