< 1. Plotting against Year 2019 >
Let’s take a look at one year, 2019. We will see what the slope of the regression line is for the graph plotting the annual standard deviation, to the annual mean return, for each of the assets.
options("install.lock"=FALSE)
library(tidyr)
library(dplyr)
library(zoo)
library(ggplot2)
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.2.2
library(data.table)
library(knitr)
# Manipulate Data
dat <- read.csv('Extract.csv')
newdat1 <- dat %>% pivot_longer(cols = all_of(colnames(dat)[-c(1,2)]),
names_to = "Assets",
values_to = "Returns",
values_drop_na = FALSE
);newdat1
newdat2 <- newdat1 %>% group_by(Year, Assets) %>%
summarise(mean = prod(1+Returns), sd = sd(Returns)/sqrt(12)); newdat2
# Year 2019, plot of the asset volatility to mean return
newdat2 %>% filter(Year == 2019) %>%
ggplot(mapping = aes(x = sd, y = mean, label = Assets))+
geom_point(size = 1.5)+
geom_smooth(method = 'lm', color = "red", se = FALSE)+
geom_text_repel()+
labs(title = '2019 Risk Return Characteristics across Asset Classes') +
xlab('Annualized Asset Standard Deviation') +
ylab('Annualized Asset Return')
# Ordinary Least Squares
newdat3 <- newdat2 %>% filter(Year == 2019)
model.ols <- newdat3 %>%
lm(formula = mean ~ sd)
as.numeric(model.ols$coefficients[2]) # Slope of 2019
## [1] 15.17556
summary(model.ols)
##
## Call:
## lm(formula = mean ~ sd, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.143312 -0.042177 -0.001856 0.046145 0.113685
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.06274 0.02084 51.005 < 2e-16 ***
## sd 15.17556 2.08935 7.263 2.97e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06731 on 32 degrees of freedom
## Multiple R-squared: 0.6224, Adjusted R-squared: 0.6106
## F-statistic: 52.76 on 1 and 32 DF, p-value: 2.973e-08
# Plots to investigate any assumption violations
par(mfrow = c(2,2))
plot(model.ols)
# Generalized Least Squares to address homoscedasticity
newdat3$resi <- model.ols$residuals
varfunc.ols <- lm(log(resi^2) ~ log(newdat3$sd), data = newdat3)
newdat3$varfunc <- exp(varfunc.ols$fitted.values)
model.gls <- lm(mean ~ sd, weights = 1/sqrt(varfunc), data = newdat3)
summary(model.gls)
##
## Call:
## lm(formula = mean ~ sd, data = newdat3, weights = 1/sqrt(varfunc))
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.66929 -0.19790 0.01976 0.22845 0.51367
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0521 0.0178 59.109 < 2e-16 ***
## sd 16.1972 1.9376 8.359 1.5e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3144 on 32 degrees of freedom
## Multiple R-squared: 0.6859, Adjusted R-squared: 0.6761
## F-statistic: 69.88 on 1 and 32 DF, p-value: 1.497e-09
as.numeric(model.gls$coefficients[2]) # Adjusted Slope of 2019
## [1] 16.19715
# Much improved R^2, and variances in the estimators
# Plot
par(mfrow = c(2,2))
plot(model.gls)
# New Plot
eq <- function(x,y) {
m <- lm(mean ~ sd, weights = 1/sqrt(varfunc), data = newdat3)
as.character(
as.expression(
substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 4),
b = format(coef(m)[2], digits = 4),
r2 = format(summary(m)$r.squared, digits = 3)))
)
)
}
newdat3 %>%
ggplot(mapping = aes(x = sd, y = mean, label = Assets))+
geom_point(size = 1.5)+
geom_smooth(method = 'lm', color = "red", se = FALSE)+
geom_text_repel()+
geom_text(x = 0.004, y = 1.35, label = eq(df$wt,df$hp), parse = TRUE) +
labs(title = 'Adjusted 2019 Risk Return Characteristics across Asset Classes') +
xlab('Annualized Asset Standard Deviation') +
ylab('Annualized Asset Return')
## `geom_smooth()` using formula 'y ~ x'
What Was happening in 2019?
Observations: - Mid upper crowdness on stocks and riskier bonds. - Lower left plot shows extreme risk loving behaviour on treasuries resulted by expectations that Fed would decrease interest rate.
<2. Plotting across all years>
2.1. Slope Values and R Squared Values
Now that we have looked at Year 2019 individually, let’s try using the same methodology and plot against all years.
Years <- unique(dat$Year)
Years <- Years[3:length(Years)] # Exclude 2012, 2013 for now.
Slopes <- c()
R_Squared <- c()
for (i in seq(1:length(Years))){
newdat4 <- newdat2 %>% filter(Year == Years[i])
model.ols <- newdat4 %>% lm(formula = mean ~ sd)
newdat4$resi <- model.ols$residuals
varfunc.ols <- lm(log(resi^2) ~ log(newdat4$sd), data = newdat4)
newdat4$varfunc <- exp(varfunc.ols$fitted.values)
model.gls <- lm(mean ~ sd, weights = 1/sqrt(varfunc), data = newdat4)
Slopes[i] <- format(as.numeric(model.gls$coefficients[2]), digits = 3)
R_Squared[i] <- format(summary(model.gls)$r.squared, digits = 3)
}
annual_slopes <- kable(as.data.table(cbind(Years, Slopes, R_Squared))); annual_slopes
| Years | Slopes | R_Squared |
|---|---|---|
| 2014 | 16.9 | 0.421 |
| 2015 | -0.573 | 0.0192 |
| 2016 | 4.49 | 0.172 |
| 2017 | 30 | 0.583 |
| 2018 | -6.71 | 0.742 |
| 2019 | 16.2 | 0.686 |
| 2020 | 6.59 | 0.293 |
| 2021 | 16.9 | 0.461 |
| 2022 | -12.7 | 0.658 |
2.2. Plots from 2014 to 2022
for (i in seq(1:length(Years))){
newdat4 <- newdat2 %>% filter(Year == Years[i])
plot <- newdat4 %>%
ggplot(mapping = aes(x = sd, y = mean, label = Assets))+
geom_point(size = 1.5)+
geom_smooth(method = 'lm', color = "red", se = FALSE)+
geom_text_repel()+
labs(title = paste0('Adjusted ', Years[i], ' Risk Return Characteristics across Asset Classes')) +
xlab('Annualized Asset Standard Deviation') +
ylab('Annualized Asset Return')
print(plot)
}