... using your own words and insert R chunks where appropriate. You will be performing analysis using a tidy workflow on the following stock tickers:
Use daily log returns on adjusted prices (instead of daily settles).
…
library(tidyquant); library(tidyverse)
# Tickers analysed in this exam.
ticker <- c("QQQ", "GLD", "ALB")
# Daily price history for every ticker, stacked in long format
# (one row per symbol/date).
dfraw <- tq_get(ticker, get = "stock.prices", from = "2018-01-01", to = "2019-10-01")
# Daily log returns on adjusted prices.
# The lag has to be taken within each symbol: dfraw stacks the tickers on top
# of each other, so an ungrouped lag() would pair the first price of one
# ticker with the last price of the previous ticker and produce a huge
# spurious "return" (visible as extreme skewness/kurtosis downstream).
# With the grouping, the first row of every symbol is NA and is dropped.
dflong <- dfraw %>%
  dplyr::group_by(symbol) %>%
  dplyr::mutate(value = log(adjusted) - log(dplyr::lag(adjusted))) %>%
  dplyr::ungroup() %>%
  na.omit() %>%
  dplyr::select(date, symbol, value)
# One panel per ticker with free axes so each series uses its own range.
# `scales` is spelled out in full instead of relying on partial matching.
dflong %>%
  ggplot(aes(x = date, y = value, col = symbol)) +
  geom_line() +
  facet_wrap(. ~ symbol, ncol = 1, scales = "free") +
  scale_y_continuous()
# Reshape to wide format: one row per date, one return column per ticker.
# Dates that lack a return for any ticker are dropped.
dfwide <- na.omit(
  tidyr::pivot_wider(dflong, names_from = symbol, values_from = value)
)
…
library(moments)
# Per-ticker mean, standard deviation, skewness and kurtosis of the daily
# log returns.
dflong %>%
  dplyr::group_by(symbol) %>%
  dplyr::summarise(
    mean = mean(value),
    stdv = stats::sd(value),
    skew = moments::skewness(value),
    kurt = moments::kurtosis(value)
  )
## # A tibble: 3 x 5
## symbol mean stdv skew kurt
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ALB -0.00158 0.0222 -0.529 5.37
## 2 GLD -0.000700 0.0208 -17.5 347.
## 3 QQQ 0.000433 0.0131 -0.381 5.19
geom_rug(). (4 points)…
library(scales)
# Density-scaled histogram of the daily log returns, one panel per ticker,
# with a rug along the x-axis marking the individual observations.
# The rug inherits only the x aesthetic and is restricted to the bottom side:
# mapping y = 0 with vertical jitter would also draw random ticks on the left
# axis (geom_rug() defaults to sides = "bl"), perturb the x positions, and
# make the figure differ from run to run.
# No group_by() is needed: ggplot() ignores dplyr groups and facet_wrap()
# does the per-symbol split.
# NOTE(review): `..density..` is deprecated in recent ggplot2 releases in
# favour of after_stat(density); switch once ggplot2 >= 3.3.0 is guaranteed.
dflong %>%
  ggplot(aes(x = value)) +
  geom_histogram(aes(y = ..density..), bins = 300) +
  facet_wrap(. ~ symbol, ncol = 1) +
  geom_rug(sides = "b")
The remainder of the exam questions use ONLY ALB and QQQ.

### Q4. Correlation (10 points)
…
# Full-sample correlation between ALB and QQQ daily returns
# (stats::cor default method).
cor <- stats::cor(dfwide[["ALB"]], dfwide[["QQQ"]])
cor
## [1] 0.4795587
library(tibbletime)
# Rolling version of cor() over a 60-observation window.
cor60 <- tibbletime::rollify(~ cor(.x, .y), window = 60)
# Attach the rolling ALB/QQQ correlation to the wide returns and drop the
# leading rows where the window is not yet full (they are NA).
dfroll <- na.omit(
  dplyr::mutate(dfwide, cor60 = cor60(ALB, QQQ))
)
head(dfroll)
## # A tibble: 6 x 5
## date QQQ GLD ALB cor60
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2018-03-29 0.0181 0.000477 0.0349 0.359
## 2 2018-04-02 -0.0293 0.0116 -0.0416 0.381
## 3 2018-04-03 0.0112 -0.00757 0.0163 0.387
## 4 2018-04-04 0.0156 0.00119 0.0130 0.390
## 5 2018-04-05 0.00568 -0.00515 0.0445 0.392
## 6 2018-04-06 -0.0253 0.00468 -0.0423 0.412
# Time series of the 60-observation rolling ALB/QQQ correlation.
cor60graph <- ggplot(dfroll, aes(x = date, y = cor60)) +
  geom_line()
cor60graph
# Rolling version of sd() over a 60-observation window.
sd60 <- tibbletime::rollify(sd, window = 60)
# Rolling standard deviation of returns per ticker, scaled by sqrt(252)
# (presumably annualising on 252 trading days per year). Rows where the
# window is not yet full are NA and are dropped.
dfrollsd <- dflong %>%
  tibbletime::as_tbl_time(index = date) %>%
  group_by(symbol) %>%
  dplyr::mutate(sd60 = sd60(value) * sqrt(252)) %>%
  ungroup() %>%
  na.omit()
# Compare the rolling volatility of QQQ and ALB only.
dfrollsd %>%
  dplyr::filter(symbol %in% c("QQQ", "ALB")) %>%
  ggplot(aes(x = date, y = sd60, col = symbol)) +
  geom_line()
# The answer is A) ALB and QQQ both show heteroskedasticity, because their rolling 60-day standard deviations are not constant over time.
# Scatter plot of ALB against QQQ daily returns with a fitted linear
# regression line.
ggplot(dfwide, aes(x = QQQ, y = ALB)) +
  geom_point() +
  stat_smooth(method = "lm")
…
…
# Linear regression of ALB daily log returns on QQQ daily log returns; the
# QQQ coefficient is the slope (beta) of ALB with respect to QQQ.
# The call is left exactly as written because summary() prints it.
fit <- stats::lm(ALB ~ QQQ, data = dfwide)
summary(fit)
##
## Call:
## stats::lm(formula = ALB ~ QQQ, data = dfwide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.098294 -0.010557 0.001043 0.010968 0.079679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.001737 0.000921 -1.886 0.06 .
## QQQ 0.801556 0.070243 11.411 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01926 on 436 degrees of freedom
## Multiple R-squared: 0.23, Adjusted R-squared: 0.2282
## F-statistic: 130.2 on 1 and 436 DF, p-value: < 2.2e-16
A/ More than 20% of the variance is explained. E/ The beta coefficient is > 0.5.
…be concise max a few bullet points…
library(ggfortify)
# Regression diagnostic plots for the fitted model (lm method supplied by
# ggfortify), drawn with small points.
ggplot2::autoplot(object = fit, size = 0.5)
The Residuals vs. Fitted plot shows that the residuals are randomly and evenly spread around a horizontal line at 0. This indicates no evidence of a non-linear relationship between ALB and QQQ.
The Normal Q-Q plot shows the residuals following a straight line, which indicates that the residuals are normally distributed.
The Scale-Location plot shows points spread evenly around a horizontal line, which means that the variance of the residuals is constant.
Overall, the residual diagnostics support the conclusion that there is a significant linear relationship between QQQ and ALB.