For this assignment I picked Bank of America from yahoo finance https://finance.yahoo.com/quote/BAC/history?p=BAC
#BOA - Historical Prices
#Determining the numbers of observations
#By using the "str" function, we already know the number of observation, but there is another way to find out:
BOAdata$Date <- as.Date(BOAdata$Date) # $Date is because Date is a column
print(paste("we have",
length(BOAdata$Date),
"observations", sep = " "))
## [1] "we have 503 observations"
#There are 365 days in a year and therefore, not every single day is listed.
#What days are missing ? Probably weekends so let's find out:
table(weekdays(BOAdata$Date), useNA = "a")
##
## Friday Monday Thursday Tuesday Wednesday <NA>
## 102 97 101 103 100 0
#Looking for the first and last date of our dataset
print(paste("The first date is", head(BOAdata$Date, 1),
"and the last date is", tail(BOAdata$Date, 1), sep = " "))
## [1] "The first date is 2018-07-02 and the last date is 2020-06-30"
#If we just want the last 10 dates for example the code would be print(paste(tail(BOAdata$Date, 10))
#Finding the mean of the “adjusting closing price”
BOAdata %>%
mutate(Weekday = weekdays(Date)) %>%
group_by(Weekday) %>%
summarize(Mean_Adj.Close = mean(Adj.Close))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 2
## Weekday Mean_Adj.Close
## <chr> <dbl>
## 1 Friday 28.0
## 2 Monday 27.9
## 3 Thursday 27.9
## 4 Tuesday 28.1
## 5 Wednesday 27.9
#As we can see, there is little variation in our results.
#Forecasting (pt1) - creating a TS function
#ts_cp = Time Series for close price
#frequency = 365 because it's daily)
ts_closeprice <- ts(BOAdata$Adj.Close,
start = c(2018,
as.numeric(format(as.Date("2018-07-02"), "%j"))),
frequency = 365)
## How to know what day of year date x is
print(as.numeric(format(as.Date("2018-07-02"), "%j")))
## [1] 183
#Plotting the trend of our data
ggplot(BOAdata, aes(x =as.POSIXct(Date) , y = Adj.Close)) +
geom_line() +
xlab("Year") +
ylab("Adjusted Closing Price in USD") +
ggtitle("BOA Daily Adjusted Closing Price") +
ggthemes::theme_economist_white()
#Spliting the data into training and testing
train_ts <- window(ts_closeprice, end = c(2019,2))
test_ts <- window(ts_closeprice, start = c(2019,1))
#fit our linear model of the training set against the trend and see the results
x <- tslm(train_ts ~ trend)
print(summary(x))
##
## Call:
## tslm(formula = train_ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1725 -1.2127 0.7017 1.3667 2.6058
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.825760 0.255478 112.830 < 2e-16 ***
## trend -0.014121 0.002382 -5.928 1.5e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.73 on 183 degrees of freedom
## Multiple R-squared: 0.1611, Adjusted R-squared: 0.1565
## F-statistic: 35.14 on 1 and 183 DF, p-value: 1.499e-08
#Results of our linear model
x_forecasts <- forecast(x, h = 30)
print(accuracy(x_forecasts, test_ts))
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.858000e-17 1.721024 1.445664 -0.4232204 5.435470 NaN 0.9673308
## Test set 2.737326e+00 2.886191 2.737326 9.4424313 9.442431 NaN 0.8131647
## Theil's U
## Training set NA
## Test set 8.389265
#Residual of linear model
hist(x$residuals, col = "pink")
#Forecasting results - 1st method
#predict next 30
t=c(1:503)#define time variable
mod=lm(Close~t,data=BOAdata) #Close est une colonne
pred=forecast(mod,newdata=data.frame(t=503:533))
autoplot(pred)+ggtitle("Forecasts from LM")
#the expected close price for the upcoming 30 days should range from 23 to 32
#Forecasting results - 2nd method
x_forecasts %>% forecast(method="naive") %>%
autoplot() + ylab("Adjusting closing price in USD")
#For some reason it predicts the result of 2019; that is unexpected; is there an error somewhere ?