# For this assignment I picked Bank of America (ticker: BAC) historical prices from Yahoo Finance: https://finance.yahoo.com/quote/BAC/history?p=BAC

#BOA - Historical Prices

#Determining the number of observations

#By using the "str" function, we already know the number of observations, but there is another way to find out:

# Store the Date column (selected with `$`) as class Date so that date
# helpers such as weekdays() and format() can be applied to it.
BOAdata$Date <- as.Date(BOAdata$Date)

# One entry per row, so the length of the column is the observation count.
print(sprintf("we have %d observations", length(BOAdata$Date)))
## [1] "we have 503 observations"
#The data covers two years (about 730 calendar days) but has only 503 rows, so not every single day is listed.
#Which days are missing? Probably weekends, so let's find out:

# Count rows per weekday; useNA = "always" keeps an explicit <NA> column so
# unparsed dates would be visible.
table(weekdays(BOAdata$Date), useNA = "always")
## 
##    Friday    Monday  Thursday   Tuesday Wednesday      <NA> 
##       102        97       101       103       100         0

#Looking for the first and last date of our dataset

# head()/tail() with n = 1 pick the first and last row of the Date column;
# paste() joins the pieces with its default single-space separator.
print(paste(
  "The first date is", head(BOAdata$Date, n = 1),
  "and the last date is", tail(BOAdata$Date, n = 1)
))
## [1] "The first date is 2018-07-02 and the last date is 2020-06-30"
#If we just want the last 10 dates, for example, the code would be print(paste(tail(BOAdata$Date, 10)))

#Finding the mean of the “adjusted closing price” for each weekday

# Average adjusted closing price per day of the week.
# group_by() creates the Weekday column and groups on it in one step.
BOAdata %>%
  group_by(Weekday = weekdays(Date)) %>%
  summarise(Mean_Adj.Close = mean(Adj.Close))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 2
##   Weekday   Mean_Adj.Close
##   <chr>              <dbl>
## 1 Friday              28.0
## 2 Monday              27.9
## 3 Thursday            27.9
## 4 Tuesday             28.1
## 5 Wednesday           27.9
#As we can see, there is little variation in our results.

#Forecasting (pt1) - creating a time series (ts) object

# ts_closeprice = time series of the adjusted closing price.
#
# The data has one row per *trading* day (weekdays only, 503 rows over two
# calendar years), i.e. roughly 252 observations per year. Using
# frequency = 365 squeezes the 503 rows into ~1.4 "years", so the series
# appears to end in 2019 although the data runs to mid-2020. With
# frequency = 252 the time axis lines up with the calendar again, and a
# position such as c(2019, 2) means (approximately) the 2nd trading day of 2019.
#
# NOTE: console output recorded elsewhere in this file was produced with the
# earlier 365-per-year index and will differ when the script is re-run.
trading_days_per_year <- 252

# Derive the start position from the first row instead of hard-coding it.
first_date <- as.Date(BOAdata$Date[1])
first_year <- as.numeric(format(first_date, "%Y"))

# Map the calendar day-of-year ("%j", 1-366) onto the 1..252 trading-day scale.
first_period <- min(
  round((as.numeric(format(first_date, "%j")) - 1) / 365 *
          trading_days_per_year) + 1,
  trading_days_per_year
)

ts_closeprice <- ts(BOAdata$Adj.Close,
                    start = c(first_year, first_period),
                    frequency = trading_days_per_year)

# Day-of-year lookup: which day number (1-366) a given calendar date falls on.
# POSIXlt stores the day of the year zero-based in $yday, hence the + 1.

print(as.POSIXlt(as.Date("2018-07-02"))$yday + 1)
## [1] 183

#Plotting the trend of our data

# Line chart of the adjusted closing price over time.
# All three labels are set in a single labs() call.
ggplot(data = BOAdata, mapping = aes(x = as.POSIXct(Date), y = Adj.Close)) +
  geom_line() +
  labs(
    title = "BOA Daily Adjusted Closing Price",
    x = "Year",
    y = "Adjusted Closing Price in USD"
  ) +
  ggthemes::theme_economist_white()

# Splitting the data into training and testing sets.
# The training window runs up to period 2 of 2019; the test window starts at
# the very next period so the two sets share no observation. (Starting the
# test set at c(2019, 1) made it overlap the training set by two points.)

train_ts <- window(ts_closeprice, end = c(2019, 2))

test_ts <- window(ts_closeprice, start = c(2019, 3))

# Regress the training series on a linear time trend (tslm supplies the
# `trend` regressor) and print the model summary.

x <- tslm(formula = train_ts ~ trend)

x %>%
  summary() %>%
  print()
## 
## Call:
## tslm(formula = train_ts ~ trend)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1725 -1.2127  0.7017  1.3667  2.6058 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 28.825760   0.255478 112.830  < 2e-16 ***
## trend       -0.014121   0.002382  -5.928  1.5e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.73 on 183 degrees of freedom
## Multiple R-squared:  0.1611, Adjusted R-squared:  0.1565 
## F-statistic: 35.14 on 1 and 183 DF,  p-value: 1.499e-08
# Results of our linear model: forecast 30 steps past the training window,
# then print the accuracy measures for the training and test sets.
x_forecasts <- forecast(object = x, h = 30)

x_forecasts %>%
  accuracy(test_ts) %>%
  print()
##                        ME     RMSE      MAE        MPE     MAPE MASE      ACF1
## Training set 1.858000e-17 1.721024 1.445664 -0.4232204 5.435470  NaN 0.9673308
## Test set     2.737326e+00 2.886191 2.737326  9.4424313 9.442431  NaN 0.8131647
##              Theil's U
## Training set        NA
## Test set      8.389265

# Histogram of the linear model's residuals (stored in the fitted object `x`).

hist(x = x$residuals, col = "pink")

#Forecasting results - 1st method

# Predict the next 30 observations with a plain linear regression of the
# closing price on a running time index.
n_obs <- nrow(BOAdata)
horizon <- 30
t <- seq_len(n_obs)  # time index 1..n, one value per observed row
mod <- lm(Close ~ t, data = BOAdata)  # Close is a column of BOAdata
# Forecast the steps that follow the last observed row (n + 1 ... n + 30).
# Starting at n itself would re-predict the last observation and yield 31 points.
pred <- forecast(mod, newdata = data.frame(t = n_obs + seq_len(horizon)))

autoplot(pred) + ggtitle("Forecasts from LM")

#the expected close price for the upcoming 30 days should range from 23 to 32

#Forecasting results - 2nd method

# x_forecasts is already a finished forecast of the linear-trend model fitted
# on the training window. Calling forecast() on it again does not refit
# anything and the `method` argument is ignored, so the plot showed the same
# trend forecast, ending right after the training window (early 2019 on the
# series' time axis) rather than after the latest data.
# Fit the naive model on the full series instead, so the 30-step forecast
# continues from the last observed price.
naive(ts_closeprice, h = 30) %>%
  autoplot() + ylab("Adjusted closing price in USD")