#case study : Requirement is to understand the stock price patterns for four corporate sectors and predict their next six months stock price.

Setup Working Directory

getwd()
## [1] "C:/Users/sasim/Desktop/Fall Semester 2021/Business Forecasting/Day3_Date_Time_Ludridate"
setwd("C:/Users/sasim/Desktop/Fall Semester 2021/Business Forecasting/Day3_Date_Time_Ludridate")

Install the Packages:

pacman::p_load(forecast,tidyverse, lubridate, rio, pdfetch, tidyverse, readxl,dygraphs)

Import data.

 hd = rownames_to_column(data.frame(pdfetch_YAHOO("HD", fields = c("close"), 
                                                   from = as.Date("2010-01-01"), to = Sys.Date(), interval = "1m")))
  wba = rownames_to_column(data.frame(pdfetch_YAHOO("WBA", fields = c("close"), 
                                                    from = as.Date("2010-01-01"), to = Sys.Date(), interval = "1m")))
  lmt = rownames_to_column(data.frame(pdfetch_YAHOO("LMT", fields = c("close"), 
                                                    from = as.Date("2010-01-01"), to = Sys.Date(), interval = "1m")))
  pbct = rownames_to_column(data.frame(pdfetch_YAHOO("PBCT", fields = c("close"), 
                                                     from = as.Date("2010-01-01"), to = Sys.Date(), interval = "1m")))

Prepare the data for analysis

#step1:
  
 stocks=cbind(hd,wba[2],lmt[2],pbct[2])

  head(stocks)
##      rowname    HD   WBA   LMT  PBCT
## 1 2010-01-01 28.01 36.05 74.52 16.17
## 2 2010-02-01 31.20 35.24 77.76 15.75
## 3 2010-03-01 32.35 37.09 83.22 15.62
## 4 2010-04-01 35.23 35.15 84.89 15.52
## 5 2010-05-01 33.86 32.04 79.92 13.97
## 6 2010-06-01 28.07 26.70 74.50 13.50
#step2: To project the stock price of these four sectors, we need to bring the stock closing price in to a single vector.
  
        stockdata= stocks %>% pivot_longer(cols=c(HD,WBA,LMT,PBCT), names_to='measure',values_to='values')
        
        head(stockdata)
## # A tibble: 6 x 3
##   rowname    measure values
##   <chr>      <chr>    <dbl>
## 1 2010-01-01 HD        28.0
## 2 2010-01-01 WBA       36.0
## 3 2010-01-01 LMT       74.5
## 4 2010-01-01 PBCT      16.2
## 5 2010-02-01 HD        31.2
## 6 2010-02-01 WBA       35.2
#step3: rename the time variable
        stockdata=stockdata %>% rename(date=rowname)
        head(stockdata)
## # A tibble: 6 x 3
##   date       measure values
##   <chr>      <chr>    <dbl>
## 1 2010-01-01 HD        28.0
## 2 2010-01-01 WBA       36.0
## 3 2010-01-01 LMT       74.5
## 4 2010-01-01 PBCT      16.2
## 5 2010-02-01 HD        31.2
## 6 2010-02-01 WBA       35.2
#step4: As we are working with time-series, make sure the date is in common format across all values.
        
        stockdata$date=ymd(stockdata$date)
#step5: We have monthly data from 10 years, but for our analysis i am limiting the data to only three years.
        stockdata_short = stockdata%>% 
                       filter(date>=today()-years(3))
        
        head(stockdata_short)
## # A tibble: 6 x 3
##   date       measure values
##   <date>     <chr>    <dbl>
## 1 2018-11-01 HD       180. 
## 2 2018-11-01 WBA       84.7
## 3 2018-11-01 LMT      300. 
## 4 2018-11-01 PBCT      16.9
## 5 2018-12-01 HD       172. 
## 6 2018-12-01 WBA       68.3
#step6: To see how these sectors performed over two years. I am defining growth and percent_growth to see the patterns.
        stockdata2 =stockdata_short %>% group_by(measure)%>%
                                  mutate(growth=values-first(values),
                                         growth_percent=(values-first(values))/first(values)*100)
        
        #Note : To calculate the growth, we must pick a point in time and then start analyzing the patterns from that point.
        #In our case, I picked 2019 november as my starting point
        
        head(stockdata2)
## # A tibble: 6 x 5
## # Groups:   measure [4]
##   date       measure values growth growth_percent
##   <date>     <chr>    <dbl>  <dbl>          <dbl>
## 1 2018-11-01 HD       180.     0             0   
## 2 2018-11-01 WBA       84.7    0             0   
## 3 2018-11-01 LMT      300.     0             0   
## 4 2018-11-01 PBCT      16.9    0             0   
## 5 2018-12-01 HD       172.    -8.5          -4.71
## 6 2018-12-01 WBA       68.3  -16.3         -19.3

Visualize the data

# Growth patterns individually.

ggplot(data=stockdata2,mapping=aes(x=date,y=growth,col=measure))+geom_line()

# Percent growth for two years.
stockdata2 %>% ggplot(aes(x = date, y = growth_percent, col = measure)) + 
  geom_line() + 
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# calculate index to make the sectors starting point with same stock price.

stockdata3 = stockdata_short %>% group_by(measure) %>%
  arrange(date) %>%
  mutate(growth = values - first(values), growth_percent = (values - first(values))/first(values)*100, 
         Index = values*100/first(values)
  )

#Visualize the indexed stock price.

head(stockdata3)  
## # A tibble: 6 x 6
## # Groups:   measure [4]
##   date       measure values growth growth_percent Index
##   <date>     <chr>    <dbl>  <dbl>          <dbl> <dbl>
## 1 2018-11-01 HD       180.     0             0    100  
## 2 2018-11-01 WBA       84.7    0             0    100  
## 3 2018-11-01 LMT      300.     0             0    100  
## 4 2018-11-01 PBCT      16.9    0             0    100  
## 5 2018-12-01 HD       172.    -8.5          -4.71  95.3
## 6 2018-12-01 WBA       68.3  -16.3         -19.3   80.7
stockdata3 %>% ggplot(aes(x = date, y = Index, col = measure)) + 
  geom_line()  +
  labs(title = "Corporate Indexed stock prices", subtitle = "Three years data",
       x='Time',
       y='Stock Index price'
       )+
  theme_classic()

#From the projections,its clear that the stock price of all the corporate have increased over the past two years.
#Home depot stock prices have increased in high magnitude in the last two years.

Create Corporate Index:

# we have seen that stock prices of corporate have increased in last two years.
#Now, we will see how these are performing as a group 

stockdata4 = stockdata3 %>% select(date, measure, values) %>%
  pivot_wider(names_from = measure, 
                           values_from = values) 

head(stockdata4)
## # A tibble: 6 x 5
##   date          HD   WBA   LMT  PBCT
##   <date>     <dbl> <dbl> <dbl> <dbl>
## 1 2018-11-01  180.  84.7  300.  16.9
## 2 2018-12-01  172.  68.3  262.  14.4
## 3 2019-01-01  184.  72.3  290.  16.4
## 4 2019-02-01  185.  71.2  309.  17.8
## 5 2019-03-01  192.  63.3  300.  16.4
## 6 2019-04-01  204.  53.6  333.  17.3
#As we are considering the group of companies, I took the average price of the corporate.

stockdata4$BigfatIndex = rowSums(stockdata4[2:5], dims = 1)/4

stockdata4 %>% ggplot(aes(x = date, y = BigfatIndex, col = "darkred")) + 
  geom_line()  +
  labs(title =  "The Corporate Index", subtitle = "Three years data",
       x='Time',
       y='Indexed Price')+
  theme_classic()

Convert the data to time-series and visualize.

first(stockdata4$date)
## [1] "2018-11-01"
last(stockdata4$date)
## [1] "2021-10-01"
BigFatIndex = ts(stockdata4$BigfatIndex, start = c(2018, 11), end = c(2021, 10), frequency = 12)
str(BigFatIndex)
##  Time-Series [1:36] from 2019 to 2022: 146 129 140 146 143 ...
#use the dygraph for visual interactivity.

dygraph(BigFatIndex) %>% dyRangeSelector()

Conclusion:

  • Stock prices increased individually, home depot saw the greatest rise. *stock price of the sectors increased as a group over the last two years.

Forecast:

* Always choose the timeperiod to predict carefully based on business needs.
* As we have data till october 2021.we are going to see the prediction for the next six months.
* As its a seasonal data, we must consider atleast two seasons to be able to predict the data accurately.
autoplot(forecast(auto.arima(BigFatIndex), h = 6))

Conclusions and Recommendations: