#case study : Requirement is to understand the stock price patterns for four corporate sectors and predict their next six months stock price.
Setup Working Directory
getwd()
## [1] "C:/Users/sasim/Desktop/Fall Semester 2021/Business Forecasting/Day3_Date_Time_Ludridate"
setwd("C:/Users/sasim/Desktop/Fall Semester 2021/Business Forecasting/Day3_Date_Time_Ludridate")
Install the Packages:
pacman::p_load(forecast,tidyverse, lubridate, rio, pdfetch, tidyverse, readxl,dygraphs)
Import data.
hd = rownames_to_column(data.frame(pdfetch_YAHOO("HD", fields = c("close"),
from = as.Date("2010-01-01"), to = Sys.Date(), interval = "1m")))
wba = rownames_to_column(data.frame(pdfetch_YAHOO("WBA", fields = c("close"),
from = as.Date("2010-01-01"), to = Sys.Date(), interval = "1m")))
lmt = rownames_to_column(data.frame(pdfetch_YAHOO("LMT", fields = c("close"),
from = as.Date("2010-01-01"), to = Sys.Date(), interval = "1m")))
pbct = rownames_to_column(data.frame(pdfetch_YAHOO("PBCT", fields = c("close"),
from = as.Date("2010-01-01"), to = Sys.Date(), interval = "1m")))
Prepare the data for analysis
#step1:
stocks=cbind(hd,wba[2],lmt[2],pbct[2])
head(stocks)
## rowname HD WBA LMT PBCT
## 1 2010-01-01 28.01 36.05 74.52 16.17
## 2 2010-02-01 31.20 35.24 77.76 15.75
## 3 2010-03-01 32.35 37.09 83.22 15.62
## 4 2010-04-01 35.23 35.15 84.89 15.52
## 5 2010-05-01 33.86 32.04 79.92 13.97
## 6 2010-06-01 28.07 26.70 74.50 13.50
#step2: To project the stock price of these four sectors, we need to bring the stock closing price in to a single vector.
stockdata= stocks %>% pivot_longer(cols=c(HD,WBA,LMT,PBCT), names_to='measure',values_to='values')
head(stockdata)
## # A tibble: 6 x 3
## rowname measure values
## <chr> <chr> <dbl>
## 1 2010-01-01 HD 28.0
## 2 2010-01-01 WBA 36.0
## 3 2010-01-01 LMT 74.5
## 4 2010-01-01 PBCT 16.2
## 5 2010-02-01 HD 31.2
## 6 2010-02-01 WBA 35.2
#step3: rename the time variable
stockdata=stockdata %>% rename(date=rowname)
head(stockdata)
## # A tibble: 6 x 3
## date measure values
## <chr> <chr> <dbl>
## 1 2010-01-01 HD 28.0
## 2 2010-01-01 WBA 36.0
## 3 2010-01-01 LMT 74.5
## 4 2010-01-01 PBCT 16.2
## 5 2010-02-01 HD 31.2
## 6 2010-02-01 WBA 35.2
#step4: As we are working with time-series, make sure the date is in common format across all values.
stockdata$date=ymd(stockdata$date)
#step5: We have monthly data from 10 years, but for our analysis i am limiting the data to only three years.
stockdata_short = stockdata%>%
filter(date>=today()-years(3))
head(stockdata_short)
## # A tibble: 6 x 3
## date measure values
## <date> <chr> <dbl>
## 1 2018-11-01 HD 180.
## 2 2018-11-01 WBA 84.7
## 3 2018-11-01 LMT 300.
## 4 2018-11-01 PBCT 16.9
## 5 2018-12-01 HD 172.
## 6 2018-12-01 WBA 68.3
#step6: To see how these sectors performed over two years. I am defining growth and percent_growth to see the patterns.
stockdata2 =stockdata_short %>% group_by(measure)%>%
mutate(growth=values-first(values),
growth_percent=(values-first(values))/first(values)*100)
#Note : To calculate the growth, we must pick a point in time and then start analyzing the patterns from that point.
#In our case, I picked 2019 november as my starting point
head(stockdata2)
## # A tibble: 6 x 5
## # Groups: measure [4]
## date measure values growth growth_percent
## <date> <chr> <dbl> <dbl> <dbl>
## 1 2018-11-01 HD 180. 0 0
## 2 2018-11-01 WBA 84.7 0 0
## 3 2018-11-01 LMT 300. 0 0
## 4 2018-11-01 PBCT 16.9 0 0
## 5 2018-12-01 HD 172. -8.5 -4.71
## 6 2018-12-01 WBA 68.3 -16.3 -19.3
Visualize the data
# Growth patterns individually.
ggplot(data=stockdata2,mapping=aes(x=date,y=growth,col=measure))+geom_line()

# Percent growth for two years.
stockdata2 %>% ggplot(aes(x = date, y = growth_percent, col = measure)) +
geom_line() +
geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# calculate index to make the sectors starting point with same stock price.
stockdata3 = stockdata_short %>% group_by(measure) %>%
arrange(date) %>%
mutate(growth = values - first(values), growth_percent = (values - first(values))/first(values)*100,
Index = values*100/first(values)
)
#Visualize the indexed stock price.
head(stockdata3)
## # A tibble: 6 x 6
## # Groups: measure [4]
## date measure values growth growth_percent Index
## <date> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2018-11-01 HD 180. 0 0 100
## 2 2018-11-01 WBA 84.7 0 0 100
## 3 2018-11-01 LMT 300. 0 0 100
## 4 2018-11-01 PBCT 16.9 0 0 100
## 5 2018-12-01 HD 172. -8.5 -4.71 95.3
## 6 2018-12-01 WBA 68.3 -16.3 -19.3 80.7
stockdata3 %>% ggplot(aes(x = date, y = Index, col = measure)) +
geom_line() +
labs(title = "Corporate Indexed stock prices", subtitle = "Three years data",
x='Time',
y='Stock Index price'
)+
theme_classic()

#From the projections,its clear that the stock price of all the corporate have increased over the past two years.
#Home depot stock prices have increased in high magnitude in the last two years.
Create Corporate Index:
# we have seen that stock prices of corporate have increased in last two years.
#Now, we will see how these are performing as a group
stockdata4 = stockdata3 %>% select(date, measure, values) %>%
pivot_wider(names_from = measure,
values_from = values)
head(stockdata4)
## # A tibble: 6 x 5
## date HD WBA LMT PBCT
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2018-11-01 180. 84.7 300. 16.9
## 2 2018-12-01 172. 68.3 262. 14.4
## 3 2019-01-01 184. 72.3 290. 16.4
## 4 2019-02-01 185. 71.2 309. 17.8
## 5 2019-03-01 192. 63.3 300. 16.4
## 6 2019-04-01 204. 53.6 333. 17.3
#As we are considering the group of companies, I took the average price of the corporate.
stockdata4$BigfatIndex = rowSums(stockdata4[2:5], dims = 1)/4
stockdata4 %>% ggplot(aes(x = date, y = BigfatIndex, col = "darkred")) +
geom_line() +
labs(title = "The Corporate Index", subtitle = "Three years data",
x='Time',
y='Indexed Price')+
theme_classic()

Convert the data to time-series and visualize.
first(stockdata4$date)
## [1] "2018-11-01"
last(stockdata4$date)
## [1] "2021-10-01"
BigFatIndex = ts(stockdata4$BigfatIndex, start = c(2018, 11), end = c(2021, 10), frequency = 12)
str(BigFatIndex)
## Time-Series [1:36] from 2019 to 2022: 146 129 140 146 143 ...
#use the dygraph for visual interactivity.
dygraph(BigFatIndex) %>% dyRangeSelector()
Conclusion:
- Stock prices increased individually, home depot saw the greatest rise. *stock price of the sectors increased as a group over the last two years.
Forecast:
* Always choose the timeperiod to predict carefully based on business needs.
* As we have data till october 2021.we are going to see the prediction for the next six months.
* As its a seasonal data, we must consider atleast two seasons to be able to predict the data accurately.
autoplot(forecast(auto.arima(BigFatIndex), h = 6))

Conclusions and Recommendations:
- The stockprice of this corporate index is evenly poised and subjected to seasonal rise in the next six months.
- The average index price may vary between 170 and 230 at 90% confidence interval.