1 Set Up

rm(list = ls())   # Clear environment 
gc()              # Clear unused memory
##          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 525337 28.1    1166321 62.3         NA   669282 35.8
## Vcells 965398  7.4    8388608 64.0      32768  1840402 14.1
cat("\f")         # Clear the console
graphics.off()    # Clear the charts

# Load libraries
library(tidyverse)
library(magrittr)
library(stargazer)
library(dplyr)
library(data.table)

2 Load Data

Merge the data based on dates.

getwd()
## [1] "/Users/arvindsharma/Library/CloudStorage/Dropbox/WCAS/investments/Week 6"
# dir()

MSFT   <- read.csv("MSFT.csv")
SP500  <- read.csv("^GSPC.csv")

# merge the data based on date
data <- merge(x = MSFT, 
              y = SP500, 
              by=c("Date")
              )

# keep all the relevant variables in a new dataframe
clean_data <- 
data %>% mutate(MSFT  = Adj.Close.x,
                SP500 = Adj.Close.y,
                DATE  = Date
                ) %>% 
        select(MSFT, SP500, DATE )

Now, we have the clean data with MSFT and SP500 prices, along with DATES in one dataframe.

3 Visualize Data

str(clean_data)
## 'data.frame':    61 obs. of  3 variables:
##  $ MSFT : num  105.1 96.6 99.4 106.6 112.7 ...
##  $ SP500: num  2760 2507 2704 2784 2834 ...
##  $ DATE : chr  "2018-11-01" "2018-12-01" "2019-01-01" "2019-02-01" ...
stargazer(clean_data, 
          type="text"
          )
## 
## ===================================================
## Statistic N    Mean    St. Dev.    Min       Max   
## ---------------------------------------------------
## MSFT      61  226.406   73.459   96.646    339.821 
## SP500     61 3,709.200 647.113  2,506.850 4,766.180
## ---------------------------------------------------

For plotting purposes, I will divide the SP500 price by $10 (so that I can see the movement of the two securities more clearly).

ggplot(data = clean_data, 
       aes(x = as.Date(DATE),
           group = 1)) +
  geom_line(aes(y = MSFT, color = "MSFT")) +
  geom_line(aes(y = SP500/10, color = "SP500")) +  
labs(title = "MSFT Stock& S&P 500 Prices Over Time",
       x = "Monthly Data over the last 5 years",
       y = "USD"
     ) + 
  scale_color_manual(values = c("MSFT" = "blue", "SP500" = "red"),
                     name = "") +  # Set legend title
  theme(legend.title = element_text(color = "purple", size = 12),  # Adjust legend title
        legend.text = element_text(color = "black", size = 10),  # Adjust legend labels
        axis.title.y = element_text(color = "black", size = 12))  # Adjust y-axis title

4 Estimate Beta

4.1 Estimate monthly returns of the two securites

# Convert DATE to a Date object
clean_data$DATE <- as.Date(clean_data$DATE)

# Arrange the data by DATE
clean_data <- clean_data %>% arrange(DATE)

# Calculate the percentage change in MSFT
clean_data <- clean_data %>%
  mutate(MSFT_Percent_Change = (MSFT / shift(MSFT, type = "lag") - 1) * 100)

# Calculate the percentage change in MSFT
clean_data <- clean_data %>%
  mutate(SP500_Percent_Change = (SP500 / shift(SP500, type = "lag") - 1) * 100)

head(clean_data)
##        MSFT   SP500       DATE MSFT_Percent_Change SP500_Percent_Change
## 1 105.06068 2760.17 2018-11-01                  NA                   NA
## 2  96.64635 2506.85 2018-12-01           -8.009019            -9.177689
## 3  99.36772 2704.10 2019-01-01            2.815807             7.868440
## 4 106.59929 2784.49 2019-02-01            7.277582             2.972889
## 5 112.70207 2834.40 2019-03-01            5.724975             1.792426
## 6 124.79984 2945.83 2019-04-01           10.734293             3.931350

4.2 Linear regression

Estimated now (cannot run a regression with the price of the two securities on each other - need the returns).

CAPM <- lm(data = clean_data, 
           formula = MSFT_Percent_Change ~ SP500_Percent_Change)
summary(CAPM)
## 
## Call:
## lm(formula = MSFT_Percent_Change ~ SP500_Percent_Change, data = clean_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.7438 -3.2624 -0.5437  1.9282 11.4762 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            1.2634     0.5417   2.332   0.0232 *  
## SP500_Percent_Change   0.8953     0.1001   8.942 1.65e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.143 on 58 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5796, Adjusted R-squared:  0.5724 
## F-statistic: 79.97 on 1 and 58 DF,  p-value: 1.651e-12
CAPM$coefficients[2]
## SP500_Percent_Change 
##            0.8952509
  • MSFT is a defensive stock.

Alternatively, use covariance-variance formula for \(\beta_1\) -

cov(y = clean_data$MSFT_Percent_Change,
    x = clean_data$SP500_Percent_Change,
    use = "complete.obs") / var(x = clean_data$SP500_Percent_Change, 
                                use = "complete.obs")
## [1] 0.8952509

Complete.obs is used to get rid of the first missing value in the monthly returns vector of the the two securities.

We get the same exact estimate, of course.

MSFT 5 year (monthly) beta is indeed .9 (rounded).

Picture Source.