rm(list = ls()) # Clear environment
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 525337 28.1 1166321 62.3 NA 669282 35.8
## Vcells 965398 7.4 8388608 64.0 32768 1840402 14.1
cat("\f") # Clear the console
graphics.off() # Clear the charts
# Load libraries
library(tidyverse)
library(magrittr)
library(stargazer)
library(dplyr)
library(data.table)
Merge the data based on dates.
getwd()
## [1] "/Users/arvindsharma/Library/CloudStorage/Dropbox/WCAS/investments/Week 6"
# dir()
MSFT <- read.csv("MSFT.csv")
SP500 <- read.csv("^GSPC.csv")
# merge the data based on date
data <- merge(x = MSFT,
y = SP500,
by=c("Date")
)
# keep all the relevant variables in a new dataframe
clean_data <-
data %>% mutate(MSFT = Adj.Close.x,
SP500 = Adj.Close.y,
DATE = Date
) %>%
select(MSFT, SP500, DATE )
Now, we have the clean data with MSFT and
SP500 prices, along with DATES in one
dataframe.
str(clean_data)
## 'data.frame': 61 obs. of 3 variables:
## $ MSFT : num 105.1 96.6 99.4 106.6 112.7 ...
## $ SP500: num 2760 2507 2704 2784 2834 ...
## $ DATE : chr "2018-11-01" "2018-12-01" "2019-01-01" "2019-02-01" ...
stargazer(clean_data,
type="text"
)
##
## ===================================================
## Statistic N Mean St. Dev. Min Max
## ---------------------------------------------------
## MSFT 61 226.406 73.459 96.646 339.821
## SP500 61 3,709.200 647.113 2,506.850 4,766.180
## ---------------------------------------------------
For plotting purposes, I will divide the SP500 price by $10 (so that I can see the movement of the two securities more clearly).
ggplot(data = clean_data,
aes(x = as.Date(DATE),
group = 1)) +
geom_line(aes(y = MSFT, color = "MSFT")) +
geom_line(aes(y = SP500/10, color = "SP500")) +
labs(title = "MSFT Stock& S&P 500 Prices Over Time",
x = "Monthly Data over the last 5 years",
y = "USD"
) +
scale_color_manual(values = c("MSFT" = "blue", "SP500" = "red"),
name = "") + # Set legend title
theme(legend.title = element_text(color = "purple", size = 12), # Adjust legend title
legend.text = element_text(color = "black", size = 10), # Adjust legend labels
axis.title.y = element_text(color = "black", size = 12)) # Adjust y-axis title
# Convert DATE to a Date object
clean_data$DATE <- as.Date(clean_data$DATE)
# Arrange the data by DATE
clean_data <- clean_data %>% arrange(DATE)
# Calculate the percentage change in MSFT
clean_data <- clean_data %>%
mutate(MSFT_Percent_Change = (MSFT / shift(MSFT, type = "lag") - 1) * 100)
# Calculate the percentage change in MSFT
clean_data <- clean_data %>%
mutate(SP500_Percent_Change = (SP500 / shift(SP500, type = "lag") - 1) * 100)
head(clean_data)
## MSFT SP500 DATE MSFT_Percent_Change SP500_Percent_Change
## 1 105.06068 2760.17 2018-11-01 NA NA
## 2 96.64635 2506.85 2018-12-01 -8.009019 -9.177689
## 3 99.36772 2704.10 2019-01-01 2.815807 7.868440
## 4 106.59929 2784.49 2019-02-01 7.277582 2.972889
## 5 112.70207 2834.40 2019-03-01 5.724975 1.792426
## 6 124.79984 2945.83 2019-04-01 10.734293 3.931350
Estimated now (cannot run a regression with the price of the two securities on each other - need the returns).
CAPM <- lm(data = clean_data,
formula = MSFT_Percent_Change ~ SP500_Percent_Change)
summary(CAPM)
##
## Call:
## lm(formula = MSFT_Percent_Change ~ SP500_Percent_Change, data = clean_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.7438 -3.2624 -0.5437 1.9282 11.4762
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2634 0.5417 2.332 0.0232 *
## SP500_Percent_Change 0.8953 0.1001 8.942 1.65e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.143 on 58 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5796, Adjusted R-squared: 0.5724
## F-statistic: 79.97 on 1 and 58 DF, p-value: 1.651e-12
CAPM$coefficients[2]
## SP500_Percent_Change
## 0.8952509
MSFT is a defensive stock.Alternatively, use covariance-variance formula for \(\beta_1\) -
cov(y = clean_data$MSFT_Percent_Change,
x = clean_data$SP500_Percent_Change,
use = "complete.obs") / var(x = clean_data$SP500_Percent_Change,
use = "complete.obs")
## [1] 0.8952509
Complete.obs is used to get rid of the first missing
value in the monthly returns vector of the the two securities.
We get the same exact estimate, of course.