rm(list = ls()) 

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.26     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ dplyr::filter()                masks stats::filter()
## ✖ xts::first()                   masks dplyr::first()
## ✖ dplyr::lag()                   masks stats::lag()
## ✖ xts::last()                    masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)

data <- read_csv("capm.csv")
## Rows: 2363 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (5): Close-tbill, Close-sp500, Close-msft, Close-ge, Close-ford
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 6
##   Date      `Close-tbill` `Close-sp500` `Close-msft` `Close-ge` `Close-ford`
##   <chr>             <dbl>         <dbl>        <dbl>      <dbl>        <dbl>
## 1 1993/11/1          3.06          469.         2.52       6.72         8.16
## 2 1993/11/2          3.12          468.         2.5        6.69         8.31
## 3 1993/11/3          3.08          463.         2.45       6.64         8.24
## 4 1993/11/4          3.07          457.         2.38       6.46         7.98
## 5 1993/11/5          3.07          460.         2.45       6.49         7.94
## 6 1993/11/8          3.06          460.         2.45       6.46         8.02
dat <- read_csv("capm.csv") %>%
  mutate(Date = as.Date(as.character(Date), "%Y/%m/%d")) %>%
  filter(Date >= as.Date("1993-11-01") & Date <= as.Date("1998-11-30")) %>%
  rename(
    rf = `Close-tbill`,
    sp500 = `Close-sp500`,
    msft = `Close-msft`,
    ge = `Close-ge`,
    ford = `Close-ford`
  ) %>%
  # Convert risk-free rate into daily returns
  mutate(rf = rf / (100 * 360))
## Rows: 2363 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (5): Close-tbill, Close-sp500, Close-msft, Close-ge, Close-ford
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(dat)  
## # A tibble: 6 × 6
##   Date              rf sp500  msft    ge  ford
##   <date>         <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1993-11-01 0.000085   469.  2.52  6.72  8.16
## 2 1993-11-02 0.0000867  468.  2.5   6.69  8.31
## 3 1993-11-03 0.0000856  463.  2.45  6.64  8.24
## 4 1993-11-04 0.0000853  457.  2.38  6.46  7.98
## 5 1993-11-05 0.0000853  460.  2.45  6.49  7.94
## 6 1993-11-08 0.000085   460.  2.45  6.46  8.02
library(dplyr)
library(tidyr)
library(tidyquant)

ret4 <- dat %>%
  select(-rf) %>% 
  pivot_longer(cols = -Date, names_to = "stock", values_to = "price") %>%  # Replaced gather() with pivot_longer()
  group_by(stock) %>%
  tq_transmute(
    mutate_fun = periodReturn,
    period = "daily",
    type = "arithmetic",
    col_rename = "daily_returns"
  ) %>%
  ungroup() %>%
  pivot_wider(names_from = stock, values_from = daily_returns) %>%  # Replaced spread() with pivot_wider()
  bind_cols(., rf = dat$rf) %>%
  # Subtract each return by risk-free rate
  mutate(
    ford_rf = ford - rf,
    ge_rf = ge - rf,
    msft_rf = msft - rf,
    sp500_rf = sp500 - rf
  ) %>%
  # Delete the first row with 0 data
  slice(-1) %>%
  select(Date, ends_with("rf"))  # Selecting only the columns ending with "rf"

# View the result
head(ret4)
## # A tibble: 6 × 6
##   Date              rf  ford_rf    ge_rf   msft_rf  sp500_rf
##   <date>         <dbl>    <dbl>    <dbl>     <dbl>     <dbl>
## 1 1993-11-02 0.0000867  0.0183  -0.00455 -0.00802  -0.00149 
## 2 1993-11-03 0.0000856 -0.00851 -0.00756 -0.0201   -0.0117  
## 3 1993-11-04 0.0000853 -0.0316  -0.0272  -0.0287   -0.0120  
## 4 1993-11-05 0.0000853 -0.00510  0.00456  0.0293    0.00446 
## 5 1993-11-08 0.000085   0.00999 -0.00471 -0.000085  0.00131 
## 6 1993-11-09 0.0000864 -0.0101   0.00765 -0.00417   0.000174
# Perform regression with the correct formula
ret4.reg <- lm(cbind(msft_rf, ge_rf, ford_rf) ~ sp500_rf, data = ret4)

# View the result of the regression
summary(ret4.reg)
## Response msft_rf :
## 
## Call:
## lm(formula = msft_rf ~ sp500_rf, data = ret4)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.056456 -0.010372 -0.001143  0.009762  0.087597 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0012339  0.0004747   2.599  0.00945 ** 
## sp500_rf    1.2823822  0.0531051  24.148  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01692 on 1275 degrees of freedom
## Multiple R-squared:  0.3138, Adjusted R-squared:  0.3133 
## F-statistic: 583.1 on 1 and 1275 DF,  p-value: < 2.2e-16
## 
## 
## Response ge_rf :
## 
## Call:
## lm(formula = ge_rf ~ sp500_rf, data = ret4)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.032199 -0.006674 -0.000136  0.006388  0.036949 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0003448  0.0002684   1.285    0.199    
## sp500_rf    1.1981993  0.0300256  39.906   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.009569 on 1275 degrees of freedom
## Multiple R-squared:  0.5554, Adjusted R-squared:  0.555 
## F-statistic:  1592 on 1 and 1275 DF,  p-value: < 2.2e-16
## 
## 
## Response ford_rf :
## 
## Call:
## lm(formula = ford_rf ~ sp500_rf, data = ret4)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.063903 -0.009529 -0.001148  0.008793  0.075596 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0002996  0.0004423   0.677    0.498    
## sp500_rf    1.0250995  0.0494758  20.719   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01577 on 1275 degrees of freedom
## Multiple R-squared:  0.2519, Adjusted R-squared:  0.2513 
## F-statistic: 429.3 on 1 and 1275 DF,  p-value: < 2.2e-16
# Bind the columns for msft_rf, ge_rf, and ford_rf together
head(cbind(ret4$msft_rf, ret4$ge_rf, ret4$ford_rf))
##              [,1]         [,2]         [,3]
## [1,] -0.008023175 -0.004550952  0.018295686
## [2,] -0.020085556 -0.007559397 -0.008509142
## [3,] -0.028656706 -0.027193712 -0.031638676
## [4,]  0.029326487  0.004558685 -0.005097809
## [5,] -0.000085000 -0.004707496  0.009990567
## [6,] -0.004168022  0.007653549 -0.010061451
b_hat<-ret4.reg$coefficients
b_hat
##                 msft_rf       ge_rf      ford_rf
## (Intercept) 0.001233949 0.000344806 0.0002995505
## sp500_rf    1.282382154 1.198199332 1.0250994574
# Calculate the diagonal of the covariance matrix of the residuals from the regression model
diagD_hat <- ret4.reg$residuals %>%
  cov() %>%
  diag()

# View the result
diagD_hat
##      msft_rf        ge_rf      ford_rf 
## 2.861862e-04 9.148714e-05 2.484053e-04
# Extract the variance of sp500_rf
sigm2 <- var(ret4$sp500_rf)

# Extract the regression coefficients from the model
b_hat <- coef(ret4.reg)

# Calculate the diagonal of the covariance matrix of residuals
diagD_hat <- cov(ret4.reg$residuals) %>% diag()

# Calculate omega
omega <- sigm2 * t(b_hat) %*% b_hat + diagD_hat

# View the result
omega
##              msft_rf        ge_rf      ford_rf
## msft_rf 0.0004170745 0.0004084821 0.0003908145
## ge_rf   0.0002137831 0.0002057549 0.0001892470
## ford_rf 0.0003530335 0.0003461651 0.0003320421
# Define the length of Y (the number of observations)
n = length(ret4$msft_rf)

# Create a column of ones and combine it with Y to form the X matrix (the independent variable matrix)
ones = rep(1, n)
X = cbind(ones, ret4$sp500_rf)  # Assuming you are using 'sp500_rf' as the factor (independent variable)
X = as.matrix(X)

# Create Y matrix (dependent variables: msft_rf, ge_rf, and ford_rf)
Y = cbind(ret4$msft_rf, ret4$ge_rf, ret4$ford_rf)

# Compute the coefficient estimates using the least squares formula
b_hat.1 = solve(t(X) %*% X) %*% t(X) %*% Y
b_hat.1  # Print the estimated coefficients
##             [,1]        [,2]         [,3]
## ones 0.001233949 0.000344806 0.0002995505
##      1.282382154 1.198199332 1.0250994574
# Compute the residuals
E_hat = Y - X %*% b_hat.1

# Compute the residual variance
res_var.1 = (t(E_hat) %*% E_hat) / (n - 2)  # Adjust the denominator for degrees of freedom
diagD_hat.1 = diag(res_var.1)  # Extract the diagonal (variances of residuals)
diagD_hat.1  # Print the diagonal of residual variances
## [1] 2.864107e-04 9.155889e-05 2.486001e-04
# Compute the covariance matrix (omega.1)
omega.1 = var(ret4$sp500_rf) * t(b_hat.1) %*% b_hat.1 + diagD_hat.1
omega.1  # Print the covariance matrix
##              [,1]         [,2]         [,3]
## [1,] 0.0004172989 0.0004087066 0.0003910389
## [2,] 0.0002138548 0.0002058266 0.0001893187
## [3,] 0.0003532283 0.0003463599 0.0003322369