library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(moments)
df <- read_excel("6 Portfolios Formed on Size and Book-to-Market (2 x 3).xlsx", skip = 14)
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
# Load the dataset
df <- read_excel("6 Portfolios Formed on Size and Book-to-Market (2 x 3).xlsx", skip = 14)
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
# Set proper column names
colnames(df) <- df[1, ]  # Assign first row as column names
df <- df[-1, ]  # Remove the old header row

# Ensure Date column is properly named
colnames(df)[1] <- "Date"

# Convert columns to appropriate types
df$Date <- as.integer(df$Date)
df[,-1] <- lapply(df[,-1], as.numeric)
df <- df %>% rename(Date = 1)
df <- df[-1, ]  # Remove the old header row
middle_index <- nrow(df) %/% 2
first_half <- df[1:middle_index, ]
second_half <- df[(middle_index+1):nrow(df), ]
compute_stats <- function(data) {
  data %>% summarise_all(list(
    Mean = mean,
    SD = sd,
    Skewness = skewness,
    Kurtosis = kurtosis
  ))
}

stats_first_half <- compute_stats(first_half)
stats_second_half <- compute_stats(second_half)

stats_first_half
## # A tibble: 1 × 28
##   Date_Mean `SMALL LoBM_Mean` `ME1 BM2_Mean` `SMALL HiBM_Mean` `BIG LoBM_Mean`
##       <dbl>             <dbl>          <dbl>             <dbl>           <dbl>
## 1   195186.             0.970           1.15              1.47           0.755
## # ℹ 23 more variables: `ME2 BM2_Mean` <dbl>, `BIG HiBM_Mean` <dbl>,
## #   Date_SD <dbl>, `SMALL LoBM_SD` <dbl>, `ME1 BM2_SD` <dbl>,
## #   `SMALL HiBM_SD` <dbl>, `BIG LoBM_SD` <dbl>, `ME2 BM2_SD` <dbl>,
## #   `BIG HiBM_SD` <dbl>, Date_Skewness <dbl>, `SMALL LoBM_Skewness` <dbl>,
## #   `ME1 BM2_Skewness` <dbl>, `SMALL HiBM_Skewness` <dbl>,
## #   `BIG LoBM_Skewness` <dbl>, `ME2 BM2_Skewness` <dbl>,
## #   `BIG HiBM_Skewness` <dbl>, Date_Kurtosis <dbl>, …
stats_second_half
## # A tibble: 1 × 28
##   Date_Mean `SMALL LoBM_Mean` `ME1 BM2_Mean` `SMALL HiBM_Mean` `BIG LoBM_Mean`
##       <dbl>             <dbl>          <dbl>             <dbl>           <dbl>
## 1   199631.              1.00           1.36              1.43           0.979
## # ℹ 23 more variables: `ME2 BM2_Mean` <dbl>, `BIG HiBM_Mean` <dbl>,
## #   Date_SD <dbl>, `SMALL LoBM_SD` <dbl>, `ME1 BM2_SD` <dbl>,
## #   `SMALL HiBM_SD` <dbl>, `BIG LoBM_SD` <dbl>, `ME2 BM2_SD` <dbl>,
## #   `BIG HiBM_SD` <dbl>, Date_Skewness <dbl>, `SMALL LoBM_Skewness` <dbl>,
## #   `ME1 BM2_Skewness` <dbl>, `SMALL HiBM_Skewness` <dbl>,
## #   `BIG LoBM_Skewness` <dbl>, `ME2 BM2_Skewness` <dbl>,
## #   `BIG HiBM_Skewness` <dbl>, Date_Kurtosis <dbl>, …
print(stats_first_half)
## # A tibble: 1 × 28
##   Date_Mean `SMALL LoBM_Mean` `ME1 BM2_Mean` `SMALL HiBM_Mean` `BIG LoBM_Mean`
##       <dbl>             <dbl>          <dbl>             <dbl>           <dbl>
## 1   195186.             0.970           1.15              1.47           0.755
## # ℹ 23 more variables: `ME2 BM2_Mean` <dbl>, `BIG HiBM_Mean` <dbl>,
## #   Date_SD <dbl>, `SMALL LoBM_SD` <dbl>, `ME1 BM2_SD` <dbl>,
## #   `SMALL HiBM_SD` <dbl>, `BIG LoBM_SD` <dbl>, `ME2 BM2_SD` <dbl>,
## #   `BIG HiBM_SD` <dbl>, Date_Skewness <dbl>, `SMALL LoBM_Skewness` <dbl>,
## #   `ME1 BM2_Skewness` <dbl>, `SMALL HiBM_Skewness` <dbl>,
## #   `BIG LoBM_Skewness` <dbl>, `ME2 BM2_Skewness` <dbl>,
## #   `BIG HiBM_Skewness` <dbl>, Date_Kurtosis <dbl>, …
print(stats_second_half)
## # A tibble: 1 × 28
##   Date_Mean `SMALL LoBM_Mean` `ME1 BM2_Mean` `SMALL HiBM_Mean` `BIG LoBM_Mean`
##       <dbl>             <dbl>          <dbl>             <dbl>           <dbl>
## 1   199631.              1.00           1.36              1.43           0.979
## # ℹ 23 more variables: `ME2 BM2_Mean` <dbl>, `BIG HiBM_Mean` <dbl>,
## #   Date_SD <dbl>, `SMALL LoBM_SD` <dbl>, `ME1 BM2_SD` <dbl>,
## #   `SMALL HiBM_SD` <dbl>, `BIG LoBM_SD` <dbl>, `ME2 BM2_SD` <dbl>,
## #   `BIG HiBM_SD` <dbl>, Date_Skewness <dbl>, `SMALL LoBM_Skewness` <dbl>,
## #   `ME1 BM2_Skewness` <dbl>, `SMALL HiBM_Skewness` <dbl>,
## #   `BIG LoBM_Skewness` <dbl>, `ME2 BM2_Skewness` <dbl>,
## #   `BIG HiBM_Skewness` <dbl>, Date_Kurtosis <dbl>, …
comparison <- stats_first_half - stats_second_half
print("Difference in statistics (First Half - Second Half):")
## [1] "Difference in statistics (First Half - Second Half):"
print(comparison)
##   Date_Mean SMALL LoBM_Mean ME1 BM2_Mean SMALL HiBM_Mean BIG LoBM_Mean
## 1 -4445.414     -0.03165606   -0.2069784      0.03349022    -0.2235495
##   ME2 BM2_Mean BIG HiBM_Mean   Date_SD SMALL LoBM_SD ME1 BM2_SD SMALL HiBM_SD
## 1   -0.2584479    0.06010309 -2.275775      1.509291   3.096831      4.648619
##   BIG LoBM_SD ME2 BM2_SD BIG HiBM_SD Date_Skewness SMALL LoBM_Skewness
## 1    1.004595   2.395637    3.975932    0.00197048            1.600133
##   ME1 BM2_Skewness SMALL HiBM_Skewness BIG LoBM_Skewness ME2 BM2_Skewness
## 1         2.172001            2.823103         0.5020465         2.213681
##   BIG HiBM_Skewness Date_Kurtosis SMALL LoBM_Kurtosis ME1 BM2_Kurtosis
## 1          2.307465  0.0004575959             6.92498         9.493695
##   SMALL HiBM_Kurtosis BIG LoBM_Kurtosis ME2 BM2_Kurtosis BIG HiBM_Kurtosis
## 1            13.23764          5.029897         15.12201          11.57677
ggplot(df, aes(x = Date)) + 
  geom_line(aes(y = `SMALL LoBM`, color = "SMALL LoBM")) +
  geom_line(aes(y = `BIG HiBM`, color = "BIG HiBM")) +
  labs(title = "Portfolio Returns Over Time", y = "Return", x = "Date")