library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(moments)
df <- read_excel("6 Portfolios Formed on Size and Book-to-Market (2 x 3).xlsx", skip = 14)
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
# Load the dataset
df <- read_excel("6 Portfolios Formed on Size and Book-to-Market (2 x 3).xlsx", skip = 14)
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
# Set proper column names
colnames(df) <- df[1, ] # Assign first row as column names
df <- df[-1, ] # Remove the old header row
# Ensure Date column is properly named
colnames(df)[1] <- "Date"
# Convert columns to appropriate types
df$Date <- as.integer(df$Date)
df[,-1] <- lapply(df[,-1], as.numeric)
df <- df %>% rename(Date = 1)
df <- df[-1, ] # Remove the old header row
middle_index <- nrow(df) %/% 2
first_half <- df[1:middle_index, ]
second_half <- df[(middle_index+1):nrow(df), ]
compute_stats <- function(data) {
data %>% summarise_all(list(
Mean = mean,
SD = sd,
Skewness = skewness,
Kurtosis = kurtosis
))
}
stats_first_half <- compute_stats(first_half)
stats_second_half <- compute_stats(second_half)
stats_first_half
## # A tibble: 1 × 28
## Date_Mean `SMALL LoBM_Mean` `ME1 BM2_Mean` `SMALL HiBM_Mean` `BIG LoBM_Mean`
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 195186. 0.970 1.15 1.47 0.755
## # ℹ 23 more variables: `ME2 BM2_Mean` <dbl>, `BIG HiBM_Mean` <dbl>,
## # Date_SD <dbl>, `SMALL LoBM_SD` <dbl>, `ME1 BM2_SD` <dbl>,
## # `SMALL HiBM_SD` <dbl>, `BIG LoBM_SD` <dbl>, `ME2 BM2_SD` <dbl>,
## # `BIG HiBM_SD` <dbl>, Date_Skewness <dbl>, `SMALL LoBM_Skewness` <dbl>,
## # `ME1 BM2_Skewness` <dbl>, `SMALL HiBM_Skewness` <dbl>,
## # `BIG LoBM_Skewness` <dbl>, `ME2 BM2_Skewness` <dbl>,
## # `BIG HiBM_Skewness` <dbl>, Date_Kurtosis <dbl>, …
stats_second_half
## # A tibble: 1 × 28
## Date_Mean `SMALL LoBM_Mean` `ME1 BM2_Mean` `SMALL HiBM_Mean` `BIG LoBM_Mean`
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 199631. 1.00 1.36 1.43 0.979
## # ℹ 23 more variables: `ME2 BM2_Mean` <dbl>, `BIG HiBM_Mean` <dbl>,
## # Date_SD <dbl>, `SMALL LoBM_SD` <dbl>, `ME1 BM2_SD` <dbl>,
## # `SMALL HiBM_SD` <dbl>, `BIG LoBM_SD` <dbl>, `ME2 BM2_SD` <dbl>,
## # `BIG HiBM_SD` <dbl>, Date_Skewness <dbl>, `SMALL LoBM_Skewness` <dbl>,
## # `ME1 BM2_Skewness` <dbl>, `SMALL HiBM_Skewness` <dbl>,
## # `BIG LoBM_Skewness` <dbl>, `ME2 BM2_Skewness` <dbl>,
## # `BIG HiBM_Skewness` <dbl>, Date_Kurtosis <dbl>, …
print(stats_first_half)
## # A tibble: 1 × 28
## Date_Mean `SMALL LoBM_Mean` `ME1 BM2_Mean` `SMALL HiBM_Mean` `BIG LoBM_Mean`
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 195186. 0.970 1.15 1.47 0.755
## # ℹ 23 more variables: `ME2 BM2_Mean` <dbl>, `BIG HiBM_Mean` <dbl>,
## # Date_SD <dbl>, `SMALL LoBM_SD` <dbl>, `ME1 BM2_SD` <dbl>,
## # `SMALL HiBM_SD` <dbl>, `BIG LoBM_SD` <dbl>, `ME2 BM2_SD` <dbl>,
## # `BIG HiBM_SD` <dbl>, Date_Skewness <dbl>, `SMALL LoBM_Skewness` <dbl>,
## # `ME1 BM2_Skewness` <dbl>, `SMALL HiBM_Skewness` <dbl>,
## # `BIG LoBM_Skewness` <dbl>, `ME2 BM2_Skewness` <dbl>,
## # `BIG HiBM_Skewness` <dbl>, Date_Kurtosis <dbl>, …
print(stats_second_half)
## # A tibble: 1 × 28
## Date_Mean `SMALL LoBM_Mean` `ME1 BM2_Mean` `SMALL HiBM_Mean` `BIG LoBM_Mean`
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 199631. 1.00 1.36 1.43 0.979
## # ℹ 23 more variables: `ME2 BM2_Mean` <dbl>, `BIG HiBM_Mean` <dbl>,
## # Date_SD <dbl>, `SMALL LoBM_SD` <dbl>, `ME1 BM2_SD` <dbl>,
## # `SMALL HiBM_SD` <dbl>, `BIG LoBM_SD` <dbl>, `ME2 BM2_SD` <dbl>,
## # `BIG HiBM_SD` <dbl>, Date_Skewness <dbl>, `SMALL LoBM_Skewness` <dbl>,
## # `ME1 BM2_Skewness` <dbl>, `SMALL HiBM_Skewness` <dbl>,
## # `BIG LoBM_Skewness` <dbl>, `ME2 BM2_Skewness` <dbl>,
## # `BIG HiBM_Skewness` <dbl>, Date_Kurtosis <dbl>, …
comparison <- stats_first_half - stats_second_half
print("Difference in statistics (First Half - Second Half):")
## [1] "Difference in statistics (First Half - Second Half):"
print(comparison)
## Date_Mean SMALL LoBM_Mean ME1 BM2_Mean SMALL HiBM_Mean BIG LoBM_Mean
## 1 -4445.414 -0.03165606 -0.2069784 0.03349022 -0.2235495
## ME2 BM2_Mean BIG HiBM_Mean Date_SD SMALL LoBM_SD ME1 BM2_SD SMALL HiBM_SD
## 1 -0.2584479 0.06010309 -2.275775 1.509291 3.096831 4.648619
## BIG LoBM_SD ME2 BM2_SD BIG HiBM_SD Date_Skewness SMALL LoBM_Skewness
## 1 1.004595 2.395637 3.975932 0.00197048 1.600133
## ME1 BM2_Skewness SMALL HiBM_Skewness BIG LoBM_Skewness ME2 BM2_Skewness
## 1 2.172001 2.823103 0.5020465 2.213681
## BIG HiBM_Skewness Date_Kurtosis SMALL LoBM_Kurtosis ME1 BM2_Kurtosis
## 1 2.307465 0.0004575959 6.92498 9.493695
## SMALL HiBM_Kurtosis BIG LoBM_Kurtosis ME2 BM2_Kurtosis BIG HiBM_Kurtosis
## 1 13.23764 5.029897 15.12201 11.57677
ggplot(df, aes(x = Date)) +
geom_line(aes(y = `SMALL LoBM`, color = "SMALL LoBM")) +
geom_line(aes(y = `BIG HiBM`, color = "BIG HiBM")) +
labs(title = "Portfolio Returns Over Time", y = "Return", x = "Date")
