library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(moments)
library(lubridate)
## R Markdown
# R verification for CFA problem
eq_expected <- (0.6 * 50000) + (0.4 * -30000)
tb_expected <- 5000
risk_premium <- eq_expected - tb_expected
print(paste("The expected risk premium is $", risk_premium, sep=""))
## [1] "The expected risk premium is $13000"
# 1. Read the data
ff_data <- read_csv("downloads/6_Portfolios_2x3.csv", skip = 15)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 8889 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): ...1, SMALL LoBM, ME1 BM2, SMALL HiBM, BIG LoBM, ME2 BM2, BIG HiBM
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2. Clean it up (this fixes the character/parsing issues!)
ff_clean <- ff_data %>%
# Rename that weird '...1' column to 'Date'
rename(Date = 1) %>%
# Filter out the footnotes and annual data at the bottom
# (keeps only rows where the date is exactly 6 characters like "193001")
filter(!is.na(Date), nchar(Date) == 6) %>%
# Convert the Date text into an actual Date format
mutate(Date = ymd(paste0(Date, "01"))) %>%
# Filter for the requested period: Jan 1930 to Dec 2018
filter(Date >= as.Date("1930-01-01") & Date <= as.Date("2018-12-01"))
# 3. Force all the return columns to be numeric instead of characters
ff_clean <- ff_clean %>% mutate(across(-Date, as.numeric))
# 4. Keep only the Date and the 6 portfolio columns
ff_clean <- ff_clean[, 1:7]
head(ff_clean)
## # A tibble: 6 × 7
## Date `SMALL LoBM` `ME1 BM2` `SMALL HiBM` `BIG LoBM` `ME2 BM2` `BIG HiBM`
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1930-01-01 6.03 9.52 8.47 7.36 3.35 2.85
## 2 1930-02-01 1.76 1.07 4.57 3.47 1.88 1.21
## 3 1930-03-01 8.68 11.3 10.7 6.76 8.42 5.35
## 4 1930-04-01 -7.10 -1.25 -3.48 -2.34 -1.76 -6.68
## 5 1930-05-01 -3.61 -2.69 -2.99 0.702 -2.28 -1.40
## 6 1930-06-01 -18.0 -16.5 -19.0 -17.7 -13.2 -11.8
total_months <- nrow(ff_clean) # or nrow(ff_data) if you saved the cleaned version as ff_data
# 2. Use floor() to ensure the midpoint is a whole integer
midpoint <- floor(total_months / 2)
# 3. Split the data
first_half <- ff_clean[1:midpoint, ]
second_half <- ff_clean[(midpoint + 1):total_months, ]
# 4. Check the dates to make sure it split correctly
cat("First half covers:", as.character(min(first_half$Date)), "to", as.character(max(first_half$Date)), "\n")
## First half covers: 1930-01-01 to 2018-12-01
cat("Second half covers:", as.character(min(second_half$Date)), "to", as.character(max(second_half$Date)), "\n")
## Second half covers: 1930-01-01 to 2018-12-01
# 5. Create a function to calculate statistics
calc_stats <- function(df) {
df %>%
select(-Date) %>%
gather(Portfolio, Return) %>%
group_by(Portfolio) %>%
summarise(
Average = mean(Return, na.rm = TRUE),
SD = sd(Return, na.rm = TRUE),
Skewness = skewness(Return, na.rm = TRUE),
Kurtosis = kurtosis(Return, na.rm = TRUE)
)
}
# 6. Compute statistics for both halves
stats_first_half <- calc_stats(first_half)
stats_second_half <- calc_stats(second_half)
print("--- Statistics for First Half ---")
## [1] "--- Statistics for First Half ---"
print(stats_first_half)
## # A tibble: 6 × 5
## Portfolio Average SD Skewness Kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 BIG HiBM 95.8 235. 5.30 37.7
## 2 BIG LoBM 193. 328. 2.43 9.27
## 3 ME1 BM2 185. 379. 2.11 6.09
## 4 ME2 BM2 150. 254. 2.92 14.1
## 5 SMALL HiBM 213. 456. 2.37 7.83
## 6 SMALL LoBM 169. 386. 2.49 8.12
print("--- Statistics for Second Half ---")
## [1] "--- Statistics for Second Half ---"
print(stats_second_half)
## # A tibble: 6 × 5
## Portfolio Average SD Skewness Kurtosis
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 BIG HiBM 892. 3429. 4.76 26.9
## 2 BIG LoBM 1414. 4952. 3.94 19.1
## 3 ME1 BM2 38.0 137. 4.27 21.5
## 4 ME2 BM2 1000. 3634. 4.31 22.1
## 5 SMALL HiBM 22.7 78.2 4.46 23.7
## 6 SMALL LoBM 38.8 142. 4.37 22.8