library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(moments)
library(lubridate)
## R Markdown

# R verification for CFA problem
# Expected equity payoff = probability-weighted sum of the two scenarios.
scenario_prob <- c(0.6, 0.4)
scenario_payoff <- c(50000, -30000)
eq_expected <- sum(scenario_prob * scenario_payoff)
# Payoff of the T-bill alternative used as the benchmark.
tb_expected <- 5000
# Risk premium = expected equity payoff in excess of the T-bill payoff.
risk_premium <- eq_expected - tb_expected
print(paste0("The expected risk premium is $", risk_premium))
## [1] "The expected risk premium is $13000"
# 1. Read the data
# Every column comes back as character and readr reports parsing issues (see
# the messages below), so the file holds more than plain numeric rows.
ff_data <- read_csv("downloads/6_Portfolios_2x3.csv", skip = 15)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 8889 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): ...1, SMALL LoBM, ME1 BM2, SMALL HiBM, BIG LoBM, ME2 BM2, BIG HiBM
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2. Clean it up
# Filtering on "date is 6 characters long" alone keeps the same months more
# than once: the split check further down used to report 1930-01-01 to
# 2018-12-01 for BOTH halves. Only the leading run of YYYYMM rows (the first
# table in the file) is kept here; cumall() turns FALSE at the first row that
# is not a 6-digit date and stays FALSE afterwards.
# NOTE(review): the first table is presumably the value-weighted monthly
# returns -- confirm against the header lines skipped by `skip = 15`.
ff_clean <- ff_data %>%
  # Rename the unnamed first column ('...1') to 'Date'
  rename(Date = 1) %>%
  filter(cumall(!is.na(Date) & str_detect(Date, "^[0-9]{6}$"))) %>%
  # "193001" -> 1930-01-01 (first day of the month)
  mutate(Date = ymd(paste0(Date, "01"))) %>%
  # Requested period: Jan 1930 to Dec 2018
  filter(Date >= as.Date("1930-01-01"), Date <= as.Date("2018-12-01")) %>%
  # 3. Return columns were read as text; make them numeric
  mutate(across(-Date, as.numeric)) %>%
  # 4. Keep only the Date and the 6 portfolio columns
  select(1:7) %>%
  # The split below is by row position, so rows must be in date order
  arrange(Date)
# Fail loudly if the cleaning did not yield exactly one row per month.
stopifnot(nrow(ff_clean) > 0, anyDuplicated(ff_clean$Date) == 0)
head(ff_clean)
## # A tibble: 6 × 7
##   Date       `SMALL LoBM` `ME1 BM2` `SMALL HiBM` `BIG LoBM` `ME2 BM2` `BIG HiBM`
##   <date>            <dbl>     <dbl>        <dbl>      <dbl>     <dbl>      <dbl>
## 1 1930-01-01         6.03      9.52         8.47      7.36       3.35       2.85
## 2 1930-02-01         1.76      1.07         4.57      3.47       1.88       1.21
## 3 1930-03-01         8.68     11.3         10.7       6.76       8.42       5.35
## 4 1930-04-01        -7.10     -1.25        -3.48     -2.34      -1.76      -6.68
## 5 1930-05-01        -3.61     -2.69        -2.99      0.702     -2.28      -1.40
## 6 1930-06-01       -18.0     -16.5        -19.0     -17.7      -13.2      -11.8
# 5. Split the sample into two halves by row position
total_months <- nrow(ff_clean)
# Integer midpoint; with an odd row count the extra month goes to the second half
midpoint <- floor(total_months / 2)
# slice_head()/slice_tail() also behave when midpoint is 0, unlike 1:midpoint
first_half <- slice_head(ff_clean, n = midpoint)
second_half <- slice_tail(ff_clean, n = total_months - midpoint)
# Check the dates to make sure it split correctly
cat(
  "First half covers:", as.character(min(first_half$Date)),
  "to", as.character(max(first_half$Date)), "\n"
)
cat(
  "Second half covers:", as.character(min(second_half$Date)),
  "to", as.character(max(second_half$Date)), "\n"
)
# Expected with one row per month (Jan 1930 - Dec 2018 = 1,068 rows):
#   First half covers: 1930-01-01 to 1974-06-01
#   Second half covers: 1974-07-01 to 2018-12-01
# Re-knit to confirm; the two ranges must not overlap.
# 5. Create a function to calculate statistics

#' Per-portfolio summary statistics of a returns table
#'
#' Reshapes every column except `Date` into long form and summarises each
#' portfolio's returns, ignoring missing values.
#'
#' @param df A data frame with a `Date` column plus one numeric column per
#'   portfolio.
#' @return A tibble with one row per portfolio (sorted by portfolio name) and
#'   columns `Portfolio`, `Average`, `SD`, `Skewness`, `Kurtosis`.
#'   `Kurtosis` comes from `moments::kurtosis()`, which reports Pearson's
#'   kurtosis (not excess kurtosis).
calc_stats <- function(df) {
  df %>%
    # pivot_longer() replaces the superseded gather(); Date is left out of the
    # reshape, so only the portfolio columns become (Portfolio, Return) pairs.
    pivot_longer(
      cols = -Date,
      names_to = "Portfolio",
      values_to = "Return"
    ) %>%
    group_by(Portfolio) %>%
    summarise(
      Average = mean(Return, na.rm = TRUE),
      SD = sd(Return, na.rm = TRUE),
      # Namespace-qualified so a later-attached package cannot mask them.
      Skewness = moments::skewness(Return, na.rm = TRUE),
      Kurtosis = moments::kurtosis(Return, na.rm = TRUE),
      .groups = "drop"
    )
}
# 6. Summarise each half with calc_stats(), then show both tables
# NOTE(review): the recorded tables report averages in the hundreds and
# thousands while the input returns are values such as 6.03 or -18.0, and the
# recorded split check shows both halves spanning 1930-01-01 to 2018-12-01.
# These figures should be regenerated once `ff_clean` holds exactly one row
# per month.
stats_first_half <- first_half %>% calc_stats()
stats_second_half <- second_half %>% calc_stats()
print("--- Statistics for First Half ---")
## [1] "--- Statistics for First Half ---"
print(stats_first_half)
## # A tibble: 6 × 5
##   Portfolio  Average    SD Skewness Kurtosis
##   <chr>        <dbl> <dbl>    <dbl>    <dbl>
## 1 BIG HiBM      95.8  235.     5.30    37.7 
## 2 BIG LoBM     193.   328.     2.43     9.27
## 3 ME1 BM2      185.   379.     2.11     6.09
## 4 ME2 BM2      150.   254.     2.92    14.1 
## 5 SMALL HiBM   213.   456.     2.37     7.83
## 6 SMALL LoBM   169.   386.     2.49     8.12
print("--- Statistics for Second Half ---")
## [1] "--- Statistics for Second Half ---"
print(stats_second_half)
## # A tibble: 6 × 5
##   Portfolio  Average     SD Skewness Kurtosis
##   <chr>        <dbl>  <dbl>    <dbl>    <dbl>
## 1 BIG HiBM     892.  3429.      4.76     26.9
## 2 BIG LoBM    1414.  4952.      3.94     19.1
## 3 ME1 BM2       38.0  137.      4.27     21.5
## 4 ME2 BM2     1000.  3634.      4.31     22.1
## 5 SMALL HiBM    22.7   78.2     4.46     23.7
## 6 SMALL LoBM    38.8  142.      4.37     22.8