Import Data

# Import CSV
#
# The raw file is not a plain rectangular CSV: it opens with a free-text
# preamble (one of its lines reads "Missing data are indicated by -99.99 or
# -999."). Handing the whole file to read.csv() therefore produces a single
# text column and no numeric data. Instead, the data rows are located by
# pattern and only those lines are parsed.
#
# NOTE(review): output captured from the earlier whole-file import (a
# character vector of preamble sentences) was removed from this spot; re-run
# to see the parsed table.
csv_path <- "6_Portfolios_2x3.csv"
raw_lines <- readLines(csv_path, warn = FALSE)

# Assumption: rows of the first table start with a six-digit YYYYMM key
# followed by a comma -- TODO confirm against the file layout.
row_idx <- grep("^[[:space:]]*[0-9]{6}[[:space:]]*,", raw_lines)
if (length(row_idx) == 0) {
  stop("No data rows found in ", csv_path, call. = FALSE)
}

# Keep only the first contiguous run of data rows. If the file holds further
# tables after a gap, they are deliberately ignored here.
run_breaks <- which(diff(row_idx) != 1L)
first_row <- row_idx[1L]
last_row <- if (length(run_breaks) > 0) {
  row_idx[run_breaks[1L]]
} else {
  row_idx[length(row_idx)]
}

# The line directly above the first data row is treated as the column header
# when it starts with an empty field (i.e. a leading comma).
has_header <- first_row > 1L &&
  grepl("^[[:space:]]*,", raw_lines[first_row - 1L])
table_lines <- raw_lines[seq.int(first_row - has_header, last_row)]

data <- read.csv(text = table_lines,
                 header = has_header,
                 strip.white = TRUE,
                 stringsAsFactors = FALSE)

# Store the period key as text so that numeric-column summaries further down
# do not average the dates.
names(data)[1L] <- "date"
data[[1L]] <- as.character(data[[1L]])

# Turn the sentinel codes announced in the file preamble into real NAs.
if (ncol(data) > 1L) {
  data[-1L] <- lapply(data[-1L], function(x) {
    x[x %in% c(-99.99, -999)] <- NA
    x
  })
}

# Check first rows
head(data)

Alternative Import Method (readr)

library(readr)
## Warning: package 'readr' was built under R version 4.5.2
# Import with readr, skipping the free-text preamble.
#
# A bare read_csv() on this file guessed a single character column named
# after the first preamble sentence (8898 rows x 1 column, with parsing
# warnings), so no numeric data reached the later steps. Here the data rows
# are located by pattern and only those lines are parsed.
#
# NOTE(review): console output captured from the earlier whole-file import
# was removed from this spot; re-run to see the parsed table.
csv_path <- "6_Portfolios_2x3.csv"
raw_lines <- read_lines(csv_path)

# Assumption: rows of the first table start with a six-digit YYYYMM key
# followed by a comma -- TODO confirm against the file layout.
row_idx <- grep("^[[:space:]]*[0-9]{6}[[:space:]]*,", raw_lines)
if (length(row_idx) == 0) {
  stop("No data rows found in ", csv_path, call. = FALSE)
}

# Keep only the first contiguous run of data rows. If the file holds further
# tables after a gap, they are deliberately ignored here.
run_breaks <- which(diff(row_idx) != 1L)
first_row <- row_idx[1L]
last_row <- if (length(run_breaks) > 0) {
  row_idx[run_breaks[1L]]
} else {
  row_idx[length(row_idx)]
}

# The line directly above the first data row is treated as the column header
# when it starts with an empty field (i.e. a leading comma). Its labels are
# split by hand so the blank first label can be replaced with "date".
has_header <- first_row > 1L &&
  grepl("^[[:space:]]*,", raw_lines[first_row - 1L])
col_labels <- FALSE
if (has_header) {
  col_labels <- trimws(strsplit(raw_lines[first_row - 1L], ",")[[1L]])
  col_labels[1L] <- "date"
}

# I() marks the string as literal CSV text rather than a file path.
data <- read_csv(
  I(paste(raw_lines[first_row:last_row], collapse = "\n")),
  col_names = col_labels,
  col_types = cols(.default = col_double())
)

# Store the period key as text so that numeric-column summaries further down
# do not average the dates.
data[[1L]] <- as.character(data[[1L]])

# Turn the sentinel codes announced in the file preamble ("Missing data are
# indicated by -99.99 or -999.") into real NAs.
if (ncol(data) > 1L) {
  data[-1L] <- lapply(data[-1L], function(x) {
    x[x %in% c(-99.99, -999)] <- NA
    x
  })
}

head(data)
nrow(data)

Split Dataset Into Two Halves

# Split the rows of `data` into two consecutive halves. With an odd number of
# rows the extra row lands in the second half.
n_rows <- nrow(data)
half <- floor(n_rows / 2)

# seq_len() gives an empty index when a half has no rows, whereas `1:0`
# counts backwards (1, 0) and would pick the wrong rows. `drop = FALSE` keeps
# a single-column data frame from collapsing into a bare vector.
first_half <- data[seq_len(half), , drop = FALSE]
second_half <- data[half + seq_len(n_rows - half), , drop = FALSE]

Compute Descriptive Statistics

library(moments)
## Warning: package 'moments' was built under R version 4.5.2
# Keep only the numeric columns of `df`. List-style subsetting always returns
# a data frame, even when exactly one (or no) column qualifies.
select_numeric <- function(df) {
  df[vapply(df, is.numeric, logical(1))]
}

# Build a table with one row per column of `num_df` holding its mean,
# standard deviation, skewness and kurtosis. Missing values are ignored.
# skewness() and kurtosis() come from the `moments` package loaded above.
describe_columns <- function(num_df) {
  col_stat <- function(f) vapply(num_df, f, numeric(1), na.rm = TRUE)
  data.frame(
    mean = col_stat(mean),
    sd   = col_stat(sd),
    skew = col_stat(skewness),
    kurt = col_stat(kurtosis)
  )
}

num_first <- select_numeric(first_half)
num_second <- select_numeric(second_half)

stats1 <- describe_columns(num_first)
stats2 <- describe_columns(num_second)

stats1
## [1] mean sd   skew kurt
## <0 rows> (or 0-length row.names)
stats2
## [1] mean sd   skew kurt
## <0 rows> (or 0-length row.names)

Risk Premium Calculation

# Weighted sum of the two equity outcomes; presumably the weights 0.6 / 0.4
# are probabilities and 50000 / -30000 the matching payoffs -- confirm
# against the exercise text.
equity <- sum(c(0.6, 0.4) * c(50000, -30000))

# Comparison amount for the T-bill alternative.
tbill <- 5000

# The premium is how much the equity figure exceeds the T-bill figure.
risk_premium <- equity - tbill
print(risk_premium)
## [1] 13000