Import Data
# Import CSV
data <- read.csv("6_Portfolios_2x3.csv",
header = TRUE,
fill = TRUE,
stringsAsFactors = FALSE)
# Remove NA on first column
data <- data[!is.na(data[,1]), ]
# Check first rows
head(data)
## [1] "It contains value- and equal-weighted returns for portfolios formed on ME and BEME."
## [2] "The portfolios are constructed at the end of June. BEME is book value at the last fiscal year end of the prior calendar year"
## [3] "divided by ME at the end of December of the prior year."
## [4] "Annual returns are from January to December."
## [5] "Missing data are indicated by -99.99 or -999."
## [6] "The break points include utilities and include financials"
Alternative Import Method (readr)
library(readr)
## Warning: package 'readr' was built under R version 4.5.2
data <- read_csv("6_Portfolios_2x3.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 8898 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): This file was created using the 202601 CRSP database.
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 1
## `This file was created using the 202601 CRSP database.`
## <chr>
## 1 It contains value- and equal-weighted returns for portfolios formed on ME and…
## 2 The portfolios are constructed at the end of June. BEME is book value at the…
## 3 divided by ME at the end of December of the prior year.
## 4 Annual returns are from January to December.
## 5 Missing data are indicated by -99.99 or -999.
## 6 The break points include utilities and include financials
nrow(data)
## [1] 8898
Split Dataset Into Two Halves
half <- floor(nrow(data)/2)
first_half <- data[1:half, ]
second_half <- data[(half+1):nrow(data), ]
Compute Descriptive Statistics
library(moments)
## Warning: package 'moments' was built under R version 4.5.2
num_first <- first_half[, sapply(first_half, is.numeric)]
num_second <- second_half[, sapply(second_half, is.numeric)]
stats1 <- data.frame(
mean = colMeans(num_first),
sd = apply(num_first, 2, sd),
skew = apply(num_first, 2, skewness),
kurt = apply(num_first, 2, kurtosis)
)
stats2 <- data.frame(
mean = colMeans(num_second),
sd = apply(num_second, 2, sd),
skew = apply(num_second, 2, skewness),
kurt = apply(num_second, 2, kurtosis)
)
stats1
## [1] mean sd skew kurt
## <0 rows> (or 0-length row.names)
stats2
## [1] mean sd skew kurt
## <0 rows> (or 0-length row.names)
Risk Premium Calculation
equity <- 0.6*50000 + 0.4*(-30000)
tbill <- 5000
risk_premium <- equity - tbill
risk_premium
## [1] 13000