library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(moments)
# Load the dataset
#
# The CSV is not a plain table: its first line is a free-text description
# ("This file was created by ..."), so letting read_csv() use line 1 as the
# header produces a single all-NA column. Read the raw lines instead and parse
# only the first table in the file.
# NOTE(review): assumes the layout is a text preamble, then a header row that
# begins with a comma (unnamed date column), then rows that begin with a
# six-digit YYYYMM value, with any later tables separated by non-data lines --
# confirm against the actual file.
# NOTE(review): if the file encodes missing returns with sentinel values
# (e.g. -99.99), they are read as ordinary numbers here -- confirm and recode.
csv_path <- "6_Portfolios_2x3.CSV"
raw_lines <- read_lines(csv_path)

header_idx <- which(grepl("^\\s*,", raw_lines))[1]
if (is.na(header_idx)) {
  stop("No comma-led header row found in ", csv_path, call. = FALSE)
}

# The first table ends at the first line after the header that is not a
# YYYYMM data row (blank line, title of the next table, ...).
after_header <- raw_lines[-seq_len(header_idx)]
is_data_row <- grepl("^\\s*[0-9]{6}\\s*,", after_header)
first_break <- which(!is_data_row)[1]
n_data_rows <- if (is.na(first_break)) {
  length(after_header)
} else {
  first_break - 1L
}
if (n_data_rows < 1L) {
  stop(
    "No monthly data rows found after the header in ", csv_path,
    call. = FALSE
  )
}

# Give the unnamed first column an explicit name before parsing, and keep it
# as text so the YYYYMM value can be converted to a real date below.
table_lines <- c(
  sub("^\\s*,", "Date,", raw_lines[header_idx]),
  after_header[seq_len(n_data_rows)]
)
data <- read_csv(
  I(paste(table_lines, collapse = "\n")),
  col_types = cols(Date = col_character(), .default = col_double())
)

# Check column names and structure
print(colnames(data))
str(data)

# as.Date() needs a day component, so format "%Y%m" on its own always yields
# NA; anchor every month to its first day instead.
data$Date <- as.Date(paste0(data$Date, "01"), format = "%Y%m%d")

# Check for missing values
print(sum(is.na(data$Date)))

# Split the data into two halves
# Logical masks (rather than 1:midpoint) stay correct for 0- or 1-row data.
midpoint <- floor(nrow(data) / 2)
row_ids <- seq_len(nrow(data))
data_first_half <- data[row_ids <= midpoint, ]
data_second_half <- data[row_ids > midpoint, ]
# Function to compute summary statistics
#
# Computes the mean, standard deviation, skewness and kurtosis of every
# numeric column of `df`.
#
# Arguments:
#   df  A data frame or tibble.
#
# Returns:
#   A tibble of summary statistics (one row for an ungrouped input). With a
#   single numeric column the result columns are named Mean, SD, Skewness and
#   Kurtosis; with several they are named <column>_<statistic>.
#
# Missing values are dropped (na.rm = TRUE) so that one NA does not turn every
# statistic into NA. Non-numeric columns (e.g. a Date column) are skipped
# because these moments are not meaningful for them.
compute_stats <- function(df) {
  df %>%
    summarise_if(
      is.numeric,
      list(
        Mean = mean,
        SD = sd,
        Skewness = skewness,
        Kurtosis = kurtosis
      ),
      # Forwarded to each of the four functions above; all accept `na.rm`.
      na.rm = TRUE
    )
}
# Summarise each half of the sample with compute_stats()
stats_first_half <- data_first_half %>% compute_stats()
stats_second_half <- data_second_half %>% compute_stats()
# Show both summaries, first half then second half
print("Summary Statistics for First Half")
## [1] "Summary Statistics for First Half"
stats_first_half %>% print()
## # A tibble: 1 × 4
## Mean SD Skewness Kurtosis
## <dbl> <dbl> <dbl> <dbl>
## 1 NA NA NA NA
print("Summary Statistics for Second Half")
## [1] "Summary Statistics for Second Half"
stats_second_half %>% print()
## # A tibble: 1 × 4
## Mean SD Skewness Kurtosis
## <dbl> <dbl> <dbl> <dbl>
## 1 NA NA NA NA