# ---- Load the portfolio returns ---------------------------------------------
# A plain read.csv("6portfolio.csv") takes the file's first line -- a free-text
# description ("This file was created by ...") -- as the header row. Every
# column then comes back as character and no `Date` column exists, so the file
# has to be scanned for its first block of real data rows instead.

#' Read the first data table from a CSV that opens with free-text notes.
#'
#' A line counts as a data row when its first comma-separated field consists
#' only of 4 to 8 digits. The first unbroken run of such lines is parsed, and
#' the line directly above that run is used as the header when it contains
#' letters.
#'
#' @param path Path to the CSV file.
#' @return A data frame holding the first table only. Its first column is
#'   renamed `Date`; the remaining columns are typed by `read.csv()`.
read_first_table <- function(path) {
  raw_lines <- readLines(path, warn = FALSE)

  # NOTE(review): assumes each data row starts with a numeric date key such as
  # YYYY, YYYYMM or YYYYMMDD -- confirm against the actual file.
  first_field <- trimws(sub(",.*$", "", raw_lines))
  is_data <- grepl("^[0-9]{4,8}$", first_field)
  if (!any(is_data)) {
    stop(
      "No data rows (numeric key in the first field) found in ", path,
      call. = FALSE
    )
  }

  # The first table ends at the first non-data line after its first row.
  first_row <- which(is_data)[1L]
  breaks <- which(!is_data)
  breaks <- breaks[breaks > first_row]
  last_row <- if (length(breaks) > 0L) breaks[1L] - 1L else length(raw_lines)

  has_header <- first_row > 1L &&
    grepl("[[:alpha:]]", raw_lines[first_row - 1L])
  block_start <- if (has_header) first_row - 1L else first_row

  # NOTE(review): if the file's preamble declares a missing-value sentinel,
  # pass it through `na.strings` here -- none is visible in this script.
  parsed <- read.csv(
    text = raw_lines[block_start:last_row],
    header = has_header,
    row.names = NULL,
    strip.white = TRUE
  )
  names(parsed)[1L] <- "Date"
  parsed
}

df <- read_first_table("6portfolio.csv")

# One inspection pass: column names, column types, and the first rows.
colnames(df)
str(df)
head(df) # View first few rows
# ---- Split the sample at the median date ------------------------------------
head(df$Date)

if (is.numeric(df$Date)) {
  # na.rm = TRUE so that a single missing date does not turn the midpoint
  # (and with it both halves) into NA.
  midpoint <- median(df$Date, na.rm = TRUE) # Find midpoint
  # which() discards NA comparisons; a bare logical index would add an all-NA
  # row to each half for every missing Date.
  df_first_half <- df[which(df$Date <= midpoint), , drop = FALSE]
  df_second_half <- df[which(df$Date > midpoint), , drop = FALSE]
} else {
  # Without a numeric `Date` the comparison is zero-length and both halves
  # come back empty; say so instead of failing silently further down.
  warning(
    "`df$Date` is missing or not numeric, so both halves are empty; ",
    "check how the CSV was read.",
    call. = FALSE
  )
  midpoint <- NA_real_
  df_first_half <- df[0L, , drop = FALSE]
  df_second_half <- df[0L, , drop = FALSE]
}
#' Per-column summary statistics for a table of series.
#'
#' The first column of `data` is skipped; every remaining column is summarised
#' with missing values removed.
#'
#' @param data A data frame (a matrix is converted with `as.data.frame()`).
#'   Column 1 is excluded from the statistics.
#' @return A data frame with one row per remaining column, named after that
#'   column, and the columns `Mean`, `SD`, `Skewness` and `Kurtosis`.
#'   Columns that are neither numeric nor logical get `NA` in every statistic
#'   and raise a warning.
summary_stats <- function(data) {
  # drop = FALSE keeps a single remaining column as a table; without it the
  # selection collapses to a bare vector.
  values <- as.data.frame(data[, -1, drop = FALSE])

  usable <- vapply(
    values,
    function(x) is.numeric(x) || is.logical(x),
    logical(1)
  )
  if (!all(usable)) {
    warning(
      "Non-numeric column(s) summarised as NA: ",
      paste(names(values)[!usable], collapse = ", "),
      call. = FALSE
    )
  }

  # Apply one statistic to every column, column by column, so that a single
  # non-numeric column cannot coerce the whole table to character.
  column_stat <- function(stat) {
    vapply(
      seq_along(values),
      function(j) {
        if (usable[[j]]) {
          as.numeric(stat(values[[j]], na.rm = TRUE))
        } else {
          NA_real_
        }
      },
      numeric(1)
    )
  }

  data.frame(
    Mean = column_stat(mean),
    SD = column_stat(sd),
    Skewness = column_stat(moments::skewness),
    Kurtosis = column_stat(moments::kurtosis),
    row.names = names(values)
  )
}
# moments supplies the skewness() and kurtosis() used by summary_stats().
library("moments")

# Summarise each half of the sample, then print the two tables in turn.
stats_first <- summary_stats(data = df_first_half)
stats_second <- summary_stats(data = df_second_half)
print(x = stats_first)
## Mean SD Skewness Kurtosis
## X NaN NA NaN NaN
## X.1 NaN NA NaN NaN
## X.2 NaN NA NaN NaN
## X.3 NaN NA NaN NaN
## X.4 NaN NA NaN NaN
## X.5 NaN NA NaN NaN
print(x = stats_second)
## Mean SD Skewness Kurtosis
## X NaN NA NaN NaN
## X.1 NaN NA NaN NaN
## X.2 NaN NA NaN NaN
## X.3 NaN NA NaN NaN
## X.4 NaN NA NaN NaN
## X.5 NaN NA NaN NaN

# Look at the column types of the imported table again.
str(object = df)
## 'data.frame': 8831 obs. of 7 variables:
## $ This.file.was.created.by.CMPT_ME_BEME_OP_INV_RETS.using.the.202412.CRSP.database.: chr "It contains value- and equal-weighted returns for portfolios formed on ME and BEME." "" "The portfolios are constructed at the end of June. BEME is book value at the last fiscal year end of the prior calendar year" "divided by ME at the end of December of the prior year. " ...
## $ X : chr "" "" "" "" ...
## $ X.1 : chr "" "" "" "" ...
## $ X.2 : chr "" "" "" "" ...
## $ X.3 : chr "" "" "" "" ...
## $ X.4 : chr "" "" "" "" ...
## $ X.5 : chr "" "" "" "" ...
# ---- Split the numeric columns into two halves by row position --------------
# vapply() always returns a logical vector, even for a zero-column table.
numeric_cols <- vapply(df, is.numeric, logical(1)) # Identify numeric columns
# drop = FALSE: a single numeric column must stay a data frame, otherwise
# nrow() and the row subsetting below stop working.
df_numeric <- df[, numeric_cols, drop = FALSE] # Keep only numeric columns
if (ncol(df_numeric) == 0L) {
  warning(
    "`df` has no numeric columns, so no histograms will be drawn; ",
    "check how the CSV was read.",
    call. = FALSE
  )
}

n <- nrow(df_numeric)
# Integer split point. 1:(n/2) and ((n/2) + 1):n yield fractional indices when
# n is odd; those truncate, so the last row was silently dropped.
half <- n %/% 2L
df_first_half <- df_numeric[seq_len(half), , drop = FALSE]
df_second_half <- df_numeric[half + seq_len(n - half), , drop = FALSE]

# Diagnostics, run once before plotting.
print(colnames(df_first_half))
print(ncol(df_first_half)) # Should be greater than 0
str(df_first_half) # str() prints by itself; print(str(x)) adds a stray NULL
print(head(df_first_half)) # Confirm data looks correct

# `Date` is the key used for the median split earlier in this script, not a
# series to plot, so it is left out of the histograms.
plot_cols <- setdiff(colnames(df_first_half), "Date")
if (length(plot_cols) > 0L) {
  # Each column draws two histograms, so one 2x3 page holds three columns.
  old_par <- par(mfrow = c(2, 3)) # Arrange plots in a 2x3 grid
  for (i in plot_cols) { # Loop through column names
    hist(
      df_first_half[[i]],
      main = paste("First Half -", i),
      col = "blue", breaks = 30
    )
    hist(
      df_second_half[[i]],
      main = paste("Second Half -", i),
      col = "red", breaks = 30
    )
  }
  par(old_par) # Restore the previous layout for later plots
}