Homework2

df <- read.csv("6portfolio.csv")
head(df)  # View first few rows

##                                               This.file.was.created.by.CMPT_ME_BEME_OP_INV_RETS.using.the.202412.CRSP.database.
## 1                                           It contains value- and equal-weighted returns for portfolios formed on ME and BEME.
## 2                                                                                                                              
## 3 The portfolios are constructed at the end of June.  BEME is book value at the last fiscal year end of the prior calendar year
## 4                                                                      divided by ME at the end of December of the prior year. 
## 5                                                                                  Annual returns are from January to December.
## 6                                                                                                                              
##   X X.1 X.2 X.3 X.4 X.5
## 1                      
## 2                      
## 3                      
## 4                      
## 5                      
## 6

colnames(df)

## [1] "This.file.was.created.by.CMPT_ME_BEME_OP_INV_RETS.using.the.202412.CRSP.database."
## [2] "X"                                                                                
## [3] "X.1"                                                                              
## [4] "X.2"                                                                              
## [5] "X.3"                                                                              
## [6] "X.4"                                                                              
## [7] "X.5"

str(df)

## 'data.frame':    8831 obs. of  7 variables:
##  $ This.file.was.created.by.CMPT_ME_BEME_OP_INV_RETS.using.the.202412.CRSP.database.: chr  "It contains value- and equal-weighted returns for portfolios formed on ME and BEME." "" "The portfolios are constructed at the end of June.  BEME is book value at the last fiscal year end of the prior calendar year" "divided by ME at the end of December of the prior year. " ...
##  $ X                                                                                : chr  "" "" "" "" ...
##  $ X.1                                                                              : chr  "" "" "" "" ...
##  $ X.2                                                                              : chr  "" "" "" "" ...
##  $ X.3                                                                              : chr  "" "" "" "" ...
##  $ X.4                                                                              : chr  "" "" "" "" ...
##  $ X.5                                                                              : chr  "" "" "" "" ...

colnames(df)

## [1] "This.file.was.created.by.CMPT_ME_BEME_OP_INV_RETS.using.the.202412.CRSP.database."
## [2] "X"                                                                                
## [3] "X.1"                                                                              
## [4] "X.2"                                                                              
## [5] "X.3"                                                                              
## [6] "X.4"                                                                              
## [7] "X.5"

head(df)

##                                               This.file.was.created.by.CMPT_ME_BEME_OP_INV_RETS.using.the.202412.CRSP.database.
## 1                                           It contains value- and equal-weighted returns for portfolios formed on ME and BEME.
## 2                                                                                                                              
## 3 The portfolios are constructed at the end of June.  BEME is book value at the last fiscal year end of the prior calendar year
## 4                                                                      divided by ME at the end of December of the prior year. 
## 5                                                                                  Annual returns are from January to December.
## 6                                                                                                                              
##   X X.1 X.2 X.3 X.4 X.5
## 1                      
## 2                      
## 3                      
## 4                      
## 5                      
## 6

colnames(df)

## [1] "This.file.was.created.by.CMPT_ME_BEME_OP_INV_RETS.using.the.202412.CRSP.database."
## [2] "X"                                                                                
## [3] "X.1"                                                                              
## [4] "X.2"                                                                              
## [5] "X.3"                                                                              
## [6] "X.4"                                                                              
## [7] "X.5"

head(df$Date)

## NULL

midpoint <- median(df$Date, na.rm=TRUE)
df_first_half <- df[df$Date <= midpoint, ]
df_second_half <- df[df$Date > midpoint, ]

head(df$Date)

## NULL

midpoint <- median(df$Date, na.rm=TRUE)
df_first_half <- df[df$Date <= midpoint, ]
df_second_half <- df[df$Date > midpoint, ]

midpoint <- median(df$Date)  # Find midpoint
df_first_half <- df[df$Date <= midpoint, ]
df_second_half <- df[df$Date > midpoint, ]

summary_stats <- function(data) {
  return(data.frame(
    Mean = colMeans(data[,-1], na.rm=TRUE),
    SD = apply(data[,-1], 2, sd, na.rm=TRUE),
    Skewness = apply(data[,-1], 2, function(x) moments::skewness(x, na.rm=TRUE)),
    Kurtosis = apply(data[,-1], 2, function(x) moments::kurtosis(x, na.rm=TRUE))
  ))
}

library(moments)  # Required for skewness & kurtosis

stats_first <- summary_stats(df_first_half)
stats_second <- summary_stats(df_second_half)

print(stats_first)

##     Mean SD Skewness Kurtosis
## X    NaN NA      NaN      NaN
## X.1  NaN NA      NaN      NaN
## X.2  NaN NA      NaN      NaN
## X.3  NaN NA      NaN      NaN
## X.4  NaN NA      NaN      NaN
## X.5  NaN NA      NaN      NaN

print(stats_second)

##     Mean SD Skewness Kurtosis
## X    NaN NA      NaN      NaN
## X.1  NaN NA      NaN      NaN
## X.2  NaN NA      NaN      NaN
## X.3  NaN NA      NaN      NaN
## X.4  NaN NA      NaN      NaN
## X.5  NaN NA      NaN      NaN

str(df)

## 'data.frame':    8831 obs. of  7 variables:
##  $ This.file.was.created.by.CMPT_ME_BEME_OP_INV_RETS.using.the.202412.CRSP.database.: chr  "It contains value- and equal-weighted returns for portfolios formed on ME and BEME." "" "The portfolios are constructed at the end of June.  BEME is book value at the last fiscal year end of the prior calendar year" "divided by ME at the end of December of the prior year. " ...
##  $ X                                                                                : chr  "" "" "" "" ...
##  $ X.1                                                                              : chr  "" "" "" "" ...
##  $ X.2                                                                              : chr  "" "" "" "" ...
##  $ X.3                                                                              : chr  "" "" "" "" ...
##  $ X.4                                                                              : chr  "" "" "" "" ...
##  $ X.5                                                                              : chr  "" "" "" "" ...

numeric_cols <- sapply(df, is.numeric)  # Identify numeric columns
df_numeric <- df[, numeric_cols]        # Keep only numeric columns

n <- nrow(df_numeric)
df_first_half <- df_numeric[1:(n/2), ]
df_second_half <- df_numeric[((n/2) + 1):n, ]

print(colnames(df_first_half))

## character(0)

print(ncol(df_first_half))

## [1] 0

par(mfrow=c(2,3))  # Arrange plots in a 2x3 grid
for (i in colnames(df_first_half)) {  # Loop through column names
  hist(df_first_half[[i]], 
       main=paste("First Half -", i), 
       col="blue", breaks=30)
  hist(df_second_half[[i]], 
       main=paste("Second Half -", i), 
       col="red", breaks=30)
}

print(colnames(df_first_half))

## character(0)

print(ncol(df_first_half))  # Should be greater than 0

## [1] 0

print(str(df_first_half))  # Check data structure

## 'data.frame':    4415 obs. of  0 variables
## NULL

print(head(df_first_half)) # Confirm data looks correct

## data frame with 0 columns and 6 rows

Homework2

Khishigmaa Bold-Erdene 113035124

2025-03-04