# packages
library(readr)
library(stringr)
library(tidyr) # reshaping data
library(dplyr) # data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2) # data visualization
## Warning: package 'ggplot2' was built under R version 4.5.3
library(e1071) # statistics & machine learning
## Warning: package 'e1071' was built under R version 4.5.3
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
## 
##     element
library(ggrepel) # ggplot2 + better labels
## Warning: package 'ggrepel' was built under R version 4.5.3

Phase 1: Data Preprocessing

1.1 How can raw mutual fund data to be cleaned to ensure consistency in numeric format.

mf_data <- read_csv(
  "D:/semester_projects/R/mutual_funds_data.csv",
  col_names = TRUE
)
## New names:
## Rows: 1407 Columns: 37
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (32): Name, Sub Category, Plan, CAGR 3Y, CAGR 5Y, Absolute Returns - 3M,... dbl
## (2): Category St Dev, Time since inception num (2): AUM, NAV lgl (1): ...37
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...37`
mf_data
## # A tibble: 1,407 × 37
##    Name   `Sub Category` Plan     AUM `CAGR 3Y` `CAGR 5Y` Absolute Returns - 3…¹
##    <chr>  <chr>          <chr>  <dbl> <chr>     <chr>     <chr>                 
##  1 Parag… Flexi Cap Fund Grow… 1.34e5 18.3      16.75     -8.75                 
##  2 Parag… Flexi Cap Fund IDCW  1.34e5 -         -         -8.75                 
##  3 Parag… Flexi Cap Fund IDCW  1.34e5 -         -         -8.75                 
##  4 HDFC … Balanced Adva… IDCW  1.08e5 8.52      7.74      -9.11                 
##  5 HDFC … Balanced Adva… IDCW  1.08e5 8.52      7.74      -9.11                 
##  6 HDFC … Balanced Adva… Grow… 1.08e5 16.36     16.7      -7.59                 
##  7 HDFC … Flexi Cap Fund IDCW  1.00e5 10.83     9.79      -16.26                
##  8 HDFC … Flexi Cap Fund IDCW  1.00e5 10.83     9.79      -16.26                
##  9 HDFC … Flexi Cap Fund Grow… 1.00e5 20.04     19.42     -9.64                 
## 10 HDFC … Mid Cap Fund   IDCW  9.43e4 16.14     13.25     -14.61                
## # ℹ 1,397 more rows
## # ℹ abbreviated name: ¹​`Absolute Returns - 3M`
## # ℹ 30 more variables: `Absolute Returns - 1Y` <chr>,
## #   `Absolute Returns - 6M` <chr>, `SEBI Risk Category` <chr>,
## #   Volatility <chr>, `Category St Dev` <dbl>, `% Other Holdings` <chr>,
## #   `% Largecap Holding` <chr>, `% Midcap Holding` <chr>,
## #   `% Equity Holding` <chr>, `% Smallcap Holding` <chr>, …

Cleaning the data as column names contains blank spaces and special characters.

# Column names which starts with %
colnames(mf_data)[grep("%", colnames(mf_data))]
## [1] "% Other Holdings"   "% Largecap Holding" "% Midcap Holding"  
## [4] "% Equity Holding"   "% Smallcap Holding" "% Debt Holding"
# replacing %
clean_mf_data <- mf_data |>
  rename_with(~ str_replace_all(., "%", "")) |>
  rename_with(~ str_replace_all(., " ", "_")) |>
  rename_with(~ str_replace_all(., "-", "")) |>
  # removes more than one underscore
  rename_with(~ str_replace_all(., "__+", "_")) |>
  # removes leading / trailing underscore
  rename_with(~ str_remove(., "^_|_$")) |>
  select(-starts_with("..."))
glimpse(clean_mf_data)
## Rows: 1,407
## Columns: 36
## $ Name                 <chr> "Parag Parikh Flexi Cap Fund", "Parag Parikh Flex…
## $ Sub_Category         <chr> "Flexi Cap Fund", "Flexi Cap Fund", "Flexi Cap Fu…
## $ Plan                 <chr> "Growth", "IDCW", "IDCW", "IDCW", "IDCW", "Growth…
## $ AUM                  <dbl> 134253.17, 134253.17, 134253.17, 107589.67, 10758…
## $ CAGR_3Y              <chr> "18.3", "-", "-", "8.52", "8.52", "16.36", "10.83…
## $ CAGR_5Y              <chr> "16.75", "-", "-", "7.74", "7.74", "16.7", "9.79"…
## $ Absolute_Returns_3M  <chr> "-8.75", "-8.75", "-8.75", "-9.11", "-9.11", "-7.…
## $ Absolute_Returns_1Y  <chr> "0.08", "-", "-", "-5.8", "-5.8", "0.65", "-6.33"…
## $ Absolute_Returns_6M  <chr> "-7.25", "-", "-", "-8.1", "-8.1", "-5.02", "-14.…
## $ SEBI_Risk_Category   <chr> "Very High", "Very High", "Very High", "Very High…
## $ Volatility           <chr> "8.76", "8.17", "8.17", "8.63", "8.63", "8.63", "…
## $ Category_St_Dev      <dbl> 13.52, 13.52, 13.52, 8.25, 8.25, 8.25, 13.52, 13.…
## $ Other_Holdings       <chr> "-", "-", "-", "-", "-", "-", "-", "-", "-", "-",…
## $ Largecap_Holding     <chr> "63.01", "63.01", "63.01", "54.22", "54.22", "54.…
## $ Midcap_Holding       <chr> "2.17", "2.17", "2.17", "7.47", "7.47", "7.47", "…
## $ Equity_Holding       <chr> "78.36", "78.36", "78.36", "69.36", "69.36", "69.…
## $ Smallcap_Holding     <chr> "2.67", "2.67", "2.67", "7.67", "7.67", "7.67", "…
## $ Debt_Holding         <chr> "13.94", "13.94", "13.94", "25.78", "25.78", "25.…
## $ Category_YTM         <chr> "-", "-", "-", "6.56", "6.56", "6.56", "-", "-", …
## $ PE_Ratio             <chr> "17.51", "17.51", "17.51", "19", "19", "19", "24.…
## $ Average_Maturity     <chr> "-", "-", "-", "7.6", "7.6", "7.6", "-", "-", "-"…
## $ Category_PE_Ratio    <chr> "27.28", "27.28", "27.28", "25.11", "25.11", "25.…
## $ Sortino_Ratio        <chr> "-0.02", "-0.25", "-0.25", "-0.01", "-0.01", "-0.…
## $ Sharpe_Ratio         <chr> "-0.17", "-2.82", "-2.82", "-0.13", "-0.13", "-0.…
## $ Average_YTM          <chr> "-", "-", "-", "7.01", "7.01", "7.01", "-", "-", …
## $ Benchmark            <chr> "NIFTY 500 - TRI", "NIFTY 500 - TRI", "NIFTY 500 …
## $ Time_since_inception <dbl> 155, 6, 6, 159, 159, 159, 159, 159, 159, 159, 159…
## $ Expense_Ratio        <chr> "0.63", "0.63", "0.63", "0.77", "0.77", "0.77", "…
## $ NAV                  <dbl> 87.00, 87.00, 87.00, 41.82, 41.82, 535.18, 82.81,…
## $ AMC                  <chr> "PPFAS Asset Management Pvt. Ltd.", "PPFAS Asset …
## $ Minimum_Lumpsum      <chr> "1,000.00", "1,000.00", "1,000.00", "100", "Not a…
## $ Lockin               <chr> "-", "-", "-", "-", "-", "-", "-", "-", "-", "-",…
## $ Fund_Manager         <chr> "Rajeev Thakkar+3 others", "Rajeev Thakkar+3 othe…
## $ Exit_Load            <chr> "2", "1", "1", "1", "1", "1", "1", "1", "1", "1",…
## $ SIP_Investment       <chr> "Allowed", "Allowed", "Allowed", "Allowed", "Not …
## $ Minimum_SIP          <chr> "3,000.00", "3,000.00", "3,000.00", "100", "100",…

Identify and convert columns to numeric

clean_mf_data <- clean_mf_data |>
  mutate(across(
    .cols =
      contains(c(
        "CAGR", "Returns", "Expense", "Sharpe", "Sortino", "Nav", "AUM",
        "Large", "Mid", "Equity", "Smallcap", "Debt", "Average", "Absolute"
      )),
    ~ suppressWarnings(as.numeric(str_replace_all(., "[%,]", "")))
  ))
clean_mf_data
## # A tibble: 1,407 × 36
##    Name            Sub_Category Plan     AUM CAGR_3Y CAGR_5Y Absolute_Returns_3M
##    <chr>           <chr>        <chr>  <dbl>   <dbl>   <dbl>               <dbl>
##  1 Parag Parikh F… Flexi Cap F… Grow… 1.34e5   18.3    16.8                -8.75
##  2 Parag Parikh F… Flexi Cap F… IDCW  1.34e5   NA      NA                  -8.75
##  3 Parag Parikh F… Flexi Cap F… IDCW  1.34e5   NA      NA                  -8.75
##  4 HDFC Balanced … Balanced Ad… IDCW  1.08e5    8.52    7.74               -9.11
##  5 HDFC Balanced … Balanced Ad… IDCW  1.08e5    8.52    7.74               -9.11
##  6 HDFC Balanced … Balanced Ad… Grow… 1.08e5   16.4    16.7                -7.59
##  7 HDFC Flexi Cap… Flexi Cap F… IDCW  1.00e5   10.8     9.79              -16.3 
##  8 HDFC Flexi Cap… Flexi Cap F… IDCW  1.00e5   10.8     9.79              -16.3 
##  9 HDFC Flexi Cap… Flexi Cap F… Grow… 1.00e5   20.0    19.4                -9.64
## 10 HDFC Mid Cap F… Mid Cap Fund IDCW  9.43e4   16.1    13.2               -14.6 
## # ℹ 1,397 more rows
## # ℹ 29 more variables: Absolute_Returns_1Y <dbl>, Absolute_Returns_6M <dbl>,
## #   SEBI_Risk_Category <chr>, Volatility <chr>, Category_St_Dev <dbl>,
## #   Other_Holdings <chr>, Largecap_Holding <dbl>, Midcap_Holding <dbl>,
## #   Equity_Holding <dbl>, Smallcap_Holding <dbl>, Debt_Holding <dbl>,
## #   Category_YTM <chr>, PE_Ratio <chr>, Average_Maturity <dbl>,
## #   Category_PE_Ratio <chr>, Sortino_Ratio <dbl>, Sharpe_Ratio <dbl>, …
# saving clean data into a file
write.csv(clean_mf_data,
  "D:/semester_projects/R/data_preprocessing_phase/clean_mf_data.csv",
  row.names = FALSE
)

1.2 What are the pattern and proportion of missing values or NA values across features and treatment.

mf_data <- read_csv(
  "D:/semester_projects/R/data_preprocessing_phase/clean_mf_data.csv",
  col_names = TRUE
)
## Rows: 1407 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): Name, Sub_Category, Plan, SEBI_Risk_Category, Volatility, Other_Ho...
## dbl (19): AUM, CAGR_3Y, CAGR_5Y, Absolute_Returns_3M, Absolute_Returns_1Y, A...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mf_data
## # A tibble: 1,407 × 36
##    Name            Sub_Category Plan     AUM CAGR_3Y CAGR_5Y Absolute_Returns_3M
##    <chr>           <chr>        <chr>  <dbl>   <dbl>   <dbl>               <dbl>
##  1 Parag Parikh F… Flexi Cap F… Grow… 1.34e5   18.3    16.8                -8.75
##  2 Parag Parikh F… Flexi Cap F… IDCW  1.34e5   NA      NA                  -8.75
##  3 Parag Parikh F… Flexi Cap F… IDCW  1.34e5   NA      NA                  -8.75
##  4 HDFC Balanced … Balanced Ad… IDCW  1.08e5    8.52    7.74               -9.11
##  5 HDFC Balanced … Balanced Ad… IDCW  1.08e5    8.52    7.74               -9.11
##  6 HDFC Balanced … Balanced Ad… Grow… 1.08e5   16.4    16.7                -7.59
##  7 HDFC Flexi Cap… Flexi Cap F… IDCW  1.00e5   10.8     9.79              -16.3 
##  8 HDFC Flexi Cap… Flexi Cap F… IDCW  1.00e5   10.8     9.79              -16.3 
##  9 HDFC Flexi Cap… Flexi Cap F… Grow… 1.00e5   20.0    19.4                -9.64
## 10 HDFC Mid Cap F… Mid Cap Fund IDCW  9.43e4   16.1    13.2               -14.6 
## # ℹ 1,397 more rows
## # ℹ 29 more variables: Absolute_Returns_1Y <dbl>, Absolute_Returns_6M <dbl>,
## #   SEBI_Risk_Category <chr>, Volatility <chr>, Category_St_Dev <dbl>,
## #   Other_Holdings <chr>, Largecap_Holding <dbl>, Midcap_Holding <dbl>,
## #   Equity_Holding <dbl>, Smallcap_Holding <dbl>, Debt_Holding <dbl>,
## #   Category_YTM <chr>, PE_Ratio <chr>, Average_Maturity <dbl>,
## #   Category_PE_Ratio <chr>, Sortino_Ratio <dbl>, Sharpe_Ratio <dbl>, …
# lets identify fields that contain missing values or " - " symbol
cols_to_be_treated <- colnames(mf_data)[
  sapply(mf_data, function(col) {
    is.character(col) &&
      any(trimws(col) == "-", na.rm = TRUE)
  })
]

# Replacing these missing records with some appropriate value
mf_data <- mf_data |> mutate(
  across(
    all_of(cols_to_be_treated),
    ~ suppressWarnings(as.numeric(ifelse(trimws(.) == "-", "0", .)))
  )
)

# lets identify NA values
cols_contains_na <- colnames(mf_data)[colSums(is.na(mf_data)) > 0]

# Replacing these NA values with median
mf_data <- mf_data |> mutate(
  across(
    all_of(cols_contains_na),
    ~ ifelse(is.na(.), median(., na.rm = TRUE), .)
  )
)

# Saving the file as csv
write.csv(mf_data,
  "D:/semester_projects/R/data_preprocessing_phase/mf_data.csv",
  row.names = FALSE
)

1.3 How can duplicate fund entries should be detected and removed efficiently.

# identifying the duplicate records
dup_funds <- mf_data |> filter(
  duplicated(mf_data[, c("Name", "Plan", "Sub_Category")]) |
    duplicated(mf_data[, c("Name", "Plan", "Sub_Category")], fromLast = TRUE)
)

# removing the duplicate records
mf_data <- mf_data[
  !duplicated(mf_data[, c("Name", "Plan", "Sub_Category")]),
]

1.4 Are data types correctly assigned and how check incorrect types that can affect analysis.

# here we will check the invalid datatypes
# although we have converted the datatype before.
# first we will assign levels in the dataset for easier data exploration.

mf_data <- read.csv(
  "D:/semester_projects/R/data_preprocessing_phase/mf_data.csv",
  header = TRUE
)
mf_data$Plan <- as.factor(mf_data$Plan)
mf_data$SEBI_Risk_Category <- as.factor(mf_data$SEBI_Risk_Category)

# check levels
levels(mf_data$Plan)
## [1] "Bonus"  "Growth" "IDCW"
levels(mf_data$SEBI_Risk_Category)
## [1] "High"            "Low"             "Moderate"        "Moderately High"
## [5] "Moderately Low"  "Very High"
# lets detect columns with mixed type of values
str(mf_data)
## 'data.frame':    1407 obs. of  36 variables:
##  $ Name                : chr  "Parag Parikh Flexi Cap Fund" "Parag Parikh Flexi Cap Fund(IDCW)" "Parag Parikh Flexi Cap Fund(IDCW Payout)" "HDFC Balanced Advantage Fund(IDCW)" ...
##  $ Sub_Category        : chr  "Flexi Cap Fund" "Flexi Cap Fund" "Flexi Cap Fund" "Balanced Advantage Fund" ...
##  $ Plan                : Factor w/ 3 levels "Bonus","Growth",..: 2 3 3 3 3 2 3 3 2 3 ...
##  $ AUM                 : num  134253 134253 134253 107590 107590 ...
##  $ CAGR_3Y             : num  18.3 7.58 7.58 8.52 8.52 ...
##  $ CAGR_5Y             : num  16.75 6.32 6.32 7.74 7.74 ...
##  $ Absolute_Returns_3M : num  -8.75 -8.75 -8.75 -9.11 -9.11 ...
##  $ Absolute_Returns_1Y : num  0.08 0 0 -5.8 -5.8 0.65 -6.33 -6.33 1.07 -1 ...
##  $ Absolute_Returns_6M : num  -7.25 -0.9 -0.9 -8.1 -8.1 ...
##  $ SEBI_Risk_Category  : Factor w/ 6 levels "High","Low","Moderate",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ Volatility          : num  8.76 8.17 8.17 8.63 8.63 ...
##  $ Category_St_Dev     : num  13.52 13.52 13.52 8.25 8.25 ...
##  $ Other_Holdings      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Largecap_Holding    : num  63 63 63 54.2 54.2 ...
##  $ Midcap_Holding      : num  2.17 2.17 2.17 7.47 7.47 ...
##  $ Equity_Holding      : num  78.4 78.4 78.4 69.4 69.4 ...
##  $ Smallcap_Holding    : num  2.67 2.67 2.67 7.67 7.67 ...
##  $ Debt_Holding        : num  13.9 13.9 13.9 25.8 25.8 ...
##  $ Category_YTM        : num  0 0 0 6.56 6.56 6.56 0 0 0 0 ...
##  $ PE_Ratio            : num  17.5 17.5 17.5 19 19 ...
##  $ Average_Maturity    : num  1 1 1 7.6 7.6 7.6 1 1 1 1 ...
##  $ Category_PE_Ratio   : num  27.3 27.3 27.3 25.1 25.1 ...
##  $ Sortino_Ratio       : num  -0.02 -0.25 -0.25 -0.01 -0.01 -0.01 0 0 0 0.03 ...
##  $ Sharpe_Ratio        : num  -0.17 -2.82 -2.82 -0.13 -0.13 -0.13 -0.02 -0.02 -0.03 0.31 ...
##  $ Average_YTM         : num  6.77 6.77 6.77 7.01 7.01 7.01 6.77 6.77 6.77 6.77 ...
##  $ Benchmark           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Time_since_inception: int  155 6 6 159 159 159 159 159 159 159 ...
##  $ Expense_Ratio       : num  0.63 0.63 0.63 0.77 0.77 0.77 0.7 0.7 0.7 0.76 ...
##  $ NAV                 : num  87 87 87 41.8 41.8 ...
##  $ AMC                 : chr  "PPFAS Asset Management Pvt. Ltd." "PPFAS Asset Management Pvt. Ltd." "PPFAS Asset Management Pvt. Ltd." "HDFC Asset Management Company Limited" ...
##  $ Minimum_Lumpsum     : chr  "1,000.00" "1,000.00" "1,000.00" "100" ...
##  $ Lockin              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fund_Manager        : chr  "Rajeev Thakkar+3 others" "Rajeev Thakkar+3 others" "Rajeev Thakkar+3 others" "Gopal Agrawal+3 others" ...
##  $ Exit_Load           : num  2 1 1 1 1 1 1 1 1 1 ...
##  $ SIP_Investment      : chr  "Allowed" "Allowed" "Allowed" "Allowed" ...
##  $ Minimum_SIP         : int  100 100 100 100 100 100 100 100 100 100 ...
char_cols <- sapply(mf_data, is.character)
names(mf_data[char_cols])
## [1] "Name"            "Sub_Category"    "AMC"             "Minimum_Lumpsum"
## [5] "Fund_Manager"    "SIP_Investment"
# here minimum_lumpsum contains mixed type of data
unique(mf_data$Minimum_Lumpsum)
## [1] "1,000.00"    "100"         "Not allowed" "5,000.00"    "500"        
## [6] "10,000.00"   "99"          "20,000.00"   "1,00,000.00"
# converting the values into similar types of data
mf_data <- mf_data |>
  mutate(
    Minimum_Lumpsum = suppressWarnings(
      as.numeric(gsub(",", "", Minimum_Lumpsum))
    )
  )

1.5 Detect Outliers using IQR method.

# these are important columns and data is
# naturally skewed it is important to ignore them.
exclude_cols <- c("AUM", "NAV", "Minimum_Lumpsum")

# identifying the numeric columns
numeric_cols <- names(mf_data)[
  sapply(mf_data, is.numeric) &
    !(names(mf_data) %in% exclude_cols)
]

# Detecting outliers
for (col in numeric_cols) {
  q1 <- quantile(mf_data[[col]], 0.25, na.rm = TRUE)
  q3 <- quantile(mf_data[[col]], 0.75, na.rm = TRUE)
  iqr_val <- IQR(mf_data[[col]], na.rm = TRUE)

  lower <- q1 - 1.5 * iqr_val
  upper <- q3 + 1.5 * iqr_val

  outliers <- mf_data[[col]][
    mf_data[[col]] < lower | mf_data[[col]] > upper
  ]
  if (length(outliers) > 0) {
    cat("column", col, "\n")
    print(outliers)
    cat("\n-----------\n")
  }
}
## column CAGR_3Y 
##  [1] 31.65 31.65 31.65 31.38 31.21 31.21 31.21 44.84 44.84 44.84 31.18 31.18
## [13] 31.18 32.82 32.82 32.83 31.48 31.48 31.48 44.99 44.69 44.69 44.69
## 
## -----------
## column CAGR_5Y 
##  [1] 24.42 24.42 24.42 25.20 27.36 27.36 27.36 26.95 24.87 24.87 24.87
## 
## -----------
## column Absolute_Returns_1Y 
##  [1] -21.68 -15.40 -15.40 -13.91  57.86  57.86  57.86 -13.51 -13.51 -16.44
## [11] -16.44 -13.40 -13.40 -13.22 -13.22  21.34  57.08 -13.28 -13.28 -15.76
## [21] -15.76 -15.76 -14.73 -14.73 -14.73  19.58  19.58  19.58  17.21  17.21
## [31]  17.21  57.92  57.92  57.92 122.54 122.54 122.54  57.19  57.19  57.19
## [41]  58.26  58.26  58.26 -14.15 -14.15  18.44  18.44  18.44  19.13  19.13
## [51]  19.13 123.57 123.02 123.02 123.02 126.78 126.78 126.78
## 
## -----------
## column Absolute_Returns_6M 
##  [1] 22.22 22.22 22.22 21.75 21.90 21.90 21.90 62.20 62.20 62.20 21.81 21.81
## [13] 21.81 22.18 22.18 22.18 62.29 61.90 61.90 61.90 64.46 64.46 64.46
## 
## -----------
## column Volatility 
##  [1] 54.82 54.82 54.82 58.91 51.71 51.71 51.71 56.72 56.72 56.72
## 
## -----------
## column Category_St_Dev 
##  [1] 34.60 34.60 34.60 34.60 34.60 34.60 34.60 60.61 60.61 60.61 34.60 34.60
## [13] 34.60 34.60 34.60 34.60 60.61 60.61 60.61 60.61 60.61 60.61 60.61
## 
## -----------
## column Largecap_Holding 
##   [1]  63.01  63.01  63.01  54.22  54.22  54.22  75.25  75.25  75.25  12.47
##  [11]  12.47  12.47  56.10  56.10  56.10  84.95  84.95  84.95  57.77  57.77
##  [21]  57.77  57.77  57.77  14.89  14.89  14.89  14.89  16.77  16.77  16.77
##  [31]  75.61  75.61  75.61  76.13  76.13  76.13  80.80  80.80  80.80  83.46
##  [41]  83.46  83.46  83.46  45.14  45.14  45.14  45.14  58.08  58.08  58.08
##  [51]  58.08  58.08  58.08  58.08  25.09  25.09  25.09  25.09  84.94  84.94
##  [61]  84.94  87.04  87.04  87.04  41.57  41.57  41.57   9.16   9.16   9.16
##  [71]  66.27  66.27  66.27  25.88  25.88  25.88  74.05  74.05  39.84  39.84
##  [81]  39.84  89.12  89.12  89.12  63.97  63.97  63.97  19.20  19.20  19.20
##  [91]  57.55  57.55  57.55  82.87  82.87  82.87  -0.38  -0.38  -0.38  -0.38
## [101]  47.14  47.14  47.14  23.52  23.52  23.52  38.26  38.26  38.26  71.61
## [111]  71.61  71.61  46.77  46.77  46.77  99.24  99.24  99.24  58.06  58.06
## [121]   5.64   5.64   5.64  59.52  59.52  59.52  41.31  41.31  41.31  54.29
## [131]  54.29  54.29  54.29  54.29  54.29  54.29  54.29  46.41  46.41  46.41
## [141]   8.04   8.04   8.04  67.42  67.42  67.42  38.47  38.47  38.47  58.67
## [151]  58.67  58.67  99.26  46.67  46.67  46.67   4.81   4.81  63.82  63.82
## [161]  63.82  58.52  58.52  58.52  18.57  18.57  18.57  76.49  76.49  76.49
## [171]  46.01  46.01  46.01  14.17  14.17  14.17  65.31  65.31  65.31  54.67
## [181]  54.67  54.67  68.75  68.75  68.75  87.29  87.29  87.29  81.64  81.64
## [191]  81.64  47.30  47.30  47.30  47.30  47.30   6.35   6.35   6.35  15.80
## [201]  15.80  15.80  15.80  15.80  15.80  15.80  73.48  73.48  73.48  40.44
## [211]  40.44  40.44  41.31  41.31  41.31  41.31  41.31  41.31  41.31  99.06
## [221]  99.06  99.06   2.62   2.62   2.62  39.28  39.28  39.28  61.57  61.57
## [231]  61.57  73.40  73.40  73.40  69.18  69.18  69.18  42.25  42.25  42.25
## [241]  20.41  20.41  20.41  75.50  75.50  44.39  44.39  44.39  43.56  43.56
## [251]  43.56  71.77  71.77  71.77  18.71  18.71  18.71  64.31  64.31  64.31
## [261]  64.31  64.31  64.31  64.31   6.58   6.58   6.58  46.59  46.59  46.59
## [271]  56.86  56.86  56.86  88.54  88.54  88.54  45.32  45.32  45.32   8.86
## [281]   8.86   8.86  27.25  27.25  27.25  15.17  15.17  15.17  60.73  60.73
## [291]  60.73  77.11  77.11  77.11  81.55  81.55  99.24  99.24  99.24  13.01
## [301]  13.01  13.01  65.93  65.93  65.93  54.03  54.03  54.03  42.99  42.99
## [311]  42.99  62.57  62.57  62.57  85.66  85.66  85.66  20.82  20.82  20.82
## [321]  20.82  15.84  15.84  15.84  76.00  76.00  76.00  56.83  56.83  56.83
## [331]  59.34  59.34  59.34  63.36  63.36  63.36   7.51   7.51   7.51   7.51
## [341]   7.51   7.51   7.51  45.96  45.96  45.96  63.20  63.20  63.20  10.28
## [351]  10.28  10.28  55.92  55.92  55.92  45.35  45.35  45.35  47.14  47.14
## [361]  47.14  57.30  57.30  57.30  35.78  35.78  35.78  46.07  46.07  46.07
## [371]  65.65  65.65  65.65  -0.48  -0.48  -0.48  40.08  40.08  40.08  61.66
## [381]  61.66  61.66  71.57  71.57  71.57  35.42  35.42  35.42 100.04  72.01
## [391]  72.01  69.96  69.96  69.96  69.96  69.96  73.39  41.65  41.65  41.65
## [401]  41.93  41.93  41.93  41.93  41.93  41.93  20.97  20.97  20.97  64.91
## [411]  64.91  64.91  78.21  78.21  78.21  11.54  11.54  11.54  44.29  44.29
## [421]  44.29  44.29  43.05  43.05  43.05  53.99  53.99  53.99  35.83  35.83
## [431]  35.83  45.38  45.38  45.38  77.90  77.90  77.90  63.03  63.03  63.03
## [441]  63.03  81.73  81.73  81.73  43.87  43.87  43.87  68.18  68.18  68.18
## [451]  65.51  65.51  65.51  11.26  11.26  11.26  57.39  57.39  57.39  23.17
## [461]  23.17  23.17  30.17  30.17  30.17  78.53  78.53  78.53  53.09  53.09
## [471]  53.09  67.96  67.96  62.14  62.14  62.14  62.14  44.89  44.89  44.89
## [481]  44.89  44.89  74.43  74.43  74.43  37.07  37.07  37.07  43.83  43.83
## [491]  43.83  46.03  46.03  20.05  65.93  65.93  65.93  73.45  73.45  73.45
## [501]  56.84  56.84  56.84  75.83  75.83  72.07  72.07  61.86  61.86  61.86
## [511]  44.38  44.38  44.38  78.18  63.75  63.75  63.75  64.01  64.01  64.01
## [521]  43.23  43.23  43.23  10.28  10.28  10.28  64.81  64.81  62.05  62.05
## [531]  62.05  62.05  60.86  60.86  60.86  54.05  54.05  54.05  65.69  65.69
## [541]  65.69  38.08  38.08  38.08  45.46  45.46  45.46  57.28  57.28  57.28
## [551]  57.28  57.28  46.59  46.59  46.59  70.50   1.29   1.29   1.29  76.90
## [561]  76.90  76.90  16.21  16.21  16.21   2.63   2.63   2.63   2.63  43.82
## [571]  43.82  43.82  39.20  39.20  39.20  39.20  39.20  47.14  47.14  47.14
## [581]  59.15  59.15  59.15  61.73  61.73  61.73  59.55  59.55  59.55  64.26
## [591]  64.26  64.26  43.26  43.26  43.26  23.48  23.48  42.42  42.42  42.42
## [601]  44.93  44.93  44.93  24.43  24.43  24.43  38.90  38.90  38.90   4.41
## [611]   4.41   4.41  42.52  42.52  42.52  45.77  45.77  45.77  39.55  39.55
## [621]  39.55  10.70  10.70  10.70  57.36  57.36  56.39  56.39  56.39  72.20
## [631]  72.20  72.20  68.22  68.22  68.22  14.23  14.23  14.23  44.62  44.62
## [641]  44.62  20.97  20.97  20.97  20.45  20.45
## 
## -----------
## column Midcap_Holding 
##   [1]  2.17  2.17  2.17  7.47  7.47  7.47  7.89  7.89  7.89 63.77 63.77 63.77
##  [13] 14.01 14.01 14.01  7.01  7.01  7.01 14.89 14.89 14.89  5.92  5.92  5.92
##  [25]  5.92  5.92 68.90 68.90 68.90  4.56  4.56  4.56 12.54 12.54 12.54 13.05
##  [37] 13.05 13.05 13.05 27.31 27.31 27.31 27.31  6.28  6.28  6.28  6.28  6.28
##  [49]  6.28  6.28 20.83 20.83 20.83 61.39 61.39 61.39 61.39 14.60 14.60 14.60
##  [61] 40.60 40.60 40.60  9.66  9.66  9.66 12.87 12.87 12.87 11.49 11.49 11.49
##  [73] 35.57 35.57 35.57 12.06 12.06 12.06  2.82  2.82  2.82 66.88 66.88 66.88
##  [85] 20.81 20.81 19.45 19.45 19.45  1.22  1.22  1.22 20.38 20.38 20.38 64.77
##  [97] 64.77 64.77 33.02 33.02 33.02  6.94  6.94  6.94 -0.06 -0.06 -0.06 -0.06
## [109] 37.29 37.29 37.29  4.72  4.72  4.72 41.59 41.59 41.59  7.66  7.66  7.66
## [121]  0.71  0.71  0.71 25.24 25.24 22.69 22.69 22.69 42.60 42.60 42.60  9.55
## [133]  9.55  9.55  9.55  9.55  9.55  9.55  9.55 21.90 21.90 21.90  5.58  5.58
## [145]  5.58 67.60 67.60 67.60 12.57 12.57 12.57 22.00 22.00 22.00 25.19 25.19
## [157] 25.19  0.71 19.35 19.35 19.35 13.71 13.71  8.43  8.43  8.43 23.05 23.05
## [169] 23.05 63.67 63.67 63.67 11.16 11.16 11.16 27.24 27.24 27.24 69.30 69.30
## [181] 69.30  1.59  1.59  1.59 32.05 32.05 32.05  9.91  9.91  9.91  9.87  9.87
## [193]  9.87  6.56  6.56  6.56  7.02  7.02  7.02 19.80 19.80 19.80 12.98 12.98
## [205] 12.98 12.98 12.98 12.98 12.98 10.36 10.36 10.36 30.44 30.44 30.44 32.83
## [217] 32.83 32.83 22.23 22.23 22.23 22.23 22.23 22.23 22.23  0.71  0.71  0.71
## [229] 23.79 23.79 23.79 35.20 35.20 35.20 19.88 19.88 19.88 24.15 24.15 24.15
## [241] 12.51 12.51 12.51 31.46 31.46 31.46 65.33 65.33 65.33  7.07  7.07 10.16
## [253] 10.16 10.16 21.46 21.46 21.46 61.60 61.60 61.60 12.25 12.25 12.25 12.25
## [265] 12.25 12.25 12.25  8.37  8.37  8.37 28.41 28.41 28.41 20.67 20.67 20.67
## [277]  7.17  7.17  7.17 12.16 12.16 12.16 22.51 22.51 22.51 49.58 49.58 49.58
## [289] 64.78 64.78 64.78 23.01 23.01 23.01 12.06 12.06 12.06  7.21  7.21  9.57
## [301]  9.57  9.57  0.71  0.71  0.71 64.71 64.71 64.71 12.62 12.62 12.62 13.75
## [313] 13.75 13.75 12.51 12.51 12.51  8.82  8.82  8.82 64.26 64.26 64.26 64.26
## [325] 57.57 57.57 57.57 13.06 13.06 13.06  4.10  4.10  4.10 13.34 13.34 13.34
## [337]  3.63  3.63  3.63  3.63  3.63  3.63  3.63 15.36 15.36 15.36 20.19 20.19
## [349] 20.19 19.89 19.89 19.89 11.51 11.51 11.51 19.37 19.37 19.37 13.31 13.31
## [361] 13.31 11.72 11.72 11.72 37.13 37.13 37.13 24.78 24.78 24.78 20.41 20.41
## [373] 20.41 -0.06 -0.06 -0.06 19.17 19.17 19.17 14.72 14.72 14.72 27.28 27.28
## [385] 27.28 10.53 10.53 10.53 10.53 10.53 26.55 27.53 27.53 27.53 19.99 19.99
## [397] 19.99 19.99 19.99 19.99  5.07  5.07  5.07 19.31 19.31 19.31 21.69 21.69
## [409] 21.69  3.21  3.21  3.21 29.35 29.35 29.35 29.35 33.61 33.61 33.61  7.54
## [421]  7.54  7.54 19.51 19.51 19.51 24.78 24.78 24.78 20.94 20.94 20.94 20.94
## [433] 12.59 12.59 12.59 24.95 24.95 24.95 11.36 11.36 11.36 19.29 19.29 19.29
## [445] 69.80 69.80 69.80 11.24 11.24 11.24 64.50 64.50 64.50 20.58 20.58 20.58
## [457]  6.18  6.18  6.18  9.29  9.29  9.29 23.13 23.13 23.13 23.13 15.18 15.18
## [469] 33.72 33.72 33.72 33.72 13.45 13.45 13.45 34.11 34.11 34.11 10.84 10.84
## [481] 10.84 27.11 27.11 27.11 13.15 13.15 13.15 20.10 20.10  2.35 24.17 24.17
## [493] 24.17 10.23 10.23 10.23 14.36 14.36 14.36 12.15 12.15 14.85 14.85 11.37
## [505] 11.37 11.37 15.45 15.45 15.45 25.13 25.13 25.13 14.00 14.00 14.00 21.69
## [517] 20.39 20.39 20.39 22.73 22.73 22.73 22.22 22.22 22.22 64.74 64.74 64.74
## [529] 26.34 26.34 20.87 20.87 20.87 20.87  2.23  2.23  2.23  3.41  3.41  3.41
## [541] 11.95 11.95 11.95 23.60 23.60 23.60 45.62 45.62 45.62  7.26  7.26  7.26
## [553]  7.26  7.26 33.88 33.88 33.88  2.26  9.25  9.25  9.25 62.91 62.91 62.91
## [565] 33.35 33.35 33.35 33.35 13.09 13.09 13.09 20.31 20.31 20.31 20.31 20.31
## [577] 22.82 22.82 22.82 24.79 24.79 24.79 21.51 21.51 21.51 11.87 11.87 11.87
## [589] 27.76 27.76 27.76 10.35 10.35  8.23  8.23  8.23 25.90 25.90 25.90  3.30
## [601]  3.30  3.30 28.86 28.86 28.86 21.53 21.53 21.53 32.00 32.00 32.00 61.02
## [613] 61.02 61.02 26.09 26.09 26.09 14.02 14.02 29.56 29.56 29.56 19.44 19.44
## [625]  9.90  9.90  9.90 35.19 35.19 35.19  8.09  8.09  8.09 69.12 69.12 69.12
## [637] 20.32 20.32 20.32 65.94 65.94 65.94 34.62 34.62
## 
## -----------
## column Equity_Holding 
##   [1]  78.36  78.36  78.36  69.36  69.36  69.36  90.40  90.40  90.40  69.27
##  [11]  69.27  69.27  74.02  74.02  74.02  92.16  92.16  92.16  67.14  67.14
##  [21]  67.14  66.40  66.40  66.40  66.40  66.40  95.91  95.91  95.91  95.91
##  [31]  99.07  99.07  99.07  85.68  85.68  85.68  98.21  98.21  98.21  99.31
##  [41]  99.31  99.31  99.31  99.40  99.40  99.40  99.40  70.56  70.56  70.56
##  [51]  70.56  70.56  70.56  70.56  83.37  83.37  83.37  98.68  98.68  98.68
##  [61]  98.68  69.91  69.91  69.91  83.18  83.18  83.18  98.90  98.90  98.90
##  [71]  99.20  99.20  99.20  65.94  65.94  65.94  98.53  98.53  98.53  93.15
##  [81]  93.15  93.15  91.93  91.93  91.93  92.48  92.48  92.48  86.01  86.01
##  [91]  86.01  92.76  92.76  92.76  96.82  96.82  61.92  61.92  61.92  90.34
## [101]  90.34  90.34  97.79  97.79  97.79  88.43  88.43  88.43  97.68  97.68
## [111]  97.68  96.85  96.85  96.85  -0.27  -0.27  -0.27  -0.27  98.67  98.67
## [121]  98.67  89.84  89.84  89.84  66.56  66.56  66.56  99.95  99.95  99.95
## [131]  99.18  99.18  89.05  89.05  89.05  97.69  97.69  97.69  97.73  97.73
## [141]  97.73  65.13  65.13  65.13  65.13  65.13  65.13  65.13  65.13  97.09
## [151]  97.09  97.09  70.56  70.56  70.56  93.80  93.80  93.80  93.71  93.71
## [161]  93.71  92.19  92.19  92.19  96.88  96.88  96.88  99.97  69.67  69.67
## [171]  69.67  89.74  89.74  97.34  97.34  97.34  96.75  96.75  96.75  98.31
## [181]  98.31  98.31  98.06  98.06  98.06  67.42  67.42  67.42  96.93  96.93
## [191]  96.93  67.33  67.33  67.33  97.48  97.48  97.48  97.17  97.17  97.17
## [201]  97.92  97.92  97.92  66.93  66.93  66.93  66.93  66.93  98.31  98.31
## [211]  98.31  40.85  40.85  40.85  40.85  40.85  40.85  40.85  91.27  91.27
## [221]  91.27  97.84  97.84  97.84  93.66  93.66  93.66  67.30  67.30  67.30
## [231]  67.30  67.30  67.30  67.30  99.77  99.77  99.77  96.54  96.54  96.54
## [241]  98.15  98.15  98.15  97.03  97.03  97.03  97.55  97.55  97.55  99.29
## [251]  99.29  99.29  93.45  93.45  93.45  97.89  97.89  89.98  89.98  89.98
## [261]  58.21  58.21  58.21  96.39  96.39  96.39  96.71  96.71  96.71  81.77
## [271]  81.77  81.77  81.77  81.77  81.77  81.77  93.35  93.35  93.35  78.84
## [281]  78.84  78.84  88.78  88.78  88.78  97.99  97.99  97.99  68.00  68.00
## [291]  68.00  97.27  97.27  97.27  97.94  97.94  97.94  95.83  95.83  95.83
## [301]  96.67  96.67  96.67  93.40  93.40  66.67  66.67  66.67  99.94  99.94
## [311]  99.94  98.33  98.33  98.33  78.55  78.55  78.55  74.25  74.25  74.25
## [321]  65.25  65.25  65.25  97.80  97.80  97.80  97.22  97.22  97.22  97.22
## [331]  99.35  99.35  99.35  97.72  97.72  97.72  92.48  92.48  92.48  99.01
## [341]  99.01  99.01  91.57  91.57  91.57  93.37  93.37  93.37  22.69  22.69
## [351]  22.69  22.69  22.69  22.69  22.69  66.18  66.18  66.18  99.02  99.02
## [361]  99.02  98.91  98.91  98.91  70.10  70.10  70.10  65.61  65.61  65.61
## [371]  76.04  76.04  76.04  96.83  96.83  96.83  98.92  98.92  98.92  -0.55
## [381]  -0.55  -0.55  66.41  66.41  66.41  97.22  97.22  97.22  92.25  92.25
## [391]  92.25 100.04  97.40  97.40  90.53  90.53  90.53  90.53  90.53  99.94
## [401]  75.88  75.88  75.88  68.78  68.78  68.78  68.78  68.78  68.78  38.15
## [411]  38.15  38.15  96.94  96.94  96.94  97.96  97.96  97.96  99.90  99.90
## [421]  99.90  92.20  92.20  92.20  99.70  99.70  99.70  99.70  96.86  96.86
## [431]  96.86  68.77  68.77  68.77  91.69  91.69  91.69  96.29  96.29  96.29
## [441]  96.74  96.74  96.74  97.80  97.80  97.80  97.80  97.67  97.67  97.67
## [451]  96.01  96.01  96.01  96.17  96.17  96.17  99.60  99.60  99.60  89.16
## [461]  89.16  89.16  75.92  75.92  75.92  98.31  98.31  98.31  82.71  82.71
## [471]  82.71  97.70  97.70  97.70  89.41  89.41  89.41  86.10  86.10  86.10
## [481]  99.22  99.22  99.22  99.22  97.71  97.71  97.71  97.71  65.07  65.07
## [491]  65.07  67.96  67.96  67.96  67.96  67.96  97.15  97.15  97.15  97.98
## [501]  97.98  97.98  67.72  67.72  67.72  97.21  97.21  26.22  95.84  95.84
## [511]  95.84  91.04  91.04  91.04  97.28  97.28  97.28  99.60  99.60  81.29
## [521]  81.29  81.29  64.85  64.85  64.85  95.99  95.99  95.99  99.87  98.24
## [531]  98.24  98.24  99.23  99.23  99.23  98.20  98.20  98.20  97.77  97.77
## [541]  96.31  96.31  96.31  96.31  66.63  66.63  66.63  63.48  63.48  63.48
## [551]  98.84  98.84  98.84  98.17  98.17  98.17  65.71  65.71  65.71  65.71
## [561]  65.71  87.87  97.06  97.06  97.06  98.82  98.82  98.82  93.47  93.47
## [571]  93.47  97.88  97.88  97.88  97.88  73.30  73.30  73.30  73.30  73.30
## [581]  97.84  97.84  97.84  98.01  98.01  98.01  98.35  98.35  98.35  96.45
## [591]  96.45  96.45  97.39  97.39  97.39  97.58  97.58  97.58  37.56  37.56
## [601]  97.92  97.92  97.92  64.88  64.88  64.88  93.58  93.58  93.58  45.35
## [611]  45.35  45.35  85.98  85.98  85.98  96.04  96.04  96.04  73.30  73.30
## [621]  73.30  99.41  99.41  99.41  87.19  87.19  87.19  96.97  96.97  96.97
## [631]  90.57  90.57  98.40  98.40  98.40  98.23  98.23  98.23  97.12  97.12
## [641]  97.12  99.19  99.19  99.19  96.36  96.36  96.36  97.26  97.26  97.26
## [651]  99.03  99.03
## 
## -----------
## column Smallcap_Holding 
##   [1]  2.67  2.67  2.67  7.67  7.67  7.67  7.26  7.26  7.26 17.99 17.99 17.99
##  [13]  7.51  7.51  7.51  1.00  1.00  1.00  0.20  0.20  0.20  2.11  2.11  2.11
##  [25]  2.71  2.71  2.71  2.71  2.71 65.09 65.09 65.09 65.09 13.40 13.40 13.40
##  [37]  5.51  5.51  5.51  3.50  3.50  3.50  1.95  1.95  1.95  2.80  2.80  2.80
##  [49]  2.80 26.96 26.96 26.96 26.96  6.21  6.21  6.21  6.21  6.21  6.21  6.21
##  [61]  9.27  9.27  9.27  3.06  3.06  3.06  4.41  4.41  4.41  9.69  9.69  9.69
##  [73]  4.60  4.60  4.60  3.67  3.67  3.67 15.42 15.42 15.42 70.71 70.71 70.71
##  [85] 10.28 10.28 10.28 83.20 83.20 83.20  1.95  1.95  2.63  2.63  2.63 13.45
##  [97] 13.45 13.45  4.46  4.46  4.46  7.11  7.11  7.11  7.04  7.04  7.04  0.18
## [109]  0.18  0.18  0.18 14.24 14.24 14.24 66.68 66.68 66.68 15.18 15.18 15.18
## [121] 10.56 10.56 10.56  2.97  2.97  2.97 15.88 15.88 66.34 66.34 66.34 15.48
## [133] 15.48 15.48 13.82 13.82 13.82  1.29  1.29  1.29  1.29  1.29  1.29  1.29
## [145]  1.29 28.77 28.77 28.77 16.89 16.89 16.89 18.16 18.16 18.16 11.82 11.82
## [157] 11.82 31.73 31.73 31.73 13.01 13.01 13.01  3.65  3.65  3.65 71.22 71.22
## [169] 25.09 25.09 25.09 15.18 15.18 15.18 13.55 13.55 13.55  7.97  7.97  7.97
## [181] 25.06 25.06 25.06 14.58 14.58 14.58  0.52  0.52  0.52 10.21 10.21 10.21
## [193]  6.92  6.92  6.92 13.12 13.12 13.12  0.01  0.01  0.01 88.10 88.10 88.10
## [205]  9.26  9.26  9.26  3.01  3.01  3.01  3.01  3.01 72.15 72.15 72.15 12.07
## [217] 12.07 12.07 12.07 12.07 12.07 12.07  7.43  7.43  7.43 26.96 26.96 26.96
## [229]  4.10  4.10  4.10  3.76  3.76  3.76  3.76  3.76  3.76  3.76 70.12 70.12
## [241] 70.12 23.68 23.68 23.68 15.58 15.58 15.58 17.60 17.60 17.60 19.74 19.74
## [253] 19.74 29.76 29.76 29.76  9.04  9.04  9.04 15.32 15.32 22.99 22.99 22.99
## [265]  4.48  4.48  4.48  3.16  3.16  3.16 16.39 16.39 16.39  5.21  5.21  5.21
## [277]  5.21  5.21  5.21  5.21 78.40 78.40 78.40  3.84  3.84  3.84 11.25 11.25
## [289] 11.25  2.28  2.28  2.28 10.53 10.53 10.53 65.90 65.90 65.90 21.11 21.11
## [301] 21.11 15.87 15.87 15.87 11.93 11.93 11.93  7.50  7.50  7.50  4.64  4.64
## [313]  8.90  8.90  8.90 20.62 20.62 20.62  6.46  6.46  6.46  4.69  4.69  4.69
## [325] 19.75 19.75 19.75  3.32  3.32  3.32 12.13 12.13 12.13 12.13 25.95 25.95
## [337] 25.95  8.67  8.67  8.67 88.38 88.38 88.38 25.11 25.11 25.11 16.41 16.41
## [349] 16.41 16.67 16.67 16.67 11.55 11.55 11.55 11.55 11.55 11.55 11.55  4.87
## [361]  4.87  4.87 15.64 15.64 15.64 68.74 68.74 68.74  2.67  2.67  2.67 29.28
## [373] 29.28 29.28  5.16  5.16  5.16  7.02  7.02  7.02 23.92 23.92 23.92 23.36
## [385] 23.36 23.36 12.86 12.86 12.86 -0.01 -0.01 -0.01  9.12  9.12  9.12 16.39
## [397] 16.39 16.39  5.96  5.96  5.96 31.70 31.70 31.70  8.85  8.85 10.04 10.04
## [409] 10.04 10.04 10.04  6.71  6.71  6.71  6.87  6.87  6.87  6.87  6.87  6.87
## [421]  3.32  3.32  3.32 12.73 12.73 12.73 28.08 28.08 28.08 77.45 77.45 77.45
## [433] 26.05 26.05 26.05 26.05 20.19 20.19 20.19  7.23  7.23  7.23 36.36 36.36
## [445] 36.36 26.13 26.13 26.13  1.36  1.36  1.36 13.83 13.83 13.83 13.83  1.57
## [457]  1.57  1.57 25.01 25.01 25.01 16.63 16.63 16.63 14.79 14.79 14.79  8.10
## [469]  8.10  8.10  7.29  7.29  7.29 10.64 10.64 10.64 33.63 33.63 33.63 28.98
## [481] 28.98 28.98  4.70  4.70  4.70 23.71 23.71 23.71 23.66 23.66 23.66 23.66
## [493] 11.48 11.48  1.85  1.85  1.85  1.85  1.49  1.49  1.49  4.87  4.87  4.87
## [505]  4.87  4.87 11.38 11.38 11.38 10.41 10.41 10.41 33.81 33.81 33.81 10.74
## [517] 10.74 10.74 31.08 31.08  3.81  5.74  5.74  5.74  7.36  7.36  7.36 26.08
## [529] 26.08 26.08  7.30  7.30 12.68 12.68  8.06  8.06  8.06  5.02  5.02  5.02
## [541] 21.66 21.66 21.66 28.58 28.58 28.58 14.11 14.11 14.11 12.49 12.49 12.49
## [553] 28.50 28.50 28.50 23.18 23.18 23.18  6.62  6.62 13.39 13.39 13.39 13.39
## [565]  3.55  3.55  3.55  6.02  6.02  6.02 21.20 21.20 21.20 32.77 32.77 32.77
## [577]  7.08  7.08  7.08  1.16  1.16  1.16  1.16  1.16 13.70 13.70 13.70 15.12
## [589] 86.52 86.52 86.52  5.96  5.96  5.96 14.34 14.34 14.34 61.91 61.91 61.91
## [601] 61.91 38.19 38.19 38.19 13.79 13.79 13.79 13.79 13.79 27.88 27.88 27.88
## [613] 23.23 23.23 23.23 11.83 11.83 11.83 15.39 15.39 15.39 21.26 21.26 21.26
## [625] 26.56 26.56 26.56  3.72  3.72 37.72 37.72 37.72 11.72 11.72 11.72 43.25
## [637] 43.25 43.25  3.15  3.15  3.15 18.11 18.11 18.11 73.75 73.75 73.75 24.67
## [649] 24.67 24.67  5.99  5.99  5.99 27.85 27.85 27.85 15.48 15.48 15.48 21.39
## [661] 21.39 21.39 19.19 19.19 21.58 21.58 21.58 25.85 25.85 25.85 17.89 17.89
## [673] 17.89 75.53 75.53 16.13 16.13 16.13 10.39 10.39 10.39 22.88 22.88 22.88
## [685] 13.01 13.01 13.01 19.77 19.77 19.77  8.07  8.07  8.07 43.96 43.96
## 
## -----------
## column Debt_Holding 
##   [1] 13.94 13.94 13.94 25.78 25.78 25.78  0.51  0.51  0.51 13.31 13.31 13.31
##  [13] 18.82 18.82 18.82  2.80  2.80  2.80  5.62  5.62  5.62 19.97 19.97 19.97
##  [25] 19.97 19.97  1.46  1.46  1.46  1.46  1.78  1.78  1.78  0.75  0.75  0.75
##  [37] 21.38 21.38 21.38 21.38 21.38 21.38 21.38  8.12  8.12  8.12  1.03  1.03
##  [49]  1.03 26.01 26.01 26.01  0.48  0.48  0.48  0.73  0.73  0.73  0.97  0.97
##  [61]  0.97  2.13  2.13  2.13 11.97 11.97 11.97  0.12  0.12  0.12  0.61  0.61
##  [73]  0.61 10.38 10.38 10.38 10.38  1.21  1.21  1.21  0.83  0.83  0.83  0.19
##  [85]  0.19  0.19 10.15 10.15 10.15  1.73  1.73  1.73 14.84 14.84 14.84 14.84
##  [97] 14.84 14.84 14.84 14.84 23.50 23.50 23.50  0.77  0.77  0.77  1.61  1.61
## [109]  1.61  0.53  0.53  0.53  0.22  0.22  0.22 17.08 17.08 17.08  0.26  0.26
## [121]  0.26  0.25  0.25  0.25 12.12 12.12 12.12 21.47 21.47 21.47  0.31  0.31
## [133]  0.31  8.13  8.13  8.13  8.13  8.13 35.87 35.87 35.87 35.87 35.87 35.87
## [145] 35.87  1.71  1.71  1.71  0.45  0.45  0.45 13.90 13.90 13.90 13.90 13.90
## [157] 13.90 13.90  1.17  1.17  1.17  0.37  0.37  0.37 16.19 16.19 16.19  7.16
## [169]  7.16  7.16  7.16  7.16  7.16  7.16  0.39  0.39  0.39  0.40  0.40  0.40
## [181] 10.52 10.52 10.52  0.20  0.20  0.20  0.20  0.20  0.20 27.86 27.86 27.86
## [193] 20.73 20.73 20.73 12.36 12.36 12.36  0.21  0.21  0.21  0.15  0.15  0.15
## [205]  0.15  0.04  0.04  0.04 69.48 69.48 69.48 69.48 69.48 69.48 69.48 11.14
## [217] 11.14 11.14  0.26  0.26  0.26 16.89 16.89 16.89  0.16  0.16  0.16 26.99
## [229] 26.99 26.99 21.59 21.59 21.59  0.37  0.37  0.37 17.89 17.89 17.89 19.48
## [241] 19.48 19.48  0.05  0.05  0.05  0.61  0.61  0.61 18.94 18.94 18.94  1.82
## [253]  1.82  1.82  1.82  1.82  1.82 11.16 11.16 11.16  0.29  0.29  0.29  1.66
## [265]  1.66  1.66 21.55 21.55 21.55  1.12  1.12  1.12  0.13  0.13  0.13  0.99
## [277]  0.99  0.99  4.39  4.39  4.39 19.68 19.68 19.68  5.35  5.35  5.35  2.86
## [289]  2.86  2.86 10.79 10.79 10.79 14.83 14.83 14.83 14.83 14.83  0.03  0.03
## [301]  0.03 26.37 26.37 26.37 29.41  0.07  0.07  0.07  0.64  0.64  0.64  0.23
## [313]  0.23  4.75  4.75  4.75 10.48 10.48 10.48  0.30  0.30  0.30  0.11  0.11
## [325]  0.11  0.11 21.57 21.57 21.57 11.23 11.23 11.23  0.33  0.33  0.33 23.34
## [337] 23.34 23.34 23.34 23.34  8.66  0.09  0.09  0.09 22.31 22.31 22.31 22.31
## [349] 22.31 59.37 59.37  0.09  0.09  0.09  9.63  9.63  9.63  0.10  0.10  0.10
## [361] 10.56 10.56 10.56 19.23 19.23 19.23  3.62  3.62  3.62  1.17  1.17  1.17
## [373]  0.07  0.07  0.07
## 
## -----------
## column Average_Maturity 
##   [1]  7.60  7.60  7.60  5.56  5.56  5.56  3.39  3.39  3.39  6.73  6.73  6.73
##  [13]  6.73  6.73  8.19  8.19  8.19  8.19  8.19  8.19  8.19  3.88  3.88  3.88
##  [25]  6.00  6.00  6.00  6.00  6.00  7.50  7.50  7.50  7.50  7.50  7.01  7.01
##  [37]  7.01  7.01  9.67  9.67  9.67  4.53  4.53  4.53  2.92  2.92  2.92  2.92
##  [49]  2.92  3.14  3.14  3.14 12.90 12.90 12.90  3.38  3.38  3.38  4.16  4.16
##  [61]  4.16  3.44  3.44  3.44  2.97  2.97  2.97  2.97  2.97  2.97  2.97  4.12
##  [73]  4.12  4.12  4.12  4.12  3.49  3.49  3.49  3.49  3.49  3.49  3.49 14.03
##  [85] 14.03 14.03 14.03 14.03 14.03 14.03 14.03 14.03  2.97  2.97  2.97  2.97
##  [97]  2.97  2.97 11.13 11.13 11.13 13.66 13.66 13.66  3.93  3.93  3.93 13.82
## [109] 13.82 13.82 20.72 20.72 20.72  3.81  3.81  3.81  3.81  3.81  3.81  3.81
## [121]  2.99  2.99  2.99  2.99  2.99  3.59  3.59  3.59  7.30  7.30  7.30  5.13
## [133]  5.13  5.13  5.94  5.94  5.94  3.29  3.29  3.29  4.52  4.52  4.52  4.52
## [145]  4.52  3.42  3.42  3.42  3.42  3.42  3.42  3.42  3.42  3.42  3.42  3.42
## [157] 13.69 13.69 13.69  3.69  3.69  3.69  3.69  3.69  3.69  3.69  3.69  5.89
## [169]  5.89  5.89  3.00  3.00  3.00  3.00  3.00  3.00  3.00 22.05 22.05 22.05
## [181] 22.05 22.05 22.05 22.05 22.05 22.05 22.05 22.05  4.46  4.46  4.46  3.07
## [193]  3.07  3.07  3.07  3.07  3.07  6.45  6.45  6.45  9.26  9.26  9.26  3.77
## [205]  3.77  3.77  3.77  3.77  4.05  4.05  4.05  9.40  9.40  9.40  4.03  4.03
## [217]  4.03  4.03  4.03  4.03  4.03  4.03  4.03  3.65  3.65  3.65  3.43  3.43
## [229]  3.43  6.48  6.48  6.48  4.34  4.34  4.34  4.13  4.13  4.13  4.13  4.13
## [241]  4.13  4.13  4.13  4.13  4.04  4.04  4.04  4.11  4.11  4.11  4.11  4.11
## [253]  4.11  4.11  4.11  4.11  4.11  9.53  9.53  3.15  3.15  3.15  5.67  5.67
## [265]  5.67  2.91  2.91  2.91  2.91  2.91
## 
## -----------
## column Sortino_Ratio 
##   [1] 1.05 1.06 1.37 1.37 1.03 1.03 1.03 1.04 1.04 1.04 1.04 0.89 1.04 1.01 1.01
##  [16] 1.04 0.90 0.90 0.90 0.89 0.89 0.91 0.94 0.94 0.93 0.94 0.93 0.94 0.94 0.93
##  [31] 0.99 0.99 0.99 0.99 0.99 0.99 0.99 1.05 1.06 1.01 0.90 0.90 0.89 0.90 0.90
##  [46] 0.90 0.90 0.90 0.90 0.90 0.90 0.90 0.90 0.89 0.90 0.90 1.03 1.03 1.05 1.02
##  [61] 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.06 1.07 1.02 1.02 1.00
##  [76] 1.00 1.00 1.00 1.00 1.00 0.90 0.90 0.89 0.95 0.95 0.95 0.95 1.18 1.18 1.18
##  [91] 1.20 1.18 1.18 0.95 0.95 1.04 1.05 0.99 0.99 0.99 1.01 1.01 1.01 1.01 1.01
## [106] 1.01 1.20 1.21 1.21 1.00 0.99 1.00 1.00 1.02 1.02 1.02 0.94 0.94 1.06 1.08
## [121] 1.08 1.06 1.06 0.95 0.95 0.96 0.95 1.00 1.00 1.00 0.89
## 
## -----------
## column Sharpe_Ratio 
##  [1] -28.62 -28.62 -28.63 -28.62   7.12   7.19   7.14   7.14   7.08   7.15
## [11]   7.14   7.14   7.39   7.39   7.38   7.39   7.38   7.39   7.38   7.38
## [21]   7.28   7.33   7.19   7.19   7.19   7.19 -28.07 -28.42   7.97   7.97
## [31]   7.97   8.00   7.97   7.97  -5.69 -23.18 -23.91 -23.97 -23.97  -5.69
## [41] -27.26 -27.26 -20.87 -26.98 -27.25 -12.94 -27.12  -6.06 -24.00 -24.03
## [51] -14.85 -24.02 -24.00 -24.03   7.57   7.55   7.55   7.29   7.30   7.29
## [61]   7.29 -26.59 -26.59 -26.59 -26.59   7.13   7.21   7.21   7.13   7.13
## [71]   7.07   7.06   7.07 -26.64 -25.28 -10.86 -26.64 -25.28 -26.64
## 
## -----------
## column Average_YTM 
##   [1] 6.18 6.18 6.18 6.18 6.18 6.18 6.21 6.21 6.21 7.26 7.26 7.26 7.26 7.26 7.24
##  [16] 7.24 7.24 7.24 7.24 5.60 5.60 5.60 6.06 6.06 6.06 6.06 6.06 6.06 6.06 6.06
##  [31] 6.06 6.06 6.06 6.06 4.97 4.97 4.97 4.97 6.10 6.10 6.10 6.10 6.03 6.03 6.03
##  [46] 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 7.29 7.29 7.29
##  [61] 7.41 7.41 7.41 5.73 5.73 5.73 6.18 6.18 6.18 6.18 6.18 6.18 6.01 6.01 6.01
##  [76] 6.01 6.01 6.01 6.01 6.01 6.01 7.29 7.29 7.29 7.25 7.25 7.25 6.12 6.12 6.12
##  [91] 6.12 6.12 6.12 7.74 7.74 7.74 7.74 7.74 7.74 7.74 7.74 7.74 7.26 7.26 7.26
## [106] 6.25 6.25 6.25 6.25 6.25 6.25 6.25 5.88 5.88 5.88 5.88 5.88 5.88 5.88 6.25
## [121] 6.25 6.25 6.25 5.04 5.04 5.04 5.04 5.04 5.04 5.04 5.04 5.05 5.05 7.27 7.27
## [136] 7.27 7.27 7.27 7.27 7.27 6.04 6.04 6.04 6.04 6.04 6.04 6.04 6.04 6.04 6.04
## [151] 6.04 6.04 6.04 5.04 5.04 5.04 5.04 5.04 5.04 7.39 7.39 7.39 7.56 7.56 7.56
## [166] 7.56 7.56 7.56 7.56 6.15 6.15 6.15 7.29 7.29 7.29 7.29 7.29 4.99 4.99 4.99
## [181] 4.99 4.99 6.14 5.97 5.97 5.97 7.28 7.28 7.28 7.28 7.28 7.28 7.28 7.40 7.40
## [196] 7.40 7.40 7.40 7.40 7.40 7.40 7.40 7.40 7.40 6.20 6.20 6.15 6.15 6.15 6.15
## [211] 6.15 6.15 6.15 6.05 6.05 6.05 6.05 6.05 6.05 6.05 6.05 7.46 7.46 7.46 5.06
## [226] 5.06 5.98 5.98 5.98 8.31 8.31 8.31 8.31 8.31 8.03 8.03 8.03 5.05 5.05 5.05
## [241] 5.05 5.05 5.05 5.05 6.15 7.58 7.58 7.58 6.07 6.07 6.07 5.88 5.88 5.88 5.61
## [256] 5.61 5.61 8.41 8.41 8.41 5.85 5.85 5.85 7.33 7.33 7.33 7.33 7.33 6.09 6.09
## [271] 6.09 6.09 6.09 6.09 6.09 6.09 7.99 7.99 7.99 4.95 4.95 4.95 4.95 5.92 5.92
## [286] 5.92 5.92 5.92 7.32 7.32 7.32 5.10 5.10 5.10 5.01 5.01 5.01 5.01 5.01 5.01
## 
## -----------
## column Time_since_inception 
##  [1]  6  6 10 23 23 23 21 21 21  9 22 22 22 20 20 20 21 21 21 21 21 21
## 
## -----------
## column Lockin 
##  [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## 
## -----------
## column Minimum_SIP 
##   [1] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
##  [19] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
##  [37] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
##  [55] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 300 300 300 500
##  [73] 500 500 500 500 500 500 500 500 500 500 500 500 500 600 600 600 500 500
##  [91] 500 500 300 300 300 300 300 300 300 300 300 500 500 500 500 500 500 500
## [109] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [127] 500 500 750 750 750 500 500 500 500 500 500 500 500 500 500 500 500 500
## [145] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [163] 500 500 500 500
## 
## -----------
# small visaulization to understand outliers
exclude_cols <- c("AUM", "NAV", "Minimum_Lumpsum")

mf_data |>
  select(-all_of(exclude_cols)) |>
  pivot_longer(cols = where(is.numeric)) |>
  ggplot(aes(x = name, y = value)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90))
## Error : The fig.showtext code chunk option must be TRUE

# removing outliers
mf_data_cleaned <- mf_data
for (col in numeric_cols) {
  q1 <- quantile(mf_data[[col]], 0.25, na.rm = TRUE)
  q3 <- quantile(mf_data[[col]], 0.75, na.rm = TRUE)
  iqr_val <- IQR(mf_data[[col]], na.rm = TRUE)

  lower <- q1 - 1.5 * iqr_val
  upper <- q3 + 1.5 * iqr_val

  mf_data_cleaned <- mf_data_cleaned[
    (mf_data_cleaned[[col]] >= lower & mf_data_cleaned[[col]] <= upper) |
      is.na(mf_data_cleaned[[col]]),
  ]
}
# finally data is cleaned but still contains na we
# need detect and treat them

sum(is.na(mf_data_cleaned))
## [1] 23
mf_data[is.na(mf_data)] <- 0

# finally store the data into file
write.csv(
  mf_data_cleaned,
  "D:/semester_projects/R/data_preprocessing_phase/mutual_fund.csv",
  row.names = FALSE
)

Phase 2: Exploratory Data Analysis.

2.1 What are the central tendencies and spread of returns and risk metrics.

# loading the data
mf_data <- read_csv(
  "D:/semester_projects/R/data_preprocessing_phase/mutual_fund.csv",
  show_col_types = FALSE
)
# Central Tendencies of returns
# CAGR 3Y
mean_cagr_3y <- mf_data |> summarise(mean_cagr_3y = mean(CAGR_3Y, na.rm = TRUE))

median_cagr_3y <- mf_data |>
  summarise(
    median_cagr_3y = median(CAGR_3Y, na.rm = TRUE)
  )

sd_cagr_3y <- mf_data |> summarise(SD = sd(CAGR_3Y, na.rm = TRUE))

iqr_cagr_3y <- mf_data |> summarise(IQR = IQR(CAGR_3Y, na.rm = TRUE))

# CAGR 5Y
mean_cagr_5y <- mf_data |> summarise(mean_cagr_5y = mean(CAGR_5Y, na.rm = TRUE))

median_cagr_5y <- mf_data |> summarise(
  median_cagr_5y = median(CAGR_5Y, na.rm = TRUE)
)

sd_cagr_5y <- mf_data |> summarise(SD = sd(CAGR_5Y, na.rm = TRUE))

iqr_cagr_5y <- mf_data |> summarise(IQR = IQR(CAGR_5Y, na.rm = TRUE))

# Absolute_Returns_3M
mean_absolute_returns_3m <- mf_data |>
  summarize(Absolute_Returns_3M = mean(Absolute_Returns_3M, na.rm = TRUE))

median_absolute_returns_3m <- mf_data |>
  summarize(Absolute_Returns_3M = median(Absolute_Returns_3M, na.rm = TRUE))

sd_absolute_returns_3m <- mf_data |> summarise(
  SD = sd(Absolute_Returns_3M, na.rm = TRUE)
)

iqr_absolute_returns_3m <- mf_data |> summarise(
  IQR = IQR(Absolute_Returns_3M, na.rm = TRUE)
)

# Absolute_Returns_6M
mean_absolute_returns_6m <- mf_data |>
  summarize(Absolute_Returns_6M = mean(Absolute_Returns_6M, na.rm = TRUE))

median_absolute_returns_6m <- mf_data |>
  summarize(Absolute_Returns_6M = median(Absolute_Returns_6M, na.rm = TRUE))

sd_absolute_returns_6m <- mf_data |> summarise(
  SD = sd(Absolute_Returns_6M, na.rm = TRUE)
)

iqr_absolute_returns_6m <- mf_data |> summarise(
  IQR = IQR(Absolute_Returns_6M, na.rm = TRUE)
)

# Absolute_Returns_1Y
mean_absolute_returns_1y <- mf_data |>
  summarize(Absolute_Returns_1y = mean(Absolute_Returns_1Y, na.rm = TRUE))

median_absolute_returns_1y <- mf_data |>
  summarize(Absolute_Returns_1y = median(Absolute_Returns_1Y, na.rm = TRUE))

sd_absolute_returns_1y <- mf_data |> summarise(
  SD = sd(Absolute_Returns_1Y, na.rm = TRUE)
)
iqr_absolute_returns_1y <- mf_data |> summarise(
  IQR = IQR(Absolute_Returns_1Y, na.rm = TRUE)
)
central_tendency_and_spread <- data.frame(
  Metric = c("CAGR_3Y", "CAGR_5Y", "3M", "6M", "1Y"),
  Mean = c(
    mean_cagr_3y$mean_cagr_3y,
    mean_cagr_5y$mean_cagr_5y,
    mean_absolute_returns_3m$Absolute_Returns_3M,
    mean_absolute_returns_6m$Absolute_Returns_6M,
    mean_absolute_returns_1y$Absolute_Returns_1y
  ),
  Median = c(
    median_cagr_3y$median_cagr_3y,
    median_cagr_5y$median_cagr_5y,
    median_absolute_returns_3m$Absolute_Returns_3M,
    median_absolute_returns_6m$Absolute_Returns_6M,
    median_absolute_returns_1y$Absolute_Returns_1y
  ),
  SD = c(
    sd_cagr_3y$SD,
    sd_cagr_5y$SD,
    sd_absolute_returns_3m$SD,
    sd_absolute_returns_6m$SD,
    sd_absolute_returns_1y$SD
  ),
  IQR = c(
    iqr_cagr_3y$IQR,
    iqr_cagr_5y$IQR,
    iqr_absolute_returns_3m$IQR,
    iqr_absolute_returns_6m$IQR,
    iqr_absolute_returns_1y$IQR
  ),
  stringsAsFactors = FALSE
)
central_tendency_and_spread
##    Metric      Mean Median       SD    IQR
## 1 CAGR_3Y 2.9014706   0.40 3.558050 7.3950
## 2 CAGR_5Y 2.3390294   0.40 2.906254 6.1200
## 3      3M 0.2169412  -0.05 0.731940 1.1275
## 4      6M 0.6809412  -0.04 1.441582 2.6600
## 5      1Y 1.9766471   0.00 3.299085 6.6325

2.2 How are returns distributed.

hist(mf_data$CAGR_3Y, breaks = 30, main = "Distribution of Returns")
## Error : The fig.showtext code chunk option must be TRUE

plot(density(mf_data$CAGR_3Y, na.rm = TRUE))
## Error : The fig.showtext code chunk option must be TRUE

skewness(mf_data$CAGR_3Y, na.rm = TRUE)
## [1] 0.4708283

2.3 What is the relationship between short-term and long-term returns.

ggplot(mf_data, aes(x = Absolute_Returns_1Y, y = CAGR_3Y)) +
  geom_point(alpha = 0.6, color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Short term returns VS Long term returns",
    x = "1-Year Returns (short-term)",
    y = "3-Year CAGR (Long-term)"
  ) +
  theme_minimal()
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'

# 2.4 Are high-return funds consistently associated with higher risk.

ggplot(mf_data, aes(x = Volatility, y = Absolute_Returns_1Y)) +
  geom_point(
    color = "steelblue", alpha = 0.6, size = 1.5
  ) + # Added alpha to see overlapping dots

  # This is the magic line that fixes the labels:
  geom_text_repel(
    aes(label = Plan),
    size = 2.5, # Small but readable font
    max.overlaps = 15, # Only labels the 15 least-crowded points (prevents mess)
    box.padding = 0.35, # Adds space around labels
    point.padding = 0.5, # Adds space between label and dot
    segment.color = "grey50" # Adds a tiny line pointing from text to dot
  ) +
  geom_smooth(
    method = "lm",
    se = FALSE, linetype = "dashed",
    color = "red", size = 0.8
  ) +
  labs(
    title = "Risk vs. Return Analysis (1-Year)",
    subtitle = "Higher volatility generally
    trends with higher absolute returns",
    x = "Volatility (Risk %)",
    y = "Absolute 1-Year Return (%)",
    caption = "Red line represents the expected risk-return tradeoff"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'

# 2.5 What patterns emerge from scatter plots of risk vs return.

mf_long <- mf_data |>
  select(
    Volatility, CAGR_3Y,
    CAGR_5Y, Absolute_Returns_1Y,
    Absolute_Returns_3M, Absolute_Returns_6M
  ) |>
  pivot_longer(
    cols = c(
      CAGR_3Y, CAGR_5Y,
      Absolute_Returns_1Y,
      Absolute_Returns_3M, Absolute_Returns_6M
    ),
    names_to = "Return_Type",
    values_to = "Return_Value"
  )

# Plot
ggplot(mf_long, aes(x = Volatility, y = Return_Value)) +
  geom_point(alpha = 0.6, color = "orange") +
  geom_smooth(method = "lm", se = FALSE, color = "red", linetype = "dashed") +
  facet_wrap(~Return_Type, scales = "free_y") + # 🔥 Subplots here

  labs(
    title = "Risk vs Return Across Different Time Horizons",
    x = "Volatility",
    y = "Returns"
  ) +
  theme_minimal()
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'

# Phase 3: Feature Engineering & Transformation.

3.1 How can new features like Net Return should be created.

mf_data <- mf_data |>
  mutate(
    Net_CAGR_3Y = CAGR_3Y - Expense_Ratio,
    Net_CAGR_5Y = CAGR_5Y - Expense_Ratio,
    Net_Return_1Y = Absolute_Returns_1Y - Expense_Ratio
  )

3.2 Can a custom Risk-Adjusted Return metric outperform traditional Sharpe Ratio.

mf_data <- mf_data |>
  mutate(
    Custom_Risk_Adjusted = round(
      Net_CAGR_3Y / (Volatility + 0.01) * Sortino_Ratio, 2
    )
  )
head(mf_data$Custom_Risk_Adjusted,20)
##  [1] -0.62 19.81 -0.62  2.41 -0.62 -0.62 18.40 18.40 -0.63 19.68  1.56 -0.63
## [13] -0.12  3.67 -0.11 -0.12 -0.01 -0.07  2.87  3.18

3.3 What composite risk score can be built using volatility, Sortino, and drawdown proxies.

mf_data <- mf_data |>
  mutate(
    Risk_Score = round(scale(Volatility) +
      scale(1 / (Sortino_Ratio + 0.01)) +
      scale(1 / (Sharpe_Ratio + 0.01)), 0)
  )
mf_data$Risk_Score
##        [,1]
##   [1,]   -2
##   [2,]   -2
##   [3,]   -2
##   [4,]    1
##   [5,]   -2
##   [6,]   -2
##   [7,]   -2
##   [8,]   -2
##   [9,]   -2
##  [10,]   -2
##  [11,]    3
##  [12,]   -2
##  [13,]    0
##  [14,]    0
##  [15,]    0
##  [16,]    0
##  [17,]   13
##  [18,]    1
##  [19,]    1
##  [20,]    1
##  [21,]    1
##  [22,]   13
##  [23,]   -1
##  [24,]   -1
##  [25,]   -1
##  [26,]   -1
##  [27,]   -1
##  [28,]   -1
##  [29,]   -1
##  [30,]   -1
##  [31,]   -1
##  [32,]   -1
##  [33,]    1
##  [34,]    1
##  [35,]    1
##  [36,]    1
##  [37,]    2
##  [38,]    1
##  [39,]    1
##  [40,]    1
##  [41,]   -1
##  [42,]    1
##  [43,]    1
##  [44,]    0
##  [45,]    1
##  [46,]   -1
##  [47,]   -1
##  [48,]   -1
##  [49,]   -1
##  [50,]   -1
##  [51,]   -1
##  [52,]   -1
##  [53,]   -1
##  [54,]    0
##  [55,]    0
##  [56,]    0
##  [57,]    0
##  [58,]    0
##  [59,]    0
##  [60,]    0
##  [61,]    0
##  [62,]    0
##  [63,]    0
##  [64,]    0
##  [65,]   -1
##  [66,]   -1
##  [67,]   -1
##  [68,]   -1
##  [69,]   -1
##  [70,]   -1
##  [71,]   -1
##  [72,]    0
##  [73,]    0
##  [74,]    0
##  [75,]    0
##  [76,]    0
##  [77,]    0
##  [78,]    0
##  [79,]    0
##  [80,]    3
##  [81,]    3
##  [82,]    3
##  [83,]    2
##  [84,]    3
##  [85,]    3
##  [86,]    0
##  [87,]    0
##  [88,]    0
##  [89,]    0
##  [90,]    0
##  [91,]    0
##  [92,]    0
##  [93,]   -1
##  [94,]   -1
##  [95,]   -1
##  [96,]   -1
##  [97,]   -1
##  [98,]   -1
##  [99,]   -1
## [100,]   -1
## [101,]   -1
## [102,]   -1
## [103,]   -1
## [104,]    0
## [105,]    0
## [106,]    0
## [107,]    0
## [108,]    0
## [109,]    0
## [110,]    0
## [111,]    0
## [112,]    0
## [113,]    0
## [114,]    0
## [115,]    0
## [116,]    0
## [117,]    0
## [118,]    0
## [119,]    0
## [120,]    9
## [121,]    0
## [122,]    9
## [123,]    0
## [124,]    0
## [125,]    0
## [126,]    0
## [127,]    0
## [128,]    0
## [129,]    0
## [130,]    0
## [131,]    0
## [132,]    0
## [133,]    0
## [134,]   -1
## [135,]   -1
## [136,]   -1
## [137,]   -1
## [138,]   -1
## [139,]   -1
## [140,]    0
## [141,]    0
## [142,]    0
## [143,]    0
## [144,]    0
## [145,]    0
## [146,]    0
## [147,]   -1
## [148,]   -1
## [149,]   -1
## [150,]   -1
## [151,]   -1
## [152,]   -1
## [153,]   -1
## [154,]   -1
## [155,]   -1
## [156,]   -1
## [157,]   -1
## [158,]   -1
## [159,]   -1
## [160,]   -1
## [161,]   -1
## [162,]   -1
## [163,]    4
## [164,]    0
## [165,]    0
## [166,]   -1
## [167,]   -1
## [168,]   -1
## [169,]    4
## [170,]   -1
## attr(,"scaled:center")
## [1] 0.6926471
## attr(,"scaled:scale")
## [1] 0.6162413

3.4 How can equity exposure to be derived from allocation percentages.

mf_data <- mf_data |>
  mutate(
    Total_Equity_Exposure = Largecap_Holding +
      Midcap_Holding +
      Smallcap_Holding,
    Equity_to_Debt_Ratio = Equity_Holding / (Debt_Holding + 0.01)
  )
head(mf_data$Total_Equity_Exposure)
## [1] 79.55 79.55 79.55 79.55 79.55 79.55
head(mf_data$Equity_to_Debt_Ratio)
## [1] 17.80019 17.80019 17.80019 17.80019 17.80019 17.80019

3.5 What normalization techniques are required for combining the features.

normalize <- function(x) {
  (x - min(x, na.rm = TRUE)) /
    (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}

mf_data <- mf_data |>
  mutate(
    Norm_Return = round(normalize(Net_CAGR_3Y), 2),
    Norm_Risk = round(normalize(Volatility), 2),
    Norm_Cost = round(normalize(Expense_Ratio), 2),
    Norm_Score = round(normalize(Custom_Risk_Adjusted), 2)
  )
head(mf_data$Norm_Return, 20)
##  [1] 0.05 0.99 0.05 0.91 0.05 0.05 0.91 0.91 0.05 0.99 0.91 0.05 0.06 0.92 0.06
## [16] 0.06 0.06 0.06 1.00 0.93