# packages
library(readr)
library(stringr)
library(tidyr) # reshaping data
library(dplyr) # data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2) # data visualization
## Warning: package 'ggplot2' was built under R version 4.5.3
library(e1071) # statistics & machine learning
## Warning: package 'e1071' was built under R version 4.5.3
##
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
##
## element
library(ggrepel) # ggplot2 + better labels
## Warning: package 'ggrepel' was built under R version 4.5.3
library(ggridges)
## Warning: package 'ggridges' was built under R version 4.5.3
mf_data <- read_csv(
"D:/semester_projects/R/mutual_funds_data.csv",
col_names = TRUE
)
## New names:
## Rows: 1407 Columns: 37
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (32): Name, Sub Category, Plan, CAGR 3Y, CAGR 5Y, Absolute Returns - 3M,... dbl
## (2): Category St Dev, Time since inception num (2): AUM, NAV lgl (1): ...37
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...37`
mf_data
## # A tibble: 1,407 × 37
## Name `Sub Category` Plan AUM `CAGR 3Y` `CAGR 5Y` Absolute Returns - 3…¹
## <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 Parag… Flexi Cap Fund Grow… 1.34e5 18.3 16.75 -8.75
## 2 Parag… Flexi Cap Fund IDCW 1.34e5 - - -8.75
## 3 Parag… Flexi Cap Fund IDCW 1.34e5 - - -8.75
## 4 HDFC … Balanced Adva… IDCW 1.08e5 8.52 7.74 -9.11
## 5 HDFC … Balanced Adva… IDCW 1.08e5 8.52 7.74 -9.11
## 6 HDFC … Balanced Adva… Grow… 1.08e5 16.36 16.7 -7.59
## 7 HDFC … Flexi Cap Fund IDCW 1.00e5 10.83 9.79 -16.26
## 8 HDFC … Flexi Cap Fund IDCW 1.00e5 10.83 9.79 -16.26
## 9 HDFC … Flexi Cap Fund Grow… 1.00e5 20.04 19.42 -9.64
## 10 HDFC … Mid Cap Fund IDCW 9.43e4 16.14 13.25 -14.61
## # ℹ 1,397 more rows
## # ℹ abbreviated name: ¹`Absolute Returns - 3M`
## # ℹ 30 more variables: `Absolute Returns - 1Y` <chr>,
## # `Absolute Returns - 6M` <chr>, `SEBI Risk Category` <chr>,
## # Volatility <chr>, `Category St Dev` <dbl>, `% Other Holdings` <chr>,
## # `% Largecap Holding` <chr>, `% Midcap Holding` <chr>,
## # `% Equity Holding` <chr>, `% Smallcap Holding` <chr>, …
# Column names which starts with %
colnames(mf_data)[grep("%", colnames(mf_data))]
## [1] "% Other Holdings" "% Largecap Holding" "% Midcap Holding"
## [4] "% Equity Holding" "% Smallcap Holding" "% Debt Holding"
# replacing %
clean_mf_data <- mf_data |>
rename_with(~ str_replace_all(., "%", "")) |>
rename_with(~ str_replace_all(., " ", "_")) |>
rename_with(~ str_replace_all(., "-", "")) |>
# removes more than one underscore
rename_with(~ str_replace_all(., "__+", "_")) |>
# removes leading / trailing underscore
rename_with(~ str_remove(., "^_|_$")) |>
select(-starts_with("..."))
glimpse(clean_mf_data)
## Rows: 1,407
## Columns: 36
## $ Name <chr> "Parag Parikh Flexi Cap Fund", "Parag Parikh Flex…
## $ Sub_Category <chr> "Flexi Cap Fund", "Flexi Cap Fund", "Flexi Cap Fu…
## $ Plan <chr> "Growth", "IDCW", "IDCW", "IDCW", "IDCW", "Growth…
## $ AUM <dbl> 134253.17, 134253.17, 134253.17, 107589.67, 10758…
## $ CAGR_3Y <chr> "18.3", "-", "-", "8.52", "8.52", "16.36", "10.83…
## $ CAGR_5Y <chr> "16.75", "-", "-", "7.74", "7.74", "16.7", "9.79"…
## $ Absolute_Returns_3M <chr> "-8.75", "-8.75", "-8.75", "-9.11", "-9.11", "-7.…
## $ Absolute_Returns_1Y <chr> "0.08", "-", "-", "-5.8", "-5.8", "0.65", "-6.33"…
## $ Absolute_Returns_6M <chr> "-7.25", "-", "-", "-8.1", "-8.1", "-5.02", "-14.…
## $ SEBI_Risk_Category <chr> "Very High", "Very High", "Very High", "Very High…
## $ Volatility <chr> "8.76", "8.17", "8.17", "8.63", "8.63", "8.63", "…
## $ Category_St_Dev <dbl> 13.52, 13.52, 13.52, 8.25, 8.25, 8.25, 13.52, 13.…
## $ Other_Holdings <chr> "-", "-", "-", "-", "-", "-", "-", "-", "-", "-",…
## $ Largecap_Holding <chr> "63.01", "63.01", "63.01", "54.22", "54.22", "54.…
## $ Midcap_Holding <chr> "2.17", "2.17", "2.17", "7.47", "7.47", "7.47", "…
## $ Equity_Holding <chr> "78.36", "78.36", "78.36", "69.36", "69.36", "69.…
## $ Smallcap_Holding <chr> "2.67", "2.67", "2.67", "7.67", "7.67", "7.67", "…
## $ Debt_Holding <chr> "13.94", "13.94", "13.94", "25.78", "25.78", "25.…
## $ Category_YTM <chr> "-", "-", "-", "6.56", "6.56", "6.56", "-", "-", …
## $ PE_Ratio <chr> "17.51", "17.51", "17.51", "19", "19", "19", "24.…
## $ Average_Maturity <chr> "-", "-", "-", "7.6", "7.6", "7.6", "-", "-", "-"…
## $ Category_PE_Ratio <chr> "27.28", "27.28", "27.28", "25.11", "25.11", "25.…
## $ Sortino_Ratio <chr> "-0.02", "-0.25", "-0.25", "-0.01", "-0.01", "-0.…
## $ Sharpe_Ratio <chr> "-0.17", "-2.82", "-2.82", "-0.13", "-0.13", "-0.…
## $ Average_YTM <chr> "-", "-", "-", "7.01", "7.01", "7.01", "-", "-", …
## $ Benchmark <chr> "NIFTY 500 - TRI", "NIFTY 500 - TRI", "NIFTY 500 …
## $ Time_since_inception <dbl> 155, 6, 6, 159, 159, 159, 159, 159, 159, 159, 159…
## $ Expense_Ratio <chr> "0.63", "0.63", "0.63", "0.77", "0.77", "0.77", "…
## $ NAV <dbl> 87.00, 87.00, 87.00, 41.82, 41.82, 535.18, 82.81,…
## $ AMC <chr> "PPFAS Asset Management Pvt. Ltd.", "PPFAS Asset …
## $ Minimum_Lumpsum <chr> "1,000.00", "1,000.00", "1,000.00", "100", "Not a…
## $ Lockin <chr> "-", "-", "-", "-", "-", "-", "-", "-", "-", "-",…
## $ Fund_Manager <chr> "Rajeev Thakkar+3 others", "Rajeev Thakkar+3 othe…
## $ Exit_Load <chr> "2", "1", "1", "1", "1", "1", "1", "1", "1", "1",…
## $ SIP_Investment <chr> "Allowed", "Allowed", "Allowed", "Allowed", "Not …
## $ Minimum_SIP <chr> "3,000.00", "3,000.00", "3,000.00", "100", "100",…
clean_mf_data <- clean_mf_data |>
mutate(across(
.cols =
contains(c(
"CAGR", "Returns", "Expense", "Sharpe", "Sortino", "Nav", "AUM",
"Large", "Mid", "Equity", "Smallcap", "Debt", "Average", "Absolute"
)),
~ suppressWarnings(as.numeric(str_replace_all(., "[%,]", "")))
))
clean_mf_data
## # A tibble: 1,407 × 36
## Name Sub_Category Plan AUM CAGR_3Y CAGR_5Y Absolute_Returns_3M
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Parag Parikh F… Flexi Cap F… Grow… 1.34e5 18.3 16.8 -8.75
## 2 Parag Parikh F… Flexi Cap F… IDCW 1.34e5 NA NA -8.75
## 3 Parag Parikh F… Flexi Cap F… IDCW 1.34e5 NA NA -8.75
## 4 HDFC Balanced … Balanced Ad… IDCW 1.08e5 8.52 7.74 -9.11
## 5 HDFC Balanced … Balanced Ad… IDCW 1.08e5 8.52 7.74 -9.11
## 6 HDFC Balanced … Balanced Ad… Grow… 1.08e5 16.4 16.7 -7.59
## 7 HDFC Flexi Cap… Flexi Cap F… IDCW 1.00e5 10.8 9.79 -16.3
## 8 HDFC Flexi Cap… Flexi Cap F… IDCW 1.00e5 10.8 9.79 -16.3
## 9 HDFC Flexi Cap… Flexi Cap F… Grow… 1.00e5 20.0 19.4 -9.64
## 10 HDFC Mid Cap F… Mid Cap Fund IDCW 9.43e4 16.1 13.2 -14.6
## # ℹ 1,397 more rows
## # ℹ 29 more variables: Absolute_Returns_1Y <dbl>, Absolute_Returns_6M <dbl>,
## # SEBI_Risk_Category <chr>, Volatility <chr>, Category_St_Dev <dbl>,
## # Other_Holdings <chr>, Largecap_Holding <dbl>, Midcap_Holding <dbl>,
## # Equity_Holding <dbl>, Smallcap_Holding <dbl>, Debt_Holding <dbl>,
## # Category_YTM <chr>, PE_Ratio <chr>, Average_Maturity <dbl>,
## # Category_PE_Ratio <chr>, Sortino_Ratio <dbl>, Sharpe_Ratio <dbl>, …
# saving clean data into a file
write.csv(clean_mf_data,
"D:/semester_projects/R/data_preprocessing_phase/clean_mf_data.csv",
row.names = FALSE
)
Interpretation: The dataset was cleaned by removing special characters, standardizing column names, and converting percentage and string values into numeric format. This ensures consistency across variables and prevents errors during analysis, making the dataset structured, readable, and suitable for statistical and machine learning operations.
mf_data <- read_csv(
"D:/semester_projects/R/data_preprocessing_phase/clean_mf_data.csv",
col_names = TRUE
)
## Rows: 1407 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): Name, Sub_Category, Plan, SEBI_Risk_Category, Volatility, Other_Ho...
## dbl (19): AUM, CAGR_3Y, CAGR_5Y, Absolute_Returns_3M, Absolute_Returns_1Y, A...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mf_data
## # A tibble: 1,407 × 36
## Name Sub_Category Plan AUM CAGR_3Y CAGR_5Y Absolute_Returns_3M
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Parag Parikh F… Flexi Cap F… Grow… 1.34e5 18.3 16.8 -8.75
## 2 Parag Parikh F… Flexi Cap F… IDCW 1.34e5 NA NA -8.75
## 3 Parag Parikh F… Flexi Cap F… IDCW 1.34e5 NA NA -8.75
## 4 HDFC Balanced … Balanced Ad… IDCW 1.08e5 8.52 7.74 -9.11
## 5 HDFC Balanced … Balanced Ad… IDCW 1.08e5 8.52 7.74 -9.11
## 6 HDFC Balanced … Balanced Ad… Grow… 1.08e5 16.4 16.7 -7.59
## 7 HDFC Flexi Cap… Flexi Cap F… IDCW 1.00e5 10.8 9.79 -16.3
## 8 HDFC Flexi Cap… Flexi Cap F… IDCW 1.00e5 10.8 9.79 -16.3
## 9 HDFC Flexi Cap… Flexi Cap F… Grow… 1.00e5 20.0 19.4 -9.64
## 10 HDFC Mid Cap F… Mid Cap Fund IDCW 9.43e4 16.1 13.2 -14.6
## # ℹ 1,397 more rows
## # ℹ 29 more variables: Absolute_Returns_1Y <dbl>, Absolute_Returns_6M <dbl>,
## # SEBI_Risk_Category <chr>, Volatility <chr>, Category_St_Dev <dbl>,
## # Other_Holdings <chr>, Largecap_Holding <dbl>, Midcap_Holding <dbl>,
## # Equity_Holding <dbl>, Smallcap_Holding <dbl>, Debt_Holding <dbl>,
## # Category_YTM <chr>, PE_Ratio <chr>, Average_Maturity <dbl>,
## # Category_PE_Ratio <chr>, Sortino_Ratio <dbl>, Sharpe_Ratio <dbl>, …
# lets identify fields that contain missing values or " - " symbol
cols_to_be_treated <- colnames(mf_data)[
sapply(mf_data, function(col) {
is.character(col) &&
any(trimws(col) == "-", na.rm = TRUE)
})
]
# Replacing these missing records with some appropriate value
mf_data <- mf_data |> mutate(
across(
all_of(cols_to_be_treated),
~ suppressWarnings(as.numeric(ifelse(trimws(.) == "-", "0", .)))
)
)
# lets identify NA values
cols_contains_na <- colnames(mf_data)[colSums(is.na(mf_data)) > 0]
# Replacing these NA values with median
mf_data <- mf_data |> mutate(
across(
all_of(cols_contains_na),
~ ifelse(is.na(.), median(., na.rm = TRUE), .)
)
)
# Saving the file as csv
write.csv(mf_data,
"D:/semester_projects/R/data_preprocessing_phase/mf_data.csv",
row.names = FALSE
)
Interpretation: Missing values represented as “-” or NA were handled using replacement and median imputation. Median was preferred due to skewed financial data. This approach preserves distribution while minimizing bias, ensuring that missing data does not distort analysis or affect model performance.
# identifying the duplicate records
dup_funds <- mf_data |> filter(
duplicated(mf_data[, c("Name", "Plan", "Sub_Category")]) |
duplicated(mf_data[, c("Name", "Plan", "Sub_Category")], fromLast = TRUE)
)
# removing the duplicate records
mf_data <- mf_data[
!duplicated(mf_data[, c("Name", "Plan", "Sub_Category")]),
]
Interpretation: Duplicate entries based on fund name, plan, and category were identified and removed. This prevents redundancy and avoids over-representation of certain funds, ensuring that each observation contributes uniquely to the analysis and improves the reliability of statistical results.
# here we will check the invalid datatypes
# although we have converted the datatype before.
# first we will assign levels in the dataset for easier data exploration.
mf_data <- read.csv(
"D:/semester_projects/R/data_preprocessing_phase/mf_data.csv",
header = TRUE
)
mf_data$Plan <- as.factor(mf_data$Plan)
mf_data$SEBI_Risk_Category <- as.factor(mf_data$SEBI_Risk_Category)
# check levels
levels(mf_data$Plan)
## [1] "Bonus" "Growth" "IDCW"
levels(mf_data$SEBI_Risk_Category)
## [1] "High" "Low" "Moderate" "Moderately High"
## [5] "Moderately Low" "Very High"
# lets detect columns with mixed type of values
str(mf_data)
## 'data.frame': 1407 obs. of 36 variables:
## $ Name : chr "Parag Parikh Flexi Cap Fund" "Parag Parikh Flexi Cap Fund(IDCW)" "Parag Parikh Flexi Cap Fund(IDCW Payout)" "HDFC Balanced Advantage Fund(IDCW)" ...
## $ Sub_Category : chr "Flexi Cap Fund" "Flexi Cap Fund" "Flexi Cap Fund" "Balanced Advantage Fund" ...
## $ Plan : Factor w/ 3 levels "Bonus","Growth",..: 2 3 3 3 3 2 3 3 2 3 ...
## $ AUM : num 134253 134253 134253 107590 107590 ...
## $ CAGR_3Y : num 18.3 7.58 7.58 8.52 8.52 ...
## $ CAGR_5Y : num 16.75 6.32 6.32 7.74 7.74 ...
## $ Absolute_Returns_3M : num -8.75 -8.75 -8.75 -9.11 -9.11 ...
## $ Absolute_Returns_1Y : num 0.08 0 0 -5.8 -5.8 0.65 -6.33 -6.33 1.07 -1 ...
## $ Absolute_Returns_6M : num -7.25 -0.9 -0.9 -8.1 -8.1 ...
## $ SEBI_Risk_Category : Factor w/ 6 levels "High","Low","Moderate",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ Volatility : num 8.76 8.17 8.17 8.63 8.63 ...
## $ Category_St_Dev : num 13.52 13.52 13.52 8.25 8.25 ...
## $ Other_Holdings : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Largecap_Holding : num 63 63 63 54.2 54.2 ...
## $ Midcap_Holding : num 2.17 2.17 2.17 7.47 7.47 ...
## $ Equity_Holding : num 78.4 78.4 78.4 69.4 69.4 ...
## $ Smallcap_Holding : num 2.67 2.67 2.67 7.67 7.67 ...
## $ Debt_Holding : num 13.9 13.9 13.9 25.8 25.8 ...
## $ Category_YTM : num 0 0 0 6.56 6.56 6.56 0 0 0 0 ...
## $ PE_Ratio : num 17.5 17.5 17.5 19 19 ...
## $ Average_Maturity : num 1 1 1 7.6 7.6 7.6 1 1 1 1 ...
## $ Category_PE_Ratio : num 27.3 27.3 27.3 25.1 25.1 ...
## $ Sortino_Ratio : num -0.02 -0.25 -0.25 -0.01 -0.01 -0.01 0 0 0 0.03 ...
## $ Sharpe_Ratio : num -0.17 -2.82 -2.82 -0.13 -0.13 -0.13 -0.02 -0.02 -0.03 0.31 ...
## $ Average_YTM : num 6.77 6.77 6.77 7.01 7.01 7.01 6.77 6.77 6.77 6.77 ...
## $ Benchmark : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Time_since_inception: int 155 6 6 159 159 159 159 159 159 159 ...
## $ Expense_Ratio : num 0.63 0.63 0.63 0.77 0.77 0.77 0.7 0.7 0.7 0.76 ...
## $ NAV : num 87 87 87 41.8 41.8 ...
## $ AMC : chr "PPFAS Asset Management Pvt. Ltd." "PPFAS Asset Management Pvt. Ltd." "PPFAS Asset Management Pvt. Ltd." "HDFC Asset Management Company Limited" ...
## $ Minimum_Lumpsum : chr "1,000.00" "1,000.00" "1,000.00" "100" ...
## $ Lockin : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Fund_Manager : chr "Rajeev Thakkar+3 others" "Rajeev Thakkar+3 others" "Rajeev Thakkar+3 others" "Gopal Agrawal+3 others" ...
## $ Exit_Load : num 2 1 1 1 1 1 1 1 1 1 ...
## $ SIP_Investment : chr "Allowed" "Allowed" "Allowed" "Allowed" ...
## $ Minimum_SIP : int 100 100 100 100 100 100 100 100 100 100 ...
char_cols <- sapply(mf_data, is.character)
names(mf_data[char_cols])
## [1] "Name" "Sub_Category" "AMC" "Minimum_Lumpsum"
## [5] "Fund_Manager" "SIP_Investment"
# here minimum_lumpsum contains mixed type of data
unique(mf_data$Minimum_Lumpsum)
## [1] "1,000.00" "100" "Not allowed" "5,000.00" "500"
## [6] "10,000.00" "99" "20,000.00" "1,00,000.00"
# converting the values into similar types of data
mf_data <- mf_data |>
mutate(
Minimum_Lumpsum = suppressWarnings(
as.numeric(gsub(",", "", Minimum_Lumpsum))
)
)
Interpretation: Columns with incorrect data types were identified and converted appropriately, especially numeric fields stored as text. Ensuring correct data types is essential for accurate computation, enabling proper use of statistical functions and avoiding unexpected errors during modeling.
# these are important columns and data is
# naturally skewed it is important to ignore them.
exclude_cols <- c("AUM", "NAV", "Minimum_Lumpsum")
# identifying the numeric columns
numeric_cols <- names(mf_data)[
sapply(mf_data, is.numeric) &
!(names(mf_data) %in% exclude_cols)
]
# Detecting outliers
for (col in numeric_cols) {
q1 <- quantile(mf_data[[col]], 0.25, na.rm = TRUE)
q3 <- quantile(mf_data[[col]], 0.75, na.rm = TRUE)
iqr_val <- IQR(mf_data[[col]], na.rm = TRUE)
lower <- q1 - 1.5 * iqr_val
upper <- q3 + 1.5 * iqr_val
outliers <- mf_data[[col]][
mf_data[[col]] < lower | mf_data[[col]] > upper
]
if (length(outliers) > 0) {
cat("column", col, "\n")
print(outliers)
cat("\n-----------\n")
}
}
## column CAGR_3Y
## [1] 31.65 31.65 31.65 31.38 31.21 31.21 31.21 44.84 44.84 44.84 31.18 31.18
## [13] 31.18 32.82 32.82 32.83 31.48 31.48 31.48 44.99 44.69 44.69 44.69
##
## -----------
## column CAGR_5Y
## [1] 24.42 24.42 24.42 25.20 27.36 27.36 27.36 26.95 24.87 24.87 24.87
##
## -----------
## column Absolute_Returns_1Y
## [1] -21.68 -15.40 -15.40 -13.91 57.86 57.86 57.86 -13.51 -13.51 -16.44
## [11] -16.44 -13.40 -13.40 -13.22 -13.22 21.34 57.08 -13.28 -13.28 -15.76
## [21] -15.76 -15.76 -14.73 -14.73 -14.73 19.58 19.58 19.58 17.21 17.21
## [31] 17.21 57.92 57.92 57.92 122.54 122.54 122.54 57.19 57.19 57.19
## [41] 58.26 58.26 58.26 -14.15 -14.15 18.44 18.44 18.44 19.13 19.13
## [51] 19.13 123.57 123.02 123.02 123.02 126.78 126.78 126.78
##
## -----------
## column Absolute_Returns_6M
## [1] 22.22 22.22 22.22 21.75 21.90 21.90 21.90 62.20 62.20 62.20 21.81 21.81
## [13] 21.81 22.18 22.18 22.18 62.29 61.90 61.90 61.90 64.46 64.46 64.46
##
## -----------
## column Volatility
## [1] 54.82 54.82 54.82 58.91 51.71 51.71 51.71 56.72 56.72 56.72
##
## -----------
## column Category_St_Dev
## [1] 34.60 34.60 34.60 34.60 34.60 34.60 34.60 60.61 60.61 60.61 34.60 34.60
## [13] 34.60 34.60 34.60 34.60 60.61 60.61 60.61 60.61 60.61 60.61 60.61
##
## -----------
## column Largecap_Holding
## [1] 63.01 63.01 63.01 54.22 54.22 54.22 75.25 75.25 75.25 12.47
## [11] 12.47 12.47 56.10 56.10 56.10 84.95 84.95 84.95 57.77 57.77
## [21] 57.77 57.77 57.77 14.89 14.89 14.89 14.89 16.77 16.77 16.77
## [31] 75.61 75.61 75.61 76.13 76.13 76.13 80.80 80.80 80.80 83.46
## [41] 83.46 83.46 83.46 45.14 45.14 45.14 45.14 58.08 58.08 58.08
## [51] 58.08 58.08 58.08 58.08 25.09 25.09 25.09 25.09 84.94 84.94
## [61] 84.94 87.04 87.04 87.04 41.57 41.57 41.57 9.16 9.16 9.16
## [71] 66.27 66.27 66.27 25.88 25.88 25.88 74.05 74.05 39.84 39.84
## [81] 39.84 89.12 89.12 89.12 63.97 63.97 63.97 19.20 19.20 19.20
## [91] 57.55 57.55 57.55 82.87 82.87 82.87 -0.38 -0.38 -0.38 -0.38
## [101] 47.14 47.14 47.14 23.52 23.52 23.52 38.26 38.26 38.26 71.61
## [111] 71.61 71.61 46.77 46.77 46.77 99.24 99.24 99.24 58.06 58.06
## [121] 5.64 5.64 5.64 59.52 59.52 59.52 41.31 41.31 41.31 54.29
## [131] 54.29 54.29 54.29 54.29 54.29 54.29 54.29 46.41 46.41 46.41
## [141] 8.04 8.04 8.04 67.42 67.42 67.42 38.47 38.47 38.47 58.67
## [151] 58.67 58.67 99.26 46.67 46.67 46.67 4.81 4.81 63.82 63.82
## [161] 63.82 58.52 58.52 58.52 18.57 18.57 18.57 76.49 76.49 76.49
## [171] 46.01 46.01 46.01 14.17 14.17 14.17 65.31 65.31 65.31 54.67
## [181] 54.67 54.67 68.75 68.75 68.75 87.29 87.29 87.29 81.64 81.64
## [191] 81.64 47.30 47.30 47.30 47.30 47.30 6.35 6.35 6.35 15.80
## [201] 15.80 15.80 15.80 15.80 15.80 15.80 73.48 73.48 73.48 40.44
## [211] 40.44 40.44 41.31 41.31 41.31 41.31 41.31 41.31 41.31 99.06
## [221] 99.06 99.06 2.62 2.62 2.62 39.28 39.28 39.28 61.57 61.57
## [231] 61.57 73.40 73.40 73.40 69.18 69.18 69.18 42.25 42.25 42.25
## [241] 20.41 20.41 20.41 75.50 75.50 44.39 44.39 44.39 43.56 43.56
## [251] 43.56 71.77 71.77 71.77 18.71 18.71 18.71 64.31 64.31 64.31
## [261] 64.31 64.31 64.31 64.31 6.58 6.58 6.58 46.59 46.59 46.59
## [271] 56.86 56.86 56.86 88.54 88.54 88.54 45.32 45.32 45.32 8.86
## [281] 8.86 8.86 27.25 27.25 27.25 15.17 15.17 15.17 60.73 60.73
## [291] 60.73 77.11 77.11 77.11 81.55 81.55 99.24 99.24 99.24 13.01
## [301] 13.01 13.01 65.93 65.93 65.93 54.03 54.03 54.03 42.99 42.99
## [311] 42.99 62.57 62.57 62.57 85.66 85.66 85.66 20.82 20.82 20.82
## [321] 20.82 15.84 15.84 15.84 76.00 76.00 76.00 56.83 56.83 56.83
## [331] 59.34 59.34 59.34 63.36 63.36 63.36 7.51 7.51 7.51 7.51
## [341] 7.51 7.51 7.51 45.96 45.96 45.96 63.20 63.20 63.20 10.28
## [351] 10.28 10.28 55.92 55.92 55.92 45.35 45.35 45.35 47.14 47.14
## [361] 47.14 57.30 57.30 57.30 35.78 35.78 35.78 46.07 46.07 46.07
## [371] 65.65 65.65 65.65 -0.48 -0.48 -0.48 40.08 40.08 40.08 61.66
## [381] 61.66 61.66 71.57 71.57 71.57 35.42 35.42 35.42 100.04 72.01
## [391] 72.01 69.96 69.96 69.96 69.96 69.96 73.39 41.65 41.65 41.65
## [401] 41.93 41.93 41.93 41.93 41.93 41.93 20.97 20.97 20.97 64.91
## [411] 64.91 64.91 78.21 78.21 78.21 11.54 11.54 11.54 44.29 44.29
## [421] 44.29 44.29 43.05 43.05 43.05 53.99 53.99 53.99 35.83 35.83
## [431] 35.83 45.38 45.38 45.38 77.90 77.90 77.90 63.03 63.03 63.03
## [441] 63.03 81.73 81.73 81.73 43.87 43.87 43.87 68.18 68.18 68.18
## [451] 65.51 65.51 65.51 11.26 11.26 11.26 57.39 57.39 57.39 23.17
## [461] 23.17 23.17 30.17 30.17 30.17 78.53 78.53 78.53 53.09 53.09
## [471] 53.09 67.96 67.96 62.14 62.14 62.14 62.14 44.89 44.89 44.89
## [481] 44.89 44.89 74.43 74.43 74.43 37.07 37.07 37.07 43.83 43.83
## [491] 43.83 46.03 46.03 20.05 65.93 65.93 65.93 73.45 73.45 73.45
## [501] 56.84 56.84 56.84 75.83 75.83 72.07 72.07 61.86 61.86 61.86
## [511] 44.38 44.38 44.38 78.18 63.75 63.75 63.75 64.01 64.01 64.01
## [521] 43.23 43.23 43.23 10.28 10.28 10.28 64.81 64.81 62.05 62.05
## [531] 62.05 62.05 60.86 60.86 60.86 54.05 54.05 54.05 65.69 65.69
## [541] 65.69 38.08 38.08 38.08 45.46 45.46 45.46 57.28 57.28 57.28
## [551] 57.28 57.28 46.59 46.59 46.59 70.50 1.29 1.29 1.29 76.90
## [561] 76.90 76.90 16.21 16.21 16.21 2.63 2.63 2.63 2.63 43.82
## [571] 43.82 43.82 39.20 39.20 39.20 39.20 39.20 47.14 47.14 47.14
## [581] 59.15 59.15 59.15 61.73 61.73 61.73 59.55 59.55 59.55 64.26
## [591] 64.26 64.26 43.26 43.26 43.26 23.48 23.48 42.42 42.42 42.42
## [601] 44.93 44.93 44.93 24.43 24.43 24.43 38.90 38.90 38.90 4.41
## [611] 4.41 4.41 42.52 42.52 42.52 45.77 45.77 45.77 39.55 39.55
## [621] 39.55 10.70 10.70 10.70 57.36 57.36 56.39 56.39 56.39 72.20
## [631] 72.20 72.20 68.22 68.22 68.22 14.23 14.23 14.23 44.62 44.62
## [641] 44.62 20.97 20.97 20.97 20.45 20.45
##
## -----------
## column Midcap_Holding
## [1] 2.17 2.17 2.17 7.47 7.47 7.47 7.89 7.89 7.89 63.77 63.77 63.77
## [13] 14.01 14.01 14.01 7.01 7.01 7.01 14.89 14.89 14.89 5.92 5.92 5.92
## [25] 5.92 5.92 68.90 68.90 68.90 4.56 4.56 4.56 12.54 12.54 12.54 13.05
## [37] 13.05 13.05 13.05 27.31 27.31 27.31 27.31 6.28 6.28 6.28 6.28 6.28
## [49] 6.28 6.28 20.83 20.83 20.83 61.39 61.39 61.39 61.39 14.60 14.60 14.60
## [61] 40.60 40.60 40.60 9.66 9.66 9.66 12.87 12.87 12.87 11.49 11.49 11.49
## [73] 35.57 35.57 35.57 12.06 12.06 12.06 2.82 2.82 2.82 66.88 66.88 66.88
## [85] 20.81 20.81 19.45 19.45 19.45 1.22 1.22 1.22 20.38 20.38 20.38 64.77
## [97] 64.77 64.77 33.02 33.02 33.02 6.94 6.94 6.94 -0.06 -0.06 -0.06 -0.06
## [109] 37.29 37.29 37.29 4.72 4.72 4.72 41.59 41.59 41.59 7.66 7.66 7.66
## [121] 0.71 0.71 0.71 25.24 25.24 22.69 22.69 22.69 42.60 42.60 42.60 9.55
## [133] 9.55 9.55 9.55 9.55 9.55 9.55 9.55 21.90 21.90 21.90 5.58 5.58
## [145] 5.58 67.60 67.60 67.60 12.57 12.57 12.57 22.00 22.00 22.00 25.19 25.19
## [157] 25.19 0.71 19.35 19.35 19.35 13.71 13.71 8.43 8.43 8.43 23.05 23.05
## [169] 23.05 63.67 63.67 63.67 11.16 11.16 11.16 27.24 27.24 27.24 69.30 69.30
## [181] 69.30 1.59 1.59 1.59 32.05 32.05 32.05 9.91 9.91 9.91 9.87 9.87
## [193] 9.87 6.56 6.56 6.56 7.02 7.02 7.02 19.80 19.80 19.80 12.98 12.98
## [205] 12.98 12.98 12.98 12.98 12.98 10.36 10.36 10.36 30.44 30.44 30.44 32.83
## [217] 32.83 32.83 22.23 22.23 22.23 22.23 22.23 22.23 22.23 0.71 0.71 0.71
## [229] 23.79 23.79 23.79 35.20 35.20 35.20 19.88 19.88 19.88 24.15 24.15 24.15
## [241] 12.51 12.51 12.51 31.46 31.46 31.46 65.33 65.33 65.33 7.07 7.07 10.16
## [253] 10.16 10.16 21.46 21.46 21.46 61.60 61.60 61.60 12.25 12.25 12.25 12.25
## [265] 12.25 12.25 12.25 8.37 8.37 8.37 28.41 28.41 28.41 20.67 20.67 20.67
## [277] 7.17 7.17 7.17 12.16 12.16 12.16 22.51 22.51 22.51 49.58 49.58 49.58
## [289] 64.78 64.78 64.78 23.01 23.01 23.01 12.06 12.06 12.06 7.21 7.21 9.57
## [301] 9.57 9.57 0.71 0.71 0.71 64.71 64.71 64.71 12.62 12.62 12.62 13.75
## [313] 13.75 13.75 12.51 12.51 12.51 8.82 8.82 8.82 64.26 64.26 64.26 64.26
## [325] 57.57 57.57 57.57 13.06 13.06 13.06 4.10 4.10 4.10 13.34 13.34 13.34
## [337] 3.63 3.63 3.63 3.63 3.63 3.63 3.63 15.36 15.36 15.36 20.19 20.19
## [349] 20.19 19.89 19.89 19.89 11.51 11.51 11.51 19.37 19.37 19.37 13.31 13.31
## [361] 13.31 11.72 11.72 11.72 37.13 37.13 37.13 24.78 24.78 24.78 20.41 20.41
## [373] 20.41 -0.06 -0.06 -0.06 19.17 19.17 19.17 14.72 14.72 14.72 27.28 27.28
## [385] 27.28 10.53 10.53 10.53 10.53 10.53 26.55 27.53 27.53 27.53 19.99 19.99
## [397] 19.99 19.99 19.99 19.99 5.07 5.07 5.07 19.31 19.31 19.31 21.69 21.69
## [409] 21.69 3.21 3.21 3.21 29.35 29.35 29.35 29.35 33.61 33.61 33.61 7.54
## [421] 7.54 7.54 19.51 19.51 19.51 24.78 24.78 24.78 20.94 20.94 20.94 20.94
## [433] 12.59 12.59 12.59 24.95 24.95 24.95 11.36 11.36 11.36 19.29 19.29 19.29
## [445] 69.80 69.80 69.80 11.24 11.24 11.24 64.50 64.50 64.50 20.58 20.58 20.58
## [457] 6.18 6.18 6.18 9.29 9.29 9.29 23.13 23.13 23.13 23.13 15.18 15.18
## [469] 33.72 33.72 33.72 33.72 13.45 13.45 13.45 34.11 34.11 34.11 10.84 10.84
## [481] 10.84 27.11 27.11 27.11 13.15 13.15 13.15 20.10 20.10 2.35 24.17 24.17
## [493] 24.17 10.23 10.23 10.23 14.36 14.36 14.36 12.15 12.15 14.85 14.85 11.37
## [505] 11.37 11.37 15.45 15.45 15.45 25.13 25.13 25.13 14.00 14.00 14.00 21.69
## [517] 20.39 20.39 20.39 22.73 22.73 22.73 22.22 22.22 22.22 64.74 64.74 64.74
## [529] 26.34 26.34 20.87 20.87 20.87 20.87 2.23 2.23 2.23 3.41 3.41 3.41
## [541] 11.95 11.95 11.95 23.60 23.60 23.60 45.62 45.62 45.62 7.26 7.26 7.26
## [553] 7.26 7.26 33.88 33.88 33.88 2.26 9.25 9.25 9.25 62.91 62.91 62.91
## [565] 33.35 33.35 33.35 33.35 13.09 13.09 13.09 20.31 20.31 20.31 20.31 20.31
## [577] 22.82 22.82 22.82 24.79 24.79 24.79 21.51 21.51 21.51 11.87 11.87 11.87
## [589] 27.76 27.76 27.76 10.35 10.35 8.23 8.23 8.23 25.90 25.90 25.90 3.30
## [601] 3.30 3.30 28.86 28.86 28.86 21.53 21.53 21.53 32.00 32.00 32.00 61.02
## [613] 61.02 61.02 26.09 26.09 26.09 14.02 14.02 29.56 29.56 29.56 19.44 19.44
## [625] 9.90 9.90 9.90 35.19 35.19 35.19 8.09 8.09 8.09 69.12 69.12 69.12
## [637] 20.32 20.32 20.32 65.94 65.94 65.94 34.62 34.62
##
## -----------
## column Equity_Holding
## [1] 78.36 78.36 78.36 69.36 69.36 69.36 90.40 90.40 90.40 69.27
## [11] 69.27 69.27 74.02 74.02 74.02 92.16 92.16 92.16 67.14 67.14
## [21] 67.14 66.40 66.40 66.40 66.40 66.40 95.91 95.91 95.91 95.91
## [31] 99.07 99.07 99.07 85.68 85.68 85.68 98.21 98.21 98.21 99.31
## [41] 99.31 99.31 99.31 99.40 99.40 99.40 99.40 70.56 70.56 70.56
## [51] 70.56 70.56 70.56 70.56 83.37 83.37 83.37 98.68 98.68 98.68
## [61] 98.68 69.91 69.91 69.91 83.18 83.18 83.18 98.90 98.90 98.90
## [71] 99.20 99.20 99.20 65.94 65.94 65.94 98.53 98.53 98.53 93.15
## [81] 93.15 93.15 91.93 91.93 91.93 92.48 92.48 92.48 86.01 86.01
## [91] 86.01 92.76 92.76 92.76 96.82 96.82 61.92 61.92 61.92 90.34
## [101] 90.34 90.34 97.79 97.79 97.79 88.43 88.43 88.43 97.68 97.68
## [111] 97.68 96.85 96.85 96.85 -0.27 -0.27 -0.27 -0.27 98.67 98.67
## [121] 98.67 89.84 89.84 89.84 66.56 66.56 66.56 99.95 99.95 99.95
## [131] 99.18 99.18 89.05 89.05 89.05 97.69 97.69 97.69 97.73 97.73
## [141] 97.73 65.13 65.13 65.13 65.13 65.13 65.13 65.13 65.13 97.09
## [151] 97.09 97.09 70.56 70.56 70.56 93.80 93.80 93.80 93.71 93.71
## [161] 93.71 92.19 92.19 92.19 96.88 96.88 96.88 99.97 69.67 69.67
## [171] 69.67 89.74 89.74 97.34 97.34 97.34 96.75 96.75 96.75 98.31
## [181] 98.31 98.31 98.06 98.06 98.06 67.42 67.42 67.42 96.93 96.93
## [191] 96.93 67.33 67.33 67.33 97.48 97.48 97.48 97.17 97.17 97.17
## [201] 97.92 97.92 97.92 66.93 66.93 66.93 66.93 66.93 98.31 98.31
## [211] 98.31 40.85 40.85 40.85 40.85 40.85 40.85 40.85 91.27 91.27
## [221] 91.27 97.84 97.84 97.84 93.66 93.66 93.66 67.30 67.30 67.30
## [231] 67.30 67.30 67.30 67.30 99.77 99.77 99.77 96.54 96.54 96.54
## [241] 98.15 98.15 98.15 97.03 97.03 97.03 97.55 97.55 97.55 99.29
## [251] 99.29 99.29 93.45 93.45 93.45 97.89 97.89 89.98 89.98 89.98
## [261] 58.21 58.21 58.21 96.39 96.39 96.39 96.71 96.71 96.71 81.77
## [271] 81.77 81.77 81.77 81.77 81.77 81.77 93.35 93.35 93.35 78.84
## [281] 78.84 78.84 88.78 88.78 88.78 97.99 97.99 97.99 68.00 68.00
## [291] 68.00 97.27 97.27 97.27 97.94 97.94 97.94 95.83 95.83 95.83
## [301] 96.67 96.67 96.67 93.40 93.40 66.67 66.67 66.67 99.94 99.94
## [311] 99.94 98.33 98.33 98.33 78.55 78.55 78.55 74.25 74.25 74.25
## [321] 65.25 65.25 65.25 97.80 97.80 97.80 97.22 97.22 97.22 97.22
## [331] 99.35 99.35 99.35 97.72 97.72 97.72 92.48 92.48 92.48 99.01
## [341] 99.01 99.01 91.57 91.57 91.57 93.37 93.37 93.37 22.69 22.69
## [351] 22.69 22.69 22.69 22.69 22.69 66.18 66.18 66.18 99.02 99.02
## [361] 99.02 98.91 98.91 98.91 70.10 70.10 70.10 65.61 65.61 65.61
## [371] 76.04 76.04 76.04 96.83 96.83 96.83 98.92 98.92 98.92 -0.55
## [381] -0.55 -0.55 66.41 66.41 66.41 97.22 97.22 97.22 92.25 92.25
## [391] 92.25 100.04 97.40 97.40 90.53 90.53 90.53 90.53 90.53 99.94
## [401] 75.88 75.88 75.88 68.78 68.78 68.78 68.78 68.78 68.78 38.15
## [411] 38.15 38.15 96.94 96.94 96.94 97.96 97.96 97.96 99.90 99.90
## [421] 99.90 92.20 92.20 92.20 99.70 99.70 99.70 99.70 96.86 96.86
## [431] 96.86 68.77 68.77 68.77 91.69 91.69 91.69 96.29 96.29 96.29
## [441] 96.74 96.74 96.74 97.80 97.80 97.80 97.80 97.67 97.67 97.67
## [451] 96.01 96.01 96.01 96.17 96.17 96.17 99.60 99.60 99.60 89.16
## [461] 89.16 89.16 75.92 75.92 75.92 98.31 98.31 98.31 82.71 82.71
## [471] 82.71 97.70 97.70 97.70 89.41 89.41 89.41 86.10 86.10 86.10
## [481] 99.22 99.22 99.22 99.22 97.71 97.71 97.71 97.71 65.07 65.07
## [491] 65.07 67.96 67.96 67.96 67.96 67.96 97.15 97.15 97.15 97.98
## [501] 97.98 97.98 67.72 67.72 67.72 97.21 97.21 26.22 95.84 95.84
## [511] 95.84 91.04 91.04 91.04 97.28 97.28 97.28 99.60 99.60 81.29
## [521] 81.29 81.29 64.85 64.85 64.85 95.99 95.99 95.99 99.87 98.24
## [531] 98.24 98.24 99.23 99.23 99.23 98.20 98.20 98.20 97.77 97.77
## [541] 96.31 96.31 96.31 96.31 66.63 66.63 66.63 63.48 63.48 63.48
## [551] 98.84 98.84 98.84 98.17 98.17 98.17 65.71 65.71 65.71 65.71
## [561] 65.71 87.87 97.06 97.06 97.06 98.82 98.82 98.82 93.47 93.47
## [571] 93.47 97.88 97.88 97.88 97.88 73.30 73.30 73.30 73.30 73.30
## [581] 97.84 97.84 97.84 98.01 98.01 98.01 98.35 98.35 98.35 96.45
## [591] 96.45 96.45 97.39 97.39 97.39 97.58 97.58 97.58 37.56 37.56
## [601] 97.92 97.92 97.92 64.88 64.88 64.88 93.58 93.58 93.58 45.35
## [611] 45.35 45.35 85.98 85.98 85.98 96.04 96.04 96.04 73.30 73.30
## [621] 73.30 99.41 99.41 99.41 87.19 87.19 87.19 96.97 96.97 96.97
## [631] 90.57 90.57 98.40 98.40 98.40 98.23 98.23 98.23 97.12 97.12
## [641] 97.12 99.19 99.19 99.19 96.36 96.36 96.36 97.26 97.26 97.26
## [651] 99.03 99.03
##
## -----------
## column Smallcap_Holding
## [1] 2.67 2.67 2.67 7.67 7.67 7.67 7.26 7.26 7.26 17.99 17.99 17.99
## [13] 7.51 7.51 7.51 1.00 1.00 1.00 0.20 0.20 0.20 2.11 2.11 2.11
## [25] 2.71 2.71 2.71 2.71 2.71 65.09 65.09 65.09 65.09 13.40 13.40 13.40
## [37] 5.51 5.51 5.51 3.50 3.50 3.50 1.95 1.95 1.95 2.80 2.80 2.80
## [49] 2.80 26.96 26.96 26.96 26.96 6.21 6.21 6.21 6.21 6.21 6.21 6.21
## [61] 9.27 9.27 9.27 3.06 3.06 3.06 4.41 4.41 4.41 9.69 9.69 9.69
## [73] 4.60 4.60 4.60 3.67 3.67 3.67 15.42 15.42 15.42 70.71 70.71 70.71
## [85] 10.28 10.28 10.28 83.20 83.20 83.20 1.95 1.95 2.63 2.63 2.63 13.45
## [97] 13.45 13.45 4.46 4.46 4.46 7.11 7.11 7.11 7.04 7.04 7.04 0.18
## [109] 0.18 0.18 0.18 14.24 14.24 14.24 66.68 66.68 66.68 15.18 15.18 15.18
## [121] 10.56 10.56 10.56 2.97 2.97 2.97 15.88 15.88 66.34 66.34 66.34 15.48
## [133] 15.48 15.48 13.82 13.82 13.82 1.29 1.29 1.29 1.29 1.29 1.29 1.29
## [145] 1.29 28.77 28.77 28.77 16.89 16.89 16.89 18.16 18.16 18.16 11.82 11.82
## [157] 11.82 31.73 31.73 31.73 13.01 13.01 13.01 3.65 3.65 3.65 71.22 71.22
## [169] 25.09 25.09 25.09 15.18 15.18 15.18 13.55 13.55 13.55 7.97 7.97 7.97
## [181] 25.06 25.06 25.06 14.58 14.58 14.58 0.52 0.52 0.52 10.21 10.21 10.21
## [193] 6.92 6.92 6.92 13.12 13.12 13.12 0.01 0.01 0.01 88.10 88.10 88.10
## [205] 9.26 9.26 9.26 3.01 3.01 3.01 3.01 3.01 72.15 72.15 72.15 12.07
## [217] 12.07 12.07 12.07 12.07 12.07 12.07 7.43 7.43 7.43 26.96 26.96 26.96
## [229] 4.10 4.10 4.10 3.76 3.76 3.76 3.76 3.76 3.76 3.76 70.12 70.12
## [241] 70.12 23.68 23.68 23.68 15.58 15.58 15.58 17.60 17.60 17.60 19.74 19.74
## [253] 19.74 29.76 29.76 29.76 9.04 9.04 9.04 15.32 15.32 22.99 22.99 22.99
## [265] 4.48 4.48 4.48 3.16 3.16 3.16 16.39 16.39 16.39 5.21 5.21 5.21
## [277] 5.21 5.21 5.21 5.21 78.40 78.40 78.40 3.84 3.84 3.84 11.25 11.25
## [289] 11.25 2.28 2.28 2.28 10.53 10.53 10.53 65.90 65.90 65.90 21.11 21.11
## [301] 21.11 15.87 15.87 15.87 11.93 11.93 11.93 7.50 7.50 7.50 4.64 4.64
## [313] 8.90 8.90 8.90 20.62 20.62 20.62 6.46 6.46 6.46 4.69 4.69 4.69
## [325] 19.75 19.75 19.75 3.32 3.32 3.32 12.13 12.13 12.13 12.13 25.95 25.95
## [337] 25.95 8.67 8.67 8.67 88.38 88.38 88.38 25.11 25.11 25.11 16.41 16.41
## [349] 16.41 16.67 16.67 16.67 11.55 11.55 11.55 11.55 11.55 11.55 11.55 4.87
## [361] 4.87 4.87 15.64 15.64 15.64 68.74 68.74 68.74 2.67 2.67 2.67 29.28
## [373] 29.28 29.28 5.16 5.16 5.16 7.02 7.02 7.02 23.92 23.92 23.92 23.36
## [385] 23.36 23.36 12.86 12.86 12.86 -0.01 -0.01 -0.01 9.12 9.12 9.12 16.39
## [397] 16.39 16.39 5.96 5.96 5.96 31.70 31.70 31.70 8.85 8.85 10.04 10.04
## [409] 10.04 10.04 10.04 6.71 6.71 6.71 6.87 6.87 6.87 6.87 6.87 6.87
## [421] 3.32 3.32 3.32 12.73 12.73 12.73 28.08 28.08 28.08 77.45 77.45 77.45
## [433] 26.05 26.05 26.05 26.05 20.19 20.19 20.19 7.23 7.23 7.23 36.36 36.36
## [445] 36.36 26.13 26.13 26.13 1.36 1.36 1.36 13.83 13.83 13.83 13.83 1.57
## [457] 1.57 1.57 25.01 25.01 25.01 16.63 16.63 16.63 14.79 14.79 14.79 8.10
## [469] 8.10 8.10 7.29 7.29 7.29 10.64 10.64 10.64 33.63 33.63 33.63 28.98
## [481] 28.98 28.98 4.70 4.70 4.70 23.71 23.71 23.71 23.66 23.66 23.66 23.66
## [493] 11.48 11.48 1.85 1.85 1.85 1.85 1.49 1.49 1.49 4.87 4.87 4.87
## [505] 4.87 4.87 11.38 11.38 11.38 10.41 10.41 10.41 33.81 33.81 33.81 10.74
## [517] 10.74 10.74 31.08 31.08 3.81 5.74 5.74 5.74 7.36 7.36 7.36 26.08
## [529] 26.08 26.08 7.30 7.30 12.68 12.68 8.06 8.06 8.06 5.02 5.02 5.02
## [541] 21.66 21.66 21.66 28.58 28.58 28.58 14.11 14.11 14.11 12.49 12.49 12.49
## [553] 28.50 28.50 28.50 23.18 23.18 23.18 6.62 6.62 13.39 13.39 13.39 13.39
## [565] 3.55 3.55 3.55 6.02 6.02 6.02 21.20 21.20 21.20 32.77 32.77 32.77
## [577] 7.08 7.08 7.08 1.16 1.16 1.16 1.16 1.16 13.70 13.70 13.70 15.12
## [589] 86.52 86.52 86.52 5.96 5.96 5.96 14.34 14.34 14.34 61.91 61.91 61.91
## [601] 61.91 38.19 38.19 38.19 13.79 13.79 13.79 13.79 13.79 27.88 27.88 27.88
## [613] 23.23 23.23 23.23 11.83 11.83 11.83 15.39 15.39 15.39 21.26 21.26 21.26
## [625] 26.56 26.56 26.56 3.72 3.72 37.72 37.72 37.72 11.72 11.72 11.72 43.25
## [637] 43.25 43.25 3.15 3.15 3.15 18.11 18.11 18.11 73.75 73.75 73.75 24.67
## [649] 24.67 24.67 5.99 5.99 5.99 27.85 27.85 27.85 15.48 15.48 15.48 21.39
## [661] 21.39 21.39 19.19 19.19 21.58 21.58 21.58 25.85 25.85 25.85 17.89 17.89
## [673] 17.89 75.53 75.53 16.13 16.13 16.13 10.39 10.39 10.39 22.88 22.88 22.88
## [685] 13.01 13.01 13.01 19.77 19.77 19.77 8.07 8.07 8.07 43.96 43.96
##
## -----------
## column Debt_Holding
## [1] 13.94 13.94 13.94 25.78 25.78 25.78 0.51 0.51 0.51 13.31 13.31 13.31
## [13] 18.82 18.82 18.82 2.80 2.80 2.80 5.62 5.62 5.62 19.97 19.97 19.97
## [25] 19.97 19.97 1.46 1.46 1.46 1.46 1.78 1.78 1.78 0.75 0.75 0.75
## [37] 21.38 21.38 21.38 21.38 21.38 21.38 21.38 8.12 8.12 8.12 1.03 1.03
## [49] 1.03 26.01 26.01 26.01 0.48 0.48 0.48 0.73 0.73 0.73 0.97 0.97
## [61] 0.97 2.13 2.13 2.13 11.97 11.97 11.97 0.12 0.12 0.12 0.61 0.61
## [73] 0.61 10.38 10.38 10.38 10.38 1.21 1.21 1.21 0.83 0.83 0.83 0.19
## [85] 0.19 0.19 10.15 10.15 10.15 1.73 1.73 1.73 14.84 14.84 14.84 14.84
## [97] 14.84 14.84 14.84 14.84 23.50 23.50 23.50 0.77 0.77 0.77 1.61 1.61
## [109] 1.61 0.53 0.53 0.53 0.22 0.22 0.22 17.08 17.08 17.08 0.26 0.26
## [121] 0.26 0.25 0.25 0.25 12.12 12.12 12.12 21.47 21.47 21.47 0.31 0.31
## [133] 0.31 8.13 8.13 8.13 8.13 8.13 35.87 35.87 35.87 35.87 35.87 35.87
## [145] 35.87 1.71 1.71 1.71 0.45 0.45 0.45 13.90 13.90 13.90 13.90 13.90
## [157] 13.90 13.90 1.17 1.17 1.17 0.37 0.37 0.37 16.19 16.19 16.19 7.16
## [169] 7.16 7.16 7.16 7.16 7.16 7.16 0.39 0.39 0.39 0.40 0.40 0.40
## [181] 10.52 10.52 10.52 0.20 0.20 0.20 0.20 0.20 0.20 27.86 27.86 27.86
## [193] 20.73 20.73 20.73 12.36 12.36 12.36 0.21 0.21 0.21 0.15 0.15 0.15
## [205] 0.15 0.04 0.04 0.04 69.48 69.48 69.48 69.48 69.48 69.48 69.48 11.14
## [217] 11.14 11.14 0.26 0.26 0.26 16.89 16.89 16.89 0.16 0.16 0.16 26.99
## [229] 26.99 26.99 21.59 21.59 21.59 0.37 0.37 0.37 17.89 17.89 17.89 19.48
## [241] 19.48 19.48 0.05 0.05 0.05 0.61 0.61 0.61 18.94 18.94 18.94 1.82
## [253] 1.82 1.82 1.82 1.82 1.82 11.16 11.16 11.16 0.29 0.29 0.29 1.66
## [265] 1.66 1.66 21.55 21.55 21.55 1.12 1.12 1.12 0.13 0.13 0.13 0.99
## [277] 0.99 0.99 4.39 4.39 4.39 19.68 19.68 19.68 5.35 5.35 5.35 2.86
## [289] 2.86 2.86 10.79 10.79 10.79 14.83 14.83 14.83 14.83 14.83 0.03 0.03
## [301] 0.03 26.37 26.37 26.37 29.41 0.07 0.07 0.07 0.64 0.64 0.64 0.23
## [313] 0.23 4.75 4.75 4.75 10.48 10.48 10.48 0.30 0.30 0.30 0.11 0.11
## [325] 0.11 0.11 21.57 21.57 21.57 11.23 11.23 11.23 0.33 0.33 0.33 23.34
## [337] 23.34 23.34 23.34 23.34 8.66 0.09 0.09 0.09 22.31 22.31 22.31 22.31
## [349] 22.31 59.37 59.37 0.09 0.09 0.09 9.63 9.63 9.63 0.10 0.10 0.10
## [361] 10.56 10.56 10.56 19.23 19.23 19.23 3.62 3.62 3.62 1.17 1.17 1.17
## [373] 0.07 0.07 0.07
##
## -----------
## column Average_Maturity
## [1] 7.60 7.60 7.60 5.56 5.56 5.56 3.39 3.39 3.39 6.73 6.73 6.73
## [13] 6.73 6.73 8.19 8.19 8.19 8.19 8.19 8.19 8.19 3.88 3.88 3.88
## [25] 6.00 6.00 6.00 6.00 6.00 7.50 7.50 7.50 7.50 7.50 7.01 7.01
## [37] 7.01 7.01 9.67 9.67 9.67 4.53 4.53 4.53 2.92 2.92 2.92 2.92
## [49] 2.92 3.14 3.14 3.14 12.90 12.90 12.90 3.38 3.38 3.38 4.16 4.16
## [61] 4.16 3.44 3.44 3.44 2.97 2.97 2.97 2.97 2.97 2.97 2.97 4.12
## [73] 4.12 4.12 4.12 4.12 3.49 3.49 3.49 3.49 3.49 3.49 3.49 14.03
## [85] 14.03 14.03 14.03 14.03 14.03 14.03 14.03 14.03 2.97 2.97 2.97 2.97
## [97] 2.97 2.97 11.13 11.13 11.13 13.66 13.66 13.66 3.93 3.93 3.93 13.82
## [109] 13.82 13.82 20.72 20.72 20.72 3.81 3.81 3.81 3.81 3.81 3.81 3.81
## [121] 2.99 2.99 2.99 2.99 2.99 3.59 3.59 3.59 7.30 7.30 7.30 5.13
## [133] 5.13 5.13 5.94 5.94 5.94 3.29 3.29 3.29 4.52 4.52 4.52 4.52
## [145] 4.52 3.42 3.42 3.42 3.42 3.42 3.42 3.42 3.42 3.42 3.42 3.42
## [157] 13.69 13.69 13.69 3.69 3.69 3.69 3.69 3.69 3.69 3.69 3.69 5.89
## [169] 5.89 5.89 3.00 3.00 3.00 3.00 3.00 3.00 3.00 22.05 22.05 22.05
## [181] 22.05 22.05 22.05 22.05 22.05 22.05 22.05 22.05 4.46 4.46 4.46 3.07
## [193] 3.07 3.07 3.07 3.07 3.07 6.45 6.45 6.45 9.26 9.26 9.26 3.77
## [205] 3.77 3.77 3.77 3.77 4.05 4.05 4.05 9.40 9.40 9.40 4.03 4.03
## [217] 4.03 4.03 4.03 4.03 4.03 4.03 4.03 3.65 3.65 3.65 3.43 3.43
## [229] 3.43 6.48 6.48 6.48 4.34 4.34 4.34 4.13 4.13 4.13 4.13 4.13
## [241] 4.13 4.13 4.13 4.13 4.04 4.04 4.04 4.11 4.11 4.11 4.11 4.11
## [253] 4.11 4.11 4.11 4.11 4.11 9.53 9.53 3.15 3.15 3.15 5.67 5.67
## [265] 5.67 2.91 2.91 2.91 2.91 2.91
##
## -----------
## column Sortino_Ratio
## [1] 1.05 1.06 1.37 1.37 1.03 1.03 1.03 1.04 1.04 1.04 1.04 0.89 1.04 1.01 1.01
## [16] 1.04 0.90 0.90 0.90 0.89 0.89 0.91 0.94 0.94 0.93 0.94 0.93 0.94 0.94 0.93
## [31] 0.99 0.99 0.99 0.99 0.99 0.99 0.99 1.05 1.06 1.01 0.90 0.90 0.89 0.90 0.90
## [46] 0.90 0.90 0.90 0.90 0.90 0.90 0.90 0.90 0.89 0.90 0.90 1.03 1.03 1.05 1.02
## [61] 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.06 1.07 1.02 1.02 1.00
## [76] 1.00 1.00 1.00 1.00 1.00 0.90 0.90 0.89 0.95 0.95 0.95 0.95 1.18 1.18 1.18
## [91] 1.20 1.18 1.18 0.95 0.95 1.04 1.05 0.99 0.99 0.99 1.01 1.01 1.01 1.01 1.01
## [106] 1.01 1.20 1.21 1.21 1.00 0.99 1.00 1.00 1.02 1.02 1.02 0.94 0.94 1.06 1.08
## [121] 1.08 1.06 1.06 0.95 0.95 0.96 0.95 1.00 1.00 1.00 0.89
##
## -----------
## column Sharpe_Ratio
## [1] -28.62 -28.62 -28.63 -28.62 7.12 7.19 7.14 7.14 7.08 7.15
## [11] 7.14 7.14 7.39 7.39 7.38 7.39 7.38 7.39 7.38 7.38
## [21] 7.28 7.33 7.19 7.19 7.19 7.19 -28.07 -28.42 7.97 7.97
## [31] 7.97 8.00 7.97 7.97 -5.69 -23.18 -23.91 -23.97 -23.97 -5.69
## [41] -27.26 -27.26 -20.87 -26.98 -27.25 -12.94 -27.12 -6.06 -24.00 -24.03
## [51] -14.85 -24.02 -24.00 -24.03 7.57 7.55 7.55 7.29 7.30 7.29
## [61] 7.29 -26.59 -26.59 -26.59 -26.59 7.13 7.21 7.21 7.13 7.13
## [71] 7.07 7.06 7.07 -26.64 -25.28 -10.86 -26.64 -25.28 -26.64
##
## -----------
## column Average_YTM
## [1] 6.18 6.18 6.18 6.18 6.18 6.18 6.21 6.21 6.21 7.26 7.26 7.26 7.26 7.26 7.24
## [16] 7.24 7.24 7.24 7.24 5.60 5.60 5.60 6.06 6.06 6.06 6.06 6.06 6.06 6.06 6.06
## [31] 6.06 6.06 6.06 6.06 4.97 4.97 4.97 4.97 6.10 6.10 6.10 6.10 6.03 6.03 6.03
## [46] 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 7.29 7.29 7.29
## [61] 7.41 7.41 7.41 5.73 5.73 5.73 6.18 6.18 6.18 6.18 6.18 6.18 6.01 6.01 6.01
## [76] 6.01 6.01 6.01 6.01 6.01 6.01 7.29 7.29 7.29 7.25 7.25 7.25 6.12 6.12 6.12
## [91] 6.12 6.12 6.12 7.74 7.74 7.74 7.74 7.74 7.74 7.74 7.74 7.74 7.26 7.26 7.26
## [106] 6.25 6.25 6.25 6.25 6.25 6.25 6.25 5.88 5.88 5.88 5.88 5.88 5.88 5.88 6.25
## [121] 6.25 6.25 6.25 5.04 5.04 5.04 5.04 5.04 5.04 5.04 5.04 5.05 5.05 7.27 7.27
## [136] 7.27 7.27 7.27 7.27 7.27 6.04 6.04 6.04 6.04 6.04 6.04 6.04 6.04 6.04 6.04
## [151] 6.04 6.04 6.04 5.04 5.04 5.04 5.04 5.04 5.04 7.39 7.39 7.39 7.56 7.56 7.56
## [166] 7.56 7.56 7.56 7.56 6.15 6.15 6.15 7.29 7.29 7.29 7.29 7.29 4.99 4.99 4.99
## [181] 4.99 4.99 6.14 5.97 5.97 5.97 7.28 7.28 7.28 7.28 7.28 7.28 7.28 7.40 7.40
## [196] 7.40 7.40 7.40 7.40 7.40 7.40 7.40 7.40 7.40 6.20 6.20 6.15 6.15 6.15 6.15
## [211] 6.15 6.15 6.15 6.05 6.05 6.05 6.05 6.05 6.05 6.05 6.05 7.46 7.46 7.46 5.06
## [226] 5.06 5.98 5.98 5.98 8.31 8.31 8.31 8.31 8.31 8.03 8.03 8.03 5.05 5.05 5.05
## [241] 5.05 5.05 5.05 5.05 6.15 7.58 7.58 7.58 6.07 6.07 6.07 5.88 5.88 5.88 5.61
## [256] 5.61 5.61 8.41 8.41 8.41 5.85 5.85 5.85 7.33 7.33 7.33 7.33 7.33 6.09 6.09
## [271] 6.09 6.09 6.09 6.09 6.09 6.09 7.99 7.99 7.99 4.95 4.95 4.95 4.95 5.92 5.92
## [286] 5.92 5.92 5.92 7.32 7.32 7.32 5.10 5.10 5.10 5.01 5.01 5.01 5.01 5.01 5.01
##
## -----------
## column Time_since_inception
## [1] 6 6 10 23 23 23 21 21 21 9 22 22 22 20 20 20 21 21 21 21 21 21
##
## -----------
## column Lockin
## [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##
## -----------
## column Minimum_SIP
## [1] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [19] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [37] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [55] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 300 300 300 500
## [73] 500 500 500 500 500 500 500 500 500 500 500 500 500 600 600 600 500 500
## [91] 500 500 300 300 300 300 300 300 300 300 300 500 500 500 500 500 500 500
## [109] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [127] 500 500 750 750 750 500 500 500 500 500 500 500 500 500 500 500 500 500
## [145] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [163] 500 500 500 500
##
## -----------
# small visaulization to understand outliers
exclude_cols <- c("AUM", "NAV", "Minimum_Lumpsum")
mf_data |>
select(-all_of(exclude_cols)) |>
pivot_longer(cols = where(is.numeric)) |>
ggplot(aes(x = value, y = name)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
## Error : The fig.showtext code chunk option must be TRUE
# removing outliers
mf_data_cleaned <- mf_data
for (col in numeric_cols) {
q1 <- quantile(mf_data[[col]], 0.25, na.rm = TRUE)
q3 <- quantile(mf_data[[col]], 0.75, na.rm = TRUE)
iqr_val <- IQR(mf_data[[col]], na.rm = TRUE)
lower <- q1 - 1.5 * iqr_val
upper <- q3 + 1.5 * iqr_val
mf_data_cleaned <- mf_data_cleaned[
(mf_data_cleaned[[col]] >= lower & mf_data_cleaned[[col]] <= upper) |
is.na(mf_data_cleaned[[col]]),
]
}
# finally data is cleaned but still contains na we
# need detect and treat them
sum(is.na(mf_data_cleaned))
## [1] 23
mf_data[is.na(mf_data)] <- 0
# finally store the data into file
write.csv(
mf_data_cleaned,
"D:/semester_projects/R/data_preprocessing_phase/mutual_fund.csv",
row.names = FALSE
)
Interpretation: Outliers were detected using the IQR method and removed to reduce extreme influence on analysis. Some naturally skewed variables were excluded. This improves model stability and ensures that results reflect typical fund behavior rather than rare extreme observations.
# loading the data
mf_data <- read_csv(
"D:/semester_projects/R/data_preprocessing_phase/mutual_fund.csv",
show_col_types = FALSE
)
# Central Tendencies of returns
# CAGR 3Y
mean_cagr_3y <- mf_data |> summarise(mean_cagr_3y = mean(CAGR_3Y, na.rm = TRUE))
median_cagr_3y <- mf_data |>
summarise(
median_cagr_3y = median(CAGR_3Y, na.rm = TRUE)
)
sd_cagr_3y <- mf_data |> summarise(SD = sd(CAGR_3Y, na.rm = TRUE))
iqr_cagr_3y <- mf_data |> summarise(IQR = IQR(CAGR_3Y, na.rm = TRUE))
# CAGR 5Y
mean_cagr_5y <- mf_data |> summarise(mean_cagr_5y = mean(CAGR_5Y, na.rm = TRUE))
median_cagr_5y <- mf_data |> summarise(
median_cagr_5y = median(CAGR_5Y, na.rm = TRUE)
)
sd_cagr_5y <- mf_data |> summarise(SD = sd(CAGR_5Y, na.rm = TRUE))
iqr_cagr_5y <- mf_data |> summarise(IQR = IQR(CAGR_5Y, na.rm = TRUE))
# Absolute_Returns_3M
mean_absolute_returns_3m <- mf_data |>
summarize(Absolute_Returns_3M = mean(Absolute_Returns_3M, na.rm = TRUE))
median_absolute_returns_3m <- mf_data |>
summarize(Absolute_Returns_3M = median(Absolute_Returns_3M, na.rm = TRUE))
sd_absolute_returns_3m <- mf_data |> summarise(
SD = sd(Absolute_Returns_3M, na.rm = TRUE)
)
iqr_absolute_returns_3m <- mf_data |> summarise(
IQR = IQR(Absolute_Returns_3M, na.rm = TRUE)
)
# Absolute_Returns_6M
mean_absolute_returns_6m <- mf_data |>
summarize(Absolute_Returns_6M = mean(Absolute_Returns_6M, na.rm = TRUE))
median_absolute_returns_6m <- mf_data |>
summarize(Absolute_Returns_6M = median(Absolute_Returns_6M, na.rm = TRUE))
sd_absolute_returns_6m <- mf_data |> summarise(
SD = sd(Absolute_Returns_6M, na.rm = TRUE)
)
iqr_absolute_returns_6m <- mf_data |> summarise(
IQR = IQR(Absolute_Returns_6M, na.rm = TRUE)
)
# Absolute_Returns_1Y
mean_absolute_returns_1y <- mf_data |>
summarize(Absolute_Returns_1y = mean(Absolute_Returns_1Y, na.rm = TRUE))
median_absolute_returns_1y <- mf_data |>
summarize(Absolute_Returns_1y = median(Absolute_Returns_1Y, na.rm = TRUE))
sd_absolute_returns_1y <- mf_data |> summarise(
SD = sd(Absolute_Returns_1Y, na.rm = TRUE)
)
iqr_absolute_returns_1y <- mf_data |> summarise(
IQR = IQR(Absolute_Returns_1Y, na.rm = TRUE)
)
central_tendency_and_spread <- data.frame(
Metric = c("CAGR_3Y", "CAGR_5Y", "3M", "6M", "1Y"),
Mean = c(
mean_cagr_3y$mean_cagr_3y,
mean_cagr_5y$mean_cagr_5y,
mean_absolute_returns_3m$Absolute_Returns_3M,
mean_absolute_returns_6m$Absolute_Returns_6M,
mean_absolute_returns_1y$Absolute_Returns_1y
),
Median = c(
median_cagr_3y$median_cagr_3y,
median_cagr_5y$median_cagr_5y,
median_absolute_returns_3m$Absolute_Returns_3M,
median_absolute_returns_6m$Absolute_Returns_6M,
median_absolute_returns_1y$Absolute_Returns_1y
),
SD = c(
sd_cagr_3y$SD,
sd_cagr_5y$SD,
sd_absolute_returns_3m$SD,
sd_absolute_returns_6m$SD,
sd_absolute_returns_1y$SD
),
IQR = c(
iqr_cagr_3y$IQR,
iqr_cagr_5y$IQR,
iqr_absolute_returns_3m$IQR,
iqr_absolute_returns_6m$IQR,
iqr_absolute_returns_1y$IQR
),
stringsAsFactors = FALSE
)
central_tendency_and_spread
## Metric Mean Median SD IQR
## 1 CAGR_3Y 2.9014706 0.40 3.558050 7.3950
## 2 CAGR_5Y 2.3390294 0.40 2.906254 6.1200
## 3 3M 0.2169412 -0.05 0.731940 1.1275
## 4 6M 0.6809412 -0.04 1.441582 2.6600
## 5 1Y 1.9766471 0.00 3.299085 6.6325
Interpretation: Measures like mean, median, standard deviation, and IQR reveal the central tendency and variability of returns. Differences between mean and median indicate skewness, while higher standard deviation reflects greater uncertainty and risk in mutual fund performance.
hist(mf_data$CAGR_3Y, breaks = 30, main = "Distribution of Returns")
## Error : The fig.showtext code chunk option must be TRUE
plot(density(mf_data$CAGR_3Y, na.rm = TRUE))
## Error : The fig.showtext code chunk option must be TRUE
skewness(mf_data$CAGR_3Y, na.rm = TRUE)
## [1] 0.4708283
Interpretation: Return distributions show slight skewness, indicating that most funds deliver moderate returns while a few achieve significantly higher performance. This highlights inequality in fund performance and suggests the presence of outliers or high-performing funds influencing the distribution.
ggplot(mf_data, aes(x = Absolute_Returns_1Y, y = CAGR_3Y)) +
geom_point(alpha = 0.6, color = "blue") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(
title = "Short term returns VS Long term returns",
x = "1-Year Returns (short-term)",
y = "3-Year CAGR (Long-term)"
) +
theme_minimal()
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'
Interpretation: The relationship between short-term and
long-term returns shows moderate correlation. While some consistency
exists, short-term performance does not always translate into long-term
success, indicating that investment decisions should not rely solely on
recent returns.
ggplot(mf_data, aes(x = Volatility, y = Absolute_Returns_1Y)) +
geom_point(
color = "steelblue", alpha = 0.6, size = 1.5
) + # Added alpha to see overlapping dots
# This is the magic line that fixes the labels:
geom_text_repel(
aes(label = Plan),
size = 2.5, # Small but readable font
max.overlaps = 15, # Only labels the 15 least-crowded points (prevents mess)
box.padding = 0.35, # Adds space around labels
point.padding = 0.5, # Adds space between label and dot
segment.color = "grey50" # Adds a tiny line pointing from text to dot
) +
geom_smooth(
method = "lm",
se = FALSE, linetype = "dashed",
color = "red", size = 0.8
) +
labs(
title = "Risk vs. Return Analysis (1-Year)",
subtitle = "Higher volatility generally
trends with higher absolute returns",
x = "Volatility (Risk %)",
y = "Absolute 1-Year Return (%)",
caption = "Red line represents the expected risk-return tradeoff"
) +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'
Interpretation: A general positive relationship exists
between volatility and returns, supporting the risk-return tradeoff
theory. However, the presence of scattered points indicates variability,
meaning higher risk does not always guarantee higher returns for all
funds.
mf_long <- mf_data |>
select(
Volatility, CAGR_3Y,
CAGR_5Y, Absolute_Returns_1Y,
Absolute_Returns_3M, Absolute_Returns_6M
) |>
pivot_longer(
cols = c(
CAGR_3Y, CAGR_5Y,
Absolute_Returns_1Y,
Absolute_Returns_3M, Absolute_Returns_6M
),
names_to = "Return_Type",
values_to = "Return_Value"
)
# Plot
ggplot(mf_long, aes(x = Volatility, y = Return_Value)) +
geom_point(alpha = 0.6, color = "orange") +
geom_smooth(method = "lm", se = FALSE, color = "red", linetype = "dashed") +
facet_wrap(~Return_Type, scales = "free_y") + # 🔥 Subplots here
labs(
title = "Risk vs Return Across Different Time Horizons",
x = "Volatility",
y = "Returns"
) +
theme_minimal()
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'
Interpretation: Across different time horizons,
risk-return patterns vary, with long-term returns showing more stable
relationships compared to short-term returns. This suggests that
long-term investments provide more predictable performance compared to
short-term fluctuations.
mf_data <- mf_data |>
mutate(
Net_CAGR_3Y = CAGR_3Y - Expense_Ratio,
Net_CAGR_5Y = CAGR_5Y - Expense_Ratio,
Net_Return_1Y = Absolute_Returns_1Y - Expense_Ratio
)
Interpretation: Net returns were calculated by subtracting expense ratios from gross returns, reflecting actual investor gains. This adjustment provides a more realistic measure of performance, highlighting the impact of costs on overall investment returns.
mf_data <- mf_data |>
mutate(
Custom_Risk_Adjusted = round(
Net_CAGR_3Y / (Volatility + 0.01) * Sortino_Ratio, 2
)
)
head(mf_data$Custom_Risk_Adjusted, 20)
## [1] -0.62 19.81 -0.62 2.41 -0.62 -0.62 18.40 18.40 -0.63 19.68 1.56 -0.63
## [13] -0.12 3.67 -0.11 -0.12 -0.01 -0.07 2.87 3.18
Interpretation: A custom metric combining return, volatility, and Sortino ratio was developed to better evaluate performance. It emphasizes downside risk, offering a more investor-focused measure compared to traditional metrics like Sharpe ratio.
mf_data <- mf_data |>
mutate(
Risk_Score = round(scale(Volatility) +
scale(1 / (Sortino_Ratio + 0.01)) +
scale(1 / (Sharpe_Ratio + 0.01)), 0)
)
head(mf_data$Risk_Score)
## [,1]
## [1,] -2
## [2,] -2
## [3,] -2
## [4,] 1
## [5,] -2
## [6,] -2
Interpretation: A composite risk score was created using volatility, Sharpe, and Sortino ratios. This provides a holistic assessment of risk, capturing multiple dimensions rather than relying on a single metric, leading to more balanced evaluation.
mf_data <- mf_data |>
mutate(
Total_Equity_Exposure = Largecap_Holding +
Midcap_Holding +
Smallcap_Holding,
Equity_to_Debt_Ratio = Equity_Holding / (Debt_Holding + 0.01)
)
head(mf_data$Total_Equity_Exposure)
## [1] 79.55 79.55 79.55 79.55 79.55 79.55
head(mf_data$Equity_to_Debt_Ratio)
## [1] 17.80019 17.80019 17.80019 17.80019 17.80019 17.80019
Interpretation: Equity exposure and allocation ratios were derived from portfolio holdings. These metrics help understand fund composition and risk profile, as higher equity allocation typically indicates higher risk and potential for greater returns.
normalize <- function(x) {
(x - min(x, na.rm = TRUE)) /
(max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}
mf_data <- mf_data |>
mutate(
Norm_Return = round(normalize(Net_CAGR_3Y), 2),
Norm_Risk = round(normalize(Volatility), 2),
Norm_Cost = round(normalize(Expense_Ratio), 2),
Norm_Score = round(normalize(Custom_Risk_Adjusted), 2)
)
head(mf_data$Norm_Return, 20)
## [1] 0.05 0.99 0.05 0.91 0.05 0.05 0.91 0.91 0.05 0.99 0.91 0.05 0.06 0.92 0.06
## [16] 0.06 0.06 0.06 1.00 0.93
Interpretation: Normalization scaled all variables to a common range, ensuring comparability across features. This step is essential for combining different metrics into composite scores and prevents variables with larger scales from dominating analysis.
# finding the linear relationship between risk and return using CAGR
mf_lm_model <- lm(Norm_Return ~ Norm_Risk, data = mf_data)
summary(mf_lm_model)
##
## Call:
## lm(formula = Norm_Return ~ Norm_Risk, data = mf_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4221 -0.3786 -0.2622 0.5042 0.6436
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.47206 0.04536 10.41 <2e-16 ***
## Norm_Risk -0.57840 0.25365 -2.28 0.0238 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4314 on 168 degrees of freedom
## Multiple R-squared: 0.03002, Adjusted R-squared: 0.02425
## F-statistic: 5.2 on 1 and 168 DF, p-value: 0.02385
ggplot(mf_data, aes(x = Norm_Risk, y = Norm_Return)) +
geom_point(alpha = 0.5, color = "orange") +
geom_smooth(method = "lm", color = "black", fill = "lightgreen") +
labs(
title = "Relationship between risk and return",
subtitle = paste("R-squared: ", round(summary(mf_lm_model)$r.squared, 3)),
x = "Normalized Risk",
y = "Normalized Returns"
)
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'
Interpretation: Regression analysis shows a positive
but not strong relationship between risk and return. The R^2 value
indicates that risk explains only part of return variability, suggesting
other factors also influence fund performance.
return_cutoff <- median(mf_data$Norm_Return)
risk_cutoff <- median(mf_data$Norm_Risk)
inefficient_funds <- mf_data[mf_data$Norm_Risk > risk_cutoff & mf_data$Norm_Return < return_cutoff, "Name"]
unique(inefficient_funds)
## # A tibble: 26 × 1
## Name
## <chr>
## 1 Bandhan Corp Bond Fund(M-IDCW)
## 2 Bandhan Corp Bond Fund(H-IDCW)
## 3 Aditya Birla SL Low Duration Fund(W-IDCW Reinv)
## 4 Aditya Birla SL Low Duration Fund(DD-IDCW)
## 5 Aditya Birla SL Low Duration Fund(W-IDCW)
## 6 Aditya Birla SL Floating Rate Fund(DD-IDCW)
## 7 Aditya Birla SL Floating Rate Fund(W-IDCW)
## 8 Bandhan Banking and PSU Fund(DD-IDCW)
## 9 Bandhan Banking and PSU Fund(M-IDCW)
## 10 Bandhan Banking and PSU Fund(FN-IDCW)
## # ℹ 16 more rows
Interpretation: Funds with high risk and low returns were identified as inefficient. These funds fail to compensate investors adequately for the level of risk taken and may not be suitable for investment compared to better-performing alternatives.
sharpe_ranks <- mf_data[order(-mf_data$Sharpe_Ratio), c("Name", "Sharpe_Ratio")]
colnames(sharpe_ranks) <- c("name","value")
sortino_ranks <- mf_data[order(-mf_data$Sortino_Ratio), c("Name", "Sortino_Ratio")]
colnames(sortino_ranks) <- c("name","value")
comparison <- data.frame(
Rank = 1:10,
Top_Sharpe = sharpe_ranks$name[1:10],
Top_Sortino = sortino_ranks$name[1:10]
)
comparison
## Rank Top_Sharpe
## 1 1 Axis Ultra Short Duration Fund(W-IDCW)
## 2 2 Axis Ultra Short Duration Fund(W-IDCW)
## 3 3 Axis Ultra Short Duration Fund(M-IDCW)
## 4 4 Axis Ultra Short Duration Fund(M-IDCW)
## 5 5 SBI Savings Fund(IDCW-Payout)
## 6 6 SBI Savings Fund(IDCW)
## 7 7 SBI Savings Fund
## 8 8 SBI Savings Fund(DD-IDCW)
## 9 9 ICICI Pru Money Market Fund(IDCW-Payout)
## 10 10 ICICI Pru Money Market Fund(IDCW)
## Top_Sortino
## 1 SBI Savings Fund(IDCW-Payout)
## 2 SBI Savings Fund(IDCW)
## 3 Nippon India Low Duration Fund(IDCW)
## 4 Nippon India Low Duration Fund
## 5 Nippon India Low Duration Fund(DD-IDCW)
## 6 Nippon India Low Duration Fund(Q-IDCW)
## 7 Nippon India Low Duration Fund(W-IDCW)
## 8 Nippon India Low Duration Fund
## 9 Nippon India Low Duration Fund(M-IDCW)
## 10 Nippon India Low Duration Fund(M-IDCW)
Interpretation: Differences in rankings between Sharpe and Sortino ratios highlight the importance of downside risk. Sortino provides a more realistic evaluation by penalizing only negative volatility, making it more relevant for risk-averse investors.
ggplot(mf_data,aes(x = Volatility, y = SEBI_Risk_Category, fill = SEBI_Risk_Category))+
geom_violin(trim = FALSE, alpha = 0.5)+
geom_boxplot(width = 0.1, color="black", outlier.shape = NA)+
geom_jitter(width = 0.1, alpha = 0.2)+
theme_minimal()+
labs(
title = "Do SEBI Labels match Actual Volatility? ",
x = "Observed Volatility",
y = "SEBI Riskometer category"
)
## Error : The fig.showtext code chunk option must be TRUE
Interpretation: Comparison of SEBI risk categories with
actual volatility shows partial alignment. Some overlap indicates
inconsistencies, suggesting that official classifications may not always
accurately reflect real risk levels.
ggplot(central_tendency_and_spread, aes(x = SD, y = Mean, label = Metric)) +
geom_point(size = 2, color = "steelblue")+
geom_smooth(method = "lm", se = FALSE, linetype = "dashed", color = "grey")+
geom_text(aes(label = Metric),vjust = -1.2)+
theme_minimal()+
labs(
title = "Risk vs Return Stability",
x = "Observed Risk",
y = "Average Returns"
)
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
Interpretation: The relationship between risk and
return across different metrics shows moderate stability. However,
variability indicates that returns are not fully predictable,
emphasizing the uncertain nature of financial markets.
mf_data |> mutate(Fee_Quartile = as.factor(ntile(Expense_Ratio, 4))) |>
group_by(Fee_Quartile) |>
summarise(Avg_Expense = mean(Expense_Ratio),
Avg_Net_Return = mean(CAGR_3Y)) |>
# knitr::kable(caption = "Performance decay by expense Quartile")
ggplot(aes(x = Fee_Quartile, y = Avg_Net_Return, fill = Avg_Net_Return)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = round(Avg_Net_Return, 2)), vjust = -0.5, size = 4) +
scale_fill_gradient(low = "#FF6B6B", high = "#00DAC6") + # Red for low, Teal for high
theme_minimal() +
labs(title = "Average Net Return (CAGR) by Expense Quartile",
subtitle = "Quartile 1: Cheapest Funds | Quartile 4: Most Expensive Funds",
x = "Fee Quartile (Low to High Expense)",
y = "Average 3Y CAGR (%)")
## Error : The fig.showtext code chunk option must be TRUE
Interpretation: Higher expense ratios reduce net
returns, negatively affecting fund performance. This highlights the
importance of cost efficiency, as even small differences in fees can
significantly impact long-term investment outcomes.
scale_min_max <- function(x) {
(x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}
# adding SD column to mutual funds data
mf_data <- mf_data |>
rowwise() |>
mutate(
SD = sd(c(Absolute_Returns_3M, Absolute_Returns_6M, Absolute_Returns_1Y, CAGR_5Y, CAGR_3Y), na.rm = TRUE)
) |>
ungroup()
cagr_3y_sd <- central_tendency_and_spread |>
filter(Metric == "CAGR_3Y") |>
pull(SD)
# creating an optimized mutual fund data frame
mf_optimized <- mf_data |>
mutate(
score_return = scale_min_max(CAGR_3Y),
score_risk = 1 - (SD / max(cagr_3y_sd)),
score_cost = 1 - scale_min_max(Expense_Ratio)
)
mf_optimized <- mf_optimized |>
mutate(Final_Score = (score_return *0.40) +
(score_risk * 0.40)+
(score_cost * 0.20))
Interpretation: Combining return, risk, and cost into a weighted score identifies optimal funds. This approach balances multiple factors, providing a more comprehensive evaluation compared to relying on a single metric.
mf_optimized <- mf_data |>
mutate(
Return_cost_Ratio = CAGR_3Y / Expense_Ratio
) |>
arrange(desc(Return_cost_Ratio))
head(mf_optimized,6)
## # A tibble: 6 × 49
## Name Sub_Category Plan AUM CAGR_3Y CAGR_5Y Absolute_Returns_3M
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Mirae Asset Liq… Liquid Fund IDCW 12395. 7.58 6.32 -1.39
## 2 Mirae Asset Liq… Liquid Fund IDCW 12395. 7.58 6.32 -1.39
## 3 Mirae Asset Liq… Liquid Fund IDCW 12395. 7.58 6.32 -1.39
## 4 Mirae Asset Liq… Liquid Fund IDCW 12395. 7.58 6.32 0.07
## 5 Mirae Asset Liq… Liquid Fund Grow… 12395. 6.98 6.01 1.42
## 6 Axis Liquid Fun… Liquid Fund IDCW 43636. 7.58 6.32 -1.39
## # ℹ 42 more variables: Absolute_Returns_1Y <dbl>, Absolute_Returns_6M <dbl>,
## # SEBI_Risk_Category <chr>, Volatility <dbl>, Category_St_Dev <dbl>,
## # Other_Holdings <dbl>, Largecap_Holding <dbl>, Midcap_Holding <dbl>,
## # Equity_Holding <dbl>, Smallcap_Holding <dbl>, Debt_Holding <dbl>,
## # Category_YTM <dbl>, PE_Ratio <dbl>, Average_Maturity <dbl>,
## # Category_PE_Ratio <dbl>, Sortino_Ratio <dbl>, Sharpe_Ratio <dbl>,
## # Average_YTM <dbl>, Benchmark <dbl>, Time_since_inception <dbl>, …
Interpretation: The return-to-cost ratio identifies funds delivering higher returns relative to their expense ratio. This metric highlights cost-efficient investments, helping investors maximize returns while minimizing expenses.
# 1. Create a Risk-Return Efficiency Metric
mf_optimized <- mf_data |>
rowwise() |>
mutate(
Calculated_SD = sd(c(Absolute_Returns_3M, Absolute_Returns_6M, Absolute_Returns_1Y, CAGR_3Y), na.rm = TRUE),
Efficiency_Ratio = ifelse(Calculated_SD > 0, CAGR_3Y / Calculated_SD, 0)
) |>
ungroup()
# 2. Identify the Optimal Subset
optimal_subset <- mf_optimized |>
arrange(desc(Efficiency_Ratio)) |>
head(10) |>
select(Name, CAGR_3Y, Calculated_SD, Expense_Ratio, Efficiency_Ratio)
# 3. Output the Subset Table
knitr::kable(optimal_subset,
caption = "Optimal Subset Identified via Risk-Adjusted Optimization")
| Name | CAGR_3Y | Calculated_SD | Expense_Ratio | Efficiency_Ratio |
|---|---|---|---|---|
| DSP Liquidity Fund | 6.99 | 2.655221 | 0.13 | 2.632549 |
| Axis Liquid Fund | 7.01 | 2.668762 | 0.11 | 2.626686 |
| Aditya Birla SL Liquid Fund | 7.03 | 2.677573 | 0.21 | 2.625511 |
| ICICI Pru Liquid Fund(IDCW-Payout) | 6.96 | 2.654242 | 0.20 | 2.622218 |
| ICICI Pru Liquid Fund(IDCW) | 6.96 | 2.654242 | 0.20 | 2.622218 |
| ICICI Pru Liquid Fund | 6.96 | 2.654242 | 0.20 | 2.622218 |
| HDFC Liquid Fund | 6.94 | 2.647426 | 0.20 | 2.621414 |
| Mirae Asset Liquid Fund | 6.98 | 2.663549 | 0.09 | 2.620564 |
| Nippon India Liquid Fund | 6.99 | 2.668262 | 0.20 | 2.619682 |
| ICICI Pru Money Market Fund(IDCW-Payout) | 7.44 | 3.064032 | 0.21 | 2.428173 |
# 4. Visualize the 'Optimal Frontier'
ggplot(mf_optimized, aes(x = Calculated_SD, y = CAGR_3Y)) +
geom_point(aes(color = Expense_Ratio), alpha = 0.3) +
# Highlight the Optimal Subset in a distinct color from your theme
geom_point(data = optimal_subset, color = "#EA80FC", size = 4) +
geom_text_repel(data = optimal_subset, aes(label = Name), size = 3, color = "#B8BCC2") +
scale_color_gradient(low = "#00DAC6", high = "#FF6B6B") +
theme_minimal() +
labs(title = "Risk-Return Optimization Frontier",
subtitle = "Highlighted funds represent the mathematically optimal subset",
x = "Calculated Risk",
y = "Annualized Return")
## Error : The fig.showtext code chunk option must be TRUE
Interpretation: An optimal subset of funds was
identified based on efficiency ratios, representing the best trade-offs
between risk and return. These funds lie closer to the efficient
frontier and are considered superior investment choices.
# Ensure we are using the dataframe, not just one column
mf_sensitivity <- mf_optimized |>
mutate(
Rank_Balanced = rank(-(Norm_Return * 0.4 + Norm_Risk * 0.4 + Norm_Score * 0.2)),
Rank_Aggressive = rank(-(Norm_Return * 0.7 + Norm_Risk * 0.15 + Norm_Score * 0.15)),
Rank_Conservative = rank(-(Norm_Return * 0.2 + Norm_Risk * 0.6 + Norm_Score * 0.2))
)
# Check the results
head(mf_sensitivity |> select(Name, starts_with("Rank_")))
## # A tibble: 6 × 4
## Name Rank_Balanced Rank_Aggressive Rank_Conservative
## <chr> <dbl> <dbl> <dbl>
## 1 HDFC Liquid Fund(M-IDCW) 166. 152. 164.
## 2 HDFC Liquid Fund(DD-IDCW) 1 1 3
## 3 HDFC Liquid Fund(W-IDCW) 166. 152. 164.
## 4 HDFC Liquid Fund 66 64 67
## 5 HDFC Liquid Fund(M-IDCW Reinv) 166. 152. 164.
## 6 HDFC Liquid Fund(W-IDCW) 166. 152. 164.
Interpretation: Fund rankings change when weights assigned to return, risk, and cost vary. This indicates that rankings are subjective and depend on investor preferences, emphasizing the need for personalized investment strategies.
# creating a model
returns_model <- lm(CAGR_3Y ~ CAGR_5Y + Volatility + Expense_Ratio, data = mf_data)
# generating predictions adding column for predicted data
mf_data <- mf_data |> mutate(Predicted_Returns = predict(returns_model, newdata = mf_data))
mf_data |> select(Name, CAGR_3Y, Predicted_Returns) |> head(5)
## # A tibble: 5 × 3
## Name CAGR_3Y Predicted_Returns
## <chr> <dbl> <dbl>
## 1 HDFC Liquid Fund(M-IDCW) -0.03 0.139
## 2 HDFC Liquid Fund(DD-IDCW) 7.58 7.69
## 3 HDFC Liquid Fund(W-IDCW) -0.03 0.103
## 4 HDFC Liquid Fund 6.94 7.27
## 5 HDFC Liquid Fund(M-IDCW Reinv) -0.03 0.139
Interpretation: A regression model was used to predict returns based on historical variables. While it provides useful insights, predictions are limited due to market uncertainty and the dynamic nature of financial data.
model_summary <- summary(returns_model)
# Extracting the fit metrics
cat("Percentage of variability explained:", round(model_summary$r.squared * 100, 2), "%\n")
## Percentage of variability explained: 96.46 %
cat("Adjusted R-squared:", round(model_summary$adj.r.squared * 100, 2), "%\n")
## Adjusted R-squared: 96.4 %
cat("Average Prediction Error:", round(model_summary$sigma, 2), "%\n")
## Average Prediction Error: 0.68 %
Interpretation: R^2 and adjusted R^2 indicate the proportion of variance explained by the model. Moderate values suggest that while the model captures some relationships, a significant portion of variability remains unexplained.
poly_model <- lm(CAGR_3Y ~ poly(Volatility, 2) + Expense_Ratio, data = mf_data)
summary(poly_model)
##
## Call:
## lm(formula = CAGR_3Y ~ poly(Volatility, 2) + Expense_Ratio, data = mf_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.690 -3.071 -1.959 4.047 5.439
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6732 1.1295 2.367 0.0191 *
## poly(Volatility, 2)1 -7.8766 3.7077 -2.124 0.0351 *
## poly(Volatility, 2)2 2.7587 4.3459 0.635 0.5264
## Expense_Ratio 0.7851 3.7701 0.208 0.8353
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.536 on 166 degrees of freedom
## Multiple R-squared: 0.0299, Adjusted R-squared: 0.01236
## F-statistic: 1.705 on 3 and 166 DF, p-value: 0.1679
Interpretation: Polynomial regression captures nonlinear relationships between risk and return more effectively than linear models. This reflects real-world scenarios where returns may increase at a decreasing rate with higher risk.
# R-squared
summary(returns_model)$adj.r.squared
## [1] 0.9639697
# RMSE
sqrt(mean(residuals(returns_model)^2))
## [1] 0.667384
# Residual Plot
plot(returns_model, which = 1)
## Error : The fig.showtext code chunk option must be TRUE
Interpretation: RMSE and residual analysis show
prediction errors and model fit. Larger errors indicate limited
predictive accuracy, highlighting the challenges of forecasting
financial returns accurately.
mf_ranked <- mf_data |>
mutate(
# Normalize Return (Higher is Better)
ret_score = (CAGR_3Y - min(CAGR_3Y)) / (max(CAGR_3Y) - min(CAGR_3Y)),
# Normalize Risk (Lower Volatility is Better)
risk_score = 1 - (Volatility - min(Volatility)) / (max(Volatility) - min(Volatility)),
# Normalize Cost (Lower Expense is Better)
cost_score = 1 - (Expense_Ratio - min(Expense_Ratio)) / (max(Expense_Ratio) - min(Expense_Ratio))
) |>
mutate(
# Final Weighted Score
Final_Composite_Score = (ret_score * 0.5) + (risk_score * 0.3) + (cost_score * 0.2)
) |>
arrange(desc(Final_Composite_Score))
Interpretation: A composite scoring model combining return, risk, and cost provides an overall ranking of funds. This method simplifies decision-making by integrating multiple performance indicators into a single score.
# Model 1: Simple Linear Regression
linear_model <- lm(CAGR_3Y ~ Volatility, data = mf_data)
# Model 2: Nonlinear (Polynomial Degree 2) Regression
nonlinear_model <- lm(CAGR_3Y ~ poly(Volatility, 2), data = mf_data)
# 2. Compare them using ANOVA
anova_results <- anova(linear_model, nonlinear_model)
print(anova_results)
## Analysis of Variance Table
##
## Model 1: CAGR_3Y ~ Volatility
## Model 2: CAGR_3Y ~ poly(Volatility, 2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 168 2081.1
## 2 167 2076.1 1 4.9844 0.4009 0.5275
# 3. Compare Accuracy Metrics
metrics <- data.frame(
Model = c("Linear", "Nonlinear"),
R_Squared = c(summary(linear_model)$adj.r.squared, summary(nonlinear_model)$adj.r.squared),
RMSE = c(sqrt(mean(linear_model$residuals^2)), sqrt(mean(nonlinear_model$residuals^2)))
)
print(metrics)
## Model R_Squared RMSE
## 1 Linear 0.02152310 3.498787
## 2 Nonlinear 0.01802156 3.494594
ggplot(mf_data, aes(x = Volatility, y = CAGR_3Y)) +
geom_point(alpha = 0.4, color = "gray") +
geom_smooth(method = "lm", formula = y ~ x, color = "#EA80FC", se = FALSE, linetype = "dashed") + # Linear
geom_smooth(method = "lm", formula = y ~ poly(x, 2), color = "#00DAC6", se = FALSE) + # Nonlinear
labs(title = "Linear vs. Nonlinear Fit",
subtitle = "Nonlinear captures the diminishing returns of high risk",
x = "Volatility", y = "CAGR 3Y") +
theme_minimal()
## Error : The fig.showtext code chunk option must be TRUE
Interpretation: Nonlinear models slightly outperform
linear models in capturing complex relationships. However, the
improvement is limited, suggesting that increasing model complexity does
not always significantly enhance predictive performance.