# packages
library(readr)
library(stringr)
library(tidyr) # reshaping data
library(dplyr) # data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2) # data visualization
## Warning: package 'ggplot2' was built under R version 4.5.3
library(e1071) # statistics & machine learning
## Warning: package 'e1071' was built under R version 4.5.3
##
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
##
## element
library(ggrepel) # ggplot2 + better labels
## Warning: package 'ggrepel' was built under R version 4.5.3
# Import the raw mutual-fund export; the first row of the file supplies the
# column names.
mf_data <- readr::read_csv(
  file = "D:/semester_projects/R/mutual_funds_data.csv",
  col_names = TRUE
)
## New names:
## Rows: 1407 Columns: 37
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (32): Name, Sub Category, Plan, CAGR 3Y, CAGR 5Y, Absolute Returns - 3M,... dbl
## (2): Category St Dev, Time since inception num (2): AUM, NAV lgl (1): ...37
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...37`
# Print the tibble to preview the imported data.
mf_data
## # A tibble: 1,407 × 37
## Name `Sub Category` Plan AUM `CAGR 3Y` `CAGR 5Y` Absolute Returns - 3…¹
## <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 Parag… Flexi Cap Fund Grow… 1.34e5 18.3 16.75 -8.75
## 2 Parag… Flexi Cap Fund IDCW 1.34e5 - - -8.75
## 3 Parag… Flexi Cap Fund IDCW 1.34e5 - - -8.75
## 4 HDFC … Balanced Adva… IDCW 1.08e5 8.52 7.74 -9.11
## 5 HDFC … Balanced Adva… IDCW 1.08e5 8.52 7.74 -9.11
## 6 HDFC … Balanced Adva… Grow… 1.08e5 16.36 16.7 -7.59
## 7 HDFC … Flexi Cap Fund IDCW 1.00e5 10.83 9.79 -16.26
## 8 HDFC … Flexi Cap Fund IDCW 1.00e5 10.83 9.79 -16.26
## 9 HDFC … Flexi Cap Fund Grow… 1.00e5 20.04 19.42 -9.64
## 10 HDFC … Mid Cap Fund IDCW 9.43e4 16.14 13.25 -14.61
## # ℹ 1,397 more rows
## # ℹ abbreviated name: ¹`Absolute Returns - 3M`
## # ℹ 30 more variables: `Absolute Returns - 1Y` <chr>,
## # `Absolute Returns - 6M` <chr>, `SEBI Risk Category` <chr>,
## # Volatility <chr>, `Category St Dev` <dbl>, `% Other Holdings` <chr>,
## # `% Largecap Holding` <chr>, `% Midcap Holding` <chr>,
## # `% Equity Holding` <chr>, `% Smallcap Holding` <chr>, …
# Column names that contain a "%" sign (the pattern matches anywhere in the
# name, not only at the start)
grep("%", colnames(mf_data), value = TRUE)
## [1] "% Other Holdings" "% Largecap Holding" "% Midcap Holding"
## [4] "% Equity Holding" "% Smallcap Holding" "% Debt Holding"
# Normalise the column names so they can be used without back-ticks:
#   * drop "%" and "-"
#   * turn every space into an underscore
#   * collapse runs of underscores into one
#   * strip underscores at either end
# and finally discard the unnamed placeholder column(s) that readr labels
# "...<n>".
clean_mf_data <- mf_data |>
  rename_with(\(nm) {
    nm |>
      str_replace_all("%", "") |>
      str_replace_all(" ", "_") |>
      str_replace_all("-", "") |>
      # collapses more than one underscore
      str_replace_all("__+", "_") |>
      # str_remove_all(), not str_remove(): the latter strips only the FIRST
      # match, so a name with both a leading and a trailing underscore would
      # keep the trailing one
      str_remove_all("^_|_$")
  }) |>
  select(-starts_with("..."))
# Show every column with its type and leading values after the renaming.
glimpse(clean_mf_data)
## Rows: 1,407
## Columns: 36
## $ Name <chr> "Parag Parikh Flexi Cap Fund", "Parag Parikh Flex…
## $ Sub_Category <chr> "Flexi Cap Fund", "Flexi Cap Fund", "Flexi Cap Fu…
## $ Plan <chr> "Growth", "IDCW", "IDCW", "IDCW", "IDCW", "Growth…
## $ AUM <dbl> 134253.17, 134253.17, 134253.17, 107589.67, 10758…
## $ CAGR_3Y <chr> "18.3", "-", "-", "8.52", "8.52", "16.36", "10.83…
## $ CAGR_5Y <chr> "16.75", "-", "-", "7.74", "7.74", "16.7", "9.79"…
## $ Absolute_Returns_3M <chr> "-8.75", "-8.75", "-8.75", "-9.11", "-9.11", "-7.…
## $ Absolute_Returns_1Y <chr> "0.08", "-", "-", "-5.8", "-5.8", "0.65", "-6.33"…
## $ Absolute_Returns_6M <chr> "-7.25", "-", "-", "-8.1", "-8.1", "-5.02", "-14.…
## $ SEBI_Risk_Category <chr> "Very High", "Very High", "Very High", "Very High…
## $ Volatility <chr> "8.76", "8.17", "8.17", "8.63", "8.63", "8.63", "…
## $ Category_St_Dev <dbl> 13.52, 13.52, 13.52, 8.25, 8.25, 8.25, 13.52, 13.…
## $ Other_Holdings <chr> "-", "-", "-", "-", "-", "-", "-", "-", "-", "-",…
## $ Largecap_Holding <chr> "63.01", "63.01", "63.01", "54.22", "54.22", "54.…
## $ Midcap_Holding <chr> "2.17", "2.17", "2.17", "7.47", "7.47", "7.47", "…
## $ Equity_Holding <chr> "78.36", "78.36", "78.36", "69.36", "69.36", "69.…
## $ Smallcap_Holding <chr> "2.67", "2.67", "2.67", "7.67", "7.67", "7.67", "…
## $ Debt_Holding <chr> "13.94", "13.94", "13.94", "25.78", "25.78", "25.…
## $ Category_YTM <chr> "-", "-", "-", "6.56", "6.56", "6.56", "-", "-", …
## $ PE_Ratio <chr> "17.51", "17.51", "17.51", "19", "19", "19", "24.…
## $ Average_Maturity <chr> "-", "-", "-", "7.6", "7.6", "7.6", "-", "-", "-"…
## $ Category_PE_Ratio <chr> "27.28", "27.28", "27.28", "25.11", "25.11", "25.…
## $ Sortino_Ratio <chr> "-0.02", "-0.25", "-0.25", "-0.01", "-0.01", "-0.…
## $ Sharpe_Ratio <chr> "-0.17", "-2.82", "-2.82", "-0.13", "-0.13", "-0.…
## $ Average_YTM <chr> "-", "-", "-", "7.01", "7.01", "7.01", "-", "-", …
## $ Benchmark <chr> "NIFTY 500 - TRI", "NIFTY 500 - TRI", "NIFTY 500 …
## $ Time_since_inception <dbl> 155, 6, 6, 159, 159, 159, 159, 159, 159, 159, 159…
## $ Expense_Ratio <chr> "0.63", "0.63", "0.63", "0.77", "0.77", "0.77", "…
## $ NAV <dbl> 87.00, 87.00, 87.00, 41.82, 41.82, 535.18, 82.81,…
## $ AMC <chr> "PPFAS Asset Management Pvt. Ltd.", "PPFAS Asset …
## $ Minimum_Lumpsum <chr> "1,000.00", "1,000.00", "1,000.00", "100", "Not a…
## $ Lockin <chr> "-", "-", "-", "-", "-", "-", "-", "-", "-", "-",…
## $ Fund_Manager <chr> "Rajeev Thakkar+3 others", "Rajeev Thakkar+3 othe…
## $ Exit_Load <chr> "2", "1", "1", "1", "1", "1", "1", "1", "1", "1",…
## $ SIP_Investment <chr> "Allowed", "Allowed", "Allowed", "Allowed", "Not …
## $ Minimum_SIP <chr> "3,000.00", "3,000.00", "3,000.00", "100", "100",…
# Convert the measure columns that were read as text into numbers.
# Columns are picked by substring (contains() ignores case by default, so
# "Nav" also selects NAV). "%" and thousands separators are stripped before
# parsing; anything that still is not a number, such as the "-" placeholder,
# becomes NA, and the resulting coercion warnings are silenced on purpose.
clean_mf_data <- clean_mf_data |>
  mutate(
    across(
      .cols = contains(
        c(
          "CAGR", "Returns", "Expense", "Sharpe", "Sortino", "Nav", "AUM",
          "Large", "Mid", "Equity", "Smallcap", "Debt", "Average", "Absolute"
        )
      ),
      .fns = \(x) suppressWarnings(as.numeric(str_remove_all(x, "[%,]")))
    )
  )
# Print the result to check which columns are now numeric.
clean_mf_data
## # A tibble: 1,407 × 36
## Name Sub_Category Plan AUM CAGR_3Y CAGR_5Y Absolute_Returns_3M
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Parag Parikh F… Flexi Cap F… Grow… 1.34e5 18.3 16.8 -8.75
## 2 Parag Parikh F… Flexi Cap F… IDCW 1.34e5 NA NA -8.75
## 3 Parag Parikh F… Flexi Cap F… IDCW 1.34e5 NA NA -8.75
## 4 HDFC Balanced … Balanced Ad… IDCW 1.08e5 8.52 7.74 -9.11
## 5 HDFC Balanced … Balanced Ad… IDCW 1.08e5 8.52 7.74 -9.11
## 6 HDFC Balanced … Balanced Ad… Grow… 1.08e5 16.4 16.7 -7.59
## 7 HDFC Flexi Cap… Flexi Cap F… IDCW 1.00e5 10.8 9.79 -16.3
## 8 HDFC Flexi Cap… Flexi Cap F… IDCW 1.00e5 10.8 9.79 -16.3
## 9 HDFC Flexi Cap… Flexi Cap F… Grow… 1.00e5 20.0 19.4 -9.64
## 10 HDFC Mid Cap F… Mid Cap Fund IDCW 9.43e4 16.1 13.2 -14.6
## # ℹ 1,397 more rows
## # ℹ 29 more variables: Absolute_Returns_1Y <dbl>, Absolute_Returns_6M <dbl>,
## # SEBI_Risk_Category <chr>, Volatility <chr>, Category_St_Dev <dbl>,
## # Other_Holdings <chr>, Largecap_Holding <dbl>, Midcap_Holding <dbl>,
## # Equity_Holding <dbl>, Smallcap_Holding <dbl>, Debt_Holding <dbl>,
## # Category_YTM <chr>, PE_Ratio <chr>, Average_Maturity <dbl>,
## # Category_PE_Ratio <chr>, Sortino_Ratio <dbl>, Sharpe_Ratio <dbl>, …
# Persist the cleaned table to disk, then load it straight back so that
# readr re-infers the column types from the file.
write.csv(
  x = clean_mf_data,
  file = "D:/semester_projects/R/data_preprocessing_phase/clean_mf_data.csv",
  row.names = FALSE
)
mf_data <- readr::read_csv(
  file = "D:/semester_projects/R/data_preprocessing_phase/clean_mf_data.csv",
  col_names = TRUE
)
## Rows: 1407 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): Name, Sub_Category, Plan, SEBI_Risk_Category, Volatility, Other_Ho...
## dbl (19): AUM, CAGR_3Y, CAGR_5Y, Absolute_Returns_3M, Absolute_Returns_1Y, A...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the re-imported data to confirm the column types read back from disk.
mf_data
## # A tibble: 1,407 × 36
## Name Sub_Category Plan AUM CAGR_3Y CAGR_5Y Absolute_Returns_3M
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Parag Parikh F… Flexi Cap F… Grow… 1.34e5 18.3 16.8 -8.75
## 2 Parag Parikh F… Flexi Cap F… IDCW 1.34e5 NA NA -8.75
## 3 Parag Parikh F… Flexi Cap F… IDCW 1.34e5 NA NA -8.75
## 4 HDFC Balanced … Balanced Ad… IDCW 1.08e5 8.52 7.74 -9.11
## 5 HDFC Balanced … Balanced Ad… IDCW 1.08e5 8.52 7.74 -9.11
## 6 HDFC Balanced … Balanced Ad… Grow… 1.08e5 16.4 16.7 -7.59
## 7 HDFC Flexi Cap… Flexi Cap F… IDCW 1.00e5 10.8 9.79 -16.3
## 8 HDFC Flexi Cap… Flexi Cap F… IDCW 1.00e5 10.8 9.79 -16.3
## 9 HDFC Flexi Cap… Flexi Cap F… Grow… 1.00e5 20.0 19.4 -9.64
## 10 HDFC Mid Cap F… Mid Cap Fund IDCW 9.43e4 16.1 13.2 -14.6
## # ℹ 1,397 more rows
## # ℹ 29 more variables: Absolute_Returns_1Y <dbl>, Absolute_Returns_6M <dbl>,
## # SEBI_Risk_Category <chr>, Volatility <chr>, Category_St_Dev <dbl>,
## # Other_Holdings <chr>, Largecap_Holding <dbl>, Midcap_Holding <dbl>,
## # Equity_Holding <dbl>, Smallcap_Holding <dbl>, Debt_Holding <dbl>,
## # Category_YTM <chr>, PE_Ratio <chr>, Average_Maturity <dbl>,
## # Category_PE_Ratio <chr>, Sortino_Ratio <dbl>, Sharpe_Ratio <dbl>, …
# Let's identify the character fields that use a bare "-" as a
# missing-value marker. vapply() guarantees one logical per column.
cols_to_be_treated <- colnames(mf_data)[
  vapply(
    mf_data,
    function(col) is.character(col) && any(trimws(col) == "-", na.rm = TRUE),
    logical(1)
  )
]
# Replace the "-" marker with 0 and convert the column to numeric, but only
# when that conversion is lossless:
#   * thousands separators are removed first, so "3,000.00" parses as 3000
#     instead of silently turning into NA;
#   * if any other value is still not a number (e.g. an index name such as
#     "NIFTY 500 - TRI"), the column is genuine text and is returned
#     untouched. Forcing it through as.numeric() would wipe every value to
#     NA, and the median step below would then overwrite them.
mf_data <- mf_data |> mutate(
  across(
    all_of(cols_to_be_treated),
    \(col) {
      cleaned <- gsub(",", "", trimws(col), fixed = TRUE)
      # which() skips NA entries, so existing NAs stay NA
      cleaned[which(cleaned == "-")] <- "0"
      parsed <- suppressWarnings(as.numeric(cleaned))
      # a value that was present but failed to parse marks a text column
      if (any(is.na(parsed) & !is.na(cleaned))) col else parsed
    }
  )
)
# Let's identify the columns that still contain NA values
cols_contains_na <- colnames(mf_data)[colSums(is.na(mf_data)) > 0]
# Replace NA with the column median. A median is only meaningful for
# numbers, so non-numeric columns with NA are left as they are.
numeric_na_cols <- cols_contains_na[
  vapply(mf_data[cols_contains_na], is.numeric, logical(1))
]
mf_data <- mf_data |> mutate(
  across(
    all_of(numeric_na_cols),
    \(col) replace(col, is.na(col), median(col, na.rm = TRUE))
  )
)
# De-duplicate BEFORE saving: the data is reloaded from this CSV a few lines
# below, so removing duplicates only from the in-memory copy after the save
# would be undone by that reload.
# A record counts as a duplicate when Name, Plan and Sub_Category all repeat.
# dup_funds keeps every row involved in a duplicate group, for inspection.
dup_funds <- mf_data |> filter(
  duplicated(mf_data[, c("Name", "Plan", "Sub_Category")]) |
    duplicated(mf_data[, c("Name", "Plan", "Sub_Category")], fromLast = TRUE)
)
# removing the duplicate records (the first occurrence of each is kept)
mf_data <- mf_data[
  !duplicated(mf_data[, c("Name", "Plan", "Sub_Category")]),
]
# Saving the de-duplicated data as csv
write.csv(mf_data,
  "D:/semester_projects/R/data_preprocessing_phase/mf_data.csv",
  row.names = FALSE
)
# Reload the saved file as a base data.frame to re-check the data types
# (they were already converted above), then turn the categorical fields into
# factors for easier data exploration.
mf_data <- read.csv(
  "D:/semester_projects/R/data_preprocessing_phase/mf_data.csv",
  header = TRUE
)
mf_data$Plan <- as.factor(mf_data$Plan)
mf_data$SEBI_Risk_Category <- as.factor(mf_data$SEBI_Risk_Category)
# List the levels of the Plan factor.
levels(mf_data$Plan)
## [1] "Bonus" "Growth" "IDCW"
# List the levels of the SEBI_Risk_Category factor.
levels(mf_data$SEBI_Risk_Category)
## [1] "High" "Low" "Moderate" "Moderately High"
## [5] "Moderately Low" "Very High"
# Let's inspect the structure of the data frame to spot columns whose stored
# type does not match their content (e.g. numbers kept as text).
str(mf_data)
## 'data.frame': 1407 obs. of 36 variables:
## $ Name : chr "Parag Parikh Flexi Cap Fund" "Parag Parikh Flexi Cap Fund(IDCW)" "Parag Parikh Flexi Cap Fund(IDCW Payout)" "HDFC Balanced Advantage Fund(IDCW)" ...
## $ Sub_Category : chr "Flexi Cap Fund" "Flexi Cap Fund" "Flexi Cap Fund" "Balanced Advantage Fund" ...
## $ Plan : Factor w/ 3 levels "Bonus","Growth",..: 2 3 3 3 3 2 3 3 2 3 ...
## $ AUM : num 134253 134253 134253 107590 107590 ...
## $ CAGR_3Y : num 18.3 7.58 7.58 8.52 8.52 ...
## $ CAGR_5Y : num 16.75 6.32 6.32 7.74 7.74 ...
## $ Absolute_Returns_3M : num -8.75 -8.75 -8.75 -9.11 -9.11 ...
## $ Absolute_Returns_1Y : num 0.08 0 0 -5.8 -5.8 0.65 -6.33 -6.33 1.07 -1 ...
## $ Absolute_Returns_6M : num -7.25 -0.9 -0.9 -8.1 -8.1 ...
## $ SEBI_Risk_Category : Factor w/ 6 levels "High","Low","Moderate",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ Volatility : num 8.76 8.17 8.17 8.63 8.63 ...
## $ Category_St_Dev : num 13.52 13.52 13.52 8.25 8.25 ...
## $ Other_Holdings : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Largecap_Holding : num 63 63 63 54.2 54.2 ...
## $ Midcap_Holding : num 2.17 2.17 2.17 7.47 7.47 ...
## $ Equity_Holding : num 78.4 78.4 78.4 69.4 69.4 ...
## $ Smallcap_Holding : num 2.67 2.67 2.67 7.67 7.67 ...
## $ Debt_Holding : num 13.9 13.9 13.9 25.8 25.8 ...
## $ Category_YTM : num 0 0 0 6.56 6.56 6.56 0 0 0 0 ...
## $ PE_Ratio : num 17.5 17.5 17.5 19 19 ...
## $ Average_Maturity : num 1 1 1 7.6 7.6 7.6 1 1 1 1 ...
## $ Category_PE_Ratio : num 27.3 27.3 27.3 25.1 25.1 ...
## $ Sortino_Ratio : num -0.02 -0.25 -0.25 -0.01 -0.01 -0.01 0 0 0 0.03 ...
## $ Sharpe_Ratio : num -0.17 -2.82 -2.82 -0.13 -0.13 -0.13 -0.02 -0.02 -0.03 0.31 ...
## $ Average_YTM : num 6.77 6.77 6.77 7.01 7.01 7.01 6.77 6.77 6.77 6.77 ...
## $ Benchmark : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Time_since_inception: int 155 6 6 159 159 159 159 159 159 159 ...
## $ Expense_Ratio : num 0.63 0.63 0.63 0.77 0.77 0.77 0.7 0.7 0.7 0.76 ...
## $ NAV : num 87 87 87 41.8 41.8 ...
## $ AMC : chr "PPFAS Asset Management Pvt. Ltd." "PPFAS Asset Management Pvt. Ltd." "PPFAS Asset Management Pvt. Ltd." "HDFC Asset Management Company Limited" ...
## $ Minimum_Lumpsum : chr "1,000.00" "1,000.00" "1,000.00" "100" ...
## $ Lockin : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Fund_Manager : chr "Rajeev Thakkar+3 others" "Rajeev Thakkar+3 others" "Rajeev Thakkar+3 others" "Gopal Agrawal+3 others" ...
## $ Exit_Load : num 2 1 1 1 1 1 1 1 1 1 ...
## $ SIP_Investment : chr "Allowed" "Allowed" "Allowed" "Allowed" ...
## $ Minimum_SIP : int 100 100 100 100 100 100 100 100 100 100 ...
# Flag the columns that are still stored as text and list their names.
# vapply() pins the result to exactly one logical per column, whereas
# sapply() changes its return type on empty input.
char_cols <- vapply(mf_data, is.character, logical(1))
names(mf_data)[char_cols]
## [1] "Name" "Sub_Category" "AMC" "Minimum_Lumpsum"
## [5] "Fund_Manager" "SIP_Investment"
# Minimum_Lumpsum mixes number-like text (some with thousands separators)
# with non-numeric text, so list its distinct values.
unique(mf_data$Minimum_Lumpsum)
## [1] "1,000.00" "100" "Not allowed" "5,000.00" "500"
## [6] "10,000.00" "99" "20,000.00" "1,00,000.00"
# Bring Minimum_Lumpsum to a single numeric type: drop the thousands
# separators and parse what remains. Entries that are not numbers become NA;
# the coercion warning this raises is silenced on purpose.
mf_data$Minimum_Lumpsum <- suppressWarnings(
  as.numeric(gsub(",", "", mf_data$Minimum_Lumpsum, fixed = TRUE))
)
# AUM, NAV and Minimum_Lumpsum are important columns whose values are
# naturally skewed, so they are left out of the outlier check.
exclude_cols <- c("AUM", "NAV", "Minimum_Lumpsum")
# Identifying the numeric columns. vapply() guarantees one logical per
# column, unlike sapply(), whose return type depends on its input.
numeric_cols <- names(mf_data)[
  vapply(mf_data, is.numeric, logical(1)) &
    !(names(mf_data) %in% exclude_cols)
]
# Detecting outliers with the 1.5 * IQR rule: a value is reported when it
# lies below Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR of its column.
for (col in numeric_cols) {
  col_values <- mf_data[[col]]
  q1 <- quantile(col_values, 0.25, na.rm = TRUE)
  q3 <- quantile(col_values, 0.75, na.rm = TRUE)
  iqr_val <- IQR(col_values, na.rm = TRUE)
  lower <- q1 - 1.5 * iqr_val
  upper <- q3 + 1.5 * iqr_val
  # which() drops NA comparisons; indexing with the raw logical vector would
  # report every missing value as an NA "outlier"
  outliers <- col_values[which(col_values < lower | col_values > upper)]
  if (length(outliers) > 0) {
    cat("column", col, "\n")
    print(outliers)
    cat("\n-----------\n")
  }
}
## column CAGR_3Y
## [1] 31.65 31.65 31.65 31.38 31.21 31.21 31.21 44.84 44.84 44.84 31.18 31.18
## [13] 31.18 32.82 32.82 32.83 31.48 31.48 31.48 44.99 44.69 44.69 44.69
##
## -----------
## column CAGR_5Y
## [1] 24.42 24.42 24.42 25.20 27.36 27.36 27.36 26.95 24.87 24.87 24.87
##
## -----------
## column Absolute_Returns_1Y
## [1] -21.68 -15.40 -15.40 -13.91 57.86 57.86 57.86 -13.51 -13.51 -16.44
## [11] -16.44 -13.40 -13.40 -13.22 -13.22 21.34 57.08 -13.28 -13.28 -15.76
## [21] -15.76 -15.76 -14.73 -14.73 -14.73 19.58 19.58 19.58 17.21 17.21
## [31] 17.21 57.92 57.92 57.92 122.54 122.54 122.54 57.19 57.19 57.19
## [41] 58.26 58.26 58.26 -14.15 -14.15 18.44 18.44 18.44 19.13 19.13
## [51] 19.13 123.57 123.02 123.02 123.02 126.78 126.78 126.78
##
## -----------
## column Absolute_Returns_6M
## [1] 22.22 22.22 22.22 21.75 21.90 21.90 21.90 62.20 62.20 62.20 21.81 21.81
## [13] 21.81 22.18 22.18 22.18 62.29 61.90 61.90 61.90 64.46 64.46 64.46
##
## -----------
## column Volatility
## [1] 54.82 54.82 54.82 58.91 51.71 51.71 51.71 56.72 56.72 56.72
##
## -----------
## column Category_St_Dev
## [1] 34.60 34.60 34.60 34.60 34.60 34.60 34.60 60.61 60.61 60.61 34.60 34.60
## [13] 34.60 34.60 34.60 34.60 60.61 60.61 60.61 60.61 60.61 60.61 60.61
##
## -----------
## column Largecap_Holding
## [1] 63.01 63.01 63.01 54.22 54.22 54.22 75.25 75.25 75.25 12.47
## [11] 12.47 12.47 56.10 56.10 56.10 84.95 84.95 84.95 57.77 57.77
## [21] 57.77 57.77 57.77 14.89 14.89 14.89 14.89 16.77 16.77 16.77
## [31] 75.61 75.61 75.61 76.13 76.13 76.13 80.80 80.80 80.80 83.46
## [41] 83.46 83.46 83.46 45.14 45.14 45.14 45.14 58.08 58.08 58.08
## [51] 58.08 58.08 58.08 58.08 25.09 25.09 25.09 25.09 84.94 84.94
## [61] 84.94 87.04 87.04 87.04 41.57 41.57 41.57 9.16 9.16 9.16
## [71] 66.27 66.27 66.27 25.88 25.88 25.88 74.05 74.05 39.84 39.84
## [81] 39.84 89.12 89.12 89.12 63.97 63.97 63.97 19.20 19.20 19.20
## [91] 57.55 57.55 57.55 82.87 82.87 82.87 -0.38 -0.38 -0.38 -0.38
## [101] 47.14 47.14 47.14 23.52 23.52 23.52 38.26 38.26 38.26 71.61
## [111] 71.61 71.61 46.77 46.77 46.77 99.24 99.24 99.24 58.06 58.06
## [121] 5.64 5.64 5.64 59.52 59.52 59.52 41.31 41.31 41.31 54.29
## [131] 54.29 54.29 54.29 54.29 54.29 54.29 54.29 46.41 46.41 46.41
## [141] 8.04 8.04 8.04 67.42 67.42 67.42 38.47 38.47 38.47 58.67
## [151] 58.67 58.67 99.26 46.67 46.67 46.67 4.81 4.81 63.82 63.82
## [161] 63.82 58.52 58.52 58.52 18.57 18.57 18.57 76.49 76.49 76.49
## [171] 46.01 46.01 46.01 14.17 14.17 14.17 65.31 65.31 65.31 54.67
## [181] 54.67 54.67 68.75 68.75 68.75 87.29 87.29 87.29 81.64 81.64
## [191] 81.64 47.30 47.30 47.30 47.30 47.30 6.35 6.35 6.35 15.80
## [201] 15.80 15.80 15.80 15.80 15.80 15.80 73.48 73.48 73.48 40.44
## [211] 40.44 40.44 41.31 41.31 41.31 41.31 41.31 41.31 41.31 99.06
## [221] 99.06 99.06 2.62 2.62 2.62 39.28 39.28 39.28 61.57 61.57
## [231] 61.57 73.40 73.40 73.40 69.18 69.18 69.18 42.25 42.25 42.25
## [241] 20.41 20.41 20.41 75.50 75.50 44.39 44.39 44.39 43.56 43.56
## [251] 43.56 71.77 71.77 71.77 18.71 18.71 18.71 64.31 64.31 64.31
## [261] 64.31 64.31 64.31 64.31 6.58 6.58 6.58 46.59 46.59 46.59
## [271] 56.86 56.86 56.86 88.54 88.54 88.54 45.32 45.32 45.32 8.86
## [281] 8.86 8.86 27.25 27.25 27.25 15.17 15.17 15.17 60.73 60.73
## [291] 60.73 77.11 77.11 77.11 81.55 81.55 99.24 99.24 99.24 13.01
## [301] 13.01 13.01 65.93 65.93 65.93 54.03 54.03 54.03 42.99 42.99
## [311] 42.99 62.57 62.57 62.57 85.66 85.66 85.66 20.82 20.82 20.82
## [321] 20.82 15.84 15.84 15.84 76.00 76.00 76.00 56.83 56.83 56.83
## [331] 59.34 59.34 59.34 63.36 63.36 63.36 7.51 7.51 7.51 7.51
## [341] 7.51 7.51 7.51 45.96 45.96 45.96 63.20 63.20 63.20 10.28
## [351] 10.28 10.28 55.92 55.92 55.92 45.35 45.35 45.35 47.14 47.14
## [361] 47.14 57.30 57.30 57.30 35.78 35.78 35.78 46.07 46.07 46.07
## [371] 65.65 65.65 65.65 -0.48 -0.48 -0.48 40.08 40.08 40.08 61.66
## [381] 61.66 61.66 71.57 71.57 71.57 35.42 35.42 35.42 100.04 72.01
## [391] 72.01 69.96 69.96 69.96 69.96 69.96 73.39 41.65 41.65 41.65
## [401] 41.93 41.93 41.93 41.93 41.93 41.93 20.97 20.97 20.97 64.91
## [411] 64.91 64.91 78.21 78.21 78.21 11.54 11.54 11.54 44.29 44.29
## [421] 44.29 44.29 43.05 43.05 43.05 53.99 53.99 53.99 35.83 35.83
## [431] 35.83 45.38 45.38 45.38 77.90 77.90 77.90 63.03 63.03 63.03
## [441] 63.03 81.73 81.73 81.73 43.87 43.87 43.87 68.18 68.18 68.18
## [451] 65.51 65.51 65.51 11.26 11.26 11.26 57.39 57.39 57.39 23.17
## [461] 23.17 23.17 30.17 30.17 30.17 78.53 78.53 78.53 53.09 53.09
## [471] 53.09 67.96 67.96 62.14 62.14 62.14 62.14 44.89 44.89 44.89
## [481] 44.89 44.89 74.43 74.43 74.43 37.07 37.07 37.07 43.83 43.83
## [491] 43.83 46.03 46.03 20.05 65.93 65.93 65.93 73.45 73.45 73.45
## [501] 56.84 56.84 56.84 75.83 75.83 72.07 72.07 61.86 61.86 61.86
## [511] 44.38 44.38 44.38 78.18 63.75 63.75 63.75 64.01 64.01 64.01
## [521] 43.23 43.23 43.23 10.28 10.28 10.28 64.81 64.81 62.05 62.05
## [531] 62.05 62.05 60.86 60.86 60.86 54.05 54.05 54.05 65.69 65.69
## [541] 65.69 38.08 38.08 38.08 45.46 45.46 45.46 57.28 57.28 57.28
## [551] 57.28 57.28 46.59 46.59 46.59 70.50 1.29 1.29 1.29 76.90
## [561] 76.90 76.90 16.21 16.21 16.21 2.63 2.63 2.63 2.63 43.82
## [571] 43.82 43.82 39.20 39.20 39.20 39.20 39.20 47.14 47.14 47.14
## [581] 59.15 59.15 59.15 61.73 61.73 61.73 59.55 59.55 59.55 64.26
## [591] 64.26 64.26 43.26 43.26 43.26 23.48 23.48 42.42 42.42 42.42
## [601] 44.93 44.93 44.93 24.43 24.43 24.43 38.90 38.90 38.90 4.41
## [611] 4.41 4.41 42.52 42.52 42.52 45.77 45.77 45.77 39.55 39.55
## [621] 39.55 10.70 10.70 10.70 57.36 57.36 56.39 56.39 56.39 72.20
## [631] 72.20 72.20 68.22 68.22 68.22 14.23 14.23 14.23 44.62 44.62
## [641] 44.62 20.97 20.97 20.97 20.45 20.45
##
## -----------
## column Midcap_Holding
## [1] 2.17 2.17 2.17 7.47 7.47 7.47 7.89 7.89 7.89 63.77 63.77 63.77
## [13] 14.01 14.01 14.01 7.01 7.01 7.01 14.89 14.89 14.89 5.92 5.92 5.92
## [25] 5.92 5.92 68.90 68.90 68.90 4.56 4.56 4.56 12.54 12.54 12.54 13.05
## [37] 13.05 13.05 13.05 27.31 27.31 27.31 27.31 6.28 6.28 6.28 6.28 6.28
## [49] 6.28 6.28 20.83 20.83 20.83 61.39 61.39 61.39 61.39 14.60 14.60 14.60
## [61] 40.60 40.60 40.60 9.66 9.66 9.66 12.87 12.87 12.87 11.49 11.49 11.49
## [73] 35.57 35.57 35.57 12.06 12.06 12.06 2.82 2.82 2.82 66.88 66.88 66.88
## [85] 20.81 20.81 19.45 19.45 19.45 1.22 1.22 1.22 20.38 20.38 20.38 64.77
## [97] 64.77 64.77 33.02 33.02 33.02 6.94 6.94 6.94 -0.06 -0.06 -0.06 -0.06
## [109] 37.29 37.29 37.29 4.72 4.72 4.72 41.59 41.59 41.59 7.66 7.66 7.66
## [121] 0.71 0.71 0.71 25.24 25.24 22.69 22.69 22.69 42.60 42.60 42.60 9.55
## [133] 9.55 9.55 9.55 9.55 9.55 9.55 9.55 21.90 21.90 21.90 5.58 5.58
## [145] 5.58 67.60 67.60 67.60 12.57 12.57 12.57 22.00 22.00 22.00 25.19 25.19
## [157] 25.19 0.71 19.35 19.35 19.35 13.71 13.71 8.43 8.43 8.43 23.05 23.05
## [169] 23.05 63.67 63.67 63.67 11.16 11.16 11.16 27.24 27.24 27.24 69.30 69.30
## [181] 69.30 1.59 1.59 1.59 32.05 32.05 32.05 9.91 9.91 9.91 9.87 9.87
## [193] 9.87 6.56 6.56 6.56 7.02 7.02 7.02 19.80 19.80 19.80 12.98 12.98
## [205] 12.98 12.98 12.98 12.98 12.98 10.36 10.36 10.36 30.44 30.44 30.44 32.83
## [217] 32.83 32.83 22.23 22.23 22.23 22.23 22.23 22.23 22.23 0.71 0.71 0.71
## [229] 23.79 23.79 23.79 35.20 35.20 35.20 19.88 19.88 19.88 24.15 24.15 24.15
## [241] 12.51 12.51 12.51 31.46 31.46 31.46 65.33 65.33 65.33 7.07 7.07 10.16
## [253] 10.16 10.16 21.46 21.46 21.46 61.60 61.60 61.60 12.25 12.25 12.25 12.25
## [265] 12.25 12.25 12.25 8.37 8.37 8.37 28.41 28.41 28.41 20.67 20.67 20.67
## [277] 7.17 7.17 7.17 12.16 12.16 12.16 22.51 22.51 22.51 49.58 49.58 49.58
## [289] 64.78 64.78 64.78 23.01 23.01 23.01 12.06 12.06 12.06 7.21 7.21 9.57
## [301] 9.57 9.57 0.71 0.71 0.71 64.71 64.71 64.71 12.62 12.62 12.62 13.75
## [313] 13.75 13.75 12.51 12.51 12.51 8.82 8.82 8.82 64.26 64.26 64.26 64.26
## [325] 57.57 57.57 57.57 13.06 13.06 13.06 4.10 4.10 4.10 13.34 13.34 13.34
## [337] 3.63 3.63 3.63 3.63 3.63 3.63 3.63 15.36 15.36 15.36 20.19 20.19
## [349] 20.19 19.89 19.89 19.89 11.51 11.51 11.51 19.37 19.37 19.37 13.31 13.31
## [361] 13.31 11.72 11.72 11.72 37.13 37.13 37.13 24.78 24.78 24.78 20.41 20.41
## [373] 20.41 -0.06 -0.06 -0.06 19.17 19.17 19.17 14.72 14.72 14.72 27.28 27.28
## [385] 27.28 10.53 10.53 10.53 10.53 10.53 26.55 27.53 27.53 27.53 19.99 19.99
## [397] 19.99 19.99 19.99 19.99 5.07 5.07 5.07 19.31 19.31 19.31 21.69 21.69
## [409] 21.69 3.21 3.21 3.21 29.35 29.35 29.35 29.35 33.61 33.61 33.61 7.54
## [421] 7.54 7.54 19.51 19.51 19.51 24.78 24.78 24.78 20.94 20.94 20.94 20.94
## [433] 12.59 12.59 12.59 24.95 24.95 24.95 11.36 11.36 11.36 19.29 19.29 19.29
## [445] 69.80 69.80 69.80 11.24 11.24 11.24 64.50 64.50 64.50 20.58 20.58 20.58
## [457] 6.18 6.18 6.18 9.29 9.29 9.29 23.13 23.13 23.13 23.13 15.18 15.18
## [469] 33.72 33.72 33.72 33.72 13.45 13.45 13.45 34.11 34.11 34.11 10.84 10.84
## [481] 10.84 27.11 27.11 27.11 13.15 13.15 13.15 20.10 20.10 2.35 24.17 24.17
## [493] 24.17 10.23 10.23 10.23 14.36 14.36 14.36 12.15 12.15 14.85 14.85 11.37
## [505] 11.37 11.37 15.45 15.45 15.45 25.13 25.13 25.13 14.00 14.00 14.00 21.69
## [517] 20.39 20.39 20.39 22.73 22.73 22.73 22.22 22.22 22.22 64.74 64.74 64.74
## [529] 26.34 26.34 20.87 20.87 20.87 20.87 2.23 2.23 2.23 3.41 3.41 3.41
## [541] 11.95 11.95 11.95 23.60 23.60 23.60 45.62 45.62 45.62 7.26 7.26 7.26
## [553] 7.26 7.26 33.88 33.88 33.88 2.26 9.25 9.25 9.25 62.91 62.91 62.91
## [565] 33.35 33.35 33.35 33.35 13.09 13.09 13.09 20.31 20.31 20.31 20.31 20.31
## [577] 22.82 22.82 22.82 24.79 24.79 24.79 21.51 21.51 21.51 11.87 11.87 11.87
## [589] 27.76 27.76 27.76 10.35 10.35 8.23 8.23 8.23 25.90 25.90 25.90 3.30
## [601] 3.30 3.30 28.86 28.86 28.86 21.53 21.53 21.53 32.00 32.00 32.00 61.02
## [613] 61.02 61.02 26.09 26.09 26.09 14.02 14.02 29.56 29.56 29.56 19.44 19.44
## [625] 9.90 9.90 9.90 35.19 35.19 35.19 8.09 8.09 8.09 69.12 69.12 69.12
## [637] 20.32 20.32 20.32 65.94 65.94 65.94 34.62 34.62
##
## -----------
## column Equity_Holding
## [1] 78.36 78.36 78.36 69.36 69.36 69.36 90.40 90.40 90.40 69.27
## [11] 69.27 69.27 74.02 74.02 74.02 92.16 92.16 92.16 67.14 67.14
## [21] 67.14 66.40 66.40 66.40 66.40 66.40 95.91 95.91 95.91 95.91
## [31] 99.07 99.07 99.07 85.68 85.68 85.68 98.21 98.21 98.21 99.31
## [41] 99.31 99.31 99.31 99.40 99.40 99.40 99.40 70.56 70.56 70.56
## [51] 70.56 70.56 70.56 70.56 83.37 83.37 83.37 98.68 98.68 98.68
## [61] 98.68 69.91 69.91 69.91 83.18 83.18 83.18 98.90 98.90 98.90
## [71] 99.20 99.20 99.20 65.94 65.94 65.94 98.53 98.53 98.53 93.15
## [81] 93.15 93.15 91.93 91.93 91.93 92.48 92.48 92.48 86.01 86.01
## [91] 86.01 92.76 92.76 92.76 96.82 96.82 61.92 61.92 61.92 90.34
## [101] 90.34 90.34 97.79 97.79 97.79 88.43 88.43 88.43 97.68 97.68
## [111] 97.68 96.85 96.85 96.85 -0.27 -0.27 -0.27 -0.27 98.67 98.67
## [121] 98.67 89.84 89.84 89.84 66.56 66.56 66.56 99.95 99.95 99.95
## [131] 99.18 99.18 89.05 89.05 89.05 97.69 97.69 97.69 97.73 97.73
## [141] 97.73 65.13 65.13 65.13 65.13 65.13 65.13 65.13 65.13 97.09
## [151] 97.09 97.09 70.56 70.56 70.56 93.80 93.80 93.80 93.71 93.71
## [161] 93.71 92.19 92.19 92.19 96.88 96.88 96.88 99.97 69.67 69.67
## [171] 69.67 89.74 89.74 97.34 97.34 97.34 96.75 96.75 96.75 98.31
## [181] 98.31 98.31 98.06 98.06 98.06 67.42 67.42 67.42 96.93 96.93
## [191] 96.93 67.33 67.33 67.33 97.48 97.48 97.48 97.17 97.17 97.17
## [201] 97.92 97.92 97.92 66.93 66.93 66.93 66.93 66.93 98.31 98.31
## [211] 98.31 40.85 40.85 40.85 40.85 40.85 40.85 40.85 91.27 91.27
## [221] 91.27 97.84 97.84 97.84 93.66 93.66 93.66 67.30 67.30 67.30
## [231] 67.30 67.30 67.30 67.30 99.77 99.77 99.77 96.54 96.54 96.54
## [241] 98.15 98.15 98.15 97.03 97.03 97.03 97.55 97.55 97.55 99.29
## [251] 99.29 99.29 93.45 93.45 93.45 97.89 97.89 89.98 89.98 89.98
## [261] 58.21 58.21 58.21 96.39 96.39 96.39 96.71 96.71 96.71 81.77
## [271] 81.77 81.77 81.77 81.77 81.77 81.77 93.35 93.35 93.35 78.84
## [281] 78.84 78.84 88.78 88.78 88.78 97.99 97.99 97.99 68.00 68.00
## [291] 68.00 97.27 97.27 97.27 97.94 97.94 97.94 95.83 95.83 95.83
## [301] 96.67 96.67 96.67 93.40 93.40 66.67 66.67 66.67 99.94 99.94
## [311] 99.94 98.33 98.33 98.33 78.55 78.55 78.55 74.25 74.25 74.25
## [321] 65.25 65.25 65.25 97.80 97.80 97.80 97.22 97.22 97.22 97.22
## [331] 99.35 99.35 99.35 97.72 97.72 97.72 92.48 92.48 92.48 99.01
## [341] 99.01 99.01 91.57 91.57 91.57 93.37 93.37 93.37 22.69 22.69
## [351] 22.69 22.69 22.69 22.69 22.69 66.18 66.18 66.18 99.02 99.02
## [361] 99.02 98.91 98.91 98.91 70.10 70.10 70.10 65.61 65.61 65.61
## [371] 76.04 76.04 76.04 96.83 96.83 96.83 98.92 98.92 98.92 -0.55
## [381] -0.55 -0.55 66.41 66.41 66.41 97.22 97.22 97.22 92.25 92.25
## [391] 92.25 100.04 97.40 97.40 90.53 90.53 90.53 90.53 90.53 99.94
## [401] 75.88 75.88 75.88 68.78 68.78 68.78 68.78 68.78 68.78 38.15
## [411] 38.15 38.15 96.94 96.94 96.94 97.96 97.96 97.96 99.90 99.90
## [421] 99.90 92.20 92.20 92.20 99.70 99.70 99.70 99.70 96.86 96.86
## [431] 96.86 68.77 68.77 68.77 91.69 91.69 91.69 96.29 96.29 96.29
## [441] 96.74 96.74 96.74 97.80 97.80 97.80 97.80 97.67 97.67 97.67
## [451] 96.01 96.01 96.01 96.17 96.17 96.17 99.60 99.60 99.60 89.16
## [461] 89.16 89.16 75.92 75.92 75.92 98.31 98.31 98.31 82.71 82.71
## [471] 82.71 97.70 97.70 97.70 89.41 89.41 89.41 86.10 86.10 86.10
## [481] 99.22 99.22 99.22 99.22 97.71 97.71 97.71 97.71 65.07 65.07
## [491] 65.07 67.96 67.96 67.96 67.96 67.96 97.15 97.15 97.15 97.98
## [501] 97.98 97.98 67.72 67.72 67.72 97.21 97.21 26.22 95.84 95.84
## [511] 95.84 91.04 91.04 91.04 97.28 97.28 97.28 99.60 99.60 81.29
## [521] 81.29 81.29 64.85 64.85 64.85 95.99 95.99 95.99 99.87 98.24
## [531] 98.24 98.24 99.23 99.23 99.23 98.20 98.20 98.20 97.77 97.77
## [541] 96.31 96.31 96.31 96.31 66.63 66.63 66.63 63.48 63.48 63.48
## [551] 98.84 98.84 98.84 98.17 98.17 98.17 65.71 65.71 65.71 65.71
## [561] 65.71 87.87 97.06 97.06 97.06 98.82 98.82 98.82 93.47 93.47
## [571] 93.47 97.88 97.88 97.88 97.88 73.30 73.30 73.30 73.30 73.30
## [581] 97.84 97.84 97.84 98.01 98.01 98.01 98.35 98.35 98.35 96.45
## [591] 96.45 96.45 97.39 97.39 97.39 97.58 97.58 97.58 37.56 37.56
## [601] 97.92 97.92 97.92 64.88 64.88 64.88 93.58 93.58 93.58 45.35
## [611] 45.35 45.35 85.98 85.98 85.98 96.04 96.04 96.04 73.30 73.30
## [621] 73.30 99.41 99.41 99.41 87.19 87.19 87.19 96.97 96.97 96.97
## [631] 90.57 90.57 98.40 98.40 98.40 98.23 98.23 98.23 97.12 97.12
## [641] 97.12 99.19 99.19 99.19 96.36 96.36 96.36 97.26 97.26 97.26
## [651] 99.03 99.03
##
## -----------
## column Smallcap_Holding
## [1] 2.67 2.67 2.67 7.67 7.67 7.67 7.26 7.26 7.26 17.99 17.99 17.99
## [13] 7.51 7.51 7.51 1.00 1.00 1.00 0.20 0.20 0.20 2.11 2.11 2.11
## [25] 2.71 2.71 2.71 2.71 2.71 65.09 65.09 65.09 65.09 13.40 13.40 13.40
## [37] 5.51 5.51 5.51 3.50 3.50 3.50 1.95 1.95 1.95 2.80 2.80 2.80
## [49] 2.80 26.96 26.96 26.96 26.96 6.21 6.21 6.21 6.21 6.21 6.21 6.21
## [61] 9.27 9.27 9.27 3.06 3.06 3.06 4.41 4.41 4.41 9.69 9.69 9.69
## [73] 4.60 4.60 4.60 3.67 3.67 3.67 15.42 15.42 15.42 70.71 70.71 70.71
## [85] 10.28 10.28 10.28 83.20 83.20 83.20 1.95 1.95 2.63 2.63 2.63 13.45
## [97] 13.45 13.45 4.46 4.46 4.46 7.11 7.11 7.11 7.04 7.04 7.04 0.18
## [109] 0.18 0.18 0.18 14.24 14.24 14.24 66.68 66.68 66.68 15.18 15.18 15.18
## [121] 10.56 10.56 10.56 2.97 2.97 2.97 15.88 15.88 66.34 66.34 66.34 15.48
## [133] 15.48 15.48 13.82 13.82 13.82 1.29 1.29 1.29 1.29 1.29 1.29 1.29
## [145] 1.29 28.77 28.77 28.77 16.89 16.89 16.89 18.16 18.16 18.16 11.82 11.82
## [157] 11.82 31.73 31.73 31.73 13.01 13.01 13.01 3.65 3.65 3.65 71.22 71.22
## [169] 25.09 25.09 25.09 15.18 15.18 15.18 13.55 13.55 13.55 7.97 7.97 7.97
## [181] 25.06 25.06 25.06 14.58 14.58 14.58 0.52 0.52 0.52 10.21 10.21 10.21
## [193] 6.92 6.92 6.92 13.12 13.12 13.12 0.01 0.01 0.01 88.10 88.10 88.10
## [205] 9.26 9.26 9.26 3.01 3.01 3.01 3.01 3.01 72.15 72.15 72.15 12.07
## [217] 12.07 12.07 12.07 12.07 12.07 12.07 7.43 7.43 7.43 26.96 26.96 26.96
## [229] 4.10 4.10 4.10 3.76 3.76 3.76 3.76 3.76 3.76 3.76 70.12 70.12
## [241] 70.12 23.68 23.68 23.68 15.58 15.58 15.58 17.60 17.60 17.60 19.74 19.74
## [253] 19.74 29.76 29.76 29.76 9.04 9.04 9.04 15.32 15.32 22.99 22.99 22.99
## [265] 4.48 4.48 4.48 3.16 3.16 3.16 16.39 16.39 16.39 5.21 5.21 5.21
## [277] 5.21 5.21 5.21 5.21 78.40 78.40 78.40 3.84 3.84 3.84 11.25 11.25
## [289] 11.25 2.28 2.28 2.28 10.53 10.53 10.53 65.90 65.90 65.90 21.11 21.11
## [301] 21.11 15.87 15.87 15.87 11.93 11.93 11.93 7.50 7.50 7.50 4.64 4.64
## [313] 8.90 8.90 8.90 20.62 20.62 20.62 6.46 6.46 6.46 4.69 4.69 4.69
## [325] 19.75 19.75 19.75 3.32 3.32 3.32 12.13 12.13 12.13 12.13 25.95 25.95
## [337] 25.95 8.67 8.67 8.67 88.38 88.38 88.38 25.11 25.11 25.11 16.41 16.41
## [349] 16.41 16.67 16.67 16.67 11.55 11.55 11.55 11.55 11.55 11.55 11.55 4.87
## [361] 4.87 4.87 15.64 15.64 15.64 68.74 68.74 68.74 2.67 2.67 2.67 29.28
## [373] 29.28 29.28 5.16 5.16 5.16 7.02 7.02 7.02 23.92 23.92 23.92 23.36
## [385] 23.36 23.36 12.86 12.86 12.86 -0.01 -0.01 -0.01 9.12 9.12 9.12 16.39
## [397] 16.39 16.39 5.96 5.96 5.96 31.70 31.70 31.70 8.85 8.85 10.04 10.04
## [409] 10.04 10.04 10.04 6.71 6.71 6.71 6.87 6.87 6.87 6.87 6.87 6.87
## [421] 3.32 3.32 3.32 12.73 12.73 12.73 28.08 28.08 28.08 77.45 77.45 77.45
## [433] 26.05 26.05 26.05 26.05 20.19 20.19 20.19 7.23 7.23 7.23 36.36 36.36
## [445] 36.36 26.13 26.13 26.13 1.36 1.36 1.36 13.83 13.83 13.83 13.83 1.57
## [457] 1.57 1.57 25.01 25.01 25.01 16.63 16.63 16.63 14.79 14.79 14.79 8.10
## [469] 8.10 8.10 7.29 7.29 7.29 10.64 10.64 10.64 33.63 33.63 33.63 28.98
## [481] 28.98 28.98 4.70 4.70 4.70 23.71 23.71 23.71 23.66 23.66 23.66 23.66
## [493] 11.48 11.48 1.85 1.85 1.85 1.85 1.49 1.49 1.49 4.87 4.87 4.87
## [505] 4.87 4.87 11.38 11.38 11.38 10.41 10.41 10.41 33.81 33.81 33.81 10.74
## [517] 10.74 10.74 31.08 31.08 3.81 5.74 5.74 5.74 7.36 7.36 7.36 26.08
## [529] 26.08 26.08 7.30 7.30 12.68 12.68 8.06 8.06 8.06 5.02 5.02 5.02
## [541] 21.66 21.66 21.66 28.58 28.58 28.58 14.11 14.11 14.11 12.49 12.49 12.49
## [553] 28.50 28.50 28.50 23.18 23.18 23.18 6.62 6.62 13.39 13.39 13.39 13.39
## [565] 3.55 3.55 3.55 6.02 6.02 6.02 21.20 21.20 21.20 32.77 32.77 32.77
## [577] 7.08 7.08 7.08 1.16 1.16 1.16 1.16 1.16 13.70 13.70 13.70 15.12
## [589] 86.52 86.52 86.52 5.96 5.96 5.96 14.34 14.34 14.34 61.91 61.91 61.91
## [601] 61.91 38.19 38.19 38.19 13.79 13.79 13.79 13.79 13.79 27.88 27.88 27.88
## [613] 23.23 23.23 23.23 11.83 11.83 11.83 15.39 15.39 15.39 21.26 21.26 21.26
## [625] 26.56 26.56 26.56 3.72 3.72 37.72 37.72 37.72 11.72 11.72 11.72 43.25
## [637] 43.25 43.25 3.15 3.15 3.15 18.11 18.11 18.11 73.75 73.75 73.75 24.67
## [649] 24.67 24.67 5.99 5.99 5.99 27.85 27.85 27.85 15.48 15.48 15.48 21.39
## [661] 21.39 21.39 19.19 19.19 21.58 21.58 21.58 25.85 25.85 25.85 17.89 17.89
## [673] 17.89 75.53 75.53 16.13 16.13 16.13 10.39 10.39 10.39 22.88 22.88 22.88
## [685] 13.01 13.01 13.01 19.77 19.77 19.77 8.07 8.07 8.07 43.96 43.96
##
## -----------
## column Debt_Holding
## [1] 13.94 13.94 13.94 25.78 25.78 25.78 0.51 0.51 0.51 13.31 13.31 13.31
## [13] 18.82 18.82 18.82 2.80 2.80 2.80 5.62 5.62 5.62 19.97 19.97 19.97
## [25] 19.97 19.97 1.46 1.46 1.46 1.46 1.78 1.78 1.78 0.75 0.75 0.75
## [37] 21.38 21.38 21.38 21.38 21.38 21.38 21.38 8.12 8.12 8.12 1.03 1.03
## [49] 1.03 26.01 26.01 26.01 0.48 0.48 0.48 0.73 0.73 0.73 0.97 0.97
## [61] 0.97 2.13 2.13 2.13 11.97 11.97 11.97 0.12 0.12 0.12 0.61 0.61
## [73] 0.61 10.38 10.38 10.38 10.38 1.21 1.21 1.21 0.83 0.83 0.83 0.19
## [85] 0.19 0.19 10.15 10.15 10.15 1.73 1.73 1.73 14.84 14.84 14.84 14.84
## [97] 14.84 14.84 14.84 14.84 23.50 23.50 23.50 0.77 0.77 0.77 1.61 1.61
## [109] 1.61 0.53 0.53 0.53 0.22 0.22 0.22 17.08 17.08 17.08 0.26 0.26
## [121] 0.26 0.25 0.25 0.25 12.12 12.12 12.12 21.47 21.47 21.47 0.31 0.31
## [133] 0.31 8.13 8.13 8.13 8.13 8.13 35.87 35.87 35.87 35.87 35.87 35.87
## [145] 35.87 1.71 1.71 1.71 0.45 0.45 0.45 13.90 13.90 13.90 13.90 13.90
## [157] 13.90 13.90 1.17 1.17 1.17 0.37 0.37 0.37 16.19 16.19 16.19 7.16
## [169] 7.16 7.16 7.16 7.16 7.16 7.16 0.39 0.39 0.39 0.40 0.40 0.40
## [181] 10.52 10.52 10.52 0.20 0.20 0.20 0.20 0.20 0.20 27.86 27.86 27.86
## [193] 20.73 20.73 20.73 12.36 12.36 12.36 0.21 0.21 0.21 0.15 0.15 0.15
## [205] 0.15 0.04 0.04 0.04 69.48 69.48 69.48 69.48 69.48 69.48 69.48 11.14
## [217] 11.14 11.14 0.26 0.26 0.26 16.89 16.89 16.89 0.16 0.16 0.16 26.99
## [229] 26.99 26.99 21.59 21.59 21.59 0.37 0.37 0.37 17.89 17.89 17.89 19.48
## [241] 19.48 19.48 0.05 0.05 0.05 0.61 0.61 0.61 18.94 18.94 18.94 1.82
## [253] 1.82 1.82 1.82 1.82 1.82 11.16 11.16 11.16 0.29 0.29 0.29 1.66
## [265] 1.66 1.66 21.55 21.55 21.55 1.12 1.12 1.12 0.13 0.13 0.13 0.99
## [277] 0.99 0.99 4.39 4.39 4.39 19.68 19.68 19.68 5.35 5.35 5.35 2.86
## [289] 2.86 2.86 10.79 10.79 10.79 14.83 14.83 14.83 14.83 14.83 0.03 0.03
## [301] 0.03 26.37 26.37 26.37 29.41 0.07 0.07 0.07 0.64 0.64 0.64 0.23
## [313] 0.23 4.75 4.75 4.75 10.48 10.48 10.48 0.30 0.30 0.30 0.11 0.11
## [325] 0.11 0.11 21.57 21.57 21.57 11.23 11.23 11.23 0.33 0.33 0.33 23.34
## [337] 23.34 23.34 23.34 23.34 8.66 0.09 0.09 0.09 22.31 22.31 22.31 22.31
## [349] 22.31 59.37 59.37 0.09 0.09 0.09 9.63 9.63 9.63 0.10 0.10 0.10
## [361] 10.56 10.56 10.56 19.23 19.23 19.23 3.62 3.62 3.62 1.17 1.17 1.17
## [373] 0.07 0.07 0.07
##
## -----------
## column Average_Maturity
## [1] 7.60 7.60 7.60 5.56 5.56 5.56 3.39 3.39 3.39 6.73 6.73 6.73
## [13] 6.73 6.73 8.19 8.19 8.19 8.19 8.19 8.19 8.19 3.88 3.88 3.88
## [25] 6.00 6.00 6.00 6.00 6.00 7.50 7.50 7.50 7.50 7.50 7.01 7.01
## [37] 7.01 7.01 9.67 9.67 9.67 4.53 4.53 4.53 2.92 2.92 2.92 2.92
## [49] 2.92 3.14 3.14 3.14 12.90 12.90 12.90 3.38 3.38 3.38 4.16 4.16
## [61] 4.16 3.44 3.44 3.44 2.97 2.97 2.97 2.97 2.97 2.97 2.97 4.12
## [73] 4.12 4.12 4.12 4.12 3.49 3.49 3.49 3.49 3.49 3.49 3.49 14.03
## [85] 14.03 14.03 14.03 14.03 14.03 14.03 14.03 14.03 2.97 2.97 2.97 2.97
## [97] 2.97 2.97 11.13 11.13 11.13 13.66 13.66 13.66 3.93 3.93 3.93 13.82
## [109] 13.82 13.82 20.72 20.72 20.72 3.81 3.81 3.81 3.81 3.81 3.81 3.81
## [121] 2.99 2.99 2.99 2.99 2.99 3.59 3.59 3.59 7.30 7.30 7.30 5.13
## [133] 5.13 5.13 5.94 5.94 5.94 3.29 3.29 3.29 4.52 4.52 4.52 4.52
## [145] 4.52 3.42 3.42 3.42 3.42 3.42 3.42 3.42 3.42 3.42 3.42 3.42
## [157] 13.69 13.69 13.69 3.69 3.69 3.69 3.69 3.69 3.69 3.69 3.69 5.89
## [169] 5.89 5.89 3.00 3.00 3.00 3.00 3.00 3.00 3.00 22.05 22.05 22.05
## [181] 22.05 22.05 22.05 22.05 22.05 22.05 22.05 22.05 4.46 4.46 4.46 3.07
## [193] 3.07 3.07 3.07 3.07 3.07 6.45 6.45 6.45 9.26 9.26 9.26 3.77
## [205] 3.77 3.77 3.77 3.77 4.05 4.05 4.05 9.40 9.40 9.40 4.03 4.03
## [217] 4.03 4.03 4.03 4.03 4.03 4.03 4.03 3.65 3.65 3.65 3.43 3.43
## [229] 3.43 6.48 6.48 6.48 4.34 4.34 4.34 4.13 4.13 4.13 4.13 4.13
## [241] 4.13 4.13 4.13 4.13 4.04 4.04 4.04 4.11 4.11 4.11 4.11 4.11
## [253] 4.11 4.11 4.11 4.11 4.11 9.53 9.53 3.15 3.15 3.15 5.67 5.67
## [265] 5.67 2.91 2.91 2.91 2.91 2.91
##
## -----------
## column Sortino_Ratio
## [1] 1.05 1.06 1.37 1.37 1.03 1.03 1.03 1.04 1.04 1.04 1.04 0.89 1.04 1.01 1.01
## [16] 1.04 0.90 0.90 0.90 0.89 0.89 0.91 0.94 0.94 0.93 0.94 0.93 0.94 0.94 0.93
## [31] 0.99 0.99 0.99 0.99 0.99 0.99 0.99 1.05 1.06 1.01 0.90 0.90 0.89 0.90 0.90
## [46] 0.90 0.90 0.90 0.90 0.90 0.90 0.90 0.90 0.89 0.90 0.90 1.03 1.03 1.05 1.02
## [61] 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.02 1.06 1.07 1.02 1.02 1.00
## [76] 1.00 1.00 1.00 1.00 1.00 0.90 0.90 0.89 0.95 0.95 0.95 0.95 1.18 1.18 1.18
## [91] 1.20 1.18 1.18 0.95 0.95 1.04 1.05 0.99 0.99 0.99 1.01 1.01 1.01 1.01 1.01
## [106] 1.01 1.20 1.21 1.21 1.00 0.99 1.00 1.00 1.02 1.02 1.02 0.94 0.94 1.06 1.08
## [121] 1.08 1.06 1.06 0.95 0.95 0.96 0.95 1.00 1.00 1.00 0.89
##
## -----------
## column Sharpe_Ratio
## [1] -28.62 -28.62 -28.63 -28.62 7.12 7.19 7.14 7.14 7.08 7.15
## [11] 7.14 7.14 7.39 7.39 7.38 7.39 7.38 7.39 7.38 7.38
## [21] 7.28 7.33 7.19 7.19 7.19 7.19 -28.07 -28.42 7.97 7.97
## [31] 7.97 8.00 7.97 7.97 -5.69 -23.18 -23.91 -23.97 -23.97 -5.69
## [41] -27.26 -27.26 -20.87 -26.98 -27.25 -12.94 -27.12 -6.06 -24.00 -24.03
## [51] -14.85 -24.02 -24.00 -24.03 7.57 7.55 7.55 7.29 7.30 7.29
## [61] 7.29 -26.59 -26.59 -26.59 -26.59 7.13 7.21 7.21 7.13 7.13
## [71] 7.07 7.06 7.07 -26.64 -25.28 -10.86 -26.64 -25.28 -26.64
##
## -----------
## column Average_YTM
## [1] 6.18 6.18 6.18 6.18 6.18 6.18 6.21 6.21 6.21 7.26 7.26 7.26 7.26 7.26 7.24
## [16] 7.24 7.24 7.24 7.24 5.60 5.60 5.60 6.06 6.06 6.06 6.06 6.06 6.06 6.06 6.06
## [31] 6.06 6.06 6.06 6.06 4.97 4.97 4.97 4.97 6.10 6.10 6.10 6.10 6.03 6.03 6.03
## [46] 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 6.03 7.29 7.29 7.29
## [61] 7.41 7.41 7.41 5.73 5.73 5.73 6.18 6.18 6.18 6.18 6.18 6.18 6.01 6.01 6.01
## [76] 6.01 6.01 6.01 6.01 6.01 6.01 7.29 7.29 7.29 7.25 7.25 7.25 6.12 6.12 6.12
## [91] 6.12 6.12 6.12 7.74 7.74 7.74 7.74 7.74 7.74 7.74 7.74 7.74 7.26 7.26 7.26
## [106] 6.25 6.25 6.25 6.25 6.25 6.25 6.25 5.88 5.88 5.88 5.88 5.88 5.88 5.88 6.25
## [121] 6.25 6.25 6.25 5.04 5.04 5.04 5.04 5.04 5.04 5.04 5.04 5.05 5.05 7.27 7.27
## [136] 7.27 7.27 7.27 7.27 7.27 6.04 6.04 6.04 6.04 6.04 6.04 6.04 6.04 6.04 6.04
## [151] 6.04 6.04 6.04 5.04 5.04 5.04 5.04 5.04 5.04 7.39 7.39 7.39 7.56 7.56 7.56
## [166] 7.56 7.56 7.56 7.56 6.15 6.15 6.15 7.29 7.29 7.29 7.29 7.29 4.99 4.99 4.99
## [181] 4.99 4.99 6.14 5.97 5.97 5.97 7.28 7.28 7.28 7.28 7.28 7.28 7.28 7.40 7.40
## [196] 7.40 7.40 7.40 7.40 7.40 7.40 7.40 7.40 7.40 6.20 6.20 6.15 6.15 6.15 6.15
## [211] 6.15 6.15 6.15 6.05 6.05 6.05 6.05 6.05 6.05 6.05 6.05 7.46 7.46 7.46 5.06
## [226] 5.06 5.98 5.98 5.98 8.31 8.31 8.31 8.31 8.31 8.03 8.03 8.03 5.05 5.05 5.05
## [241] 5.05 5.05 5.05 5.05 6.15 7.58 7.58 7.58 6.07 6.07 6.07 5.88 5.88 5.88 5.61
## [256] 5.61 5.61 8.41 8.41 8.41 5.85 5.85 5.85 7.33 7.33 7.33 7.33 7.33 6.09 6.09
## [271] 6.09 6.09 6.09 6.09 6.09 6.09 7.99 7.99 7.99 4.95 4.95 4.95 4.95 5.92 5.92
## [286] 5.92 5.92 5.92 7.32 7.32 7.32 5.10 5.10 5.10 5.01 5.01 5.01 5.01 5.01 5.01
##
## -----------
## column Time_since_inception
## [1] 6 6 10 23 23 23 21 21 21 9 22 22 22 20 20 20 21 21 21 21 21 21
##
## -----------
## column Lockin
## [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##
## -----------
## column Minimum_SIP
## [1] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [19] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [37] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [55] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 300 300 300 500
## [73] 500 500 500 500 500 500 500 500 500 500 500 500 500 600 600 600 500 500
## [91] 500 500 300 300 300 300 300 300 300 300 300 500 500 500 500 500 500 500
## [109] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [127] 500 500 750 750 750 500 500 500 500 500 500 500 500 500 500 500 500 500
## [145] 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500 500
## [163] 500 500 500 500
##
## -----------
# Small visualization to understand outliers: one boxplot per numeric
# column, leaving out the columns listed in `exclude_cols`.
exclude_cols <- c("AUM", "NAV", "Minimum_Lumpsum")
ggplot(
  data = pivot_longer(
    select(mf_data, -all_of(exclude_cols)),
    cols = where(is.numeric)
  ),
  mapping = aes(x = name, y = value)
) +
  geom_boxplot() +
  # Rotate the column names so they fit along the x axis.
  theme(axis.text.x = element_text(angle = 90))
## Error : The fig.showtext code chunk option must be TRUE
# Remove outlier rows with the 1.5 * IQR rule, one column at a time.
# The fences always come from the unfiltered `mf_data`, while rows are
# dropped cumulatively from `mf_data_cleaned`. NA values never drop a row.
mf_data_cleaned <- mf_data
for (col in numeric_cols) {
  quartiles <- quantile(
    mf_data[[col]],
    probs = c(0.25, 0.75),
    na.rm = TRUE,
    names = FALSE
  )
  fence_width <- 1.5 * (quartiles[2] - quartiles[1])
  lower <- quartiles[1] - fence_width
  upper <- quartiles[2] + fence_width

  current_values <- mf_data_cleaned[[col]]
  keep_row <- is.na(current_values) |
    (current_values >= lower & current_values <= upper)
  mf_data_cleaned <- mf_data_cleaned[keep_row, ]
}
# The cleaned data can still contain NA values: count them first, then
# replace them with 0.
sum(is.na(mf_data_cleaned))
## [1] 23
# The fill must target `mf_data_cleaned`, the object that is written to disk
# next. Filling `mf_data` had no effect on the exported file, and `mf_data`
# is overwritten when that file is read back in.
mf_data_cleaned[is.na(mf_data_cleaned)] <- 0
# Write the cleaned data set to disk (without row names) ...
write.csv(
  x = mf_data_cleaned,
  file = "D:/semester_projects/R/data_preprocessing_phase/mutual_fund.csv",
  row.names = FALSE
)
# ... and read it back as the working `mf_data`, with the column
# specification message silenced.
mf_data <- read_csv(
  file = "D:/semester_projects/R/data_preprocessing_phase/mutual_fund.csv",
  show_col_types = FALSE
)
# Central tendency and spread of the return columns.
# Each statistic is stored as its own one-row tibble; NA values are skipped.

# CAGR 3Y
mean_cagr_3y <- summarise(
  mf_data,
  mean_cagr_3y = mean(CAGR_3Y, na.rm = TRUE)
)
median_cagr_3y <- summarise(
  mf_data,
  median_cagr_3y = median(CAGR_3Y, na.rm = TRUE)
)
sd_cagr_3y <- summarise(mf_data, SD = sd(CAGR_3Y, na.rm = TRUE))
iqr_cagr_3y <- summarise(mf_data, IQR = IQR(CAGR_3Y, na.rm = TRUE))

# CAGR 5Y
mean_cagr_5y <- summarise(
  mf_data,
  mean_cagr_5y = mean(CAGR_5Y, na.rm = TRUE)
)
median_cagr_5y <- summarise(
  mf_data,
  median_cagr_5y = median(CAGR_5Y, na.rm = TRUE)
)
sd_cagr_5y <- summarise(mf_data, SD = sd(CAGR_5Y, na.rm = TRUE))
iqr_cagr_5y <- summarise(mf_data, IQR = IQR(CAGR_5Y, na.rm = TRUE))

# Absolute_Returns_3M
mean_absolute_returns_3m <- summarise(
  mf_data,
  Absolute_Returns_3M = mean(Absolute_Returns_3M, na.rm = TRUE)
)
median_absolute_returns_3m <- summarise(
  mf_data,
  Absolute_Returns_3M = median(Absolute_Returns_3M, na.rm = TRUE)
)
sd_absolute_returns_3m <- summarise(
  mf_data,
  SD = sd(Absolute_Returns_3M, na.rm = TRUE)
)
iqr_absolute_returns_3m <- summarise(
  mf_data,
  IQR = IQR(Absolute_Returns_3M, na.rm = TRUE)
)

# Absolute_Returns_6M
mean_absolute_returns_6m <- summarise(
  mf_data,
  Absolute_Returns_6M = mean(Absolute_Returns_6M, na.rm = TRUE)
)
median_absolute_returns_6m <- summarise(
  mf_data,
  Absolute_Returns_6M = median(Absolute_Returns_6M, na.rm = TRUE)
)
sd_absolute_returns_6m <- summarise(
  mf_data,
  SD = sd(Absolute_Returns_6M, na.rm = TRUE)
)
iqr_absolute_returns_6m <- summarise(
  mf_data,
  IQR = IQR(Absolute_Returns_6M, na.rm = TRUE)
)

# Absolute_Returns_1Y (the result column is spelled `Absolute_Returns_1y`)
mean_absolute_returns_1y <- summarise(
  mf_data,
  Absolute_Returns_1y = mean(Absolute_Returns_1Y, na.rm = TRUE)
)
median_absolute_returns_1y <- summarise(
  mf_data,
  Absolute_Returns_1y = median(Absolute_Returns_1Y, na.rm = TRUE)
)
sd_absolute_returns_1y <- summarise(
  mf_data,
  SD = sd(Absolute_Returns_1Y, na.rm = TRUE)
)
iqr_absolute_returns_1y <- summarise(
  mf_data,
  IQR = IQR(Absolute_Returns_1Y, na.rm = TRUE)
)
# Gather the per-metric statistics into one summary table. Every input is a
# one-row, one-column tibble, so its single value is taken with `[[1]]`.
# Rows follow the order of `Metric`.
central_tendency_and_spread <- data.frame(
  Metric = c("CAGR_3Y", "CAGR_5Y", "3M", "6M", "1Y"),
  Mean = vapply(
    list(
      mean_cagr_3y,
      mean_cagr_5y,
      mean_absolute_returns_3m,
      mean_absolute_returns_6m,
      mean_absolute_returns_1y
    ),
    \(stat) stat[[1]],
    numeric(1)
  ),
  Median = vapply(
    list(
      median_cagr_3y,
      median_cagr_5y,
      median_absolute_returns_3m,
      median_absolute_returns_6m,
      median_absolute_returns_1y
    ),
    \(stat) stat[[1]],
    numeric(1)
  ),
  SD = vapply(
    list(
      sd_cagr_3y,
      sd_cagr_5y,
      sd_absolute_returns_3m,
      sd_absolute_returns_6m,
      sd_absolute_returns_1y
    ),
    \(stat) stat[[1]],
    numeric(1)
  ),
  IQR = vapply(
    list(
      iqr_cagr_3y,
      iqr_cagr_5y,
      iqr_absolute_returns_3m,
      iqr_absolute_returns_6m,
      iqr_absolute_returns_1y
    ),
    \(stat) stat[[1]],
    numeric(1)
  ),
  stringsAsFactors = FALSE
)
central_tendency_and_spread
## Metric Mean Median SD IQR
## 1 CAGR_3Y 2.9014706 0.40 3.558050 7.3950
## 2 CAGR_5Y 2.3390294 0.40 2.906254 6.1200
## 3 3M 0.2169412 -0.05 0.731940 1.1275
## 4 6M 0.6809412 -0.04 1.441582 2.6600
## 5 1Y 1.9766471 0.00 3.299085 6.6325
# Distribution of the 3-year CAGR column.
# Histogram drawn with `breaks = 30` and a custom title.
hist(mf_data$CAGR_3Y, breaks = 30, main = "Distribution of Returns")
## Error : The fig.showtext code chunk option must be TRUE
# Kernel density estimate of the same column; NA values are removed first.
plot(density(mf_data$CAGR_3Y, na.rm = TRUE))
## Error : The fig.showtext code chunk option must be TRUE
# Skewness of the column with NA values removed (presumably
# e1071::skewness(), since e1071 is the attached package providing it).
skewness(mf_data$CAGR_3Y, na.rm = TRUE)
## [1] 0.4708283
# Scatter of 1-year absolute returns (x) against 3-year CAGR (y) with a
# straight-line fit drawn without its confidence band.
ggplot(data = mf_data, mapping = aes(Absolute_Returns_1Y, CAGR_3Y)) +
  geom_point(color = "blue", alpha = 0.6) +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(
    x = "1-Year Returns (short-term)",
    y = "3-Year CAGR (Long-term)",
    title = "Short term returns VS Long term returns"
  ) +
  theme_minimal()
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'
# 2.4 Are high-return funds consistently associated with higher risk.
# Scatter of 1-year absolute return against volatility, each point labelled
# with its `Plan`, plus a dashed linear trend line.
ggplot(mf_data, aes(x = Volatility, y = Absolute_Returns_1Y)) +
  # Semi-transparent points so overlapping dots stay visible.
  geom_point(color = "steelblue", alpha = 0.6, size = 1.5) +
  # Repelled text labels keep the annotations from piling on top of each other.
  geom_text_repel(
    aes(label = Plan),
    size = 2.5, # small but readable font
    # Overlap threshold, not a label count: a label is left out when it
    # overlaps too many other labels or points.
    max.overlaps = 15,
    box.padding = 0.35, # padding around each label box
    point.padding = 0.5, # padding around each labelled point
    segment.color = "grey50" # colour of the line joining label and point
  ) +
  geom_smooth(
    method = "lm",
    se = FALSE,
    linetype = "dashed",
    color = "red",
    # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
    linewidth = 0.8
  ) +
  labs(
    title = "Risk vs. Return Analysis (1-Year)",
    # Same two-line subtitle as before, with the line break written as \n.
    subtitle = paste0(
      "Higher volatility generally\n",
      "trends with higher absolute returns"
    ),
    x = "Volatility (Risk %)",
    y = "Absolute 1-Year Return (%)",
    caption = "Red line represents the expected risk-return tradeoff"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'
# 2.5 What patterns emerge from scatter plots of risk vs return.
# Long format: one row per fund and return measure, with Volatility kept
# alongside as the shared x variable.
mf_long <- pivot_longer(
  select(
    mf_data,
    Volatility, CAGR_3Y, CAGR_5Y,
    Absolute_Returns_1Y, Absolute_Returns_3M, Absolute_Returns_6M
  ),
  cols = -Volatility,
  names_to = "Return_Type",
  values_to = "Return_Value"
)
# One scatter panel per return measure, each with its own y scale and a
# dashed linear fit.
ggplot(data = mf_long, mapping = aes(Volatility, Return_Value)) +
  geom_point(color = "orange", alpha = 0.6) +
  geom_smooth(method = "lm", linetype = "dashed", color = "red", se = FALSE) +
  facet_wrap(~Return_Type, scales = "free_y") +
  labs(
    x = "Volatility",
    y = "Returns",
    title = "Risk vs Return Across Different Time Horizons"
  ) +
  theme_minimal()
## Error : The fig.showtext code chunk option must be TRUE
## `geom_smooth()` using formula = 'y ~ x'
# Phase 3: Feature Engineering & Transformation.
# Returns net of the expense ratio, plus a custom risk-adjusted score:
# net 3-year CAGR divided by (Volatility + 0.01), multiplied by the Sortino
# ratio and rounded to 2 decimals.
mf_data <- mutate(
  mf_data,
  Net_CAGR_3Y = CAGR_3Y - Expense_Ratio,
  Net_CAGR_5Y = CAGR_5Y - Expense_Ratio,
  Net_Return_1Y = Absolute_Returns_1Y - Expense_Ratio,
  Custom_Risk_Adjusted = round(
    (Net_CAGR_3Y / (Volatility + 0.01)) * Sortino_Ratio,
    digits = 2
  )
)
head(mf_data$Custom_Risk_Adjusted, n = 20)
## [1] -0.62 19.81 -0.62 2.41 -0.62 -0.62 18.40 18.40 -0.63 19.68 1.56 -0.63
## [13] -0.12 3.67 -0.11 -0.12 -0.01 -0.07 2.87 3.18
# Composite risk score: the sum of the standardised (scale()) volatility and
# the standardised reciprocals of the Sortino and Sharpe ratios, rounded to a
# whole number.
# NOTE(review): 1 / (ratio + 0.01) becomes very large when a ratio is close
# to -0.01 and maps strongly negative ratios to values near 0 - confirm this
# is the intended treatment of negative Sharpe ratios.
mf_data <- mf_data |>
  mutate(
    # scale() returns a one-column matrix carrying "scaled:center" and
    # "scaled:scale" attributes; as.numeric() stores the score as a plain
    # numeric column instead of a matrix column.
    Risk_Score = as.numeric(
      round(
        scale(Volatility) +
          scale(1 / (Sortino_Ratio + 0.01)) +
          scale(1 / (Sharpe_Ratio + 0.01)),
        0
      )
    )
  )
mf_data$Risk_Score
## [,1]
## [1,] -2
## [2,] -2
## [3,] -2
## [4,] 1
## [5,] -2
## [6,] -2
## [7,] -2
## [8,] -2
## [9,] -2
## [10,] -2
## [11,] 3
## [12,] -2
## [13,] 0
## [14,] 0
## [15,] 0
## [16,] 0
## [17,] 13
## [18,] 1
## [19,] 1
## [20,] 1
## [21,] 1
## [22,] 13
## [23,] -1
## [24,] -1
## [25,] -1
## [26,] -1
## [27,] -1
## [28,] -1
## [29,] -1
## [30,] -1
## [31,] -1
## [32,] -1
## [33,] 1
## [34,] 1
## [35,] 1
## [36,] 1
## [37,] 2
## [38,] 1
## [39,] 1
## [40,] 1
## [41,] -1
## [42,] 1
## [43,] 1
## [44,] 0
## [45,] 1
## [46,] -1
## [47,] -1
## [48,] -1
## [49,] -1
## [50,] -1
## [51,] -1
## [52,] -1
## [53,] -1
## [54,] 0
## [55,] 0
## [56,] 0
## [57,] 0
## [58,] 0
## [59,] 0
## [60,] 0
## [61,] 0
## [62,] 0
## [63,] 0
## [64,] 0
## [65,] -1
## [66,] -1
## [67,] -1
## [68,] -1
## [69,] -1
## [70,] -1
## [71,] -1
## [72,] 0
## [73,] 0
## [74,] 0
## [75,] 0
## [76,] 0
## [77,] 0
## [78,] 0
## [79,] 0
## [80,] 3
## [81,] 3
## [82,] 3
## [83,] 2
## [84,] 3
## [85,] 3
## [86,] 0
## [87,] 0
## [88,] 0
## [89,] 0
## [90,] 0
## [91,] 0
## [92,] 0
## [93,] -1
## [94,] -1
## [95,] -1
## [96,] -1
## [97,] -1
## [98,] -1
## [99,] -1
## [100,] -1
## [101,] -1
## [102,] -1
## [103,] -1
## [104,] 0
## [105,] 0
## [106,] 0
## [107,] 0
## [108,] 0
## [109,] 0
## [110,] 0
## [111,] 0
## [112,] 0
## [113,] 0
## [114,] 0
## [115,] 0
## [116,] 0
## [117,] 0
## [118,] 0
## [119,] 0
## [120,] 9
## [121,] 0
## [122,] 9
## [123,] 0
## [124,] 0
## [125,] 0
## [126,] 0
## [127,] 0
## [128,] 0
## [129,] 0
## [130,] 0
## [131,] 0
## [132,] 0
## [133,] 0
## [134,] -1
## [135,] -1
## [136,] -1
## [137,] -1
## [138,] -1
## [139,] -1
## [140,] 0
## [141,] 0
## [142,] 0
## [143,] 0
## [144,] 0
## [145,] 0
## [146,] 0
## [147,] -1
## [148,] -1
## [149,] -1
## [150,] -1
## [151,] -1
## [152,] -1
## [153,] -1
## [154,] -1
## [155,] -1
## [156,] -1
## [157,] -1
## [158,] -1
## [159,] -1
## [160,] -1
## [161,] -1
## [162,] -1
## [163,] 4
## [164,] 0
## [165,] 0
## [166,] -1
## [167,] -1
## [168,] -1
## [169,] 4
## [170,] -1
## attr(,"scaled:center")
## [1] 0.6926471
## attr(,"scaled:scale")
## [1] 0.6162413
# Holdings-based features: the summed large-, mid- and small-cap holdings,
# and equity holding divided by (debt holding + 0.01).
mf_data <- mutate(
  mf_data,
  Total_Equity_Exposure = Largecap_Holding + Midcap_Holding + Smallcap_Holding,
  Equity_to_Debt_Ratio = Equity_Holding / (Debt_Holding + 0.01)
)
head(x = mf_data$Total_Equity_Exposure)
## [1] 79.55 79.55 79.55 79.55 79.55 79.55
head(x = mf_data$Equity_to_Debt_Ratio)
## [1] 17.80019 17.80019 17.80019 17.80019 17.80019 17.80019
# Min-max rescale a numeric vector to the [0, 1] range.
#
# x: numeric vector. NA values are ignored when finding the minimum and
#    maximum and stay NA in the result.
# Returns a vector of the same length as `x`. When all non-missing values are
# equal the range is zero; those values are then returned as 0 instead of the
# NaN that dividing 0 by 0 would produce.
normalize <- function(x) {
  lo <- min(x, na.rm = TRUE)
  span <- max(x, na.rm = TRUE) - lo
  # isTRUE() keeps the guard safe when `span` is NaN (e.g. all-infinite input).
  if (isTRUE(span == 0)) {
    # x - lo is 0 for every observed value and NA where x is NA.
    return(0 * (x - lo))
  }
  (x - lo) / span
}
# Min-max normalised copies of the key metrics (via normalize()), each
# rounded to 2 decimals.
mf_data <- mutate(
  mf_data,
  Norm_Return = round(normalize(Net_CAGR_3Y), digits = 2),
  Norm_Risk = round(normalize(Volatility), digits = 2),
  Norm_Cost = round(normalize(Expense_Ratio), digits = 2),
  Norm_Score = round(normalize(Custom_Risk_Adjusted), digits = 2)
)
head(mf_data$Norm_Return, n = 20)
## [1] 0.05 0.99 0.05 0.91 0.05 0.05 0.91 0.91 0.05 0.99 0.91 0.05 0.06 0.92 0.06
## [16] 0.06 0.06 0.06 1.00 0.93