library(tidyverse)
# Load the fund-level data set; every chart below reads from `dt_set`.
dt_set <- read_csv("data/funds.csv")

# Net annual expense ratio by Morningstar rating (English labels).
# One box per rating, drawn horizontally via coord_flip(); the expense ratio
# is shown on a log10 scale. filter() keeps only rows whose rating is > 0
# (rows where the comparison is NA are dropped as well).
# NOTE(review): the caption's "25,308 funds" matches the unfiltered row count
# of `dt_set` printed further down this script; the rating filter may leave
# fewer funds in the chart -- confirm the figure.
ter_by_rating_en <- dt_set %>%
  filter(morningstar_rating > 0) %>%
  ggplot(aes(x = morningstar_rating, y = net_annual_expense_ratio_fund)) +
  geom_boxplot(
    aes(group = cut_width(morningstar_rating, 1)),
    outlier.alpha = 1/9
  ) +
  scale_y_log10() +
  labs(
    title = "Lower Morningstar ratings for larger expenses",
    subtitle = "Funds with larger net expenses ratios tend to fare worse in terms of Morningstar rating",
    x = "Morningstar rating",
    y = "Net annual expense ratio (log scale)",
    caption = "Source: Morningstar (25,308 funds)"
  ) +
  coord_flip()

# Display the chart, exactly as the bare top-level expression did before.
ter_by_rating_en

# Name the plot and the `filename` argument explicitly: the saved image no
# longer depends on last_plot() or on partial matching of `file`.
ggsave(
  filename = "ter_mrat.png",
  plot = ter_by_rating_en,
  width = 7.0,
  height = 5.25
)
# Italian-language version of the expense-ratio-by-rating box plot.
# Same data, filter, geoms and scales as the English chart; only the labels
# differ. The plot is displayed but not written to disk (the export call
# below is commented out).
ggplot(
  data = filter(dt_set, morningstar_rating > 0),
  mapping = aes(x = morningstar_rating, y = net_annual_expense_ratio_fund)
) +
  geom_boxplot(
    mapping = aes(group = cut_width(morningstar_rating, 1)),
    outlier.alpha = 1/9
  ) +
  scale_y_log10() +
  labs(
    title = "Rating Morningstar inferiori per spese correnti maggiori",
    subtitle = "Da un'analisi di 25.308 fondi, Morningstar penalizza i fondi con spese correnti maggiori",
    x = "rating Morningstar",
    y = "rapporto di spese correnti netto (scala logaritmica)",
    caption = "Data analysis di Rodolfo Vanzini su dati Morningstar"
  ) +
  coord_flip()
# ggsave(file = "ter_mrat_it.png", width = 7.0, height = 5.25)


# Show every column whose name contains a 3-, 5- or 10-year suffix, listed in
# that horizon order (the printed result is pasted below).
select(dt_set, contains("3years"), contains("5years"), contains("10years"))
## # A tibble: 25,308 x 48
## fund_return_3ye… category_return… fund_alpha_3yea… category_alpha_…
## <dbl> <dbl> <dbl> <dbl>
## 1 7.1 6.14 -1.72 -0.02
## 2 10.1 11.8 -0.44 -0.01
## 3 15.4 15.4 0.580 0.01
## 4 9.38 8.38 -1.25 -0.02
## 5 9.24 5.78 -3.54 -0.03
## 6 6.28 6.14 -2.48 -0.02
## 7 6.8 6.14 -2 -0.02
## 8 8.62 8.38 -1.93 -0.02
## 9 7.23 6.14 -1.58 -0.02
## 10 5.7 5.56 -0.04 0
## # … with 25,298 more rows, and 44 more variables: fund_beta_3years <dbl>,
## # category_beta_3years <dbl>, fund_mean_annual_return_3years <dbl>,
## # category_mean_annual_return_3years <dbl>, fund_r_squared_3years <dbl>,
## # category_r_squared_3years <dbl>, fund_standard_deviation_3years <dbl>,
## # category_standard_deviation_3years <dbl>,
## # fund_sharpe_ratio_3years <dbl>, category_sharpe_ratio_3years <dbl>,
## # fund_treynor_ratio_3years <dbl>, category_treynor_ratio_3years <dbl>,
## # fund_return_5years <dbl>, category_return_5years <dbl>,
## # fund_alpha_5years <dbl>, category_alpha_5years <dbl>,
## # fund_beta_5years <dbl>, category_beta_5years <dbl>,
## # fund_mean_annual_return_5years <dbl>,
## # category_mean_annual_return_5years <dbl>, fund_r_squared_5years <dbl>,
## # category_r_squared_5years <dbl>, fund_standard_deviation_5years <dbl>,
## # category_standard_deviation_5years <dbl>,
## # fund_sharpe_ratio_5years <dbl>, category_sharpe_ratio_5years <dbl>,
## # fund_treynor_ratio_5years <dbl>, category_treynor_ratio_5years <dbl>,
## # fund_return_10years <dbl>, category_return_10years <dbl>,
## # fund_alpha_10years <dbl>, category_alpha_10years <dbl>,
## # fund_beta_10years <dbl>, category_beta_10years <dbl>,
## # fund_mean_annual_return_10years <dbl>,
## # category_mean_annual_return_10years <dbl>,
## # fund_r_squared_10years <dbl>, category_r_squared_10years <dbl>,
## # fund_standard_deviation_10years <dbl>,
## # category_standard_deviation_10years <dbl>,
## # fund_sharpe_ratio_10years <dbl>, category_sharpe_ratio_10years <dbl>,
## # fund_treynor_ratio_10years <dbl>, category_treynor_ratio_10years <dbl>
#' Build the layer-less risk/return base plot for one look-back horizon.
#'
#' Keeps only the standard-deviation and mean-annual-return columns for the
#' requested horizon, drops rows outside the plotted range, and maps standard
#' deviation to x and mean annual return to y. No geoms are added, so callers
#' attach whichever layers they need.
#'
#' @param data Data frame containing `fund_standard_deviation_<horizon>` and
#'   `fund_mean_annual_return_<horizon>` columns.
#' @param horizon Column-name suffix selecting the horizon, e.g. "3years".
#' @param max_sd Largest standard deviation kept (inclusive). Defaults to 25,
#'   the cutoff previously hard-coded for every horizon.
#' @return A ggplot object holding the filtered data and the x/y mapping.
risk_return_base <- function(data, horizon, max_sd = 25) {
  # Turn the column names into symbols once and unquote them with `!!`, so
  # each verb sees the same bare column name the hand-written pipelines used
  # (this also keeps the default axis titles equal to the column names).
  sd_var <- sym(paste0("fund_standard_deviation_", horizon))
  ret_var <- sym(paste0("fund_mean_annual_return_", horizon))

  data %>%
    select(!!sd_var, !!ret_var) %>%
    # Rows where either comparison is NA are dropped by filter() as well.
    filter((!!sd_var) <= max_sd, (!!ret_var) > 0) %>%
    ggplot(aes(!!sd_var, !!ret_var))
}

# Base plots for the 3-, 5- and 10-year horizons; layers are added later.
pl_3 <- risk_return_base(dt_set, "3years")
pl_5 <- risk_return_base(dt_set, "5years")
pl_10 <- risk_return_base(dt_set, "10years")
# Scatter view of each horizon's base plot: heavily transparent points plus a
# loess smoother. The layers are identical for all three horizons, so they
# are defined once and added to each base plot.
scatter_loess_layers <- list(
  geom_point(alpha = 1/10),
  stat_smooth(method = "loess")
)

pl_3 + scatter_loess_layers
pl_5 + scatter_loess_layers
pl_10 + scatter_loess_layers
# Hexagonal-binning view of each horizon's base plot (100 bins) with a loess
# smoother using span = 2.0. The same two layers are reused for every
# horizon. geom_hex() relies on the hexbin package being installed.
hex_loess_layers <- list(
  geom_hex(bins = 100),
  stat_smooth(method = "loess", span = 2.0)
)

pl_3 + hex_loess_layers
pl_5 + hex_loess_layers
pl_10 + hex_loess_layers






# 10-year standard deviation and mean annual return by Morningstar rating.
# The two metrics are stacked into long format (`stat` = metric name,
# `value` = metric value) so each gets its own facet; within a facet there is
# one horizontal box per rating, with values on a log10 scale.
# pivot_longer() replaces the superseded gather(), and the columns are named
# explicitly instead of being picked by position. Only the row order of the
# long table differs, which does not affect the box-plot statistics.
pl_1 <- dt_set %>%
  select(
    morningstar_rating,
    fund_standard_deviation_10years,
    fund_mean_annual_return_10years
  ) %>%
  pivot_longer(
    cols = c(fund_standard_deviation_10years, fund_mean_annual_return_10years),
    names_to = "stat",
    values_to = "value"
  ) %>%
  ggplot(aes(morningstar_rating, value)) +
  scale_y_log10() +
  geom_boxplot(aes(group = morningstar_rating)) +
  facet_wrap(~ stat) +
  coord_flip()
pl_1

# Net assets (log10 scale) by each of the three Morningstar rating columns:
# return rating, overall rating and risk rating. Each chart draws one
# horizontal box per rating value, using the unfiltered data set.
ggplot(dt_set, aes(x = morningstar_return_rating, y = net_assets)) +
  geom_boxplot(mapping = aes(group = morningstar_return_rating)) +
  scale_y_log10() +
  coord_flip()

ggplot(dt_set, aes(x = morningstar_rating, y = net_assets)) +
  geom_boxplot(mapping = aes(group = morningstar_rating)) +
  scale_y_log10() +
  coord_flip()

ggplot(dt_set, aes(x = morningstar_risk_rating, y = net_assets)) +
  geom_boxplot(mapping = aes(group = morningstar_risk_rating)) +
  scale_y_log10() +
  coord_flip()


