library(tidyverse)
# Load the fund data set; every plot in this script reads from `dt_set`.
dt_set <- read_csv(file = "data/funds.csv")
# Box plot (English labels): distribution of the net annual expense ratio for
# each Morningstar rating. Only rows with a rating above 0 are kept.
dt_set %>% 
  filter(morningstar_rating > 0) %>% 
  ggplot(aes(x = morningstar_rating, y = net_annual_expense_ratio_fund)) + 
  # cut_width(..., 1) groups the numeric rating into width-1 bins, one box each.
  geom_boxplot(aes(group = cut_width(morningstar_rating, 1)), outlier.alpha = 1/9) +
  scale_y_log10() + 
  labs(title = "Lower Morningstar ratings for larger expenses", 
       subtitle = "Funds with larger net expenses ratios tend to fare worse in terms of Morningstar rating", 
       x = "Morningstar rating", 
       y = "Net annual expense ratio (log scale)", 
       caption = "Source: Morningstar (25,308 funds)") + 
  # Flip the axes so the boxes are drawn horizontally.
  coord_flip()
# Use the full argument name `filename` rather than an abbreviation that
# relies on partial argument matching. No `plot` is passed, so ggsave()
# writes ggplot2's last plot, i.e. the one built above.
ggsave(filename = "ter_mrat.png", width = 7.0, height = 5.25)

# Italian-language version of the expense-ratio-by-rating box plot:
# same data, filter, geoms and scales; only the text labels differ.
ggplot(
  data = filter(dt_set, morningstar_rating > 0),
  mapping = aes(x = morningstar_rating, y = net_annual_expense_ratio_fund)
) +
  geom_boxplot(
    mapping = aes(group = cut_width(morningstar_rating, 1)),
    outlier.alpha = 1/9
  ) +
  scale_y_log10() +
  coord_flip() +
  labs(
    title = "Rating Morningstar inferiori per spese correnti maggiori",
    subtitle = "Da un'analisi di 25.308 fondi, Morningstar penalizza i fondi con spese correnti maggiori",
    x = "rating Morningstar",
    y = "rapporto di spese correnti netto (scala logaritmica)",
    caption = "Data analysis di Rodolfo Vanzini su dati Morningstar"
  )
# Export of the Italian version is currently commented out:
# ggsave(file = "ter_mrat_it.png", width = 7.0, height = 5.25)

# Print the columns whose names contain "3years", "5years" or "10years",
# selected in that order (a captured console printout follows as comments).
select(
  dt_set,
  contains("3years"),
  contains("5years"),
  contains("10years")
)
## # A tibble: 25,308 x 48
##    fund_return_3ye… category_return… fund_alpha_3yea… category_alpha_…
##               <dbl>            <dbl>            <dbl>            <dbl>
##  1             7.1              6.14           -1.72             -0.02
##  2            10.1             11.8            -0.44             -0.01
##  3            15.4             15.4             0.580             0.01
##  4             9.38             8.38           -1.25             -0.02
##  5             9.24             5.78           -3.54             -0.03
##  6             6.28             6.14           -2.48             -0.02
##  7             6.8              6.14           -2                -0.02
##  8             8.62             8.38           -1.93             -0.02
##  9             7.23             6.14           -1.58             -0.02
## 10             5.7              5.56           -0.04              0   
## # … with 25,298 more rows, and 44 more variables: fund_beta_3years <dbl>,
## #   category_beta_3years <dbl>, fund_mean_annual_return_3years <dbl>,
## #   category_mean_annual_return_3years <dbl>, fund_r_squared_3years <dbl>,
## #   category_r_squared_3years <dbl>, fund_standard_deviation_3years <dbl>,
## #   category_standard_deviation_3years <dbl>,
## #   fund_sharpe_ratio_3years <dbl>, category_sharpe_ratio_3years <dbl>,
## #   fund_treynor_ratio_3years <dbl>, category_treynor_ratio_3years <dbl>,
## #   fund_return_5years <dbl>, category_return_5years <dbl>,
## #   fund_alpha_5years <dbl>, category_alpha_5years <dbl>,
## #   fund_beta_5years <dbl>, category_beta_5years <dbl>,
## #   fund_mean_annual_return_5years <dbl>,
## #   category_mean_annual_return_5years <dbl>, fund_r_squared_5years <dbl>,
## #   category_r_squared_5years <dbl>, fund_standard_deviation_5years <dbl>,
## #   category_standard_deviation_5years <dbl>,
## #   fund_sharpe_ratio_5years <dbl>, category_sharpe_ratio_5years <dbl>,
## #   fund_treynor_ratio_5years <dbl>, category_treynor_ratio_5years <dbl>,
## #   fund_return_10years <dbl>, category_return_10years <dbl>,
## #   fund_alpha_10years <dbl>, category_alpha_10years <dbl>,
## #   fund_beta_10years <dbl>, category_beta_10years <dbl>,
## #   fund_mean_annual_return_10years <dbl>,
## #   category_mean_annual_return_10years <dbl>,
## #   fund_r_squared_10years <dbl>, category_r_squared_10years <dbl>,
## #   fund_standard_deviation_10years <dbl>,
## #   category_standard_deviation_10years <dbl>,
## #   fund_sharpe_ratio_10years <dbl>, category_sharpe_ratio_10years <dbl>,
## #   fund_treynor_ratio_10years <dbl>, category_treynor_ratio_10years <dbl>
# Base risk/return plots for the 3-, 5- and 10-year horizons. Each object holds
# only data and axis mappings (x = standard deviation, y = mean annual return);
# geoms are added further down. Rows are kept when the standard deviation is at
# most 25 and the mean annual return is strictly positive.
pl_3 <- ggplot(
  data = dt_set %>%
    select(fund_standard_deviation_3years, fund_mean_annual_return_3years) %>%
    filter(
      fund_standard_deviation_3years <= 25 &
        fund_mean_annual_return_3years > 0
    ),
  mapping = aes(
    x = fund_standard_deviation_3years,
    y = fund_mean_annual_return_3years
  )
)

pl_5 <- ggplot(
  data = dt_set %>%
    select(fund_standard_deviation_5years, fund_mean_annual_return_5years) %>%
    filter(
      fund_standard_deviation_5years <= 25 &
        fund_mean_annual_return_5years > 0
    ),
  mapping = aes(
    x = fund_standard_deviation_5years,
    y = fund_mean_annual_return_5years
  )
)

pl_10 <- ggplot(
  data = dt_set %>%
    select(fund_standard_deviation_10years, fund_mean_annual_return_10years) %>%
    filter(
      fund_standard_deviation_10years <= 25 &
        fund_mean_annual_return_10years > 0
    ),
  mapping = aes(
    x = fund_standard_deviation_10years,
    y = fund_mean_annual_return_10years
  )
)

# Scatter plots of the three base plots: semi-transparent points (alpha 0.1)
# overlaid with a loess smoother.
pl_3 + geom_point(alpha = 0.1) + stat_smooth(method = "loess")

pl_5 + geom_point(alpha = 0.1) + stat_smooth(method = "loess")

pl_10 + geom_point(alpha = 0.1) + stat_smooth(method = "loess")


# Hexagonal-bin versions of the same three plots (100 bins), overlaid with a
# loess smoother using span = 2.
pl_3 + geom_hex(bins = 100) + stat_smooth(method = "loess", span = 2)

pl_5 + geom_hex(bins = 100) + stat_smooth(method = "loess", span = 2)

pl_10 + geom_hex(bins = 100) + stat_smooth(method = "loess", span = 2)

# Faceted box plots of the 10-year standard deviation and 10-year mean annual
# return by Morningstar rating, on a log10 value axis.
pl_1 <- dt_set %>% 
  select(morningstar_rating, fund_standard_deviation_10years, fund_mean_annual_return_10years) %>% 
  # Reshape to long form (one row per fund and statistic) so the two
  # statistics can be faceted. pivot_longer() replaces the superseded gather(),
  # and the columns are named explicitly instead of selected by position.
  pivot_longer(
    cols = c(fund_standard_deviation_10years, fund_mean_annual_return_10years),
    names_to = "stat",
    values_to = "value"
  ) %>% 
  ggplot(aes(morningstar_rating, value)) +
  scale_y_log10() + 
  # One box per distinct rating value.
  geom_boxplot(aes(group = morningstar_rating)) + 
  facet_wrap(~ stat) + 
  coord_flip()
pl_1

# Box plots of net assets (log10 scale, axes flipped), grouped in turn by the
# Morningstar return rating, overall rating and risk rating.
ggplot(
  data = dt_set,
  mapping = aes(x = morningstar_return_rating, y = net_assets)
) +
  geom_boxplot(mapping = aes(group = morningstar_return_rating)) +
  scale_y_log10() +
  coord_flip()

ggplot(
  data = dt_set,
  mapping = aes(x = morningstar_rating, y = net_assets)
) +
  geom_boxplot(mapping = aes(group = morningstar_rating)) +
  scale_y_log10() +
  coord_flip()

ggplot(
  data = dt_set,
  mapping = aes(x = morningstar_risk_rating, y = net_assets)
) +
  geom_boxplot(mapping = aes(group = morningstar_risk_rating)) +
  scale_y_log10() +
  coord_flip()