# Libraries ----

library(rvest)
library(magrittr)
#library(tmap)
library(ggplot2)
library(data.table)
#library(RColorBrewer)
library(ggthemes)
library(treemap)
library(highcharter)
library(dplyr)
#library(stringi)
#library(rPlotter)
#library(png)
#library(raster)

# Wiki table of countries and coefficients ----

# Source page: Wikipedia's list of UEFA country coefficients.
UEFA_url <- "https://en.wikipedia.org/wiki/UEFA_coefficient#Country_coefficient"

# Download `url` and parse the table node(s) matched by the hard-coded CSS
# selector below.
#
# url: address of the page to scrape.
# Returns whatever html_table() yields for the matched node set.
# NOTE(review): the selector pins the table by its child position
# (nth-child(37)), so it presumably stops matching if the page layout
# shifts -- confirm it still targets the coefficient table.
UEFA_table_func <- function(url) {
  page <- read_html(url)
  coeff_tables <- html_table(
    html_nodes(page, '#mw-content-text > table:nth-child(37)'),
    fill = TRUE
  )
  coeff_tables
}

# Scrape the coefficient table and keep only columns 4 to 8 of the result.
UEFA_df <- data.frame(lapply(UEFA_url, UEFA_table_func))[c(4:8)]
# Discard the first row (presumably a repeated header row -- confirm against
# the scraped table) and give the remaining columns working names.
UEFA_df <- UEFA_df[-1, ]
names(UEFA_df) <- c("Nation", "cf1", "cf2", "cf3", "cf4")

# Strip any trailing " (...)" suffix from the nation names.
UEFA_df$Nation <- gsub(" \\(.*\\)", "", UEFA_df$Nation)

# Coerce the four coefficient columns to numeric; anything that cannot be
# parsed becomes NA and is then zeroed.
cols <- c(2:5)
UEFA_df[, cols] <- lapply(
  UEFA_df[, cols],
  function(column) as.numeric(as.character(column))
)
UEFA_df[is.na(UEFA_df)] <- 0

# Total strength ('size') of a nation is the sum of its four coefficients.
UEFA_df$Strength <- rowSums(UEFA_df[cols])

# Collect the link target and title of the league anchors on a page.
#
# url: address of the page to scrape.
# Returns a single named character vector: the href values (names prefixed
# "links") followed by the title values (names prefixed "names").
UEFA_leagues_func <- function(url) {
  league_anchors <- html_nodes(read_html(url), "small a:nth-child(1)")
  hrefs <- as.character(html_attr(league_anchors, "href"))
  titles <- as.character(html_attr(league_anchors, "title"))
  c(links = hrefs, names = titles)
}

# The scraper returns links and names stacked in one named vector, so binding
# it gives a one-column data frame whose row names ("links..."/"names...")
# tell the two kinds of entry apart. Split on those row names to attach the
# league title and its full Wikipedia URL to each nation.
# NOTE(review): this assumes the scraped anchors line up one-to-one, in order,
# with the rows of UEFA_df -- confirm.
Leagues_df <- do.call(cbind.data.frame, lapply(UEFA_url, UEFA_leagues_func))
UEFA_df$League <- Leagues_df[grepl("names", rownames(Leagues_df)), ]
UEFA_df$League_URL <- paste0(
  "https://en.wikipedia.org",
  Leagues_df[grepl("links", rownames(Leagues_df)), ]
)

# ggplot coefficients over the last 4 years ----

# Reshape to long form: one row per nation and coefficient column, with the
# coefficient column name in `Year` and its value in `value`.
# The input is converted to a data.table first so that data.table's own melt
# method is used; calling melt() on a plain data.frame relies on a deprecated
# redirect to reshape2. The result is turned back into a data.frame so the
# base-style indexing below behaves as before.
MeltedUEFA_df <- as.data.frame(
  melt(
    as.data.table(UEFA_df[1:6]),
    id.vars = c("Nation", "Strength"),
    variable.name = "Year"
  )
)
MeltedUEFA_df <- MeltedUEFA_df[order(MeltedUEFA_df$Strength), ]

# Stacked bars per nation (strongest first): fill encodes total strength,
# alpha encodes which of the four coefficient columns a segment comes from.
ggplot(MeltedUEFA_df, aes(x = reorder(Nation, -Strength), y = value)) +
  geom_bar(aes(alpha = Year, fill = Strength), stat = "identity") +
  # guide = "none" hides the fill legend (guide = FALSE is deprecated)
  scale_fill_continuous(low = "blue", high = "red", guide = "none") +
  # the legend labels for the four coefficient columns are set here
  scale_alpha_manual(values = c(0.25, 0.5, 0.75, 1), labels = paste0("201", 3:6)) +
  # only label a subset of nations on the x axis; the fractional positions
  # produced by seq() are truncated when used as indices
  scale_x_discrete(breaks = MeltedUEFA_df$Nation[seq(1, nrow(MeltedUEFA_df),
                                                     (nrow(MeltedUEFA_df)-4)/9)]) +
  theme_fivethirtyeight() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) +
  # The former theme(legend.text = element_text(c("2011", ...))) was removed:
  # element_text()'s first argument is the font family, not label text, and
  # the labels it tried to set disagreed with scale_alpha_manual() above.
  ggtitle("The Strength of UEFA Leagues",
          "coefficients over 4 years for each UEFA league")

# Get GitHub data of league winners ----

# Fetch the CSV holding the league winners of every UEFA country.
Github_url <- "https://raw.githubusercontent.com/RobWHickman/Databases-and-Files/master/"
Champions_df <- read.csv(
  paste0(Github_url, "European%20Football%20Champions/Football%20Champions.csv"),
  stringsAsFactors = FALSE,
  na.strings = "NA"
)

# First column is the year; the league columns take the nation names from
# UEFA_df so the two tables can be matched up later.
# NOTE(review): assumes the CSV's league columns are in the same order and
# number as UEFA_df$Nation -- confirm.
names(Champions_df) <- c("Year", UEFA_df$Nation)

# The last three rows are addressed by position: counting back from the
# final row they are the "remove rows", "winner notes" and "start year" rows
# used when extracting each league's winners.
RemoveRows_row <- nrow(Champions_df)
WinnerNotes_row <- RemoveRows_row - 1
StartYear_row <- RemoveRows_row - 2

# Get the winners out of the massive table ----

# Build the title-count table for one league column of Champions_df.
#
# league: column index into Champions_df.
# Returns a data frame with one row per club and the columns
#   Club           - club name (factor, as produced by table()),
#   Championships  - number of titles counted for the club,
#   TotalChampions - sum of Championships over the whole league,
#   Nation         - name of the league column,
#   NationSize     - the nation's UEFA_df$Strength squared, plus 1.
#
# Reads the globals Champions_df, StartYear_row, WinnerNotes_row,
# RemoveRows_row and UEFA_df.
LeagueWinners_func <- function(league){
  StartYear <- as.numeric(Champions_df[StartYear_row, league])
  # NOTE(review): the 1888 offset assumes row 1 of Champions_df is the year
  # 1889 -- confirm against the CSV.
  rows <- (StartYear - 1888):(StartYear_row - 1)

  Nation <- colnames(Champions_df)[league]

  # Some years have several winners (listed in the notes row) and some years
  # should not be counted at all (listed in the removal row). Both cells hold
  # ", "-separated years, or NA when there is nothing to report.
  MultiWinYears <- unlist(strsplit(Champions_df[WinnerNotes_row, league], ", "))
  RemovalYears <- unlist(strsplit(Champions_df[RemoveRows_row, league], ", "))
  # Drop the NA placeholders up front: `%in%` treats NA as a matchable value,
  # so an NA note would otherwise match every row whose Year is NA.
  MultiWinYears <- MultiWinYears[!is.na(MultiWinYears)]
  RemovalYears <- RemovalYears[!is.na(RemovalYears)]

  WeirdYears <- as.numeric(c(MultiWinYears, RemovalYears))
  WeirdYears <- WeirdYears[!is.na(WeirdYears)]
  remove_rows <- which(Champions_df$Year %in% WeirdYears)

  # Logical filtering instead of rows[-which(...)]: a negative index built
  # from an empty which() selects nothing, which used to wipe out every row
  # whenever none of the special years fell inside this league's range.
  rows <- rows[!rows %in% remove_rows]

  Winners <- Champions_df[rows, league]
  # Multi-winner seasons store the clubs as "A AND B"; split them and count
  # each club once.
  AppendWinners <- Champions_df[which(Champions_df$Year %in% MultiWinYears), league]
  AppendWinners <- trimws(unlist(strsplit(AppendWinners, "AND")))
  Winners <- append(Winners, AppendWinners)

  # Frequency table of the league's winners, tagged with the league's nation
  # and 'size'.
  Winners_df <- data.frame(table(Winners))
  names(Winners_df) <- c("Club", "Championships")
  Winners_df$TotalChampions <- sum(Winners_df$Championships)
  Winners_df$Nation <- Nation
  Winners_df$NationSize <- (UEFA_df$Strength[which(UEFA_df$Nation == Nation)] ^ 2) + 1

  Winners_df
}

# Run the extraction for every league column (column 1 is Year) and stack the
# per-league tables. seq_len(...)[-1] stays empty when there are no league
# columns, unlike 2:ncol().
LeagueWinners_df <- do.call(
  rbind.data.frame,
  lapply(seq_len(ncol(Champions_df))[-1], LeagueWinners_func)
)

# A club's tile size is its share of its league's titles, scaled by the
# league's size.
LeagueWinners_df$ClubSize <- (LeagueWinners_df$NationSize * LeagueWinners_df$Championships) /
  LeagueWinners_df$TotalChampions

# Rename the club called "Gibraltar" (presumably so it does not clash with the
# nation of the same name -- confirm). Club arrives as a factor, and assigning
# a string that is not an existing level to a factor yields NA with a warning,
# so the column is converted to character before the replacement.
LeagueWinners_df$Club <- as.character(LeagueWinners_df$Club)
LeagueWinners_df$Club[LeagueWinners_df$Club %in% "Gibraltar"] <- "Gibraltar FC"

# Make the tree plot ----

# Compute the treemap layout without drawing it (draw = FALSE): clubs nested
# inside nations, tiles sized by ClubSize.
tm <- treemap(
  LeagueWinners_df,
  index = c("Nation", "Club"),
  vSize = "ClubSize",
  draw = FALSE
)

# Bring the remaining per-club columns (e.g. Championships, used by the
# tooltip) onto the layout table, matching on the club name.
tm$tm <- tm$tm %>% left_join(LeagueWinners_df, by = "Club")

# Render the layout as an interactive, drillable highcharts treemap.
tm %>%
  hctreemap(allowDrillToNode = TRUE, layoutAlgorithm = "squarified") %>%
  hc_title(text = "Club Titles Won") %>%
  hc_tooltip(pointFormat = "<b>{point.name}</b>:<br>
                                    Championships: {point.Championships}")