# libraries
library(rvest)
library(magrittr)
#library(tmap)
library(ggplot2)
library(data.table)
#library(RColorBrewer)
library(ggthemes)
library(treemap)
library(highcharter)
library(dplyr)
#library(stringi)
#library(rPlotter)
#library(png)
#library(raster)
# Wikipedia table of countries and coefficients
# Use Wikipedia's list of country coefficients; this URL is the page that the
# table scraper and the league-link scraper below are both applied to.
UEFA_url <- "https://en.wikipedia.org/wiki/UEFA_coefficient#Country_coefficient"
# Scrape the country-coefficient table from a web page.
#
# url:      address of the page to download.
# selector: CSS selector locating the table node. The default is the position
#           this script was written against; it is positional (nth-child), so
#           pass a different selector if the page layout has shifted rather
#           than editing the function.
# Returns the list produced by html_table() for the matched node(s).
UEFA_table_func <- function(
    url,
    selector = "#mw-content-text > table:nth-child(37)") {
  page <- read_html(url)
  co_eff_nodes <- html_nodes(page, selector)
  # fill = TRUE lets rows with fewer cells than the widest row still parse
  html_table(co_eff_nodes, fill = TRUE)
}
# Pull the scraped table, keep columns 4 to 8 (nation plus four coefficient
# columns) and discard the first row.
UEFA_df <- data.frame(lapply(UEFA_url, UEFA_table_func))[4:8]
UEFA_df <- UEFA_df[-1, ]
colnames(UEFA_df) <- c("Nation", "cf1", "cf2", "cf3", "cf4")

# Clean up names: strip a space followed by parenthesised text
UEFA_df$Nation <- gsub(" \\(.*\\)", "", UEFA_df$Nation)

# Clean up coefficients: coerce to numeric, then turn every NA into 0
cols <- 2:5
UEFA_df[, cols] <- lapply(
  UEFA_df[, cols],
  function(x) as.numeric(as.character(x))
)
UEFA_df[is.na(UEFA_df)] <- 0

# Sum the four coefficients to get each nation's total strength ('size')
UEFA_df$Strength <- rowSums(UEFA_df[cols])
# Add league names/links: read the href and title of every anchor matching
# "small a:nth-child(1)" on the page.
#
# url: address of the page to download.
# Returns one named character vector: all hrefs first (element names prefixed
# "links"), followed by all titles (element names prefixed "names").
UEFA_leagues_func <- function(url) {
  league_anchors <- html_nodes(read_html(url), "small a:nth-child(1)")
  hrefs <- as.character(html_attr(league_anchors, "href"))
  titles <- as.character(html_attr(league_anchors, "title"))
  c(links = hrefs, names = titles)
}
# The scraper returns a single named vector, so this builds a one-column data
# frame whose row names ("links..." / "names...") say what each cell holds.
# TODO: this is messy - reshape into a data frame with two columns instead.
Leagues_df <- do.call(cbind.data.frame, lapply(UEFA_url, UEFA_leagues_func))
league_row_labels <- rownames(Leagues_df)
league_name_rows <- grep("names", league_row_labels)
league_link_rows <- grep("links", league_row_labels)

UEFA_df$League <- Leagues_df[league_name_rows, ]
# The scraped hrefs are prefixed with the site root to form the league URL
UEFA_df$League_URL <- paste0(
  "https://en.wikipedia.org",
  Leagues_df[league_link_rows, ]
)
# ggplot of coefficients over the last 4 years
# Reshape to long format: one row per nation per coefficient column.
# melt() here is data.table's, so hand it a data.table explicitly instead of
# relying on its deprecated reshape2 redirect for plain data frames, then go
# back to a data frame so the base-style indexing below behaves as before.
MeltedUEFA_df <- melt(
  as.data.table(UEFA_df[1:6]),
  id.vars = c("Nation", "Strength"),
  variable.name = "Year"
)
MeltedUEFA_df <- as.data.frame(MeltedUEFA_df)
MeltedUEFA_df <- MeltedUEFA_df[order(MeltedUEFA_df$Strength), ]

# Stacked bars per nation (strongest first): fill encodes total strength,
# alpha encodes which of the four coefficient columns a segment comes from.
ggplot(MeltedUEFA_df, aes(x = reorder(Nation, -Strength), y = value)) +
  geom_bar(aes(alpha = Year, fill = Strength), stat = "identity") +
  # guide = "none" replaces the deprecated guide = FALSE
  scale_fill_continuous(low = "blue", high = "red", guide = "none") +
  # Legend labels are set here; the former theme(legend.text =
  # element_text(c(...))) call passed year strings as the font *family*
  # (element_text's first argument) and has been removed.
  scale_alpha_manual(
    values = c(0.25, 0.5, 0.75, 1),
    labels = paste0("201", 3:6)
  ) +
  # Only label a spread of nations on the x axis; the fractional positions
  # produced by seq() are truncated when used as indices
  scale_x_discrete(
    breaks = MeltedUEFA_df$Nation[seq(
      1,
      nrow(MeltedUEFA_df),
      (nrow(MeltedUEFA_df) - 4) / 9
    )]
  ) +
  theme_fivethirtyeight() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) +
  ggtitle(
    "The Strength of UEFA Leagues",
    "coefficients over 4 years for each UEFA league"
  )

# get GitHub data of league winners
# Download the file of all league winners for every UEFA country
Github_url <- "https://raw.githubusercontent.com/RobWHickman/Databases-and-Files/master/"
Champions_df <- read.csv(
  paste0(Github_url, "European%20Football%20Champions/Football%20Champions.csv"),
  stringsAsFactors = FALSE,
  na.strings = "NA"
)

# Rename the columns to fit the Nation names in UEFA_df ("Year" comes first)
names(Champions_df) <- c("Year", UEFA_df$Nation)

# The last three rows are special cells used when extracting league winners:
# start year, multiple-winner years, and years to remove (in that order)
RemoveRows_row <- nrow(Champions_df)
WinnerNotes_row <- RemoveRows_row - 1
StartYear_row <- RemoveRows_row - 2
# get the winners out of the massive table
# Build the per-club title counts for one league.
#
# league: column index into Champions_df.
# Reads the globals Champions_df, StartYear_row, WinnerNotes_row,
# RemoveRows_row and UEFA_df.
# Returns a data frame with one row per club and the columns Club,
# Championships, TotalChampions, Nation and NationSize.
LeagueWinners_func <- function(league) {
  start_year <- as.numeric(Champions_df[StartYear_row, league])
  # Seasons are addressed by row position: row = year - 1888, up to the row
  # just above the start-year marker row
  rows <- (start_year - 1888):(StartYear_row - 1)
  nation <- colnames(Champions_df)[league]

  # Some years have multiple winners and some years should not be counted;
  # both are listed as ", "-separated years in the marker rows
  multi_win_years <- unlist(strsplit(Champions_df[WinnerNotes_row, league], ", "))
  removal_years <- unlist(strsplit(Champions_df[RemoveRows_row, league], ", "))
  weird_years <- c(multi_win_years, removal_years)
  weird_years <- as.numeric(weird_years[!is.na(weird_years)])
  remove_rows <- which(Champions_df$Year %in% weird_years)
  # Use a logical mask: rows[-which(...)] would drop *every* row whenever
  # none of the flagged years falls inside this league's row range
  rows <- rows[!(rows %in% remove_rows)]

  winners <- Champions_df[rows, league]
  # Multiple-winner seasons hold several clubs joined by "AND"; split them
  # and count each club once
  shared_titles <- Champions_df[which(Champions_df$Year %in% multi_win_years), league]
  shared_titles <- trimws(unlist(strsplit(shared_titles, "AND")))
  winners <- c(winners, shared_titles)

  # Frequency table of winners, with the league's nation and 'size' attached
  winners_df <- data.frame(table(winners))
  names(winners_df) <- c("Club", "Championships")
  winners_df$TotalChampions <- sum(winners_df$Championships)
  winners_df$Nation <- nation
  winners_df$NationSize <- (UEFA_df$Strength[which(UEFA_df$Nation == nation)]^2) + 1
  winners_df
}
# Stack the per-league winner tables for every column after the first
# (seq_len()[-1] stays empty, rather than counting down, if there is only
# one column).
LeagueWinners_df <- do.call(
  rbind.data.frame,
  lapply(seq_len(ncol(Champions_df))[-1], LeagueWinners_func)
)

# Share each nation's size among its clubs in proportion to titles won
LeagueWinners_df$ClubSize <-
  (LeagueWinners_df$NationSize * LeagueWinners_df$Championships) /
  LeagueWinners_df$TotalChampions

# Club comes out of data.frame(table(...)) as a factor, and assigning a label
# that is not already a level yields NA with a warning - so convert to
# character before renaming the club.
LeagueWinners_df$Club <- as.character(LeagueWinners_df$Club)
LeagueWinners_df$Club[which(LeagueWinners_df$Club == "Gibraltar")] <- "Gibraltar FC"
# make the treemap plot
# Create the treemap object without drawing it: Nation is the outer index,
# Club the inner one, and tiles are sized by ClubSize
tm <- treemap(
  LeagueWinners_df,
  index = c("Nation", "Club"),
  vSize = "ClubSize",
  draw = FALSE
)

# Attach the rest of the club data to the treemap object, matched on Club
tm$tm <- left_join(tm$tm, LeagueWinners_df, by = "Club")

# Create and plot the highchart, showing each club's title count on hover
club_chart <- hctreemap(
  tm,
  allowDrillToNode = TRUE,
  layoutAlgorithm = "squarified"
)
club_chart <- hc_title(club_chart, text = "Club Titles Won")
hc_tooltip(club_chart, pointFormat = "<b>{point.name}</b>:<br>
Championships: {point.Championships}")