## Introduction
This report uses the FIFA 15 player dataset to create graphs based on different player attributes.
## Findings {.tabset .tabset-fade .tabset-pills}
The data covers players, teams, individual player stats, and other attributes.
### Tab 1
Top 30 players in the entire FIFA roster
library(dplyr)
library(ggplot2)
library(reshape2)
# ---- Load data ----
# Read the FIFA 15 player table once.
# NOTE(review): this absolute path only resolves on the author's machine;
# consider a project-relative path before knitting or sharing elsewhere.
players_15 <- read.csv("C:/Users/leoan/OneDrive/Desktop/players_15.csv")

# Rename `player_url` to `player_id`. The original note says this is meant to
# match a `players_16` table, which is not visible here -- TODO confirm.
names(players_15)[names(players_15) == "player_url"] <- "player_id"

# ---- Top 30 players by overall rating ----
# head() returns at most 30 rows, so a shorter table is not padded with
# all-NA rows the way `[1:30, ]` would pad it.
top_30_players <- head(players_15[order(-players_15$overall), ], 30)

# ---- Ten most-represented nationalities ----
# Count players per nationality and keep the ten largest counts. Ties at the
# cut-off are kept, so the result can hold more than ten rows.
top_10_nationalities <- players_15 %>%
  count(nationality, sort = TRUE) %>%
  slice_max(order_by = n, n = 10)

# Every player who belongs to one of those nationalities; kept as a top-level
# object so later sections can use it.
top_10_nationalities_players <- players_15 %>%
  filter(nationality %in% top_10_nationalities$nationality)

# ---- Bar chart of the top 30 ----
# Bars are ordered from highest to lowest rating instead of alphabetically,
# so the chart reads as a ranking.
ggplot(top_30_players, aes(x = reorder(long_name, -overall), y = overall)) +
  geom_col(fill = "steelblue", color = "black") +
  # Print each rating just above its bar.
  geom_text(aes(label = overall), vjust = -0.3, size = 4) +
  labs(
    title = "Top 30 Players Based on Overall Rating in 2015",
    x = "Player",
    y = "Overall Rating"
  ) +
  theme_minimal() +
  theme(
    # Slant the player names so they do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
### Tab 2 {.active}
Average Overall Rating Based on Nationality
# All distinct nationalities in the roster (not used further in this section).
unique_nationalities <- unique(players_15$nationality)

# Greek players only. filter() drops rows whose nationality is NA, whereas
# `df[df$nationality == "Greece", ]` returns an all-NA row for each of them.
greek_players <- players_15 %>%
  filter(nationality == "Greece")

# Ten most-represented nationalities by player count. Ties at the cut-off are
# kept, so the result can hold more than ten rows.
top_10_nationalities <- players_15 %>%
  count(nationality, sort = TRUE) %>%
  slice_max(order_by = n, n = 10)

# Mean overall rating for each of those nationalities. Computed straight from
# players_15 so this section does not rely on an intermediate table built in
# an earlier section.
nationality_avg_overall <- players_15 %>%
  filter(nationality %in% top_10_nationalities$nationality) %>%
  group_by(nationality) %>%
  summarise(avg_overall = mean(overall, na.rm = TRUE))

# Line plot of the averages. `group = 1` puts every nationality in a single
# group so one line can be drawn across the discrete x axis.
ggplot(
  nationality_avg_overall,
  aes(x = nationality, y = avg_overall, group = 1)
) +
  geom_line(color = "red", linewidth = 1) +
  labs(
    title = "Line Plot: Average Overall Rating by Nationality",
    x = "Nationality",
    y = "Average Overall Rating"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
### Tab 3 {.active}
Greek Players and Overall Score For The Year 2015
# Horizontal bar chart of each Greek player's overall rating. Names are
# ordered by rating, so after the flip the highest-rated player is on top.
ggplot(
  data = greek_players,
  mapping = aes(x = reorder(long_name, overall), y = overall)
) +
  # Gold bars with a royal-blue outline.
  geom_col(fill = "gold", colour = "royal blue") +
  # Rating label placed just past the end of each bar.
  geom_text(mapping = aes(label = overall), size = 4, hjust = -0.2) +
  labs(
    x = "Player",
    y = "Overall Score",
    title = "Greek Players and Overall Score in 2015"
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(face = "bold", size = 10)
  )
### Tab 4 {.active}
Height vs Weight of Players Across Whole Roster
# Height-versus-weight scatterplot with one point per row of players_15.
ggplot(data = players_15) +
  # Semi-transparent purple points so dense regions stay readable.
  geom_point(
    mapping = aes(x = height_cm, y = weight_kg),
    colour = "purple",
    alpha = 0.6
  ) +
  theme_minimal() +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    x = "Height (cm)",
    y = "Weight (kg)",
    title = "Height vs Weight of Players"
  )
### Tab 5 {.active}
Top 20 Nationalities by Number of Players
# Twenty most-represented nationalities by player count. slice_max() replaces
# the superseded top_n(); ties at the cut-off are still kept, so the result
# can hold more than twenty rows.
top_nationalities <- players_15 %>%
  count(nationality, sort = TRUE) %>%
  slice_max(order_by = n, n = 20)

# Pie chart: a single stacked bar (empty x, full width) wrapped into polar
# coordinates, so each nationality's count becomes a slice angle.
ggplot(top_nationalities, aes(x = "", y = n, fill = nationality)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Top 20 Nationalities of Players", x = "", y = "") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
### Tab 5
Density plot of overall ratings across the entire roster
# Density curve of the `overall` column across every row of players_15.
ggplot(data = players_15, mapping = aes(x = overall)) +
  # Light-blue, slightly transparent area with a black outline.
  geom_density(colour = "black", fill = "lightblue", alpha = 0.7) +
  theme_minimal() +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    x = "Overall Rating",
    y = "Density",
    title = "Density Plot of Overall Ratings"
  )
### Tab 6
Heatmap of player attributes across the entire FIFA roster
# ---- Correlation heatmap of player attributes ----
# Pick the attributes to correlate. drop = FALSE keeps a data frame even if
# the selection is ever reduced to one column.
numeric_columns <- players_15[
  ,
  c("height_cm", "weight_kg", "overall", "potential", "value_eur"),
  drop = FALSE
]

# Keep only columns whose standard deviation is a non-zero number. isTRUE()
# treats an NA standard deviation (e.g. an all-NA column) as "drop" instead of
# producing an NA column index, and vapply() guarantees a logical vector.
has_variance <- vapply(
  numeric_columns,
  function(x) isTRUE(sd(x, na.rm = TRUE) != 0),
  logical(1)
)
numeric_columns <- numeric_columns[, has_variance, drop = FALSE]

# Pairwise correlations over rows with no missing values, rounded to 2 dp.
cor_matrix <- round(cor(numeric_columns, use = "complete.obs"), 2)

# Long format (Var1, Var2, value) so ggplot can draw one tile per pair.
melted_cor_matrix <- melt(cor_matrix)

# Diverging blue-white-red scale centred on 0 and fixed to [-1, 1], so the
# colours mean the same thing regardless of the observed range.
ggplot(data = melted_cor_matrix, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 0,
    limits = c(-1, 1),
    space = "Lab",
    name = "Correlation"
  ) +
  labs(
    title = "Heatmap of Correlation Between Player Attributes",
    x = "Attributes",
    y = "Attributes"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
## Wrap up
The graphs above draw on the FIFA 15 roster and related player stats, combining those attributes into helpful visualizations.