library(tidyverse)
library(dplyr)
library(ggplot2)
library(lubridate)
library(plotly)
#install.packages("GGally")
#install.packages("reshape2")
# Read the follower counts and drop the columns this analysis does not use.
followers <- read.csv("socialmedia_followers.csv") %>%
  select(-c(X_, X_.1, Sport))

# Read the market-value table, drop its unused columns and shorten the
# headers that are referenced later in the script.
player_value <- read.csv("player_market_values.csv") %>%
  select(-c(X_, X_.1)) %>%
  rename(
    Name = Player.name,
    Rank = Overall.rank,
    Market_Value = Market.value.at.10th.December.2020
  )

# merge() with its defaults keeps only players present in both tables and
# returns the rows sorted by the key column.
media_and_value_joint <- merge(followers, player_value, by = "Name") %>%
  rename(Total_Followers = Total)

# Persist the joined table next to the script.
write.csv(media_and_value_joint, "media_and_value_joint.csv")
# Scatter plot: one point per player, total followers against market value.
ggplot(media_and_value_joint, aes(x = Total_Followers, y = Market_Value)) +
  geom_point(aes(color = Name), size = 3) +
  labs(
    title = "Social Media Influence vs. Player Market Value",
    x = "Total Social Media Followers",
    y = "Player Market Value (mil €)"
  )
# Correlation (cor() default method) between follower totals and market
# value across all joined players.
total_followers_and_value_corr <- with(
  media_and_value_joint,
  cor(Total_Followers, Market_Value)
)
total_followers_and_value_corr
## [1] -0.0618486
How beautiful, a slightly negative correlation between market value and total social media followers. What happens if we remove Ronaldo? He appears to be a bit of an outlier.
# Build the "without Ronaldo" subset once and reuse it for both the plot and
# the correlation. The player is selected by name instead of by row position:
# a positional index such as `[-2, ]` silently drops a different player as
# soon as the input files gain, lose or reorder rows.
no_ronaldo <- media_and_value_joint[
  !grepl("Ronaldo", media_and_value_joint$Name, fixed = TRUE),
]

# Fail loudly if the name pattern did not single out exactly one player, so
# the "without Ronaldo" results can never be produced from the wrong subset.
if (nrow(no_ronaldo) != nrow(media_and_value_joint) - 1L) {
  stop(
    "Expected to remove exactly one player matching 'Ronaldo', removed ",
    nrow(media_and_value_joint) - nrow(no_ronaldo),
    call. = FALSE
  )
}

# Same scatter plot as for the full data, minus the outlier.
ggplot(no_ronaldo, aes(x = Total_Followers, y = Market_Value)) +
  geom_point(aes(color = Name), size = 3) +
  labs(
    title = "Social Media Influence vs. Player Market Value (without Ronaldo)",
    x = "Total Social Media Followers",
    y = "Player Market Value (mil €)"
  )

# Correlation between follower totals and market value on the reduced data.
total_followers_and_value_corr_no_ronaldo <-
  cor(no_ronaldo$Total_Followers, no_ronaldo$Market_Value)
total_followers_and_value_corr_no_ronaldo
## [1] 0.1855158
Ha! Now we see a slightly positive correlation, albeit one that is still far from significant.
# Keep the player name, the per-platform follower counts, the follower total
# and the market value; every other column of the joined table is dropped.
# The two renamed columns get human-readable labels for the correlation plot.
social_media_value_matrix_data <- media_and_value_joint %>%
  select(
    -c(
      League,
      Rank,
      Change.vs.October.2020,
      Selected.currency,
      Current.club..20.21.season.,
      Current.league..20.21.season.,
      Nationality,
      Position,
      Age,
      Height..cm.,
      Preferred.foot,
      Intl..caps,
      Intl..goals,
      End.of.contract,
      On.loan.from..parent.club.,
      Agent
    )
  ) %>%
  rename(
    `Market Value` = Market_Value,
    `Total Followers` = Total_Followers
  )
# Load the standalone ggcorr() implementation and draw the correlation matrix
# of the social-media columns against market value.
# NOTE(review): this downloads and executes code from GitHub on every run,
# which needs network access and trusts whatever is on that branch. Consider
# pinning a commit or using a packaged ggcorr() instead.
source("https://raw.githubusercontent.com/briatte/ggcorr/master/ggcorr.R")
ggcorr(
  social_media_value_matrix_data,
  size = 2.8,
  angle = 15,
  label = TRUE,
  label_size = 2,
  geom = "circle",
  low = "orange",
  mid = "white",
  high = "lightblue",
  layout.exp = 1
)
## Warning in ggcorr(social_media_value_matrix_data, size = 2.8, angle = 15, : data
## in column(s) 'Name' are not numeric and were ignored
#+
#title('Correlation Between Individual Social Media Platforms and Market Value")
Note: TikTok followers appear to have a very high correlation with player market value, but only FOUR players in this dataset have TikTok data, so this correlation should not be given much weight.
# Drop the descriptive/contract columns and all social-media columns from
# the joined table; the remaining columns are compared with Market_Value in
# the second correlation plot.
misc_value_matrix_data <- media_and_value_joint %>%
  select(
    -c(
      League,
      Rank,
      Change.vs.October.2020,
      Selected.currency,
      Current.club..20.21.season.,
      Current.league..20.21.season.,
      Nationality,
      Position,
      End.of.contract,
      On.loan.from..parent.club.,
      Agent,
      Facebook,
      Instagram,
      Twitter,
      Youtube,
      TikTok,
      Weibo,
      Total_Followers
    )
  )
Just for fun, let’s compare some of the other variables in our list to Market_Value and see what we get:
# Correlation matrix of the remaining player attributes and market value.
# Relies on ggcorr() having been loaded earlier in the script.
ggcorr(
  misc_value_matrix_data,
  size = 4,
  angle = 15,
  label = TRUE,
  label_size = 2,
  geom = "circle",
  low = "orange",
  mid = "white",
  high = "lightblue",
  layout.exp = 1
)
## Warning in ggcorr(misc_value_matrix_data, size = 4, angle = 15, label = TRUE, :
## data in column(s) 'Name', 'Preferred.foot' are not numeric and were ignored