# Packages used throughout the script (one statement per line; two calls on a
# single line without a separator do not parse in R).
library(ggplot2)
library(dplyr)

# Load dataset
# Example of loading from a specific directory.
# NOTE(review): this path is specific to one machine; adjust it before running
# the script elsewhere.
data <- read.csv("~/Downloads/fifa_eda_stats.csv")
# Preview the first five rows.
head(data, 5)
## ID Name Age Nationality Overall Potential
## 1 158023 L. Messi 31 Argentina 94 94
## 2 20801 Cristiano Ronaldo 33 Portugal 94 94
## 3 190871 Neymar Jr 26 Brazil 92 93
## 4 193080 De Gea 27 Spain 91 93
## 5 192985 K. De Bruyne 27 Belgium 91 92
## Club Value Wage Preferred.Foot International.Reputation
## 1 FC Barcelona €110.5M €565K Left 5
## 2 Juventus €77M €405K Right 5
## 3 Paris Saint-Germain €118.5M €290K Right 5
## 4 Manchester United €72M €260K Right 4
## 5 Manchester City €102M €355K Right 4
## Weak.Foot Skill.Moves Work.Rate Body.Type Position Jersey.Number
## 1 4 4 Medium/ Medium Messi RF 10
## 2 4 5 High/ Low C. Ronaldo ST 7
## 3 5 5 High/ Medium Neymar LW 10
## 4 3 1 Medium/ Medium Lean GK 1
## 5 5 4 High/ High Normal RCM 7
## Joined Loaned.From Contract.Valid.Until Height Weight Crossing
## 1 Jul 1, 2004 2021 5'7 159lbs 84
## 2 Jul 10, 2018 2022 6'2 183lbs 84
## 3 Aug 3, 2017 2022 5'9 150lbs 79
## 4 Jul 1, 2011 2020 6'4 168lbs 17
## 5 Aug 30, 2015 2023 5'11 154lbs 93
## Finishing HeadingAccuracy ShortPassing Volleys Dribbling Curve FKAccuracy
## 1 95 70 90 86 97 93 94
## 2 94 89 81 87 88 81 76
## 3 87 62 84 84 96 88 87
## 4 13 21 50 13 18 21 19
## 5 82 55 92 82 86 85 83
## LongPassing BallControl Acceleration SprintSpeed Agility Reactions Balance
## 1 87 96 91 86 91 95 95
## 2 77 94 89 91 87 96 70
## 3 78 95 94 90 96 94 84
## 4 51 42 57 58 60 90 43
## 5 91 91 78 76 79 91 77
## ShotPower Jumping Stamina Strength LongShots Aggression Interceptions
## 1 85 68 72 59 94 48 22
## 2 95 95 88 79 93 63 29
## 3 80 61 81 49 82 56 36
## 4 31 67 43 64 12 38 30
## 5 91 63 90 75 91 76 61
## Positioning Vision Penalties Composure Marking StandingTackle SlidingTackle
## 1 94 94 75 96 33 28 26
## 2 95 82 85 95 28 31 23
## 3 89 87 81 94 27 24 33
## 4 12 68 40 68 15 21 13
## 5 87 94 79 88 68 58 51
## GKDiving GKHandling GKKicking GKPositioning GKReflexes Release.Clause
## 1 6 11 15 14 8 €226.5M
## 2 7 11 15 14 11 €127.1M
## 3 9 9 15 15 11 €228.1M
## 4 90 85 87 88 94 €138.6M
## 5 15 13 5 10 13 €196.4M
# Find the number of rows and columns
# dim() returns c(rows, columns) for the data frame.
dim(data)
## [1] 18207 57
# Summary of the dataset
# summary() prints quartiles, mean and the NA count (if any) for numeric
# columns, and length/class/mode for character columns.
summary(data)
## ID Name Age Nationality
## Min. : 16 Length:18207 Min. :16.00 Length:18207
## 1st Qu.:200316 Class :character 1st Qu.:21.00 Class :character
## Median :221759 Mode :character Median :25.00 Mode :character
## Mean :214298 Mean :25.12
## 3rd Qu.:236530 3rd Qu.:28.00
## Max. :246620 Max. :45.00
##
## Overall Potential Club Value
## Min. :46.00 Min. :48.00 Length:18207 Length:18207
## 1st Qu.:62.00 1st Qu.:67.00 Class :character Class :character
## Median :66.00 Median :71.00 Mode :character Mode :character
## Mean :66.24 Mean :71.31
## 3rd Qu.:71.00 3rd Qu.:75.00
## Max. :94.00 Max. :95.00
##
## Wage Preferred.Foot International.Reputation Weak.Foot
## Length:18207 Length:18207 Min. :1.000 Min. :1.000
## Class :character Class :character 1st Qu.:1.000 1st Qu.:3.000
## Mode :character Mode :character Median :1.000 Median :3.000
## Mean :1.113 Mean :2.947
## 3rd Qu.:1.000 3rd Qu.:3.000
## Max. :5.000 Max. :5.000
## NA's :48 NA's :48
## Skill.Moves Work.Rate Body.Type Position
## Min. :1.000 Length:18207 Length:18207 Length:18207
## 1st Qu.:2.000 Class :character Class :character Class :character
## Median :2.000 Mode :character Mode :character Mode :character
## Mean :2.361
## 3rd Qu.:3.000
## Max. :5.000
## NA's :48
## Jersey.Number Joined Loaned.From Contract.Valid.Until
## Min. : 1.00 Length:18207 Length:18207 Length:18207
## 1st Qu.: 8.00 Class :character Class :character Class :character
## Median :17.00 Mode :character Mode :character Mode :character
## Mean :19.55
## 3rd Qu.:26.00
## Max. :99.00
## NA's :60
## Height Weight Crossing Finishing
## Length:18207 Length:18207 Min. : 5.00 Min. : 2.00
## Class :character Class :character 1st Qu.:38.00 1st Qu.:30.00
## Mode :character Mode :character Median :54.00 Median :49.00
## Mean :49.73 Mean :45.55
## 3rd Qu.:64.00 3rd Qu.:62.00
## Max. :93.00 Max. :95.00
## NA's :48 NA's :48
## HeadingAccuracy ShortPassing Volleys Dribbling
## Min. : 4.0 Min. : 7.00 Min. : 4.00 Min. : 4.00
## 1st Qu.:44.0 1st Qu.:54.00 1st Qu.:30.00 1st Qu.:49.00
## Median :56.0 Median :62.00 Median :44.00 Median :61.00
## Mean :52.3 Mean :58.69 Mean :42.91 Mean :55.37
## 3rd Qu.:64.0 3rd Qu.:68.00 3rd Qu.:57.00 3rd Qu.:68.00
## Max. :94.0 Max. :93.00 Max. :90.00 Max. :97.00
## NA's :48 NA's :48 NA's :48 NA's :48
## Curve FKAccuracy LongPassing BallControl
## Min. : 6.00 Min. : 3.00 Min. : 9.00 Min. : 5.00
## 1st Qu.:34.00 1st Qu.:31.00 1st Qu.:43.00 1st Qu.:54.00
## Median :48.00 Median :41.00 Median :56.00 Median :63.00
## Mean :47.17 Mean :42.86 Mean :52.71 Mean :58.37
## 3rd Qu.:62.00 3rd Qu.:57.00 3rd Qu.:64.00 3rd Qu.:69.00
## Max. :94.00 Max. :94.00 Max. :93.00 Max. :96.00
## NA's :48 NA's :48 NA's :48 NA's :48
## Acceleration SprintSpeed Agility Reactions Balance
## Min. :12.00 Min. :12.00 Min. :14.0 Min. :21.00 Min. :16.00
## 1st Qu.:57.00 1st Qu.:57.00 1st Qu.:55.0 1st Qu.:56.00 1st Qu.:56.00
## Median :67.00 Median :67.00 Median :66.0 Median :62.00 Median :66.00
## Mean :64.61 Mean :64.73 Mean :63.5 Mean :61.84 Mean :63.97
## 3rd Qu.:75.00 3rd Qu.:75.00 3rd Qu.:74.0 3rd Qu.:68.00 3rd Qu.:74.00
## Max. :97.00 Max. :96.00 Max. :96.0 Max. :96.00 Max. :96.00
## NA's :48 NA's :48 NA's :48 NA's :48 NA's :48
## ShotPower Jumping Stamina Strength
## Min. : 2.00 Min. :15.00 Min. :12.00 Min. :17.00
## 1st Qu.:45.00 1st Qu.:58.00 1st Qu.:56.00 1st Qu.:58.00
## Median :59.00 Median :66.00 Median :66.00 Median :67.00
## Mean :55.46 Mean :65.09 Mean :63.22 Mean :65.31
## 3rd Qu.:68.00 3rd Qu.:73.00 3rd Qu.:74.00 3rd Qu.:74.00
## Max. :95.00 Max. :95.00 Max. :96.00 Max. :97.00
## NA's :48 NA's :48 NA's :48 NA's :48
## LongShots Aggression Interceptions Positioning Vision
## Min. : 3.00 Min. :11.00 Min. : 3.0 Min. : 2.00 Min. :10.0
## 1st Qu.:33.00 1st Qu.:44.00 1st Qu.:26.0 1st Qu.:38.00 1st Qu.:44.0
## Median :51.00 Median :59.00 Median :52.0 Median :55.00 Median :55.0
## Mean :47.11 Mean :55.87 Mean :46.7 Mean :49.96 Mean :53.4
## 3rd Qu.:62.00 3rd Qu.:69.00 3rd Qu.:64.0 3rd Qu.:64.00 3rd Qu.:64.0
## Max. :94.00 Max. :95.00 Max. :92.0 Max. :95.00 Max. :94.0
## NA's :48 NA's :48 NA's :48 NA's :48 NA's :48
## Penalties Composure Marking StandingTackle SlidingTackle
## Min. : 5.00 Min. : 3.00 Min. : 3.00 Min. : 2.0 Min. : 3.00
## 1st Qu.:39.00 1st Qu.:51.00 1st Qu.:30.00 1st Qu.:27.0 1st Qu.:24.00
## Median :49.00 Median :60.00 Median :53.00 Median :55.0 Median :52.00
## Mean :48.55 Mean :58.65 Mean :47.28 Mean :47.7 Mean :45.66
## 3rd Qu.:60.00 3rd Qu.:67.00 3rd Qu.:64.00 3rd Qu.:66.0 3rd Qu.:64.00
## Max. :92.00 Max. :96.00 Max. :94.00 Max. :93.0 Max. :91.00
## NA's :48 NA's :48 NA's :48 NA's :48 NA's :48
## GKDiving GKHandling GKKicking GKPositioning
## Min. : 1.00 Min. : 1.00 Min. : 1.00 Min. : 1.00
## 1st Qu.: 8.00 1st Qu.: 8.00 1st Qu.: 8.00 1st Qu.: 8.00
## Median :11.00 Median :11.00 Median :11.00 Median :11.00
## Mean :16.62 Mean :16.39 Mean :16.23 Mean :16.39
## 3rd Qu.:14.00 3rd Qu.:14.00 3rd Qu.:14.00 3rd Qu.:14.00
## Max. :90.00 Max. :92.00 Max. :91.00 Max. :90.00
## NA's :48 NA's :48 NA's :48 NA's :48
## GKReflexes Release.Clause
## Min. : 1.00 Length:18207
## 1st Qu.: 8.00 Class :character
## Median :11.00 Mode :character
## Mean :16.71
## 3rd Qu.:14.00
## Max. :94.00
## NA's :48
# Check for missing values
# Counts the NA entries in every column. is.na() only flags true NA values, so
# blank strings in character columns are not included in these counts.
colSums(is.na(data))
## ID Name Age
## 0 0 0
## Nationality Overall Potential
## 0 0 0
## Club Value Wage
## 0 0 0
## Preferred.Foot International.Reputation Weak.Foot
## 0 48 48
## Skill.Moves Work.Rate Body.Type
## 48 0 0
## Position Jersey.Number Joined
## 0 60 0
## Loaned.From Contract.Valid.Until Height
## 0 0 0
## Weight Crossing Finishing
## 0 48 48
## HeadingAccuracy ShortPassing Volleys
## 48 48 48
## Dribbling Curve FKAccuracy
## 48 48 48
## LongPassing BallControl Acceleration
## 48 48 48
## SprintSpeed Agility Reactions
## 48 48 48
## Balance ShotPower Jumping
## 48 48 48
## Stamina Strength LongShots
## 48 48 48
## Aggression Interceptions Positioning
## 48 48 48
## Vision Penalties Composure
## 48 48 48
## Marking StandingTackle SlidingTackle
## 48 48 48
## GKDiving GKHandling GKKicking
## 48 48 48
## GKPositioning GKReflexes Release.Clause
## 48 48 0
# Replace missing values in numeric columns with the mean.
# Each of the three columns is passed through the same helper: NA entries are
# overwritten with the mean of the observed (non-NA) values.
# NOTE(review): the per-column NA counts printed earlier in this script show
# zero NAs for Age, Overall and Potential, while other numeric columns report
# 48-60 NAs and are not touched here -- confirm which columns should be imputed.
data[c("Age", "Overall", "Potential")] <- lapply(
  data[c("Age", "Overall", "Potential")],
  function(values) {
    values[is.na(values)] <- mean(values, na.rm = TRUE)
    values
  }
)
#' Most frequent value (statistical mode) of a vector
#'
#' @param x A vector (character, numeric, factor, ...).
#' @param na.rm If `TRUE` (the default), `NA` entries are dropped before
#'   counting, so `NA` is never returned as the mode of a vector that has
#'   observed values. Use `FALSE` to let `NA` compete like any other value.
#' @return A length-one vector of the same type as `x` holding the most
#'   frequent value; ties go to the value that appears first in `x`. Returns
#'   `NA` when there is nothing to count.
get_mode <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  if (length(x) == 0L) {
    # Nothing to count: return a single NA of the same type as x.
    return(x[NA_integer_])
  }
  ux <- unique(x)
  # match() maps every element to its position in ux; tabulate() counts how
  # often each position occurs; which.max() picks the first maximum.
  ux[which.max(tabulate(match(x, ux)))]
}
# Replace missing values in categorical columns with the mode.
# read.csv() keeps blank fields of character columns as "" rather than NA, so
# an is.na() test alone never matches them. Blank or whitespace-only strings
# are therefore treated as missing too, and the mode is computed from the
# non-missing values only so a missing marker can never be chosen as the fill.
data[c("Preferred.Foot", "Work.Rate")] <- lapply(
  data[c("Preferred.Foot", "Work.Rate")],
  function(values) {
    is_missing <- is.na(values) | !nzchar(trimws(values))
    values[is_missing] <- get_mode(values[!is_missing])
    values
  }
)
# Check for any remaining missing values
# Re-counts the NA entries per column after the replacement steps above.
# Only true NA values are counted; blank strings are not.
colSums(is.na(data))
## ID Name Age
## 0 0 0
## Nationality Overall Potential
## 0 0 0
## Club Value Wage
## 0 0 0
## Preferred.Foot International.Reputation Weak.Foot
## 0 48 48
## Skill.Moves Work.Rate Body.Type
## 48 0 0
## Position Jersey.Number Joined
## 0 60 0
## Loaned.From Contract.Valid.Until Height
## 0 0 0
## Weight Crossing Finishing
## 0 48 48
## HeadingAccuracy ShortPassing Volleys
## 48 48 48
## Dribbling Curve FKAccuracy
## 48 48 48
## LongPassing BallControl Acceleration
## 48 48 48
## SprintSpeed Agility Reactions
## 48 48 48
## Balance ShotPower Jumping
## 48 48 48
## Stamina Strength LongShots
## 48 48 48
## Aggression Interceptions Positioning
## 48 48 48
## Vision Penalties Composure
## 48 48 48
## Marking StandingTackle SlidingTackle
## 48 48 48
## GKDiving GKHandling GKKicking
## 48 48 48
## GKPositioning GKReflexes Release.Clause
## 48 48 0
# Install dplyr if not installed already.
# requireNamespace() only tests whether the package can be loaded and returns
# TRUE/FALSE; unlike require() it does not attach the package, so this check
# has no effect on the search path. Attaching is left to library(dplyr).
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dplyr)
# Numeric summary for Age, Overall, and Potential.
# across() applies min, max and mean (ignoring NAs) to each of the three
# columns in turn; the .names spec produces min_age, max_age, mean_age,
# min_overall, ... mean_potential, in that order.
numeric_summary <- data %>%
  summarise(
    across(
      c(Age, Overall, Potential),
      list(
        min = function(v) min(v, na.rm = TRUE),
        max = function(v) max(v, na.rm = TRUE),
        mean = function(v) mean(v, na.rm = TRUE)
      ),
      .names = "{.fn}_{tolower(.col)}"
    )
  )
print(numeric_summary)
## min_age max_age mean_age min_overall max_overall mean_overall min_potential
## 1 16 45 25.12221 46 94 66.2387 48
## max_potential mean_potential
## 1 95 71.3073
# Categorical summary for Nationality and Preferred Foot:
# one frequency table per column, most common value first.
categorical_summary_nationality <- count(data, Nationality, sort = TRUE)
categorical_summary_foot <- count(data, Preferred.Foot, sort = TRUE)
# Show only the six most frequent nationalities.
print(head(categorical_summary_nationality))
## Nationality n
## 1 England 1662
## 2 Germany 1198
## 3 Spain 1072
## 4 Argentina 937
## 5 France 914
## 6 Brazil 827
# Show the complete Preferred.Foot table.
print(categorical_summary_foot)
## Preferred.Foot n
## 1 Right 13948
## 2 Left 4211
## 3 48
# Question 1: Using an aggregation function.
# Average Overall rating per Age value: one row per distinct age.
age_overall_trend <- summarise(
  group_by(data, Age),
  mean_overall = mean(Overall, na.rm = TRUE)
)
# Preview the first six age groups.
age_overall_trend %>%
  head() %>%
  print()
## # A tibble: 6 × 2
## Age mean_overall
## <dbl> <dbl>
## 1 16 54.5
## 2 17 56.4
## 3 18 57.7
## 4 19 59.6
## 5 20 61.9
## 6 21 63.5
# Visualizing the relationship between Age and Overall performance:
# scatter plot of every player with a red linear-model fit on top.
library(ggplot2)
ggplot(data, aes(x = Age, y = Overall)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red") +
  labs(
    title = "Age vs. Overall Performance",
    x = "Age",
    y = "Overall Rating"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Visualizing distributions of Preferred Foot and Overall Rating:
# 30-bin histogram of Overall, with the bars for each Preferred.Foot value
# drawn side by side.
ggplot(data, aes(x = Overall, fill = Preferred.Foot)) +
  geom_histogram(bins = 30, position = "dodge") +
  labs(
    title = "Overall Rating Distribution by Preferred Foot",
    x = "Overall Rating",
    y = "Count"
  )
# Question 3: Average potential by nationality, highest average first.
# NOTE(review): the ranking uses the plain per-nationality mean with no
# minimum player count, so nationalities with very few players may dominate
# the top rows -- verify before drawing conclusions.
nationality_potential <- data %>%
  group_by(Nationality) %>%
  summarise(mean_potential = mean(Potential, na.rm = TRUE)) %>%
  arrange(desc(mean_potential))
# Preview the six nationalities with the highest average potential.
nationality_potential %>%
  head() %>%
  print()
## # A tibble: 6 × 2
## Nationality mean_potential
## <chr> <dbl>
## 1 Dominican Republic 80.5
## 2 Chad 78
## 3 United Arab Emirates 78
## 4 Central African Rep. 76
## 5 Russia 75.3
## 6 Portugal 75.3
# Project goal
# This dataset contains statistics on FIFA players: performance attributes,
# physical traits, contract details, and financial data.
# Goals:
# - Analyse player performance through attributes such as speed, stamina, and
#   overall rating.
# - Carry out a market analysis based on player market value and wages.
# The results can help sports analysts understand players' strengths, or be
# used in video games for simulation.