# Load necessary libraries

library(ggplot2)
library(dplyr)

# Read the FIFA player statistics CSV into a data frame.
# The path is hard-coded to a file in the user's Downloads folder.
data <- read.csv(file = "~/Downloads/fifa_eda_stats.csv")

# Preview the first five rows
head(x = data, n = 5)
##       ID              Name Age Nationality Overall Potential
## 1 158023          L. Messi  31   Argentina      94        94
## 2  20801 Cristiano Ronaldo  33    Portugal      94        94
## 3 190871         Neymar Jr  26      Brazil      92        93
## 4 193080            De Gea  27       Spain      91        93
## 5 192985      K. De Bruyne  27     Belgium      91        92
##                  Club   Value  Wage Preferred.Foot International.Reputation
## 1        FC Barcelona €110.5M €565K           Left                        5
## 2            Juventus    €77M €405K          Right                        5
## 3 Paris Saint-Germain €118.5M €290K          Right                        5
## 4   Manchester United    €72M €260K          Right                        4
## 5     Manchester City   €102M €355K          Right                        4
##   Weak.Foot Skill.Moves      Work.Rate  Body.Type Position Jersey.Number
## 1         4           4 Medium/ Medium      Messi       RF            10
## 2         4           5      High/ Low C. Ronaldo       ST             7
## 3         5           5   High/ Medium     Neymar       LW            10
## 4         3           1 Medium/ Medium       Lean       GK             1
## 5         5           4     High/ High     Normal      RCM             7
##         Joined Loaned.From Contract.Valid.Until Height Weight Crossing
## 1  Jul 1, 2004                             2021    5'7 159lbs       84
## 2 Jul 10, 2018                             2022    6'2 183lbs       84
## 3  Aug 3, 2017                             2022    5'9 150lbs       79
## 4  Jul 1, 2011                             2020    6'4 168lbs       17
## 5 Aug 30, 2015                             2023   5'11 154lbs       93
##   Finishing HeadingAccuracy ShortPassing Volleys Dribbling Curve FKAccuracy
## 1        95              70           90      86        97    93         94
## 2        94              89           81      87        88    81         76
## 3        87              62           84      84        96    88         87
## 4        13              21           50      13        18    21         19
## 5        82              55           92      82        86    85         83
##   LongPassing BallControl Acceleration SprintSpeed Agility Reactions Balance
## 1          87          96           91          86      91        95      95
## 2          77          94           89          91      87        96      70
## 3          78          95           94          90      96        94      84
## 4          51          42           57          58      60        90      43
## 5          91          91           78          76      79        91      77
##   ShotPower Jumping Stamina Strength LongShots Aggression Interceptions
## 1        85      68      72       59        94         48            22
## 2        95      95      88       79        93         63            29
## 3        80      61      81       49        82         56            36
## 4        31      67      43       64        12         38            30
## 5        91      63      90       75        91         76            61
##   Positioning Vision Penalties Composure Marking StandingTackle SlidingTackle
## 1          94     94        75        96      33             28            26
## 2          95     82        85        95      28             31            23
## 3          89     87        81        94      27             24            33
## 4          12     68        40        68      15             21            13
## 5          87     94        79        88      68             58            51
##   GKDiving GKHandling GKKicking GKPositioning GKReflexes Release.Clause
## 1        6         11        15            14          8        €226.5M
## 2        7         11        15            14         11        €127.1M
## 3        9          9        15            15         11        €228.1M
## 4       90         85        87            88         94        €138.6M
## 5       15         13         5            10         13        €196.4M
# Find the number of rows and columns; dim() reports them as c(rows, columns)
dim(data)
## [1] 18207    57
# Summary of the dataset, column by column: quartiles, mean and NA count for
# numeric columns; length, class and mode for character columns
summary(data)
##        ID             Name                Age        Nationality       
##  Min.   :    16   Length:18207       Min.   :16.00   Length:18207      
##  1st Qu.:200316   Class :character   1st Qu.:21.00   Class :character  
##  Median :221759   Mode  :character   Median :25.00   Mode  :character  
##  Mean   :214298                      Mean   :25.12                     
##  3rd Qu.:236530                      3rd Qu.:28.00                     
##  Max.   :246620                      Max.   :45.00                     
##                                                                        
##     Overall        Potential         Club              Value          
##  Min.   :46.00   Min.   :48.00   Length:18207       Length:18207      
##  1st Qu.:62.00   1st Qu.:67.00   Class :character   Class :character  
##  Median :66.00   Median :71.00   Mode  :character   Mode  :character  
##  Mean   :66.24   Mean   :71.31                                        
##  3rd Qu.:71.00   3rd Qu.:75.00                                        
##  Max.   :94.00   Max.   :95.00                                        
##                                                                       
##      Wage           Preferred.Foot     International.Reputation   Weak.Foot    
##  Length:18207       Length:18207       Min.   :1.000            Min.   :1.000  
##  Class :character   Class :character   1st Qu.:1.000            1st Qu.:3.000  
##  Mode  :character   Mode  :character   Median :1.000            Median :3.000  
##                                        Mean   :1.113            Mean   :2.947  
##                                        3rd Qu.:1.000            3rd Qu.:3.000  
##                                        Max.   :5.000            Max.   :5.000  
##                                        NA's   :48               NA's   :48     
##   Skill.Moves     Work.Rate          Body.Type           Position        
##  Min.   :1.000   Length:18207       Length:18207       Length:18207      
##  1st Qu.:2.000   Class :character   Class :character   Class :character  
##  Median :2.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2.361                                                           
##  3rd Qu.:3.000                                                           
##  Max.   :5.000                                                           
##  NA's   :48                                                              
##  Jersey.Number      Joined          Loaned.From        Contract.Valid.Until
##  Min.   : 1.00   Length:18207       Length:18207       Length:18207        
##  1st Qu.: 8.00   Class :character   Class :character   Class :character    
##  Median :17.00   Mode  :character   Mode  :character   Mode  :character    
##  Mean   :19.55                                                             
##  3rd Qu.:26.00                                                             
##  Max.   :99.00                                                             
##  NA's   :60                                                                
##     Height             Weight             Crossing       Finishing    
##  Length:18207       Length:18207       Min.   : 5.00   Min.   : 2.00  
##  Class :character   Class :character   1st Qu.:38.00   1st Qu.:30.00  
##  Mode  :character   Mode  :character   Median :54.00   Median :49.00  
##                                        Mean   :49.73   Mean   :45.55  
##                                        3rd Qu.:64.00   3rd Qu.:62.00  
##                                        Max.   :93.00   Max.   :95.00  
##                                        NA's   :48      NA's   :48     
##  HeadingAccuracy  ShortPassing      Volleys        Dribbling    
##  Min.   : 4.0    Min.   : 7.00   Min.   : 4.00   Min.   : 4.00  
##  1st Qu.:44.0    1st Qu.:54.00   1st Qu.:30.00   1st Qu.:49.00  
##  Median :56.0    Median :62.00   Median :44.00   Median :61.00  
##  Mean   :52.3    Mean   :58.69   Mean   :42.91   Mean   :55.37  
##  3rd Qu.:64.0    3rd Qu.:68.00   3rd Qu.:57.00   3rd Qu.:68.00  
##  Max.   :94.0    Max.   :93.00   Max.   :90.00   Max.   :97.00  
##  NA's   :48      NA's   :48      NA's   :48      NA's   :48     
##      Curve         FKAccuracy     LongPassing     BallControl   
##  Min.   : 6.00   Min.   : 3.00   Min.   : 9.00   Min.   : 5.00  
##  1st Qu.:34.00   1st Qu.:31.00   1st Qu.:43.00   1st Qu.:54.00  
##  Median :48.00   Median :41.00   Median :56.00   Median :63.00  
##  Mean   :47.17   Mean   :42.86   Mean   :52.71   Mean   :58.37  
##  3rd Qu.:62.00   3rd Qu.:57.00   3rd Qu.:64.00   3rd Qu.:69.00  
##  Max.   :94.00   Max.   :94.00   Max.   :93.00   Max.   :96.00  
##  NA's   :48      NA's   :48      NA's   :48      NA's   :48     
##   Acceleration    SprintSpeed       Agility       Reactions        Balance     
##  Min.   :12.00   Min.   :12.00   Min.   :14.0   Min.   :21.00   Min.   :16.00  
##  1st Qu.:57.00   1st Qu.:57.00   1st Qu.:55.0   1st Qu.:56.00   1st Qu.:56.00  
##  Median :67.00   Median :67.00   Median :66.0   Median :62.00   Median :66.00  
##  Mean   :64.61   Mean   :64.73   Mean   :63.5   Mean   :61.84   Mean   :63.97  
##  3rd Qu.:75.00   3rd Qu.:75.00   3rd Qu.:74.0   3rd Qu.:68.00   3rd Qu.:74.00  
##  Max.   :97.00   Max.   :96.00   Max.   :96.0   Max.   :96.00   Max.   :96.00  
##  NA's   :48      NA's   :48      NA's   :48     NA's   :48      NA's   :48     
##    ShotPower        Jumping         Stamina         Strength    
##  Min.   : 2.00   Min.   :15.00   Min.   :12.00   Min.   :17.00  
##  1st Qu.:45.00   1st Qu.:58.00   1st Qu.:56.00   1st Qu.:58.00  
##  Median :59.00   Median :66.00   Median :66.00   Median :67.00  
##  Mean   :55.46   Mean   :65.09   Mean   :63.22   Mean   :65.31  
##  3rd Qu.:68.00   3rd Qu.:73.00   3rd Qu.:74.00   3rd Qu.:74.00  
##  Max.   :95.00   Max.   :95.00   Max.   :96.00   Max.   :97.00  
##  NA's   :48      NA's   :48      NA's   :48      NA's   :48     
##    LongShots       Aggression    Interceptions   Positioning        Vision    
##  Min.   : 3.00   Min.   :11.00   Min.   : 3.0   Min.   : 2.00   Min.   :10.0  
##  1st Qu.:33.00   1st Qu.:44.00   1st Qu.:26.0   1st Qu.:38.00   1st Qu.:44.0  
##  Median :51.00   Median :59.00   Median :52.0   Median :55.00   Median :55.0  
##  Mean   :47.11   Mean   :55.87   Mean   :46.7   Mean   :49.96   Mean   :53.4  
##  3rd Qu.:62.00   3rd Qu.:69.00   3rd Qu.:64.0   3rd Qu.:64.00   3rd Qu.:64.0  
##  Max.   :94.00   Max.   :95.00   Max.   :92.0   Max.   :95.00   Max.   :94.0  
##  NA's   :48      NA's   :48      NA's   :48     NA's   :48      NA's   :48    
##    Penalties       Composure        Marking      StandingTackle SlidingTackle  
##  Min.   : 5.00   Min.   : 3.00   Min.   : 3.00   Min.   : 2.0   Min.   : 3.00  
##  1st Qu.:39.00   1st Qu.:51.00   1st Qu.:30.00   1st Qu.:27.0   1st Qu.:24.00  
##  Median :49.00   Median :60.00   Median :53.00   Median :55.0   Median :52.00  
##  Mean   :48.55   Mean   :58.65   Mean   :47.28   Mean   :47.7   Mean   :45.66  
##  3rd Qu.:60.00   3rd Qu.:67.00   3rd Qu.:64.00   3rd Qu.:66.0   3rd Qu.:64.00  
##  Max.   :92.00   Max.   :96.00   Max.   :94.00   Max.   :93.0   Max.   :91.00  
##  NA's   :48      NA's   :48      NA's   :48      NA's   :48     NA's   :48     
##     GKDiving       GKHandling      GKKicking     GKPositioning  
##  Min.   : 1.00   Min.   : 1.00   Min.   : 1.00   Min.   : 1.00  
##  1st Qu.: 8.00   1st Qu.: 8.00   1st Qu.: 8.00   1st Qu.: 8.00  
##  Median :11.00   Median :11.00   Median :11.00   Median :11.00  
##  Mean   :16.62   Mean   :16.39   Mean   :16.23   Mean   :16.39  
##  3rd Qu.:14.00   3rd Qu.:14.00   3rd Qu.:14.00   3rd Qu.:14.00  
##  Max.   :90.00   Max.   :92.00   Max.   :91.00   Max.   :90.00  
##  NA's   :48      NA's   :48      NA's   :48      NA's   :48     
##    GKReflexes    Release.Clause    
##  Min.   : 1.00   Length:18207      
##  1st Qu.: 8.00   Class :character  
##  Median :11.00   Mode  :character  
##  Mean   :16.71                     
##  3rd Qu.:14.00                     
##  Max.   :94.00                     
##  NA's   :48
# Check for missing values: is.na() flags every NA cell and colSums() totals
# the flags per column. Only true NA values are counted here; empty strings
# in character columns are not.
colSums(is.na(data))
##                       ID                     Name                      Age 
##                        0                        0                        0 
##              Nationality                  Overall                Potential 
##                        0                        0                        0 
##                     Club                    Value                     Wage 
##                        0                        0                        0 
##           Preferred.Foot International.Reputation                Weak.Foot 
##                        0                       48                       48 
##              Skill.Moves                Work.Rate                Body.Type 
##                       48                        0                        0 
##                 Position            Jersey.Number                   Joined 
##                        0                       60                        0 
##              Loaned.From     Contract.Valid.Until                   Height 
##                        0                        0                        0 
##                   Weight                 Crossing                Finishing 
##                        0                       48                       48 
##          HeadingAccuracy             ShortPassing                  Volleys 
##                       48                       48                       48 
##                Dribbling                    Curve               FKAccuracy 
##                       48                       48                       48 
##              LongPassing              BallControl             Acceleration 
##                       48                       48                       48 
##              SprintSpeed                  Agility                Reactions 
##                       48                       48                       48 
##                  Balance                ShotPower                  Jumping 
##                       48                       48                       48 
##                  Stamina                 Strength                LongShots 
##                       48                       48                       48 
##               Aggression            Interceptions              Positioning 
##                       48                       48                       48 
##                   Vision                Penalties                Composure 
##                       48                       48                       48 
##                  Marking           StandingTackle            SlidingTackle 
##                       48                       48                       48 
##                 GKDiving               GKHandling                GKKicking 
##                       48                       48                       48 
##            GKPositioning               GKReflexes           Release.Clause 
##                       48                       48                        0
# Mean-impute Age, Overall and Potential: each NA in a column is replaced by
# the mean of that column's non-missing values.
# The per-column NA counts printed for this dataset are 0 for all three of
# these columns, so no value is actually replaced here; the columns that do
# contain NAs are not touched by this step.
data$Age <- replace(
  data$Age,
  is.na(data$Age),
  mean(data$Age, na.rm = TRUE)
)
data$Overall <- replace(
  data$Overall,
  is.na(data$Overall),
  mean(data$Overall, na.rm = TRUE)
)
data$Potential <- replace(
  data$Potential,
  is.na(data$Potential),
  mean(data$Potential, na.rm = TRUE)
)
# Return the most frequent non-missing value of a vector (its mode).
#
# NA entries are dropped before counting, so NA can never be returned as the
# "most common" value when the result is used to fill in missing data.
# Ties are resolved in favour of the value that appears first in x.
#
# x: an atomic vector (e.g. character or numeric).
# Returns: a length-one vector of the same type as x; NA if x is empty or
#   contains only NA.
get_mode <- function(x) {
  x <- x[!is.na(x)]
  if (length(x) == 0L) {
    # Nothing left to count: hand back a single NA of the input's type.
    return(x[NA_integer_])
  }
  ux <- unique(x)
  # match() maps every element to the index of its unique value, tabulate()
  # counts those indices, and which.max() picks the first largest count.
  ux[which.max(tabulate(match(x, ux)))]
}
# Replace missing values in categorical columns with the mode.
# In these character columns a missing entry can be stored as an empty string
# rather than NA (the Preferred.Foot counts contain a blank category), so an
# is.na() test alone never matches. Both NA and "" are treated as missing, and
# the mode is taken over the remaining entries only, so the fill value cannot
# itself be a missing marker.
missing_markers <- c(NA, "")

foot_is_missing <- data$Preferred.Foot %in% missing_markers
data$Preferred.Foot[foot_is_missing] <-
  get_mode(data$Preferred.Foot[!foot_is_missing])

work_rate_is_missing <- data$Work.Rate %in% missing_markers
data$Work.Rate[work_rate_is_missing] <-
  get_mode(data$Work.Rate[!work_rate_is_missing])
# Check for any remaining missing values: re-count the NA cells per column
# after the replacements above
colSums(is.na(data))
##                       ID                     Name                      Age 
##                        0                        0                        0 
##              Nationality                  Overall                Potential 
##                        0                        0                        0 
##                     Club                    Value                     Wage 
##                        0                        0                        0 
##           Preferred.Foot International.Reputation                Weak.Foot 
##                        0                       48                       48 
##              Skill.Moves                Work.Rate                Body.Type 
##                       48                        0                        0 
##                 Position            Jersey.Number                   Joined 
##                        0                       60                        0 
##              Loaned.From     Contract.Valid.Until                   Height 
##                        0                        0                        0 
##                   Weight                 Crossing                Finishing 
##                        0                       48                       48 
##          HeadingAccuracy             ShortPassing                  Volleys 
##                       48                       48                       48 
##                Dribbling                    Curve               FKAccuracy 
##                       48                       48                       48 
##              LongPassing              BallControl             Acceleration 
##                       48                       48                       48 
##              SprintSpeed                  Agility                Reactions 
##                       48                       48                       48 
##                  Balance                ShotPower                  Jumping 
##                       48                       48                       48 
##                  Stamina                 Strength                LongShots 
##                       48                       48                       48 
##               Aggression            Interceptions              Positioning 
##                       48                       48                       48 
##                   Vision                Penalties                Composure 
##                       48                       48                       48 
##                  Marking           StandingTackle            SlidingTackle 
##                       48                       48                       48 
##                 GKDiving               GKHandling                GKKicking 
##                       48                       48                       48 
##            GKPositioning               GKReflexes           Release.Clause 
##                       48                       48                        0
# Install dplyr if it is not installed already.
# requireNamespace() only checks that the package is available; unlike
# require() it does not attach it as a side effect of the test. Attaching is
# left to the explicit library(dplyr) call.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load dplyr package

library(dplyr)

# Minimum, maximum and mean of Age, Overall and Potential as a one-row table.
# Columns are named <statistic>_<lower-case column name>, e.g. min_age, and
# NAs are ignored in every statistic.
numeric_summary <- data %>%
  summarise(
    across(
      c(Age, Overall, Potential),
      list(
        min = function(v) min(v, na.rm = TRUE),
        max = function(v) max(v, na.rm = TRUE),
        mean = function(v) mean(v, na.rm = TRUE)
      ),
      .names = "{.fn}_{tolower(.col)}"
    )
  )

print(numeric_summary)
##   min_age max_age mean_age min_overall max_overall mean_overall min_potential
## 1      16      45 25.12221          46          94      66.2387            48
##   max_potential mean_potential
## 1            95        71.3073
# Frequency tables for Preferred.Foot and Nationality, most frequent value
# first (column n holds the number of players)
categorical_summary_foot <- count(data, Preferred.Foot, sort = TRUE)
categorical_summary_nationality <- count(data, Nationality, sort = TRUE)

# Show the six most common nationalities
print(head(categorical_summary_nationality, n = 6))
##   Nationality    n
## 1     England 1662
## 2     Germany 1198
## 3       Spain 1072
## 4   Argentina  937
## 5      France  914
## 6      Brazil  827
# Show the number of players for each Preferred.Foot value
print(categorical_summary_foot)
##   Preferred.Foot     n
## 1          Right 13948
## 2           Left  4211
## 3                   48

# Investigative questions
#
# 1. Are older players rated higher in overall performance?
#
# 2. Is there a significant difference between left-footed and right-footed players in terms of potential?
#
# 3. Which nationality has the highest average potential?

# Question 1: mean Overall rating for each Age (one row per age, NAs ignored)
age_overall_trend <- summarise(
  group_by(data, Age),
  mean_overall = mean(Overall, na.rm = TRUE)
)

# Show the first six ages
print(head(age_overall_trend, n = 6))
## # A tibble: 6 × 2
##     Age mean_overall
##   <dbl>        <dbl>
## 1    16         54.5
## 2    17         56.4
## 3    18         57.7
## 4    19         59.6
## 5    20         61.9
## 6    21         63.5
# Scatter plot of Overall rating against Age, with a linear-model trend line
# drawn in red
library(ggplot2)
ggplot(data = data, mapping = aes(x = Age, y = Overall)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red") +
  labs(
    title = "Age vs. Overall Performance",
    x = "Age",
    y = "Overall Rating"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Histogram of Overall rating (30 bins) with the bars for each Preferred.Foot
# value placed side by side.
# NOTE(review): investigative question 2 asks about Potential, but this plot
# shows Overall; confirm which variable is intended.
ggplot(data = data, mapping = aes(x = Overall, fill = Preferred.Foot)) +
  geom_histogram(position = "dodge", bins = 30) +
  labs(
    title = "Overall Rating Distribution by Preferred Foot",
    x = "Overall Rating",
    y = "Count"
  )

# Question 3: mean Potential per Nationality (NAs ignored), ordered from the
# highest mean to the lowest
nationality_potential <- arrange(
  summarise(
    group_by(data, Nationality),
    mean_potential = mean(Potential, na.rm = TRUE)
  ),
  desc(mean_potential)
)

# Show the six nationalities with the highest mean potential
print(head(nationality_potential, n = 6))
## # A tibble: 6 × 2
##   Nationality          mean_potential
##   <chr>                         <dbl>
## 1 Dominican Republic             80.5
## 2 Chad                           78  
## 3 United Arab Emirates           78  
## 4 Central African Rep.           76  
## 5 Russia                         75.3
## 6 Portugal                       75.3

# Project goal
# This dataset provides statistics on FIFA players. It covers player
# performance, physical traits, contract details, and financial data.
# The goal is to analyse player performance through attributes such as speed,
# stamina, and overall rating.
# A second goal is market analysis, based on player market value and wages.
# The results can serve sports analysts who want to understand players'
# strengths, or video games that need player data for simulation.