LAB 5

Author

Sofia Santana

options(scipen=999) 
library(tidyverse)
library(socviz)
library(datasetsICR)

PART 0:

library(dplyr)
library(ggplot2)
library(datasetsICR)
data("FIFA")

# Identify the 10 most frequent nationalities and positions.
# slice_max() replaces the superseded top_n(). Like top_n(), it keeps ties
# by default (with_ties = TRUE), so more than 10 values can be returned when
# counts are tied at the cutoff. slice_max() returns rows sorted by count,
# which is harmless here: the vectors are used for membership tests only.
top_nationalities <- FIFA %>%
  count(Nationality) %>%
  slice_max(order_by = n, n = 10) %>%
  pull(Nationality)

top_positions <- FIFA %>%
  count(Position) %>%
  slice_max(order_by = n, n = 10) %>%
  pull(Position)

# Keep only the players whose nationality AND position both fall in the
# top categories (multiple filter() conditions are combined with "and").
FIFA_filtered <- filter(
  FIFA,
  Nationality %in% top_nationalities,
  Position %in% top_positions
)


# PART 1: SUMMARIZE DATA
# Count players for every Nationality x Position pair, then express each
# count as a percentage of all filtered players (Percent sums to 100 over
# the whole table, not within a nationality).
FIFA_summary <- mutate(
  count(FIFA_filtered, Nationality, Position),
  Percent = n / sum(n) * 100
)

print(FIFA_summary)
    Nationality Position   n   Percent
1     Argentina      CAM  46 0.6720234
2     Argentina       CB  99 1.4463112
3     Argentina      CDM  55 0.8035062
4     Argentina       CM  46 0.6720234
5     Argentina       GK  97 1.4170928
6     Argentina       LB  59 0.8619430
7     Argentina       LM  64 0.9349890
8     Argentina       RB  50 0.7304602
9     Argentina       RM  62 0.9057706
10    Argentina       ST 125 1.8261505
11       Brazil      CAM  81 1.1833455
12       Brazil       CB  66 0.9642075
13       Brazil      CDM  43 0.6281958
14       Brazil       CM  18 0.2629657
15       Brazil       GK  68 0.9934259
16       Brazil       LB  67 0.9788167
17       Brazil       LM  30 0.4382761
18       Brazil       RB  63 0.9203798
19       Brazil       RM  23 0.3360117
20       Brazil       ST 111 1.6216216
21     Colombia      CAM  40 0.5843682
22     Colombia       CB  52 0.7596786
23     Colombia      CDM  54 0.7888970
24     Colombia       CM  27 0.3944485
25     Colombia       GK  52 0.7596786
26     Colombia       LB  53 0.7742878
27     Colombia       LM  47 0.6866326
28     Colombia       RB  40 0.5843682
29     Colombia       RM  52 0.7596786
30     Colombia       ST  74 1.0810811
31      England      CAM  72 1.0518627
32      England       CB 161 2.3520818
33      England      CDM  50 0.7304602
34      England       CM 196 2.8634039
35      England       GK 174 2.5420015
36      England       LB 124 1.8115413
37      England       LM  97 1.4170928
38      England       RB 119 1.7384953
39      England       RM 115 1.6800584
40      England       ST 226 3.3016801
41       France      CAM  51 0.7450694
42       France       CB  93 1.3586560
43       France      CDM  63 0.9203798
44       France       CM  58 0.8473338
45       France       GK 101 1.4755296
46       France       LB  78 1.1395179
47       France       LM  49 0.7158510
48       France       RB  74 1.0810811
49       France       RM  49 0.7158510
50       France       ST 106 1.5485756
51      Germany      CAM  62 0.9057706
52      Germany       CB 128 1.8699781
53      Germany      CDM  79 1.1541271
54      Germany       CM  60 0.8765522
55      Germany       GK 164 2.3959094
56      Germany       LB  79 1.1541271
57      Germany       LM  94 1.3732652
58      Germany       RB  83 1.2125639
59      Germany       RM  88 1.2856099
60      Germany       ST 104 1.5193572
61        Italy      CAM  25 0.3652301
62        Italy       CB  82 1.1979547
63        Italy      CDM  16 0.2337473
64        Italy       CM  90 1.3148283
65        Italy       GK  92 1.3440467
66        Italy       LB  55 0.8035062
67        Italy       LM  18 0.2629657
68        Italy       RB  38 0.5551497
69        Italy       RM  27 0.3944485
70        Italy       ST  72 1.0518627
71        Japan      CAM  28 0.4090577
72        Japan       CB  49 0.7158510
73        Japan      CDM  19 0.2775749
74        Japan       CM  38 0.5551497
75        Japan       GK  61 0.8911614
76        Japan       LB  26 0.3798393
77        Japan       LM  35 0.5113221
78        Japan       RB  31 0.4528853
79        Japan       RM  37 0.5405405
80        Japan       ST  44 0.6428050
81  Netherlands      CAM  38 0.5551497
82  Netherlands       CB  42 0.6135866
83  Netherlands      CDM  15 0.2191381
84  Netherlands       CM  42 0.6135866
85  Netherlands       GK  56 0.8181154
86  Netherlands       LB  32 0.4674945
87  Netherlands       LM  22 0.3214025
88  Netherlands       RB  37 0.5405405
89  Netherlands       RM  19 0.2775749
90  Netherlands       ST  41 0.5989774
91        Spain      CAM  57 0.8327246
92        Spain       CB 104 1.5193572
93        Spain      CDM  65 0.9495982
94        Spain       CM  80 1.1687363
95        Spain       GK 116 1.6946676
96        Spain       LB  81 1.1833455
97        Spain       LM  76 1.1102995
98        Spain       RB  87 1.2710007
99        Spain       RM  82 1.1979547
100       Spain       ST 109 1.5924032

PART 2: CREATE STACKED AND DODGED BAR CHARTS FROM 2 CATEGORICAL VARIABLES

# Stacked bar chart: one bar per nationality, segments coloured by position.
# geom_col() draws bar heights straight from the pre-computed counts in `n`.
ggplot(FIFA_summary) +
  geom_col(aes(x = Nationality, y = n, fill = Position)) +
  ggtitle("Stacked Bar Chart of Players by Nationality and Position") +
  labs(x = "Nationality", y = "Count", fill = "Position") +
  theme_minimal() +
  # Tilt the nationality labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

National trends in player positions: A glimpse into the diverse development focuses and strategic emphasis of football nations.

  • Nations like England and Spain have a higher overall count of players across various positions, indicating a larger presence in the dataset. England shows a particularly high number of Center Backs (CB) and Spain has a significant number of Central Midfielders (CM).
  • Certain positions, such as CB (Center Back) and ST (Striker), appear prominently across several nationalities, which may reflect a global emphasis on developing players in these roles.
# Dodged bar chart: positions shown side by side within each nationality.
# geom_col() draws bar heights straight from the pre-computed counts in `n`.
ggplot(FIFA_summary) +
  geom_col(
    aes(x = Nationality, y = n, fill = Position),
    position = "dodge"
  ) +
  ggtitle("Dodged Bar Chart of Players by Nationality and Position") +
  labs(x = "Nationality", y = "Count", fill = "Position") +
  theme_minimal() +
  # Tilt the nationality labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Varied Positional Strengths Reflect Diverse Football Cultures Across Nations - The dodged bar chart shows that England and Germany stand out for the prominent number of players in almost all positions. Also, it looks like the ST, CM, CDM, and CB positions appear more prominently for them than for other countries.

# Faceted horizontal bar chart without a legend: one panel per nationality,
# positions along the (flipped) axis, bar length given by the count `n`.
ggplot(FIFA_summary) +
  geom_col(aes(x = Position, y = n)) +
  facet_wrap(vars(Nationality), scales = "free_y", ncol = 5) +
  coord_flip() +
  ggtitle("Faceted Horizontal Bar Chart of Players by Nationality") +
  labs(x = "", y = "Count") +
  theme_minimal() +
  theme(legend.position = "none")

PART 3: PRACTICE USING PIPES (dplyr) TO SUMMARIZE DATA: TWO CONTINUOUS & ONE CATEGORICAL VARIABLE

# Per-nationality averages of the two continuous variables (Height and
# Potential); missing values are ignored when averaging.
FIFA_continuous_summary <- summarise(
  group_by(FIFA_filtered, Nationality),
  MeanHeight = mean(Height, na.rm = TRUE),
  MeanPotential = mean(Potential, na.rm = TRUE)
)

print(FIFA_continuous_summary)
# A tibble: 10 × 3
   Nationality MeanHeight MeanPotential
   <fct>            <dbl>         <dbl>
 1 Argentina         179.          72.9
 2 Brazil            181.          72.7
 3 Colombia          179.          70.5
 4 England           181.          69.9
 5 France            182.          73.0
 6 Germany           183.          71.4
 7 Italy             183.          72.3
 8 Japan             178.          66.2
 9 Netherlands       183.          72.6
10 Spain             180.          74.3

PART 4: SCATTERPLOT WITH A THIRD CATEGORICAL VARIABLE

# Scatterplot of the nationality-level means; colour identifies the
# nationality (third, categorical variable) and is explained by the legend.
ggplot(FIFA_continuous_summary) +
  geom_point(
    aes(x = MeanHeight, y = MeanPotential, color = Nationality)
  ) +
  ggtitle("Mean Height vs. Mean Potential by Nationality") +
  labs(x = "Mean Height (cm)", y = "Mean Potential") +
  theme_minimal() +
  theme(legend.position = "right")

PART 5: LEGEND AND GUIDES

This is just an example – do not simply duplicate these options.

# Same scatterplot with larger points and a customised legend: bold gray
# legend title, placed above the keys and centred.
ggplot(FIFA_continuous_summary) +
  geom_point(
    aes(x = MeanHeight, y = MeanPotential, color = Nationality),
    size = 5
  ) +
  labs(
    title = "Mean Height vs. Mean Potential by Nationality",
    subtitle = "Visualizing the average player attributes across nationalities",
    x = "Mean Height (cm)",
    y = "Mean Potential",
    color = "Nationality"
  ) +
  theme(
    legend.position = "right",
    legend.title = element_text(color = "gray50", size = 14, face = "bold")
  ) +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5))

PART 6: DATA LABELS VS LEGEND

# Scatterplot that identifies nationalities with direct text labels
# instead of a legend.
ggplot(FIFA_continuous_summary, aes(x = MeanHeight, y = MeanPotential)) +
  # Points are still coloured by nationality
  geom_point(aes(color = Nationality), size = 4) +
  # Labels are nudged up and to the left of each point; check_overlap
  # skips labels that would be drawn on top of an earlier one
  geom_text(
    aes(label = Nationality),
    vjust = -1,
    hjust = 1.2,
    size = 3.5,
    check_overlap = TRUE
  ) +
  ggtitle("Mean Height vs. Mean Potential by Nationality") +
  labs(x = "Mean Height (cm)", y = "Mean Potential") +
  theme_minimal() +
  # The labels make the colour legend redundant, so hide it
  theme(legend.position = "none")

PART 7: INTERPRETATION

The insights drawn from the dodged and stacked bar charts indicate that different nations have varying distributions of players across different positions, and there is no one-size-fits-all approach to player development. Some countries may focus on certain positions more than others, but this does not necessarily correlate with the potential seen in the scatter plot.

The scatter plot suggests that there is diversity in the average physical and potential attributes of football players across different nationalities. While mean height varies slightly, it does not appear to be a decisive factor in the mean potential attributed to players from these nations. Countries like Argentina, Brazil, and Spain seem to have players with higher mean potential, which does not strictly correlate with mean height.

In conclusion, the variations observed in player distribution across positions and the lack of a clear correlation between height and potential underscore the multifaceted nature of talent development in football.

END