# Strongly bias number printing towards fixed rather than scientific notation.
options(scipen = 999)
library(tidyverse)
library(socviz)
library(datasetsICR)

# LAB 5 ----
# PART 0: ----
# dplyr and ggplot2 are already attached by tidyverse, and datasetsICR is
# attached above; re-attaching an attached package is a no-op, so these calls
# are kept only so this part can also be run on its own.
library(dplyr)
library(ggplot2)
library(datasetsICR)
# Load the FIFA data set into the workspace
# (presumably shipped with datasetsICR -- TODO confirm).
data("FIFA")
# Collect the ten most frequent nationalities and the ten most frequent
# positions. Ranking uses min_rank(), so values tied at the cut-off are all
# kept (more than ten can come back) and rows stay in count() order.
top_nationalities <- FIFA %>%
  count(Nationality) %>%
  filter(min_rank(desc(n)) <= 10) %>%
  pull(Nationality)

top_positions <- FIFA %>%
  count(Position) %>%
  filter(min_rank(desc(n)) <= 10) %>%
  pull(Position)
# Keep only players whose nationality AND position are both among the most
# frequent values collected above.
FIFA_filtered <- FIFA %>%
  filter(
    Nationality %in% top_nationalities,
    Position %in% top_positions
  )
# PART 1: SUMMARIZE DATA ----
# Count players for every Nationality x Position pair. The counted table is
# ungrouped, so sum(n) is the grand total and Percent is each pair's share of
# ALL filtered players (not a within-nationality share).
FIFA_summary <- FIFA_filtered %>%
  count(Nationality, Position) %>%
  mutate(Percent = n / sum(n) * 100)
print(FIFA_summary)
#   Nationality Position n Percent
1 Argentina CAM 46 0.6720234
2 Argentina CB 99 1.4463112
3 Argentina CDM 55 0.8035062
4 Argentina CM 46 0.6720234
5 Argentina GK 97 1.4170928
6 Argentina LB 59 0.8619430
7 Argentina LM 64 0.9349890
8 Argentina RB 50 0.7304602
9 Argentina RM 62 0.9057706
10 Argentina ST 125 1.8261505
11 Brazil CAM 81 1.1833455
12 Brazil CB 66 0.9642075
13 Brazil CDM 43 0.6281958
14 Brazil CM 18 0.2629657
15 Brazil GK 68 0.9934259
16 Brazil LB 67 0.9788167
17 Brazil LM 30 0.4382761
18 Brazil RB 63 0.9203798
19 Brazil RM 23 0.3360117
20 Brazil ST 111 1.6216216
21 Colombia CAM 40 0.5843682
22 Colombia CB 52 0.7596786
23 Colombia CDM 54 0.7888970
24 Colombia CM 27 0.3944485
25 Colombia GK 52 0.7596786
26 Colombia LB 53 0.7742878
27 Colombia LM 47 0.6866326
28 Colombia RB 40 0.5843682
29 Colombia RM 52 0.7596786
30 Colombia ST 74 1.0810811
31 England CAM 72 1.0518627
32 England CB 161 2.3520818
33 England CDM 50 0.7304602
34 England CM 196 2.8634039
35 England GK 174 2.5420015
36 England LB 124 1.8115413
37 England LM 97 1.4170928
38 England RB 119 1.7384953
39 England RM 115 1.6800584
40 England ST 226 3.3016801
41 France CAM 51 0.7450694
42 France CB 93 1.3586560
43 France CDM 63 0.9203798
44 France CM 58 0.8473338
45 France GK 101 1.4755296
46 France LB 78 1.1395179
47 France LM 49 0.7158510
48 France RB 74 1.0810811
49 France RM 49 0.7158510
50 France ST 106 1.5485756
51 Germany CAM 62 0.9057706
52 Germany CB 128 1.8699781
53 Germany CDM 79 1.1541271
54 Germany CM 60 0.8765522
55 Germany GK 164 2.3959094
56 Germany LB 79 1.1541271
57 Germany LM 94 1.3732652
58 Germany RB 83 1.2125639
59 Germany RM 88 1.2856099
60 Germany ST 104 1.5193572
61 Italy CAM 25 0.3652301
62 Italy CB 82 1.1979547
63 Italy CDM 16 0.2337473
64 Italy CM 90 1.3148283
65 Italy GK 92 1.3440467
66 Italy LB 55 0.8035062
67 Italy LM 18 0.2629657
68 Italy RB 38 0.5551497
69 Italy RM 27 0.3944485
70 Italy ST 72 1.0518627
71 Japan CAM 28 0.4090577
72 Japan CB 49 0.7158510
73 Japan CDM 19 0.2775749
74 Japan CM 38 0.5551497
75 Japan GK 61 0.8911614
76 Japan LB 26 0.3798393
77 Japan LM 35 0.5113221
78 Japan RB 31 0.4528853
79 Japan RM 37 0.5405405
80 Japan ST 44 0.6428050
81 Netherlands CAM 38 0.5551497
82 Netherlands CB 42 0.6135866
83 Netherlands CDM 15 0.2191381
84 Netherlands CM 42 0.6135866
85 Netherlands GK 56 0.8181154
86 Netherlands LB 32 0.4674945
87 Netherlands LM 22 0.3214025
88 Netherlands RB 37 0.5405405
89 Netherlands RM 19 0.2775749
90 Netherlands ST 41 0.5989774
91 Spain CAM 57 0.8327246
92 Spain CB 104 1.5193572
93 Spain CDM 65 0.9495982
94 Spain CM 80 1.1687363
95 Spain GK 116 1.6946676
96 Spain LB 81 1.1833455
97 Spain LM 76 1.1102995
98 Spain RB 87 1.2710007
99 Spain RM 82 1.1979547
100 Spain ST 109 1.5924032
# PART 2: CREATE STACKED AND DODGED BAR CHARTS FROM 2 CATEGORICAL VARIABLES ----
# Stacked bar chart: one bar per nationality, split into segments by position.
ggplot(FIFA_summary, aes(x = Nationality, y = n, fill = Position)) +
  # The counts are already computed in n, so draw them as-is with geom_col()
  # (the idiomatic form of geom_bar(stat = "identity")).
  geom_col() +
  labs(
    title = "Stacked Bar Chart of Players by Nationality and Position",
    x = "Nationality", y = "Count", fill = "Position"
  ) +
  theme_minimal() +
  # Angle the nationality labels on the x axis.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# National trends in player positions: A glimpse into the diverse development
# focuses and strategic emphasis of football nations.
# - Nations like England and Spain have a higher overall count of players
#   across various positions, indicating a larger presence in the dataset.
#   England shows a particularly high number of Center Backs (CB) and Spain has
#   a significant number of Central Midfielders (CM).
# - Certain positions, such as CB (Center Back) and ST (Striker), appear
#   prominently across several nationalities, which may reflect a global
#   emphasis on developing players in these roles.
# Dodged bar chart: same data as the stacked chart, but the position bars are
# placed side by side within each nationality.
ggplot(FIFA_summary, aes(x = Nationality, y = n, fill = Position)) +
  # geom_col() plots the precomputed counts in n directly
  # (the idiomatic form of geom_bar(stat = "identity")).
  geom_col(position = "dodge") +
  labs(
    title = "Dodged Bar Chart of Players by Nationality and Position",
    x = "Nationality", y = "Count", fill = "Position"
  ) +
  theme_minimal() +
  # Angle the nationality labels on the x axis.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Varied Positional Strengths Reflect Diverse Football Cultures Across Nations
# - The dodged bar chart shows that England and Germany stand out for the
#   prominent number of players in almost all positions. Also, it looks like
#   the positions ST, CM, CDM and CB appear more prominently than in other
#   countries.
# Faceted horizontal bar chart without a legend: one panel per nationality,
# one bar per position.
ggplot(FIFA_summary, aes(x = Position, y = n)) +
  # geom_col() plots the precomputed counts in n directly
  # (the idiomatic form of geom_bar(stat = "identity")).
  geom_col() +
  # Panels are laid out in 5 columns with a free y scale.
  # NOTE(review): together with coord_flip(), confirm which displayed axis
  # "free_y" ends up freeing.
  facet_wrap(~Nationality, scales = "free_y", ncol = 5) +
  coord_flip() +
  labs(
    title = "Faceted Horizontal Bar Chart of Players by Nationality",
    x = "", y = "Count"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# PART 3: PRACTICE USING PIPES (dplyr) TO SUMMARIZE DATA: TWO CONTINUOUS & ONE CATEGORICAL VARIABLE ----
# Average Height and Potential per nationality, ignoring missing values.
# The "Mean{.col}" pattern names the results MeanHeight and MeanPotential.
FIFA_continuous_summary <- FIFA_filtered %>%
  group_by(Nationality) %>%
  summarise(
    across(
      c(Height, Potential),
      function(v) mean(v, na.rm = TRUE),
      .names = "Mean{.col}"
    )
  )
print(FIFA_continuous_summary)
# A tibble: 10 × 3
Nationality MeanHeight MeanPotential
<fct> <dbl> <dbl>
1 Argentina 179. 72.9
2 Brazil 181. 72.7
3 Colombia 179. 70.5
4 England 181. 69.9
5 France 182. 73.0
6 Germany 183. 71.4
7 Italy 183. 72.3
8 Japan 178. 66.2
9 Netherlands 183. 72.6
10 Spain 180. 74.3
# PART 4: SCATTERPLOT WITH A THIRD CATEGORICAL VARIABLE ----
# One point per nationality: mean height on x, mean potential on y, with the
# nationality shown through point colour.
ggplot(
  FIFA_continuous_summary,
  aes(x = MeanHeight, y = MeanPotential, color = Nationality)
) +
  geom_point() +
  labs(
    title = "Mean Height vs. Mean Potential by Nationality",
    x = "Mean Height (cm)", y = "Mean Potential"
  ) +
  theme_minimal() +
  theme(legend.position = "right")
# PART 5: LEGEND AND GUIDES ----
# This is just an example – do not simply duplicate these options.
# Same scatterplot with larger points, a subtitle, and a restyled legend title.
ggplot(
  FIFA_continuous_summary,
  aes(x = MeanHeight, y = MeanPotential, color = Nationality)
) +
  geom_point(size = 5) +
  labs(
    y = "Mean Potential", x = "Mean Height (cm)",
    title = "Mean Height vs. Mean Potential by Nationality",
    subtitle = "Visualizing the average player attributes across nationalities",
    color = "Nationality"
  ) +
  theme(
    legend.title = element_text(color = "gray50", size = 14, face = "bold"),
    legend.position = "right"
  ) +
  # Put the legend title above the keys and centre it.
  # NOTE(review): recent ggplot2 releases (3.5.0+, to be confirmed) deprecate
  # the title.* arguments of guide_legend() in favour of theme settings; left
  # unchanged so the code keeps working on older versions.
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5))
# PART 6: DATA LABELS VS LEGEND ----
# Scatterplot that identifies each nationality with a text label next to its
# point instead of a colour legend.
ggplot(FIFA_continuous_summary, aes(MeanHeight, MeanPotential)) +
  # Points are still coloured per nationality.
  geom_point(aes(colour = Nationality), size = 4) +
  # Text labels, nudged up and to the left of each point.
  # NOTE(review): check_overlap = TRUE skips labels that would overlap an
  # earlier one; with the legend hidden, a skipped label leaves its point
  # unidentified -- verify that every nationality is labelled in the output.
  geom_text(
    aes(label = Nationality),
    size = 3.5, hjust = 1.2, vjust = -1,
    check_overlap = TRUE
  ) +
  labs(
    x = "Mean Height (cm)", y = "Mean Potential",
    title = "Mean Height vs. Mean Potential by Nationality"
  ) +
  theme_minimal() +
  # The labels replace the legend, so hide it.
  theme(legend.position = "none")
# PART 7: INTERPRETATION ----
The insights drawn from the dodged and stacked bar charts indicate that different nations have varying distributions of players across different positions, and there is no one-size-fits-all approach to player development. Some countries may focus on certain positions more than others, but this does not necessarily correlate with the potential seen in the scatter plot.
The scatter plot suggests that there is diversity in the average physical and potential attributes of football players across different nationalities. While mean height varies slightly, it does not appear to be a decisive factor in the mean potential attributed to players from these nations. Countries like Argentina, Brazil, and Spain seem to have players with higher mean potential, which does not strictly correlate with mean height.
In conclusion, the variations observed in player distribution across positions and the lack of a clear correlation between height and potential underscore the multifaceted nature of talent development in football.