# Read the Euro 2024 players CSV into the `data` data frame.
data <- read.csv(
  file = "C:/Users/ticoa/OneDrive/Desktop/Fall 2024/Advanced Analytics/euro2024_players.csv"
)
Load necessary library for plotting
library(ggplot2)
Warning: package ‘ggplot2’ was built under R version 4.3.3
Bar plot for the Position column
# Bar chart counting the players in each Position, drawn with red bars and
# black outlines; the x-axis labels are rotated 45 degrees and right-aligned.
ggplot(data = data, mapping = aes(x = Position)) +
  geom_bar(color = "black", fill = "red") +
  ggtitle("Distribution of Players by Position") +
  xlab("Position") +
  ylab("Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

NA
NA
#The bar plot shows the distribution of the “Position” column. The
counts are uneven across positions, and the highest value is
“Centre-Back”, with 120+ players.
Scatter plot for CAPS over Age
# Scatter plot of Caps against Age with a fitted linear trend line.
#
# The y-axis is zoomed with coord_cartesian() rather than ylim(): ylim() sets
# scale limits, so the parts of the fitted line that fall outside the range
# are discarded (the "Removed ... rows" warning), whereas coord_cartesian()
# only changes the visible window. The 1.1 factor leaves some space above the
# highest point, and na.rm = TRUE keeps the limit defined if Caps has NAs.
# The smoothing formula is given explicitly so geom_smooth() does not emit
# its "using formula" message.
ggplot(data, aes(x = Age, y = Caps)) +
  geom_point(color = "red") +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue", se = FALSE) +
  labs(title = "Scatter Plot of Caps over Age", x = "Age", y = "Caps") +
  theme_minimal() +
  coord_cartesian(ylim = c(0, max(data$Caps, na.rm = TRUE) * 1.1))
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 16 rows containing missing values or values outside the scale range
(`geom_smooth()`).

#The scatter plot shows us the relationship between age and caps.
Caps are, in other words, national team appearances. The logic behind
it is that the older you are, the more caps you accumulate.
Calculate the mean of the Age column
# Average of the Age column; missing values are dropped before averaging.
mean_age <- mean(data[["Age"]], na.rm = TRUE)
print(mean_age)
[1] 27.04013
Calculate the mean of the Caps column
# Average of the Caps column; missing values are dropped before averaging.
mean_caps <- mean(data[["Caps"]], na.rm = TRUE)
print(mean_caps)
[1] 30.33868
Calculate the correlation between Age and Caps
# Correlation between Age and Caps (cor() defaults to Pearson).
# use = "complete.obs" drops rows where either value is missing; without it a
# single NA in either column would make the whole result NA, unlike the means
# in this analysis, which are computed with na.rm = TRUE.
correlation_age_caps <- cor(data$Age, data$Caps, use = "complete.obs")
#The correlation coefficient will range from -1 to 1. The closer to
1, the stronger the positive relationship between these two
variables.
Output the correlation
# Show the Age/Caps correlation coefficient.
print(correlation_age_caps)
[1] 0.6428288
#This supports the hypothesis that more experienced (and typically
older) players have more national team appearances.
Let’s create two subsets: one for players of age 25 and older and
one for players younger than 25.
# Players aged 25 or older (Age >= 25). which() keeps only rows where the
# comparison is TRUE, so rows with a missing Age are excluded.
players_25_or_older <- data[which(data$Age >= 25), , drop = FALSE]
# Players younger than 25 (Age < 25), selected the same way.
players_under_25 <- data[which(data$Age < 25), , drop = FALSE]
T-test to compare differences between the two datasets.
# Two-sample t-test on Caps: players aged 25 or older (x) against players
# under 25 (y). t.test() defaults to the Welch (unequal variance) form.
t_test_result <- t.test(
  x = players_25_or_older$Caps,
  y = players_under_25$Caps
)
# Print the test summary.
print(t_test_result)
Welch Two Sample t-test
data: players_25_or_older$Caps and players_under_25$Caps
t = 14.498, df = 608.46, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
22.31030 29.30169
sample estimates:
mean of x mean of y
37.67040 11.86441
#p-value: Indicates whether the observed difference is statistically
significant. A p-value below 0.05 typically suggests a significant
difference. In this example, the p-value is below 2.2e-16, which indicates
a highly significant difference.
#mean of x and mean of y: These are the average Caps for players aged
25 and older (mean of x) and players under 25 (mean of y).
#The mean of >=25 (37.67) is about 3x the mean of <25 (11.86), which is
pretty different. The p-value is extremely small, which aligns with what
the t-test is telling us.
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KI0xvYWRpbmcgdGhlIGRhdGEgc2V0DQpgYGB7cn0NCmRhdGEgPC0gcmVhZC5jc3YoIkM6L1VzZXJzL3RpY29hL09uZURyaXZlL0Rlc2t0b3AvRmFsbCAyMDI0L0FkdmFuY2VkIEFuYWx5dGljcy9ldXJvMjAyNF9wbGF5ZXJzLmNzdiIpDQpgYGANCg0KIyBMb2FkIG5lY2Vzc2FyeSBsaWJyYXJ5IGZvciBwbG90dGluZw0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpgYGANCg0KIyBCYXIgcGxvdCBmb3IgdGhlIFBvc2l0aW9uIGNvbHVtbg0KYGBge3J9DQpnZ3Bsb3QoZGF0YSwgYWVzKHggPSBQb3NpdGlvbikpICsNCiAgZ2VvbV9iYXIoZmlsbCA9ICJyZWQiLCBjb2xvciA9ICJibGFjayIpICsNCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidXRpb24gb2YgUGxheWVycyBieSBQb3NpdGlvbiIsIHggPSAiUG9zaXRpb24iLCB5ID0gIkNvdW50IikgKw0KICB0aGVtZV9taW5pbWFsKCkgKyB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpKQ0KDQoNCmBgYA0KI1RoZSBoaXN0b2dyYW0gc2hvd3MgdGhlIGRpc3RyaWJ1dGlvbiBvZiB0aGUgIlBvc2l0aW9uIiBjb2x1bW4uIEl0IGlzIHNrZXdlZCB0byB0aGUgbGVmdCBhbmQgdGhlIGhpZ2hlc3QgdmFsdWUgaXMgIkNlbnRyZS1CYWNrIiwgd2l0aCAxMjArIHBsYXllcnMuDQoNCiMgU2NhdHRlciBwbG90IGZvciBDQVBTIG92ZXIgQWdlDQpgYGB7cn0NCmdncGxvdChkYXRhLCBhZXMoeCA9IEFnZSwgeSA9IENhcHMpKSArDQogIGdlb21fcG9pbnQoY29sb3IgPSAicmVkIikgKyBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBjb2xvciA9ICJibHVlIiwgc2UgPSBGQUxTRSkgKyBsYWJzKHRpdGxlID0gIlNjYXR0ZXIgUGxvdCBvZiBDYXBzIG92ZXIgQWdlIiwgeCA9ICJBZ2UiLCB5ID0gIkNhcHMiKSArDQogIHRoZW1lX21pbmltYWwoKSArIHlsaW0oMCwgbWF4KGRhdGEkQ2FwcykgKiAxLjEpICAjIEFkanVzdGluZyB0byBsZWF2ZSBzb21lIHNwYWNlIGFib3ZlIHRoZSBoaWdoZXN0IHBvaW50DQoNCmBgYA0KDQojVGhlIHNjYXR0ZXIgcGxvdCBzaG93cyB1cyB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gYWdlIGFuZCBjYXBzLiBDYXBzIGFyZSBuYXRpb25hbCB0ZWFtIGFwcGVhcmFuY2VzIGluIG90aGVyIHdvcmRzLiBTbyB0aGUgbG9naWMgYmVoaW5kIGl0LCBpcyB3aXRoIHRoZSBvbGRlciB5b3UgYXJlLCB0aGUgbW9yZSBjYXBzIHlvdSBhY2N1bXVsYXRlLg0KDQojIENhbGN1bGF0ZSB0aGUgbWVhbiBvZiB0aGUgQWdlIGNvbHVtbg0KYGBge3J9DQptZWFuX2FnZSA8LSBtZWFuKGRhdGEkQWdlLCBuYS5ybSA9IFRSVUUpICAjIFVzZSBuYS5ybSA9IFRSVUUgdG8gaWdub3JlIE5BIHZhbHVlcw0KbWVhbl9hZ2UNCmBgYA0KDQojIENhbGN1bGF0ZSB0aGUgbWVhbiBvZiB0aGUgQ2FwcyBjb2x1bW4NCmBgYHtyfQ0KbWVhbl9jYXBzIDwtIG1lYW4oZGF0YSRDYXBzLCBu
YS5ybSA9IFRSVUUpICAjIFVzZSBuYS5ybSA9IFRSVUUgdG8gaWdub3JlIE5BIHZhbHVlcw0KbWVhbl9jYXBzDQpgYGANCg0KIyBDYWxjdWxhdGUgdGhlIGNvcnJlbGF0aW9uIGJldHdlZW4gQWdlIGFuZCBDYXBzDQpgYGB7cn0NCmNvcnJlbGF0aW9uX2FnZV9jYXBzIDwtIGNvcihkYXRhJEFnZSwgZGF0YSRDYXBzKQ0KYGBgDQojVGhlIGNvcnJlbGF0aW9uIGNvZWZmaWNpZW50IHdpbGwgcmFuZ2UgZnJvbSAtMSB0byAxLiBUaGUgY2xvc2VyIHRvIDEsIHRoZSBzdHJvbmdlciB0aGUgcG9zaXRpdmUgcmVsYXRpb25zaGlwIGJldHdlZW4gdGhlc2UgdHdvIHZhcmlhYmxlcy4NCg0KIyBPdXRwdXQgdGhlIGNvcnJlbGF0aW9uDQpgYGB7cn0NCmNvcnJlbGF0aW9uX2FnZV9jYXBzDQpgYGANCiNUaGlzIHN1cHBvcnRzIHRoZSBoeXBvdGhlc2lzIHRoYXQgbW9yZSBleHBlcmllbmNlZCAoYW5kIHR5cGljYWxseSBvbGRlcikgcGxheWVycyBoYXZlIG1vcmUgbmF0aW9uYWwgdGVhbSBhcHBlYXJhbmNlcy4NCg0KIyBMZXQncyBjcmVhdGUgdHdvIHN1YnNldHM6IG9uZSBmb3IgcGxheWVycyBvZiBhZ2UgMjUgYW5kIG9sZGVyIGFuZCBvbmUgZm9yIHBsYXllcnMgeW91bmdlciB0aGFuIDI1Lg0KDQpgYGB7cn0NCiMgUGxheWVycyBhYm92ZSBhZ2UgMjUNCnBsYXllcnNfMjVfb3Jfb2xkZXIgPC0gc3Vic2V0KGRhdGEsIEFnZSA+PSAyNSkNCg0KIyBQbGF5ZXJzIDI1IHllYXJzIG9sZCBvciB5b3VuZ2VyDQpwbGF5ZXJzX3VuZGVyXzI1IDwtIHN1YnNldChkYXRhLCBBZ2UgPCAyNSkNCg0KYGBgDQoNCiMgVC10ZXN0IHRvIGNvbXBhcmUgZGlmZmVyZW5jZXMgYmV0d2VlbiB0aGUgdHdvIGRhdGFzZXRzLg0KDQpgYGB7cn0NCiMgQ29uZHVjdGluZyBhIHQtdGVzdCBmb3IgdGhlICdDYXBzJyB2YXJpYWJsZSB1c2luZyB0aGUgc3Vic2V0cw0KdF90ZXN0X3Jlc3VsdCA8LSB0LnRlc3QocGxheWVyc18yNV9vcl9vbGRlciRDYXBzLCBwbGF5ZXJzX3VuZGVyXzI1JENhcHMpDQoNCiMgRGlzcGxheWluZyB0aGUgcmVzdWx0DQp0X3Rlc3RfcmVzdWx0DQoNCmBgYA0KI3AtdmFsdWU6IEluZGljYXRlcyB3aGV0aGVyIHRoZSBvYnNlcnZlZCBkaWZmZXJlbmNlIGlzIHN0YXRpc3RpY2FsbHkgc2lnbmlmaWNhbnQuIEEgcC12YWx1ZSBiZWxvdyAwLjA1IHR5cGljYWxseSBzdWdnZXN0cyBhIHNpZ25pZmljYW50IGRpZmZlcmVuY2UuIEluIHRoaXMgZXhhbXBsZSwgYSBwLXZhbHVlIG9mIDAuMjY2NyBzdWdnZXN0cyBubyBzaWduaWZpY2FudCBkaWZmZXJlbmNlLg0KDQojbWVhbiBvZiB4IGFuZCBtZWFuIG9mIHk6IFRoZXNlIGFyZSB0aGUgYXZlcmFnZSBDYXBzIGZvciBwbGF5ZXJzIGFnZWQgMjUgYW5kIG9sZGVyIChtZWFuIG9mIHgpIGFuZCBwbGF5ZXJzIHVuZGVyIDI1IChtZWFuIG9mIHkpLg0KDQojVGhlIG1lYW4gb2YgPj0yNSBpcyAzeCB0aGUgbWVhbiBvZiA8MjUsIHdoaWNoIGlzIHByZXR0eSBkaWZmZXJlbnQsIHdoaWNoIGlzIGV4dHJlbWVseSBzbWFsbCBhbmQgYWxpZ25z
IHdpdGggd2hhdCB0aGUgcC10ZXN0IGlzIHRlbGxpbmcgdXMuDQo=