#Loading the data set

# Read the Euro 2024 player table from CSV into `data`.
# NOTE(review): the path is absolute and specific to one machine; confirm
# the file location before running elsewhere.
data <- read.csv(
  file = "C:/Users/ticoa/OneDrive/Desktop/Fall 2024/Advanced Analytics/euro2024_players.csv"
)

Load necessary library for plotting

library(ggplot2)
Warning: package ‘ggplot2’ was built under R version 4.3.3

Bar plot for the Position column

# Count of players per Position, drawn as red bars with a black outline.
# The x-axis labels are rotated 45 degrees so long position names fit.
ggplot(data = data, mapping = aes(x = Position)) +
  geom_bar(fill = "red", colour = "black") +
  ggtitle("Distribution of Players by Position") +
  xlab("Position") +
  ylab("Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

NA
NA

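#The bar chart shows the distribution of the “Position” column. Because Position is a categorical variable, the apparent skew to the left depends on the order in which the categories are drawn rather than on a numeric trend. The highest value is “Centre-Back”, with 120+ players.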

Scatter plot for CAPS over Age

# Caps against Age: red points plus a blue linear-regression line drawn
# without a confidence band.
ggplot(data = data, mapping = aes(x = Age, y = Caps)) +
  geom_point(colour = "red") +
  geom_smooth(method = "lm", colour = "blue", se = FALSE) +
  ggtitle("Scatter Plot of Caps over Age") +
  xlab("Age") +
  ylab("Caps") +
  theme_minimal() +
  # The upper limit is 10% above the largest Caps value to leave headroom.
  scale_y_continuous(limits = c(0, max(data$Caps) * 1.1))
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 16 rows containing missing values or values outside the scale range
(`geom_smooth()`).

#The scatter plot shows the relationship between age and caps. Caps are, in other words, national team appearances. The logic behind the relationship is that the older a player is, the more caps they have had time to accumulate.

Calculate the mean of the Age column

# Average player age; missing ages are skipped rather than propagated.
mean_age <- mean(data[["Age"]], na.rm = TRUE)
mean_age
[1] 27.04013

Calculate the mean of the Caps column

# Average number of caps; missing values are skipped rather than propagated.
mean_caps <- mean(data[["Caps"]], na.rm = TRUE)
mean_caps
[1] 30.33868

Calculate the correlation between Age and Caps

# Pearson correlation (the default method of cor()) between Age and Caps.
# Rows where either value is missing are dropped, matching the
# na.rm = TRUE handling used for the means; without `use`, a single NA in
# either column would make the whole result NA.
correlation_age_caps <- cor(data$Age, data$Caps, use = "complete.obs")

#The correlation coefficient will range from -1 to 1. The closer to 1, the stronger the positive relationship between these two variables.

Output the correlation

# Print the stored Age-Caps correlation coefficient.
correlation_age_caps
[1] 0.6428288

#This supports the hypothesis that more experienced (and typically older) players have more national team appearances.

Let’s create two subsets: one for players of age 25 and older and one for players younger than 25.

# Split the players into two age groups at 25. Rows with a missing Age
# fall into neither group.

# Players aged 25 or older
players_25_or_older <- data[which(data$Age >= 25), , drop = FALSE]

# Players younger than 25
players_under_25 <- data[which(data$Age < 25), , drop = FALSE]

T-test to compare differences between the two datasets.

# Welch two-sample t-test comparing mean Caps between the two age groups
# (two-sided, variances not assumed equal -- the t.test() defaults).
t_test_result <- t.test(
  x = players_25_or_older$Caps,
  y = players_under_25$Caps,
  alternative = "two.sided",
  var.equal = FALSE
)

# Show the test summary
t_test_result

    Welch Two Sample t-test

data:  players_25_or_older$Caps and players_under_25$Caps
t = 14.498, df = 608.46, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 22.31030 29.30169
sample estimates:
mean of x mean of y 
 37.67040  11.86441 

#p-value: Indicates whether the observed difference is statistically significant. A p-value below 0.05 typically suggests a significant difference. In this example, the p-value is below 2.2e-16, far under 0.05, so the difference in mean Caps between the two age groups is statistically significant.

#mean of x and mean of y: These are the average Caps for players aged 25 and older (mean of x) and players under 25 (mean of y).

#The mean Caps for players aged 25 and older (37.67) is roughly 3x the mean for players under 25 (11.86). That is a large difference, and it aligns with the extremely small p-value reported by the t-test.

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KI0xvYWRpbmcgdGhlIGRhdGEgc2V0DQpgYGB7cn0NCmRhdGEgPC0gcmVhZC5jc3YoIkM6L1VzZXJzL3RpY29hL09uZURyaXZlL0Rlc2t0b3AvRmFsbCAyMDI0L0FkdmFuY2VkIEFuYWx5dGljcy9ldXJvMjAyNF9wbGF5ZXJzLmNzdiIpDQpgYGANCg0KIyBMb2FkIG5lY2Vzc2FyeSBsaWJyYXJ5IGZvciBwbG90dGluZw0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpgYGANCg0KIyBCYXIgcGxvdCBmb3IgdGhlIFBvc2l0aW9uIGNvbHVtbg0KYGBge3J9DQpnZ3Bsb3QoZGF0YSwgYWVzKHggPSBQb3NpdGlvbikpICsNCiAgZ2VvbV9iYXIoZmlsbCA9ICJyZWQiLCBjb2xvciA9ICJibGFjayIpICsNCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidXRpb24gb2YgUGxheWVycyBieSBQb3NpdGlvbiIsIHggPSAiUG9zaXRpb24iLCB5ID0gIkNvdW50IikgKw0KICB0aGVtZV9taW5pbWFsKCkgKyB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpKQ0KDQoNCmBgYA0KI1RoZSBoaXN0b2dyYW0gc2hvd3MgdGhlIGRpc3RyaWJ1dGlvbiBvZiB0aGUgIlBvc2l0aW9uIiBjb2x1bW4uIEl0IGlzIHNrZXdlZCB0byB0aGUgbGVmdCBhbmQgdGhlIGhpZ2hlc3QgdmFsdWUgaXMgIkNlbnRyZS1CYWNrIiwgd2l0aCAxMjArIHBsYXllcnMuDQoNCiMgU2NhdHRlciBwbG90IGZvciBDQVBTIG92ZXIgQWdlDQpgYGB7cn0NCmdncGxvdChkYXRhLCBhZXMoeCA9IEFnZSwgeSA9IENhcHMpKSArDQogIGdlb21fcG9pbnQoY29sb3IgPSAicmVkIikgKyBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBjb2xvciA9ICJibHVlIiwgc2UgPSBGQUxTRSkgKyBsYWJzKHRpdGxlID0gIlNjYXR0ZXIgUGxvdCBvZiBDYXBzIG92ZXIgQWdlIiwgeCA9ICJBZ2UiLCB5ID0gIkNhcHMiKSArDQogIHRoZW1lX21pbmltYWwoKSArIHlsaW0oMCwgbWF4KGRhdGEkQ2FwcykgKiAxLjEpICAjIEFkanVzdGluZyB0byBsZWF2ZSBzb21lIHNwYWNlIGFib3ZlIHRoZSBoaWdoZXN0IHBvaW50DQoNCmBgYA0KDQojVGhlIHNjYXR0ZXIgcGxvdCBzaG93cyB1cyB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gYWdlIGFuZCBjYXBzLiBDYXBzIGFyZSBuYXRpb25hbCB0ZWFtIGFwcGVhcmFuY2VzIGluIG90aGVyIHdvcmRzLiBTbyB0aGUgbG9naWMgYmVoaW5kIGl0LCBpcyB3aXRoIHRoZSBvbGRlciB5b3UgYXJlLCB0aGUgbW9yZSBjYXBzIHlvdSBhY2N1bXVsYXRlLg0KDQojIENhbGN1bGF0ZSB0aGUgbWVhbiBvZiB0aGUgQWdlIGNvbHVtbg0KYGBge3J9DQptZWFuX2FnZSA8LSBtZWFuKGRhdGEkQWdlLCBuYS5ybSA9IFRSVUUpICAjIFVzZSBuYS5ybSA9IFRSVUUgdG8gaWdub3JlIE5BIHZhbHVlcw0KbWVhbl9hZ2UNCmBgYA0KDQojIENhbGN1bGF0ZSB0aGUgbWVhbiBvZiB0aGUgQ2FwcyBjb2x1bW4NCmBgYHtyfQ0KbWVhbl9jYXBzIDwtIG1lYW4oZGF0YSRDYXBzLCBu
YS5ybSA9IFRSVUUpICAjIFVzZSBuYS5ybSA9IFRSVUUgdG8gaWdub3JlIE5BIHZhbHVlcw0KbWVhbl9jYXBzDQpgYGANCg0KIyBDYWxjdWxhdGUgdGhlIGNvcnJlbGF0aW9uIGJldHdlZW4gQWdlIGFuZCBDYXBzDQpgYGB7cn0NCmNvcnJlbGF0aW9uX2FnZV9jYXBzIDwtIGNvcihkYXRhJEFnZSwgZGF0YSRDYXBzKQ0KYGBgDQojVGhlIGNvcnJlbGF0aW9uIGNvZWZmaWNpZW50IHdpbGwgcmFuZ2UgZnJvbSAtMSB0byAxLiBUaGUgY2xvc2VyIHRvIDEsIHRoZSBzdHJvbmdlciB0aGUgcG9zaXRpdmUgcmVsYXRpb25zaGlwIGJldHdlZW4gdGhlc2UgdHdvIHZhcmlhYmxlcy4NCg0KIyBPdXRwdXQgdGhlIGNvcnJlbGF0aW9uDQpgYGB7cn0NCmNvcnJlbGF0aW9uX2FnZV9jYXBzDQpgYGANCiNUaGlzIHN1cHBvcnRzIHRoZSBoeXBvdGhlc2lzIHRoYXQgbW9yZSBleHBlcmllbmNlZCAoYW5kIHR5cGljYWxseSBvbGRlcikgcGxheWVycyBoYXZlIG1vcmUgbmF0aW9uYWwgdGVhbSBhcHBlYXJhbmNlcy4NCg0KIyBMZXQncyBjcmVhdGUgdHdvIHN1YnNldHM6IG9uZSBmb3IgcGxheWVycyBvZiBhZ2UgMjUgYW5kIG9sZGVyIGFuZCBvbmUgZm9yIHBsYXllcnMgeW91bmdlciB0aGFuIDI1Lg0KDQpgYGB7cn0NCiMgUGxheWVycyBhYm92ZSBhZ2UgMjUNCnBsYXllcnNfMjVfb3Jfb2xkZXIgPC0gc3Vic2V0KGRhdGEsIEFnZSA+PSAyNSkNCg0KIyBQbGF5ZXJzIDI1IHllYXJzIG9sZCBvciB5b3VuZ2VyDQpwbGF5ZXJzX3VuZGVyXzI1IDwtIHN1YnNldChkYXRhLCBBZ2UgPCAyNSkNCg0KYGBgDQoNCiMgVC10ZXN0IHRvIGNvbXBhcmUgZGlmZmVyZW5jZXMgYmV0d2VlbiB0aGUgdHdvIGRhdGFzZXRzLg0KDQpgYGB7cn0NCiMgQ29uZHVjdGluZyBhIHQtdGVzdCBmb3IgdGhlICdDYXBzJyB2YXJpYWJsZSB1c2luZyB0aGUgc3Vic2V0cw0KdF90ZXN0X3Jlc3VsdCA8LSB0LnRlc3QocGxheWVyc18yNV9vcl9vbGRlciRDYXBzLCBwbGF5ZXJzX3VuZGVyXzI1JENhcHMpDQoNCiMgRGlzcGxheWluZyB0aGUgcmVzdWx0DQp0X3Rlc3RfcmVzdWx0DQoNCmBgYA0KI3AtdmFsdWU6IEluZGljYXRlcyB3aGV0aGVyIHRoZSBvYnNlcnZlZCBkaWZmZXJlbmNlIGlzIHN0YXRpc3RpY2FsbHkgc2lnbmlmaWNhbnQuIEEgcC12YWx1ZSBiZWxvdyAwLjA1IHR5cGljYWxseSBzdWdnZXN0cyBhIHNpZ25pZmljYW50IGRpZmZlcmVuY2UuIEluIHRoaXMgZXhhbXBsZSwgYSBwLXZhbHVlIG9mIDAuMjY2NyBzdWdnZXN0cyBubyBzaWduaWZpY2FudCBkaWZmZXJlbmNlLg0KDQojbWVhbiBvZiB4IGFuZCBtZWFuIG9mIHk6IFRoZXNlIGFyZSB0aGUgYXZlcmFnZSBDYXBzIGZvciBwbGF5ZXJzIGFnZWQgMjUgYW5kIG9sZGVyIChtZWFuIG9mIHgpIGFuZCBwbGF5ZXJzIHVuZGVyIDI1IChtZWFuIG9mIHkpLg0KDQojVGhlIG1lYW4gb2YgPj0yNSBpcyAzeCB0aGUgbWVhbiBvZiA8MjUsIHdoaWNoIGlzIHByZXR0eSBkaWZmZXJlbnQsIHdoaWNoIGlzIGV4dHJlbWVseSBzbWFsbCBhbmQgYWxpZ25z
IHdpdGggd2hhdCB0aGUgcC10ZXN0IGlzIHRlbGxpbmcgdXMuDQo=