a)

library(readxl)
# Import the player data from the Excel workbook (path is relative to the
# working directory).
mydata <- read_excel(path = "./Football.xlsx")

# Preview the first six rows to check that the import worked.
head(mydata, n = 6)
## # A tibble: 6 × 10
##    Rank Name   Position   Age Value Club  Games_played Goals Assists Card_yellow
##   <dbl> <chr>     <dbl> <dbl> <dbl> <chr>        <dbl> <dbl>   <dbl>       <dbl>
## 1     1 Kylia…        4    22   144 Pari…           16     7      11           3
## 2     2 Erlin…        4    21   135 Boru…           10    13       4           1
## 3     3 Harry…        4    28   108 Tott…           16     7       2           2
## 4     4 Jack …        1    26    90 Manc…           15     2       3           1
## 5     5 Moham…        2    29    90 Live…           15    15       6           1
## 6     6 Romel…        4    28    90 Chel…           11     4       1           0

Description of the variables:

- Rank: The rank of the footballer in relation to the value.
- Name: Name of the player.
- Position: Player position (1: midfielder, 2: winger, 3: defender, 4: striker, 5: goalkeeper).
- Age: Age of the player.
- Value: Estimated value of the player in million EUR for the year 2021.
- Club: The club where the player plays.
- Games_played: Number of games played in 2021.
- Goals: Number of goals scored in 2021.
- Assists: Number of assists given in 2021.
- Card_yellow: Number of yellow cards received in 2021.

b)

# Recode the numeric position codes (1-5) as a labelled factor so that plots
# and group comparisons show readable position names.
mydata$Position <- factor(mydata$Position,
                          levels = c(1, 2, 3, 4, 5),
                          labels = c("midfielder", "winger", "defender",
                                     "striker", "goalkeeper"))
library(ggplot2)
# Bar chart of the number of players per position, with the count printed
# just above each bar.
ggplot(mydata, aes(x = Position)) +
  geom_bar(colour = "blue", fill = "lightblue") +
  ylab("Frequency") +
  theme_minimal() +
  # after_stat(count) replaces the `..count..` notation, which was deprecated
  # in ggplot2 3.4.0 and triggered a lifecycle warning.
  geom_text(stat = "count",
            aes(label = after_stat(count)),
            vjust = -0.3)

c)

library(ggplot2)
# Goals against games played: one point per player, overlaid with a fitted
# linear trend line drawn without its confidence band.
ggplot(data = mydata, mapping = aes(y = Goals, x = Games_played)) +
  labs(title = "Scatterplot with Trend Line") +
  geom_point(size = 3, color = "blue") +
  geom_smooth(color = "red", se = FALSE, method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

d)

# Mean number of yellow cards among players whose position is "defender".
with(mydata, mean(Card_yellow[Position == "defender"]))
## [1] 1.913043

The average number of yellow cards for defenders is 1.91.

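Hypotheses for strikers, where $\mu$ is the mean number of yellow cards received by strikers: $H_0: \mu = 1.91$ versus $H_1: \mu < 1.91$.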

One sample t-test

# One-sample, one-sided t-test: is the mean number of yellow cards for
# strikers below the reference value 1.91?
t.test(
  x = mydata[mydata$Position == "striker", ]$Card_yellow,
  alternative = "less",
  mu = 1.91
)
## 
##  One Sample t-test
## 
## data:  mydata[mydata$Position == "striker", ]$Card_yellow
## t = -0.78247, df = 13, p-value = 0.224
## alternative hypothesis: true mean is less than 1.91
## 95 percent confidence interval:
##      -Inf 2.247475
## sample estimates:
## mean of x 
##  1.642857

Because the p-value (0.224) is greater than 0.05, we cannot reject the null hypothesis. We cannot conclude that the average number of yellow cards is smaller for strikers than for defenders.