Import data

# Load the athlete records from the Excel workbook, then preview the tibble
data <- read_excel(path = "../00_data/myData_apply2.xlsx")
data
## # A tibble: 271,116 × 16
##    Column1     id name  sex   age   height weight team  noc   games  year season
##      <dbl>  <dbl> <chr> <chr> <chr> <chr>  <chr>  <chr> <chr> <chr> <dbl> <chr> 
##  1  270479 135289 Zzim… M     20    NA     NA     Braz… BRA   1952…  1952 Summer
##  2  207677 104222 zzet… M     26    178    74     Turk… TUR   2016…  2016 Summer
##  3  102956  52087 zzet… M     23    172    85     Turk… TUR   2004…  2004 Summer
##  4  102957  52087 zzet… M     27    172    85     Turk… TUR   2008…  2008 Summer
##  5  106601  53910 Zyta… F     26    187    85     Pola… POL   1988…  1988 Summer
##  6  219493 110259 Zygm… M     25    185    82     Pola… POL   1932…  1932 Summer
##  7  183779  92370 Zygm… M     21    179    72     Pola… POL   1952…  1952 Summer
##  8  183780  92370 Zygm… M     26    179    72     Pola… POL   1956…  1956 Summer
##  9  152047  76313 Zygm… M     27    175    71     Pola… POL   1972…  1972 Summer
## 10  152048  76313 Zygm… M     31    175    71     Pola… POL   1976…  1976 Summer
## # ℹ 271,106 more rows
## # ℹ 4 more variables: city <chr>, sport <chr>, event <chr>, medal <chr>

State one question

What is the mean age per medal?

Plot data

# Normalise the two columns used by the plot: 'medal' and 'age'.
#
# medal: blank cells, whitespace-only cells and the literal text "NA" all mean
# "no medal", so they are folded into a real NA and form one group downstream.
# trimws() is given the "[\\h\\v]" class so that non-breaking spaces and other
# Unicode whitespace are stripped as well, not only the default " \t\r\n".
# NOTE(review): the interpretation reports two separate NA bars; stray
# whitespace of this kind is a plausible cause -- confirm with
# count(data, medal) after this step.
#
# age: the column is imported as text (presumably because it holds "NA"
# strings -- TODO confirm), so missing markers become a real NA *before*
# as.numeric(). That avoids the blanket "NAs introduced by coercion" warning
# while still warning if a genuinely malformed age value is present.
data <- data %>%
  mutate(
    medal = trimws(medal, whitespace = "[\\h\\v]"),
    medal = na_if(medal, ""),
    medal = na_if(medal, "NA"),
    age = trimws(age, whitespace = "[\\h\\v]"),
    age = na_if(age, ""),
    age = na_if(age, "NA"),
    age = as.numeric(age)
  )

# One row per medal value, holding the average age of its athletes
# (rows with a missing age are left out of the average)
plotdata <- summarize(
  group_by(data, medal),
  mean_age = mean(age, na.rm = TRUE)
)

# Bar chart: one bar per medal value, bar height = mean age.
# geom_col() draws bars from the y values as given, which is what
# geom_bar(stat = "identity") does.
ggplot(data = plotdata, mapping = aes(x = medal, y = mean_age)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Mean Age per Medal",
    x = "Medal",
    y = "Mean Age"
  )

Interpret

Bronze: The average age of athletes who won a Bronze medal is 25.9 years. Gold: The average age of Gold medal winners is also 25.9 years. Silver: Athletes who won Silver medals have an average age of 26.0 years. NA (25.5): These are the rows for athletes who did not win any medal (NA); their average age is 25.5 years. NA (34): This is a second group in which the medal field is also missing (NA), but it has a much higher average age of 34 years. The two NA groups likely come from different encodings of a missing value in the dataset (for example, a truly empty cell versus the text "NA"); they represent the same "no medal" category and should be combined into a single group.