Load required the libraries

library(tidyr)
library(treemapify)
library(tidyverse)
library(rvest)
library(dplyr)
library(DT)
library(fmsb)
library(kableExtra)

Wikipedia page URL

url <- "https://en.wikipedia.org/wiki/2024_Summer_Olympics_medal_table"

# Read the full HTML of the page
page <- read_html(url)

# Extract all tables
tables <- page %>% html_elements("table")

Convert all the tables in the page to data frames

all_tables <- lapply(tables, html_table, fill = TRUE)

Select the table with the required data

medal_table <- all_tables[[4]] 

# view the data
head(medal_table)

Remove the last row of the data

df_new <- medal_table[-nrow(medal_table), ]

Transform the character variable to integers

df_new$Total = as.numeric(df_new$Total)
df_new$Rank = as.numeric(df_new$Rank)

# view the structure of the data
str(df_new)
## tibble [92 × 6] (S3: tbl_df/tbl/data.frame)
##  $ Rank  : num [1:92] 1 2 3 4 5 6 7 8 9 10 ...
##  $ NOC   : chr [1:92] "United States‡" "China" "Japan" "Australia" ...
##  $ Gold  : int [1:92] 40 40 20 18 16 15 14 13 12 12 ...
##  $ Silver: int [1:92] 44 27 12 19 26 7 22 9 13 13 ...
##  $ Bronze: int [1:92] 42 24 13 16 22 12 29 10 15 8 ...
##  $ Total : num [1:92] 126 91 45 53 64 34 65 32 40 33 ...

Top 10 Countries by Gold Medals

top10_gold <- df_new %>%
  arrange(desc(Gold)) %>%
  slice(1:10)

print(top10_gold)
## # A tibble: 10 × 6
##     Rank NOC             Gold Silver Bronze Total
##    <dbl> <chr>          <int>  <int>  <int> <dbl>
##  1     1 United States‡    40     44     42   126
##  2     2 China             40     27     24    91
##  3     3 Japan             20     12     13    45
##  4     4 Australia         18     19     16    53
##  5     5 France*           16     26     22    64
##  6     6 Netherlands       15      7     12    34
##  7     7 Great Britain     14     22     29    65
##  8     8 South Korea       13      9     10    32
##  9     9 Italy             12     13     15    40
## 10    10 Germany           12     13      8    33

# Create scrollable table

# Create a scrollable table
kable(top10_gold, "html") %>%
  kable_styling(bootstrap_options = c("striped", "hover",
                                      "condensed","responsive")) %>%
  scroll_box(width = "100%", height = "300px")
Rank NOC Gold Silver Bronze Total
1 United States‡ 40 44 42 126
2 China 40 27 24 91
3 Japan 20 12 13 45
4 Australia 18 19 16 53
5 France* 16 26 22 64
6 Netherlands 15 7 12 34
7 Great Britain 14 22 29 65
8 South Korea 13 9 10 32
9 Italy 12 13 15 40
10 Germany 12 13 8 33

Visualize the top 10 countries by Total Medals

library(ggplot2)

bar_colors <- rainbow(10)

ggplot(top10_gold, aes(x = reorder(NOC, Total), y = Total, fill = NOC)) +
  geom_col(show.legend = FALSE) +
  scale_fill_manual(values = bar_colors) +
  coord_flip() +
  labs(
    title = "Top 10 Countries by Total Medals - Paris 2024 ",
    x = "Country", y = "Total Medals"
  )

Top 5 Countries by Gold Medals

top5_gold <- df_new %>%
  arrange(desc(Gold)) %>%
  slice(1:5)

print(top5_gold)
## # A tibble: 5 × 6
##    Rank NOC             Gold Silver Bronze Total
##   <dbl> <chr>          <int>  <int>  <int> <dbl>
## 1     1 United States‡    40     44     42   126
## 2     2 China             40     27     24    91
## 3     3 Japan             20     12     13    45
## 4     4 Australia         18     19     16    53
## 5     5 France*           16     26     22    64

Top 5 Countries by Gold Medals

# Create a scrollable table
kable(top5_gold, "html") %>%
  kable_styling(bootstrap_options = c("striped", "hover",
                                      "condensed","responsive")) %>%
  scroll_box(width = "100%", height = "300px")
Rank NOC Gold Silver Bronze Total
1 United States‡ 40 44 42 126
2 China 40 27 24 91
3 Japan 20 12 13 45
4 Australia 18 19 16 53
5 France* 16 26 22 64
# Create donut chart
ggplot(top5_gold, aes(x = 2, y = Gold, fill = NOC)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  xlim(0.5, 2.5) +  # Creates the donut hole
  theme_void() +
  geom_text(aes(label = Gold),
            position = position_stack(vjust = 0.5),
            color = "white", size = 5) +
  theme(legend.title = element_blank()) +
  ggtitle("Top 5 Countries by Total Medals - Paris 2024") +
  theme(plot.title = element_text(hjust = 0.5))

# Select Top 5 Countries by Bronze count
top5_bronze <- df_new %>%
  arrange(desc(Bronze)) %>%
  slice(1:5)

# Expand rows based on Bronze counts
bronze_expanded <- top5_bronze %>%
  rowwise() %>%
  mutate(values = list(rep(NOC, Bronze))) %>%
  unnest(values)

# Create box plot
ggplot(bronze_expanded, aes(x = NOC, y = Bronze, fill = NOC)) +
  geom_boxplot() +
  labs(
    title = "Top 5 Countries by Bronze Medals",
    x = "Country",
    y = "Number of Bronze Medals"
  ) +
  theme_minimal() +
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5))

Medal Count Region

continent_map <- tibble::tribble(
  ~NOC, ~Continent,
  "United States", "North America",
  "China", "Asia",
  "Japan", "Asia",
  "Australia", "Oceania",
  "Canada", "North America",
  "Brazil", "South America",
  "Kenya", "Africa",
  "Ethiopia", "Africa",
  "Great Britain", "Europe",
  "France", "Europe",
  "Germany", "Europe",
  "Italy", "Europe",
  "Netherlands", "Europe",
  "South Korea", "Asia",
  "Hungary", "Europe",
  "Cuba", "North America",
  "South Africa", "Africa"
)

df_continent <- df_new %>%
  left_join(continent_map, by = "NOC") %>%
  filter(!is.na(Continent)) %>%
  group_by(Continent) %>%
  summarise(Total_Medals = sum(Total, na.rm = TRUE)) %>%
  arrange(desc(Total_Medals))
df_continent

# Merge and summarize the data

ggplot(df_continent, aes(x = reorder(Continent, Total_Medals), 
                         y = Total_Medals, fill = Continent)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(title = "Total Medals by Continent - Paris 2024",
       x = "Continent", y = "Total Medals") +
  theme_minimal()

# Keep only top 5 continents by total medals
top5_continents <- df_continent %>%
  arrange(desc(Total_Medals)) %>%
  slice(1:5)

# Donut chart
ggplot(top5_continents, aes(x = 2, y = Total_Medals, fill = Continent)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y") +
  xlim(0.5, 2.5) +
  theme_void() +
  geom_text(aes(label = paste0(Continent, "\n", round(Total_Medals / sum(Total_Medals) * 100, 1), "%")),
            position = position_stack(vjust = 0.5), size = 4, color = "white") +
  ggtitle("Share of Medals by Top 5 Continents - Paris 2024") +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(df_new, aes(x = Rank, y = Total)) +
  geom_point(color = "steelblue", size = 3) +
  geom_text(aes(label = NOC), vjust = -0.5, size = 3) +
  labs(title = "Rank vs Total Medals - Paris 2024", x = "Rank", y = "Total Medals") +
  theme_minimal()+
  theme(plot.title = element_text(hjust = 0.5))

# Sample data
top10 <- df_new %>%
  arrange(desc(Total)) %>%
  slice_head(n = 10)

ggplot(top10, aes(x = reorder(NOC, Total), y = Total)) +
  geom_segment(aes(xend = NOC, y = 0, yend = Total), color = "grey") +
  geom_point(size = 5, color = "steelblue") +
  coord_flip() +
  labs(title = "Top 10 Countries by Total Medals",
       x = "", y = "Total Medals") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

# Prepare data - top 10 countries by total medals
top10 <- df_new %>%
  arrange(desc(Total)) %>%
  slice_head(n = 10)

# Create treemap
ggplot(top10, aes(area = Total, fill = NOC, label = paste(NOC, "\n", Total))) +
  geom_treemap(color = "white") +
  geom_treemap_text(colour = "white", place = "centre", grow = TRUE) +
  labs(title = "Top 10 Countries by Total Medals (Treemap)") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),
        legend.position = "none")

# Select top 5 countries by total medals
top_countries <- df_new %>%
  arrange(desc(Total)) %>%
  slice_head(n = 5) %>%
  select(NOC, Gold, Silver, Bronze)

# fmsb requires first two rows to be max and min values for scaling
max_vals <- c( max(top_countries$Gold),
               max(top_countries$Silver),
               max(top_countries$Bronze) )
min_vals <- c(0, 0, 0)

# Prepare data
radar_data <- rbind(max_vals, min_vals, top_countries[, -1])
rownames(radar_data) <- c("Max", "Min", top_countries$NOC)

# Plot Radar Chart
radarchart(radar_data,
           axistype = 1,
           pcol = rainbow(nrow(radar_data) - 2), # colors for countries
           plwd = 2,
           plty = 1,
           cglcol = "grey", cglty = 1, axislabcol = "black",
           cglwd = 0.8,
           vlcex = 0.9,
           title = "Medal Distribution")

legend("topright", legend = rownames(radar_data)[-c(1, 2)],
       col = rainbow(nrow(radar_data) - 2), lty = 1, lwd = 2, cex = 0.8)

# Sample 10 random countries
df_sample <- df_new %>%
  sample_n(20) %>% 
  mutate(Other = Total - Gold) %>%
  select(NOC, Gold, Other) %>%
  pivot_longer(cols = c(Gold, Other), names_to = "Medal_Type", values_to = "Count")

# Plot stacked 100% bar chart
ggplot(df_sample, aes(x = NOC, y = Count, fill = Medal_Type)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Gold-to-Total Medal Ratio by Country (Sample 20)",
    x = "Country",
    y = "Percentage"
  ) +
  scale_fill_manual(values = c("Gold" = "gold", "Other" = "gray70")) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )