1 Background

We try to make a historical report on the medals of the Indonesian contingent during the Olympics from 1896 to 2016.

The participation of the Indonesian contingent in fighting in the Olympics needs to be greatly appreciated. Making the nation proud with medals


1.1 Prepapration

1.1.1 Setup

#chunk setup
knitr::opts_chunk$set(echo = TRUE,
                      fig.align = "center",
                      comment = '#')

#set up scientific notation
options(scipen = 9999)

#clear global enviroment
rm(list = ls())

1.1.2 Load Library

#load Library

library(dplyr)
# 
# Attaching package: 'dplyr'
# The following objects are masked from 'package:stats':
# 
#     filter, lag
# The following objects are masked from 'package:base':
# 
#     intersect, setdiff, setequal, union
library(skimr)
library(data.table)
# 
# Attaching package: 'data.table'
# The following objects are masked from 'package:dplyr':
# 
#     between, first, last

2 Pre Processing

2.1 Load data source

#load data from some folder

event <- read.csv("data_input/athlete_events.csv")

Data successfully loaded. let’s go

2.2 Data Inspection

#show first 6 data table

rmarkdown::paged_table(head(event))
#show last 6 data table
rmarkdown::paged_table(tail(event))
#Check Data Dimension

dim(event)
# [1] 271116     15
#column names

names(event)
#  [1] "ID"     "Name"   "Sex"    "Age"    "Height" "Weight" "Team"   "NOC"   
#  [9] "Games"  "Year"   "Season" "City"   "Sport"  "Event"  "Medal"

Summary :

  • There are 271,116 rows and 15 columns in the sources data
  • With column names as follow :
    ID, Name, Sex, Age, Height, Weight, Team, NOC, Games, Year, Season, City, Sport, Event, Medal

Here we will only look at the acquisition of special medals for the country of Indonesia.

For that, we do subsetting based on the NOC column with the INA code

#subsetting only indonesia data

ina_medal <- event[event$NOC == 'INA',]

#remove unused columnd `ID`(1), `Age`(4), Height` (5),`Weight` (6) ( no needed )

ina_medal <- ina_medal[,c(-1,-4,-5,-6)]

rmarkdown::paged_table(ina_medal)
#Checking data dimension after subsetting

dim(ina_medal)
# [1] 412  11
names(ina_medal)
#  [1] "Name"   "Sex"    "Team"   "NOC"    "Games"  "Year"   "Season" "City"  
#  [9] "Sport"  "Event"  "Medal"
  • Now the data is : 412 rows and 11 columns
  • With column names as follow :
    Name, Sex, Team, NOC, Games, Year, Season, City, Sport, Event, Medal
#check type data
str(ina_medal)
# 'data.frame': 412 obs. of  11 variables:
#  $ Name  : chr  "Patmawati Abdul Wahid" "Selvyana Adrian-Sofyan" "Selvyana Adrian-Sofyan" "Selvyana Adrian-Sofyan" ...
#  $ Sex   : chr  "F" "F" "F" "F" ...
#  $ Team  : chr  "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
#  $ NOC   : chr  "INA" "INA" "INA" "INA" ...
#  $ Games : chr  "2004 Summer" "1984 Summer" "1988 Summer" "1988 Summer" ...
#  $ Year  : int  2004 1984 1988 1988 2016 2012 2016 2012 2016 1972 ...
#  $ Season: chr  "Summer" "Summer" "Summer" "Summer" ...
#  $ City  : chr  "Athina" "Los Angeles" "Seoul" "Seoul" ...
#  $ Sport : chr  "Weightlifting" "Shooting" "Shooting" "Shooting" ...
#  $ Event : chr  "Weightlifting Women's Lightweight" "Shooting Women's Sporting Pistol, 25 metres" "Shooting Women's Air Pistol, 10 metres" "Shooting Women's Sporting Pistol, 25 metres" ...
#  $ Medal : chr  NA NA NA NA ...
#cek NA value

colSums(is.na(ina_medal))
#   Name    Sex   Team    NOC  Games   Year Season   City  Sport  Event  Medal 
#      0      0      0      0      0      0      0      0      0      0    371
anyNA(ina_medal)
# [1] TRUE
  • There is a NA missing value in the Medal column because no medals were achieved
  • We replace the NA value with No Medal
  #change `NA` values in Medal column with No Medal
  ina_medal$Medal[is.na(ina_medal$Medal)] <- "No Medal"
  • There are several data types that are not in accordance with the contents of the data itself, let’s adjust the data types:
#Change data type `as.factor`
ina_medal$Sex <- as.factor(ina_medal$Sex)
ina_medal$Team <- as.factor(ina_medal$Team)
ina_medal$Games <- as.factor(ina_medal$Games)
ina_medal$City <- as.factor(ina_medal$City)
ina_medal$Sport <- as.factor(ina_medal$Sport)
ina_medal$Medal <- as.factor(ina_medal$Medal)

#check level Medal
levels(ina_medal$Medal)
# [1] "Bronze"   "Gold"     "No Medal" "Silver"

3 Data Processing

3.1 Data Summary

summary(ina_medal)
#      Name           Sex              Team         NOC           
#  Length:412         F:136   Indonesia  :355   Length:412        
#  Class :character   M:276   Indonesia-1: 26   Class :character  
#  Mode  :character           Indonesia-2: 26   Mode  :character  
#                             Partenope  :  3                     
#                             Tengiri    :  2                     
#                                                                 
#                                                                 
#          Games          Year         Season                      City    
#  2000 Summer: 54   Min.   :1952   Length:412         Sydney        : 54  
#  1992 Summer: 51   1st Qu.:1988   Class :character   Barcelona     : 51  
#  1996 Summer: 45   Median :1996   Mode  :character   Atlanta       : 45  
#  1988 Summer: 43   Mean   :1992                      Seoul         : 43  
#  2004 Summer: 39   3rd Qu.:2004                      Athina        : 39  
#  2016 Summer: 31   Max.   :2016                      Rio de Janeiro: 31  
#  (Other)    :149                                     (Other)       :149  
#            Sport        Event                Medal    
#  Badminton    : 98   Length:412         Bronze  : 13  
#  Weightlifting: 49   Class :character   Gold    : 11  
#  Athletics    : 44   Mode  :character   No Medal:371  
#  Swimming     : 42                      Silver  : 17  
#  Archery      : 38                                    
#  Boxing       : 21                                    
#  (Other)      :120

Conclusion :

  • Indonesia participated in the Olympics for the first time in 1952
  • During the olimpics periode 1952 - 2016 has sent a total of 412 athletes with details of 136 female athletes and 276 male athletes
  • The total medals that has been achieved are : 41 of Medals with details :
    • Gold : 11
    • Bronze : 13
    • Silver : 17

3.2 Visualization

3.2.1 Total Medals

#add new column for point of  medal where value `No Medal` = 0 and have medal =1 
ina_medal$ValMedal <- case_when(ina_medal$Medal == "No Medal" ~ 0,
                             TRUE ~ 1)

data_medal <- (ina_medal[ina_medal$Medal != 'No Medal',
                         c("Year","Name","Sex","Sport","Medal","ValMedal")])

plot(factor(data_medal$Medal),
      main = "Medals Achiement ",
      ylab = "Total Medal",
      col = c("#cd7f32","#ffd700","#c0c0c0")
)

3.2.2 Yearly Achievement

#create aggregate date
yearly_medal <- aggregate(ValMedal ~ Year, data = ina_medal, FUN = "sum")

#generate plot

plot(yearly_medal,pch = 21,
     type = "b", 
     bg = "blue", 
     col = 'red', 
     lwd = 3,
     main = "Yearly Medal Achievement",
     xlab = "Year",
     ylab = "Medal count",
     xlim = c(1951,2018),
     ylim = c(0,12),
     lty = 1,
     las = 2)
text(yearly_medal,
     labels = (yearly_medal$ValMedal),
     cex = 0.9,
     pos = 3)
axis(2,at = seq(min(yearly_medal$Year),max(yearly_medal$Year), by=1))

3.2.3 Achiements by Sports

#create aggregate medal by sports
sport_medal <- aggregate(ValMedal ~ Sport, data = ina_medal, FUN = "sum")

#sort order total medal by sport decending
sport_medal <- sport_medal[order(sport_medal$ValMedal, decreasing = F),]

#generat barplot without library (plot basic)
barplot(sport_medal$ValMedal,
        main = "Medal Achievement based on the sports participated",
        horiz = F,
        border = "red",
        col = "blue",
        density = 50,
        names.arg = sport_medal$Sport,
        las = 2, #make x axis rotate 90 degree
        cex.names = 0.75, #set x axis label font size as 75%
        cex.axis = 0.75, #set y axis label font size as 75%
        ylim = c(0,30), #set y axis label limits range 0 to 30
        )

3.2.4 Medals by athletes

#create aggregate by name, sports and medal
name_medal <- aggregate(ValMedal ~ Name+Sport +Medal, ina_medal,FUN = "sum")

#subsetting only have medals ( Medal more than zero)
name_medal <- name_medal[name_medal$ValMedal > 0,]
#create pattern for sorting medal as factor
md_level = c("Gold","Bronze","Silver")
mdl = factor(name_medal$Medal, levels = md_level)
#generate matrix data using xtabs
xtab_data <- xtabs(ValMedal ~ Name+mdl, data = name_medal)

#generate data frame for matrix result
df_data <- as.data.frame.matrix(xtab_data)

#add new column total medal
df_data$TotalMedal <- df_data$Gold + df_data$Bronze + df_data$Silver

#sort order by Total Medal descending
df_data_ord <- df_data[order(df_data$TotalMedal, decreasing = T),]

#print table output all line in once
rmarkdown::paged_table(df_data_ord, options = list(rows.print = 35))