We try to make a historical report on the medals of the Indonesian contingent during the Olympics from 1896 to 2016.
The participation of the Indonesian contingent in fighting in the Olympics needs to be greatly appreciated. Making the nation proud with medals
#chunk setup
knitr::opts_chunk$set(echo = TRUE,
fig.align = "center",
comment = '#')
#set up scientific notation
options(scipen = 9999)
#clear global enviroment
rm(list = ls())#load Library
library(dplyr)#
# Attaching package: 'dplyr'
# The following objects are masked from 'package:stats':
#
# filter, lag
# The following objects are masked from 'package:base':
#
# intersect, setdiff, setequal, union
library(skimr)
library(data.table)#
# Attaching package: 'data.table'
# The following objects are masked from 'package:dplyr':
#
# between, first, last
#load data from some folder
event <- read.csv("data_input/athlete_events.csv")Data successfully loaded. let’s go
#show first 6 data table
rmarkdown::paged_table(head(event))#show last 6 data table
rmarkdown::paged_table(tail(event))#Check Data Dimension
dim(event)# [1] 271116 15
#column names
names(event)# [1] "ID" "Name" "Sex" "Age" "Height" "Weight" "Team" "NOC"
# [9] "Games" "Year" "Season" "City" "Sport" "Event" "Medal"
Summary :
Here we will only look at the acquisition of special medals for the country of Indonesia.
For that, we do subsetting based on the NOC column with the INA code
#subsetting only indonesia data
ina_medal <- event[event$NOC == 'INA',]
#remove unused columnd `ID`(1), `Age`(4), Height` (5),`Weight` (6) ( no needed )
ina_medal <- ina_medal[,c(-1,-4,-5,-6)]
rmarkdown::paged_table(ina_medal)#Checking data dimension after subsetting
dim(ina_medal)# [1] 412 11
names(ina_medal)# [1] "Name" "Sex" "Team" "NOC" "Games" "Year" "Season" "City"
# [9] "Sport" "Event" "Medal"
#check type data
str(ina_medal)# 'data.frame': 412 obs. of 11 variables:
# $ Name : chr "Patmawati Abdul Wahid" "Selvyana Adrian-Sofyan" "Selvyana Adrian-Sofyan" "Selvyana Adrian-Sofyan" ...
# $ Sex : chr "F" "F" "F" "F" ...
# $ Team : chr "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
# $ NOC : chr "INA" "INA" "INA" "INA" ...
# $ Games : chr "2004 Summer" "1984 Summer" "1988 Summer" "1988 Summer" ...
# $ Year : int 2004 1984 1988 1988 2016 2012 2016 2012 2016 1972 ...
# $ Season: chr "Summer" "Summer" "Summer" "Summer" ...
# $ City : chr "Athina" "Los Angeles" "Seoul" "Seoul" ...
# $ Sport : chr "Weightlifting" "Shooting" "Shooting" "Shooting" ...
# $ Event : chr "Weightlifting Women's Lightweight" "Shooting Women's Sporting Pistol, 25 metres" "Shooting Women's Air Pistol, 10 metres" "Shooting Women's Sporting Pistol, 25 metres" ...
# $ Medal : chr NA NA NA NA ...
#cek NA value
colSums(is.na(ina_medal))# Name Sex Team NOC Games Year Season City Sport Event Medal
# 0 0 0 0 0 0 0 0 0 0 371
anyNA(ina_medal)# [1] TRUE
NA missing value in the Medal column because no medals were achievedNA value with No Medal #change `NA` values in Medal column with No Medal
ina_medal$Medal[is.na(ina_medal$Medal)] <- "No Medal"#Change data type `as.factor`
ina_medal$Sex <- as.factor(ina_medal$Sex)
ina_medal$Team <- as.factor(ina_medal$Team)
ina_medal$Games <- as.factor(ina_medal$Games)
ina_medal$City <- as.factor(ina_medal$City)
ina_medal$Sport <- as.factor(ina_medal$Sport)
ina_medal$Medal <- as.factor(ina_medal$Medal)
#check level Medal
levels(ina_medal$Medal)# [1] "Bronze" "Gold" "No Medal" "Silver"
summary(ina_medal)# Name Sex Team NOC
# Length:412 F:136 Indonesia :355 Length:412
# Class :character M:276 Indonesia-1: 26 Class :character
# Mode :character Indonesia-2: 26 Mode :character
# Partenope : 3
# Tengiri : 2
#
#
# Games Year Season City
# 2000 Summer: 54 Min. :1952 Length:412 Sydney : 54
# 1992 Summer: 51 1st Qu.:1988 Class :character Barcelona : 51
# 1996 Summer: 45 Median :1996 Mode :character Atlanta : 45
# 1988 Summer: 43 Mean :1992 Seoul : 43
# 2004 Summer: 39 3rd Qu.:2004 Athina : 39
# 2016 Summer: 31 Max. :2016 Rio de Janeiro: 31
# (Other) :149 (Other) :149
# Sport Event Medal
# Badminton : 98 Length:412 Bronze : 13
# Weightlifting: 49 Class :character Gold : 11
# Athletics : 44 Mode :character No Medal:371
# Swimming : 42 Silver : 17
# Archery : 38
# Boxing : 21
# (Other) :120
Conclusion :
#add new column for point of medal where value `No Medal` = 0 and have medal =1
ina_medal$ValMedal <- case_when(ina_medal$Medal == "No Medal" ~ 0,
TRUE ~ 1)
data_medal <- (ina_medal[ina_medal$Medal != 'No Medal',
c("Year","Name","Sex","Sport","Medal","ValMedal")])
plot(factor(data_medal$Medal),
main = "Medals Achiement ",
ylab = "Total Medal",
col = c("#cd7f32","#ffd700","#c0c0c0")
)#create aggregate date
yearly_medal <- aggregate(ValMedal ~ Year, data = ina_medal, FUN = "sum")
#generate plot
plot(yearly_medal,pch = 21,
type = "b",
bg = "blue",
col = 'red',
lwd = 3,
main = "Yearly Medal Achievement",
xlab = "Year",
ylab = "Medal count",
xlim = c(1951,2018),
ylim = c(0,12),
lty = 1,
las = 2)
text(yearly_medal,
labels = (yearly_medal$ValMedal),
cex = 0.9,
pos = 3)
axis(2,at = seq(min(yearly_medal$Year),max(yearly_medal$Year), by=1))#create aggregate medal by sports
sport_medal <- aggregate(ValMedal ~ Sport, data = ina_medal, FUN = "sum")
#sort order total medal by sport decending
sport_medal <- sport_medal[order(sport_medal$ValMedal, decreasing = F),]
#generat barplot without library (plot basic)
barplot(sport_medal$ValMedal,
main = "Medal Achievement based on the sports participated",
horiz = F,
border = "red",
col = "blue",
density = 50,
names.arg = sport_medal$Sport,
las = 2, #make x axis rotate 90 degree
cex.names = 0.75, #set x axis label font size as 75%
cex.axis = 0.75, #set y axis label font size as 75%
ylim = c(0,30), #set y axis label limits range 0 to 30
)#create aggregate by name, sports and medal
name_medal <- aggregate(ValMedal ~ Name+Sport +Medal, ina_medal,FUN = "sum")
#subsetting only have medals ( Medal more than zero)
name_medal <- name_medal[name_medal$ValMedal > 0,]#create pattern for sorting medal as factor
md_level = c("Gold","Bronze","Silver")
mdl = factor(name_medal$Medal, levels = md_level)
#generate matrix data using xtabs
xtab_data <- xtabs(ValMedal ~ Name+mdl, data = name_medal)
#generate data frame for matrix result
df_data <- as.data.frame.matrix(xtab_data)
#add new column total medal
df_data$TotalMedal <- df_data$Gold + df_data$Bronze + df_data$Silver
#sort order by Total Medal descending
df_data_ord <- df_data[order(df_data$TotalMedal, decreasing = T),]
#print table output all line in once
rmarkdown::paged_table(df_data_ord, options = list(rows.print = 35))