#install libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
library(tidyselect)
## Warning: package 'tidyselect' was built under R version 3.6.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(data.table)
## Warning: package 'data.table' was built under R version 3.6.3
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.6.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(forcats)
## Warning: package 'forcats' was built under R version 3.6.3
library(rvest)
## Warning: package 'rvest' was built under R version 3.6.3
## Loading required package: xml2
## Warning: package 'xml2' was built under R version 3.6.3
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.6.3
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:magrittr':
##
## inset
## The following object is masked from 'package:plotly':
##
## wind
library(stringr)
library("maps")
## Warning: package 'maps' was built under R version 3.6.3
## [1] 0
## ï..City Year Sport Discipline Event
## 1 Beijing 2008 Taekwondo Taekwondo - 58 kg
## 2 Los Angeles 1984 Boxing Boxing 71-75kg
## 3 Los Angeles 1984 Boxing Boxing 75 - 81kg (light-heavyweight)
## 4 Barcelona 1992 Athletics Athletics 1500m
## 5 Barcelona 1992 Boxing Boxing 54 - 57kg (featherweight)
## 6 Atlanta 1996 Athletics Athletics 1500m
## Athlete Gender Country_Code Country Event_gender Medal Gold
## 1 NIKPAI, Rohullah Men AFG Afghanistan M Bronze 0
## 2 ZAOUI, Mohamed Men ALG Algeria M Bronze 0
## 3 MOUSSA, Mustapha Men ALG Algeria M Bronze 0
## 4 BOULMERKA, Hassiba Women ALG Algeria W Gold 1
## 5 SOLTANI, Hocine Men ALG Algeria M Bronze 0
## 6 MORCELI, Nourredine Men ALG Algeria M Gold 1
## Silver Bronze All
## 1 0 1 1
## 2 0 1 1
## 3 0 1 1
## 4 0 0 1
## 5 0 1 1
## 6 0 0 1
## ï..City Year Sport Discipline
## Beijing :2042 Min. :1976 Aquatics :2210 Athletics :1523
## Sydney :2015 1st Qu.:1984 Athletics :1523 Swimming :1422
## Athens :1998 Median :1996 Rowing :1377 Rowing :1377
## Atlanta :1859 Mean :1994 Hockey : 817 Hockey : 817
## Barcelona:1705 3rd Qu.:2004 Gymnastics: 783 Handball : 780
## Seoul :1546 Max. :2008 Handball : 780 Artistic G.: 672
## (Other) :4151 (Other) :7826 (Other) :8725
## Event Athlete Gender
## hockey : 817 PHELPS, Michael : 16 Men :9388
## handball : 780 ANDRIANOV, Nikolay: 12 Women:5928
## football : 669 FISCHER, Birgit : 12
## volleyball : 647 NEMOV, Alexei : 12
## basketball : 646 THOMPSON, Jenny : 12
## eight with coxswain (8+): 486 TORRES, Dara : 12
## (Other) :11271 (Other) :15240
## Country_Code Country Event_gender Medal
## USA :1992 United States:1992 M:8817 Bronze:5258
## RUS :1882 Russia :1882 W:5773 Gold :5042
## GER :1662 Germany :1662 X: 726 Silver:5016
## AUS : 798 Australia : 798
## CHN : 679 China : 679
## ITA : 486 Italy : 486
## (Other):7817 (Other) :7817
## Gold Silver Bronze All
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :1
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1
## Mean :0.3292 Mean :0.3275 Mean :0.3433 Mean :1
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1
##
## City Year Sport Discipline Event
## 1 Beijing 2008 Taekwondo Taekwondo - 58 kg
## 2 Los Angeles 1984 Boxing Boxing 71-75kg
## 3 Los Angeles 1984 Boxing Boxing 75 - 81kg (light-heavyweight)
## 4 Barcelona 1992 Athletics Athletics 1500m
## 5 Barcelona 1992 Boxing Boxing 54 - 57kg (featherweight)
## 6 Atlanta 1996 Athletics Athletics 1500m
## Athlete Gender Country_Code Country Event_gender Medal Gold
## 1 NIKPAI, Rohullah Men AFG Afghanistan M Bronze 0
## 2 ZAOUI, Mohamed Men ALG Algeria M Bronze 0
## 3 MOUSSA, Mustapha Men ALG Algeria M Bronze 0
## 4 BOULMERKA, Hassiba Women ALG Algeria W Gold 1
## 5 SOLTANI, Hocine Men ALG Algeria M Bronze 0
## 6 MORCELI, Nourredine Men ALG Algeria M Gold 1
## Silver Bronze All
## 1 0 1 1
## 2 0 1 1
## 3 0 1 1
## 4 0 0 1
## 5 0 1 1
## 6 0 0 1
# Plot female vs male athletes
ggplot(Athlete_GenderCount, aes(x=Year, y=Athletes, group=Gender, color=Gender)) +
geom_point(size=2) +
geom_line() +
geom_text(x=1996, y=500, label= "IOC Mandates
Women Competitors", color="red")+
geom_text(x=1994, y= 950, label= "Games Are
Boycott Free", color = "Black")+
scale_color_manual(values=c("Blue","Red")) +
#scale_x_continuous(breaks=c(1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008))+
labs(y= "Number of Athletes", title ="Total Number of Male and Female Olympians in the Summer Olympics 1976-2008") +
theme(plot.title = element_text(hjust = 0.5))

#Make Top 5 female bar plot "Code Help from Jonathan"----------------------------------------------------------
Torres <- filter(olympics, Athlete == "TORRES, Dara")
thompson <- filter(olympics, Athlete == "THOMPSON, Jenny")
fischer <- filter(olympics, Athlete == "FISCHER, Birgit")
coughlin <- filter(olympics, Athlete == "COUGHLIN, Natalie")
Van_Almsick <- filter(olympics, Athlete == "VAN ALMSICK, Franziska")
five <- rbind(Torres, thompson, fischer, coughlin, Van_Almsick)
Female_Athlete <- group_by(five, Athlete, Country_Code, Medal) %>% summarize(All_Medals = sum(Bronze)+
sum(Silver) + sum(Gold))
Female_Athlete$Country_Code[which(Female_Athlete$Country_Code == "USA")] = "us"
## Warning in `[<-.factor`(`*tmp*`, which(Female_Athlete$Country_Code == "USA"), :
## invalid factor level, NA generated
Female_Athlete$Country_Code[which(Female_Athlete$Country_Code == "RUS")] = "ru"
## Warning in `[<-.factor`(`*tmp*`, which(Female_Athlete$Country_Code == "RUS"), :
## invalid factor level, NA generated
Female_Athlete$Country_Code[which(Female_Athlete$Country_Code == "GER")] = "de"
## Warning in `[<-.factor`(`*tmp*`, which(Female_Athlete$Country_Code == "GER"), :
## invalid factor level, NA generated
Female_Athlete$Medal <- factor(Female_Athlete$Medal, levels = c("Bronze", "Silver", "Gold"))
Top5_Female_Winners <- ggplot(Female_Athlete, aes(x = reorder(Athlete, All_Medals), y = All_Medals, fill=Medal)) +
geom_bar(stat="identity", colour= "black", size= .3, width= .4)+
coord_flip() +
scale_fill_manual(values = c("#E69F00", "grey", "gold"), guide = guide_legend(reverse = TRUE)) +
scale_y_continuous(breaks=c(0,2,4,6,8,10,12,14,16))+
labs(y="Number of Medals", x=" Olympian")+
ggtitle("Top 5 Female Olympians from 1976-2008")+
theme_bw()+
theme(panel.border=element_rect(colour="black", fill=NA, size=1),
legend.background = element_rect(colour="black", size=0.5),
axis.ticks.y=element_blank(),
panel.grid.minor = element_line(size = 0.1, linetype = 'dashed',
colour = "grey"),
panel.grid.major = element_line(size = 0.35, linetype = 'dashed',
colour = "grey"))
Top5_Female_Winners

#Animate gender line plot-----------------------------------------------------------------------
library(gganimate) # for animation layer
## Warning: package 'gganimate' was built under R version 3.6.3
library(gifski) # for making the animation; restart R after installing
## Warning: package 'gifski' was built under R version 3.6.3
library(gapminder) # for gapminder data about countries
## Warning: package 'gapminder' was built under R version 3.6.3
Olympics_Gender_TimePlot <- ggplot(Athlete_GenderCount, aes(x=Year, y=Athletes, color=Gender)) +
geom_point(aes(group = seq_along(Year))) +
geom_point(size=2)+
geom_line() +
geom_text(x=1996, y=500, label= "IOC Mandates
Women Competitors", color="red")+
geom_text(x=1994, y= 950, label= "Games Are
Boycott Free", color = "Black")+
transition_reveal(Year)+
scale_color_manual(values=c("darkblue","red")) +
scale_x_continuous(breaks=c(1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008))+
labs(y= "Number of Athletes", title = "Total Number of Male and Female Olympians From 1976-2008") +
theme(plot.title = element_text(hjust = 0.5))
Olympics_Gender_TimePlot

# Save as gif:
anim_save("Olympics_Gender_LinePlot2.gif")
Athlete_time_plot<-olympics %>%
group_by(Year) %>%
summarize(Athletes = length(unique(Athlete)))%>%
ggplot(aes(x = Year, y = Athletes)) +
geom_line() +
geom_point() +
scale_color_manual(values=c("red")) +
scale_x_continuous(breaks=c(1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008))+
labs(x = "Year", y = "Number of Athletes", title = "Number of Athletes Participating in the Olympics Overtime (1976-2008)")+
theme_grey()
Athlete_time_plot

#Try a word cloud---------------------------------------------------
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.6.3
## Loading required package: RColorBrewer
library(RColorBrewer)
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 3.6.3
library(tm)
## Warning: package 'tm' was built under R version 3.6.3
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 3.6.3
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
#Vector containing only text
Sport_cloud<- olympics$Sport
##create a corpus
olympicsSportCloud<-Corpus(VectorSource(Sport_cloud))
#create document term matrix
dtm <- TermDocumentMatrix(olympicsSportCloud)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
Sportcloud_df <- data.frame(word = names(words),freq=words)
#Generate word cloud
set.seed(1234) # for reproducibility
wordcloud(words = Sportcloud_df$word, freq = Sportcloud_df$freq, min.freq = 1,
max.words=200, random.order=FALSE, rot.per=0.35, scale = c(3.5, 0.60),
colors=brewer.pal(8, "Dark2"))

set.seed(1234)
wordcloud2(data=Sportcloud_df, size=1.6, color= "random-dark", shape ='circle' )
#Create a highest Medal Type map----------------------------------------------------------------------
olympic_Gold <- olympics %>%
filter(Medal == "Gold") %>%
group_by(Country) %>%
summarize(Medal_Total = length(Medal)) %>%
arrange(desc(Medal_Total)) %>%
mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total))
olympic_Silver <- olympics %>%
group_by(Country) %>%
filter(Medal == "Silver") %>%
summarize(Medal_Total = length(Medal)) %>%
arrange(desc(Medal_Total)) %>%
mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
filter(!Country %in% olympic_Gold$Country) %>%
mutate(Medal_Total = "Silver")
olympic_Bronze <- olympics %>%
group_by(Country) %>%
filter(Medal == "Bronze") %>%
summarize(Medal_Total = length(Medal)) %>%
arrange(desc(Medal_Total)) %>%
mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
filter(!(region %in% olympic_Gold$region & region %in% olympic_Silver$region) )%>%
mutate(Medal_Total = "Bronze")
olympic_Gold$Medal_Total <- "Gold"
Medal_Total <- rbind(olympic_Gold, olympic_Silver)
Medal_Total <- rbind(Medal_Total, olympic_Bronze)
Medal_Total$region[which(Medal_Total$region == "United States")] = "USA"
world_map <- map_data("world")
olympic_world_map <- left_join(world_map, Medal_Total, by ='region')
olympic_world_map$Medal_Total[is.na(olympic_world_map$Medal_Total)] <- "No Medal"
olympic_world_map$Medal_Total<-factor(olympic_world_map$Medal_Total, levels = c("Gold","Silver", "Bronze", "No Medal" ))
#Create a map for overlay
#winning_Countries<-olympic_world_map%>%dplyr::filter(!Medal_Total=="No Medal")
ggplot(data = olympic_world_map, aes(x=long, y=lat, group = group, fill=Medal_Total)) +
geom_polygon(colour='black') +
scale_fill_manual(name= "Medal Won", values = c("gold", "darkgrey", "#E69F00", "white"))+
labs(x= "longtude", y= "latitude",title = 'Highest Medal Type Won Per Country in Olympic Games (1976-2008)') +
theme(
panel.background = element_rect(fill = "lightblue",
colour = "lightblue",
size = 0.5, linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'solid',
colour = "white"),
panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
colour = "white")+
theme(legend.position="right"))

#Create Womens medal map-----------------------------------------------------------------------
Womens_olympic_Gold <- olympics %>%
filter(Medal == "Gold", Gender=="Women") %>%
group_by(Country) %>%
summarize(Medal_Total = length(Medal)) %>%
arrange(desc(Medal_Total)) %>%
mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total))
Womens_olympic_Silver <- olympics %>%
group_by(Country) %>%
filter(Medal == "Silver", Gender=="Women") %>%
summarize(Medal_Total = length(Medal)) %>%
arrange(desc(Medal_Total)) %>%
mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
filter(!Country %in% olympic_Gold$Country) %>%
mutate(Medal_Total = "Silver")
Womens_olympic_Bronze <- olympics %>%
group_by(Country) %>%
filter(Medal == "Bronze", Gender=="Women") %>%
summarize(Medal_Total = length(Medal)) %>%
arrange(desc(Medal_Total)) %>%
mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
filter(!(region %in% olympic_Gold$region & region %in% olympic_Silver$region) )%>%
mutate(Medal_Total = "Bronze")
Womens_olympic_Gold$Medal_Total <- "Gold"
Medal_Total <- rbind(Womens_olympic_Gold, Womens_olympic_Silver)
Medal_Total <- rbind(Medal_Total, Womens_olympic_Bronze)
Medal_Total$region[which(Medal_Total$region == "United States")] = "USA"
world_map <- map_data("world")
olympic_world_map <- left_join(world_map, Medal_Total, by ='region')
olympic_world_map$Medal_Total[is.na(olympic_world_map$Medal_Total)] <- "No Medal"
olympic_world_map$Medal_Total<-factor(olympic_world_map$Medal_Total, levels = c("Gold","Silver", "Bronze", "No Medal" ))
#Create a map for overlay
#winning_Countries<-olympic_world_map%>%dplyr::filter(!Medal_Total=="No Medal")
ggplot(data = olympic_world_map, aes(x=long, y=lat, group = group, fill=Medal_Total)) +
geom_polygon(colour='black') +
scale_fill_manual(name= "Medal Won", values = c("gold", "darkgrey", "#E69F00", "white"))+
labs(x= "longtude", y= "latitude",title = 'Highest Medal Type Won for Women Per Country in Olympic Games (1976-2008)') +
theme(
panel.background = element_rect(fill = "lightblue",
colour = "lightblue",
size = 0.5, linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'solid',
colour = "white"),
panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
colour = "white")+
theme(legend.position="right"))

#Create a Mens Map----------------------------------------------------------------------------------
Mens_olympic_Gold <- olympics %>%
filter(Medal == "Gold", Gender=="Men") %>%
group_by(Country) %>%
summarize(Medal_Total = length(Medal)) %>%
arrange(desc(Medal_Total)) %>%
mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total))
Mens_olympic_Silver <- olympics %>%
group_by(Country) %>%
filter(Medal == "Silver", Gender=="Men") %>%
summarize(Medal_Total = length(Medal)) %>%
arrange(desc(Medal_Total)) %>%
mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
filter(!Country %in% olympic_Gold$Country) %>%
mutate(Medal_Total = "Silver")
Mens_olympic_Bronze <- olympics %>%
group_by(Country) %>%
filter(Medal == "Bronze", Gender=="Men") %>%
summarize(Medal_Total = length(Medal)) %>%
arrange(desc(Medal_Total)) %>%
mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
filter(!(region %in% olympic_Gold$region & region %in% olympic_Silver$region) )%>%
mutate(Medal_Total = "Bronze")
Mens_olympic_Gold$Medal_Total <- "Gold"
Medal_Total <- rbind(Mens_olympic_Gold, Mens_olympic_Silver)
Medal_Total <- rbind(Medal_Total, Mens_olympic_Bronze)
Medal_Total$region[which(Medal_Total$region == "United States")] = "USA"
world_map <- map_data("world")
olympic_world_map <- left_join(world_map, Medal_Total, by ='region')
olympic_world_map$Medal_Total[is.na(olympic_world_map$Medal_Total)] <- "No Medal"
olympic_world_map$Medal_Total<-factor(olympic_world_map$Medal_Total, levels = c("Gold","Silver", "Bronze", "No Medal" ))
#Create a map for overlay
#winning_Countries<-olympic_world_map%>%dplyr::filter(!Medal_Total=="No Medal")
ggplot(data = olympic_world_map, aes(x=long, y=lat, group = group, fill=Medal_Total)) +
geom_polygon(colour='black') +
scale_fill_manual(name= "Medal Won", values = c("gold", "darkgrey", "#E69F00", "white"))+
labs(x= "longtude", y= "latitude",title = 'Highest Medal Type Won for Men Per Country in Olympic Games (1976-2008)') +
theme(
panel.background = element_rect(fill = "lightblue",
colour = "lightblue",
size = 0.5, linetype = "solid"),
panel.grid.major = element_line(size = 0.5, linetype = 'solid',
colour = "white"),
panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
colour = "white")+
theme(legend.position="right"))
