#install libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
library(tidyselect)
## Warning: package 'tidyselect' was built under R version 3.6.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(data.table)
## Warning: package 'data.table' was built under R version 3.6.3
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.6.3
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(forcats)
## Warning: package 'forcats' was built under R version 3.6.3
library(rvest)
## Warning: package 'rvest' was built under R version 3.6.3
## Loading required package: xml2
## Warning: package 'xml2' was built under R version 3.6.3
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.6.3
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:magrittr':
## 
##     inset
## The following object is masked from 'package:plotly':
## 
##     wind
library(stringr)
library("maps")
## Warning: package 'maps' was built under R version 3.6.3
## [1] 0
##       ï..City Year     Sport Discipline                         Event
## 1     Beijing 2008 Taekwondo  Taekwondo                       - 58 kg
## 2 Los Angeles 1984    Boxing     Boxing                       71-75kg
## 3 Los Angeles 1984    Boxing     Boxing 75 - 81kg (light-heavyweight)
## 4   Barcelona 1992 Athletics  Athletics                         1500m
## 5   Barcelona 1992    Boxing     Boxing     54 - 57kg (featherweight)
## 6     Atlanta 1996 Athletics  Athletics                         1500m
##               Athlete Gender Country_Code     Country Event_gender  Medal Gold
## 1    NIKPAI, Rohullah    Men          AFG Afghanistan            M Bronze    0
## 2      ZAOUI, Mohamed    Men          ALG     Algeria            M Bronze    0
## 3    MOUSSA, Mustapha    Men          ALG     Algeria            M Bronze    0
## 4  BOULMERKA, Hassiba  Women          ALG     Algeria            W   Gold    1
## 5     SOLTANI, Hocine    Men          ALG     Algeria            M Bronze    0
## 6 MORCELI, Nourredine    Men          ALG     Algeria            M   Gold    1
##   Silver Bronze All
## 1      0      1   1
## 2      0      1   1
## 3      0      1   1
## 4      0      0   1
## 5      0      1   1
## 6      0      0   1
##       ï..City          Year             Sport            Discipline  
##  Beijing  :2042   Min.   :1976   Aquatics  :2210   Athletics  :1523  
##  Sydney   :2015   1st Qu.:1984   Athletics :1523   Swimming   :1422  
##  Athens   :1998   Median :1996   Rowing    :1377   Rowing     :1377  
##  Atlanta  :1859   Mean   :1994   Hockey    : 817   Hockey     : 817  
##  Barcelona:1705   3rd Qu.:2004   Gymnastics: 783   Handball   : 780  
##  Seoul    :1546   Max.   :2008   Handball  : 780   Artistic G.: 672  
##  (Other)  :4151                  (Other)   :7826   (Other)    :8725  
##                       Event                     Athlete        Gender    
##  hockey                  :  817   PHELPS, Michael   :   16   Men  :9388  
##  handball                :  780   ANDRIANOV, Nikolay:   12   Women:5928  
##  football                :  669   FISCHER, Birgit   :   12               
##  volleyball              :  647   NEMOV, Alexei     :   12               
##  basketball              :  646   THOMPSON, Jenny   :   12               
##  eight with coxswain (8+):  486   TORRES, Dara      :   12               
##  (Other)                 :11271   (Other)           :15240               
##   Country_Code           Country     Event_gender    Medal     
##  USA    :1992   United States:1992   M:8817       Bronze:5258  
##  RUS    :1882   Russia       :1882   W:5773       Gold  :5042  
##  GER    :1662   Germany      :1662   X: 726       Silver:5016  
##  AUS    : 798   Australia    : 798                             
##  CHN    : 679   China        : 679                             
##  ITA    : 486   Italy        : 486                             
##  (Other):7817   (Other)      :7817                             
##       Gold            Silver           Bronze            All   
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :1  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :1  
##  Mean   :0.3292   Mean   :0.3275   Mean   :0.3433   Mean   :1  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1  
## 
##          City Year     Sport Discipline                         Event
## 1     Beijing 2008 Taekwondo  Taekwondo                       - 58 kg
## 2 Los Angeles 1984    Boxing     Boxing                       71-75kg
## 3 Los Angeles 1984    Boxing     Boxing 75 - 81kg (light-heavyweight)
## 4   Barcelona 1992 Athletics  Athletics                         1500m
## 5   Barcelona 1992    Boxing     Boxing     54 - 57kg (featherweight)
## 6     Atlanta 1996 Athletics  Athletics                         1500m
##               Athlete Gender Country_Code     Country Event_gender  Medal Gold
## 1    NIKPAI, Rohullah    Men          AFG Afghanistan            M Bronze    0
## 2      ZAOUI, Mohamed    Men          ALG     Algeria            M Bronze    0
## 3    MOUSSA, Mustapha    Men          ALG     Algeria            M Bronze    0
## 4  BOULMERKA, Hassiba  Women          ALG     Algeria            W   Gold    1
## 5     SOLTANI, Hocine    Men          ALG     Algeria            M Bronze    0
## 6 MORCELI, Nourredine    Men          ALG     Algeria            M   Gold    1
##   Silver Bronze All
## 1      0      1   1
## 2      0      1   1
## 3      0      1   1
## 4      0      0   1
## 5      0      1   1
## 6      0      0   1
# Plot female vs male athletes
ggplot(Athlete_GenderCount, aes(x=Year, y=Athletes, group=Gender, color=Gender)) +
  geom_point(size=2) +
  geom_line() +
  geom_text(x=1996, y=500, label= "IOC Mandates 
            Women Competitors", color="red")+
  geom_text(x=1994, y= 950, label= "Games Are 
            Boycott Free", color = "Black")+
  scale_color_manual(values=c("Blue","Red")) +
  #scale_x_continuous(breaks=c(1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008))+
  labs(y= "Number of Athletes", title ="Total Number of Male and Female Olympians in the Summer Olympics 1976-2008") +
  theme(plot.title = element_text(hjust = 0.5))

#Make Top 5 female bar plot "Code Help from Jonathan"----------------------------------------------------------
Torres <- filter(olympics, Athlete == "TORRES, Dara")
thompson <- filter(olympics, Athlete == "THOMPSON, Jenny")
fischer <- filter(olympics, Athlete == "FISCHER, Birgit")
coughlin <- filter(olympics, Athlete == "COUGHLIN, Natalie")
Van_Almsick <- filter(olympics, Athlete == "VAN ALMSICK, Franziska")

five <- rbind(Torres, thompson, fischer, coughlin, Van_Almsick)

Female_Athlete <- group_by(five, Athlete, Country_Code, Medal) %>% summarize(All_Medals = sum(Bronze)+
                                                                       sum(Silver) + sum(Gold))

Female_Athlete$Country_Code[which(Female_Athlete$Country_Code == "USA")] = "us"
## Warning in `[<-.factor`(`*tmp*`, which(Female_Athlete$Country_Code == "USA"), :
## invalid factor level, NA generated
Female_Athlete$Country_Code[which(Female_Athlete$Country_Code == "RUS")] = "ru"
## Warning in `[<-.factor`(`*tmp*`, which(Female_Athlete$Country_Code == "RUS"), :
## invalid factor level, NA generated
Female_Athlete$Country_Code[which(Female_Athlete$Country_Code == "GER")] = "de"
## Warning in `[<-.factor`(`*tmp*`, which(Female_Athlete$Country_Code == "GER"), :
## invalid factor level, NA generated
Female_Athlete$Medal <- factor(Female_Athlete$Medal, levels = c("Bronze", "Silver", "Gold"))


Top5_Female_Winners <- ggplot(Female_Athlete, aes(x = reorder(Athlete, All_Medals), y = All_Medals, fill=Medal)) +
  geom_bar(stat="identity", colour= "black", size= .3, width= .4)+
  coord_flip() +
  scale_fill_manual(values = c("#E69F00", "grey", "gold"), guide = guide_legend(reverse = TRUE)) +
  scale_y_continuous(breaks=c(0,2,4,6,8,10,12,14,16))+
  labs(y="Number of Medals", x=" Olympian")+
  ggtitle("Top 5 Female Olympians from 1976-2008")+
  theme_bw()+
  theme(panel.border=element_rect(colour="black", fill=NA, size=1),
        legend.background = element_rect(colour="black", size=0.5),
        axis.ticks.y=element_blank(),
        panel.grid.minor = element_line(size = 0.1, linetype = 'dashed',
                                        colour = "grey"),
        panel.grid.major = element_line(size = 0.35, linetype = 'dashed',
                                        colour = "grey"))
Top5_Female_Winners

#Animate gender line plot-----------------------------------------------------------------------
library(gganimate) # for animation layer
## Warning: package 'gganimate' was built under R version 3.6.3
library(gifski) # for making the animation; restart R after installing
## Warning: package 'gifski' was built under R version 3.6.3
library(gapminder) # for gapminder data about countries
## Warning: package 'gapminder' was built under R version 3.6.3
Olympics_Gender_TimePlot <- ggplot(Athlete_GenderCount, aes(x=Year, y=Athletes, color=Gender)) +
  geom_point(aes(group = seq_along(Year))) +
  geom_point(size=2)+
  geom_line()  +
  geom_text(x=1996, y=500, label= "IOC Mandates 
            Women Competitors", color="red")+
  geom_text(x=1994, y= 950, label= "Games Are 
            Boycott Free", color = "Black")+
  transition_reveal(Year)+
  scale_color_manual(values=c("darkblue","red")) +
  scale_x_continuous(breaks=c(1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008))+
  labs(y= "Number of Athletes", title = "Total Number of Male and Female Olympians From 1976-2008") +
  theme(plot.title = element_text(hjust = 0.5))
Olympics_Gender_TimePlot

# Save as gif:
anim_save("Olympics_Gender_LinePlot2.gif")
Athlete_time_plot<-olympics %>%
  group_by(Year) %>%
  summarize(Athletes = length(unique(Athlete)))%>%
  ggplot(aes(x = Year, y = Athletes)) +
  geom_line() +
  geom_point() +
  scale_color_manual(values=c("red")) +
  scale_x_continuous(breaks=c(1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008))+
  labs(x = "Year", y = "Number of Athletes", title = "Number of Athletes Participating in the Olympics Overtime (1976-2008)")+
  theme_grey()  

Athlete_time_plot

#Try a word cloud---------------------------------------------------

library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.6.3
## Loading required package: RColorBrewer
library(RColorBrewer)
library(wordcloud2) 
## Warning: package 'wordcloud2' was built under R version 3.6.3
library(tm)
## Warning: package 'tm' was built under R version 3.6.3
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 3.6.3
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
#Vector containing only text
Sport_cloud<- olympics$Sport

##create a corpus
olympicsSportCloud<-Corpus(VectorSource(Sport_cloud))

#create document term matrix
dtm <- TermDocumentMatrix(olympicsSportCloud) 
matrix <- as.matrix(dtm) 
words <- sort(rowSums(matrix),decreasing=TRUE) 
Sportcloud_df <- data.frame(word = names(words),freq=words)

#Generate word cloud
set.seed(1234) # for reproducibility 
wordcloud(words = Sportcloud_df$word, freq = Sportcloud_df$freq, min.freq = 1,
          max.words=200, random.order=FALSE, rot.per=0.35, scale = c(3.5, 0.60),
          colors=brewer.pal(8, "Dark2"))

set.seed(1234)
wordcloud2(data=Sportcloud_df, size=1.6, color= "random-dark", shape ='circle' )
#Create a highest Medal Type map----------------------------------------------------------------------
olympic_Gold <- olympics %>%
  filter(Medal == "Gold") %>%
  group_by(Country) %>%
  summarize(Medal_Total = length(Medal)) %>%
  arrange(desc(Medal_Total)) %>% 
  mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total))


olympic_Silver <- olympics %>%
  group_by(Country) %>%
  filter(Medal == "Silver") %>%
  summarize(Medal_Total = length(Medal)) %>%
  arrange(desc(Medal_Total)) %>% 
  mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
  filter(!Country %in% olympic_Gold$Country) %>%
  mutate(Medal_Total = "Silver")

olympic_Bronze <- olympics %>%
  group_by(Country) %>%
  filter(Medal == "Bronze") %>%
  summarize(Medal_Total = length(Medal)) %>%
  arrange(desc(Medal_Total)) %>% 
  mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
  filter(!(region %in% olympic_Gold$region & region %in% olympic_Silver$region) )%>%
  mutate(Medal_Total = "Bronze")

olympic_Gold$Medal_Total <- "Gold"


Medal_Total <- rbind(olympic_Gold, olympic_Silver)
Medal_Total <- rbind(Medal_Total, olympic_Bronze)

Medal_Total$region[which(Medal_Total$region == "United States")] = "USA"


world_map <- map_data("world")
olympic_world_map <- left_join(world_map, Medal_Total, by ='region')
olympic_world_map$Medal_Total[is.na(olympic_world_map$Medal_Total)] <- "No Medal"

olympic_world_map$Medal_Total<-factor(olympic_world_map$Medal_Total, levels = c("Gold","Silver", "Bronze", "No Medal" ))

#Create a map for overlay
#winning_Countries<-olympic_world_map%>%dplyr::filter(!Medal_Total=="No Medal")

ggplot(data = olympic_world_map, aes(x=long, y=lat, group = group, fill=Medal_Total)) +
  geom_polygon(colour='black') +
  scale_fill_manual(name= "Medal Won", values = c("gold", "darkgrey", "#E69F00", "white"))+
  labs(x= "longtude", y= "latitude",title = 'Highest Medal Type Won Per Country in Olympic Games (1976-2008)') +
  theme(
    panel.background = element_rect(fill = "lightblue",
                                    colour = "lightblue",
                                    size = 0.5, linetype = "solid"),
    panel.grid.major = element_line(size = 0.5, linetype = 'solid',
                                    colour = "white"), 
    panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
                                    colour = "white")+
  theme(legend.position="right"))

#Create Womens medal map-----------------------------------------------------------------------

Womens_olympic_Gold <- olympics %>%
  filter(Medal == "Gold", Gender=="Women") %>%
  group_by(Country) %>%
  summarize(Medal_Total = length(Medal)) %>%
  arrange(desc(Medal_Total)) %>% 
  mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total))


Womens_olympic_Silver <- olympics %>%
  group_by(Country) %>%
  filter(Medal == "Silver", Gender=="Women") %>%
  summarize(Medal_Total = length(Medal)) %>%
  arrange(desc(Medal_Total)) %>% 
  mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
  filter(!Country %in% olympic_Gold$Country) %>%
  mutate(Medal_Total = "Silver")

Womens_olympic_Bronze <- olympics %>%
  group_by(Country) %>%
  filter(Medal == "Bronze", Gender=="Women") %>%
  summarize(Medal_Total = length(Medal)) %>%
  arrange(desc(Medal_Total)) %>% 
  mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
  filter(!(region %in% olympic_Gold$region & region %in% olympic_Silver$region) )%>%
  mutate(Medal_Total = "Bronze")

Womens_olympic_Gold$Medal_Total <- "Gold"


Medal_Total <- rbind(Womens_olympic_Gold, Womens_olympic_Silver)
Medal_Total <- rbind(Medal_Total, Womens_olympic_Bronze)

Medal_Total$region[which(Medal_Total$region == "United States")] = "USA"


world_map <- map_data("world")
olympic_world_map <- left_join(world_map, Medal_Total, by ='region')
olympic_world_map$Medal_Total[is.na(olympic_world_map$Medal_Total)] <- "No Medal"

olympic_world_map$Medal_Total<-factor(olympic_world_map$Medal_Total, levels = c("Gold","Silver", "Bronze", "No Medal" ))

#Create a map for overlay
#winning_Countries<-olympic_world_map%>%dplyr::filter(!Medal_Total=="No Medal")

ggplot(data = olympic_world_map, aes(x=long, y=lat, group = group, fill=Medal_Total)) +
  geom_polygon(colour='black') +
  scale_fill_manual(name= "Medal Won", values = c("gold", "darkgrey", "#E69F00", "white"))+
  labs(x= "longtude", y= "latitude",title = 'Highest Medal Type Won for Women Per Country in Olympic Games (1976-2008)') +
  theme(
    panel.background = element_rect(fill = "lightblue",
                                    colour = "lightblue",
                                    size = 0.5, linetype = "solid"),
    panel.grid.major = element_line(size = 0.5, linetype = 'solid',
                                    colour = "white"), 
    panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
                                    colour = "white")+
      theme(legend.position="right"))

#Create a Mens Map----------------------------------------------------------------------------------
Mens_olympic_Gold <- olympics %>%
  filter(Medal == "Gold", Gender=="Men") %>%
  group_by(Country) %>%
  summarize(Medal_Total = length(Medal)) %>%
  arrange(desc(Medal_Total)) %>% 
  mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total))


Mens_olympic_Silver <- olympics %>%
  group_by(Country) %>%
  filter(Medal == "Silver", Gender=="Men") %>%
  summarize(Medal_Total = length(Medal)) %>%
  arrange(desc(Medal_Total)) %>% 
  mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
  filter(!Country %in% olympic_Gold$Country) %>%
  mutate(Medal_Total = "Silver")

Mens_olympic_Bronze <- olympics %>%
  group_by(Country) %>%
  filter(Medal == "Bronze", Gender=="Men") %>%
  summarize(Medal_Total = length(Medal)) %>%
  arrange(desc(Medal_Total)) %>% 
  mutate(region = str_trim(Country),Medal_Total = str_trim(Medal_Total)) %>%
  filter(!(region %in% olympic_Gold$region & region %in% olympic_Silver$region) )%>%
  mutate(Medal_Total = "Bronze")

Mens_olympic_Gold$Medal_Total <- "Gold"


Medal_Total <- rbind(Mens_olympic_Gold, Mens_olympic_Silver)
Medal_Total <- rbind(Medal_Total, Mens_olympic_Bronze)

Medal_Total$region[which(Medal_Total$region == "United States")] = "USA"


world_map <- map_data("world")
olympic_world_map <- left_join(world_map, Medal_Total, by ='region')
olympic_world_map$Medal_Total[is.na(olympic_world_map$Medal_Total)] <- "No Medal"

olympic_world_map$Medal_Total<-factor(olympic_world_map$Medal_Total, levels = c("Gold","Silver", "Bronze", "No Medal" ))

#Create a map for overlay
#winning_Countries<-olympic_world_map%>%dplyr::filter(!Medal_Total=="No Medal")

ggplot(data = olympic_world_map, aes(x=long, y=lat, group = group, fill=Medal_Total)) +
  geom_polygon(colour='black') +
  scale_fill_manual(name= "Medal Won", values = c("gold", "darkgrey", "#E69F00", "white"))+
  labs(x= "longtude", y= "latitude",title = 'Highest Medal Type Won for Men Per Country in Olympic Games (1976-2008)') +
  theme(
    panel.background = element_rect(fill = "lightblue",
                                    colour = "lightblue",
                                    size = 0.5, linetype = "solid"),
    panel.grid.major = element_line(size = 0.5, linetype = 'solid',
                                    colour = "white"), 
    panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
                                    colour = "white")+
      theme(legend.position="right"))