This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
#install.packages("repr")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
library(readr)
library(plyr)
library(rworldmap)
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.4.4
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
library(repr)
## Warning: package 'repr' was built under R version 3.4.4
You can also embed plots, for example:
# Read the Summer Olympics medal records from the local CSV file.
summer <- read.csv(file = "~/Desktop/DATASET/olympic-games/summer.csv")
# Preview the first rows to check how the columns were parsed.
head(summer, n = 6L)
## Year City Sport Discipline Athlete Country Gender
## 1 1896 Athens Aquatics Swimming HAJOS, Alfred HUN Men
## 2 1896 Athens Aquatics Swimming HERSCHMANN, Otto AUT Men
## 3 1896 Athens Aquatics Swimming DRIVAS, Dimitrios GRE Men
## 4 1896 Athens Aquatics Swimming MALOKINIS, Ioannis GRE Men
## 5 1896 Athens Aquatics Swimming CHASAPIS, Spiridon GRE Men
## 6 1896 Athens Aquatics Swimming CHOROPHAS, Efstathios GRE Men
## Event Medal
## 1 100M Freestyle Gold
## 2 100M Freestyle Silver
## 3 100M Freestyle For Sailors Bronze
## 4 100M Freestyle For Sailors Gold
## 5 100M Freestyle For Sailors Silver
## 6 1200M Freestyle Bronze
# Print the structure of the loaded data frame (column names, types, first values).
str(summer)
## 'data.frame': 31165 obs. of 9 variables:
## $ Year : int 1896 1896 1896 1896 1896 1896 1896 1896 1896 1896 ...
## $ City : Factor w/ 22 levels "Amsterdam","Antwerp",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Sport : Factor w/ 43 levels "Aquatics","Archery",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Discipline: Factor w/ 67 levels "Archery","Artistic G.",..: 50 50 50 50 50 50 50 50 50 50 ...
## $ Athlete : Factor w/ 22762 levels "AABYE, Edgar",..: 7412 8021 4894 12438 3157 3339 7412 503 3339 14271 ...
## $ Country : Factor w/ 148 levels "","AFG","AHO",..: 60 9 54 54 54 54 60 54 54 9 ...
## $ Gender : Factor w/ 2 levels "Men","Women": 1 1 1 1 1 1 1 1 1 1 ...
## $ Event : Factor w/ 666 levels "- 47.63KG (Flyweight)",..: 79 79 80 80 80 105 105 105 162 162 ...
## $ Medal : Factor w/ 3 levels "Bronze","Gold",..: 2 3 1 2 3 1 2 3 1 2 ...
# Every row of `summer` is one medal; give each row a weight of 1 so that
# summing Medalcount over any grouping yields the number of medals.
count <- 1
summer$Medalcount <- count
# Keep athlete names as plain character strings.
summer[["Athlete"]] <- as.character(summer[["Athlete"]])
## Warning: package 'maps' was built under R version 3.4.4
##
## Attaching package: 'maps'
## The following object is masked from 'package:plyr':
##
## ozone
## 209 codes from your data successfully matched countries in the map
## 140 codes from your data failed to match with a country code in the map
## 159 codes from the map weren't represented in your data
## You asked for 7 quantiles, only 4 could be created in quantiles classification
# ---- Medal history of the five most decorated countries ----
# Count medal rows per country. Medalcount is 1 on every row, so the second
# table dimension (named "a" below) has a single level and the frequency
# column is simply the number of medals per country.
Countries <- as.data.frame(table(summer$Country, summer$Medalcount))
colnames(Countries) <- c("Country", "a", "Medalcount")
Countries <- Countries[order(-Countries$Medalcount), ]
CountriesFilter <- head(Countries, n = 5)
topCountryFilter <- summer[summer$Country %in% CountriesFilter$Country, ]

options(repr.plot.width = 6, repr.plot.height = 3)

# Medals per country and Games. Only Medalcount is aggregated here:
# numcolwise(sum) would also sum the numeric Year column, and ddply lets a
# result column replace the grouping label of the same name, so the x axis
# would show Year multiplied by the medal count instead of the actual year.
x <- ddply(topCountryFilter, .(Country, Year), summarise,
           Medalcount = sum(Medalcount))
ggplot(x, aes(Year, Medalcount, color = Country, group = Country)) +
  geom_point() +
  geom_line()

# Medals per country split by medal type, drawn as stacked bars.
y <- ddply(topCountryFilter, .(Country, Medal), summarise,
           Medalcount = sum(Medalcount))
# reorder() defaults to FUN = mean; FUN = sum orders the bars by the total
# number of medals, which is the height of each stacked bar.
ggplot(y, aes(x = reorder(Country, Medalcount, FUN = sum), y = Medalcount,
              fill = Medal, group = Medal)) +
  geom_bar(stat = "identity")
# ---- The 20 athletes with the most medals ----
# Count medal rows per athlete name. Medalcount is 1 on every row, so the
# second table dimension (named "a" below) carries no information.
tab <- as.data.frame(table(summer$Athlete, summer$Medalcount))
colnames(tab) <- c("Athlete", "a", "Medalcount")
topAthelete <- tab[order(-tab$Medalcount), ]
topAthelete <- head(topAthelete, n = 20)
topAthelete$Athlete <- as.character(topAthelete$Athlete)
# All medal rows that belong to one of the 20 selected athlete names.
topAtheleteFilter <- summer[summer$Athlete %in% topAthelete$Athlete, ]

# Medals per athlete and medal type. Only Medalcount is summed; numcolwise(sum)
# would additionally produce a meaningless sum of the Year column.
y <- ddply(topAtheleteFilter, .(Athlete, Medal), summarise,
           Medalcount = sum(Medalcount))
# reorder() defaults to FUN = mean, which ranks an athlete by the average
# count per medal type and so favours athletes who won fewer medal types.
# FUN = sum ranks by the total, matching the stacked bar length.
ggplot(y, aes(x = reorder(Athlete, Medalcount, FUN = sum), y = Medalcount,
              fill = Medal, group = Medal)) +
  geom_bar(stat = "identity") +
  coord_flip()
# Which countries do the most successful athletes compete for?
# Sum only Medalcount per country and medal type; numcolwise(sum) would also
# add a meaningless summed Year column.
y <- ddply(topAtheleteFilter, .(Country, Medal), summarise,
           Medalcount = sum(Medalcount))
# Order bars by total medals (FUN = sum); the reorder() default of FUN = mean
# would mis-rank countries that are missing one of the medal types.
ggplot(y, aes(x = reorder(Country, Medalcount, FUN = sum), y = Medalcount,
              fill = Medal, group = Medal)) +
  geom_bar(stat = "identity")
# Which sports do the most successful athletes compete in?
options(repr.plot.width = 6, repr.plot.height = 3)
# Sum only Medalcount per sport and medal type; numcolwise(sum) would also
# add a meaningless summed Year column.
y <- ddply(topAtheleteFilter, .(Sport, Medal), summarise,
           Medalcount = sum(Medalcount))
# Order bars by total medals (FUN = sum); the reorder() default of FUN = mean
# would mis-rank sports that are missing one of the medal types.
ggplot(y, aes(x = reorder(Sport, Medalcount, FUN = sum), y = Medalcount,
              fill = Medal, group = Medal)) +
  geom_bar(stat = "identity")
# Total number of medals per sport, split by medal type
options(repr.plot.width = 6, repr.plot.height = 6)
# Sum only Medalcount per sport and medal type; numcolwise(sum) would also
# add a meaningless summed Year column.
y <- ddply(summer, .(Sport, Medal), summarise,
           Medalcount = sum(Medalcount))
# Order bars by total medals (FUN = sum) rather than the reorder() default of
# the mean count per medal type, so the order matches the stacked bar length.
ggplot(y, aes(x = reorder(Sport, Medalcount, FUN = sum), y = Medalcount,
              fill = Medal, group = Medal)) +
  geom_bar(stat = "identity") +
  coord_flip()
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.