This application scrapes data from ‘www.transfermarkt.com’ for two of the world’s best players, ‘Messi’ and ‘Ronaldo’. The function is written in R; it tabulates and plots the number of goals scored and assists made by Messi or Ronaldo.
The user must supply the player’s name as ‘cristiano-ronaldo’ (with id = 8198) or ‘lionel-messi’ (with id = 28003), together with the season or range of seasons whose data they want to see.

The packages used are:
-httr: For making a connection to the website
-XML: For extracting the tabular data from the website (web scraping)
-ggplot2: For making graphical plots
-dplyr: For manipulating the data tables
-xtable: For Tabularizing Data Frame

library(httr)
library(XML)
library(ggplot2)
library(dplyr)
library(xtable)

The R function which does the job of extracting the data from the website and tabularizing the data:
The GET function takes a URL as input and returns the HTTP response; the content function then extracts the HTML source from that response.
The key function used here is the readHTMLTable function of the XML package, which takes that content as input and extracts the HTML table data from it. The function then builds a data frame, which we use for our analysis.

# Scrape per-competition season statistics for one player from
# transfermarkt.com and return them as a single data frame.
#
# Arguments:
#   name    Player slug as it appears in the site URL, e.g. "lionel-messi".
#   id      Player id as it appears in the site URL, e.g. 28003.
#   seasons Either a single start year (e.g. 2014) or a range "START-END".
#           A range fetches the seasons starting in START, ..., END - 1, so
#           "2009-2015" covers 2009-10 through 2014-15. A single year fetches
#           exactly that one season.
#
# Returns:
#   A data frame with the columns Player, Comp, Season, Aps, Goals, Asts, YC,
#   YC/RC, RC and Mins, sorted by Season in descending order. Columns from
#   Aps onwards are numeric.
#
# Stops with an error when the arguments are malformed, when an HTTP request
# fails, or when a page has fewer than four HTML tables.
playerstats <- function(name = character(), id = integer(), seasons = 2014) {
  if (length(name) != 1 || length(id) != 1 || length(seasons) != 1) {
    stop("`name`, `id` and `seasons` must each be a single value.",
         call. = FALSE)
  }

  # "2009-2015" -> 2009 and 2015; a bare year yields the same value twice.
  first_year <- suppressWarnings(
    as.numeric(gsub(pattern = "-(.*)", replacement = "", seasons))
  )
  end_year <- suppressWarnings(
    as.numeric(gsub(pattern = "(.*)-", replacement = "", seasons))
  )
  if (is.na(first_year) || is.na(end_year) || end_year < first_year) {
    stop("`seasons` must be a start year or a range like \"2009-2015\".",
         call. = FALSE)
  }

  # The end year is exclusive. A single year gives a difference of 0, which
  # must mean "one season": 1:0 would iterate twice and fetch an extra season.
  n_seasons <- max(end_year - first_year, 1)
  start_years <- first_year + seq_len(n_seasons) - 1

  finaldata <- data.frame()
  for (k in seq_along(start_years)) {
    start_year <- start_years[k]
    url <- paste0(
      "http://www.transfermarkt.com/", name, "/leistungsdaten/spieler/", id,
      "/saison/", start_year, "/plus/"
    )
    response <- GET(url)
    # Fail with a clear HTTP error instead of an obscure subscript error later.
    stop_for_status(response)
    tables <- readHTMLTable(content(response))
    if (length(tables) < 4) {
      stop("Expected at least 4 HTML tables at ", url, " but found ",
           length(tables), ".", call. = FALSE)
    }

    # The fourth table on the page is used as the statistics table; its first
    # column is discarded and the remaining eight are renamed.
    playerdata <- as.data.frame(tables[[4]])
    playerdata <- playerdata[, -1]
    names(playerdata) <- c("Comp", "Aps", "Goals", "Asts", "YC", "YC/RC", "RC",
                           "Mins")

    # Label such as "2009-10". Taking the following year modulo 100 keeps the
    # label correct across century boundaries and for years such as 2019/2020,
    # which substring-stripping of "19"/"20" would mangle.
    season_label <- sprintf("%d-%02d", start_year, (start_year + 1) %% 100)
    playerdata <- cbind(playerdata, Season = season_label)
    playerdata <- playerdata[, c(1, 9, 2:8)]

    if (k == 1) {
      finaldata <- playerdata
    }
    # Merging on all shared columns acts as a de-duplicating union of rows.
    finaldata <- merge(finaldata, playerdata, all = TRUE)
    finaldata <- arrange(finaldata, desc(Season))
  }

  # Player label: the part of the slug after the last hyphen, capitalised
  # ("lionel-messi" -> "Messi").
  surname <- gsub(pattern = "(.*)-", replacement = "", name)
  player_label <- paste0(toupper(substring(surname, 1, 1)),
                         substring(surname, 2))
  finaldata <- cbind(Player = player_label, finaldata)

  # Minutes are scraped with thousands dots and a trailing apostrophe.
  finaldata$Mins <- gsub(pattern = "\\.|'", replacement = "", finaldata$Mins)

  # Every column after Player, Comp and Season is a count: "-" means zero.
  for (column in names(finaldata)[-(1:3)]) {
    cleaned <- gsub(pattern = "-", replacement = "0", finaldata[[column]])
    finaldata[[column]] <- as.numeric(cleaned)
  }
  finaldata
}

We use the R function to extract both players' data for the six seasons from 2009-10 to 2014-15 (passed to the function as the range "2009-2015").

# Fetch the statistics of both players for the range "2009-2015".
cr7data <- playerstats(
  name = "cristiano-ronaldo",
  id = 8198,
  seasons = "2009-2015"
)
messidata <- playerstats(
  name = "lionel-messi",
  id = 28003,
  seasons = "2009-2015"
)

Viewing Collected Data in Dataframes:

# Messi: one row per competition and season.
print(messidata)
##    Player             Comp  Season Aps Goals Asts YC YC/RC RC Mins
## 1   Messi Champions League 2014-15  13    10    6  1     0  0 1145
## 2   Messi     Copa del Rey 2014-15   6     5    4  1     0  0  540
## 3   Messi          La Liga 2014-15  38    43   21  4     0  0 3375
## 4   Messi Champions League 2013-14   7     8    1  0     0  0  630
## 5   Messi     Copa del Rey 2013-14   6     5    3  1     0  0  477
## 6   Messi          La Liga 2013-14  31    28   12  2     0  0 2498
## 7   Messi        Supercopa 2013-14   2     0    0  0     0  0  135
## 8   Messi Champions League 2012-13  11     8    3  0     0  0  827
## 9   Messi     Copa del Rey 2012-13   5     4    1  1     0  0  442
## 10  Messi          La Liga 2012-13  32    46   14  1     0  0 2629
## 11  Messi        Supercopa 2012-13   2     2    0  0     0  0  180
## 12  Messi Champions League 2011-12  11    14    9  2     0  0  990
## 13  Messi   Club World Cup 2011-12   2     2    1  0     0  0  180
## 14  Messi     Copa del Rey 2011-12   7     3    4  1     0  0  514
## 15  Messi          La Liga 2011-12  37    50   20  6     0  0 3270
## 16  Messi        Supercopa 2011-12   2     3    2  0     0  0  180
## 17  Messi    UEFA Supercup 2011-12   1     1    1  0     0  0   90
## 18  Messi Champions League 2010-11  13    12    4  0     0  0 1050
## 19  Messi     Copa del Rey 2010-11   7     7    3  1     0  0  542
## 20  Messi          La Liga 2010-11  33    31   21  4     0  0 2862
## 21  Messi        Supercopa 2010-11   2     3    0  0     0  0  129
## 22  Messi Champions League 2009-10  11     8    0  0     0  0  985
## 23  Messi   Club World Cup 2009-10   2     2    0  1     0  0  158
## 24  Messi     Copa del Rey 2009-10   3     1    0  1     0  0  212
## 25  Messi          La Liga 2009-10  35    34   13  3     0  0 2841
## 26  Messi        Supercopa 2009-10   1     2    0  0     0  0   90
## 27  Messi    UEFA Supercup 2009-10   1     0    1  1     0  0  120
# Ronaldo: one row per competition and season.
print(cr7data)
##     Player             Comp  Season Aps Goals Asts YC YC/RC RC Mins
## 1  Ronaldo Champions League 2014-15  12    10    4  1     0  0 1064
## 2  Ronaldo          La Liga 2014-15  35    48   16  4     0  1 3096
## 3  Ronaldo     Copa del Rey 2014-15   2     1    0  0     0  0  118
## 4  Ronaldo        Supercopa 2014-15   2     0    0  1     0  0   89
## 5  Ronaldo   Club World Cup 2014-15   2     0    2  0     0  0  180
## 6  Ronaldo    UEFA Supercup 2014-15   1     2    0  0     0  0   90
## 7  Ronaldo Champions League 2013-14  11    17    6  1     0  0  991
## 8  Ronaldo          La Liga 2013-14  30    31   11  4     0  1 2537
## 9  Ronaldo     Copa del Rey 2013-14   6     3    2  2     0  0  495
## 10 Ronaldo Champions League 2012-13  12    12    1  1     0  0 1080
## 11 Ronaldo          La Liga 2012-13  34    34   11  9     0  0 2716
## 12 Ronaldo     Copa del Rey 2012-13   7     7    2  3     0  1  655
## 13 Ronaldo        Supercopa 2012-13   2     2    0  0     0  0  180
## 14 Ronaldo Champions League 2011-12  10    10    4  1     0  0  930
## 15 Ronaldo          La Liga 2011-12  38    46   13  4     0  0 3353
## 16 Ronaldo     Copa del Rey 2011-12   5     3    0  1     0  0  437
## 17 Ronaldo        Supercopa 2011-12   2     1    0  1     0  0  180
## 18 Ronaldo Champions League 2010-11  12     6    4  2     0  0 1018
## 19 Ronaldo          La Liga 2010-11  34    40   13  2     0  0 2914
## 20 Ronaldo     Copa del Rey 2010-11   8     7    1  3     0  0  684
## 21 Ronaldo Champions League 2009-10   6     7    2  0     0  0  450
## 22 Ronaldo          La Liga 2009-10  29    26   11  3     1  1 2462

We then clean the data and combine the datasets of the 2 players.

# Union the two players' rows, group them by season and player, and total
# the goals and the assists within each group.
combined_rows <- merge(messidata, cr7data, all = TRUE)
bothdata <- group_by(combined_rows, Season, Player)
allgoals <- summarise(bothdata, Goals = sum(Goals))
allasts <- summarise(bothdata, Asts = sum(Asts))

Summarizing the Dataset of Goals and Assists.

# Goal totals, most recent season first.
goal_row_order <- order(desc(allgoals$Season))
allgoals[goal_row_order, ]
## Source: local data frame [12 x 3]
## Groups: Season
## 
##     Season  Player Goals
## 1  2014-15   Messi    58
## 2  2014-15 Ronaldo    61
## 3  2013-14   Messi    41
## 4  2013-14 Ronaldo    51
## 5  2012-13   Messi    60
## 6  2012-13 Ronaldo    55
## 7  2011-12   Messi    73
## 8  2011-12 Ronaldo    60
## 9  2010-11   Messi    53
## 10 2010-11 Ronaldo    53
## 11 2009-10   Messi    47
## 12 2009-10 Ronaldo    33
# Assist totals, most recent season first.
assist_row_order <- order(desc(allasts$Season))
allasts[assist_row_order, ]
## Source: local data frame [12 x 3]
## Groups: Season
## 
##     Season  Player Asts
## 1  2014-15   Messi   31
## 2  2014-15 Ronaldo   22
## 3  2013-14   Messi   16
## 4  2013-14 Ronaldo   19
## 5  2012-13   Messi   18
## 6  2012-13 Ronaldo   14
## 7  2011-12   Messi   37
## 8  2011-12 Ronaldo   17
## 9  2010-11   Messi   28
## 10 2010-11 Ronaldo   18
## 11 2009-10   Messi   14
## 12 2009-10 Ronaldo   13

After refining the datasets we plot the data graphically.

PLOTTING TOTAL GOALS SCORED

# Total goals per season: a point per player and season, a faint connecting
# line, and a dashed linear trend per player. The y axis is ticked at every
# observed total and padded by 2 on both ends.
goals <- ggplot(allgoals, aes(Season, Goals))
goals +
  geom_point(aes(color = Player), size = 3.5) +
  geom_smooth(
    method = "lm", linetype = 2, size = 1,
    aes(color = Player, group = Player), fill = NA
  ) +
  geom_line(aes(color = Player, group = Player), size = 1, alpha = .3) +
  scale_y_continuous(
    breaks = as.numeric(allgoals$Goals),
    limits = range(as.numeric(allgoals$Goals)) + c(-2, 2)
  ) +
  labs(
    x = "Season", y = "Goals",
    title = "MESSI VS. RONALDO, 2009 TO PRESENT, GOALS"
  ) +
  scale_color_manual(values = c("steelblue", "red")) +
  theme(
    axis.text.x = element_text(face = "bold", color = "darkblue"),
    axis.text.y = element_text(face = "bold", color = "darkgreen"),
    axis.title = element_text(face = "bold"),
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    plot.title = element_text(face = "bold")
  )

PLOTTING TOTAL ASSISTS MADE

# Total assists per season: a point per player and season, a faint connecting
# line, and a dashed linear trend per player. The y axis is ticked at every
# observed total and padded by 2 on both ends.
asts <- ggplot(allasts, aes(Season, Asts))
asts +
  geom_point(aes(color = Player), size = 3.5) +
  geom_smooth(
    method = "lm", linetype = 2, size = 1,
    aes(color = Player, group = Player), fill = NA
  ) +
  geom_line(aes(color = Player, group = Player), size = 1, alpha = .3) +
  scale_y_continuous(
    breaks = as.numeric(allasts$Asts),
    limits = range(as.numeric(allasts$Asts)) + c(-2, 2)
  ) +
  labs(
    x = "Season", y = "Assists",
    title = "MESSI VS. RONALDO, 2009 TO PRESENT, ASSISTS"
  ) +
  scale_color_manual(values = c("steelblue", "red")) +
  theme(
    axis.text.x = element_text(face = "bold", color = "darkblue"),
    axis.text.y = element_text(face = "bold", color = "darkgreen"),
    axis.title = element_text(face = "bold"),
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    plot.title = element_text(face = "bold")
  )

We then analyze the data by competition, i.e. we look at how many goals and assists each of them has scored or made in the Champions League and in the domestic league.

LA LIGA GOALS

# Re-sort Ronaldo's data by season (ascending), keep only the La Liga rows
# of each player, and union them into one data frame.
cr7data <- arrange(cr7data, Season)
cr7laliga <- filter(cr7data, Comp == "La Liga")
messilaliga <- filter(messidata, Comp == "La Liga")
laliga <- merge(
  messilaliga,
  cr7laliga,
  all = TRUE
)

Plotting goals scored in La Liga

# La Liga goals per season: points, a faint connecting line, and a dashed
# linear trend per player. No legend.position is set in this theme, only the
# legend justification.
lgoals <- ggplot(laliga, aes(Season, as.numeric(Goals)))
lgoals +
  geom_point(aes(color = Player), size = 3.5) +
  geom_smooth(
    method = "lm", linetype = 2, size = 1,
    aes(color = Player, group = Player), fill = NA
  ) +
  geom_line(aes(color = Player, group = Player), size = 1, alpha = .3) +
  scale_y_continuous(
    breaks = as.numeric(laliga$Goals),
    limits = range(as.numeric(laliga$Goals)) + c(-2, 2)
  ) +
  labs(
    x = "Season", y = "Goals",
    title = "La Liga: MESSI VS. RONALDO, 2009 -> PRESENT, GOALS"
  ) +
  scale_color_manual(values = c("steelblue", "red")) +
  theme(
    axis.text.x = element_text(face = "bold", color = "darkblue"),
    axis.text.y = element_text(face = "bold", color = "darkgreen"),
    axis.title = element_text(face = "bold"),
    legend.justification = c(1, 1),
    plot.title = element_text(face = "bold")
  )

Plotting assists made in La Liga

# La Liga assists per season: points, a faint connecting line, and a dashed
# linear trend per player. No legend.position is set in this theme, only the
# legend justification.
lassists <- ggplot(laliga, aes(Season, as.numeric(Asts)))
lassists +
  geom_point(aes(color = Player), size = 3.5) +
  geom_smooth(
    method = "lm", linetype = 2, size = 1,
    aes(color = Player, group = Player), fill = NA
  ) +
  geom_line(aes(color = Player, group = Player), size = 1, alpha = .3) +
  scale_y_continuous(
    breaks = as.numeric(laliga$Asts),
    limits = range(as.numeric(laliga$Asts)) + c(-2, 2)
  ) +
  labs(
    x = "Season", y = "Assists",
    title = "La Liga: MESSI VS. RONALDO, 2009 -> PRESENT, ASSISTS"
  ) +
  scale_color_manual(values = c("steelblue", "red")) +
  theme(
    axis.text.x = element_text(face = "bold", color = "darkblue"),
    axis.text.y = element_text(face = "bold", color = "darkgreen"),
    axis.title = element_text(face = "bold"),
    legend.justification = c(1, 1),
    plot.title = element_text(face = "bold")
  )

Analyzing Champions League Data.

# Keep only the Champions League rows of each player and union them into
# one data frame.
messicl <- filter(messidata, Comp == "Champions League")
cr7cl <- filter(cr7data, Comp == "Champions League")
cl <- merge(
  messicl,
  cr7cl,
  all = TRUE
)

Plotting Champions League Goals.

# Champions League goals per season: points, a faint connecting line, and a
# dashed linear trend per player. The y axis is ticked at every observed
# value and padded by 2 on both ends.
clgoals <- ggplot(cl, aes(Season, as.numeric(Goals)))
clgoals +
  geom_point(aes(color = Player), size = 3.5) +
  geom_smooth(
    method = "lm", linetype = 2, size = 1,
    aes(color = Player, group = Player), fill = NA
  ) +
  geom_line(aes(color = Player, group = Player), size = 1, alpha = .3) +
  scale_y_continuous(
    breaks = as.numeric(cl$Goals),
    limits = range(as.numeric(cl$Goals)) + c(-2, 2)
  ) +
  labs(
    x = "Season", y = "Goals",
    title = "CL: MESSI VS. RONALDO, 2009 -> PRESENT, GOALS"
  ) +
  scale_color_manual(values = c("steelblue", "red")) +
  theme(
    axis.text.x = element_text(face = "bold", color = "darkblue"),
    axis.text.y = element_text(face = "bold", color = "darkgreen"),
    axis.title = element_text(face = "bold"),
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    plot.title = element_text(face = "bold")
  )

Plotting Champions League Assists

# Champions League assists per season: points, a faint connecting line, and
# a dashed linear trend per player. The y axis is ticked at every observed
# value and padded by 2 on both ends.
classists <- ggplot(cl, aes(Season, as.numeric(Asts)))
classists +
  geom_point(aes(color = Player), size = 3.5) +
  geom_smooth(
    method = "lm", linetype = 2, size = 1,
    aes(color = Player, group = Player), fill = NA
  ) +
  geom_line(aes(color = Player, group = Player), size = 1, alpha = .3) +
  scale_y_continuous(
    breaks = as.numeric(cl$Asts),
    limits = range(as.numeric(cl$Asts)) + c(-2, 2)
  ) +
  labs(
    x = "Season", y = "Assists",
    title = "CL: MESSI VS. RONALDO, 2009 -> PRESENT, ASSISTS"
  ) +
  scale_color_manual(values = c("steelblue", "red")) +
  theme(
    axis.text.x = element_text(face = "bold", color = "darkblue"),
    axis.text.y = element_text(face = "bold", color = "darkgreen"),
    axis.title = element_text(face = "bold"),
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    plot.title = element_text(face = "bold")
  )

Tabularizing the Data

Season Player Liga_Goals Liga_Asts Liga_Aps Liga_Mins CL_Goals CL_Asts CL_Aps CL_Mins
2014-15 Messi 43 21 38 3375 10 6 13 1145
2014-15 Ronaldo 48 16 35 3096 10 4 12 1064
2013-14 Messi 28 12 31 2498 8 1 7 630
2013-14 Ronaldo 31 11 30 2537 17 6 11 991
2012-13 Messi 46 14 32 2629 8 3 11 827
2012-13 Ronaldo 34 11 34 2716 12 1 12 1080
2011-12 Messi 50 20 37 3270 14 9 11 990
2011-12 Ronaldo 46 13 38 3353 10 4 10 930
2010-11 Messi 31 21 33 2862 12 4 13 1050
2010-11 Ronaldo 40 13 34 2914 6 4 12 1018
2009-10 Messi 34 13 35 2841 8 0 11 985
2009-10 Ronaldo 26 11 29 2462 7 2 6 450

Code for Tabularizing Data

# Build the comparison table (one row per season and player, with La Liga and
# Champions League figures side by side) and print it as an HTML table.
#
# The competitions are joined on Season and Player rather than paired by row
# position, and the season labels come from the data instead of a hard-coded
# list. A different season range, or a season in which a player has no rows
# for one competition, therefore yields NA cells instead of an error or
# mislabelled rows.
cr7data <- arrange(cr7data, desc(Season))

# Statistics carried into the table for each competition.
stat_cols <- c("Goals", "Asts", "Aps", "Mins")

# Rows of one competition for one player, with the statistic columns prefixed
# (e.g. "Liga_Goals") so that two competitions can sit side by side.
competition_stats <- function(player_data, competition, prefix) {
  rows <- filter(player_data, Comp == competition)
  stats <- rows[, stat_cols, drop = FALSE]
  names(stats) <- paste(prefix, stat_cols, sep = "_")
  data.frame(
    Season = as.character(rows$Season),
    Player = as.character(rows$Player),
    stats,
    stringsAsFactors = FALSE
  )
}

# One row per season for a player, most recent season first.
player_summary <- function(player_data) {
  joined <- merge(
    competition_stats(player_data, "La Liga", "Liga"),
    competition_stats(player_data, "Champions League", "CL"),
    by = c("Season", "Player"),
    all = TRUE
  )
  arrange(joined, desc(Season))
}

crmaindf <- player_summary(cr7data)
messimaindf <- player_summary(messidata)

mainmerge <- merge(messimaindf, crmaindf, all = TRUE)
mainmerge <- arrange(mainmerge, desc(Season))

# Convert the statistic columns to character so the table shows the values
# verbatim instead of having numeric formatting applied to them.
value_cols <- names(mainmerge)[-(1:2)]
mainmerge[value_cols] <- lapply(mainmerge[value_cols], as.character)

xt <- xtable(mainmerge)
print(xt, type = "html", include.rownames = FALSE)