This R Markdown document is an initial study and analysis of data on the players of the 2017 Brazilian soccer championship.
After each round, the scout data is available on the Cartola FC site, a fantasy game that reports the performance of players, coaches, and teams. To get the data, there is an open API, documented at https://github.com/wgenial/cartrolandofc/blob/master/nova-api.md, which lists some endpoints. Unfortunately, there is no API endpoint that returns the scouts for each round; because of that, it is necessary to manually request the URL https://api.cartolafc.globo.com/atletas/mercado just after the end of a round to get the data for that round.
Given this scenario, it was necessary to fetch the data from the site manually and save it in JSON format, building up a list of JSON files. That is, the file rodada1.json contains the data for round 1, and so on.
# NOTE(review): setwd() ties the script to one machine's directory layout;
# every relative path below ("./Data/") is resolved against this directory.
setwd("~/Desktop/Cartola")
library("rjson")
library("stringr")
library(reshape2)
library(ggplot2)
# Collect the names (not the full paths) of the JSON files that hold the data
# to be analysed. `pattern` is a regular expression, not a shell glob, so the
# extension is matched with an escaped dot and anchored at the end of the name.
json_files <- list.files(path = "./Data/", pattern = "\\.json$")
Important information about the data:
id, nickname, club id, position
Depending on the player's position and actions during the game, a given column may or may not exist. For example, whether a player scored a goal, or whether a goalkeeper saved a penalty, etc.
# Empty (zero-row) data frame that accumulates one row per athlete per round.
# Columns must be given with `=`: using `<-` inside data.frame() assigns a
# variable in the calling environment and leaves the column with a deparsed,
# unusable name instead of e.g. `scout_RB`.
listOfAtletas <- data.frame(
  atleta_id = character(),
  apelido = character(),
  clube_id = character(),
  posicao_id = character(),
  rodada_id = character(),
  pontos_num = integer(),
  preco_num = integer(),
  variacao_num = integer(),
  media_num = integer(),
  # Scout abbreviations and the points listed for each:
  # RB - Ball steal / roubada de bola (+1.7)
  # G  - Goal (+8.0)
  # A  - Assist (+5.0)
  # SG - Game without conceding a goal (+5.0)
  # FS - Foul suffered (+0.5)
  # FF - Shot off target / finalizacao para fora (+0.7)
  # FD - Shot saved / finalizacao defendida (+1.0)
  # FT - Shot against the post / finalizacao na trave (+3.5)
  # DD - Difficult save (+3.0)
  # DP - Penalty save (+7.0)
  # GC - Own goal (-6.0)
  # CV - Red card (-5.0)
  # CA - Yellow card (-2.0)
  # GS - Goal conceded (-5.0)
  # PP - Missed penalty (-3.5)
  # FC - Foul committed (-0.5)
  # I  - Offside (-0.5)
  # PE - Misplaced pass (-0.3)
  #
  # positive
  scout_RB = integer(),
  scout_G = integer(),
  scout_A = integer(),
  scout_SG = integer(),
  scout_FS = integer(),
  scout_FF = integer(),
  scout_FD = integer(),
  scout_FT = integer(),
  scout_DD = integer(),
  scout_DP = integer(),
  # negative
  scout_GC = integer(),
  scout_CV = integer(),
  scout_CA = integer(),
  scout_GS = integer(),
  scout_PP = integer(),
  scout_FC = integer(),
  scout_I = integer(),
  scout_PE = integer()
)
Helper functions to get the data and parse it
## Read one JSON data file.
## file_to_read: path of the JSON file, passed to fromJSON() as `file`.
## Returns the parsed content exactly as fromJSON() produces it.
dataFromJSON <- function(file_to_read) {
  fromJSON(file = file_to_read)
}
## Default a possibly-missing scout value.
## tmp_var: the value looked up for one scout (may be NULL or zero-length).
## Returns tmp_var unchanged when it has at least one element, otherwise 0.
scout_var <- function(tmp_var) {
  if (length(tmp_var) == 0) {
    return(0)
  }
  tmp_var
}
## Iterate through the round files and add one row to listOfAtletas for every
## athlete that has scout data in that file.
##
## Why it is written this way:
##  * Scout values are read with `[[name]]`, which matches names exactly.
##    `$` partially matches names on lists, so `scout$G` on an athlete with no
##    "G" entry but a single "GS" (or "GC") entry would return that value.
##  * The loops iterate over the elements themselves, so an empty file list or
##    an empty athlete list simply runs zero times, and the two loops no
##    longer share one index variable.
##  * The path is built with file.path() rather than by pasting and then
##    deleting every space, which would corrupt names that contain spaces.
##  * Rows are collected in a list and bound once at the end instead of
##    calling rbind() on a growing data frame for every athlete.
atleta_rows <- list()
for (json_file in json_files) {
  result <- dataFromJSON(file.path("./Data", json_file))
  for (atleta in result$atletas) {
    scout <- atleta$scout
    # athletes without any scout entry for the round are skipped
    if (length(scout) == 0) {
      next
    }
    apelido <- atleta$apelido
    atleta_id <- atleta$atleta_id
    clube_id <- atleta$clube_id
    posicao_id <- atleta$posicao_id
    rodada_id <- atleta$rodada_id
    pontos_num <- atleta$pontos_num
    preco_num <- atleta$preco_num
    variacao_num <- atleta$variacao_num
    media_num <- atleta$media_num
    # positive
    scout_RB <- scout_var(scout[["RB"]])
    scout_G <- scout_var(scout[["G"]])
    scout_A <- scout_var(scout[["A"]])
    scout_SG <- scout_var(scout[["SG"]])
    scout_FS <- scout_var(scout[["FS"]])
    scout_FF <- scout_var(scout[["FF"]])
    scout_FD <- scout_var(scout[["FD"]])
    scout_FT <- scout_var(scout[["FT"]])
    scout_DD <- scout_var(scout[["DD"]])
    scout_DP <- scout_var(scout[["DP"]])
    # negative
    scout_GC <- scout_var(scout[["GC"]])
    scout_CV <- scout_var(scout[["CV"]])
    scout_CA <- scout_var(scout[["CA"]])
    scout_GS <- scout_var(scout[["GS"]])
    scout_PP <- scout_var(scout[["PP"]])
    scout_FC <- scout_var(scout[["FC"]])
    scout_I <- scout_var(scout[["I"]])
    scout_PE <- scout_var(scout[["PE"]])
    # column names are taken from the variable names
    atleta_rows[[length(atleta_rows) + 1L]] <- data.frame(
      atleta_id, apelido, clube_id, posicao_id, rodada_id,
      pontos_num, preco_num, variacao_num, media_num,
      scout_RB, scout_G, scout_A, scout_SG, scout_FS,
      scout_FF, scout_FD, scout_FT, scout_DD, scout_DP,
      scout_GC, scout_CV, scout_CA, scout_GS, scout_PP,
      scout_FC, scout_I, scout_PE
    )
  }
}
# single bind of the existing table with every collected row
listOfAtletas <- do.call(rbind, c(list(listOfAtletas), atleta_rows))
Selecting only the players in the ‘Atacante’ (forward) position who recorded all of: FF - Finalizacao para fora (+0,7), FD - Finalizacao defendida (+1,0) and FT - Finalizacao na trave (+3,5)
## Subset used for the test analysis: rows with posicao_id == 5 in which
## scout_FF, scout_FD and scout_FT are all greater than zero, keeping only the
## identifying columns and those three scouts.
subset_listOfAtletas <- subset(
  listOfAtletas,
  subset = posicao_id == 5 & scout_FF > 0 & scout_FD > 0 & scout_FT > 0,
  select = c("atleta_id", "posicao_id", "apelido",
             "scout_FF", "scout_FD", "scout_FT")
)
## Mean of each of the three scouts per nickname; the grouping column of the
## result is named Group.1 by aggregate().
subset_listOfAtletas_aggregated <- aggregate(
  subset_listOfAtletas[, c("scout_FF", "scout_FD", "scout_FT")],
  by = list(subset_listOfAtletas$apelido),
  FUN = mean
)
## Print the aggregated table with its rows ordered by scout_FF, then
## scout_FD, then scout_FT (ascending).
## The earlier form `df[order(scout_FF, scout_FD, scout_FT)]` ordered on
## same-named variables from the global environment and indexed columns
## rather than rows, which is why output recorded from it shows only Group.1.
## NOTE(review): the ordered table is only printed, not stored, so later code
## that reads subset_listOfAtletas_aggregated still sees aggregate()'s order.
subset_listOfAtletas_aggregated[
  with(subset_listOfAtletas_aggregated, order(scout_FF, scout_FD, scout_FT)),
]
## Group.1
## 1 Rafael Moura
## 2 Wellington Paulista
## 3 Ricardo Oliveira
## 4 Willian
## 5 Fred
## 6 Everton
## 7 Guilherme
## 8 Walter
## 9 Luan
## 10 André
## 11 Henrique Almeida
## 12 Júnior Dutra
## 13 Bruno Henrique
## 14 Edigar Junio
## 15 Arthur
## 16 Everaldo
## 17 Paulinho
## 18 Rossi
## 19 Róger Guedes
## 20 Joel
## 21 Neilton
## 22 Keno
## 23 Mendoza
## 24 Pedro Rocha
## 25 David
## 26 Felipe Vizeu
## 27 Paulo Vitor
## 28 Vinicius Junior
## 29 Ribamar
## 30 Penilla
## 31 Tréllez
## Bar chart for 10 players: one group of dodged bars per player (Group.1),
## one bar per scout column, legend on top and no axis titles.
## NOTE(review): head() takes the first 10 rows of the aggregated table as it
## is stored; nothing visible here ranks that table, so confirm these are the
## intended "top 10".
data.m <- melt(
  head(subset_listOfAtletas_aggregated, 10),
  id.vars = "Group.1"
)
scout_bars <- geom_bar(
  aes(fill = variable),
  width = 0.4,
  position = position_dodge(width = 0.5),
  stat = "identity"
)
chart_theme <- theme(
  legend.position = "top",
  legend.title = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank()
)
ggplot(data.m, aes(Group.1, value)) + scout_bars + chart_theme