library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(fdth)
##
## Attaching package: 'fdth'
## The following objects are masked from 'package:stats':
##
## sd, var
library(ggplot2)
# Load the FIFA player data set from a local CSV file.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs as-is on a machine where this file exists.
datos.FIFA <- read.csv(
  file = "C:/Users/Blue/Documents/fifa.csv",
  encoding = "UTF-8"
)
# Preview the first 10 rows of a positional subset of columns
# (use `head(datos.FIFA, 10)` instead to see every column).
head(
  x = datos.FIFA[, c(3, 4, 6, 8, 9, 10, 12, 13, 14, 15, 27, 28)],
  n = 10
)
## Name Age Nationality Overall Potential Club
## 1 L. Messi 31 Argentina 94 94 FC Barcelona
## 2 Cristiano Ronaldo 33 Portugal 94 94 Juventus
## 3 Neymar Jr 26 Brazil 92 93 Paris Saint-Germain
## 4 De Gea 27 Spain 91 93 Manchester United
## 5 K. De Bruyne 27 Belgium 91 92 Manchester City
## 6 E. Hazard 27 Belgium 91 91 Chelsea
## 7 L. Modric 32 Croatia 91 91 Real Madrid
## 8 L. Suárez 31 Uruguay 91 91 FC Barcelona
## 9 Sergio Ramos 32 Spain 91 91 Real Madrid
## 10 J. Oblak 25 Slovenia 90 93 Atlético Madrid
## Value Wage Special Preferred.Foot Height Weight
## 1 \200110.5M \200565K 2202 Left 5'7 159lbs
## 2 \20077M \200405K 2228 Right 6'2 183lbs
## 3 \200118.5M \200290K 2143 Right 5'9 150lbs
## 4 \20072M \200260K 1471 Right 6'4 168lbs
## 5 \200102M \200355K 2281 Right 5'11 154lbs
## 6 \20093M \200340K 2142 Right 5'8 163lbs
## 7 \20067M \200420K 2280 Right 5'8 146lbs
## 8 \20080M \200455K 2346 Right 6'0 190lbs
## 9 \20051M \200380K 2201 Right 6'0 181lbs
## 10 \20068M \20094K 1331 Right 6'2 192lbs
# Show the last 10 rows for the same positional subset of columns.
tail(
  x = datos.FIFA[, c(3, 4, 6, 8, 9, 10, 12, 13, 14, 15, 27, 28)],
  n = 10
)
## Name Age Nationality Overall Potential
## 18198 D. Holland 18 Republic of Ireland 47 61
## 18199 J. Livesey 18 England 47 70
## 18200 M. Baldisimo 18 Canada 47 69
## 18201 J. Young 18 Scotland 47 62
## 18202 D. Walsh 18 Republic of Ireland 47 68
## 18203 J. Lundstram 19 England 47 65
## 18204 N. Christoffersson 19 Sweden 47 63
## 18205 B. Worman 16 England 47 67
## 18206 D. Walker-Rice 17 England 47 66
## 18207 G. Nugent 16 England 46 66
## Club Value Wage Special Preferred.Foot Height Weight
## 18198 Cork City \20060K \2001K 1362 Right 5'10 141lbs
## 18199 Burton Albion \20060K \2001K 792 Right 5'11 154lbs
## 18200 Vancouver Whitecaps FC \20070K \2001K 1303 Right 5'6 150lbs
## 18201 Swindon Town \20060K \2001K 1203 Left 5'9 157lbs
## 18202 Waterford FC \20060K \2001K 1098 Left 6'1 168lbs
## 18203 Crewe Alexandra \20060K \2001K 1307 Right 5'9 134lbs
## 18204 Trelleborgs FF \20060K \2001K 1098 Right 6'3 170lbs
## 18205 Cambridge United \20060K \2001K 1189 Right 5'8 148lbs
## 18206 Tranmere Rovers \20060K \2001K 1228 Right 5'10 154lbs
## 18207 Tranmere Rovers \20060K \2001K 1321 Right 5'10 176lbs
# Load the helper functions supplied with the exercise.
# NOTE(review): festatura() and flbskgs() are not defined in this script;
# presumably they come from this sourced file -- confirm.
source("C:/Users/Blue/Documents/f.r")

# Derive two numeric columns from the text columns Height and Weight:
# Estatura via festatura() and Pesokgs via flbskgs().
datos.FIFA <- datos.FIFA %>%
  mutate(
    Estatura = festatura(Height),
    Pesokgs = flbskgs(Weight)
  )

# Reduced data set holding only the 13 columns of interest.
datos.FIFA.reduc <- datos.FIFA %>%
  select(
    Name, Age, Nationality, Overall, Potential, Club, Value,
    Preferred.Foot, Position, Height, Weight, Estatura, Pesokgs
  )

head(datos.FIFA.reduc)
## Name Age Nationality Overall Potential Club
## 1 L. Messi 31 Argentina 94 94 FC Barcelona
## 2 Cristiano Ronaldo 33 Portugal 94 94 Juventus
## 3 Neymar Jr 26 Brazil 92 93 Paris Saint-Germain
## 4 De Gea 27 Spain 91 93 Manchester United
## 5 K. De Bruyne 27 Belgium 91 92 Manchester City
## 6 E. Hazard 27 Belgium 91 91 Chelsea
## Value Preferred.Foot Position Height Weight Estatura Pesokgs
## 1 \200110.5M Left RF 5'7 159lbs 1.70 72.12
## 2 \20077M Right ST 6'2 183lbs 1.88 83.01
## 3 \200118.5M Right LW 5'9 150lbs 1.75 68.04
## 4 \20072M Right GK 6'4 168lbs 1.93 76.20
## 5 \200102M Right RCM 5'11 154lbs 1.80 69.85
## 6 \20093M Right LF 5'8 163lbs 1.73 73.94
# Report the number of rows and columns of the reduced data set.
paste("Cantidad de registros", dim(datos.FIFA.reduc)[1])
## [1] "Cantidad de registros 18207"
paste("Cantidad de variables", dim(datos.FIFA.reduc)[2])
## [1] "Cantidad de variables 13"
# Show the type and first values of every column.
str(object = datos.FIFA.reduc)
## 'data.frame': 18207 obs. of 13 variables:
## $ Name : chr "L. Messi" "Cristiano Ronaldo" "Neymar Jr" "De Gea" ...
## $ Age : int 31 33 26 27 27 27 32 31 32 25 ...
## $ Nationality : chr "Argentina" "Portugal" "Brazil" "Spain" ...
## $ Overall : int 94 94 92 91 91 91 91 91 91 90 ...
## $ Potential : int 94 94 93 93 92 91 91 91 91 93 ...
## $ Club : chr "FC Barcelona" "Juventus" "Paris Saint-Germain" "Manchester United" ...
## $ Value : chr "\200110.5M" "\20077M" "\200118.5M" "\20072M" ...
## $ Preferred.Foot: chr "Left" "Right" "Right" "Right" ...
## $ Position : chr "RF" "ST" "LW" "GK" ...
## $ Height : chr "5'7" "6'2" "5'9" "6'4" ...
## $ Weight : chr "159lbs" "183lbs" "150lbs" "168lbs" ...
## $ Estatura : num 1.7 1.88 1.75 1.93 1.8 1.73 1.73 1.83 1.83 1.88 ...
## $ Pesokgs : num 72.1 83 68 76.2 69.8 ...
# Count players per nationality, ordered from most to fewest players.
nacion <- datos.FIFA.reduc %>%
  group_by(Nationality) %>%
  summarise(n = n()) %>%
  arrange(desc(n))
## `summarise()` ungrouping output (override with `.groups` argument)
# Ten nationalities with the most players.
head(nacion, n = 10)
## # A tibble: 10 x 2
## Nationality n
## <chr> <int>
## 1 England 1662
## 2 Germany 1198
## 3 Spain 1072
## 4 Argentina 937
## 5 France 914
## 6 Brazil 827
## 7 Italy 702
## 8 Colombia 618
## 9 Japan 478
## 10 Netherlands 453
# Ten nationalities with the fewest players.
nacion %>% tail(n = 10)
## # A tibble: 10 x 2
## Nationality n
## <chr> <int>
## 1 New Caledonia 1
## 2 Oman 1
## 3 Palestine 1
## 4 Puerto Rico 1
## 5 Qatar 1
## 6 Rwanda 1
## 7 São Tomé & Príncipe 1
## 8 South Sudan 1
## 9 St Lucia 1
## 10 United Arab Emirates 1
# Bar chart of player counts for the ten most represented nationalities.
# `fill` colours the bars themselves; `color` would only colour their outline.
ggplot(
  data = head(nacion, 10),
  aes(x = Nationality, y = n, fill = Nationality)
) +
  geom_col()
# Same chart for the ten least represented nationalities.
ggplot(
  data = tail(nacion, 10),
  aes(x = Nationality, y = n, fill = Nationality)
) +
  geom_col()
# Per nationality: player count plus mean and median age (both rounded to
# 2 decimals), ordered by ascending mean age.
age.nacion <- datos.FIFA.reduc %>%
  group_by(Nationality) %>%
  summarise(
    n = n(),
    media = round(mean(Age), 2),
    mediana = round(median(Age), 2)
  ) %>%
  arrange(media)
## `summarise()` ungrouping output (override with `.groups` argument)
# Ten nationalities with the lowest mean age.
head(age.nacion, n = 10)
## # A tibble: 10 x 4
## Nationality n media mediana
## <chr> <int> <dbl> <dbl>
## 1 Indonesia 1 17 17
## 2 Botswana 1 20 20
## 3 Rwanda 1 21 21
## 4 Tanzania 3 22 22
## 5 Zambia 9 22.2 21
## 6 Afghanistan 4 22.5 22
## 7 Chad 2 22.5 22.5
## 8 Antigua & Barbuda 4 22.8 22
## 9 Dominican Republic 2 23 23
## 10 Jordan 1 23 23
# Ten nationalities with the highest mean age.
age.nacion %>% tail(n = 10)
## # A tibble: 10 x 4
## Nationality n media mediana
## <chr> <int> <dbl> <dbl>
## 1 Fiji 1 30 30
## 2 Guam 1 30 30
## 3 New Caledonia 1 30 30
## 4 Kuwait 1 31 31
## 5 Palestine 1 31 31
## 6 São Tomé & Príncipe 1 31 31
## 7 Trinidad & Tobago 4 31.8 28.5
## 8 Ethiopia 1 32 32
## 9 Puerto Rico 1 34 34
## 10 Oman 1 36 36
# Mean number of players per nationality, rounded to a whole number.
la.media <- round(mean(age.nacion$n), digits = 0)
la.media
## [1] 111
# Keep the nationalities with at least the (unrounded) mean number of
# players, ordered by ascending mean age. The rounded la.media above is an
# alternative threshold.
age.nacion.mean.n <- age.nacion %>%
  filter(n >= mean(age.nacion$n)) %>%
  arrange(media)
head(age.nacion.mean.n, n = 10)
## # A tibble: 10 x 4
## Nationality n media mediana
## <chr> <int> <dbl> <dbl>
## 1 Nigeria 121 23.1 22
## 2 Ghana 114 23.7 23
## 3 Netherlands 453 24 24
## 4 England 1662 24.0 23
## 5 Norway 341 24.0 24
## 6 Denmark 336 24.2 24
## 7 Mexico 366 24.3 23
## 8 Belgium 260 24.3 23
## 9 Germany 1198 24.3 24
## 10 Australia 236 24.4 24
# Ten qualifying nationalities with the highest mean age.
age.nacion.mean.n %>% tail(n = 10)
## # A tibble: 10 x 4
## Nationality n media mediana
## <chr> <int> <dbl> <dbl>
## 1 Senegal 130 25.4 25.5
## 2 Serbia 126 25.6 26
## 3 Portugal 322 25.8 25
## 4 Italy 702 25.9 26
## 5 China PR 392 26.1 26
## 6 Japan 478 26.2 26
## 7 Argentina 937 26.2 26
## 8 Korea Republic 335 26.4 26
## 9 Uruguay 149 26.6 26
## 10 Brazil 827 27.6 27
# Box plots of mean age for the first ten and last ten rows of
# age.nacion.mean.n (lowest and highest mean age respectively).
# NOTE(review): age.nacion.mean.n holds a single `media` value per
# nationality, so each box collapses to one line -- confirm this is intended.
ggplot(head(age.nacion.mean.n, 10), aes(x = Nationality, y = media)) +
  geom_boxplot()
ggplot(tail(age.nacion.mean.n, 10), aes(x = Nationality, y = media)) +
  geom_boxplot()
# Alternatively: age distribution per nationality using every player in the
# reduced data set.
ggplot(datos.FIFA.reduc, aes(x = Nationality, y = Age)) +
  geom_boxplot()
# Names of the ten qualifying nationalities with the lowest mean age.
paises.top <- age.nacion.mean.n$Nationality %>% head(10)
paises.top
## [1] "Nigeria" "Ghana" "Netherlands" "England" "Norway"
## [6] "Denmark" "Mexico" "Belgium" "Germany" "Australia"
# All players of the full data set that belong to those nationalities.
datos.FIFA.paises.top.mean.n <- datos.FIFA %>%
  filter(Nationality %in% paises.top)
# Preview columns 3, 4 and 6 (Name, Age, Nationality).
head(datos.FIFA.paises.top.mean.n[, c(3, 4, 6)])
## Name Age Nationality
## 1 K. De Bruyne 27 Belgium
## 2 E. Hazard 27 Belgium
## 3 T. Kroos 28 Germany
## 4 H. Kane 24 England
## 5 M. ter Stegen 26 Germany
## 6 T. Courtois 26 Belgium
# Age distribution per nationality for the selected countries, with one
# outline colour per nationality.
ggplot(
  datos.FIFA.paises.top.mean.n,
  aes(x = Nationality, y = Age, color = Nationality)
) +
  geom_boxplot()
# Attach the clubs.nation columns to each player by matching the player's
# Club against the `club` column of clubs.nation.
# NOTE(review): clubs.nation is not created in this script; presumably it is
# defined by the sourced helper file -- confirm.
datos.FIFA.reduc.merge <- merge(
  x = datos.FIFA.reduc,
  y = clubs.nation,
  by.x = "Club",
  by.y = "club"
)
# First 10 rows when ordered by country in descending order.
datos.FIFA.reduc.merge %>%
  arrange(desc(country)) %>%
  head(10)
## Club Name Age Nationality Overall Potential
## 1 Atlanta United M. Ambrose 24 United States 63 67
## 2 Atlanta United L. Kunga 19 United States 57 73
## 3 Atlanta United C. McCann 30 Republic of Ireland 66 66
## 4 Atlanta United F. Escobar 23 Argentina 67 74
## 5 Atlanta United A. Wheeler-Omiunu 23 United States 58 64
## 6 Atlanta United K. Kratz 31 Germany 67 67
## 7 Atlanta United E. Remedi 23 Argentina 71 79
## 8 Atlanta United J. Larentowicz 34 United States 68 68
## 9 Atlanta United J. Hernández 21 Venezuela 62 71
## 10 Atlanta United M. Robinson 21 United States 61 72
## Value Preferred.Foot Position Height Weight Estatura Pesokgs country
## 1 \200400K Left LB 5'9 165lbs 1.75 74.84 USA
## 2 \200220K Left LM 5'8 150lbs 1.73 68.04 USA
## 3 \200475K Left LB 6'1 165lbs 1.85 74.84 USA
## 4 \200900K Right RB 6'0 165lbs 1.83 74.84 USA
## 5 \200170K Right CM 5'9 174lbs 1.75 78.93 USA
## 6 \200675K Right CAM 5'8 159lbs 1.73 72.12 USA
## 7 \2003M Right LDM 5'7 159lbs 1.70 72.12 USA
## 8 \200270K Right CDM 6'1 174lbs 1.85 78.93 USA
## 9 \200400K Left LB 5'7 157lbs 1.70 71.21 USA
## 10 \200375K Right CB 6'2 185lbs 1.88 83.91 USA
## confederaion continent
## 1 CONMEBOL SOUTH AMERICA
## 2 CONMEBOL SOUTH AMERICA
## 3 CONMEBOL SOUTH AMERICA
## 4 CONMEBOL SOUTH AMERICA
## 5 CONMEBOL SOUTH AMERICA
## 6 CONMEBOL SOUTH AMERICA
## 7 CONMEBOL SOUTH AMERICA
## 8 CONMEBOL SOUTH AMERICA
## 9 CONMEBOL SOUTH AMERICA
## 10 CONMEBOL SOUTH AMERICA
# One row per distinct (Club, country) pair.
datos.FIFA.only.club.coutry <- datos.FIFA.reduc.merge %>%
  select(Club, country) %>%
  distinct()
head(datos.FIFA.only.club.coutry)
## Club country
## 1
## 2 SSV Jahn Regensburg
## 3 1. FC Heidenheim 1846
## 4 1. FC Kaiserslautern
## 5 1. FC Köln Germany
## 6 1. FC Magdeburg Germany
# Frequency table with the number of clubs (Equipos) per country, built from
# the Category and f columns of fdt_cat()'s result.
tabla <- data.frame(fdt_cat(datos.FIFA.only.club.coutry$country)) %>%
  select(Category, f)
names(tabla) <- c("Country", "Equipos")
# Drop the entry for clubs whose country is empty. Filtering by value (rather
# than also removing row 1 by position) stays correct wherever that entry is
# sorted, and never discards a real country when no empty entry is present.
tabla <- filter(tabla, Country != "")
head(tabla, 10)
## Country Equipos
## 1 England 48
## 2 Spain 28
## 3 USA 24
## 4 Italy 23
## 5 Germany 22
## 6 México 18
## 7 France 15
## 8 Argentina 12
## 9 China 10
## 10 Brazil 9
# Last ten rows of the clubs-per-country table.
tabla %>% tail(n = 10)
## Country Equipos
## 23 South Corea 3
## 24 Turkey 3
## 25 Austria 2
## 26 Canada 2
## 27 Scotland 2
## 28 Ucrania 2
## 29 Australia 1
## 30 Corea 1
## 31 Denmark 1
## 32 Uruguay 1
# Bar chart of the number of clubs for the first ten rows of `tabla`.
# `fill` colours the bars themselves; `color` would only colour their outline.
ggplot(
  data = head(tabla, 10),
  aes(x = Country, y = Equipos, fill = Country)
) +
  geom_col()
# Same chart for the last ten rows of `tabla`.
ggplot(
  data = tail(tabla, 10),
  aes(x = Country, y = Equipos, fill = Country)
) +
  geom_col()
# Number of players per club in the merged data set.
jug.por.club <- datos.FIFA.reduc.merge %>%
  group_by(Club) %>%
  summarise(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
# Print the per-club counts.
jug.por.club
## # A tibble: 638 x 2
## Club n
## <chr> <int>
## 1 "" 241
## 2 " SSV Jahn Regensburg" 29
## 3 "1. FC Heidenheim 1846" 28
## 4 "1. FC Kaiserslautern" 26
## 5 "1. FC Köln" 28
## 6 "1. FC Magdeburg" 26
## 7 "1. FC Nürnberg" 29
## 8 "1. FC Union Berlin" 28
## 9 "1. FSV Mainz 05" 32
## 10 "Aalborg BK" 27
## # ... with 628 more rows
# Ten clubs with the most players.
# NOTE(review): in the recorded output the empty-string club ranks first;
# confirm whether players without a club should be counted here.
jug.por.club.top.ten <- jug.por.club %>%
  arrange(desc(n)) %>%
  head(10)
jug.por.club.top.ten
## # A tibble: 10 x 2
## Club n
## <chr> <int>
## 1 "" 241
## 2 "Arsenal" 33
## 3 "AS Monaco" 33
## 4 "Atlético Madrid" 33
## 5 "Borussia Dortmund" 33
## 6 "Burnley" 33
## 7 "Cardiff City" 33
## 8 "CD Leganés" 33
## 9 "Chelsea" 33
## 10 "Eintracht Frankfurt" 33
# Ten clubs with the fewest players (last rows of the descending ordering).
jug.por.club.bot.ten <- jug.por.club %>%
  arrange(desc(n)) %>%
  tail(10)
jug.por.club.bot.ten
## # A tibble: 10 x 2
## Club n
## <chr> <int>
## 1 Grêmio 20
## 2 Internacional 20
## 3 Paraná 20
## 4 Santos 20
## 5 Sport Club do Recife 20
## 6 Tromsø IL 20
## 7 Vitória 20
## 8 Limerick FC 19
## 9 Sligo Rovers 19
## 10 Derry City 18
# Add a numeric Valor column derived from the text column Value: entries
# ending in "M" are scaled by 1,000,000 and all others by 1,000. Rows whose
# Valor is not positive are then dropped.
# The data frame reaches mutate() through the pipe only; it is not passed a
# second time as an argument.
# NOTE(review): fcleanValue() is not defined in this script; presumably it
# strips the currency symbol and unit suffix and returns a number -- confirm.
datos.FIFA.reduc.merge.Value <- datos.FIFA.reduc.merge %>%
  mutate(
    Valor = ifelse(
      endsWith(Value, "M"),
      fcleanValue(Value) * 1000000,
      fcleanValue(Value) * 1000
    )
  ) %>%
  filter(Valor > 0)
head(datos.FIFA.reduc.merge.Value)
## Club Name Age Nationality Overall Potential Value
## 1 SSV Jahn Regensburg H. Al Ghaddioui 27 Morocco 64 64 \200475K
## 2 SSV Jahn Regensburg A. Dej 26 Poland 67 70 \200925K
## 3 SSV Jahn Regensburg M. Thalhammer 20 Germany 61 72 \200425K
## 4 SSV Jahn Regensburg A. Weis 28 Germany 68 69 \200725K
## 5 SSV Jahn Regensburg H. Hyseni 25 Germany 58 61 \200160K
## 6 SSV Jahn Regensburg S. Freis 33 Germany 66 66 \200400K
## Preferred.Foot Position Height Weight Estatura Pesokgs country confederaion
## 1 Right ST 6'3 203lbs 1.91 92.08
## 2 Right CM 5'9 163lbs 1.75 73.94
## 3 Right CM 6'3 181lbs 1.91 82.10
## 4 Right GK 6'2 185lbs 1.88 83.91
## 5 Right ST 6'4 194lbs 1.93 88.00
## 6 Right LM 6'0 172lbs 1.83 78.02
## continent Valor
## 1 475000
## 2 925000
## 3 425000
## 4 725000
## 5 160000
## 6 400000
# Last rows of the data set with the numeric Valor column.
datos.FIFA.reduc.merge.Value %>% tail()
## Club Name Age Nationality Overall Potential Value
## 17584 Yokohama F. Marinos O. Boumal 28 Cameroon 72 72 \2003.1M
## 17585 Yokohama F. Marinos K. Matsubara 25 Japan 67 72 \200850K
## 17586 Yokohama F. Marinos T. Kida 23 Japan 68 74 \2001M
## 17587 Yokohama F. Marinos K. Nakamachi 32 Japan 65 65 \200300K
## 17588 Yokohama F. Marinos K. Yamada 18 Japan 52 63 \20070K
## 17589 Yokohama F. Marinos I. Shinozuka 23 Russia 57 61 \200140K
## Preferred.Foot Position Height Weight Estatura Pesokgs country
## 17584 Left RM 6'0 148lbs 1.83 67.13
## 17585 Right RB 5'11 161lbs 1.80 73.03
## 17586 Right CDM 5'7 139lbs 1.70 63.05
## 17587 Right CDM 5'9 163lbs 1.75 73.94
## 17588 Right RB 5'9 132lbs 1.75 59.87
## 17589 Right RM 5'10 148lbs 1.78 67.13
## confederaion continent Valor
## 17584 3100000
## 17585 850000
## 17586 1000000
## 17587 300000
## 17588 70000
## 17589 140000
# Keep only the rows whose continent is not the empty string.
datos.FIFA.reduc.merge.Value <- filter(
  datos.FIFA.reduc.merge.Value,
  continent != ""
)
# Mean player value (Valor) per continent.
mean.Valor.continente <- datos.FIFA.reduc.merge.Value %>%
  group_by(continent) %>%
  summarise(media = mean(Valor))
## `summarise()` ungrouping output (override with `.groups` argument)
mean.Valor.continente
## # A tibble: 5 x 2
## continent media
## <chr> <dbl>
## 1 ASIA 1473879.
## 2 EUROPE 5360557.
## 3 NORTH AMERICA 1763361.
## 4 OCEANIA 747600
## 5 SOUTH AMERICA 2159836.
# Player value per continent as box plots. The red horizontal line marks the
# overall mean value, which is also printed in the subtitle.
ggplot(datos.FIFA.reduc.merge.Value, aes(x = continent, y = Valor)) +
  geom_boxplot() +
  geom_hline(
    yintercept = mean(datos.FIFA.reduc.merge.Value$Valor),
    color = "red"
  ) +
  labs(
    title = "Valor económico de los jugadores",
    subtitle = paste(
      "Valor económico medio = ",
      round(mean(datos.FIFA.reduc.merge.Value$Valor), 2)
    )
  )
# Same chart with the individual players overlaid as jittered points,
# coloured by continent.
ggplot(datos.FIFA.reduc.merge.Value, aes(x = continent, y = Valor)) +
  geom_boxplot() +
  geom_jitter(aes(color = continent)) +
  geom_hline(
    yintercept = mean(datos.FIFA.reduc.merge.Value$Valor),
    color = "red"
  ) +
  labs(
    title = "Valor económico de los jugadores",
    subtitle = paste(
      "Valor económico medio = ",
      round(mean(datos.FIFA.reduc.merge.Value$Valor), 2)
    )
  )
# Count and percentage share (rounded to 2 decimals, as text) of players per
# preferred foot, computed over the full data set.
datos.FIFA.pie.preferido <- datos.FIFA %>%
  group_by(Preferred.Foot) %>%
  summarise(
    cuantos = n(),
    porc = paste(round(n() / nrow(datos.FIFA) * 100, 2), "%")
  )
## `summarise()` ungrouping output (override with `.groups` argument)
datos.FIFA.pie.preferido
## # A tibble: 3 x 3
## Preferred.Foot cuantos porc
## <chr> <int> <chr>
## 1 "" 48 0.26 %
## 2 "Left" 4211 23.13 %
## 3 "Right" 13948 76.61 %
# Height (Estatura) per continent as box plots. The red diamond marks each
# continent's mean; the red horizontal line marks the overall mean (NA values
# ignored), which is also printed in the subtitle.
ggplot(
  data = datos.FIFA.reduc.merge.Value,
  aes(continent, Estatura, color = continent)
) +
  geom_boxplot() +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(
    fun = mean, geom = "point", shape = 18,
    size = 3, color = "red"
  ) +
  geom_hline(
    yintercept = mean(datos.FIFA.reduc.merge.Value$Estatura, na.rm = TRUE),
    color = "red"
  ) +
  labs(
    title = "Estatura media de jugadores por Continente",
    subtitle = paste(
      "Media total = ",
      round(mean(datos.FIFA.reduc.merge.Value$Estatura, na.rm = TRUE), 2)
    )
  )
## Warning: Removed 11 rows containing non-finite values (stat_boxplot).
## Warning: Removed 11 rows containing non-finite values (stat_summary).
# Weight (Pesokgs) per continent as box plots. The red diamond marks each
# continent's mean; the red horizontal line marks the overall mean (NA values
# ignored), which is also printed in the subtitle.
ggplot(
  data = datos.FIFA.reduc.merge.Value,
  aes(continent, Pesokgs, color = continent)
) +
  geom_boxplot() +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(
    fun = mean, geom = "point", shape = 18,
    size = 3, color = "red"
  ) +
  geom_hline(
    yintercept = mean(datos.FIFA.reduc.merge.Value$Pesokgs, na.rm = TRUE),
    color = "red"
  ) +
  labs(
    title = "Peso medio de jugadores por Continente",
    subtitle = paste(
      "Media total = ",
      round(mean(datos.FIFA.reduc.merge.Value$Pesokgs, na.rm = TRUE), 2)
    )
  )
## Warning: Removed 11 rows containing non-finite values (stat_boxplot).
## Warning: Removed 11 rows containing non-finite values (stat_summary).