# Location of the season statistics CSV (a local, machine-specific path).
url <- "C:/Users/Usuario/Documents/Ciencia de los datos/Machine Learning/R Course/data/siit/estadisticas temporada espana.csv"
# Load the table; read.csv() already defaults to header = TRUE and sep = ",".
datos <- read.csv(file = url)
# Preview the first six rows.
head(datos, n = 6L)
## No Equipo Puntos PJ PG PE PP GF GC TA TR
## 1 1 Barcelona 91 38 29 4 5 112 29 66 1
## 2 2 Real Madrid 90 38 28 6 4 110 34 71 5
## 3 3 Atlético de Madrid 88 38 28 4 6 63 18 90 3
## 4 4 Villarreal 64 38 18 10 10 44 35 99 4
## 5 5 Athletic Club 62 38 18 8 12 58 45 85 6
## 6 6 Celta de Vigo 60 38 17 9 12 51 59 113 7
# Check what kind of object read.csv() returned.
class(datos)
## [1] "data.frame"
# List the column names of the table.
names(datos)
## [1] "No" "Equipo" "Puntos" "PJ" "PG" "PE" "PP"
## [8] "GF" "GC" "TA" "TR"
# What types of data do we have? str() shows each column's type and its
# first few values.
str(datos)
## 'data.frame': 20 obs. of 11 variables:
## $ No : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Equipo: Factor w/ 20 levels "Athletic Club",..: 3 15 2 20 1 5 17 13 16 4 ...
## $ Puntos: int 91 90 88 64 62 60 52 48 48 45 ...
## $ PJ : int 38 38 38 38 38 38 38 38 38 38 ...
## $ PG : int 29 28 28 18 18 17 14 12 13 11 ...
## $ PE : int 4 6 4 10 8 9 10 12 9 12 ...
## $ PP : int 5 4 6 10 12 12 14 14 16 15 ...
## $ GF : int 112 110 63 44 58 51 51 38 45 34 ...
## $ GC : int 29 34 18 35 45 59 50 35 48 52 ...
## $ TA : int 66 71 90 99 85 113 101 108 106 110 ...
## $ TR : int 1 5 3 4 6 7 8 5 5 5 ...
# Per-column summary: in the recorded output below, numeric columns get
# min/quartiles/mean/max and the Equipo factor gets per-level counts.
summary(datos)
## No Equipo Puntos PJ
## Min. : 1.00 Athletic Club : 1 Min. :32.00 Min. :38
## 1st Qu.: 5.75 Atlético de Madrid : 1 1st Qu.:41.25 1st Qu.:38
## Median :10.50 Barcelona : 1 Median :44.50 Median :38
## Mean :10.50 Betis : 1 Mean :52.40 Mean :38
## 3rd Qu.:15.25 Celta de Vigo : 1 3rd Qu.:60.50 3rd Qu.:38
## Max. :20.00 Deportivo de La Coruña: 1 Max. :91.00 Max. :38
## (Other) :14
## PG PE PP GF
## Min. : 8.00 Min. : 4.00 Min. : 4.00 Min. : 34.00
## 1st Qu.:10.00 1st Qu.: 8.00 1st Qu.:12.00 1st Qu.: 40.00
## Median :12.00 Median : 9.00 Median :15.50 Median : 45.50
## Mean :14.40 Mean : 9.20 Mean :14.40 Mean : 52.15
## 3rd Qu.:17.25 3rd Qu.:10.25 3rd Qu.:18.25 3rd Qu.: 51.25
## Max. :29.00 Max. :18.00 Max. :22.00 Max. :112.00
##
## GC TA TR
## Min. :18.00 Min. : 66.00 Min. : 1.00
## 1st Qu.:42.50 1st Qu.: 95.25 1st Qu.: 4.00
## Median :52.50 Median :106.00 Median : 5.00
## Mean :52.15 Mean :103.85 Mean : 5.40
## 3rd Qu.:63.25 3rd Qu.:112.25 3rd Qu.: 6.25
## Max. :74.00 Max. :139.00 Max. :10.00
##
# Correlation between points (Puntos) and goals scored (GF), computed with
# three different coefficients. Each result overwrites `corr`, so only the
# Spearman value remains in `corr` after this section.

# Pearson product-moment correlation.
corr <- with(datos, cor(Puntos, GF, method = "pearson"))
corr
## [1] 0.8420355
# Kendall's tau.
corr <- with(datos, cor(Puntos, GF, method = "kendall"))
corr
## [1] 0.4607198
# Spearman's rho.
corr <- with(datos, cor(Puntos, GF, method = "spearman"))
corr
## [1] 0.5808164
# Draw the two scatter plots side by side: points vs goals scored and
# points vs goals conceded.
# par() returns the settings it replaces; keep them so the 1 x 2 layout can
# be undone afterwards instead of leaking into every later plot on the device.
old_par <- par(mfrow = c(1, 2))
plot(datos$Puntos, datos$GF, xlab = "Puntos", ylab = "Goles a Favor")
plot(datos$Puntos, datos$GC, xlab = "Puntos", ylab = "Goles en contra")
# Restore the previous plotting layout.
par(old_par)
# Pairwise Pearson correlations among points (Puntos), goals scored (GF)
# and goals conceded (GC).

# Points vs goals scored.
corr1 <- cor(x = datos[["Puntos"]], y = datos[["GF"]], method = "pearson")
corr1
## [1] 0.8420355
# Points vs goals conceded.
corr2 <- cor(x = datos[["Puntos"]], y = datos[["GC"]], method = "pearson")
corr2
## [1] -0.8332895
# Goals scored vs goals conceded.
corr3 <- cor(x = datos[["GF"]], y = datos[["GC"]], method = "pearson")
corr3
## [1] -0.5451162
# Scatter-plot matrix of points, goals scored and goals conceded.
# Bare column names with `data = datos` label the diagonal panels
# "Puntos", "GF" and "GC"; writing `datos$...` inside the formula would
# label them "datos$Puntos", "datos$GF" and "datos$GC" instead.
pairs(~ Puntos + GF + GC, data = datos, cex.labels = 1.2)
…@MISC {ronny_hdez2017}