El objetivo de este primer trabajo práctico es simular una situación de la vida real en la que se solicita analizar un conjunto de datos. El dataset contiene información acerca de todos los encuentros disputados en la UFC desde 1993 hasta 2019, donde cada encuentro es una fila distinta. Cuenta con un total de 5144 registros y 145 variables. Algunas de esas variables son inherentes a cada participante (datos del participante de la esquina roja o azul), mientras que otros son datos referentes a la pelea en sí (fecha, referee, ganador del encuentro, duración del mismo, etc.)
# Set the working directory so that the relative file name passed to read_csv()
# below resolves, then print it to confirm.
# NOTE(review): this is an absolute, machine-specific path; the script will not
# run unchanged on another computer. Consider a project-relative path.
setwd("C:/Users/HP notebook/Documents/Archivos Sheila/FACULTAD/CCA/TP 1 ----")
getwd()
## [1] "C:/Users/HP notebook/Documents/Archivos Sheila/FACULTAD/CCA/TP 1 ----"
# Import the fight-level CSV from the working directory; the column types are
# guessed by readr (see the column specification printed below).
UFC <- readr::read_csv(file = "UFC_fight_data.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## R_fighter = col_character(),
## B_fighter = col_character(),
## Referee = col_character(),
## date = col_date(format = ""),
## location = col_character(),
## Winner = col_character(),
## title_bout = col_logical(),
## weight_class = col_character(),
## B_Stance = col_character(),
## R_Stance = col_character()
## )
## See spec(...) for full column specifications.
# Show the classes of the imported object.
class(UFC)
## [1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
# Render an interactive table of the data.
# NOTE(review): single-bracket indexing without a comma selects COLUMNS, so
# UFC[1:20] is the first 20 columns with every row, which is presumably why DT
# warns below that the data is too big. If the first 20 rows were intended,
# that would be UFC[1:20, ] -- confirm.
as.datatable(formattable(UFC[1:20]))
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
En el dataset observamos datos correspondientes a los encuentros de la UFC desde el año 1993 hasta 2019. Contamos con 145 variables: algunas están relacionadas con la pelea (ganador, referee, fecha y lugar donde se llevó a cabo, categoría de los jugadores y si la pelea es por el título) y otras están relacionadas con el luchador en sí (por ejemplo nombre, peso, altura, edad y lo que nosotras asumimos como cantidad promedio de los distintos ataques).
Usamos la función select del paquete dplyr para crear por separado los data.frames que necesitamos.
# Columns that describe only the red-corner fighter (names starting with "R_").
UFCR_ <- UFC %>% select(starts_with("R_"))
# Columns shared by both fighters, i.e. everything without an "R_" or "B_" prefix.
UFCnoR_noB_ <- UFC %>% select(-starts_with("R_"), -starts_with("B_"))
# Red-corner fighter columns followed by the shared bout columns.
UFCR_comun <- cbind(UFCR_, UFCnoR_noB_)
# Only 5 rows are displayed here; the step itself covers the whole dataset.
as.datatable(formattable(UFCR_comun[1:5, ]))
# Columns that describe only the blue-corner fighter (names starting with "B_").
UFCB_ <- UFC %>% select(starts_with("B_"))
# Blue-corner fighter columns followed by the shared bout columns.
UFCB_comun <- cbind(UFCB_, UFCnoR_noB_)
# Only 5 rows are displayed here; the step itself covers the whole dataset.
as.datatable(formattable(UFCB_comun[1:5, ]))
Usamos la función rename_at (del paquete dplyr).
# Drop the corner prefix from the red-corner columns so that both corner tables
# end up with the same column names. vars() selects the columns whose names
# start with "R_", and stringr's str_replace() swaps that "R_" for a single
# space. The replacement really is " " (not ""), so the new names carry a
# leading space; later code relies on that, e.g. the ` age` column.
# A plain function is passed instead of funs(), which is deprecated in dplyr.
UFCR_comunFINAL <- UFCR_comun %>%
  rename_at(
    vars(starts_with("R_")),
    function(nombre) str_replace(nombre, "R_", " ")
  )
# Only 5 rows are displayed here; the step itself covers the whole dataset.
as.datatable(formattable(UFCR_comunFINAL[1:5, ]))
# Same renaming for the blue-corner columns: "B_" is replaced by a single space.
UFCB_comunFINAL <- UFCB_comun %>%
  rename_at(
    vars(starts_with("B_")),
    function(nombre) str_replace(nombre, "B_", " ")
  )
as.datatable(formattable(UFCB_comunFINAL[1:5, ]))
# Recode Winner relative to each corner's own table: "1" when that corner won
# and "0" when the opposite corner won. Any other value is left untouched.
# The column is addressed by name rather than by position 73, and the recoding
# is a vectorised assignment rather than row-by-row loops over a fixed 5144
# rows, so it keeps working if the number of rows or columns changes.
class(UFCB_comunFINAL[["Winner"]])
## [1] "character"
UFCB_comunFINAL[["Winner"]] <- as.character(UFCB_comunFINAL[["Winner"]])
class(UFCR_comunFINAL[["Winner"]])
## [1] "character"
UFCR_comunFINAL[["Winner"]] <- as.character(UFCR_comunFINAL[["Winner"]])
# Blue-corner table: a Blue win becomes "1", a Red win becomes "0".
# %in% is used instead of == so that a missing value can never select a row.
UFCB_comunFINAL[["Winner"]][UFCB_comunFINAL[["Winner"]] %in% "Blue"] <- "1"
UFCB_comunFINAL[["Winner"]][UFCB_comunFINAL[["Winner"]] %in% "Red"] <- "0"
as.datatable(formattable(UFCB_comunFINAL[1:5, ]))
# Red-corner table: a Red win becomes "1", a Blue win becomes "0".
UFCR_comunFINAL[["Winner"]][UFCR_comunFINAL[["Winner"]] %in% "Red"] <- "1"
UFCR_comunFINAL[["Winner"]][UFCR_comunFINAL[["Winner"]] %in% "Blue"] <- "0"
as.datatable(formattable(UFCR_comunFINAL[1:5, ]))
Para cambiar el nombre de la columna que contiene el color que indica la esquina de cada dataset usamos la función rename del paquete plyr.
# Stack the blue-corner rows on top of the red-corner rows.
DatosconjuntosByR__sin_esquinas <- rbind(UFCB_comunFINAL, UFCR_comunFINAL)
# One corner label per row of each table. The length comes from the tables
# themselves instead of a hard-coded 5144, and the column is created explicitly
# as a factor (the type reported by df_status below) so that the result does
# not depend on the R version's stringsAsFactors default.
EsquinaB <- data.frame(
  V1 = rep("Blue", nrow(UFCB_comunFINAL)),
  stringsAsFactors = TRUE
)
EsquinaR <- data.frame(
  V1 = rep("Red", nrow(UFCR_comunFINAL)),
  stringsAsFactors = TRUE
)
Esquinas <- rbind(EsquinaB, EsquinaR)
# Attach the corner label to the stacked data set.
DatosconjuntosByR_FINAL <- cbind.data.frame(DatosconjuntosByR__sin_esquinas, Esquinas)
# NOTE(review): attaching plyr after dplyr masks several dplyr verbs. The call
# below is already namespace-qualified, so it does not need plyr attached;
# confirm whether later code does before removing this line.
library(plyr)
# Rename the label column (V1) to Corner_color.
DatosconjuntosByR_FINAL <- plyr::rename(DatosconjuntosByR_FINAL, c(V1 = "Corner_color"))
names(DatosconjuntosByR_FINAL)
## [1] " fighter" " current_lose_streak"
## [3] " current_win_streak" " draw"
## [5] " avg_BODY_att" " avg_BODY_landed"
## [7] " avg_CLINCH_att" " avg_CLINCH_landed"
## [9] " avg_DISTANCE_att" " avg_DISTANCE_landed"
## [11] " avg_GROUND_att" " avg_GROUND_landed"
## [13] " avg_HEAD_att" " avg_HEAD_landed"
## [15] " avg_KD" " avg_LEG_att"
## [17] " avg_LEG_landed" " avg_PASS"
## [19] " avg_REV" " avg_SIG_STR_att"
## [21] " avg_SIG_STR_landed" " avg_SIG_STR_pct"
## [23] " avg_SUB_ATT" " avg_TD_att"
## [25] " avg_TD_landed" " avg_TD_pct"
## [27] " avg_TOTAL_STR_att" " avg_TOTAL_STR_landed"
## [29] " longest_win_streak" " losses"
## [31] " avg_opp_BODY_att" " avg_opp_BODY_landed"
## [33] " avg_opp_CLINCH_att" " avg_opp_CLINCH_landed"
## [35] " avg_opp_DISTANCE_att" " avg_opp_DISTANCE_landed"
## [37] " avg_opp_GROUND_att" " avg_opp_GROUND_landed"
## [39] " avg_opp_HEAD_att" " avg_opp_HEAD_landed"
## [41] " avg_opp_KD" " avg_opp_LEG_att"
## [43] " avg_opp_LEG_landed" " avg_opp_PASS"
## [45] " avg_opp_REV" " avg_opp_SIG_STR_att"
## [47] " avg_opp_SIG_STR_landed" " avg_opp_SIG_STR_pct"
## [49] " avg_opp_SUB_ATT" " avg_opp_TD_att"
## [51] " avg_opp_TD_landed" " avg_opp_TD_pct"
## [53] " avg_opp_TOTAL_STR_att" " avg_opp_TOTAL_STR_landed"
## [55] " total_rounds_fought" " total_time_fought(seconds)"
## [57] " total_title_bouts" " win_by_Decision_Majority"
## [59] " win_by_Decision_Split" " win_by_Decision_Unanimous"
## [61] " win_by_KO/TKO" " win_by_Submission"
## [63] " win_by_TKO_Doctor_Stoppage" " wins"
## [65] " Stance" " Height_cms"
## [67] " Reach_cms" " Weight_lbs"
## [69] " age" "Referee"
## [71] "date" "location"
## [73] "Winner" "title_bout"
## [75] "weight_class" "no_of_rounds"
## [77] "Corner_color"
as.datatable(formattable(DatosconjuntosByR_FINAL))
# Move Winner to the first position, keeping the other columns in order.
# match() finds the column by its exact name. grep() would also hit any column
# whose name merely contains "Winner", and with no hit at all the negative
# index (1:ncol)[-integer(0)] would silently drop every column.
col_idx <- match("Winner", names(DatosconjuntosByR_FINAL))
stopifnot(!is.na(col_idx))
DatosconjuntosByR_FINAL <- DatosconjuntosByR_FINAL[
  ,
  c(col_idx, setdiff(seq_along(DatosconjuntosByR_FINAL), col_idx))
]
# Check that Winner is now the first column and the rest kept their order.
names(DatosconjuntosByR_FINAL)
## [1] "Winner" " fighter"
## [3] " current_lose_streak" " current_win_streak"
## [5] " draw" " avg_BODY_att"
## [7] " avg_BODY_landed" " avg_CLINCH_att"
## [9] " avg_CLINCH_landed" " avg_DISTANCE_att"
## [11] " avg_DISTANCE_landed" " avg_GROUND_att"
## [13] " avg_GROUND_landed" " avg_HEAD_att"
## [15] " avg_HEAD_landed" " avg_KD"
## [17] " avg_LEG_att" " avg_LEG_landed"
## [19] " avg_PASS" " avg_REV"
## [21] " avg_SIG_STR_att" " avg_SIG_STR_landed"
## [23] " avg_SIG_STR_pct" " avg_SUB_ATT"
## [25] " avg_TD_att" " avg_TD_landed"
## [27] " avg_TD_pct" " avg_TOTAL_STR_att"
## [29] " avg_TOTAL_STR_landed" " longest_win_streak"
## [31] " losses" " avg_opp_BODY_att"
## [33] " avg_opp_BODY_landed" " avg_opp_CLINCH_att"
## [35] " avg_opp_CLINCH_landed" " avg_opp_DISTANCE_att"
## [37] " avg_opp_DISTANCE_landed" " avg_opp_GROUND_att"
## [39] " avg_opp_GROUND_landed" " avg_opp_HEAD_att"
## [41] " avg_opp_HEAD_landed" " avg_opp_KD"
## [43] " avg_opp_LEG_att" " avg_opp_LEG_landed"
## [45] " avg_opp_PASS" " avg_opp_REV"
## [47] " avg_opp_SIG_STR_att" " avg_opp_SIG_STR_landed"
## [49] " avg_opp_SIG_STR_pct" " avg_opp_SUB_ATT"
## [51] " avg_opp_TD_att" " avg_opp_TD_landed"
## [53] " avg_opp_TD_pct" " avg_opp_TOTAL_STR_att"
## [55] " avg_opp_TOTAL_STR_landed" " total_rounds_fought"
## [57] " total_time_fought(seconds)" " total_title_bouts"
## [59] " win_by_Decision_Majority" " win_by_Decision_Split"
## [61] " win_by_Decision_Unanimous" " win_by_KO/TKO"
## [63] " win_by_Submission" " win_by_TKO_Doctor_Stoppage"
## [65] " wins" " Stance"
## [67] " Height_cms" " Reach_cms"
## [69] " Weight_lbs" " age"
## [71] "Referee" "date"
## [73] "location" "title_bout"
## [75] "weight_class" "no_of_rounds"
## [77] "Corner_color"
Antes de obtener estadísticas descriptivas básicas debemos analizar los datos para tratar los datos faltantes o NA: Para eso usaremos df_status (del paquete funModeling), que: Para cada variable devuelve: Cantidad y porcentaje de ceros (q_zeros y p_zeros respectivamente). Las mismas métricas para los valores de NA (q_NA / p_na) y los valores infinitos (q_inf / p_inf). Las últimas dos columnas indican el tipo de datos y la cantidad de valores únicos. Esta función imprime y devuelve los resultados.
# Per-column summary of the combined data set (zeros, NA and infinite counts
# with percentages, plus type and number of unique values), used to locate the
# columns with missing data.
df_status(DatosconjuntosByR_FINAL)
## variable q_zeros p_zeros q_na p_na q_inf p_inf
## 1 Winner 5061 49.19 0 0.00 0 0
## 2 fighter 0 0.00 0 0.00 0 0
## 3 current_lose_streak 6610 64.25 0 0.00 0 0
## 4 current_win_streak 5593 54.36 0 0.00 0 0
## 5 draw 10288 100.00 0 0.00 0 0
## 6 avg_BODY_att 320 3.11 1915 18.61 0 0
## 7 avg_BODY_landed 429 4.17 1915 18.61 0 0
## 8 avg_CLINCH_att 466 4.53 1915 18.61 0 0
## 9 avg_CLINCH_landed 609 5.92 1915 18.61 0 0
## 10 avg_DISTANCE_att 62 0.60 1915 18.61 0 0
## 11 avg_DISTANCE_landed 187 1.82 1915 18.61 0 0
## 12 avg_GROUND_att 701 6.81 1915 18.61 0 0
## 13 avg_GROUND_landed 834 8.11 1915 18.61 0 0
## 14 avg_HEAD_att 28 0.27 1915 18.61 0 0
## 15 avg_HEAD_landed 111 1.08 1915 18.61 0 0
## 16 avg_KD 3654 35.52 1915 18.61 0 0
## 17 avg_LEG_att 592 5.75 1915 18.61 0 0
## 18 avg_LEG_landed 689 6.70 1915 18.61 0 0
## 19 avg_PASS 1677 16.30 1915 18.61 0 0
## 20 avg_REV 4860 47.24 1915 18.61 0 0
## 21 avg_SIG_STR_att 19 0.18 1915 18.61 0 0
## 22 avg_SIG_STR_landed 66 0.64 1915 18.61 0 0
## 23 avg_SIG_STR_pct 66 0.64 1915 18.61 0 0
## 24 avg_SUB_ATT 2827 27.48 1915 18.61 0 0
## 25 avg_TD_att 877 8.52 1915 18.61 0 0
## 26 avg_TD_landed 1549 15.06 1915 18.61 0 0
## 27 avg_TD_pct 1549 15.06 1915 18.61 0 0
## 28 avg_TOTAL_STR_att 14 0.14 1915 18.61 0 0
## 29 avg_TOTAL_STR_landed 32 0.31 1915 18.61 0 0
## 30 longest_win_streak 2878 27.97 0 0.00 0 0
## 31 losses 3492 33.94 0 0.00 0 0
## 32 avg_opp_BODY_att 317 3.08 1915 18.61 0 0
## 33 avg_opp_BODY_landed 445 4.33 1915 18.61 0 0
## 34 avg_opp_CLINCH_att 496 4.82 1915 18.61 0 0
## 35 avg_opp_CLINCH_landed 618 6.01 1915 18.61 0 0
## 36 avg_opp_DISTANCE_att 43 0.42 1915 18.61 0 0
## 37 avg_opp_DISTANCE_landed 179 1.74 1915 18.61 0 0
## 38 avg_opp_GROUND_att 942 9.16 1915 18.61 0 0
## 39 avg_opp_GROUND_landed 1092 10.61 1915 18.61 0 0
## 40 avg_opp_HEAD_att 36 0.35 1915 18.61 0 0
## 41 avg_opp_HEAD_landed 139 1.35 1915 18.61 0 0
## 42 avg_opp_KD 4575 44.47 1915 18.61 0 0
## 43 avg_opp_LEG_att 521 5.06 1915 18.61 0 0
## 44 avg_opp_LEG_landed 617 6.00 1915 18.61 0 0
## 45 avg_opp_PASS 1998 19.42 1915 18.61 0 0
## 46 avg_opp_REV 4917 47.79 1915 18.61 0 0
## 47 avg_opp_SIG_STR_att 17 0.17 1915 18.61 0 0
## 48 avg_opp_SIG_STR_landed 69 0.67 1915 18.61 0 0
## 49 avg_opp_SIG_STR_pct 69 0.67 1915 18.61 0 0
## 50 avg_opp_SUB_ATT 2976 28.93 1915 18.61 0 0
## 51 avg_opp_TD_att 762 7.41 1915 18.61 0 0
## 52 avg_opp_TD_landed 1577 15.33 1915 18.61 0 0
## 53 avg_opp_TD_pct 1577 15.33 1915 18.61 0 0
## 54 avg_opp_TOTAL_STR_att 13 0.13 1915 18.61 0 0
## 55 avg_opp_TOTAL_STR_landed 44 0.43 1915 18.61 0 0
## 56 total_rounds_fought 1915 18.61 0 0.00 0 0
## 57 total_time_fought(seconds) 0 0.00 1915 18.61 0 0
## 58 total_title_bouts 8230 80.00 0 0.00 0 0
## 59 win_by_Decision_Majority 10062 97.80 0 0.00 0 0
## 60 win_by_Decision_Split 8296 80.64 0 0.00 0 0
## 61 win_by_Decision_Unanimous 5525 53.70 0 0.00 0 0
## 62 win_by_KO/TKO 5589 54.33 0 0.00 0 0
## 63 win_by_Submission 6753 65.64 0 0.00 0 0
## 64 win_by_TKO_Doctor_Stoppage 9729 94.57 0 0.00 0 0
## 65 wins 2878 27.97 0 0.00 0 0
## 66 Stance 0 0.00 293 2.85 0 0
## 67 Height_cms 0 0.00 12 0.12 0 0
## 68 Reach_cms 0 0.00 982 9.55 0 0
## 69 Weight_lbs 0 0.00 9 0.09 0 0
## 70 age 0 0.00 236 2.29 0 0
## 71 Referee 0 0.00 46 0.45 0 0
## 72 date 0 0.00 0 0.00 0 0
## 73 location 0 0.00 0 0.00 0 0
## 74 title_bout 9618 93.49 0 0.00 0 0
## 75 weight_class 0 0.00 0 0.00 0 0
## 76 no_of_rounds 0 0.00 0 0.00 0 0
## 77 Corner_color 0 0.00 0 0.00 0 0
## type unique
## 1 character 3
## 2 character 1915
## 3 numeric 8
## 4 numeric 17
## 5 numeric 1
## 6 numeric 1411
## 7 numeric 1169
## 8 numeric 1368
## 9 numeric 1121
## 10 numeric 3048
## 11 numeric 2055
## 12 numeric 1540
## 13 numeric 1234
## 14 numeric 2932
## 15 numeric 1894
## 16 numeric 195
## 17 numeric 1266
## 18 numeric 1124
## 19 numeric 523
## 20 numeric 144
## 21 numeric 3142
## 22 numeric 2229
## 23 numeric 2547
## 24 numeric 309
## 25 numeric 852
## 26 numeric 530
## 27 numeric 2276
## 28 numeric 3376
## 29 numeric 2642
## 30 numeric 17
## 31 numeric 15
## 32 numeric 1331
## 33 numeric 1053
## 34 numeric 1285
## 35 numeric 1033
## 36 numeric 3012
## 37 numeric 1979
## 38 numeric 1313
## 39 numeric 1022
## 40 numeric 2860
## 41 numeric 1788
## 42 numeric 132
## 43 numeric 1134
## 44 numeric 1003
## 45 numeric 438
## 46 numeric 138
## 47 numeric 3085
## 48 numeric 2139
## 49 numeric 2467
## 50 numeric 265
## 51 numeric 727
## 52 numeric 417
## 53 numeric 2249
## 54 numeric 3277
## 55 numeric 2507
## 56 numeric 80
## 57 numeric 5008
## 58 numeric 17
## 59 numeric 3
## 60 numeric 6
## 61 numeric 11
## 62 numeric 12
## 63 numeric 14
## 64 numeric 3
## 65 numeric 24
## 66 character 5
## 67 numeric 23
## 68 numeric 25
## 69 numeric 77
## 70 numeric 31
## 71 character 190
## 72 Date 476
## 73 character 157
## 74 logical 2
## 75 character 14
## 76 numeric 5
## 77 factor 2
## Replace the NA values in " age" with the rounded mean age; the mean is
## computed over the observed ages only.
edad_media <- round(mean(DatosconjuntosByR_FINAL$` age`, na.rm = TRUE))
sin_edad <- is.na(DatosconjuntosByR_FINAL$` age`)
DatosconjuntosByR_FINAL$` age`[sin_edad] <- edad_media
# Confirm that the age column no longer contains NA.
df_status(DatosconjuntosByR_FINAL)
## variable q_zeros p_zeros q_na p_na q_inf p_inf
## 1 Winner 5061 49.19 0 0.00 0 0
## 2 fighter 0 0.00 0 0.00 0 0
## 3 current_lose_streak 6610 64.25 0 0.00 0 0
## 4 current_win_streak 5593 54.36 0 0.00 0 0
## 5 draw 10288 100.00 0 0.00 0 0
## 6 avg_BODY_att 320 3.11 1915 18.61 0 0
## 7 avg_BODY_landed 429 4.17 1915 18.61 0 0
## 8 avg_CLINCH_att 466 4.53 1915 18.61 0 0
## 9 avg_CLINCH_landed 609 5.92 1915 18.61 0 0
## 10 avg_DISTANCE_att 62 0.60 1915 18.61 0 0
## 11 avg_DISTANCE_landed 187 1.82 1915 18.61 0 0
## 12 avg_GROUND_att 701 6.81 1915 18.61 0 0
## 13 avg_GROUND_landed 834 8.11 1915 18.61 0 0
## 14 avg_HEAD_att 28 0.27 1915 18.61 0 0
## 15 avg_HEAD_landed 111 1.08 1915 18.61 0 0
## 16 avg_KD 3654 35.52 1915 18.61 0 0
## 17 avg_LEG_att 592 5.75 1915 18.61 0 0
## 18 avg_LEG_landed 689 6.70 1915 18.61 0 0
## 19 avg_PASS 1677 16.30 1915 18.61 0 0
## 20 avg_REV 4860 47.24 1915 18.61 0 0
## 21 avg_SIG_STR_att 19 0.18 1915 18.61 0 0
## 22 avg_SIG_STR_landed 66 0.64 1915 18.61 0 0
## 23 avg_SIG_STR_pct 66 0.64 1915 18.61 0 0
## 24 avg_SUB_ATT 2827 27.48 1915 18.61 0 0
## 25 avg_TD_att 877 8.52 1915 18.61 0 0
## 26 avg_TD_landed 1549 15.06 1915 18.61 0 0
## 27 avg_TD_pct 1549 15.06 1915 18.61 0 0
## 28 avg_TOTAL_STR_att 14 0.14 1915 18.61 0 0
## 29 avg_TOTAL_STR_landed 32 0.31 1915 18.61 0 0
## 30 longest_win_streak 2878 27.97 0 0.00 0 0
## 31 losses 3492 33.94 0 0.00 0 0
## 32 avg_opp_BODY_att 317 3.08 1915 18.61 0 0
## 33 avg_opp_BODY_landed 445 4.33 1915 18.61 0 0
## 34 avg_opp_CLINCH_att 496 4.82 1915 18.61 0 0
## 35 avg_opp_CLINCH_landed 618 6.01 1915 18.61 0 0
## 36 avg_opp_DISTANCE_att 43 0.42 1915 18.61 0 0
## 37 avg_opp_DISTANCE_landed 179 1.74 1915 18.61 0 0
## 38 avg_opp_GROUND_att 942 9.16 1915 18.61 0 0
## 39 avg_opp_GROUND_landed 1092 10.61 1915 18.61 0 0
## 40 avg_opp_HEAD_att 36 0.35 1915 18.61 0 0
## 41 avg_opp_HEAD_landed 139 1.35 1915 18.61 0 0
## 42 avg_opp_KD 4575 44.47 1915 18.61 0 0
## 43 avg_opp_LEG_att 521 5.06 1915 18.61 0 0
## 44 avg_opp_LEG_landed 617 6.00 1915 18.61 0 0
## 45 avg_opp_PASS 1998 19.42 1915 18.61 0 0
## 46 avg_opp_REV 4917 47.79 1915 18.61 0 0
## 47 avg_opp_SIG_STR_att 17 0.17 1915 18.61 0 0
## 48 avg_opp_SIG_STR_landed 69 0.67 1915 18.61 0 0
## 49 avg_opp_SIG_STR_pct 69 0.67 1915 18.61 0 0
## 50 avg_opp_SUB_ATT 2976 28.93 1915 18.61 0 0
## 51 avg_opp_TD_att 762 7.41 1915 18.61 0 0
## 52 avg_opp_TD_landed 1577 15.33 1915 18.61 0 0
## 53 avg_opp_TD_pct 1577 15.33 1915 18.61 0 0
## 54 avg_opp_TOTAL_STR_att 13 0.13 1915 18.61 0 0
## 55 avg_opp_TOTAL_STR_landed 44 0.43 1915 18.61 0 0
## 56 total_rounds_fought 1915 18.61 0 0.00 0 0
## 57 total_time_fought(seconds) 0 0.00 1915 18.61 0 0
## 58 total_title_bouts 8230 80.00 0 0.00 0 0
## 59 win_by_Decision_Majority 10062 97.80 0 0.00 0 0
## 60 win_by_Decision_Split 8296 80.64 0 0.00 0 0
## 61 win_by_Decision_Unanimous 5525 53.70 0 0.00 0 0
## 62 win_by_KO/TKO 5589 54.33 0 0.00 0 0
## 63 win_by_Submission 6753 65.64 0 0.00 0 0
## 64 win_by_TKO_Doctor_Stoppage 9729 94.57 0 0.00 0 0
## 65 wins 2878 27.97 0 0.00 0 0
## 66 Stance 0 0.00 293 2.85 0 0
## 67 Height_cms 0 0.00 12 0.12 0 0
## 68 Reach_cms 0 0.00 982 9.55 0 0
## 69 Weight_lbs 0 0.00 9 0.09 0 0
## 70 age 0 0.00 0 0.00 0 0
## 71 Referee 0 0.00 46 0.45 0 0
## 72 date 0 0.00 0 0.00 0 0
## 73 location 0 0.00 0 0.00 0 0
## 74 title_bout 9618 93.49 0 0.00 0 0
## 75 weight_class 0 0.00 0 0.00 0 0
## 76 no_of_rounds 0 0.00 0 0.00 0 0
## 77 Corner_color 0 0.00 0 0.00 0 0
## type unique
## 1 character 3
## 2 character 1915
## 3 numeric 8
## 4 numeric 17
## 5 numeric 1
## 6 numeric 1411
## 7 numeric 1169
## 8 numeric 1368
## 9 numeric 1121
## 10 numeric 3048
## 11 numeric 2055
## 12 numeric 1540
## 13 numeric 1234
## 14 numeric 2932
## 15 numeric 1894
## 16 numeric 195
## 17 numeric 1266
## 18 numeric 1124
## 19 numeric 523
## 20 numeric 144
## 21 numeric 3142
## 22 numeric 2229
## 23 numeric 2547
## 24 numeric 309
## 25 numeric 852
## 26 numeric 530
## 27 numeric 2276
## 28 numeric 3376
## 29 numeric 2642
## 30 numeric 17
## 31 numeric 15
## 32 numeric 1331
## 33 numeric 1053
## 34 numeric 1285
## 35 numeric 1033
## 36 numeric 3012
## 37 numeric 1979
## 38 numeric 1313
## 39 numeric 1022
## 40 numeric 2860
## 41 numeric 1788
## 42 numeric 132
## 43 numeric 1134
## 44 numeric 1003
## 45 numeric 438
## 46 numeric 138
## 47 numeric 3085
## 48 numeric 2139
## 49 numeric 2467
## 50 numeric 265
## 51 numeric 727
## 52 numeric 417
## 53 numeric 2249
## 54 numeric 3277
## 55 numeric 2507
## 56 numeric 80
## 57 numeric 5008
## 58 numeric 17
## 59 numeric 3
## 60 numeric 6
## 61 numeric 11
## 62 numeric 12
## 63 numeric 14
## 64 numeric 3
## 65 numeric 24
## 66 character 5
## 67 numeric 23
## 68 numeric 25
## 69 numeric 77
## 70 numeric 31
## 71 character 190
## 72 Date 476
## 73 character 157
## 74 logical 2
## 75 character 14
## 76 numeric 5
## 77 factor 2
## Build one data frame per weight class so that the remaining missing values
## can be filled with the mean of the observed values within each class; the
## pieces are bound back together afterwards.
# NOTE(review): weight_class is a character column (see the df_status output
# above), so levels() returns NULL here; unique() would list the categories.
levels(DatosconjuntosByR_FINAL$weight_class)
## NULL
# Helper: rows of the combined data set whose weight_class equals `categoria`.
# which() discards comparisons that evaluate to NA, as subset() does.
filtrar_categoria <- function(categoria) {
  pertenece <- DatosconjuntosByR_FINAL$weight_class == categoria
  DatosconjuntosByR_FINAL[which(pertenece), , drop = FALSE]
}
# One data frame per weight class.
Datos_Bantamweight <- filtrar_categoria("Bantamweight")
Datos_CatchWeight <- filtrar_categoria("Catch Weight")
Datos_Featherweight <- filtrar_categoria("Featherweight")
Datos_Flyweight <- filtrar_categoria("Flyweight")
Datos_Heavyweight <- filtrar_categoria("Heavyweight")
Datos_LightHeavyweight <- filtrar_categoria("Light Heavyweight")
Datos_Lightweight <- filtrar_categoria("Lightweight")
Datos_Middleweight <- filtrar_categoria("Middleweight")
Datos_OpenWeight <- filtrar_categoria("Open Weight")
Datos_Welterweight <- filtrar_categoria("Welterweight")
Datos_WomenBantamweight <- filtrar_categoria("Women's Bantamweight")
Datos_WomenFeatherweight <- filtrar_categoria("Women's Featherweight")
Datos_WomenFlyweight <- filtrar_categoria("Women's Flyweight")
Datos_WomenStrawweight <- filtrar_categoria("Women's Strawweight")
# Show that the Bantamweight subset still contains NA values.
df_status(Datos_Bantamweight)
## variable q_zeros p_zeros q_na p_na q_inf p_inf
## 1 Winner 371 48.94 0 0.00 0 0
## 2 fighter 0 0.00 0 0.00 0 0
## 3 current_lose_streak 481 63.46 0 0.00 0 0
## 4 current_win_streak 414 54.62 0 0.00 0 0
## 5 draw 758 100.00 0 0.00 0 0
## 6 avg_BODY_att 8 1.06 137 18.07 0 0
## 7 avg_BODY_landed 14 1.85 137 18.07 0 0
## 8 avg_CLINCH_att 29 3.83 137 18.07 0 0
## 9 avg_CLINCH_landed 53 6.99 137 18.07 0 0
## 10 avg_DISTANCE_att 0 0.00 137 18.07 0 0
## 11 avg_DISTANCE_landed 5 0.66 137 18.07 0 0
## 12 avg_GROUND_att 54 7.12 137 18.07 0 0
## 13 avg_GROUND_landed 66 8.71 137 18.07 0 0
## 14 avg_HEAD_att 0 0.00 137 18.07 0 0
## 15 avg_HEAD_landed 5 0.66 137 18.07 0 0
## 16 avg_KD 296 39.05 137 18.07 0 0
## 17 avg_LEG_att 17 2.24 137 18.07 0 0
## 18 avg_LEG_landed 19 2.51 137 18.07 0 0
## 19 avg_PASS 152 20.05 137 18.07 0 0
## 20 avg_REV 382 50.40 137 18.07 0 0
## 21 avg_SIG_STR_att 0 0.00 137 18.07 0 0
## 22 avg_SIG_STR_landed 1 0.13 137 18.07 0 0
## 23 avg_SIG_STR_pct 1 0.13 137 18.07 0 0
## 24 avg_SUB_ATT 220 29.02 137 18.07 0 0
## 25 avg_TD_att 68 8.97 137 18.07 0 0
## 26 avg_TD_landed 122 16.09 137 18.07 0 0
## 27 avg_TD_pct 122 16.09 137 18.07 0 0
## 28 avg_TOTAL_STR_att 0 0.00 137 18.07 0 0
## 29 avg_TOTAL_STR_landed 1 0.13 137 18.07 0 0
## 30 longest_win_streak 224 29.55 0 0.00 0 0
## 31 losses 270 35.62 0 0.00 0 0
## 32 avg_opp_BODY_att 9 1.19 137 18.07 0 0
## 33 avg_opp_BODY_landed 18 2.37 137 18.07 0 0
## 34 avg_opp_CLINCH_att 27 3.56 137 18.07 0 0
## 35 avg_opp_CLINCH_landed 41 5.41 137 18.07 0 0
## 36 avg_opp_DISTANCE_att 0 0.00 137 18.07 0 0
## 37 avg_opp_DISTANCE_landed 2 0.26 137 18.07 0 0
## 38 avg_opp_GROUND_att 71 9.37 137 18.07 0 0
## 39 avg_opp_GROUND_landed 88 11.61 137 18.07 0 0
## 40 avg_opp_HEAD_att 0 0.00 137 18.07 0 0
## 41 avg_opp_HEAD_landed 2 0.26 137 18.07 0 0
## 42 avg_opp_KD 399 52.64 137 18.07 0 0
## 43 avg_opp_LEG_att 16 2.11 137 18.07 0 0
## 44 avg_opp_LEG_landed 24 3.17 137 18.07 0 0
## 45 avg_opp_PASS 181 23.88 137 18.07 0 0
## 46 avg_opp_REV 395 52.11 137 18.07 0 0
## 47 avg_opp_SIG_STR_att 0 0.00 137 18.07 0 0
## 48 avg_opp_SIG_STR_landed 2 0.26 137 18.07 0 0
## 49 avg_opp_SIG_STR_pct 2 0.26 137 18.07 0 0
## 50 avg_opp_SUB_ATT 273 36.02 137 18.07 0 0
## 51 avg_opp_TD_att 54 7.12 137 18.07 0 0
## 52 avg_opp_TD_landed 126 16.62 137 18.07 0 0
## 53 avg_opp_TD_pct 126 16.62 137 18.07 0 0
## 54 avg_opp_TOTAL_STR_att 0 0.00 137 18.07 0 0
## 55 avg_opp_TOTAL_STR_landed 2 0.26 137 18.07 0 0
## 56 total_rounds_fought 137 18.07 0 0.00 0 0
## 57 total_time_fought(seconds) 0 0.00 137 18.07 0 0
## 58 total_title_bouts 656 86.54 0 0.00 0 0
## 59 win_by_Decision_Majority 753 99.34 0 0.00 0 0
## 60 win_by_Decision_Split 604 79.68 0 0.00 0 0
## 61 win_by_Decision_Unanimous 395 52.11 0 0.00 0 0
## 62 win_by_KO/TKO 476 62.80 0 0.00 0 0
## 63 win_by_Submission 517 68.21 0 0.00 0 0
## 64 win_by_TKO_Doctor_Stoppage 751 99.08 0 0.00 0 0
## 65 wins 224 29.55 0 0.00 0 0
## 66 Stance 0 0.00 36 4.75 0 0
## 67 Height_cms 0 0.00 0 0.00 0 0
## 68 Reach_cms 0 0.00 11 1.45 0 0
## 69 Weight_lbs 0 0.00 0 0.00 0 0
## 70 age 0 0.00 0 0.00 0 0
## 71 Referee 0 0.00 10 1.32 0 0
## 72 date 0 0.00 0 0.00 0 0
## 73 location 0 0.00 0 0.00 0 0
## 74 title_bout 720 94.99 0 0.00 0 0
## 75 weight_class 0 0.00 0 0.00 0 0
## 76 no_of_rounds 0 0.00 0 0.00 0 0
## 77 Corner_color 0 0.00 0 0.00 0 0
## type unique
## 1 character 3
## 2 character 188
## 3 numeric 5
## 4 numeric 8
## 5 numeric 1
## 6 numeric 245
## 7 numeric 229
## 8 numeric 249
## 9 numeric 215
## 10 numeric 438
## 11 numeric 354
## 12 numeric 273
## 13 numeric 249
## 14 numeric 434
## 15 numeric 323
## 16 numeric 59
## 17 numeric 255
## 18 numeric 220
## 19 numeric 130
## 20 numeric 51
## 21 numeric 448
## 22 numeric 368
## 23 numeric 377
## 24 numeric 82
## 25 numeric 177
## 26 numeric 121
## 27 numeric 283
## 28 numeric 473
## 29 numeric 405
## 30 numeric 8
## 31 numeric 9
## 32 numeric 259
## 33 numeric 216
## 34 numeric 222
## 35 numeric 197
## 36 numeric 450
## 37 numeric 350
## 38 numeric 239
## 39 numeric 207
## 40 numeric 432
## 41 numeric 324
## 42 numeric 33
## 43 numeric 229
## 44 numeric 204
## 45 numeric 116
## 46 numeric 46
## 47 numeric 446
## 48 numeric 360
## 49 numeric 368
## 50 numeric 66
## 51 numeric 169
## 52 numeric 113
## 53 numeric 295
## 54 numeric 471
## 55 numeric 402
## 56 numeric 46
## 57 numeric 473
## 58 numeric 7
## 59 numeric 2
## 60 numeric 4
## 61 numeric 7
## 62 numeric 7
## 63 numeric 7
## 64 numeric 2
## 65 numeric 13
## 66 character 3
## 67 numeric 11
## 68 numeric 14
## 69 numeric 4
## 70 numeric 21
## 71 character 94
## 72 Date 224
## 73 character 109
## 74 logical 2
## 75 character 1
## 76 numeric 3
## 77 factor 2
# Fill the NA values of every numeric Bantamweight column with that column's
# mean over the observed values. The numeric-column mask is computed once with
# vapply(), which always returns a logical vector, and TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
cols_numericas <- vapply(Datos_Bantamweight, is.numeric, logical(1))
Datos_Bantamweight[cols_numericas] <- lapply(
  Datos_Bantamweight[cols_numericas],
  function(x) ifelse(is.na(x), mean(x, na.rm = TRUE), x)
)
# Show that the NA values were replaced by each column's mean.
df_status(Datos_Bantamweight)
## variable q_zeros p_zeros q_na p_na q_inf p_inf
## 1 Winner 371 48.94 0 0.00 0 0
## 2 fighter 0 0.00 0 0.00 0 0
## 3 current_lose_streak 481 63.46 0 0.00 0 0
## 4 current_win_streak 414 54.62 0 0.00 0 0
## 5 draw 758 100.00 0 0.00 0 0
## 6 avg_BODY_att 8 1.06 0 0.00 0 0
## 7 avg_BODY_landed 14 1.85 0 0.00 0 0
## 8 avg_CLINCH_att 29 3.83 0 0.00 0 0
## 9 avg_CLINCH_landed 53 6.99 0 0.00 0 0
## 10 avg_DISTANCE_att 0 0.00 0 0.00 0 0
## 11 avg_DISTANCE_landed 5 0.66 0 0.00 0 0
## 12 avg_GROUND_att 54 7.12 0 0.00 0 0
## 13 avg_GROUND_landed 66 8.71 0 0.00 0 0
## 14 avg_HEAD_att 0 0.00 0 0.00 0 0
## 15 avg_HEAD_landed 5 0.66 0 0.00 0 0
## 16 avg_KD 296 39.05 0 0.00 0 0
## 17 avg_LEG_att 17 2.24 0 0.00 0 0
## 18 avg_LEG_landed 19 2.51 0 0.00 0 0
## 19 avg_PASS 152 20.05 0 0.00 0 0
## 20 avg_REV 382 50.40 0 0.00 0 0
## 21 avg_SIG_STR_att 0 0.00 0 0.00 0 0
## 22 avg_SIG_STR_landed 1 0.13 0 0.00 0 0
## 23 avg_SIG_STR_pct 1 0.13 0 0.00 0 0
## 24 avg_SUB_ATT 220 29.02 0 0.00 0 0
## 25 avg_TD_att 68 8.97 0 0.00 0 0
## 26 avg_TD_landed 122 16.09 0 0.00 0 0
## 27 avg_TD_pct 122 16.09 0 0.00 0 0
## 28 avg_TOTAL_STR_att 0 0.00 0 0.00 0 0
## 29 avg_TOTAL_STR_landed 1 0.13 0 0.00 0 0
## 30 longest_win_streak 224 29.55 0 0.00 0 0
## 31 losses 270 35.62 0 0.00 0 0
## 32 avg_opp_BODY_att 9 1.19 0 0.00 0 0
## 33 avg_opp_BODY_landed 18 2.37 0 0.00 0 0
## 34 avg_opp_CLINCH_att 27 3.56 0 0.00 0 0
## 35 avg_opp_CLINCH_landed 41 5.41 0 0.00 0 0
## 36 avg_opp_DISTANCE_att 0 0.00 0 0.00 0 0
## 37 avg_opp_DISTANCE_landed 2 0.26 0 0.00 0 0
## 38 avg_opp_GROUND_att 71 9.37 0 0.00 0 0
## 39 avg_opp_GROUND_landed 88 11.61 0 0.00 0 0
## 40 avg_opp_HEAD_att 0 0.00 0 0.00 0 0
## 41 avg_opp_HEAD_landed 2 0.26 0 0.00 0 0
## 42 avg_opp_KD 399 52.64 0 0.00 0 0
## 43 avg_opp_LEG_att 16 2.11 0 0.00 0 0
## 44 avg_opp_LEG_landed 24 3.17 0 0.00 0 0
## 45 avg_opp_PASS 181 23.88 0 0.00 0 0
## 46 avg_opp_REV 395 52.11 0 0.00 0 0
## 47 avg_opp_SIG_STR_att 0 0.00 0 0.00 0 0
## 48 avg_opp_SIG_STR_landed 2 0.26 0 0.00 0 0
## 49 avg_opp_SIG_STR_pct 2 0.26 0 0.00 0 0
## 50 avg_opp_SUB_ATT 273 36.02 0 0.00 0 0
## 51 avg_opp_TD_att 54 7.12 0 0.00 0 0
## 52 avg_opp_TD_landed 126 16.62 0 0.00 0 0
## 53 avg_opp_TD_pct 126 16.62 0 0.00 0 0
## 54 avg_opp_TOTAL_STR_att 0 0.00 0 0.00 0 0
## 55 avg_opp_TOTAL_STR_landed 2 0.26 0 0.00 0 0
## 56 total_rounds_fought 137 18.07 0 0.00 0 0
## 57 total_time_fought(seconds) 0 0.00 0 0.00 0 0
## 58 total_title_bouts 656 86.54 0 0.00 0 0
## 59 win_by_Decision_Majority 753 99.34 0 0.00 0 0
## 60 win_by_Decision_Split 604 79.68 0 0.00 0 0
## 61 win_by_Decision_Unanimous 395 52.11 0 0.00 0 0
## 62 win_by_KO/TKO 476 62.80 0 0.00 0 0
## 63 win_by_Submission 517 68.21 0 0.00 0 0
## 64 win_by_TKO_Doctor_Stoppage 751 99.08 0 0.00 0 0
## 65 wins 224 29.55 0 0.00 0 0
## 66 Stance 0 0.00 36 4.75 0 0
## 67 Height_cms 0 0.00 0 0.00 0 0
## 68 Reach_cms 0 0.00 0 0.00 0 0
## 69 Weight_lbs 0 0.00 0 0.00 0 0
## 70 age 0 0.00 0 0.00 0 0
## 71 Referee 0 0.00 10 1.32 0 0
## 72 date 0 0.00 0 0.00 0 0
## 73 location 0 0.00 0 0.00 0 0
## 74 title_bout 720 94.99 0 0.00 0 0
## 75 weight_class 0 0.00 0 0.00 0 0
## 76 no_of_rounds 0 0.00 0 0.00 0 0
## 77 Corner_color 0 0.00 0 0.00 0 0
## type unique
## 1 character 3
## 2 character 188
## 3 numeric 5
## 4 numeric 8
## 5 numeric 1
## 6 numeric 246
## 7 numeric 230
## 8 numeric 250
## 9 numeric 216
## 10 numeric 439
## 11 numeric 355
## 12 numeric 274
## 13 numeric 250
## 14 numeric 435
## 15 numeric 324
## 16 numeric 60
## 17 numeric 256
## 18 numeric 221
## 19 numeric 131
## 20 numeric 52
## 21 numeric 449
## 22 numeric 369
## 23 numeric 378
## 24 numeric 83
## 25 numeric 178
## 26 numeric 122
## 27 numeric 284
## 28 numeric 474
## 29 numeric 406
## 30 numeric 8
## 31 numeric 9
## 32 numeric 260
## 33 numeric 217
## 34 numeric 223
## 35 numeric 198
## 36 numeric 451
## 37 numeric 351
## 38 numeric 240
## 39 numeric 208
## 40 numeric 433
## 41 numeric 325
## 42 numeric 34
## 43 numeric 230
## 44 numeric 205
## 45 numeric 117
## 46 numeric 47
## 47 numeric 447
## 48 numeric 361
## 49 numeric 369
## 50 numeric 67
## 51 numeric 170
## 52 numeric 114
## 53 numeric 296
## 54 numeric 472
## 55 numeric 403
## 56 numeric 46
## 57 numeric 474
## 58 numeric 7
## 59 numeric 2
## 60 numeric 4
## 61 numeric 7
## 62 numeric 7
## 63 numeric 7
## 64 numeric 2
## 65 numeric 13
## 66 character 3
## 67 numeric 11
## 68 numeric 15
## 69 numeric 4
## 70 numeric 21
## 71 character 94
## 72 Date 224
## 73 character 109
## 74 logical 2
## 75 character 1
## 76 numeric 3
## 77 factor 2
# Helper: return `datos` with the NA values of every numeric column replaced by
# that column's mean over the observed values. Non-numeric columns are left
# untouched. It replaces thirteen copies of the same long expression, uses
# vapply() so the column mask is always logical, and spells out TRUE instead
# of the reassignable T.
imputar_media_numericas <- function(datos) {
  es_numerica <- vapply(datos, is.numeric, logical(1))
  datos[es_numerica] <- lapply(
    datos[es_numerica],
    function(x) ifelse(is.na(x), mean(x, na.rm = TRUE), x)
  )
  datos
}
# Apply the per-class mean imputation to each remaining weight class.
Datos_CatchWeight <- imputar_media_numericas(Datos_CatchWeight)
Datos_Featherweight <- imputar_media_numericas(Datos_Featherweight)
Datos_Flyweight <- imputar_media_numericas(Datos_Flyweight)
Datos_Heavyweight <- imputar_media_numericas(Datos_Heavyweight)
Datos_LightHeavyweight <- imputar_media_numericas(Datos_LightHeavyweight)
Datos_Lightweight <- imputar_media_numericas(Datos_Lightweight)
Datos_Middleweight <- imputar_media_numericas(Datos_Middleweight)
Datos_OpenWeight <- imputar_media_numericas(Datos_OpenWeight)
Datos_Welterweight <- imputar_media_numericas(Datos_Welterweight)
Datos_WomenBantamweight <- imputar_media_numericas(Datos_WomenBantamweight)
Datos_WomenFeatherweight <- imputar_media_numericas(Datos_WomenFeatherweight)
Datos_WomenFlyweight <- imputar_media_numericas(Datos_WomenFlyweight)
Datos_WomenStrawweight <- imputar_media_numericas(Datos_WomenStrawweight)
# Stack the per-category data frames (after NA imputation) back into a single
# data frame, then print its per-column zero / NA / Inf / type / unique report.
Datosconjuntos_sinNA <- rbind(
  Datos_Bantamweight,
  Datos_CatchWeight,
  Datos_Featherweight,
  Datos_Flyweight,
  Datos_Heavyweight,
  Datos_LightHeavyweight,
  Datos_Lightweight,
  Datos_OpenWeight,
  Datos_Middleweight,
  Datos_Welterweight,
  Datos_WomenBantamweight,
  Datos_WomenFeatherweight,
  Datos_WomenFlyweight,
  Datos_WomenStrawweight
)
df_status(Datosconjuntos_sinNA)
## variable q_zeros p_zeros q_na p_na q_inf p_inf
## 1 Winner 5061 49.19 0 0.00 0 0
## 2 fighter 0 0.00 0 0.00 0 0
## 3 current_lose_streak 6610 64.25 0 0.00 0 0
## 4 current_win_streak 5593 54.36 0 0.00 0 0
## 5 draw 10288 100.00 0 0.00 0 0
## 6 avg_BODY_att 320 3.11 0 0.00 0 0
## 7 avg_BODY_landed 429 4.17 0 0.00 0 0
## 8 avg_CLINCH_att 466 4.53 0 0.00 0 0
## 9 avg_CLINCH_landed 609 5.92 0 0.00 0 0
## 10 avg_DISTANCE_att 62 0.60 0 0.00 0 0
## 11 avg_DISTANCE_landed 187 1.82 0 0.00 0 0
## 12 avg_GROUND_att 701 6.81 0 0.00 0 0
## 13 avg_GROUND_landed 834 8.11 0 0.00 0 0
## 14 avg_HEAD_att 28 0.27 0 0.00 0 0
## 15 avg_HEAD_landed 111 1.08 0 0.00 0 0
## 16 avg_KD 3654 35.52 0 0.00 0 0
## 17 avg_LEG_att 592 5.75 0 0.00 0 0
## 18 avg_LEG_landed 689 6.70 0 0.00 0 0
## 19 avg_PASS 1677 16.30 0 0.00 0 0
## 20 avg_REV 4860 47.24 0 0.00 0 0
## 21 avg_SIG_STR_att 19 0.18 0 0.00 0 0
## 22 avg_SIG_STR_landed 66 0.64 0 0.00 0 0
## 23 avg_SIG_STR_pct 66 0.64 0 0.00 0 0
## 24 avg_SUB_ATT 2827 27.48 0 0.00 0 0
## 25 avg_TD_att 877 8.52 0 0.00 0 0
## 26 avg_TD_landed 1549 15.06 0 0.00 0 0
## 27 avg_TD_pct 1549 15.06 0 0.00 0 0
## 28 avg_TOTAL_STR_att 14 0.14 0 0.00 0 0
## 29 avg_TOTAL_STR_landed 32 0.31 0 0.00 0 0
## 30 longest_win_streak 2878 27.97 0 0.00 0 0
## 31 losses 3492 33.94 0 0.00 0 0
## 32 avg_opp_BODY_att 317 3.08 0 0.00 0 0
## 33 avg_opp_BODY_landed 445 4.33 0 0.00 0 0
## 34 avg_opp_CLINCH_att 496 4.82 0 0.00 0 0
## 35 avg_opp_CLINCH_landed 618 6.01 0 0.00 0 0
## 36 avg_opp_DISTANCE_att 43 0.42 0 0.00 0 0
## 37 avg_opp_DISTANCE_landed 179 1.74 0 0.00 0 0
## 38 avg_opp_GROUND_att 942 9.16 0 0.00 0 0
## 39 avg_opp_GROUND_landed 1092 10.61 0 0.00 0 0
## 40 avg_opp_HEAD_att 36 0.35 0 0.00 0 0
## 41 avg_opp_HEAD_landed 139 1.35 0 0.00 0 0
## 42 avg_opp_KD 4583 44.55 0 0.00 0 0
## 43 avg_opp_LEG_att 521 5.06 0 0.00 0 0
## 44 avg_opp_LEG_landed 617 6.00 0 0.00 0 0
## 45 avg_opp_PASS 1998 19.42 0 0.00 0 0
## 46 avg_opp_REV 4917 47.79 0 0.00 0 0
## 47 avg_opp_SIG_STR_att 17 0.17 0 0.00 0 0
## 48 avg_opp_SIG_STR_landed 69 0.67 0 0.00 0 0
## 49 avg_opp_SIG_STR_pct 69 0.67 0 0.00 0 0
## 50 avg_opp_SUB_ATT 2976 28.93 0 0.00 0 0
## 51 avg_opp_TD_att 762 7.41 0 0.00 0 0
## 52 avg_opp_TD_landed 1577 15.33 0 0.00 0 0
## 53 avg_opp_TD_pct 1577 15.33 0 0.00 0 0
## 54 avg_opp_TOTAL_STR_att 13 0.13 0 0.00 0 0
## 55 avg_opp_TOTAL_STR_landed 44 0.43 0 0.00 0 0
## 56 total_rounds_fought 1915 18.61 0 0.00 0 0
## 57 total_time_fought(seconds) 0 0.00 0 0.00 0 0
## 58 total_title_bouts 8230 80.00 0 0.00 0 0
## 59 win_by_Decision_Majority 10062 97.80 0 0.00 0 0
## 60 win_by_Decision_Split 8296 80.64 0 0.00 0 0
## 61 win_by_Decision_Unanimous 5525 53.70 0 0.00 0 0
## 62 win_by_KO/TKO 5589 54.33 0 0.00 0 0
## 63 win_by_Submission 6753 65.64 0 0.00 0 0
## 64 win_by_TKO_Doctor_Stoppage 9729 94.57 0 0.00 0 0
## 65 wins 2878 27.97 0 0.00 0 0
## 66 Stance 0 0.00 293 2.85 0 0
## 67 Height_cms 0 0.00 0 0.00 0 0
## 68 Reach_cms 0 0.00 0 0.00 0 0
## 69 Weight_lbs 0 0.00 0 0.00 0 0
## 70 age 0 0.00 0 0.00 0 0
## 71 Referee 0 0.00 46 0.45 0 0
## 72 date 0 0.00 0 0.00 0 0
## 73 location 0 0.00 0 0.00 0 0
## 74 title_bout 9618 93.49 0 0.00 0 0
## 75 weight_class 0 0.00 0 0.00 0 0
## 76 no_of_rounds 0 0.00 0 0.00 0 0
## 77 Corner_color 0 0.00 0 0.00 0 0
## type unique
## 1 character 3
## 2 character 1915
## 3 numeric 8
## 4 numeric 17
## 5 numeric 1
## 6 numeric 1425
## 7 numeric 1183
## 8 numeric 1382
## 9 numeric 1135
## 10 numeric 3062
## 11 numeric 2069
## 12 numeric 1554
## 13 numeric 1248
## 14 numeric 2946
## 15 numeric 1908
## 16 numeric 209
## 17 numeric 1280
## 18 numeric 1138
## 19 numeric 537
## 20 numeric 158
## 21 numeric 3156
## 22 numeric 2243
## 23 numeric 2561
## 24 numeric 323
## 25 numeric 866
## 26 numeric 544
## 27 numeric 2290
## 28 numeric 3390
## 29 numeric 2656
## 30 numeric 17
## 31 numeric 15
## 32 numeric 1345
## 33 numeric 1067
## 34 numeric 1299
## 35 numeric 1047
## 36 numeric 3026
## 37 numeric 1993
## 38 numeric 1327
## 39 numeric 1036
## 40 numeric 2874
## 41 numeric 1802
## 42 numeric 145
## 43 numeric 1148
## 44 numeric 1017
## 45 numeric 452
## 46 numeric 152
## 47 numeric 3099
## 48 numeric 2153
## 49 numeric 2481
## 50 numeric 279
## 51 numeric 741
## 52 numeric 431
## 53 numeric 2263
## 54 numeric 3291
## 55 numeric 2521
## 56 numeric 80
## 57 numeric 5022
## 58 numeric 17
## 59 numeric 3
## 60 numeric 6
## 61 numeric 11
## 62 numeric 12
## 63 numeric 14
## 64 numeric 3
## 65 numeric 24
## 66 character 5
## 67 numeric 28
## 68 numeric 36
## 69 numeric 82
## 70 numeric 31
## 71 character 190
## 72 Date 476
## 73 character 157
## 74 logical 2
## 75 character 14
## 76 numeric 5
## 77 factor 2
Usamos la función summary para obtener el mínimo, el máximo, los cuartiles, la mediana y el promedio de cada variable numérica. Usamos la función profiling_num (del paquete funModeling) para obtener el promedio, el desvío estándar y el coeficiente de variación, entre otras medidas.
# Per-column summary of the combined data frame (min, quartiles, median, mean
# and max for numeric columns; length/class/mode for character columns).
summary(Datosconjuntos_sinNA)
## Winner fighter current_lose_streak
## Length:10288 Length:10288 Min. :0.0000
## Class :character Class :character 1st Qu.:0.0000
## Mode :character Mode :character Median :0.0000
## Mean :0.4904
## 3rd Qu.:1.0000
## Max. :7.0000
## current_win_streak draw avg_BODY_att avg_BODY_landed
## Min. : 0.0000 Min. :0 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.:0 1st Qu.: 4.333 1st Qu.: 3.000
## Median : 0.0000 Median :0 Median : 7.500 Median : 5.329
## Mean : 0.9188 Mean :0 Mean : 8.710 Mean : 6.083
## 3rd Qu.: 1.0000 3rd Qu.:0 3rd Qu.:11.228 3rd Qu.: 8.000
## Max. :16.0000 Max. :0 Max. :51.000 Max. :39.000
## avg_CLINCH_att avg_CLINCH_landed avg_DISTANCE_att
## Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 4.000 1st Qu.: 2.384 1st Qu.: 27.14
## Median : 7.436 Median : 4.988 Median : 47.87
## Mean : 8.179 Mean : 5.523 Mean : 53.10
## 3rd Qu.:10.200 3rd Qu.: 6.942 3rd Qu.: 70.86
## Max. :87.000 Max. :68.000 Max. :287.50
## avg_DISTANCE_landed avg_GROUND_att avg_GROUND_landed avg_HEAD_att
## Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 9.50 1st Qu.: 3.932 1st Qu.: 2.500 1st Qu.: 35.00
## Median : 16.50 Median : 8.500 Median : 5.607 Median : 51.83
## Mean : 19.35 Mean : 9.152 Mean : 6.050 Mean : 55.57
## 3rd Qu.: 25.97 3rd Qu.:11.000 3rd Qu.: 7.305 3rd Qu.: 70.00
## Max. :131.00 Max. :96.000 Max. :62.000 Max. :277.00
## avg_HEAD_landed avg_KD avg_LEG_att avg_LEG_landed
## Min. : 0.00 Min. :0.0000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 13.00 1st Qu.:0.0000 1st Qu.: 2.372 1st Qu.: 2.000
## Median : 19.13 Median :0.2085 Median : 5.490 Median : 4.423
## Mean : 19.98 Mean :0.2514 Mean : 6.152 Mean : 4.864
## 3rd Qu.: 24.53 3rd Qu.:0.3333 3rd Qu.: 7.927 3rd Qu.: 6.256
## Max. :137.00 Max. :5.0000 Max. :63.000 Max. :47.000
## avg_PASS avg_REV avg_SIG_STR_att avg_SIG_STR_landed
## Min. : 0.0000 Min. :0.00000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.4545 1st Qu.:0.00000 1st Qu.: 45.33 1st Qu.: 20.40
## Median : 1.1429 Median :0.07476 Median : 66.33 Median : 29.46
## Mean : 1.3447 Mean :0.16017 Mean : 70.43 Mean : 30.93
## 3rd Qu.: 1.6667 3rd Qu.:0.18954 3rd Qu.: 88.60 3rd Qu.: 38.59
## Max. :15.0000 Max. :3.00000 Max. :299.00 Max. :154.00
## avg_SIG_STR_pct avg_SUB_ATT avg_TD_att avg_TD_landed
## Min. :0.0000 Min. :0.0000 Min. : 0.000 Min. : 0.000
## 1st Qu.:0.4075 1st Qu.:0.0000 1st Qu.: 1.000 1st Qu.: 0.500
## Median :0.4633 Median :0.5000 Median : 2.553 Median : 1.029
## Mean :0.4609 Mean :0.5439 Mean : 2.901 Mean : 1.242
## 3rd Qu.:0.5100 3rd Qu.:0.6923 3rd Qu.: 3.737 3rd Qu.: 1.600
## Max. :1.0000 Max. :9.0000 Max. :30.000 Max. :11.000
## avg_TD_pct avg_TOTAL_STR_att avg_TOTAL_STR_landed
## Min. :0.0000 Min. : 0.00 Min. : 0.00
## 1st Qu.:0.1850 1st Qu.: 65.07 1st Qu.: 35.89
## Median :0.3144 Median : 91.50 Median : 49.89
## Mean :0.3255 Mean : 92.87 Mean : 50.95
## 3rd Qu.:0.4325 3rd Qu.:115.57 3rd Qu.: 62.17
## Max. :1.0000 Max. :360.00 Max. :230.00
## longest_win_streak losses avg_opp_BODY_att
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 4.000
## Median : 1.000 Median : 1.000 Median : 7.286
## Mean : 1.921 Mean : 1.708 Mean : 8.180
## 3rd Qu.: 3.000 3rd Qu.: 3.000 3rd Qu.:10.750
## Max. :16.000 Max. :14.000 Max. :75.000
## avg_opp_BODY_landed avg_opp_CLINCH_att avg_opp_CLINCH_landed
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 3.000 1st Qu.: 3.500 1st Qu.: 2.000
## Median : 5.000 Median : 6.826 Median : 4.412
## Mean : 5.538 Mean : 7.374 Mean : 4.851
## 3rd Qu.: 7.000 3rd Qu.: 9.000 3rd Qu.: 6.000
## Max. :48.000 Max. :105.000 Max. :84.000
## avg_opp_DISTANCE_att avg_opp_DISTANCE_landed avg_opp_GROUND_att
## Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.: 26.89 1st Qu.: 8.667 1st Qu.: 2.250
## Median : 47.00 Median : 15.553 Median : 6.000
## Mean : 51.74 Mean : 17.952 Mean : 6.814
## 3rd Qu.: 69.00 3rd Qu.: 24.000 3rd Qu.: 8.153
## Max. :440.00 Max. :150.000 Max. :104.000
## avg_opp_GROUND_landed avg_opp_HEAD_att avg_opp_HEAD_landed
## Min. : 0.000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1.500 1st Qu.: 31.00 1st Qu.: 10.00
## Median : 3.923 Median : 47.84 Median : 15.39
## Mean : 4.426 Mean : 51.78 Mean : 17.02
## 3rd Qu.: 5.286 3rd Qu.: 66.21 3rd Qu.: 21.32
## Max. :84.000 Max. :400.00 Max. :132.00
## avg_opp_KD avg_opp_LEG_att avg_opp_LEG_landed avg_opp_PASS
## Min. :0.0000 Min. : 0.000 Min. : 0.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.: 2.900 1st Qu.: 2.082 1st Qu.: 0.2500
## Median :0.1000 Median : 5.410 Median : 4.200 Median : 0.8333
## Mean :0.1562 Mean : 5.971 Mean : 4.666 Mean : 1.0435
## 3rd Qu.:0.1984 3rd Qu.: 7.548 3rd Qu.: 6.000 3rd Qu.: 1.2194
## Max. :3.0000 Max. :63.000 Max. :50.000 Max. :19.0000
## avg_opp_REV avg_opp_SIG_STR_att avg_opp_SIG_STR_landed
## Min. :0.00000 Min. : 0.00 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.: 40.55 1st Qu.: 17.04
## Median :0.07692 Median : 61.93 Median : 25.86
## Mean :0.15614 Mean : 65.93 Mean : 27.23
## 3rd Qu.:0.20000 3rd Qu.: 83.36 3rd Qu.: 34.00
## Max. :3.00000 Max. :454.00 Max. :202.00
## avg_opp_SIG_STR_pct avg_opp_SUB_ATT avg_opp_TD_att avg_opp_TD_landed
## Min. :0.0000 Min. :0.0000 Min. : 0.000 Min. : 0.000
## 1st Qu.:0.3656 1st Qu.:0.0000 1st Qu.: 1.333 1st Qu.: 0.400
## Median :0.4190 Median :0.3333 Median : 2.644 Median : 0.988
## Mean :0.4199 Mean :0.4533 Mean : 2.844 Mean : 1.048
## 3rd Qu.:0.4615 3rd Qu.:0.5655 3rd Qu.: 3.600 3rd Qu.: 1.302
## Max. :1.0000 Max. :8.0000 Max. :22.000 Max. :11.500
## avg_opp_TD_pct avg_opp_TOTAL_STR_att avg_opp_TOTAL_STR_landed
## Min. :0.0000 Min. : 0.00 Min. : 0.00
## 1st Qu.:0.1250 1st Qu.: 59.67 1st Qu.: 32.00
## Median :0.2521 Median : 84.36 Median : 44.00
## Mean :0.2660 Mean : 86.24 Mean : 45.41
## 3rd Qu.:0.3333 3rd Qu.:106.84 3rd Qu.: 55.00
## Max. :1.0000 Max. :461.00 Max. :232.00
## total_rounds_fought total_time_fought(seconds) total_title_bouts
## Min. : 0.00 Min. : 7.0 Min. : 0.0000
## 1st Qu.: 2.00 1st Qu.: 482.2 1st Qu.: 0.0000
## Median : 6.00 Median : 610.8 Median : 0.0000
## Mean :10.89 Mean : 595.8 Mean : 0.4385
## 3rd Qu.:16.00 3rd Qu.: 731.7 3rd Qu.: 0.0000
## Max. :80.00 Max. :1500.0 Max. :16.0000
## win_by_Decision_Majority win_by_Decision_Split
## Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000
## Mean :0.02226 Mean :0.2466
## 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :2.00000 Max. :5.0000
## win_by_Decision_Unanimous win_by_KO/TKO win_by_Submission
## Min. : 0.0000 Min. : 0.000 Min. : 0.0000
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 0.0000
## Median : 0.0000 Median : 0.000 Median : 0.0000
## Mean : 0.9787 Mean : 1.064 Mean : 0.6646
## 3rd Qu.: 1.0000 3rd Qu.: 1.000 3rd Qu.: 1.0000
## Max. :10.0000 Max. :11.000 Max. :13.0000
## win_by_TKO_Doctor_Stoppage wins Stance
## Min. :0.00000 Min. : 0.000 Length:10288
## 1st Qu.:0.00000 1st Qu.: 0.000 Class :character
## Median :0.00000 Median : 2.000 Mode :character
## Mean :0.05832 Mean : 3.041
## 3rd Qu.:0.00000 3rd Qu.: 5.000
## Max. :2.00000 Max. :23.000
## Height_cms Reach_cms Weight_lbs age
## Min. :152.4 Min. :152.4 Min. :115.0 Min. :18.0
## 1st Qu.:172.7 1st Qu.:177.8 1st Qu.:145.0 1st Qu.:26.0
## Median :180.3 Median :185.4 Median :170.0 Median :29.0
## Mean :179.3 Mean :183.9 Mean :172.1 Mean :29.3
## 3rd Qu.:185.4 3rd Qu.:190.5 3rd Qu.:185.0 3rd Qu.:32.0
## Max. :210.8 Max. :213.4 Max. :770.0 Max. :51.0
## Referee date location
## Length:10288 Min. :1993-11-12 Length:10288
## Class :character 1st Qu.:2010-02-20 Class :character
## Mode :character Median :2014-02-08 Mode :character
## Mean :2012-10-16
## 3rd Qu.:2016-10-01
## Max. :2019-06-08
## title_bout weight_class no_of_rounds Corner_color
## Mode :logical Length:10288 Min. :1.000 Blue:5144
## FALSE:9618 Class :character 1st Qu.:3.000 Red :5144
## TRUE :670 Mode :character Median :3.000
## Mean :3.119
## 3rd Qu.:3.000
## Max. :5.000
# profiling_num() reports, for each numeric (and logical) column: mean,
# std_dev, variation_coef, percentiles (p_01 ... p_99), skewness, kurtosis,
# iqr and the range_98 / range_80 intervals.
profiling_num(Datosconjuntos_sinNA)
## variable mean std_dev variation_coef
## 1 current_lose_streak 0.49037714 0.7771556 1.58481198
## 2 current_win_streak 0.91883748 1.4515013 1.57971500
## 3 draw 0.00000000 0.0000000 NaN
## 4 avg_BODY_att 8.71000871 6.4046646 0.73532241
## 5 avg_BODY_landed 6.08341667 4.5144361 0.74208893
## 6 avg_CLINCH_att 8.17874960 6.6732408 0.81592433
## 7 avg_CLINCH_landed 5.52342173 4.7900979 0.86723378
## 8 avg_DISTANCE_att 53.10026547 37.2384687 0.70128592
## 9 avg_DISTANCE_landed 19.35383190 14.2509535 0.73633757
## 10 avg_GROUND_att 9.15216618 8.0507113 0.87965091
## 11 avg_GROUND_landed 6.05046477 5.3004000 0.87603187
## 12 avg_HEAD_att 55.56920963 32.3309559 0.58181421
## 13 avg_HEAD_landed 19.97994586 11.4664927 0.57390009
## 14 avg_KD 0.25144295 0.3033193 1.20631460
## 15 avg_LEG_att 6.15196292 5.3448538 0.86880462
## 16 avg_LEG_landed 4.86435587 4.1903226 0.86143421
## 17 avg_PASS 1.34467487 1.3123759 0.97598005
## 18 avg_REV 0.16017024 0.2723779 1.70055267
## 19 avg_SIG_STR_att 70.43118125 39.5327638 0.56129633
## 20 avg_SIG_STR_landed 30.92771840 16.6560468 0.53854754
## 21 avg_SIG_STR_pct 0.46091484 0.1127036 0.24452152
## 22 avg_SUB_ATT 0.54386291 0.6168013 1.13411181
## 23 avg_TD_att 2.90139392 2.4959360 0.86025409
## 24 avg_TD_landed 1.24246185 1.1296878 0.90923339
## 25 avg_TD_pct 0.32554212 0.2252240 0.69184299
## 26 avg_TOTAL_STR_att 92.86906226 45.2356462 0.48709059
## 27 avg_TOTAL_STR_landed 50.94946637 25.3916029 0.49836838
## 28 longest_win_streak 1.92126750 1.9572128 1.01870918
## 29 losses 1.70752333 2.0110257 1.17774423
## 30 avg_opp_BODY_att 8.18017855 5.9158815 0.72319712
## 31 avg_opp_BODY_landed 5.53808112 4.1120332 0.74250144
## 32 avg_opp_CLINCH_att 7.37432368 6.1729892 0.83709225
## 33 avg_opp_CLINCH_landed 4.85050429 4.3662859 0.90017154
## 34 avg_opp_DISTANCE_att 51.74311705 36.7550988 0.71033793
## 35 avg_opp_DISTANCE_landed 17.95170040 13.5772189 0.75631938
## 36 avg_opp_GROUND_att 6.81378753 7.2249697 1.06034562
## 37 avg_opp_GROUND_landed 4.42650154 4.7803643 1.07994186
## 38 avg_opp_HEAD_att 51.77965982 32.1631307 0.62115377
## 39 avg_opp_HEAD_landed 17.02483678 11.1646655 0.65578693
## 40 avg_opp_KD 0.15618137 0.2428551 1.55495593
## 41 avg_opp_LEG_att 5.97138989 4.8792442 0.81710360
## 42 avg_opp_LEG_landed 4.66578834 3.8505043 0.82526338
## 43 avg_opp_PASS 1.04346838 1.2375314 1.18597879
## 44 avg_opp_REV 0.15613518 0.2734175 1.75115907
## 45 avg_opp_SIG_STR_att 65.93122826 39.1817802 0.59428258
## 46 avg_opp_SIG_STR_landed 27.22870624 16.0776803 0.59046802
## 47 avg_opp_SIG_STR_pct 0.41986554 0.1092344 0.26016510
## 48 avg_opp_SUB_ATT 0.45328706 0.5673856 1.25171372
## 49 avg_opp_TD_att 2.84350725 2.2062838 0.77590230
## 50 avg_opp_TD_landed 1.04833169 0.9971489 0.95117696
## 51 avg_opp_TD_pct 0.26599152 0.2126329 0.79939720
## 52 avg_opp_TOTAL_STR_att 86.23614446 43.6710762 0.50641267
## 53 avg_opp_TOTAL_STR_landed 45.40801157 23.5944282 0.51960937
## 54 total_rounds_fought 10.88715008 12.5188066 1.14986994
## 55 total_time_fought(seconds) 595.83867659 212.4760622 0.35659998
## 56 total_title_bouts 0.43847201 1.3752189 3.13638931
## 57 win_by_Decision_Majority 0.02225894 0.1494953 6.71619259
## 58 win_by_Decision_Split 0.24659798 0.5654741 2.29310110
## 59 win_by_Decision_Unanimous 0.97871306 1.4712942 1.50329478
## 60 win_by_KO/TKO 1.06376361 1.6891217 1.58787320
## 61 win_by_Submission 0.66456065 1.2359210 1.85975644
## 62 win_by_TKO_Doctor_Stoppage 0.05832037 0.2507901 4.30021416
## 63 wins 3.04131026 3.5161509 1.15613028
## 64 Height_cms 179.25993313 8.5742035 0.04783112
## 65 Reach_cms 183.90453530 9.9969691 0.05435956
## 66 Weight_lbs 172.11695164 36.0216150 0.20928569
## 67 age 29.30151633 4.0655260 0.13874797
## 68 title_bout 0.06512442 0.2467573 3.78901298
## 69 no_of_rounds 3.11936236 0.6314267 0.20242171
## p_01 p_05 p_25 p_50 p_75 p_95
## 1 0.000000 0.0000000 0.0000000 0.00000000 1.0000000 2.0000000
## 2 0.000000 0.0000000 0.0000000 0.00000000 1.0000000 4.0000000
## 3 0.000000 0.0000000 0.0000000 0.00000000 0.0000000 0.0000000
## 4 0.000000 1.0000000 4.3333333 7.50000000 11.2283398 21.2000000
## 5 0.000000 0.5000000 3.0000000 5.32908965 8.0000000 14.8488095
## 6 0.000000 0.4535714 4.0000000 7.43582952 10.2000000 20.0000000
## 7 0.000000 0.0000000 2.3836996 4.98824861 6.9419935 14.0000000
## 8 1.000000 4.5000000 27.1383929 47.87450980 70.8588957 123.2166667
## 9 0.000000 1.2500000 9.5000000 16.50000000 25.9733493 46.3243590
## 10 0.000000 0.0000000 3.9321429 8.50000000 11.0000000 24.4420139
## 11 0.000000 0.0000000 2.5000000 5.60715532 7.3051839 16.0000000
## 12 2.000000 9.6487482 35.0000000 51.83333333 70.0000000 114.7750000
## 13 0.000000 4.0000000 13.0000000 19.13335001 24.5346154 40.7049107
## 14 0.000000 0.0000000 0.0000000 0.20850247 0.3333333 1.0000000
## 15 0.000000 0.0000000 2.3721591 5.48991864 7.9273417 16.0000000
## 16 0.000000 0.0000000 2.0000000 4.42335608 6.2556818 12.7747863
## 17 0.000000 0.0000000 0.4545455 1.14285714 1.6666667 3.9708333
## 18 0.000000 0.0000000 0.0000000 0.07475547 0.1895400 0.6666667
## 19 3.000000 12.0000000 45.3333333 66.33333333 88.6000000 143.0000000
## 20 1.000000 6.6320733 20.4000000 29.46410256 38.5856481 61.0000000
## 21 0.144350 0.2900000 0.4075000 0.46333333 0.5100000 0.6495625
## 22 0.000000 0.0000000 0.0000000 0.50000000 0.6923077 1.6666667
## 23 0.000000 0.0000000 1.0000000 2.55293028 3.7375000 7.7092857
## 24 0.000000 0.0000000 0.5000000 1.02906977 1.6000000 3.4454545
## 25 0.000000 0.0000000 0.1850000 0.31438684 0.4325000 0.7500000
## 26 6.000000 22.5000000 65.0681818 91.50000000 115.5714286 174.0000000
## 27 3.000000 12.8200000 35.8888889 49.88888889 62.1718750 98.0000000
## 28 0.000000 0.0000000 0.0000000 1.00000000 3.0000000 6.0000000
## 29 0.000000 0.0000000 0.0000000 1.00000000 3.0000000 6.0000000
## 30 0.000000 1.0000000 4.0000000 7.28571429 10.7500000 18.8000000
## 31 0.000000 0.4000000 3.0000000 5.00000000 7.0000000 13.0000000
## 32 0.000000 0.3333333 3.5000000 6.82582792 9.0000000 18.0000000
## 33 0.000000 0.0000000 2.0000000 4.41189203 6.0000000 12.2000000
## 34 1.000000 4.0000000 26.8854167 47.00000000 69.0000000 118.5955882
## 35 0.000000 1.1539233 8.6666667 15.55277778 24.0000000 42.3333333
## 36 0.000000 0.0000000 2.2500000 6.00000000 8.1532386 19.0000000
## 37 0.000000 0.0000000 1.5000000 3.92307692 5.2857143 12.3041667
## 38 1.548333 7.6666667 31.0000000 47.83736578 66.2076287 108.4125000
## 39 0.000000 2.6666667 10.0000000 15.39230769 21.3242615 36.0000000
## 40 0.000000 0.0000000 0.0000000 0.10000000 0.1984073 0.5454545
## 41 0.000000 0.0000000 2.9000000 5.40950333 7.5478969 14.5000000
## 42 0.000000 0.0000000 2.0817308 4.20000000 6.0000000 11.4805556
## 43 0.000000 0.0000000 0.2500000 0.83333333 1.2194501 3.0000000
## 44 0.000000 0.0000000 0.0000000 0.07692308 0.2000000 0.6666667
## 45 3.000000 10.0000000 40.5535714 61.92547807 83.3554205 133.7500000
## 46 1.000000 4.4526837 17.0416667 25.86333272 34.0000000 55.0000000
## 47 0.110000 0.2600000 0.3655556 0.41896488 0.4614881 0.6000000
## 48 0.000000 0.0000000 0.0000000 0.33333333 0.5655436 1.5000000
## 49 0.000000 0.0000000 1.3333333 2.64441896 3.6000000 7.0000000
## 50 0.000000 0.0000000 0.4000000 0.98799699 1.3019231 3.0000000
## 51 0.000000 0.0000000 0.1250000 0.25214286 0.3333333 0.6666667
## 52 4.000000 20.0000000 59.6666667 84.35791479 106.8392857 162.0000000
## 53 2.000000 11.0000000 32.0000000 44.00000000 55.0000000 86.6666667
## 54 0.000000 0.0000000 2.0000000 6.00000000 16.0000000 37.0000000
## 55 60.000000 185.3500000 482.2494888 610.77102289 731.6676718 900.0000000
## 56 0.000000 0.0000000 0.0000000 0.00000000 0.0000000 2.0000000
## 57 0.000000 0.0000000 0.0000000 0.00000000 0.0000000 0.0000000
## 58 0.000000 0.0000000 0.0000000 0.00000000 0.0000000 1.0000000
## 59 0.000000 0.0000000 0.0000000 0.00000000 1.0000000 4.0000000
## 60 0.000000 0.0000000 0.0000000 0.00000000 1.0000000 5.0000000
## 61 0.000000 0.0000000 0.0000000 0.00000000 1.0000000 3.0000000
## 62 0.000000 0.0000000 0.0000000 0.00000000 0.0000000 1.0000000
## 63 0.000000 0.0000000 0.0000000 2.00000000 5.0000000 10.0000000
## 64 160.020000 165.1000000 172.7200000 180.34000000 185.4200000 193.0400000
## 65 160.020000 165.1000000 177.8000000 185.42000000 190.5000000 200.6600000
## 66 115.000000 125.0000000 145.0000000 170.00000000 185.0000000 250.0000000
## 67 21.000000 23.0000000 26.0000000 29.00000000 32.0000000 36.0000000
## 68 0.000000 0.0000000 0.0000000 0.00000000 0.0000000 1.0000000
## 69 1.000000 3.0000000 3.0000000 3.00000000 3.0000000 5.0000000
## p_99 skewness kurtosis iqr range_98
## 1 3.000000 1.90909409 7.725562 1.00000000 [0, 3]
## 2 6.000000 2.75083868 15.042578 1.00000000 [0, 6]
## 3 0.000000 NaN NaN 0.00000000 [0, 0]
## 4 30.381964 1.46692937 6.406562 6.89500651 [0, 30.3819642857142]
## 5 21.532500 1.56231216 7.205841 5.00000000 [0, 21.5324999999998]
## 6 32.000000 2.60453343 17.346837 6.20000000 [0, 32]
## 7 23.578393 2.80369208 19.504985 4.55829383 [0, 23.5783928571428]
## 8 179.052000 1.29499061 5.768473 43.72050288 [1, 179.052]
## 9 66.000000 1.48994153 7.153997 16.47334928 [0, 66]
## 10 39.000000 2.38826393 13.762692 7.06785714 [0, 39]
## 11 25.677500 2.27388847 12.510593 4.80518395 [0, 25.6774999999999]
## 12 163.355000 1.20379034 6.017110 35.00000000 [2, 163.355]
## 13 56.304167 1.44202803 8.605517 11.53461538 [0, 56.3041666666663]
## 14 1.166667 2.61395140 18.919422 0.33333333 [0, 1.16666666666667]
## 15 24.500000 2.18256010 12.472057 5.55518264 [0, 24.5]
## 16 19.500000 2.12066797 11.980340 4.25568182 [0, 19.5]
## 17 6.000000 2.25082847 12.238814 1.21212121 [0, 6]
## 18 1.000000 3.75602896 24.964131 0.18954004 [0, 1]
## 19 198.782500 1.03825175 5.165810 43.26666667 [3, 198.7825]
## 20 81.000000 1.06271650 5.974455 18.18564815 [1, 81]
## 21 0.785000 0.06945364 6.333458 0.10250000 [0.14435, 0.785]
## 22 3.000000 2.91937263 21.011884 0.69230769 [0, 3]
## 23 11.000000 1.83271721 9.691554 2.73750000 [0, 11]
## 24 5.000000 1.82545582 8.869834 1.10000000 [0, 5]
## 25 1.000000 0.71991138 3.879193 0.24750000 [0, 1]
## 26 228.065000 0.67811393 4.202923 50.50324675 [6, 228.065]
## 27 129.000000 0.95820907 5.577760 26.28298611 [3, 129]
## 28 8.000000 1.51294999 6.957231 3.00000000 [0, 8]
## 29 9.000000 1.76487162 6.860842 3.00000000 [0, 9]
## 30 29.000000 1.71450198 9.646045 6.75000000 [0, 29]
## 31 20.000000 1.98004107 11.412089 4.00000000 [0, 20]
## 32 30.782500 3.08289686 24.344712 5.50000000 [0, 30.7824999999998]
## 33 21.000000 3.50684529 31.496064 4.00000000 [0, 21]
## 34 171.130000 1.56859976 8.510350 42.11458333 [1, 171.129999999999]
## 35 65.513000 1.84492055 10.114706 15.33333333 [0, 65.5129999999999]
## 36 37.000000 3.61411317 27.246316 5.90323858 [0, 37]
## 37 23.355000 3.88369136 32.777954 3.78571429 [0, 23.3549999999999]
## 38 155.000000 1.59071873 9.385273 35.20762870 [1.54833333333333, 155]
## 39 56.000000 2.01567635 12.423629 11.32426146 [0, 56]
## 40 1.000000 3.57023523 24.430148 0.19840734 [0, 1]
## 41 24.500000 2.33469322 14.002519 4.64789686 [0, 24.5]
## 42 19.000000 2.34673915 14.541038 3.91826923 [0, 19]
## 43 6.000000 3.71825659 28.590787 0.96945009 [0, 6]
## 44 1.000000 3.83830225 25.135091 0.20000000 [0, 1]
## 45 190.591000 1.37400800 7.949578 42.80184906 [3, 190.590999999999]
## 46 82.032500 1.56105145 9.406321 16.95833333 [1, 82.0324999999998]
## 47 0.750000 0.41189633 6.955960 0.09593254 [0.11, 0.75]
## 48 3.000000 3.23881347 21.547025 0.56554355 [0, 3]
## 49 11.000000 1.79711228 9.204579 2.26666667 [0, 11]
## 50 5.000000 2.47742544 14.582955 0.90192308 [0, 5]
## 51 1.000000 1.25868261 5.230795 0.20833333 [0, 1]
## 52 222.000000 0.99517369 6.223377 47.17261905 [4, 222]
## 53 124.796667 1.32591455 7.772297 23.00000000 [2, 124.796666666666]
## 54 56.000000 1.78558684 6.642003 14.00000000 [0, 56]
## 55 997.390000 -0.34415390 3.273604 249.41818304 [60, 997.389999999998]
## 56 7.000000 5.51028376 41.191563 0.00000000 [0, 7]
## 57 1.000000 6.73697330 48.602648 0.00000000 [0, 1]
## 58 3.000000 2.73728148 12.021150 0.00000000 [0, 3]
## 59 6.000000 2.02545055 7.691217 1.00000000 [0, 6]
## 60 8.000000 2.26997655 9.059193 1.00000000 [0, 8]
## 61 6.000000 2.82951514 14.012030 1.00000000 [0, 6]
## 62 1.000000 4.50375572 24.335315 0.00000000 [0, 1]
## 63 15.000000 1.57415894 5.473250 5.00000000 [0, 15]
## 64 198.120000 -0.10029555 3.045104 12.70000000 [160.02, 198.12]
## 65 205.740000 -0.18360494 3.032752 12.70000000 [160.02, 205.74]
## 66 265.000000 1.43319873 11.547100 40.00000000 [115, 265]
## 67 40.000000 0.37197387 3.218737 6.00000000 [21, 40]
## 68 1.000000 3.52489502 13.424885 0.00000000 [0, 1]
## 69 5.000000 1.49899779 8.602607 0.00000000 [1, 5]
## range_80
## 1 [0, 1]
## 2 [0, 3]
## 3 [0, 0]
## 4 [1.66666666666667, 16.75]
## 5 [1, 11.75]
## 6 [1.5, 15.5]
## 7 [1, 10.7081818181818]
## 8 [10, 99.80375]
## 9 [3.5, 37.8]
## 10 [1, 18.3625]
## 11 [0.5, 12.2]
## 12 [18, 94.3333333333333]
## 13 [7, 33.75]
## 14 [0, 0.619047619047619]
## 15 [1, 12.6666666666667]
## 16 [0.5, 10]
## 17 [0, 3]
## 18 [0, 0.428571428571429]
## 19 [23.25, 120.275]
## 20 [11, 52]
## 21 [0.34, 0.59]
## 22 [0, 1.21428571428571]
## 23 [0.25, 6.07102272727276]
## 24 [0, 2.66666666666667]
## 25 [0, 0.6]
## 26 [36, 151.43]
## 27 [20, 82.3333333333333]
## 28 [0, 4]
## 29 [0, 4]
## 30 [1.66666666666667, 15.5]
## 31 [1, 10.2247863247863]
## 32 [1.5, 14]
## 33 [0.857142857142857, 9.42857142857143]
## 34 [10, 98]
## 35 [3.08333333333333, 34]
## 36 [0.333333333333333, 14]
## 37 [0, 9]
## 38 [15, 90]
## 39 [5, 29.4444444444444]
## 40 [0, 0.4]
## 41 [1, 11.3638399652098]
## 42 [0.666666666666667, 9]
## 43 [0, 2.16666666666667]
## 44 [0, 0.4]
## 45 [20.07, 113.773292366434]
## 46 [9, 46.5]
## 47 [0.307752136752137, 0.548922222222222]
## 48 [0, 1]
## 49 [0.5, 5.5]
## 50 [0, 2]
## 51 [0, 0.5]
## 52 [33, 139.333333333333]
## 53 [17.3333333333333, 72]
## 54 [0, 28]
## 55 [289, 900]
## 56 [0, 1]
## 57 [0, 0]
## 58 [0, 1]
## 59 [0, 3]
## 60 [0, 3]
## 61 [0, 2]
## 62 [0, 0]
## 63 [0, 8]
## 64 [167.64, 190.5]
## 65 [170.18, 195.58]
## 66 [135, 225]
## 67 [24, 35]
## 68 [0, 0]
## 69 [3, 3]
Usamos la función plot_num, del paquete funModeling, para graficar los histogramas de todas las variables numéricas al mismo tiempo.
# Draw the histograms of all numeric columns of the combined data frame in a
# single call.
plot_num(Datosconjuntos_sinNA)
## Warning: attributes are not identical across measure variables; they will
## be dropped
Usamos la función year del paquete lubridate para extraer el año de cada fecha y así graficar la cantidad de peleas por año en cada categoría.
# Bar chart of the number of records per year for every weight class.
# The title -> data frame pairs live in one named list so that each category
# is plotted exactly once (Catch Weight included, Flyweight not repeated) and
# every title is spelled correctly.
# NOTE(review): the combined data frame has 10288 rows for 5144 fights (one
# row per corner), so these bars may show twice the number of fights —
# confirm whether the y-axis label "Cantidad de peleas" is what is intended.
peleas_por_categoria <- list(
  "Bantamweight" = Datos_Bantamweight,
  "Catch Weight" = Datos_CatchWeight,
  "Featherweight" = Datos_Featherweight,
  "Flyweight" = Datos_Flyweight,
  "Heavyweight" = Datos_Heavyweight,
  "Light Heavyweight" = Datos_LightHeavyweight,
  "Lightweight" = Datos_Lightweight,
  "Middleweight" = Datos_Middleweight,
  "Open Weight" = Datos_OpenWeight,
  "Welterweight" = Datos_Welterweight,
  "Women Bantamweight" = Datos_WomenBantamweight,
  "Women Featherweight" = Datos_WomenFeatherweight,
  "Women Flyweight" = Datos_WomenFlyweight,
  "Women Strawweight" = Datos_WomenStrawweight
)
for (categoria in names(peleas_por_categoria)) {
  barplot(
    table(year(peleas_por_categoria[[categoria]]$date)),
    xlab = "Año",
    ylab = "Cantidad de peleas",
    main = categoria
  )
}
# Gather every per-category data frame in a single (unnamed) list.
Lista_df_categorias <- list(
  Datos_Bantamweight,
  Datos_CatchWeight,
  Datos_Featherweight,
  Datos_Flyweight,
  Datos_Heavyweight,
  Datos_LightHeavyweight,
  Datos_Lightweight,
  Datos_OpenWeight,
  Datos_Middleweight,
  Datos_Welterweight,
  Datos_WomenBantamweight,
  Datos_WomenFeatherweight,
  Datos_WomenFlyweight,
  Datos_WomenStrawweight
)
# Lightweight is the category we chose: print it and save a copy as CSV in the
# working directory.
Datos_Lightweight
write.csv(Datos_Lightweight, file = "Lightweight.csv")
Elegimos la categoría Lightweight para continuar el resto del trabajo (ya que es la que tiene mayor cantidad de observaciones)
Elegimos las variables: losses, height, age, win by KO.TKO, total title bouts
# Split the Lightweight rows into two data sets: winners and losers.
LightweightWinner <- subset(Datos_Lightweight, subset = Winner == 1)
LightweightLoser <- subset(Datos_Lightweight, subset = Winner == 0)

# Draw two side-by-side frequency bar charts (winners in green, losers in
# orange) for one column of the Lightweight data.
#
# @param columna  Column name in its cleaned form, i.e. without surrounding
#   spaces and as make.names() would write it (e.g. "win_by_KO.TKO" for a
#   column stored as "win_by_KO/TKO").
# @param etiqueta Label for the x axis of both charts.
# @return NULL, invisibly; called for its plotting side effect. The device is
#   left in a 1 x 2 layout (par(mfrow) is not restored).
graficar_ganadores_vs_perdedores <- function(columna, etiqueta) {
  # Match against cleaned-up names so that stray leading spaces or a "/" in the
  # stored name do not turn the lookup into a silent NULL.
  posicion <- match(columna, make.names(trimws(names(Datos_Lightweight))))
  if (is.na(posicion)) {
    stop("Column not found in Datos_Lightweight: ", columna, call. = FALSE)
  }
  nombre_real <- names(Datos_Lightweight)[posicion]
  par(mfrow = c(1, 2))
  barplot(
    table(LightweightWinner[[nombre_real]]),
    ylab = "Fighters", xlab = etiqueta, col = "green", main = "Winners"
  )
  barplot(
    table(LightweightLoser[[nombre_real]]),
    ylab = "Fighters", xlab = etiqueta, col = "orange", main = "Losers"
  )
  invisible(NULL)
}

graficar_ganadores_vs_perdedores("losses", "losses")
graficar_ganadores_vs_perdedores("Height_cms", "height")
graficar_ganadores_vs_perdedores("age", "Age")
graficar_ganadores_vs_perdedores("total_title_bouts", "Total title bouts")
graficar_ganadores_vs_perdedores("win_by_KO.TKO", "Win by KO/TKO")
# Build a data frame that holds only the variables chosen above.
#
# Each column is looked up by its exact name in Datos_Lightweight (those names
# carry a leading space) and stored under a clean name. The names are attached
# directly to the data instead of renaming the positional V1..V5 columns that
# cbind() produces: cbind() silently drops a NULL argument, so a single missing
# source column would shift every following column under the wrong label.
# Missing columns are reported with a warning and left out.
columnas_origen <- c(
  losses = " losses",
  Height_cms = " Height_cms",
  win_by_KO.TKO = " win_by_KO.TKO",
  age = " age",
  total_title_bouts = " total_title_bouts"
)
columnas_encontradas <- columnas_origen %in% names(Datos_Lightweight)
if (!all(columnas_encontradas)) {
  warning(
    "Columns not found in Datos_Lightweight and left out of datos_variables: ",
    paste(names(columnas_origen)[!columnas_encontradas], collapse = ", "),
    call. = FALSE
  )
}
columnas_presentes <- columnas_origen[columnas_encontradas]
# Going through a plain list gives the result default 1..n row names, like the
# matrix-based construction it replaces.
datos_variables <- as.data.frame(
  lapply(columnas_presentes, function(columna) Datos_Lightweight[[columna]])
)
# NOTE(review): the recorded run reported the fifth variable
# (total_title_bouts) as missing, so only the four columns listed below were
# kept -- confirm the real name of that column in Datos_Lightweight.
names(datos_variables)
## [1] "losses" "Height_cms" "win_by_KO.TKO" "age"
Usando la función convert_df_to_categoric del paquete funModeling, discretizamos las variables. Por defecto, la función arma los intervalos teniendo en cuenta la frecuencia con que se repiten los datos, de modo que cada intervalo reúna una cantidad similar de observaciones.
# Discretise every selected variable into 4 bins.
datos_variables <- convert_df_to_categoric(data = datos_variables, n_bins = 4)
## Variables processed: losses, Height_cms, win_by_KO.TKO, age
# Put the fight outcome in front of the discretised predictors.
Winner <- Datos_Lightweight[["Winner"]]
datos_variables_win <- cbind(Winner = Winner, datos_variables)
Usando la función dummy_cols del paquete fastDummies, creamos una variable dummy por cada intervalo de cada variable discretizada.
# Add one indicator (dummy) column per level of each discretised variable.
Datosconjuntos_sinNA__puntos_11_12_13 <- dummy_cols(
  datos_variables_win,
  select_columns = c(
    "losses",
    "Height_cms",
    "win_by_KO.TKO",
    "age",
    "total_title_bouts"
  )
)
# Display the resulting table.
as.datatable(
  formattable(Datosconjuntos_sinNA__puntos_11_12_13)
)
Con la regresión queremos explicar la variable ganar (Winner). Usamos un modelo lineal generalizado y, como primer paso, discretizamos las variables que elegimos. Para elegirlas utilizamos el siguiente criterio: “losses”, porque intuimos que la cantidad de veces que el luchador perdió puede influir en el próximo resultado; “Height”, porque la altura puede influir en los ataques que puede llegar a hacer; “win by knockout”, porque suponemos que la cantidad de veces que ganó por knockout puede intimidar al otro competidor y permitirle ganar la pelea con más facilidad; “Age”, porque si es más joven tiene mayor vitalidad y agilidad en los ataques; y, por último, “total title bouts”, porque suponemos que a mayor cantidad de peleas por el título disputadas, mayor es la probabilidad de éxito.
# Shorthand for the dummy-coded data set (same object, shorter name).
datos_dummy <- Datosconjuntos_sinNA__puntos_11_12_13

# Response: fight outcome.
y <- datos_dummy$Winner

# Indicators for the bins of losses.
L1 <- datos_dummy$`losses_[ 4, Inf]`
L2 <- datos_dummy$`losses_[ 2, 4)`
L3 <- datos_dummy$`losses_[ 1, 2)`
L4 <- datos_dummy$`losses_[-Inf, 1)`

# Indicators for the bins of height (cm).
H1 <- datos_dummy$`Height_cms_[180.3, Inf]`
H2 <- datos_dummy$`Height_cms_[176.5,180.3)`
H3 <- datos_dummy$`Height_cms_[175.3,176.5)`
H4 <- datos_dummy$`Height_cms_[ -Inf,175.3)`

# Indicators for the bins of wins by KO/TKO.
WKO1 <- datos_dummy$`win_by_KO.TKO_[ 1, 2)`
WKO2 <- datos_dummy$`win_by_KO.TKO_[-Inf, 1)`
WKO3 <- datos_dummy$`win_by_KO.TKO_[ 2, Inf]`

# Indicators for the bins of age.
A1 <- datos_dummy$`age_[ 27, 29)`
A2 <- datos_dummy$`age_[ 29, 32)`
A3 <- datos_dummy$`age_[ 32, Inf]`
A4 <- datos_dummy$`age_[-Inf, 27)`

# Indicators for the bins of total title bouts.
TTB1 <- datos_dummy$`total_title_bouts_[ 1, Inf]`
TTB2 <- datos_dummy$`total_title_bouts_[-Inf, 1)`

# Logistic regression of the outcome on every indicator.
# NOTE(review): each group enters with all of its indicators next to the
# intercept, so one indicator per group looks linearly dependent on the others;
# expect aliased (NA) coefficients in the summary -- confirm.
RL <- glm(
  y ~ L1 + L2 + L3 + L4 +
    H1 + H2 + H3 + H4 +
    WKO1 + WKO2 + WKO3 +
    A1 + A2 + A3 + A4 +
    TTB1 + TTB2,
  data = Datosconjuntos_sinNA__puntos_11_12_13,
  family = "binomial"
)
RL
summary(RL)
Realizamos una primera regresión en la que utilizamos todas las variables dummy antes mencionadas y seleccionamos aquellas que resultaron significativas: los intervalos de edad de 29 a 32 y de 32 a 45 años, el intervalo [0,1) de victorias por knockout y los intervalos [1,2) y [2,4) de peleas perdidas.
Luego dividimos el dataset aleatoriamente en train y test. Con el primero (grupo de entrenamiento) y las variables que consideramos significativas ajustamos una nueva regresión, que probamos en el segundo (grupo de prueba, test). Estos fueron los resultados:
# Split the dummy-coded data at random, half for training and half for
# testing, with a fixed seed so the split can be reproduced.
set.seed(1234)
samples <- sample(
  nrow(Datosconjuntos_sinNA__puntos_11_12_13),
  0.5 * nrow(Datosconjuntos_sinNA__puntos_11_12_13)
)
data_train <- Datosconjuntos_sinNA__puntos_11_12_13[samples, ]
data_test <- Datosconjuntos_sinNA__puntos_11_12_13[-samples, ]

# Predictors of the reduced model: short alias -> dummy column.
columnas_modelo <- c(
  A3 = "age_[ 32, Inf]",
  A2 = "age_[ 29, 32)",
  WKO2 = "win_by_KO.TKO_[-Inf, 1)",
  L3 = "losses_[ 1, 2)",
  L2 = "losses_[ 2, 4)"
)
# Store the aliases as real columns of BOTH splits. When the predictors exist
# only as free-standing vectors, glm() ignores `data` for them, and
# predict(modelo, newdata = data_test) would find no such columns in newdata
# and silently fall back to the training vectors.
data_train[names(columnas_modelo)] <- data_train[unname(columnas_modelo)]
data_test[names(columnas_modelo)] <- data_test[unname(columnas_modelo)]

# Free-standing copies of the training columns. The model below no longer
# needs them; they are kept in case code further down the script reads them.
y <- data_train$`Winner`
L1 <- data_train$`losses_[ 4, Inf]`
L2 <- data_train$`losses_[ 2, 4)`
L3 <- data_train$`losses_[ 1, 2)`
L4 <- data_train$`losses_[-Inf, 1)`
H1 <- data_train$`Height_cms_[180.3, Inf]`
H2 <- data_train$`Height_cms_[176.5,180.3)`
H3 <- data_train$`Height_cms_[175.3,176.5)`
H4 <- data_train$`Height_cms_[ -Inf,175.3)`
WKO1 <- data_train$`win_by_KO.TKO_[ 1, 2)`
WKO2 <- data_train$`win_by_KO.TKO_[-Inf, 1)`
WKO3 <- data_train$`win_by_KO.TKO_[ 2, Inf]`
A1 <- data_train$`age_[ 27, 29)`
A2 <- data_train$`age_[ 29, 32)`
A3 <- data_train$`age_[ 32, Inf]`
A4 <- data_train$`age_[-Inf, 27)`
TTB1 <- data_train$`total_title_bouts_[ 1, Inf]`
TTB2 <- data_train$`total_title_bouts_[-Inf, 1)`

# Logistic regression on the training half, using only the dummy variables
# judged significant in the first regression. Every term is a column of
# data_train, so the same names resolve inside data_test at prediction time.
modelo <- glm(
  Winner ~ A3 + A2 + WKO2 + L3 + L2,
  data = data_train,
  family = "binomial"
)
summary(modelo)

# Fitted probabilities for the training rows (no newdata supplied).
predicciones_train <- predict(object = modelo, type = "response")
predicciones_train
De los resultados podemos observar, por ejemplo, que:
* El luchador 225 tiene una probabilidad de 0.4063 de ganar el próximo encuentro. Ese luchador cuenta con la siguiente información: no perdió ninguna pelea, mide más de 180.3 cm, nunca ganó por knockout, tiene entre 29 y 32 años y nunca peleó por el título.
* El luchador 1933 tiene una probabilidad de 0.67 de ganar el próximo encuentro y cuenta con la siguiente información: perdió una pelea, tiene una altura de entre 175.3 y 176.5 cm, ganó 2 o más peleas por knockout, tiene menos de 27 años y peleó al menos una vez por el título.