.

1 - Introducción

Volver al Inicio



El siguiente trabajo consiste en utilizar los datos de todos los Pokemon para obtener insights respecto a sus cualidades, semejanzas y diferencias entre los mismos.

En primer lugar se va a realizar un análisis multivariado de los datos, para luego probar distintos modelos de reducción de la dimensionalidad y visualizar si efectivamente hay diferencia entre los Pokemon Legendarios y los No Legendarios.



2 - Análisis Multivariado

Volver al Inicio



Librerías

library(tidyverse)
library(readxl)
library(highcharter)
library(flexdashboard)
library(Rtsne)

Datos

# Load the Pokemon dataset from the CSV file in the working directory.
df <- read.csv("pokemon.csv")

# Read the auxiliary Excel sheet; its first row is used as the column names.
txt <- read_excel("values.xlsx", col_names = TRUE)

# Number of Pokemon for every (type1, type2) combination.
dfgroup <- count(group_by(df, type1, type2))

Total de Pokemons por Type

# Number of Pokemon per primary type.
group_type <- tally(group_by(df, type1))

# Column chart of the per-type totals, ordered from most to least frequent.
group_type %>%
  arrange(desc(n)) %>%
  hchart(
    "column",
    hcaes(x = type1, y = n),
    showInLegend = FALSE,
    maxSize = "15%",
    dataLabels = list(enabled = TRUE, format = "{point.y}")
  ) %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_colorAxis(
    stops = color_stops(18, rev(RColorBrewer::brewer.pal(8, "Dark2")))
  ) %>%
  hc_legend(enabled = FALSE) %>%
  # Tooltip: black border, integer values, and a "Total" line per column.
  hc_tooltip(
    shared = FALSE,
    borderColor = "black",
    valueDecimals = 0,
    pointFormat = "<b>Total: </b> {point.y} <br> "
  ) %>%
  hc_title(
    text = "Type Pokemon",
    style = list(fontSize = "20px", fontWeight = "bold")
  ) %>%
  hc_subtitle(
    text = "<b>Bar Graph</b>",
    style = list(fontSize = "16px", color = "black")
  ) %>%
  hc_credits(
    enabled = TRUE,
    text = "Fuente: Kaggle",
    href = "https://www.kaggle.com/datasets/rounakbanik/pokemon",
    align = "right",
    verticalAlign = "bottom",
    style = list(color = "black", fontSize = "15px")
  ) %>%
  hc_yAxis(
    labels = list(style = list(color = "black")),
    tickColor = "black"
  ) %>%
  hc_xAxis(
    labels = list(style = list(color = "black")),
    tickColor = "black"
  )

Top 20 Pokemons sumando todas las habilidades:

# Subset of columns needed by the bar charts and their tooltips.
df_bar <- df[, c("hp", "speed", "attack", "defense", "type1", "name",
                 "is_legendary", "pokedex_number")]

# Aggregate score per Pokemon. The stat columns are selected by name so the
# sum does not silently change if the column order above is edited.
df_bar$total <- rowSums(df_bar[, c("hp", "speed", "attack", "defense")])

# Highcharts tooltip template shared by the charts in this report. The image
# URL prefix is taken from the column name(s) of `txt`; the {point.*}
# placeholders are filled in by Highcharts for the hovered point.
# The <b>/<li> tags of the first list item are closed in nesting order.
img_input <- paste0(
  "<img src ='", colnames(txt),
  "shiny/{point.pokedex_number}.png' width='100' height='120'> ",
  "<ul><li><b>Pokemon: {point.name}</b></li> ",
  "<li><b>Hp: {point.hp} </b></li> ",
  "<li><b>Attack: {point.attack} </b></li>",
  "<li><b>Defense: {point.defense} </b></li>",
  "<li><b>Speed: {point.speed} </b></li>",
  "<li><b>Legendary: {point.is_legendary} </b></li>",
  "<li><b>Pokedex: {point.pokedex_number} </b></li></ul> "
)



#' Column chart of Pokemon ranked by their `total` score
#'
#' @param data Data frame with at least the columns `name`, `total` and
#'   `type1`, plus whatever columns the global `img_input` tooltip template
#'   references.
#' @param n Number of rows to keep, ranked by `total` and passed to
#'   `dplyr::top_n()`: a positive value keeps the highest totals, a negative
#'   value keeps the lowest.
#' @param filter If `TRUE`, keep only rows whose `type1` equals `poketype`.
#' @param poketype Primary type to keep when `filter` is `TRUE`; ignored
#'   otherwise.
#' @return The highchart object produced by the `hchart()` pipeline.
get_bar_plot <- function(data, n, filter, poketype = NULL) {
  if (isTRUE(filter)) {
    if (is.null(poketype)) {
      stop("`poketype` must be supplied when `filter = TRUE`.", call. = FALSE)
    }
    # Namespace-qualified because the `filter` argument shadows the verb name.
    data <- dplyr::filter(data, type1 == poketype)
  }

  data %>%
    arrange(desc(total)) %>%
    # Rank explicitly by `total` rather than letting top_n() fall back to the
    # last column of `data` (which also emits a "Selecting by ..." message).
    top_n(n, wt = total) %>%
    hchart(
      "column",
      hcaes(x = name, y = total),
      showInLegend = FALSE,
      maxSize = "15%",
      dataLabels = list(enabled = TRUE, format = "{point.y}")
    ) %>%
    hc_colorAxis(
      stops = color_stops(18, rev(RColorBrewer::brewer.pal(8, "Set1")))
    ) %>%
    hc_legend(enabled = FALSE) %>%
    # `img_input` is the HTML tooltip template defined at script level.
    hc_tooltip(pointFormat = img_input, useHTML = TRUE) %>%
    hc_add_theme(hc_theme_google()) %>%
    hc_title(
      text = "Type Pokemon",
      style = list(fontSize = "20px", fontWeight = "bold")
    ) %>%
    hc_subtitle(
      text = "<b>Bar Graph</b>",
      style = list(fontSize = "16px", color = "black")
    ) %>%
    hc_credits(
      enabled = TRUE,
      text = "Fuente: Kaggle",
      align = "right",
      verticalAlign = "bottom",
      style = list(color = "black", fontSize = "15px"),
      href = "https://www.kaggle.com/datasets/rounakbanik/pokemon"
    ) %>%
    hc_yAxis(
      labels = list(style = list(color = "black")),
      tickColor = "black"
    ) %>%
    hc_xAxis(
      labels = list(style = list(color = "black")),
      tickColor = "black"
    )
}




# Twenty highest totals across all types.
get_bar_plot(data = df_bar, n = 20, filter = FALSE)

Últimos 20 Pokemons sumando todas las habilidades:

# Twenty lowest totals across all types (negative n selects from the bottom).
get_bar_plot(data = df_bar, n = -20, filter = FALSE)
## Selecting by total

Ahora veamos los de tipo Electric:

# Ten highest totals among Pokemon whose primary type is "electric".
get_bar_plot(data = df_bar, n = 10, filter = TRUE, poketype = "electric")
## Selecting by total

En el siguiente gráfico podemos visualizar un Scatter Plot con la relación entre el nivel de ataque y defensa de todos los Pokemons agrupados por tipo.

Si dejamos el cursor sobre cada uno de los puntos del Scatter podemos ver todos los niveles de velocidad, ataque y defensa de cada uno de ellos.

# Scatter plot of attack (x) against defense (y), one series per primary type.
df %>%
  hchart(
    "scatter",
    hcaes(x = attack, y = defense, name = name, group = type1),
    maxSize = "5%",
    # Regression overlay is disabled; the settings below are kept so it can be
    # switched on by flipping this flag (presumably ignored while FALSE).
    regression = FALSE,
    regressionSettings = list(
      type = "polynomial",
      color = "steelblue",
      dashStyle = "ShortDash",
      tooltip = FALSE,
      decimalPlaces = 6,
      # Spelled as the regression plugin expects (was "useAllSerues").
      useAllSeries = TRUE,
      order = 1,
      lineWidth = 5,
      name = "%eq | r2: %r",
      hideInLegend = TRUE
    )
  ) %>%
  # `img_input` is the HTML tooltip template defined earlier in the script.
  hc_tooltip(pointFormat = img_input, useHTML = TRUE) %>%
  hc_add_dependency("plugins/highcharts-regression.js") %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_credits(
    enabled = TRUE,
    text = "Fuente: Kaggle",
    align = "right",
    verticalAlign = "bottom",
    style = list(color = "black", fontSize = "15px"),
    href = "https://www.kaggle.com/datasets/rounakbanik/pokemon"
  ) %>%
  hc_title(
    text = "Attack vs Defense",
    style = list(fontSize = "20px", fontWeight = "bold")
  )
# Scatter plot of speed (x) against weight in kg (y), one series per primary
# type.
df %>%
  hchart(
    "scatter",
    hcaes(x = speed, y = weight_kg, name = name, group = type1),
    maxSize = "5%",
    # Regression overlay is disabled; the settings below are kept so it can be
    # switched on by flipping this flag (presumably ignored while FALSE).
    regression = FALSE,
    regressionSettings = list(
      type = "polynomial",
      color = "steelblue",
      dashStyle = "ShortDash",
      tooltip = FALSE,
      decimalPlaces = 6,
      # Spelled as the regression plugin expects (was "useAllSerues").
      useAllSeries = TRUE,
      order = 1,
      lineWidth = 5,
      name = "%eq | r2: %r",
      hideInLegend = TRUE
    )
  ) %>%
  # `img_input` is the HTML tooltip template defined earlier in the script.
  hc_tooltip(pointFormat = img_input, useHTML = TRUE) %>%
  hc_add_dependency("plugins/highcharts-regression.js") %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_credits(
    enabled = TRUE,
    text = "Fuente: Kaggle",
    align = "right",
    verticalAlign = "bottom",
    style = list(color = "black", fontSize = "15px"),
    href = "https://www.kaggle.com/datasets/rounakbanik/pokemon"
  ) %>%
  hc_title(
    text = "Weight vs Speed",
    style = list(fontSize = "20px", fontWeight = "bold")
  )
# Box-plot series of `attack` grouped by primary type. The series name now
# matches the plotted variable (it previously read "height in meters").
dat <- data_to_boxplot(
  df,
  attack,
  type1,
  name = "attack",
  add_outliers = FALSE,
  color = "black"
)


# Box plot of attack per primary type, with the individual Pokemon overlaid as
# small scatter points on the same categorical x axis.
highchart() %>%
  hc_xAxis(type = "category") %>%
  hc_add_series_list(dat) %>%
  hc_add_series(
    data = df,
    type = "scatter",
    hcaes(x = type1, y = attack, group = type1),
    marker = list(radius = 2.5),
    alpha = 0.9
  ) %>%
  # Visual styling of the boxes, medians, stems and whiskers.
  hc_plotOptions(
    boxplot = list(
      color = "black",
      fillColor = "#F0F0E0",
      lineWidth = 2,
      medianColor = "#0C5DA5",
      medianWidth = 3,
      stemColor = "#A63400",
      stemDashStyle = "dot",
      stemWidth = 1,
      whiskerColor = "#3D9200",
      whiskerLength = "20%",
      whiskerWidth = 3
    )
  ) %>%
  hc_legend(enabled = FALSE) %>%
  hc_title(
    text = "Box Plot Pokemon",
    style = list(fontSize = "20px", fontWeight = "bold")
  ) %>%
  hc_subtitle(
    text = "<b>attack</b>",
    style = list(fontSize = "16px", color = "black")
  ) %>%
  hc_credits(
    enabled = TRUE,
    text = "Fuente: Kaggle",
    href = "https://www.kaggle.com/datasets/rounakbanik/pokemon",
    align = "right",
    verticalAlign = "bottom",
    style = list(color = "black", fontSize = "15px")
  ) %>%
  hc_yAxis(
    title = list(text = "<b>attack</b>"),
    labels = list(style = list(color = "black")),
    tickColor = "black"
  ) %>%
  hc_xAxis(
    labels = list(style = list(color = "black")),
    tickColor = "black"
  )


3 - Algoritmos

Volver al Inicio



PCA:

PCA es uno de los métodos más importantes de reducción de dimensionalidad para visualizar datos. Es una técnica que convierte datos de n dimensiones en datos de k dimensiones (con k < n) mientras mantiene la mayor cantidad posible de información del conjunto de datos original.

TSNE:

El algoritmo t-SNE consiste en crear una distribución de probabilidad que represente las similitudes entre vecinos en un espacio de gran dimensión y en un espacio de menor dimensión. Para medir la similitud, las distancias entre puntos se convierten en probabilidades.

Diferencias entre PCA y T-SNE:

PCA es una técnica de reducción de dimensionalidad lineal mientras que T-SNE no lo es. PCA intenta preservar la estructura global de los datos mientras que T-SNE preserva la estructura local. PCA se ve muy afectado por los valores atípicos, mientras que T-SNE puede manejar los valores atípicos.

Podemos observar que los algoritmos de reducción de la dimensionalidad nos dan información sobre la diferencia entre los Pokemons legendarios y no legendarios. Para este caso pasamos de 6 dimensiones a solo 2.

# Fix the RNG seed before the t-SNE run below.
set.seed(15)

# Six stat columns used as model input, plus the identifying columns needed by
# the chart tooltips.
df_model <- df[, c("sp_attack", "sp_defense", "hp", "speed", "attack",
                   "defense", "type1", "name", "is_legendary",
                   "pokedex_number")]

# t-SNE embedding of the six stat columns.
tsne <- Rtsne(
  df_model[, c("sp_attack", "sp_defense", "hp", "speed", "attack", "defense")],
  perplexity = 30,
  eta = 1000,
  max_iter = 5000,
  check_duplicates = FALSE
)

# Attach the two embedding coordinates to the model frame.
Y <- as.data.frame(tsne$Y)
df_model[["X"]] <- Y[["V1"]]
df_model[["Y"]] <- Y[["V2"]]


# Scatter plot of the two t-SNE coordinates, coloured by legendary status.
df_model %>%
  hchart(
    "scatter",
    # Map the columns of the piped data frame rather than reaching back into
    # the global `df_model`, so the chart follows whatever data is piped in.
    hcaes(x = X, y = Y, name = name, group = is_legendary),
    maxSize = "5%",
    regression = FALSE,
    regressionSettings = list(
      type = "polynomial",
      color = "steelblue",
      dashStyle = "ShortDash",
      order = 1,
      lineWidth = 5,
      name = "%eq | r2: %r",
      hideInLegend = TRUE
    )
  ) %>%
  # `img_input` is the HTML tooltip template defined earlier in the script.
  hc_tooltip(pointFormat = img_input, useHTML = TRUE) %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_add_dependency("plugins/highcharts-regression.js") %>%
  hc_credits(
    enabled = TRUE,
    text = "Fuente: Kaggle",
    align = "right",
    verticalAlign = "bottom",
    style = list(color = "black", fontSize = "15px"),
    href = "https://www.kaggle.com/datasets/rounakbanik/pokemon"
  ) %>%
  hc_title(
    text = "Pokemon RTSNE Model - Legendary",
    style = list(fontSize = "20px", fontWeight = "bold")
  )
# Principal component analysis of the six stat columns, computed on the
# correlation matrix (cor = TRUE).
PCA <- princomp(
  df_model[, c("sp_attack", "sp_defense", "hp", "speed", "attack", "defense")],
  cor = TRUE
)

# Keep the scores of the first two components as plotting coordinates.
df_scores <- as.data.frame(PCA$scores)
df_model[["X_pca"]] <- df_scores[["Comp.1"]]
df_model[["Y_pca"]] <- df_scores[["Comp.2"]]


# Scatter plot of the first two principal components, coloured by legendary
# status.
df_model %>%
  hchart(
    "scatter",
    # Map the columns of the piped data frame rather than reaching back into
    # the global `df_model`, so the chart follows whatever data is piped in.
    hcaes(x = X_pca, y = Y_pca, name = name, group = is_legendary),
    maxSize = "5%",
    regression = FALSE,
    regressionSettings = list(
      type = "polynomial",
      color = "steelblue",
      dashStyle = "ShortDash",
      order = 1,
      lineWidth = 5,
      name = "%eq | r2: %r",
      hideInLegend = TRUE
    )
  ) %>%
  # `img_input` is the HTML tooltip template defined earlier in the script.
  hc_tooltip(pointFormat = img_input, useHTML = TRUE) %>%
  # One colour per `is_legendary` group.
  hc_colors(c("#43c7d9", "#9c38c7")) %>%
  hc_add_theme(hc_theme_google()) %>%
  hc_add_dependency("plugins/highcharts-regression.js") %>%
  hc_credits(
    enabled = TRUE,
    text = "Fuente: Kaggle",
    align = "right",
    verticalAlign = "bottom",
    style = list(color = "black", fontSize = "15px"),
    href = "https://www.kaggle.com/datasets/rounakbanik/pokemon"
  ) %>%
  hc_title(
    text = "Pokemon PCA Model - Legendary",
    style = list(fontSize = "20px", fontWeight = "bold")
  )