Dataset Pokemon

Pokemon merupakan dataset yang berisikan karakteristik pokemon seperti: nama pokemon, jenis pokemon, dan karakteristik kekuatan pokemon.

Kolom-kolom pada dataset tersebut, antara lain:

- number : nomor seri pokemon
- name : nama pokemon
- type : jenis pokemon
- total : total nilai karakteristik serangan, kecepatan, health point, dan pertahanan pokemon
- hp : health point
- attack : kekuatan serangan
- defense : kekuatan pertahanan
- special_attack : kekuatan serangan khusus
- special_defense : kekuatan pertahanan khusus
- speed : tingkat kecepatan

Persiapan

library(readr)

library(skimr)
library(DataExplorer)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(purrr)

Import Dataset

# Load the Pokemon dataset from its CSV file into a tibble.
# readr is already attached in the setup section, so read_csv() is available
# here; `pokemon` must only ever hold the parsed data.
pokemon <- read_csv("D:/Development/proyek data/pokemon.csv")
## Parsed with column specification:
## cols(
##   number = col_character(),
##   name = col_character(),
##   type = col_character(),
##   total = col_double(),
##   hp = col_double(),
##   attack = col_double(),
##   defense = col_double(),
##   special_attack = col_double(),
##   special_defense = col_double(),
##   speed = col_double()
## )
# Open the spreadsheet-style data viewer only in an interactive session;
# View() is a GUI side effect that is useless (and can fail) when the
# document is knitted or the script is run in batch mode.
if (interactive()) {
  View(pokemon)
}

Ringkasan Data

# Print the row/column counts plus each column's type and first few values.
glimpse(pokemon)
## Rows: 1,168
## Columns: 10
## $ number          <chr> " 001", " 001", " 002", " 002", " 003", " 003", " 0...
## $ name            <chr> "Bulbasaur", "Bulbasaur", "Ivysaur", "Ivysaur", "Ve...
## $ type            <chr> "GRASS", "POISON", "GRASS", "POISON", "GRASS", "POI...
## $ total           <dbl> 318, 318, 405, 405, 525, 525, 625, 625, 309, 405, 5...
## $ hp              <dbl> 45, 45, 60, 60, 80, 80, 80, 80, 39, 58, 78, 78, 78,...
## $ attack          <dbl> 49, 49, 62, 62, 82, 82, 100, 100, 52, 64, 84, 84, 1...
## $ defense         <dbl> 49, 49, 63, 63, 83, 83, 123, 123, 43, 58, 78, 78, 1...
## $ special_attack  <dbl> 65, 65, 80, 80, 100, 100, 122, 122, 60, 80, 109, 10...
## $ special_defense <dbl> 65, 65, 80, 80, 100, 100, 120, 120, 50, 65, 85, 85,...
## $ speed           <dbl> 45, 45, 60, 60, 80, 80, 80, 80, 65, 80, 100, 100, 1...
# Per-column summary: length/class for character columns, and
# min / quartiles / mean / max for numeric columns.
summary(pokemon)
##     number              name               type               total      
##  Length:1168        Length:1168        Length:1168        Min.   :180.0  
##  Class :character   Class :character   Class :character   1st Qu.:334.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :453.0  
##                                                           Mean   :435.6  
##                                                           3rd Qu.:515.0  
##                                                           Max.   :780.0  
##        hp             attack          defense       special_attack  
##  Min.   :  1.00   Min.   :  5.00   Min.   :  5.00   Min.   : 10.00  
##  1st Qu.: 50.00   1st Qu.: 55.00   1st Qu.: 50.00   1st Qu.: 50.00  
##  Median : 66.00   Median : 75.00   Median : 70.00   Median : 65.00  
##  Mean   : 69.53   Mean   : 78.82   Mean   : 74.37   Mean   : 72.62  
##  3rd Qu.: 82.00   3rd Qu.:100.00   3rd Qu.: 90.00   3rd Qu.: 95.00  
##  Max.   :255.00   Max.   :190.00   Max.   :230.00   Max.   :194.00  
##  special_defense      speed       
##  Min.   : 20.00   Min.   :  5.00  
##  1st Qu.: 50.00   1st Qu.: 47.00  
##  Median : 70.00   Median : 65.50  
##  Mean   : 71.72   Mean   : 68.59  
##  3rd Qu.: 90.00   3rd Qu.: 90.00  
##  Max.   :230.00   Max.   :180.00
# skimr overview grouped by column type: missing counts, completeness,
# unique values for character columns, and mean/sd/percentiles with an
# inline histogram for numeric columns.
skim(pokemon)
Data summary
Name pokemon
Number of rows 1168
Number of columns 10
_______________________
Column type frequency:
character 3
numeric 7
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
number 0 1 4 6 0 772 0
name 0 1 3 26 0 773 0
type 0 1 3 8 0 18 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
total 0 1 435.63 116.53 180 334 453.0 515 780 ▃▆▇▂▁
hp 0 1 69.53 24.92 1 50 66.0 82 255 ▃▇▁▁▁
attack 0 1 78.82 31.71 5 55 75.0 100 190 ▂▇▆▂▁
defense 0 1 74.37 30.76 5 50 70.0 90 230 ▃▇▂▁▁
special_attack 0 1 72.62 31.77 10 50 65.0 95 194 ▅▇▅▂▁
special_defense 0 1 71.72 27.27 20 50 70.0 90 230 ▇▇▂▁▁
speed 0 1 68.59 28.32 5 47 65.5 90 180 ▃▇▆▁▁
# Chart the basic dataset metrics (DataExplorer::plot_intro).
plot_intro(pokemon)

# Chart the missing-value profile of each column (DataExplorer::plot_missing).
plot_missing(pokemon)

Variasi

***Data Kontinu***

# Histograms of the continuous (numeric) columns.
plot_histogram(pokemon)

***Data Kategorikal***

# Bar charts of the categorical columns; columns with more than 50
# categories are skipped, as reported in the output below.
plot_bar(pokemon)
## 2 columns ignored with more than 50 categories.
## number: 772 categories
## name: 773 categories

Kovarian

# Correlation plot across the dataset's features; features with more than
# 20 categories are skipped, as reported in the output below.
plot_correlation(pokemon)
## 2 features with more than 20 categories ignored!
## number: 772 categories
## name: 773 categories

# Author's reading of the correlation plot above: every feature in the pokemon
# dataset correlates well with the others, with correlation values ranging
# from a minimum of -1 to a maximum of 1.
# NOTE(review): "correlates well" is an interpretation — confirm against the plot.
# Box plots of the continuous columns, split by Pokemon type.
plot_boxplot(pokemon, by="type")

# Scatter plots of the other columns against Pokemon type.
plot_scatterplot(pokemon, by="type")

Jenis Pokemon Terkuat

# Horizontal box plots of the `total` stat, one box per Pokemon type.
ggplot(pokemon, aes(x = type, y = total)) +
  geom_boxplot() +
  coord_flip()

Pokemon Terlemah

# Sort ascending by `total` so the weakest Pokemon are listed first.
arrange(pokemon, total)
## # A tibble: 1,168 x 10
##    number name  type  total    hp attack defense special_attack special_defense
##    <chr>  <chr> <chr> <dbl> <dbl>  <dbl>   <dbl>          <dbl>           <dbl>
##  1  191   Sunk~ GRASS   180    30     30      30             30              30
##  2  298   Azur~ NORM~   190    50     20      40             20              40
##  3  298   Azur~ FAIRY   190    50     20      40             20              40
##  4  401   Kric~ BUG     194    37     25      41             25              41
##  5  010   Cate~ BUG     195    45     30      35             20              20
##  6  013   Weed~ BUG     195    40     35      30             20              20
##  7  013   Weed~ POIS~   195    40     35      30             20              20
##  8  265   Wurm~ BUG     195    45     45      35             20              30
##  9  280   Ralts PSYC~   198    28     25      25             45              35
## 10  280   Ralts FAIRY   198    28     25      25             45              35
## # ... with 1,158 more rows, and 1 more variable: speed <dbl>

Pokemon Tercepat

# Sort descending by `speed` so the fastest Pokemon are listed first.
arrange(pokemon, desc(speed))
## # A tibble: 1,168 x 10
##    number name  type  total    hp attack defense special_attack special_defense
##    <chr>  <chr> <chr> <dbl> <dbl>  <dbl>   <dbl>          <dbl>           <dbl>
##  1  386.3 Deox~ PSYC~   600    50     95      90             95              90
##  2  291   Ninj~ BUG     456    61     90      45             50              50
##  3  291   Ninj~ FLYI~   456    61     90      45             50              50
##  4  065.1 Mega~ PSYC~   590    55     50      65            175              95
##  5  142.1 Mega~ ROCK    615    80    135      85             70              95
##  6  142.1 Mega~ FLYI~   615    80    135      85             70              95
##  7  386   Deox~ PSYC~   600    50    150      50            150              50
##  8  386.1 Deox~ PSYC~   600    50    180      20            180              20
##  9  617   Acce~ BUG     495    80     70      40            100              60
## 10  101   Elec~ ELEC~   480    60     50      70             80              80
## # ... with 1,158 more rows, and 1 more variable: speed <dbl>

Pokemon Terkuat dari jenisnya

# Strongest Pokemon of each type: the rows whose `total` equals the highest
# `total` found within their type (ties are all kept).
# The lookup table has a descriptive name so that base::max() is not masked
# by a tibble in the global environment.
max_total_by_type <- pokemon %>%
  group_by(type) %>%
  summarise(total = max(total), .groups = "drop")

# semi_join() is a pure filter: it keeps the pokemon rows that match a
# (type, total) pair in the lookup table and never adds columns or rows.
pokemon %>%
  semi_join(max_total_by_type, by = c("type", "total"))
## # A tibble: 31 x 10
##    number name  type  total    hp attack defense special_attack special_defense
##    <chr>  <chr> <chr> <dbl> <dbl>  <dbl>   <dbl>          <dbl>           <dbl>
##  1  003.1 Mega~ GRASS   625    80    100     123            122             120
##  2  003.1 Mega~ POIS~   625    80    100     123            122             120
##  3  127.1 Mega~ BUG     600    65    155     120             65              90
##  4  150.1 Mega~ PSYC~   780   106    190     100            154             100
##  5  150.1 Mega~ FIGH~   780   106    190     100            154             100
##  6  150.2 Mega~ PSYC~   780   106    150      70            194             120
##  7  150.2 Mega~ PSYC~   780   106    150      70            194             120
##  8  212.1 Mega~ BUG     600    70    150     140             65             100
##  9  214.1 Mega~ BUG     600    80    185     115             40             105
## 10  248.1 Mega~ ROCK    700   100    164     150             95             120
## # ... with 21 more rows, and 1 more variable: speed <dbl>

Pokemon dengan Tingkat Serangan Spesial Tertinggi tiap Jenisnya

# Pokemon with the highest special attack of each type: the rows whose
# `special_attack` equals the maximum within their type (ties are all kept).
# The lookup table has a descriptive name so that base::max() is not masked
# by a tibble in the global environment.
max_special_attack_by_type <- pokemon %>%
  group_by(type) %>%
  summarise(special_attack = max(special_attack), .groups = "drop")

# semi_join() is a pure filter: it keeps the pokemon rows that match a
# (type, special_attack) pair in the lookup table.
pokemon %>%
  semi_join(max_special_attack_by_type, by = c("type", "special_attack"))
## # A tibble: 21 x 10
##    number name  type  total    hp attack defense special_attack special_defense
##    <chr>  <chr> <chr> <dbl> <dbl>  <dbl>   <dbl>          <dbl>           <dbl>
##  1  006.2 Mega~ FIRE    634    78    104      78            159             115
##  2  006.2 Mega~ FLYI~   634    78    104      78            159             115
##  3  094.1 Mega~ GHOST   600    60     65      80            170              95
##  4  094.1 Mega~ POIS~   600    60     65      80            170              95
##  5  139   Omas~ ROCK    495    70     60     125            115              70
##  6  150.1 Mega~ FIGH~   780   106    190     100            154             100
##  7  150.2 Mega~ PSYC~   780   106    150      70            194             120
##  8  150.2 Mega~ PSYC~   780   106    150      70            194             120
##  9  181.1 Mega~ ELEC~   610    90     95     105            165             110
## 10  229.1 Mega~ DARK    600    75     90      90            140              90
## # ... with 11 more rows, and 1 more variable: speed <dbl>

Pokemon dengan Tingkat Pertahanan Spesial Tertinggi tiap Jenisnya

# Pokemon with the highest special defense of each type: the rows whose
# `special_defense` equals the maximum within their type (ties are all kept).
# The lookup table has a descriptive name so that base::max() is not masked
# by a tibble in the global environment.
max_special_defense_by_type <- pokemon %>%
  group_by(type) %>%
  summarise(special_defense = max(special_defense), .groups = "drop")

# semi_join() is a pure filter: it keeps the pokemon rows that match a
# (type, special_defense) pair in the lookup table.
pokemon %>%
  semi_join(max_special_defense_by_type, by = c("type", "special_defense"))
## # A tibble: 23 x 10
##    number name  type  total    hp attack defense special_attack special_defense
##    <chr>  <chr> <chr> <dbl> <dbl>  <dbl>   <dbl>          <dbl>           <dbl>
##  1  130.1 Mega~ DARK    640    95    155     109             70             130
##  2  181.1 Mega~ ELEC~   610    90     95     105            165             110
##  3  197   Umbr~ DARK    525    95     65     110             60             130
##  4  213   Shuc~ BUG     505    20     10     230             10             230
##  5  213   Shuc~ ROCK    505    20     10     230             10             230
##  6  226   Mant~ WATER   465    65     40      70             80             140
##  7  242   Blis~ NORM~   540   255     10      10             75             135
##  8  249   Lugia FLYI~   680   106     90     130             90             154
##  9  250   Ho-oh FIRE    680   106    130      90            110             154
## 10  250   Ho-oh FLYI~   680   106    130      90            110             154
## # ... with 13 more rows, and 1 more variable: speed <dbl>

Apakah Pokemon dengan Tingkat Serangan Spesial Tinggi akan Memiliki Tingkat Pertahanan Spesial yang Tinggi juga?

# Scatter plot of special attack against special defense, with a smoothed
# trend line to show how the two stats move together.
pokemon %>%
  ggplot(aes(x = special_attack, y = special_defense)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'