# Set the default knitr chunk option echo = TRUE for every chunk in this
# document.
knitr::opts_chunk$set(echo = TRUE)
Pada artikel ini saya akan menganalisis sebuah dataset sebagai bahan latihan Analisis Data Eksploratif menggunakan R. Kali ini, data yang digunakan adalah data pokemon. Pokemon merupakan dataset yang terdiri dari beberapa variabel, di antaranya nama pokemon, jenis pokemon, dan karakteristik kekuatan pokemon.
Kolom-kolom pada dataset antara lain:

- number : nomor seri pokemon
- name : nama pokemon
- type : jenis pokemon
- total : total dari semua variabel kekuatan (hp sampai speed)
- hp : health point
- attack : kekuatan serangan pokemon
- defense : kekuatan pertahanan pokemon
- special_attack : kekuatan serangan khusus
- special_defense : kekuatan pertahanan khusus
- speed : kecepatan pokemon

if(!require(tidyverse)) install.packages("tidyverse")
# Install skimr and DataExplorer only when they are not already installed.
# requireNamespace() only checks availability; attaching is left to the
# library() calls that follow.
if (!requireNamespace("skimr", quietly = TRUE)) {
  install.packages("skimr")
}
# The package is named "DataExplorer"; the previous call requested the
# misspelled "DataEksplorer", which cannot be installed.
if (!requireNamespace("DataExplorer", quietly = TRUE)) {
  install.packages("DataExplorer")
}
library(tidyverse)
library(skimr)
library(DataExplorer)
library(readr)
# Read the pokemon data set from the author's local copy of the CSV file.
# NOTE(review): the path is absolute and machine-specific; adjust it (or
# switch to a project-relative path) before running on another machine.
pokemon <- read_csv("G:/PERPUSTAKAAN_ANAS/edar-master/data/pokemon.csv")

# View() opens a GUI data viewer, so only call it in an interactive session;
# it is skipped when the document is knitted or run with Rscript.
if (interactive()) {
  View(pokemon)
}

# Print the tibble, then show a column-wise overview (names, types and the
# first values of each column).
pokemon
glimpse(pokemon)
## Rows: 1,168
## Columns: 10
## $ number <chr> " 001", " 001", " 002", " 002", " 003", " 003", " 0...
## $ name <chr> "Bulbasaur", "Bulbasaur", "Ivysaur", "Ivysaur", "Ve...
## $ type <chr> "GRASS", "POISON", "GRASS", "POISON", "GRASS", "POI...
## $ total <dbl> 318, 318, 405, 405, 525, 525, 625, 625, 309, 405, 5...
## $ hp <dbl> 45, 45, 60, 60, 80, 80, 80, 80, 39, 58, 78, 78, 78,...
## $ attack <dbl> 49, 49, 62, 62, 82, 82, 100, 100, 52, 64, 84, 84, 1...
## $ defense <dbl> 49, 49, 63, 63, 83, 83, 123, 123, 43, 58, 78, 78, 1...
## $ special_attack <dbl> 65, 65, 80, 80, 100, 100, 122, 122, 60, 80, 109, 10...
## $ special_defense <dbl> 65, 65, 80, 80, 100, 100, 120, 120, 50, 65, 85, 85,...
## $ speed <dbl> 45, 45, 60, 60, 80, 80, 80, 80, 65, 80, 100, 100, 1...
# Per-column summary: length/class/mode for the character columns, quartiles
# and mean for the numeric columns.
summary(pokemon)
## number name type total
## Length:1168 Length:1168 Length:1168 Min. :180.0
## Class :character Class :character Class :character 1st Qu.:334.0
## Mode :character Mode :character Mode :character Median :453.0
## Mean :435.6
## 3rd Qu.:515.0
## Max. :780.0
## hp attack defense special_attack
## Min. : 1.00 Min. : 5.00 Min. : 5.00 Min. : 10.00
## 1st Qu.: 50.00 1st Qu.: 55.00 1st Qu.: 50.00 1st Qu.: 50.00
## Median : 66.00 Median : 75.00 Median : 70.00 Median : 65.00
## Mean : 69.53 Mean : 78.82 Mean : 74.37 Mean : 72.62
## 3rd Qu.: 82.00 3rd Qu.:100.00 3rd Qu.: 90.00 3rd Qu.: 95.00
## Max. :255.00 Max. :190.00 Max. :230.00 Max. :194.00
## special_defense speed
## Min. : 20.00 Min. : 5.00
## 1st Qu.: 50.00 1st Qu.: 47.00
## Median : 70.00 Median : 65.50
## Mean : 71.72 Mean : 68.59
## 3rd Qu.: 90.00 3rd Qu.: 90.00
## Max. :230.00 Max. :180.00
# skimr overview: missing counts, completeness and distribution statistics
# for each column, grouped by variable type.
skim(pokemon)
| Name | pokemon |
| Number of rows | 1168 |
| Number of columns | 10 |
| _______________________ | |
| Column type frequency: | |
| character | 3 |
| numeric | 7 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| number | 0 | 1 | 4 | 6 | 0 | 772 | 0 |
| name | 0 | 1 | 3 | 26 | 0 | 773 | 0 |
| type | 0 | 1 | 3 | 8 | 0 | 18 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| total | 0 | 1 | 435.63 | 116.53 | 180 | 334 | 453.0 | 515 | 780 | ▃▆▇▂▁ |
| hp | 0 | 1 | 69.53 | 24.92 | 1 | 50 | 66.0 | 82 | 255 | ▃▇▁▁▁ |
| attack | 0 | 1 | 78.82 | 31.71 | 5 | 55 | 75.0 | 100 | 190 | ▂▇▆▂▁ |
| defense | 0 | 1 | 74.37 | 30.76 | 5 | 50 | 70.0 | 90 | 230 | ▃▇▂▁▁ |
| special_attack | 0 | 1 | 72.62 | 31.77 | 10 | 50 | 65.0 | 95 | 194 | ▅▇▅▂▁ |
| special_defense | 0 | 1 | 71.72 | 27.27 | 20 | 50 | 70.0 | 90 | 230 | ▇▇▂▁▁ |
| speed | 0 | 1 | 68.59 | 28.32 | 5 | 47 | 65.5 | 90 | 180 | ▃▇▆▁▁ |
# DataExplorer introduction plot for the data set.
plot_intro(pokemon)
# Interpretation (author's reading of the plot): the data set to be analysed
# shows no errors.
# Missing-value profile of the data set.
plot_missing(pokemon)
# Interpretation (author's reading of the plot): missing values are 0% for
# every column, so the data set is ready to use.
# Variation ----
# Histograms of the numeric columns.
plot_histogram(pokemon)
# Bar charts of the categorical columns.
plot_bar(pokemon)
## 2 columns ignored with more than 50 categories.
## number: 772 categories
## name: 773 categories
# Correlation plot; per the output below, `number` and `name` are ignored
# because they have too many categories.
plot_correlation(pokemon)
## 2 features with more than 20 categories ignored!
## number: 772 categories
## name: 773 categories
# Boxplots of the numeric columns, split by pokemon `type`.
plot_boxplot(pokemon, by = "type")
# Scatterplots of the columns against `total`.
plot_scatterplot(pokemon, by = "total")
# Horizontal boxplots of `total` for each pokemon type.
ggplot(pokemon, aes(x = type, y = total)) +
  geom_boxplot() +
  coord_flip()
# Interpretation (author's reading of the boxplot): the dragon type has a
# normal-looking distribution and is the strongest type compared with the
# others. NOTE(review): not verifiable from the code alone.
# Strongest pokemon ----
# Rows ordered from the highest to the lowest `total`.
arrange(pokemon, desc(total))
# Rows ordered from the lowest to the highest `total`.
arrange(pokemon, total)
# Rows ordered from the highest to the lowest `speed`.
arrange(pokemon, desc(speed))
# Highest `total` within each type. The result gets a descriptive name rather
# than `max`, which shadowed base::max(); `.groups = "drop"` states the
# ungrouping explicitly and silences the summarise() message.
max_total_by_type <- pokemon %>%
  group_by(type) %>%
  summarise(total = max(total), .groups = "drop")

# Keep the pokemon rows whose `total` equals the maximum of their type.
pokemon %>%
  right_join(max_total_by_type, by = c("type", "total"))
# Highest `special_attack` within each type. The result gets a descriptive
# name rather than `max`, which shadowed base::max(); `.groups = "drop"`
# states the ungrouping explicitly and silences the summarise() message.
max_special_attack_by_type <- pokemon %>%
  group_by(type) %>%
  summarise(special_attack = max(special_attack), .groups = "drop")

# Keep the pokemon rows whose `special_attack` equals the maximum of their
# type.
pokemon %>%
  right_join(max_special_attack_by_type, by = c("type", "special_attack"))
# Highest `defense` within each type. The result gets a descriptive name
# rather than `max`, which shadowed base::max(); `.groups = "drop"` states
# the ungrouping explicitly and silences the summarise() message.
max_defense_by_type <- pokemon %>%
  group_by(type) %>%
  summarise(defense = max(defense), .groups = "drop")

# Keep the pokemon rows whose `defense` equals the maximum of their type.
pokemon %>%
  right_join(max_defense_by_type, by = c("type", "defense"))
# Scatterplot of `special_defense` against `special_attack` with a smoothed
# trend line.
ggplot(pokemon, aes(x = special_attack, y = special_defense)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Interpretation (author's reading of the plot): a pokemon with a high
# special_attack does not necessarily have a high special_defense as well.
# (The second half of this comment used to sit on an uncommented line, which
# R cannot parse.)
Demikian hasil EDA dataset pokemon. Apabila terdapat kesalahan, mohon dimaafkan. Terima kasih dan sampai jumpa.