Pokemon.csv 데이터는 number, name, first and second type 그리고 basic stats(HP, Attack, Defense, Special Attack, Special Defense and Speed)를 포함한 800개의 관측치를 가지고 있으며 포켓몬 카드나 포켓몬 Go가 아닌 포켓몬 게임에 대한 데이터입니다.
library(readr)
library(stringr)
library(ggplot2)
library(dplyr)
library(DT)
# Point the R session at the project folder, then echo the working directory
# to confirm the change took effect.
# NOTE(review): the absolute Windows path is hard-coded, so the script runs
# unchanged only on a machine that has this exact folder.
setwd("C:\\R\\Pokemon")
getwd()
## [1] "C:/R/Pokemon"
# Load the Pokemon stats table. The first CSV row is the header, and text
# columns are kept as character vectors instead of being turned into factors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
pokemon_data <- read.csv(
  "C:\\R\\Pokemon\\Pokemon.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Preview the first six rows.
head(pokemon_data)
## X. Name Type.1 Type.2 Total HP Attack Defense Sp..Atk
## 1 1 Bulbasaur Grass Poison 318 45 49 49 65
## 2 2 Ivysaur Grass Poison 405 60 62 63 80
## 3 3 Venusaur Grass Poison 525 80 82 83 100
## 4 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122
## 5 4 Charmander Fire 309 39 52 43 60
## 6 5 Charmeleon Fire 405 58 64 58 80
## Sp..Def Speed Generation Legendary
## 1 65 45 1 FALSE
## 2 80 60 1 FALSE
## 3 100 80 1 FALSE
## 4 120 80 1 FALSE
## 5 50 65 1 FALSE
## 6 65 80 1 FALSE
# Show the structure of the table: dimensions, column names and column types.
str(pokemon_data)
## 'data.frame': 800 obs. of 13 variables:
## $ X. : int 1 2 3 3 4 5 6 6 6 7 ...
## $ Name : chr "Bulbasaur" "Ivysaur" "Venusaur" "VenusaurMega Venusaur" ...
## $ Type.1 : chr "Grass" "Grass" "Grass" "Grass" ...
## $ Type.2 : chr "Poison" "Poison" "Poison" "Poison" ...
## $ Total : int 318 405 525 625 309 405 534 634 634 314 ...
## $ HP : int 45 60 80 80 39 58 78 78 78 44 ...
## $ Attack : int 49 62 82 100 52 64 84 130 104 48 ...
## $ Defense : int 49 63 83 123 43 58 78 111 78 65 ...
## $ Sp..Atk : int 65 80 100 122 60 80 109 130 159 50 ...
## $ Sp..Def : int 65 80 100 120 50 65 85 85 115 64 ...
## $ Speed : int 45 60 80 80 65 80 100 100 100 43 ...
## $ Generation: int 1 1 1 1 1 1 1 1 1 1 ...
## $ Legendary : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
# Per-column summary statistics (quantiles and mean for numeric columns,
# counts for the logical Legendary column).
summary(pokemon_data)
## X. Name Type.1 Type.2
## Min. : 1.0 Length:800 Length:800 Length:800
## 1st Qu.:184.8 Class :character Class :character Class :character
## Median :364.5 Mode :character Mode :character Mode :character
## Mean :362.8
## 3rd Qu.:539.2
## Max. :721.0
## Total HP Attack Defense
## Min. :180.0 Min. : 1.00 Min. : 5 Min. : 5.00
## 1st Qu.:330.0 1st Qu.: 50.00 1st Qu.: 55 1st Qu.: 50.00
## Median :450.0 Median : 65.00 Median : 75 Median : 70.00
## Mean :435.1 Mean : 69.26 Mean : 79 Mean : 73.84
## 3rd Qu.:515.0 3rd Qu.: 80.00 3rd Qu.:100 3rd Qu.: 90.00
## Max. :780.0 Max. :255.00 Max. :190 Max. :230.00
## Sp..Atk Sp..Def Speed Generation
## Min. : 10.00 Min. : 20.0 Min. : 5.00 Min. :1.000
## 1st Qu.: 49.75 1st Qu.: 50.0 1st Qu.: 45.00 1st Qu.:2.000
## Median : 65.00 Median : 70.0 Median : 65.00 Median :3.000
## Mean : 72.82 Mean : 71.9 Mean : 68.28 Mean :3.324
## 3rd Qu.: 95.00 3rd Qu.: 90.0 3rd Qu.: 90.00 3rd Qu.:5.000
## Max. :194.00 Max. :230.0 Max. :180.00 Max. :6.000
## Legendary
## Mode :logical
## FALSE:735
## TRUE :65
##
##
##
| 변수명 | 변수 설명 |
|---|---|
| #. | 포켓몬의 ID |
| Name | 포켓몬의 이름 |
| Type 1 | 각각의 포켓몬이 가진 타입(상성을 결정하기 위함) |
| Type 2 | 몇몇 포켓몬이 가진 2번째 타입 |
| Total | 모든 능력치의 합, 일반적으로 포켓몬의 강함을 측정하는 측도 |
| HP | 포켓몬의 체력 |
| Attack | 포켓몬의 기본 공격력(ex. 할퀴기, 펀치) |
| Defense | 포켓몬의 기본 방어력 |
| SP Atk | 포켓몬의 스페셜 공격력(ex. 불꽃 발사, 버블 빔) |
| SP Def | 포켓몬의 스페셜 공격력에 저항하는 스페셜 방어력 |
| Speed | 어느 포켓몬이 각 라운드에 첫 번째로 공격하는지 결정 |
| Generation | 포켓몬의 세대(1~6) |
| Legendary | 전설의 포켓몬 여부(TRUE/FALSE) |
# Column names exactly as they came out of read.csv().
names(pokemon_data)
## [1] "X." "Name" "Type.1" "Type.2" "Total"
## [6] "HP" "Attack" "Defense" "Sp..Atk" "Sp..Def"
## [11] "Speed" "Generation" "Legendary"
# Replace the imported column names with cleaner ones: the ID column becomes
# "No" and the special stats become "SP.Atk" / "SP.Def".
names(pokemon_data) <- c(
  "No", "Name", "Type.1", "Type.2", "Total",
  "HP", "Attack", "Defense", "SP.Atk", "SP.Def",
  "Speed", "Generation", "Legendary"
)
# Echo the new names to confirm the rename.
names(pokemon_data)
## [1] "No" "Name" "Type.1" "Type.2" "Total"
## [6] "HP" "Attack" "Defense" "SP.Atk" "SP.Def"
## [11] "Speed" "Generation" "Legendary"
포켓몬의 각 개체만을 나타내는 변수인 No를 제외
# Drop the ID column; it only identifies each row.
pokemon_data <- pokemon_data %>%
  select(-No)
# Strip the base-form prefix from Mega names, e.g.
# "VenusaurMega Venusaur" -> "Mega Venusaur". The look-ahead keeps "Mega"
# itself, and names that merely start with "Mega" are left unchanged.
# str_replace() returns a new vector and does not modify its input, so the
# result has to be assigned back to the Name column to take effect.
pokemon_data$Name <- str_replace(pokemon_data$Name, ".*(?=Mega)", "")
# Browse the cleaned table interactively.
DT::datatable(pokemon_data)
# Three non-legendary Pokemon with the highest Total, strongest first.
pokemon_top3 <- pokemon_data %>%
  filter(!Legendary) %>%
  arrange(desc(Total)) %>%
  select(Name, Total) %>%
  head(n = 3)
pokemon_top3
## Name Total
## 1 TyranitarMega Tyranitar 700
## 2 SalamenceMega Salamence 700
## 3 MetagrossMega Metagross 700
# Three non-legendary Pokemon with the lowest Total, weakest first.
# NOTE(review): the variable is named "bottom10" but only 3 rows are kept.
pokemon_bottom10 <- pokemon_data %>%
  filter(!Legendary) %>%
  arrange(Total) %>%
  select(Name, Total) %>%
  head(n = 3)
pokemon_bottom10
## Name Total
## 1 Sunkern 180
## 2 Azurill 190
## 3 Kricketot 194
# Top 3 fastest non-legendary Pokemon (highest Speed first).
speed <- pokemon_data %>%
  filter(!Legendary) %>%
  arrange(desc(Speed)) %>%
  select(Name, Speed) %>%
  head(n = 3)
speed
## Name Speed
## 1 Ninjask 160
## 2 AlakazamMega Alakazam 150
## 3 AerodactylMega Aerodactyl 150
# Three slowest non-legendary Pokemon (lowest Speed first).
# This overwrites the `speed` object that held the fastest three.
speed <- pokemon_data %>%
  filter(!Legendary) %>%
  arrange(Speed) %>%
  select(Name, Speed) %>%
  head(n = 3)
speed
## Name Speed
## 1 Shuckle 5
## 2 Munchlax 5
## 3 Trapinch 10
# Three non-legendary Pokemon with the highest special attack.
spatk <- pokemon_data %>%
  filter(!Legendary) %>%
  arrange(desc(SP.Atk)) %>%
  select(Name, SP.Atk) %>%
  head(n = 3)
spatk
## Name SP.Atk
## 1 AlakazamMega Alakazam 175
## 2 GengarMega Gengar 170
## 3 AmpharosMega Ampharos 165
# Three non-legendary Pokemon with the highest special defense.
spdef <- pokemon_data %>%
  filter(!Legendary) %>%
  arrange(desc(SP.Def)) %>%
  select(Name, SP.Def) %>%
  head(n = 3)
spdef
## Name SP.Def
## 1 Shuckle 230
## 2 Florges 154
## 3 Probopass 150
# Keep only the legendary Pokemon (all columns) and browse them interactively.
legend <- filter(pokemon_data, Legendary)
DT::datatable(legend)
# Ten legendary Pokemon with the highest Total, strongest first.
legend_top <- legend %>%
  arrange(desc(Total)) %>%
  select(Name, Total) %>%
  head(n = 10)
legend_top
## Name Total
## 1 MewtwoMega Mewtwo X 780
## 2 MewtwoMega Mewtwo Y 780
## 3 RayquazaMega Rayquaza 780
## 4 KyogrePrimal Kyogre 770
## 5 GroudonPrimal Groudon 770
## 6 Arceus 720
## 7 LatiasMega Latias 700
## 8 LatiosMega Latios 700
## 9 KyuremBlack Kyurem 700
## 10 KyuremWhite Kyurem 700
Pikachu
# Look up the row whose Name is exactly "Pikachu".
pikachu <- filter(pokemon_data, Name == "Pikachu")
pikachu
## Name Type.1 Type.2 Total HP Attack Defense SP.Atk SP.Def Speed
## 1 Pikachu Electric 320 35 55 40 50 50 90
## Generation Legendary
## 1 1 FALSE
# Non-legendary Pokemon, reduced to Name, Total and the Legendary flag.
# Legendary is a logical column, so it is filtered on directly instead of
# being compared with the string "FALSE", which only worked through implicit
# logical-to-character coercion.
normal <- pokemon_data %>%
  filter(!Legendary) %>%
  select(Name, Total, Legendary)
head(normal)
## Name Total Legendary
## 1 Bulbasaur 318 FALSE
## 2 Ivysaur 405 FALSE
## 3 Venusaur 525 FALSE
## 4 VenusaurMega Venusaur 625 FALSE
## 5 Charmander 309 FALSE
## 6 Charmeleon 405 FALSE
# Legendary Pokemon, reduced to Name, Total and the Legendary flag.
# This replaces the earlier full-width `legend` table.
# Legendary is a logical column, so it is filtered on directly instead of
# being compared with the string "TRUE", which only worked through implicit
# logical-to-character coercion.
legend <- pokemon_data %>%
  filter(Legendary) %>%
  select(Name, Total, Legendary)
head(legend)
## Name Total Legendary
## 1 Articuno 580 TRUE
## 2 Zapdos 580 TRUE
## 3 Moltres 580 TRUE
## 4 Mewtwo 680 TRUE
## 5 MewtwoMega Mewtwo X 780 TRUE
## 6 MewtwoMega Mewtwo Y 780 TRUE
정규성의 경우, 두 집단 모두 관측치가 30개 이상(일반 포켓몬 735개, 전설의 포켓몬 65개)이므로 중심극한정리에 따라 정규성 가정을 만족한다고 판단할 수 있다.
# F test comparing the variance of Total between the non-legendary and
# legendary groups.
var.test(normal$Total, legend$Total)
##
## F test to compare two variances
##
## data: normal$Total and legend$Total
## F = 3.0694, num df = 734, denom df = 64, p-value = 2.242e-07
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 2.074173 4.294797
## sample estimates:
## ratio of variances
## 3.069395
p-value가 0.05보다 작으므로 두 집단의 분산이 같다는 귀무가설을 기각한다. 즉, 두 집단의 분산은 유의미하게 다르다고 볼 수 있으므로 등분산을 가정하지 않는 Welch t-test를 사용한다.
# Welch two-sample t-test on Total (equal variances are not assumed).
# FALSE is spelled out because F is a reassignable variable, not a reserved word.
t.test(normal$Total, legend$Total, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: normal$Total and legend$Total
## t = -25.834, df = 102.8, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -237.0741 -203.2679
## sample estimates:
## mean of x mean of y
## 417.2136 637.3846
p-value가 0.05보다 작으므로 귀무가설을 기각하여 전설의 포켓몬과 일반 포켓몬의 Total에는 유의한 차이가 있다고 볼 수 있다.
# Count how many rows fall under each primary type.
table(pokemon_data$Type.1)
##
## Bug Dark Dragon Electric Fairy Fighting Fire Flying
## 69 31 32 44 17 27 52 4
## Ghost Grass Ground Ice Normal Poison Psychic Rock
## 32 70 32 24 98 28 57 44
## Steel Water
## 27 112
# Count how many rows fall under each secondary type; the unnamed first
# category in the output is the empty string, i.e. rows with no second type.
table(pokemon_data$Type.2)
##
## Bug Dark Dragon Electric Fairy Fighting Fire
## 386 3 20 18 6 23 26 12
## Flying Ghost Grass Ground Ice Normal Poison Psychic
## 97 14 25 35 14 4 34 33
## Rock Steel Water
## 14 22 14
800개의 관측치 중 386개는 Type.1만을 가지고 있고, 나머지 414개는 Type.1과 Type.2를 모두 가지고 있다. (포켓몬 ID는 721번까지 있지만, 메가 진화 등 다른 폼이 별도의 행으로 포함되어 있어 관측치는 800개이다.)
# Horizontal bar chart of row counts per primary type.
ggplot(pokemon_data, aes(Type.1)) +
  geom_bar() +
  coord_flip()
# Same chart for the secondary type.
ggplot(pokemon_data, aes(Type.2)) +
  geom_bar() +
  coord_flip()
<참고자료>
집단간 비교를 위한 면 분할 : facet_wrap()을 배움
시각화는 공부할 게 많다..!
# Bar chart of primary-type counts with one fill colour per type.
# The x-axis tick marks and labels are hidden; the fill legend
# ("Pokemon Types") carries the type names instead.
pokemon.plot1 <- ggplot(pokemon_data, aes(Type.1)) +
  geom_bar(aes(fill = as.factor(Type.1))) +
  labs(x = "Type 1", y = "Count") +
  scale_fill_discrete(name = "Pokemon Types") +
  theme(axis.ticks.x = element_blank(), axis.text.x = element_blank())
pokemon.plot1
# Secondary-type counts, drawn as one panel per primary type.
# Bars are coloured by secondary type; x-axis tick labels are hidden and the
# fill legend ("Type 2") names the bars instead.
# Two label fixes: labs() takes lower-case `title` (a capitalised `Title`
# does not set the plot title), and the x axis maps Type.2, so it is labelled
# "Type 2" rather than "Type 1".
pokemon.plot2 <- ggplot(pokemon_data, aes(Type.2)) +
  geom_bar(aes(fill = as.factor(Type.2))) +
  scale_fill_discrete(name = "Type 2") +
  labs(x = "Type 2", y = "Count", title = "Distr. of Type 1 and Type 2") +
  facet_wrap(~Type.1) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
pokemon.plot2
# Density of Total, one semi-transparent curve per primary type, each in its
# own panel. x-axis tick marks and labels are hidden.
pokemon.plot3 <- ggplot(pokemon_data, aes(x = Total, fill = Type.1)) +
  geom_density(alpha = 0.5) +
  facet_wrap(~Type.1) +
  labs(x = "Total", y = "Density", title = "Pokemon Total Score") +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
pokemon.plot3
# Total against Attack, one panel per primary type: hollow semi-transparent
# points with a fitted linear-model line on top.
pokemon.plot4 <- ggplot(
  pokemon_data,
  aes(x = Attack, y = Total, color = Type.1)
) +
  geom_point(shape = 1, alpha = 0.5) +
  facet_wrap(~Type.1) +
  geom_smooth(method = lm) +
  labs(
    x = "Base Attack",
    y = "Total Score",
    title = "Pokemon score vs. Pokemon Attack"
  )
pokemon.plot4
# Total against Defense, one panel per primary type: hollow semi-transparent
# points with a fitted linear-model line on top.
pokemon.plot5 <- ggplot(
  pokemon_data,
  aes(x = Defense, y = Total, color = Type.1)
) +
  geom_point(shape = 1, alpha = 0.5) +
  facet_wrap(~Type.1) +
  geom_smooth(method = lm) +
  labs(
    x = "Base Defense",
    y = "Total Score",
    title = "Pokemon score vs. Pokemon Def"
  )
pokemon.plot5