Week 2 Challenge: Pokemon Stats

Load in some libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)

Import the data file

# Read the Pokemon stats CSV (expected in the working directory) into a tibble.
pokemon_data <- read_csv(file = "pokemon.csv")
## Rows: 1072 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, type1, type2
## dbl (9): number, total, hp, attack, defense, sp_attack, sp_defense, speed, g...
## lgl (1): legendary
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Let’s look at a sampling of the data

# knitr supplies kable() for rendering data frames as formatted tables.
library(knitr)

# Preview the first 20 rows of the data set.
pokemon_data %>%
  head(n = 20) %>%
  kable()
number name type1 type2 total hp attack defense sp_attack sp_defense speed generation legendary
1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 FALSE
2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 FALSE
3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 FALSE
3 Mega Venusaur Grass Poison 625 80 100 123 122 120 80 1 FALSE
3 Gigantamax Venusaur Grass Poison 525 80 82 83 100 100 80 1 FALSE
4 Charmander Fire NA 309 39 52 43 60 50 65 1 FALSE
5 Charmeleon Fire NA 405 58 64 58 80 65 80 1 FALSE
6 Charizard Fire Flying 534 78 84 78 109 85 100 1 FALSE
6 Mega Charizard X Fire Dragon 634 78 130 111 130 85 100 1 FALSE
6 Mega Charizard Y Fire Flying 634 78 104 78 159 115 100 1 FALSE
6 Gigantamax Charizard Fire Flying 534 78 84 78 109 85 100 1 FALSE
7 Squirtle Water NA 314 44 48 65 50 64 43 1 FALSE
8 Wartortle Water NA 405 59 63 80 65 80 58 1 FALSE
9 Blastoise Water NA 530 79 83 100 85 105 78 1 FALSE
9 Mega Blastoise Water NA 630 79 103 120 135 115 78 1 FALSE
9 Gigantamax Blasoise Blastoise Water 530 79 83 100 85 105 78 1 FALSE
10 Caterpie Bug NA 195 45 30 35 20 20 45 1 FALSE
11 Metapod Bug NA 205 50 20 55 25 25 30 1 FALSE
12 Butterfree Bug Flying 395 60 45 50 90 80 70 1 FALSE
12 Gigantamax Butterfree Bug Flying 395 60 45 50 90 80 70 1 FALSE

The first thing we’ll check is all the different kinds of pokemon typings.

# List the distinct primary types in sorted order (missing values are dropped).
sort(unique(pokemon_data$type1))
##  [1] "Blastoise" "Bug"       "Dark"      "Dragon"    "Electric"  "Fairy"    
##  [7] "Fighting"  "Fire"      "Flying"    "Ghost"     "Graass"    "Grass"    
## [13] "Ground"    "Ice"       "Normal"    "Poison"    "Psychic"   "Rock"     
## [19] "Steel"     "Water"

…OK… So it looks like the dataset is just a bit off here. It’s counting “Blastoise” as a type, which is incorrect (it’s just a water type). So let’s go ahead and change that

# Inspect the name and typing of the suspicious record in row 16.
pokemon_data %>%
  slice(16) %>%
  select(name, type1, type2)
## # A tibble: 1 × 3
##   name                type1     type2
##   <chr>               <chr>     <chr>
## 1 Gigantamax Blasoise Blastoise Water
# Locate the mis-entered record(s) by value rather than by a hard-coded row
# number, so the correction still hits the right row if the CSV is re-ordered
# or extended. which() ignores missing values in type1.
bad_type_rows <- which(pokemon_data$type1 == "Blastoise")

# "Blastoise" is a name, not a type: the Pokemon is a pure Water type.
pokemon_data$type1[bad_type_rows] <- "Water"
pokemon_data$type2[bad_type_rows] <- NA

# Show the corrected record(s).
pokemon_data[bad_type_rows, c("name", "type1", "type2")]
## # A tibble: 1 × 3
##   name                type1 type2
##   <chr>               <chr> <chr>
## 1 Gigantamax Blasoise Water <NA>

And let’s double check that overall

# Re-list the distinct primary types in sorted order (missing values dropped).
sort(unique(pokemon_data$type1))
##  [1] "Bug"      "Dark"     "Dragon"   "Electric" "Fairy"    "Fighting"
##  [7] "Fire"     "Flying"   "Ghost"    "Graass"   "Grass"    "Ground"  
## [13] "Ice"      "Normal"   "Poison"   "Psychic"  "Rock"     "Steel"   
## [19] "Water"
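Nice, “Blastoise” is no longer listed as a type. (Note that the misspelled “Graass” level is still present and should be treated as “Grass”.)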

Let’s look at non-legendaries with a base stat total >= 580

# Keep only non-legendary Pokemon. `legendary` is a logical column, so negate
# it directly instead of comparing it against the string 'FALSE' (which only
# works through implicit logical-to-character coercion).
pokemon_NL <- pokemon_data %>%
  filter(!legendary)

# Of those, keep the ones whose base stat total is at least 580.
pokemon_NL_1 <- pokemon_NL %>%
  filter(total >= 580)

# Show the first 20 matches as a formatted table.
kable(head(pokemon_NL_1, 20))
number name type1 type2 total hp attack defense sp_attack sp_defense speed generation legendary
3 Mega Venusaur Grass Poison 625 80 100 123 122 120 80 1 FALSE
6 Mega Charizard X Fire Dragon 634 78 130 111 130 85 100 1 FALSE
6 Mega Charizard Y Fire Flying 634 78 104 78 159 115 100 1 FALSE
9 Mega Blastoise Water NA 630 79 103 120 135 115 78 1 FALSE
65 Mega Alakazam Psychic NA 590 55 50 65 175 95 150 1 FALSE
80 Mega Slowbro Water Psychic 590 95 75 180 130 80 30 1 FALSE
94 Mega Gengar Ghost Poison 600 60 65 80 170 95 130 1 FALSE
115 Mega Kangaskhan Normal NA 590 105 125 100 60 100 100 1 FALSE
127 Mega Pinsir Bug Flying 600 65 155 120 65 90 105 1 FALSE
130 Mega Gyarados Water Dark 640 95 155 109 70 130 81 1 FALSE
142 Mega Aerodactyl Rock Flying 615 80 135 85 70 95 150 1 FALSE
149 Dragonite Dragon Flying 600 91 134 95 100 100 80 1 FALSE
181 Mega Ampharos Electric Dragon 610 90 95 105 165 110 45 2 FALSE
208 Mega Steelix Steel Ground 610 75 125 230 55 95 30 2 FALSE
212 Mega Scizor Bug Steel 600 70 150 140 65 100 75 2 FALSE
214 Mega Heracross Bug Fighting 600 80 185 115 40 105 75 2 FALSE
229 Mega Houndoom Dark Fire 600 75 90 90 140 90 115 2 FALSE
248 Tyranitar Rock Dark 600 100 134 110 95 100 61 2 FALSE
248 Mega Tyranitar Rock Dark 700 100 164 150 95 120 71 2 FALSE
254 Mega Sceptile Grass Dragon 630 70 110 75 145 85 145 3 FALSE

What is the fastest non-legendary pokemon (which one has the highest speed stat?)

# Row position of the (first) non-legendary Pokemon with the highest speed.
which.max(pokemon_NL[["speed"]])
## [1] 345
# Look the row up by its computed position instead of a number copied from the
# console output, so the result stays correct if the data or filter changes.
kable(pokemon_NL[which.max(pokemon_NL$speed), ])
number name type1 type2 total hp attack defense sp_attack sp_defense speed generation legendary
291 Ninjask Bug Flying 456 61 90 45 50 50 160 3 FALSE

What are the weakest Gen 1 pokemon?

# Names of generation 1 Pokemon whose base stat total is below 250.
pokemon_data %>%
  filter(generation == 1, total < 250) %>%
  select(name)
## # A tibble: 6 × 1
##   name    
##   <chr>   
## 1 Caterpie
## 2 Metapod 
## 3 Weedle  
## 4 Kakuna  
## 5 Zubat   
## 6 Magikarp

Now let’s make a bar chart of average base stat total by generation

# Average base stat total per generation, rounded to the nearest whole number.
# Grouping the raw data yields a "generation 0" group; exclude those rows by
# value rather than dropping the first summary row by position, so the right
# group is removed regardless of how the summary happens to be ordered.
average_base <- pokemon_data %>%
  filter(generation != 0) %>%
  group_by(generation) %>%
  summarise(average_base = round(mean(total), 0))

# Bar chart with each bar labelled by its (rounded) average.
ggplot(average_base, aes(x = generation, y = average_base)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = average_base),
    vjust = 1.6, color = "white", size = 3.5
  ) +
  theme_minimal() +
  labs(
    x = "Generation",
    y = "Average Base Stat Total",
    title = "Average Pokemon Base Stat Total by Generation"
  )

Gen 8 has the highest average base stat total, just barely beating out Gen 4.

Now let’s make a bar chart of average base stat total by type

# Average base stat total per primary type, rounded to the nearest whole number.
# The data still contains the misspelled type "Graass"; fold it into "Grass"
# first so it does not show up as a separate bar in the chart.
average_base <- pokemon_data %>%
  mutate(type1 = if_else(type1 == "Graass", "Grass", type1)) %>%
  group_by(type1) %>%
  summarise(average_base = round(mean(total), 0))

# Bar chart with each bar labelled by its (rounded) average; the type names on
# the x axis are rotated 90 degrees so they do not overlap.
ggplot(average_base, aes(x = type1, y = average_base)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = average_base),
    vjust = 2.0, color = "white", size = 3.5
  ) +
  theme_minimal() +
  labs(
    x = "Type",
    y = "Average Base Stat Total",
    title = "Average Pokemon Base Stat Total by Type1"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Dragons have the highest average base stat total by far. Bug Pokemon, unsurprisingly, have the lowest.