# Global knitr chunk options: echo the code and hide warnings in the output.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
#library(tidyverse)
library(dplyr)
library(purrr)
library(ggplot2)
# Download the Pokémon dataset into a data frame and preview the first rows.
dataset <- read.csv("https://osf.io/download/c3bfs/")
head(dataset)
dexnum | name | generation | type1 | type2 | species | height | weight | ability1 | ability2 | hidden_ability | hp | attack | defense | sp_atk | sp_def | speed | total | ev_yield | catch_rate | base_friendship | base_exp | growth_rate | egg_group1 | egg_group2 | percent_male | percent_female | egg_cycles | special_group |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | Bulbasaur | 1 | Grass | Poison | Seed Pokémon | 0.7 | 6.9 | Overgrow | Chlorophyll | 45 | 49 | 49 | 65 | 65 | 45 | 318 | 1 Sp. Atk | 45 | 50 | 64 | Medium Slow | Grass | Monster | 87.5 | 12.5 | 20 | Ordinary | |
2 | Ivysaur | 1 | Grass | Poison | Seed Pokémon | 1.0 | 13.0 | Overgrow | Chlorophyll | 60 | 62 | 63 | 80 | 80 | 60 | 405 | 1 Sp. Atk, 1 Sp. Def | 45 | 50 | 142 | Medium Slow | Grass | Monster | 87.5 | 12.5 | 20 | Ordinary | |
3 | Venusaur | 1 | Grass | Poison | Seed Pokémon | 2.0 | 100.0 | Overgrow | Chlorophyll | 80 | 82 | 83 | 100 | 100 | 80 | 525 | 2 Sp. Atk, 1 Sp. Def | 45 | 50 | 236 | Medium Slow | Grass | Monster | 87.5 | 12.5 | 20 | Ordinary | |
4 | Charmander | 1 | Fire | Lizard Pokémon | 0.6 | 8.5 | Blaze | Solar Power | 39 | 52 | 43 | 60 | 50 | 65 | 309 | 1 Speed | 45 | 50 | 62 | Medium Slow | Dragon | Monster | 87.5 | 12.5 | 20 | Ordinary | ||
5 | Charmeleon | 1 | Fire | Flame Pokémon | 1.1 | 19.0 | Blaze | Solar Power | 58 | 64 | 58 | 80 | 65 | 80 | 405 | 1 Sp. Atk, 1 Speed | 45 | 50 | 142 | Medium Slow | Dragon | Monster | 87.5 | 12.5 | 20 | Ordinary | ||
6 | Charizard | 1 | Fire | Flying | Flame Pokémon | 1.7 | 90.5 | Blaze | Solar Power | 78 | 84 | 78 | 109 | 85 | 100 | 534 | 3 Sp. Atk | 45 | 50 | 267 | Medium Slow | Dragon | Monster | 87.5 | 12.5 | 20 | Ordinary |
# Per-column overview of the data (column types, missing counts, summary
# statistics) using skimr.
skimr::skim(dataset)
Name | dataset |
Number of rows | 1025 |
Number of columns | 29 |
_______________________ | |
Column type frequency: | |
character | 15 |
numeric | 14 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
name | 0 | 1 | 3 | 17 | 0 | 1025 | 0 |
type1 | 0 | 1 | 3 | 8 | 0 | 18 | 0 |
type2 | 0 | 1 | 0 | 8 | 499 | 19 | 0 |
species | 0 | 1 | 1 | 21 | 0 | 715 | 0 |
ability1 | 0 | 1 | 4 | 16 | 0 | 223 | 0 |
ability2 | 0 | 1 | 0 | 16 | 167 | 170 | 0 |
hidden_ability | 0 | 1 | 0 | 13 | 495 | 126 | 0 |
ev_yield | 0 | 1 | 4 | 30 | 0 | 48 | 0 |
base_friendship | 0 | 1 | 1 | 3 | 0 | 9 | 0 |
base_exp | 0 | 1 | 1 | 3 | 0 | 181 | 0 |
growth_rate | 0 | 1 | 4 | 11 | 0 | 6 | 0 |
egg_group1 | 0 | 1 | 3 | 12 | 0 | 26 | 0 |
egg_group2 | 0 | 1 | 0 | 10 | 746 | 12 | 0 |
egg_cycles | 0 | 1 | 1 | 3 | 0 | 12 | 0 |
special_group | 0 | 1 | 6 | 15 | 0 | 8 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
dexnum | 0 | 1.00 | 513.00 | 296.04 | 1.0 | 257.0 | 513 | 769.0 | 1025.0 | ▇▇▇▇▇ |
generation | 0 | 1.00 | 4.74 | 2.63 | 1.0 | 3.0 | 5 | 7.0 | 9.0 | ▇▇▅▅▇ |
height | 0 | 1.00 | 1.21 | 1.25 | 0.1 | 0.5 | 1 | 1.5 | 20.0 | ▇▁▁▁▁ |
weight | 0 | 1.00 | 66.98 | 121.28 | 0.1 | 8.5 | 28 | 70.0 | 999.9 | ▇▁▁▁▁ |
hp | 0 | 1.00 | 70.18 | 26.63 | 1.0 | 50.0 | 68 | 85.0 | 255.0 | ▃▇▁▁▁ |
attack | 0 | 1.00 | 77.52 | 29.78 | 5.0 | 55.0 | 75 | 100.0 | 181.0 | ▂▇▆▃▁ |
defense | 0 | 1.00 | 72.51 | 29.29 | 5.0 | 50.0 | 70 | 90.0 | 230.0 | ▃▇▂▁▁ |
sp_atk | 0 | 1.00 | 70.08 | 29.66 | 10.0 | 47.0 | 65 | 90.0 | 173.0 | ▃▇▅▂▁ |
sp_def | 0 | 1.00 | 70.21 | 26.64 | 20.0 | 50.0 | 67 | 86.0 | 230.0 | ▇▇▂▁▁ |
speed | 0 | 1.00 | 67.19 | 28.72 | 5.0 | 45.0 | 65 | 88.0 | 200.0 | ▃▇▅▁▁ |
total | 0 | 1.00 | 427.69 | 112.77 | 175.0 | 325.0 | 450 | 508.0 | 720.0 | ▂▆▇▆▁ |
catch_rate | 0 | 1.00 | 94.93 | 76.11 | 3.0 | 45.0 | 60 | 140.0 | 255.0 | ▇▃▂▂▂ |
percent_male | 155 | 0.85 | 54.93 | 20.31 | 0.0 | 50.0 | 50 | 50.0 | 100.0 | ▁▁▇▁▂ |
percent_female | 155 | 0.85 | 45.07 | 20.31 | 0.0 | 50.0 | 50 | 50.0 | 100.0 | ▂▁▇▁▁ |
# sum(is.na(dataset))
#
# sum(is.null(dataset))
#
# sum(!complete.cases(dataset))
Let’s get familiar with our data. Create a table that shows the frequency of each type of Pokémon in the dataset (variable name: type1).
# Frequency of each primary type (type1), most common first.
# count(sort = TRUE) returns an ungrouped table, whereas
# group_by() %>% count() leaves the result grouped by type1.
tab1 <- dataset %>%
  filter(!is.na(type1)) %>%
  count(type1, sort = TRUE)
tab1
type1 | n |
---|---|
Water | 134 |
Normal | 118 |
Grass | 103 |
Bug | 83 |
Fire | 66 |
Psychic | 60 |
Electric | 59 |
Rock | 58 |
Dark | 45 |
Poison | 42 |
Fighting | 40 |
Ground | 40 |
Dragon | 37 |
Steel | 36 |
Ghost | 35 |
Ice | 31 |
Fairy | 29 |
Flying | 9 |
Next, let’s look at the representation of each generation of Pokémon in the dataset. Create a table that shows the number of Pokémon in the dataset associated with each generation (variable name: generation)
# Number of Pokémon per generation, largest generation first.
# count(sort = TRUE) returns an ungrouped table, whereas
# group_by() %>% count() leaves the result grouped by generation.
tab2 <- dataset %>%
  filter(!is.na(generation)) %>%
  count(generation, sort = TRUE)
tab2
generation | n |
---|---|
5 | 156 |
1 | 151 |
3 | 135 |
9 | 120 |
4 | 107 |
2 | 100 |
8 | 96 |
7 | 88 |
6 | 72 |
Provide the mean, standard deviation, median, and range of values of attack stats (variable name: attack) for ‘Water’ type Pokémon (variable name: type1).
# One-column data frame holding the attack stat of every Pokémon whose
# primary type is Water (rows with a missing type1 are excluded).
water_attack <- dataset %>%
  filter(!is.na(type1) & type1 == "Water") %>%
  select(attack)
# Descriptive statistics helper.
#
# x: a numeric vector; missing values are ignored in every statistic.
# Returns a named numeric vector with the elements mean, standard_dev,
# median, range1 (minimum) and range2 (maximum). The two range names arise
# because c() expands the length-2 result of range().
desc_stat <- function(x) {
  # TRUE is spelled out: T is an ordinary variable that can be reassigned,
  # which would silently turn off the NA handling.
  c(
    mean = mean(x, na.rm = TRUE),
    standard_dev = sd(x, na.rm = TRUE),
    median = median(x, na.rm = TRUE),
    range = range(x, na.rm = TRUE)
  )
}
# Attack summary for Water types (values from the output below):
#   mean:   72.34
#   std:    25.26
#   median: 70
#   range:  10 to 130
water_attack %>% map(desc_stat)
## $attack
## mean standard_dev median range1 range2
## 72.34328 25.25959 70.00000 10.00000 130.00000
- Provide the mean, standard deviation, median, and range of values of speed stats (variable name: speed) for ‘Dragon’ type Pokémon (variable name: type1).
# One-column data frame holding the speed stat of every Pokémon whose
# primary type is Dragon (rows with a missing type1 are excluded).
dragon_speed <- dataset %>%
  filter(!is.na(type1) & type1 == "Dragon") %>%
  select(speed)
# Speed summary for Dragon types (values from the output below):
#   mean:   80.19
#   std:    24.62
#   median: 82
#   range:  40 to 142
dragon_speed %>% map(desc_stat)
## $speed
## mean standard_dev median range1 range2
## 80.18919 24.62365 82.00000 40.00000 142.00000
Are there any outliers for the ‘total’ stat? What is the value(s), and which Pokémon(s) is/are associated with it?
library(ggplot2)
# Box plot of the total stat to look for extreme values.
boxplot(dataset$total)
# Name/total pairs ordered from the lowest total upward.
outlier_lower <- dataset %>%
  arrange(total) %>%
  select(name, total)
# The same pairs ordered from the highest total downward.
outlier_upper <- dataset %>%
  arrange(desc(total)) %>%
  select(name, total)
# Show the lowest totals.
head(outlier_lower)
name | total |
---|---|
Wishiwashi | 175 |
Sunkern | 180 |
Blipbug | 180 |
Snom | 185 |
Azurill | 190 |
Kricketot | 194 |
# Show the highest totals (6 rows, head()'s default).
head(outlier_upper, n = 6L)
name | total |
---|---|
Arceus | 720 |
Eternatus | 690 |
Mewtwo | 680 |
Lugia | 680 |
Ho-oh | 680 |
Rayquaza | 680 |
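# Wishiwashi (total 175) is the most extreme low value and Arceus (total 720) the most extreme high value.
# Note: with Q1 = 325 and Q3 = 508 (IQR = 183), the 1.5 * IQR fences are 50.5 and 782.5, so the boxplot rule flags neither as a formal outlier.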
## Level 4
Is there a correlation between a Pokémon’s ‘total’ stat and their base level of experience? (variable: base_exp)
# base_exp is read in as text, so convert it to numeric (base_exp2) and keep
# only rows where both base_exp2 and total are present. Entries that cannot
# be parsed become NA during the conversion and are dropped by the filter.
# The three columns of interest are moved to the front.
dataset2 <- dataset %>%
  mutate(base_exp2 = as.numeric(base_exp)) %>%
  filter(!is.na(base_exp2) & !is.na(total)) %>%
  select(base_exp, base_exp2, total, everything())
# Shapiro-Wilk normality test on base_exp2. The reported p-value (< 2.2e-16)
# rejects normality, i.e. base_exp2 violates the normality assumption.
shapiro.test(dataset2$base_exp2)
##
## Shapiro-Wilk normality test
##
## data: dataset2$base_exp2
## W = 0.92187, p-value < 2.2e-16
# Histogram of base_exp2 to inspect the shape of its distribution.
hist(dataset2$base_exp2)
# Kendall rank correlation between total and base_exp2.
# Result: p-value < 2.2e-16, sample estimate tau = 0.9132707
cor.test(dataset2$total,dataset2$base_exp2,method = 'kendall')
##
## Kendall's rank correlation tau
##
## data: dataset2$total and dataset2$base_exp2
## z = 42.937, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## 0.9132707
- Create a visualization that shows the average ‘total’ stat based on the primary type of the Pokémon (type1).
# Average total stat per primary type, rounded to 2 decimals and sorted from
# the highest average to the lowest.
# NOTE(review): this is built from dataset2, which only keeps rows whose
# base_exp converts to a number - confirm that restriction is intended here.
dataset3 <- dataset2 %>%
  group_by(`Pokemon Primary Type` = type1) %>%
  summarise(`Average of Total Stat` = round(mean(total), digits = 2)) %>%
  arrange(desc(`Average of Total Stat`))
dataset3
Pokemon Primary Type | Average of Total Stat |
---|---|
Dragon | 490.16 |
Steel | 468.03 |
Dark | 454.73 |
Psychic | 446.72 |
Fire | 443.98 |
Fighting | 441.55 |
Rock | 438.54 |
Ice | 436.39 |
Flying | 436.11 |
Fairy | 436.07 |
Ground | 434.58 |
Electric | 433.66 |
Ghost | 431.17 |
Water | 417.58 |
Poison | 411.61 |
Grass | 407.94 |
Normal | 399.41 |
Bug | 374.64 |
# Horizontal bar chart of the average total stat per primary type.
# reorder() sorts the bars by their average; without it ggplot2 orders the
# character axis alphabetically and ignores the row order of dataset3.
dataset3 %>%
  ggplot(aes(
    y = reorder(`Pokemon Primary Type`, `Average of Total Stat`),
    x = `Average of Total Stat`,
    fill = `Average of Total Stat`
  )) +
  geom_col() +
  labs(
    x = "Total Stat Average",
    y = "Primary Type",
    title = "A Bar Chart Brawl of Pokemon Power",
    subtitle = "Examining Primary Types and their Average Total Stats",
    fill = " "
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )