Source: Psi Chi R

Load packages and import data

# Chunk defaults for the whole document: show the code, hide warnings.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)
#library(tidyverse)

# Attach the individual packages rather than all of tidyverse.
library(dplyr)
library(purrr)
library(ggplot2)

# Read the data set as CSV straight from the OSF download link.
dataset <- read.csv("https://osf.io/download/c3bfs/")

Check the data

# First six rows, to check the columns that were read in.
head(dataset, n = 6L)
dexnum name generation type1 type2 species height weight ability1 ability2 hidden_ability hp attack defense sp_atk sp_def speed total ev_yield catch_rate base_friendship base_exp growth_rate egg_group1 egg_group2 percent_male percent_female egg_cycles special_group
1 Bulbasaur 1 Grass Poison Seed Pokémon 0.7 6.9 Overgrow Chlorophyll 45 49 49 65 65 45 318 1 Sp. Atk 45 50 64 Medium Slow Grass Monster 87.5 12.5 20 Ordinary
2 Ivysaur 1 Grass Poison Seed Pokémon 1.0 13.0 Overgrow Chlorophyll 60 62 63 80 80 60 405 1 Sp. Atk, 1 Sp. Def 45 50 142 Medium Slow Grass Monster 87.5 12.5 20 Ordinary
3 Venusaur 1 Grass Poison Seed Pokémon 2.0 100.0 Overgrow Chlorophyll 80 82 83 100 100 80 525 2 Sp. Atk, 1 Sp. Def 45 50 236 Medium Slow Grass Monster 87.5 12.5 20 Ordinary
4 Charmander 1 Fire Lizard Pokémon 0.6 8.5 Blaze Solar Power 39 52 43 60 50 65 309 1 Speed 45 50 62 Medium Slow Dragon Monster 87.5 12.5 20 Ordinary
5 Charmeleon 1 Fire Flame Pokémon 1.1 19.0 Blaze Solar Power 58 64 58 80 65 80 405 1 Sp. Atk, 1 Speed 45 50 142 Medium Slow Dragon Monster 87.5 12.5 20 Ordinary
6 Charizard 1 Fire Flying Flame Pokémon 1.7 90.5 Blaze Solar Power 78 84 78 109 85 100 534 3 Sp. Atk 45 50 267 Medium Slow Dragon Monster 87.5 12.5 20 Ordinary
# Column-by-column overview of the data; skimr is called with `::` rather than attached.
skimr::skim(dataset)
Data summary
Name dataset
Number of rows 1025
Number of columns 29
_______________________
Column type frequency:
character 15
numeric 14
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
name 0 1 3 17 0 1025 0
type1 0 1 3 8 0 18 0
type2 0 1 0 8 499 19 0
species 0 1 1 21 0 715 0
ability1 0 1 4 16 0 223 0
ability2 0 1 0 16 167 170 0
hidden_ability 0 1 0 13 495 126 0
ev_yield 0 1 4 30 0 48 0
base_friendship 0 1 1 3 0 9 0
base_exp 0 1 1 3 0 181 0
growth_rate 0 1 4 11 0 6 0
egg_group1 0 1 3 12 0 26 0
egg_group2 0 1 0 10 746 12 0
egg_cycles 0 1 1 3 0 12 0
special_group 0 1 6 15 0 8 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
dexnum 0 1.00 513.00 296.04 1.0 257.0 513 769.0 1025.0 ▇▇▇▇▇
generation 0 1.00 4.74 2.63 1.0 3.0 5 7.0 9.0 ▇▇▅▅▇
height 0 1.00 1.21 1.25 0.1 0.5 1 1.5 20.0 ▇▁▁▁▁
weight 0 1.00 66.98 121.28 0.1 8.5 28 70.0 999.9 ▇▁▁▁▁
hp 0 1.00 70.18 26.63 1.0 50.0 68 85.0 255.0 ▃▇▁▁▁
attack 0 1.00 77.52 29.78 5.0 55.0 75 100.0 181.0 ▂▇▆▃▁
defense 0 1.00 72.51 29.29 5.0 50.0 70 90.0 230.0 ▃▇▂▁▁
sp_atk 0 1.00 70.08 29.66 10.0 47.0 65 90.0 173.0 ▃▇▅▂▁
sp_def 0 1.00 70.21 26.64 20.0 50.0 67 86.0 230.0 ▇▇▂▁▁
speed 0 1.00 67.19 28.72 5.0 45.0 65 88.0 200.0 ▃▇▅▁▁
total 0 1.00 427.69 112.77 175.0 325.0 450 508.0 720.0 ▂▆▇▆▁
catch_rate 0 1.00 94.93 76.11 3.0 45.0 60 140.0 255.0 ▇▃▂▂▂
percent_male 155 0.85 54.93 20.31 0.0 50.0 50 50.0 100.0 ▁▁▇▁▂
percent_female 155 0.85 45.07 20.31 0.0 50.0 50 50.0 100.0 ▂▁▇▁▁
# sum(is.na(dataset))
# 
# sum(is.null(dataset))
# 
# sum(!complete.cases(dataset))

Level 1-

Let’s get familiar with our data. Create a table that shows the frequency of each type of Pokémon in the dataset (variable name: type1).

# Frequency of each primary type (type1), most common first.
# count() groups, tallies and sorts in one step and returns an ungrouped
# table, so no leftover grouping is carried along with tab1.
tab1 <- dataset %>%
  filter(!is.na(type1)) %>%
  count(type1, sort = TRUE)

tab1
type1 n
Water 134
Normal 118
Grass 103
Bug 83
Fire 66
Psychic 60
Electric 59
Rock 58
Dark 45
Poison 42
Fighting 40
Ground 40
Dragon 37
Steel 36
Ghost 35
Ice 31
Fairy 29
Flying 9

Next, let’s look at the representation of each generation of Pokémon in the dataset. Create a table that shows the number of Pokémon in the dataset associated with each generation (variable name: generation)

# Number of Pokemon per generation, largest generation first.
# count() groups, tallies and sorts in one step and returns an ungrouped
# table, so no leftover grouping is carried along with tab2.
tab2 <- dataset %>%
  filter(!is.na(generation)) %>%
  count(generation, sort = TRUE)

tab2
generation n
5 156
1 151
3 135
9 120
4 107
2 100
8 96
7 88
6 72

Level 2-

Provide the mean, standard deviation, median, and range of values of attack stats (variable name: attack) for ‘Water’ type Pokémon (variable name: type1).

# Attack values for Pokemon whose primary type is Water, kept as a
# one-column data frame. filter() already drops rows where the comparison
# is NA, so no separate missing-value check is needed.
water_attack <- dataset %>%
  filter(type1 == "Water") %>%
  select(attack)

# Descriptive statistics for a numeric vector.
#
# x: numeric vector; missing values are dropped before each statistic.
# Returns a named numeric vector with elements mean, standard_dev, median,
# range1 (minimum) and range2 (maximum).
desc_stat <- function(x) {
  # range() returns c(min, max); naming that argument "range" inside c()
  # expands it to the element names "range1" and "range2".
  # TRUE is spelled out because T is an ordinary variable that can be
  # reassigned, which would silently switch off the NA removal.
  c(
    mean = mean(x, na.rm = TRUE),
    standard_dev = sd(x, na.rm = TRUE),
    median = median(x, na.rm = TRUE),
    range = range(x, na.rm = TRUE)
  )
}

# Water attack summary:
#   mean 72.34, sd 25.26, median 70, range 10 to 130
water_attack %>% map(desc_stat)
## $attack
##         mean standard_dev       median       range1       range2 
##     72.34328     25.25959     70.00000     10.00000    130.00000

Provide the mean, standard deviation, median, and range of values of speed stats (variable name: speed) for ‘Dragon’ type Pokémon (variable name: type1).

# Speed values for Pokemon whose primary type is Dragon, kept as a
# one-column data frame. filter() already drops rows where the comparison
# is NA, so no separate missing-value check is needed.
dragon_speed <- dataset %>%
  filter(type1 == "Dragon") %>%
  select(speed)

# Dragon speed summary:
#   mean 80.19, sd 24.62, median 82, range 40 to 142
dragon_speed %>% map(desc_stat)
## $speed
##         mean standard_dev       median       range1       range2 
##     80.18919     24.62365     82.00000     40.00000    142.00000

Level 3-

Are there any outliers for the ‘total’ stat? What is the value(s), and which Pokémon(s) is/are associated with it?

library(ggplot2)

# Box plot of the total stat, to look for points beyond the whiskers.
boxplot(dataset$total)

# Name/total pairs sorted in both directions so the extremes can be inspected.
outlier_lower <- arrange(select(dataset, name, total), total)
outlier_upper <- arrange(select(dataset, name, total), desc(total))

# Six lowest totals.
head(outlier_lower, n = 6L)
name total
Wishiwashi 175
Sunkern 180
Blipbug 180
Snom 185
Azurill 190
Kricketot 194
# Six highest totals.
head(outlier_upper, n = 6L)
name total
Arceus 720
Eternatus 690
Mewtwo 680
Lugia 680
Ho-oh 680
Rayquaza 680
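#Wishiwashi with a total of 175 is the lowest value and Arceus with a total of 720 is the highest, so these are the candidate lower and upper outliers. Note: with Q1 = 325 and Q3 = 508 (IQR = 183), the 1.5 x IQR fences are 50.5 and 782.5, so neither value is flagged as an outlier by the boxplot rule.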

Level 4-

Is there a correlation between a Pokémon’s ‘total’ stat and their base level of experience? (variable: base_exp)

# Keep only rows where base_exp parses as a number and total is present.
# base_exp2 holds the numeric version of base_exp; values that do not parse
# become NA and are dropped by the filter. The three columns of interest are
# moved to the front.
dataset2 <- dataset %>%
  mutate(base_exp2 = as.numeric(base_exp)) %>%
  filter(!is.na(base_exp2), !is.na(total)) %>%
  select(base_exp, base_exp2, total, everything())

# Shapiro-Wilk test on base_exp2: normality is rejected.
shapiro.test(dataset2$base_exp2)
## 
##  Shapiro-Wilk normality test
## 
## data:  dataset2$base_exp2
## W = 0.92187, p-value < 2.2e-16
# Histogram of the numeric base experience values, as a visual check of the distribution.
hist(dataset2$base_exp2)

# Kendall's rank correlation between total and base_exp2 (rank-based, so it does not rely on normality).
# Result: p-value < 2.2e-16, sample estimate tau = 0.9132707.
cor.test(dataset2$total,dataset2$base_exp2,method = 'kendall')
## 
##  Kendall's rank correlation tau
## 
## data:  dataset2$total and dataset2$base_exp2
## z = 42.937, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##       tau 
## 0.9132707

Create a visualization that shows the average ‘total’ stat based on the primary type of the Pokémon (type1).

# Mean total stat per primary type, rounded to 2 decimals, highest first.
# The display-ready column names are set here so the table prints with them.
# NOTE(review): this starts from dataset2, which has already dropped rows
# whose base_exp is not numeric -- confirm that subset is intended for the
# per-type averages rather than the full dataset.
dataset3 <- dataset2 %>%
  group_by(`Pokemon Primary Type` = type1) %>%
  summarise(`Average of Total Stat` = round(mean(total), 2)) %>%
  arrange(desc(`Average of Total Stat`))

dataset3
Pokemon Primary Type Average of Total Stat
Dragon 490.16
Steel 468.03
Dark 454.73
Psychic 446.72
Fire 443.98
Fighting 441.55
Rock 438.54
Ice 436.39
Flying 436.11
Fairy 436.07
Ground 434.58
Electric 433.66
Ghost 431.17
Water 417.58
Poison 411.61
Grass 407.94
Normal 399.41
Bug 374.64
# Horizontal bar chart of the average total stat per primary type.
# A character axis is drawn in alphabetical order no matter how the rows of
# the data are sorted, so the types are reordered by their average to rank
# the bars (largest at the top). Axis and legend titles are set in labs().
ggplot(
  dataset3,
  aes(
    x = `Average of Total Stat`,
    y = reorder(`Pokemon Primary Type`, `Average of Total Stat`),
    fill = `Average of Total Stat`
  )
) +
  geom_col() +
  labs(
    x = "Total Stat Average",
    y = "Primary Type",
    title = "A Bar Chart Brawl of Pokemon Power",
    subtitle = "Examining Primary Types and their Average Total Stats",
    fill = " "
  ) +
  theme_bw() +
  # Centre the title and subtitle above the panel.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

LS0tDQp0aXRsZTogIlBzaSBDaGkgUiAtIEp1bmUgMjAyNCINCmF1dGhvcjogImJ5IEFsYW4gTGFtIg0KI2RhdGU6ICJgciBTeXMuRGF0ZSgpYCINCmRhdGU6ICJEYXRlOiAyNyBKdW5lIDIwMjQ7IFVwZGF0ZWQ6IGByIGZvcm1hdChTeXMuRGF0ZSgpLCAnJWQgJUIgJVknKWAiIA0Kb3V0cHV0Og0KICBodG1sX2RvY3VtZW50Og0KICAgIHRoZW1lOiByZWFkYWJsZQ0KICAgIGFsd2F5c19hbGxvd19odG1sOiB5ZXMNCiAgICBkZl9wcmludDoga2FibGUNCiAgICB0b2M6IHllcw0KICAgIHRvY19mbG9hdDogeWVzDQogICAgbnVtYmVyX3NlY3Rpb25zOiBubw0KICAgIGFuY2hvcl9zZWN0aW9uczogVFJVRQ0KICAgIGNvZGVfZm9sZGluZzogaGlkZQ0KICAgIGNvZGVfZG93bmxvYWQ6IHRydWUNCi0tLQ0KDQpbKipTb3VyY2UqKjogUHNpIENoaSBSXShodHRwczovL29zZi5pby8zaDRnZS8pDQoNCiMjIExvYWQgcGFja2FnZXMgYW5kIGltcG9ydCBkYXRhDQoNCmBgYHtyIHNldHVwLCB3YXJuaW5nPUYsbWVzc2FnZT1GfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFLHdhcm5pbmc9RikNCiNsaWJyYXJ5KHRpZHl2ZXJzZSkNCg0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkocHVycnIpDQpsaWJyYXJ5KGdncGxvdDIpDQoNCmRhdGFzZXQ9cmVhZC5jc3YoJ2h0dHBzOi8vb3NmLmlvL2Rvd25sb2FkL2MzYmZzLycpDQpgYGANCg0KIyMgQ2hlY2sgdGhlIGRhdGENCg0KYGBge3J9DQpoZWFkKGRhdGFzZXQpDQoNCnNraW1yOjpza2ltKGRhdGFzZXQpDQpgYGANCg0KYGBge3J9DQojIHN1bShpcy5uYShkYXRhc2V0KSkNCiMgDQojIHN1bShpcy5udWxsKGRhdGFzZXQpKQ0KIyANCiMgc3VtKCFjb21wbGV0ZS5jYXNlcyhkYXRhc2V0KSkNCmBgYA0KDQojIyBMZXZlbCAxLQ0KDQpMZXTigJlzIGdldCBmYW1pbGlhciB3aXRoIG91ciBkYXRhLiBDcmVhdGUgYSB0YWJsZSB0aGF0IHNob3dzIHRoZSBmcmVxdWVuY3kgb2YgZWFjaCB0eXBlIG9mIFBva8OpbW9uIGluIHRoZSBkYXRhc2V0ICh2YXJpYWJsZSBuYW1lOiB0eXBlMSkuIA0KDQpgYGB7cn0NCnRhYjEgPSBkYXRhc2V0ICU+JSANCiAgZmlsdGVyKGNvbXBsZXRlLmNhc2VzKHR5cGUxKSkgJT4lIA0KICBncm91cF9ieSh0eXBlMSkgJT4lIA0KICBjb3VudCgpICU+JSANCiAgYXJyYW5nZShkZXNjKG4pKQ0KDQp0YWIxDQpgYGANCg0KTmV4dCwgbGV04oCZcyBsb29rIGF0IHRoZSByZXByZXNlbnRhdGlvbiBvZiBlYWNoIGdlbmVyYXRpb24gb2YgUG9rw6ltb24gaW4gdGhlIGRhdGFzZXQuIENyZWF0ZSBhIHRhYmxlIHRoYXQgc2hvd3MgdGhlIG51bWJlciBvZiBQb2vDqW1vbiBpbiB0aGUgZGF0YXNldCBhc3NvY2lhdGVkIHdpdGggZWFjaCBnZW5lcmF0aW9uICh2YXJpYWJsZSBuYW1lOiBnZW5lcmF0aW9uKQ0KYGBge3J9DQp0YWIyID0gZGF0YXNldCAlPiUgDQogIGZpbHRlcihjb21wbGV0ZS5jYXNlcyhnZW5lcmF0aW9uKSkgJT4lIA0KICBncm91cF9ieShnZW5lcmF0aW9uKSAlPiUgDQogIGNvdW50
KCkgJT4lIA0KICBhcnJhbmdlKGRlc2MobikpDQoNCnRhYjINCmBgYA0KDQoNCiMjIExldmVsIDItDQoNClByb3ZpZGUgdGhlIG1lYW4sIHN0YW5kYXJkIGRldmlhdGlvbiwgbWVkaWFuLCBhbmQgcmFuZ2Ugb2YgdmFsdWVzIG9mIGF0dGFjayBzdGF0cyAodmFyaWFibGUgbmFtZTogYXR0YWNrKSBmb3Ig4oCYV2F0ZXLigJkgdHlwZSBQb2vDqW1vbiAodmFyaWFibGUgbmFtZTogdHlwZTEpLiANCg0KYGBge3J9DQp3YXRlcl9hdHRhY2sgPSBkYXRhc2V0ICU+JSANCiAgZmlsdGVyKHR5cGUxID09ICdXYXRlcicsDQogICAgICAgICBjb21wbGV0ZS5jYXNlcyh0eXBlMSkpICU+JSANCiAgc2VsZWN0KGF0dGFjaykNCg0KI2NyZWF0ZSBmdW5jdGlvbiBmb3IgZGVzYyBzdGF0cw0KZGVzY19zdGF0ID0gZnVuY3Rpb24oeCl7DQogIGMobWVhbiA9IG1lYW4oeCxuYS5ybSA9IFQpLA0KICBzdGFuZGFyZF9kZXYgPSBzZCh4LG5hLnJtID0gVCksDQogIG1lZGlhbiA9IG1lZGlhbih4LG5hLnJtID0gVCksDQogIHJhbmdlID0gcmFuZ2UoeCxuYS5ybSA9IFQpKQ0KfQ0KDQojbWVhbjogNzIuMzQNCiNzdGQ6IDI1LjI2DQojbWVkaWFuOiA3MA0KI3JhbmdlOiAxMCB0byAxMzANCm1hcCh3YXRlcl9hdHRhY2ssZGVzY19zdGF0KQ0KDQogIA0KYGBgDQoNCi1Qcm92aWRlIHRoZSBtZWFuLCBzdGFuZGFyZCBkZXZpYXRpb24sIG1lZGlhbiwgYW5kIHJhbmdlIG9mIHZhbHVlcyBvZiBzcGVlZCBzdGF0cyAodmFyaWFibGUgbmFtZTogc3BlZWQpIGZvciDigJhEcmFnb27igJkgdHlwZSBQb2vDqW1vbiAodmFyaWFibGUgbmFtZTogdHlwZTEpLiANCg0KYGBge3J9DQpkcmFnb25fc3BlZWQgPSBkYXRhc2V0ICU+JSANCiAgZmlsdGVyKHR5cGUxID09ICdEcmFnb24nLA0KICAgICAgICAgY29tcGxldGUuY2FzZXModHlwZTEpKSAlPiUgDQogIHNlbGVjdChzcGVlZCkNCg0KDQojbWVhbjogODAuMTkNCiNzdGQ6IDI0LjYyDQojbWVkaWFuOiA4Mg0KI3JhbmdlOiA0MCB0byAxNDINCm1hcChkcmFnb25fc3BlZWQsZGVzY19zdGF0KQ0KDQpgYGANCg0KIyMgTGV2ZWwgMy0NCg0KQXJlIHRoZXJlIGFueSBvdXRsaWVycyBmb3IgdGhlIOKAmHRvdGFs4oCZIHN0YXQ/IFdoYXQgaXMgdGhlIHZhbHVlKHMpLCBhbmQgd2hpY2ggUG9rw6ltb24ocykgaXMvYXJlIGFzc29jaWF0ZWQgd2l0aCBpdD8NCmBgYHtyfQ0KbGlicmFyeShnZ3Bsb3QyKQ0KDQpib3hwbG90KGRhdGFzZXQkdG90YWwpDQoNCm91dGxpZXJfbG93ZXIgPSBkYXRhc2V0ICU+JSANCiAgc2VsZWN0KG5hbWUsdG90YWwpICU+JSANCiAgYXJyYW5nZSh0b3RhbCkNCg0Kb3V0bGllcl91cHBlciA9IGRhdGFzZXQgJT4lIA0KICBzZWxlY3QobmFtZSx0b3RhbCkgJT4lIA0KICBhcnJhbmdlKGRlc2ModG90YWwpKQ0KDQpoZWFkKG91dGxpZXJfbG93ZXIpDQoNCmhlYWQob3V0bGllcl91cHBlcikNCg0KI1dpc2h3YXNoaSB3aXRoIGEgdG90YWwgb2YgMTc1IGlzIHRoZSBsb3dlciBvdXRsaWVyIGFuZCBBcmNldXMgd2l0aCBhIHRvdGFs
IG9mIDcyMCBpcyB0aGUgdXBwZXIgb3V0bGllci4NCmBgYA0KDQojI0xldmVsIDQtDQoNCklzIHRoZXJlIGEgY29ycmVsYXRpb24gYmV0d2VlbiBhIFBva8OpbW9u4oCZcyDigJh0b3RhbOKAmSBzdGF0IGFuZCB0aGVpciBiYXNlIGxldmVsIG9mIGV4cGVyaWVuY2U/ICh2YXJpYWJsZTogYmFzZV9leHApDQoNCg0KYGBge3J9DQpkYXRhc2V0Mj1kYXRhc2V0ICU+JSANCiAgZmlsdGVyKGNvbXBsZXRlLmNhc2VzKGJhc2VfZXhwKSwNCiAgICAgICAgIGNvbXBsZXRlLmNhc2VzKHRvdGFsKSkgJT4lIA0KICBtdXRhdGUoYmFzZV9leHAyPWFzLm51bWVyaWMoYmFzZV9leHApKSAlPiUgDQogIGZpbHRlcighaXMubmEoYmFzZV9leHAyKSwNCiAgICAgICAgICFpcy5uYSh0b3RhbCkpICU+JSANCiAgc2VsZWN0KGJhc2VfZXhwLGJhc2VfZXhwMix0b3RhbCxldmVyeXRoaW5nKCkpDQoNCiN2aW9sYXRlcyBub3JtYWxpdHkNCnNoYXBpcm8udGVzdChkYXRhc2V0MiRiYXNlX2V4cDIpDQpoaXN0KGRhdGFzZXQyJGJhc2VfZXhwMikNCg0KIyBwLXZhbHVlIDwgMi4yZS0xNiwgc2FtcGxlIGVzdGltYXRlczogdGF1IDAuOTEzMjcwNyANCmNvci50ZXN0KGRhdGFzZXQyJHRvdGFsLGRhdGFzZXQyJGJhc2VfZXhwMixtZXRob2QgPSAna2VuZGFsbCcpDQoNCmBgYA0KDQoNCi1DcmVhdGUgYSB2aXN1YWxpemF0aW9uIHRoYXQgc2hvd3MgdGhlIGF2ZXJhZ2Ug4oCYdG90YWzigJkgc3RhdCBiYXNlZCBvbiB0aGUgcHJpbWFyeSB0eXBlIG9mIHRoZSBQb2vDqW1vbiAodHlwZTEpDQpgYGB7cn0NCmRhdGFzZXQzID0gZGF0YXNldDIgJT4lIA0KICBncm91cF9ieSgnUG9rZW1vbiBQcmltYXJ5IFR5cGUnID10eXBlMSkgJT4lIA0KICBzdW1tYXJpc2UoJ0F2ZXJhZ2Ugb2YgVG90YWwgU3RhdCc9cm91bmQobWVhbih0b3RhbCksMikpICU+JSANCiAgYXJyYW5nZShkZXNjKGBBdmVyYWdlIG9mIFRvdGFsIFN0YXRgKSkNCg0KZGF0YXNldDMNCmBgYA0KDQpgYGB7cn0NCmRhdGFzZXQzICU+JSANCiAgZ2dwbG90KGFlcyh5PWBQb2tlbW9uIFByaW1hcnkgVHlwZWAseD1gQXZlcmFnZSBvZiBUb3RhbCBTdGF0YCxmaWxsPWBBdmVyYWdlIG9mIFRvdGFsIFN0YXRgKSkrDQogIGdlb21fY29sKCkrDQogIGxhYnMoeD0nVG90YWwgU3RhdCBBdmVyYWdlJyx5PSdQcmltYXJ5IFR5cGUnLA0KICAgICAgIHRpdGxlPSdBIEJhciBDaGFydCBCcmF3bCBvZiBQb2tlbW9uIFBvd2VyJywNCiAgICAgICBzdWJ0aXRsZT0nRXhhbWluaW5nIFByaW1hcnkgVHlwZXMgYW5kIHRoZWlyIEF2ZXJhZ2UgVG90YWwgU3RhdHMnLA0KICAgICAgIGZpbGw9JyAnKSsNCiAgdGhlbWVfYncoKSsNCiAgdGhlbWUocGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChoanVzdD0uNSksDQogICAgICAgIHBsb3Quc3VidGl0bGUgPSBlbGVtZW50X3RleHQoaGp1c3QgPSAuNSkpDQpgYGANCg0K