The GitHub link contained a downloadable CSV, which was loaded into a raw data frame. Three separate clients have given feedback on the project. Each ask will be broken into separate sections where possible.
# Load the downloaded CSV; read.csv() already returns a data frame.
data_raw <- read.csv("Pokemon.csv", header = TRUE, sep = ",")
# Client request (Don Padmaperuma): give the two type columns clearer names.
names(data_raw)[c(3, 4)] <- c("Primary", "Secondary")
# Preview the first ten rows.
head(data_raw, n = 10)
## Number Name Primary Secondary Total HP Attack
## 1 1 Bulbasaur Grass Poison 318 45 49
## 2 2 Ivysaur Grass Poison 405 60 62
## 3 3 Venusaur Grass Poison 525 80 82
## 4 3 VenusaurMega Venusaur Grass Poison 625 80 100
## 5 4 Charmander Fire <NA> 309 39 52
## 6 5 Charmeleon Fire <NA> 405 58 64
## 7 6 Charizard Fire Flying 534 78 84
## 8 6 CharizardMega Charizard X Fire Dragon 634 78 130
## 9 6 CharizardMega Charizard Y Fire Flying 634 78 104
## 10 7 Squirtle Water <NA> 314 44 48
## Defense SpecialAtk SpecialDef Speed Generation Legendary
## 1 49 65 65 45 1 FALSE
## 2 63 80 80 60 1 FALSE
## 3 83 100 100 80 1 FALSE
## 4 123 122 120 80 1 FALSE
## 5 43 60 50 65 1 FALSE
## 6 58 80 65 80 1 FALSE
## 7 78 109 85 100 1 FALSE
## 8 111 130 85 100 1 FALSE
## 9 78 159 115 100 1 FALSE
## 10 65 50 64 43 1 FALSE
With the data loaded, we can now start the process to clean the data. The first chunk will adjust a strange character naming system.
# QA step: list every Name containing "Mega" and check whether all of them
# follow the same pattern ("<Name>Mega <Name>").
# str_detect() is case-sensitive, so the "Mega" query returns the Mega forms
# plus Meganium (154) but cannot return Yanmega (469); the lower-case "mega"
# query below is what surfaces it.
# Neither Meganium nor Yanmega follows the pattern, so both need to be
# handled after this step.
data_raw %>%
  filter(str_detect(Name, "Mega"))
data_raw %>%
  filter(str_detect(Name, "mega"))
## Number Name Primary Secondary Total HP Attack
## 1 3 VenusaurMega Venusaur Grass Poison 625 80 100
## 2 6 CharizardMega Charizard X Fire Dragon 634 78 130
## 3 6 CharizardMega Charizard Y Fire Flying 634 78 104
## 4 9 BlastoiseMega Blastoise Water <NA> 630 79 103
## 5 15 BeedrillMega Beedrill Bug Poison 495 65 150
## 6 18 PidgeotMega Pidgeot Normal Flying 579 83 80
## 7 65 AlakazamMega Alakazam Psychic <NA> 590 55 50
## 8 80 SlowbroMega Slowbro Water Psychic 590 95 75
## 9 94 GengarMega Gengar Ghost Poison 600 60 65
## 10 115 KangaskhanMega Kangaskhan Normal <NA> 590 105 125
## 11 127 PinsirMega Pinsir Bug Flying 600 65 155
## 12 130 GyaradosMega Gyarados Water Dark 640 95 155
## 13 142 AerodactylMega Aerodactyl Rock Flying 615 80 135
## 14 150 MewtwoMega Mewtwo X Psychic Fighting 780 106 190
## 15 150 MewtwoMega Mewtwo Y Psychic <NA> 780 106 150
## 16 154 Meganium Grass <NA> 525 80 82
## 17 181 AmpharosMega Ampharos Electric Dragon 610 90 95
## 18 208 SteelixMega Steelix Steel Ground 610 75 125
## 19 212 ScizorMega Scizor Bug Steel 600 70 150
## 20 214 HeracrossMega Heracross Bug Fighting 600 80 185
## 21 229 HoundoomMega Houndoom Dark Fire 600 75 90
## 22 248 TyranitarMega Tyranitar Rock Dark 700 100 164
## 23 254 SceptileMega Sceptile Grass Dragon 630 70 110
## 24 257 BlazikenMega Blaziken Fire Fighting 630 80 160
## 25 260 SwampertMega Swampert Water Ground 635 100 150
## 26 282 GardevoirMega Gardevoir Psychic Fairy 618 68 85
## 27 302 SableyeMega Sableye Dark Ghost 480 50 85
## 28 303 MawileMega Mawile Steel Fairy 480 50 105
## 29 306 AggronMega Aggron Steel <NA> 630 70 140
## 30 308 MedichamMega Medicham Fighting Psychic 510 60 100
## 31 310 ManectricMega Manectric Electric <NA> 575 70 75
## 32 319 SharpedoMega Sharpedo Water Dark 560 70 140
## 33 323 CameruptMega Camerupt Fire Ground 560 70 120
## 34 334 AltariaMega Altaria Dragon Fairy 590 75 110
## 35 354 BanetteMega Banette Ghost <NA> 555 64 165
## 36 359 AbsolMega Absol Dark <NA> 565 65 150
## 37 362 GlalieMega Glalie Ice <NA> 580 80 120
## 38 373 SalamenceMega Salamence Dragon Flying 700 95 145
## 39 376 MetagrossMega Metagross Steel Psychic 700 80 145
## 40 380 LatiasMega Latias Dragon Psychic 700 80 100
## 41 381 LatiosMega Latios Dragon Psychic 700 80 130
## 42 384 RayquazaMega Rayquaza Dragon Flying 780 105 180
## 43 428 LopunnyMega Lopunny Normal Fighting 580 65 136
## 44 445 GarchompMega Garchomp Dragon Ground 700 108 170
## 45 448 LucarioMega Lucario Fighting Steel 625 70 145
## 46 460 AbomasnowMega Abomasnow Grass Ice 594 90 132
## 47 475 GalladeMega Gallade Psychic Fighting 618 68 165
## 48 531 AudinoMega Audino Normal Fairy 545 103 60
## 49 719 DiancieMega Diancie Rock Fairy 700 50 160
## Defense SpecialAtk SpecialDef Speed Generation Legendary
## 1 123 122 120 80 1 FALSE
## 2 111 130 85 100 1 FALSE
## 3 78 159 115 100 1 FALSE
## 4 120 135 115 78 1 FALSE
## 5 40 15 80 145 1 FALSE
## 6 80 135 80 121 1 FALSE
## 7 65 175 95 150 1 FALSE
## 8 180 130 80 30 1 FALSE
## 9 80 170 95 130 1 FALSE
## 10 100 60 100 100 1 FALSE
## 11 120 65 90 105 1 FALSE
## 12 109 70 130 81 1 FALSE
## 13 85 70 95 150 1 FALSE
## 14 100 154 100 130 1 TRUE
## 15 70 194 120 140 1 TRUE
## 16 100 83 100 80 2 FALSE
## 17 105 165 110 45 2 FALSE
## 18 230 55 95 30 2 FALSE
## 19 140 65 100 75 2 FALSE
## 20 115 40 105 75 2 FALSE
## 21 90 140 90 115 2 FALSE
## 22 150 95 120 71 2 FALSE
## 23 75 145 85 145 3 FALSE
## 24 80 130 80 100 3 FALSE
## 25 110 95 110 70 3 FALSE
## 26 65 165 135 100 3 FALSE
## 27 125 85 115 20 3 FALSE
## 28 125 55 95 50 3 FALSE
## 29 230 60 80 50 3 FALSE
## 30 85 80 85 100 3 FALSE
## 31 80 135 80 135 3 FALSE
## 32 70 110 65 105 3 FALSE
## 33 100 145 105 20 3 FALSE
## 34 110 110 105 80 3 FALSE
## 35 75 93 83 75 3 FALSE
## 36 60 115 60 115 3 FALSE
## 37 80 120 80 100 3 FALSE
## 38 130 120 90 120 3 FALSE
## 39 150 105 110 110 3 FALSE
## 40 120 140 150 110 3 TRUE
## 41 100 160 120 110 3 TRUE
## 42 100 180 100 115 3 TRUE
## 43 94 54 96 135 4 FALSE
## 44 115 120 95 92 4 FALSE
## 45 88 140 70 112 4 FALSE
## 46 105 132 105 30 4 FALSE
## 47 95 65 115 110 4 FALSE
## 48 126 80 126 50 5 FALSE
## 49 110 160 110 110 6 TRUE
## Number Name Primary Secondary Total HP Attack Defense SpecialAtk
## 1 469 Yanmega Bug Flying 515 86 76 86 116
## SpecialDef Speed Generation Legendary
## 1 56 95 4 FALSE
# Build a table of the Mega evolutions with clean names.
# Mega forms are stored as "<Name>Mega <Name>[ X|Y]". Matching on "Mega "
# (with the trailing space) selects exactly those rows; ordinary names that
# merely contain the letters, such as Meganium (154) and Yanmega (469), are
# left out, so they are neither rewritten nor flagged as Mega and need no
# manual repair afterwards.
# Removing everything up to and including "Mega " leaves the clean name with
# no leading white space.
# Finally, add a column carrying the Mega designator.
name_raw <-
  data_raw %>%
  as_tibble() %>%
  filter(grepl("^.+Mega ", Name)) %>%
  mutate(Name = sub("^.+Mega ", "", Name)) %>%
  mutate(Mega = "Mega")
We join the new table with the old and clean up unwanted lines.
# Full join the adjusted table from the chunk above onto the raw data
# (natural join on every shared column, so Mega is NA for non-Mega rows).
# Filter out the extra lines that still carry the old format
# ("<Name>Mega <Name>"); the pattern requires the space after "Mega" so that
# Meganium is kept.
# Arrange by Number.
Pokemon <-
  name_raw %>%
  full_join(data_raw) %>%
  filter(!grepl("^.+Mega ", Name)) %>%
  arrange(Number)
## Joining, by = c("Number", "Name", "Primary", "Secondary", "Total", "HP", "Attack", "Defense", "SpecialAtk", "SpecialDef", "Speed", "Generation", "Legendary")
## Warning: Column `Name` joining character vector and factor, coercing into
## character vector
…An analysis that might be performed on the data is comparing the strongest and weakest Pokemon between two different types.
For the purpose of this analysis we define “Weak” and “Strong” as low and high Total points, respectively. The data can then be adjusted and tidied into a subset of values.
# Lowest Total within each primary type, ordered from the largest minimum down.
weak <- Pokemon %>%
  select(Primary, Total) %>%
  group_by(Primary) %>%
  summarise(weak = min(Total)) %>%
  arrange(desc(weak))

# Highest Total within each primary type, ordered from the largest maximum down.
strong <- Pokemon %>%
  select(Primary, Total) %>%
  group_by(Primary) %>%
  summarise(strong = max(Total)) %>%
  arrange(desc(strong))

# Print the per-type minimums.
weak
## # A tibble: 18 x 2
## Primary weak
## <fct> <int>
## 1 Dragon 300
## 2 Steel 300
## 3 Rock 280
## 4 Ghost 275
## 5 Ground 265
## 6 Fire 250
## 7 Ice 250
## 8 Flying 245
## 9 Poison 245
## 10 Dark 220
## 11 Fairy 218
## 12 Fighting 210
## 13 Electric 205
## 14 Psychic 198
## 15 Bug 194
## 16 Normal 190
## 17 Grass 180
## 18 Water 175
## # A tibble: 18 x 2
## Primary strong
## <fct> <int>
## 1 Dragon 780
## 2 Psychic 780
## 3 Ground 770
## 4 Water 770
## 5 Normal 720
## 6 Rock 700
## 7 Steel 700
## 8 Dark 680
## 9 Fairy 680
## 10 Fire 680
## 11 Ghost 680
## 12 Grass 630
## 13 Fighting 625
## 14 Electric 610
## 15 Bug 600
## 16 Flying 580
## 17 Ice 580
## 18 Poison 540
Looking at the values returned:
* From the Strongest Category
+ Dragon (780, 300) Strongest of Strong
+ Psychic (780, 198) Strongest of Strong
+ Poison (540, 245) Weakest of Strong
* From the Weakest Category
+ Water (770, 175) Weakest of Weak
# Show the strongest and weakest Pokemon (by Total) of the four primary types
# under discussion.
# The extremes are computed per type with range() rather than typed in from
# the printed summary tables, so the result stays correct if the data changes.
# ungroup() returns a plain tibble before sorting by type and descending Total.
Pokemon %>%
  filter(Primary %in% c("Dragon", "Psychic", "Poison", "Water")) %>%
  group_by(Primary) %>%
  filter(Total %in% range(Total, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(Primary, desc(Total))
## # A tibble: 13 x 14
## Number Name Primary Secondary Total HP Attack Defense SpecialAtk
## <int> <chr> <fct> <fct> <int> <int> <int> <int> <int>
## 1 384 Rayq~ Dragon Flying 780 105 180 100 180
## 2 147 Drat~ Dragon <NA> 300 41 64 45 50
## 3 371 Bagon Dragon <NA> 300 45 75 60 40
## 4 443 Gible Dragon Ground 300 58 70 45 40
## 5 704 Goomy Dragon <NA> 300 45 50 35 55
## 6 782 Jang~ Dragon <NA> 300 45 55 65 45
## 7 804 Naga~ Poison Dragon 540 73 73 73 127
## 8 41 Zubat Poison Flying 245 40 45 35 30
## 9 150 Mewt~ Psychic Fighting 780 106 190 100 154
## 10 150 Mewt~ Psychic <NA> 780 106 150 70 194
## 11 280 Ralts Psychic Fairy 198 28 25 25 45
## 12 382 Kyog~ Water <NA> 770 100 150 90 180
## 13 746 Wish~ Water <NA> 175 45 20 20 25
## # ... with 5 more variables: SpecialDef <int>, Speed <int>,
## # Generation <int>, Legendary <lgl>, Mega <chr>
# Column names shared by every summary-statistics frame below.
x <- paste0(
  c("Total", "HP", "Attack", "Defense", "SpecialAtk", "SpecialDef", "Speed"),
  "_s"
)

# Seven-column frame reduced to a single all-NA row by the indexing below.
Pokemon_pool <- setNames(data.frame(matrix(ncol = 7, nrow = 0)), x)
Pokemon_pool <- Pokemon_pool[2, ][NA, ]

# Empty (zero-row) frames with the same seven columns.
sPokemon <- setNames(data.frame(matrix(ncol = 7, nrow = 0)), x)
mega_random <- setNames(data.frame(matrix(ncol = 7, nrow = 0)), x)
Normal_random <- setNames(data.frame(matrix(ncol = 7, nrow = 0)), x)

# Empty one-column frame; its column is named on the following line.
sample_name <- data.frame(matrix(ncol = 1, nrow = 0))
colnames(sample_name)<- "sample"
It seems like there would be a lot of potential to create some creative visualizations with this data set. Maybe some other useful stats can be created by mutating data as well. (William Outcault)
Define the difference between character stats from Normal to Mega to answer the following question:
If I were to select a Pokemon which has a mega upgrade, what could I expect the median stats to be for the normal and mega versions?
# Fix the RNG seed so the random samples below, and the medians reported from
# them, are the same on every run.
set.seed(42)

# Collapse a pool of Pokemon to a single row holding the median of each stat.
# `pool` must contain Total, HP, Attack, Defense, SpecialAtk, SpecialDef and
# Speed; the result has one column per stat, suffixed with "_s".
median_stats <- function(pool) {
  pool %>%
    summarise(
      Total_s = median(Total),
      HP_s = median(HP),
      Attack_s = median(Attack),
      Defense_s = median(Defense),
      SpecialAtk_s = median(SpecialAtk),
      SpecialDef_s = median(SpecialDef),
      Speed_s = median(Speed)
    )
}

# Every Mega form, and 100 of them drawn with replacement.
mega_pool <- Pokemon %>%
  filter(Mega == "Mega")
mega_sample <- sample_n(mega_pool, size = 100, replace = TRUE)

# The normal (non-Mega) forms of the sampled Pokemon, resampled to 100 rows
# with replacement.
Normal_filter <- Pokemon %>%
  filter(Number %in% mega_sample$Number) %>%
  filter(is.na(Mega))
Normal_pool <- sample_n(Normal_filter, size = 100, replace = TRUE)

# NOTE(review): the Mega medians come from the full mega_pool while the normal
# medians come from the resampled Normal_pool -- confirm this asymmetry is
# intended.
mega_pool_s <- median_stats(mega_pool)
normal_pool_s <- median_stats(Normal_pool)

# Stack the two one-row summaries (Mega first, then normal).
# NOTE(review): full_join() on all columns would merge the two rows into one
# if every median happened to coincide.
compare_pool <- full_join(mega_pool_s, normal_pool_s)
## Joining, by = c("Total_s", "HP_s", "Attack_s", "Defense_s", "SpecialAtk_s", "SpecialDef_s", "Speed_s")
## Type Total_s HP_s Attack_s Defense_s SpecialAtk_s SpecialDef_s Speed_s
## 1 Mega 610 78.0 131 100 120 98 100
## 2 Normal 500 79.5 90 80 85 80 80