Loading Libraries
library(ggplot2) #For making plots
library(dplyr) #For data manipulation
library(ggthemes) #For adding themes to plots and graphs
library(GGally)
library(tidyr) #For data manipulation
Loading dataset and exploring data
# Read the dataset from "Pokemon.csv" in the current working directory.
pokemon <- read.csv("Pokemon.csv")

# Quick structural overview: dimensions, column names, column types.
dim(pokemon)
names(pokemon)
str(pokemon)

# Keep names as plain character strings; this guards against the column
# having been read as a factor (the read.csv default depends on R version).
pokemon[["Name"]] <- as.character(pokemon[["Name"]])

# Number of distinct names in the dataset.
length(unique(pokemon[["Name"]]))
Correlation and Distribution of Variables
# Pairwise plot matrix (GGally::ggpairs) over the six stat columns.
# NOTE(review): "Sp..Atk" / "Sp..Def" are presumably read.csv's sanitized
# forms of headers such as "Sp. Atk" -- confirm against the CSV.
ggpairs(
  data = pokemon,
  columns = c("Attack", "Defense", "HP", "Sp..Atk", "Sp..Def", "Speed")
) +
  theme_bw() +
  labs(title = "Correlation Matrix of Pokemon Stats")

Number of Pokemon by Type
# Count rows per primary type (Type.1) and draw a horizontal bar chart,
# bars ordered by count, with the count printed next to each bar.
pokemon %>%
  group_by(Type.1) %>%
  summarise(number = n()) %>%
  ggplot(aes(x = reorder(Type.1, number), y = number, fill = Type.1)) +
  geom_col() +
  # Negative hjust pushes the label past the end of the bar.
  geom_text(aes(label = number), hjust = -1.0) +
  coord_flip() +
  labs(
    x = "Type of Pokemon",
    y = "Number of Pokemon",
    title = "Number of Pokemon by Type"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    legend.position = "none"                # fill colours repeat the axis
  )

Pokemon by Secondary Type
# Count rows per secondary type (Type.2) and draw a horizontal bar chart,
# bars ordered by count, with the count printed next to each bar.
# Rows whose Type.2 is the empty string are excluded first (presumably
# Pokemon without a secondary type); filter() also drops NA comparisons.
# The title and axis label name the secondary type explicitly so this chart
# is not confused with the primary-type chart.
pokemon %>%
  filter(Type.2 != "") %>%
  group_by(Type.2) %>%
  summarise(number = n()) %>%
  ggplot(aes(x = reorder(Type.2, number), y = number, fill = Type.2)) +
  geom_bar(stat = "identity") +
  xlab(label = "Secondary Type of Pokemon") +
  ylab(label = "Number of Pokemon") +
  ggtitle(label = "Number of Pokemon by Secondary Type") +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(legend.position = "none") +
  coord_flip() +
  # Negative hjust pushes the label past the end of the bar.
  geom_text(aes(label = number), hjust = -1.0)

Heatmap Distribution of Pokemon Stats by Type
# Heatmap of the mean of each stat per primary type.
# Columns HP through Speed are stacked into key/value pairs, averaged per
# (Type.1, key), and drawn as tiles labelled with the averaged value.
# NOTE(review): HP:Speed assumes the stat columns are contiguous in the
# data frame -- confirm against the CSV column order.
pokemon %>%
  gather(key, value, HP:Speed) %>%
  group_by(Type.1, key) %>%
  # as.integer() truncates the mean toward zero (it does not round).
  summarise(Stat = as.integer(mean(value))) %>%
  # summarise() leaves the result grouped by Type.1; drop that grouping.
  ungroup() %>%
  ggplot(aes(x = key, y = Type.1)) +
  geom_tile(aes(fill = Stat)) +
  geom_text(aes(label = Stat), color = "white", size = 3) +
  labs(
    x = "Stat Category",
    y = "Pokemon Type",
    title = "Heatmap Distribution of Pokemon Stats by Type"
  ) +
  theme_bw() +
  # The legend is hidden because every tile already carries its value; the
  # earlier, overridden legend.position = "bottom" setting was removed.
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

Total Score of Pokemon by generation and Legendary type
# Boxplots of Total per generation, one panel per value of Legendary.
# Generation is converted to a factor on the x axis as well as for fill, so
# the axis is discrete and every generation gets its own labelled position
# (a numeric x would yield a continuous axis with automatically chosen breaks).
ggplot(
  data = pokemon,
  aes(
    x = as.factor(Generation),
    y = Total,
    fill = as.factor(Generation)
  )
) +
  geom_boxplot() +
  xlab(label = "Pokemon Generation") +
  ylab(label = "Total Score of Pokemon") +
  ggtitle(label = "Pokemon Score by Generation facet by Legendary flag") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Fill colours repeat the x axis, so the legend is hidden.
  theme(legend.position = "none") +
  facet_grid(~ Legendary)

Total Score of Pokemon by Type
# Boxplots of Total for each primary type (Type.1), one colour per type.
ggplot(pokemon, aes(x = Type.1, y = Total, fill = Type.1)) +
  geom_boxplot() +
  labs(
    x = "Pokemon Type",
    y = "Total Score of Pokemon",
    title = "Pokemon Score by Type"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    legend.position = "none"                # fill colours repeat the axis
  )

Distribution of all scores
# Boxplot of each stat across all rows: columns HP through Speed are
# stacked into key/value pairs and plotted with one box per stat.
pokemon %>%
  gather(key = "key", value = "value", HP:Speed) %>%
  ggplot(aes(x = key, y = value, fill = key)) +
  geom_boxplot() +
  xlab("Category") +
  ylab("Stats") +
  ggtitle("Boxplot Distribution of Overall Pokemon Stats") +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )

Score of Pokemon by generation
# Boxplots of each stat per generation, one panel per stat.
# Columns HP through Speed are stacked into key/value pairs first.
# Generation is converted to a factor on the x axis so the axis is discrete
# and each generation gets its own labelled position.
pokemon %>%
  gather(key, value, HP:Speed) %>%
  ggplot(
    aes(
      x = as.factor(Generation),
      y = value,
      fill = as.factor(Generation)
    )
  ) +
  geom_boxplot() +
  facet_grid(~key) +
  xlab(label = "Generation") +
  ylab(label = "Score") +
  ggtitle("Various score based on Generation flag") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Fill colours repeat the x axis; hide the legend, which would otherwise
  # be titled with the raw expression "as.factor(Generation)".
  theme(legend.position = "none")

Score of Pokemon by Legendary type
# Boxplots of each stat split by the Legendary column, one panel per stat.
# Columns HP through Speed are stacked into key/value pairs first.
pokemon %>%
  gather(key, value, HP:Speed) %>%
  ggplot(aes(x = Legendary, y = value, fill = as.factor(Legendary))) +
  geom_boxplot() +
  facet_grid(~key) +
  xlab(label = "Legendary") +
  ylab(label = "Score") +
  ggtitle("Various score based on Legendary flag") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Fill colours repeat the x axis; hide the legend, which would otherwise
  # be titled with the raw expression "as.factor(Legendary)".
  theme(legend.position = "none")
