Loading Libraries

library(ggplot2)    #For making plots
library(dplyr)      #For data manipulation
library(ggthemes)   #For adding themes to plots and graphs
library(GGally)
library(tidyr)      #For data manipulation

Loading dataset and exploring data

# Read the Pokemon dataset (path is relative to the working directory)
# and print its dimensions.
pokemon <- read.csv(file = "Pokemon.csv")
dim(pokemon)

# Print the column names, then the structure (type and first values) of
# every column.
colnames(pokemon)
str(pokemon)

# Force the Name column to plain character strings, then print how many
# distinct names the dataset contains.
pokemon[["Name"]] <- as.character(pokemon[["Name"]])
length(unique(pokemon[["Name"]]))

Correlation and Distribution of Variables

# Pairs plot (GGally::ggpairs) restricted to the stat columns listed
# below, drawn with the black-and-white theme and a title.
pokemon %>%
  ggpairs(
    columns = c(
      "Attack", "Defense", "HP",
      "Sp..Atk", "Sp..Def", "Speed"
    )
  ) +
  theme_bw() +
  labs(title = "Correlation Matrix of Pokemon Stats")

Number of Pokemon by Type

# Horizontal bar chart of the number of Pokemon per primary type
# (Type.1). Bars are ordered by count and each bar is annotated with
# its count; the legend is hidden because the axis already names the
# types.
pokemon %>%
  count(Type.1, name = "number") %>%
  ggplot(aes(x = reorder(Type.1, number), y = number, fill = Type.1)) +
  geom_col() +
  geom_text(aes(label = number), hjust = -1.0) +
  coord_flip() +
  labs(
    x = "Type of Pokemon",
    y = "Number of Pokemon",
    title = "Number of Pokemon by Type"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

Pokemon by Secondary Type

# Horizontal bar chart of the number of Pokemon per secondary type
# (Type.2). Rows whose Type.2 is the empty string are excluded first
# (filter() also drops rows where the comparison evaluates to NA).
# Bars are ordered by count and annotated with the count.
#
# The axis label and title name the *secondary* type explicitly so this
# chart is not confused with the primary-type chart.
pokemon %>%
  filter(Type.2 != "") %>%
  group_by(Type.2) %>%
  summarise(number = n()) %>%
  ggplot(aes(x = reorder(Type.2, number), y = number, fill = Type.2)) +
  geom_bar(stat = "identity") +
  xlab(label = "Secondary Type of Pokemon") +
  ylab(label = "Number of Pokemon") +
  ggtitle(label = "Number of Pokemon by Secondary Type") +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(legend.position = "none") +
  coord_flip() +
  geom_text(aes(label = number), hjust = -1.0)

Heatmap Distribution of Pokemon Stats by Type

# Heatmap of the mean value of each stat per primary type.
#
# The columns from HP through Speed (presumably the individual base
# stats -- confirm against the CSV column order) are stacked into
# key/value pairs, averaged per (Type.1, key) and drawn as tiles with
# the mean printed in each cell.
pokemon %>%
  pivot_longer(HP:Speed, names_to = "key", values_to = "value") %>%
  group_by(Type.1, key) %>%
  # Round to the nearest integer; a bare as.integer() would truncate
  # and systematically understate the displayed means.
  summarise(Stat = as.integer(round(mean(value)))) %>%
  ggplot(aes(x = key, y = Type.1)) +
  geom_tile(aes(fill = Stat)) +
  geom_text(aes(label = Stat), color = "white", size = 3) +
  labs(
    x = "Stat Category",
    y = "Pokemon Type",
    title = "Heatmap Distribution of Pokemon Stats by Type"
  ) +
  theme_bw() +
  # theme() must follow theme_bw(), which would otherwise reset these.
  # The legend is hidden: the value is already printed in every cell.
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

Total Score of Pokemon by Generation and Legendary Type

# Boxplots of the Total score for each generation, in one panel per
# value of the Legendary column.
#
# Generation is converted to a factor for the x axis as well as for the
# fill, so the axis is discrete and shows one labelled tick per
# generation instead of a continuous numeric scale.
ggplot(
  data = pokemon,
  aes(
    x = as.factor(Generation),
    y = Total,
    fill = as.factor(Generation)
  )
) +
  geom_boxplot() +
  xlab(label = "Pokemon Generation") +
  ylab(label = "Total Score of Pokemon") +
  ggtitle(label = "Pokemon Score by Generation, faceted by Legendary flag") +
  # Legend hidden: the fill colour repeats what the x axis already shows.
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  ) +
  facet_grid(~ Legendary)

Total Score of Pokemon by Type

# Boxplots of the Total score, one box per primary type (Type.1).
# The legend is suppressed because the x axis already names each type.
pokemon %>%
  ggplot(aes(x = Type.1, y = Total, fill = Type.1)) +
  geom_boxplot() +
  labs(
    x = "Pokemon Type",
    y = "Total Score of Pokemon",
    title = "Pokemon Score by Type"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

Distribution of all scores

# Boxplots comparing the overall distribution of each stat.
#
# The columns from HP through Speed are stacked into key/value pairs
# with pivot_longer() (the maintained replacement for gather()), then
# one box is drawn per stat.
pokemon %>%
  pivot_longer(HP:Speed, names_to = "key", values_to = "value") %>%
  ggplot(aes(x = key, y = value, fill = key)) +
  geom_boxplot() +
  labs(
    x = "Category",
    y = "Stats",
    title = "Boxplot Distribution of Overall Pokemon Stats"
  ) +
  # Legend hidden: the x axis already names each stat.
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

Score of Pokemon by generation

# Boxplots of every stat by generation, one panel per stat.
#
# The columns from HP through Speed are stacked into key/value pairs
# with pivot_longer() (the maintained replacement for gather()).
# Generation is treated as a factor on the x axis so each generation
# gets its own labelled tick, and the fill legend is given a readable
# title instead of the raw expression "as.factor(Generation)".
pokemon %>%
  pivot_longer(HP:Speed, names_to = "key", values_to = "value") %>%
  ggplot(
    aes(
      x = as.factor(Generation),
      y = value,
      fill = as.factor(Generation)
    )
  ) +
  geom_boxplot() +
  facet_grid(~key) +
  labs(x = "Generation", y = "Score", fill = "Generation") +
  ggtitle("Various score based on Generation flag") +
  theme(plot.title = element_text(hjust = 0.5))

Score of Pokemon by Legendary type

# Boxplots of every stat split by the Legendary column, one panel per
# stat.
#
# The columns from HP through Speed are stacked into key/value pairs
# with pivot_longer() (the maintained replacement for gather()). The
# axis label, title and legend title spell "Legendary" correctly; the
# legend would otherwise be titled with the raw expression
# "as.factor(Legendary)".
pokemon %>%
  pivot_longer(HP:Speed, names_to = "key", values_to = "value") %>%
  ggplot(aes(x = Legendary, y = value, fill = as.factor(Legendary))) +
  geom_boxplot() +
  facet_grid(~key) +
  labs(x = "Legendary", y = "Score", fill = "Legendary") +
  ggtitle("Various scores based on Legendary flag") +
  theme(plot.title = element_text(hjust = 0.5))