# Global knitr chunk defaults: show the code, hide warnings and messages,
# and center every figure.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = "center"
)
## Load the required package: tidyverse
library(tidyverse)
## Reading in the pokedex for almost 700 pokemon from github and adding a
## couple of columns that the later questions rely on
pokedex <- mutate(
  # Data set
  read.csv('https://raw.githubusercontent.com/Shammalamala/DS-2870-Data-Sets/refs/heads/main/pokedex.csv'),
  # Unit conversions: hectograms to ounces and decimeters to inches
  weight = weight * 3.5274,
  height = height * 3.93701,
  # Relabeling legendary as 'Legendary'/'Not Legendary' instead of T/F
  legendary = if_else(legendary, 'Legendary', 'Not Legendary'),
  # The two main games in the franchise that match each generation number
  debut_game = case_when(
    generation == 1 ~ 'Red/Blue',
    generation == 2 ~ 'Gold/Silver',
    generation == 3 ~ 'Ruby/Sapphire',
    generation == 4 ~ 'Diamond/Pearl',
    generation == 5 ~ 'Black/White',
    generation == 6 ~ 'X/Y'
  ),
  # Which of the 'main types' a pokemon is, if any. case_when() keeps the
  # first condition that matches, so a dual-type pokemon is labeled by the
  # earliest type listed here; Normal is only checked against type1.
  main_type = factor(
    case_when(
      type1 == 'Grass' | type2 == 'Grass' ~ 'Grass',
      type1 == 'Water' | type2 == 'Water' ~ 'Water',
      type1 == 'Fire' | type2 == 'Fire' ~ 'Fire',
      type1 == 'Bug' | type2 == 'Bug' ~ 'Bug',
      type1 == 'Normal' ~ 'Normal',
      .default = 'Other'
    ),
    # Level order controls the order the types appear in plots and legends
    levels = c('Fire', 'Water', 'Grass', 'Bug', 'Normal', 'Other')
  )
)
The pokedex data set has the information on 697 Pokemon up to generation 6 (X & Y). The relevant columns for this assignment are:

- number: the unique identifier for the pokemon in the data set
- name: the name of the pokemon
- type1 and type2: which of 17 different types a pokemon can be (many have two types)
- assessed_value: The value of the property assessed by the town for tax purposes
- hp, attack, defense, sp_atk, sp_def, and speed: the base combat attributes
- generation and debut_game: which game the pokemon debuted in
- height and weight: the size of each pokemon (in inches and ounces, respectively)
- sprites: a url to an image of each pokemon in sprite form
- main_type: which of the 5 most common types (Water/Grass/Fire/Bug/Normal), or Other, a pokemon is, based on type1 or type2

Create a bar chart for each generation 1 - 6. See what it should look like in Brightspace
Use the following vectors in the appropriate function to
match the bar colors:
fill = c('#ff1111', '#daa520', '#a00000', '#aaaaff', '#444444', '#6376b8')
color = c('#1111ff', '#c0c0c0', '#0000a0', '#ffaaaa', '#e1e1e1', '#ed5540')
# Question 1: bar chart of the games each pokemon debuted in, ordered by
# generation number
pokedex |>
  ggplot(
    aes(
      x = fct_reorder(debut_game, generation),
      # group = 1 puts every row in a single group, so prop is each bar's
      # share of all pokemon rather than of its own bar
      y = after_stat(prop),
      group = 1
    )
  ) +
  # One fill and one outline color per bar, listed in generation order
  geom_bar(
    color = c('#1111ff', '#c0c0c0', '#0000a0', '#ffaaaa', '#e1e1e1', '#ed5540'),
    fill = c('#ff1111', '#daa520', '#a00000', '#aaaaff', '#444444', '#6376b8')
  ) +
  # No padding below the bars so they sit on the x-axis; 5% padding above
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(
    title = 'New Pokemon From Each Generation',
    x = 'Debut Generation',
    y = NULL
  ) +
  theme_classic() +
  # Centering the title
  theme(plot.title = element_text(hjust = 0.5))
Create the graph seen in question 2 in Brightspace. The color codes for each type are:
'Normal' = '#a8a77a',
'Grass' = '#7ac74c',
'Water' = '#6390F0',
'Fire' = '#EE8130',
'Bug' = '#a6b91a',
'Other' = '#6f35fc'
# Question 2: for each generation, the share of its new pokemon that falls
# in each main type
pokedex |>
  ggplot(aes(x = fct_reorder(debut_game, generation), fill = main_type)) +
  # position = 'fill' stretches every bar to the same height, so the
  # segments show proportions instead of counts
  geom_bar(position = 'fill') +
  # Type colors given in the question
  scale_fill_manual(
    values = c(
      'Normal' = '#a8a77a',
      'Grass' = '#7ac74c',
      'Water' = '#6390F0',
      'Fire' = '#EE8130',
      'Bug' = '#a6b91a',
      'Other' = '#6f35fc'
    )
  ) +
  # Percent labels; no padding below the bars so they sit on the x-axis
  scale_y_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(
    title = 'Percentage of Pokemon by Type for Each Generation',
    x = 'Debut Generation',
    y = NULL,
    fill = 'Type'
  ) +
  theme_test() +
  # Centering the title
  theme(plot.title = element_text(hjust = 0.5))
Which games had the fewest Fire types introduced as a percentage of new pokemon?
Create the graph seen in Brightspace. The colors for
main_type are the same as question 2.
# Question 3: number of new grass, water, and fire pokemon in each
# generation, with one bar per type placed side-by-side
ggplot(
  # Keeping only the three starter types
  data = filter(pokedex, main_type %in% c("Fire", "Water", "Grass")),
  mapping = aes(x = fct_reorder(debut_game, generation), fill = main_type)
) +
  geom_bar(position = 'dodge2') +
  # Same type colors as question 2
  scale_fill_manual(
    values = c(
      'Normal' = '#a8a77a',
      'Grass' = '#7ac74c',
      'Water' = '#6390F0',
      'Fire' = '#EE8130',
      'Bug' = '#a6b91a',
      'Other' = '#6f35fc'
    )
  ) +
  # No padding below the bars so they sit on the x-axis; 5% padding above
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(
    title = 'New Pokemon by Starter Type for Each Generation',
    x = 'Debut Generation',
    y = NULL,
    fill = 'Type'
  ) +
  theme_test() +
  # Centering the title
  theme(plot.title = element_text(hjust = 0.5))
Create the graph seen in Brightspace. The values on the x and y-axis are the z-scores for HP and speed calculated using all 697 pokemon.
\[z = (x - \mu_x)/\sigma_x\]
The graph displays only the 38 legendary pokemon.
You can add vertical and horizontal lines using
geom_vline(xintercept) and
geom_hline(yintercept) respectively.
# Question 4: scatterplot of the HP and speed z-scores of the legendaries
pokedex |>
  # Standardizing hp and speed into z_hp and z_speed. This runs before the
  # filter so the means and standard deviations come from every pokemon.
  mutate(
    across(
      c(hp, speed),
      \(stat) (stat - mean(stat)) / sd(stat),
      .names = "z_{.col}"
    )
  ) |>
  # Keeping just the legendary pokemon
  filter(legendary == 'Legendary') |>
  ggplot(aes(x = z_hp, y = z_speed)) +
  # Dashed reference lines at a z-score of 0, i.e. the average pokemon
  geom_hline(yintercept = 0, linetype = 'dashed') +
  geom_vline(xintercept = 0, linetype = 'dashed') +
  # Semi-transparent points so overlapping pokemon stay visible
  geom_point(alpha = 0.5) +
  labs(
    title = "Are Legendaries Better than the Average Pokemon?",
    x = "HP z-score",
    y = "Speed z-score"
  ) +
  theme_bw() +
  # Centering the title
  theme(plot.title = element_text(hjust = 0.5))