# Global knitr chunk defaults for the whole document: echo the code, hide
# warnings and messages, and center every figure.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = "center"
)

## Load the required package: tidyverse
library(tidyverse)

## Build the pokedex: read the raw CSV from GitHub, then derive the helper
## columns that the later questions rely on
pokedex <-
  read.csv(
    "https://raw.githubusercontent.com/Shammalamala/DS-2870-Data-Sets/refs/heads/main/pokedex.csv"
  ) |>
  # Step 1: label each generation with its pair of main-series games
  # (generations outside 1-6 match no branch and become NA)
  mutate(
    debut_game = case_when(
      generation == 1 ~ "Red/Blue",
      generation == 2 ~ "Gold/Silver",
      generation == 3 ~ "Ruby/Sapphire",
      generation == 4 ~ "Diamond/Pearl",
      generation == 5 ~ "Black/White",
      generation == 6 ~ "X/Y"
    )
  ) |>
  # Step 2: assign one "main type" per pokemon. The branches are tried in
  # order, so a dual-type pokemon gets the first match (Grass before Water
  # before Fire before Bug). Normal is only checked against type1.
  mutate(
    main_type = factor(
      case_when(
        type1 == "Grass" | type2 == "Grass" ~ "Grass",
        type1 == "Water" | type2 == "Water" ~ "Water",
        type1 == "Fire" | type2 == "Fire" ~ "Fire",
        type1 == "Bug" | type2 == "Bug" ~ "Bug",
        type1 == "Normal" ~ "Normal",
        .default = "Other"
      ),
      levels = c("Fire", "Water", "Grass", "Bug", "Normal", "Other")
    )
  ) |>
  # Step 3: readable legendary labels and unit conversions
  mutate(
    # Logical flag -> "Legendary" / "Not Legendary"
    legendary = if_else(legendary, "Legendary", "Not Legendary"),
    # Hectograms -> ounces
    weight = weight * 3.5274,
    # Decimeters -> inches
    height = height * 3.93701
  )

Data Description

The pokedex data set has the information on 697 Pokemon up to generation 6 (X & Y). The relevant columns for this assignment are:

  1. number: the unique identifier for the pokemon in the data set
  2. name: the name of the pokemon
  3. type1 and type2: Which of 17 different types a pokemon can be (many have two types)
  4. legendary: Whether the pokemon is 'Legendary' or 'Not Legendary'
  5. hp, attack, defense, sp_atk, sp_def, and speed: the base combat attributes
  6. generation and debut_game: Which game the pokemon debuted in.
  7. height and weight: The size of each pokemon (in inches and ounces, respectively)
  8. sprites: a url to an image of each pokemon in sprite form
  9. main_type: Which of the 5 most common types (Water/Grass/Fire/Bug/Normal) a pokemon is, based on type1 or type2 (Normal is matched on type1 only), or 'Other' if it is none of them

Question 1: Bar chart of Generation

Create a bar chart for each generation 1 - 6. See what it should look like in Brightspace

Use the following vectors in the appropriate function to match the bar colors: fill = c('#ff1111', '#daa520', '#a00000', '#aaaaff', '#444444', '#6376b8') and color = c('#1111ff', '#c0c0c0', '#0000a0', '#ffaaaa', '#e1e1e1', '#ed5540')

# Question 1: proportion of all pokemon that debuted in each generation
pokedex |>
  ggplot(
    aes(
      # Order the game labels by generation number rather than alphabetically
      x = fct_reorder(debut_game, generation),
      # group = 1 makes prop the share of the whole data set per bar
      y = after_stat(prop),
      group = 1
    )
  ) +
  # One fill and one outline color per generation bar, in generation order
  geom_bar(
    color = c("#1111ff", "#c0c0c0", "#0000a0", "#ffaaaa", "#e1e1e1", "#ed5540"),
    fill = c("#ff1111", "#daa520", "#a00000", "#aaaaff", "#444444", "#6376b8")
  ) +
  # No padding below zero so the bars rest on the x-axis; 5% headroom on top
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(
    title = "New Pokemon From Each Generation",
    x = "Debut Generation",
    y = NULL
  ) +
  theme_classic() +
  # Center the title
  theme(plot.title = element_text(hjust = 0.5))

Question 2: Main Type by Generation

Create the graph seen in question 2 in Brightspace. The color codes for each type are:

'Normal' = '#a8a77a',
'Grass'  = '#7ac74c',
'Water'  = '#6390F0',
'Fire'   = '#EE8130',
'Bug'    = '#a6b91a',
'Other'  = '#6f35fc'
# Question 2: stacked bars showing the share of each main type per generation
pokedex |>
  ggplot(
    aes(
      # Order the game labels by generation number rather than alphabetically
      x = fct_reorder(debut_game, generation),
      fill = main_type
    )
  ) +
  # position = "fill" rescales every bar to a total height of 1
  geom_bar(position = "fill") +
  # Type colors, matched to the factor levels by name
  scale_fill_manual(
    values = c(
      "Normal" = "#a8a77a",
      "Grass" = "#7ac74c",
      "Water" = "#6390F0",
      "Fire" = "#EE8130",
      "Bug" = "#a6b91a",
      "Other" = "#6f35fc"
    )
  ) +
  # Percent labels; no padding below zero so the bars rest on the x-axis
  scale_y_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(
    title = "Percentage of Pokemon by Type for Each Generation",
    x = "Debut Generation",
    y = NULL,
    fill = "Type"
  ) +
  theme_test() +
  # Center the title
  theme(plot.title = element_text(hjust = 0.5))

Which games had the fewest Fire types introduced as a percentage of new pokemon?

Question 3: Side-by-side bar chart

Create the graph seen in Brightspace. The colors for main_type are the same as question 2.

# Question 3: counts of Fire, Water, and Grass pokemon per generation,
# drawn as side-by-side bars
filter(pokedex, main_type %in% c("Fire", "Water", "Grass")) |>
  ggplot(
    aes(
      # Order the game labels by generation number rather than alphabetically
      x = fct_reorder(debut_game, generation),
      fill = main_type
    )
  ) +
  # dodge2 places the bars for each type next to each other
  geom_bar(position = "dodge2") +
  # Same type palette as question 2
  scale_fill_manual(
    values = c(
      "Normal" = "#a8a77a",
      "Grass" = "#7ac74c",
      "Water" = "#6390F0",
      "Fire" = "#EE8130",
      "Bug" = "#a6b91a",
      "Other" = "#6f35fc"
    )
  ) +
  # No padding below zero so the bars rest on the x-axis; 5% headroom on top
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(
    title = "New Pokemon by  Starter Type for Each Generation",
    x = "Debut Generation",
    y = NULL,
    fill = "Type"
  ) +
  theme_test() +
  # Center the title
  theme(plot.title = element_text(hjust = 0.5))

Question 4: HP vs speed z-scores for legendary pokemon

Create the graph seen in Brightspace. The values on the x and y-axis are the z-scores for HP and speed calculated using all 697 pokemon.

\[z = (x - \mu_x)/\sigma_x\]

The graph displays only the 38 legendary pokemon.

You can add vertical and horizontal lines using geom_vline(xintercept) and geom_hline(yintercept) respectively.

# Question 4: HP vs speed z-scores, plotted for the legendary pokemon only
pokedex |>
  # Standardize hp and speed BEFORE filtering, so the mean and sd come from
  # every pokemon in the data set; the results land in z_hp and z_speed
  mutate(
    across(
      c(hp, speed),
      \(stat) (stat - mean(stat)) / sd(stat),
      .names = "z_{.col}"
    )
  ) |>
  # Only the legendaries are drawn
  filter(legendary == "Legendary") |>
  ggplot(aes(x = z_hp, y = z_speed)) +
  # Semi-transparent points so overlapping pokemon stay visible
  geom_point(alpha = 0.5) +
  # Dashed reference lines at z = 0, i.e. the all-pokemon average
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  # Center the title
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = "Are Legendaries Better than the Average Pokemon?",
    x = "HP z-score",
    y = "Speed z-score"
  )