Module 4 Lab

# Global knitr chunk options: echo the code of every chunk and center figures.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
knitr::opts_chunk$set(
  echo = TRUE,
  # warning = FALSE,
  # message = FALSE,
  fig.align = "center"
)

# Load the tidyverse, skimr, and gt packages
pacman::p_load(tidyverse, skimr, gt)

Instructions:

Make sure that the code you write for this assignment uses the different methods we’ve learned in the course so far, relying on tidyverse functions.
The code should have basic comments included describing what the code below is achieving.
When creating histograms, density plots, or bar charts, add scale_y_continuous(expand = c(0, 0, 0.05, 0)) to the graph.

Data Description:

The supers2.csv file has 12 variables on 6961 characters with super abilities. The 12 variables are:

Character: The primary name of the character
Creator: If the owner is Marvel or DC
Alignment: If the character is considered a good, neutral, or bad guy
Alter Ego: If the character has an alter ego (e.g., Spider-Man = Peter Parker)
Eye_color and Hair_color: The character’s eye and hair colors
Species: The species of the character
IQ: The character’s IQ
Intelligence, Strength, Combat, Durability: Attribute scores on a scale from 1 - 100

Read in the “supers2.csv” data set and save it as a global object named supers.

# Import the supers data from the course GitHub repository and store it in
# the global object `supers`
supers <- read.csv(
  file = "https://raw.githubusercontent.com/Shammalamala/DS-1870-Data/refs/heads/main/supers.csv"
)

Question 1: Keeping only Marvel and DC characters

Part 1A: Comics data set

Create a data set named comics that has:

Creators as Marvel Comics and DC Comics
Alignment of the characters is not missing (NA).

If done correctly, you should have 1899 rows. Display the resulting data set using the tibble() function.

# Build `comics`: only Marvel Comics / DC Comics characters whose Alignment
# is recorded (not NA)
comics <- supers |>
  # Step 1: restrict to the two publishers of interest
  filter(Creator %in% c("Marvel Comics", "DC Comics")) |>
  # Step 2: drop characters with a missing Alignment
  filter(!is.na(Alignment))

# Print the filtered data as a tibble
tibble(comics)

## # A tibble: 1,899 × 12
##    Character     Creator Alignment Alter_Egos Eye_color Hair_color Species    IQ
##    <chr>         <chr>   <chr>     <chr>      <chr>     <chr>      <chr>   <int>
##  1 3-D Man       Marvel… Good      None       <NA>      <NA>       None      110
##  2 A-Bomb        Marvel… Good      None       Yellow (… No Hair    Human     130
##  3 A.I.M. Agent  Marvel… Bad       None       None      None       Human      70
##  4 A.M.A.Z.O     DC Com… Bad       None       Red       Orange     Android   115
##  5 A.M.A.Z.O.    DC Com… Bad       None       Red       Brown      Android   190
##  6 Abby          DC Com… Good      None       Black     White      Animal    106
##  7 Abel Cuvier   DC Com… None      None       None      None       None      160
##  8 Abomination   Marvel… Bad       None       <NA>      <NA>       Human     130
##  9 Above All Ot… Marvel… Neutral   None       None      None       Cosmic…   200
## 10 Abraxas       Marvel… Bad       None       Blue      Black      Cosmic…   200
## # ℹ 1,889 more rows
## # ℹ 4 more variables: Intelligence <int>, Strength <int>, Combat <int>,
## #   Durability <int>

Part 1B) Comics IQ

Using the comics data set created in 1A), create the density plots seen in the pdf in Brightspace. Make sure the area under the curves is partly see-through.

# IQ density curves, filled by Creator, with one panel per Alignment
comics |>
  ggplot(mapping = aes(x = IQ, fill = Creator)) +

  # Semi-transparent fill so overlapping curves both stay visible
  geom_density(alpha = 0.5) +

  # Small multiples: one panel for each Alignment value
  facet_wrap(facets = vars(Alignment)) +

  # Fill colors chosen to match the DC and Marvel brand colors
  scale_fill_manual(
    values = c("DC Comics" = "#0476f2", "Marvel Comics" = "#ed1d24")
  ) +

  # No padding on either side of the x-axis
  scale_x_continuous(expand = c(0, 0, 0, 0)) +

  # No padding below the curves; 5% headroom above them
  scale_y_continuous(expand = c(0, 0, 0.05, 0)) +

  # Drop the title of the fill legend
  labs(fill = NULL) +

  # Black-and-white theme with the legend placed above the panels
  theme_bw() +
  theme(legend.position = "top")

Part 1C) Improved Density Plots

Using one of the dplyr verbs, create a new data set named comics2, with:

Alignment having 3 groups:
1. Good = Good
2. Neutral = Neutral or None
3. Bad = Bad
- Hint: Look at the slides for mutate for the appropriate dplyr verb for this question!
Alignment groups should be in order of Good, Neutral, Bad
Physical = (Strength + Combat + Durability)/3
Remove the word “ Comics” from the Creator column using the str_remove() function

The resulting data set should still have 1899 characters. After saving comics2, show that the changes worked by using skim() on only the three columns mentioned above!

# Build comics2 from comics: recode Alignment, add Physical, shorten Creator
comics2 <- comics |>
  mutate(
    # Fold "None" into "Neutral", then store Alignment as a factor whose
    # levels run Good, Neutral, Bad
    Alignment = factor(
      if_else(Alignment == "None", "Neutral", Alignment),
      levels = c("Good", "Neutral", "Bad")
    ),

    # Physical is the average of the three physical attribute scores
    Physical = (Strength + Combat + Durability) / 3,

    # Strip the " Comics" suffix so Creator reads "Marvel" or "DC"
    Creator = str_remove(Creator, pattern = " Comics")
  )

# Check the three modified/new columns with skim()
comics2 |>
  dplyr::select(Creator, Alignment, Physical) |>
  skimr::skim()

Data summary
Name	dplyr::select(comics2, Cr…
Number of rows	1899
Number of columns	3
_______________________
Column type frequency:
character	1
factor	1
numeric	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
Creator	0	1	2	6	0	2	0

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
Alignment	0	1	FALSE	3	Goo: 727, Bad: 593, Neu: 579

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Physical	0	1	61.14	30.03	1	34.33	60	91.67	100	▂▆▃▂▇

Part 1D) IQ Density Plot

Recreate the density plot in 1B using comics2 for characters with an IQ at or above 50. Don’t save a new data set for the at least 50 IQ characters!

# Same density plot as 1B, built from comics2 and limited to IQ >= 50.
# The filtered rows are piped straight into ggplot(), so nothing is saved.
comics2 |>
  filter(IQ >= 50) |>
  ggplot(mapping = aes(x = IQ, fill = Creator)) +

  # Semi-transparent fill so overlapping curves both stay visible
  geom_density(alpha = 0.5) +

  # Small multiples: one panel for each Alignment level
  facet_wrap(facets = vars(Alignment)) +

  # Fill colors chosen to match the DC and Marvel brand colors
  scale_fill_manual(
    values = c("DC" = "#0476f2", "Marvel" = "#ed1d24")
  ) +

  # No padding on either side of the x-axis
  scale_x_continuous(expand = c(0, 0, 0, 0)) +

  # No padding below the curves; 5% headroom above them
  scale_y_continuous(expand = c(0, 0, 0.05, 0)) +

  # Drop the title of the fill legend
  labs(fill = NULL) +

  # Black-and-white theme with the legend placed above the panels
  theme_bw() +
  theme(legend.position = "top")

Question 2) Average Physical by Creator and Alignment

Calculate the sample size, mean, median, and standard deviation for Physical rating for each Creator and Alignment combination. Round each calculated summary to the nearest whole number, then sort the data set from largest to smallest average. Save it as comic_phys.

Display the resulting data set in the knitted document using the gt() function

# Summarize Physical for every Creator x Alignment combination
comic_phys <- comics2 |>
  summarize(
    # Number of characters in the group
    characters = n(),
    # Mean, median, and standard deviation, each rounded to a whole number
    phys_avg = round(mean(Physical)),
    phys_med = round(median(Physical)),
    phys_sd = round(sd(Physical)),
    .by = c(Creator, Alignment)
  ) |>
  # Sort from the highest to the lowest average Physical score
  arrange(desc(phys_avg))

# Render the summary table with gt
gt(comic_phys)

Creator	Alignment	characters	phys_avg	phys_med	phys_sd
Marvel	Neutral	304	70	79	28
DC	Bad	238	65	73	31
Marvel	Bad	355	60	55	29
Marvel	Good	470	58	50	30
DC	Good	257	58	53	29
DC	Neutral	275	58	55	33

Question 3) Average Character Scores Across Alignment and Creator

Part 3A) Creating the summarized data set

Using comics2 created in 1C and the different dplyr verbs seen so far, create a data set with 4 columns:

Creator: Either DC or Marvel (the “ Comics” suffix was removed in 1C)
Alignment: Good/Neutral/Bad (in that order)
Attribute: Intelligence, Strength, Combat, Durability (in that order) (No Physical)

Hint: Use as_factor() as a quick way to change the order of the groups!

score_avg: Average score for the attribute in column 3 for each creator and alignment combination rounded to the nearest whole number

Save the results as supers_attr. Display the first 10 rows using tibble(). See the pdf in Brightspace for what the final data frame should look like

# Average attribute score for each Creator x Alignment x Attribute combination
supers_attr <- comics2 |>

  # Stack the Intelligence through Durability columns into name/value pairs:
  # the column name goes to Attribute and its value goes to score
  pivot_longer(
    cols = Intelligence:Durability,
    names_to = "Attribute",
    values_to = "score"
  ) |>

  # as_factor() keeps levels in order of first appearance, so Attribute
  # follows the original column order. Alignment is already an ordered-level
  # factor from 1C and is left untouched here.
  mutate(Attribute = as_factor(Attribute)) |>

  # Mean score per group, rounded to the nearest whole number
  summarize(
    score_avg = round(mean(score)),
    .by = c(Creator, Alignment, Attribute)
  )

# Print the summarized data as a tibble
tibble(supers_attr)

## # A tibble: 24 × 4
##    Creator Alignment Attribute    score_avg
##    <chr>   <fct>     <fct>            <dbl>
##  1 Marvel  Good      Intelligence        73
##  2 Marvel  Good      Strength            51
##  3 Marvel  Good      Combat              68
##  4 Marvel  Good      Durability          54
##  5 Marvel  Bad       Intelligence        74
##  6 Marvel  Bad       Strength            57
##  7 Marvel  Bad       Combat              67
##  8 Marvel  Bad       Durability          56
##  9 DC      Bad       Intelligence        78
## 10 DC      Bad       Strength            62
## # ℹ 14 more rows

Part 3B) Dumbbell Plots

Create the graph seen in the pdf in Brightspace.

# Dumbbell plot: for each Attribute, the average scores of the two creators
# are drawn as colored points joined by a line, with one panel per Alignment.
ggplot(
  data = supers_attr,
  mapping = aes(
    x = score_avg,
    y = Attribute
  )
) +

  # Connector between the two creators' averages. The grouping is stated
  # explicitly so each Attribute gets its own segment instead of relying on
  # ggplot2's default grouping by discrete variables.
  geom_line(
    mapping = aes(group = Attribute),
    linewidth = 1
  ) +

  # One point per creator, drawn after the line so it sits on top of it
  geom_point(
    mapping = aes(color = Creator),
    size = 3
  ) +

  # Stack the Alignment panels vertically
  facet_wrap(
    facets = vars(Alignment),
    ncol = 1
  ) +

  # Named values tie each color to its creator rather than to the alphabetical
  # order of the groups, matching how the fill colors are set in 1D
  scale_color_manual(
    values = c("DC" = "#0476f2", "Marvel" = "#ed1d24")
  ) +

  # Black-and-white theme with the legend placed above the panels
  theme_bw() +

  theme(legend.position = "top") +

  # Drop the y-axis and legend titles; label the x-axis
  labs(
    y = NULL,
    x = "Average Score",
    color = NULL
  )

Module 4 Lab - Key

Your Name

DS 1870