Welcome to Star Wars!

Loading and Checking the Data

Load Relevant Packages

#options(repos = "https://cloud.r-project.org/")
#install.packages("tinytex",dependencies = TRUE, type = "source")
#tinytex::install_tinytex(force = TRUE)
library(tinytex)
library(psych)
library(tidyverse)
library(ggplot2)
library(showtext)
library(dplyr)

Import Star Wars Data

### List the data sets available from the packages on the search path
data()
### Copy the built-in `starwars` data set into a working object
star_wars_data <- starwars
### Preview the first 15 rows
head(star_wars_data, n = 15)
## # A tibble: 15 × 14
##    name     height  mass hair_color skin_color eye_color birth_year sex   gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
##  2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
##  3 R2-D2        96    32 <NA>       white, bl… red             33   none  mascu…
##  4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
##  5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
##  6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
##  7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
##  8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu…
##  9 Biggs D…    183    84 black      light      brown           24   male  mascu…
## 10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
## 11 Anakin …    188    84 blond      fair       blue            41.9 male  mascu…
## 12 Wilhuff…    180    NA auburn, g… fair       blue            64   male  mascu…
## 13 Chewbac…    228   112 brown      unknown    blue           200   male  mascu…
## 14 Han Solo    180    80 brown      fair       brown           29   male  mascu…
## 15 Greedo      173    74 <NA>       green      black           44   male  mascu…
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
### Preview the last 15 rows
tail(star_wars_data, n = 15)
## # A tibble: 15 × 14
##    name     height  mass hair_color skin_color eye_color birth_year sex   gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
##  1 Jocasta…    167    NA white      fair       blue              NA fema… femin…
##  2 R4-P17       96    NA none       silver, r… red, blue         NA none  femin…
##  3 Wat Tam…    193    48 none       green, gr… unknown           NA male  mascu…
##  4 San Hill    191    NA none       grey       gold              NA male  mascu…
##  5 Shaak Ti    178    57 none       red, blue… black             NA fema… femin…
##  6 Grievous    216   159 none       brown, wh… green, y…         NA male  mascu…
##  7 Tarfful     234   136 brown      brown      blue              NA male  mascu…
##  8 Raymus …    188    79 brown      light      brown             NA male  mascu…
##  9 Sly Moo…    178    48 none       pale       white             NA <NA>  <NA>  
## 10 Tion Me…    206    80 none       grey       black             NA male  mascu…
## 11 Finn         NA    NA black      dark       dark              NA male  mascu…
## 12 Rey          NA    NA brown      light      hazel             NA fema… femin…
## 13 Poe Dam…     NA    NA brown      light      brown             NA male  mascu…
## 14 BB8          NA    NA none       none       black             NA none  mascu…
## 15 Captain…     NA    NA none       none       unknown           NA fema… femin…
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
### List the column names; the previews above report 14 columns in total
colnames(star_wars_data)
##  [1] "name"       "height"     "mass"       "hair_color" "skin_color"
##  [6] "eye_color"  "birth_year" "sex"        "gender"     "homeworld" 
## [11] "species"    "films"      "vehicles"   "starships"
### Drop the list-columns `films`, `vehicles`, and `starships`
sw_data <- select(star_wars_data, -c(films, vehicles, starships))
### Confirm that the three columns are no longer present
colnames(sw_data)
##  [1] "name"       "height"     "mass"       "hair_color" "skin_color"
##  [6] "eye_color"  "birth_year" "sex"        "gender"     "homeworld" 
## [11] "species"

Data Manipulation

Rewrite gender variable to be a factor with two levels

### Store gender as a factor with the levels "masculine" then "feminine"
sw_data$gender <- factor(
  sw_data$gender,
  levels = c("masculine", "feminine")
)

Rename height to height_cm (height in centimetres)

### Rename `height` to `height_cm` so the unit is part of the column name
sw_data <- rename(sw_data, height_cm = height)

New height variable in inches

### Derive height in inches from height in centimetres (divide by 2.54)
sw_data <- sw_data %>%
  mutate(height_inches = height_cm / 2.54)

Filter to characters taller than 200 cm or whose homeworld is Tatooine

### Keep characters whose homeworld is Tatooine or who are taller than 200 cm
sw_data_filtered <- filter(
  sw_data,
  homeworld == "Tatooine" | height_cm > 200
)

Data Descriptives

Mean mass across all Star Wars characters

### Average mass over the characters with a recorded mass (about 97 kg)
mean_mass <- sw_data %>%
  pull(mass) %>%
  mean(na.rm = TRUE)

Count of masculine and feminine characters

### Count characters per gender level, leaving out rows with a missing gender.
### count() has no `na.rm` argument: `na.rm = TRUE` is treated as an extra
### grouping expression, which adds a constant `na.rm` column and keeps the
### NA row. Missing values are therefore filtered out explicitly first.
count_gender <- sw_data %>%
  filter(!is.na(gender)) %>%
  count(gender) # N masculine = 66, N feminine = 17

Which species is tallest?

### Average height per species (missing heights ignored), tallest first.
### Result: Quermian is tallest with an average height of 264 cm.
height_species <- group_by(sw_data, species) %>%
  summarise(mean_height = mean(height_cm, na.rm = TRUE)) %>%
  arrange(desc(mean_height))

Which species has the shortest character?

### Sort the characters from shortest to tallest so that the first row shows
### the shortest character and its species. The result gets its own name so
### the per-species mean heights stored in `height_species` are not
### overwritten by this unrelated, character-level table.
shortest_characters <- sw_data %>%
  arrange(height_cm) # Yoda (Yoda's species) is the shortest character at 66 cm

Which species is the largest group by sample size?

### Tally the characters per species, largest group first
count_species <- sw_data %>%
  count(species, sort = TRUE) # Human is the largest sample with N = 35

Data Visualizations

Histogram for characters’ height

### Histogram of the character heights (cm)
hist(
  x = sw_data[["height_cm"]],
  main = "Characters' Heights",
  xlab = "Height in Cm",
  ylab = "Frequency of Height"
)

# Histogram for characters’ mass

### Histogram of the character masses
hist(
  x = sw_data[["mass"]],
  main = "Characters' Masses",
  xlab = "Mass",
  ylab = "Frequency of Mass"
)

# Scatterplot for characters’ heights and masses

### Scatterplot of mass against height, drawn with solid blue points
plot(
  x = sw_data[["height_cm"]],
  y = sw_data[["mass"]],
  pch = 19,
  col = "blue",
  xlab = "Height (cm)",
  ylab = "Mass (kg)",
  main = "Relationship Between Height and Mass"
)

Remove NA before barplot

### Drop only the characters whose homeworld is missing. na.omit() would
### discard every row that has an NA in any column (mass, birth_year, ...),
### which undercounts the characters per homeworld in the bar plot.
sw_data_cleaned <- sw_data %>%
  filter(!is.na(homeworld))

Barplot for number of characters living in each homeworld

### Bar chart of the number of characters per homeworld. The subtitle reports
### the number of rows actually plotted rather than a hard-coded "n = 87",
### which goes stale whenever rows are removed upstream.
sw_data_cleaned %>%
  ggplot(aes(x = homeworld, fill = homeworld)) +
  geom_bar() +
  labs(
    title = "Number of Characters Living in Each Homeworld",
    subtitle = paste0("n = ", nrow(sw_data_cleaned)),
    x = "Homeworld",
    y = "Number of Characters",
    caption = "This data is from the Star Wars dataset."
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 8, face = "italic", hjust = 0),
    # Fill colour duplicates the x axis, so the legend is suppressed
    legend.position = "none"
  )