#options(repos = "https://cloud.r-project.org/")
#install.packages("tinytex",dependencies = TRUE, type = "source")
#tinytex::install_tinytex(force = TRUE)
library(tinytex)
library(psych)
library(tidyverse)
library(ggplot2)
library(showtext)
library(dplyr)
### List the datasets available in the currently attached packages
data()
### Take a working copy of the Star Wars tibble that ships with dplyr
star_wars_data <- dplyr::starwars
### Preview the first 15 characters
head(star_wars_data, n = 15)
## # A tibble: 15 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…
## 4 Darth V… 202 136 none white yellow 41.9 male mascu…
## 5 Leia Or… 150 49 brown light brown 19 fema… femin…
## 6 Owen La… 178 120 brown, gr… light blue 52 male mascu…
## 7 Beru Wh… 165 75 brown light blue 47 fema… femin…
## 8 R5-D4 97 32 <NA> white, red red NA none mascu…
## 9 Biggs D… 183 84 black light brown 24 male mascu…
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male mascu…
## 11 Anakin … 188 84 blond fair blue 41.9 male mascu…
## 12 Wilhuff… 180 NA auburn, g… fair blue 64 male mascu…
## 13 Chewbac… 228 112 brown unknown blue 200 male mascu…
## 14 Han Solo 180 80 brown fair brown 29 male mascu…
## 15 Greedo 173 74 <NA> green black 44 male mascu…
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
### Preview the last 15 characters
tail(star_wars_data, n = 15)
## # A tibble: 15 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Jocasta… 167 NA white fair blue NA fema… femin…
## 2 R4-P17 96 NA none silver, r… red, blue NA none femin…
## 3 Wat Tam… 193 48 none green, gr… unknown NA male mascu…
## 4 San Hill 191 NA none grey gold NA male mascu…
## 5 Shaak Ti 178 57 none red, blue… black NA fema… femin…
## 6 Grievous 216 159 none brown, wh… green, y… NA male mascu…
## 7 Tarfful 234 136 brown brown blue NA male mascu…
## 8 Raymus … 188 79 brown light brown NA male mascu…
## 9 Sly Moo… 178 48 none pale white NA <NA> <NA>
## 10 Tion Me… 206 80 none grey black NA male mascu…
## 11 Finn NA NA black dark dark NA male mascu…
## 12 Rey NA NA brown light hazel NA fema… femin…
## 13 Poe Dam… NA NA brown light brown NA male mascu…
## 14 BB8 NA NA none none black NA none mascu…
## 15 Captain… NA NA none none unknown NA fema… femin…
## # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
### Listing the column names shows the dataset has 14 columns
colnames(star_wars_data)
## [1] "name" "height" "mass" "hair_color" "skin_color"
## [6] "eye_color" "birth_year" "sex" "gender" "homeworld"
## [11] "species" "films" "vehicles" "starships"
### Drop the three list-columns "films", "vehicles", and "starships"
sw_data <- select(star_wars_data, -c(films, vehicles, starships))
### Confirm that the three columns are gone
colnames(sw_data)
## [1] "name" "height" "mass" "hair_color" "skin_color"
## [6] "eye_color" "birth_year" "sex" "gender" "homeworld"
## [11] "species"
### Tidy up the working table in a single pipeline:
###  1. store gender as a factor with "masculine" first, then "feminine"
###  2. rename height to height_cm so the unit is explicit
###  3. append height_inches (1 inch = 2.54 cm) as the last column
sw_data <- sw_data %>%
  mutate(gender = factor(gender, levels = c("masculine", "feminine"))) %>%
  rename(height_cm = height) %>%
  mutate(height_inches = height_cm / 2.54)
# Keep characters who come from Tatooine or are taller than 200 cm.
# Rows where the combined condition evaluates to NA are dropped by filter().
sw_data_filtered <- filter(
  sw_data,
  homeworld == "Tatooine" | height_cm > 200
)
# Average mass over the characters with a recorded mass (about 97 kg)
mean_mass <- with(sw_data, mean(mass, na.rm = TRUE))
# Number of characters per gender, ignoring characters with no recorded gender.
# count() has no na.rm argument: anything passed through `...` is treated as
# an extra grouping variable, so `na.rm = TRUE` would add a constant column
# called `na.rm` and still keep the NA row. Missing values are therefore
# removed explicitly before counting.
count_gender <- sw_data %>%
  filter(!is.na(gender)) %>%
  count(gender) # N masculine = 66, N feminine = 17
# Mean height per species, tallest first.
# Species whose heights are all missing get NaN and are sorted to the end.
height_species <- sw_data %>%
  group_by(species) %>%
  summarise(mean_height = mean(height_cm, na.rm = TRUE)) %>%
  arrange(desc(mean_height)) # Quermian is tallest with an average height of 264 cm
# The same per-species summary, shortest first. It is stored under its own
# name so the "tallest first" table above is not overwritten, and it is built
# from the species averages rather than from individual characters' heights.
height_species_shortest <- height_species %>%
  arrange(mean_height) # Yoda's species is the smallest with an average height of 66 cm
# Number of characters per species, most common species first
count_species <- count(sw_data, species, sort = TRUE) # Human is the largest sample with N = 35
# Histogram for characters' heights
hist(
  x = sw_data[["height_cm"]],
  main = "Characters' Heights",
  xlab = "Height in Cm",
  ylab = "Frequency of Height"
)
# Histogram for characters' masses
hist(
  x = sw_data[["mass"]],
  main = "Characters' Masses",
  xlab = "Mass",
  ylab = "Frequency of Mass"
)
# Scatterplot of mass (y-axis) against height (x-axis),
# drawn with solid blue points (pch = 19)
plot(
  x = sw_data[["height_cm"]],
  y = sw_data[["mass"]],
  pch = 19,
  col = "blue",
  xlab = "Height (cm)",
  ylab = "Mass (kg)",
  main = "Relationship Between Height and Mass"
)
# Complete cases only (rows with no NA in any column); kept under its
# original name in case later code uses it.
sw_data_cleaned <- na.omit(sw_data)
# Bar chart of the number of characters per homeworld.
# Only a missing homeworld disqualifies a character from this chart, so rows
# are filtered on that column alone; na.omit() would also discard characters
# whose mass, hair colour, birth year, etc. happen to be missing.
homeworld_data <- sw_data %>%
  filter(!is.na(homeworld))
homeworld_data %>%
  ggplot(aes(x = homeworld, fill = homeworld)) +
  geom_bar() +
  labs(title = "Number of Characters Living in Each Homeworld",
       x = "Homeworld",
       y = "Number of Characters",
       # Derive n from the plotted rows so the subtitle cannot drift
       # away from the data actually shown.
       subtitle = paste("n =", nrow(homeworld_data)),
       caption = "This data is from the Star Wars dataset.") +
  theme_minimal() +
  theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
        plot.caption = element_text(size = 8, face = "italic", hjust = 0),
        # The x-axis already names each homeworld; the fill legend is redundant.
        legend.position = "none")