# Load the required packages (dplyr and ggplot2)
library(dplyr) #for use of dplyr functions such as glimpse(), mutate(), and filter()
library(ggplot2) #for use of ggplot2 functions such as ggplot()
# Import data
# stringsAsFactors = TRUE is set explicitly because the read.csv() default
# changed to FALSE in R 4.0.0; the text columns must be factors for the
# levels() and droplevels() calls later in this script, and to match the
# str() output recorded below.
comics <- read.csv(
  "/resources/rstudio/Business Statistics/Data/comics.csv",
  stringsAsFactors = TRUE
)
# Convert the data frame to a tibble (as_tibble() replaces the deprecated tbl_df())
comics <- as_tibble(comics)
# Show the structure: one line per column with its type and first values
str(comics)
## Classes 'tbl_df', 'tbl' and 'data.frame': 23272 obs. of 11 variables:
## $ name : Factor w/ 23272 levels "'Spinner (Earth-616)",..: 19833 3335 22769 9647 20956 2220 17576 9346 18794 10957 ...
## $ id : Factor w/ 4 levels "No Dual","Public",..: 3 2 2 2 1 2 2 2 2 2 ...
## $ align : Factor w/ 4 levels "Bad","Good","Neutral",..: 2 2 3 2 2 2 2 2 3 2 ...
## $ eye : Factor w/ 26 levels "Amber Eyes","Auburn Hair",..: 11 5 5 5 5 5 6 6 6 5 ...
## $ hair : Factor w/ 28 levels "Auburn Hair",..: 7 27 3 3 4 14 7 7 7 4 ...
## $ gender : Factor w/ 3 levels "Female","Male",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ gsm : Factor w/ 6 levels "Bisexual Characters",..: NA NA NA NA NA NA NA NA NA NA ...
## $ alive : Factor w/ 2 levels "Deceased Characters",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ appearances : int 4043 3360 3061 2961 2258 2255 2072 2017 1955 1934 ...
## $ first_appear: Factor w/ 2328 levels "1-Apr","1-Aug",..: 1772 2074 2255 2089 2185 2192 2192 2139 2292 2192 ...
## $ publisher : Factor w/ 2 levels "dc","marvel": 2 2 2 2 2 2 2 2 2 2 ...
# List the categories (factor levels) of the hair column
levels(comics[["hair"]])
## [1] "Auburn Hair" "Bald"
## [3] "Black Hair" "Blond Hair"
## [5] "Blue Hair" "Bronze Hair"
## [7] "Brown Hair" "Dyed Hair"
## [9] "Gold Hair" "Green Hair"
## [11] "Grey Hair" "Light Brown Hair"
## [13] "Magenta Hair" "No Hair"
## [15] "Orange Hair" "Orange-brown Hair"
## [17] "Pink Hair" "Platinum Blond Hair"
## [19] "Purple Hair" "Red Hair"
## [21] "Reddish Blond Hair" "Reddish Brown Hair"
## [23] "Silver Hair" "Strawberry Blond Hair"
## [25] "Variable Hair" "Violet Hair"
## [27] "White Hair" "Yellow Hair"
# Cross-tabulate hair (rows) against gender (columns)
tab <- table(comics[["hair"]], comics[["gender"]])
# Display the table of counts
tab
##
## Female Male Other
## Auburn Hair 50 28 0
## Bald 43 778 6
## Black Hair 1557 3735 2
## Blond Hair 1044 1272 0
## Blue Hair 43 51 1
## Bronze Hair 0 1 0
## Brown Hair 875 2592 1
## Dyed Hair 1 0 0
## Gold Hair 1 12 0
## Green Hair 69 87 1
## Grey Hair 101 583 0
## Light Brown Hair 0 6 0
## Magenta Hair 4 1 0
## No Hair 92 841 29
## Orange Hair 14 49 1
## Orange-brown Hair 0 3 0
## Pink Hair 30 12 0
## Platinum Blond Hair 2 0 0
## Purple Hair 50 27 0
## Red Hair 557 520 0
## Reddish Blond Hair 0 6 0
## Reddish Brown Hair 1 2 0
## Silver Hair 10 9 0
## Strawberry Blond Hair 46 29 0
## Variable Hair 5 22 2
## Violet Hair 3 1 0
## White Hair 203 881 0
## Yellow Hair 4 16 0
# Keep only the rows whose hair is "Red Hair", then drop the factor levels
# that no longer occur. Note: this overwrites comics with the subset.
comics <- droplevels(filter(comics, hair == "Red Hair"))
# Side-by-side bar chart of gender counts for each remaining hair level
ggplot(data = comics, mapping = aes(x = hair, fill = gender)) +
  # position = "dodge" places the bars next to each other instead of stacking
  geom_bar(position = "dodge")
# Interpretation