Load the libraries

library(dplyr)
library(ggplot2)

Load comics.csv file into R

# Read the comics data set, parsing align and gender as factors up front
comics <- read.csv(
  "../../DataSet/comics.csv",
  colClasses = c(align = "factor", gender = "factor")
)
# Compact overview of every column and its type
glimpse(comics)
## Rows: 23,272
## Columns: 11
## $ name         <chr> "Spider-Man (Peter Parker)", "Captain America (Steven ...
## $ id           <chr> "Secret", "Public", "Public", "Public", "No Dual", "Pu...
## $ align        <fct> Good, Good, Neutral, Good, Good, Good, Good, Good, Neu...
## $ eye          <chr> "Hazel Eyes", "Blue Eyes", "Blue Eyes", "Blue Eyes", "...
## $ hair         <chr> "Brown Hair", "White Hair", "Black Hair", "Black Hair"...
## $ gender       <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, ...
## $ gsm          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ alive        <chr> "Living Characters", "Living Characters", "Living Char...
## $ appearances  <int> 4043, 3360, 3061, 2961, 2258, 2255, 2072, 2017, 1955, ...
## $ first_appear <chr> "Aug-62", "Mar-41", "Oct-74", "Mar-63", "Nov-50", "Nov...
## $ publisher    <chr> "marvel", "marvel", "marvel", "marvel", "marvel", "mar...

Check the levels of align

# List the categories of the align factor
levels(comics[["align"]])
## [1] "Bad"                "Good"               "Neutral"           
## [4] "Reformed Criminals"

Check the levels of gender

# List the categories of the gender factor
levels(comics[["gender"]])
## [1] "Female" "Male"   "Other"

Create a two-way contingency table of align (rows) by gender (columns)

# Cross-tabulate counts: align in rows, gender in columns
table(comics[["align"]], comics[["gender"]])
##                     
##                      Female Male Other
##   Bad                  1573 7561    32
##   Good                 2490 4809    17
##   Neutral               836 1799    17
##   Reformed Criminals      1    2     0

Remove the align level that has too few observations to be informative

# Drop the near-empty "Reformed Criminals" group, then discard the factor
# levels that are no longer used (filter() also drops rows where align is NA)
comics <- droplevels(filter(comics, align != "Reformed Criminals"))

Create side-by-side barchart of gender by alignment

# One group of bars per alignment, with genders placed side by side
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar(position = "dodge")

Create side-by-side barchart of alignment by gender, with the x-axis labels rotated 90 degrees

# One group of bars per gender, with alignments placed side by side;
# x-axis tick labels are rotated by 90 degrees
ggplot(comics, aes(x = gender, fill = align)) +
  geom_bar(position = "dodge") +
  theme(axis.text.x = element_text(angle = 90))

Conditional proportions

The following code generates three tables, in this order: proportions conditional on columns (gender), joint proportions, and proportions conditional on rows (align):

# Counts of align (rows) by gender (columns)
tab <- table(comics[["align"]], comics[["gender"]])
# Avoid scientific notation and print fewer digits
options(scipen = 999, digits = 3)
# Proportions within each column: every gender column sums to 1
prop.table(tab, margin = 2)
##          
##           Female  Male Other
##   Bad      0.321 0.534 0.485
##   Good     0.508 0.339 0.258
##   Neutral  0.171 0.127 0.258
# Joint proportions: all cells together sum to 1
prop.table(tab, margin = NULL)
##          
##             Female     Male    Other
##   Bad     0.082210 0.395160 0.001672
##   Good    0.130135 0.251333 0.000888
##   Neutral 0.043692 0.094021 0.000888
# Proportions within each row: every align row sums to 1
prop.table(tab, margin = 1)
##          
##            Female    Male   Other
##   Bad     0.17161 0.82490 0.00349
##   Good    0.34035 0.65733 0.00232
##   Neutral 0.31523 0.67836 0.00641

Counts vs proportions

Plot counts of gender by align as a stacked bar chart

# Stacked bars: bar height is the number of characters per alignment
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar()

Plot proportion of gender, conditional on align - Visualize in Bar plot

# Bars scaled to full height: segments show the gender share per alignment
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar(position = "fill")

Change the order of the levels in align to Bad, Neutral and Good

# Reorder the levels of align to Bad, Neutral, Good without touching the data.
# Assigning to levels() would only rename the existing levels by position
# (Bad, Good, Neutral), turning every "Good" row into "Neutral" and vice
# versa, so rebuild the factor with an explicit level order instead.
comics$align <- factor(comics$align, levels = c("Bad", "Neutral", "Good"))

Create bar plot of align

# Count of characters per alignment, each bar coloured by its own alignment
ggplot(comics, aes(x = align, fill = align)) +
  geom_bar()

Plot of alignment broken down by gender

# Alignment counts drawn in a separate panel for each gender
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar() +
  facet_wrap(~ gender)