Load the libraries
library(dplyr)
library(ggplot2)
Load comics.csv file into R
# Read the comics data set; `align` and `gender` are parsed as factors so
# their levels can be inspected and tabulated below.
comics <- read.csv(
  file = "../../DataSet/comics.csv",
  colClasses = c(align = "factor", gender = "factor")
)
# Compact column-by-column overview of the loaded data frame.
glimpse(comics)
## Rows: 23,272
## Columns: 11
## $ name <chr> "Spider-Man (Peter Parker)", "Captain America (Steven ...
## $ id <chr> "Secret", "Public", "Public", "Public", "No Dual", "Pu...
## $ align <fct> Good, Good, Neutral, Good, Good, Good, Good, Good, Neu...
## $ eye <chr> "Hazel Eyes", "Blue Eyes", "Blue Eyes", "Blue Eyes", "...
## $ hair <chr> "Brown Hair", "White Hair", "Black Hair", "Black Hair"...
## $ gender <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, ...
## $ gsm <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ alive <chr> "Living Characters", "Living Characters", "Living Char...
## $ appearances <int> 4043, 3360, 3061, 2961, 2258, 2255, 2072, 2017, 1955, ...
## $ first_appear <chr> "Aug-62", "Mar-41", "Oct-74", "Mar-63", "Nov-50", "Nov...
## $ publisher <chr> "marvel", "marvel", "marvel", "marvel", "marvel", "mar...
Print the first row of the data
# Show the first observation as a one-row data frame.
comics[1, , drop = FALSE]
## name id align eye hair gender gsm
## 1 Spider-Man (Peter Parker) Secret Good Hazel Eyes Brown Hair Male <NA>
## alive appearances first_appear publisher
## 1 Living Characters 4043 Aug-62 marvel
Check the levels of align
# List the categories of the alignment factor.
levels(comics[["align"]])
## [1] "Bad" "Good" "Neutral"
## [4] "Reformed Criminals"
Check the levels of gender
# List the categories of the gender factor.
levels(comics[["gender"]])
## [1] "Female" "Male" "Other"
Create a 2-way contingency table of align (rows) by gender (columns)
# Cross-tabulate alignment (rows) against gender (columns).
table(comics[["align"]], comics[["gender"]])
##
## Female Male Other
## Bad 1573 7561 32
## Good 2490 4809 17
## Neutral 836 1799 17
## Reformed Criminals 1 2 0
Remove align level with minimal data/info
# Keep only rows whose alignment is not "Reformed Criminals", then discard
# factor levels that no longer occur so they do not show up in tables/plots.
# Note: filter() also removes rows where the condition is NA (missing align).
comics <- droplevels(
  filter(comics, align != "Reformed Criminals")
)
Create side-by-side barchart of gender by alignment
# One group of bars per alignment; within each group, one bar per gender.
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar(position = "dodge")

Create side-by-side barchart of alignment by gender, with the x-axis labels rotated 90 degrees
# One group of bars per gender; within each group, one bar per alignment.
ggplot(comics, aes(x = gender, fill = align)) +
  geom_bar(position = "dodge") +
  # Rotate the x-axis tick labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90))

Conditional proportions
The following code generates tables of column-conditional, joint, and row-conditional proportions, in that order:
# Counts of alignment (rows) by gender (columns); reused by the tables below.
tab <- table(comics[["align"]], comics[["gender"]])
# Session-wide print settings: avoid scientific notation and show ~3
# significant digits.
options(scipen = 999, digits = 3)
# Proportions conditional on columns: each gender column sums to 1.
prop.table(tab, margin = 2)
##
## Female Male Other
## Bad 0.321 0.534 0.485
## Good 0.508 0.339 0.258
## Neutral 0.171 0.127 0.258
# Joint proportions: every cell divided by the grand total.
prop.table(tab, margin = NULL)
##
## Female Male Other
## Bad 0.082210 0.395160 0.001672
## Good 0.130135 0.251333 0.000888
## Neutral 0.043692 0.094021 0.000888
# Proportions conditional on rows: each alignment row sums to 1.
prop.table(tab, margin = 1)
##
## Female Male Other
## Bad 0.17161 0.82490 0.00349
## Good 0.34035 0.65733 0.00232
## Neutral 0.31523 0.67836 0.00641
Counts vs proportions
Plot counts of gender by align - visualize in a stacked bar plot
# Stacked bars: bar height is the count per alignment, split by gender.
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar()

Plot proportion of gender, conditional on align - Visualize in Bar plot
# position = "fill" rescales every bar to height 1, so segments show the
# share of each gender within an alignment.
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar(position = "fill")

Change the order of the levels in align to Bad, Neutral and Good
# Reorder the align levels to Bad < Neutral < Good without touching the data.
# Assigning to levels() would only rename the existing levels positionally
# (swapping the "Good" and "Neutral" labels on the rows); rebuilding the
# factor with an explicit `levels` order keeps every row's value intact.
comics$align <- factor(comics$align, levels = c("Bad", "Neutral", "Good"))
Create bar plot of align
# One bar per alignment, coloured by that same alignment.
ggplot(comics, aes(x = align, fill = align)) +
  geom_bar()

Plot of alignment broken down by gender
# Alignment counts drawn in a separate panel for each gender.
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar() +
  facet_wrap(~ gender)
