Load the comics.csv file into R

# Read the comics data from the working directory into a data frame.
comics <- read.csv(file = "comics.csv")

Preview the comics.csv dataset

# Show the first six rows (the default for head()).
head(comics, n = 6L)
##                                    name      id   align        eye       hair
## 1             Spider-Man (Peter Parker)  Secret    Good Hazel Eyes Brown Hair
## 2       Captain America (Steven Rogers)  Public    Good  Blue Eyes White Hair
## 3 Wolverine (James \\"Logan\\" Howlett)  Public Neutral  Blue Eyes Black Hair
## 4   Iron Man (Anthony \\"Tony\\" Stark)  Public    Good  Blue Eyes Black Hair
## 5                   Thor (Thor Odinson) No Dual    Good  Blue Eyes Blond Hair
## 6            Benjamin Grimm (Earth-616)  Public    Good  Blue Eyes    No Hair
##   gender  gsm             alive appearances first_appear publisher
## 1   Male <NA> Living Characters        4043       Aug-62    marvel
## 2   Male <NA> Living Characters        3360       Mar-41    marvel
## 3   Male <NA> Living Characters        3061       Oct-74    marvel
## 4   Male <NA> Living Characters        2961       Mar-63    marvel
## 5   Male <NA> Living Characters        2258       Nov-50    marvel
## 6   Male <NA> Living Characters        2255       Nov-61    marvel

Print only the column names of the data (a zero-row head, then colnames)

# Ask for zero rows so that only the column header is printed.
head(comics, n = 0)
##  [1] name         id           align        eye          hair        
##  [6] gender       gsm          alive        appearances  first_appear
## [11] publisher   
## <0 rows> (or 0-length row.names)
# List the column names of the data frame as a character vector.
names(comics)
##  [1] "name"         "id"           "align"        "eye"          "hair"        
##  [6] "gender"       "gsm"          "alive"        "appearances"  "first_appear"
## [11] "publisher"

Check the levels of align

# Store align as a factor so that its distinct categories are available
# through levels().
comics[["align"]] <- as.factor(comics[["align"]])
levels(comics[["align"]])
## [1] "Bad"                "Good"               "Neutral"           
## [4] "Reformed Criminals"

Check the levels of gender

# Store gender as a factor so that its distinct categories are available
# through levels().
comics[["gender"]] <- as.factor(comics[["gender"]])
levels(comics[["gender"]])
## [1] "Female" "Male"   "Other"

Note: a column must be converted to a factor before levels() can return its levels.

Create a 2-way contingency table

# Two-way contingency table: alignment in rows, gender in columns.
table(comics[["align"]], comics[["gender"]])
##                     
##                      Female Male Other
##   Bad                  1573 7561    32
##   Good                 2490 4809    17
##   Neutral               836 1799    17
##   Reformed Criminals      1    2     0

Load dplyr package

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Remove the align level that has too few observations to be informative

# Count the characters in each alignment category.
table(comics[["align"]])
## 
##                Bad               Good            Neutral Reformed Criminals 
##               9615               7468               2773                  3
# Build a filtered copy without the "Reformed Criminals" rows, then drop the
# factor levels that are left unused so the level no longer exists in the copy.
scomics <- droplevels(
  filter(comics, align != "Reformed Criminals")
)
# Number of alignment levels that remain in the filtered copy.
length(levels(scomics$align))
## [1] 3
# Alignment counts in the filtered copy.
table(scomics[["align"]])
## 
##     Bad    Good Neutral 
##    9615    7468    2773

Load ggplot2

library(ggplot2)

Create side-by-side barchart of gender by alignment

# Side-by-side bars: one group per alignment, one bar per gender.
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar(position = "dodge")

Create a side-by-side bar chart of gender by alignment with the x-axis labels rotated 90 degrees

  # Side-by-side bar chart of gender within each alignment, with axis titles
  # and the x-axis tick labels rotated 90 degrees.
  # The chain has to start from the data frame: ggplot(aes(...)) on its own
  # hands the aesthetic mapping to the `data` argument and fails.
  comics %>%
    ggplot(aes(x = align, fill = gender)) +
    geom_bar(position = "dodge") +
    xlab("Alignment") +
    ylab("Count") +
    theme(axis.text.x = element_text(angle = 90))
# Same dodged bar chart, restyled through theme(): blue rotated x-axis labels,
# white long-dashed grid lines, and a black panel with a dotted blue border.
# NOTE(review): element_rect(size = ) is reported as deprecated in favour of
# `linewidth` in ggplot2 >= 3.4.0 -- confirm the installed version before
# switching.
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar(position = "dodge") +
  xlab("Alignment") +
  ylab("Count") +
  theme(
    axis.text.x = element_text(angle = 90, size = 10, colour = "Blue"),
    panel.grid = element_line(linetype = "longdash", color = "white"),
    panel.background = element_rect(
      colour = "blue",
      fill = "black",
      linetype = "dotted",
      size = 3
    )
  )

Conditional proportions: the following code generates tables of joint and conditional proportions, respectively.

# Cross-tabulate alignment (rows) against gender (columns).
tab <- table(comics[["align"]], comics[["gender"]])
# Suppress scientific notation and print fewer digits.
options(digits = 3, scipen = 999)
# Joint proportions: every cell is divided by the grand total.
prop.table(tab, margin = NULL)
##                     
##                         Female      Male     Other
##   Bad                0.0821968 0.3950985 0.0016722
##   Good               0.1301144 0.2512933 0.0008883
##   Neutral            0.0436850 0.0940064 0.0008883
##   Reformed Criminals 0.0000523 0.0001045 0.0000000
# Proportions conditional on the columns: each column sums to 1.
prop.table(tab, margin = 2)
##                     
##                        Female     Male    Other
##   Bad                0.321020 0.533554 0.484848
##   Good               0.508163 0.339355 0.257576
##   Neutral            0.170612 0.126949 0.257576
##   Reformed Criminals 0.000204 0.000141 0.000000

Counts vs. proportions: plot the counts of gender by align as a stacked bar plot

# Bar chart of raw counts per alignment, filled by gender, using geom_bar()'s
# default position.
ggplot(data = comics, mapping = aes(x = align, fill = gender)) +
  geom_bar()

Plot proportion of gender, conditional on align - Visualize in Bar plot

# Every bar is scaled to full height, so the fill shows the gender share
# within each alignment.
ggplot(data = comics, mapping = aes(x = align, fill = gender)) +
  geom_bar(position = "fill")

Change the order of the levels in align as Bad, Neutral and Good

# Reorder the align levels so that Bad, Neutral and Good come first.
# factor() turns every value that is not listed in `levels` into NA, and
# `comics` still contains "Reformed Criminals" rows (only `scomics` was
# filtered). Any remaining existing levels are therefore appended after the
# three requested ones instead of being silently dropped.
comics$align <- factor(
  comics$align,
  levels = base::union(c("Bad", "Neutral", "Good"), levels(comics$align))
)

Create bar plot of align

# Proportion bar chart again, now drawn with the reordered align levels.
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar(position = "fill")

Plot of alignment broken down by gender

# Side-by-side count bars per alignment, one bar per gender, drawn with the
# reordered align levels.
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar(position = "dodge")