#Exploratory Data Analysis (EDA) - Categorical Data
# Read the dataset from comics.csv and assign it to the data frame `comic`
comic <- read.csv("comics.csv")
# View() opens a spreadsheet-style data viewer and therefore needs a GUI;
# only call it in an interactive session so the script can also be run
# non-interactively (e.g. via Rscript) without stopping here.
if (interactive()) {
  View(comic)
}

# Compactly display the structure of `comic` (size, column names and types)
utils::str(object = comic)
## 'data.frame':    23272 obs. of  11 variables:
##  $ name        : chr  "Spider-Man (Peter Parker)" "Captain America (Steven Rogers)" "Wolverine (James \\\"Logan\\\" Howlett)" "Iron Man (Anthony \\\"Tony\\\" Stark)" ...
##  $ id          : chr  "Secret" "Public" "Public" "Public" ...
##  $ align       : chr  "Good" "Good" "Neutral" "Good" ...
##  $ eye         : chr  "Hazel Eyes" "Blue Eyes" "Blue Eyes" "Blue Eyes" ...
##  $ hair        : chr  "Brown Hair" "White Hair" "Black Hair" "Black Hair" ...
##  $ gender      : chr  "Male" "Male" "Male" "Male" ...
##  $ gsm         : chr  NA NA NA NA ...
##  $ alive       : chr  "Living Characters" "Living Characters" "Living Characters" "Living Characters" ...
##  $ appearances : int  4043 3360 3061 2961 2258 2255 2072 2017 1955 1934 ...
##  $ first_appear: chr  "Aug-62" "Mar-41" "Oct-74" "Mar-63" ...
##  $ publisher   : chr  "marvel" "marvel" "marvel" "marvel" ...
# Print a zero-row slice of `comic`, i.e. the column names (header) only
head(x = comic, n = 0L)
##  [1] name         id           align        eye          hair        
##  [6] gender       gsm          alive        appearances  first_appear
## [11] publisher   
## <0 rows> (or 0-length row.names)
# List the distinct categories (factor levels) of the `align` column
levels(as.factor(comic[["align"]]))
## [1] "Bad"                "Good"               "Neutral"           
## [4] "Reformed Criminals"
# List the distinct categories (factor levels) of the `gender` column
levels(as.factor(comic[["gender"]]))
## [1] "Female" "Male"   "Other"
# Create a two-way contingency table: align in rows, gender in columns
a <- table(comic[["align"]], comic[["gender"]])

# Load dplyr
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Remove the align level with minimal data ("Reformed Criminals"), then drop
# any factor levels that are no longer used.
# Note: filter() keeps only rows where the condition is TRUE, so rows whose
# align is NA are removed as well.
comic <- comic %>%
  dplyr::filter(align != "Reformed Criminals") %>%
  droplevels()

# Load ggplot2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
# Side-by-side (dodged) bar chart: one group per alignment, one bar per gender
ggplot(data = comic, mapping = aes(x = align, fill = gender)) +
  geom_bar(position = "dodge")

# Side-by-side (dodged) bar chart: one group per gender, one bar per alignment,
# with the x-axis tick labels rotated by 90 degrees
ggplot(data = comic, mapping = aes(x = gender, fill = align)) +
  geom_bar(position = "dodge") +
  theme(axis.text.x = element_text(angle = 90))

#Conditional proportions
#Try the below code
#The following code generates tables of joint and conditional proportions, respectively:
# Contingency table of align (rows) by gender (columns)
tab <- table(comic[["align"]], comic[["gender"]])
# Print fewer digits and prefer fixed over scientific notation
options(digits = 3, scipen = 999)
# Joint proportions: each cell count divided by the grand total of the table
prop.table(tab)
##          
##             Female     Male    Other
##   Bad     0.082210 0.395160 0.001672
##   Good    0.130135 0.251333 0.000888
##   Neutral 0.043692 0.094021 0.000888
# Proportions conditional on columns: each cell divided by its column (gender)
# total, so every column sums to 1
prop.table(tab, margin = 2)
##          
##           Female  Male Other
##   Bad      0.321 0.534 0.485
##   Good     0.508 0.339 0.258
##   Neutral  0.171 0.127 0.258
#Counts vs. proportions
# Stacked bar chart of counts: alignment on the x axis, bars split by gender
ggplot(data = comic, mapping = aes(x = align, fill = gender)) +
  geom_bar()

# Bar chart of the proportion of each gender, conditional on align
# (position = "fill" scales every bar to the same total height)
ggplot(data = comic, mapping = aes(x = align, fill = gender)) +
  geom_bar(position = "fill")

# Change the order of the levels in align to Bad, Neutral, Good
comic[["align"]] <- factor(
  comic[["align"]],
  levels = c("Bad", "Neutral", "Good")
)

# Bar chart of the number of characters per alignment
ggplot(data = comic, mapping = aes(x = align)) +
  geom_bar()

# Bar chart of alignment, broken down into one panel per gender
ggplot(data = comic, mapping = aes(x = align)) +
  geom_bar() +
  facet_wrap(facets = ~ gender)