# Exploratory Data Analysis (EDA) - Categorical Data
# Read the comics dataset and assign it to `comic`
comic <- read.csv("comics.csv")
# Open the spreadsheet-style viewer only in interactive sessions: View() needs
# a GUI/display, so calling it under Rscript or while knitting is at best
# pointless and may fail.
if (interactive()) {
  View(comic)
}
# Display the structure of `comic` (column names, types, first values)
str(comic)
## 'data.frame': 23272 obs. of 11 variables:
## $ name : chr "Spider-Man (Peter Parker)" "Captain America (Steven Rogers)" "Wolverine (James \\\"Logan\\\" Howlett)" "Iron Man (Anthony \\\"Tony\\\" Stark)" ...
## $ id : chr "Secret" "Public" "Public" "Public" ...
## $ align : chr "Good" "Good" "Neutral" "Good" ...
## $ eye : chr "Hazel Eyes" "Blue Eyes" "Blue Eyes" "Blue Eyes" ...
## $ hair : chr "Brown Hair" "White Hair" "Black Hair" "Black Hair" ...
## $ gender : chr "Male" "Male" "Male" "Male" ...
## $ gsm : chr NA NA NA NA ...
## $ alive : chr "Living Characters" "Living Characters" "Living Characters" "Living Characters" ...
## $ appearances : int 4043 3360 3061 2961 2258 2255 2072 2017 1955 1934 ...
## $ first_appear: chr "Aug-62" "Mar-41" "Oct-74" "Mar-63" ...
## $ publisher : chr "marvel" "marvel" "marvel" "marvel" ...
# Print only the column names: select zero rows while keeping the
# data-frame shape, so just the header is shown
comic[0L, , drop = FALSE]
## [1] name id align eye hair
## [6] gender gsm alive appearances first_appear
## [11] publisher
## <0 rows> (or 0-length row.names)
# List the distinct categories of `align` by viewing it as a factor
levels(as.factor(comic[["align"]]))
## [1] "Bad" "Good" "Neutral"
## [4] "Reformed Criminals"
# List the distinct categories of `gender` by viewing it as a factor
levels(as.factor(comic[["gender"]]))
## [1] "Female" "Male" "Other"
# Create a two-way contingency table: align in rows, gender in columns
a <- table(comic[["align"]], comic[["gender"]])
# Load dplyr
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Remove the align category with minimal data ("Reformed Criminals"), then
# discard any factor levels that are left unused.
# Note: filter() keeps only rows where the condition is TRUE, so rows whose
# align is NA are removed as well.
comic <- droplevels(
  filter(comic, align != "Reformed Criminals")
)
# Load ggplot2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
# Side-by-side bar chart of counts: alignment on the x axis, one bar per gender
ggplot(data = comic, mapping = aes(x = align, fill = gender)) +
  geom_bar(position = "dodge")

# Side-by-side bar chart of counts: gender on the x axis, one bar per
# alignment, with the x-axis labels rotated by 90 degrees
ggplot(data = comic, mapping = aes(x = gender, fill = align)) +
  geom_bar(position = "dodge") +
  theme(axis.text.x = element_text(angle = 90))

# Conditional proportions
# The code below produces tables of joint and conditional proportions.
# Counts of align (rows) by gender (columns)
tab <- table(comic[["align"]], comic[["gender"]])
# Print fewer digits and avoid scientific notation
options(digits = 3, scipen = 999)
# Joint proportions: each cell count divided by the grand total
prop.table(tab, margin = NULL)
##
## Female Male Other
## Bad 0.082210 0.395160 0.001672
## Good 0.130135 0.251333 0.000888
## Neutral 0.043692 0.094021 0.000888
# Proportions conditional on columns: each cell divided by its column total
prop.table(tab, margin = 2)
##
## Female Male Other
## Bad 0.321 0.534 0.485
## Good 0.508 0.339 0.258
## Neutral 0.171 0.127 0.258
# Counts vs. proportions
# Bar chart of counts per alignment, filled by gender (default bar position)
ggplot(data = comic, mapping = aes(x = align, fill = gender)) +
  geom_bar()

# Same chart with position = "fill": shows the proportion of each gender
# conditional on alignment instead of raw counts
ggplot(data = comic, mapping = aes(x = align, fill = gender)) +
  geom_bar(position = "fill")

# Reorder the levels of align as Bad, Neutral, Good
comic[["align"]] <- factor(
  comic[["align"]],
  levels = c("Bad", "Neutral", "Good")
)
# Bar chart of align counts, using the new level order on the x axis
ggplot(data = comic, mapping = aes(x = align)) +
  geom_bar()

# Bar chart of align counts, broken down into one panel per gender
ggplot(data = comic, mapping = aes(x = align)) +
  geom_bar() +
  facet_wrap(~ gender)
