#1

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Load the heroes data set and take a first look at it.
# The file is read through an explicit path instead of calling setwd(), so the
# session's working directory is not changed as a side effect.
data_dir <- "~/Data 101"
heroes_df <- read.csv(file.path(data_dir, "heroes_information.csv"))

# Column names, types and the first few values of each column.
str(heroes_df)
## 'data.frame':    734 obs. of  11 variables:
##  $ X         : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ name      : chr  "A-Bomb" "Abe Sapien" "Abin Sur" "Abomination" ...
##  $ Gender    : chr  "Male" "Male" "Male" "Male" ...
##  $ Eye.color : chr  "yellow" "blue" "blue" "green" ...
##  $ Race      : chr  "Human" "Icthyo Sapien" "Ungaran" "Human / Radiation" ...
##  $ Hair.color: chr  "No Hair" "No Hair" "No Hair" "No Hair" ...
##  $ Height    : num  203 191 185 203 -99 193 -99 185 173 178 ...
##  $ Publisher : chr  "Marvel Comics" "Dark Horse Comics" "DC Comics" "Marvel Comics" ...
##  $ Skin.color: chr  "-" "blue" "red" "-" ...
##  $ Alignment : chr  "good" "good" "good" "bad" ...
##  $ Weight    : num  441 65 90 441 -99 122 -99 88 61 81 ...
# Number of rows and columns.
dim(heroes_df)
## [1] 734  11
# First six rows.
head(heroes_df)
##   X          name Gender Eye.color              Race Hair.color Height
## 1 0        A-Bomb   Male    yellow             Human    No Hair    203
## 2 1    Abe Sapien   Male      blue     Icthyo Sapien    No Hair    191
## 3 2      Abin Sur   Male      blue           Ungaran    No Hair    185
## 4 3   Abomination   Male     green Human / Radiation    No Hair    203
## 5 4       Abraxas   Male      blue     Cosmic Entity      Black    -99
## 6 5 Absorbing Man   Male      blue             Human    No Hair    193
##           Publisher Skin.color Alignment Weight
## 1     Marvel Comics          -      good    441
## 2 Dark Horse Comics       blue      good     65
## 3         DC Comics        red      good     90
## 4     Marvel Comics          -       bad    441
## 5     Marvel Comics          -       bad    -99
## 6     Marvel Comics          -       bad    122

# Some problems: missing values are coded as "-" in the Skin.color, Race, Eye.color, Hair.color and Gender columns, and as -99 in the Height and Weight columns.

#2

# Keep only the heroes whose Race is exactly "Human".
human_df <- filter(heroes_df, Race == "Human")

# 208 heroes are human (one row per hero).
nrow(human_df)
## [1] 208
# The three most frequent Race values, in ascending order of count.
tail(sort(table(heroes_df$Race)), 3)
## 
## Mutant  Human      - 
##     63    208    304

# Mutants (63) are the second most common recorded race after Humans (208); the largest group overall is "-" (304), i.e. heroes with no race recorded.

#3

# Tally heroes by the value recorded in the Gender column.
table(heroes_df[["Gender"]])
## 
##      - Female   Male 
##     29    200    505

# 200 female and 505 male (29 have no gender recorded, shown as "-")

#4

# Cross-tabulate Gender (rows) against Alignment (columns).
# The result gets its own name so that it does not shadow base::table().
gender_alignment <- table(heroes_df$Gender, heroes_df$Alignment)
# View() opens a data viewer and needs an interactive session, so it is
# skipped when the script is run non-interactively.
if (interactive()) {
  View(gender_alignment)
}

# Overall, there are more male than female superheroes in both the "good" and the "bad" alignment, and more male superheroes in total.

#5

# Keep only heroes whose Height and Weight are both positive, which removes
# the -99 entries seen in those columns.
# Standard `[` indexing is used because subset() is documented as a
# convenience for interactive use; which() discards rows where the condition
# is NA, matching what subset() does.
New_df <- heroes_df[which(heroes_df$Height > 0 & heroes_df$Weight > 0), ]
# View() needs an interactive session, so it is skipped otherwise.
if (interactive()) {
  View(New_df)
}

#6

# Mean, median and standard deviation of Weight for the filtered heroes.
summarise(
  New_df,
  MeanWeight = mean(Weight),
  MedianWeight = median(Weight),
  SDWeight = sd(Weight)
)
##   MeanWeight MedianWeight SDWeight
## 1   112.1796           81 104.4227
# The same three statistics for Height.
summarise(
  New_df,
  MeanHeight = mean(Height),
  MedianHeight = median(Height),
  SDHeight = sd(Height)
)
##   MeanHeight MedianHeight SDHeight
## 1   187.1239          183 58.99002

#7

# Plot the distribution of Weight among the heroes in New_df, using hist()'s
# default bins, title and axis labels.
hist(New_df$Weight)

# This histogram shows that most superheroes weigh between 0 and 200; beyond that, the frequency gradually tapers off as weight increases.

#8

# Scatter plot of Weight (y axis) against Height (x axis), one point per hero
# in New_df.
ggplot(New_df, aes(Height, Weight)) +
  geom_point()

# The scatter plot shows a positive relationship between height and weight: the shorter the hero, the less they tend to weigh. Most heroes cluster at the shorter, lighter end, with far fewer tall and heavy heroes.

#9

# Minimum, quartiles, median, mean and maximum of Height.
summary(New_df$Height)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    15.2   173.5   183.0   187.1   188.0   975.0
# The same summary for Weight.
summary(New_df$Weight)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     4.0    61.0    81.0   112.2   106.0   900.0
# Weight distribution for each Alignment group. Supplying the data frame via
# `data` keeps `New_df$` out of the formula, so the axes are labelled
# "Alignment" and "Weight" instead of "New_df$Alignment" and "New_df$Weight".
boxplot(Weight ~ Alignment, data = New_df)

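# The boxplot shows the distribution of Weight within each alignment group; in each group the bulk of the heroes weigh roughly between 50 and 200. Note that a boxplot shows how weights are spread, not how many heroes are in each group, so it cannot tell us which alignment is the most common.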

#10

# Number of heroes in New_df for each value of the Publisher column.
table(New_df[["Publisher"]])
## 
##                   Dark Horse Comics         DC Comics      George Lucas 
##                 1                11               143                 5 
##      Image Comics     Marvel Comics          Shueisha     Sony Pictures 
##                 1               318                 4                 1 
##         Star Trek      Team Epic TV 
##                 2                 4
# Restrict New_df to the two publishers being compared, grouped by publisher
# and alignment.
Publisher_df <- New_df %>%
  filter(Publisher %in% c("Marvel Comics", "DC Comics")) %>%
  group_by(Publisher, Alignment)
# View() needs an interactive session, so it is skipped otherwise.
if (interactive()) {
  View(Publisher_df)
}
# Rows for the "bad"-aligned heroes of both publishers.
print(filter(Publisher_df, Alignment == "bad"))
## # A tibble: 132 x 11
## # Groups:   Publisher, Alignment [2]
##        X name      Gender Eye.color Race  Hair.color Height Publisher Skin.color
##    <int> <chr>     <chr>  <chr>     <chr> <chr>       <dbl> <chr>     <chr>     
##  1     3 Abominat~ Male   green     Huma~ No Hair       203 Marvel C~ -         
##  2     5 Absorbin~ Male   blue      Human No Hair       193 Marvel C~ -         
##  3    11 Air-Walk~ Male   blue      -     White         188 Marvel C~ -         
##  4    12 Ajax      Male   brown     Cybo~ Black         193 Marvel C~ -         
##  5    19 Amazo     Male   red       Andr~ -             257 DC Comics -         
##  6    20 Ammo      Male   brown     Human Black         188 Marvel C~ -         
##  7    28 Annihilus Male   green     -     No Hair       180 Marvel C~ -         
##  8    34 Apocalyp~ Male   red       Muta~ Black         213 Marvel C~ grey      
##  9    40 Arclight  Female violet    -     Purple        173 Marvel C~ -         
## 10    48 Atlas     Male   blue      God ~ Brown         198 DC Comics -         
## # ... with 122 more rows, and 2 more variables: Alignment <chr>, Weight <dbl>
# Rows for the "good"-aligned heroes of both publishers.
print(Publisher_df %>% filter(Alignment == "good"))
## # A tibble: 309 x 11
## # Groups:   Publisher, Alignment [2]
##        X name      Gender Eye.color Race  Hair.color Height Publisher Skin.color
##    <int> <chr>     <chr>  <chr>     <chr> <chr>       <dbl> <chr>     <chr>     
##  1     0 A-Bomb    Male   yellow    Human No Hair       203 Marvel C~ -         
##  2     2 Abin Sur  Male   blue      Unga~ No Hair       185 DC Comics red       
##  3     7 Adam Str~ Male   blue      Human Blond         185 DC Comics -         
##  4     8 Agent 13  Female blue      -     Blond         173 Marvel C~ -         
##  5     9 Agent Bob Male   brown     Human Brown         178 Marvel C~ -         
##  6    10 Agent Ze~ Male   -         -     -             191 Marvel C~ -         
##  7    13 Alan Sco~ Male   blue      -     Blond         180 DC Comics -         
##  8    16 Alfred P~ Male   blue      Human Black         178 DC Comics -         
##  9    22 Angel     Male   blue      -     Blond         183 Marvel C~ -         
## 10    24 Angel Du~ Female yellow    Muta~ Black         165 Marvel C~ -         
## # ... with 299 more rows, and 2 more variables: Alignment <chr>, Weight <dbl>
# Label every row with its "<Publisher> <Alignment>" combination and count the
# rows per combination.
data_pub <- paste(Publisher_df$Publisher, Publisher_df$Alignment)

table(data_pub)
## data_pub
##           DC Comics -         DC Comics bad        DC Comics good 
##                     1                    40                    94 
##     DC Comics neutral       Marvel Comics -     Marvel Comics bad 
##                     8                     2                    92 
##    Marvel Comics good Marvel Comics neutral 
##                   215                     9

# The two publishers contribute different numbers of heroes, so raw counts do
# not answer a question about proportions. Also report each alignment's share
# within its publisher (margin = 1 makes every publisher row sum to 1).
round(
  prop.table(
    table(Publisher_df$Publisher, Publisher_df$Alignment),
    margin = 1
  ),
  3
)

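# Marvel Comics has more bad-aligned superheroes in absolute terms (92 vs. 40 for DC Comics) and also a slightly higher proportion of them: 92 / 318 is about 28.9% for Marvel Comics versus 40 / 143, about 28.0%, for DC Comics.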