#1
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Switch to the course folder, read the hero roster, and show its structure.
# NOTE(review): setwd() changes the working directory for the whole session
# and ties the script to this folder layout; kept because later steps may
# rely on it -- consider reading via a full path instead.
setwd(dir = "~/Data 101")
heroes_df <- read.csv(file = "heroes_information.csv")
str(object = heroes_df)
## 'data.frame': 734 obs. of 11 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ name : chr "A-Bomb" "Abe Sapien" "Abin Sur" "Abomination" ...
## $ Gender : chr "Male" "Male" "Male" "Male" ...
## $ Eye.color : chr "yellow" "blue" "blue" "green" ...
## $ Race : chr "Human" "Icthyo Sapien" "Ungaran" "Human / Radiation" ...
## $ Hair.color: chr "No Hair" "No Hair" "No Hair" "No Hair" ...
## $ Height : num 203 191 185 203 -99 193 -99 185 173 178 ...
## $ Publisher : chr "Marvel Comics" "Dark Horse Comics" "DC Comics" "Marvel Comics" ...
## $ Skin.color: chr "-" "blue" "red" "-" ...
## $ Alignment : chr "good" "good" "good" "bad" ...
## $ Weight : num 441 65 90 441 -99 122 -99 88 61 81 ...
# Number of rows and columns in the loaded data.
c(nrow(heroes_df), ncol(heroes_df))
## [1] 734 11
# Preview the first six records.
head(heroes_df, n = 6L)
## X name Gender Eye.color Race Hair.color Height
## 1 0 A-Bomb Male yellow Human No Hair 203
## 2 1 Abe Sapien Male blue Icthyo Sapien No Hair 191
## 3 2 Abin Sur Male blue Ungaran No Hair 185
## 4 3 Abomination Male green Human / Radiation No Hair 203
## 5 4 Abraxas Male blue Cosmic Entity Black -99
## 6 5 Absorbing Man Male blue Human No Hair 193
## Publisher Skin.color Alignment Weight
## 1 Marvel Comics - good 441
## 2 Dark Horse Comics blue good 65
## 3 DC Comics red good 90
## 4 Marvel Comics - bad 441
## 5 Marvel Comics - bad -99
## 6 Marvel Comics - bad 122
#Some problems: missing values are stored as placeholders, e.g. "-" in the skin color, race, and eye color columns, and -99 in the Height and Weight columns
#2
# Keep only the heroes whose Race is exactly "Human".
human_df <- filter(heroes_df, Race == "Human")
# One row per hero, so the row count is the number of humans (208).
nrow(human_df)
## [1] 208
# Count heroes per Race, order the counts ascending, and show the three
# largest groups.
tail(sort(table(heroes_df$Race)), n = 3)
##
## Mutant Human -
## 63 208 304
#Mutant is the second most common recorded race (63), behind Human (208); the largest group overall, "-" (304), is heroes with no race recorded
#3
# Tally heroes by the value in the Gender column.
table(heroes_df[["Gender"]])
##
## - Female Male
## 29 200 505
#200 female and 505 male (29 more have "-" in the Gender column)
#4
# Cross-tabulate Gender (rows) against Alignment (columns).
# NOTE(review): the result is stored under the name `table`, which shadows
# base::table as a variable. Later table(...) calls still find the function,
# but a distinct name would be clearer; the name is kept so any later use of
# this object keeps working.
table <- table(heroes_df$Gender, heroes_df$Alignment)
# View() opens a data-viewer window, so only call it when the session is
# interactive; batch or knit runs skip it instead of failing or popping up
# a window nobody sees.
if (interactive()) {
  View(table)
}
#5
# Keep only heroes with positive Height and Weight. The str() output for
# heroes_df shows -99 in both columns (apparently a missing-value marker),
# and this filter removes those rows.
New_df <- subset(heroes_df, Height > 0 & Weight > 0)
# View() opens a data-viewer window, so only call it when the session is
# interactive; batch or knit runs skip it instead of failing.
if (interactive()) {
  View(New_df)
}
#6
# Mean, median, and standard deviation of Weight over the filtered heroes.
summarise(
  New_df,
  MeanWeight = mean(Weight),
  MedianWeight = median(Weight),
  SDWeight = sd(Weight)
)
## MeanWeight MedianWeight SDWeight
## 1 112.1796 81 104.4227
# Mean, median, and standard deviation of Height over the filtered heroes.
summarise(
  New_df,
  MeanHeight = mean(Height),
  MedianHeight = median(Height),
  SDHeight = sd(Height)
)
## MeanHeight MedianHeight SDHeight
## 1 187.1239 183 58.99002
#7
# Histogram of the Weight column of New_df (the rows kept by the
# Height > 0 & Weight > 0 filter), drawn with base graphics.
hist(New_df$Weight)
#8
# Scatter plot of Weight (y) against Height (x), one point per hero.
ggplot(New_df, aes(Height, Weight)) + geom_point()
# Five-number summary plus mean for Height.
summary(New_df[["Height"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.2 173.5 183.0 187.1 188.0 975.0
# Five-number summary plus mean for Weight.
summary(New_df[["Weight"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 61.0 81.0 112.2 106.0 900.0
# Boxplots of Weight, one box per distinct Alignment value in New_df.
# NOTE(review): no "#9" marker appears between #8 and #10; presumably this
# plot is the answer to question 9 -- confirm.
boxplot(New_df$Weight ~ New_df$Alignment)
#10
# Count the filtered heroes per publisher.
table(New_df[["Publisher"]])
##
## Dark Horse Comics DC Comics George Lucas
## 1 11 143 5
## Image Comics Marvel Comics Shueisha Sony Pictures
## 1 318 4 1
## Star Trek Team Epic TV
## 2 4
# Restrict the filtered heroes to Marvel Comics and DC Comics, grouped by
# publisher and alignment. The grouping is kept on the result, so verbs
# applied to Publisher_df later operate per (Publisher, Alignment) group.
Publisher_df <- New_df %>%
  filter(Publisher %in% c("Marvel Comics", "DC Comics")) %>%
  group_by(Publisher, Alignment)
# View() opens a data-viewer window, so only call it when the session is
# interactive; batch or knit runs skip it instead of failing.
if (interactive()) {
  View(Publisher_df)
}
# Show the Marvel/DC heroes whose Alignment is "bad".
Publisher_df %>%
  filter(Alignment == "bad") %>%
  print()
## # A tibble: 132 x 11
## # Groups: Publisher, Alignment [2]
## X name Gender Eye.color Race Hair.color Height Publisher Skin.color
## <int> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 3 Abominat~ Male green Huma~ No Hair 203 Marvel C~ -
## 2 5 Absorbin~ Male blue Human No Hair 193 Marvel C~ -
## 3 11 Air-Walk~ Male blue - White 188 Marvel C~ -
## 4 12 Ajax Male brown Cybo~ Black 193 Marvel C~ -
## 5 19 Amazo Male red Andr~ - 257 DC Comics -
## 6 20 Ammo Male brown Human Black 188 Marvel C~ -
## 7 28 Annihilus Male green - No Hair 180 Marvel C~ -
## 8 34 Apocalyp~ Male red Muta~ Black 213 Marvel C~ grey
## 9 40 Arclight Female violet - Purple 173 Marvel C~ -
## 10 48 Atlas Male blue God ~ Brown 198 DC Comics -
## # ... with 122 more rows, and 2 more variables: Alignment <chr>, Weight <dbl>
# Show the Marvel/DC heroes whose Alignment is "good".
Publisher_df %>%
  filter(Alignment == "good") %>%
  print()
## # A tibble: 309 x 11
## # Groups: Publisher, Alignment [2]
## X name Gender Eye.color Race Hair.color Height Publisher Skin.color
## <int> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 0 A-Bomb Male yellow Human No Hair 203 Marvel C~ -
## 2 2 Abin Sur Male blue Unga~ No Hair 185 DC Comics red
## 3 7 Adam Str~ Male blue Human Blond 185 DC Comics -
## 4 8 Agent 13 Female blue - Blond 173 Marvel C~ -
## 5 9 Agent Bob Male brown Human Brown 178 Marvel C~ -
## 6 10 Agent Ze~ Male - - - 191 Marvel C~ -
## 7 13 Alan Sco~ Male blue - Blond 180 DC Comics -
## 8 16 Alfred P~ Male blue Human Black 178 DC Comics -
## 9 22 Angel Male blue - Blond 183 Marvel C~ -
## 10 24 Angel Du~ Female yellow Muta~ Black 165 Marvel C~ -
## # ... with 299 more rows, and 2 more variables: Alignment <chr>, Weight <dbl>
# Build one "Publisher Alignment" label per row, then count how many heroes
# fall under each combined label.
data_pub <- paste(Publisher_df[["Publisher"]], Publisher_df[["Alignment"]])
table(data_pub)
## data_pub
## DC Comics - DC Comics bad DC Comics good
## 1 40 94
## DC Comics neutral Marvel Comics - Marvel Comics bad
## 8 2 92
## Marvel Comics good Marvel Comics neutral
## 215 9