Name: Muhammad Showherda Ad-Din

Matric Number: S2117118

Occurrence: 1

Q1a

Chosen dataset: HairEyeColor

Dataset

# Load the built-in Hair x Eye x Sex contingency table, then print every
# Sex slice of it.
data(HairEyeColor)
print(HairEyeColor)
## , , Sex = Male
## 
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    32   11    10     3
##   Brown    53   50    25    15
##   Red      10   10     7     7
##   Blond     3   30     5     8
## 
## , , Sex = Female
## 
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    36    9     5     2
##   Brown    66   34    29    14
##   Red      16    7     7     7
##   Blond     4   64     5     8

Dataset Columns

# Show the category labels stored along each dimension (Hair, Eye, Sex).
attr(HairEyeColor, "dimnames")
## $Hair
## [1] "Black" "Brown" "Red"   "Blond"
## 
## $Eye
## [1] "Brown" "Blue"  "Hazel" "Green"
## 
## $Sex
## [1] "Male"   "Female"

Dataset Column Variable Types

# Report the storage type of the label vector held by each dimension.
lapply(
  X = dimnames(HairEyeColor),
  FUN = function(level_labels) typeof(level_labels)
)
## $Hair
## [1] "character"
## 
## $Eye
## [1] "character"
## 
## $Sex
## [1] "character"

Dataset Visualizations

Male Hair Color Dot Plot

# Dot plot of the number of males per hair colour.
# The "Male" slice is selected by name and each Hair row is summed across all
# Eye columns, so no level positions are hard-coded.
dotchart(
  x = rowSums(HairEyeColor[, , "Male"]),
  labels = dimnames(HairEyeColor)$Hair,
  main = "Male Hair Color"
)

Male Eye Color Dot Plot

# Dot plot of the number of males per eye colour.
# The "Male" slice is selected by name and each Eye column is summed across
# all Hair rows, so no level positions are hard-coded.
dotchart(
  x = colSums(HairEyeColor[, , "Male"]),
  labels = dimnames(HairEyeColor)$Eye,
  main = "Male Eye Color"
)

Female Hair Color Dot Plot

# Dot plot of the number of females per hair colour.
# The "Female" slice is selected by name and each Hair row is summed across
# all Eye columns, so no level positions are hard-coded.
dotchart(
  x = rowSums(HairEyeColor[, , "Female"]),
  labels = dimnames(HairEyeColor)$Hair,
  main = "Female Hair Color"
)

Female Eye Color Dot Plot

# Dot plot of the number of females per eye colour.
# The "Female" slice is selected by name and each Eye column is summed across
# all Hair rows, so no level positions are hard-coded.
dotchart(
  x = colSums(HairEyeColor[, , "Female"]),
  labels = dimnames(HairEyeColor)$Eye,
  main = "Female Eye Color"
)

Dataset Findings

Hair color prevalent among males

# Male totals per hair colour: take the "Male" slice by name and sum each Hair
# row over every Eye column. rowSums() already names the result by hair colour.
v <- rowSums(HairEyeColor[, , "Male"])
# Print every hair colour that reaches the maximum count (ties are all kept).
names(v)[v == max(v)]
## [1] "Brown"

Eye color prevalent among males

# Male totals per eye colour: take the "Male" slice by name and sum each Eye
# column over every Hair row. colSums() already names the result by eye colour.
v <- colSums(HairEyeColor[, , "Male"])
# Print every eye colour that reaches the maximum count (ties are all kept).
names(v)[v == max(v)]
## [1] "Blue"

Hair color prevalent among females

# Female totals per hair colour: take the "Female" slice by name and sum each
# Hair row over every Eye column. rowSums() already names the result.
v <- rowSums(HairEyeColor[, , "Female"])
# Print every hair colour that reaches the maximum count (ties are all kept).
names(v)[v == max(v)]
## [1] "Brown"

Eye color prevalent among females

# Female totals per eye colour: take the "Female" slice by name and sum each
# Eye column over every Hair row. colSums() already names the result.
v <- colSums(HairEyeColor[, , "Female"])
# Print every eye colour that reaches the maximum count (ties are all kept).
names(v)[v == max(v)]
## [1] "Brown"

Q1b

Chosen dataset: starwars (from the dplyr package)

Dataset

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the starwars data set and preview its first six rows.
data("starwars")
head(starwars, n = 6L)
## # A tibble: 6 × 14
##   name     height  mass hair_color  skin_color eye_color birth_year sex   gender
##   <chr>     <int> <dbl> <chr>       <chr>      <chr>          <dbl> <chr> <chr> 
## 1 Luke Sk…    172    77 blond       fair       blue            19   male  mascu…
## 2 C-3PO       167    75 <NA>        gold       yellow         112   none  mascu…
## 3 R2-D2        96    32 <NA>        white, bl… red             33   none  mascu…
## 4 Darth V…    202   136 none        white      yellow          41.9 male  mascu…
## 5 Leia Or…    150    49 brown       light      brown           19   fema… femin…
## 6 Owen La…    178   120 brown, grey light      blue            52   male  mascu…
## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>

filter()

The filter() function subsets a data set, keeping only the rows that satisfy every condition passed in as arguments. The following command keeps only the rows whose species is “Droid”.

# Keep only the rows whose species is "Droid".
starwars %>%
  filter(species == "Droid")
## # A tibble: 6 × 14
##   name   height  mass hair_color skin_color  eye_color birth_year sex   gender  
##   <chr>   <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> <chr>   
## 1 C-3PO     167    75 <NA>       gold        yellow           112 none  masculi…
## 2 R2-D2      96    32 <NA>       white, blue red               33 none  masculi…
## 3 R5-D4      97    32 <NA>       white, red  red               NA none  masculi…
## 4 IG-88     200   140 none       metal       red               15 none  masculi…
## 5 R4-P17     96    NA none       silver, red red, blue         NA none  feminine
## 6 BB8        NA    NA none       none        black             NA none  masculi…
## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>

arrange()

arrange() sorts the rows of a data set by the values of the columns passed in as arguments. The default sort order is ascending; wrap a column in desc() to sort it in descending order. The following command orders the starwars data set by birth year in descending order.

# Sort the rows by birth year, descending.
starwars %>%
  arrange(desc(birth_year))
## # A tibble: 87 × 14
##    name    height  mass hair_color skin_color  eye_color birth_year sex   gender
##    <chr>    <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> <chr> 
##  1 Yoda        66    17 white      green       brown            896 male  mascu…
##  2 Jabba …    175  1358 <NA>       green-tan,… orange           600 herm… mascu…
##  3 Chewba…    228   112 brown      unknown     blue             200 male  mascu…
##  4 C-3PO      167    75 <NA>       gold        yellow           112 none  mascu…
##  5 Dooku      193    80 white      fair        brown            102 male  mascu…
##  6 Qui-Go…    193    89 brown      fair        blue              92 male  mascu…
##  7 Ki-Adi…    198    82 white      pale        yellow            92 male  mascu…
##  8 Finis …    170    NA blond      fair        blue              91 male  mascu…
##  9 Palpat…    170    75 grey       pale        yellow            82 male  mascu…
## 10 Cliegg…    183    NA brown      fair        blue              82 male  mascu…
## # … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
## #   films <list>, vehicles <list>, starships <list>

mutate()

mutate() adds new variables (columns) to a data set while keeping the existing ones; the original data set itself is not modified. The following command adds a “height_in_inches” column computed from the existing height column, which stores heights in centimeters.

# Append a column that is the height column divided by 2.54.
starwars %>%
  mutate(height_in_inches = height / 2.54)
## # A tibble: 87 × 15
##    name    height  mass hair_color  skin_color eye_color birth_year sex   gender
##    <chr>    <int> <dbl> <chr>       <chr>      <chr>          <dbl> <chr> <chr> 
##  1 Luke S…    172    77 blond       fair       blue            19   male  mascu…
##  2 C-3PO      167    75 <NA>        gold       yellow         112   none  mascu…
##  3 R2-D2       96    32 <NA>        white, bl… red             33   none  mascu…
##  4 Darth …    202   136 none        white      yellow          41.9 male  mascu…
##  5 Leia O…    150    49 brown       light      brown           19   fema… femin…
##  6 Owen L…    178   120 brown, grey light      blue            52   male  mascu…
##  7 Beru W…    165    75 brown       light      blue            47   fema… femin…
##  8 R5-D4       97    32 <NA>        white, red red             NA   none  mascu…
##  9 Biggs …    183    84 black       light      brown           24   male  mascu…
## 10 Obi-Wa…    182    77 auburn, wh… fair       blue-gray       57   male  mascu…
## # … with 77 more rows, and 6 more variables: homeworld <chr>, species <chr>,
## #   films <list>, vehicles <list>, starships <list>, height_in_inches <dbl>

select()

select() selects columns or variables in a data set. The following command shows how you can view the skin_color, eye_color, and birth_year columns separately by using select().

# Keep only the three named columns, in the order listed.
starwars %>%
  select(skin_color, eye_color, birth_year)
## # A tibble: 87 × 3
##    skin_color  eye_color birth_year
##    <chr>       <chr>          <dbl>
##  1 fair        blue            19  
##  2 gold        yellow         112  
##  3 white, blue red             33  
##  4 white       yellow          41.9
##  5 light       brown           19  
##  6 light       blue            52  
##  7 light       blue            47  
##  8 white, red  red             NA  
##  9 light       brown           24  
## 10 fair        blue-gray       57  
## # … with 77 more rows

summarise()

summarise() returns a new data set that reduces the input to summary statistics. When the data are grouped, it returns one row for each combination of the grouping variables. The following command groups the starwars data set by species and computes the mean mass for each species.

# Mean mass per species. na.rm = TRUE drops missing masses before averaging,
# so one unrecorded mass no longer turns the whole species mean into NA.
# A species with no recorded mass at all shows NaN (mean of an empty set).
starwars %>%
  group_by(species) %>%
  summarise(mean = mean(mass, na.rm = TRUE))
## # A tibble: 38 × 2
##    species    mean
##    <chr>     <dbl>
##  1 Aleena     15  
##  2 Besalisk  102  
##  3 Cerean     82  
##  4 Chagrian  NaN  
##  5 Clawdite   55  
##  6 Droid      69.8
##  7 Dug        40  
##  8 Ewok       20  
##  9 Geonosian  80  
## 10 Gungan     74  
## # … with 28 more rows