Chosen dataset: HairEyeColor
# Load the built-in 3-way contingency table (Hair x Eye x Sex) and display it.
data("HairEyeColor")
print(HairEyeColor)
## , , Sex = Male
##
## Eye
## Hair Brown Blue Hazel Green
## Black 32 11 10 3
## Brown 53 50 25 15
## Red 10 10 7 7
## Blond 3 30 5 8
##
## , , Sex = Female
##
## Eye
## Hair Brown Blue Hazel Green
## Black 36 9 5 2
## Brown 66 34 29 14
## Red 16 7 7 7
## Blond 4 64 5 8
# List the category labels along each of the table's three dimensions.
print(dimnames(HairEyeColor))
## $Hair
## [1] "Black" "Brown" "Red" "Blond"
##
## $Eye
## [1] "Brown" "Blue" "Hazel" "Green"
##
## $Sex
## [1] "Male" "Female"
# Report the storage type of the labels on each dimension.
dim_labels <- dimnames(HairEyeColor)
lapply(dim_labels, typeof)
## $Hair
## [1] "character"
##
## $Eye
## [1] "character"
##
## $Sex
## [1] "character"
# Marginal totals per sex: summing the 3-way table over the eye dimension gives
# a Hair x Sex matrix, and summing over the hair dimension gives an Eye x Sex
# matrix. This replaces sixteen hand-indexed sum() calls and selects each sex
# by name instead of by position.
hair_by_sex <- apply(HairEyeColor, c(1, 3), sum)
eye_by_sex <- apply(HairEyeColor, c(2, 3), sum)

# One dot chart per (sex, attribute) pair, in the order:
# male hair, male eye, female hair, female eye.
dotchart(
  x = hair_by_sex[, "Male"],
  labels = rownames(hair_by_sex),
  main = "Male Hair Color"
)
dotchart(
  x = eye_by_sex[, "Male"],
  labels = rownames(eye_by_sex),
  main = "Male Eye Color"
)
dotchart(
  x = hair_by_sex[, "Female"],
  labels = rownames(hair_by_sex),
  main = "Female Hair Color"
)
dotchart(
  x = eye_by_sex[, "Female"],
  labels = rownames(eye_by_sex),
  main = "Female Eye Color"
)
# Male totals per hair colour: row sums of the Hair x Eye slice for males.
# rowSums() carries the hair-colour labels over as names.
v <- rowSums(HairEyeColor[, , "Male"])
# Most common hair colour(s); comparing against max() keeps every tied winner.
names(v)[v == max(v)]
## [1] "Brown"
# Male totals per eye colour: column sums of the Hair x Eye slice for males.
# colSums() carries the eye-colour labels over as names.
v <- colSums(HairEyeColor[, , "Male"])
# Most common eye colour(s); comparing against max() keeps every tied winner.
names(v)[v == max(v)]
## [1] "Blue"
# Female totals per hair colour: row sums of the Hair x Eye slice for females.
# rowSums() carries the hair-colour labels over as names.
v <- rowSums(HairEyeColor[, , "Female"])
# Most common hair colour(s); comparing against max() keeps every tied winner.
names(v)[v == max(v)]
## [1] "Brown"
# Female totals per eye colour: column sums of the Hair x Eye slice for females.
# colSums() carries the eye-colour labels over as names.
v <- colSums(HairEyeColor[, , "Female"])
# Most common eye colour(s); comparing against max() keeps every tied winner.
names(v)[v == max(v)]
## [1] "Brown"
Chosen dataset: starwars (under dplyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the starwars tibble and preview its first six rows.
data(starwars)
starwars %>%
  head()
## # A tibble: 6 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…
## 4 Darth V… 202 136 none white yellow 41.9 male mascu…
## 5 Leia Or… 150 49 brown light brown 19 fema… femin…
## 6 Owen La… 178 120 brown, grey light blue 52 male mascu…
## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
The filter() function subsets a data set, keeping only the rows that satisfy every condition passed in as an argument. The following command keeps only the rows whose species is “Droid”.
# Keep only the rows whose species is "Droid".
starwars %>%
  filter(species == "Droid")
## # A tibble: 6 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 C-3PO 167 75 <NA> gold yellow 112 none masculi…
## 2 R2-D2 96 32 <NA> white, blue red 33 none masculi…
## 3 R5-D4 97 32 <NA> white, red red NA none masculi…
## 4 IG-88 200 140 none metal red 15 none masculi…
## 5 R4-P17 96 NA none silver, red red, blue NA none feminine
## 6 BB8 NA NA none none black NA none masculi…
## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
arrange() sorts the rows of a data set by the values of the columns passed in as arguments. Sorting is ascending by default; wrap a column in desc() to sort it in descending order. The following command orders the starwars data set by birth year in descending order.
# Sort the rows by birth_year, largest value first.
starwars %>%
  arrange(desc(birth_year))
## # A tibble: 87 × 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Yoda 66 17 white green brown 896 male mascu…
## 2 Jabba … 175 1358 <NA> green-tan,… orange 600 herm… mascu…
## 3 Chewba… 228 112 brown unknown blue 200 male mascu…
## 4 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 5 Dooku 193 80 white fair brown 102 male mascu…
## 6 Qui-Go… 193 89 brown fair blue 92 male mascu…
## 7 Ki-Adi… 198 82 white pale yellow 92 male mascu…
## 8 Finis … 170 NA blond fair blue 91 male mascu…
## 9 Palpat… 170 75 grey pale yellow 82 male mascu…
## 10 Cliegg… 183 NA brown fair blue 82 male mascu…
## # … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
## # films <list>, vehicles <list>, starships <list>
mutate() adds new variables (columns) to a data set while keeping the existing ones; the result is returned as a new data set and the original is left unchanged. The following command adds a “height_in_inches” column computed from the existing height column, which stores heights in centimeters.
# Append a height_in_inches column: height divided by 2.54.
starwars %>%
  mutate(height_in_inches = height / 2.54)
## # A tibble: 87 × 15
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke S… 172 77 blond fair blue 19 male mascu…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu…
## 3 R2-D2 96 32 <NA> white, bl… red 33 none mascu…
## 4 Darth … 202 136 none white yellow 41.9 male mascu…
## 5 Leia O… 150 49 brown light brown 19 fema… femin…
## 6 Owen L… 178 120 brown, grey light blue 52 male mascu…
## 7 Beru W… 165 75 brown light blue 47 fema… femin…
## 8 R5-D4 97 32 <NA> white, red red NA none mascu…
## 9 Biggs … 183 84 black light brown 24 male mascu…
## 10 Obi-Wa… 182 77 auburn, wh… fair blue-gray 57 male mascu…
## # … with 77 more rows, and 6 more variables: homeworld <chr>, species <chr>,
## # films <list>, vehicles <list>, starships <list>, height_in_inches <dbl>
select() keeps only the named columns (variables) of a data set. The following command returns just the skin_color, eye_color, and birth_year columns.
# Keep only the three named columns, in the order given.
starwars %>%
  select(skin_color, eye_color, birth_year)
## # A tibble: 87 × 3
## skin_color eye_color birth_year
## <chr> <chr> <dbl>
## 1 fair blue 19
## 2 gold yellow 112
## 3 white, blue red 33
## 4 white yellow 41.9
## 5 light brown 19
## 6 light blue 52
## 7 light blue 47
## 8 white, red red NA
## 9 light brown 24
## 10 fair blue-gray 57
## # … with 77 more rows
summarise() collapses a data set into a new, smaller one: one row per group when the data are grouped, or a single row otherwise. The following command groups the starwars data set by species and computes the mean mass for each species.
# Mean mass per species. na.rm = TRUE drops characters whose mass is not
# recorded, so a single missing value no longer turns the whole species mean
# into NA. A species with no recorded mass at all yields NaN.
starwars %>%
  group_by(species) %>%
  summarise(mean = mean(mass, na.rm = TRUE))
## # A tibble: 38 × 2
## species mean
## <chr> <dbl>
## 1 Aleena 15
## 2 Besalisk 102
## 3 Cerean 82
## 4 Chagrian NaN
## 5 Clawdite 55
## 6 Droid 69.8
## 7 Dug 40
## 8 Ewok 20
## 9 Geonosian 80
## 10 Gungan 74
## # … with 28 more rows