data folder inside
first_project folder. NOT materials
folderstarwars in data folder.ggpubr package by
install.packages("ggpubr")RColorBrewer package by
install.packages("RColorBrewer")week_1_materials.Rmd in
materials folder.data-visualization.pdf in pics
folder, which is under materials folder. Follow the instructions and run
week1_materials.Rmd successfully
Publishing week1_materials.html to your
rpub account
Open a new working file named week1_homework.R
to practice writing R code. Look at the R code blocks in
week1_materials.Rmd. I suggest copying all the R blocks to
week1_homework.R, then working in
week1_homework.R.
library(dplyr)
library(ggpubr)
library(ggplot2)
library(RColorBrewer)
library(reactable)
library(reactablefmtr)
library(htmltools)
library(fontawesome)
library(DT)
library(klippy) We are using the dplyr library and the dataset named
starwars for data manipulation. starwars is
stored in the data folder. Before you run the
R code, remember to call library(dplyr)
starwars from data folder.starwars <- read.csv("../data/starwars.csv")
starwarshead(starwars)
# 1. Read data `starwars` from data folder.
starwars <- read.csv("../data/starwars.csv")
# 2. Print the first 6 rows of `starwars`
head(starwars)## name height mass hair_color skin_color eye_color birth_year
## 1 Luke Skywalker 172 77 blond fair blue 19.0
## 2 C-3PO 167 75 <NA> gold yellow 112.0
## 3 R2-D2 96 32 <NA> white, blue red 33.0
## 4 Darth Vader 202 136 none white yellow 41.9
## 5 Leia Organa 150 49 brown light brown 19.0
## 6 Owen Lars 178 120 brown, grey light blue 52.0
## sex gender homeworld species
## 1 male masculine Tatooine Human
## 2 none masculine Tatooine Droid
## 3 none masculine Naboo Droid
## 4 male masculine Tatooine Human
## 5 female feminine Alderaan Human
## 6 male masculine Tatooine Human
## films
## 1 The Empire Strikes Back, Revenge of the Sith, Return of the Jedi, A New Hope, The Force Awakens
## 2 The Empire Strikes Back, Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, A New Hope
## 3 The Empire Strikes Back, Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, A New Hope, The Force Awakens
## 4 The Empire Strikes Back, Revenge of the Sith, Return of the Jedi, A New Hope
## 5 The Empire Strikes Back, Revenge of the Sith, Return of the Jedi, A New Hope, The Force Awakens
## 6 Attack of the Clones, Revenge of the Sith, A New Hope
## vehicles starships
## 1 Snowspeeder, Imperial Speeder Bike X-wing, Imperial shuttle
## 2
## 3
## 4 TIE Advanced x1
## 5 Imperial Speeder Bike
## 6
glimpse()starwars.dplyr::glimpse(starwars)
starwars.summary(starwars)
starwars.colnames(starwars)
# 1. Checking structure of `starwars`. You can also use str(starwars)
dplyr::glimpse(starwars)## Rows: 87
## Columns: 14
## $ name <chr> "Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Or…
## $ height <int> 172, 167, 96, 202, 150, 178, 165, 97, 183, 182, 188, 180, 2…
## $ mass <dbl> 77.0, 75.0, 32.0, 136.0, 49.0, 120.0, 75.0, 32.0, 84.0, 77.…
## $ hair_color <chr> "blond", NA, NA, "none", "brown", "brown, grey", "brown", N…
## $ skin_color <chr> "fair", "gold", "white, blue", "white", "light", "light", "…
## $ eye_color <chr> "blue", "yellow", "red", "yellow", "brown", "blue", "blue",…
## $ birth_year <dbl> 19.0, 112.0, 33.0, 41.9, 19.0, 52.0, 47.0, NA, 24.0, 57.0, …
## $ sex <chr> "male", "none", "none", "male", "female", "male", "female",…
## $ gender <chr> "masculine", "masculine", "masculine", "masculine", "femini…
## $ homeworld <chr> "Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", "T…
## $ species <chr> "Human", "Droid", "Droid", "Human", "Human", "Human", "Huma…
## $ films <chr> "The Empire Strikes Back, Revenge of the Sith, Return of th…
## $ vehicles <chr> "Snowspeeder, Imperial Speeder Bike", "", "", "", "Imperial…
## $ starships <chr> "X-wing, Imperial shuttle", "", "", "TIE Advanced x1", "", …
# 2. Checking summary of `starwars`.
summary(starwars)## name height mass hair_color
## Length:87 Min. : 66.0 Min. : 15.00 Length:87
## Class :character 1st Qu.:167.0 1st Qu.: 55.60 Class :character
## Mode :character Median :180.0 Median : 79.00 Mode :character
## Mean :174.4 Mean : 97.31
## 3rd Qu.:191.0 3rd Qu.: 84.50
## Max. :264.0 Max. :1358.00
## NA's :6 NA's :28
## skin_color eye_color birth_year sex
## Length:87 Length:87 Min. : 8.00 Length:87
## Class :character Class :character 1st Qu.: 35.00 Class :character
## Mode :character Mode :character Median : 52.00 Mode :character
## Mean : 87.57
## 3rd Qu.: 72.00
## Max. :896.00
## NA's :44
## gender homeworld species films
## Length:87 Length:87 Length:87 Length:87
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## vehicles starships
## Length:87 Length:87
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# 3. List of column names of `starwars`.
colnames(starwars)## [1] "name" "height" "mass" "hair_color" "skin_color"
## [6] "eye_color" "birth_year" "sex" "gender" "homeworld"
## [11] "species" "films" "vehicles" "starships"
select()starwars[c(1:4), c('name', 'homeworld')]
dplyr packagestarwars %>% slice(1:4) %>% dplyr::select(name, homeworld)
# Same selection by position: name is column 1 and homeworld is column 10
# (column 9 is gender).
starwars %>% slice(1:4) %>% dplyr::select(1, 10)
starwars %>% slice(1:4) %>% dplyr::select(-name)
starwars %>% slice(1:4) %>% dplyr::select(contains("_"))
starwars %>% slice(1:4) %>% dplyr::select(starts_with("s"))
# Select the first 4 rows and the columns name, homeworld with base R
starwars[c(1:4), c('name', 'homeworld')]## name homeworld
## 1 Luke Skywalker Tatooine
## 2 C-3PO Tatooine
## 3 R2-D2 Naboo
## 4 Darth Vader Tatooine
# Select the first 4 rows and the columns name, homeworld with base R,
# using column positions instead of names.
# homeworld is column 10 of `starwars`; column 9 is gender, so c(1, 9)
# would return name and gender instead.
starwars[1:4, c(1, 10)]
## name homeworld
## 1 Luke Skywalker Tatooine
## 2 C-3PO Tatooine
## 3 R2-D2 Naboo
## 4 Darth Vader Tatooine
# using dplyr
# Select the first 4 rows and the columns name, homeworld
starwars %>% slice(1:4) %>% dplyr::select(name, homeworld)## name homeworld
## 1 Luke Skywalker Tatooine
## 2 C-3PO Tatooine
## 3 R2-D2 Naboo
## 4 Darth Vader Tatooine
# Select the first 4 rows and the columns name, homeworld by position.
# homeworld is column 10 of `starwars`; column 9 is gender, so select(1, 9)
# would return name and gender instead.
starwars %>% slice(1:4) %>% dplyr::select(1, 10)
## name homeworld
## 1 Luke Skywalker Tatooine
## 2 C-3PO Tatooine
## 3 R2-D2 Naboo
## 4 Darth Vader Tatooine
# Select the first 4 rows and choose all columns BUT name
starwars %>% slice(1:4) %>% dplyr::select(-name)## height mass hair_color skin_color eye_color birth_year sex gender
## 1 172 77 blond fair blue 19.0 male masculine
## 2 167 75 <NA> gold yellow 112.0 none masculine
## 3 96 32 <NA> white, blue red 33.0 none masculine
## 4 202 136 none white yellow 41.9 male masculine
## homeworld species
## 1 Tatooine Human
## 2 Tatooine Droid
## 3 Naboo Droid
## 4 Tatooine Human
## films
## 1 The Empire Strikes Back, Revenge of the Sith, Return of the Jedi, A New Hope, The Force Awakens
## 2 The Empire Strikes Back, Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, A New Hope
## 3 The Empire Strikes Back, Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, A New Hope, The Force Awakens
## 4 The Empire Strikes Back, Revenge of the Sith, Return of the Jedi, A New Hope
## vehicles starships
## 1 Snowspeeder, Imperial Speeder Bike X-wing, Imperial shuttle
## 2
## 3
## 4 TIE Advanced x1
# Select the first 4 rows and choose only columns containing an underscore
starwars %>% slice(1:4) %>% dplyr::select(contains("_"))## hair_color skin_color eye_color birth_year
## 1 blond fair blue 19.0
## 2 <NA> gold yellow 112.0
## 3 <NA> white, blue red 33.0
## 4 none white yellow 41.9
# Select the first 4 rows and choose only columns beginning with "s"
starwars %>% slice(1:4) %>% dplyr::select(starts_with("s"))## skin_color sex species starships
## 1 fair male Human X-wing, Imperial shuttle
## 2 gold none Droid
## 3 white, blue none Droid
## 4 white male Human TIE Advanced x1
filter()starwars %>% filter(mass > mean(mass, na.rm = TRUE)) %>% dplyr::select(name,mass, homeworld)
filter(starwars, species == "Human") %>% dplyr::select(name, species, homeworld)
filter(starwars, hair_color == "none" & eye_color == "black") %>% dplyr::select(name, hair_color,homeworld, eye_color )
starwars %>% filter(mass >1000 & height > 150) %>% dplyr::select(name, hair_color,homeworld, eye_color )
starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE)) %>% dplyr::select(gender, mass, name, hair_color, eye_color )
# 1. Select columns name, mass, homeworld and filter only characters with their Weight greater than the average of the Weight for all characters using dplyr
starwars %>% filter(mass > mean(mass, na.rm = TRUE)) %>% dplyr::select(name,mass, homeworld)## name mass homeworld
## 1 Darth Vader 136 Tatooine
## 2 Owen Lars 120 Tatooine
## 3 Chewbacca 112 Kashyyyk
## 4 Jabba Desilijic Tiure 1358 Nal Hutta
## 5 Jek Tono Porkins 110 Bestine IV
## 6 IG-88 140 <NA>
## 7 Bossk 113 Trandosha
## 8 Dexter Jettster 102 Ojom
## 9 Grievous 159 Kalee
## 10 Tarfful 136 Kashyyyk
# 2. Select columns name, species, homeworld and filter only characters with species == "Human"
filter(starwars, species == "Human") %>% dplyr::select(name, species, homeworld)## name species homeworld
## 1 Luke Skywalker Human Tatooine
## 2 Darth Vader Human Tatooine
## 3 Leia Organa Human Alderaan
## 4 Owen Lars Human Tatooine
## 5 Beru Whitesun lars Human Tatooine
## 6 Biggs Darklighter Human Tatooine
## 7 Obi-Wan Kenobi Human Stewjon
## 8 Anakin Skywalker Human Tatooine
## 9 Wilhuff Tarkin Human Eriadu
## 10 Han Solo Human Corellia
## 11 Wedge Antilles Human Corellia
## 12 Jek Tono Porkins Human Bestine IV
## 13 Palpatine Human Naboo
## 14 Boba Fett Human Kamino
## 15 Lando Calrissian Human Socorro
## 16 Lobot Human Bespin
## 17 Mon Mothma Human Chandrila
## 18 Arvel Crynyd Human <NA>
## 19 Qui-Gon Jinn Human <NA>
## 20 Finis Valorum Human Coruscant
## 21 Shmi Skywalker Human Tatooine
## 22 Mace Windu Human Haruun Kal
## 23 Gregar Typho Human Naboo
## 24 Cordé Human Naboo
## 25 Cliegg Lars Human Tatooine
## 26 Dormé Human Naboo
## 27 Dooku Human Serenno
## 28 Bail Prestor Organa Human Alderaan
## 29 Jango Fett Human Concord Dawn
## 30 Jocasta Nu Human Coruscant
## 31 Raymus Antilles Human Alderaan
## 32 Finn Human <NA>
## 33 Rey Human <NA>
## 34 Poe Dameron Human <NA>
## 35 Padmé Amidala Human Naboo
# 3. Select columns name, hair_color, homeworld, eye_color and filter only characters whose hair_color is "none" and eye_color is "black"
filter(starwars, hair_color == "none" & eye_color == "black") %>% dplyr::select(name, hair_color,homeworld, eye_color )## name hair_color homeworld eye_color
## 1 Nien Nunb none Sullust black
## 2 Gasgano none Troiken black
## 3 Kit Fisto none Glee Anselm black
## 4 Plo Koon none Dorin black
## 5 Lama Su none Kamino black
## 6 Taun We none Kamino black
## 7 Shaak Ti none Shili black
## 8 Tion Medon none Utapau black
## 9 BB8 none <NA> black
# 4. Select columns name, hair_color, homeworld, eye_color and filter only characters with weight greater than 1000 and height greater than 150
starwars %>% filter(mass >1000 & height > 150) %>% dplyr::select(name, hair_color,homeworld, eye_color )## name hair_color homeworld eye_color
## 1 Jabba Desilijic Tiure <NA> Nal Hutta orange
# 5. Within each gender group, select columns gender, mass, name, hair_color, eye_color and filter only characters whose weight is greater than the average weight of their own gender group.
starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE)) %>% dplyr::select(gender, mass, name, hair_color, eye_color )## # A tibble: 14 × 5
## # Groups: gender [2]
## gender mass name hair_color eye_color
## <chr> <dbl> <chr> <chr> <chr>
## 1 masculine 136 Darth Vader none yellow
## 2 masculine 120 Owen Lars brown, grey blue
## 3 feminine 75 Beru Whitesun lars brown blue
## 4 masculine 112 Chewbacca brown blue
## 5 masculine 1358 Jabba Desilijic Tiure <NA> orange
## 6 masculine 110 Jek Tono Porkins brown blue
## 7 masculine 140 IG-88 none red
## 8 masculine 113 Bossk none red
## 9 feminine 55 Ayla Secura none hazel
## 10 feminine 56.2 Luminara Unduli black blue
## 11 feminine 55 Zam Wesell blonde yellow
## 12 feminine 57 Shaak Ti none black
## 13 masculine 159 Grievous none green, yellow
## 14 masculine 136 Tarfful brown blue
mutate()starwars %>% select(name, mass, species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
# without group
# Select variable name, mass, species. Create a new variable name mass_norm, which use mass divide by mean of mass
starwars %>%
select(name, mass, species) %>%
mutate(mass_norm = mass / mean(mass, na.rm = TRUE))## name mass species mass_norm
## 1 Luke Skywalker 77.0 Human 0.7912704
## 2 C-3PO 75.0 Droid 0.7707179
## 3 R2-D2 32.0 Droid 0.3288397
## 4 Darth Vader 136.0 Human 1.3975685
## 5 Leia Organa 49.0 Human 0.5035357
## 6 Owen Lars 120.0 Human 1.2331487
## 7 Beru Whitesun lars 75.0 Human 0.7707179
## 8 R5-D4 32.0 Droid 0.3288397
## 9 Biggs Darklighter 84.0 Human 0.8632041
## 10 Obi-Wan Kenobi 77.0 Human 0.7912704
## 11 Anakin Skywalker 84.0 Human 0.8632041
## 12 Wilhuff Tarkin NA Human NA
## 13 Chewbacca 112.0 Wookiee 1.1509388
## 14 Han Solo 80.0 Human 0.8220991
## 15 Greedo 74.0 Rodian 0.7604417
## 16 Jabba Desilijic Tiure 1358.0 Hutt 13.9551329
## 17 Wedge Antilles 77.0 Human 0.7912704
## 18 Jek Tono Porkins 110.0 Human 1.1303863
## 19 Yoda 17.0 Yoda's species 0.1746961
## 20 Palpatine 75.0 Human 0.7707179
## 21 Boba Fett 78.2 Human 0.8036019
## 22 IG-88 140.0 Droid 1.4386735
## 23 Bossk 113.0 Trandoshan 1.1612150
## 24 Lando Calrissian 79.0 Human 0.8118229
## 25 Lobot 79.0 Human 0.8118229
## 26 Ackbar 83.0 Mon Calamari 0.8529279
## 27 Mon Mothma NA Human NA
## 28 Arvel Crynyd NA Human NA
## 29 Wicket Systri Warrick 20.0 Ewok 0.2055248
## 30 Nien Nunb 68.0 Sullustan 0.6987843
## 31 Qui-Gon Jinn 89.0 Human 0.9145853
## 32 Nute Gunray 90.0 Neimodian 0.9248615
## 33 Finis Valorum NA Human NA
## 34 Jar Jar Binks 66.0 Gungan 0.6782318
## 35 Roos Tarpals 82.0 Gungan 0.8426516
## 36 Rugor Nass NA Gungan NA
## 37 Ric Olié NA <NA> NA
## 38 Watto NA Toydarian NA
## 39 Sebulba 40.0 Dug 0.4110496
## 40 Quarsh Panaka NA <NA> NA
## 41 Shmi Skywalker NA Human NA
## 42 Darth Maul 80.0 Zabrak 0.8220991
## 43 Bib Fortuna NA Twi'lek NA
## 44 Ayla Secura 55.0 Twi'lek 0.5651932
## 45 Dud Bolt 45.0 Vulptereen 0.4624308
## 46 Gasgano NA Xexto NA
## 47 Ben Quadinaros 65.0 Toong 0.6679556
## 48 Mace Windu 84.0 Human 0.8632041
## 49 Ki-Adi-Mundi 82.0 Cerean 0.8426516
## 50 Kit Fisto 87.0 Nautolan 0.8940328
## 51 Eeth Koth NA Zabrak NA
## 52 Adi Gallia 50.0 Tholothian 0.5138120
## 53 Saesee Tiin NA Iktotchi NA
## 54 Yarael Poof NA Quermian NA
## 55 Plo Koon 80.0 Kel Dor 0.8220991
## 56 Mas Amedda NA Chagrian NA
## 57 Gregar Typho 85.0 Human 0.8734803
## 58 Cordé NA Human NA
## 59 Cliegg Lars NA Human NA
## 60 Poggle the Lesser 80.0 Geonosian 0.8220991
## 61 Luminara Unduli 56.2 Mirialan 0.5775246
## 62 Barriss Offee 50.0 Mirialan 0.5138120
## 63 Dormé NA Human NA
## 64 Dooku 80.0 Human 0.8220991
## 65 Bail Prestor Organa NA Human NA
## 66 Jango Fett 79.0 Human 0.8118229
## 67 Zam Wesell 55.0 Clawdite 0.5651932
## 68 Dexter Jettster 102.0 Besalisk 1.0481764
## 69 Lama Su 88.0 Kaminoan 0.9043091
## 70 Taun We NA Kaminoan NA
## 71 Jocasta Nu NA Human NA
## 72 Ratts Tyerell 15.0 Aleena 0.1541436
## 73 R4-P17 NA Droid NA
## 74 Wat Tambor 48.0 Skakoan 0.4932595
## 75 San Hill NA Muun NA
## 76 Shaak Ti 57.0 Togruta 0.5857456
## 77 Grievous 159.0 Kaleesh 1.6339220
## 78 Tarfful 136.0 Wookiee 1.3975685
## 79 Raymus Antilles 79.0 Human 0.8118229
## 80 Sly Moore 48.0 <NA> 0.4932595
## 81 Tion Medon 80.0 Pau'an 0.8220991
## 82 Finn NA Human NA
## 83 Rey NA Human NA
## 84 Poe Dameron NA Human NA
## 85 BB8 NA Droid NA
## 86 Captain Phasma NA <NA> NA
## 87 Padmé Amidala 45.0 Human 0.4624308
starwars %>% select(name, height, mass, homeworld, species) %>% group_by(species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
# Select variable name, height, mass, homeworld, species. Create a new variable name mass_norm within group of species, which use mass divide by mean of mass
starwars %>%
select(name, height, mass, homeworld, species) %>%
group_by(species) %>%
mutate(mass_norm = mass / mean(mass, na.rm = TRUE))## # A tibble: 87 × 6
## # Groups: species [38]
## name height mass homeworld species mass_norm
## <chr> <int> <dbl> <chr> <chr> <dbl>
## 1 Luke Skywalker 172 77 Tatooine Human 0.930
## 2 C-3PO 167 75 Tatooine Droid 1.08
## 3 R2-D2 96 32 Naboo Droid 0.459
## 4 Darth Vader 202 136 Tatooine Human 1.64
## 5 Leia Organa 150 49 Alderaan Human 0.592
## 6 Owen Lars 178 120 Tatooine Human 1.45
## 7 Beru Whitesun lars 165 75 Tatooine Human 0.906
## 8 R5-D4 97 32 Tatooine Droid 0.459
## 9 Biggs Darklighter 183 84 Tatooine Human 1.01
## 10 Obi-Wan Kenobi 182 77 Stewjon Human 0.930
## # ℹ 77 more rows
starwars %>% select(name, height, mass, homeworld) %>% mutate( mass2 = mass * 2, mass2_squared = mass2 * mass2)
# Select variable name, height, mass, homeworld. Create a variable mass2 by mass x 2 then mass2 x mass2
starwars %>%
select(name, height, mass, homeworld) %>%
mutate(
mass2 = mass * 2,
mass2_squared = mass2 * mass2
)## name height mass homeworld mass2 mass2_squared
## 1 Luke Skywalker 172 77.0 Tatooine 154.0 23716.00
## 2 C-3PO 167 75.0 Tatooine 150.0 22500.00
## 3 R2-D2 96 32.0 Naboo 64.0 4096.00
## 4 Darth Vader 202 136.0 Tatooine 272.0 73984.00
## 5 Leia Organa 150 49.0 Alderaan 98.0 9604.00
## 6 Owen Lars 178 120.0 Tatooine 240.0 57600.00
## 7 Beru Whitesun lars 165 75.0 Tatooine 150.0 22500.00
## 8 R5-D4 97 32.0 Tatooine 64.0 4096.00
## 9 Biggs Darklighter 183 84.0 Tatooine 168.0 28224.00
## 10 Obi-Wan Kenobi 182 77.0 Stewjon 154.0 23716.00
## 11 Anakin Skywalker 188 84.0 Tatooine 168.0 28224.00
## 12 Wilhuff Tarkin 180 NA Eriadu NA NA
## 13 Chewbacca 228 112.0 Kashyyyk 224.0 50176.00
## 14 Han Solo 180 80.0 Corellia 160.0 25600.00
## 15 Greedo 173 74.0 Rodia 148.0 21904.00
## 16 Jabba Desilijic Tiure 175 1358.0 Nal Hutta 2716.0 7376656.00
## 17 Wedge Antilles 170 77.0 Corellia 154.0 23716.00
## 18 Jek Tono Porkins 180 110.0 Bestine IV 220.0 48400.00
## 19 Yoda 66 17.0 <NA> 34.0 1156.00
## 20 Palpatine 170 75.0 Naboo 150.0 22500.00
## 21 Boba Fett 183 78.2 Kamino 156.4 24460.96
## 22 IG-88 200 140.0 <NA> 280.0 78400.00
## 23 Bossk 190 113.0 Trandosha 226.0 51076.00
## 24 Lando Calrissian 177 79.0 Socorro 158.0 24964.00
## 25 Lobot 175 79.0 Bespin 158.0 24964.00
## 26 Ackbar 180 83.0 Mon Cala 166.0 27556.00
## 27 Mon Mothma 150 NA Chandrila NA NA
## 28 Arvel Crynyd NA NA <NA> NA NA
## 29 Wicket Systri Warrick 88 20.0 Endor 40.0 1600.00
## 30 Nien Nunb 160 68.0 Sullust 136.0 18496.00
## 31 Qui-Gon Jinn 193 89.0 <NA> 178.0 31684.00
## 32 Nute Gunray 191 90.0 Cato Neimoidia 180.0 32400.00
## 33 Finis Valorum 170 NA Coruscant NA NA
## 34 Jar Jar Binks 196 66.0 Naboo 132.0 17424.00
## 35 Roos Tarpals 224 82.0 Naboo 164.0 26896.00
## 36 Rugor Nass 206 NA Naboo NA NA
## 37 Ric Olié 183 NA Naboo NA NA
## 38 Watto 137 NA Toydaria NA NA
## 39 Sebulba 112 40.0 Malastare 80.0 6400.00
## 40 Quarsh Panaka 183 NA Naboo NA NA
## 41 Shmi Skywalker 163 NA Tatooine NA NA
## 42 Darth Maul 175 80.0 Dathomir 160.0 25600.00
## 43 Bib Fortuna 180 NA Ryloth NA NA
## 44 Ayla Secura 178 55.0 Ryloth 110.0 12100.00
## 45 Dud Bolt 94 45.0 Vulpter 90.0 8100.00
## 46 Gasgano 122 NA Troiken NA NA
## 47 Ben Quadinaros 163 65.0 Tund 130.0 16900.00
## 48 Mace Windu 188 84.0 Haruun Kal 168.0 28224.00
## 49 Ki-Adi-Mundi 198 82.0 Cerea 164.0 26896.00
## 50 Kit Fisto 196 87.0 Glee Anselm 174.0 30276.00
## 51 Eeth Koth 171 NA Iridonia NA NA
## 52 Adi Gallia 184 50.0 Coruscant 100.0 10000.00
## 53 Saesee Tiin 188 NA Iktotch NA NA
## 54 Yarael Poof 264 NA Quermia NA NA
## 55 Plo Koon 188 80.0 Dorin 160.0 25600.00
## 56 Mas Amedda 196 NA Champala NA NA
## 57 Gregar Typho 185 85.0 Naboo 170.0 28900.00
## 58 Cordé 157 NA Naboo NA NA
## 59 Cliegg Lars 183 NA Tatooine NA NA
## 60 Poggle the Lesser 183 80.0 Geonosis 160.0 25600.00
## 61 Luminara Unduli 170 56.2 Mirial 112.4 12633.76
## 62 Barriss Offee 166 50.0 Mirial 100.0 10000.00
## 63 Dormé 165 NA Naboo NA NA
## 64 Dooku 193 80.0 Serenno 160.0 25600.00
## 65 Bail Prestor Organa 191 NA Alderaan NA NA
## 66 Jango Fett 183 79.0 Concord Dawn 158.0 24964.00
## 67 Zam Wesell 168 55.0 Zolan 110.0 12100.00
## 68 Dexter Jettster 198 102.0 Ojom 204.0 41616.00
## 69 Lama Su 229 88.0 Kamino 176.0 30976.00
## 70 Taun We 213 NA Kamino NA NA
## 71 Jocasta Nu 167 NA Coruscant NA NA
## 72 Ratts Tyerell 79 15.0 Aleen Minor 30.0 900.00
## 73 R4-P17 96 NA <NA> NA NA
## 74 Wat Tambor 193 48.0 Skako 96.0 9216.00
## 75 San Hill 191 NA Muunilinst NA NA
## 76 Shaak Ti 178 57.0 Shili 114.0 12996.00
## 77 Grievous 216 159.0 Kalee 318.0 101124.00
## 78 Tarfful 234 136.0 Kashyyyk 272.0 73984.00
## 79 Raymus Antilles 188 79.0 Alderaan 158.0 24964.00
## 80 Sly Moore 178 48.0 Umbara 96.0 9216.00
## 81 Tion Medon 206 80.0 Utapau 160.0 25600.00
## 82 Finn NA NA <NA> NA NA
## 83 Rey NA NA <NA> NA NA
## 84 Poe Dameron NA NA <NA> NA NA
## 85 BB8 NA NA <NA> NA NA
## 86 Captain Phasma NA NA <NA> NA NA
## 87 Padmé Amidala 165 45.0 Naboo 90.0 8100.00
# Drop `mass` and rescale `height` in a single mutate() call.
# The call is spread over several lines so that the trailing comment cannot
# swallow the closing parenthesis.
starwars %>%
  select(name, height, mass, homeworld) %>%
  mutate(
    mass = NULL, # assigning NULL removes the column
    height = height * 0.0328084 # convert to feet
  )
# As well as adding new variables, you can use mutate() to
# remove variables and modify existing variables.
starwars %>%
select(name, height, mass, homeworld) %>%
mutate(
mass = NULL,
height = height * 0.0328084 # convert to feet
)## name height homeworld
## 1 Luke Skywalker 5.643045 Tatooine
## 2 C-3PO 5.479003 Tatooine
## 3 R2-D2 3.149606 Naboo
## 4 Darth Vader 6.627297 Tatooine
## 5 Leia Organa 4.921260 Alderaan
## 6 Owen Lars 5.839895 Tatooine
## 7 Beru Whitesun lars 5.413386 Tatooine
## 8 R5-D4 3.182415 Tatooine
## 9 Biggs Darklighter 6.003937 Tatooine
## 10 Obi-Wan Kenobi 5.971129 Stewjon
## 11 Anakin Skywalker 6.167979 Tatooine
## 12 Wilhuff Tarkin 5.905512 Eriadu
## 13 Chewbacca 7.480315 Kashyyyk
## 14 Han Solo 5.905512 Corellia
## 15 Greedo 5.675853 Rodia
## 16 Jabba Desilijic Tiure 5.741470 Nal Hutta
## 17 Wedge Antilles 5.577428 Corellia
## 18 Jek Tono Porkins 5.905512 Bestine IV
## 19 Yoda 2.165354 <NA>
## 20 Palpatine 5.577428 Naboo
## 21 Boba Fett 6.003937 Kamino
## 22 IG-88 6.561680 <NA>
## 23 Bossk 6.233596 Trandosha
## 24 Lando Calrissian 5.807087 Socorro
## 25 Lobot 5.741470 Bespin
## 26 Ackbar 5.905512 Mon Cala
## 27 Mon Mothma 4.921260 Chandrila
## 28 Arvel Crynyd NA <NA>
## 29 Wicket Systri Warrick 2.887139 Endor
## 30 Nien Nunb 5.249344 Sullust
## 31 Qui-Gon Jinn 6.332021 <NA>
## 32 Nute Gunray 6.266404 Cato Neimoidia
## 33 Finis Valorum 5.577428 Coruscant
## 34 Jar Jar Binks 6.430446 Naboo
## 35 Roos Tarpals 7.349082 Naboo
## 36 Rugor Nass 6.758530 Naboo
## 37 Ric Olié 6.003937 Naboo
## 38 Watto 4.494751 Toydaria
## 39 Sebulba 3.674541 Malastare
## 40 Quarsh Panaka 6.003937 Naboo
## 41 Shmi Skywalker 5.347769 Tatooine
## 42 Darth Maul 5.741470 Dathomir
## 43 Bib Fortuna 5.905512 Ryloth
## 44 Ayla Secura 5.839895 Ryloth
## 45 Dud Bolt 3.083990 Vulpter
## 46 Gasgano 4.002625 Troiken
## 47 Ben Quadinaros 5.347769 Tund
## 48 Mace Windu 6.167979 Haruun Kal
## 49 Ki-Adi-Mundi 6.496063 Cerea
## 50 Kit Fisto 6.430446 Glee Anselm
## 51 Eeth Koth 5.610236 Iridonia
## 52 Adi Gallia 6.036746 Coruscant
## 53 Saesee Tiin 6.167979 Iktotch
## 54 Yarael Poof 8.661418 Quermia
## 55 Plo Koon 6.167979 Dorin
## 56 Mas Amedda 6.430446 Champala
## 57 Gregar Typho 6.069554 Naboo
## 58 Cordé 5.150919 Naboo
## 59 Cliegg Lars 6.003937 Tatooine
## 60 Poggle the Lesser 6.003937 Geonosis
## 61 Luminara Unduli 5.577428 Mirial
## 62 Barriss Offee 5.446194 Mirial
## 63 Dormé 5.413386 Naboo
## 64 Dooku 6.332021 Serenno
## 65 Bail Prestor Organa 6.266404 Alderaan
## 66 Jango Fett 6.003937 Concord Dawn
## 67 Zam Wesell 5.511811 Zolan
## 68 Dexter Jettster 6.496063 Ojom
## 69 Lama Su 7.513124 Kamino
## 70 Taun We 6.988189 Kamino
## 71 Jocasta Nu 5.479003 Coruscant
## 72 Ratts Tyerell 2.591864 Aleen Minor
## 73 R4-P17 3.149606 <NA>
## 74 Wat Tambor 6.332021 Skako
## 75 San Hill 6.266404 Muunilinst
## 76 Shaak Ti 5.839895 Shili
## 77 Grievous 7.086614 Kalee
## 78 Tarfful 7.677166 Kashyyyk
## 79 Raymus Antilles 6.167979 Alderaan
## 80 Sly Moore 5.839895 Umbara
## 81 Tion Medon 6.758530 Utapau
## 82 Finn NA <NA>
## 83 Rey NA <NA>
## 84 Poe Dameron NA <NA>
## 85 BB8 NA <NA>
## 86 Captain Phasma NA <NA>
## 87 Padmé Amidala 5.413386 Naboo
starwars %>% select(name, homeworld, species) %>% mutate(across(!name, as.factor))
# Use across() with mutate() to apply a transformation
# to multiple columns in a tibble.
starwars %>%
select(name, homeworld, species) %>%
mutate(across(!name, as.factor))## name homeworld species
## 1 Luke Skywalker Tatooine Human
## 2 C-3PO Tatooine Droid
## 3 R2-D2 Naboo Droid
## 4 Darth Vader Tatooine Human
## 5 Leia Organa Alderaan Human
## 6 Owen Lars Tatooine Human
## 7 Beru Whitesun lars Tatooine Human
## 8 R5-D4 Tatooine Droid
## 9 Biggs Darklighter Tatooine Human
## 10 Obi-Wan Kenobi Stewjon Human
## 11 Anakin Skywalker Tatooine Human
## 12 Wilhuff Tarkin Eriadu Human
## 13 Chewbacca Kashyyyk Wookiee
## 14 Han Solo Corellia Human
## 15 Greedo Rodia Rodian
## 16 Jabba Desilijic Tiure Nal Hutta Hutt
## 17 Wedge Antilles Corellia Human
## 18 Jek Tono Porkins Bestine IV Human
## 19 Yoda <NA> Yoda's species
## 20 Palpatine Naboo Human
## 21 Boba Fett Kamino Human
## 22 IG-88 <NA> Droid
## 23 Bossk Trandosha Trandoshan
## 24 Lando Calrissian Socorro Human
## 25 Lobot Bespin Human
## 26 Ackbar Mon Cala Mon Calamari
## 27 Mon Mothma Chandrila Human
## 28 Arvel Crynyd <NA> Human
## 29 Wicket Systri Warrick Endor Ewok
## 30 Nien Nunb Sullust Sullustan
## 31 Qui-Gon Jinn <NA> Human
## 32 Nute Gunray Cato Neimoidia Neimodian
## 33 Finis Valorum Coruscant Human
## 34 Jar Jar Binks Naboo Gungan
## 35 Roos Tarpals Naboo Gungan
## 36 Rugor Nass Naboo Gungan
## 37 Ric Olié Naboo <NA>
## 38 Watto Toydaria Toydarian
## 39 Sebulba Malastare Dug
## 40 Quarsh Panaka Naboo <NA>
## 41 Shmi Skywalker Tatooine Human
## 42 Darth Maul Dathomir Zabrak
## 43 Bib Fortuna Ryloth Twi'lek
## 44 Ayla Secura Ryloth Twi'lek
## 45 Dud Bolt Vulpter Vulptereen
## 46 Gasgano Troiken Xexto
## 47 Ben Quadinaros Tund Toong
## 48 Mace Windu Haruun Kal Human
## 49 Ki-Adi-Mundi Cerea Cerean
## 50 Kit Fisto Glee Anselm Nautolan
## 51 Eeth Koth Iridonia Zabrak
## 52 Adi Gallia Coruscant Tholothian
## 53 Saesee Tiin Iktotch Iktotchi
## 54 Yarael Poof Quermia Quermian
## 55 Plo Koon Dorin Kel Dor
## 56 Mas Amedda Champala Chagrian
## 57 Gregar Typho Naboo Human
## 58 Cordé Naboo Human
## 59 Cliegg Lars Tatooine Human
## 60 Poggle the Lesser Geonosis Geonosian
## 61 Luminara Unduli Mirial Mirialan
## 62 Barriss Offee Mirial Mirialan
## 63 Dormé Naboo Human
## 64 Dooku Serenno Human
## 65 Bail Prestor Organa Alderaan Human
## 66 Jango Fett Concord Dawn Human
## 67 Zam Wesell Zolan Clawdite
## 68 Dexter Jettster Ojom Besalisk
## 69 Lama Su Kamino Kaminoan
## 70 Taun We Kamino Kaminoan
## 71 Jocasta Nu Coruscant Human
## 72 Ratts Tyerell Aleen Minor Aleena
## 73 R4-P17 <NA> Droid
## 74 Wat Tambor Skako Skakoan
## 75 San Hill Muunilinst Muun
## 76 Shaak Ti Shili Togruta
## 77 Grievous Kalee Kaleesh
## 78 Tarfful Kashyyyk Wookiee
## 79 Raymus Antilles Alderaan Human
## 80 Sly Moore Umbara <NA>
## 81 Tion Medon Utapau Pau'an
## 82 Finn <NA> Human
## 83 Rey <NA> Human
## 84 Poe Dameron <NA> Human
## 85 BB8 <NA> Droid
## 86 Captain Phasma <NA> <NA>
## 87 Padmé Amidala Naboo Human
ggplot2 and
ggpubrmpg data from ggplot2 package, plot
disl and hwy with color by
classggplot(data = mpg, aes(x = displ, y = hwy, colour = class)) + geom_point(size = 3)
Using mpg data from ggplot2 package,
plot displ and hwy with color by
class and add color style and linear regression
line
Using mpg data from ggplot2 package,
plot displ and hwy with color by
class and facet_grid(vars(cyl))
# 1. Using `mpg` data from `ggplot2` package, plot `displ` and `hwy` with color by `class`
p <- ggplot(data = mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point(size = 3)
p

# 2. Using `mpg` data from `ggplot2` package, plot `displ` and `hwy` with color by `class`
#    and add color style and linear regression line.
#    Axis titles: `displ` is the engine displacement and `hwy` the highway
#    miles per gallon, so label them as such (not "Display"/"Highway").
p <- ggplot(data = mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", formula = "y ~ x", alpha = 0.3) +
  scale_color_viridis_d() +
  scale_fill_viridis_d() +
  labs(
    x = "Engine displacement (litres)",
    y = "Highway miles per gallon",
    fill = "Class",
    color = "Class"
  )
p

# 3. Using `mpg` data from `ggplot2` package, plot `displ` and `hwy` with color by `class`
#    and `facet_grid(vars(cyl))` (one row of panels per `cyl` value, free scales)
p <- ggplot(mpg, aes(displ, hwy, colour = class)) +
  geom_point(size = 3) +
  scale_color_viridis_d() +
  scale_fill_viridis_d() +
  facet_grid(vars(cyl), scales = "free")
p
Using mpg data from ggplot2 package,
plot the number of cars per manufacturer, with fill color by cyl
Using mpg data from ggplot2 package,
plot the proportion of cars per manufacturer, with fill color by cyl
# NOTE(review): the exercise text says "color by `class`", but both plots
# below fill the bars by `cyl`; the comments describe what the code does.

# 1. Using `mpg` data from `ggplot2` package, plot the count of rows per
#    manufacturer as stacked bars filled by `cyl`
p <- ggplot(data = mpg, aes(x = manufacturer, fill = as.character(cyl))) +
  geom_bar(position = "stack") +
  scale_fill_brewer(palette = "BuPu") +
  # Without an explicit title the legend is headed "as.character(cyl)"
  labs(fill = "cyl")
p

# 2. Using `mpg` data from `ggplot2` package, plot the proportion of rows per
#    manufacturer (bars normalised to height 1) filled by `cyl`
p <- ggplot(data = mpg, aes(x = manufacturer, fill = as.character(cyl))) +
  geom_bar(position = "fill") +
  # position = "fill" rescales each bar to 1, so the default "count" title is wrong
  labs(y = "proportion", fill = "cyl")
p
# Load data
data("mtcars")
dfm <- mtcars
# Convert the cyl variable to a factor
dfm$cyl <- as.factor(dfm$cyl)
# Add the name column (car names are stored as row names in mtcars)
dfm$name <- rownames(dfm)

# Ordered bar plot: all bars sorted by mpg, regardless of cyl group
ggbarplot(dfm, x = "name", y = "mpg",
          fill = "cyl",               # Change fill color by cyl
          color = "white",            # Set bar border colors to white
          palette = "jco",            # jco journal color palette. See ?ggpar
          sort.val = "desc",          # Sort the value in descending order
          sort.by.groups = FALSE,     # Don't sort inside each group
          x.text.angle = 90           # Rotate vertically x axis texts
)

# Ordered bar plot: bars sorted by mpg inside each cyl group
ggbarplot(dfm, x = "name", y = "mpg",
          fill = "cyl",               # Change fill color by cyl
          color = "white",            # Set bar border colors to white
          palette = "jco",            # jco journal color palette. See ?ggpar
          sort.val = "asc",           # Sort the value in ascending order
          sort.by.groups = TRUE,      # Sort inside each group
          x.text.angle = 90           # Rotate vertically x axis texts
)

# wdata
# Create some data format
# Fix the RNG seed so the simulated weights are reproducible
set.seed(1234)
# 200 "F" rows followed by 200 "M" rows; weights drawn from normal
# distributions with means 55 and 58 (rnorm's default sd of 1)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
)
head(wdata, 4)
## sex weight
## 1 F 53.79293
## 2 F 55.27743
## 3 F 56.08444
## 4 F 52.65430

# Density plot with mean lines and marginal rug
# Change outline and fill colors by groups ("sex")
# Use custom palette
ggdensity(data = wdata, x = "weight",
          add = "mean", rug = TRUE,
          color = "sex", fill = "sex",
          palette = c("#00AFBB", "#E7B800"))

# wdata
# Histogram plot with mean lines and marginal rug
# Change outline and fill colors by groups ("sex")
# Use custom color palette
gghistogram(data = wdata, x = "weight",
            add = "mean", rug = TRUE,
            color = "sex", fill = "sex",
            palette = c("#00AFBB", "#E7B800"))

# ToothGrowth
# Load data
data("ToothGrowth")
df <- ToothGrowth
head(df, 4)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5

# Box plots with jittered points
# Change outline colors by groups: dose
# Use custom color palette
# Add jitter points and change the shape by groups
p <- ggboxplot(df, x = "dose", y = "len",
               color = "dose", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
               add = "jitter", shape = "dose")
p

# Add p-values comparing groups
# Specify the comparisons you want
my_comparisons <- list(c("0.5", "1"), c("1", "2"), c("0.5", "2"))
p + stat_compare_means(comparisons = my_comparisons) + # Add pairwise comparisons p-value
  stat_compare_means(label.y = 50)                     # Second p-value label placed at y = 50

# Violin plots with box plots inside
# Change fill color by groups: dose
# Add boxplot with white fill color
ggviolin(df, x = "dose", y = "len", fill = "dose",
         palette = c("#00AFBB", "#E7B800", "#FC4E07"),
         add = "boxplot", add.params = list(fill = "white")) +
  stat_compare_means(comparisons = my_comparisons, label = "p.signif") + # Add significance levels
  stat_compare_means(label.y = 50)                                       # Second p-value label placed at y = 50

# The deviation graph shows the deviation of quantitative values to a reference value.
# In the R code below, we'll plot the mpg z-score from the mtcars dataset.
Calculate the z-score of the mpg data:
# Standardise mpg: subtract the mean, then divide by the standard deviation
dfm[["mpg_z"]] <- (dfm[["mpg"]] - mean(dfm[["mpg"]])) / sd(dfm[["mpg"]])
# Negative z-scores are labelled "low", all others "high";
# "low" is declared first so it is the first factor level
dfm[["mpg_grp"]] <- factor(
  ifelse(dfm[["mpg_z"]] < 0, "low", "high"),
  levels = c("low", "high")
)
# Show the first rows of the columns used by the deviation plots
head(dfm[c("name", "wt", "mpg", "mpg_z", "mpg_grp", "cyl")])
## name wt mpg mpg_z mpg_grp cyl
## Mazda RX4 Mazda RX4 2.620 21.0 0.1508848 high 6
## Mazda RX4 Wag Mazda RX4 Wag 2.875 21.0 0.1508848 high 6
## Datsun 710 Datsun 710 2.320 22.8 0.4495434 high 4
## Hornet 4 Drive Hornet 4 Drive 3.215 21.4 0.2172534 high 6
## Hornet Sportabout Hornet Sportabout 3.440 18.7 -0.2307345 low 8
## Valiant Valiant 3.460 18.1 -0.3302874 low 6
# Deviation bar plot: z-scores sorted in ascending order
ggbarplot(dfm, x = "name", y = "mpg_z",
          fill = "mpg_grp",           # Change fill color by mpg_grp
          color = "white",            # Set bar border colors to white
          palette = "jco",            # jco journal color palette. See ?ggpar
          sort.val = "asc",           # Sort the value in ascending order
          sort.by.groups = FALSE,     # Don't sort inside each group
          x.text.angle = 90,          # Rotate vertically x axis texts
          ylab = "MPG z-score",
          xlab = FALSE,
          legend.title = "MPG Group"
)

# Same deviation plot, sorted in descending order and rotated
ggbarplot(dfm, x = "name", y = "mpg_z",
          fill = "mpg_grp",           # Change fill color by mpg_grp
          color = "white",            # Set bar border colors to white
          palette = "jco",            # jco journal color palette. See ?ggpar
          sort.val = "desc",          # Sort the value in descending order
          sort.by.groups = FALSE,     # Don't sort inside each group
          x.text.angle = 90,          # Rotate vertically x axis texts
          ylab = "MPG z-score",
          legend.title = "MPG Group",
          rotate = TRUE,
          ggtheme = theme_minimal()
)

# Lollipop chart colored by cyl
ggdotchart(dfm, x = "name", y = "mpg",
           color = "cyl",                                # Color by groups
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # Custom color palette
           sorting = "ascending",                        # Sort value in ascending order
           add = "segments",                             # Add segments from y = 0 to dots
           ggtheme = theme_pubr()                        # ggplot2 theme
)

# Rotated lollipop chart, sorted inside each cyl group, with mpg labels
ggdotchart(dfm, x = "name", y = "mpg",
           color = "cyl",                                # Color by groups
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # Custom color palette
           sorting = "descending",                       # Sort value in descending order
           add = "segments",                             # Add segments from y = 0 to dots
           rotate = TRUE,                                # Rotate vertically
           group = "cyl",                                # Order by groups
           dot.size = 6,                                 # Large dot size
           label = round(dfm$mpg),                       # Add mpg values as dot labels
           font.label = list(color = "white", size = 9,
                             vjust = 0.5),               # Adjust label parameters
           ggtheme = theme_pubr()                        # ggplot2 theme
)

# Deviation dot chart of the mpg z-score with a dashed reference line at 0
ggdotchart(dfm, x = "name", y = "mpg_z",
           color = "cyl",                                # Color by groups
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # Custom color palette
           sorting = "descending",                       # Sort value in descending order
           add = "segments",                             # Add segments from y = 0 to dots
           add.params = list(color = "lightgray", size = 2), # Change segment color and size
           group = "cyl",                                # Order by groups
           dot.size = 6,                                 # Large dot size
           label = round(dfm$mpg_z, 1),                  # Add z-scores (1 decimal) as dot labels
           font.label = list(color = "white", size = 9,
                             vjust = 0.5),               # Adjust label parameters
           ggtheme = theme_pubr()                        # ggplot2 theme
) +
  geom_hline(yintercept = 0, linetype = 2, color = "lightgray")

# Lollipop chart is an alternative to bar plots, when you have a large set of values to visualize.
Lollipop chart colored by the grouping variable “cyl”:
ggdotchart(dfm, x = "name", y = "mpg",
           color = "cyl",                                # Color by groups
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # Custom color palette
           sorting = "ascending",                        # Sort value in ascending order
           add = "segments",                             # Add segments from y = 0 to dots
           ggtheme = theme_pubr()                        # ggplot2 theme
)

# Sort in descending order. sorting = "descending".
# Rotate the plot vertically, using rotate = TRUE.
# Sort the mpg value inside each group by using group = "cyl".
# Set dot.size to 6.
# Add mpg values as label. label = "mpg" or label = round(dfm$mpg).
ggdotchart(dfm, x = "name", y = "mpg",
           color = "cyl",                                # Color by groups
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # Custom color palette
           sorting = "descending",                       # Sort value in descending order
           add = "segments",                             # Add segments from y = 0 to dots
           rotate = TRUE,                                # Rotate vertically
           group = "cyl",                                # Order by groups
           dot.size = 6,                                 # Large dot size
           label = round(dfm$mpg),                       # Add mpg values as dot labels
           font.label = list(color = "white", size = 9,
                             vjust = 0.5),               # Adjust label parameters
           ggtheme = theme_pubr()                        # ggplot2 theme
)

# Deviation graph:
# Use y = "mpg_z". Change segment color and size: add.params = list(color = "lightgray", size = 2)
ggdotchart(dfm, x = "name", y = "mpg_z",
           color = "cyl",                                # Color by groups
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # Custom color palette
           sorting = "descending",                       # Sort value in descending order
           add = "segments",                             # Add segments from y = 0 to dots
           add.params = list(color = "lightgray", size = 2), # Change segment color and size
           group = "cyl",                                # Order by groups
           dot.size = 6,                                 # Large dot size
           label = round(dfm$mpg_z, 1),                  # Add z-scores (1 decimal) as dot labels
           font.label = list(color = "white", size = 9,
                             vjust = 0.5),               # Adjust label parameters
           ggtheme = theme_pubr()                        # ggplot2 theme
) +
  geom_hline(yintercept = 0, linetype = 2, color = "lightgray")

# Color y text by groups. Use y.text.col = TRUE.
# Cleveland dot plot: rotated, sorted by mpg, axis text colored by cyl group
ggdotchart(dfm, x = "name", y = "mpg",
           color = "cyl",                                # Color by groups
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # Custom color palette
           sorting = "descending",                       # Sort value in descending order
           rotate = TRUE,                                # Rotate vertically
           dot.size = 2,                                 # Small dot size
           y.text.col = TRUE,                            # Color y text by groups
           ggtheme = theme_pubr()                        # ggplot2 theme
) +
  theme_cleveland()                                      # Add dashed grids

# display.brewer.all(colorblindFriendly = TRUE)
# Previously this call was fused onto the end of the comment above and never
# ran; it now sits on its own line so the colorblind-friendly RColorBrewer
# palettes are actually displayed.
display.brewer.all(colorblindFriendly = TRUE)