Setting up Visualisation in R Environment

Load required library packages

# Install and Load libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# install.packages("maps")
library(maps)
## Warning: package 'maps' was built under R version 4.1.2
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
# View(map_data)
# Load the diamonds dataset into the workspace
data("diamonds")
# View() opens a spreadsheet-style viewer and needs an interactive session;
# guard it so the script also runs under Rscript or batch rendering.
if (interactive()) {
  View(diamonds)
}
# Number of rows and columns
dim(diamonds)
## [1] 53940    10
str(diamonds)
## tibble [53,940 x 10] (S3: tbl_df/tbl/data.frame)
##  $ carat  : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
##  $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
##  $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
##  $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
##  $ depth  : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
##  $ table  : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
##  $ price  : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
##  $ x      : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
##  $ y      : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
##  $ z      : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Explore the ggplot2 dataset mpg
data(mpg)
# View() opens a spreadsheet-style viewer and needs an interactive session;
# guard it so the script also runs under Rscript or batch rendering.
if (interactive()) {
  View(mpg)
}
# Number of rows and columns
dim(mpg)
## [1] 234  11
str(mpg)
## tibble [234 x 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...
mpg$model
##   [1] "a4"                     "a4"                     "a4"                    
##   [4] "a4"                     "a4"                     "a4"                    
##   [7] "a4"                     "a4 quattro"             "a4 quattro"            
##  [10] "a4 quattro"             "a4 quattro"             "a4 quattro"            
##  [13] "a4 quattro"             "a4 quattro"             "a4 quattro"            
##  [16] "a6 quattro"             "a6 quattro"             "a6 quattro"            
##  [19] "c1500 suburban 2wd"     "c1500 suburban 2wd"     "c1500 suburban 2wd"    
##  [22] "c1500 suburban 2wd"     "c1500 suburban 2wd"     "corvette"              
##  [25] "corvette"               "corvette"               "corvette"              
##  [28] "corvette"               "k1500 tahoe 4wd"        "k1500 tahoe 4wd"       
##  [31] "k1500 tahoe 4wd"        "k1500 tahoe 4wd"        "malibu"                
##  [34] "malibu"                 "malibu"                 "malibu"                
##  [37] "malibu"                 "caravan 2wd"            "caravan 2wd"           
##  [40] "caravan 2wd"            "caravan 2wd"            "caravan 2wd"           
##  [43] "caravan 2wd"            "caravan 2wd"            "caravan 2wd"           
##  [46] "caravan 2wd"            "caravan 2wd"            "caravan 2wd"           
##  [49] "dakota pickup 4wd"      "dakota pickup 4wd"      "dakota pickup 4wd"     
##  [52] "dakota pickup 4wd"      "dakota pickup 4wd"      "dakota pickup 4wd"     
##  [55] "dakota pickup 4wd"      "dakota pickup 4wd"      "dakota pickup 4wd"     
##  [58] "durango 4wd"            "durango 4wd"            "durango 4wd"           
##  [61] "durango 4wd"            "durango 4wd"            "durango 4wd"           
##  [64] "durango 4wd"            "ram 1500 pickup 4wd"    "ram 1500 pickup 4wd"   
##  [67] "ram 1500 pickup 4wd"    "ram 1500 pickup 4wd"    "ram 1500 pickup 4wd"   
##  [70] "ram 1500 pickup 4wd"    "ram 1500 pickup 4wd"    "ram 1500 pickup 4wd"   
##  [73] "ram 1500 pickup 4wd"    "ram 1500 pickup 4wd"    "expedition 2wd"        
##  [76] "expedition 2wd"         "expedition 2wd"         "explorer 4wd"          
##  [79] "explorer 4wd"           "explorer 4wd"           "explorer 4wd"          
##  [82] "explorer 4wd"           "explorer 4wd"           "f150 pickup 4wd"       
##  [85] "f150 pickup 4wd"        "f150 pickup 4wd"        "f150 pickup 4wd"       
##  [88] "f150 pickup 4wd"        "f150 pickup 4wd"        "f150 pickup 4wd"       
##  [91] "mustang"                "mustang"                "mustang"               
##  [94] "mustang"                "mustang"                "mustang"               
##  [97] "mustang"                "mustang"                "mustang"               
## [100] "civic"                  "civic"                  "civic"                 
## [103] "civic"                  "civic"                  "civic"                 
## [106] "civic"                  "civic"                  "civic"                 
## [109] "sonata"                 "sonata"                 "sonata"                
## [112] "sonata"                 "sonata"                 "sonata"                
## [115] "sonata"                 "tiburon"                "tiburon"               
## [118] "tiburon"                "tiburon"                "tiburon"               
## [121] "tiburon"                "tiburon"                "grand cherokee 4wd"    
## [124] "grand cherokee 4wd"     "grand cherokee 4wd"     "grand cherokee 4wd"    
## [127] "grand cherokee 4wd"     "grand cherokee 4wd"     "grand cherokee 4wd"    
## [130] "grand cherokee 4wd"     "range rover"            "range rover"           
## [133] "range rover"            "range rover"            "navigator 2wd"         
## [136] "navigator 2wd"          "navigator 2wd"          "mountaineer 4wd"       
## [139] "mountaineer 4wd"        "mountaineer 4wd"        "mountaineer 4wd"       
## [142] "altima"                 "altima"                 "altima"                
## [145] "altima"                 "altima"                 "altima"                
## [148] "maxima"                 "maxima"                 "maxima"                
## [151] "pathfinder 4wd"         "pathfinder 4wd"         "pathfinder 4wd"        
## [154] "pathfinder 4wd"         "grand prix"             "grand prix"            
## [157] "grand prix"             "grand prix"             "grand prix"            
## [160] "forester awd"           "forester awd"           "forester awd"          
## [163] "forester awd"           "forester awd"           "forester awd"          
## [166] "impreza awd"            "impreza awd"            "impreza awd"           
## [169] "impreza awd"            "impreza awd"            "impreza awd"           
## [172] "impreza awd"            "impreza awd"            "4runner 4wd"           
## [175] "4runner 4wd"            "4runner 4wd"            "4runner 4wd"           
## [178] "4runner 4wd"            "4runner 4wd"            "camry"                 
## [181] "camry"                  "camry"                  "camry"                 
## [184] "camry"                  "camry"                  "camry"                 
## [187] "camry solara"           "camry solara"           "camry solara"          
## [190] "camry solara"           "camry solara"           "camry solara"          
## [193] "camry solara"           "corolla"                "corolla"               
## [196] "corolla"                "corolla"                "corolla"               
## [199] "land cruiser wagon 4wd" "land cruiser wagon 4wd" "toyota tacoma 4wd"     
## [202] "toyota tacoma 4wd"      "toyota tacoma 4wd"      "toyota tacoma 4wd"     
## [205] "toyota tacoma 4wd"      "toyota tacoma 4wd"      "toyota tacoma 4wd"     
## [208] "gti"                    "gti"                    "gti"                   
## [211] "gti"                    "gti"                    "jetta"                 
## [214] "jetta"                  "jetta"                  "jetta"                 
## [217] "jetta"                  "jetta"                  "jetta"                 
## [220] "jetta"                  "jetta"                  "new beetle"            
## [223] "new beetle"             "new beetle"             "new beetle"            
## [226] "new beetle"             "new beetle"             "passat"                
## [229] "passat"                 "passat"                 "passat"                
## [232] "passat"                 "passat"                 "passat"
# How many distinct car models does the mpg dataset contain?
df <- c(mpg[["model"]])
# Keep only the first occurrence of every model name
df_uniq <- df[!duplicated(df)]
length(df_uniq)
## [1] 38

Creating ggplot from mpg dataset

# Template 1: ggplot(data = <DATA>) + <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
# Scatter plot of engine displacement (x) against highway mileage (y)
ggplot(mpg) +
  geom_point(aes(displ, hwy))

# Template 2: ggplot(data = <DATA>, mapping = aes(<MAPPINGS>)) + <GEOM_FUNCTION>()
# Store the data and the shared mapping in a plot object ...
mpg_data <- ggplot(mpg, aes(displ, hwy))

# ... and draw it by adding a point layer
mpg_data + geom_point()

# Highway mileage (x) against number of cylinders (y)
ggplot(mpg, aes(x = hwy, y = cyl)) +
  geom_point()

# Colour every point by the class of the car
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point()

# Colour every point by the transmission type of the car
ggplot(mpg, aes(x = displ, y = hwy, colour = trans)) +
  geom_point()

# Scale the point size by city miles per gallon
ggplot(mpg, aes(x = displ, y = hwy, size = cty)) +
  geom_point()

# Mapping class to size aesthetic variable

ggplot(data = mpg) + 
  geom_point(mapping = aes(x = displ, y = hwy, size = class))
## Warning: Using size for a discrete variable is not advised.

# Mapping class to alpha aesthetic variable

ggplot(data = mpg) + 
  geom_point(mapping = aes(x = displ, y = hwy, alpha = class))
## Warning: Using alpha for a discrete variable is not advised.

# Mapping class to shape aesthetic variable

ggplot(data = mpg) + 
  geom_point(mapping = aes(x = displ, y = hwy, shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

# Set the colour manually: it goes outside aes(), so every point is blue
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(colour = "blue")

# Manual colour combined with a mapped aesthetic: size follows cyl
ggplot(mpg, aes(x = displ, y = hwy, size = cyl)) +
  geom_point(colour = "blue")

# Manual colour combined with a mapped aesthetic: stroke follows cyl
ggplot(mpg, aes(x = displ, y = hwy, stroke = cyl)) +
  geom_point(colour = "blue")

# Facet on a single variable with facet_wrap(): one panel per class, 2 rows
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(vars(class), nrow = 2)

# Facet on two variables with facet_grid(): drv in rows, cyl in columns
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(rows = vars(drv), cols = vars(cyl))

# Columns only (formula form: . ~ cyl)
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(cols = vars(cyl))

# Rows only (formula form: drv ~ .)
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(rows = vars(drv))

#* Geometric Objects
#* geom_smooth: a smoothed line fitted to the data
ggplot(data = mpg) + 
  geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#* Geometric Objects
#* geom_smooth with a separate line type for each value of drv
ggplot(data = mpg) + 
  geom_smooth(mapping = aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# geom_smooth(): the first of three variations, each drawn as its own plot.
# The former par(mfrow = c(1, 3)) call was removed: par() only arranges
# base-graphics plots and has no effect on ggplot2 (grid) output.

# A single smooth over all observations
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = mpg) + 
  geom_smooth(mapping = aes(x = displ, y = hwy, group = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = mpg) + 
  geom_smooth(mapping = aes(x = displ, y = hwy, color = drv), show.legend = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Multiple geoms on the same plot
ggplot(data = mpg) + 
  geom_point(mapping = aes(x = displ, y = hwy)) +
  geom_smooth(mapping = aes(x = displ, y = hwy, group = drv)) +
  geom_smooth(mapping = aes(x = displ, y = hwy, color = drv), show.legend = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Multiple geoms on the same plot
ggplot(data = mpg) + 
  geom_point(mapping = aes(x = displ, y = hwy)) +
  geom_smooth(mapping = aes(x = displ, y = hwy)) 
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Cleaner representation of Multiple geoms on the same plot
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Cleaner representation of Multiple geoms on the same plot

ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Using geom_smooth to display a subset of the dataset
# Local argument in geom_smooth overrides global data in ggplot() for this layer only
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(data = filter(mpg, class == "subcompact"), 
  show.legend = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Using geom_smooth to display a subset of the dataset
# Local argument in geom_smooth overrides global data in ggplot() for this layer only
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(data = filter(mpg, class == "compact"), 
  show.legend = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Using geom_smooth to display a subset of the dataset
# Local argument in geom_smooth overrides global data in ggplot() for this layer only
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(data = filter(mpg, class == "pickup"), 
  se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Cleaner representation of Multiple geoms on the same plot
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_line() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Statistical Transformations

# Using geom_bar() for a bar chart (counts of a categorical variable)
# Plot of diamond cuts
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut))  

# Count the distinct colour grades present in the diamonds dataset
df <- c(diamonds[["color"]])
df_uniq <- unique(df)
length(df_uniq)
## [1] 7
# Bar chart of the 7 diamond colours, built with geom_bar()
ggplot(diamonds, aes(x = color)) +
  geom_bar()

# The same chart of the 7 diamond colours, built with stat_count() instead
ggplot(diamonds, aes(x = color)) +
  stat_count()

# Changing the settings for default geom_bar() for specific plots
simiTest <- tribble(~a, ~b, "bar_1", 20, "bar_2", 30, "bar_3", 40)
simiTest
## # A tibble: 3 x 2
##   a         b
##   <chr> <dbl>
## 1 bar_1    20
## 2 bar_2    30
## 3 bar_3    40
ggplot(data = simiTest) +
  geom_bar(mapping = aes(x = a, y = b), stat = "identity")

# Override the default y mapping to show proportions instead of counts.
# after_stat(prop) replaces the ..prop.. notation, which is deprecated in
# current ggplot2 releases. group = 1 puts all rows in a single group.
# Variable diamonds$cut
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))

# Same proportion chart for the variable diamonds$color
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = color, y = after_stat(prop), group = 1))

# Statistical summary of depth for every cut: median with a min-to-max range
ggplot(diamonds, aes(x = cut, y = depth)) +
  stat_summary(fun = median, fun.min = min, fun.max = max)

# Statistical summary of price for every color: median with a min-to-max range
ggplot(diamonds, aes(x = color, y = price)) +
  stat_summary(fun = median, fun.min = min, fun.max = max)

# Proportion bar charts, drawn as two separate plots.
# The former par(mfrow = c(1, 2)) call was removed: par() only arranges
# base-graphics plots and has no effect on ggplot2 (grid) output.
# after_stat(prop) replaces the deprecated ..prop.. notation.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))

# Same chart with color additionally mapped to fill
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = color, y = after_stat(prop), group = 1)
  )

# Position adjustment for bar charts: the color aesthetic versus fill.
# The former par(mfrow = c(1, 2)) call was removed: par() only arranges
# base-graphics plots and has no effect on ggplot2 (grid) output.

# cut mapped to the color aesthetic
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, color = cut))

# cut mapped to the fill aesthetic
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = cut))

# Bar charts of cut with clarity mapped to color and then to fill.
# The former par(mfrow = c(1, 2)) call was removed: par() only arranges
# base-graphics plots and has no effect on ggplot2 (grid) output.

# clarity mapped to the color aesthetic
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, color = clarity))

# clarity mapped to the fill aesthetic
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = clarity))

# position = "identity" with a small alpha value for the filled bars
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "identity", alpha = 1 / 5)

# position = "identity" with fill = NA, so only the coloured outlines remain
ggplot(diamonds, aes(x = cut, color = clarity)) +
  geom_bar(position = "identity", fill = NA)

# position = "fill" for bar charts
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "fill")

# position = "dodge" places overlapping bars side by side,
# which makes individual values easier to compare.
# The former par(mfrow = c(1, 2)) call was removed: par() only arranges
# base-graphics plots and has no effect on ggplot2 (grid) output.

# NOTE(review): carat is a numeric column, so x = carat gives bars per
# distinct carat value; confirm this is intended rather than x = cut.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = carat, fill = color), position = "dodge")

ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = clarity), position = "dodge")

# Jittered scatter plot of the mpg dataset: geom_jitter() is the shorthand
# for geom_point(position = "jitter") and adds a small amount of random noise
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_jitter()

# Flipping the x and y axes with coord_flip().
# The former par(mfrow = c(1, 2)) call was removed: par() only arranges
# base-graphics plots and has no effect on ggplot2 (grid) output.

# Default orientation
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot()

# Same boxplots with the axes swapped
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot() +
  coord_flip()

# Plotting spatial data: coord_quickmap() sets the aspect ratio
# correctly for maps (New Zealand).
# The former par(mfrow = c(1, 2)) call was removed: par() only arranges
# base-graphics plots and has no effect on ggplot2 (grid) output.
nz <- map_data("nz")

# Default coordinate system
ggplot(nz, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", color = "black")

# With coord_quickmap()
ggplot(nz, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

# Plotting spatial data: coord_quickmap() sets the aspect ratio
# correctly for maps (United States).
# The former par(mfrow = c(1, 2)) call was removed: par() only arranges
# base-graphics plots and has no effect on ggplot2 (grid) output.
usa <- map_data("usa")

# Default coordinate system
ggplot(usa, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", color = "black")

# With coord_quickmap()
ggplot(usa, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

# coord_polar() uses polar coordinates and shows the connection
# between a bar chart and a Coxcomb chart
bar <- ggplot(diamonds, aes(x = cut, fill = cut)) +
  geom_bar(width = 1, show.legend = FALSE) +
  labs(x = NULL, y = NULL) +
  theme(aspect.ratio = 1)

# Bar chart with flipped axes
bar + coord_flip()

# The same bars in polar coordinates
bar + coord_polar()

# Save the current workspace to an .RData file.
# NOTE(review): the destination is a hard-coded absolute Windows path, so the
# call presumably only succeeds on a machine where that folder exists;
# consider a project-relative path.
save.image(file = "C:/Users/lutta/MA5800/GGPLOT2/MA5800_Wk3/R4DS.RData")

Data Visualisation (Week 3 Lecture)

MyGraph <- ggplot(data = mpg)

Adjusting Aesthetics

# Rescaling hwy dataset to [0,1]
hwy_rescaled <- (mpg$hwy - min(mpg$hwy))/(max(mpg$hwy) - min(mpg$hwy))
hwy_rescaled
##   [1] 0.53125 0.53125 0.59375 0.56250 0.43750 0.43750 0.46875 0.43750 0.40625
##  [10] 0.50000 0.46875 0.40625 0.40625 0.40625 0.40625 0.37500 0.40625 0.34375
##  [19] 0.25000 0.09375 0.25000 0.15625 0.15625 0.43750 0.34375 0.43750 0.40625
##  [28] 0.37500 0.21875 0.06250 0.09375 0.15625 0.46875 0.56250 0.43750 0.53125
##  [37] 0.43750 0.37500 0.37500 0.31250 0.31250 0.37500 0.37500 0.15625 0.31250
##  [46] 0.28125 0.34375 0.34375 0.21875 0.18750 0.15625 0.15625 0.21875 0.21875
##  [55] 0.00000 0.15625 0.09375 0.15625 0.15625 0.00000 0.15625 0.12500 0.18750
##  [64] 0.09375 0.12500 0.00000 0.15625 0.15625 0.12500 0.00000 0.09375 0.12500
##  [73] 0.15625 0.09375 0.15625 0.15625 0.18750 0.15625 0.21875 0.15625 0.21875
##  [82] 0.21875 0.15625 0.15625 0.15625 0.12500 0.12500 0.15625 0.09375 0.15625
##  [91] 0.43750 0.40625 0.43750 0.37500 0.28125 0.31250 0.34375 0.31250 0.25000
## [100] 0.65625 0.62500 0.62500 0.53125 0.62500 0.68750 0.75000 0.75000 0.53125
## [109] 0.43750 0.46875 0.56250 0.59375 0.43750 0.43750 0.50000 0.43750 0.53125
## [118] 0.50000 0.46875 0.37500 0.37500 0.37500 0.31250 0.21875 0.25000 0.15625
## [127] 0.00000 0.21875 0.18750 0.06250 0.09375 0.18750 0.18750 0.09375 0.15625
## [136] 0.12500 0.18750 0.15625 0.21875 0.21875 0.15625 0.53125 0.46875 0.59375
## [145] 0.62500 0.46875 0.43750 0.43750 0.40625 0.40625 0.15625 0.15625 0.25000
## [154] 0.18750 0.43750 0.43750 0.46875 0.50000 0.40625 0.40625 0.37500 0.46875
## [163] 0.40625 0.43750 0.34375 0.43750 0.43750 0.43750 0.43750 0.40625 0.46875
## [172] 0.40625 0.46875 0.25000 0.25000 0.21875 0.15625 0.25000 0.15625 0.53125
## [181] 0.46875 0.59375 0.59375 0.43750 0.43750 0.50000 0.46875 0.53125 0.59375
## [190] 0.59375 0.43750 0.43750 0.46875 0.56250 0.65625 0.71875 0.78125 0.71875
## [199] 0.09375 0.18750 0.25000 0.25000 0.31250 0.15625 0.21875 0.18750 0.25000
## [208] 0.53125 0.43750 0.53125 0.53125 0.37500 1.00000 0.53125 0.43750 0.53125
## [217] 0.53125 0.53125 0.53125 0.34375 0.37500 1.00000 0.90625 0.53125 0.43750
## [226] 0.50000 0.53125 0.53125 0.53125 0.50000 0.53125 0.43750 0.43750 0.43750
# Min-max rescaling of hwy into [0, 1]: subtract the minimum, divide by the span
hwy_rescaled <- (mpg$hwy - min(mpg$hwy)) / diff(range(mpg$hwy))

# Same rescaling for displ
displ_rescaled <- (mpg$displ - min(mpg$displ)) / diff(range(mpg$displ))

# Point size follows the product of the two rescaled variables
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, size = displ_rescaled * hwy_rescaled))

# Add the variable cyl (number of cylinders in the car) as a
# transparency aesthetic (alpha)
ggplot(mpg) +
  geom_point(
    aes(x = displ, y = hwy, size = displ_rescaled * hwy_rescaled, alpha = cyl)
  )

# Add the car type (variable class in the mpg dataset) as an additional
# aesthetic: here via colour, with a fixed point size of 4
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point(size = 4)

# The same idea using the shape aesthetic
ggplot(mpg, aes(x = displ, y = hwy, shape = class)) +
  geom_point(size = 4)
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

# Add jitter to separate superposed points
ggplot(mpg) +
  geom_point(
    aes(x = displ, y = hwy, size = displ_rescaled * hwy_rescaled, alpha = cyl),
    position = "jitter"
  )

# Text labels showing cyl, coloured by class
ggplot(mpg) +
  geom_text(aes(x = displ, y = hwy, label = cyl, colour = class))

# Text labels showing class, coloured by cyl
ggplot(mpg) +
  geom_text(aes(x = displ, y = hwy, label = class, colour = cyl))

# Facets produce a collection of subfigures.
# Base plot reused by every example below: red points drawn with shape 15
MyGraph <- ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), colour = "red", shape = 15)

# Grid of panels: class in rows, drv in columns
MyGraph + facet_grid(rows = vars(class), cols = vars(drv))

# One panel per trans value, wrapped into a single column
MyGraph + facet_wrap(vars(trans), ncol = 1)

# The same single column, built with facet_grid()
MyGraph + facet_grid(rows = vars(trans))

# One-dimensional sequence of panels (avoids displaying empty panels)
MyGraph + facet_wrap(vars(class, drv), ncol = 4)

# One panel per trans value, wrapped into a single row
MyGraph + facet_wrap(vars(trans), nrow = 1)

# The same single row, built with facet_grid()
MyGraph + facet_grid(cols = vars(trans))

Other Types of Visual Objects

# Fitted model
MyGraph + geom_smooth(mapping = aes(x=displ,y=hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Linear fitted model
# Hiding the confidence interval band (se = FALSE)
MyGraph + geom_smooth(mapping = aes(x=displ,y=hwy), method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Single Geometric Objects across multiple data records
MyGraph + geom_smooth(mapping = aes(x=displ, y=hwy, group = drv), 
                      method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Using the ‘mdsr’ package from R

# Installing mdsr package 
# install.packages("mdsr")
library(mdsr)
## Warning: package 'mdsr' was built under R version 4.1.2
# Viewing the CIACountries dataset from the mdsr package.
# View() needs an interactive session; guard it so the script also runs
# under Rscript or batch rendering.
if (interactive()) {
  View(CIACountries)
}
# Bar plot for the categorical variable net_users (internet users)
ggplot(data = CIACountries) +
  geom_bar(mapping = aes(x = net_users))

# Using the sample_n() function from dplyr (loaded with the tidyverse)
# Drawing a random sample of 30 records out of 236
CIACountries_Sample <- sample_n(CIACountries, size = 30)
CIACountries_Sample
##                   country        pop     area oil_prod   gdp educ    roadways
## 1                Honduras    8746673   112090        0  4900   NA  0.13151931
## 2               Mauritius    1339827     2040        0 19500  3.5  1.05343137
## 3                  Israel    8049314    20770      390 33700  5.6  0.89388541
## 4              Seychelles      92430      455        0 26300  3.6  1.11648352
## 5                   Nepal   31551305   147181        0  2500  4.7  0.07367799
## 6                 Finland    5476922   338145        0 41100  6.8  0.23067027
## 7                  Latvia    1986705    64589        0 24700  5.0  1.12155321
## 8          Cayman Islands      56092      264        0 43800   NA  2.97348485
## 9                   Tonga     106501      747        0  5100  3.9  0.91030790
## 10                 Taiwan   23415126    35980      159 46800   NA  1.15272374
## 11             Bangladesh  168957745   143998     4000  3600  2.2  0.14770344
## 12                  Macau     592731       28        0 98200  2.7 14.75000000
## 13  Sao Tome and Principe     194006      964        0  3200  9.5  0.33195021
## 14               Malaysia   30513848   329847   597500 26300  5.9  0.43778782
## 15                  China 1367485388  9596960  4189000 14100   NA  0.42788414
## 16 British Virgin Islands      33454      151        0 42300  4.4  1.32450331
## 17                  Haiti   10110019    27750        0  1800   NA  0.15372973
## 18                  Japan  126919659   377915     4666 38100  3.8  3.20244235
## 19                  India 1251695584  3287263   767600  6200  3.2  1.42667076
## 20                 Russia  142423773 17098242 10840000 25400  4.1  0.07505959
## 21                  Kenya   45925301   580367        0  3200  6.7  0.27720046
## 22               Barbados     290604      430     1000 16600  5.6  3.72093023
## 23              Argentina   43431886  2780400   532100 22600  6.3  0.08321608
## 24             Uzbekistan   29199942   447400    64810  6100   NA  0.19333035
## 25            Gambia, The    1967709    11295        0  1600  4.1  0.33111996
## 26             Montenegro     647073    13812        0 16100   NA  0.56204749
## 27                  Syria   17064854   185180    22660  5100  5.1  0.37732477
## 28                  Chile   17508260   756102     6666 23500  4.5  0.10284856
## 29                   Laos    6911544   236800        0  5300  2.8  0.16709459
## 30                   Chad   11631456  1284000   103400  2600  2.3  0.03115265
##    net_users
## 1       >15%
## 2        >5%
## 3       >60%
## 4       >35%
## 5        >5%
## 6       >60%
## 7       >60%
## 8       >60%
## 9       >35%
## 10      >60%
## 11       >5%
## 12      >35%
## 13      >15%
## 14      >35%
## 15      >35%
## 16       >5%
## 17       >5%
## 18      >60%
## 19      >15%
## 20      >35%
## 21      >35%
## 22      >60%
## 23      >35%
## 24      >35%
## 25       >5%
## 26      >35%
## 27      >15%
## 28      >60%
## 29       >0%
## 30       >0%
# Reorder sample data based on country & population using reorder() function
ordered_countries <- reorder(CIACountries_Sample$country, CIACountries_Sample$pop)
ordered_countries
##  [1] Honduras               Mauritius              Israel                
##  [4] Seychelles             Nepal                  Finland               
##  [7] Latvia                 Cayman Islands         Tonga                 
## [10] Taiwan                 Bangladesh             Macau                 
## [13] Sao Tome and Principe  Malaysia               China                 
## [16] British Virgin Islands Haiti                  Japan                 
## [19] India                  Russia                 Kenya                 
## [22] Barbados               Argentina              Uzbekistan            
## [25] Gambia, The            Montenegro             Syria                 
## [28] Chile                  Laos                   Chad                  
## attr(,"scores")
##              Argentina             Bangladesh               Barbados 
##               43431886              168957745                 290604 
## British Virgin Islands         Cayman Islands                   Chad 
##                  33454                  56092               11631456 
##                  Chile                  China                Finland 
##               17508260             1367485388                5476922 
##            Gambia, The                  Haiti               Honduras 
##                1967709               10110019                8746673 
##                  India                 Israel                  Japan 
##             1251695584                8049314              126919659 
##                  Kenya                   Laos                 Latvia 
##               45925301                6911544                1986705 
##                  Macau               Malaysia              Mauritius 
##                 592731               30513848                1339827 
##             Montenegro                  Nepal                 Russia 
##                 647073               31551305              142423773 
##  Sao Tome and Principe             Seychelles                  Syria 
##                 194006                  92430               17064854 
##                 Taiwan                  Tonga             Uzbekistan 
##               23415126                 106501               29199942 
## 30 Levels: British Virgin Islands Cayman Islands Seychelles ... China
# Plotting the CIACountries_Sample bar chart (horizontal bars of population).
# reorder() is evaluated inside aes() against the data the plot holds, so the
# bars stay sorted by population even after the data is replaced below.
# Previously x was read from the global vector ordered_countries, which had to
# be recomputed by hand to stay in sync with the data.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
G <- ggplot(data = CIACountries_Sample) +
  geom_col(mapping = aes(x = reorder(country, pop), y = pop)) +
  labs(x = "country") +
  coord_flip()
G

# Using the ggplot2 %+% operator to redraw the same plot with new data
CIACountries_Sample <- sample_n(CIACountries, size = 30) # Another sample

# Kept so the workspace still holds the ordering for the new sample;
# the plot itself no longer depends on it.
ordered_countries <- reorder(
  CIACountries_Sample$country,
  CIACountries_Sample$pop
)

G <- G %+% CIACountries_Sample # Update the data mapped to graph G
G

# Bar chart of the categorical variable color, each bar filled by its own color
ggplot(diamonds, aes(x = color, fill = color)) +
  geom_bar()

# Bars per color, filled by cut
ggplot(diamonds, aes(x = color, fill = cut)) +
  geom_bar()

# Same bars with position = "fill" to show the proportions of each cut
ggplot(diamonds, aes(x = color, fill = cut)) +
  geom_bar(position = "fill")

# Same bars placed side by side with position = "dodge"
ggplot(diamonds, aes(x = color, fill = cut)) +
  geom_bar(position = "dodge")

# Plotting a continuous variable (price) with a histogram, bin width 200
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 200, fill = "white", colour = "black")

# Histogram of a continuous variable in CIACountries: population, 50 bins
ggplot(CIACountries, aes(x = pop)) +
  geom_histogram(bins = 50, colour = "black")

# The same population histogram on a log10 x scale
ggplot(CIACountries, aes(x = pop)) +
  geom_histogram(bins = 50, colour = "black") +
  scale_x_log10()

Applying density plot instead of a histogram

# Density plot for a continuous variable, instead of the bins of a histogram
ggplot(CIACountries, aes(x = pop)) +
  geom_density() +
  scale_x_log10()

# Density plot with adjust = 2
ggplot(CIACountries, aes(x = pop)) +
  geom_density(adjust = 2) +
  scale_x_log10()

# Density plot with adjust = 0.2
ggplot(CIACountries, aes(x = pop)) +
  geom_density(adjust = 0.2) +
  scale_x_log10()

Visualising Datasets using Boxplots

# Boxplots of carat for every color in the diamonds dataset
ggplot(diamonds, aes(x = color, y = carat)) +
  geom_boxplot()

# Boxplots of carat for every clarity level
ggplot(diamonds, aes(x = clarity, y = carat)) +
  geom_boxplot()

# Boxplots of carat for every cut
ggplot(diamonds, aes(x = cut, y = carat)) +
  geom_boxplot()

# Confounding variables in the diamonds dataset:
# price (x) against carat (y), with one smooth per cut and its
# confidence band (se = TRUE)
ggplot(diamonds, aes(x = price, y = carat)) +
  geom_point() +
  geom_smooth(aes(colour = cut), se = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# geom_jitter() on the diamonds dataset: boxplots of carat per clarity with
# red outliers (shape 3), overlaid with faint blue jittered points
ggplot(diamonds, aes(x = clarity, y = carat)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 3) +
  geom_jitter(width = 0.1, alpha = 0.05, color = "blue")