# Creating ggplots from the mpg dataset ----
# Attach the tidyverse up front: this script calls ggplot2 (ggplot(), geom_*()
# and the mpg / diamonds datasets), dplyr (filter(), sample_n()) and tibble
# (tribble()) without ever loading them.
library(tidyverse)

# Template 1: ggplot(data = <DATA>) + <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
# Scatter plot of engine displacement (displ) against highway mileage (hwy)
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))

# Template 2: ggplot(data = <DATA>, mapping = aes(<MAPPINGS>)) + <GEOM_FUNCTION>()
# The data and mapping are stored in a variable so layers can be added later.
mpg_data <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))

# Adding a geom to the stored plot draws it
mpg_data + geom_point()

# Highway mileage (x) against number of cylinders (y)
ggplot(mpg, aes(x = hwy, y = cyl)) +
  geom_point()

# Colour mapped to class: each car class gets its own colour
ggplot(mpg, aes(x = displ, y = hwy, color = class)) +
  geom_point()

# Colour mapped to trans: each transmission type gets its own colour
ggplot(mpg, aes(x = displ, y = hwy, color = trans)) +
  geom_point()

# Point size mapped to city miles per gallon (cty)
ggplot(mpg, aes(x = displ, y = hwy, size = cty)) +
  geom_point()

# Point size mapped to class (a discrete variable, hence the warning below)
ggplot(mpg, aes(x = displ, y = hwy, size = class)) +
  geom_point()
## Warning: Using size for a discrete variable is not advised.

# Transparency (alpha) mapped to class
ggplot(mpg, aes(x = displ, y = hwy, alpha = class)) +
  geom_point()
## Warning: Using alpha for a discrete variable is not advised.

# Point shape mapped to class
ggplot(mpg, aes(x = displ, y = hwy, shape = class)) +
  geom_point()
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

# An aesthetic given outside aes() is set to a fixed value for every point:
# here all points are drawn in blue.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(color = "blue")

# Fixed blue colour combined with a mapped aesthetic: size follows cyl
ggplot(mpg, aes(x = displ, y = hwy, size = cyl)) +
  geom_point(color = "blue")

# Fixed blue colour combined with a mapped aesthetic: stroke follows cyl
ggplot(mpg, aes(x = displ, y = hwy, stroke = cyl)) +
  geom_point(color = "blue")

# facet_wrap(): one panel per value of a single variable (class), in 2 rows
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ class, nrow = 2)

# facet_grid(): panels for the combination of two variables,
# drv across the rows and cyl across the columns
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ cyl)

# A "." on the left of the formula means no row facets: columns by cyl only
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ cyl)

# A "." on the right of the formula means no column facets: rows by drv only
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ .)

#* Geometric Objects
#* geom_smooth: a smoothed line fitted through displ vs hwy
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#* Geometric Objects
#* geom_smooth with linetype mapped to drv: one line per drive type
ggplot(mpg, aes(x = displ, y = hwy, linetype = drv)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# geom_smooth() three ways: one overall curve, one curve per drv (group), and
# one coloured curve per drv with the legend suppressed.
# NOTE: the par(mfrow = c(1, 3)) call that used to precede these plots was
# removed. par() only configures base graphics; ggplot2 draws with grid, so the
# call never arranged these plots and only left the base graphics state changed.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy, group = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, color = drv),
    show.legend = FALSE
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Several layers in one plot: points, a smooth per drv (grouped), and a
# coloured smooth per drv without a legend
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(aes(group = drv)) +
  geom_smooth(aes(color = drv), show.legend = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points plus a single smooth, with the mapping repeated in every layer
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  geom_smooth(aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same plot, with the shared mapping declared once in ggplot()
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Shared x/y mapping in ggplot(); colour by class added for the points only
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# A data argument given to a layer replaces the ggplot() data for that layer
# only. Points show every car; the smooth is fitted to subcompact cars.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(
    data = filter(mpg, class == "subcompact"),
    show.legend = FALSE
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same idea, smooth fitted to compact cars only
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(
    data = filter(mpg, class == "compact"),
    show.legend = FALSE
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Smooth fitted to pickups only, drawn without the confidence band (se = FALSE)
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(
    data = filter(mpg, class == "pickup"),
    se = FALSE
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# A line layer and a smooth layer sharing the ggplot() mapping
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_line() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Colour by drv declared globally, so both points and smooths are split by drv
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Statistical Transformations ----
# Bar chart with geom_bar(): number of diamonds for each cut
ggplot(diamonds, aes(x = cut)) +
  geom_bar()

# Count the distinct colour values in the diamonds dataset.
# The column is used directly: wrapping a factor in c() adds nothing and, on
# R versions before 4.1.0, reduces it to its bare integer codes.
df <- diamonds$color
df_uniq <- unique(df)
length(df_uniq)
## [1] 7
# Bar chart with geom_bar(): number of diamonds for each of the 7 colours
ggplot(diamonds, aes(x = color)) +
  geom_bar()

# The same chart built from the stat side: stat_count() instead of geom_bar()
ggplot(diamonds, aes(x = color)) +
  stat_count()

# Overriding geom_bar()'s default stat: with stat = "identity" the bar heights
# come straight from the y column instead of being row counts.
simiTest <- tribble(
  ~a,      ~b,
  "bar_1", 20,
  "bar_2", 30,
  "bar_3", 40
)
print(simiTest)
## # A tibble: 3 x 2
## a b
## <chr> <dbl>
## 1 bar_1 20
## 2 bar_2 30
## 3 bar_3 40
ggplot(simiTest, aes(x = a, y = b)) +
  geom_bar(stat = "identity")

# Overriding the default stat-to-aesthetic mapping: show proportions instead of
# counts. after_stat(prop) replaces the deprecated ..prop.. notation.
# group = 1 makes the proportions relative to the whole dataset rather than
# to each bar.
# Variable diamonds$cut
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))

# Variable diamonds$color
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = color, y = after_stat(prop), group = 1))

# stat_summary(): for each x value draw the median of y, with a range running
# from the minimum to the maximum.
# Depth summarised for each cut
ggplot(diamonds, aes(x = cut, y = depth)) +
  stat_summary(fun = median, fun.min = min, fun.max = max)

# Price summarised for each colour
ggplot(diamonds, aes(x = color, y = price)) +
  stat_summary(fun = median, fun.min = min, fun.max = max)

# Proportion of diamonds per cut, without and with a fill by colour.
# NOTE: the preceding par(mfrow = c(1, 2)) call was removed; par() configures
# base graphics only and has no effect on ggplot2 output.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))

# With group = 1 every row belongs to a single group, so a fill = color mapping
# cannot vary inside a bar and is dropped. Computing the proportion from the
# counts keeps the default grouping, so each bar is split by colour while the
# total bar heights are still proportions of the whole dataset.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = color, y = after_stat(count / sum(count)))
  )

# Position adjustment for bar charts: colour (outline) versus fill.
# NOTE: the par(mfrow = c(1, 2)) calls in this section were removed; par()
# configures base graphics only and has no effect on ggplot2 output.

# Outline colour mapped to cut
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, color = cut))

# Fill colour mapped to cut
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = cut))

# Mapping a second variable (clarity) stacks the bars by that variable
# Outline colour mapped to clarity
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, color = clarity))

# Fill colour mapped to clarity
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = clarity))

# position = "identity" draws every bar exactly where it falls, so the bars
# overlap. A small alpha makes the overlapping bars see-through.
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "identity", alpha = 1/5)

# position = "identity" again, this time with hollow bars (fill = NA) whose
# outlines are coloured by clarity
ggplot(diamonds, aes(x = cut, color = clarity)) +
  geom_bar(position = "identity", fill = NA)

# position = "fill" stacks the bars and scales every stack to the same height
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "fill")

# position = "dodge" places overlapping bars side by side, which makes
# individual values easier to compare.
# NOTE: the preceding par(mfrow = c(1, 2)) call was removed; par() configures
# base graphics only and has no effect on ggplot2 output.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = carat, fill = color), position = "dodge")

ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = clarity), position = "dodge")

# Jittered scatter plot of the mpg dataset: a small amount of random noise is
# added to each point's position
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(position = position_jitter())

# Boxplots of hwy by class, first as drawn by default and then with the x and
# y axes swapped by coord_flip().
# NOTE: the preceding par(mfrow = c(1, 2)) call was removed; par() configures
# base graphics only and has no effect on ggplot2 output.
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot()

ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot() +
  coord_flip()

# Plotting spatial data: each map is drawn first with the default coordinate
# system and then with coord_quickmap(), which sets the aspect ratio for maps.
# NOTE: the par(mfrow = c(1, 2)) calls in this section were removed; par()
# configures base graphics only and has no effect on ggplot2 output.

# New Zealand
nz <- map_data("nz")
ggplot(nz, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", color = "black")

ggplot(nz, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

# United States
usa <- map_data("usa")
ggplot(usa, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", color = "black")

ggplot(usa, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

# Polar coordinates with coord_polar(): the connection between a bar chart and
# a Coxcomb chart. The base chart has full-width bars, no legend, a square
# aspect ratio and no axis titles.
bar <- ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut), show.legend = FALSE, width = 1) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL)

# The bar chart with its axes flipped
bar + coord_flip()

# The same bars drawn in polar coordinates
bar + coord_polar()

# Save the whole workspace to an .RData file.
# NOTE(review): the target is an absolute path on one user's machine, so this
# call will fail wherever that directory does not exist.
save.image("C:/Users/lutta/MA5800/GGPLOT2/MA5800_Wk3/R4DS.RData")
# Data Visualisation (Week 3 Lecture) ----
# Base plot object holding only the mpg data; no layers or mappings yet
MyGraph <- ggplot(mpg)
# Adjusting Aesthetics ----
# Min-max rescaling of mpg$hwy onto [0, 1]: subtract the minimum, then divide
# by the spread (max - min)
hwy_rescaled <- (mpg$hwy - min(mpg$hwy)) / diff(range(mpg$hwy))
print(hwy_rescaled)
## [1] 0.53125 0.53125 0.59375 0.56250 0.43750 0.43750 0.46875 0.43750 0.40625
## [10] 0.50000 0.46875 0.40625 0.40625 0.40625 0.40625 0.37500 0.40625 0.34375
## [19] 0.25000 0.09375 0.25000 0.15625 0.15625 0.43750 0.34375 0.43750 0.40625
## [28] 0.37500 0.21875 0.06250 0.09375 0.15625 0.46875 0.56250 0.43750 0.53125
## [37] 0.43750 0.37500 0.37500 0.31250 0.31250 0.37500 0.37500 0.15625 0.31250
## [46] 0.28125 0.34375 0.34375 0.21875 0.18750 0.15625 0.15625 0.21875 0.21875
## [55] 0.00000 0.15625 0.09375 0.15625 0.15625 0.00000 0.15625 0.12500 0.18750
## [64] 0.09375 0.12500 0.00000 0.15625 0.15625 0.12500 0.00000 0.09375 0.12500
## [73] 0.15625 0.09375 0.15625 0.15625 0.18750 0.15625 0.21875 0.15625 0.21875
## [82] 0.21875 0.15625 0.15625 0.15625 0.12500 0.12500 0.15625 0.09375 0.15625
## [91] 0.43750 0.40625 0.43750 0.37500 0.28125 0.31250 0.34375 0.31250 0.25000
## [100] 0.65625 0.62500 0.62500 0.53125 0.62500 0.68750 0.75000 0.75000 0.53125
## [109] 0.43750 0.46875 0.56250 0.59375 0.43750 0.43750 0.50000 0.43750 0.53125
## [118] 0.50000 0.46875 0.37500 0.37500 0.37500 0.31250 0.21875 0.25000 0.15625
## [127] 0.00000 0.21875 0.18750 0.06250 0.09375 0.18750 0.18750 0.09375 0.15625
## [136] 0.12500 0.18750 0.15625 0.21875 0.21875 0.15625 0.53125 0.46875 0.59375
## [145] 0.62500 0.46875 0.43750 0.43750 0.40625 0.40625 0.15625 0.15625 0.25000
## [154] 0.18750 0.43750 0.43750 0.46875 0.50000 0.40625 0.40625 0.37500 0.46875
## [163] 0.40625 0.43750 0.34375 0.43750 0.43750 0.43750 0.43750 0.40625 0.46875
## [172] 0.40625 0.46875 0.25000 0.25000 0.21875 0.15625 0.25000 0.15625 0.53125
## [181] 0.46875 0.59375 0.59375 0.43750 0.43750 0.50000 0.46875 0.53125 0.59375
## [190] 0.59375 0.43750 0.43750 0.46875 0.56250 0.65625 0.71875 0.78125 0.71875
## [199] 0.09375 0.18750 0.25000 0.25000 0.31250 0.15625 0.21875 0.18750 0.25000
## [208] 0.53125 0.43750 0.53125 0.53125 0.37500 1.00000 0.53125 0.43750 0.53125
## [217] 0.53125 0.53125 0.53125 0.34375 0.37500 1.00000 0.90625 0.53125 0.43750
## [226] 0.50000 0.53125 0.53125 0.53125 0.50000 0.53125 0.43750 0.43750 0.43750
# Min-max rescale hwy and displ onto [0, 1]
hwy_rescaled <- with(mpg, (hwy - min(hwy)) / (max(hwy) - min(hwy)))
displ_rescaled <- with(mpg, (displ - min(displ)) / (max(displ) - min(displ)))

# Scatter plot with point size driven by the product of the rescaled values
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, size = displ_rescaled * hwy_rescaled))

# Adding the discrete variable cyl (number of cylinders in the car) as a
# transparency aesthetic (alpha)
ggplot(mpg) +
  geom_point(
    aes(x = displ, y = hwy, size = displ_rescaled * hwy_rescaled, alpha = cyl)
  )

# Car type (the class variable in mpg) shown through the colour aesthetic,
# with every point drawn at a fixed size of 4
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point(size = 4)

# Car type shown through the shape aesthetic instead
ggplot(mpg, aes(x = displ, y = hwy, shape = class)) +
  geom_point(size = 4)
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

# Jitter separates points that are drawn on top of each other
ggplot(mpg) +
  geom_point(
    aes(x = displ, y = hwy, size = displ_rescaled * hwy_rescaled, alpha = cyl),
    position = "jitter"
  )

# Text labels instead of points: label shows cyl, colour shows class
ggplot(mpg, aes(x = displ, y = hwy, label = cyl, colour = class)) +
  geom_text()

# Text labels instead of points: label shows class, colour shows cyl
ggplot(mpg, aes(x = displ, y = hwy, label = class, colour = cyl)) +
  geom_text()

# Facets split one plot into a collection of sub-figures.
# Base scatter plot: red points drawn with shape 15
MyGraph <- ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), colour = "red", shape = 15)

# Grid of panels: class down the rows, drv across the columns
MyGraph + facet_grid(class ~ drv)

# One panel per transmission type, wrapped into a single column
MyGraph + facet_wrap("trans", ncol = 1)

# One panel per transmission type as the rows of a grid
MyGraph + facet_grid(trans ~ .)

# One-dimensional sequence of class/drv panels in 4 columns
# (avoids displaying empty panels)
MyGraph + facet_wrap(c("class", "drv"), ncol = 4)

# One panel per transmission type, wrapped into a single row
MyGraph + facet_wrap("trans", nrow = 1)

# One panel per transmission type as the columns of a grid
MyGraph + facet_grid(. ~ trans)

# Other Types of Visual Objects ----
# Smoothed fit added on top of the MyGraph scatter plot
MyGraph +
  geom_smooth(aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Linear fit (method = "lm") drawn without the confidence band (se = FALSE)
MyGraph +
  geom_smooth(aes(x = displ, y = hwy), method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

# One linear fit per drive type: the group aesthetic splits the records
MyGraph +
  geom_smooth(
    aes(x = displ, y = hwy, group = drv),
    method = "lm",
    se = FALSE
  )
## `geom_smooth()` using formula 'y ~ x'

# Using the 'mdsr' package in R ----
# Installing mdsr package
# install.packages("mdsr")
library(mdsr)
## Warning: package 'mdsr' was built under R version 4.1.2

# Open the CIACountries dataset in the data viewer.
# View() needs an interactive session, so it is skipped when the script is run
# non-interactively (Rscript, knitting) instead of stopping the run.
if (interactive()) {
  View(CIACountries)
}

# Bar chart of the categorical variable net_users (internet users) from
# CIACountries
ggplot(data = CIACountries) +
  geom_bar(mapping = aes(x = net_users))

# Draw a random sample of 30 of the 236 CIACountries records with sample_n()
# and print it
CIACountries_Sample <- sample_n(CIACountries, 30)
print(CIACountries_Sample)
## country pop area oil_prod gdp educ roadways
## 1 Honduras 8746673 112090 0 4900 NA 0.13151931
## 2 Mauritius 1339827 2040 0 19500 3.5 1.05343137
## 3 Israel 8049314 20770 390 33700 5.6 0.89388541
## 4 Seychelles 92430 455 0 26300 3.6 1.11648352
## 5 Nepal 31551305 147181 0 2500 4.7 0.07367799
## 6 Finland 5476922 338145 0 41100 6.8 0.23067027
## 7 Latvia 1986705 64589 0 24700 5.0 1.12155321
## 8 Cayman Islands 56092 264 0 43800 NA 2.97348485
## 9 Tonga 106501 747 0 5100 3.9 0.91030790
## 10 Taiwan 23415126 35980 159 46800 NA 1.15272374
## 11 Bangladesh 168957745 143998 4000 3600 2.2 0.14770344
## 12 Macau 592731 28 0 98200 2.7 14.75000000
## 13 Sao Tome and Principe 194006 964 0 3200 9.5 0.33195021
## 14 Malaysia 30513848 329847 597500 26300 5.9 0.43778782
## 15 China 1367485388 9596960 4189000 14100 NA 0.42788414
## 16 British Virgin Islands 33454 151 0 42300 4.4 1.32450331
## 17 Haiti 10110019 27750 0 1800 NA 0.15372973
## 18 Japan 126919659 377915 4666 38100 3.8 3.20244235
## 19 India 1251695584 3287263 767600 6200 3.2 1.42667076
## 20 Russia 142423773 17098242 10840000 25400 4.1 0.07505959
## 21 Kenya 45925301 580367 0 3200 6.7 0.27720046
## 22 Barbados 290604 430 1000 16600 5.6 3.72093023
## 23 Argentina 43431886 2780400 532100 22600 6.3 0.08321608
## 24 Uzbekistan 29199942 447400 64810 6100 NA 0.19333035
## 25 Gambia, The 1967709 11295 0 1600 4.1 0.33111996
## 26 Montenegro 647073 13812 0 16100 NA 0.56204749
## 27 Syria 17064854 185180 22660 5100 5.1 0.37732477
## 28 Chile 17508260 756102 6666 23500 4.5 0.10284856
## 29 Laos 6911544 236800 0 5300 2.8 0.16709459
## 30 Chad 11631456 1284000 103400 2600 2.3 0.03115265
## net_users
## 1 >15%
## 2 >5%
## 3 >60%
## 4 >35%
## 5 >5%
## 6 >60%
## 7 >60%
## 8 >60%
## 9 >35%
## 10 >60%
## 11 >5%
## 12 >35%
## 13 >15%
## 14 >35%
## 15 >35%
## 16 >5%
## 17 >5%
## 18 >60%
## 19 >15%
## 20 >35%
## 21 >35%
## 22 >60%
## 23 >35%
## 24 >35%
## 25 >5%
## 26 >35%
## 27 >15%
## 28 >60%
## 29 >0%
## 30 >0%
# reorder() turns the sampled countries into a factor whose levels are sorted
# by population
ordered_countries <- with(CIACountries_Sample, reorder(country, pop))
print(ordered_countries)
## [1] Honduras Mauritius Israel
## [4] Seychelles Nepal Finland
## [7] Latvia Cayman Islands Tonga
## [10] Taiwan Bangladesh Macau
## [13] Sao Tome and Principe Malaysia China
## [16] British Virgin Islands Haiti Japan
## [19] India Russia Kenya
## [22] Barbados Argentina Uzbekistan
## [25] Gambia, The Montenegro Syria
## [28] Chile Laos Chad
## attr(,"scores")
## Argentina Bangladesh Barbados
## 43431886 168957745 290604
## British Virgin Islands Cayman Islands Chad
## 33454 56092 11631456
## Chile China Finland
## 17508260 1367485388 5476922
## Gambia, The Haiti Honduras
## 1967709 10110019 8746673
## India Israel Japan
## 1251695584 8049314 126919659
## Kenya Laos Latvia
## 45925301 6911544 1986705
## Macau Malaysia Mauritius
## 592731 30513848 1339827
## Montenegro Nepal Russia
## 647073 31551305 142423773
## Sao Tome and Principe Seychelles Syria
## 194006 92430 17064854
## Taiwan Tonga Uzbekistan
## 23415126 106501 29199942
## 30 Levels: British Virgin Islands Cayman Islands Seychelles ... China
# Horizontal bar chart of population for the sampled countries, ordered by
# population.
# The ordering is computed inside aes() from the plot's own data. Mapping the
# global vector ordered_countries instead ties the plot to a variable outside
# its data, which goes stale as soon as the data behind G is replaced.
# labs() keeps the axis title the plot had before.
G <- ggplot(data = CIACountries_Sample) +
  geom_col(mapping = aes(x = reorder(country, pop), y = pop)) +
  labs(x = "ordered_countries") +
  coord_flip()
G

# Reuse the existing plot for new data: draw a fresh sample of 30 countries,
# recompute the population ordering, then swap the data behind G with %+%
CIACountries_Sample <- sample_n(CIACountries, 30)
ordered_countries <- with(CIACountries_Sample, reorder(country, pop))
G <- G %+% CIACountries_Sample
print(G)

# Diamond colour as a categorical variable: one bar per colour, filled by colour
ggplot(diamonds, aes(x = color, fill = color)) +
  geom_bar()

# Bars per colour, stacked by cut
ggplot(diamonds, aes(x = color, fill = cut)) +
  geom_bar()

# position = "fill": each bar shows the proportions of the cuts
ggplot(diamonds, aes(x = color, fill = cut)) +
  geom_bar(position = "fill")

# position = "dodge": the cuts are placed side by side
ggplot(diamonds, aes(x = color, fill = cut)) +
  geom_bar(position = "dodge")

# Histograms for continuous variables.
# Diamond price in bins of width 200, white bars with black outlines
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 200, colour = "black", fill = "white")

# Population of the CIACountries dataset in 50 bins
ggplot(CIACountries, aes(x = pop)) +
  geom_histogram(bins = 50, colour = "black")

# The same population histogram on a log10 x axis
ggplot(CIACountries, aes(x = pop)) +
  geom_histogram(bins = 50, colour = "black") +
  scale_x_log10()

# Applying a density plot instead of a histogram ----
# Density plot of a continuous variable (population, log10 x axis) as an
# alternative to the bins of a histogram
ggplot(CIACountries, aes(x = pop)) +
  geom_density() +
  scale_x_log10()

# Same density plot with adjust = 2
ggplot(CIACountries, aes(x = pop)) +
  geom_density(adjust = 2) +
  scale_x_log10()

# Same density plot with adjust = 0.2
ggplot(CIACountries, aes(x = pop)) +
  geom_density(adjust = 0.2) +
  scale_x_log10()

# Visualising Datasets using Boxplots ----
# Boxplots of diamond carat against three categorical variables.
# Carat by colour
ggplot(diamonds, aes(x = color, y = carat)) +
  geom_boxplot()

# Carat by clarity
ggplot(diamonds, aes(x = clarity, y = carat)) +
  geom_boxplot()

# Carat by cut
ggplot(diamonds, aes(x = cut, y = carat)) +
  geom_boxplot()

# Confounding variables in the diamonds dataset: carat (weight) against price,
# with one smooth per cut drawn with its confidence band
ggplot(diamonds, aes(x = price, y = carat)) +
  geom_point() +
  geom_smooth(aes(colour = cut), se = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Boxplots of carat by clarity with red cross-shaped outliers, overlaid with
# faint blue jittered points for the individual diamonds
ggplot(diamonds, aes(x = clarity, y = carat)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 3) +
  geom_jitter(width = 0.1, alpha = 0.05, color = "blue")
