# Global knitr chunk options: echo the source code and cache chunk results.
knitr::opts_chunk$set(cache = TRUE, echo = TRUE)

If `echo = FALSE`, the code is hidden in the rendered output.

Packages

Loading required packages:

library(ggplot2)
library(dplyr)
library(ggthemes)
library(DescTools)
library(forcats)
#install.packages("patchwork")
library(scales)
library(ggpubr)
library(esquisse)
library(patchwork)

Setting global theme

#theme_set(theme_bw())

Data

Iris data

# Load the built-in iris data set into the workspace.
data("iris")

Loading excel data

# Read the student survey workbook from the working directory.
Student <- readxl::read_excel(path = "student_dataset-1.xlsx")

Scatter plot

# Basic scatter plot of sepal width against sepal length.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point()

For species wise scatter plot

# Same scatter plot, drawn in one panel per species.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  facet_wrap(vars(Species))

For geom_point design

# Fixed point appearance: red, size 3, solid circle (shape 16).
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point(colour = "red", size = 3, shape = 16)

For species-wise color change

# Colour the points by species and give every axis/legend a readable label.
ggplot(iris, aes(Sepal.Length, Sepal.Width, colour = Species)) +
  geom_point() +
  labs(
    title = "Scatter Plot of Sepal Length vs Width",
    x = "Sepal Length",
    y = "Sepal Width",
    colour = "Species Legend"
  )

Histogram

binwidth

# Histogram with the bin size fixed by width (0.1 units per bin).
ggplot(iris, aes(Sepal.Length)) +
  geom_histogram(binwidth = 0.1, fill = "blue", colour = "black")

bins number

# Histogram with the bin size fixed by count (15 bins).
ggplot(iris, aes(Sepal.Length)) +
  geom_histogram(bins = 15, fill = "blue", colour = "black")

Species wise color change

# Fill colour mapped to species; bars are semi-transparent with a black outline.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.3)

Reduce gap between plot and axis============

# Remove the default padding on both axes at once.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "white", alpha = 0.5) +
  coord_cartesian(expand = FALSE)

# Control the y-axis padding explicitly; both vectors are (lower, upper).
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  scale_y_continuous(
    breaks = seq(0, 40, by = 5),
    expand = expansion(
      mult = c(0, 0),  # multiplicative padding, as a fraction of the range
      add = c(0, 0)    # additive padding, in data units
    )
  )

# No padding on x; 5 extra units of head-room above the tallest bar on y.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "white", alpha = 0.6) +
  scale_x_continuous(expand = expansion(add = c(0, 0))) +
  scale_y_continuous(expand = expansion(add = c(0, 5)))

Facet

facet_wrap

# One histogram panel per species, stacked in a single column.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 1)

# One panel per species in a single column, each with its own x and y scales.
# The argument is spelled out as `scales`: the shorter `scale =` only works
# through R's partial argument matching.
ggplot(data = iris) +
  geom_histogram(aes(x = Sepal.Length, fill = Species),
                 bins = 10, col = "black", alpha = .6) +
  facet_wrap(vars(Species), ncol = 1, scales = "free")

# Three panels side by side; only the x axis is freed, the y axis is shared.
ggplot(data = iris) +
  geom_histogram(aes(x = Sepal.Length, fill = Species),
                 bins = 10, col = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 3, scales = "free_x")

Facet_grid

# Species laid out as facet rows.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_grid(Species ~ .)

# GPA distribution: employment status in rows, gender in columns.
ggplot(Student, aes(x = GPA, fill = Employment)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_grid(Employment ~ Gender)

# GPA distribution: employment status in rows, class in columns.
ggplot(Student, aes(x = GPA, fill = Employment)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_grid(Employment ~ Class)

# Summarise the data after turning Class into a factor with academic ordering.
summary(
  mutate(
    Student,
    Class = factor(Class, levels = c("Freshman", "Sophomore", "Junior", "Senior"))
  )
)
##        ID            Gender               Age              Class   
##  Min.   :  1.00   Length:132         Min.   :18.00   Freshman : 4  
##  1st Qu.: 33.75   Class :character   1st Qu.:20.00   Sophomore:29  
##  Median : 66.50   Mode  :character   Median :22.00   Junior   :24  
##  Mean   : 66.50                      Mean   :21.84   Senior   :75  
##  3rd Qu.: 99.25                      3rd Qu.:24.00                 
##  Max.   :132.00                      Max.   :30.00                 
##     Major           Grad Intention          GPA         Employment       
##  Length:132         Length:132         Min.   :2.340   Length:132        
##  Class :character   Class :character   1st Qu.:2.950   Class :character  
##  Mode  :character   Mode  :character   Median :3.000   Mode  :character  
##                                        Mean   :3.016                     
##                                        3rd Qu.:3.300                     
##                                        Max.   :3.680                     
##      Salary      Social Networking  Satisfaction      Spending     
##  Min.   :30.00   Min.   : 0.000    Min.   :1.000   Min.   : 700.0  
##  1st Qu.:50.00   1st Qu.: 0.000    1st Qu.:3.000   1st Qu.: 800.0  
##  Median :60.00   Median : 6.500    Median :4.000   Median : 900.0  
##  Mean   :57.89   Mean   : 4.902    Mean   :3.523   Mean   : 952.3  
##  3rd Qu.:65.00   3rd Qu.: 8.000    3rd Qu.:4.000   3rd Qu.:1100.0  
##  Max.   :85.00   Max.   :12.000    Max.   :6.000   Max.   :1200.0  
##    Computer         Text Messages       Wealth       
##  Length:132         Min.   :  0.0   Min.   :  0.100  
##  Class :character   1st Qu.: 40.0   1st Qu.:  0.200  
##  Mode  :character   Median :300.0   Median :  1.500  
##                     Mean   :267.6   Mean   :  6.825  
##                     3rd Qu.:500.0   3rd Qu.: 10.000  
##                     Max.   :600.0   Max.   :100.000
# Order the Class columns academically before faceting.
Student %>%
  mutate(
    Class = factor(Class, levels = c("Freshman", "Sophomore", "Junior", "Senior"))
  ) %>%
  ggplot(aes(x = GPA, fill = Employment)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_grid(Employment ~ Class)

Theme======

Built-in theme

# Faceted histogram styled with ggplot2's built-in minimal theme.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 1) +
  theme_minimal()

Other pre-built themes (ggthemes)

# Same plot with a ready-made theme from the ggthemes package.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 1) +
  theme_calc()

ggthemeAssist

# Fully labelled faceted histogram, stored as p1 for later reuse.
p1 <- ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 1) +
  labs(
    title = "Histogram of Sepal Length by Species",
    subtitle = "Using Facet and other customizations",
    caption = "Data : Iris",
    x = "Sepal Length",
    y = "Frequency",
    fill = "Species Legend"
  )

Manually changing color

# Assign one fill colour per species by name.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 1) +
  scale_fill_manual(
    values = c(
      "setosa" = "tomato",
      "versicolor" = "black",
      "virginica" = "skyblue"
    )
  )

# Use a ColorBrewer palette instead of hand-picked colours (opaque bars).
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 1) +
  facet_wrap(vars(Species), ncol = 1) +
  scale_fill_brewer(palette = "Set1")

Density plot

# Overlapping density curves, one per species.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.5)

# The same densities, separated into one panel per species.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.5) +
  facet_wrap(vars(Species), ncol = 1)

Histogram + Density

# Density curve plus a density-scaled histogram for each species.
# `alpha` and `bins` are fixed settings, not data mappings, so they are passed
# to geom_histogram() directly. Inside aes() `bins` was ignored (the histogram
# fell back to 30 bins) and `alpha` was treated as a mapped aesthetic.
ggplot(data = iris, aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = .6, color = "white") +
  geom_histogram(aes(y = after_stat(density)), alpha = .6, bins = 10) +
  facet_wrap(vars(Species), ncol = 1)

# Density-scaled histogram with an overlaid density curve, a dashed line at the
# mean, and a text annotation showing the mode of Sepal.Length.
ggplot(data = iris, aes(x = Sepal.Length)) +
  # bins is stated explicitly (30 is the value stat_bin() was defaulting to),
  # which silences the "Pick better value" message without changing the plot.
  geom_histogram(aes(y = after_stat(density)), bins = 30,
                 colour = "white", fill = "blue", alpha = .8) +
  geom_density(color = "black", fill = "green", alpha = .5) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0.
  geom_vline(aes(xintercept = mean(Sepal.Length)),
             color = "black", linewidth = 1, linetype = "dashed") +
  labs(
    title = "Distribution of Sepal Length",
    subtitle = "Made by ggplot2",
    caption = "Source: Iris Data",
    x = "Sepal Length",
    y = "Density") +
  theme_classic() +
  theme(
    plot.title = element_text(color = "blue", size = 15, face = "bold"),
    plot.subtitle = element_text(size = 10),
    plot.caption = element_text(face = "italic")) +
  annotate("text", x = 5.9, y = .75,
           label = paste0("Mode: ",
                          round(DescTools::Mode(iris$Sepal.Length), 1)),
           hjust = 0)

bar_plot

Use geom_bar when the “Computer” column holds one raw (character) value per student (“Desktop”, “Tablet” or “Laptop”) and the counts have to be computed by the plot.

# Number of students per device type; geom_bar() does the counting.
ggplot(Student, aes(x = Computer)) +
  geom_bar()

# Horizontal bars via coord_flip(); the count axis is still labelled through y.
ggplot(Student, aes(x = Computer)) +
  geom_bar() +
  coord_flip() +
  labs(y = "Number of Students")

# Mapping the category to y gives horizontal bars directly, so no coord_flip()
# is required.
ggplot(Student, aes(y = Computer)) +
  geom_bar()

geom_col plot

Use geom_col when the counts are already given, e.g. Desktop = 50, Laptop = 100, Tablet = 15. The bar heights come from the data, so the y aesthetic is required.

# Pre-compute the counts with count(), then draw them with geom_col().
Student %>%
  count(Computer) %>%
  ggplot(aes(x = Computer, y = n)) +
  geom_col()

# Horizontal version using coord_flip().
Student %>%
  count(Computer) %>%
  ggplot(aes(x = Computer, y = n)) +
  geom_col() +
  coord_flip()

# Horizontal version by swapping the x and y mappings; no coord_flip() needed.
Student %>%
  count(Computer) %>%
  ggplot(aes(y = Computer, x = n)) +
  geom_col()

Arranging bars

geom_bar modification

# Fix the bar order by listing the categories explicitly.
ggplot(Student, aes(x = Computer)) +
  geom_bar() +
  scale_x_discrete(limits = c("Desktop", "Laptop", "Tablet"))

# forcats::fct_infreq() arranges the bars in descending order of frequency.
ggplot(Student, aes(x = fct_infreq(Computer))) +
  geom_bar() +
  labs(x = "Computer usage status")

# Reversing that factor gives ascending order.
ggplot(Student, aes(x = fct_infreq(Computer) %>% fct_rev())) +
  geom_bar()

# One fixed fill colour per bar.
# NOTE(review): this appears to rely on there being exactly three bars; confirm.
ggplot(Student, aes(x = Computer)) +
  geom_bar(fill = c("red", "green", "black"), alpha = 0.6)

# Map fill to the category and choose the colours through a manual scale.
ggplot(Student, aes(x = Computer, fill = Computer)) +
  geom_bar() +
  scale_fill_manual(values = c("red", "black", "green"))

geom_col modification

# Explicit category order on the x axis.
Student %>%
  count(Computer) %>%
  ggplot(aes(x = Computer, y = n)) +
  geom_col() +
  scale_x_discrete(limits = c("Desktop", "Laptop", "Tablet"))

# reorder() by -n: bars in descending order of count.
Student %>%
  count(Computer) %>%
  ggplot(aes(x = reorder(Computer, -n), y = n)) +
  geom_col()

# reorder() by n: bars in ascending order of count.
Student %>%
  count(Computer) %>%
  ggplot(aes(x = reorder(Computer, n), y = n)) +
  geom_col()

# Count labels drawn in white just inside the top of each bar.
Student %>%
  count(Computer) %>%
  ggplot(aes(x = reorder(Computer, -n), y = n)) +
  geom_col() +
  geom_text(aes(label = n), colour = "white", size = 3, vjust = 1.41) +
  ylim(0, 120) +
  theme_minimal()

# Count labels drawn in black above each bar.
Student %>%
  count(Computer) %>%
  ggplot(aes(x = reorder(Computer, -n), y = n)) +
  geom_col() +
  geom_text(aes(label = n), colour = "black", size = 3, vjust = -1) +
  ylim(0, 110) +
  theme_minimal()

# Counts per device and class, one panel per class, labels inside the bars.
Student %>%
  count(Computer, Class) %>%
  ggplot(aes(x = reorder(Computer, -n), y = n)) +
  geom_col() +
  geom_text(aes(label = n), colour = "white", size = 3, vjust = 1.41) +
  facet_wrap(vars(Class)) +
  ylim(0, 60) +
  labs(x = "Computer usage") +
  theme_minimal()

# Polished version: coloured bars, labels above, centred title.
Student %>%
  count(Computer, Class) %>%
  ggplot(aes(x = reorder(Computer, -n), y = n)) +
  geom_col(fill = "cornflowerblue") +
  geom_text(aes(label = n), colour = "black", size = 3, vjust = -.5) +
  facet_wrap(vars(Class)) +
  ylim(0, 60) +
  labs(
    title = "Frequency of Device usage by Class",
    x = "Computer usage",
    y = "Frequency"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(colour = "black")
  )

Values on bar

# Horizontal bars per major, with the count printed one unit past the bar end.
ggplot(Student, aes(y = Major)) +
  geom_bar() +
  geom_text(
    aes(x = after_stat(count + 1), label = after_stat(count)),
    stat = "count",
    size = 3,
    colour = "black"
  ) +
  labs(x = "Frequency", y = NULL)

# Dodged bars per major and device; labels are dodged to line up with the bars.
ggplot(Student, aes(y = Major, fill = Computer)) +
  geom_bar(position = "dodge") +
  geom_text(
    aes(x = after_stat(count + 1), label = after_stat(count)),
    stat = "count",
    position = position_dodge(1),
    size = 3
  ) +
  labs(x = "Freq", y = NULL)

Stack and percentage Filled bar plot

# Plain counts per class.
ggplot(Student, aes(x = Class)) +
  geom_bar()

# Stacked by employment status.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "stack")

# Side-by-side bars.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "dodge")

# Side-by-side bars using the "dodge2" position.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "dodge2")

# Each bar rescaled to proportions that sum to 1.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill")

Arranging bar

# Option 1: order the classes by converting Class to a factor first.
Student %>%
  mutate(
    Class = factor(Class, levels = c("Freshman", "Sophomore", "Junior", "Senior"))
  ) %>%
  ggplot(aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill")

# Option 2: leave the data alone and set the order on the x scale.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill") +
  scale_x_discrete(limits = c("Freshman", "Sophomore", "Junior", "Senior"))

values on bar

# Proportional bars with the raw count centred inside each segment.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    position = position_fill(vjust = 0.5),
    size = 3
  )

# This is no longer available in the updated version of the package.
#CGPfunctions::plotXTabs2(
#  data= Student,
#  y = Gender,
#  results.subtitle = FALSE,   
#  sample.size.label = TRUE, palette = "Set3",
#  ggtheme= ggplot2::theme_bw()
#)+
# labs(title = "Stacked bar plot of device usage by gender")

Legend customization

Legend position

# Move every legend below the plot through the theme.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(colour = Species, size = Petal.Length)) +
  labs(
    title = "Scatter Plot of Sepal Length vs Width",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  theme(legend.position = "bottom")

# Move only the colour legend below the plot.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(colour = Species, size = Petal.Length)) +
  labs(
    title = "Scatter Plot of Sepal Length vs Width",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  guides(colour = guide_legend(position = "bottom"))

# Move only the size legend below the plot.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(colour = Species, size = Petal.Length)) +
  labs(
    title = "Scatter Plot of Sepal Length vs Width",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  guides(size = guide_legend(position = "bottom"))

# Customise the colour legend: new title, placed below the plot, laid out
# horizontally, with the title to the left of the keys.
ggplot(iris) +
  geom_point(aes(x = Sepal.Length, y = Sepal.Width, col = Species, size = Petal.Length)) +
  labs(x = "Sepal Length", y = "Sepal Width",
       title = "Scatter Plot of Sepal Length vs Width") +
  guides(
    color = guide_legend(
      title = "Species Name",
      position = "bottom",
      direction = "horizontal",
      # `title.position` is deprecated in guide_legend() as of ggplot2 3.5.0;
      # the title placement is now set through the guide's own theme.
      theme = theme(legend.title.position = "left"),
      reverse = FALSE
    )
  )

Hide legend for specific attributes

# Drop the colour legend, keep the size legend.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(colour = Species, size = Petal.Length)) +
  guides(colour = "none")

# Drop the size legend, keep the colour legend.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(colour = Species, size = Petal.Length)) +
  guides(size = "none")

# Drop both legends individually.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(colour = Species, size = Petal.Length)) +
  guides(colour = "none", size = "none")

Hide all legend

# Remove every legend in one step through the theme.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(colour = Species, size = Petal.Length)) +
  theme(legend.position = "none")

Reordering levels of legends

# `breaks` sets the order in which the legend keys are listed.
ggplot(Student, aes(y = Computer, fill = Gender)) +
  geom_bar(position = "fill") +
  scale_fill_discrete(breaks = c("Male", "Female")) +
  theme(legend.position = "bottom")

Axis customization

# Show the proportion axis as percentages.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::label_percent()) +
  labs(y = "Proportion")

# Average spending per class and gender, drawn as dodged columns with a
# dollar-formatted y axis.
Student %>%
  mutate(Class = factor(Class,
                        levels = c("Freshman", "Sophomore", "Junior", "Senior"))) %>%
  group_by(Class, Gender) %>%
  # `.groups = "drop"` returns an ungrouped result directly, which replaces the
  # separate ungroup() step and silences the "grouped output" message.
  summarise(AvgSpending = mean(Spending), .groups = "drop") %>%
  ggplot() +
  geom_col(aes(fill = Class, y = AvgSpending, x = Gender), position = "dodge") +
  theme(legend.position = "bottom") +
  scale_y_continuous(labels = scales::label_dollar(prefix = "US "))

Box Plot

# Spending by class: box plot with the raw observations jittered on top.
ggplot(data = Student, mapping = aes(x = Class, y = Spending)) +
  geom_boxplot() +
  geom_jitter()

# Sepal length by species, drawn the same way.
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_boxplot() +
  geom_jitter()

# Install ggpubr only when it is missing. An unconditional install.packages()
# call re-runs on every render and warns when the package is already loaded.
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}
library(ggpubr)
# boxplot with anova and t test. lecture-12, time-1.38hr - 1.40hr

esquisse add-in (open the Addins menu, then click "ggplot2 builder")

It is a drag-and-drop app. After creating the plot, copy the generated code and paste it into a code chunk.

# Code generated by esquisse: proportional horizontal bars with the Stata theme.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill") +
  scale_fill_hue(direction = 1) +
  coord_flip() +
  ggthemes::theme_stata() +
  # Applied after theme_stata() so the legend position is not overridden.
  theme(legend.position = "bottom")

Combining multiple plots

patchwork

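# {r fig.width = 10, fig.height = 8}
# NOTE: p2 and p3 are not defined in the visible part of this document; create
# them before running this chunk.
# library(patchwork)
# (p3 | p2) /
#   p1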