# Global knitr chunk defaults: echo the source of every chunk and cache results.
knitr::opts_chunk$set(cache = TRUE, echo = TRUE)
library(ggplot2)
library(dplyr)
library(ggthemes)
library(DescTools)
library(forcats)
#install.packages("patchwork")
library(scales)
library(ggpubr)
library(esquisse)
library(patchwork)
#theme_set(theme_bw())
# Data sets used by the plots below: the built-in iris data and the student
# workbook, read from a path relative to the working directory.
data("iris")
Student <- readxl::read_excel(path = "student_dataset-1.xlsx")
# Basic scatter plot: sepal length against sepal width.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point()

# Same scatter plot, one panel per species.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  facet_wrap(vars(Species))

# Fixed (non-mapped) point styling: size-3 red points drawn with shape 16.
ggplot(iris) +
  geom_point(
    aes(x = Sepal.Length, y = Sepal.Width),
    colour = "red",
    size = 3,
    shape = 16
  )

# Colour the points by species and label the axes, legend and plot.
ggplot(iris) +
  geom_point(aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  labs(
    title = "Scatter Plot of Sepal Length vs Width",
    x = "Sepal Length",
    y = "Sepal Width",
    colour = "Species Legend"
  )
# Histogram of sepal length with a fixed bin width of 0.1.
ggplot(iris) +
  geom_histogram(
    aes(x = Sepal.Length),
    binwidth = 0.1,
    fill = "blue",
    colour = "black"
  )

# Same variable, but controlling the number of bins instead of their width.
ggplot(iris) +
  geom_histogram(
    aes(x = Sepal.Length),
    bins = 15,
    fill = "blue",
    colour = "black"
  )

# Fill mapped to species; bars are semi-transparent with black outlines.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.3)

# White outlines, and expand = FALSE so no padding is added around the data.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "white", alpha = 0.5) +
  coord_cartesian(expand = FALSE)
# Histogram with explicit y breaks and no padding at either end of the y axis.
# In expansion(), `mult` is padding proportional to the axis range and `add`
# is padding in data units; each is given as (lower, upper), and a single
# value is used for both ends.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 40, by = 5),
    expand = expansion(mult = 0, add = 0)
  )

# No padding on x; on y, no padding below the bars and 5 units above them.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "white", alpha = 0.6) +
  scale_x_continuous(expand = expansion(add = 0)) +
  scale_y_continuous(expand = expansion(add = c(0, 5)))
# Sepal-length histograms split into one panel per species.

# Panels stacked in a single column, sharing both axes.
ggplot(data = iris) +
  geom_histogram(aes(x = Sepal.Length, fill = Species),
                 bins = 10, col = "black", alpha = .6) +
  facet_wrap(vars(Species), ncol = 1)

# Each panel gets its own x and y range.
# The argument is `scales`; the original `scale =` only worked through
# partial argument matching, so it is spelled out in full here.
ggplot(data = iris) +
  geom_histogram(aes(x = Sepal.Length, fill = Species),
                 bins = 10, col = "black", alpha = .6) +
  facet_wrap(vars(Species), ncol = 1, scales = "free")

# Panels side by side; only the x axis is freed, y stays shared.
ggplot(data = iris) +
  geom_histogram(aes(x = Sepal.Length, fill = Species),
                 bins = 10, col = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 3, scales = "free_x")

# facet_grid() equivalent: one row of the grid per species.
ggplot(data = iris) +
  geom_histogram(aes(x = Sepal.Length, fill = Species),
                 bins = 10, col = "black", alpha = .6) +
  facet_grid(rows = vars(Species))
# GPA histograms in a grid: employment status down the rows, gender across.
ggplot(Student, aes(x = GPA, fill = Employment)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_grid(cols = vars(Gender), rows = vars(Employment))

# Same grid with class across the columns (Class is used as read from the
# file, i.e. before it is converted to an ordered factor).
ggplot(Student, aes(x = GPA, fill = Employment)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_grid(cols = vars(Class), rows = vars(Employment))
# Column-wise summary of the student data, with Class converted to a factor
# whose levels follow the order of study years.
summary(
  mutate(
    Student,
    Class = factor(Class, levels = c("Freshman", "Sophomore", "Junior", "Senior"))
  )
)
## ID Gender Age Class
## Min. : 1.00 Length:132 Min. :18.00 Freshman : 4
## 1st Qu.: 33.75 Class :character 1st Qu.:20.00 Sophomore:29
## Median : 66.50 Mode :character Median :22.00 Junior :24
## Mean : 66.50 Mean :21.84 Senior :75
## 3rd Qu.: 99.25 3rd Qu.:24.00
## Max. :132.00 Max. :30.00
## Major Grad Intention GPA Employment
## Length:132 Length:132 Min. :2.340 Length:132
## Class :character Class :character 1st Qu.:2.950 Class :character
## Mode :character Mode :character Median :3.000 Mode :character
## Mean :3.016
## 3rd Qu.:3.300
## Max. :3.680
## Salary Social Networking Satisfaction Spending
## Min. :30.00 Min. : 0.000 Min. :1.000 Min. : 700.0
## 1st Qu.:50.00 1st Qu.: 0.000 1st Qu.:3.000 1st Qu.: 800.0
## Median :60.00 Median : 6.500 Median :4.000 Median : 900.0
## Mean :57.89 Mean : 4.902 Mean :3.523 Mean : 952.3
## 3rd Qu.:65.00 3rd Qu.: 8.000 3rd Qu.:4.000 3rd Qu.:1100.0
## Max. :85.00 Max. :12.000 Max. :6.000 Max. :1200.0
## Computer Text Messages Wealth
## Length:132 Min. : 0.0 Min. : 0.100
## Class :character 1st Qu.: 40.0 1st Qu.: 0.200
## Mode :character Median :300.0 Median : 1.500
## Mean :267.6 Mean : 6.825
## 3rd Qu.:500.0 3rd Qu.: 10.000
## Max. :600.0 Max. :100.000
# GPA histograms by employment (rows) and class (columns); Class is turned
# into a factor first so the columns appear in study-year order.
ggplot(
  mutate(
    Student,
    Class = factor(Class, levels = c("Freshman", "Sophomore", "Junior", "Senior"))
  ),
  aes(x = GPA, fill = Employment)
) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_grid(cols = vars(Class), rows = vars(Employment))
# Faceted species histogram drawn with ggplot2's minimal theme.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 1) +
  theme_minimal()

# Same plot with theme_calc().
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 1) +
  theme_calc()

# Fully labelled version, stored in `p1` instead of being printed.
p1 <- ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 1) +
  labs(
    title = "Histogram of Sepal Length by Species",
    subtitle = "Using Facet and other customizations",
    caption = "Data : Iris",
    x = "Sepal Length",
    y = "Frequency",
    fill = "Species Legend"
  )
# Manual fill colours, matched to the species by name.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 0.6) +
  facet_wrap(vars(Species), ncol = 1) +
  scale_fill_manual(
    values = c(
      "setosa" = "tomato",
      "versicolor" = "black",
      "virginica" = "skyblue"
    )
  )

# ColorBrewer "Set1" palette with fully opaque bars.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_histogram(bins = 10, colour = "black", alpha = 1) +
  facet_wrap(vars(Species), ncol = 1) +
  scale_fill_brewer(palette = "Set1")
# Overlapping density curves of sepal length, one per species.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.5)

# The same densities, each species in its own panel.
ggplot(iris, aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = 0.5) +
  facet_wrap(vars(Species), ncol = 1)
# Density curve and density-scaled histogram of sepal length per species.
# `alpha` and `bins` are fixed settings, not data mappings, so they are passed
# as layer parameters outside aes(). Inside aes(), `bins` was ignored (the
# default of 30 bins was used, with a warning) and `alpha` became a mapped
# constant that added a spurious legend entry.
ggplot(data = iris, aes(x = Sepal.Length, fill = Species)) +
  geom_density(alpha = .6, color = "white") +
  geom_histogram(aes(y = after_stat(density)), alpha = .6, bins = 10) +
  facet_wrap(vars(Species), ncol = 1)
# Distribution of sepal length: density-scaled histogram, density curve on
# top, a dashed vertical line at the mean and a text label showing the mode.
ggplot(data = iris, aes(x = Sepal.Length)) +
  # bins = 30 is what stat_bin() was already falling back to; stating it
  # explicitly keeps the plot the same and silences the "Pick better value"
  # message.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30, colour = "white", fill = "blue", alpha = .8
  ) +
  geom_density(color = "black", fill = "green", alpha = .5) +
  # The mean is a single constant, so it is passed as a layer parameter (one
  # line) instead of being mapped through aes(), which draws one line per row.
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_vline(
    xintercept = mean(iris$Sepal.Length),
    color = "black", linewidth = 1, linetype = "dashed"
  ) +
  labs(
    title = "Distribution of Sepal Length",
    subtitle = "Made by ggplot2",
    caption = "Source: Iris Data",
    x = "Sepal Length",
    y = "Density"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(color = "blue", size = 15, face = "bold"),
    plot.subtitle = element_text(size = 10),
    plot.caption = element_text(face = "italic")
  ) +
  # Left-aligned (hjust = 0) label placed at fixed data coordinates.
  annotate(
    "text",
    x = 5.9, y = .75,
    label = paste0("Mode: ", round(DescTools::Mode(iris$Sepal.Length), 1)),
    hjust = 0
  )
# geom_bar() counts the rows itself: number of students per computer type.
ggplot(Student, aes(x = Computer)) +
  geom_bar()

# Horizontal version via coord_flip(); after flipping, the count is still the
# y aesthetic, so its title is set through `y`.
ggplot(Student, aes(x = Computer)) +
  geom_bar() +
  coord_flip() +
  labs(y = "Number of Students")

# Mapping the category to y gives horizontal bars; no coord_flip() is required.
ggplot(Student, aes(y = Computer)) +
  geom_bar()

# geom_col() draws pre-computed heights: count first, then plot column `n`.
ggplot(count(Student, Computer), aes(x = Computer, y = n)) +
  geom_col()

# Pre-computed counts, flipped to horizontal.
ggplot(count(Student, Computer), aes(x = Computer, y = n)) +
  geom_col() +
  coord_flip()

# Pre-computed counts with the axes swapped in aes(); no coord_flip() is
# required.
ggplot(count(Student, Computer), aes(y = Computer, x = n)) +
  geom_col()
# Explicit bar order through the discrete scale limits.
ggplot(Student, aes(x = Computer)) +
  geom_bar() +
  scale_x_discrete(limits = c("Desktop", "Laptop", "Tablet"))

# forcats::fct_infreq() orders the bars by descending frequency.
ggplot(Student, aes(x = fct_infreq(Computer))) +
  geom_bar() +
  labs(x = "Computer usage status")

# Reversing the frequency ordering gives ascending order.
ggplot(Student, aes(x = fct_infreq(Computer) %>% fct_rev())) +
  geom_bar()

# Fixed fill colours supplied as a three-element vector, semi-transparent.
ggplot(Student, aes(x = Computer)) +
  geom_bar(fill = c("red", "green", "black"), alpha = 0.6)

# Fill mapped to the category, with the palette chosen manually.
ggplot(Student, aes(x = Computer, fill = Computer)) +
  geom_bar() +
  scale_fill_manual(values = c("red", "black", "green"))
# Pre-computed counts with an explicit category order on the x axis.
ggplot(count(Student, Computer), aes(x = Computer, y = n)) +
  geom_col() +
  scale_x_discrete(limits = c("Desktop", "Laptop", "Tablet"))

# reorder() by -n: bars in descending order of count.
ggplot(count(Student, Computer), aes(x = reorder(Computer, -n), y = n)) +
  geom_col()

# reorder() by n: bars in ascending order of count.
ggplot(count(Student, Computer), aes(x = reorder(Computer, n), y = n)) +
  geom_col()
# Counts per computer type, descending, with the count printed in white
# below the top edge of each bar (positive vjust).
ggplot(count(Student, Computer), aes(x = reorder(Computer, -n), y = n)) +
  geom_col() +
  geom_text(aes(label = n), colour = "white", size = 3, vjust = 1.41) +
  ylim(0, 120) +
  theme_minimal()

# Same chart with the count printed in black above each bar (negative vjust).
ggplot(count(Student, Computer), aes(x = reorder(Computer, -n), y = n)) +
  geom_col() +
  geom_text(aes(label = n), colour = "black", size = 3, vjust = -1) +
  ylim(0, 110) +
  theme_minimal()

# Counts per computer type within each class, one panel per class.
ggplot(count(Student, Computer, Class), aes(x = reorder(Computer, -n), y = n)) +
  geom_col() +
  geom_text(aes(label = n), colour = "white", size = 3, vjust = 1.41) +
  facet_wrap(vars(Class)) +
  ylim(0, 60) +
  labs(x = "Computer usage") +
  theme_minimal()

# Polished version: coloured bars, labels above the bars, centred title and
# black facet strip text.
ggplot(count(Student, Computer, Class), aes(x = reorder(Computer, -n), y = n)) +
  geom_col(fill = "cornflowerblue") +
  geom_text(aes(label = n), colour = "black", size = 3, vjust = -0.5) +
  facet_wrap(vars(Class)) +
  ylim(0, 60) +
  labs(
    title = "Frequency of Device usage by Class",
    x = "Computer usage",
    y = "Frequency"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(colour = "black")
  )
# Horizontal bars of students per major. The text layer reuses the "count"
# stat: the label is the computed count, placed one unit past the bar end.
ggplot(Student, aes(y = Major)) +
  geom_bar() +
  geom_text(
    aes(x = after_stat(count + 1), label = after_stat(count)),
    stat = "count",
    colour = "black",
    size = 3
  ) +
  labs(x = "Frequency", y = NULL)

# Same idea split by computer type; the labels are dodged with
# position_dodge(1) to sit beside the dodged bars.
ggplot(Student, aes(y = Major, fill = Computer)) +
  geom_bar(position = "dodge") +
  geom_text(
    aes(x = after_stat(count + 1), label = after_stat(count)),
    stat = "count",
    position = position_dodge(1),
    size = 3
  ) +
  labs(x = "Freq", y = NULL)
# Number of students per class.
ggplot(Student, aes(x = Class)) +
  geom_bar()

# Employment within class, stacked counts.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "stack")

# Employment within class, bars side by side.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "dodge")

# Side by side using the "dodge2" position.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "dodge2")

# Proportions: every bar is scaled to the same total height.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill")

# Proportions with classes in study-year order, by converting Class to a
# factor with explicit levels.
ggplot(
  mutate(
    Student,
    Class = factor(Class, levels = c("Freshman", "Sophomore", "Junior", "Senior"))
  ),
  aes(x = Class, fill = Employment)
) +
  geom_bar(position = "fill")

# Same ordering obtained through the x scale limits instead.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill") +
  scale_x_discrete(limits = c("Freshman", "Sophomore", "Junior", "Senior"))

# Proportion bars with the raw count written in the middle of each segment.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    position = position_fill(vjust = 0.5),
    size = 3
  )
# This is no longer available in the updated version (kept for reference):
#CGPfunctions::plotXTabs2(
# data= Student,
# y = Gender,
# results.subtitle = FALSE,
# sample.size.label = TRUE, palette = "Set3",
# ggtheme= ggplot2::theme_bw()
#)+
# labs(title = "Stacked bar plot of device usage by gender")
# Bubble scatter plot (colour = species, size = petal length) with every
# legend moved below the panel through the theme.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species, size = Petal.Length)) +
  geom_point() +
  labs(
    title = "Scatter Plot of Sepal Length vs Width",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  theme(legend.position = "bottom")

# Position set on the colour guide only.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species, size = Petal.Length)) +
  geom_point() +
  labs(
    title = "Scatter Plot of Sepal Length vs Width",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  guides(colour = guide_legend(position = "bottom"))

# Position set on the size guide only.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species, size = Petal.Length)) +
  geom_point() +
  labs(
    title = "Scatter Plot of Sepal Length vs Width",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  guides(size = guide_legend(position = "bottom"))
# Bubble scatter plot with a customised colour legend: retitled, placed at
# the bottom, laid out horizontally, with its title to the left of the keys.
ggplot(iris) +
  geom_point(aes(x = Sepal.Length, y = Sepal.Width, col = Species, size = Petal.Length)) +
  labs(x = "Sepal Length", y = "Sepal Width",
       title = "Scatter Plot of Sepal Length vs Width") +
  guides(
    color = guide_legend(
      title = "Species Name",
      position = "bottom",
      direction = "horizontal",
      # `title.position` is deprecated in ggplot2 3.5.0 (the release that
      # introduced the `position` argument used above); per-guide styling now
      # goes through `theme`.
      theme = theme(legend.title.position = "left"),
      reverse = FALSE
    )
  )
# Hide only the colour legend.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species, size = Petal.Length)) +
  geom_point() +
  guides(colour = "none")

# Hide only the size legend.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species, size = Petal.Length)) +
  geom_point() +
  guides(size = "none")

# Hide both legends, guide by guide.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species, size = Petal.Length)) +
  geom_point() +
  guides(colour = "none", size = "none")

# Hide all legends at once through the theme.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species, size = Petal.Length)) +
  geom_point() +
  theme(legend.position = "none")
# Gender share per computer type; `breaks` controls which legend entries are
# shown and in what order.
ggplot(Student, aes(y = Computer, fill = Gender)) +
  geom_bar(position = "fill") +
  scale_fill_discrete(breaks = c("Male", "Female")) +
  theme(legend.position = "bottom")

# Employment share per class, with the y axis labelled as percentages.
ggplot(Student) +
  geom_bar(aes(x = Class, fill = Employment), position = "fill") +
  scale_y_continuous(labels = scales::label_percent()) +
  labs(y = "Proportion")
# Average spending per class and gender, drawn as dodged bars (one bar per
# class within each gender) with "US "-prefixed labels on the y axis.
Student %>%
  mutate(Class = factor(Class,
                        levels = c("Freshman", "Sophomore", "Junior", "Senior"))) %>%
  group_by(Class, Gender) %>%
  # .groups = "drop" returns an ungrouped result directly: no "grouped
  # output" message and no separate ungroup() step.
  summarise(AvgSpending = mean(Spending), .groups = "drop") %>%
  ggplot() +
  geom_col(aes(fill = Class, y = AvgSpending, x = Gender), position = "dodge") +
  theme(legend.position = "bottom") +
  scale_y_continuous(labels = scales::label_dollar(prefix = "US "))
# Spending by class: box plot with the individual observations jittered on
# top.
ggplot(Student) +
  aes(x = Class, y = Spending) +
  geom_boxplot() +
  geom_jitter()

# Sepal length by species, same box plot + jitter combination.
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot() +
  geom_jitter()
# Install packages once from the console, not from the script: calling
# install.packages() on every run is slow, needs network access, and is
# refused with a warning when the package is already attached (as ggpubr is).
# install.packages("ggpubr")
library(ggpubr)
# boxplot with anova and t test. lecture-12, time-1.38hr - 1.40hr

# Horizontal proportion bars of employment within class, using the default
# hue palette, the Stata theme from ggthemes and the legend at the bottom.
ggplot(Student, aes(x = Class, fill = Employment)) +
  geom_bar(position = "fill") +
  scale_fill_hue(direction = 1) +
  coord_flip() +
  ggthemes::theme_stata() +
  theme(legend.position = "bottom")
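# Commented-out patchwork layout (not runnable as is: p2 and p3 are not
# defined in the visible part of this script).
# NOTE(review): the original chunk header set fig.width twice; the second one
# was presumably meant to be fig.height - confirm.
# {r fig.width= 10, fig.width=8}
# library(patchwork)
# (p3| p2) /
#   p1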