# install.packages("ggplot2")
# load package and data
options(scipen=999) # turn-off scientific notation like 1e+48
library(ggplot2)
theme_set(theme_bw()) # pre-set the bw theme.
data("midwest", package = "ggplot2")
# midwest <- read.csv("http://goo.gl/G1K41K") # bkup data source
# Scatterplot
gg <- ggplot(midwest, aes(x=area, y=poptotal)) +
geom_point(aes(col=state, size=popdensity)) +
geom_smooth(method="loess", se=F) +
xlim(c(0, 0.1)) +
ylim(c(0, 500000)) +
labs(subtitle="Area Vs Population",
y="Population",
x="Area",
title="Scatterplot",
caption = "Source: midwest")
plot(gg)## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
# install 'ggalt' pkg
# devtools::install_github("hrbrmstr/ggalt")
options(scipen = 999)
library(ggplot2)
library(ggalt)## Warning: package 'ggalt' was built under R version 3.4.2
midwest_select <- midwest[midwest$poptotal > 350000 &
midwest$poptotal <= 500000 &
midwest$area > 0.01 &
midwest$area < 0.1, ]
# Plot
ggplot(midwest, aes(x=area, y=poptotal)) +
geom_point(aes(col=state, size=popdensity)) + # draw points
geom_smooth(method="loess", se=F) +
xlim(c(0, 0.1)) +
ylim(c(0, 500000)) + # draw smoothing line
geom_encircle(aes(x=area, y=poptotal),
data=midwest_select,
color="red",
size=2,
expand=0.08) + # encircle
labs(subtitle="Area Vs Population",
y="Population",
x="Area",
title="Scatterplot + Encircle",
caption="Source: midwest")## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
# load package and data
library(ggplot2)
data(mpg, package="ggplot2")
# mpg <- read.csv("http://goo.gl/uEeRGu")
# Scatterplot
theme_set(theme_bw()) # pre-set the bw theme.
g <- ggplot(mpg, aes(cty, hwy))
g + geom_count(col="tomato3", show.legend=F) +
labs(subtitle="mpg: city vs highway mileage",
y="hwy",
x="cty",
title="Counts Plot")# load package and data
library(ggplot2)
library(ggExtra)## Warning: package 'ggExtra' was built under R version 3.4.2
data(mpg, package="ggplot2")
# mpg <- read.csv("http://goo.gl/uEeRGu")
# Scatterplot
theme_set(theme_bw()) # pre-set the bw theme.
mpg_select <- mpg[mpg$hwy >= 35 & mpg$cty > 27, ]
g <- ggplot(mpg, aes(cty, hwy)) +
geom_count() +
geom_smooth(method="lm", se=F)
ggMarginal(g, type = "histogram", fill="transparent")
ggMarginal(g, type = "boxplot", fill="transparent")# ggMarginal(g, type = "density", fill="transparent")# Read data
library(readxl)
library(ggfortify)
Pop <- read_excel('D:/3.Project/6.Paper/Dang Tuan Vu/chart/Popular.xlsx')
Pop$GroupAge <- factor(Pop$GroupAge, levels = c('0 - 4','5-9','10-14','15-19','20-24','25-29','30-34','35-39','40-44','45-49','50-54','55 - 59','60-64','65-69','70-74'
,'75-79','80-84','85-89','90-94','95-99','100+'))
# X Axis Breaks and Labels
brks <- seq(-15000000, 15000000, 5000000)
lbls = paste0(as.character(c(seq(15, 0, -5), seq(5, 15, 5))), "m")
# Plot
theme_set(theme_bw(base_family = 'serif'))
ggplot(Pop, aes(x = GroupAge, y = Users, fill = Gender)) + # Fill column
geom_bar(stat = "identity", width = .6) + # draw the bars
scale_y_continuous(breaks = brks, # Breaks
labels = lbls) + # Labels
coord_flip() + # Flip axes
labs(title="Populator Chart") +
# theme_tufte() + # Tufte theme from ggfortify
theme(plot.title = element_text(hjust = .5),
axis.ticks = element_blank(),
legend.position = 'bottom') + # Centre plot title
scale_fill_brewer(palette = "Dark2") # Color palette# devtools::install_github("kassambara/ggcorrplot")
library(ggplot2)
library(ggcorrplot)## Warning: package 'ggcorrplot' was built under R version 3.4.2
# Correlation matrix
data(mtcars)
corr <- round(cor(mtcars), 1)
# Plot
ggcorrplot(corr, hc.order = TRUE,
type = "lower",
lab = TRUE,
lab_size = 3,
method="square",
colors = c("tomato2", "white", "springgreen3"),
title="Correlation matrix in mtcars",
ggtheme = ggplot2::theme_bw) +
theme_bw(base_family = 'serif') +
theme(text = element_text(family = "serif"),
plot.title = element_text(hjust = .5)) +
labs(x = '',
y = '')# Read data
library(readxl)
# library(ggfortify)
library(ggalt)
# theme_set(theme_bw(base_family = 'serif'))
pct <- read_excel('D:/3.Project/6.Paper/Dang Tuan Vu/chart/GDP.xlsx')
pct$Area <- factor(pct$Area,levels = as.character(pct$Area))
breaks <- seq(0.05,0.25,by = 0.05)
percent <- paste0(as.character(c(seq(5,25,5))), "%")
gg <- ggplot(pct, aes(x=pct_2013, xend=pct_2014, y=Area, group=Area)) +
geom_dumbbell(color="#a3c4dc",
size=0.75,
point.colour.l="#0e668b") +
scale_x_continuous(breaks = breaks,
labels = percent) +
labs(x=NULL,
y=NULL,
title="Dumbbell Chart",
subtitle="Pct Change: 2013 vs 2014",
caption="Source: https://github.com/hrbrmstr/ggalt") +
theme(plot.title = element_text(hjust=0.5, face="bold"),
plot.background=element_rect(fill="#f7f7f7"),
panel.background=element_rect(fill="#f7f7f7"),
panel.grid.minor=element_blank(),
panel.grid.major.y=element_blank(),
panel.grid.major.x=element_line(),
axis.ticks=element_blank(),
legend.position="top",
panel.border=element_blank())## Warning: Ignoring unknown parameters: point.colour.l
plot(gg)library(ggplot2)
theme_set(theme_bw(base_family = 'serif'))
# Plot
Data <- mtcars
Data <- Data[order(Data$mpg), ]
Data$name <- rownames(Data)
Data$name <- factor(Data$name, levels = Data$name)
Data %>%
slice(10:20) %>%
ggplot(aes(x=name, y=mpg, label = mpg)) +
geom_point(size=6) +
geom_segment(aes(x=name,
xend=name,
y=0,
yend=mpg)) +
geom_text(color = 'white',family = 'serif',size = 2) +
labs(title="Lollipop Chart",
subtitle="Cars Vs MPG",
caption="source: mtcars") +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
coord_flip() +
labs(x = '',
y = '') theme_set(theme_bw(base_family = 'serif'))
Data$mpg_z <- round((Data$mpg - mean(Data$mpg))/sd(Data$mpg), 2) # compute normalized mpg
Data$mpg_type <- ifelse(Data$mpg_z < 0, "below", "above") # above / below avg flag
Data <- Data[order(Data$mpg_z), ]
# Diverging Barcharts
Data %>%
ggplot(aes(x=name, y=mpg_z, label=mpg_z)) +
geom_point(stat='identity', aes(col=mpg_type), size=6) +
scale_color_manual(name="Mileage",
labels = c("Above Average", "Below Average"),
values = c("above"="#00ba38", "below"="#f8766d")) +
geom_text(color="white", size=2) +
labs(title="Diverging Dot Plot",
subtitle="Normalized mileage from 'mtcars': Dotplot") +
ylim(-2.5, 2.5) +
coord_flip()## Example data
x <- seq(1,10, length.out=20)
y <- seq(1,10, length.out=20)
data <- data.frame(x, y)
data$Z <- runif(20, 0, 20)
# Levelplot with ggplot2
library(ggplot2)
theme_set(theme_bw(base_family = 'serif'))
# ggplot(data, aes(x, y, Z)) + geom_tile(aes(fill = Z)) + theme_bw()
# To change the color of the gradation :
ggplot(data, aes(x, y, Z)) + geom_tile(aes(fill = Z)) +
theme_bw() +
scale_fill_gradient(low="white", high="blue") # loading libraries
# Source: http://analyzecore.com/2015/12/10/cohort-analysis-retention-rate-visualization-r/
library(dplyr)
library(reshape2)
library(ggplot2)
library(scales)
library(gridExtra)##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# creating data sample
set.seed(10)
cohorts <- data.frame(cohort = paste('cohort', formatC(c(1:36), width=2, format='d', flag='0'), sep = '_'),
Y_00 = sample(c(1300:1500), 36, replace = TRUE),
Y_01 = c(sample(c(800:1000), 36, replace = TRUE)),
Y_02 = c(sample(c(600:800), 24, replace = TRUE), rep(NA, 12)),
Y_03 = c(sample(c(400:500), 12, replace = TRUE), rep(NA, 24)))
# simulating seasonality (Black Friday)
cohorts[c(11, 23, 35), 2] <- as.integer(cohorts[c(11, 23, 35), 2] * 1.25)
cohorts[c(11, 23, 35), 3] <- as.integer(cohorts[c(11, 23, 35), 3] * 1.10)
cohorts[c(11, 23, 35), 4] <- as.integer(cohorts[c(11, 23, 35), 4] * 1.07)
# calculating retention rate and preparing data for plotting
df_plot <- melt(cohorts, id.vars = 'cohort', value.name = 'number', variable.name = 'year_of_LT')
df_plot <- df_plot %>%
group_by(cohort) %>%
arrange(year_of_LT) %>%
mutate(number_prev_year = lag(number),
number_Y_00 = number[which(year_of_LT == 'Y_00')]) %>%
ungroup() %>%
mutate(ret_rate_prev_year = number / number_prev_year,
ret_rate = number / number_Y_00,
year_cohort = paste(year_of_LT, cohort, sep = '-'))
##### The first way for plotting cycle plot via scaling
# calculating the coefficient for scaling 2nd axis
k <- max(df_plot$number_prev_year[df_plot$year_of_LT == 'Y_01'] * 1.15) / min(df_plot$ret_rate[df_plot$year_of_LT == 'Y_01'])# retention rate cycle plot
ggplot(na.omit(df_plot), aes(x = year_cohort, y = ret_rate, group = year_of_LT, color = year_of_LT)) +
theme_bw() +
geom_point(size = 4) +
geom_text(aes(label = percent(round(ret_rate, 2))),
size = 4, hjust = 0.4, vjust = -0.6, fontface = "plain") +
# smooth method can be changed (e.g. for "lm")
geom_smooth(size = 2.5, method = 'loess', color = 'darkred', aes(fill = year_of_LT)) +
geom_bar(aes(y = number_prev_year / k, fill = year_of_LT), alpha = 0.2, stat = 'identity') +
geom_bar(aes(y = number / k, fill = year_of_LT), alpha = 0.6, stat = 'identity') +
geom_text(aes(y = 0, label = cohort), color = 'white', angle = 90, size = 4, hjust = -0.05, vjust = 0.4) +
geom_text(aes(y = number_prev_year / k, label = number_prev_year),
angle = 90, size = 4, hjust = -0.1, vjust = 0.4) +
geom_text(aes(y = number / k, label = number),
angle = 90, size = 4, hjust = -0.1, vjust = 0.4) +
theme(legend.position='none',
plot.title = element_text(size=20, face="bold", vjust=2),
axis.title.x = element_text(size=18, face="bold"),
axis.title.y = element_text(size=18, face="bold"),
axis.text = element_text(size=16),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
labs(x = 'Year of Lifetime by Cohorts', y = 'Number of Customers / Retention Rate') +
ggtitle("Customer Retention Rate - Cycle plot")##### The second way for plotting cycle plot via multi-plotting
# plot #1 - Retention rate
p1 <- ggplot(na.omit(df_plot), aes(x = year_cohort, y = ret_rate, group = year_of_LT, color = year_of_LT)) +
theme_bw() +
geom_point(size = 4) +
geom_text(aes(label = percent(round(ret_rate, 2))),
size = 4, hjust = 0.4, vjust = -0.6, fontface = "plain") +
geom_smooth(size = 2.5, method = 'loess', color = 'darkred', aes(fill = year_of_LT)) +
theme(legend.position='none',
plot.title = element_text(size=20, face="bold", vjust=2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=18, face="bold"),
axis.text = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
labs(y = 'Retention Rate') +
ggtitle("Customer Retention Rate - Cycle plot")
# plot #2 - number of customers
p2 <- ggplot(na.omit(df_plot), aes(x = year_cohort, group = year_of_LT, color = year_of_LT)) +
theme_bw() +
geom_bar(aes(y = number_prev_year, fill = year_of_LT), alpha = 0.2, stat = 'identity') +
geom_bar(aes(y = number, fill = year_of_LT), alpha = 0.6, stat = 'identity') +
geom_text(aes(y = number_prev_year, label = number_prev_year),
angle = 90, size = 4, hjust = -0.1, vjust = 0.4) +
geom_text(aes(y = number, label = number),
angle = 90, size = 4, hjust = -0.1, vjust = 0.4) +
geom_text(aes(y = 0, label = cohort), color = 'white', angle = 90, size = 4, hjust = -0.05, vjust = 0.4) +
theme(legend.position='none',
plot.title = element_text(size=20, face="bold", vjust=2),
axis.title.x = element_text(size=18, face="bold"),
axis.title.y = element_text(size=18, face="bold"),
axis.text = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
scale_y_continuous(limits = c(0, max(df_plot$number_Y_00 * 1.1))) +
labs(x = 'Year of Lifetime by Cohorts', y = 'Number of Customers')
# multiplot
grid.arrange(p1, p2, ncol = 1)# retention rate bubble chart
ggplot(na.omit(df_plot), aes(x = cohort, y = ret_rate, group = cohort, color = year_of_LT)) +
theme_bw() +
scale_size(range = c(15, 40)) +
geom_line(size = 2, alpha = 0.3) +
geom_point(aes(size = number_prev_year), alpha = 0.3) +
geom_point(aes(size = number), alpha = 0.8) +
geom_smooth(linetype = 2, size = 2, method = 'loess', aes(group = year_of_LT, fill = year_of_LT), alpha = 0.2) +
geom_text(aes(label = paste0(number, '/', number_prev_year, '\n', percent(round(ret_rate, 2)))),
color = 'white', size = 3, hjust = 0.5, vjust = 0.5, fontface = "plain") +
theme(legend.position='none',
plot.title = element_text(size=20, face="bold", vjust=2),
axis.title.x = element_text(size=18, face="bold"),
axis.title.y = element_text(size=18, face="bold"),
axis.text = element_text(size=16),
axis.text.x = element_text(size=10, angle=90, hjust=.5, vjust=.5, face="plain"),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
labs(x = 'Cohorts', y = 'Retention Rate by Year of Lifetime') +
ggtitle("Customer Retention Rate - Bubble chart")# retention rate falling drops chart
ggplot(df_plot, aes(x = cohort, y = ret_rate, group = cohort, color = year_of_LT)) +
theme_bw() +
scale_size(range = c(15, 40)) +
scale_y_continuous(limits = c(0, 1)) +
geom_line(size = 2, alpha = 0.3) +
geom_point(aes(size = number), alpha = 0.8) +
geom_text(aes(label = paste0(number, '\n', percent(round(ret_rate, 2)))),
color = 'white', size = 3, hjust = 0.5, vjust = 0.5, fontface = "plain") +
theme(legend.position='none',
plot.title = element_text(size=20, face="bold", vjust=2),
axis.title.x = element_text(size=18, face="bold"),
axis.title.y = element_text(size=18, face="bold"),
axis.text = element_text(size=16),
axis.text.x = element_text(size=10, angle=90, hjust=.5, vjust=.5, face="plain"),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
labs(x = 'Cohorts', y = 'Retention Rate by Year of Lifetime') +
ggtitle("Customer Retention Rate - Falling Drops chart")## Warning: Removed 36 rows containing missing values (geom_path).
## Warning: Removed 36 rows containing missing values (geom_point).
## Warning: Removed 36 rows containing missing values (geom_text).
# https://rlbarter.github.io/superheat-examples/Organ/