# Fix the RNG seed so the random draw below is reproducible
set.seed(133)
a <- sample(month.name, size = 50, replace = TRUE)
# Build a factor: levels follow calendar order, labels are the abbreviations
f <- factor(a, levels = month.name, labels = month.abb)
is.factor(f)
## [1] TRUE
# Inspect levels and labels of the new factor. Note that labels() returns the
# element names ("1".."50"), not the level labels, which is not what you expect
levels(f)
## [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
labels(f)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15"
## [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
## [31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45"
## [46] "46" "47" "48" "49" "50"
# Turn the factor back into a character vector (same result as levels(f)[f])
as.character(f)
## [1] "Sep" "Jun" "Apr" "Jan" "Jan" "Sep" "Jun" "Feb" "Aug" "Apr" "Feb" "Jun"
## [13] "Jan" "Dec" "Jan" "Jan" "Sep" "Dec" "Mar" "Apr" "Feb" "Aug" "Aug" "Mar"
## [25] "Sep" "Mar" "Dec" "Aug" "Feb" "Sep" "Jan" "Aug" "Sep" "Oct" "Jan" "Aug"
## [37] "Jan" "Aug" "May" "Dec" "Apr" "Jul" "Mar" "Dec" "Apr" "Feb" "May" "Sep"
## [49] "Mar" "Sep"
# as.numeric() returns the position of each value within levels(f)
as.numeric(f)
## [1] 9 6 4 1 1 9 6 2 8 4 2 6 1 12 1 1 9 12 3 4 2 8 8 3 9
## [26] 3 12 8 2 9 1 8 9 10 1 8 1 8 5 12 4 7 3 12 4 2 5 9 3 9
# Factor helpers from the forcats package
# https://forcats.tidyverse.org/index.html
library(forcats)
# Frequency table: notice that Nov has 0 occurrences
table(f)
## f
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 8 5 5 5 2 3 1 7 8 1 0 5
fct_count(f)
## # A tibble: 12 × 2
## f n
## <fct> <int>
## 1 Jan 8
## 2 Feb 5
## 3 Mar 5
## 4 Apr 5
## 5 May 2
## 6 Jun 3
## 7 Jul 1
## 8 Aug 7
## 9 Sep 8
## 10 Oct 1
## 11 Nov 0
## 12 Dec 5
# Drop the levels that never occur in the data
f <- fct_drop(f)
levels(f)
## [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Dec"
# Reorder the levels: move Dec to the front
f <- fct_relevel(f, "Dec")
levels(f)
## [1] "Dec" "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct"
# Order the levels by decreasing frequency
f <- fct_infreq(f)
levels(f)
## [1] "Jan" "Sep" "Aug" "Dec" "Feb" "Mar" "Apr" "Jun" "May" "Jul" "Oct"
# Keep only the top 3 levels and collapse the rest into "Other"
f <- fct_lump(f, n = 3)
levels(f)
## [1] "Jan" "Sep" "Aug" "Other"
# Reverse the order of the levels
f <- fct_rev(f)
levels(f)
## [1] "Other" "Aug" "Sep" "Jan"
# Rename levels (new = old); the result is only printed, f is not reassigned
fct_recode(f, Boring = "Jan", Hot = "Aug")
## [1] Sep Other Other Boring Boring Sep Other Other Hot Other
## [11] Other Other Boring Other Boring Boring Sep Other Other Other
## [21] Other Hot Hot Other Sep Other Other Hot Other Sep
## [31] Boring Hot Sep Other Boring Hot Boring Hot Other Other
## [41] Other Other Other Other Other Other Other Sep Other Sep
## Levels: Other Hot Sep Boring
# Work on the gapminder data set shipped with the gapminder package
ds <- gapminder::gapminder
# An empty canvas: aesthetics are mapped but no geom layer has been added yet
ggplot(data = ds, aes(x = gdpPercap, y = lifeExp))
# Scatter plot of the 2007 rows (year is compared as a number, not a string)
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
# Map population to point size and continent to colour
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, size = pop, color = continent)) +
  geom_point(alpha = 0.6)
# Add a loess trend line without its confidence band
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  stat_smooth(method = "loess", formula = y ~ x, se = FALSE)
# Life expectancy over time for a single country
ds %>%
  filter(country == "Jordan") %>%
  ggplot(aes(x = year, y = lifeExp)) + # optionally add color = continent
  geom_point() +
  geom_line(na.rm = TRUE)
# Population over time for European countries, one coloured line per country
ds %>%
  filter(continent == "Europe") %>% # select European countries
  ggplot(aes(
    x = year,
    y = pop,
    # order the legend by each country's last pop value (last in row order),
    # largest first
    color = fct_reorder(country, pop, .fun = tail, n = 1, .desc = TRUE)
  )) +
  geom_point() +
  geom_line(na.rm = TRUE) + # always add na.rm = TRUE
  labs(color = "Countries", x = "") +
  theme_bw() +
  # breaks are pinned explicitly so they always match the five labels;
  # labels without breaks fail whenever the default break count is not five
  scale_y_continuous(
    breaks = seq(0, 8e7, by = 2e7),
    labels = c("0", "20M", "40M", "60M", "80M")
  ) +
  scale_x_continuous(breaks = seq(1952, 2007, 5)) +
  theme(axis.text.x = element_text(size = 8))
# Life expectancy by continent: raw points overlap on one line per continent
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp)) +
  geom_point()
# Jitter the points horizontally; size by population, colour by continent
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp, size = pop, col = continent)) +
  geom_jitter(width = 0.2)
# Histogram of life expectancy (default binning, hence the message below)
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = lifeExp)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Bar chart: number of 2007 rows per continent
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent)) + # you can add fill = country
  geom_bar()
# Same bar chart with the count printed above each bar
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent)) + # you can add fill = country
  geom_bar() +
  # after_stat(count) replaces the deprecated ..count.. notation
  geom_text(
    aes(label = after_stat(count)),
    stat = "count", color = "red", vjust = -0.3
  )
# Stacked bars filled by country (first 20 rows only, to keep the legend small)
ds %>%
  filter(year == 2007) %>%
  slice(1:20) %>%
  ggplot(aes(x = continent, fill = country)) +
  geom_bar()
# Same data with every bar rescaled to full height
ds %>%
  filter(year == 2007) %>%
  slice(1:20) %>%
  ggplot(aes(x = continent, fill = country)) +
  geom_bar(position = "fill") # try position = "stack", "fill" or "dodge"
# Order the bars by decreasing frequency
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = fct_infreq(continent))) + # you can add fill = country
  geom_bar()
# There is no geom_piechart: a pie chart is actually a bar chart drawn in polar coordinates
# Pie chart: a single stacked bar wrapped into polar coordinates
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = "", fill = continent)) +
  geom_bar() +
  coord_polar(theta = "y") +
  theme_void()
# Same pie from pre-computed counts, with a percentage label in each slice
ds %>%
  filter(year == 2007) %>%
  count(continent) %>%
  ggplot(aes(x = "", y = n, fill = continent)) +
  geom_col() +
  geom_text(
    # share of the total, rounded to whole percent
    aes(label = scales::percent(n / sum(n), accuracy = 1)),
    # centre each label inside its stacked segment
    position = position_stack(vjust = 0.5),
    size = 5
  ) +
  coord_polar(theta = "y") +
  theme_void()
#' Draw a pie chart from a table of counts
#'
#' Builds a single stacked column with `geom_col()` and wraps it into polar
#' coordinates (`coord_polar(theta = "y")`). Each slice is labelled with its
#' share of `sum(n)` formatted by `scales::percent()`, and `theme_void()` is
#' applied.
#'
#' @param DS Data frame with at least the columns `FILL` (mapped to the slice
#'   fill) and `n` (mapped to the slice size).
#' @return The ggplot object built from `DS`.
pie_chart<- function(DS){
# DS needs to have two columns at least: FILL (*)character) and n (numeric)
ggplot(DS, aes(x="", y=n, fill=FILL))+ # you can add fill=country
geom_col()+
geom_text(aes(label = scales::percent(n/sum(n),1)), position = position_stack(vjust = 0.5), size=5) +
coord_polar(theta="y")+
theme_void()
}
# Use the helper: pie_chart() expects the grouping column to be named FILL
ds %>%
  filter(year == 2007) %>%
  count(continent) %>%
  mutate(FILL = continent) %>%
  pie_chart()
# Typing the function name without parentheses prints its source
pie_chart
## function(DS){
## # DS needs to have two columns at least: FILL (*)character) and n (numeric)
## ggplot(DS, aes(x="", y=n, fill=FILL))+ # you can add fill=country
## geom_col()+
## geom_text(aes(label = scales::percent(n/sum(n),1)), position = position_stack(vjust = 0.5), size=5) +
## coord_polar(theta="y")+
## theme_void()
## }
# Stacked columns from pre-computed counts (first 20 rows of 2007 only)
ds %>%
  filter(year == 2007) %>%
  slice(1:20) %>%
  # count both columns directly so the result is not left grouped
  count(continent, country) %>%
  ggplot(aes(x = continent, y = n, fill = country)) +
  geom_col() # position = "stack" or "fill" or "dodge"
# Same chart with continents ordered by their total count, largest first
ds %>%
  filter(year == 2007) %>%
  slice(1:20) %>%
  group_by(continent) %>%
  count(country) %>%
  # still grouped by continent here, so total is the per-continent sum
  mutate(total = sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = reorder(continent, -total), y = n, fill = country)) # position = "stack" or "fill" or "dodge"
# Every column rescaled to full height to compare proportions
ds %>%
  filter(year == 2007) %>%
  slice(1:20) %>%
  count(continent, country) %>%
  ggplot(aes(x = continent, y = n, fill = country)) +
  geom_col(position = "fill") # position = "stack" or "fill" or "dodge"
# Box plot of life expectancy per continent
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp)) +
  geom_boxplot()
# Overlay the individual observations, jittered horizontally
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp)) +
  geom_boxplot() +
  geom_jitter(width = 0.2)
# Donut chart of the share of rows per continent (no year filter is applied)
ds %>%
  count(continent) %>%
  mutate(total = sum(n), fraction = n / total) %>%
  mutate(
    # each ring segment spans [ymin, ymax] on the cumulative-fraction axis
    ymax = cumsum(fraction),
    ymin = c(0, head(ymax, n = -1)),
    # labels sit at the middle of their segment
    labelPosition = (ymax + ymin) / 2,
    label = paste0(round(fraction * 100, 0), "%")
  ) %>%
  ggplot(aes(
    ymax = ymax, ymin = ymin,
    xmax = 3.5, xmin = 2.5,
    fill = continent
  )) +
  geom_rect() +
  # expand_limits(x = c(10, 10), y = c(10, 10)) +
  geom_text(
    x = 4.1,
    aes(y = labelPosition, label = paste0(continent, "\n", label)),
    color = "black", size = 5, fontface = "bold"
  ) +
  # namespace-qualified because ggsci is only attached later in this script
  ggsci::scale_fill_lancet() +
  coord_polar(theta = "y", direction = -1) +
  # the x range starts below xmin, which leaves the centre empty (the hole)
  xlim(c(1.5, 4.1)) +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme_void() +
  theme(legend.position = "none", plot.margin = unit(c(0, 0, 0, 0), "lines"))
# Horizontal bar chart, most frequent continent on top, counts next to bars
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = fct_rev(fct_infreq(continent)))) + # you can add fill = country
  geom_bar() +
  # the text layer inherits x from the plot; after_stat(count) replaces the
  # deprecated ..count.. notation
  geom_text(
    aes(label = after_stat(count)),
    stat = "count", color = "black", hjust = -0.4
  ) +
  coord_flip()
# Box plot annotated with the median (as text) and the mean (red point)
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp)) +
  geom_boxplot(alpha = 0.5) +
  # `fun` replaces the deprecated `fun.y`; after_stat(y) replaces ..y..
  stat_summary(
    fun = median, colour = "black", geom = "text", vjust = -0.7,
    aes(label = round(after_stat(y), digits = 1))
  ) +
  stat_summary(fun = mean, geom = "point", color = "red")
# Linear trend line with its confidence band
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  stat_smooth(method = "lm", formula = y ~ x, se = TRUE)
# Titles, caption, axis labels and legend title are all set through labs()
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  labs(
    title = "Title",
    subtitle = "subtitle",
    caption = "caption",
    x = "GDP per Capita",
    y = "Life Expectancy in years",
    color = "CONTINENT"
  )
# Complete themes: https://ggplot2.tidyverse.org/reference/ggtheme.html
# Base plot reused below to compare the built-in complete themes
p <- ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent))
p + theme_classic() + labs(title = "theme_classic")
p + theme_void() + labs(title = "theme_void")
p + theme_minimal() + labs(title = "theme_minimal")
p + theme_bw() + labs(title = "theme_bw")
# Fine-tune a theme: legend placed inside the panel, x tick labels rotated
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  labs(x = "GDP per Capita", y = "Life Expectancy in years", color = "CONTINENT") +
  theme_classic() +
  theme(
    legend.position = c(0.7, 0.3),
    axis.text.x = element_text(angle = 45, vjust = 0.9, hjust = 0.9, size = 10)
  )
# Customise the scales: log x axis with custom ticks, padded y axis, and
# manual colours with custom legend labels
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  labs(x = "GDP per Capita", y = "Life Expectancy in years", color = "CONTINENT") +
  theme_classic() +
  # other scale functions worth trying:
  # scale_alpha_continuous() +
  # scale_alpha_manual() +
  # scale_y_discrete() +
  # scale_y_reverse() +
  # scale_y_log10() +
  # scale_y_sqrt() +
  # scale_y_continuous(labels = scales::percent_format(scale = 1)) # make axis percent
  # points outside the x limits are dropped, hence the warning recorded below
  scale_x_log10(
    limits = c(1000, 100000),
    breaks = c(1000, 10000, 100000),
    labels = c("1k", "10k", "100k")
  ) +
  scale_y_continuous(expand = expansion(mult = c(0.1, 0.2))) + # expand the axis from both sides
  scale_color_manual(
    labels = c("AF", "AM", "AS", "EU", "OC"),
    values = c("black", "grey", "pink", "red", "#38E54D")
  )
## Warning: Removed 20 rows containing missing values (geom_point).
library(RColorBrewer)
# Colour the continents with a ColorBrewer palette
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  labs(x = "GDP per Capita", y = "Life Expectancy in years", color = "CONTINENT") +
  theme_classic() +
  scale_color_brewer(palette = "Set1")
# Plot with three legends (size, shape, colour) used to demonstrate guides()
p <- ds %>%
  filter(year == 2007) %>%
  ggplot(aes(
    x = gdpPercap, y = lifeExp,
    size = pop, shape = continent, color = continent
  )) +
  geom_point() +
  theme_classic()
# show plot
p
# remove one legend:
p + guides(size = "none")
# change order of legends:
p + guides(
  size = guide_legend(order = 1),
  color = guide_legend(order = 2),
  shape = guide_legend(order = 2)
)
# change legend parameters, size, keyheight, keywidth, nrow, ncol
p + guides(
  size = guide_legend(nrow = 2),
  color = guide_legend(
    nrow = 2, keyheight = 1, keywidth = 1,
    override.aes = list(size = 7)
  ),
  shape = "none"
)
# One panel per continent, all panels sharing the same axes
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  theme_classic() +
  facet_wrap(~continent)
# Independent axes per panel; strips are hidden and the legend is placed
# inside the plot area
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  theme_classic() +
  facet_wrap(
    ~continent,
    scales = "free", as.table = TRUE, strip.position = "right"
  ) +
  theme(
    strip.background = element_blank(),
    strip.text = element_blank(),
    legend.position = c(0.85, 0.2)
  ) +
  labs(title = "Free scales and remove strips")
# Four separate plots, one per continent, combined into a 2 x 2 grid
A <- ds %>%
  filter(year == 2007, continent == "Asia") %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
B <- ds %>%
  filter(year == 2007, continent == "Africa") %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
C <- ds %>%
  filter(year == 2007, continent == "Europe") %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
D <- ds %>%
  filter(year == 2007, continent == "Americas") %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
ggpubr::ggarrange(A, B, C, D, ncol = 2, nrow = 2, labels = "AUTO")
# labels can also be a vector, e.g. c("a1", "a2", "b", "c")
# if multiple plots share the same legend, show it once with common.legend = TRUE
# Build the figure, display it, then write it to disk
fig1 <- ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent))
fig1
# pass the plot explicitly so the saved file does not depend on whichever
# plot happened to be created last
ggsave("Fig1.jpeg", plot = fig1, height = 6, width = 10, dpi = 600)
# Fully customised plot that highlights and labels a single country (Jordan)
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  stat_smooth(method = "loess", se = FALSE, formula = y ~ x) +
  # NOTE(review): continent is a factor, so ifelse() appears to return its
  # integer codes as text ("1".."5") plus "black" for Jordan; that gives the
  # six colour groups that the six labels in scale_color_brewer() rename
  geom_point(aes(
    size = pop,
    color = ifelse(country == "Jordan", "black", continent)
  )) +
  # only Jordan gets a label; every other row has an NA label, which is what
  # the warning recorded below reports
  ggrepel::geom_text_repel(
    aes(label = ifelse(country == "Jordan", as.character(country), NA)),
    vjust = -3, size = 5
  ) +
  # geom_text() +
  theme_classic() +
  labs(
    x = "GDP per capita", y = "Life Expectancy",
    title = "Gapminder analysis",
    caption = Sys.Date(),
    color = "Continent"
  ) +
  theme(
    title = element_text(size = 15, color = "red"),
    axis.title = element_text(size = 15, color = "black", hjust = 0.5, vjust = 100),
    axis.text = element_text(size = 12, color = "blue"),
    legend.title = element_text(color = "Green"),
    legend.text = element_text(size = 12),
    legend.position = c(0.92, 0.4),
    legend.background = element_blank()
  ) +
  scale_x_log10() +
  scale_color_brewer(
    palette = "Set1",
    labels = c("Af", "Am", "As", "Eu", "Au", "Jo")
  ) +
  guides(
    size = "none",
    col = guide_legend(keyheight = 1, keywidth = 1, override.aes = list(size = 8))
  ) +
  coord_flip()
## Warning: Removed 141 rows containing missing values (geom_text_repel).
# Add specific notes or lines to the plot with annotate()
# annotate() draws a one-off text label and a dashed segment at fixed
# coordinates, independent of the data
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  annotate("text", x = 10000, y = 40, label = "test") +
  annotate(
    "segment",
    x = 10000, y = 41, xend = 12500, yend = 50,
    col = "red", linetype = "dashed"
  )
# Reference chart: one default colour per X value, one default shape per Y value
df <- data.frame(X = 1:20, Y = c(1:6, rep(3, 14)))
ggplot(df, aes(x = X, y = Y, col = as.factor(X), shape = as.factor(Y))) +
  geom_point(size = 5) +
  scale_x_continuous("Color", breaks = 1:20) +
  scale_y_continuous("Shape", breaks = 1:6) +
  guides(color = "none")
# Label only the countries with more than 100 million inhabitants
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  stat_smooth(method = "loess", se = FALSE, formula = y ~ x) +
  geom_point(aes(size = pop, color = continent)) +
  theme_classic() +
  # the label layer gets its own, smaller data set
  ggrepel::geom_text_repel(
    data = ds %>% filter(year == 2007, pop > 1e8),
    aes(label = country)
  )
# GDP per capita over time: every country in grey, two countries highlighted
# in colour and labelled directly at the end of their lines
ds %>%
  filter(country != "Kuwait") %>%
  ggplot(aes(x = year, y = gdpPercap, group = country)) +
  geom_line(color = "grey", na.rm = TRUE) +
  geom_line(
    data = ds %>% filter(country %in% c("Jordan", "Saudi Arabia")),
    aes(color = country),
    na.rm = TRUE
  ) +
  # a complete theme resets all theme settings, so the legend must be hidden
  # AFTER theme_classic(); this also makes a guides() workaround unnecessary
  theme_classic() +
  theme(legend.position = "none") +
  # direct labels placed one year to the right of the latest observation
  geom_text(
    data = ds %>%
      filter(year == max(year), country %in% c("Jordan", "Saudi Arabia")),
    aes(label = country, x = year + 1, y = gdpPercap, color = country),
    hjust = 0
  ) +
  # extra room on the right-hand side for the direct labels
  scale_x_continuous(
    breaks = unique(ds$year),
    expand = expansion(mult = c(0.1, 0.3))
  ) +
  scale_y_continuous(
    breaks = 10000 * 0:5,
    labels = paste0(seq(0, 50, 10), "k")
  )
library(ggsci)
# Colour the continents with a journal palette from ggsci
ds %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  labs(x = "GDP per Capita", y = "Life Expectancy in years", color = "CONTINENT") +
  theme_classic() +
  scale_color_nejm()
# Population pyramid
# pop is simply halved into two equal "Male"/"Female" groups for illustration
ds %>%
  filter(year == 2007, continent == "Europe") %>%
  mutate(Male = pop / 2, Female = pop / 2) %>%
  # pivot_longer() is the current replacement for the superseded gather()
  pivot_longer(cols = c(Male, Female), names_to = "Sex", values_to = "n") %>%
  ggplot(aes(
    # males are drawn to the left of zero, females to the right
    x = ifelse(test = Sex == "Male", yes = -n, no = n),
    y = reorder(country, -n),
    fill = Sex
  )) +
  geom_col() +
  lemon::scale_x_symmetric(
    labels = c("40M", "30M", "20M", "10M", "0", "10M", "20M", "30M", "40M"),
    breaks = seq(-4e7, 4e7, 1e7)
  ) +
  scale_fill_brewer(palette = "Set1") +
  theme_minimal() +
  theme(text = element_text(size = 12)) +
  # both axis titles are blank (an earlier x title was always overridden here)
  labs(x = "", y = "")
# Fetch the pbc data set with Hmisc::getHdata(); the next statement uses `pbc`
Hmisc::getHdata(pbc)
pbc %>%
  ggplot() +
  # wrap long drug labels at about 10 characters; namespace-qualified so it
  # works whether or not stringr is attached
  geom_bar(aes(x = stringr::str_wrap(drug, width = 10)))
# Interactive version of the 2007 scatter plot; label = country is passed
# along as an extra aesthetic
plotly::ggplotly(
  ds %>%
    filter(year == 2007) %>%
    ggplot(aes(
      x = gdpPercap, y = lifeExp,
      size = pop, color = continent, label = country
    )) +
    geom_point(alpha = 0.8)
)