Some remarks and examples about pyramid and pie plots with ggplot2.
library(ggplot2)
library(RColorBrewer) # for colors
library(xtable) # for godlooking tables
library(reshape2) # for melting data frames
library(plyr) # for subseting data in Pyramid plot
library(knitr)
# function for plot captions
# solution found on:
# https://support.rstudio.com/hc/communities/public/questions/200635448-knitr-fig-cap-with-markdown
# code from:
# https://github.com/yihui/knitr-examples/blob/master/063-html5-figure.Rmd
knit_hooks$set(plot = function(x, options) {
paste('<figure><img src="',
opts_knit$get('base.url'), paste(x, collapse = '.'),
'"><figcaption><center><b>', options$fig.cap, '</b></center></figcaption></figure>',
sep = '')
})
questions <- factor(x = 1:4, labels = c("q1", "q2", "q3", "q4"))
Yes <- c(50, 25, 50, 75)
No <- c(50, 25, 40, NA)
Maybe <- c(NA, 25, 5, 25)
Dont_know <- c(NA, 25, 5, NA)
dt <- data.frame(questions, Yes, No, Maybe, Dont_know)
rm(questions, Yes, No, Maybe, Dont_know)
# printing data table
print(xtable(dt, caption = "<b><i>Data for pie plot</b></i>"),
type = "html",
include.rownames = FALSE,
caption.placement = "top")
| questions | Yes | No | Maybe | Dont_know |
|---|---|---|---|---|
| q1 | 50.00 | 50.00 | ||
| q2 | 25.00 | 25.00 | 25.00 | 25.00 |
| q3 | 50.00 | 40.00 | 5.00 | 5.00 |
| q4 | 75.00 | 25.00 |
# creating position coordinates for label
# making copy of the data frame
position <- dt[1, ]
position <- position[-1, ]
# stupid way to calculate positions
# !!! Must find beter way !!!
for(i in 1:dim(dt)[1]){
position[i, 1] <- dt[i, 1] # just copy of qustion name
position[i, 2] <- dt[i, 2] / 2 # midlle of the first bar
j <- 3
# midlle of the second bar
if(is.na(dt[i, j])){
position[i, j] <- NA
} else {
position[i, j] <- dt[i, 3] / 2 + sum(dt[i, 2:(j-1)], na.rm = TRUE)
}
j <- j +1
# midlle of the third bar
if(is.na(dt[i, j])){
position[i, j] <- NA
} else {
position[i, j] <- dt[i, 4] / 2 + sum(dt[i, 2:(j-1)], na.rm = TRUE)
}
j <- j +1
# midlle of the bar
if(is.na(dt[i, j])){
position[i, j] <- NA
} else {
position[i, j] <- dt[i, 5] / 2 + sum(dt[i, 2:(j-1)], na.rm = TRUE)
}
}
#position <- melt(position, id.vars = c("questions"))
# melting data frame
dt <- melt(dt, id.vars = c("questions"))
# creating label for plot
dt$label <- paste(format(dt$value, nsmall = 1),
"%",
sep = "")
# adding positions of data labels
dt$position <- melt(position, id.vars = c("questions"))[, 3]
# adding position ajustment for data labels
dt$vjust <- 0
dt$vjust[11] <- 1
dt$vjust[15] <- -1
rm(position)
p <- ggplot(data = dt, aes(x = questions, y = value, fill = variable)) # creating main object
p <- p + geom_bar(stat="identity") # adding geom
# adding colors, removing legend titel, and changing legend labels
p <- p + scale_fill_manual(values = brewer.pal(4, "YlOrRd"),
name = "",
labels = c("Yes",
"No",
"Maybe",
"Dont know"))
# changing:
# legend position
# legend label size
# axis label size and color
p <- p + theme(legend.position="bottom",
legend.text = element_text(size = 20),
axis.text = element_text(colour = "black", size = 20),
plot.background = element_rect(fill = "white", colour = "white"),
panel.background = element_rect(fill = "white", colour = "white"))
# adding data labels with modifided positions
p <- p + geom_text(aes(y = dt$position, label = label),
size = 7,
vjust = dt$vjust)
# removing numbers and breaks from axis
p <- p + scale_y_continuous(breaks=NULL,
labels=NULL)
# changing axis
p <- p + coord_flip()
# removing labels from axis
p <- p + ylab("") + xlab("")
# making pie plot
p <- p + coord_polar()
# printing plot
p
The second plot, which I want to save for me for the future is pyramid plot.
I’m not very hapy with it and how it looks.
Answers <- factor(x=1:4, labels = c("Yes", "No", "Maybe", "Dont Know"))
Men <- c(40, 30, 30, 20)
Women <- c(50, 30, 15, 5)
dt <- data.frame(Answers, Men, Women)
rm(Answers, Men, Women)
# printing data table
print(xtable(dt, caption = "<b><i>Data for pyramid plot</b></i>"),
type = "html",
include.rownames = FALSE,
caption.placement = "top")
| Answers | Men | Women |
|---|---|---|
| Yes | 40.00 | 50.00 |
| No | 30.00 | 30.00 |
| Maybe | 30.00 | 15.00 |
| Dont Know | 20.00 | 5.00 |
# meltig data frame
dt <- melt(dt, id.vars=c("Answers"))
# creating labels for plot
dt$label <- paste(format(dt$value, nsmall = 1),
"%",
sep = "")
ggplot(data = dt, aes(x = Answers, fill = variable)) +
geom_bar(stat = "identity", subset = .(variable == "Men"), aes(y = value)) +
geom_text(subset = .(variable == "Men"), aes(y = value, label = label), size = 4, hjust = -0.1) +
geom_bar(stat = "identity", subset = .(variable == "Women"), aes(y=value * (-1)) ) +
geom_text(subset = .(variable == "Women"), aes(y = value * (-1), label = label), size = 4, hjust = 1) +
scale_y_continuous(limits = c(-55, 55), breaks=seq(-50,50,10),labels=abs(seq(-50,50,10))) +
theme(axis.text = element_text(colour = "black"), plot.title = element_text(lineheight=.8) ) +
coord_flip() +
annotate("text", x = 4.3, y = -40, label = "Women", fontfacet = "bold") +
annotate("text", x = 4.3, y = 40, label = "Men", fontfacet = "bold") +
ylab("") + xlab("") + guides(fill=FALSE)
## Warning: Stacking not well defined when ymin != 0