We will be doing simplified reproductions of figures found in Storytelling with Data. When we learn more about ggplot we can personalize and polish our plots more.
To create a heatmapped table you might want to use the following packages: ztable or tidyverse.
I had a lot of difficulty installing ztable, so I will recreate heatmapped tables using the tiling feature of ggplot2.
#### Load the example data
# Example counts, entered row by row (hence byrow = TRUE): one row per
# category (Cat1..Cat5) and one column per letter (A..D).
bars <- matrix(
  data = c(
    4, 3, 1, 1,
    5, 6, 3, 1,
    4, 5, 1, 2,
    4, 5, 3, 2,
    7, 6, 5, 3
  ),
  nrow = 5,
  byrow = TRUE,
  dimnames = list(
    c("Cat1", "Cat2", "Cat3", "Cat4", "Cat5"),
    c("A", "B", "C", "D")
  )
)
# Print the labelled matrix
bars
## A B C D
## Cat1 4 3 1 1
## Cat2 5 6 3 1
## Cat3 4 5 1 2
## Cat4 4 5 3 2
## Cat5 7 6 5 3
I use the melt function to reshape the data so that observations are rows and variables are columns.
#install.packages("reshape2")
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.6.2
# Reshape the matrix to long form: one row per cell, holding the row label,
# the column label and the cell value.
barsM <- reshape2::melt(bars)
# Inspect the first rows of the reshaped data
head(barsM)
## Var1 Var2 value
## 1 Cat1 A 4
## 2 Cat2 A 5
## 3 Cat3 A 4
## 4 Cat4 A 4
## 5 Cat5 A 7
## 6 Cat1 B 3
library(tidyverse)
# Heatmapped table: one tile per (Var1, Var2) cell, shaded by `value` and
# labelled with the value itself.
ggplot(barsM, aes(x = Var1, y = Var2)) +
  # tile background colour is mapped to the value column
  geom_tile(aes(fill = value)) +
  # geom_text() has no `fill` aesthetic (mapping one only triggers an
  # "unknown aesthetics" warning), so only the label is mapped here
  geom_text(aes(label = round(value, 2))) +
  scale_fill_gradient2(
    low = "white",
    high = "forestgreen"
  )
## FIG 2.5
# Read the tab-delimited table for this figure from a local file.
heat <- read.delim(
  file = "/Users/hsmalley/Downloads/swdFig2.5.txt",
  header = TRUE
)
# Use the first column as row names, then drop it from the data columns
rownames(heat) <- heat[[1]]
heat <- heat[, -1]
# Long form: Var1 = row label, Var2 = column label, value = cell content
headM <- melt(as.matrix(heat))
# Reverse the level order of Var1 (presumably so the first table row ends up
# on top once the axes are flipped)
headM$Var1 <- factor(
  headM$Var1,
  levels = rev(levels(headM$Var1)),
  ordered = TRUE
)
ggplot(data = headM, mapping = aes(x = Var1, y = Var2)) +
  # tile background colour is mapped to the value column
  geom_tile(mapping = aes(fill = value)) +
  # print the rounded value on top of each tile
  geom_text(mapping = aes(label = round(value, 2))) +
  scale_fill_gradient2(
    low = "white",
    high = "forestgreen"
  ) +
  # swap the axes so Var1 runs along the vertical axis
  coord_flip()
## NOTE: tidyr's gather function can do the same reshaping
# Re-read the file first, because `heat` was modified above.
heat <- read.delim(
  file = "/Users/hsmalley/Downloads/swdFig2.5.txt",
  header = TRUE
)
# Stack every column except X into type/percent pairs
headG <- gather(heat, "type", "percent", -X)
Here is a simple example where text would suffice:
## RECREATE FIG2.2
# Download the data behind Figure 2.2, keeping text columns as character.
mothers <- read.csv(
  "https://raw.githubusercontent.com/adamribaudo/storytelling-with-data-ggplot/master/data/FIG0202-3.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# geom_col is used because each row already holds the value to plot
# (rather than individual observations that would need counting)
ggplot(mothers, aes(x = as.factor(Year), y = Value)) +
  geom_col() +
  xlab("Year") +
  ylab("Percent") +
  theme_minimal() +
  # title typo fixed: "Pecent" -> "Percent"
  ggtitle("Percent Stay-at-Home")
## RECREATE FIG2.6 and FIG2.7
# Download the miles-driven / cost-per-mile data, keeping text as character.
drive <- read.csv(
  file = "https://raw.githubusercontent.com/adamribaudo/storytelling-with-data-ggplot/master/data/FIG0206-7.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
str(drive)
## 'data.frame': 36 obs. of 2 variables:
## $ Miles.Driven : chr "1,100" "1,177" "1,239" "1,294" ...
## $ Cost.Per.Mile: chr "$2.40" "$2.80" "$2.20" "$2.50" ...
# Both columns are read as text; remove "$" and "," and convert to numbers
drive[["Cost.Per.Mile2"]] <- as.numeric(
  gsub("[\\$,]", "", drive[["Cost.Per.Mile"]])
)
drive[["Miles.Driven2"]] <- as.numeric(
  gsub("[\\$,]", "", drive[["Miles.Driven"]])
)
# Confirm that the two new numeric columns were added
str(drive)
## 'data.frame': 36 obs. of 4 variables:
## $ Miles.Driven : chr "1,100" "1,177" "1,239" "1,294" ...
## $ Cost.Per.Mile : chr "$2.40" "$2.80" "$2.20" "$2.50" ...
## $ Cost.Per.Mile2: num 2.4 2.8 2.2 2.5 1.9 2 2.2 1.35 2 1.3 ...
## $ Miles.Driven2 : num 1100 1177 1239 1294 1378 ...
# Flag each observation as at/above (TRUE) or below (FALSE) the mean cost
# per mile; the flag drives the point colour in the plot.
above <- drive %>%
  mutate(above = (Cost.Per.Mile2 >= mean(Cost.Per.Mile2)))
ggplot(above, aes(Miles.Driven2, Cost.Per.Mile2, color = above)) +
  geom_point() +
  # Dashed reference line at the same mean that splits the colours, so the
  # line and the colouring can never disagree (was hard-coded to 1.5)
  geom_hline(
    yintercept = mean(above$Cost.Per.Mile2),
    linetype = "dashed",
    size = 1
  ) +
  # Single marker at the overall average; annotate() draws it once instead
  # of once per data row
  annotate(
    "point",
    x = mean(above$Miles.Driven2),
    y = mean(above$Cost.Per.Mile2),
    color = "black",
    size = 5
  ) +
  xlab("Miles driven per month") +
  ylab("Cost per mile") +
  theme_minimal() +
  theme(legend.position = "none") +
  ggtitle("Scatterplot: Miles Driven vs Cost Per Mile")
In this example, we need to use a date type variable. This can be done with the package lubridate.
## RECREATE FIG2.8
#install.packages("lubridate")
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.2
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Download the data for the line chart, keeping text columns as character.
lineEx <- read.csv(
  file = "https://raw.githubusercontent.com/adamribaudo/storytelling-with-data-ggplot/master/data/FIG0209.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Combine Year and Month with day 1 and parse the result as a date
lineDate <- mutate(lineEx, date = ymd(paste(Year, Month, 1)))
# Line chart of Avg over time
ggplot(data = lineDate, mapping = aes(x = date, y = Avg)) +
  geom_line() +
  theme_minimal()
Warning: These data are not tidy. Use the gather function!
## RECREATE FIG2.10
# Download the slopegraph data, keeping text columns as character.
slope <- read.csv(
  file = "https://raw.githubusercontent.com/adamribaudo/storytelling-with-data-ggplot/master/data/FIG0210-11.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Rename the columns so the two value columns are labelled by year
names(slope) <- c("Item", "2014", "2015")
# Long form: one row per Item and year
slopeT <- gather(slope, "year", "percent", -Item)
# Remove the percent sign and rescale to a proportion
slopeT[["percent2"]] <- as.numeric(gsub("[\\%]", "", slopeT[["percent"]])) / 100
# One line per Item across the two years
ggplot(data = slopeT, mapping = aes(x = year, y = percent2, group = Item)) +
  geom_line() +
  theme_minimal()
Let’s create some fake data for this:
# This is how the book does it
bars <- matrix(c(4, 3, 1, 1,
                 5, 6, 3, 1,
                 4, 5, 1, 2,
                 4, 5, 3, 2,
                 7, 6, 5, 3), nrow = 5, byrow = TRUE)
# Binding the text labels onto the matrix turns every cell into character
bars <- cbind(c("Cat1", "Cat2", "Cat3", "Cat4", "Cat5"),
              bars)
colnames(bars) <- c("Category", "A", "B", "C", "D")
# But we need to make it tidy.
# Keep the columns as plain character (not factor) while reshaping: this
# avoids gather()'s "attributes are not identical" warning and makes the
# numeric conversion below independent of the stringsAsFactors default.
barsT <- as.data.frame(bars, stringsAsFactors = FALSE) %>%
  gather("Letter", "Count", 2:5)
# Count is still text at this point; convert it back to numbers
barsT$Count <- as.numeric(barsT$Count)
# Total Count per Category (letters stacked, no fill colour)
ggplot(data = barsT, mapping = aes(x = Category, y = Count)) +
  geom_col()
# Stacked bars, coloured by Letter
ggplot(data = barsT, mapping = aes(x = Category, y = Count, fill = Letter)) +
  geom_col()
# Stacked bars rescaled so every Category fills the full height
ggplot(data = barsT, mapping = aes(x = Category, y = Count, fill = Letter)) +
  geom_col(position = "fill")
# Side-by-side bars, one per Letter within each Category
ggplot(data = barsT, mapping = aes(x = Category, y = Count, fill = Letter)) +
  geom_col(position = "dodge")
Any of the plots above can be made horizontal!
# Same bar chart of Count per Category, with the axes swapped so the bars
# run horizontally
ggplot(data = barsT, mapping = aes(x = Category, y = Count)) +
  geom_col() +
  coord_flip()
Since we are learning how to make graphics for the first time, I suggest using this helpful tool!
#install.packages("esquisse")
library(esquisse)
## MOTHER EXAMPLE
# Download the data, keeping text columns as character.
mothers <- read.csv(
  file = "https://raw.githubusercontent.com/adamribaudo/storytelling-with-data-ggplot/master/data/FIG0202-3.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
## MILES DRIVEN
# Download the data, keeping text columns as character.
drive <- read.csv(
  file = "https://raw.githubusercontent.com/adamribaudo/storytelling-with-data-ggplot/master/data/FIG0206-7.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Remove "$" and "," from the text columns and convert them to numbers
drive[["Cost.Per.Mile2"]] <- as.numeric(
  gsub("[\\$,]", "", drive[["Cost.Per.Mile"]])
)
drive[["Miles.Driven2"]] <- as.numeric(
  gsub("[\\$,]", "", drive[["Miles.Driven"]])
)
# Flag rows whose cost per mile is at or above the mean
above <- mutate(drive, above = (Cost.Per.Mile2 >= mean(Cost.Per.Mile2)))
## PASSPORT WAIT
#install.packages("lubridate")
library(lubridate)
# Download the data, keeping text columns as character.
lineEx <- read.csv(
  file = "https://raw.githubusercontent.com/adamribaudo/storytelling-with-data-ggplot/master/data/FIG0209.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Combine Year and Month with day 1 and parse the result as a date
lineDate <- mutate(lineEx, date = ymd(paste(Year, Month, 1)))
## EMPLOYEE FEEDBACK
# Download the data, keeping text columns as character.
slope <- read.csv(
  file = "https://raw.githubusercontent.com/adamribaudo/storytelling-with-data-ggplot/master/data/FIG0210-11.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Rename the columns so the two value columns are labelled by year
names(slope) <- c("Item", "2014", "2015")
# Long form: one row per Item and year
slopeT <- gather(slope, "year", "percent", -Item)
# Remove the percent sign and rescale to a proportion
slopeT[["percent2"]] <- as.numeric(gsub("[\\%]", "", slopeT[["percent"]])) / 100
### MEALS
# Read the tab-delimited meals data from a local file and check its structure.
meals <- read.delim(
  file = "/Users/hsmalley/Downloads/meals.txt",
  header = TRUE
)
str(meals)
## 'data.frame': 10 obs. of 2 variables:
## $ Campaign.Year: int 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
## $ Meals.Served : int 40139 127020 168193 153115 202102 232897 277912 205350 233389 232797
# Single-column heatmapped table: one tile per campaign year (all placed at
# x = 1), shaded by the number of meals served and labelled with that number.
ggplot(data = meals, mapping = aes(x = 1, y = Campaign.Year)) +
  # tile background colour is mapped to Meals.Served
  geom_tile(mapping = aes(fill = Meals.Served)) +
  geom_text(mapping = aes(label = round(Meals.Served, 2))) +
  scale_fill_gradient2(
    low = "white",
    high = "forestgreen"
  ) +
  theme_minimal()
# Meals served per campaign year as a bar chart
ggplot(data = meals, mapping = aes(x = Campaign.Year, y = Meals.Served)) +
  geom_col() +
  theme_minimal()
# The same series drawn as a line chart
ggplot(data = meals, mapping = aes(x = Campaign.Year, y = Meals.Served)) +
  geom_line() +
  theme_minimal()