- Clarity of Data
- Trellis Displays
- Clarity of Plot Elements
- Clarity of Understanding
- Scales & Axes
- Summary of Principles
Spring 2020
Good plots should:
# Scatter plot of 15 random (x, y) pairs with marginal rug marks, drawn on a
# black-and-white theme with both sets of grid lines blanked out.
MyData <- data.frame(
  x = rnorm(15),
  y = rnorm(15)
)

ggplot(data = MyData, mapping = aes(x = x, y = y)) +
  geom_point(size = 5) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    text = element_text(size = 20, family = "Times")
  ) +
  # Gray tick marks along the axes show the marginal position of each point.
  geom_rug(color = "gray")
library(dplyr)
# Bar chart of the Area column for rows 3 through 8 of state.x77, with the
# bars ordered from largest to smallest area.
MyData <- data.frame(state.x77[3:8, ])
MyData$Name <- rownames(MyData)

ggplot(MyData, aes(x = reorder(Name, -Area), y = Area)) +
  # geom_col() plots the y values as given (the stat = "identity" bar chart).
  geom_col(fill = "darkred") +
  # White rules every 25,000 units are drawn on top of the bars; the theme's
  # own grid lines are removed below.
  geom_hline(
    yintercept = seq(0, 150000, by = 25000),
    color = "white",
    size = 1.25
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    text = element_text(size = 20, family = "Times")
  ) +
  xlab("State") +
  ylab("Area (square miles)")
# Read the wine data set and draw Alcohol against WineType four different ways.
WineData <- read.csv("http://eecs.ucf.edu/~wiegand/ids6938/datasets/r-lab1.dat")

# 1. Plain open circles, one per row.
ggplot(WineData, aes(x = Alcohol, y = WineType)) +
  geom_point(shape = 21, size = 5) +
  theme(text = element_text(size = 20, family = "Times"))

# 2. The same circles with random jitter added to their positions.
ggplot(WineData, aes(x = Alcohol, y = WineType)) +
  geom_jitter(shape = 21, size = 5) +
  theme(text = element_text(size = 20, family = "Times"))

# 3. Jittered circles, additionally filled by wine type.
ggplot(WineData, aes(x = Alcohol, y = WineType, fill = WineType)) +
  geom_jitter(shape = 21, size = 5) +
  theme(text = element_text(size = 20, family = "Times"))

# 4. Box plot per wine type; coord_flip() swaps the axes so that Alcohol runs
#    horizontally, as in the three plots above.
ggplot(WineData, aes(x = WineType, y = Alcohol, fill = WineType)) +
  geom_boxplot() +
  coord_flip() +
  theme(text = element_text(size = 20, family = "Times"))
# Two renderings of the same 1500 random points.
# The assignment and the first plot are separate statements: written on one
# line without a separator they do not parse.
MyData <- data.frame(x = rnorm(1500), y = rnorm(1500))

# Large, semi-transparent points (alpha = 0.4).
ggplot(MyData, aes(x, y)) +
  geom_point(size = 5, alpha = 0.4, color = "steelblue") +
  theme(text = element_text(size = 20, family = "Times"))

# Smaller, fully opaque points.
ggplot(MyData, aes(x, y)) +
  geom_point(size = 2, color = "steelblue") +
  theme(text = element_text(size = 20, family = "Times"))
# Three random point clouds of n points each ("Thing A", "Thing B", "Thing C"),
# where B and C are scaled and shifted copies of a standard normal draw.
# Group membership is shown with both color and point shape.
n <- 40
MyData <- data.frame(
  x = c(rnorm(n), 0.6 * rnorm(n) + 1, 1.1 * rnorm(n) - 1),
  y = c(rnorm(n), 0.6 * rnorm(n) + 1, 1.1 * rnorm(n) - 1),
  Type = rep(c("Thing A", "Thing B", "Thing C"), each = n)
)

ggplot(MyData, aes(x = x, y = y, color = Type, shape = Type)) +
  geom_point(size = 5)
In ggplot we create a trellis using a facet and a model formula (for example, `facet_grid(smoke ~ .)`)
# Histograms of birth weight, one panel per smoking status, stacked vertically.
# birthwt ships with the MASS package; it is referenced as MASS::birthwt so
# this snippet does not depend on MASS having been attached beforehand.
bwt <- MASS::birthwt$bwt

# smoke + 1 turns the indicator into an index into the two labels
# (presumably 0 = did not smoke, 1 = smoked -- confirm against ?MASS::birthwt).
smoke <- factor(
  c("Mother Didn't Smoke", "Mother Smoked")[MASS::birthwt$smoke + 1]
)
MyData <- data.frame(bwt, smoke)

ggplot(MyData, aes(x = bwt)) +
  geom_histogram(fill = "white", color = "black", binwidth = 500) +
  # One row of the trellis per level of smoke.
  facet_grid(smoke ~ .) +
  xlab("Birth Weight") +
  ylab("Count") +
  theme(text = element_text(size = 20, family = "Times"))
# Trellis of highway mileage against engine displacement from the mpg data:
# one row of panels per drv value and one column per class value.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(size = 3) +
  facet_grid(rows = drv ~ class) +
  labs(
    x = "Engine Displacement (liters)",
    y = "MPG on Highway"
  ) +
  theme(text = element_text(size = 20, family = "Times"))
# Bars of hwy by manufacturer from the mpg data, one row of panels per year.
# NOTE(review): the bars use the raw hwy values, so if a manufacturer has
# several rows in a year their values are stacked and the bar height is a
# sum rather than a typical MPG -- confirm this is the intended illustration.
ggplot(data = mpg, mapping = aes(x = manufacturer, y = hwy)) +
  geom_col() +
  facet_grid(rows = year ~ .) +
  labs(
    x = "Car Manufacturer",
    y = "MPG on the Highway"
  ) +
  theme(text = element_text(size = 20, family = "Times"))
# Box plots of mtcars mpg by cylinder count, one row of panels per gear count.
Cylinders <- mtcars$cyl
MPG <- mtcars$mpg
# Panel labels read "<gear count> Gears".
Gears <- factor(paste(mtcars$gear, "Gears"))
MyData <- data.frame(MPG, Cylinders, Gears)

# Cylinders is converted to a factor so each distinct count gets its own box.
ggplot(MyData, aes(x = factor(Cylinders), y = MPG)) +
  geom_boxplot() +
  facet_grid(rows = Gears ~ .) +
  labs(
    x = "Number of Engine Cylinders",
    y = "Average MPG"
  ) +
  theme(text = element_text(size = 20, family = "Times"))
# Income against Population from state.x77, with each row drawn as a text
# label taken from state.abb instead of a point marker.
ggplot(data = data.frame(state.x77), mapping = aes(x = Population, y = Income)) +
  geom_text(mapping = aes(label = state.abb), hjust = -0.25, size = 4) +
  ggtitle("State Income vs. Population") +
  theme(text = element_text(size = 20, family = "Times"))
# Armed.Forces over Year from the longley data, with a shaded band and a
# "Korean War" label behind the line.
# Layer order matters: the band and its label are added first so that the
# line is drawn on top of them.
# NOTE(review): the band and label positions are constants passed straight to
# the geoms, and some of them lie outside the 150-360 y limits set below;
# presumably they are also drawn once per data row, which would explain the
# very small alpha -- verify before switching these layers to annotate().
ggplot(data = longley, mapping = aes(x = Year, y = Armed.Forces)) +
  geom_rect(
    xmin = 1950.2, xmax = 1953.5,
    ymin = 140, ymax = 370,
    fill = "lightgreen", alpha = 0.03
  ) +
  geom_text(x = 1951.55, y = 366, label = "Korean War", color = "darkgreen") +
  geom_line(size = 1.5) +
  ylab("Number of People in Armed Forces (thousands)") +
  ylim(150, 360) +
  theme(text = element_text(size = 20, family = "Times"))
library(dplyr)
library(ggplot2)
# Line-and-point chart of the murder and non-negligent manslaughter rate by
# year, read from a local CSV, with dashed reference lines and text notes.
# header is spelled TRUE rather than T, because T is an ordinary variable
# that can be reassigned.
crime <- read.csv("Crime/fbi-crime-1996-2015.csv", header = TRUE)

# Rows are sorted by Year before plotting.
ggplot(
  arrange(crime, Year),
  aes(x = Year, y = Murder.and..nonnegligent..manslaughter..rate.)
) +
  geom_line(size = 1.35, color = "darkblue") +
  # White-filled circles sit on top of the line at each data point.
  geom_point(size = 4, shape = 21, fill = "white", color = "darkblue") +
  ylab("Murder & Manslaughter Rate in U.S (per 100K people)") +
  theme(text = element_text(family = "Times", size = 16)) +
  # Dashed reference lines, each with its label placed just above the line.
  geom_hline(yintercept = 7.7, linetype = "dashed", color = "black") +
  annotate(
    "text", x = 2005, y = 7.75, vjust = 0,
    label = "Murder Rate of Bolivia, 2011", color = "black"
  ) +
  geom_hline(yintercept = 3.7, linetype = "dashed", color = "black") +
  annotate(
    "text", x = 2005, y = 3.75, vjust = 0,
    label = "Murder Rate of Chile, 2011", color = "black"
  ) +
  # Left-aligned historical note anchored at the first year on the axis.
  annotate(
    "text", x = 1996, y = 4.5, hjust = 0,
    label = "The U.S. Murder Rate was the lowest in the last\n100 years in 1955, when it was 4.5 per 100K people"
  ) +
  ggtitle("Murder in the U.S. is at an Historic Low")
With other types of graph, begin and end a little above or below the extrema
Make intervals on the scale easy numbers to understand (e.g., round numbers)
Good visualization software will make following such rules easy
Pie Chart of Loans
Positive, but negative bar chart
|
|
|
When data contains a few very extreme values, it can be difficult to display data appropriately. Things to consider:
|
|
|