- Color, legends, and labeling
- Basic principles of part-to-whole
- Examples of basic methods to visualize proportions
- Additional methods to visualize proportions
- Interactive visualizations
Summer 2020
For information about changing legends, see: http://www.cookbook-r.com/Graphs/Legends_(ggplot2)
For general ggplot2 color information, see: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)
For more information about RColorBrewer, see: http://earlglynn.github.io/RNotes/package/RColorBrewer/index.html
# Box plot of plant weight for each experimental group, filled by group.
# The package load and the plot are separate statements and must sit on
# separate lines for the snippet to parse.
library(ggplot2)
ggplot(data = PlantGrowth, aes(x = group, y = weight, fill = group)) +
  geom_boxplot() +
  # Enlarge all plot text and switch to the "Times" font family.
  theme(text = element_text(size = 18, family = "Times"))
library(ggplot2)
# Box plot of plant weight by group with hand-picked fill colours.
# The legend gets a two-line title and readable labels for the three
# group codes; the x-axis title is blanked out.
ggplot(PlantGrowth, aes(group, weight, fill = group)) +
  geom_boxplot() +
  scale_fill_manual(
    name   = "Experimental\nCondition",
    values = c("#999999", "#E69F00", "#56B4E9"),
    breaks = c("ctrl", "trt1", "trt2"),
    labels = c("Control", "Treatment 1", "Treatment 2")
  ) +
  labs(x = "", y = "Weight (g)") +
  theme(text = element_text(family = "Times", size = 18))
# Three alternative ways to reverse the order of the fill legend.
# NOTE(review): `myMlotObj` is not defined anywhere in this file; it is
# presumably a placeholder for a previously built ggplot object -- confirm.

# Option 1: reverse the legend through guides().
myMlotObj + guides(fill = guide_legend(reverse = TRUE))

# Option 2: attach the reversed guide to the fill scale itself.
myMlotObj + scale_fill_discrete(guide = guide_legend(reverse = TRUE))

# Option 3: list the legend breaks in reversed factor-level order.
myMlotObj + scale_fill_discrete(breaks = rev(levels(PlantGrowth$group)))
library(ggplot2)
# Read the example table (first row holds the column names), then draw
# one line and one set of points per cond2 level using default colours.
df2 <- read.table(
  "http://eecs.ucf.edu/~wiegand/idc6700/datasets/color-cookbook-eg.txt",
  header = TRUE
)
ggplot(df2, aes(cond1, yval)) +
  geom_line(aes(group = cond2, colour = cond2)) +
  geom_point(aes(colour = cond2), size = 3) +
  theme(text = element_text(family = "Times", size = 18))
## List of 1 ## $ text:List of 11 ## ..$ family : chr "Times" ## ..$ face : NULL ## ..$ colour : NULL ## ..$ size : num 18 ## ..$ hjust : NULL ## ..$ vjust : NULL ## ..$ angle : NULL ## ..$ lineheight : NULL ## ..$ margin : NULL ## ..$ debug : NULL ## ..$ inherit.blank: logi FALSE ## ..- attr(*, "class")= chr [1:2] "element_text" "element" ## - attr(*, "class")= chr [1:2] "theme" "gg" ## - attr(*, "complete")= logi FALSE ## - attr(*, "validate")= logi TRUE
library(ggplot2)
# Line/point plot of yval against cond1, coloured by cond2 with a manual
# eight-colour palette and an explicit legend order and title.
df2 <- read.table(
  "http://eecs.ucf.edu/~wiegand/idc6700/datasets/color-cookbook-eg.txt",
  header = TRUE
)
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73",
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
ggplot(df2, aes(x = cond1, y = yval)) +
  geom_line(aes(color = cond2, group = cond2), size = 1.15) +
  geom_point(aes(color = cond2), size = 4) +
  scale_color_manual(values = cbPalette,
                     breaks = c("J", "I", "K", "L"),
                     name = "Condition 2") +
  xlab("Condition 1") + ylab("y-Value") +
  # The trailing `+` above is required: without it theme() is evaluated as
  # a separate statement and the font settings never reach the plot.
  theme(text = element_text(size = 18, family = "Times"))
## List of 1 ## $ text:List of 11 ## ..$ family : chr "Times" ## ..$ face : NULL ## ..$ colour : NULL ## ..$ size : num 18 ## ..$ hjust : NULL ## ..$ vjust : NULL ## ..$ angle : NULL ## ..$ lineheight : NULL ## ..$ margin : NULL ## ..$ debug : NULL ## ..$ inherit.blank: logi FALSE ## ..- attr(*, "class")= chr [1:2] "element_text" "element" ## - attr(*, "class")= chr [1:2] "theme" "gg" ## - attr(*, "complete")= logi FALSE ## - attr(*, "validate")= logi TRUE
library(ggplot2)
library(RColorBrewer)
# Line/point plot of yval against cond1, coloured by cond2 with the
# ColorBrewer "Set1" palette and an explicit legend order and title.
df2 <- read.table(
  "http://eecs.ucf.edu/~wiegand/idc6700/datasets/color-cookbook-eg.txt",
  header = TRUE
)
ggplot(df2, aes(x = cond1, y = yval)) +
  geom_line(aes(color = cond2, group = cond2), size = 1.15) +
  geom_point(aes(color = cond2), size = 4) +
  scale_color_brewer(palette = "Set1",
                     breaks = c("J", "I", "K", "L"),
                     name = "Condition 2") +
  xlab("Condition 1") + ylab("y-Value") +
  # The trailing `+` above is required: without it theme() is evaluated as
  # a separate statement and the font settings never reach the plot.
  theme(text = element_text(size = 18, family = "Times"))
library(ggplot2)
# Twenty random points; the third random column is mapped to a
# yellow-to-red fill gradient on fillable point markers (shape 21).
myScatterData <- data.frame(
  Meeples  = runif(20),
  Furples  = runif(20),
  Troddles = runif(20)
)
ggplot(myScatterData, aes(Meeples, Furples, fill = Troddles)) +
  geom_point(shape = 21, size = 4) +
  scale_fill_gradient(low = "yellow", high = "red") +
  theme(text = element_text(family = "Times", size = 18))
library(ggplot2)
# Twenty random points; the third random column is mapped to point size,
# with the size scale stretched to the range 1-20.
myScatterData <- data.frame(
  Meeples  = runif(20),
  Furples  = runif(20),
  Troddles = runif(20)
)
ggplot(myScatterData, aes(Meeples, Furples, size = Troddles)) +
  geom_point() +
  scale_size_continuous(range = c(1, 20)) +
  theme(text = element_text(family = "Times", size = 18))
There are a variety of typical part-to-whole patterns:
library(ggplot2)
# Bar chart counting the rows of mpg that fall in each vehicle class.
ggplot(mpg, aes(class)) +
  geom_bar() +
  labs(
    x = "Vehicle Class",
    y = "Count",
    title = "Vehicles of Different Classes, 1999-2008"
  ) +
  theme(text = element_text(family = "Times", size = 18))
library(ggplot2)
library(RColorBrewer)
# One stacked bar: every row shares the same x value, so the class counts
# pile up in a single column. Colours come from a "Blues" ramp stretched
# to one shade per distinct class.
numLevels <- nlevels(factor(mpg$class))
classPalette <- colorRampPalette(brewer.pal(5, "Blues"))(numLevels)
ggplot(mpg, aes(factor(1), fill = class)) +
  geom_bar() +
  scale_fill_manual(values = classPalette) +
  labs(
    x = "Vehicle Class",
    y = "Count",
    title = "Vehicles of Different Classes, 1999-2008"
  ) +
  theme(text = element_text(family = "Times", size = 18))
library(ggplot2)
library(dplyr)
# Count rows per class, then plot the counts as bars sorted from the
# largest class to the smallest.
mpg.ordered <- mpg %>%
  group_by(class) %>%
  summarise(Count = length(class))
ggplot(mpg.ordered, aes(reorder(class, -Count), Count)) +
  geom_bar(fill = "darkblue", stat = "identity") +
  labs(
    x = "Vehicle Class",
    y = "Count",
    title = "Vehicles of Different Classes, 1999-2008"
  ) +
  theme(text = element_text(family = "Times", size = 18))
library(ggplot2)
library(dplyr)
# Pareto-style chart: bars show each class's share of all rows (largest
# first); the line and points show the running cumulative share, stored
# in the column named Remainder.
totalCount <- nrow(mpg)
mpg.ordered <- mpg %>%
  group_by(class) %>%
  summarise(Count = length(class)) %>%
  arrange(-Count) %>%
  mutate(
    Percentage = 100 * Count / totalCount,
    Remainder  = 100 * cumsum(Count) / totalCount
  )
ggplot(mpg.ordered, aes(reorder(class, -Count), Percentage)) +
  geom_bar(fill = "darkgray", stat = "identity") +
  # group = 1 joins all classes into a single path.
  geom_path(aes(y = Remainder, group = 1), color = "firebrick", size = 1.25) +
  geom_point(aes(y = Remainder), size = 4) +
  labs(
    x = "Vehicle Class",
    y = "Percentage",
    title = "Vehicles of Different Classes, 1999-2008"
  ) +
  theme(text = element_text(family = "Times", size = 18))
library(ggplot2)
library(RColorBrewer)
# Pie chart: a single stacked bar of class counts bent into a circle by
# mapping the y axis to the polar angle.
numLevels <- nlevels(factor(mpg$class))
classPalette <- colorRampPalette(brewer.pal(5, "Blues"))(numLevels)
ggplot(mpg, aes("", fill = class)) +
  geom_bar() +
  coord_polar(theta = "y") +
  scale_fill_manual(values = classPalette) +
  labs(x = "", y = "", title = "Vehicles of Different Classes, 1999-2008") +
  theme(
    text = element_text(family = "Times", size = 18),
    # Hide the y-axis ticks and tick labels.
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )
library(ggplot2)
library(RColorBrewer)
library(reshape2)
# Donut chart of vehicle classes, built from explicit rectangles that are
# wrapped into a ring by polar coordinates.

# Create the color ramp: one shade of blue per distinct class
numLevels <- length(levels(factor(mpg$class)))
classPalette <- colorRampPalette(brewer.pal(5, "Blues"))(numLevels)

# Create the totals for the rectangle y positions. Each class occupies the
# span from the previous running total (ymin) to its own running total
# (ymax). Plain column assignment is used so this snippet needs only the
# packages it loads itself (no dplyr).
classCounts <- melt(table(mpg$class), value.name = "Count")
classCounts$ymax <- cumsum(classCounts$Count)
classCounts$ymin <- c(0, head(classCounts$ymax, -1))

# Plot this thing. Rectangles span x in [3, 4] while the x axis runs over
# [0, 4]; the unused inner range becomes the hole of the donut.
ggplot(classCounts, aes(fill = Var1, ymin = ymin, ymax = ymax, xmin = 3, xmax = 4)) +
  geom_rect() +
  coord_polar(theta = "y") +
  scale_fill_manual(values = classPalette, name = "Car Class") +
  xlim(c(0, 4)) +
  xlab("") +
  ylab("") +
  ggtitle("Vehicles of Different Classes, 1999-2008") +
  theme(text = element_text(size = 18, family = "Times"),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank())
library(ggplot2)
library(RColorBrewer)
# Proportional stacked bars: one bar per drive type, each normalised to
# full height and split by vehicle class.
numLevels <- nlevels(factor(mpg$class))
classPalette <- colorRampPalette(brewer.pal(5, "Blues"))(numLevels)
ggplot(mpg, aes(drv, fill = class)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = classPalette) +
  labs(
    x = "Drive",
    y = "Ratio",
    title = "Vehicles of Different Classes & Drives, 1999-2008"
  ) +
  theme(text = element_text(family = "Times", size = 18))
library(ggplot2)
# Horizontal bar charts of class counts, one facet row per drive type.
# `labels` maps the drv codes found in mpg to readable facet strip titles.
labels <- c("4" = "Four Wheel Drive", "f" = "Front Wheel Drive", "r" = "Rear Wheel Drive")
ggplot(mpg, aes(x = class)) +
  geom_bar() + coord_flip() +
  # The facet formula is passed positionally: the named `facets` argument
  # is deprecated in current ggplot2, while the positional form works in
  # both old and new releases.
  facet_grid(drv ~ ., labeller = labeller(drv = labels)) +
  xlab("Vehicle Class") +
  ylab("Count") +
  ggtitle("Vehicles of Different Classes & Drives, 1999-2008") +
  theme(text = element_text(size = 18, family = "Times"))
library(ggplot2)
library(RColorBrewer)
# One pie per drive type: proportional stacked bars of vehicle class,
# bent into circles with polar coordinates and laid out in facet columns.
numLevels <- length(levels(factor(mpg$class)))
classPalette <- colorRampPalette(brewer.pal(5, "Blues"))(numLevels)
# Readable facet strip titles for the drv codes.
labels <- c("4" = "Four Wheel Drive", "f" = "Front Wheel Drive", "r" = "Rear Wheel Drive")
ggplot(mpg, aes(x = factor(1), fill = class)) +
  geom_bar(position = "fill") + coord_polar(theta = "y") +
  # The facet formula is passed positionally: the named `facets` argument
  # is deprecated in current ggplot2, while the positional form works in
  # both old and new releases.
  facet_grid(. ~ drv, labeller = labeller(drv = labels)) +
  scale_fill_manual(values = classPalette) +
  xlab("Drive") +
  ylab("") +
  ggtitle("Vehicles of Different Classes, 1999-2008") +
  theme(text = element_text(size = 18, family = "Times"),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank())
library(ggplot2)
library(RColorBrewer)
library(gcookbook) # for uspopage
# Proportional stacked area chart of the uspopage data: each year's
# column is normalised to 1 and split by age group. The legend entries
# are listed in reversed factor-level order.
ggplot(uspopage, aes(Year, Thousands, fill = AgeGroup)) +
  geom_area(position = "fill", color = "black", size = 0.2, alpha = 0.4) +
  scale_fill_brewer(
    palette = "Blues",
    breaks = rev(levels(uspopage$AgeGroup))
  ) +
  labs(
    y = "Proportion of People in Age Group",
    title = "Proportion of People of Different Ages in US, 1900-2000"
  ) +
  theme(text = element_text(family = "Times", size = 18))
library(ggplot2)
library(dplyr)
library(reshape2)
library(RColorBrewer)
# Stacked, horizontal Likert-style bar chart of student-perception (SPI)
# responses for one instructor and course in one semester.

# Variables to setup the filter
semester <- "fall"
year <- 2017
courseID <- "DIG5876"
instructorName <- "Rudolf Wiegand"

# Get the data. The file name embeds a numeric semester ID computed from
# the year plus a per-term offset.
offset <- c("spring" = 0, "summer" = 10, "fall" = 20)
semesterID <- 30 * (year - 1996) + 960 + offset[semester]
dataFile <- paste0("Data/ucfspi", semesterID, ".data")
spi <- read.delim(dataFile, sep = "#", header = FALSE)

# Column/Variable Names. The file is read without a header row, so the
# names must be assigned BEFORE any step that refers to columns by name.
colnames(spi) <- c("College", "Department", "InstructorEmail", "InstructorName",
                   "CourseName", "CourseID", "CourseSuffix", "SemesterID",
                   "n", "Organization", "Expectations", "Communicating",
                   "Respect", "Interest", "LearningEnv", "PerformanceFeedback",
                   "AchieveObjectives", "OverallEffectiveness", "NotUsed")

# Keep only the rows for the requested instructor and course
rpwSPI <- filter(spi, InstructorName == instructorName & CourseID == courseID)

# Put the data in long form: the first nine columns identify the record,
# every remaining column becomes a (variable, value) pair
rpwSPI.melt <- melt(rpwSPI, id.vars = 1:9)

# We only care about the questions and make sure the responses
# are treated like an ordered factor (ordinal data), not numeric
rpwSPI.corrected <- mutate(filter(rpwSPI.melt, variable != "NotUsed"),
                           Responses = factor(value, levels = 5:1,
                                              ordered = TRUE))

# Tally up all 1's, 2's, 3's, 4's, and 5's for each question
rpwSPI.counts <- summarise(group_by(rpwSPI.corrected, Responses, variable),
                           Count = length(Responses))

# Get overall counts in order to obtain percentages; the largest
# per-question total is used as the common denominator
rpwSPI.totals <- summarise(group_by(rpwSPI.corrected, variable),
                           Count = length(variable))
overallNN <- max(rpwSPI.totals$Count)

# Setup some question labels (one per question column, in column order)
questions <- c("Effectiveness organizing the course",
               "Effectiveness explaining course requirements,\n grading criteria, and expectations",
               "Effectiveness communicating ideas\n and/or information",
               "Effectiveness showing respect and\n concern for student",
               "Effectiveness stimulating interest\n in the course",
               "Effectiveness creating an environment\n that helps students learn",
               "Effectiveness giving useful feedback\n on course performance",
               "Effectiveness helping students\n achieve course objectives",
               "Overall effectiveness of the instructor")

# Setup legend texts and palette
responseText <- c("Strongly Agree",
                  "Agree",
                  "No Opinion",
                  "Disagree",
                  "Strongly Disagree")
likertPalette <- c("#336633", # Strongly Agree, 5
                   "#99CC99", # Agree, 4
                   "#999999", # No Opinion, 3
                   "#FF6666", # Disagree, 2
                   "#FF3333") # Strongly Disagree, 1

# The plot!
ggplot(rpwSPI.counts, aes(x = variable, y = 100 * Count / overallNN, fill = Responses)) +
  geom_bar(stat = "identity") +
  # White reference lines drawn over the bars at every 25 percent
  geom_hline(yintercept = c(0, 25, 50, 75, 100), color = "white", size = 1) +
  scale_fill_manual(values = likertPalette,
                    name = "",
                    breaks = 5:1,
                    labels = responseText) +
  scale_x_discrete(labels = questions) +
  coord_flip() +
  ylab("Percentage") +
  xlab("") +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme_bw() +
  theme(text = element_text(family = "Times", size = 18),
        axis.text.y = element_text(size = 16, hjust = 1),
        legend.position = "bottom",
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank())
library(portfolio)
# Download the post statistics and hand them to map.market(): rectangles
# are identified by id, sized by views, grouped by category and coloured
# by comments.
posts <- read.csv("http://datasets.flowingdata.com/post-data.txt")
with(
  posts,
  map.market(
    id    = id,
    area  = views,
    group = category,
    color = comments,
    main  = "FlowingData Map"
  )
)
library(tm)
library(wordcloud)
# Word cloud of the most frequent (stemmed) terms in a text fetched from a URL.
rpwDissertationURL <- "http://cs.ucf.edu/~wiegand/ids6938/datasets/rpw-diss.txt"
dissertation <- Corpus(URISource(rpwDissertationURL))

# Collapse runs of white space and change everything to lower case.
# tolower() is a plain character function, so it is wrapped in
# content_transformer(); that keeps every element a tm text document and
# makes a later PlainTextDocument conversion unnecessary.
dissertation <- tm_map(dissertation, stripWhitespace)
dissertation <- tm_map(dissertation, content_transformer(tolower))

# Remove unnecessary and redundant words with slight suffix variations
dissertation <- tm_map(dissertation, removeWords, stopwords("english"))
dissertation <- tm_map(dissertation, stemDocument)

# Draw at most 150 words, most frequent first (not in random order),
# with 35% of them rotated.
wordcloud(dissertation,
          scale = c(5, 0.5),
          max.words = 150,
          random.order = FALSE,
          rot.per = 0.35)
library(ggplot2)
library(dplyr)
library(mapproj)
# Choropleth of US states shaded by Obama's 2008 vote percentage, with the
# colour scale fixed to the full 0-100 range.

# Extract reference data (state outline polygons)
mapstates <- map_data("state")

# Get the electoral data and add a lower-case `region` column to join on
elec2008URL <- "http://www.electoral-vote.com/evp2008/Pres/Final-2008.csv"
elec2008 <- mutate(read.csv(elec2008URL),
                   region = tolower(as.character(State)))

# Merge the data for the choropleth. merge() does not promise to keep the
# rows in their original sequence, so restore the vertex order stored in
# the `order` column of the map data; otherwise the polygon outlines can
# be drawn scrambled.
plotableCountyData <- merge(mapstates, elec2008)
plotableCountyData <- plotableCountyData[order(plotableCountyData$order), ]

ggplot(plotableCountyData, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = Obama.Pct)) +
  scale_fill_gradient(low = "white", high = "darkblue", limits = c(0, 100)) +
  coord_map(projection = "globular") +
  # Faint white state borders drawn from the unmerged outline data
  geom_path(data = mapstates, colour = "white", size = .75, alpha = .1) +
  ggtitle("Who Voted for Obama in 2008?")
library(ggplot2)
library(dplyr)
library(mapproj)
# Choropleth of US states shaded by Obama's 2008 vote percentage, with the
# colour scale limited to the range of percentages present in the data.

# Extract reference data (state outline polygons)
mapstates <- map_data("state")

# Get the electoral data and add a lower-case `region` column to join on
elec2008URL <- "http://www.electoral-vote.com/evp2008/Pres/Final-2008.csv"
elec2008 <- mutate(read.csv(elec2008URL),
                   region = tolower(as.character(State)))

# Merge the data for the choropleth. merge() does not promise to keep the
# rows in their original sequence, so restore the vertex order stored in
# the `order` column of the map data; otherwise the polygon outlines can
# be drawn scrambled.
plotableCountyData <- merge(mapstates, elec2008)
plotableCountyData <- plotableCountyData[order(plotableCountyData$order), ]

ggplot(plotableCountyData, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = Obama.Pct)) +
  scale_fill_gradient(low = "white", high = "darkblue",
                      limits = range(plotableCountyData$Obama.Pct)) +
  coord_map(projection = "globular") +
  # Faint white state borders drawn from the unmerged outline data
  geom_path(data = mapstates, colour = "white", size = .75, alpha = .1) +
  ggtitle("Who Voted for Obama in 2008?")