Flowing Data tutorial link: http://flowingdata.com/2010/11/23/how-to-make-bubble-charts/
This is not a very intuitive visualization of the data provided, however, it is an interesting chart type.
Hans Rosling TED Talk: https://www.ted.com/talks/hans_rosling_shows_the_best_stats_you_ve_ever_seen?language=en
getwd()## [1] "/Users/hollyjones/Desktop/FlowingData"
setwd("/Users/hollyjones/Desktop/FlowingData/bubbles")
#data on crime rates by state in 2008
#one caveat - crime rates are for 2005 while population numbers are for 2008
crime = read.csv("crimeRatesByState2008.csv", header=TRUE, sep="\t")
head(crime)## state murder Forcible_rate Robbery aggravated_assult burglary
## 1 Alabama 8.2 34.3 141.4 247.8 953.8
## 2 Alaska 4.8 81.1 80.9 465.1 622.5
## 3 Arizona 7.5 33.8 144.4 327.4 948.4
## 4 Arkansas 6.7 42.9 91.1 386.8 1084.6
## 5 California 6.9 26.0 176.1 317.3 693.3
## 6 Colorado 3.7 43.4 84.6 264.7 744.8
## larceny_theft motor_vehicle_theft population
## 1 2650.0 288.3 4627851
## 2 2599.1 391.0 686293
## 3 2965.2 924.4 6500180
## 4 2711.2 262.1 2855390
## 5 1916.5 712.8 36756666
## 6 2735.2 559.5 4861515
symbols(crime$murder, crime$burglary, circles = crime$population, main= "Default Chart")
#the default chart features bubbles that are disproportionately large
#this is because circles are sized by RADIUS instead of AREA
#TO SIZE THE RADII CORRECTLY: use the equation for the area of a circle = pie*r^2
#Thus, RADIUS = sqrt(area/pie)
radius = sqrt(crime$population/pi)
#GRAPHIC PARAMETERS for "symbols" - by deafult, "symbols" scales the largest bubble to 1 inch
#all remaining bubbles are scaled relative to the largest 1 inch bubble. To change this use the "inches" arg
symbols(crime$murder, crime$burglary, circles = radius, inches = 0.35, bg = "red", xlab = "Murder Rate", ylab = "Burglary Rate", main = "Burglary vs. Murder per 100,000 People")
#Annotate the chart
text(crime$murder, crime$burglary, crime$state, cex = 0.85)Need to edit the above plot in an open-source version of Adobe Illustrator in order to clean up the text annotations and add additional details, such as a legend. (Export the image as a PDF and proceed to edit).
Flowing Data tutorial link: http://flowingdata.com/2012/10/04/more-on-making-heat-maps-in-r/
getwd()## [1] "/Users/hollyjones/Desktop/FlowingData"
setwd("/Users/hollyjones/Desktop/FlowingData/heatmaps")
dir()## [1] "2008-09nba.trunc.csv" "heatmaps-custom.R" "ray.png"
#NBA play-by-play data (DRB, 3pa, STL, AST, MIN, etc.) for leading scorers
#with X and Y coordinates for the 2008-2009 season - very interesting!!!
nba = read.csv("2008-09nba.trunc.csv", header=TRUE, sep = ",")
head(nba)## team etype player points type x y
## 1 OFF jump ball NA NA NA
## 2 BOS foul Rajon Rondo NA shooting NA NA
## 3 CLE free throw Delonte West NA NA NA
## 4 CLE free throw Delonte West NA NA NA
## 5 BOS shot Kevin Garnett NA jump 9 7
## 6 CLE rebound LeBron James NA def NA NA
tail(nba)## team etype player points type x y
## 510989 WAS shot Juan Dixon NA 3pt 17 30
## 510990 BOS rebound Glen Davis NA def NA NA
## 510991 BOS shot Glen Davis 3 3pt 2 6
## 510992 WAS shot Caron Butler 3 3pt 41 26
## 510993 BOS shot Glen Davis NA 3pt 25 77
## 510994 BOS rebound NA NA NA
shots = nba[nba$etype == "shot", ]
head(shots)## team etype player points type x y
## 5 BOS shot Kevin Garnett NA jump 9 7
## 7 CLE shot LeBron James 2 pullup jump 39 20
## 8 BOS shot Paul Pierce 2 slam dunk 26 6
## 9 CLE shot LeBron James NA 3pt 32 30
## 11 BOS shot Kevin Garnett 2 slam dunk 25 6
## 12 CLE shot Zydrunas Ilgauskas 2 driving layup 25 8
#some shots are NA - get only known shots
shots = shots[!is.na(shots$x) & !is.na(shots$y), ]
#subset to just GSW
gsw = subset(shots, team == "GSW")
#PROBLEM = when there is more than one shot from a particular spot on the floor, dots overlap each other
#circles need to be sized by the number of shots as opposed to making them all the same size
symbols(gsw$x, gsw$y, circles = rep(1, length(gsw$x)), inches = FALSE, asp = 1, main = "Default Chart")
######################################################################################################
# CODE TO DRAW BASKETBALL COURT FROM SCRATCH #
######################################################################################################
library(plotrix)
#Adding an NBA court outline for kicks and giggles.
draw.arc(25, 5.25, 9/12, angle1=0, angle2=2*pi, col="orange", lwd=2) # Hoop
lines(c(22,28), c(4,4), col="orange", lwd=2) # Backboard
lines(c(2.5, 2.5), c(0, 13.5), col="orange", lwd=2) # Side 3-pt
lines(c(47.5, 47.5), c(0, 13.5), col="orange", lwd=2)
lines(c(19,19), c(0,19), col="orange", lwd=2) # Inside lane
lines(c(31,31), c(0,19), col="orange", lwd=2)
lines(c(19,31), c(19,19), col="orange", lwd=2) # Free throw
lines(c(0,50),c(0,0), col="orange", lwd=2) # Baseline
draw.arc(25, 5.25, 23.75, angle1=pi/9.8, angle2=pi/1.113, col="orange", lwd=2) # 3-pt arc######################################################################################################
# END CODE #
######################################################################################################
#subset to look at ray allen's scoring only
ray = subset(shots, player == "Ray Allen")
head(ray)## team etype player points type x y
## 19 BOS shot Ray Allen NA layup 25 7
## 28 BOS shot Ray Allen NA 3pt 2 13
## 31 BOS shot Ray Allen NA 3pt 42 24
## 115 BOS shot Ray Allen NA layup 25 6
## 154 BOS shot Ray Allen NA jump 44 18
## 208 BOS shot Ray Allen 2 jump 16 24
#The count() function from the plyr package makes it straightforward to aggregate by two columns.
library(plyr)
scoring.agg = count(ray, "type")
scoring.agg = scoring.agg[order(scoring.agg$freq, decreasing = FALSE), ]
head(scoring.agg)## type freq
## 4 driving jump 1
## 7 dunk 1
## 15 putback layup 1
## 20 step back jump 1
## 22 turnaround jump 1
## 2 driving dunk 2
par(mar=c(5, 9, 2, 2))
barplot(scoring.agg$freq, names.arg = scoring.agg$type, horiz = TRUE, las = 2, cex.names = 0.85, xlim = c(0, 500), col = "dark green", main = "Ray Allen Scoring by Shot Type")##################################################################################################
par(mar = c(2, 2, 2, 2))
# Aggregate by shot frequency
library(plyr)
gsw.agg <- count(gsw, c("x","y"))
# Size symbols by number of shots.
symbols(gsw.agg$x, gsw.agg$y, circles=sqrt(gsw.agg$freq)/8, asp=1, inches=FALSE, ylim=c(0,40))symbols(gsw.agg$x, gsw.agg$y, squares=sqrt(gsw.agg$freq)/8, asp=1, inches=FALSE, ylim=c(0,40))
# Hexbins
library(hexbin)h <- hexbin(gsw$x, gsw$y, xbins=20, shape=80/50)
plot(h)# Helper function to get color by shot frequency
getColor <- function(val) {
minVal <- log(1) # Using logarithmic scale
maxVal <- log(1065)
numCols <- 20
pal <- colorRampPalette(c("forestgreen", "red"))
cols <- pal(numCols)
# Get index to pick color.
colIndex <- round(numCols * (log(val) - minVal) / (maxVal - minVal))
colIndex <- max(1, colIndex)
return(cols[colIndex])
}
# Get colors and draw heat map of shot frequency.
gridColors <- sapply(gsw.agg$freq, getColor)
symbols(gsw.agg$x, gsw.agg$y, squares=rep(1,length(gsw.agg$x)), asp=1, inches=FALSE, ylim=c(0,40), bg=gridColors, fg=NA)# Find points per shot for each spot on court.
ptsper <- c()
for (i in 1:length(gsw.agg[,1])) {
xcoord <- gsw.agg$x[i]
ycoord <- gsw.agg$y[i]
spot <- subset(gsw, x == xcoord & y == ycoord)
totalShots <- length(spot[,1])
totalPts <- sum(spot$points[!is.na(spot$points)])
ptsper <- c(ptsper, totalPts/totalShots)
}
# Helper function to get color by points per shot.
getColorByPoints <- function(val) {
minVal <- 0
maxVal <- 3
numCols <- 30
pal <- colorRampPalette(c("forestgreen", "red"))
cols <- pal(numCols)
# Get index to pick color.
colIndex <- round(numCols * (val - minVal) / (maxVal - minVal))
colIndex <- max(1, colIndex)
return(cols[colIndex])
}
# Get colors and draw heat map of points per shot.
gridColors <- sapply(ptsper, getColorByPoints)
plot(0, 0, xlim=c(0,50), ylim=c(0,40), type="n", xaxt="n", yaxt="n", xlab="", ylab="", bty="n", main = "Points Per Area on the Floor - GSW")
symbols(gsw.agg$x, gsw.agg$y, squares=sqrt(gsw.agg$freq)/3, asp=1, inches=FALSE, add=TRUE, bg=gridColors, fg=NA)
# Adding an NBA court outline for kicks and giggles.
draw.arc(25, 5.25, 9/12, angle1=0, angle2=2*pi, col="#cccccc", lwd=2) # Hoop
lines(c(22,28), c(4,4), col="#cccccc", lwd=2) # Backboard
lines(c(2.5, 2.5), c(0, 13.5), col="#cccccc", lwd=2) # Side 3-pt
lines(c(47.5, 47.5), c(0, 13.5), col="#cccccc", lwd=2)
lines(c(19,19), c(0,19), col="#cccccc", lwd=2) # Inside lane
lines(c(31,31), c(0,19), col="#cccccc", lwd=2)
lines(c(19,31), c(19,19), col="#cccccc", lwd=2) # Free throw
lines(c(0,50),c(0,0), col="#cccccc", lwd=2) # Baseline
draw.arc(25, 5.25, 23.75, angle1=pi/9.8, angle2=pi/1.113, col="#cccccc", lwd=2) # 3-pt arc