ggplot2 Tutorial: Visualizing Data

install.packages("ggplot2")
## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)
## Warning: 'lib = "C:/Program Files/RStudio/R/library"' is not writable
## Error: unable to install packages
install.packages("maptools")
## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)
## Warning: 'lib = "C:/Program Files/RStudio/R/library"' is not writable
## Error: unable to install packages
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 2.15.3
library(maptools)
## Warning: package 'maptools' was built under R version 2.15.3
## Loading required package: foreign
## Loading required package: sp
## Warning: package 'sp' was built under R version 2.15.3
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: TRUE

## LOAD DATA
# Read the USA shapefile with maptools::readShapePoly(), which returns a
# SpatialPolygonsDataFrame (polygons plus an attribute table in USA@data).
USA <- readShapePoly("C:/Users/Hallie/Desktop/Spring 2013 Courses/Quantitative Methods/Data/USA copy.shp")
## Remove count fields and rows with missing data
# Keep attribute columns 1-8 and 14-30 only.
USA <- USA[, c(1:8, 14:30)]
# na.omit() has no method for Spatial*DataFrame objects: the default method
# returns non-atomic input unchanged, so it would silently keep every row.
# Subset the polygons by complete rows of the attribute table instead.
USA <- USA[complete.cases(USA@data), ]

# Base scatter plot reused by the examples that follow: Obese on the x axis,
# homevalu on the y axis, both taken from the attribute table.
plot1 <- ggplot(USA@data, aes(Obese, homevalu))

plot1 + geom_point()

plot of chunk unnamed-chunk-1


# Same scatter plot with both axes drawn on a log10 scale.
plot1 + geom_point() + scale_x_log10() + scale_y_log10()

plot of chunk unnamed-chunk-1


## Add transparency (alpha = 1/10) to the points so that heavily
## overplotted regions show up darker than sparse ones.
plot1 + geom_point(alpha = 1/10) + scale_x_log10() + scale_y_log10()

plot of chunk unnamed-chunk-1


# Add a fitted straight line (method = "lm") to the plot. Note that the log
# scales are not applied here, so the axes are back on the original scale.
plot1 + geom_point(alpha = 1/10) + geom_smooth(method = "lm")

plot of chunk unnamed-chunk-1


# other ways to deal w/ over-plotting problem
install.packages("hexbin")
## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)
## Warning: 'lib = "C:/Program Files/RStudio/R/library"' is not writable
## Error: unable to install packages
library(hexbin)
## Warning: package 'hexbin' was built under R version 2.15.3
# Hexagonal binning: count points per hexagon and map the count to fill
# (this is the layer that needs the hexbin package).
plot1 + stat_binhex()

plot of chunk unnamed-chunk-1


# Rectangular binning: the same idea with a grid of rectangles.
plot1 + geom_bin2d()

plot of chunk unnamed-chunk-1


# Contour lines of a 2D kernel density estimate of the points.
plot1 + geom_density2d()

plot of chunk unnamed-chunk-1


# Two-level factor: "its good" for rows whose STATE_NAME is one of the four
# listed states, "its ok" for every other row.
USA$good_states <- factor(ifelse(
    USA$STATE_NAME %in% c("New York", "Massachusetts", "Rhode Island", "Wyoming"),
    "its good",
    "its ok"
))

ggplot2 makes it very easy to incorporate qualitative variables. These can be used in several ways: 1. Facets: Each level of a factor can be plotted in its own panel. 2. Groups: Each level of a factor can be assigned its own group. For example, plotting fitted lines for each group through a scatter plot. 3. Appearance: Color, symbols, line weight, fill, and other variables can be assigned to a factor (qualitative variable).

Let's put the qualitative variable created above (good_states) to use:

# MODIFY PLOT 1
# Same x/y mapping as plot1, with point colour driven by the good_states factor.
plot2 <- ggplot(USA@data, aes(Obese, homevalu, colour = good_states))
plot2 + geom_point()

plot of chunk unnamed-chunk-2


# Redefine plot2 so good_states drives both colour and point shape.
plot2 <- ggplot(data = USA@data, aes(x = Obese, y = homevalu, color = good_states, 
    shape = good_states))
# One smoother per good_states group. With the default method = "auto" and a
# largest group of >= 1000 rows, a gam is fitted here rather than a local
# (loess) fit -- see the message printed below.
plot2 + stat_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using
## gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the
## smoothing method.

plot of chunk unnamed-chunk-2


# Points plus one straight-line (lm) fit per good_states group.
plot2 + geom_point() + stat_smooth(method = "lm", se = TRUE, lwd = 0.5, lty = 1)

plot of chunk unnamed-chunk-2

# lwd controls line thickness. lty controls line type: 1 = solid line, higher
# numbers give various forms of dashed lines. se = TRUE draws the grey
# standard error envelopes; set se = FALSE to turn them off.

# Scatter plot of pcincome against pctcoled with readable axis labels and a
# two-line title.
plot3 <- ggplot(USA@data, aes(pctcoled, pcincome))
plot3 +
    geom_point() +
    labs(
        x = "Percent College Educated",
        y = "Per Capita Income",
        title = "US Counties (2000)\nPercent College Educated by Per Capita Income"
    )

plot of chunk unnamed-chunk-2


# pcincome against pctcoled, with points coloured by unemploy on a diverging
# green / yellow / red scale whose midpoint is the mean of unemploy. The
# legend is labelled at the minimum, mean and maximum values.
plot4 <- ggplot(USA@data, aes(pctcoled, pcincome, colour = unemploy)) +
    geom_point() +
    labs(
        x = "Percent College Educated",
        y = "Per Capita Income",
        title = "US Counties (2000)\nPercent College Educated by Per Capita Income"
    ) +
    scale_colour_gradient2(
        name = "Unemployment",
        low = "green",
        mid = "yellow",
        high = "red",
        midpoint = mean(USA$unemploy),
        breaks = c(min(USA$unemploy), mean(USA$unemploy), max(USA$unemploy)),
        labels = c("Below Average", "Average", "Above Average")
    )
plot4

plot of chunk unnamed-chunk-2


# Facet plot4 into one column of panels per level of good_states.
plot4 + facet_grid(. ~ good_states)

plot of chunk unnamed-chunk-2


# If you wanted to go crazy you could facet by state instead:
#   plot4 + facet_grid(. ~ STATE_NAME)

# Swap the default look for ggplot2's built-in classic theme.
plot4 + theme_classic()

plot of chunk unnamed-chunk-2


install.packages("ggthemes", dependencies = TRUE)
## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)
## Warning: 'lib = "C:/Program Files/RStudio/R/library"' is not writable
## Error: unable to install packages

library(ggthemes)
## Warning: package 'ggthemes' was built under R version 2.15.3
# Try a few of the ready-made themes from the ggthemes package on plot4.
plot4 + theme_economist()

plot of chunk unnamed-chunk-2


plot4 + theme_solarized()  #OUCH!

plot of chunk unnamed-chunk-2


plot4 + theme_tufte()

plot of chunk unnamed-chunk-2

Seth has made a custom theme that he often uses in presentations:

# Dark presentation theme: black plot, panel and legend backgrounds; dotted
# white major grid lines; no minor grid lines or axis ticks; grey80 titles
# and axis text, with white legend text.
sethTheme <- theme(
    # Backgrounds
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    legend.background = element_rect(fill = "black"),
    legend.key = element_rect(fill = "black"),
    # Grid lines and ticks
    panel.grid.major = element_line(linetype = 3, colour = "white"),
    panel.grid.minor = element_blank(),
    axis.ticks = element_blank(),
    # Text
    title = element_text(colour = "grey80"),
    axis.title.x = element_text(colour = "grey80"),
    axis.title.y = element_text(colour = "grey80"),
    axis.text.x = element_text(colour = "grey80"),
    axis.text.y = element_text(colour = "grey80"),
    legend.title = element_text(colour = "grey80"),
    legend.text = element_text(colour = "white")
)
plot4 + sethTheme

plot of chunk unnamed-chunk-3

# Open the help page for theme() to see every element that can be customised.
?theme
## starting httpd help server ...
## done

Making Maps

install.packages("rgdal")
## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)
## Warning: 'lib = "C:/Program Files/RStudio/R/library"' is not writable
## Error: unable to install packages
library(rgdal)
## Warning: package 'rgdal' was built under R version 2.15.3
## rgdal: version: 0.8-6, (SVN revision Unversioned directory) Geospatial
## Data Abstraction Library extensions to R successfully loaded Loaded GDAL
## runtime: GDAL 1.9.2, released 2012/10/08 Path to GDAL shared files:
## C:/Users/Hallie/Documents/R/win-library/2.15/rgdal/gdal GDAL does not use
## iconv for recoding strings. Loaded PROJ.4 runtime: Rel. 4.7.1, 23
## September 2009, [PJ_VERSION: 470] Path to PROJ.4 shared files:
## C:/Users/Hallie/Documents/R/win-library/2.15/rgdal/proj
install.packages("rgeos")
## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)
## Warning: 'lib = "C:/Program Files/RStudio/R/library"' is not writable
## Error: unable to install packages
library(rgeos)
## Warning: package 'rgeos' was built under R version 2.15.3
## rgeos version: 0.2-16, (SVN revision 389) GEOS runtime version:
## 3.3.6-CAPI-1.7.6 Polygon checking: TRUE
install.packages("gpclib")
## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)
## Warning: 'lib = "C:/Program Files/RStudio/R/library"' is not writable
## Error: unable to install packages
library(gpclib)
## Warning: A specification for class "gpc.poly" in package 'gpclib' seems
## equivalent to one from package 'rgeos' and is not turning on duplicate
## class definitions for this class
## Warning: A specification for class "gpc.poly.nohole" in package 'gpclib'
## seems equivalent to one from package 'rgeos' and is not turning on
## duplicate class definitions for this class
## General Polygon Clipper Library for R (version 1.5-1) Type 'class ?
## gpc.poly' for help
## Attaching package: 'gpclib'
## The following object(s) are masked from 'package:rgeos':
## 
## append.poly, area.poly, get.bbox, get.pts, read.polyfile, scale.poly,
## triangulate, tristrip, write.polyfile
gpclibPermit()
## [1] TRUE
## Use fortify to extract polygon boundaries from the
## SpatialPolygonsDataFrame (it's slow). The result has one row per vertex,
## with the FIPS value of each region stored in the `id` column.
usa_geom <- fortify(USA, region = "FIPS")

## Reattach the attribute table to the polygon boundaries. Merge against
## USA@data explicitly so a plain data frame (not the S4 object) is joined.
usa_map_df <- merge(usa_geom, USA@data, by.x = "id", by.y = "FIPS")

## merge() does not promise to keep the rows of its inputs in their original
## order, and geom_polygon() joins vertices in row order. Restore the drawing
## order recorded by fortify() so polygons are not drawn scrambled.
usa_map_df <- usa_map_df[order(usa_map_df$order), ]

## make a map of bush_pct
# Polygons filled by Bush_pct on a yellow-to-red gradient, overlaid with thin
# dotted white outlines drawn from the un-merged geometry (usa_geom). The
# long/lat/group mapping is inherited from ggplot() by both layers.
map1 <- ggplot(usa_map_df, aes(long, lat, group = group)) +
    geom_polygon(aes(fill = Bush_pct)) +
    geom_path(data = usa_geom, lty = 3, lwd = 0.1, color = "white") +
    scale_fill_gradient(low = "yellow", high = "red") +
    coord_equal()
map1

plot of chunk unnamed-chunk-4


# Apply the Seth theme (the sethTheme object built with theme() earlier) to
# the map; adding a theme object changes only non-data styling.
map1 + sethTheme

plot of chunk unnamed-chunk-4


library(classInt)
## Warning: package 'classInt' was built under R version 2.15.3
## Loading required package: class
## Loading required package: e1071
# Compute five quantile classes for Bush_pct; the printed intervals and counts
# below are used to pick the rounded breaks for the binned map.
classIntervals(USA$Bush_pct, n = 5, style = "quantile")
## style: quantile
##     [0,50.52) [50.52,58.07) [58.07,64.37) [64.37,71.31) [71.31,92.83] 
##           622           622           622           622           623

# Rounded class boundaries and matching legend labels for the binned map.
breaks <- c(0, 50, 58, 64, 71, 93)  #approximate quantiles
labels <- c("[0 - 50%]", "[50% - 58%]", "[58% - 64%]", "[64% - 71%]", "[71% - 93%]")
# cut() defaults to right-closed intervals that exclude the lowest break, i.e.
# (0,50], (50,58], ..., which would turn a Bush_pct of exactly 0 into NA.
# right = FALSE with include.lowest = TRUE gives [0,50), [50,58), ..., [71,93],
# the same left-closed convention classIntervals() reports for its quantiles.
usa_map_df$bushBreaks <- cut(usa_map_df$Bush_pct, breaks = breaks, labels = labels,
    right = FALSE, include.lowest = TRUE)
# Map of the binned classes; the polygon layer inherits data and the
# long/lat/group mapping from ggplot().
map2 <- ggplot(usa_map_df, aes(long, lat, group = group)) +
    geom_polygon(aes(fill = bushBreaks)) +
    coord_equal()
map2

plot of chunk unnamed-chunk-4


install.packages("RColorBrewer")
## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)
## Warning: 'lib = "C:/Program Files/RStudio/R/library"' is not writable
## Error: unable to install packages
library(RColorBrewer)
# Final map: ColorBrewer "YlGnBu" fill for the binned classes, the dark
# sethTheme, and a large bold white title.
# NOTE(review): hjust = 2 is outside the usual 0-1 justification range --
# confirm the title lands where intended on the ggplot2 version in use.
map2 +
    scale_fill_brewer(name = "Votes for Bush in 2004 (%)", palette = "YlGnBu") +
    sethTheme +
    ggtitle("Votes for Bush in 2004 (%)") +
    theme(
        plot.title = element_text(
            size = 24,
            face = "bold",
            color = "white",
            hjust = 2
        )
    )

plot of chunk unnamed-chunk-4