library(ggplot2)
library(maptools)
## Loading required package: foreign
## Warning: package 'foreign' was built under R version 2.15.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 2.15.3
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
# Load the USA polygon shapefile (polygons plus attribute table).
# NOTE(review): the path is machine-specific; point it at your copy of
# USA.shp before running.
USA <- readShapePoly("/Users/xiwang/Dropbox/GEOG 5023 - offline/Data/USA.shp")
# Remove count fields: keep attribute columns 1-8 and 14-30 only.
USA <- USA[, c(1:8, 14:30)]
# Remove rows with missing data. na.omit() appears to be a silent no-op on
# this object (its default method returns non-atomic input unchanged, and sp
# does not seem to provide a method of its own), so filter the features on
# the attribute table explicitly.
USA <- USA[complete.cases(USA@data), ]
# Base scatter plot of obesity (x) against home value (y); the layers below
# are all added to this object.
plot1 <- ggplot(USA@data, aes(x = Obese, y = homevalu))
# Plain points: not much of a relationship between obesity and home values.
plot1 + geom_point()
# The points are clumped together and over-plotted, so put both axes on a
# log10 scale:
plot1 +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()
# Make the over-plotting visible with transparency. alpha = 1/10 means that
# 10 dots have to be plotted atop each other to generate an opaque black
# point.
plot1 +
  geom_point(alpha = 1 / 10) +
  scale_x_log10() +
  scale_y_log10()
# Most values are concentrated near the mean of "Obese" and "homevalu".
# Add a fitted (linear model) line to the plot:
plot1 +
  geom_point(alpha = 1 / 10) +
  geom_smooth(method = "lm")
# Another version of the line, this time a loess smoother:
plot1 +
  geom_point(alpha = 1 / 10) +
  geom_smooth(method = "loess")
# Other way of dealing with the over-plotting problem: bin the points.
# Install hexbin only when it is missing and name the CRAN mirror explicitly.
# An unconditional install.packages() reinstalls on every run, and the
# recorded run stopped with "trying to use CRAN without setting a mirror".
if (!requireNamespace("hexbin", quietly = TRUE)) {
  install.packages("hexbin", repos = "https://cloud.r-project.org")
}
library(hexbin)
plot1 + stat_binhex()
plot1 + geom_bin2d()
plot1 + geom_density2d() # Look, mom, gradients!
# ggplot2 makes it very easy to incorporate qualitative variables. These can
# be used in several ways:
# Create a qualitative variable
# Label rows from four hand-picked states "its good" and every other row
# "its ok", and store the result as a factor.
USA$good_states <- factor(
  ifelse(
    USA$STATE_NAME %in%
      c("New York", "Massachusetts", "Rhode Island", "Wyoming"),
    yes = "its good",
    no = "its ok"
  )
)
# Same scatter plot as plot1, with point colour mapped to the new factor.
plot2 <- ggplot(
  USA@data,
  aes(x = Obese, y = homevalu, color = good_states)
)
plot2 + geom_point()
# Rebuild plot2 so that point shape also varies with the factor.
plot2 <- ggplot(
  USA@data,
  aes(x = Obese, y = homevalu, color = good_states, shape = good_states)
)
# Default smoother (method = "auto"). In the recorded run ggplot2 reported
# that, because the largest group has >= 1000 rows, it used gam with formula
# y ~ s(x, bs = "cs"); pass 'method' explicitly to change the smoothing
# method.
plot2 + stat_smooth()
# Linear fit per group. 'lwd' controls line thickness; 'lty' controls line
# type (1 = solid line, higher numbers give various forms of dashed line);
# 'se' can be used to turn off the grey standard error envelopes.
plot2 +
  geom_point() +
  stat_smooth(method = "lm", se = TRUE, lwd = 0.5, lty = 1)
# Percent college educated (pctcoled) against per capita income (pcincome);
# these two variables have a correlation of r = .7.
plot3 <- ggplot(USA@data, aes(x = pctcoled, y = pcincome))
plot3 +
  geom_point() +
  ylab("Per Capita Income") +
  xlab("Percent College Educated") +
  ggtitle("US Counties (2000)\nPercent College Educated by Per Capita Income")
# Add dimensionality: colour each dot by the unemployment rate (unemploy).
# The diverging colour scale is centred on the mean unemployment rate, and
# the legend marks the minimum, mean and maximum.
plot4 <- ggplot(
  USA@data,
  aes(x = pctcoled, y = pcincome, color = unemploy)
) +
  geom_point() +
  ylab("Per Capita Income") +
  xlab("Percent College Educated") +
  ggtitle("US Counties (2000)\nPercent College Educated by Per Capita Income") +
  scale_color_gradient2(
    "Unemployment",
    breaks = c(min(USA$unemploy), mean(USA$unemploy), max(USA$unemploy)),
    labels = c("Below Average", "Average", "Above Average"),
    low = "green",
    mid = "yellow",
    high = "red",
    midpoint = mean(USA$unemploy)
  )
plot4
# Split the plot into panels ("facets", or subplots), one per level of the
# good_states factor, each showing only the data for that level:
plot4 + facet_grid(. ~ good_states)
# Go nuts: one panel per state. Doesn't look good, or ok.
plot4 + facet_grid(. ~ STATE_NAME)
# Change the theme used to display the plot:
plot4 + theme_classic()
# Oh look, someone created a library of themes.
# Install ggthemes only when it is missing and name the CRAN mirror
# explicitly. An unconditional install.packages() reinstalls on every run,
# and the recorded run stopped with "trying to use CRAN without setting a
# mirror".
if (!requireNamespace("ggthemes", quietly = TRUE)) {
  install.packages(
    "ggthemes",
    dependencies = TRUE,
    repos = "https://cloud.r-project.org"
  )
}
library(ggthemes)
plot4 + theme_economist()
plot4 + theme_solarized()
plot4 + theme_tufte()
# Seth made a theme: black panel, plot and legend backgrounds, no minor grid
# lines, white major grid lines (linetype 3), grey80 axis text and axis
# titles, white legend text and no axis ticks. The legend title is black, so
# it does not show against the black legend background.
sethTheme <- theme(
  panel.background = element_rect(fill = "black"),
  plot.background = element_rect(fill = "black"),
  panel.grid.minor = element_blank(),
  panel.grid.major = element_line(linetype = 3, colour = "white"),
  axis.text.x = element_text(colour = "grey80"),
  axis.text.y = element_text(colour = "grey80"),
  axis.title.x = element_text(colour = "grey80"),
  axis.title.y = element_text(colour = "grey80"),
  legend.key = element_rect(fill = "black"),
  legend.text = element_text(colour = "white"),
  legend.title = element_text(colour = "black"),
  legend.background = element_rect(fill = "black"),
  axis.ticks = element_blank()
)
plot4 + sethTheme # Looks schmancy!
# Fix the theme so that the plot title and the legend title display: same
# settings as above, plus a white plot title and a white legend title.
sethTheme2 <- theme(
  panel.background = element_rect(fill = "black"),
  plot.background = element_rect(fill = "black"),
  panel.grid.minor = element_blank(),
  panel.grid.major = element_line(linetype = 3, colour = "white"),
  plot.title = element_text(colour = "white"),
  axis.text.x = element_text(colour = "grey80"),
  axis.text.y = element_text(colour = "grey80"),
  axis.title.x = element_text(colour = "grey80"),
  axis.title.y = element_text(colour = "grey80"),
  legend.key = element_rect(fill = "black"),
  legend.text = element_text(colour = "white"),
  legend.title = element_text(colour = "white"),
  legend.background = element_rect(fill = "black"),
  axis.ticks = element_blank()
)
plot4 + sethTheme2 # Yay!
# Use ggsave to save plots in any graphics format. For example,
# ggsave("path/plotName.png") saves a PNG file. To save a PDF file, change
# the extension: ggsave("path/plotName.pdf").
# Use fortify to extract polygon boundaries from the spatial data frame
# (it's slow)
# fortify() with 'region' needs a polygon-geometry backend. When rgeos is not
# available, maptools depends on gpclib, which is disabled by default; the
# recorded run therefore stopped with
# "isTRUE(gpclibPermitStatus()) is not TRUE". Enable gpclib explicitly in
# that case. NOTE: gpclib has a restricted licence (see the maptools start-up
# message); installing rgeos avoids needing it.
if (!rgeosStatus()) {
  gpclibPermit()
}
usa_geom <- fortify(USA, region = "FIPS")
# Reattach data to polygon boundaries. Merge against the attribute table
# (USA@data), as the plots elsewhere in this script do, then restore the
# vertex order: merge() sorts on the key and does not promise to keep the
# drawing order of the boundary points.
usa_map_df <- merge(usa_geom, USA@data, by.x = "id", by.y = "FIPS")
usa_map_df <- usa_map_df[order(usa_map_df$order), ]
# Make a map of Bush_pct: polygons filled yellow-to-red, with the boundaries
# drawn on top as thin white lines (lty = 3).
map1 <- ggplot(usa_map_df, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = Bush_pct)) +
  coord_equal() +
  scale_fill_gradient(low = "yellow", high = "red") +
  geom_path(
    data = usa_geom,
    aes(long, lat, group = group),
    lty = 3,
    lwd = 0.1,
    color = "white"
  )
map1
# Apply 'sethTheme'
map1 + sethTheme
# Can also create proper thematic maps with legend classes; ColorBrewer is
# implemented in ggplot. This section needs usa_map_df from the
# fortify/merge step.
library(classInt)
# Inspect the quintiles of Bush_pct to choose class breaks.
classIntervals(USA$Bush_pct, n = 5, style = "quantile")
## style: quantile
## [0,50.52) [50.52,58.07) [58.07,64.37) [64.37,71.31) [71.31,92.83]
## 622 622 622 622 623
breaks <- c(0, 50, 58, 64, 71, 93) # approximate quantiles
labels <- c(
  "[0 - 50%]", "[50% - 58%]", "[58% - 64%]", "[64% - 71%]", "[71% - 93%]"
)
# include.lowest = TRUE keeps values equal to the lowest break (0, which is
# the lower end of the recorded quantiles) in the first class; by default
# cut() builds (a, b] intervals and would turn those values into NA.
usa_map_df$bushBreaks <- cut(
  usa_map_df$Bush_pct,
  breaks = breaks,
  labels = labels,
  include.lowest = TRUE
)
# Map the classed variable.
map2 <- ggplot(usa_map_df, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = bushBreaks)) +
  coord_equal()
map2
# Assign a more reasonable color for the legend: the ColorBrewer "YlGnBu"
# palette, drawn with sethTheme and a large, bold, white plot title.
# NOTE(review): sethTheme colours the legend title black on a black legend
# background, so the scale name passed below is presumably not visible;
# confirm whether that is intended.
# NOTE(review): hjust = 2 lies outside the usual 0-1 range; presumably a
# manual nudge of the title position, confirm.
library(RColorBrewer)
map2 +
  scale_fill_brewer("Votes for Bush in 2004 (%)", palette = "YlGnBu") +
  sethTheme +
  ggtitle("Votes for Bush in 2004 (%)") +
  theme(
    plot.title = element_text(
      size = 24,
      face = "bold",
      color = "white",
      hjust = 2
    )
  )
# In the recorded run this step reported "object 'map2' not found" because
# the earlier fortify() call had failed.