library(ggplot2)
## Warning: package 'ggplot2' was built under R version 2.15.3
library(maptools)
## Warning: package 'maptools' was built under R version 2.15.3
## Loading required package: foreign
## Loading required package: sp
## Warning: package 'sp' was built under R version 2.15.3
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: TRUE
library(hexbin)
## Warning: package 'hexbin' was built under R version 2.15.3
# Read the polygon shapefile (geometry plus attribute table) into USA and
# print a summary of the resulting object.
# NOTE(review): the path is absolute and machine-specific -- adjust it before
# running this script on another computer.
USA <- readShapePoly(fn = "C:\\Users\\QINGHUAN\\Desktop\\Data 2\\USA copy.shp")
summary(USA)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x -124.73 -66.97
## y 24.96 49.37
## Is projected: NA
## proj4string : [NA]
## Data attributes:
## SP_ID NAME STATE_NAME STATE_FIPS
## 0 : 1 Washington: 32 Texas : 254 48 : 254
## 1 : 1 Jefferson : 26 Georgia : 159 13 : 159
## 10 : 1 Franklin : 25 Virginia: 136 51 : 136
## 100 : 1 Jackson : 24 Kentucky: 120 21 : 120
## 1000 : 1 Lincoln : 24 Missouri: 115 29 : 115
## 1001 : 1 Madison : 20 Kansas : 105 20 : 105
## (Other):3105 (Other) :2960 (Other) :2222 (Other):2222
## CNTY_FIPS FIPS AREA FIPS_num
## 001 : 48 01001 : 1 Min. : 2 Min. : 1001
## 003 : 48 01003 : 1 1st Qu.: 435 1st Qu.:19048
## 005 : 48 01005 : 1 Median : 622 Median :29217
## 009 : 47 01007 : 1 Mean : 965 Mean :30699
## 007 : 46 01009 : 1 3rd Qu.: 931 3rd Qu.:46012
## 011 : 46 01011 : 1 Max. :20175 Max. :56045
## (Other):2828 (Other):3105
## Bush Kerry County_F Nader
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 2926 1st Qu.: 1778 1st Qu.:19042 1st Qu.: 0
## Median : 6357 Median : 4041 Median :29211 Median : 14
## Mean : 19055 Mean : 17940 Mean :30656 Mean : 145
## 3rd Qu.: 15894 3rd Qu.: 10418 3rd Qu.:46008 3rd Qu.: 67
## Max. :954764 Max. :1670341 Max. :56045 Max. :13251
##
## Total Bush_pct Kerry_pct Nader_pct
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. :0.000
## 1st Qu.: 4808 1st Qu.:52.7 1st Qu.:30.2 1st Qu.:0.000
## Median : 10407 Median :61.2 Median :38.5 Median :0.302
## Mean : 37140 Mean :60.6 Mean :38.9 Mean :0.401
## 3rd Qu.: 26552 3rd Qu.:69.4 3rd Qu.:46.8 3rd Qu.:0.633
## Max. :2625105 Max. :92.8 Max. :90.0 Max. :4.467
##
## MDratio pcturban pctfemhh pcincome
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0
## 1st Qu.: 37.3 1st Qu.: 0.0 1st Qu.: 9.6 1st Qu.:15466
## Median : 65.6 Median : 33.4 Median :12.2 Median :17448
## Mean : 93.0 Mean : 35.3 Mean :13.0 Mean :17788
## 3rd Qu.: 117.5 3rd Qu.: 56.5 3rd Qu.:15.4 3rd Qu.:19818
## Max. :2189.5 Max. :100.0 Max. :41.1 Max. :58096
##
## pctpoor pctcoled unemploy homevalu
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0
## 1st Qu.:11.0 1st Qu.: 9.0 1st Qu.: 3.90 1st Qu.: 35850
## Median :15.1 Median :11.6 Median : 5.30 Median : 44400
## Mean :16.5 Mean :13.1 Mean : 5.87 Mean : 52015
## 3rd Qu.:20.4 3rd Qu.:15.3 3rd Qu.: 7.20 3rd Qu.: 58600
## Max. :63.1 Max. :53.4 Max. :37.90 Max. :500001
##
## popdens Obese Noins HISP_LAT
## Min. : 0 Min. :0.000 Min. :0.000 Min. : 0.00
## 1st Qu.: 15 1st Qu.:0.320 1st Qu.:0.100 1st Qu.: 0.90
## Median : 39 Median :0.340 Median :0.120 Median : 1.80
## Mean : 194 Mean :0.335 Mean :0.129 Mean : 6.18
## 3rd Qu.: 93 3rd Qu.:0.360 3rd Qu.:0.150 3rd Qu.: 5.10
## Max. :53801 Max. :0.630 Max. :0.410 Max. :97.50
##
## MEDAGE2000 PEROVER65
## Min. : 0.0 Min. : 0.0
## 1st Qu.:35.2 1st Qu.:12.1
## Median :37.4 Median :14.4
## Mean :37.3 Mean :14.8
## 3rd Qu.:39.8 3rd Qu.:17.1
## Max. :54.3 Max. :34.7
##
# Keep attribute columns 1-8 and 14-30. Going by the column order in the
# summary() output above, this drops columns 9-13 (Bush, Kerry, County_F,
# Nader, Total).
USA <- USA[, c(1:8, 14:30)]
# Drop every polygon that has a missing value in any remaining attribute.
# na.omit() has no method for Spatial*DataFrame objects and hands them back
# unchanged, so filter the rows on the attribute table instead.
USA <- USA[complete.cases(USA@data), ]
# Scatter plot basics: map Obese to the x axis and homevalu to the y axis.
plot1 <- ggplot(USA@data, aes(x = Obese, y = homevalu))
plot1 + geom_point()

# Same scatter with both axes on a log10 scale.
# NOTE(review): the summary() output above lists a minimum of 0 for both
# variables; zeros have no finite log10, so those rows presumably cannot be
# drawn on these scales -- confirm this is acceptable.
plot1 +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

# Semi-transparent points (alpha = 0.1) make overplotted regions visible.
plot1 +
  geom_point(alpha = 0.1) +
  scale_x_log10() +
  scale_y_log10()

# Overlay a fitted line: first a linear model, then a loess smoother.
plot1 + geom_point(alpha = 0.1) + geom_smooth(method = "lm")
plot1 + geom_point(alpha = 0.1) + geom_smooth(method = "loess")

# Alternatives for dealing with overplotting: hexagonal bins, rectangular
# bins, and 2D density contours.
plot1 + stat_binhex()
plot1 + geom_bin2d()
plot1 + geom_density2d()
# Several ways to incorporate qualitative variables:
#   1. Facets: each level of a factor can be plotted in its own panel.
#   2. Groups: each level of a factor can be assigned its own group, e.g.
#      plotting fitted lines for each group through a scatter plot.
#   3. Appearance: color, symbols, line weight, fill, and other visual
#      properties can be assigned to a factor.
# Build a two-level factor: rows whose STATE_NAME is one of the four listed
# states are labelled "its good", every other row "its ok".
USA$good_states <- factor(ifelse(
  USA$STATE_NAME %in%
    c("New York", "Massachusetts", "Rhode Island", "Wyoming"),
  "its good",
  "its ok"
))
# Extend the Obese/homevalu scatter: colour the points by good_states.
plot2 <- ggplot(USA@data, aes(x = Obese, y = homevalu, colour = good_states))
plot2 + geom_point()

# Rebuild plot2 so that good_states drives both colour and point shape, then
# draw one smoother per group with the default method. For these data the
# message printed below shows ggplot2 choosing a gam fit.
plot2 <- ggplot(
  USA@data,
  aes(x = Obese, y = homevalu, colour = good_states, shape = good_states)
)
plot2 + stat_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using
## gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the
## smoothing method.
# Points plus one linear fit per group. lwd sets the line thickness, lty the
# line type (1 = solid), and se = TRUE draws the grey standard-error
# envelopes around each line.
plot2 +
  geom_point() +
  stat_smooth(method = "lm", se = TRUE, lwd = 0.5, lty = 1)
# Add marginalia (axis labels and a two-line title) to a scatter of pctcoled
# against pcincome.
plot3 <- ggplot(USA@data, aes(x = pctcoled, y = pcincome))
plot3 +
  geom_point() +
  labs(
    x = "Percent College Educated",
    y = "Per Capita Income",
    title = "US Counties (2000)\n Percent College Educated by Per Capita Income"
  )
# Multidimensional plot: same axes as the pctcoled/pcincome scatter, with the
# point colour driven by unemploy on a diverging green-yellow-red scale whose
# midpoint is the mean unemployment value.
plot4 <- ggplot(
  USA@data,
  aes(x = pctcoled, y = pcincome, colour = unemploy)
) +
  geom_point() +
  labs(
    x = "Percent College Educated",
    y = "Per Capita Income",
    title = "USA Counties (2000)\n Percent College Educated by Per Capita Income"
  ) +
  scale_colour_gradient2(
    name = "Unemployment",
    low = "green",
    mid = "yellow",
    high = "red",
    midpoint = mean(USA$unemploy),
    # Legend breaks sit at the minimum, mean and maximum of unemploy.
    breaks = c(min(USA$unemploy), mean(USA$unemploy), max(USA$unemploy)),
    labels = c("Below Average", "Average", "Above Average")
  )
plot4

# Facets: one panel per level of good_states, laid out side by side.
plot4 + facet_grid(. ~ good_states)

# Swap the default look for the built-in classic theme.
plot4 + theme_classic()
# Full-blown custom themes can be used to make a consistent set of graphics
# for a presentation or paper; this script takes them from ggthemes.
# Install the package only when it is missing, so that re-running the script
# neither re-downloads it nor fails on a non-writable library directory when
# the package is already available.
if (!requireNamespace("ggthemes", quietly = TRUE)) {
  install.packages("ggthemes", dependencies = TRUE)
}
## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)
## Warning: 'lib = "C:/Program Files/RStudio/R/library"' is not writable
## Error: unable to install packages
# Attach ggthemes so that its theme_*() functions can be used below.
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 2.15.3
# Draw plot4 three times, each with a different ggthemes theme applied.
plot4 + theme_economist()
plot4 + theme_solarized()
plot4 + theme_tufte()
# Seth's custom theme: black backgrounds, dotted white major grid lines,
# white titles and light-grey axis text, with no axis ticks.
sethTheme <- theme(
  # Backgrounds
  plot.background = element_rect(fill = "black"),
  panel.background = element_rect(fill = "black"),
  legend.background = element_rect(fill = "black"),
  legend.key = element_rect(fill = "black"),
  # Grid: white major lines with linetype 3, no minor lines
  panel.grid.major = element_line(linetype = 3, colour = "white"),
  panel.grid.minor = element_blank(),
  # Text colours
  title = element_text(colour = "white"),
  axis.title.x = element_text(colour = "grey80"),
  axis.title.y = element_text(colour = "grey80"),
  axis.text.x = element_text(colour = "grey80"),
  axis.text.y = element_text(colour = "grey80"),
  legend.text = element_text(colour = "white"),
  # NOTE(review): black text on the black legend background makes the legend
  # title invisible -- presumably intentional; confirm.
  legend.title = element_text(colour = "black"),
  # No axis tick marks
  axis.ticks = element_blank()
)
plot4 + sethTheme