Visualizing data with ggplot2

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 2.15.3
library(maptools)
## Warning: package 'maptools' was built under R version 2.15.3
## Loading required package: foreign
## Loading required package: sp
## Warning: package 'sp' was built under R version 2.15.3
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: TRUE
library(hexbin)
## Warning: package 'hexbin' was built under R version 2.15.3
# Read the polygon shapefile (absolute path on the author's machine) into
# `USA`, then print a summary of the resulting spatial object.
USA <- readShapePoly(
  "C:\\Users\\QINGHUAN\\Desktop\\Data 2\\USA copy.shp"
)
summary(USA)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##       min    max
## x -124.73 -66.97
## y   24.96  49.37
## Is projected: NA 
## proj4string : [NA]
## Data attributes:
##      SP_ID              NAME         STATE_NAME     STATE_FIPS  
##  0      :   1   Washington:  32   Texas   : 254   48     : 254  
##  1      :   1   Jefferson :  26   Georgia : 159   13     : 159  
##  10     :   1   Franklin  :  25   Virginia: 136   51     : 136  
##  100    :   1   Jackson   :  24   Kentucky: 120   21     : 120  
##  1000   :   1   Lincoln   :  24   Missouri: 115   29     : 115  
##  1001   :   1   Madison   :  20   Kansas  : 105   20     : 105  
##  (Other):3105   (Other)   :2960   (Other) :2222   (Other):2222  
##    CNTY_FIPS         FIPS           AREA          FIPS_num    
##  001    :  48   01001  :   1   Min.   :    2   Min.   : 1001  
##  003    :  48   01003  :   1   1st Qu.:  435   1st Qu.:19048  
##  005    :  48   01005  :   1   Median :  622   Median :29217  
##  009    :  47   01007  :   1   Mean   :  965   Mean   :30699  
##  007    :  46   01009  :   1   3rd Qu.:  931   3rd Qu.:46012  
##  011    :  46   01011  :   1   Max.   :20175   Max.   :56045  
##  (Other):2828   (Other):3105                                  
##       Bush            Kerry            County_F         Nader      
##  Min.   :     0   Min.   :      0   Min.   :    0   Min.   :    0  
##  1st Qu.:  2926   1st Qu.:   1778   1st Qu.:19042   1st Qu.:    0  
##  Median :  6357   Median :   4041   Median :29211   Median :   14  
##  Mean   : 19055   Mean   :  17940   Mean   :30656   Mean   :  145  
##  3rd Qu.: 15894   3rd Qu.:  10418   3rd Qu.:46008   3rd Qu.:   67  
##  Max.   :954764   Max.   :1670341   Max.   :56045   Max.   :13251  
##                                                                    
##      Total            Bush_pct      Kerry_pct      Nader_pct    
##  Min.   :      0   Min.   : 0.0   Min.   : 0.0   Min.   :0.000  
##  1st Qu.:   4808   1st Qu.:52.7   1st Qu.:30.2   1st Qu.:0.000  
##  Median :  10407   Median :61.2   Median :38.5   Median :0.302  
##  Mean   :  37140   Mean   :60.6   Mean   :38.9   Mean   :0.401  
##  3rd Qu.:  26552   3rd Qu.:69.4   3rd Qu.:46.8   3rd Qu.:0.633  
##  Max.   :2625105   Max.   :92.8   Max.   :90.0   Max.   :4.467  
##                                                                 
##     MDratio          pcturban        pctfemhh       pcincome    
##  Min.   :   0.0   Min.   :  0.0   Min.   : 0.0   Min.   :    0  
##  1st Qu.:  37.3   1st Qu.:  0.0   1st Qu.: 9.6   1st Qu.:15466  
##  Median :  65.6   Median : 33.4   Median :12.2   Median :17448  
##  Mean   :  93.0   Mean   : 35.3   Mean   :13.0   Mean   :17788  
##  3rd Qu.: 117.5   3rd Qu.: 56.5   3rd Qu.:15.4   3rd Qu.:19818  
##  Max.   :2189.5   Max.   :100.0   Max.   :41.1   Max.   :58096  
##                                                                 
##     pctpoor        pctcoled       unemploy        homevalu     
##  Min.   : 0.0   Min.   : 0.0   Min.   : 0.00   Min.   :     0  
##  1st Qu.:11.0   1st Qu.: 9.0   1st Qu.: 3.90   1st Qu.: 35850  
##  Median :15.1   Median :11.6   Median : 5.30   Median : 44400  
##  Mean   :16.5   Mean   :13.1   Mean   : 5.87   Mean   : 52015  
##  3rd Qu.:20.4   3rd Qu.:15.3   3rd Qu.: 7.20   3rd Qu.: 58600  
##  Max.   :63.1   Max.   :53.4   Max.   :37.90   Max.   :500001  
##                                                                
##     popdens          Obese           Noins          HISP_LAT    
##  Min.   :    0   Min.   :0.000   Min.   :0.000   Min.   : 0.00  
##  1st Qu.:   15   1st Qu.:0.320   1st Qu.:0.100   1st Qu.: 0.90  
##  Median :   39   Median :0.340   Median :0.120   Median : 1.80  
##  Mean   :  194   Mean   :0.335   Mean   :0.129   Mean   : 6.18  
##  3rd Qu.:   93   3rd Qu.:0.360   3rd Qu.:0.150   3rd Qu.: 5.10  
##  Max.   :53801   Max.   :0.630   Max.   :0.410   Max.   :97.50  
##                                                                 
##    MEDAGE2000     PEROVER65   
##  Min.   : 0.0   Min.   : 0.0  
##  1st Qu.:35.2   1st Qu.:12.1  
##  Median :37.4   Median :14.4  
##  Mean   :37.3   Mean   :14.8  
##  3rd Qu.:39.8   3rd Qu.:17.1  
##  Max.   :54.3   Max.   :34.7  
## 
# Keep attribute columns 1-8 and 14-30; columns 9-13 are dropped.
USA <- USA[, c(1:8, 14:30)]
# Drop features whose attribute row contains a missing value. `na.omit()` has
# no method for Spatial*DataFrame objects: it falls through to
# `na.omit.default()`, which returns non-atomic objects unchanged, so the
# rows are filtered explicitly on the attribute table instead.
USA <- USA[complete.cases(USA@data), ]
# Base scatter plot: `Obese` on the x-axis and `homevalu` on the y-axis.
plot1 <- ggplot(USA@data, aes(x = Obese, y = homevalu))
plot1 + geom_point()

plot of chunk unnamed-chunk-2

# Same scatter plot with both axes on a log10 scale.
plot1 +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

plot of chunk unnamed-chunk-2

# Make the points mostly transparent so that dense, over-plotted regions
# show up darker than sparse ones.
plot1 +
  geom_point(alpha = 0.1) +
  scale_x_log10() +
  scale_y_log10()

plot of chunk unnamed-chunk-2

# Overlay a fitted line from a linear model on the transparent points.
plot1 +
  geom_point(alpha = 0.1) +
  geom_smooth(method = "lm")

plot of chunk unnamed-chunk-2

# Same plot, but with a loess smoother instead of a straight line.
plot1 +
  geom_point(alpha = 0.1) +
  geom_smooth(method = "loess")

plot of chunk unnamed-chunk-2

# Deal with the over-plotting problem by adding a binning layer
# (stat_binhex()) to plot1 in place of individual points.
plot1 + stat_binhex()

plot of chunk unnamed-chunk-2

# Alternative for over-plotting: a geom_bin2d() layer on the same x/y mapping.
plot1 + geom_bin2d()

plot of chunk unnamed-chunk-2

# Alternative for over-plotting: a geom_density2d() layer on the same x/y
# mapping.
plot1 + geom_density2d()

plot of chunk unnamed-chunk-2

# Several ways to incorporate qualitative variables:
# 1. Facets: each level of a factor can be plotted in its own panel.
# 2. Groups: each level of a factor can be assigned its own group,
#    e.g. plotting fitted lines for each group through a scatter plot.
# 3. Appearance: color, symbols, line weight, fill, and other aesthetics
#    can be assigned to a factor.

# Create a qualitative variable: a factor that is "its good" for rows whose
# STATE_NAME is one of four listed states and "its ok" for every other row.
USA$good_states <- factor(
  ifelse(
    USA$STATE_NAME %in% c(
      "New York", "Massachusetts", "Rhode Island", "Wyoming"
    ),
    yes = "its good",
    no = "its ok"
  )
)
# Variant of plot1 with point colour mapped to good_states.
plot2 <- ggplot(
  USA@data,
  aes(x = Obese, y = homevalu, color = good_states)
)
plot2 + geom_point()

plot of chunk unnamed-chunk-3

# Redefine plot2 so that both colour and point shape follow good_states,
# then draw one smoother per group. No method is given, so ggplot2 chooses
# the smoother itself (see the message echoed below).
plot2 <- ggplot(
  USA@data,
  aes(
    x = Obese,
    y = homevalu,
    color = good_states,
    shape = good_states
  )
)
plot2 + stat_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using
## gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the
## smoothing method.

plot of chunk unnamed-chunk-3


# Points plus a per-group linear fit with its standard-error band.
plot2 +
  geom_point() +
  stat_smooth(
    method = "lm",
    se = TRUE,
    lwd = 0.5,
    lty = 1
  )

plot of chunk unnamed-chunk-3

# lwd controls line thickness, lty controls line type (1 = solid line), and
# se = TRUE draws the grey standard-error envelopes

# Add marginalia (axis labels and a two-line title) to a scatter plot of
# `pcincome` against `pctcoled`.
plot3 <- ggplot(USA@data, aes(x = pctcoled, y = pcincome))
plot3 +
  geom_point() +
  labs(
    x = "Percent College Educated",
    y = "Per Capita Income",
    title = "US Counties (2000)\n Percent College Educated by Per Capita Income"
  )

plot of chunk unnamed-chunk-3


# Make a multidimensional plot: same axes as plot3, with point colour driven
# by `unemploy`. The colour scale runs green -> yellow -> red, is centred on
# the mean of `unemploy`, and has legend breaks at its minimum, mean and
# maximum.
plot4 <- ggplot(
  USA@data,
  aes(x = pctcoled, y = pcincome, color = unemploy)
) +
  geom_point() +
  labs(
    x = "Percent College Educated",
    y = "Per Capita Income",
    title = "USA Counties (2000)\n Percent College Educated by Per Capita Income"
  ) +
  scale_color_gradient2(
    "Unemployment",
    breaks = c(
      min(USA$unemploy),
      mean(USA$unemploy),
      max(USA$unemploy)
    ),
    labels = c("Below Average", "Average", "Above Average"),
    low = "green",
    mid = "yellow",
    high = "red",
    midpoint = mean(USA$unemploy)
  )
plot4

plot of chunk unnamed-chunk-3


# Create facets that display only the data for each level of the factor:
# the formula `. ~ good_states` gives one panel per level of good_states.
plot4 + facet_grid(. ~ good_states)

plot of chunk unnamed-chunk-3

# Change the 'theme' used to display the plot: add theme_classic() to plot4
# (data and mappings are untouched).
plot4 + theme_classic()

plot of chunk unnamed-chunk-3

# full blown custom themes can be used to make a consistent set of
# graphics for a presentation or paper

# Install ggthemes only when it is not already available. An unconditional
# install.packages() call re-downloads the package on every run and aborts
# the script when the library directory is not writable, even if the package
# is already installed.
if (!requireNamespace("ggthemes", quietly = TRUE)) {
  install.packages("ggthemes", dependencies = TRUE)
}
## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)
## Warning: 'lib = "C:/Program Files/RStudio/R/library"' is not writable
## Error: unable to install packages
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 2.15.3
# Apply theme_economist() (ggthemes is loaded just above) to plot4.
plot4 + theme_economist()

plot of chunk unnamed-chunk-4

# Same plot with theme_solarized().
plot4 + theme_solarized()

plot of chunk unnamed-chunk-4

# Same plot with theme_tufte().
plot4 + theme_tufte()

plot of chunk unnamed-chunk-4


# Seth's custom theme: black panel, plot and legend backgrounds, dotted
# white major grid lines, no minor grid lines or axis ticks, and light text.
sethTheme <- theme(
  panel.background = element_rect(fill = "black"),
  plot.background = element_rect(fill = "black"),
  panel.grid.minor = element_blank(),
  panel.grid.major = element_line(linetype = 3, colour = "white"),
  title = element_text(colour = "white"),
  axis.text.x = element_text(colour = "grey80"),
  axis.text.y = element_text(colour = "grey80"),
  axis.title.x = element_text(colour = "grey80"),
  axis.title.y = element_text(colour = "grey80"),
  legend.key = element_rect(fill = "black"),
  legend.text = element_text(colour = "white"),
  # NOTE(review): black title on a black legend background, presumably to
  # hide the legend title; confirm this is intended.
  legend.title = element_text(colour = "black"),
  legend.background = element_rect(fill = "black"),
  axis.ticks = element_blank()
)
plot4 + sethTheme

plot of chunk unnamed-chunk-4