1. Install
#install.packages('maps')
library(maps)
library(ggplot2)
library(dplyr)
#install.packages('ggforce')
library(ggforce)
  1. State map dataframe.
# State outline data as a data frame.
states_map <- map_data("state")
# Number of rows and columns.
c(nrow(states_map), ncol(states_map))
## [1] 15537     6
# Column names of the map data frame.
names(states_map)
## [1] "long"      "lat"       "group"     "order"     "region"    "subregion"
  1. Unique value.
# Distinct region names found in the map data.
unique(states_map[["region"]])
##  [1] "alabama"              "arizona"              "arkansas"            
##  [4] "california"           "colorado"             "connecticut"         
##  [7] "delaware"             "district of columbia" "florida"             
## [10] "georgia"              "idaho"                "illinois"            
## [13] "indiana"              "iowa"                 "kansas"              
## [16] "kentucky"             "louisiana"            "maine"               
## [19] "maryland"             "massachusetts"        "michigan"            
## [22] "minnesota"            "mississippi"          "missouri"            
## [25] "montana"              "nebraska"             "nevada"              
## [28] "new hampshire"        "new jersey"           "new mexico"          
## [31] "new york"             "north carolina"       "north dakota"        
## [34] "ohio"                 "oklahoma"             "oregon"              
## [37] "pennsylvania"         "rhode island"         "south carolina"      
## [40] "south dakota"         "tennessee"            "texas"               
## [43] "utah"                 "vermont"              "virginia"            
## [46] "washington"           "west virginia"        "wisconsin"           
## [49] "wyoming"
# Scatter plot of every boundary point, coloured by region.
# The colour legend is hidden with guides(colour = "none"); passing FALSE
# to guides() is deprecated in current ggplot2 releases.
g <- ggplot(states_map, aes(x = long, y = lat, colour = region)) +
  geom_point() +
  guides(colour = "none")
g

  1. Connect the points with lines.
# geom_line() joins the points in order of the x variable.
print(g + geom_line())

# geom_path() joins the points in the order they appear in the data.
print(g + geom_path())

We can see that geom_path() is the more appropriate choice. According to the help documentation, geom_path() connects the observations in the order in which they appear in the data, whereas geom_line() connects them in order of the variable on the x axis, so only geom_path() follows the outline of each state.

# Keep only the California and Colorado rows of the map data.
state2 <- states_map[
  with(states_map, region == "california" | region == "colorado"),
]
# Points plus a path through them, coloured by region.
ggplot(state2, aes(x = long, y = lat, colour = region)) +
  geom_point() +
  geom_path()

# Show the all-states point plot again for comparison.
print(g)

# Shuffle the rows of state2 (fixed seed for reproducibility) and redraw the
# same plot, to see how the path changes when the row order changes.
set.seed(4365)
# seq_len(nrow(.)) instead of 1:dim(.)[1]: the latter yields c(1, 0) for an
# empty data frame and would index the wrong rows.
state2_random <- state2[sample(seq_len(nrow(state2))), ]
ggplot(state2_random, aes(x = long, y = lat, colour = region)) +
  geom_point() +
  geom_path()

# Filled map: each row is matched to its polygon via the region name and
# filled by the `group` column.
ggplot() +
  geom_map(
    aes(map_id = region, fill = group),
    data = states_map,
    map = states_map
  ) +
  expand_limits(x = states_map$long, y = states_map$lat)

  1. Add State ID.
# Lookup table giving every distinct region a numeric id (1..n).
regions <- unique(states_map$region)
n <- length(regions)
regions <- data.frame(
  stateid = seq_len(n),
  region = regions,
  stringsAsFactors = FALSE
)
# Attach the id with a match() lookup rather than merge(): merge() sorts the
# rows by the key and moves the key column first, while the polygons are drawn
# from the rows in their stored order, so the map data is left untouched here.
states_map$stateid <- regions$stateid[match(states_map$region, regions$region)]
# Filled map, shaded by the new state id.
ggplot() +
  geom_map(
    aes(map_id = region, fill = stateid),
    data = states_map,
    map = states_map
  ) +
  expand_limits(x = states_map$long, y = states_map$lat)

  1. Add the murder rate per 100,000 people (the Murder column of state.x77) for each state.
# Load the built-in state datasets, which include the state.x77 matrix.
data(state)
# For every map row, find its state in state.x77 (row names lower-cased to
# match `region`) and copy the Murder column (column 5).
states_map$InfoValue <- state.x77[
  match(states_map$region, tolower(rownames(state.x77))),
  "Murder"
]
  1. Label and bind dataframes.
# Label the existing rows as the "Murder" measure.
states_map$InfoType <- "Murder"
# Second copy of the map data carrying the "Grad" measure, taken from the
# HS Grad column (column 6) of state.x77.
states_map2 <- states_map
states_map2$InfoType <- "Grad"
states_map2$InfoValue <- state.x77[
  match(states_map2$region, tolower(rownames(state.x77))),
  "HS Grad"
]
# Stack both copies into one long data frame.
statesbind <- rbind(states_map, states_map2)
  1. Plot.
#ggplot() + 
#    geom_map(map = statesbind, map_id=statesbind$region, data = statesbind, aes(fill=InfoValue))+ 
#    expand_limits(x = statesbind$long, y = statesbind$lat) + facet_grid(.~InfoType)

Unfortunately, the method described in the homework instructions did not work for me, so I used the “melt” function instead.

# One row per state with the two statistics to plot. Columns are selected by
# name rather than by position (5:6) so the choice is explicit.
Infos <- data.frame(
  state = tolower(rownames(state.x77)),
  state.x77[, c("Murder", "HS Grad")]
)
# Long format: one row per state and statistic. `id.vars` is spelled out in
# full instead of relying on partial matching of `id`.
Infosmelt <- reshape2::melt(Infos, id.vars = "state")
# One filled map per statistic, side by side.
ggplot(Infosmelt, aes(map_id = state)) +
  geom_map(aes(fill = value), map = states_map) +
  expand_limits(x = states_map$long, y = states_map$lat) +
  facet_grid(~variable)

  1. Calculate a data frame of circle centers.
# Centre of each state, taken as the mean of its boundary points.
by_state <- group_by(states_map, State = region)
states_circle <- summarize(
  by_state,
  MeanLong = mean(long),
  MeanLat = mean(lat)
)
# NOTE(review): column 3 of state.x77 is "Illiteracy", not an arrest count.
# It is now selected by name so that is visible; the column keeps its
# original name ArrentCount so existing references still work. Confirm
# which statistic was intended.
states_circle$ArrentCount <- state.x77[
  match(states_circle$State, tolower(rownames(state.x77))),
  "Illiteracy"
]
# One circle per state, radius given by ArrentCount. The fill legend is hidden
# with guides(fill = "none"); passing FALSE is deprecated in current ggplot2.
ggplot() +
  geom_circle(
    aes(x0 = MeanLong, y0 = MeanLat, r = ArrentCount, fill = State),
    data = states_circle
  ) +
  guides(fill = "none")

# Distinct region names to compute a centre for.
X <- unique(states_map[["region"]])
# Centre of one state, computed as the mean of its boundary points.
#
# statename: region name to look up in `map$region`.
# map: data frame with `long`, `lat` and `region` columns. Defaults to the
#   global `states_map`, so existing one-argument calls behave as before.
# Returns an unnamed numeric vector c(mean longitude, mean latitude); both
# entries are NaN when no row matches `statename`.
fun <- function(statename, map = states_map) {
  in_state <- map$region == statename
  MeanLong <- mean(map$long[in_state])
  MeanLat <- mean(map$lat[in_state])
  c(MeanLong, MeanLat)
}
# Apply fun() to every region name and collect the centres, one row per state.
# vapply() is used instead of sapply() so the result is always a 2 x n numeric
# matrix (named by region), even if X is empty or fun() misbehaves.
circle2 <- data.frame(t(vapply(X, fun, numeric(2))))
colnames(circle2) <- c("MeanLong", "MeanLat")
head(circle2)
##               MeanLong  MeanLat
## alabama      -86.85342 31.66372
## arizona     -113.27464 34.52859
## arkansas     -91.28040 34.63297
## california  -120.70642 36.71313
## colorado    -105.07031 38.58087
## connecticut  -72.72006 41.38260
# First six rows of the dplyr-based result, for comparison with circle2.
head(states_circle, n = 6L)
## # A tibble: 6 x 4
##   State       MeanLong MeanLat ArrentCount
##   <chr>          <dbl>   <dbl>       <dbl>
## 1 alabama       - 86.9    31.7       2.10 
## 2 arizona       -113      34.5       1.80 
## 3 arkansas      - 91.3    34.6       1.90 
## 4 california    -121      36.7       1.10 
## 5 colorado      -105      38.6       0.700
## 6 connecticut   - 72.7    41.4       1.10

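The two data frames contain the same center coordinates; states_circle only prints them with fewer digits and carries the additional ArrentCount column.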