#install.packages('maps')
library(maps)
library(ggplot2)
library(dplyr)
#install.packages('ggforce')
library(ggforce)
# Load the US state outlines as a data frame of boundary vertices, then
# inspect its dimensions, its column names and the regions it contains.
states_map <- map_data("state")
c(nrow(states_map), ncol(states_map))
## [1] 15537 6
names(states_map)
## [1] "long" "lat" "group" "order" "region" "subregion"
unique(states_map[["region"]])
## [1] "alabama" "arizona" "arkansas"
## [4] "california" "colorado" "connecticut"
## [7] "delaware" "district of columbia" "florida"
## [10] "georgia" "idaho" "illinois"
## [13] "indiana" "iowa" "kansas"
## [16] "kentucky" "louisiana" "maine"
## [19] "maryland" "massachusetts" "michigan"
## [22] "minnesota" "mississippi" "missouri"
## [25] "montana" "nebraska" "nevada"
## [28] "new hampshire" "new jersey" "new mexico"
## [31] "new york" "north carolina" "north dakota"
## [34] "ohio" "oklahoma" "oregon"
## [37] "pennsylvania" "rhode island" "south carolina"
## [40] "south dakota" "tennessee" "texas"
## [43] "utah" "vermont" "virginia"
## [46] "washington" "west virginia" "wisconsin"
## [49] "wyoming"
# Scatter plot of every boundary vertex, coloured by state, with the colour
# legend switched off.
g <- ggplot(states_map, aes(x = long, y = lat, color = region)) +
  geom_point() +
  guides(colour = "none") # `FALSE` is deprecated since ggplot2 3.3.4
g
# Draw the same points joined by each of the two line geoms for comparison.
g + geom_line()
g + geom_path()
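We can see that geom_path() is more appropriate here. According to the help documentation, geom_path() connects the observations in the order in which they appear in the data, whereas geom_line() connects them in order of the variable on the x axis. Because the map data lists each state's boundary points in drawing order, geom_path() traces the outlines correctly.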
# Keep only the boundary vertices of California and Colorado.
# `%in%` never yields NA, so a missing region cannot inject all-NA rows the
# way an `==` / `|` mask would.
state2 <- states_map[states_map$region %in% c("california", "colorado"), ]
# Points plus a path drawn in the stored row order.
ggplot(state2, aes(x = long, y = lat, color = region)) +
  geom_point() +
  geom_path()
(g)
# Shuffle the rows of `state2` reproducibly to show what geom_path() draws
# when the vertices are no longer stored in boundary order.
set.seed(4365)
# seq_len() stays empty for a zero-row frame, whereas 1:0 would yield c(1, 0).
state2_random <- state2[sample(seq_len(nrow(state2))), ]
ggplot(state2_random, aes(x = long, y = lat, color = region)) +
  geom_point() +
  geom_path()
# Filled state map: each vertex row is matched to its polygon through
# `map_id`, and the fill is taken from the numeric `group` column.
# `map_id` is mapped inside aes() so it is evaluated against `data` instead of
# being passed as a data-length constant.
ggplot() +
  geom_map(
    aes(map_id = region, fill = group),
    data = states_map,
    map = states_map
  ) +
  # geom_map() does not set axis limits itself, so stretch them over the map.
  expand_limits(x = states_map$long, y = states_map$lat)
# Give every region a sequential integer id and attach it to the map data.
regions <- unique(states_map$region)
n <- length(regions)
regions <- data.frame(
  stateid = seq_len(n),
  region = regions,
  stringsAsFactors = FALSE
)
states_map <- merge(states_map, regions, by = "region")
# merge() does not preserve the original row order; restore the vertex drawing
# order so the polygons are not traced out of sequence.
states_map <- states_map[order(states_map$order), ]
# Same filled map as before, now coloured by the per-state id.
ggplot() +
  geom_map(
    aes(map_id = region, fill = stateid),
    data = states_map,
    map = states_map
  ) +
  expand_limits(x = states_map$long, y = states_map$lat)
# Build a long-format map table holding two state.x77 statistics per vertex
# row, labelled by `InfoType`, for a faceted plot.
data(state)
# Row of state.x77 matching each map row; NA where the region has no entry in
# state.x77.
state_row <- match(states_map$region, tolower(rownames(state.x77)))
states_map$InfoValue <- state.x77[state_row, "Murder"]
states_map$InfoType <- "Murder"
# Second copy of the map carrying the "HS Grad" column instead.
states_map2 <- states_map
states_map2$InfoType <- "Grad"
states_map2$InfoValue <- state.x77[state_row, "HS Grad"]
statesbind <- rbind(states_map, states_map2)
#ggplot() +
# geom_map(map = statesbind, map_id=statesbind$region, data = statesbind, aes(fill=InfoValue))+
# expand_limits(x = statesbind$long, y = statesbind$lat) + facet_grid(.~InfoType)
Unfortunately, the method described in the homework instructions did not work for me, so I used the melt() function from the reshape2 package instead.
# One row per state with its Murder and HS Grad values, keyed by the
# lower-case state name so it matches `region` in the map data.
Infos <- data.frame(
  state = tolower(rownames(state.x77)),
  state.x77[, c("Murder", "HS Grad")]
)
# Long format: one row per (state, variable) pair. `id.vars` is spelled out
# instead of relying on partial matching of `id`.
Infosmelt <- reshape2::melt(Infos, id.vars = "state")
# One filled map per variable, side by side.
ggplot(Infosmelt, aes(map_id = state)) +
  geom_map(aes(fill = value), map = states_map) +
  expand_limits(x = states_map$long, y = states_map$lat) +
  facet_grid(~variable)
# Per-state mean longitude/latitude of the boundary vertices, used as the
# centre of one circle per state.
by_state <- group_by(states_map, State = region)
states_circle <- summarize(
  by_state,
  MeanLong = mean(long),
  MeanLat = mean(lat)
)
# Circle radius: third column of state.x77, i.e. "Illiteracy".
# NOTE(review): the column name `ArrentCount` does not match that variable;
# confirm which statistic was intended before renaming anything.
states_circle$ArrentCount <- state.x77[
  match(states_circle$State, tolower(rownames(state.x77))),
  "Illiteracy"
]
ggplot() +
  geom_circle(
    aes(x0 = MeanLong, y0 = MeanLat, r = ArrentCount, fill = State),
    data = states_circle
  ) +
  guides(fill = "none") # `FALSE` is deprecated since ggplot2 3.3.4
# Region names used as input for the base-R version of the same computation.
X <- unique(states_map$region)
# Mean longitude and latitude of one region's boundary vertices.
#
# statename: region name to look up (compared with `==` against `region`).
# map_df:    data frame with `long`, `lat` and `region` columns; defaults to
#            the `states_map` object visible when the function is called, so
#            existing one-argument calls behave as before.
#
# Returns an unnamed numeric vector c(mean longitude, mean latitude).
fun <- function(statename, map_df = states_map) {
  # Compute the row mask once and reuse it for both coordinates.
  in_state <- map_df$region == statename
  c(mean(map_df$long[in_state]), mean(map_df$lat[in_state]))
}
# Base-R version of the per-state centres: apply `fun` to every region name
# and stack the results into one row per state.
# vapply() pins each result to a length-2 numeric, so the outcome is a matrix
# even when `X` is empty (sapply() would return a list in that case).
circle2 <- data.frame(t(vapply(X, fun, numeric(2))))
colnames(circle2) <- c("MeanLong", "MeanLat")
head(circle2)
## MeanLong MeanLat
## alabama -86.85342 31.66372
## arizona -113.27464 34.52859
## arkansas -91.28040 34.63297
## california -120.70642 36.71313
## colorado -105.07031 38.58087
## connecticut -72.72006 41.38260
# Show the dplyr result next to it for comparison.
head(states_circle)
## # A tibble: 6 x 4
## State MeanLong MeanLat ArrentCount
## <chr> <dbl> <dbl> <dbl>
## 1 alabama - 86.9 31.7 2.10
## 2 arizona -113 34.5 1.80
## 3 arkansas - 91.3 34.6 1.90
## 4 california -121 36.7 1.10
## 5 colorado -105 38.6 0.700
## 6 connecticut - 72.7 41.4 1.10
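The two data frames contain the same state centres (MeanLong and MeanLat); the tibble only prints them rounded to three significant digits and carries the additional ArrentCount column.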