Part A:
Part B:
Part C:
source("create_datasets.R")
load('data/test_datasets.RData')
library(readr)
library(dplyr)
library(ggplot2)
library(purrr)
library(maps)
library(ggmap)
library(ggthemes)
library(viridis)
library(rgdal)
library(ggfortify)
library(animation)
library(gganimate)
library(gapminder)
library(car)
# library(maps)
# library(ggmap)
# Fetch the "usa" outline as a data frame of boundary points and take a
# first look at it (structure, first rows, points per polygon group).
usa <- map_data(map = "usa")
str(usa)
## 'data.frame': 7243 obs. of 6 variables:
## $ long : num -101 -101 -101 -101 -101 ...
## $ lat : num 29.7 29.7 29.7 29.6 29.6 ...
## $ group : num 1 1 1 1 1 1 1 1 1 1 ...
## $ order : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "main" "main" "main" "main" ...
## $ subregion: chr NA NA NA NA ...
head(usa)
## long lat group order region subregion
## 1 -101.4078 29.74224 1 1 main <NA>
## 2 -101.3906 29.74224 1 2 main <NA>
## 3 -101.3620 29.65056 1 3 main <NA>
## 4 -101.3505 29.63911 1 4 main <NA>
## 5 -101.3219 29.63338 1 5 main <NA>
## 6 -101.3047 29.64484 1 6 main <NA>
# Group 1 carries almost all of the 7243 points; the rest are small pieces.
table(usa[["group"]])
##
## 1 2 3 4 5 6 7 8 9 10
## 6886 36 30 16 10 168 17 17 19 44
# Draw the outline: one polygon per `group`, on a map projection, with a
# blank theme so only the shape itself is shown.
ggplot(data = usa) +
  geom_polygon(mapping = aes(x = long, y = lat, group = group)) +
  coord_map() +
  theme_nothing()
# `usa` must already exist (see map_data("usa")); `cities` is read here from
# the course assets as a tab-separated file.
# library(readr)
cities <- read_tsv(
  file = "https://assets.datacamp.com/production/course_862/datasets/US_Cities.txt"
)
head(usa)
## long lat group order region subregion
## 1 -101.4078 29.74224 1 1 main <NA>
## 2 -101.3906 29.74224 1 2 main <NA>
## 3 -101.3620 29.65056 1 3 main <NA>
## 4 -101.3505 29.63911 1 4 main <NA>
## 5 -101.3219 29.63338 1 5 main <NA>
## 6 -101.3047 29.64484 1 6 main <NA>
head(cities)
## # A tibble: 6 x 5
## City State Pop_est lat long
## <chr> <chr> <int> <dbl> <dbl>
## 1 Eugene Oregon 163460 44.0567 -123.1162
## 2 Salem Oregon 164549 44.9237 -123.0231
## 3 Hillsboro Oregon 102347 45.5167 -122.9833
## 4 Santa Rosa California 174972 38.4468 -122.7061
## 5 Portland Oregon 632309 45.5370 -122.6500
## 6 Vancouver Washington 172860 45.6372 -122.5965
## theme_map() and scale_color_viridis() come from the two packages below
# library(ggthemes)
# library(viridis)
# Plot 1: USA outline with one semi-transparent red dot per city, dot size
# mapped to the population estimate. The point layer reuses the plot-level
# long/lat mapping and only overrides `group` (cities has no `group` column).
ggplot(usa, aes(long, lat, group = group)) +
  geom_polygon() +
  geom_point(
    data = cities,
    mapping = aes(group = State, size = Pop_est),
    colour = "red",
    shape = 16,
    alpha = 0.6
  ) +
  coord_map() +
  theme_map()
# Sort cities by ascending population so the rows for the biggest cities
# come last in the data handed to the point layer.
cities_arr <- cities %>%
  arrange(Pop_est)
# Plot 2: same layout as plot 1, but with a light grey land mass and the
# population estimate mapped to a viridis colour scale instead of dot size.
ggplot(usa, aes(long, lat, group = group)) +
  geom_polygon(fill = "grey90") +
  geom_point(
    data = cities_arr,
    mapping = aes(group = State, colour = Pop_est),
    shape = 16,
    size = 2
  ) +
  coord_map() +
  theme_map() +
  scale_color_viridis()
## New York dominates the linear colour scale;
## a log scale would probably spread the colours better.
# Ten most populous cities, largest first
head(arrange(cities_arr, desc(Pop_est)), n = 10)
## # A tibble: 10 x 5
## City State Pop_est lat long
## <chr> <chr> <int> <dbl> <dbl>
## 1 New York New York 8550405 40.6643 -73.9385
## 2 Los Angeles California 3971883 34.0194 -118.4108
## 3 Chicago Illinois 2720546 41.8376 -87.6818
## 4 Houston Texas 2296224 29.7805 -95.3863
## 5 Philadelphia Pennsylvania 1567442 40.0094 -75.1333
## 6 Phoenix Arizona 1563025 33.5722 -112.0880
## 7 San Antonio Texas 1469845 29.4724 -98.5251
## 8 San Diego California 1394928 32.8153 -117.1350
## 9 Dallas Texas 1300092 32.7757 -96.7967
## 10 San Jose California 1026908 37.2969 -121.8193
# pop and all required packages are available
# Boundary points for the individual states (one `region` per state)
state <- map_data(map = "state")
head(state)
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
# Categorical map: each state filled by its own `region` value, with white
# borders between polygons and a blank theme.
ggplot(state, aes(long, lat, group = group, fill = region)) +
  geom_polygon(colour = "white") +
  coord_map() +
  theme_nothing()
# Merge state and pop: state2
# merge() joins on the columns the two tables have in common (judging by the
# output below, that is `region`) and adds Pop_est to every boundary point.
state2 <- merge(state, pop)
# merge() makes no promise about keeping rows in their input order, while
# geom_polygon() connects vertices in row order. Re-sort by map_data()'s
# `order` column so the outlines cannot come out scrambled.
state2 <- state2[order(state2$order), ]
head(state2)
## region long lat group order subregion Pop_est
## 1 alabama -87.46201 30.38968 1 1 <NA> 4858979
## 2 alabama -87.48493 30.37249 1 2 <NA> 4858979
## 3 alabama -87.52503 30.37249 1 3 <NA> 4858979
## 4 alabama -87.53076 30.33239 1 4 <NA> 4858979
## 5 alabama -87.57087 30.32665 1 5 <NA> 4858979
## 6 alabama -87.58806 30.32665 1 6 <NA> 4858979
# Map of states with populations: fill is now the continuous Pop_est value
ggplot(state2, aes(x = long, y = lat, fill = Pop_est, group = group)) +
  geom_polygon(col = "white") +
  coord_map() +
  theme_map()
# Read the DEU_adm1 layer from the local shape-file folder: germany
# library(rgdal)
germany <- readOGR("data/shape_files", "DEU_adm1")
## OGR data source with driver: ESRI Shapefile
## Source: "data/shape_files", layer: "DEU_adm1"
## with 16 features
## It has 16 fields
# Flatten the spatial object into a plain data frame of points: bundes
bundes <- fortify(germany)
# Plain map of Germany: blue polygons separated by white borders
ggplot(data = bundes) +
  geom_polygon(
    mapping = aes(x = long, y = lat, group = group),
    fill = "blue",
    colour = "white"
  ) +
  coord_map() +
  theme_nothing()
# germany (shape data), bundes (its fortified points) and unemp
# (unemployment rate per state) are expected to exist already.
head(bundes)
## long lat order hole piece id group
## 1 9.650460 49.77634 1 FALSE 1 0 0.1
## 2 9.650968 49.76515 2 FALSE 1 0 0.1
## 3 9.656839 49.76145 3 FALSE 1 0 0.1
## 4 9.640400 49.75014 4 FALSE 1 0 0.1
## 5 9.652028 49.74276 5 FALSE 1 0 0.1
## 6 9.652208 49.73903 6 FALSE 1 0 0.1
# Re-add state names to bundes: turn the numeric-looking `id` into a factor
# (levels sorted numerically) and relabel those levels, in order, with the
# NAME_1 values stored in the shape data.
bundes$state <- factor(as.numeric(bundes$id))
levels(bundes$state) <- germany$NAME_1
head(bundes)
## long lat order hole piece id group state
## 1 9.650460 49.77634 1 FALSE 1 0 0.1 Baden-Württemberg
## 2 9.650968 49.76515 2 FALSE 1 0 0.1 Baden-Württemberg
## 3 9.656839 49.76145 3 FALSE 1 0 0.1 Baden-Württemberg
## 4 9.640400 49.75014 4 FALSE 1 0 0.1 Baden-Württemberg
## 5 9.652028 49.74276 5 FALSE 1 0 0.1 Baden-Württemberg
## 6 9.652208 49.73903 6 FALSE 1 0 0.1 Baden-Württemberg
head(unemp)
## state unemployment
## 1 Bayern 3.7
## 2 Baden-Württemberg 4.0
## 3 Rheinland-Pfalz 5.4
## 4 Hessen 5.8
## 5 Niedersachsen 6.5
## 6 Schleswig-Holstein 6.7
# Merge bundes and unemp: bundes_unemp (joined on the shared `state` column)
bundes_unemp <- merge(bundes, unemp)
# merge() does not guarantee the input row order, but geom_polygon() draws
# vertices in row order. Restore the drawing sequence inside every polygon;
# sort by `group` first because `order` may restart for each polygon.
bundes_unemp <- bundes_unemp[order(bundes_unemp$group, bundes_unemp$order), ]
# Choropleth: fill each state by its unemployment rate
ggplot(bundes_unemp, aes(x = long, y = lat, group = group, fill = unemployment)) +
  geom_polygon() +
  coord_map() +
  theme_map()
# Requires the ggmap package
# library(ggmap)
# Download a base map centred on London at zoom level 13: london_map_13
london_map_13 <- get_map(location = "London, England", zoom = 13)
# Render the downloaded tiles
ggmap(london_map_13)
# Variation: same place and zoom, but the "toner" style from the Stamen
# source, fetched and plotted in one go.
ggmap(
  get_map(
    location = "London, England",
    zoom = 13,
    maptype = "toner",
    source = "stamen"
  )
)
# Landmarks to look up; ggmap must be loaded
london_sites <- c(
  "Tower of London, London",
  "Buckingham Palace, London",
  "Tower Bridge, London",
  "Westminster Abbey, London",
  "Queen Elizabeth Olympic Park, London"
)
# Geocode the landmarks into a lon/lat data frame: xx
xx <- geocode(london_sites)
# Label column: the landmark name with the first ", London" removed
xx$location <- sub(", London", "", london_sites)
# Toner-style base map of London at zoom 13
london_ton_13 <- get_map(
  location = "London, England",
  zoom = 13,
  source = "stamen",
  maptype = "toner"
)
# One large dot per landmark, coloured by its name
ggmap(london_ton_13) +
  geom_point(data = xx, mapping = aes(colour = location), size = 6)
# london_sites must exist and ggmap must be loaded
# Rebuild xx: coordinates plus a short label per landmark
xx <- geocode(london_sites)
xx$location <- sub(", London", "", london_sites)
# Break the longest label over two lines so it fits on the map
xx$location[5] <- "Queen Elizabeth\nOlympic Park"
# Bounding box around all landmarks, padded by f = 0.3: bbox
bbox <- with(xx, make_bbox(lon = lon, lat = lat, f = 0.3))
# Fetch the toner map again, this time for the bounding box
london_ton_13 <- get_map(
  location = bbox,
  zoom = 13,
  source = "stamen",
  maptype = "toner"
)
# Same point map as before, now on the bbox-based tiles
ggmap(london_ton_13) +
  geom_point(data = xx, mapping = aes(colour = location), size = 6)
# Alternative: bold red text labels on a light grey box instead of dots
ggmap(london_ton_13) +
  geom_label(
    data = xx,
    mapping = aes(label = location),
    size = 4,
    fontface = "bold",
    fill = "grey90",
    colour = "#E41A1C"
  )
# bundes (fortified German state outlines) and the packages must be available
# Download a base map of "Germany" at zoom level 6: germany_06
germany_06 <- get_map("Germany", zoom = 6)
# Overlay the state borders as unfilled red polygons on the base map
ggmap(germany_06) +
  geom_polygon(
    data = bundes,
    mapping = aes(x = long, y = lat, group = group),
    fill = NA,
    colour = "red"
  ) +
  coord_map()
# ImageMagick (installed with Homebrew)
# Inspect structure of japan
# Read the Japan population table (AGE, POP, time, SEX) from the course assets
japan <- read_tsv(
  file = "https://assets.datacamp.com/production/course_862/datasets/japanPOP.txt"
)
str(japan)
## Classes 'tbl_df', 'tbl' and 'data.frame': 8282 obs. of 4 variables:
## $ AGE : int 0 1 2 3 4 5 6 7 8 9 ...
## $ POP : int -572954 -581748 -585239 -582223 -568788 -571899 -590530 -602349 -612527 -620373 ...
## $ time: int 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
## $ SEX : chr "Male" "Male" "Male" "Male" ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 4
## .. ..$ AGE : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ POP : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ time: list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ SEX : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
# Animated population pyramid: one frame per distinct `time` value, written
# to pyramid.gif with 0.1 s between frames.
# library(animation)
saveGIF(
  expr = {
    for (yr in unique(japan$time)) {
      # Rows belonging to the current frame
      year_slice <- subset(japan, time == yr)
      is_female <- year_slice$SEX == "Female"
      is_male <- year_slice$SEX == "Male"
      # Horizontal bars per age, females and males as separate layers;
      # the frame's time value is used as the title.
      print(
        ggplot(year_slice, aes(x = AGE, y = POP, fill = SEX, width = 1)) +
          coord_flip() +
          geom_bar(data = year_slice[is_female, ], stat = "identity") +
          geom_bar(data = year_slice[is_male, ], stat = "identity") +
          ggtitle(yr)
      )
    }
  },
  movie.name = "pyramid.gif",
  interval = 0.1
)
## [1] TRUE
## Vocab comes from the car library
head(Vocab)
## year sex education vocabulary
## 20040001 2004 Female 9 3
## 20040002 2004 Female 14 6
## 20040003 2004 Male 14 9
## 20040005 2004 Female 17 8
## 20040008 2004 Male 14 1
## 20040010 2004 Male 14 7
# Update the static plot: one linear fit of vocabulary on education per year.
# `frame` and `cumulative` are the extra aesthetics read by gganimate();
# cumulative is spelled TRUE rather than T because T is an ordinary variable
# that can be reassigned, whereas TRUE is a reserved word.
p <- ggplot(Vocab, aes(x = education, y = vocabulary,
                       color = year, group = year,
                       frame = year, cumulative = TRUE)) +
  stat_smooth(method = "lm", se = FALSE, size = 3)
# Call gganimate on p
# From the gganimate library; writes vocab.gif with 0.5 s per frame
animation <- gganimate(p, filename = "vocab.gif", interval = 0.5)
# And one more, because this is fun and I think it's pretty powerful.
# The gapminder data set ships with the gapminder package
# library(gapminder)
head(gapminder)
## # A tibble: 6 x 6
## country continent year lifeExp pop gdpPercap
## <fctr> <fctr> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
# Use the black-and-white theme for every plot from here on
theme_set(theme_bw())
# Life expectancy against GDP per capita (log10 x axis): point size follows
# log population, colour follows continent, and each year is one frame.
p <- ggplot(
  data = gapminder,
  mapping = aes(
    x = gdpPercap,
    y = lifeExp,
    size = log(pop),
    color = continent,
    frame = year
  )
) +
  geom_point() +
  scale_x_log10()
# Render the frames to gapminder.gif (800 x 600, 0.5 s per frame)
animation <- gganimate(
  p,
  filename = "gapminder.gif",
  interval = 0.5,
  ani.width = 800,
  ani.height = 600
)