# Create movies_small
library(ggplot2movies)
set.seed(123)
movies_small <- movies[sample(nrow(movies), 1000), ]
movies_small$rating <- factor(round(movies_small$rating))
# Explore movies_small with str()
str(movies_small)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1000 obs. of 24 variables:
## $ title : chr "Fair and Worm-er" "Shelf Life" "House: After Five Years of Living" "Three Long Years" ...
## $ year : int 1946 2000 1955 2003 1963 1992 1999 1972 1994 1985 ...
## $ length : int 7 4 11 76 103 107 87 84 127 94 ...
## $ budget : int NA NA NA NA NA NA NA NA NA NA ...
## $ rating : Factor w/ 10 levels "1","2","3","4",..: 7 7 6 8 8 5 4 8 5 5 ...
## $ votes : int 16 11 15 11 103 28 105 9 37 28 ...
## $ r1 : num 0 0 14.5 4.5 4.5 4.5 14.5 0 4.5 4.5 ...
## $ r2 : num 0 0 0 0 4.5 0 4.5 0 4.5 0 ...
## $ r3 : num 0 0 4.5 4.5 0 4.5 4.5 0 14.5 4.5 ...
## $ r4 : num 0 0 4.5 0 4.5 4.5 4.5 0 4.5 14.5 ...
## $ r5 : num 4.5 4.5 0 0 4.5 0 4.5 14.5 24.5 4.5 ...
## $ r6 : num 4.5 24.5 34.5 4.5 4.5 0 14.5 0 4.5 14.5 ...
## $ r7 : num 64.5 4.5 24.5 0 14.5 4.5 14.5 14.5 14.5 14.5 ...
## $ r8 : num 14.5 24.5 4.5 4.5 14.5 24.5 14.5 24.5 14.5 14.5 ...
## $ r9 : num 0 0 0 14.5 14.5 24.5 14.5 14.5 4.5 4.5 ...
## $ r10 : num 14.5 24.5 14.5 44.5 44.5 24.5 14.5 44.5 4.5 24.5 ...
## $ mpaa : chr "" "" "" "" ...
## $ Action : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Animation : int 1 0 0 0 0 0 0 0 0 0 ...
## $ Comedy : int 1 0 0 1 0 1 1 1 0 0 ...
## $ Drama : int 0 0 0 0 1 0 0 0 1 1 ...
## $ Documentary: int 0 0 1 0 0 0 0 0 0 0 ...
## $ Romance : int 0 0 0 0 0 0 1 0 0 0 ...
## $ Short : int 1 1 1 0 0 0 0 0 0 0 ...
# Build a scatter plot with mean and 95% CI
ggplot(movies_small, aes(x = rating, y = votes)) +
geom_point() +
stat_summary(fun.data = "mean_cl_normal",
geom = "crossbar",
width = 0.2,
col = "red") +
scale_y_log10()
str(diamonds)
## Classes 'tbl_df', 'tbl' and 'data.frame': 53940 obs. of 10 variables:
## $ carat : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Reproduce the plot
ggplot(diamonds, aes(x = carat, y = price, col = color)) +
geom_point(alpha = 0.5, size = 0.5, shape = 16) +
scale_x_log10(expression(log[10](Carat)), limits = c(0.1, 10)) +
scale_y_log10(expression(log[10](Price)), limits = c(100, 100000)) +
scale_color_brewer(palette = "YlOrRd") +
coord_equal() +
theme_classic()
# Add smooth layer and facet the plot
ggplot(diamonds, aes(x = carat, y = price, col = color)) +
stat_smooth(method = "lm") +
scale_x_log10(expression(log[10](Carat)), limits = c(0.1,10)) +
scale_y_log10(expression(log[10](Price)), limits = c(100,100000)) +
scale_color_brewer(palette = "YlOrRd") +
coord_equal() +
theme_classic()
# movies_small is available
# Add a boxplot geom
d <- ggplot(movies_small, aes(x = rating, y = votes)) +
geom_point() +
geom_boxplot() +
stat_summary(fun.data = "mean_cl_normal",
geom = "crossbar",
width = 0.2,
col = "red")
# Untransformed plot
d
# Transform the scale
d + scale_y_log10()
# Transform the coordinates
## coord_trans is different to scale transformations in that it occurs after statistical transformation and will affect the visual appearance of geoms - there is no guarantee that straight lines will continue to be straight.
## This does not work in my case.
## It likely is from the statitics having a zero value.
## d + coord_trans(y = "log10")
# It works fine without the stats layer
ggplot(movies_small, aes(x = rating, y = votes)) +
geom_point() +
geom_boxplot() +
coord_trans(y = "log10")
This is the example from the documentation which actually works
# Three ways of doing transformation in ggplot:
# * by transforming the data
ggplot(diamonds, aes(log10(carat), log10(price))) +
geom_point()
# * by transforming the scales
ggplot(diamonds, aes(carat, price)) +
geom_point() +
scale_x_log10() +
scale_y_log10()
# * by transforming the coordinate system:
ggplot(diamonds, aes(carat, price)) +
geom_point() +
coord_trans(x = "log10", y = "log10")
# The difference between transforming the scales and
# transforming the coordinate system is that scale
# transformation occurs BEFORE statistics, and coordinate
# transformation afterwards. Coordinate transformation also
# changes the shape of geoms:
d <- subset(diamonds, carat > 0.5)
ggplot(d, aes(carat, price)) +
geom_point() +
geom_smooth(method = "lm") +
scale_x_log10() +
scale_y_log10()
ggplot(d, aes(carat, price)) +
geom_point() +
geom_smooth(method = "lm") +
coord_trans(x = "log10", y = "log10")
# Plot object p
p <- ggplot(diamonds, aes(x = carat, y = price))
# Use cut_interval
p + geom_boxplot(aes(group = cut_interval(carat, n = 10)))
# Use cut_number
p + geom_boxplot(aes(group = cut_number(carat, n = 10)))
# Use cut_width
p + geom_boxplot(aes(group = cut_width(carat, width = 0.25)))
cut_interval(x, n)
makes n groups from vector x with equal range.cut_number(x, n)
makes n groups from vector x with (approximately) equal numbers of observations.cut_width(x, width)
makes groups of width width from vector x.plot_quart <- function(n) {
set.seed(123)
playData <- data.frame(raw.values = rnorm(n, 1, 6))
quan.summary <- data.frame(t(sapply(1:9, function(x) quantile(playData$raw.values, type = x))))
names(quan.summary) <- c("Min", "Q1", "Median", "Q3", "Max")
quan.summary$Type <- as.factor(1:9)
library(reshape2)
quan.summary <- melt(quan.summary, id = "Type")
quan.summary <- list(quartiles = quan.summary, values = playData)
ggplot(quan.summary$quartiles, aes(x = Type, y = value, col = variable)) +
geom_point() +
geom_rug(data = quan.summary$values, aes(y = raw.values), sides = "l", inherit.aes = F)
}
plot_quart(4)
plot_quart(10)
plot_quart(50)
plot_quart(100)
load('data/test_datasets.RData')
str(ch1_test_data)
## 'data.frame': 200 obs. of 3 variables:
## $ norm : num -0.5605 -0.2302 1.5587 0.0705 0.1293 ...
## $ bimodal: num 0.199 -0.688 -2.265 -1.457 -2.414 ...
## $ uniform: num -0.117 -0.537 -1.515 -1.812 -0.949 ...
# Calculating density: d
d <- density(ch1_test_data$norm, bw = "nrd0", kernel = "gaussian")
# Use which.max() to calculate mode
mode <- d$x[which.max(d$y)]
# Finish the ggplot call
ggplot(ch1_test_data, aes(x = norm)) +
geom_density() +
geom_rug() +
geom_vline(xintercept = mode, col = "red")
# ch1_test_data is available
# Arguments you'll need later on
fun_args <- list(mean = mean(ch1_test_data$norm), sd = sd(ch1_test_data$norm))
# Finish the ggplot
ggplot(ch1_test_data, aes(x = norm)) +
geom_histogram(aes(y = ..density..)) +
geom_density(col = "red") +
stat_function(
fun = dnorm,
args = fun_args,
col = "blue")
# small_data is available
small_data <- structure(list(x = c(-3.5, 0, 0.5, 6)), .Names = "x", row.names = c(NA,
-4L), class = "data.frame")
# Get the bandwith
get_bw <- density(small_data$x)$bw
# Basic plotting object
p <- ggplot(small_data, aes(x = x)) +
geom_rug() +
coord_cartesian(ylim = c(0,0.5))
# Create three plots
p + geom_density()
p + geom_density(adjust = 0.25)
p + geom_density(bw = 0.25 * get_bw)
# Create two plots
## rectangular kernel
p + geom_density(kernel = "r")
## epanechnikov kernel
p + geom_density(kernel = "e")
# Finish the plot
ggplot(diamonds, aes(x = cut, y = price, col = color)) +
geom_boxplot(varwidth = T) +
facet_grid(. ~ color)
# ch1_test_data and ch1_test_data2 are available
str(ch1_test_data)
## 'data.frame': 200 obs. of 3 variables:
## $ norm : num -0.5605 -0.2302 1.5587 0.0705 0.1293 ...
## $ bimodal: num 0.199 -0.688 -2.265 -1.457 -2.414 ...
## $ uniform: num -0.117 -0.537 -1.515 -1.812 -0.949 ...
str(ch1_test_data2)
## 'data.frame': 400 obs. of 2 variables:
## $ dist : Factor w/ 2 levels "norm","bimodal": 1 1 1 1 1 1 1 1 1 1 ...
## $ value: num -0.5605 -0.2302 1.5587 0.0705 0.1293 ...
# Plot with ch1_test_data
ggplot(ch1_test_data, aes(x = norm)) +
geom_rug() +
geom_density()
# Plot two distributions with ch1_test_data2
ggplot(ch1_test_data2, aes(x = value, fill = dist, col = dist)) +
geom_rug(alpha = 0.6) +
geom_density(alpha = 0.6)
# Individual densities
ggplot(mammals[mammals$vore == "Insectivore", ],
aes(x = sleep_total, fill = vore)) +
geom_density(col = NA, alpha = 0.35) +
scale_x_continuous(limits = c(0, 24)) +
coord_cartesian(ylim = c(0, 0.3))
# With faceting
ggplot(mammals, aes(x = sleep_total, fill = vore)) +
geom_density(col = NA, alpha = 0.35) +
scale_x_continuous(limits = c(0, 24)) +
coord_cartesian(ylim = c(0, 0.3)) +
facet_wrap( ~ vore, nrow = 2)
# Note that by default, the x ranges fill the scale
ggplot(mammals, aes(x = sleep_total, fill = vore)) +
geom_density(col = NA, alpha = 0.35) +
scale_x_continuous(limits = c(0, 24)) +
coord_cartesian(ylim = c(0, 0.3))
# Trim each density plot individually
ggplot(mammals, aes(x = sleep_total, fill = vore)) +
geom_density(col = NA, alpha = 0.35, trim = T) +
scale_x_continuous(limits=c(0,24)) +
coord_cartesian(ylim = c(0, 0.3))
# Unweighted density plot from before
ggplot(mammals, aes(x = sleep_total, fill = vore)) +
geom_density(col = NA, alpha = 0.35) +
scale_x_continuous(limits = c(0, 24)) +
coord_cartesian(ylim = c(0, 0.3))
# Unweighted violin plot
ggplot(mammals, aes(x = vore, y = sleep_total, fill = vore)) +
geom_violin()
# Calculate weighting measure
library(dplyr)
mammals2 <- mammals %>%
group_by(vore) %>%
mutate(n = n() / nrow(mammals)) -> mammals
str(mammals2, give.attr = F)
## Classes 'grouped_df', 'tbl_df', 'tbl' and 'data.frame': 76 obs. of 3 variables:
## $ vore : Factor w/ 4 levels "Carnivore","Herbivore",..: 1 4 2 4 2 2 1 1 2 2 ...
## $ sleep_total: num 12.1 17 14.4 14.9 4 14.4 8.7 10.1 3 5.3 ...
## $ n : num 0.25 0.263 0.421 0.263 0.421 ...
str(mammals, give.attr = F)
## Classes 'grouped_df', 'tbl_df', 'tbl' and 'data.frame': 76 obs. of 3 variables:
## $ vore : Factor w/ 4 levels "Carnivore","Herbivore",..: 1 4 2 4 2 2 1 1 2 2 ...
## $ sleep_total: num 12.1 17 14.4 14.9 4 14.4 8.7 10.1 3 5.3 ...
## $ n : num 0.25 0.263 0.421 0.263 0.421 ...
# Weighted density plot
ggplot(mammals, aes(x = sleep_total, fill = vore)) +
geom_density(aes(weight = n), col = NA, alpha = 0.35) +
scale_x_continuous(limits = c(0, 24)) +
coord_cartesian(ylim = c(0, 0.3))
# Weighted violin plot
ggplot(mammals, aes(x = vore, y = sleep_total, fill = vore)) +
geom_violin(aes(weight = n), col = NA)
# Base layers
p <- ggplot(faithful, aes(x = waiting, y = eruptions)) +
scale_y_continuous(limits = c(1, 5.5), expand = c(0, 0)) +
scale_x_continuous(limits = c(40, 100), expand = c(0, 0)) +
coord_fixed(60 / 4.5)
# 1 - Use geom_density_2d()
p + geom_density_2d()
# 2 - Use stat_density_2d() with arguments
p + stat_density_2d(aes(col = ..level..), h = c(5, 0.5))
# Load in the viridis package
library(viridis)
# Add viridis color scale
ggplot(faithful, aes(x = waiting, y = eruptions)) +
scale_y_continuous(limits = c(1, 5.5), expand = c(0,0)) +
scale_x_continuous(limits = c(40, 100), expand = c(0,0)) +
coord_fixed(60/4.5) +
stat_density_2d(geom = "tile", aes(fill = ..density..), h=c(5,.5), contour = FALSE) +
scale_fill_viridis()
# pairs
pairs(iris[1:4])
# chart.Correlation
library(PerformanceAnalytics)
chart.Correlation(iris[1:4])
# ggpairs
library(GGally)
mtcars_fact <- mtcars %>%
mutate(
cyl = as.factor(cyl),
vs = as.factor(vs),
am = as.factor(am),
gear = as.factor(gear),
carb = as.factor(carb)
)
ggpairs(mtcars_fact[1:3])
library(ggplot2)
library(reshape2)
cor_list <- function(x) {
L <- M <- cor(x)
M[lower.tri(M, diag = TRUE)] <- NA
M <- melt(M)
names(M)[3] <- "points"
L[upper.tri(L, diag = TRUE)] <- NA
L <- melt(L)
names(L)[3] <- "labels"
merge(M, L)
}
# Calculate xx with cor_list
library(dplyr)
xx <- iris %>%
group_by(Species) %>%
do(cor_list(.[1:4]))
# Finish the plot
ggplot(xx, aes(x = Var1, y = Var2)) +
geom_point(
aes(col = points, size = abs(points)),
shape = 16
) +
geom_text(
aes(col = labels, size = abs(labels), label = round(labels, 2))
) +
scale_size(range = c(0, 6)) +
scale_color_gradient2("r", limits = c(-1, 1)) +
scale_y_discrete("", limits = rev(levels(xx$Var1))) +
scale_x_discrete("") +
guides(size = FALSE) +
geom_abline(slope = -1, intercept = nlevels(xx$Var1) + 1) +
coord_fixed() +
facet_grid(. ~ Species) +
theme(axis.text.y = element_text(angle = 45, hjust = 1),
axis.text.x = element_text(angle = 45, hjust = 1),
strip.background = element_blank())
# Explore africa
load('data/africa.RData')
str(africa)
## 'data.frame': 40093 obs. of 3 variables:
## $ Sand: num 24 36 56 52 65 43 42 47 57 51 ...
## $ Silt: num 12 14 18 21 3 14 22 19 15 14 ...
## $ Clay: num 64 50 26 27 32 43 36 34 28 35 ...
africa_sample <- sample_n(africa, 50)
str(africa_sample)
## 'data.frame': 50 obs. of 3 variables:
## $ Sand: num 42 28 83 40 35 24 29 30 75 92 ...
## $ Silt: num 15 29 6 14 10 22 20 4 10 6 ...
## $ Clay: num 43 43 11 46 55 54 51 66 15 2 ...
# Add an ID column from the row.names
africa_sample$ID <- row.names(africa_sample)
head(africa_sample)
## Sand Silt Clay ID
## 34893 42 15 43 34893
## 85261 28 29 43 85261
## 64201 83 6 11 64201
## 52595 40 14 46 52595
## 44793 35 10 55 44793
## 79175 24 22 54 79175
# Gather africa_sample
library(tidyr)
africa_sample_tidy <- gather(africa_sample, key, value, -ID)
head(africa_sample_tidy)
## ID key value
## 1 34893 Sand 42
## 2 85261 Sand 28
## 3 64201 Sand 83
## 4 52595 Sand 40
## 5 44793 Sand 35
## 6 79175 Sand 24
# Finish the ggplot command
ggplot(africa_sample_tidy, aes(x = factor(ID), y = value, fill = key)) +
geom_col() +
coord_flip()
# Load ggtern
library(ggtern)
# Build ternary plot
str(africa)
## 'data.frame': 40093 obs. of 3 variables:
## $ Sand: num 24 36 56 52 65 43 42 47 57 51 ...
## $ Silt: num 12 14 18 21 3 14 22 19 15 14 ...
## $ Clay: num 64 50 26 27 32 43 36 34 28 35 ...
ggtern(africa, aes(x = Sand, y = Silt, z = Clay)) +
geom_point(shape = 16, alpha = 0.2)
# ggtern and ggplot2 are loaded
# Original plot:
ggtern(africa, aes(x = Sand, y = Silt, z = Clay)) +
geom_point(shape = 16, alpha = 0.2)
# Plot 1
ggtern(africa, aes(x = Sand, y = Silt, z = Clay)) +
geom_density_tern()
# Plot 2
ggtern(africa, aes(x = Sand, y = Silt, z = Clay)) +
stat_density_tern(geom = 'polygon', aes(fill = ..level.., alpha = ..level..)) +
guides(fill = FALSE)
# Load geomnet & examine structure of madmen
library(geomnet)
str(madmen)
## List of 2
## $ edges :'data.frame': 39 obs. of 2 variables:
## ..$ Name1: Factor w/ 9 levels "Betty Draper",..: 1 1 2 2 2 2 2 2 2 2 ...
## ..$ Name2: Factor w/ 39 levels "Abe Drexler",..: 15 31 2 4 5 6 8 9 11 21 ...
## $ vertices:'data.frame': 45 obs. of 2 variables:
## ..$ label : Factor w/ 45 levels "Abe Drexler",..: 5 9 16 23 26 32 33 38 39 17 ...
## ..$ Gender: Factor w/ 2 levels "female","male": 1 2 2 1 2 1 2 2 2 2 ...
## This is a much better way to see whats in each list. Love it.
library(purrr)
madmen %>% map(head)
## $edges
## Name1 Name2
## 1 Betty Draper Henry Francis
## 2 Betty Draper Random guy
## 3 Don Draper Allison
## 4 Don Draper Bethany Van Nuys
## 5 Don Draper Betty Draper
## 6 Don Draper Bobbie Barrett
##
## $vertices
## label Gender
## 1 Betty Draper female
## 2 Don Draper male
## 3 Harry Crane male
## 4 Joan Holloway female
## 5 Lane Pryce male
## 6 Peggy Olson female
# Merge edges and vertices
mmnet <- merge(madmen$edges, madmen$vertices,
by.x = "Name1", by.y = "label",
all = TRUE)
# Examine structure of mmnet
head(mmnet)
## Name1 Name2 Gender
## 1 Betty Draper Henry Francis female
## 2 Betty Draper Random guy female
## 3 Don Draper Allison male
## 4 Don Draper Bethany Van Nuys male
## 5 Don Draper Betty Draper male
## 6 Don Draper Bobbie Barrett male
str(mmnet)
## 'data.frame': 75 obs. of 3 variables:
## $ Name1 : Factor w/ 45 levels "Betty Draper",..: 1 1 2 2 2 2 2 2 2 2 ...
## $ Name2 : Factor w/ 39 levels "Abe Drexler",..: 15 31 2 4 5 6 8 9 11 21 ...
## $ Gender: Factor w/ 2 levels "female","male": 1 1 2 2 2 2 2 2 2 2 ...
# geomnet is pre-loaded
# Merge edges and vertices
mmnet <- merge(madmen$edges, madmen$vertices,
by.x = "Name1", by.y = "label",
all = TRUE)
head(mmnet)
## Name1 Name2 Gender
## 1 Betty Draper Henry Francis female
## 2 Betty Draper Random guy female
## 3 Don Draper Allison male
## 4 Don Draper Bethany Van Nuys male
## 5 Don Draper Betty Draper male
## 6 Don Draper Bobbie Barrett male
# Finish the ggplot command
ggplot(data = mmnet, aes(from_id = Name1, to_id = Name2)) +
geom_net(
aes(col = Gender),
size = 6,
linewidth = 1,
labelon = T,
fontsize = 3,
labelcolour = "black")
# geomnet is pre-loaded
library(ggmap)
# Merge edges and vertices
mmnet <- merge(madmen$edges, madmen$vertices,
by.x = "Name1", by.y = "label",
all = TRUE)
# Tweak the network plot
ggplot(data = mmnet, aes(from_id = Name1, to_id = Name2)) +
geom_net(
aes(col = Gender),
size = 6,
linewidth = 1,
labelon = TRUE,
fontsize = 3,
labelcolour = "black",
directed = T) +
scale_color_manual(values = c("#FF69B4", "#0099ff")) +
xlim(c(-0.05, 1.05)) +
theme_nothing() +
theme(legend.key = element_blank())
# Create linear model: res
res <- lm(Volume ~ Girth, data = trees)
# Plot res
plot(res)
# Import ggfortify and use autoplot()
library(ggfortify)
autoplot(res, ncol = 2)
# ggfortify and Canada are available
# Inspect structure of Canada
str(Canada)
## Time-Series [1:84, 1:4] from 1980 to 2001: 930 930 930 931 933 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "e" "prod" "rw" "U"
head(Canada)
## [1] 929.6105 929.8040 930.3184 931.4277 932.6620 933.5509
# Call plot() on Canada
plot(Canada)
# Call autoplot() on Canada
autoplot(Canada)
# ggfortify and eurodist are available
str(eurodist)
## Class 'dist' atomic [1:210] 3313 2963 3175 3339 2762 ...
## ..- attr(*, "Size")= num 21
## ..- attr(*, "Labels")= chr [1:21] "Athens" "Barcelona" "Brussels" "Calais" ...
# Autoplot + ggplot2 tweaking
autoplot(eurodist) +
coord_fixed()
# Autoplot of MDS
autoplot(cmdscale(eurodist, eig = TRUE),
label = TRUE,
label.size = 3,
size = 0)
# Perform clustering
iris_k <- kmeans(iris[-5], 3)
# Autoplot: color according to cluster
autoplot(iris_k, data = iris, frame = T)
# Autoplot: above, plus shape according to species
autoplot(iris_k, data = iris, frame = T, shape = 'Species')
library(maps)
library(ggmap)
# maps, ggplot2, and ggmap are pre-loaded
# Use map_data() to create usa and inspect
usa <- map_data("usa")
str(usa)
## 'data.frame': 7243 obs. of 6 variables:
## $ long : num -101 -101 -101 -101 -101 ...
## $ lat : num 29.7 29.7 29.7 29.6 29.6 ...
## $ group : num 1 1 1 1 1 1 1 1 1 1 ...
## $ order : int 1 2 3 4 5 6 7 8 9 10 ...
## $ region : chr "main" "main" "main" "main" ...
## $ subregion: chr NA NA NA NA ...
head(usa, 20)
## long lat group order region subregion
## 1 -101.4078 29.74224 1 1 main <NA>
## 2 -101.3906 29.74224 1 2 main <NA>
## 3 -101.3620 29.65056 1 3 main <NA>
## 4 -101.3505 29.63911 1 4 main <NA>
## 5 -101.3219 29.63338 1 5 main <NA>
## 6 -101.3047 29.64484 1 6 main <NA>
## 7 -101.3047 29.62192 1 7 main <NA>
## 8 -101.3161 29.58181 1 8 main <NA>
## 9 -101.3047 29.56462 1 9 main <NA>
## 10 -101.2817 29.57035 1 10 main <NA>
## 11 -101.2703 29.59327 1 11 main <NA>
## 12 -101.2646 29.61619 1 12 main <NA>
## 13 -101.2474 29.61046 1 13 main <NA>
## 14 -101.2302 29.59327 1 14 main <NA>
## 15 -101.2359 29.55316 1 15 main <NA>
## 16 -101.2531 29.53597 1 16 main <NA>
## 17 -101.2416 29.50733 1 17 main <NA>
## 18 -101.2015 29.50733 1 18 main <NA>
## 19 -101.1557 29.47868 1 19 main <NA>
## 20 -101.1041 29.47295 1 20 main <NA>
table(usa$group)
##
## 1 2 3 4 5 6 7 8 9 10
## 6886 36 30 16 10 168 17 17 19 44
# Build the map
ggplot(usa, aes(x = long, y = lat, group = group)) +
geom_polygon() +
coord_map() +
theme_nothing()
# usa, cities, and all required packages are available
library(readr)
cities <- read_tsv('https://assets.datacamp.com/production/course_862/datasets/US_Cities.txt')
head(usa)
## long lat group order region subregion
## 1 -101.4078 29.74224 1 1 main <NA>
## 2 -101.3906 29.74224 1 2 main <NA>
## 3 -101.3620 29.65056 1 3 main <NA>
## 4 -101.3505 29.63911 1 4 main <NA>
## 5 -101.3219 29.63338 1 5 main <NA>
## 6 -101.3047 29.64484 1 6 main <NA>
head(cities)
## # A tibble: 6 x 5
## City State Pop_est lat long
## <chr> <chr> <int> <dbl> <dbl>
## 1 Eugene Oregon 163460 44.0567 -123.1162
## 2 Salem Oregon 164549 44.9237 -123.0231
## 3 Hillsboro Oregon 102347 45.5167 -122.9833
## 4 Santa Rosa California 174972 38.4468 -122.7061
## 5 Portland Oregon 632309 45.5370 -122.6500
## 6 Vancouver Washington 172860 45.6372 -122.5965
## Need this to get the theme_map() and scale_color_viridis() functions
library(ggthemes)
library(viridis)
# Finish plot 1
ggplot(usa, aes(x = long, y = lat, group = group)) +
geom_polygon() +
geom_point(data = cities, aes(group = State, size = Pop_est),
col = "red", shape = 16, alpha = 0.6) +
coord_map() +
theme_map()
# Arrange cities
library(dplyr)
cities_arr <- arrange(cities, Pop_est)
# Copy-paste plot 1 and adapt
ggplot(usa, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "grey90") +
geom_point(data = cities_arr, aes(group = State, col = Pop_est),
shape = 16, size = 2) +
coord_map() +
theme_map() +
scale_color_viridis()
# pop and all required packages are available
# Use map_data() to create state
state <- map_data("state")
head(state)
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
# Map of states
ggplot(state, aes(x = long, y = lat, fill = region, group = group)) +
geom_polygon(col = "white") +
coord_map() +
theme_nothing()
# Merge state and pop: state2
state2 <- merge(state, pop)
head(state2)
## region long lat group order subregion Pop_est
## 1 alabama -87.46201 30.38968 1 1 <NA> 4858979
## 2 alabama -87.48493 30.37249 1 2 <NA> 4858979
## 3 alabama -87.52503 30.37249 1 3 <NA> 4858979
## 4 alabama -87.53076 30.33239 1 4 <NA> 4858979
## 5 alabama -87.57087 30.32665 1 5 <NA> 4858979
## 6 alabama -87.58806 30.32665 1 6 <NA> 4858979
# Map of states with populations
ggplot(state2, aes(x = long, y = lat, fill = Pop_est, group = group)) +
geom_polygon(col = "white") +
coord_map() +
theme_map()
# Import shape information: germany
library(rgdal)
germany <- readOGR(dsn = 'data/shape_files', layer = "DEU_adm1")
## OGR data source with driver: ESRI Shapefile
## Source: "data/shape_files", layer: "DEU_adm1"
## with 16 features
## It has 16 fields
# fortify germany: bundes
bundes <- fortify(germany)
# Plot map of germany
ggplot(bundes, aes(x = long, y = lat, group = group)) +
geom_polygon(fill = "blue", col = "white") +
coord_map() +
theme_nothing()
# germany, bundes and unemp are available
head(bundes)
## long lat order hole piece id group
## 1 9.650460 49.77634 1 FALSE 1 0 0.1
## 2 9.650968 49.76515 2 FALSE 1 0 0.1
## 3 9.656839 49.76145 3 FALSE 1 0 0.1
## 4 9.640400 49.75014 4 FALSE 1 0 0.1
## 5 9.652028 49.74276 5 FALSE 1 0 0.1
## 6 9.652208 49.73903 6 FALSE 1 0 0.1
# re-add state names to bundes
bundes$state <- factor(as.numeric(bundes$id))
levels(bundes$state) <- germany$NAME_1
head(bundes)
## long lat order hole piece id group state
## 1 9.650460 49.77634 1 FALSE 1 0 0.1 Baden-Württemberg
## 2 9.650968 49.76515 2 FALSE 1 0 0.1 Baden-Württemberg
## 3 9.656839 49.76145 3 FALSE 1 0 0.1 Baden-Württemberg
## 4 9.640400 49.75014 4 FALSE 1 0 0.1 Baden-Württemberg
## 5 9.652028 49.74276 5 FALSE 1 0 0.1 Baden-Württemberg
## 6 9.652208 49.73903 6 FALSE 1 0 0.1 Baden-Württemberg
head(unemp)
## state unemployment
## 1 Bayern 3.7
## 2 Baden-Württemberg 4.0
## 3 Rheinland-Pfalz 5.4
## 4 Hessen 5.8
## 5 Niedersachsen 6.5
## 6 Schleswig-Holstein 6.7
# Merge bundes and unemp: bundes_unemp
bundes_unemp <- merge(bundes, unemp)
# Update the ggplot call
ggplot(bundes_unemp, aes(x = long, y = lat, group = group, fill = unemployment)) +
geom_polygon() +
coord_map() +
theme_map()