## gapminder should already be installed; if it is not found, restart your JupyterHub server.
### If issues persist, this backup method works:
#options(download.file.method = "wget")
#remotes::install_github("jennybc/gapminder")
## Run this code to manage packages and install them as needed.
# Install pacman only when it is missing. requireNamespace() checks for the
# package without attaching it, which is enough here because pacman is called
# through `pacman::` on the next statement. The CRAN mirror is reached over
# HTTPS rather than plain HTTP.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman", repos = "https://cran.us.r-project.org")
}

# p_load() loads each package if it is installed, or installs and then loads it.
pacman::p_load(tidyverse, gapminder, leaflet)
Data visualization and mapping are essential techniques in data
analysis, allowing us to uncover patterns, trends, and relationships
within datasets. This project will guide us through fundamental data
visualization techniques using R, focusing on the gapminder
dataset and Seattle Airbnb listings data. We will explore how to create
scatterplots, apply scales, use colors effectively, implement faceting,
and generate interactive maps.
- gapminder dataset
- ggplot2
- leaflet
### 1. The gapminder Dataset
The gapminder dataset provides demographic and economic
data from countries worldwide. It includes key variables such as GDP per
capita, life expectancy, and population. To get started, install and
load the necessary packages:
# Uncomment to install gapminder if it is not available yet:
# install.packages("gapminder")

# Attach the data package first, then the tidyverse.
library("gapminder")
library("tidyverse")

# Show the structure of the gapminder data: column names, types, first values.
str(object = gapminder)
## tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num [1:1704] 28.8 30.3 32 34 36.1 ...
## $ pop : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num [1:1704] 779 821 853 836 740 ...
The dataset contains categorical variables stored as factors (country,
continent) and numeric variables (the integers year and pop, and the
doubles lifeExp and gdpPercap).
Understanding these data types helps in selecting appropriate
visualization techniques.
To focus on a specific country, subset the data for Algeria:
# Keep only the rows whose country is Algeria.
algeria_data <- filter(gapminder, country == "Algeria")

# Preview the first six rows of the subset.
head(algeria_data, n = 6)
## # A tibble: 6 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Algeria Africa 1952 43.1 9279525 2449.
## 2 Algeria Africa 1957 45.7 10270856 3014.
## 3 Algeria Africa 1962 48.3 11000948 2551.
## 4 Algeria Africa 1967 51.4 12760499 3247.
## 5 Algeria Africa 1972 54.5 14760787 4183.
## 6 Algeria Africa 1977 58.0 17152804 4910.
### 2. Scatterplots with ggplot2
Scatterplots help visualize relationships between two variables and detect outliers. The following example plots life expectancy against GDP per capita:
# Scatterplot with GDP per capita on the x-axis and life expectancy on the
# y-axis, one point per country-year row.
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
To analyze the relationship between population and life expectancy:
# Population (x) against life expectancy (y), with a title, axis labels,
# and the minimal theme.
ggplot(data = gapminder, mapping = aes(x = pop, y = lifeExp)) +
  geom_point() +
  ggtitle("Population vs. Life Expectancy") +
  xlab("Population") +
  ylab("Life Expectancy") +
  theme_minimal()
#### Scales
Applying a logarithmic scale can improve data visualization by making patterns clearer:
# Same scatterplot as before, with the x-axis (GDP per capita) drawn on a
# base-10 logarithmic scale.
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
#### Colors
Adding colors enhances data interpretation. Coloring by continent:
# Color each point by its continent (a discrete color scale), keeping the
# log-scaled x-axis.
gapminder %>%
  ggplot(mapping = aes(gdpPercap, lifeExp, colour = continent)) +
  geom_point() +
  scale_x_log10()
For continuous variables, a gradient color scale can be applied:
# Color each point by the natural log of population; a numeric mapping
# produces a continuous gradient instead of discrete colors.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, colour = log(pop))
) +
  geom_point() +
  scale_x_log10()
#### Facets
Faceting creates multiple small plots, breaking down the data by a categorical variable:
# One panel per continent: the same log-scaled scatterplot, split with
# facet_wrap().
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(vars(continent))
### 3. Analyzing Life Expectancy Trends
To examine life expectancy trends, plot life expectancy over time for each country:
# One semi-transparent line per country (group = country), colored by
# continent and split into one panel per continent.
gapminder %>%
  ggplot(
    mapping = aes(x = year, y = lifeExp, group = country, colour = continent)
  ) +
  geom_line(alpha = 0.5) +
  facet_wrap(vars(continent)) +
  labs(
    title = "Life Expectancy Over Time by Country",
    x = "Year",
    y = "Life Expectancy",
    colour = "Continent"
  ) +
  theme_minimal()
### 4. Interactive Mapping with leaflet
The leaflet package enables interactive map
visualization. First, install and load the package, then read the Airbnb
dataset:
# Install leaflet only when it is missing. requireNamespace() tests for the
# package without attaching it; the library() calls below do the attaching
# and raise an error if a package is still unavailable.
if (!requireNamespace("leaflet", quietly = TRUE)) {
  install.packages("leaflet", dependencies = TRUE)
}
library(leaflet)
library(tidyverse)

# Read the Airbnb listings from listings.csv in the working directory.
airbnb_data <- read_csv("listings.csv")
## Rows: 7785 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): name, host_name, neighbourhood_group, neighbourhood, room_type
## dbl (10): id, host_id, latitude, longitude, price, minimum_nights, number_o...
## date (1): last_review
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
To create a basic map displaying Airbnb listings:
# Basic interactive map: default tiles plus one circle per listing, with the
# listing name shown in a popup. No lng/lat are given, so leaflet picks the
# coordinate columns itself and reports which ones it assumed.
airbnb_data %>%
  leaflet() %>%
  addTiles() %>%
  addCircles(popup = ~name)
## Assuming "longitude" and "latitude" are longitude and latitude, respectively
To highlight listings priced over $200:
# Keep only the listings whose price is greater than 200.
airbnb_high_price <- filter(airbnb_data, price > 200)

# Map those listings as red circles, with the listing name in the popup.
airbnb_high_price %>%
  leaflet() %>%
  addTiles() %>%
  addCircles(
    lng = ~longitude,
    lat = ~latitude,
    popup = ~name,
    color = "red"
  )
# Color every listing by its price and add a matching legend.
#
# `price` is a continuous column, so it is mapped with colorNumeric().
# colorFactor() treats each distinct price as its own category, which asks
# RColorBrewer for more colors than "YlOrRd" provides (maximum 9) and raised
# "n too large, allowed maximum for palette YlOrRd is 9" warnings.
#
# The palette is built once so that the circles and the legend are guaranteed
# to share the same price-to-color mapping.
price_pal <- colorNumeric(palette = "YlOrRd", domain = airbnb_data$price)

leaflet(airbnb_data) %>%
  addTiles() %>%
  addCircles(
    lng = ~longitude,
    lat = ~latitude,
    color = ~price_pal(price),
    popup = ~paste("Price:", price)
  ) %>%
  addLegend(
    position = "bottomright",
    pal = price_pal,
    values = ~price,
    title = "Price Range"
  )
This project covered essential data visualization techniques in R, from basic scatterplots and faceting to interactive mapping with leaflet. By combining these methods, we can better analyze patterns and trends in datasets like gapminder and Airbnb listings, leading to more effective data-driven insights.
### References
- Kieran Healy, Data Visualization: A Practical Introduction
- Charles Lanfear, Introduction to R for Social Scientists