Looking forward to getting out and embracing nature? This week we present national park visit data to show which parks were the most popular in the 80s, 90s, 2000s, and in 2017 (the most recent data), as well as which parks suddenly became popular since 2000 and which have been forgotten since the beginning of the millennium.
The data source is https://data.world/ (TBT Week 20 - US National Park Visits - 1979 to 2017).
Rename the columns so they are easier to refer to, and convert the data frame to a tibble.
# Load the park visit data, give the columns short snake_case names, and
# convert the result to a tibble.
park_visit <-
  read.csv("US National Parks Visists 1979 - 2017.CSV", encoding = "UTF-8") %>%
  # The first column (the park name) is renamed by position rather than by
  # name: the file's byte-order mark ends up inside that column's name when
  # the CSV is read (e.g. "X.U.FEFF.Park"), and the mangled form can differ
  # between platforms/locales, so matching it literally is fragile.
  rename(
    park = 1,
    state = State,
    lat = Latitude,
    lon = Longitude,
    established = Date.established.as.park,
    size_acre_2017 = X2017.Area..Acres.,
    size_km_2017 = X2017.Area..Sq..KM.,
    description = Description,
    year = Visits.Year,
    visits = Recreation.Visits
  ) %>%
  as_tibble()
# Line chart of the number of rows per year in park_visit (presumably one row
# per park per year, so this is the number of parks with data each year).
# The y axis is forced to include 0 so small fluctuations are not exaggerated.
ggplot(count(park_visit, year), aes(x = year, y = n)) +
  geom_line() +
  expand_limits(y = 0) +
  labs(
    title = "The number of national parks did not change much between 1979-2017",
    x = "year (1979-2017)",
    y = "number of national parks"
  )
# Choropleth of the number of parks per state, based on the 2017 rows.
park_visit %>%
  filter(year == 2017) %>%
  count(state, name = "number of parks") %>%
  # map_data("state") is joined on its `region` column, so state names are
  # lower-cased first to line up with it.
  mutate(state = tolower(state)) %>%
  # right_join keeps every map polygon row, including states with no count
  # (their fill value is NA).
  right_join(map_data("state"), by = c(state = "region")) %>%
  ggplot(aes(long, lat, group = group, fill = `number of parks`)) +
  geom_polygon() +
  coord_map() +
  labs(title = "Number of national parks by state") +
  ggthemes::theme_map() +
  theme(plot.title = element_text(size = 20, hjust = 0.5))
# Top 10 parks by average annual visits within each time period, drawn as one
# bar-chart facet per period.
park_visit %>%
  # Bucket each year into a period. A row whose year is NA matches no
  # condition, gets an NA period, and is dropped by the filter below.
  mutate(period = case_when(
    year < 1980 ~ "70s",
    year < 1990 ~ "80s",
    year < 2000 ~ "90s",
    year < 2010 ~ "2000s",
    year < 2017 ~ "2010-2016",
    year >= 2017 ~ "2017"
  )) %>%
  filter(!is.na(period)) %>%
  # Fix the level order so the facets appear chronologically.
  mutate(period = factor(
    period,
    levels = c("70s", "80s", "90s", "2000s", "2010-2016", "2017")
  )) %>%
  # One row per (period, park) holding the mean yearly visits.
  group_by(period, park) %>%
  summarise(average_visits = mean(visits)) %>%
  ungroup() %>%
  # Keep the 10 highest averages within each period (ties are all kept).
  group_by(period) %>%
  top_n(n = 10, wt = average_visits) %>%
  ungroup() %>%
  mutate(average_visits_m = average_visits / 1000000) %>%
  # reorder_within()/scale_y_reordered() sort the bars inside each facet
  # independently.
  mutate(park = reorder_within(park, average_visits_m, period)) %>%
  ggplot(aes(x = average_visits_m, y = park)) +
  geom_col() +
  facet_wrap(~period, scales = "free_y") +
  scale_y_reordered() +
  labs(
    x = "Average annual visits within the time period (in millions)",
    y = "Top 10 visited park",
    title = "Most popular parks across time periods (1979-2017)"
  )
# Relative change in visits between 2010 and 2017: the ten largest increases
# and the ten largest decreases, drawn as horizontal bars.
park_visit %>%
  filter(year %in% c(2010, 2017)) %>%
  # Spread the two years into `2010` and `2017` columns. A park lacking one
  # of the years gets NA there, which is pivot_wider()'s default fill.
  pivot_wider(names_from = "year", values_from = "visits") %>%
  mutate(ratio_2017_2010 = `2017` / `2010` - 1) %>%
  # Drop parks without a usable ratio (a missing year gives NA, zero visits
  # in 2010 gives Inf/NaN); otherwise they would sort last and take the
  # place of the genuine largest decreases.
  filter(is.finite(ratio_2017_2010)) %>%
  arrange(desc(ratio_2017_2010)) %>%
  # First 10 and last 10 rows. head()/tail() on the row indices plus unique()
  # keep this valid, with no duplicated rows, when fewer than 20 parks remain.
  slice(unique(c(head(seq_len(n()), 10), tail(seq_len(n()), 10)))) %>%
  mutate(park = fct_reorder(park, ratio_2017_2010)) %>%
  ggplot(aes(x = ratio_2017_2010, y = park, fill = ratio_2017_2010 > 0)) +
  geom_col() +
  labs(
    title = "Park popularity increased/decreased from 2010 to 2017",
    y = "park",
    x = "The absolute ratio of visits in 2017 over 2010\n(visit in 2017 / visits in 2010) - 1",
    fill = "increased"
  )
The top 5 most visited national parks within each state. A general description of each park is available upon clicking/tapping on the dots.
# Interactive map of the five most visited parks per state in 2017 (ties at
# the cut-off are all kept). Each marker's popup is an HTML table of details.
park_visit %>%
  filter(year == 2017) %>%
  arrange(desc(visits)) %>%
  group_by(state) %>%
  top_n(n = 5, wt = visits) %>%
  ungroup() %>%
  mutate(visits_2017_in_millions = visits / 1e6) %>%
  # Stack the fields shown in the popup into key/value rows.
  gather(
    key, value,
    park, established, size_acre_2017, description, visits_2017_in_millions
  ) %>%
  replace_na(list(value = "Unknown")) %>%
  # Turn the column names into bold, title-cased labels for the popup table.
  mutate(
    key = paste0("<b>", str_to_title(str_replace_all(key, "_", " ")), "</b>")
  ) %>%
  # Collapse the key/value rows back into one row per marker, then render each
  # nested table as HTML. escape = FALSE keeps the <b> tags as markup.
  nest(data = c(key, value)) %>%
  mutate(html = map(
    data,
    ~ knitr::kable(.x, format = "html", escape = FALSE, col.names = c("", ""))
  )) %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~lon,
    lat = ~lat,
    popup = ~html,
    radius = 5
  ) %>%
  addMeasure()