library (dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library (readr)
library (ggplot2)
library (tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (rnaturalearth)
library (rnaturalearthdata)
##
## Attaching package: 'rnaturalearthdata'
##
## The following object is masked from 'package:rnaturalearth':
##
## countries110
library (rnaturalearthhires)
library (shiny)
## Warning: package 'shiny' was built under R version 4.4.3
library (forcats)
# Load the guide licence export (see the file name) into a tibble.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact OneDrive folder exists.
guidedata <- read_csv(
  file = "/Users/pault/OneDrive - SUNY Canton/Clarkson/Coursework/IA640 - Information Visualization/HW_3_GraphingInR/Guides_Currently_Licensed_in_New_York_State_20250127.csv"
)
## Rows: 6276 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Last_Name, First_Name, City, State, Zip, Expiration Date, County, ...
## dbl (1): Badge Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Polygon outlines (one row per vertex) for US states and counties.
us_states <- map_data("state")
us_counties <- map_data("county")

# Number of rows in the guide file for every County / Activity_Description
# pair. `.groups = "drop"` returns an ungrouped tibble, so later steps do not
# silently inherit a grouping by County, and the `summarise()` grouping
# message is no longer emitted. Rows with a missing value in any column are
# dropped by na.omit().
guidedataCounty <- guidedata %>%
  group_by(County, Activity_Description) %>%
  summarize(count = n(), .groups = "drop") %>%
  na.omit()
# Five non-New-York states with the most rows in the guide file.
# The comparison `State != "NY"` also drops rows whose State is NA.
# slice_max() keeps ties at the cutoff, so more than five rows are possible.
TopStates <- guidedata %>%
  filter(State != "NY") %>%
  count(State, name = "count") %>%
  slice_max(order_by = count, n = 5) %>%
  na.omit()
# map_data("county") stores county names in lower case, so the guide data is
# lower-cased to match before joining.
# NOTE(review): punctuation may still differ (e.g. "st. lawrence" vs
# "st lawrence") -- confirm that every county in the guide data finds a match.
guidedataCounty <- guidedataCounty %>%
  ungroup() %>%
  mutate(County = tolower(County))

# One total per county. The map fills each polygon with a single `count`, so
# the per-activity rows have to be collapsed first; joining them directly
# produced a many-to-many join and several conflicting `count` values for
# every polygon vertex.
# NOTE(review): this sums rows of the guide file; if a guide has one row per
# licensed activity the total counts licences rather than people -- confirm.
# `region` is added so the join only matches counties in New York; joining on
# the county name alone also coloured same-named counties in other states.
# NOTE(review): assumes the County column holds New York county names.
county_totals <- guidedataCounty %>%
  group_by(County) %>%
  summarize(count = sum(count), .groups = "drop") %>%
  mutate(region = "new york")

# Every map vertex gets at most one total; counties without a match keep
# count = NA. `relationship` makes dplyr fail loudly if the right-hand side
# ever contains duplicate counties again.
# countycount no longer carries an Activity_Description column.
countycount <- left_join(
  us_counties,
  county_totals,
  by = c("region", "subregion" = "County"),
  relationship = "many-to-one"
)

# Same map frame restricted to the "Fishing" activity. Counties without
# fishing guides keep their polygons with count = NA.
fishing_counts <- guidedataCounty %>%
  filter(Activity_Description == "Fishing") %>%
  group_by(County, Activity_Description) %>%
  summarize(count = sum(count), .groups = "drop") %>%
  mutate(region = "new york")

countycountFish <- left_join(
  us_counties,
  fishing_counts,
  by = c("region", "subregion" = "County"),
  relationship = "many-to-one"
)
# Choropleth of `count` per county polygon.
# - coord_map() applies an Albers projection (lat0 / lat1 are passed to the
#   projection) and limits the view to longitude -80..-71, latitude 40..46.
# - The fill uses a binned viridis scale; polygons with count = NA are drawn
#   in grey50.
plot1 <- ggplot(
  data = countycount,
  mapping = aes(x = long, y = lat, group = group, fill = count)
) +
  geom_polygon(color = "grey90", linewidth = 0.1) +
  coord_map(
    projection = "albers",
    lat0 = 25,
    lat1 = 60,
    xlim = c(-80, -71),
    ylim = c(40, 46)
  ) +
  scale_fill_binned(
    aesthetics = "fill",
    guide = "colourbar",
    na.value = "grey50",
    type = "viridis"
  ) +
  labs(
    title = "Number of Guides in New York State by County",
    x = "Longitude",
    y = "Latitude"
  )
# Bar chart of the states in TopStates, one bar per state.
# fct_reorder() orders the states by count and fct_rev() flips that order, so
# the tallest bar is drawn first (left-most).
plot2 <- ggplot(
  data = TopStates,
  mapping = aes(fct_rev(fct_reorder(State, count)), count)
) +
  geom_col(fill = "blue") +
  labs(
    title = "Out-of-State Guides Licensed to Operate in NYS",
    subtitle = "Top Five States",
    x = "State",
    y = "Number of Guides"
  ) +
  theme_minimal()
# Display both static plots. These rely on top-level auto-printing, so they
# render when run interactively or knitted.
plot1
plot2
# Shiny page layout: two plot placeholders stacked in a fluid page. The
# output ids must match the names assigned on `output` in the server function.
ui <- fluidPage(
  plotOutput(outputId = "NewYorkGuides"),
  plotOutput(outputId = "TopStates")
)
# Shiny server function: fills the two plot outputs declared in `ui`.
#
# The plots are the `plot1` and `plot2` objects built earlier in this script.
# Rendering those objects, instead of repeating both ggplot definitions here,
# keeps the app and the static plots from drifting apart.
#
# input, output, session: the standard arguments Shiny passes to a server
# function; only `output` is used.
server <- function(input, output, session) {
  output$NewYorkGuides <- renderPlot(plot1)
  output$TopStates <- renderPlot(plot2)
}
# Build the Shiny app object from the layout and server function above.
shinyApp(ui = ui, server = server)
Number of Guides in New York State by County
Data types: Geospatial – latitude and longitude data. Categorical – counties within New York State (and just outside of NYS). Quantitative – discrete/nominal.
Task: Comparison
Business Relationship: Nominal Comparison – The color of each county indicates the number of licensed guides in that area but does not rank or order them in any way.
Channels: County – Shape, identity channel, categorical attribute. Count/frequency – color saturation (color hue?)
The data manipulation was not that difficult. The dplyr cheat sheet covered most of what was needed. The one tricky part of the left_join was changing the guide data to lowercase to match the conventions of the map_data data sets. The one area I’m still confused by is how to create the two layers on the plot: one layer with the ggplot data and mapping that varies the fill coloring based on the guide count, and a second geom_polygon base layer that draws the counties that do not have guides so that they are not NULL values. Maybe a binned scale that has a specific bin for zero or NULL values. Scale_fill_manual()?
Out-of-State Guides Licensed to Operate in NYS
Data types: Amount/Bars. Categorical – states with guides licensed to operate in NYS. Quantitative – discrete/nominal
Task: Comparison
Business Relationship: Rank – The length of each bar indicates the number of licensed guides in that state, and the descending order suggests a form of ranking.
Channels: Mark – line. Channel – vertical position. Abbreviated state name on x-axis.
Given the county level of the map information in the first plot, I added this plot to show what states have guides that operate in New York State. Reasonably, the states closest to NY have the most significant number of guides, but guides from all over the country are listed in the original dataset. I initially was going to create a ranked bar chart that included all states with any number of guides (including NY). When I made that plot, I saw Alaska, and several other states would have been nearly impossible to see next to all the states, including NY. It would be more beneficial, usable, and complementary to the map only to plot the top five states.