Lab #6

library(ggplot2)
library(maps)
library(ggmap)

## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.

library(mapproj)

1. Add a comment for each line of code, explaining what that line of code does.

# Fetch the US state outline polygons via map_data()
us <- map_data("state")

# Assemble the plot in a single chained expression:
#   - long/lat supply the polygon vertices; `group` keeps each polygon separate
#   - geom_polygon() draws the shapes
#   - expand_limits() makes sure the axes cover every vertex in `us`
#   - coord_map() applies a map projection and ggtitle() adds the title
map <- ggplot(us, aes(x = long, y = lat, group = group)) +
  geom_polygon() +
  expand_limits(x = us$long, y = us$lat) +
  coord_map() +
  ggtitle("USA Map")

# Evaluating the plot object at top level renders it
map

2. The map you just created fills in the area of each state in black while outlining it with a thin white line.

# Rebuild the US map with the colour scheme reversed: the fill= and color=
# arguments of geom_polygon() give white polygons with black outlines
map <- ggplot(us, aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", fill = "white") +
  expand_limits(x = us$long, y = us$lat) +
  coord_map() +
  ggtitle("USA Map")

# Render the recoloured map
map

3. Just as in step 2, the map created below fills in the area of each county in black while outlining it with a thin white line.

# County outline polygons restricted to New York
ny_counties <- map_data("county", region = "new york")

# Draw one polygon per `group`, using geom_polygon()'s default colours
ggplot(ny_counties, aes(x = long, y = lat, group = group)) +
  geom_polygon()

3a. Use the fill= and color= arguments inside the call to geom_polygon( ) to reverse the color scheme.

# County outline polygons restricted to New York
ny_counties <- map_data("county", region = "new york")

# Same county map as before, but with white polygons and black outlines
ggplot(ny_counties, aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", fill = "white")

4. Run head(ny_counties) to verify how the county outline data looks.

# Peek at the first six rows of the county outline data
head(ny_counties, n = 6)

##        long      lat group order   region subregion
## 1 -73.78550 42.46763     1     1 new york    albany
## 2 -74.25533 42.41034     1     2 new york    albany
## 3 -74.25533 42.41034     1     3 new york    albany
## 4 -74.27252 42.41607     1     4 new york    albany
## 5 -74.24960 42.46763     1     5 new york    albany
## 6 -74.22668 42.50774     1     6 new york    albany

5. Make a copy of your code from step 3 and add the following subcommand to your ggplot( ) call (don’t forget to put a plus sign after the geom_polygon( ) statement to tell R that you are continuing to build the command): coord_map(projection = "mercator")

In what way is this map different from the previous map? Be prepared to explain what a Mercator projection is.

# County outline polygons restricted to New York
ny_counties <- map_data("county", region = "new york")

# Step 3 plot plus an explicit Mercator projection:
#   - long/lat are the polygon vertices; `group` keeps each polygon separate
#   - geom_polygon() draws the shapes with its default colours
#   - coord_map(projection = "mercator") replaces the plain x/y axes with a
#     Mercator projection, which changes the map's proportions
ggplot(ny_counties, aes(x = long, y = lat, group = group)) +
  geom_polygon() +
  coord_map(projection = "mercator")

6. Grab a copy of the nyData.csv data set from:

https://intro-datascience.s3.us-east-2.amazonaws.com/nyData.csv

Read that data set into R with read_csv(). The next step assumes that you have named the resulting data frame “nyData.”

# readr supplies read_csv()
library(readr)

# Location of the New York county data set
url <- "https://intro-datascience.s3.us-east-2.amazonaws.com/nyData.csv"

# Download and parse the CSV straight from the URL into 'nyData'
nyData <- read_csv(file = url)

## Rows: 62 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): county
## num (4): pop2010, pop2000, sqMiles, popDen
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Show the first six rows as a quick check that the import worked
head(nyData, n = 6)

## # A tibble: 6 × 5
##   county      pop2010 pop2000 sqMiles  popDen
##   <chr>         <dbl>   <dbl>   <dbl>   <dbl>
## 1 albany       304204  294565   523.    582. 
## 2 allegany      48946   49927  1029.     47.6
## 3 bronx       1385108 1332650    42.1 32900. 
## 4 broome       200600  200536   706.    284. 
## 5 cattaraugus   80317   83955  1308.     61.4
## 6 cayuga        80026   81963   692.    116.

7. Next, merge your ny_counties data from the first breakout group with your new nyData data frame, with this code: mergeNY <- merge(ny_counties, nyData, all.x=TRUE, by.x="subregion", by.y="county")

# Attach the nyData columns to every county-outline vertex, matching
# ny_counties$subregion against nyData$county. all.x = TRUE keeps outline
# rows even when a county has no match in nyData.
mergeNY <- merge(ny_counties, nyData, all.x = TRUE, by.x = "subregion", by.y = "county")

# merge() sorts on the key column and does not promise to keep the rows of
# each polygon in their drawing sequence. geom_polygon() connects vertices in
# row order, so restore the sequence recorded in the 'order' column to avoid
# garbled county shapes.
mergeNY <- mergeNY[order(mergeNY$order), ]

8. Run head(mergeNY) to verify how the merged data looks.

# Inspect the first six rows of the merged outline + population data
head(mergeNY, n = 6)

##   subregion      long      lat group order   region pop2010 pop2000 sqMiles
## 1    albany -73.78550 42.46763     1     1 new york  304204  294565   522.8
## 2    albany -74.25533 42.41034     1     2 new york  304204  294565   522.8
## 3    albany -74.25533 42.41034     1     3 new york  304204  294565   522.8
## 4    albany -74.27252 42.41607     1     4 new york  304204  294565   522.8
## 5    albany -74.24960 42.46763     1     5 new york  304204  294565   522.8
## 6    albany -74.22668 42.50774     1     6 new york  304204  294565   522.8
##   popDen
## 1 581.87
## 2 581.87
## 3 581.87
## 4 581.87
## 5 581.87
## 6 581.87

9. Now drive the fill color inside each county by adding the fill aesthetic inside of your geom_polygon( ) subcommand (fill based on the pop2000).

# Choropleth of New York counties shaded by the pop2000 column.
# Position and grouping are set once on the plot; the fill aesthetic lives on
# the polygon layer, which is the only layer that uses it.
ggplot(mergeNY, aes(x = long, y = lat, group = group)) +
  # One polygon per group, filled according to pop2000
  geom_polygon(aes(fill = pop2000)) +
  # Default map projection
  coord_map() +
  # Plot title
  ggtitle("New York Counties by Population (2000)")

10. Extra (not required):

Read in the following JSON datasets: ‘https://gbfs.citibikenyc.com/gbfs/en/station_information.json’ ‘https://gbfs.citibikenyc.com/gbfs/en/station_status.json’

library(RCurl)    # getURL(): fetch the raw text of a web resource
library(jsonlite) # fromJSON(): turn JSON text into R objects

# ---- Station status feed ----
station_link <- "https://gbfs.citibikenyc.com/gbfs/en/station_status.json"
apiOutput <- getURL(station_link)  # raw JSON text
apiData <- fromJSON(apiOutput)     # parsed JSON

# Columns of the status table that are kept
cols <- c(
  "num_bikes_disabled", "num_docks_disabled", "station_id",
  "num_ebikes_available", "num_bikes_available", "num_docks_available"
)
stationStatus <- apiData$data$stations[, cols]

# ---- Station information feed ----
bikeURL <- "https://gbfs.citibikenyc.com/gbfs/en/station_information.json"
apiOutput <- getURL(bikeURL)       # raw JSON text
apiData <- fromJSON(apiOutput)     # parsed JSON

# Keep only the identifier, capacity, coordinates and name of each station
stationInfo <- apiData$data$stations[, c("station_id", "capacity", "lon", "lat", "name")]

Merge the datasets, based on ‘station_id’

# Join the station information and station status tables on their shared
# 'station_id' column
mergedData <- merge(x = stationInfo, y = stationStatus, by = "station_id")

Clean the merged dataset to only include useful information. For this work, you only need lat, lon, and the number of bikes available.

# Keep just the three columns needed for mapping: coordinates plus the
# number of bikes available at each station
cleanData <- mergedData[c("lat", "lon", "num_bikes_available")]

# Preview the first six rows of the trimmed data
head(cleanData, n = 6)

##        lat       lon num_bikes_available
## 1 40.76409 -73.91065                  12
## 2 40.81423 -73.90393                  19
## 3 40.74446 -73.89764                   8
## 4 40.73592 -74.00094                  14
## 5 40.67560 -73.94150                  12
## 6 40.67977 -73.98470                  19

Create a Stamen map using ‘get_stamenmap()’ (the code below uses ‘get_stadiamap()’ instead). Have the limits of the map be defined by the lat and lon of the stations.

# Load necessary libraries (install if needed)
library(ggmap)
library(ggplot2)

# Never hard-code the Stadia Maps API key in a script or report: anything
# committed or published exposes it. Read it from the environment instead.
# GGMAP_STADIAMAPS_API_KEY is the variable ggmap stores the key under when it
# is saved with register_stadiamaps(key, write = TRUE), so a one-time
# interactive registration (or a line in ~/.Renviron) is all that is needed.
# NOTE(review): a key that was previously embedded in this file should be
# treated as leaked and rotated in the Stadia Maps dashboard.
api_key <- Sys.getenv("GGMAP_STADIAMAPS_API_KEY")
if (!nzchar(api_key)) {
  stop(
    "No Stadia Maps API key found. Set the GGMAP_STADIAMAPS_API_KEY ",
    "environment variable (for example in ~/.Renviron) and restart R.",
    call. = FALSE
  )
}

# write = FALSE registers the key for this session only, so running the
# script does not rewrite ~/.Renviron every time.
register_stadiamaps(api_key, write = FALSE)

## ℹ Replacing old key (0a32bf5e) with new key in /Users/gilraitses/.Renviron

# Bounding box that just encloses every station: the smallest and largest
# longitude give the left/right edges, the smallest and largest latitude the
# bottom/top edges
bbox <- with(
  mergedData,
  c(left = min(lon), bottom = min(lat), right = max(lon), top = max(lat))
)

# Download the basemap for that box ("stamen_toner_lite" style, zoom 12)
map <- get_stadiamap(bbox = bbox, zoom = 12, maptype = "stamen_toner_lite")

## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.

# Draw the basemap and overlay every station as a blue point
ggmap(map) +
  geom_point(
    mapping = aes(x = lon, y = lat),
    data = mergedData,
    color = "blue",
    size = 2
  ) +
  labs(title = "Citi Bike Stations in New York City")

Show the stations, as points on the map.

# Basemap layer first, then one blue point per station, then the title
ggmap(map) +
  geom_point(
    mapping = aes(x = lon, y = lat),
    data = mergedData,
    color = "blue",
    size = 2
  ) +
  labs(title = "Citi Bike Stations in New York City")

Show the number of bikes available as a color

# Station points coloured by how many bikes are currently available
ggmap(map) +
  geom_point(
    mapping = aes(x = lon, y = lat, color = num_bikes_available),
    data = mergedData,
    size = 3
  ) +
  # Continuous colour scale running from yellow (low) to red (high)
  scale_color_gradient(name = "Bikes Available", low = "yellow", high = "red") +
  labs(title = "Citi Bike Stations: Bikes Available")