install.packages(c(tidyverse, sf, tmap, spatstat, maptools, ggplot2, sqldf)) install.packages(“spatstat.geom”, dependencies = TRUE) install.packages(“leaflet.providers”) install.packages(“markdown”, dependencies = TRUE) install.packages(“polyclip”, dependencies = TRUE) install.packages(“ggmap”, dependencies = TRUE)

rdf %>% 
  mutate(birth.year = as.numeric('birth.year')) %>% #THIS CHANGES A STRING INTO A NUMBER
  dplyr::select(-c('birth.year'))  %>%    #THIS DELETES THE OLD COLUMN 
  arrange(birth.year) #THIS SORTS ON BIRTH YEAR
Warning: Problem with `mutate()` column `birth.year`.
ℹ `birth.year = as.numeric("birth.year")`.
ℹ NAs introduced by coercion
Error: arrange() failed at implicit mutate() step. 
* Problem with `mutate()` column `..1`.
ℹ `..1 = birth.year`.
x object 'birth.year' not found
Run `rlang::last_error()` to see where the error occurred.

# Turns it into a gis file
morningStationsGeo <- st_as_sf(morningStations, coords = c("lon", "lat"), dim = "XY", crs = 4326) #crs=coordinate reference system

# Save if you feel like it! # st_write(popularStations, "popularStations.shp", delete_layer = TRUE)

# Look at the map 
tmap_mode('view')
tmap mode set to interactive viewing
tm_shape(morningStationsGeo %>% 
           filter(!is.na(morningtrips)) %>% 
           mutate(morningtrips = as.numeric(morningtrips))) + tm_dots(col = "morningtrips", style="quantile", id = "name")
tm_shape(meStationsGeo %>% 
           filter(!is.na(diff)) %>% 
           mutate(diff = as.numeric(diff))) + tm_dots(col="diff", style="quantile", id = "name")
Variable(s) "diff" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
######## PART 4 KERNEL DENSITY  ######### 

## Read SF for a file of boroughs
boroughs <- st_read("borough.shp")
Reading layer `borough' from data source 
  `/Users/chaneumpark/OneDrive - Georgia Institute of Technology/2021-Fall/Urban Analytics/R lab session/Assignment2 CitiBike/borough.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 5 features and 4 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
Geodetic CRS:  WGS84(DD)
boroughs_prj <- st_transform(boroughs, crs = 2263)

# Density map (This method doesn't allow us to see the background map)
df.size.proj <- st_transform(morningStationsGeo, crs = 2263)
p.sp <- as(df.size.proj, "Spatial")
p.ppp <- as(p.sp, "ppp")
kernelDensityMorning <- (density(p.ppp, sigma = 1000))

##Plot the two together
plot(kernelDensityMorning, main="Heat Map of Mornign Citibike Origins")
plot(boroughs_prj$geometry, add=TRUE)


##CHALLENGES FOR YOUR DENSITY MAP
##Extra: Clip the raster and clip the vector as well. (only plot the intersection of the two?) 
##Extra: Make the lines white (hint: it is now a polygon!)
##Extra: Add the top 3 (or 5) points and label them.
######## PART 5 GGPLOT EXAMPLE WITH CONTOURS ######### 

# Heatmap in ggplot
library(ggplot2)
library(ggmap)

df.size.degree <- st_transform(morningStationsGeo, crs = 4326) %>% 
  mutate(lat = st_coordinates(.)[,2],
         lon = st_coordinates(.)[,1])

##Get borders
height <- max(st_coordinates(df.size.degree)[,2]) - min(st_coordinates(df.size.degree)[,2])
width <- max(st_coordinates(df.size.degree)[,1]) - min(st_coordinates(df.size.degree)[,1])
  
sac_borders <- c(bottom = min(st_coordinates(df.size.degree)[,2]) - 0.1*height,
                top = max(st_coordinates(df.size.degree)[,2]) + 0.1*height,
                left = min(st_coordinates(df.size.degree)[,1]) - 0.1*width,
                right = max(st_coordinates(df.size.degree)[,1]) + 0.1*width)

map <- get_stamenmap(sac_borders, zoom = 12, maptype = "toner-lite")
Source : http://tile.stamen.com/toner-lite/12/1205/1538.png
Source : http://tile.stamen.com/toner-lite/12/1206/1538.png
Source : http://tile.stamen.com/toner-lite/12/1205/1539.png
Source : http://tile.stamen.com/toner-lite/12/1206/1539.png
Source : http://tile.stamen.com/toner-lite/12/1205/1540.png
Source : http://tile.stamen.com/toner-lite/12/1206/1540.png
Source : http://tile.stamen.com/toner-lite/12/1205/1541.png
Source : http://tile.stamen.com/toner-lite/12/1206/1541.png
ggmap(map) +
  geom_density2d(data = df.size.degree, aes(x = lon, y = lat), size = 0.3) +
  stat_density2d(data = df.size.degree, aes(x = lon, y = lat, fill = ..level.., alpha = ..level..), 
                 geom = "polygon", bins = 10) + 
  scale_fill_gradient(low = "green", high = "red") +
  scale_alpha(range = c(0, 0.4), guide = FALSE) #Plot +
Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please use `guide = "none"` instead.

 coord_cartesian(xlim = c(-74.02, -95.7), 
                  ylim = c(29.5, 30.1))
<ggproto object: Class CoordCartesian, Coord, gg>
    aspect: function
    backtransform_range: function
    clip: on
    default: FALSE
    distance: function
    expand: TRUE
    is_free: function
    is_linear: function
    labels: function
    limits: list
    modify_scales: function
    range: function
    render_axis_h: function
    render_axis_v: function
    render_bg: function
    render_fg: function
    setup_data: function
    setup_layout: function
    setup_panel_guides: function
    setup_panel_params: function
    setup_params: function
    train_panel_guides: function
    transform: function
    super:  <ggproto object: Class CoordCartesian, Coord, gg>
---
title: "R Notebook"
output: html_notebook
---
install.packages(c(tidyverse, sf, tmap, spatstat, maptools, ggplot2, sqldf))
install.packages("spatstat.geom", dependencies = TRUE)
install.packages("leaflet.providers")
install.packages("markdown", dependencies = TRUE)
install.packages("polyclip", dependencies = TRUE)
install.packages("ggmap", dependencies = TRUE)

```{r}
######## PART 1 SET UP ######### 

library(tidyverse) 
library(sf)
library(tmap)
library(spatstat)
library(maptools)
library(sqldf)
library(ggplot2)
library(spatstat.geom)
library(leaflet.providers)
library(markdown)
library(polyclip)
library(ggmap)

# Environment (change this path to your folder where you stored the CITI Bike data)
setwd("/Users/chaneumpark/OneDrive - Georgia Institute of Technology/2021-Fall/Urban Analytics/R lab session/Assignment2 CitiBike")

## We downloaded the data from here: https://s3.amazonaws.com/tripdata/index.html
## Some code taken from: http://lab.rady.ucsd.edu/sawtooth/business_analytics_in_r/maps.html
## Thanks to Bon Woo Koo for his help.

# Read data
rdf <- read.csv("201306-citibike-tripdata.csv")
rdf <- sample_n(rdf1,(33719*2))
write.csv(rdf, "citibike_smallsample.csv")

# Take a look by birth year!
rdf %>% 
  mutate(birth.year = as.numeric('birth.year')) %>% #THIS CHANGES A STRING INTO A NUMBER
  dplyr::select(-c('birth.year'))  %>%    #THIS DELETES THE OLD COLUMN 
  arrange(birth.year) #THIS SORTS ON BIRTH YEAR
```

```{r}
######## PART 2 EXPLORING THE DATA ######### 

# Split time, birth year and filter by birth year
rdfClean <- rdf %>% 
  mutate(date = as.Date(starttime),
         time = format(as.POSIXct(starttime), format = "%H:%M:%S")) %>% ## SPECIAL FORMATTING TO GET H,M,S.
  mutate(hour = substring(.$time, first = 1, last = 2)) %>% ## TRUNCATES THE STRING
  mutate(gender = as.character(gender)) %>%  ##  CHANGES NUMBERS into STRINGS
  dplyr::select(date, time, hour, everything()) %>%  ## REORDERS THE COLUMNS 
  filter("birth_year" > 1930) ## KEEPS BIRTH YEARS AFTER 1930


# Aggregate by hour
tripsByHour <- rdfClean  %>%
  group_by(`hour`, `gender`) %>%
  summarise(trips=n()) %>%
  mutate(hour = as.numeric(hour))

#other way to do it (may not return hour as int...)
#tripsByHour <- sqldf("select hour, gender, count(hour) as trips from rdfClean group by hour, gender")

# See trips by hour (very basic plot---you can also use another plotting call from ggplot2 etc)
plot(tripsByHour$hour, tripsByHour$trips, xlab = "Hour of the Day", ylab = "Number of Trips")
```  

```{r}
## SAME PLOT
ggplot(tripsByHour, aes(x=hour, y=trips, colour=gender)) + geom_line()

```

```{r}
######## PART 3 SEGMENTATION AND MAPPING ######### 

# Sample for just the morning 
morning <- rdfClean %>% filter(hour %in% c('07', '08', '09'))

# Find most important stations
morningStations <- morning  %>%
  group_by(`start.station.id`) %>%
  summarise(lat=as.numeric(`start.station.latitude`[1]),
            lon=as.numeric(`start.station.longitude`[1]),
            name=`start.station.name`[1],
            trips=n())
morningStations<-rename(morningStations,morningtrips=trips)

# Sample for just the evening
evening <- rdfClean %>% filter(hour %in% c('17', '18', '19'))

# Find most important stations
eveningStations <- evening  %>%
  group_by(`start.station.id`) %>%
  summarise(lat=as.numeric(`start.station.latitude`[1]),
            lon=as.numeric(`start.station.longitude`[1]),
            name=`start.station.name`[1],
            trips=n())
eveningStations<-rename(eveningStations,eveningtrips=trips)

meStations<-merge(morningStations,eveningStations)
meStations$diff<-meStations$morningtrips-meStations$eveningtrips

meStations %>%
  arrange(desc(morningtrips))

meStations %>%
  arrange(desc(eveningtrips))

## Take a look
morningStations %>%
  arrange(desc(morningtrips))

eveningStations %>%
  arrange(desc(trips))

meStations %>%
  arrange(desc(morningtrips))
```

```{r}
# Turns it into a gis file
morningStationsGeo <- st_as_sf(morningStations, coords = c("lon", "lat"), dim = "XY", crs = 4326) #crs=coordinate reference system

# Save if you feel like it! # st_write(popularStations, "popularStations.shp", delete_layer = TRUE)

# Look at the map 
tmap_mode('view')
tm_shape(morningStationsGeo %>% 
           filter(!is.na(morningtrips)) %>% 
           mutate(morningtrips = as.numeric(morningtrips))) + tm_dots(col = "morningtrips", style="quantile", id = "name")
```

```{r}
meStationsGeo <- st_as_sf(meStations, coords = c("lon", "lat"), dim = "XY", crs = 4326)

tmap_mode('view')
tm_shape(meStationsGeo %>% 
           filter(!is.na(diff)) %>% 
           mutate(diff = as.numeric(diff))) + tm_dots(col="diff", style="quantile", id = "name")
```


```{r}
######## PART 4 KERNEL DENSITY  ######### 

## Read SF for a file of boroughs
boroughs <- st_read("borough.shp")
boroughs_prj <- st_transform(boroughs, crs = 2263)

# Density map (This method doesn't allow us to see the background map)
df.size.proj <- st_transform(morningStationsGeo, crs = 2263)
p.sp <- as(df.size.proj, "Spatial")
p.ppp <- as(p.sp, "ppp")
kernelDensityMorning <- (density(p.ppp, sigma = 1000))

##Plot the two together
plot(kernelDensityMorning, main="Heat Map of Mornign Citibike Origins")
plot(boroughs_prj$geometry, add=TRUE)

##CHALLENGES FOR YOUR DENSITY MAP
##Extra: Clip the raster and clip the vector as well. (only plot the intersection of the two?) 
##Extra: Make the lines white (hint: it is now a polygon!)
##Extra: Add the top 3 (or 5) points and label them.
```

```{r}
######## PART 5 GGPLOT EXAMPLE WITH CONTOURS ######### 

# Heatmap in ggplot
library(ggplot2)
library(ggmap)

df.size.degree <- st_transform(morningStationsGeo, crs = 4326) %>% 
  mutate(lat = st_coordinates(.)[,2],
         lon = st_coordinates(.)[,1])

##Get borders
height <- max(st_coordinates(df.size.degree)[,2]) - min(st_coordinates(df.size.degree)[,2])
width <- max(st_coordinates(df.size.degree)[,1]) - min(st_coordinates(df.size.degree)[,1])
  
sac_borders <- c(bottom = min(st_coordinates(df.size.degree)[,2]) - 0.1*height,
                top = max(st_coordinates(df.size.degree)[,2]) + 0.1*height,
                left = min(st_coordinates(df.size.degree)[,1]) - 0.1*width,
                right = max(st_coordinates(df.size.degree)[,1]) + 0.1*width)

map <- get_stamenmap(sac_borders, zoom = 12, maptype = "toner-lite")

ggmap(map) +
  geom_density2d(data = df.size.degree, aes(x = lon, y = lat), size = 0.3) +
  stat_density2d(data = df.size.degree, aes(x = lon, y = lat, fill = ..level.., alpha = ..level..), 
                 geom = "polygon", bins = 10) + 
  scale_fill_gradient(low = "green", high = "red") +
  scale_alpha(range = c(0, 0.4), guide = FALSE) #Plot +
 coord_cartesian(xlim = c(-74.02, -95.7), 
                  ylim = c(29.5, 30.1))
```
