install.packages(c(tidyverse, sf, tmap, spatstat, maptools, ggplot2, sqldf)) install.packages(“spatstat.geom”, dependencies = TRUE) install.packages(“leaflet.providers”) install.packages(“markdown”, dependencies = TRUE) install.packages(“polyclip”, dependencies = TRUE) install.packages(“ggmap”, dependencies = TRUE)

library(tidyverse) 
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages ----------------------------------------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.5     v purrr   0.3.4
v tibble  3.1.4     v dplyr   1.0.7
v tidyr   1.1.3     v stringr 1.4.0
v readr   2.0.1     v forcats 0.5.1
-- Conflicts -------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(sf)
Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(tmap)
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
library(spatstat)
Loading required package: spatstat.data
Loading required package: spatstat.geom
Registered S3 method overwritten by 'spatstat.geom':
  method     from
  print.boxx cli 
spatstat.geom 2.2-2
Loading required package: spatstat.core
Loading required package: nlme

Attaching package: ‘nlme’

The following object is masked from ‘package:dplyr’:

    collapse

Loading required package: rpart
spatstat.core 2.3-0
Loading required package: spatstat.linnet
spatstat.linnet 2.3-0

spatstat 2.2-0       (nickname: ‘That's not important right now’) 
For an introduction to spatstat, type ‘beginner’ 
library(maptools)
Checking rgeos availability: FALSE
Please note that 'maptools' will be retired by the end of 2023,
plan transition at your earliest convenience;
some functionality will be moved to 'sp'.
    Note: when rgeos is not available, polygon geometry     computations in maptools depend on gpclib,
    which has a restricted licence. It is disabled by default;
    to enable gpclib, type gpclibPermit()
library(sqldf)
Loading required package: gsubfn
Loading required package: proto
Loading required package: RSQLite
library(ggplot2)
library(spatstat.geom)
library(leaflet.providers)
library(markdown)
library(polyclip)
polyclip 1.10-0 built from Clipper C++ version 6.4.0
library(ggmap)
Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
Please cite ggmap if you use it! See citation("ggmap") for details.
# Aggregate by hour
tripsByHour <- rdfClean  %>%
  group_by(`hour`, `gender`) %>%
  summarise(trips=n()) %>%
  mutate(hour = as.numeric(hour))
`summarise()` has grouped output by 'hour'. You can override using the `.groups` argument.

#other way to do it (may not return hour as int...)
#tripsByHour <- sqldf("select hour, gender, count(hour) as trips from rdfClean group by hour, gender")

# See trips by hour (very basic plot---you can also use another plotting call from ggplot2 etc)
plot(tripsByHour$hour, tripsByHour$trips, xlab = "Hour of the Day", ylab = "Number of Trips")

# Turns it into a gis file
morningStationsGeo <- st_as_sf(morningStations, coords = c("lon", "lat"), dim = "XY", crs = 4326) #st_as_sf is Data to shape file

# Save if you feel like it! # st_write(popularStations, "popularStations.shp", delete_layer = TRUE)

# Look at the map 
tmap_mode('view')
tmap mode set to interactive viewing
tm_shape(morningStationsGeo %>% 
           filter(!is.na(trips)) %>% 
           mutate(trips = as.numeric(trips))) + tm_dots(col = "trips", style="quantile", id = "name")
######## PART 4 KERNEL DENSITY  ######### 

## Read SF for a file of boroughs
boroughs <- st_read("borough.shp")
boroughs_prj <- st_transform(boroughs, crs = 2263)

# Density map (This method doesn't allow us to see the background map)
df.size.proj <- st_transform(morningStationsGeo, crs = 2263)
p.sp <- as(df.size.proj, "Spatial")
p.ppp <- as(p.sp, "ppp")
kernelDensityMorning <- (density(p.ppp, sigma = 1000))

##Plot the two together
plot(kernelDensityMorning)
plot(boroughs_prj$geometry, add=TRUE)

##CHALLENGES FOR YOUR DENSITY MAP
##Extra: Clip the raster and clip the vector as well. (only plot the intersection of the two?) 
##Extra: Make the lines white (hint: it is now a polygon!)
##Extra: Add the top 3 (or 5) points and label them.
## Read SF for a file of boroughs
boroughs <- st_read("borough.shp")
Reading layer `borough' from data source 
  `D:\GaTech\Academic\Fall 2021\Urban Analytics\Assignment__Citibikes_export\ForClass\borough.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 5 features and 4 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
Geodetic CRS:  WGS84(DD)
boroughs_prj <- st_transform(boroughs, crs = 2263)

# Density map (This method doesn't allow us to see the background map)
df.size.proj <- st_transform(morningStationsGeo, crs = 2263)
p.sp <- as(df.size.proj, "Spatial")
p.ppp <- as(p.sp, "ppp")
kernelDensityMorning <- (density(p.ppp, sigma = 1000))

##Plot the two together
plot(kernelDensityMorning)
plot(boroughs_prj$geometry, add=TRUE)


# Look at the map 
tmap_mode('view')
tmap mode set to interactive viewing
tm_shape(DifferenceGeo %>% 
           filter(!is.na(Difference)) %>% 
           mutate(trips = as.numeric(Difference))) + tm_dots(col = "Difference", style="quantile", id = "name",
           midpoint = NA)
# Top 3 stations for Morning and Evening trips

fulldata <- fulldata[order(fulldata$MorningTrips,decreasing = TRUE),]
morningstationstop3 <- fulldata[1:3,]
View(morningstationstop3)


fulldata <- fulldata[order(fulldata$EveningTrips,decreasing = TRUE),]
eveningstationstop3 <- fulldata[1:3,]
View(eveningstationstop3)
---
title: "R Notebook"
output: html_notebook
---

install.packages(c(tidyverse, sf, tmap, spatstat, maptools, ggplot2, sqldf)) install.packages("spatstat.geom", dependencies = TRUE) install.packages("leaflet.providers") install.packages("markdown", dependencies = TRUE) install.packages("polyclip", dependencies = TRUE) install.packages("ggmap", dependencies = TRUE)

```{r echo=TRUE}
######## PART 1 SET UP ######### 

library(tidyverse) 
library(sf)
library(tmap)
library(spatstat)
library(maptools)
library(sqldf)
library(ggplot2)
library(spatstat.geom)
library(leaflet.providers)
library(markdown)
library(polyclip)
library(ggmap)

# Environment (change this path to your folder where you stored the CITI Bike data)
##setwd("C:\\Users\\candris3\\Dropbox\\Dropbox\\UrbanAnalytics_class\\code")
setwd("D:/GaTech/Academic/Fall 2021/Urban Analytics/Assignment__Citibikes_export/ForClass")
## We downloaded the data from here: https://s3.amazonaws.com/tripdata/index.html
## Some code taken from: http://lab.rady.ucsd.edu/sawtooth/business_analytics_in_r/maps.html
## Thanks to Bon Woo Koo for his help.

# Read data
rdf <- read.csv("201306-citibike-tripdata.csv")
#rdf <- sample_n(rdf1,(33719*2))
#write.csv(rdf, "citibike_smallsample.csv")
View(rdf) #to view data table
# Take a look by birth year!
rdf %>% 
  mutate(birth_year = as.numeric('birth.year')) %>% #THIS CHANGES A STRING INTO A NUMBER
  dplyr::select(-c('birth.year'))  %>%    #THIS DELETES THE OLD COLUMN 
  arrange(birth_year) #THIS SORTS ON BIRTH YEAR
```

```{r echo=TRUE}
######## PART 2 EXPLORING THE DATA ######### 

# Split time, birth year and filter by birth year
rdfClean <- rdf %>% 
  mutate(date = as.Date(starttime),
         time = format(as.POSIXct(starttime), format = "%H:%M:%S")) %>% ## SPECIAL FORMATTING TO GET H,M,S.
  mutate(hour = substring(.$time, first = 1, last = 2)) %>% ## TRUNCATES THE STRING
  mutate(gender = as.character(gender)) %>%  ##  CHANGES NUMBERS into STRINGS
  dplyr::select(date, time, hour, everything()) %>%  ## REORDERS THE COLUMNS 
  filter("birth_year" > 1930) ## KEEPS BIRTH YEARS AFTER 1930

View(rdfClean)

# Aggregate by hour
tripsByHour <- rdfClean  %>%
  group_by(`hour`, `gender`) %>%
  summarise(trips=n()) %>%
  mutate(hour = as.numeric(hour))

#other way to do it (may not return hour as int...)
#tripsByHour <- sqldf("select hour, gender, count(hour) as trips from rdfClean group by hour, gender")

# See trips by hour (very basic plot---you can also use another plotting call from ggplot2 etc)
plot(tripsByHour$hour, tripsByHour$trips, xlab = "Hour of the Day", ylab = "Number of Trips")
```

```{r echo=TRUE}
## SAME PLOT
ggplot(tripsByHour, aes(x=hour, y=trips, colour=gender)) + geom_line()

```

```{r echo=TRUE}
######## PART 3 SEGMENTATION AND MAPPING ######### 

# Sample for just the morning 
morning <- rdfClean %>% filter(hour %in% c('07', '08', '09'))

# Find most important stations
morningStations <- morning  %>%
  group_by(`start.station.id`) %>%
  summarise(lat=as.numeric(`start.station.latitude`[1]),
            lon=as.numeric(`start.station.longitude`[1]),
            name=`start.station.name`[1],
            trips=n())

## Take a look
morningStations %>%
  arrange(desc(trips))
```

```{r echo=TRUE}
# Turns it into a gis file
morningStationsGeo <- st_as_sf(morningStations, coords = c("lon", "lat"), dim = "XY", crs = 4326) #st_as_sf is Data to shape file

# Save if you feel like it! # st_write(popularStations, "popularStations.shp", delete_layer = TRUE)

# Look at the map 
tmap_mode('view')
tm_shape(morningStationsGeo %>% 
           filter(!is.na(trips)) %>% 
           mutate(trips = as.numeric(trips))) + tm_dots(col = "trips", style="quantile", id = "name")
```

```{r echo=TRUE}
######## PART 4 KERNEL DENSITY  ######### 

## Read SF for a file of boroughs
boroughs <- st_read("borough.shp")
boroughs_prj <- st_transform(boroughs, crs = 2263)

# Density map (This method doesn't allow us to see the background map)
df.size.proj <- st_transform(morningStationsGeo, crs = 2263)
p.sp <- as(df.size.proj, "Spatial")
p.ppp <- as(p.sp, "ppp")
kernelDensityMorning <- (density(p.ppp, sigma = 1000))

##Plot the two together
plot(kernelDensityMorning)
plot(boroughs_prj$geometry, add=TRUE)

##CHALLENGES FOR YOUR DENSITY MAP
##Extra: Clip the raster and clip the vector as well. (only plot the intersection of the two?) 
##Extra: Make the lines white (hint: it is now a polygon!)
##Extra: Add the top 3 (or 5) points and label them.
```

```{r echo=TRUE}
######## PART 5 GGPLOT EXAMPLE WITH CONTOURS ######### 

# Heatmap in ggplot
library(ggplot2)
library(ggmap)

df.size.degree <- st_transform(morningStationsGeo, crs = 4326) %>% 
  mutate(lat = st_coordinates(.)[,2],
         lon = st_coordinates(.)[,1])

##Get borders
height <- max(st_coordinates(df.size.degree)[,2]) - min(st_coordinates(df.size.degree)[,2])
width <- max(st_coordinates(df.size.degree)[,1]) - min(st_coordinates(df.size.degree)[,1])
  
sac_borders <- c(bottom = min(st_coordinates(df.size.degree)[,2]) - 0.1*height,
                top = max(st_coordinates(df.size.degree)[,2]) + 0.1*height,
                left = min(st_coordinates(df.size.degree)[,1]) - 0.1*width,
                right = max(st_coordinates(df.size.degree)[,1]) + 0.1*width)

map <- get_stamenmap(sac_borders, zoom = 12, maptype = "toner-lite")

ggmap(map) +
  geom_density2d(data = df.size.degree, aes(x = lon, y = lat), size = 0.3) +
  stat_density2d(data = df.size.degree, aes(x = lon, y = lat, fill = ..level.., alpha = ..level..), 
                 geom = "polygon", bins = 10) + 
  scale_fill_gradient(low = "green", high = "red") +
  scale_alpha(range = c(0, 0.4), guide = FALSE) #Plot +
 coord_cartesian(xlim = c(-74.02, -95.7), 
                  ylim = c(29.5, 30.1))
```

```{r echo=TRUE, paged.print=TRUE}
#Q2 Age of Users
UserAge <- rdfClean
View(UserAge)
UserAge$Age <- 2013 - as.numeric(UserAge$birth.year)
mean(as.numeric(UserAge$Age),na.rm=TRUE)


#Q11 Morning and Evening rides
morning_rides <- rdfClean %>% filter(hour %in% c('07', '08', '09'))
morningstations <- morning_rides  %>%
  group_by(`start.station.id`) %>%
  summarise(lat=as.numeric(`start.station.latitude`[1]),
            lon=as.numeric(`start.station.longitude`[1]),
            name=`start.station.name`[1],
            trips=n())
names(morningstations)[names(morningstations) == "trips"] <- "MorningTrips"


evening_rides <- rdfClean %>% filter(hour %in% c('17', '18', '19'))
eveningstations <- evening_rides  %>%
  group_by(`start.station.id`) %>%
  summarise(lat=as.numeric(`start.station.latitude`[1]),
            lon=as.numeric(`start.station.longitude`[1]),
            name=`start.station.name`[1],
            trips=n())
names(eveningstations)[names(eveningstations) == "trips"] <- "EveningTrips"

  
fulldata <- merge(morningstations, eveningstations)
View(fulldata)
fulldata$Difference <- (fulldata$MorningTrips-fulldata$EveningTrips)
View(fulldata)


# Turns it into a gis file
DifferenceGeo <- st_as_sf(fulldata, coords = c("lon", "lat"), dim = "XY", crs = 4326) #st_as_sf is Data to shape file

# Look at the map 
tmap_mode('view')
tm_shape(DifferenceGeo %>% 
           filter(!is.na(Difference)) %>% 
           mutate(trips = as.numeric(Difference))) + tm_dots(col = "Difference", style="quantile", id = "name",
           midpoint = NA)


```

```{r echo=TRUE, paged.print=TRUE}
# Top 3 stations for Morning and Evening trips

fulldata <- fulldata[order(fulldata$MorningTrips,decreasing = TRUE),]
morningstationstop3 <- fulldata[1:3,]
View(morningstationstop3)


fulldata <- fulldata[order(fulldata$EveningTrips,decreasing = TRUE),]
eveningstationstop3 <- fulldata[1:3,]
View(eveningstationstop3)

```
