Name: Stanley Nguyen Student ID: 1001692

library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1.9000     ✔ purrr   0.2.4     
## ✔ tibble  1.4.2          ✔ dplyr   0.7.4     
## ✔ tidyr   0.8.0          ✔ stringr 1.3.0     
## ✔ readr   1.1.1          ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::vars()   masks ggplot2::vars()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(sf)
## Warning: package 'sf' was built under R version 3.4.4
## Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
library(ggmap)
## Google Maps API Terms of Service: http://developers.google.com/maps/terms.
## Please cite ggmap if you use it: see citation("ggmap") for details.
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## Loading required package: rpart
## 
## spatstat 1.55-0       (nickname: 'Stunned Mullet') 
## For an introduction to spatstat, type 'beginner'
library(sp)
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.4.4
## rgdal: version: 1.2-18, (SVN revision 718)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
##  Path to GDAL shared files: /Users/stanleynguyen/Library/R/3.4/library/rgdal/gdal
##  GDAL binary built with GEOS: FALSE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: /Users/stanleynguyen/Library/R/3.4/library/rgdal/proj
##  Linking to sp version: 1.2-7
library(maptools)
## Checking rgeos availability: FALSE
##      Note: when rgeos is not available, polygon geometry     computations in maptools depend on gpclib,
##      which has a restricted licence. It is disabled by default;
##      to enable gpclib, type gpclibPermit()

Data Manipulation

tenders <- read_csv("tabula-tender-bids-from-mar-2012-to-jan-2017.csv", col_names = c("centre", "stall", "area", "trade", "bid", "month"))
## Parsed with column specification:
## cols(
##   centre = col_character(),
##   stall = col_character(),
##   area = col_double(),
##   trade = col_character(),
##   bid = col_character(),
##   month = col_character()
## )
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 121 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected  actual            file                             expected   <int> <chr> <chr>     <chr>             <chr>                            actual 1  1175 area  a double  HALAL COOKED FOOD 'tabula-tender-bids-from-mar-20… file 2  1175 <NA>  6 columns 5 columns         'tabula-tender-bids-from-mar-20… row 3  1177 area  a double  INDIAN CUISINE    'tabula-tender-bids-from-mar-20… col 4  1177 <NA>  6 columns 5 columns         'tabula-tender-bids-from-mar-20… expected 5  1179 area  a double  COOKED FOOD       'tabula-tender-bids-from-mar-20…
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
tenders <- tenders %>%
  drop_na(centre) %>%
  filter(row_number() != 1135) %>%
  mutate(type = if_else(row_number() < 1135, "cooked", "lockup")) %>%
  mutate(bidNum = as.numeric(str_replace_all(bid, pattern="\\$|,", replacement = ""))) %>%
  mutate(date = dmy(paste0("01-", month))) %>%
  mutate(priceM2 = bidNum / area)
## Warning in evalq(as.numeric(str_replace_all(bid, pattern = "\\$|,",
## replacement = "")), : NAs introduced by coercion
## Warning: 50 failed to parse.
tenders %>% # avg price per center
  group_by(centre) %>%
  summarise(price = mean(priceM2))

tenders %>% # avg price center for cooked food 
  filter(type == 'cooked') %>%
  group_by(centre) %>%
  summarise(price = mean(priceM2))

tenders %>% # number of bids per center
  group_by(centre) %>%
  summarise(bids = n())

Is there a relation between area and price?

tenders %>%
  drop_na(priceM2, area) %>%
  ggplot(aes(area, priceM2)) + geom_line()

No

What is the average price per m2 per hawker center?

tenders %>%
  drop_na(priceM2, centre) %>%
  group_by(centre) %>%
  summarise(avg_price = mean(priceM2)) %>%
  ggplot(aes(centre, avg_price)) + geom_bar(stat="identity") + theme(axis.text.x = element_text(angle = 90, size = 3))

How many bids are there for each hawker center?

tenders %>%
  drop_na(centre) %>%
  group_by(centre) %>%
  summarise(num_bid = n()) %>%
  ggplot(aes(centre, num_bid)) + geom_bar(stat="identity") + theme(axis.text.x = element_text(angle = 90, size = 3))

Spatial Data

centres <- st_read("hawker-centres-kml.kml")
plot(centres)

centres %>%
  mutate(centreName = str_extract(Description, "(?<=<th>NAME<\\/th> <td>).*?(?=<\\/td>)"))
centres <- centres %>%
  mutate(centreName = str_extract(Description, "(?<=<th>NAME<\\/th> <td>).*?(?=<\\/td>)")) %>%
  mutate(centreUpper = toupper(centreName)) %>%
  mutate(lon = st_coordinates(centres)[,1]) %>%
  mutate(lat = st_coordinates(centres)[,2])

tenders.sp <- left_join(tenders, centres, by = c("centre" = "centreUpper"))
centres.unique <- tenders %>%
  group_by(centre) %>%
  summarise(count = n()) %>%
  mutate(location = paste0(centre, ", Singapore"))
# register_google(key = "AIzaSyCrAQ2uOsqVeuHjQTtitNtfNvo5L2KFuWA")
# g <- geocode(centres.unique$location, output = "latlon", source = "google", sensor = F)
centres.unique <- centres.unique[!centres.unique$centre == 'BLK 226D ANG MO KIO AVENUE1',]
g <- read_csv("centres-geocoded.csv")
## Parsed with column specification:
## cols(
##   centre = col_character(),
##   count = col_integer(),
##   location = col_character(),
##   lon = col_double(),
##   lat = col_double()
## )
centres <- bind_cols(centres.unique, g)
tenders.sp <- left_join(tenders, centres, by = c("centre" = "centre"))
tenders.sp %>% drop_na(lon, lat, priceM2, type) %>% ggplot(aes(x=lon, y=lat, size=priceM2, color=type)) + geom_point(alpha=0.3) + coord_fixed()

tenders.sp %>% drop_na(lon, lat) %>% ggplot( aes(x=lon, y=lat)) + geom_point() + geom_density2d() + coord_fixed()

tenders.sp %>% drop_na(lon, lat) %>% ggplot(aes(x=lon, y=lat)) + geom_point() + geom_hex() + coord_fixed()

Do you think there is a specific spatial distribution with regard to the number of bids per ‘trade’ or ‘type’?

tenders.sp %>% drop_na(lon, lat, trade, area) %>% ggplot(aes(x=lon, y=lat)) + geom_point() + geom_density2d() + coord_fixed() + facet_wrap(~trade, ncol = 5) + theme(axis.text = element_blank(), axis.ticks = element_blank()) + xlab("") + ylab("") + ggtitle("Density of bids based on types of Hawker Centres 2012-2017")

For certain types of food (i.e. Mutton), the stores are closely packed but there is no trends in term of location based on types of food.

Or different patterns based on the year?

tenders.sp %>% drop_na(lon, lat, date) %>% mutate(date = substr(date, 0, 4)) %>% ggplot(aes(x=lon, y=lat)) + geom_point() + geom_density2d() + coord_fixed() + facet_wrap(~date, ncol = 3) + theme(axis.text = element_blank(), axis.ticks = element_blank()) + xlab("") + ylab("") + ggtitle("Density of bids for Hawker Centres year on year 2012-2017")

The distribution seems to have an alternate parttern between “packed” and “spread-out” (2012-2013 vs 2014-2016 vs 2017). A wild guess would be that store owners who failed to bid for good spots from the previous years have to go to a less ideal location subsequent years.

Spatial Point Patterns

centres.sp <- tenders.sp %>%
  filter(lat > 0) %>%
  group_by(centre, lon, lat) %>%
  summarise(price = mean(priceM2, na.rm = T)) %>%
  replace_na(list(price = 0))
coordinates(centres.sp) <- c('lon', 'lat')
centres.ppp <- unmark(as.ppp(centres.sp))
sg <- readOGR(".", "sg-all")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/stanleynguyen/Documents/Projects/school/maps/02221-Lab9", layer: "sg-all"
## with 1 features
## It has 13 fields
sg.window <- as.owin(sg)
centres.ppp <- centres.ppp[sg.window]
plot(Kest(centres.ppp), main = "Spatial Distribution of Hawkers 2012-2017")

plot(density(centres.ppp, 0.02), main = "Spatial Density of Hawkers 2012-2017")

contour(density(centres.ppp, 0.02), main = "Spatial Contour of Hawkers 2012-2017")

pop <- as.im(readGDAL("sg-pop.tif"))
## sg-pop.tif has GDAL driver GTiff 
## and has 37 rows and 58 columns
plot(rhohat(centres.ppp, pop), main = "Hawkers Intensity vs Population", xlab="population", ylab="intensity")

plot(rhohat(centres.ppp, pop, weights = centres.sp$price), main="Weighted Hawkers Price Intensity vs Population", xlab="population", ylab="intensity")

plot(pop, main="Hawkers on Populations Density Map")
plot(centres.ppp, col="white", add=T)

We can observe that there are estates with high population density but not a lot of hawkers like Choa Chu Kang, Woodlands, and Hougang. The reason behind these outliers might be that these estates are relatively new so there are still development and future plan for public spaces that haven’t kicked in of which hawker centers are among. We can back this up by looking at the past data of how long it took for these development to be in place for older estates.