Install relevant R packages
packages = c('rgdal', 'maptools', 'raster','spatstat', 'tmap','sf','SpatialEpi', 'tidyverse','spData')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
Import MP14_SUBZONE layer and named it as mpsz
mpsz <- readOGR(dsn = "data/geospatial", layer="MP14_SUBZONE_WEB_PL")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\IS415 Geospatial Analytics and Applications\take-home2\Take_home_Exe02\data\geospatial", layer: "MP14_SUBZONE_WEB_PL"
## with 323 features
## It has 15 fields
mpsz
## class : SpatialPolygonsDataFrame
## features : 323
## extent : 2667.538, 56396.44, 15748.72, 50256.33 (xmin, xmax, ymin, ymax)
## crs : +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
## variables : 15
## names : OBJECTID, SUBZONE_NO, SUBZONE_N, SUBZONE_C, CA_IND, PLN_AREA_N, PLN_AREA_C, REGION_N, REGION_C, INC_CRC, FMEL_UPD_D, X_ADDR, Y_ADDR, SHAPE_Leng, SHAPE_Area
## min values : 1, 1, ADMIRALTY, AMSZ01, N, ANG MO KIO, AM, CENTRAL REGION, CR, 00F5E30B5C9B7AD8, 2014/12/05, 5092.8949, 19579.069, 871.554887798, 39437.9352703
## max values : 323, 17, YUNNAN, YSSZ09, Y, YISHUN, YS, WEST REGION, WR, FFCCF172717C2EAF, 2014/12/05, 50424.7923, 49552.7904, 68083.9364708, 69748298.792
To remove the 1 column and 2 column in the data of SpatialPolygonsDataFrame, this stage is for the the future identifying purpose. Once first two columns has been removed, Subzone name will be in the first column when we display the data.
mpsz <- mpsz[,-(1:2)]
Import CostalOutline layer and named it as sg. Currently the data set is in SpatialPolygonsDataFrame format.
sg <- readOGR(dsn = "data/geospatial", layer="CostalOutline")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\IS415 Geospatial Analytics and Applications\take-home2\Take_home_Exe02\data\geospatial", layer: "CostalOutline"
## with 60 features
## It has 4 fields
Convert the data set into SpatialPolygons format.
sg_sp <- as(sg, "SpatialPolygons")
Import listings data set and named it as airbnb.
airbnb <- read_csv("data/aspatial/listings.csv")
After looking through the details of the data set, the coords of the data set is in longitude and latitude format. One key point to take note is the CRS of the langitude and latitude. It should be in WGS84 (EPSG:4326).
Commonly used by organizations that provide GIS data for the entire globe or many countries. CRS used by Google Earth.
airbnb_sf <- st_as_sf(airbnb, coords = c("longitude","latitude"), crs = 4326)
By using function st_transform, we are able to convert the longitude and latitude to XY coords by defining the EPSG as 3414 (SV21).
airbnb_sf <-st_transform(airbnb_sf, CRS("+init=epsg:3414"))
airbnb_sf
## Simple feature collection with 7713 features and 14 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 7215.566 ymin: 25166.35 xmax: 43401.32 ymax: 48466.72
## epsg (SRID): NA
## proj4string: +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## # A tibble: 7,713 x 15
## id name host_id host_name neighbourhood_g~ neighbourhood room_type price
## <dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 49091 "COZ~ 266763 Francesca North Region Woodlands Private ~ 87
## 2 50646 "Ple~ 227796 Sujatha Central Region Bukit Timah Private ~ 80
## 3 56334 "COZ~ 266763 Francesca North Region Woodlands Private ~ 72
## 4 71609 "Ens~ 367042 Belinda East Region Tampines Private ~ 214
## 5 71896 "B&B~ 367042 Belinda East Region Tampines Private ~ 99
## 6 71903 "Roo~ 367042 Belinda East Region Tampines Private ~ 109
## 7 71907 "3rd~ 367042 Belinda East Region Tampines Private ~ 217
## 8 117957 "Pri~ 448620 Lynnity North-East Regi~ Sengkang Private ~ 67
## 9 241503 "Lon~ 1017645 Bianca East Region Bedok Private ~ 52
## 10 241508 "Lon~ 1017645 Bianca East Region Bedok Private ~ 54
## # ... with 7,703 more rows, and 7 more variables: minimum_nights <dbl>,
## # number_of_reviews <dbl>, last_review <date>, reviews_per_month <dbl>,
## # calculated_host_listings_count <dbl>, availability_365 <dbl>,
## # geometry <POINT [m]>
Converting the airbnb data set in to SpatialPointsDataFrame format.
airbnb_sp <- as(airbnb_sf, "Spatial")
Removing those irrelevant column and only keep the room_type column.
airbnb_sf <- subset(airbnb_sf, select = -c(name, host_id,host_name,neighbourhood_group,price,minimum_nights,number_of_reviews,last_review,reviews_per_month, calculated_host_listings_count,availability_365,id,neighbourhood))
Filter room_type by Private room.
private <- airbnb_sf %>%
filter(room_type == "Private room")
Filter room_type by Hotel room.
Hotel <- airbnb_sf %>%
filter(room_type == "Hotel room")
Filter room_type by Shared room.
Shared <- airbnb_sf %>%
filter(room_type == "Shared room")
Filter room_type by Entire home/apt
Entire <- airbnb_sf %>%
filter(room_type == "Entire home/apt")
Convert private to Spatialpoints format which only left the coords without any other information.
private_sp <- as(private, "Spatial")
private_sp <- as(private_sp, "SpatialPoints")
Convert hotel to Spatialpoints format which only left the coords without any other information.
Hotel_sp <- as(Hotel, "Spatial")
Hotel_sp <- as(Hotel_sp, "SpatialPoints")
Convert share to Spatialpoints format which only left the coords without any other information.
Shared_sp <- as(Shared, "Spatial")
Shared_sp <- as(Shared_sp, "SpatialPoints")
Convert entire to Spatialpoints format which only left the coords without any other information.
Entire_sp <- as(Entire, "Spatial")
Entire_sp <- as(Entire_sp, "SpatialPoints")
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(sg) +
tm_borders(alpha = 0.5) +
tm_shape(airbnb_sp)+
tm_dots(col= 'room_type',alpha=0.8, size=0.02)
## Warning: The shape sg is invalid. See sf::st_is_valid
When mapping all different types of room_type on a single open street map without zooming in, we are only can observe and identify that both “Entire home/apt” and “Private room” have the most number of airbnb listing in Singapore. However, it does not provide any useful information as all the point are overlapping with one another. In order to provide more useful analysis we should separate room_type into individual open street map.
tm_shape(sg) +
tm_borders(alpha = 0.5) +
tm_shape(airbnb_sp) +
tm_bubbles(col = "room_type",
border.col = "black",
border.lwd = 1) +
tm_facets(by= "room_type",
ncol = 2,
nrow = 2,
sync = TRUE)
## Warning: The shape sg is invalid. See sf::st_is_valid
Entire home/apt: Entire home/apt tend to spread across in most of the residence areas, especially in the center of Singapore. However, some of the areas like north, west part of Singapore will have lesser listing compare to town area. By looking at the individual open street map we are see Pirvate room have the second most number of listing in Singapore market.
Hotel: Hotels tend to only spread within the bottom part of the Singapore as known as town area. However, there are few hotels located at NE part of Singapore.
Private: Private tend to spread across entire Singapore area, except those water catchment areas, military training areas and industry areas. By looking at the individual open street map we are see Pirvate room have the most number of listing in Singapore market.
Shared: Shared also tend to spread across in most of the residence areas, however the different is that are only a few listing of share room in each residence areas compare to Entire home/apt.
tmap_mode('plot')
## tmap mode set to plotting
plotting all the Private rooms on a CoastOutline layer
plot(sg, border = "lightgrey")
plot(sg, add = TRUE)
plot(private_sp, add = TRUE)
Converting the data set in to spatstat’s ppp object format.
private_ppp <- as(private_sp,"ppp")
private_ppp
## Planar point pattern: 3206 points
## window: rectangle = [10737.87, 43386.87] x [26334.02, 48466.72] units
Plotting the private in ppp object format
plot(private_ppp)
The summary report of pitvate in ppp object format
summary(private_ppp)
## Planar point pattern: 3206 points
## Average intensity 4.436692e-06 points per square unit
##
## *Pattern contains duplicated points*
##
## Coordinates are given to 3 decimal places
## i.e. rounded to the nearest multiple of 0.001 units
##
## Window: rectangle = [10737.87, 43386.87] x [26334.02, 48466.72] units
## (32650 x 22130 units)
## Window area = 722610000 square units
Checking is there any duplicated ppp object. If it returns true, it means there is duplicated ppp object as knwon as there are 2 ppp object having the same coords.
any(duplicated(private_ppp))
## [1] TRUE
To count the number of coindicence points.
To identify the number of locations have more than one point event.
sum(multiplicity(private_ppp) > 1)
## [1] 120
In order to overcome the the current issue, jittering function will be applied, which will add a small perturbation to the duplicate points so that they do not occupy the exact same space.
private_ppp_jit <- rjitter(private_ppp, retry=TRUE, nsim=1, drop=TRUE)
plot(private_ppp_jit)
When analysing spatial point patterns, it is a good practice to confine the analysis with a geographical area like Singapore boundary. In spatstat, an object called owin is specially designed to represent this polygonal region.
sg_owin <- as(sg_sp, "owin")
plot(sg_owin)
The summary report of sg_owin
To extract private that is within the specifi region to do our analysis later on.
privateSG_ppp = private_ppp_jit[sg_owin]
plot(privateSG_ppp)
The test hypotheses are:
Null hypothesis: The distribution of private rooms are randomly distributed.
Alternative hypothesis: The distribustion of private rooms are not randomly distributed.
Confident Interval: 95%
Chi-squared test of CSR suing quadrat counts
qt <- quadrat.test(privateSG_ppp, nx = 20, ny = 15)
## Warning: Some expected counts are small; chi^2 approximation may be inaccurate
qt
##
## Chi-squared test of CSR using quadrat counts
##
## data: privateSG_ppp
## X2 = 21992, df = 184, p-value < 2.2e-16
## alternative hypothesis: two.sided
##
## Quadrats: 185 tiles (irregular windows)
Conculsion: Since the p-value is less than the alpha, then we reject the null hypothesis, and we say the result is statistically significant. In this case The distribution of private rooms are not randomly distributed.
plot(privateSG_ppp)
plot(qt, add = TRUE, cex =.1)
quadrat.test(privateSG_ppp,
nx = 20, ny = 15,
method="M",
nsim=999)
##
## Conditional Monte Carlo test of CSR using quadrat counts
## Test statistic: Pearson X2 statistic
##
## data: privateSG_ppp
## X2 = 21992, p-value = 0.002
## alternative hypothesis: two.sided
##
## Quadrats: 185 tiles (irregular windows)
Conculsion: Since the p-value is less than the alpha, then we reject the null hypothesis, and we say the result is statistically significant. In this case The distribution of private rooms are not randomly distributed.
However, Quadrat Analysis is not a good method to use as we can see from the private_ppp map, there are too many empty grids.
The test hypotheses are:
Null hypothesis: The distribution of private rooms are randomly distributed.
Alternative hypothesis: The distribustion of private rooms are not randomly distributed.
Confident Interval: 95%
Testing spatial point patterns using Clark and Evan Test
clarkevans.test(privateSG_ppp,
correction="none",
clipregion="sg_owin",
alternative=c("two.sided"),
nsim=999)
##
## Clark-Evans test
## No edge correction
## Monte Carlo test based on 999 simulations of CSR with fixed n
##
## data: privateSG_ppp
## R = 0.37933, p-value = 0.002
## alternative hypothesis: two-sided
Conculsion: Since the p-value is less than the alpha, then we reject the null hypothesis, and we say the result is statistically significant. In this case The distribution of private rooms are not randomly distributed.
Since the Quadrat Analysis is not an appropriate. Therefore, Nearest Neighbour Analysis will be used
Even though in the following steps, we going to use the auto bandwidth selection method, but is good to check the auto selection bandwidth value used in the following steps. At least we can have an ideal about the value that the system choose to use.
diggle_value <- bw.diggle(privateSG_ppp)
diggle_value
## sigma
## 49.73491
Computing kernal density estimation using automatic bandwidth selection method.
kde_privateSG_bw <- density(privateSG_ppp, sigma=bw.diggle, edge=TRUE, kernel="gaussian")
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
plot(kde_privateSG_bw)
The density values of the output is way too small to comprehend. This is because the default unit of measurement of svy21 is in meter. As a result, the density values computed is in “number of points per square meter”.
In the code chunk below, rescale() is used to covert the unit of measurement from meter to kilometer.
privateSG_ppp.km <- rescale(privateSG_ppp, 1000, "km")
Re-run the density using the resale data set.
kde_privateSG_bw <- density(privateSG_ppp.km, sigma=bw.diggle, edge=TRUE, kernel="gaussian")
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
plot(kde_privateSG_bw)
kde_privateSG_bw
## real-valued pixel image
## 128 x 128 pixel array (ny, nx)
## enclosing rectangle: [2.6639, 56.048] x [16.358, 50.244] km
Computing kernel density estimation using defining bandwidth manually.
kde_privateSG_60 <- density(privateSG_ppp.km, sigma=0.06, edge=TRUE, kernel="gaussian")
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
plot(kde_privateSG_60)
Converting KDE output into grid object and at the same time we need to convert back to meter by rescale the value down by 1000.
gridded_kde_privateSG_bw <- as.SpatialGridDataFrame.im(rescale(kde_privateSG_bw, 1/1000,"m"))
spplot(gridded_kde_privateSG_bw)
Converting gridded output into raster
kde_privateSG_bw_raster <- raster(gridded_kde_privateSG_bw)
kde_privateSG_bw_raster
## class : RasterLayer
## dimensions : 128, 128, 16384 (nrow, ncol, ncell)
## resolution : 417.0614, 264.7348 (x, y)
## extent : 2663.926, 56047.79, 16357.98, 50244.03 (xmin, xmax, ymin, ymax)
## crs : NA
## source : memory
## names : v
## values : -1.186521e-13, 833.2521 (min, max)
Assigning project systems
projection(kde_privateSG_bw_raster) <- CRS("+init=EPSG:3414")
kde_privateSG_bw_raster
## class : RasterLayer
## dimensions : 128, 128, 16384 (nrow, ncol, ncell)
## resolution : 417.0614, 264.7348 (x, y)
## extent : 2663.926, 56047.79, 16357.98, 50244.03 (xmin, xmax, ymin, ymax)
## crs : +init=EPSG:3414 +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## source : memory
## names : v
## values : -1.186521e-13, 833.2521 (min, max)
tmap_mode('view')
## tmap mode set to interactive viewing
map1 <-tm_shape(kde_privateSG_bw_raster) +
tm_raster("v", breaks = c(0,5,10,15,20,25,30,35,40,45,50,100,200,400,600,800,1000), alpha = 0.7) +
tm_layout(legend.position = c("right", "bottom"), frame = FALSE)+
tm_shape(mpsz)+
tm_polygons(alpha = 0)
map2 <- tm_shape(kde_privateSG_bw_raster) +
tm_raster("v",alpha = 0.7) +
tm_layout(legend.position = c("right", "bottom"), frame = FALSE)+
tm_shape(mpsz)+
tm_polygons(alpha = 0)
tmap_arrange(map2,map1, ncol = 2 , sync = TRUE)
Notes: By using tmap, we are able to change the legend range accordingly. In the default setting we are not able to get any insightful analysis as the legend range are pre-set (left). After change the legend range, we are able to draw more insightful information for future analysis. However, the range very wide, therefore, from 0 to 50 are incremental by 5 and from 100 onwards are incremental by 200.
tmap_mode('plot')
## tmap mode set to plotting
Plotting all the Entire rooms on a CoastOutline layer
plot(sg, border = "lightgrey")
plot(sg, add = TRUE)
plot(Entire_sp, add = TRUE)
Converting the data set in to spatstat’s ppp object format.
entire_ppp <- as(Entire_sp,"ppp")
entire_ppp
## Planar point pattern: 3728 points
## window: rectangle = [7215.57, 42932.86] x [25166.35, 48181.41] units
Plotting the entire in ppp object format
plot(entire_ppp)
he summary report of entire in ppp object format
summary(entire_ppp)
## Planar point pattern: 3728 points
## Average intensity 4.535082e-06 points per square unit
##
## *Pattern contains duplicated points*
##
## Coordinates are given to 3 decimal places
## i.e. rounded to the nearest multiple of 0.001 units
##
## Window: rectangle = [7215.57, 42932.86] x [25166.35, 48181.41] units
## (35720 x 23020 units)
## Window area = 822036000 square units
Checking is there any duplicated ppp object. If it returns true, it means there is duplicated ppp object as knwon as there are 2 ppp object having the same coords.
any(duplicated(entire_ppp))
## [1] TRUE
To identify the number of locations have more than one point event.
sum(multiplicity(entire_ppp) > 1)
## [1] 35
In order to overcome the the current issue, jittering function will be applied, which will add a small perturbation to the duplicate points so that they do not occupy the exact same space.
entire_ppp_jit <- rjitter(entire_ppp, retry=TRUE, nsim=1, drop=TRUE)
plot(entire_ppp_jit)
To extract private that is within the specific region to do our analysis later on.
entireSG_ppp = entire_ppp_jit[sg_owin]
plot(entireSG_ppp)
## 2.3.2 Nearest Neighbour analysis for Entire Room
The test hypotheses are:
Null hypothesis: The distribution of entire rooms are randomly distributed.
Alternative hypothesis: The distribustion of entire rooms are not randomly distributed.
Confident Interval: 95%
Testing spatial point patterns using Clark and Evan Test
clarkevans.test(entireSG_ppp,
correction="none",
clipregion="sg_owin",
alternative=c("two.sided"),
nsim=999)
##
## Clark-Evans test
## No edge correction
## Monte Carlo test based on 999 simulations of CSR with fixed n
##
## data: entireSG_ppp
## R = 0.26352, p-value = 0.002
## alternative hypothesis: two-sided
Conculsion: Since the p-value is less than the alpha, then we reject the null hypothesis, and we say the result is statistically significant. In this case The distribution of private rooms are not randomly distributed.
Even though in the following steps, we going to use the auto bandwidth selection method, but is good to check the auto selection bandwidth value used in the following steps. At least we can have an ideal about the value that the system choose to use.
diggle_value <- bw.diggle(entireSG_ppp)
diggle_value
## sigma
## 46.9474
Computing kernal density estimation using automatic bandwidth selection method.
kde_entireSG_bw <- density(entireSG_ppp, sigma=bw.diggle, edge=TRUE, kernel="gaussian")
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
plot(kde_entireSG_bw)
The density values of the output is way too small to comprehend. This is because the default unit of measurement of svy21 is in meter. As a result, the density values computed is in “number of points per square meter”.
In the code chunk below, rescale() is used to covert the unit of measurement from meter to kilometer.
entireSG_ppp.km <- rescale(entireSG_ppp, 1000, "km")
Re-run the density using the resale data set.
kde_entireSG_bw <- density(entireSG_ppp.km, sigma=bw.diggle, edge=TRUE, kernel="gaussian")
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
plot(kde_entireSG_bw)
kde_entireSG_bw
## real-valued pixel image
## 128 x 128 pixel array (ny, nx)
## enclosing rectangle: [2.6639, 56.048] x [16.358, 50.244] km
Computing kernel density estimation using defining bandwidth manually.
kde_entireSG_60 <- density(entireSG_ppp.km, sigma=0.06, edge=TRUE, kernel="gaussian")
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
plot(kde_entireSG_60)
Converting KDE output into grid object and at the same time we need to convert back to meter by rescale the value down by 1000.
gridded_kde_entireSG_bw <- as.SpatialGridDataFrame.im(rescale(kde_entireSG_bw, 1/1000,"m"))
spplot(gridded_kde_entireSG_bw)
Converting gridded output into raster
kde_entireSG_bw_raster <- raster(gridded_kde_entireSG_bw)
kde_entireSG_bw_raster
## class : RasterLayer
## dimensions : 128, 128, 16384 (nrow, ncol, ncell)
## resolution : 417.0614, 264.7348 (x, y)
## extent : 2663.926, 56047.79, 16357.98, 50244.03 (xmin, xmax, ymin, ymax)
## crs : NA
## source : memory
## names : v
## values : -2.06931e-13, 1186.479 (min, max)
Assigning project systems
projection(kde_entireSG_bw_raster) <- CRS("+init=EPSG:3414")
kde_entireSG_bw_raster
## class : RasterLayer
## dimensions : 128, 128, 16384 (nrow, ncol, ncell)
## resolution : 417.0614, 264.7348 (x, y)
## extent : 2663.926, 56047.79, 16357.98, 50244.03 (xmin, xmax, ymin, ymax)
## crs : +init=EPSG:3414 +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## source : memory
## names : v
## values : -2.06931e-13, 1186.479 (min, max)
tmap_mode('view')
## tmap mode set to interactive viewing
map3 <-tm_shape(kde_entireSG_bw_raster) +
tm_raster("v", breaks = c(0,5,10,15,20,25,30,35,40,45,50,100,200,400,600,800,1000,1200,1400),alpha = 0.7) +
tm_layout(legend.position = c("right", "bottom"), frame = FALSE)+
tm_shape(mpsz)+
tm_polygons(alpha = 0)
map4 <- tm_shape(kde_entireSG_bw_raster) +
tm_raster("v",alpha = 0.7) +
tm_layout(legend.position = c("right", "bottom"), frame = FALSE)+
tm_shape(mpsz)+
tm_polygons(alpha = 0)
tmap_arrange(map4,map3, ncol = 2 , sync = TRUE)
After plotting the raster and mpsz layers on top of the open street map, we are able to idenity that kernel density of the entire distribution by subzone area. As we can see those with higher kernel density value are also mainly in the town areas and some of the subzone areas’s kernel density may go as high as 1400 private rooms within 1km square. Outside of the town areas, there also have entire room in the listing, but the kernel density is about 25 to 35 private rooms within 1km square.
Notes: By using tmap, we are able to change the legend range accordingly. In the default setting we are not able to get any insightful analysis as the legend range are pre-set (left). After change the legend range, we are able to draw more insightful information for future analysis. However, the range very wide, therefore, from 0 to 50 are incremental by 5 and from 100 onwards are incremental by 200.
tmap_mode('plot')
## tmap mode set to plotting
plotting all the hotel rooms on a CoastOutline layer
plot(sg, border = "lightgrey")
plot(sg, add = TRUE)
plot(Hotel_sp, add = TRUE)
Converting the data set in to spatstat’s ppp object format.
hotel_ppp <- as(Hotel_sp,"ppp")
hotel_ppp
## Planar point pattern: 507 points
## window: rectangle = [20126.35, 36915.62] x [28442.68, 39865.12] units
Plotting the hotel in ppp object format
plot(hotel_ppp)
The summary report of hotel in ppp object format
summary(hotel_ppp)
## Planar point pattern: 507 points
## Average intensity 2.643733e-06 points per square unit
##
## *Pattern contains duplicated points*
##
## Coordinates are given to 3 decimal places
## i.e. rounded to the nearest multiple of 0.001 units
##
## Window: rectangle = [20126.35, 36915.62] x [28442.68, 39865.12] units
## (16790 x 11420 units)
## Window area = 191774000 square units
Checking is there any duplicated ppp object. If it returns true, it means there is duplicated ppp object as knwon as there are 2 ppp object having the same coords.
any(duplicated(hotel_ppp))
## [1] TRUE
To count the number of coindicence points.
To identify the number of locations have more than one point event.
sum(multiplicity(hotel_ppp) > 1)
## [1] 59
In order to overcome the the current issue, jittering function will be applied, which will add a small perturbation to the duplicate points so that they do not occupy the exact same space.
hotel_ppp_jit <- rjitter(hotel_ppp, retry=TRUE, nsim=1, drop=TRUE)
plot(hotel_ppp_jit)
To extract private that is within the specific region to do our analysis later on.
hotelSG_ppp = hotel_ppp_jit[sg_owin]
plot(hotelSG_ppp)
The test hypotheses are:
Null hypothesis: The distribution of hotel rooms are randomly distributed.
Alternative hypothesis: The distribustion of hotel rooms are not randomly distributed.
Confident Interval: 95%
Testing spatial point patterns using Clark and Evan Test
clarkevans.test(hotelSG_ppp,
correction="none",
clipregion="sg_owin",
alternative=c("two.sided"),
nsim=999)
##
## Clark-Evans test
## No edge correction
## Monte Carlo test based on 999 simulations of CSR with fixed n
##
## data: hotelSG_ppp
## R = 0.10311, p-value = 0.002
## alternative hypothesis: two-sided
Conculsion: Since the p-value is less than the alpha, then we reject the null hypothesis, and we say the result is statistically significant. In this case The distribution of hotel rooms are not randomly distributed.
Even though in the following steps, we going to use the auto bandwidth selection method, but is good to check the auto selection bandwidth value used in the following steps. At least we can have an ideal about the value that the system choose to use.
diggle_value <- bw.diggle(hotelSG_ppp)
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
diggle_value
## sigma
## 66.31322
Computing kernal density estimation using automatic bandwidth selection method.
kde_hotelSG_bw <- density(hotelSG_ppp, sigma=bw.diggle, edge=TRUE, kernel="gaussian")
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
plot(kde_hotelSG_bw)
The density values of the output is way too small to comprehend. This is because the default unit of measurement of svy21 is in meter. As a result, the density values computed is in “number of points per square meter”.
In the code chunk below, rescale() is used to covert the unit of measurement from meter to kilometer.
hotelSG_ppp.km <- rescale(hotelSG_ppp, 1000, "km")
Re-run the density using the resale data set.
kde_hotelSG_bw <- density(hotelSG_ppp.km, sigma=bw.diggle, edge=TRUE, kernel="gaussian")
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
plot(kde_hotelSG_bw)
kde_privateSG_bw
## real-valued pixel image
## 128 x 128 pixel array (ny, nx)
## enclosing rectangle: [2.6639, 56.048] x [16.358, 50.244] km
Computing kernel density estimation using defining bandwidth manually.
kde_hotelSG_90 <- density(hotelSG_ppp.km, sigma=0.09, edge=TRUE, kernel="gaussian")
## Warning: point-in-polygon test had difficulty with 215 points (total score not 0
## or 1)
plot(kde_hotelSG_90)
Converting KDE output into grid object and at the same time we need to convert back to meter by rescale the value down by 1000.
gridded_kde_hotelSG_bw <- as.SpatialGridDataFrame.im(rescale(kde_hotelSG_bw, 1/1000,"m"))
spplot(gridded_kde_hotelSG_bw)
Converting gridded output into raster
kde_hotelSG_bw_raster <- raster(gridded_kde_hotelSG_bw)
kde_hotelSG_bw_raster
## class : RasterLayer
## dimensions : 128, 128, 16384 (nrow, ncol, ncell)
## resolution : 417.0614, 264.7348 (x, y)
## extent : 2663.926, 56047.79, 16357.98, 50244.03 (xmin, xmax, ymin, ymax)
## crs : NA
## source : memory
## names : v
## values : -8.918128e-14, 570.2712 (min, max)
Assigning project systems
projection(kde_hotelSG_bw_raster) <- CRS("+init=EPSG:3414")
kde_hotelSG_bw_raster
## class : RasterLayer
## dimensions : 128, 128, 16384 (nrow, ncol, ncell)
## resolution : 417.0614, 264.7348 (x, y)
## extent : 2663.926, 56047.79, 16357.98, 50244.03 (xmin, xmax, ymin, ymax)
## crs : +init=EPSG:3414 +proj=tmerc +lat_0=1.366666666666667 +lon_0=103.8333333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +units=m +no_defs
## source : memory
## names : v
## values : -8.918128e-14, 570.2712 (min, max)
tmap_mode('view')
## tmap mode set to interactive viewing
tm_shape(kde_hotelSG_bw_raster) +
tm_raster("v", breaks = c(0,50,100,150,200,250,300,350,400,450,500,550,600),alpha = 0.7) +
tm_layout(legend.position = c("right", "bottom"), frame = FALSE)+
tm_shape(mpsz)+
tm_polygons(alpha = 0)
As we mentioned, for the hotels are tend to located in the town areas and do not have many hotels in the listing, therefore, with the highest kernel density area is in Chinatown area followed by Boat Quay Area on an average of 500 to 600 hotels within 1km square.
tmap_mode('plot')
## tmap mode set to plotting
In kernel density map it take the distance into consideration, it provide a more accurate, insightful infromation and unbiased than point map, as point map only showing the location of the object without any other useful information for us to use.