Intro
The first thing we need to do to use GSV images for urban analytics is to sample points, prepare location data (i.e., coordinates and headings) and download the images. The literature uses various methods to sample points to download GSV images from. For example, some studies sampled four images per street segments while some others downloaded four images (i.e., panorama) every 20 meters. We will be downloading four images per street segment, but you will also learn how to do it in a different way at the end.
NOTE: The method for sampling GSV images in this document is a simplified version.
#install.packages("osmdata")
#install.packages("sfnetworks")
#install.packages("nominatimlite")
library(tidyverse)
library(osmdata)
library(sfnetworks)
library(units)
library(sf)
library(tidygraph)
library(tmap)
library(here)
library(progress)
library(nominatimlite)
library(magrittr)
ttm()
Step 2. Get OSM data and clean it.
2-1. Download OSM.
Download OSM data, convert it to an sfnetwork object, and clean it.
# Bounding Box for the city you chose.
my_bb <- nominatimlite::geo_lite_sf("Toyosu, Japan", points_only = F) %>%
st_bbox()
# Get OSM data.
osm_road <- opq(bbox = my_bb) %>%
add_osm_feature(key = "highway",
value = c("motorway", "trunk", "primary",
"secondary", "tertiary", "unclassified",
"residential")) %>%
osmdata_sf() %>%
osm_poly2line()
# Convert the OSM line to sfnetwork and clean it.
net <- osm_road$osm_lines %>%
select(osm_id, highway) %>%
sfnetworks::as_sfnetwork(directed = FALSE) %>%
activate("edges") %>%
filter(!edge_is_multiple()) %>% # remove duplicated edges
filter(!edge_is_loop()) %>% # remove loops
convert(.,sfnetworks::to_spatial_subdivision) %>% # subdivide edges
convert(., sfnetworks::to_spatial_smooth) # delete pseudo nodes
## Warning: to_spatial_subdivision assumes attributes are constant over geometries
2-2. Extract edges.
Extract “edges” from the cleaned network, add length column. Then delete segments that are too short (< 100m). Finally, add a unique ID for each edge.
edges <- net %>%
# Extract 'edges'
st_as_sf("edges") %>%
# Drop redundant columns
select(osm_id, highway) %>%
# Add length column
mutate(length = st_length(.) %>% unclass()) %>%
# Drop segments that are too short (100m)
filter(length > 100) %>%
# Add a unique ID for each edge
mutate(edge_id = seq(1,nrow(.)))
Step 3. Get coordinates and headings.
We will download four images for street segment, one at the start of
a segment, two in the middle of a segment (each looking opposite to the
other), and one at the end of a segment. The code below, we will extract
a street segment (i.e., e below) and demonstrate how to
calculate azimuth for the start point, end point, and mid point.
# Select an edge for illustration
test_edge_n <- 12
test_edge <- edges %>% filter(edge_id == test_edge_n) %>%
st_geometry()
# View it
test_edge %>% st_coordinates() %>%
as.data.frame() %>%
st_as_sf(coords = c("X", "Y"), crs = 4326) %>%
with(
tm_basemap("OpenStreetMap") +
tm_shape(.) + tm_dots() +
tm_shape(.[1,]) + tm_dots(col = "red") + # Start = red
tm_shape(.[nrow(.),]) + tm_dots(col = "yellow") # End = yellow
)
3-1. Sample points and get azimuth.
Extract start, mid, and end points of each segment. For those points, calculate the azimuth.
# -----------------------------------------------------------
# First intersection
# First two points from a line
start_p <- test_edge %>%
st_coordinates() %>%
.[1:2,1:2]
# Calculate the azimuth of the line connecting the two points
start_azi <- atan2(start_p[2,"X"] - start_p[1, "X"],
start_p[2,"Y"] - start_p[1, "Y"])*180/pi # 180/pi because trigonometry in R takes radians
# -----------------------------------------------------------
# The other intersection
# Last two points from a line
end_p <- test_edge %>%
st_coordinates() %>%
.[(nrow(.)-1):nrow(.),1:2]
# Calculate the azimuth of the line connecting the two points
end_azi <- atan2(end_p[2,"X"] - end_p[1, "X"],
end_p[2,"Y"] - end_p[1, "Y"])*180/pi
# Flip the azimuth so that the camera would be looking back
end_azi <- if (end_azi < 180) {end_azi + 180} else {end_azi - 180}
# ----------------------------------------------------------
# mid point
mid_p3 <- test_edge %>%
.[[1]] %>%
st_line_sample(sample = c(0.45, 0.5, 0.55)) %>% # 0.45 and 0.55 are for azimuth calculation
st_cast("POINT") %>%
st_coordinates()
mid_p <- mid_p3[2,]
mid_azi <- atan2(mid_p3[2,"X"] - mid_p3[1, "X"],
mid_p3[2,"Y"] - mid_p3[1, "Y"])*180/pi
mid_azi2 <- ifelse(mid_azi < 180, mid_azi + 180, mid_azi - 180)
3-2. Define a function.
Define a function that performs Step 3-1.
getAzimuth <- function(line){
# end point 1 ----------------------------------------------
start_p <- line %>%
st_coordinates() %>%
.[1:2,1:2]
start_azi <- atan2(start_p[2,"X"] - start_p[1, "X"],
start_p[2,"Y"] - start_p[1, "Y"])*180/pi
# end point 2 ----------------------------------------------
end_p <- line %>%
st_coordinates() %>%
.[(nrow(.)-1):nrow(.),1:2]
end_azi <- atan2(end_p[2,"X"] - end_p[1, "X"],
end_p[2,"Y"] - end_p[1, "Y"])*180/pi
end_azi <- if (end_azi < 180) {end_azi + 180} else {end_azi - 180}
# mid point 1 ---------------------------------------------
mid_p3 <- line %>%
st_line_sample(sample = c(0.45, 0.5, 0.55)) %>%
st_cast("POINT") %>%
st_coordinates()
mid_p <- mid_p3[2,]
mid_azi <- atan2(mid_p3[2,"X"] - mid_p3[1, "X"],
mid_p3[2,"Y"] - mid_p3[1, "Y"])*180/pi
mid_azi2 <- ifelse(mid_azi < 180, mid_azi + 180, mid_azi - 180)
# return in data frame ------------------------------------
return(tribble(
~type, ~X, ~Y, ~azi,
"start", start_p[1,"X"], start_p[1,"Y"], start_azi,
"mid1", mid_p["X"], mid_p["Y"], mid_azi,
"mid2", mid_p["X"], mid_p["Y"], mid_azi2,
"end", end_p[2,"X"], end_p[2,"Y"], end_azi))
}
3-3. Apply the function to edges.
Apply the function to all street segments.
edges_azi <- edges %>%
st_geometry() %>%
map_df(getAzimuth, .progress = T)
edges_azi <- edges_azi %>%
bind_cols(edges %>%
st_drop_geometry() %>% # same as `st_set_geometry(NULL)`
slice(rep(1:nrow(edges),each=4))) %>%
st_as_sf(coords = c("X", "Y"), crs = 4326, remove=FALSE) %>%
mutate(node_id = seq(1, nrow(.)))
#st_write(edges_azi, "edges_azi_toyosu.geojson")
edges_azi
## Simple feature collection with 1672 features and 9 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 139.7665 ymin: 35.63225 xmax: 139.8216 ymax: 35.67058
## Geodetic CRS: WGS 84
## # A tibble: 1,672 × 10
## type X Y azi osm_id highway length edge_id
## * <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <int>
## 1 start 140. 35.6 -112. 4853035 trunk 1312. 1
## 2 mid1 140. 35.6 -118. 4853035 trunk 1312. 1
## 3 mid2 140. 35.6 61.8 4853035 trunk 1312. 1
## 4 end 140. 35.6 56.5 4853035 trunk 1312. 1
## 5 start 140. 35.6 -118. 4853039 tertiary 212. 2
## 6 mid1 140. 35.6 -119. 4853039 tertiary 212. 2
## 7 mid2 140. 35.6 61.4 4853039 tertiary 212. 2
## 8 end 140. 35.6 61.2 4853039 tertiary 212. 2
## 9 start 140. 35.7 -50.8 22823716 primary 120. 3
## 10 mid1 140. 35.7 -46.1 22823716 primary 120. 3
## # ℹ 1,662 more rows
## # ℹ 2 more variables: geometry <POINT [°]>, node_id <int>
Export the result
edges_azito a geojson file (e.g., edges_azi_{city_name}.geojson) usingst_write(). We will need this data later.
You may as well use the Chamblee data I created.
# Download images
edges_azi <- st_read("edges_azi_toyosu.geojson")
Step 4. Get GSV images.
4-1. Define a function.
Define a function that formats request URL and download images.
getImage <- function(iterrow){
type = iterrow$type
location <- paste0(iterrow$Y %>% round(5), ",", iterrow$X %>% round(5))
heading <- iterrow$azi %>% round(1)
edge_id <- iterrow$edge_id
node_id <- iterrow$node_id
highway <- iterrow$highway
key <- Sys.getenv("google_api") # your Google API key
endpoint <- "https://maps.googleapis.com/maps/api/streetview"
request <- glue::glue("{endpoint}?size=640x640&location={location}&heading={heading}&fov=90&pitch=0&key={key}")
fname <- glue::glue("GSV-nid_{node_id}-eid_{edge_id}-type_{type}-Location_{location}-heading_{heading}-highway_{highway}.jpg")
fpath <- here("toyosu_images", fname) # you may need to change this
download.file(request, fpath, mode = 'wb')
}
4-2. Download the images.
Download the images using the custom function
getImage(). KEEP IN MIND THAT THIS WILL TAKE
A SIGNIFICANT AMOUNT OF TIME TO COMPLETE. FOR CHAMBLEE, IT TOOK ALMOST
30 MINUTES.ADDITIONALLY, WHEN RUNNING
THE FOLLOWING CODE CHUNK, MONITOR YOUR API USAGE IN THE GOOGLE DEVELOPER
CONSOLE TO AVOID UNEXPECTED CHARGES
# Create a progress bar to monitor the progress
#pb <- txtProgressBar(min = 0, max = nrow(edges_azi), style = 3)
# Loop!
#for (i in seq(1,nrow(edges_azi))){
#getImage(edges_azi[i,])
#setTxtProgressBar(pb, i)
#}
ZIP THE DOWNLOADED IMAGES AND NAME IT ‘gsv_images.zip’ FOR THE SUBSEQUENT ANALYSIS.
(Optional) Another way to sample points.
Previously, we sampled four points per street segment. However, other studies have used different methods of sampling GSV images. One of the most widely used sampling method is to download one panoramic image at some fixed distance interval, such as 20 meters. This can be easily achieved by a small change to the code shown above.
Here, two sets of points are generated, (1) for actual location of points and (2) for the azimuth calculation. Notice that the points for azimuth calculation are twice as many as the points for actual location.
# Extract a curvy line
z <- edges %>% filter(edge_id == test_edge_n)
tm_basemap("OpenStreetMap") + tm_shape(z) + tm_lines()
# calculate what proportion equals 40 in the given segment
prop <- 40/z$length
# Vector for actual points
sample_actual <- c(seq(0, 1, by=prop),1)
# Vector for two points before/after the actual points for azimuth calculation
sample_azi <- c(sample_actual-0.02, sample_actual+0.02) %>% sort()
# Sample points
sampled_actual <- z %>% st_transform(32616) %>% st_line_sample(sample = sample_actual) %>% st_cast("POINT")
sampled_azi <- z %>% st_transform(32616) %>% st_line_sample(sample = sample_azi) %>% st_cast("POINT")
# View them
tm_basemap("OpenStreetMap") +
tm_shape(sampled_actual) + tm_dots(col = "red", size=0.07) +
tm_shape(sampled_azi) + tm_dots(col = "blue", alpha = 0.5)
# Calculate azimuth
point_azi <- sampled_actual %>%
st_sf() %>%
mutate(azi = NA)
for (i in 1:nrow(point_azi)){
j <- i*2 - 1
point_azi[i, "azi"] <- sampled_azi %>%
st_cast("POINT") %>%
.[j:(j+1)] %>%
st_coordinates() %>%
as.data.frame() %>%
with(
.[2,] - .[1,]
) %>%
with(
atan2(.[["X"]], .[["Y"]])*180/pi
)
}
segmentation results
We are going to do quick analyses using the semantic segmentation model results.
Merge the data processed from Colab back to R.
If you are having trouble running the Colab script, you may use these files: geojson and csv.
# Load the sampled points.
edges_azi <- st_read("edges_azi_toyosu.geojson") # path to your "edges_azi_{city_name}.geojson" file
## Reading layer `edges_azi_toyosu' from data source
## `C:\Users\kz\Desktop\Study\UA_Class\Spring_2025\Tokyo Studio\Data\2025 Data\GSV\Toyosu_GSV_Analysis\edges_azi_toyosu.geojson'
## using driver `GeoJSON'
## Simple feature collection with 1672 features and 9 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 139.7665 ymin: 35.63225 xmax: 139.8216 ymax: 35.67058
## Geodetic CRS: WGS 84
# Download the output from the computer vision models.
seg_output <- read.csv("seg_output_toyosu.csv") # path to your "seg_output.csv" file
# Join them back to the `edges_azi` object that was used to download images.
edges_seg_output <- edges_azi %>%
inner_join(seg_output, by=c("node_id"="img_id"))
The numeric values in each object column represent the number of pixels in images predicted as that object. Check out the first few rows and see which objects comprise the majority of streetscapes.
head(edges_seg_output)
## Simple feature collection with 6 features and 159 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 139.7894 ymin: 35.63411 xmax: 139.8026 ymax: 35.64059
## Geodetic CRS: WGS 84
## type X Y azi osm_id highway length edge_id node_id
## 1 start 139.8026 35.64059 -112.09098 4853035 trunk 1311.5636 1 1
## 2 mid1 139.7965 35.63738 -118.16681 4853035 trunk 1311.5636 1 2
## 3 mid2 139.7965 35.63738 61.83319 4853035 trunk 1311.5636 1 3
## 4 end 139.7904 35.63411 56.49810 4853035 trunk 1311.5636 1 4
## 5 start 139.7904 35.63800 -118.08810 4853039 tertiary 211.6149 2 5
## 6 mid1 139.7894 35.63747 -118.56614 4853039 tertiary 211.6149 2 6
## wall building sky floor tree ceiling road bed windowpane grass cabinet
## 1 538 33113 147594 0 247 0 175886 0 0 7789 0
## 2 12127 113727 107533 0 2 0 153895 0 0 0 0
## 3 1780 22222 142336 0 2425 0 161910 0 0 0 0
## 4 822 39644 79217 0 3854 48476 170136 0 0 1006 0
## 5 91 35180 151183 0 26341 0 175436 0 0 1633 0
## 6 1629 85328 95583 0 28069 0 155884 0 0 0 0
## sidewalk person earth door table mountain plant curtain chair car water
## 1 3972 0 604 0 0 0 1223 0 0 0 0
## 2 5543 0 1721 0 0 0 0 0 0 105 0
## 3 0 0 5692 0 0 0 0 0 0 381 0
## 4 0 0 4625 0 0 0 0 0 0 3048 0
## 5 5676 1275 0 0 0 0 0 0 0 10323 0
## 6 27496 0 0 0 0 0 1949 0 0 1352 0
## painting sofa shelf house sea mirror rug field armchair seat fence desk rock
## 1 0 0 0 0 0 0 0 0 0 0 11740 0 0
## 2 0 0 0 0 0 0 0 0 0 0 13650 0 0
## 3 0 0 0 0 0 0 0 410 0 0 43596 0 0
## 4 0 0 0 0 0 0 0 0 0 0 25528 0 0
## 5 0 0 0 0 0 0 0 0 0 0 2188 0 0
## 6 0 0 0 0 0 0 0 0 0 0 11807 0 0
## wardrobe lamp bathtub railing cushion base box column signboard
## 1 0 0 0 0 0 0 0 0 2806
## 2 0 0 0 0 0 0 0 0 119
## 3 0 0 0 230 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 5
## 5 0 0 0 0 0 0 0 0 145
## 6 0 0 0 15 0 0 0 0 0
## chest.of.drawers counter sand sink skyscraper fireplace refrigerator
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## grandstand path stairs runway case pool.table pillow screen.door stairway
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## river bridge bookcase blind coffee.table toilet flower book hill bench
## 1 0 23918 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 28042 0 0 0 0 0 0 157 0
## 4 0 26441 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## countertop stove palm kitchen.island computer swivel.chair boat bar
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## arcade.machine hovel bus towel light truck tower chandelier awning
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 6798 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 488 0 0 0 0 0 0
## streetlight booth television.receiver airplane dirt.track apparel pole land
## 1 170 0 0 0 0 0 0 0
## 2 1149 0 0 0 0 0 29 0
## 3 294 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 122 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## bannister escalator ottoman bottle buffet poster stage van ship fountain
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 7 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## conveyer.belt canopy washer plaything swimming.pool stool barrel basket
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 125 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## waterfall tent bag minibike cradle oven ball food step tank trade.name
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## microwave pot animal bicycle lake dishwasher screen blanket sculpture hood
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## sconce vase traffic.light tray ashcan fan pier crt.screen plate monitor
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## bulletin.board shower radiator glass clock flag geometry
## 1 0 0 0 0 0 0 POINT (139.8026 35.64059)
## 2 0 0 0 0 0 0 POINT (139.7965 35.63738)
## 3 0 0 0 0 0 0 POINT (139.7965 35.63738)
## 4 0 0 0 0 0 0 POINT (139.7904 35.63411)
## 5 0 0 0 0 0 0 POINT (139.7904 35.638)
## 6 0 0 0 0 0 0 POINT (139.7894 35.63747)
Calculate Greenness
Let’s define greenness as the sum of tree, grass, and plant.
edges_seg_output %<>% # `a %<>%` is the same as `a <- a %>%`; you need to import `magrittr` to use this pipe
mutate(greenness = tree + grass + plant,
pct_greenness = greenness/(640*640))
# Map!
t1 <- tm_basemap("OpenStreetMap")+
tm_shape(edges_seg_output) +
tm_dots(col = "pct_greenness", style="quantile", palette = 'viridis')
t2 <- tm_basemap(leaflet::providers$Esri.WorldImagery) +
tm_shape(edges_seg_output %>% st_bbox() %>% st_as_sfc()) +
tm_borders(lwd = 2, col = 'white')
tmap_arrange(t1, t2, sync = T)
Calculate Building-to-street Ratio
Let’s calculate building-to-street ratio. Major ‘building’ objects include building and house. Major ‘street’ objects include road, sidewalk, and car.
edges_seg_output %<>%
mutate(b2s_ratio = (building+house)/(road+sidewalk+car)) %>%
mutate(b2s_ratio = case_when(
b2s_ratio >= 1 ~ 1, # Let's set the upper limit as 1.
TRUE ~ b2s_ratio))
t1 <- tm_basemap("OpenStreetMap")+
tm_shape(edges_seg_output) +
tm_dots(col = "b2s_ratio", style="quantile", palette = 'viridis')
t2 <- tm_basemap(leaflet::providers$Esri.WorldImagery) +
tm_shape(edges_seg_output %>% st_bbox() %>% st_as_sfc()) +
tm_borders(lwd = 2, col = 'white')
tmap_arrange(t1, t2, sync = T)
Violin plots comparing the segmentation results by type of road
Are there significant differences between road types?
edges_seg_output %<>%
filter(!is.na(highway)) %>%
mutate(highway = factor(highway, levels = c('unclassified','residential','tertiary',
'secondary','primary','trunk'))) %>%
mutate(pct_building = building/(640*640),
pct_sky = sky/(640*640),
pct_road = road/(640*640),
pct_sidewalk = sidewalk/(640*640))
edges_seg_output %>%
pivot_longer(cols = c(b2s_ratio, pct_greenness, pct_building,
pct_sky, pct_road, pct_sidewalk),
names_to = 'variable',
values_to = "value") %>%
ggplot(mapping = aes(x = highway, y = value)) +
geom_violin(fill = "#c7ffd6") +
stat_summary(fun=mean, geom="point", size=1, color="red")+
coord_flip() +
theme_bw() +
facet_wrap(~variable, scales = "free_x", nrow = 2)
## Warning: Removed 32 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 32 rows containing non-finite outside the scale range
## (`stat_summary()`).