# Authored mostly by ChatGPT
# I had no part in this monstrosity except its acceptance.
# Most of this is unnecessary and you should ideally just write something yourself to process the JSONs.
import json
import csv
import os
from datetime import datetime
# Google Takeout stores coordinates in "E7" fields (latitudeE7 / longitudeE7);
# dividing by this factor turns them into decimal degrees.
COORDINATE_FACTOR = 1e7
def extract_node_info(activity, key_type, nodes, node_writer, node_id):
    """Return the node ID for one end of an activity, registering it if new.

    Args:
        activity: An ``activitySegment`` dict containing a
            ``f"{key_type}Location"`` entry with ``latitudeE7`` and
            ``longitudeE7`` values.
        key_type: Prefix of the location entry to read (the caller in this
            file passes ``'start'`` or ``'end'``).
        nodes: Dict mapping ``"lat,long"`` strings to node IDs. Mutated in
            place when a new coordinate pair is seen.
        node_writer: Writer whose ``writerow`` receives a dict with the keys
            ``ID``, ``latitude`` and ``longitude`` for every new node.
        node_id: The ID to assign to the next unseen node.

    Returns:
        A ``(id_of_this_location, next_free_node_id)`` tuple. The second
        element equals ``node_id`` when the location was already known and
        ``node_id + 1`` when a new node was written.

    Raises:
        KeyError: If the location entry or either E7 coordinate is missing.
    """
    location = activity[f"{key_type}Location"]
    lat = location['latitudeE7'] / COORDINATE_FACTOR
    lon = location['longitudeE7'] / COORDINATE_FACTOR

    # Nodes are de-duplicated on the formatted coordinate pair.
    key = f"{lat},{lon}"
    if key not in nodes:
        nodes[key] = node_id
        node_writer.writerow({'ID': node_id, 'latitude': lat, 'longitude': lon})
        node_id += 1
    return nodes[key], node_id
def extract_edge_info(activity, start_id, end_id):
    """Build the edges.csv row describing one activity segment.

    Args:
        activity: An ``activitySegment`` dict with ``activityType`` and a
            ``duration`` dict holding ``startTimestamp`` / ``endTimestamp``
            strings. ``distance`` is optional.
        start_id: Node ID of the segment's start location.
        end_id: Node ID of the segment's end location.

    Returns:
        A dict with the keys ``StartID``, ``EndID``, ``activityType``,
        ``distance`` (``'N/A'`` when absent), ``duration`` (``"<start> to
        <end>"``) and ``month`` (``YYYY-MM`` of the start timestamp).

    Raises:
        KeyError: If ``activityType`` or either timestamp is missing.
        ValueError: If the start timestamp is not ISO-8601 parseable.
    """
    duration = activity['duration']
    started = duration['startTimestamp']
    ended = duration['endTimestamp']

    # fromisoformat() does not accept a trailing 'Z' on older Python versions,
    # so spell the UTC offset out explicitly before parsing.
    started_at = datetime.fromisoformat(started.replace('Z', '+00:00'))

    return {
        'StartID': start_id,
        'EndID': end_id,
        'activityType': activity['activityType'],
        'distance': activity.get('distance', 'N/A'),
        'duration': f"{started} to {ended}",
        'month': started_at.strftime('%Y-%m'),
    }
def extract_coordinates_to_csv(folder_path, nodes_csv, edges_csv):
    """Convert every ``*.json`` file in a folder into a nodes CSV and an edges CSV.

    Each file is expected to hold a top-level ``timelineObjects`` list. Entries
    without an ``activitySegment`` are skipped; for the rest, the start and end
    locations are written (once each) to ``nodes_csv`` and the trip itself to
    ``edges_csv``.

    Args:
        folder_path: Directory that contains the JSON files.
        nodes_csv: Output path for the node table (ID, latitude, longitude).
        edges_csv: Output path for the edge table (StartID, EndID,
            activityType, distance, duration, month).

    Raises:
        KeyError: If a JSON file has no ``timelineObjects`` key, or a segment
            lacks a field required by the row builders.
    """
    edge_fields = ['StartID', 'EndID', 'activityType', 'distance', 'duration', 'month']
    node_fields = ['ID', 'latitude', 'longitude']

    nodes = {}
    node_id = 0

    # os.listdir() returns names in arbitrary order; sorting makes the node IDs
    # reproducible from one run to the next.
    json_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.json')
    )

    with open(edges_csv, 'w', newline='') as edgefile, \
            open(nodes_csv, 'w', newline='') as nodefile:
        edge_writer = csv.DictWriter(edgefile, fieldnames=edge_fields)
        node_writer = csv.DictWriter(nodefile, fieldnames=node_fields)
        edge_writer.writeheader()
        node_writer.writeheader()

        for filename in json_names:
            json_file = os.path.join(folder_path, filename)
            # Read as UTF-8 explicitly instead of relying on the platform
            # default encoding.
            with open(json_file, 'r', encoding='utf-8') as f:
                location_data = json.load(f)

            for segment in location_data['timelineObjects']:
                if 'activitySegment' not in segment:
                    continue
                activity = segment['activitySegment']

                # node_id is threaded through both calls so that IDs keep
                # increasing across segments and files.
                start_id, node_id = extract_node_info(
                    activity, 'start', nodes, node_writer, node_id)
                end_id, node_id = extract_node_info(
                    activity, 'end', nodes, node_writer, node_id)

                edge_writer.writerow(extract_edge_info(activity, start_id, end_id))
# Enter the relative path to your data folder before running
if __name__ == '__main__':
    folder_path = './data'
    nodes_csv = 'nodes.csv'
    edges_csv = 'edges.csv'
    extract_coordinates_to_csv(folder_path, nodes_csv, edges_csv)
Everywhere I go
Mapping your movements with Google Takeout
This is a small guide to create something like this map I made. The code is a little here and there but hopefully enough to get you going. Unless noted otherwise, the code is written by me and you’re free to do with it what you want.
Processing
The source of the data is my Google Takeout, which I exported for this year. You can do this for any length of time. This essentially gives you JSON files of this kind:
We want to compile these into a more sensible format; I wanted CSVs. The following Python code reads in these JSONs and creates two files, `nodes.csv` (point data) and `edges.csv`. I did this because I wanted to make a flow map, but each JSON also has start coordinates and end coordinates, so it might be simpler to process those directly. Anyway, here’s what 15 mins with ChatGPT threw up (sorry):
Getting routes between points
Mapbox Navigation API has a very generous free tier and it didn’t ask me for my credit card so it was an obvious choice. Since Google Takeout also classifies activities by travel mode (driving, walking, cycling, bus), I could also use Mapbox’s ‘profiles’ to approximate the route I would have taken in that mode.
Also, I get around almost exclusively with the help of Google Maps, so there’s a good chance that whatever linestrings these services returned were the paths I had actually taken.
First join the nodes and edges. We’re back in R land by the way.
library(sf)
library(tidyverse)
library(mapboxapi)
# Read the two tables written by the Python step: edges.csv holds one row per
# trip (StartID, EndID, ...), nodes.csv one row per unique location
# (ID, latitude, longitude).
paths <- read_csv("edges.csv")
points <- read_csv("nodes.csv")

# Attach coordinates to both ends of every trip. The node table is joined
# twice, once on StartID and once on EndID, and the coordinate columns are
# renamed after each join so the second join does not collide with the first.
paths <- paths %>%
  left_join(points, by = c("StartID" = "ID")) %>%
  rename(StartLat = latitude, StartLong = longitude) %>%
  left_join(points, by = c("EndID" = "ID")) %>%
  rename(EndLat = latitude, EndLong = longitude)
Then use the API to get routes between each start and end point. I also simplified the travel modes to just walking, cycling and driving.
#' Request a route between two points from the Mapbox Directions API
#'
#' @param start_lat,start_long Latitude and longitude of the origin.
#' @param end_lat,end_long Latitude and longitude of the destination.
#' @param profile Routing profile forwarded to `mb_directions()`. Defaults to
#'   "driving", which is what this function always used before the
#'   parameter existed.
#' @return The object returned by `mb_directions()` for this request.
get_route_linestring <- function(start_lat, start_long, end_lat, end_long,
                                 profile = "driving") {
  # Coordinates are handed to mb_directions() as c(longitude, latitude).
  # Don't forget your access token: either register it once with
  # mapboxapi::mb_access_token() or add `access_token = "..."` to this call.
  mb_directions(
    origin = c(start_long, start_lat),
    destination = c(end_long, end_lat),
    profile = profile,
    geometries = "geojson"
  )
}
# Run on sample first to see if everything works well: swap `paths` for
# `sample` in the pipeline below before spending API calls on the full table.
sample <- paths %>% head(10)

data <- paths %>%
  # Keep the three most frequent activity types; everything else is lumped
  # together.
  mutate(activityType = fct_lump(activityType, 3)) %>%
  # Collapse the remaining labels onto the profile names used for routing.
  # Anything that is not cycling or walking is treated as driving.
  mutate(
    activityType = case_when(
      activityType == "CYCLING" ~ "cycling",
      activityType == "WALKING" ~ "walking",
      TRUE ~ "driving"
    )
  ) %>%
  # One Directions request per trip, so evaluate the next mutate() row by row.
  rowwise() %>%
  mutate(route = {
    route_data <- mb_directions(
      origin = c(StartLong, StartLat),
      destination = c(EndLong, EndLat),
      profile = activityType,
      geometries = "geojson"
    )$geometry
    route_data
  }) %>%
  ungroup()

# Promote the table to an sf object (WGS 84) and export it as GeoJSON.
data_sf <- st_as_sf(data, crs = 4326)
st_write(data_sf, "movements.geojson", driver = "GeoJSON")
You could also visualize it right inside RStudio:
# leaflet() and addPolylines() come from the leaflet package, which the
# library() calls earlier in this guide do not attach.
library(leaflet)

# One polyline layer per travel mode, each drawn in its own colour.
leaflet(data = data) %>%
  addTiles() %>%
  addPolylines(
    data = st_as_sf(data %>% filter(activityType == "driving")),
    color = "red",
    weight = 0.5
  ) %>%
  addPolylines(
    data = st_as_sf(data %>% filter(activityType == "cycling")),
    color = "blue",
    weight = 0.5
  ) %>%
  addPolylines(
    data = st_as_sf(data %>% filter(activityType == "walking")),
    color = "green",
    weight = 0.5
  )
Daz all, folks.