library(httr)
library(jsonlite)
# Endpoint of the English Wikipedia MediaWiki API; the query helpers in this
# script pass it to GET().
url <- "https://en.wikipedia.org/w/api.php"
# Coordinates of Bern and Zurich, stored as c(latitude, longitude):
# element 1 is passed as `lat` and element 2 as `lon` to
# get_articles_in_radius().
bern_coor <- c(46.949722, 7.439444)
zurich_coor <- c(47.377455, 8.539688)
# Find Wikipedia articles located around a coordinate.
#
# Sends a `list=geosearch` query to the script-level `url` endpoint.
#
# Arguments:
#   lat    Latitude of the search centre.
#   lon    Longitude of the search centre.
#   radius Search radius, sent as `gsradius` (metres according to the
#          GeoData API documentation). Defaults to 1000.
#
# Returns the parsed `query$geosearch` element of the response. When articles
# were found this is a data frame with one row per article (the callers read
# its `title` column); when nothing matched it is an empty result.
#
# Stops with an error if the HTTP request fails or the API reports an error,
# instead of silently returning NULL.
get_articles_in_radius <- function(lat, lon, radius = 1000) {
  response <- GET(url, query = list(
    action = "query",
    format = "json",
    list = "geosearch",
    gscoord = paste(lat, lon, sep = "|"),
    gsradius = radius,
    gslimit = 500
  ))
  # Surface transport-level failures (4xx/5xx) right away.
  stop_for_status(response)
  content_json <- fromJSON(content(response, "text", encoding = "UTF-8"))
  # The API reports problems such as an out-of-range radius in an `error`
  # member of the JSON body; without this check the caller would just see
  # NULL and report "no articles".
  if (!is.null(content_json$error)) {
    stop("Wikipedia API error: ", content_json$error$info, call. = FALSE)
  }
  content_json$query$geosearch
}
# Bern
bern_articles <- get_articles_in_radius(bern_coor[1], bern_coor[2])
# NROW() equals dim(x)[1] for a data frame but also yields 0 for an empty
# result, where dim(x)[1] would be NULL and cat() would print no number.
cat("Number of articles in Bern:", NROW(bern_articles), "\n")
## Number of articles in Bern: 67
# Zurich
zurich_articles <- get_articles_in_radius(zurich_coor[1], zurich_coor[2])
cat("Number of articles in Zurich:", NROW(zurich_articles), "\n")
## Number of articles in Zurich: 93
For the comparison of the two cities, I considered only images with the file extension .jpg or .JPG. Commons-logo.svg and files with any other extension are not counted.
# Count the JPEG images used on one Wikipedia article.
#
# Sends `prop=images` queries for `title` to the script-level `url` endpoint
# and counts the returned file titles that end in ".jpg" (case-insensitive).
#
# Arguments:
#   title  Title of the article to inspect.
#
# Returns a single integer: the number of matching images (0 when the page
# lists no images).
#
# Stops with an error if an HTTP request fails.
get_image_count <- function(title) {
  # The API returns only 10 images per request by default, which would cap
  # every article at 10. Ask for the maximum batch size and follow the
  # continuation tokens so that all images are counted.
  params <- list(
    action = "query",
    format = "json",
    titles = title,
    prop = "images",
    imlimit = "max"
  )
  jpg_count <- 0L
  repeat {
    response <- GET(url, query = params)
    stop_for_status(response)
    content_json <- fromJSON(content(response, "text", encoding = "UTF-8"))
    pages <- content_json$query$pages
    if (length(pages) > 0) {
      # consider only .jpg and .JPG images
      image_titles <- pages[[1]]$images$title
      jpg_count <- jpg_count +
        sum(grepl("\\.jpg$", image_titles, ignore.case = TRUE))
    }
    # A `continue` member means more images are available; its fields have to
    # be merged into the next request.
    if (is.null(content_json$continue)) {
      break
    }
    params <- modifyList(params, content_json$continue)
  }
  jpg_count
}
# Number of images Bern
# vapply() always returns an integer vector, so sum() yields 0 for an empty
# article list; sapply() would return list() there and make sum() fail.
bern_image_count <- sum(
  vapply(bern_articles$title, get_image_count, integer(1))
)
cat("Number of pictures Bern:", bern_image_count, "\n")
## Number of pictures Bern: 124
# Number of images Zurich
zurich_image_count <- sum(
  vapply(zurich_articles$title, get_image_count, integer(1))
)
cat("Number of pictures Zurich:", zurich_image_count, "\n")
## Number of pictures Zurich: 299
# Winner: the city whose articles contain more JPEG images
if (bern_image_count > zurich_image_count) {
  cat("Bern is popularer")
} else if (zurich_image_count > bern_image_count) {
  cat("Zurich is popularer")
} else {
  # Equal counts previously produced no output at all.
  cat("Bern and Zurich are equally popular")
}
## Zurich is popularer