This short note accompanies the “The voice of users - analysing mobile app reviews” post on UXBooth and presents sample code for downloading user app reviews from Google Play.
In the first step, a connection to the Mashape API is set up. To deal with the API’s minor bugs, a number of workarounds are included in the code.
library(RCurl);
library(jsonlite);
# Request template for one page of reviews; filled in with sprintf():
# %s is the app id and %d is the page number.
urlPattern <- "https://gplaystore.p.mashape.com/applicationReviews?id=%s&lang=en&page=%d"

# Progress line template: app id, page number, number of reviews on the page,
# and the author of the first review on the page.
progressMessage <- "%s : page %d downloaded; %d reviews. First author: %s"

# HTTP headers sent with every request. Replace the placeholder with your own
# Mashape key before running the script.
headers <- c(
  "X-Mashape-Key" = "YOUR MASHAPE KEY",
  "Accept" = "application/json"
)
# Download one URL and parse the response body as JSON.
#
# Args:
#   pageUrl: URL to request. The global `headers` vector is sent with it.
#
# Returns:
#   Whatever fromJSON() produces for the response body, or NA when the
#   download or the parsing raised an error. The error is reported with
#   message() and not re-thrown, so callers can test for NA and retry.
helper_tryFetchJSON <- function(pageUrl) {
  tryCatch(
    {
      # WARNING: ssl.verifyhost = 0L and ssl.verifypeer = 0L switch off SSL
      # verification. This may not be a good idea for security reasons, but
      # it works around the problems the RCurl package has with SSL on
      # Windows. On Unix-based systems the code should run without these two
      # arguments.
      got <- getURL(
        pageUrl,
        httpheader = headers,
        ssl.verifyhost = 0L,
        ssl.verifypeer = 0L
      )
      fromJSON(got)
    },
    error = function(cond) {
      message(paste("error downloading ", pageUrl))
      # Report only the text of the error. Handing the condition object itself
      # to message() would signal that error condition again, so an enclosing
      # try()/tryCatch() in a caller would catch it and abort before the NA
      # sentinel is returned; it would also print without a trailing newline.
      message(conditionMessage(cond))
      NA
    }
  )
}
# Download all review pages for one app and stack them into one data frame.
#
# Pages are requested one after another, starting at page 1, through
# helper_tryFetchJSON(). A failed download (signalled by NA) is retried after
# a pause; paging stops when a page carries an `error` field, when a page is
# empty, or when a page holds fewer than `pageSize` reviews.
#
# Args:
#   appId:      App identifier inserted into `urlPattern`.
#   maxRetries: Number of retries allowed for a single page after a failed
#               download before giving up with an error.
#   retryDelay: Seconds to wait before each retry.
#   pageSize:   Number of reviews on a full page; a shorter page is treated
#               as the last one.
#
# Returns:
#   A data frame with the flattened reviews of every downloaded page plus an
#   `isoDate` column parsed from `date`, or NULL when no page was collected.
#
# Raises:
#   An error when a single page still fails after `maxRetries` retries.
fetchAllReviews <- function(appId, maxRetries = 20L, retryDelay = 2,
                            pageSize = 40L) {
  page <- 1L
  pages <- list()

  repeat {
    pageUrl <- sprintf(urlPattern, appId, page)

    failures <- 0L
    repeat {
      pageJSON <- helper_tryFetchJSON(pageUrl)
      # The helper returns a bare NA on failure. Comparing with identical()
      # keeps the condition a single TRUE/FALSE; is.na() on a whole page of
      # results yields one value per cell, which if() rejects.
      if (!identical(pageJSON, NA)) {
        break # success!
      }
      failures <- failures + 1L
      if (failures > maxRetries) {
        stop("The internet is not working. Turn off/on?")
      }
      Sys.sleep(retryDelay)
    }

    if (length(pageJSON) == 0L) {
      # empty payload: there is nothing to append, so stop paging
      break
    }
    if (is.list(pageJSON) && !is.null(pageJSON[["error"]])) {
      # this page got an error, most likely it means no more reviews
      break
    }

    # R does not like nested lists in dataframes
    pageContent <- flatten(pageJSON)
    # NOTE(review): "%B" matches month names in the current locale - confirm
    # the script runs under an English locale.
    pageContent$isoDate <- as.Date(pageJSON$date, format = "%B %d, %Y")
    print(sprintf(
      progressMessage, appId, page, nrow(pageContent),
      pageContent[1, "author.name"]
    ))

    # Keep the page in a list and bind once at the end; growing a data frame
    # with rbind() on every iteration copies all earlier rows each time.
    pages[[length(pages) + 1L]] <- pageContent
    page <- page + 1L

    if (nrow(pageJSON) < pageSize) {
      # API bug: when we fetch a page after the end of results, we don't get
      # an error right away - the API just repeats the last page of results.
      # A short page is therefore taken as the last one.
      break
    }
  }

  print("-----------------------------------------------")

  if (length(pages) == 0L) {
    return(NULL)
  }
  do.call(rbind, c(pages, list(make.row.names = FALSE)))
}
Once the connection is established, we can select the apps we are interested in and download the data.
appIds <- c("com.spotify.music", "com.facebook.katana") # Two popular apps used as an example

# Fetch the reviews of every app and tag each row with the app it belongs to.
z <- lapply(appIds, function(appId) {
  reviews <- fetchAllReviews(appId)
  if (is.null(reviews) || nrow(reviews) == 0L) {
    # No reviews came back for this app. Assigning `$appId` on NULL would
    # turn it into a one-element list and break the rbind() below, so the
    # app is skipped instead.
    return(NULL)
  }
  reviews$appId <- appId
  reviews
})

# Stack the per-app data frames (NULL entries are ignored by rbind) and save
# the combined table next to the script.
outDs <- do.call("rbind", z)
write.csv(outDs, "./androidFetch.csv", row.names = FALSE)