This short note accompanies the “The voice of users - analysing mobile app reviews” post on UXBooth and presents sample code for downloading user app reviews from Google Play.

In the first step, a connection to the Mashape API is established. To deal with the API’s minor bugs, a number of fixes are included in the code.

library(RCurl);
library(jsonlite);

# sprintf() template for one page of reviews: %s = application id, %d = page number.
urlPattern <- paste0(
  "https://gplaystore.p.mashape.com/applicationReviews",
  "?id=%s&lang=en&page=%d"
)

# sprintf() template for the per-page progress line:
# application id, page number, number of reviews, first review author.
progressMessage <- "%s : page %d downloaded; %d reviews. First author: %s"

# HTTP request headers sent with every call; replace the placeholder with a real key.
headers <- c("YOUR MASHAPE KEY", "application/json")
names(headers) <- c("X-Mashape-Key", "Accept")

# Download one URL and parse the response body as JSON.
#
# Args:
#   pageUrl:   URL to download (sent with the global `headers`).
#   verifySSL: When FALSE (the default, matching the original behaviour) the
#              request is made with SSL host/peer verification switched off.
#              When TRUE, RCurl's default verification settings are used.
#
# Returns:
#   The value produced by fromJSON() on success, or a single NA if the
#   download or the parsing raised an error (the error text is reported via
#   message()).
helper_tryFetchJSON <- function(pageUrl, verifySSL = FALSE) {
  tryCatch({
    got <- if (verifySSL) {
      getURL(pageUrl, httpheader = headers)
    } else {
      # WARNING: ssl.verifyhost = 0L and ssl.verifypeer = 0L disable SSL
      # verification. This may not be a good idea for security reasons, but it
      # works around the issues the RCurl package has with SSL on Windows.
      # On Unix-based systems the call should work with verifySSL = TRUE.
      getURL(pageUrl, httpheader = headers,
             ssl.verifyhost = 0L, ssl.verifypeer = 0L)
    }
    fromJSON(got)
  },
  error = function(cond) {
    message(paste("error downloading ", pageUrl))
    # Report only the text of the error. Passing the condition object itself
    # to message() would re-signal it as an error, so an enclosing
    # tryCatch(error = ...) could intercept it and the NA below would never
    # be returned.
    message(conditionMessage(cond))
    NA
  })
}

# Download every page of reviews for one application and stack them into a
# single data frame.
#
# Args:
#   appId:      Application id substituted into the global `urlPattern`.
#   maxRetries: How many times a failed page download is retried before
#               giving up (default 20, as in the original code).
#   retryDelay: Seconds to wait between retries (default 2).
#
# Returns:
#   A data frame with one row per review (flattened JSON columns plus an
#   `isoDate` column parsed from `date`), or NULL when no page yielded reviews.
#
# Raises:
#   An error when a page still cannot be downloaded after `maxRetries`
#   retries, or when the response is neither an error object, an empty
#   result, nor a data frame.
fetchAllReviews <- function(appId, maxRetries = 20L, retryDelay = 2) {
  # helper_tryFetchJSON() reports failure with a single NA. A successful
  # response is a data frame / list, for which is.na() returns many values;
  # using that directly as an `if` condition is an error in R >= 4.2, so the
  # sentinel is tested explicitly.
  isFetchFailure <- function(x) {
    is.atomic(x) && length(x) == 1L && is.na(x)
  }

  fullPageSize <- 40L  # a page with fewer reviews than this is the last one
  pages <- list()
  page <- 1

  repeat {
    pageUrl <- sprintf(urlPattern, appId, page)

    attempt <- 0L
    repeat {
      pageJSON <- helper_tryFetchJSON(pageUrl)
      if (!isFetchFailure(pageJSON)) {
        break  # success!
      }
      attempt <- attempt + 1L
      if (attempt > maxRetries) {
        stop("The internet is not working. Turn off/on?")
      }
      Sys.sleep(retryDelay)
    }

    if (is.list(pageJSON) && !is.null(pageJSON$error)) {
      # this page got an error, most likely it means no more reviews
      break
    }
    if (length(pageJSON) == 0L ||
        (is.data.frame(pageJSON) && nrow(pageJSON) == 0L)) {
      # empty response: nothing (more) to collect
      break
    }
    if (!is.data.frame(pageJSON)) {
      stop("Unexpected response format for ", pageUrl)
    }

    # R does not like nested lists in dataframes
    pageContent <- flatten(pageJSON)
    pageContent$isoDate <- as.Date(pageJSON$date, format = "%B %d, %Y")

    print(sprintf(progressMessage, appId, page, nrow(pageContent),
                  pageContent[1, "author.name"]))

    # Collect pages in a list and bind once at the end instead of growing a
    # data frame with rbind() on every iteration.
    pages[[length(pages) + 1L]] <- pageContent
    page <- page + 1

    if (nrow(pageJSON) < fullPageSize) {
      # API bug: when we fetch a page after the end of results, don't get an
      # error right away - just repeats the last page of results
      break
    }
  }
  print('-----------------------------------------------')

  if (length(pages) == 0L) {
    return(NULL)
  }
  do.call(rbind, c(pages, list(make.row.names = FALSE)))
}

Once the connection is established, we can select the apps we are interested in and download the data.

# Applications whose reviews are downloaded.
appIds <- c("com.spotify.music", "com.facebook.katana")  # Two popular apps used as an example

# Fetch the reviews of each application and tag every row with its app id.
z <- lapply(appIds, function(appId) {
  reviews <- fetchAllReviews(appId)
  if (is.null(reviews)) {
    # No reviews were returned for this app. Assigning `$appId` on NULL would
    # create a plain list and corrupt the rbind() below, so skip it instead
    # (rbind() ignores NULL entries).
    return(NULL)
  }
  reviews$appId <- appId
  reviews
})

# Stack the per-app data frames and write them to a single CSV file.
outDs <- do.call("rbind", z)
write.csv(outDs, './androidFetch.csv', row.names = FALSE)