This short note accompanies the post “The voice of users - analysing mobile app reviews” on UXBooth and presents sample code for downloading user app reviews from iTunes.
In the first step we need to identify the list of countries and apps.
require(XML)
# iTunes storefront (territory) codes, one two-letter code per store.
# A complete list of iTunes territories as of January 2016.
countryId <- c(
  "AE", "AG", "AI", "AL", "AM", "AO", "AR", "AT", "AU", "AZ", "BB", "BE", "BF",
  "BG", "BH", "BJ", "BM", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA",
  "CG", "CH", "CL", "CN", "CO", "CR", "CV", "CY", "CZ", "DE", "DK", "DM", "DO",
  "DZ", "EC", "EE", "EG", "ES", "FI", "FJ", "FM", "FR", "GB", "GD", "GH", "GM",
  "GR", "GT", "GW", "GY", "HK", "HN", "HR", "HU", "ID", "IE", "IL", "IN", "IS",
  "IT", "JM", "JO", "JP", "KE", "KG", "KH", "KN", "KR", "KW", "KY", "KZ", "LA",
  "LB", "LC", "LK", "LR", "LT", "LU", "LV", "MD", "MG", "MK", "ML", "MN", "MO",
  "MR", "MS", "MT", "MU", "MW", "MX", "MY", "MZ", "NA", "NE", "NG", "NI", "NL",
  "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PT", "PW", "PY",
  "QA", "RO", "RU", "SA", "SB", "SC", "SE", "SG", "SI", "SK", "SL", "SN", "SR",
  "ST", "SV", "SZ", "TC", "TD", "TH", "TJ", "TM", "TN", "TR", "TT", "TW", "TZ",
  "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VG", "VN", "YE", "ZA", "ZW"
)

# Apps to fetch: appId[i] is the numeric store id, appOwner[i] the label that
# is written into the `appName` column for that app.
appId <- c(324684580, 284882215)
appOwner <- c("spotify", "facebook")
# The two vectors are indexed in parallel, so they must have the same length.
stopifnot(length(appId) == length(appOwner))

# Empty accumulator for the downloaded reviews. Its columns mirror the columns
# produced by the download step, so a run that fetches nothing still yields a
# well-formed (zero-row) table with a `reviewDate` column.
df <- data.frame(
  idR = character(),
  appVersion = character(),
  rating = character(),
  titleReview = character(),
  reviewBody = character(),
  reviewDate = character(),
  country = character(),
  appName = character(),
  stringsAsFactors = FALSE
)
# Second empty frame with the same schema, used by the download step as the
# data-frame anchor when binding each page of results.
df0 <- df
In the second stage we need to fetch the XML files for each app in each country. Each page contains up to 50 reviews.
For each downloaded page we extract the data we are interested in from the parsed file.
# Turn one parsed review feed into a data frame with one row per review.
#
# doc         - result of xmlParse(..., useInternalNodes = FALSE)
# countryCode - storefront code written to the `country` column
# appLabel    - app label written to the `appName` column
#
# Returns NULL when the feed holds no review entries. Review entries are read
# from child 14 of the root onwards (the first 13 children are presumably
# feed-level metadata -- confirm against the current feed layout).
parseReviewFeed <- function(doc, countryCode, appLabel) {
  feedChildren <- xmlChildren(xmlRoot(doc))
  len <- length(feedChildren)
  if (len < 14) {
    return(NULL)
  }
  entryIdx <- 14:len
  # Text of the first child of the `pos`-th element of every review entry.
  # as.character() flattens the text node; its 6th component is the value.
  fieldText <- function(pos) {
    vapply(
      entryIdx,
      function(i) as.character(feedChildren[[i]][[pos]][[1]])[[6]],
      character(1)
    )
  }
  data.frame(
    idR = fieldText(2),
    appVersion = fieldText(9),
    rating = fieldText(8),
    titleReview = fieldText(3),
    reviewBody = fieldText(4),
    reviewDate = fieldText(1),
    country = countryCode,
    appName = appLabel,
    stringsAsFactors = FALSE
  )
}

# Download and parse one feed page. A failed download or an unparsable page is
# reported as a warning and yields NULL, so one bad request does not abort the
# whole multi-country run.
fetchReviewPage <- function(feedUrl, countryCode, appLabel) {
  tryCatch(
    parseReviewFeed(
      xmlParse(feedUrl, useInternalNodes = FALSE),
      countryCode,
      appLabel
    ),
    error = function(e) {
      warning(
        "Skipping ", feedUrl, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Collect every page in a list and bind once at the end instead of growing
# `df` with rbind() on each iteration.
reviewPages <- list()
for (j in seq_along(appId)) {
  for (l in 1:1) { # To download from all countries use seq_along(countryId)
    for (k in 1:10) { # iTunes RSS feed keeps only 10 last pages of the feed
      feedUrl <- paste0(
        "http://itunes.apple.com/", countryId[l],
        "/rss/customerreviews/page=", k,
        # format() keeps large numeric ids out of scientific notation
        # (paste() alone would render e.g. 1500000000 as "1.5e+09").
        "/id=", format(appId[j], scientific = FALSE, trim = TRUE),
        "/sortby=mostrecent/xml"
      )
      page <- fetchReviewPage(feedUrl, countryId[l], appOwner[j])
      if (is.null(page)) {
        # No reviews on this page (or it could not be read): stop paging
        # this app/country and move on.
        break
      }
      reviewPages[[length(reviewPages) + 1L]] <- page
    }
  }
}
# Append everything to the accumulator created earlier. rbind() ignores
# zero-row data frames, so an empty `df` does not affect the result columns.
df <- do.call(rbind, c(list(df), reviewPages))
# Convert the review timestamps to Date objects, then export the collected
# reviews to a CSV file in the current working directory.
df[["reviewDate"]] <- as.Date(df[["reviewDate"]])
write.csv(df, file = "./appstoreFetch.csv")