require(ggplot2)
require(knitr)

This short note accompanies the “The voice of users - analysing mobile app reviews” post on UXBooth. The first chunks of code simulate data; subsequently, data on individual features are aggregated into indices, summary statistics are calculated, and finally the results are plotted.

Data Simulation

In the next chunk of code, the sentiment assessment for each of the subdimensions discussed in the post is simulated (including missing observations). For this exercise, 100 observations are generated for each of the 25 dimensions. The first rows of the generated raw data are shown in a table below.

# Feature groups: each vector lists the raw review dimensions (used as column
# names of the simulated data) that are aggregated into one index.
personalisation <- c("personalDetails", "savedPreferences")
bookingManagement <- c(
  "flightSelection", "payment", "extraServices",
  "flightChanges", "seatSelection", "generalBooking"
)
checkIn <- c("boardingPass", "generalCheckin")
# Single-feature group: the index is the feature itself.
flightInfo <- "flightInfo"
accountManagegement <- c(
  "generalAccount", "loyaltyProgramme", "websiteCompatibility",
  "flightList", "milesStatus"
)
compatibility <- c("calendar", "passbook", "appleWatch")
performance <- c("bugs", "crashes", "speed", "incompatibility")
design <- c("navigation", "graphics")

# Simulate binary sentiment ratings coded as -1 / 1.
#
# x: probability that a single rating is 1 (passed to rbinom() as `prob`).
# n: number of ratings to draw; defaults to 100, the sample size used
#    throughout this note.
#
# Returns a numeric vector of length `n` whose elements are -1 or 1.
randomNumbers <- function(x, n = 100) {
  # rbinom() yields 0/1; the affine map 2 * b - 1 recodes that to -1/1.
  2 * rbinom(n, 1, x) - 1
}

# Generate 25 columns of 100 observations each; every value is either -1 or 1,
# and the probability of a 1 rises from 0.1 (first column) to 0.9 (last).
appReviews_aux <- data.frame(
  vapply(
    seq(0.1, 0.9, length.out = 25),
    randomNumbers,
    numeric(100)
  )
)

# Randomly selects observations of `x` to be replaced with missing values.
#
# x: a vector.
#
# Returns `x` with a random subset of its elements set to NA. The number of
# replaced elements is floor(5 * length(x) / 6) plus a random extra of up to
# floor(0.9 * length(x)) %/% 6, so it never exceeds length(x). Vectors too
# short for any element to be selected are returned unchanged.
insert_nas <- function(x) {
  len <- length(x)
  if (len == 0) {
    return(x)
  }

  # Random extra number of NAs. sample.int() cannot draw from an empty range,
  # which is what floor(0.9 * len) gives for a one-element vector.
  extra_max <- floor(0.9 * len)
  extra <- if (extra_max >= 1) floor(sample.int(extra_max, 1) / 6) else 0

  # 5 * len / 6 is usually fractional; round down explicitly instead of
  # handing sample() a non-integer size.
  n <- floor(5 * len / 6 + extra)
  if (n < 1) {
    return(x)
  }

  i <- sample.int(len, n)
  x[i] <- NA
  x
}

# Label the simulated columns with the feature names, group by group.
names(appReviews_aux) <- c(
  personalisation,
  bookingManagement,
  checkIn,
  flightInfo,
  accountManagegement,
  compatibility,
  performance,
  design
)

# Knock out a random share of the observations in every feature column.
appReviews <- data.frame(
  vapply(appReviews_aux, insert_nas, numeric(nrow(appReviews_aux)))
)

Aggregating data to 8 indices

Aggregation from features into indices needs to take account of the missing values, which, as the table above shows, are quite frequent.

# Names of the eight index columns appended to appReviews below.
indices <- c(
  "personalisationIndex",
  "checkInIndex",
  "bookingIndex",
  "flightInfoIndex",
  "accountManagegementIndex",
  "compatibilityIndex",
  "performanceIndex",
  "designIndex"
)

# Each index is the row-wise mean of its features, ignoring missing values.
# A row in which every feature of the group is missing yields NaN.
appReviews$personalisationIndex <- rowMeans(appReviews[personalisation], na.rm = TRUE)
appReviews$checkInIndex <- rowMeans(appReviews[checkIn], na.rm = TRUE)
appReviews$bookingIndex <- rowMeans(appReviews[bookingManagement], na.rm = TRUE)
# flightInfo consists of a single feature, so that column is copied as is.
appReviews$flightInfoIndex <- appReviews[[flightInfo]]
appReviews$accountManagegementIndex <- rowMeans(appReviews[accountManagegement], na.rm = TRUE)
appReviews$compatibilityIndex <- rowMeans(appReviews[compatibility], na.rm = TRUE)
appReviews$performanceIndex <- rowMeans(appReviews[performance], na.rm = TRUE)
appReviews$designIndex <- rowMeans(appReviews[design], na.rm = TRUE)

Calculating index scores and frequencies

For each index we calculate the average score and frequency

# Per-index total of the observed (non-missing) values.
ratingSum <- vapply(appReviews[indices], sum, numeric(1), na.rm = TRUE)
# Per-index number of rows with an observed value.
ratingN <- colSums(!is.na(appReviews[indices]))
# Average sentiment among the rows where the index is observed.
score <- ratingSum / ratingN
# Share of all rows in which the index is observed.
frequency <- ratingN / nrow(appReviews)

# One row per index, ready for plotting.
ReviewPlot <- data.frame(
  feature = indices,
  score = score,
  frequency = frequency
)

PLOT

Finally we plot the results.

# setting colors
# Five colours, listed twice so that up to ten features can be coloured.
mycols <- c("#6184DB", "#533A71", "#50C5B7", "#9CEC5B", "#F0F465",
            "#6184DB", "#533A71", "#50C5B7", "#9CEC5B", "#F0F465")

# palette(value) installs `value` as the session palette but returns the
# PREVIOUS palette, so its result must not be used as the colour vector.
# The call is kept only to preserve the existing session-level side effect.
palette(mycols)

# Colour vector handed to the manual colour scale of the plot.
flex <- mycols

# Sentiment (x) versus frequency (y) for every index, with four shaded and
# labelled quadrants: flaws on the left, strengths on the right, "key" items
# in the upper half and "other" items in the lower half.
#
# The quadrant rectangles and labels are fixed decorations, so they are added
# with annotate(). Supplying constants through aes() in geom_rect()/geom_text()
# draws one copy per row of ReviewPlot. The rectangle alpha of 0.08
# approximates the look of the eight stacked alpha = 0.01 copies drawn before.
# Label boldness is set with `fontface`; geom_text() has no `face` parameter.
ggplot(ReviewPlot, aes(x = score, y = frequency)) +
  geom_point(aes(color = feature, shape = feature), size = 5) +
  scale_shape_manual(values = c(15, 16, 15, 16, 15, 16, 15, 16)) +
  scale_color_manual(values = flex) +
  xlim(-1, 1) +
  ylim(0, 0.5) +
  labs(x = "Sentiment", y = "Frequency", title = "Flexponsive app matrix") +
  theme_bw() +
  theme(
    plot.title = element_text(color = "#666666", face = "bold", size = 16),
    axis.title = element_text(color = "#666666", face = "bold", size = 12),
    legend.position = "bottom",
    legend.text = element_text(size = 10, color = "#666666"),
    legend.key = element_blank(),
    legend.title = element_blank()
  ) +
  guides(shape = guide_legend(nrow = 2, byrow = TRUE)) +
  # Lower left: infrequent negatives.
  annotate("rect", xmin = -1, xmax = -0.05, ymin = 0, ymax = 0.24,
           fill = "#6184DB", alpha = 0.08) +
  annotate("text", x = -1, y = 0, label = "OTHER FLAWS", colour = "#6184DB",
           hjust = 0, vjust = -0.1, fontface = "bold", size = 6) +
  # Upper left: frequent negatives.
  annotate("rect", xmin = -1, xmax = -0.05, ymin = 0.26, ymax = 0.5,
           fill = "#533A71", alpha = 0.08) +
  annotate("text", x = -1, y = 0.5, label = "KEY FLAWS", colour = "#533A71",
           hjust = 0, vjust = 1, fontface = "bold", size = 6) +
  # Upper right: frequent positives.
  annotate("rect", xmin = 0.05, xmax = 1, ymin = 0.26, ymax = 0.5,
           fill = "#50C5B7", alpha = 0.08) +
  annotate("text", x = 0.30, y = 0.5, label = "KEY STRENGTHS", colour = "#50C5B7",
           hjust = 0, vjust = 1, fontface = "bold", size = 6) +
  # Lower right: infrequent positives.
  annotate("rect", xmin = 0.05, xmax = 1, ymin = 0, ymax = 0.24,
           fill = "#9CEC5B", alpha = 0.08) +
  annotate("text", x = 0.18, y = 0, label = "OTHER STRENGTHS", colour = "#9CEC5B",
           hjust = 0, vjust = -0.1, fontface = "bold", size = 6)