Dataset on Kaggle

Data

vending
## # A tibble: 9,659 × 23
##    App       Category Rating Reviews  Size Installs  Type Price `Content Rating`
##    <chr>        <dbl>  <dbl>   <dbl> <dbl>    <dbl> <dbl> <dbl> <chr>           
##  1 Photo Ed…        0    4.1     159  19      10000     0     0 Everyone        
##  2 Coloring…        0    3.9     967  14     500000     0     0 Everyone        
##  3 U Launch…        0    4.7   87510   8.7  5000000     0     0 Everyone        
##  4 Sketch -…        0    4.5  215644  25   50000000     0     0 Teen            
##  5 Pixel Dr…        0    4.3     967   2.8   100000     0     0 Everyone        
##  6 Paper fl…        0    4.4     167   5.6    50000     0     0 Everyone        
##  7 Smoke Ef…        0    3.8     178  19      50000     0     0 Everyone        
##  8 Infinite…        0    4.1   36815  29    1000000     0     0 Everyone        
##  9 Garden C…        0    4.4   13791  33    1000000     0     0 Everyone        
## 10 Kids Pai…        0    4.7     121   3.1    10000     0     0 Everyone        
## # … with 9,649 more rows, and 14 more variables: Genres <dbl>,
## #   `Last Updated` <date>, `Current Ver` <dbl>, `Android Ver` <dbl>,
## #   `Last Updated (Year)` <dbl>, `Last Updated (Year/Month)` <dbl>,
## #   `Category (categorical)` <chr>, `Type (categorical)` <chr>,
## #   `Genres (categorical)` <chr>, `Current Ver (categorical)` <chr>,
## #   `Android Ver (categorical)` <chr>,
## #   `Last Updated (Year/Month) (categorical)` <chr>, No_reviews_count <dbl>, …

Likely spam

Apps that are likely spam.

5 star ratings

# Keep only the apps whose Rating equals 5, retaining the columns that show
# how many reviews and installs stand behind that rating.
likelyspam$fivestar <- select(
  filter(vending, Rating == 5),
  App, Reviews, Installs
)
# Preview the first six matches.
head(likelyspam$fivestar)
## # A tibble: 6 × 3
##   App                                        Reviews Installs
##   <chr>                                        <dbl>    <dbl>
## 1 Hojiboy Tojiboyev Life Hacks                    15     1000
## 2 American Girls Mobile Numbers                    5     1000
## 3 Awake Dating                                     2      100
## 4 Spine- The dating app                            5      500
## 5 Girls Live Talk - Free Text and Video Chat       6      100
## 6 Online Girls Chat Group                          5      100

Lots of reviews

# Rank every app by its review count, highest first, keeping three columns.
likelyspam$reviewcount <- vending %>%
  select(App, Reviews, Installs) %>%
  arrange(desc(Reviews))
# Show the ten most-reviewed apps.
head(likelyspam$reviewcount, n = 10)
## # A tibble: 10 × 3
##    App                                                 Reviews   Installs
##    <chr>                                                 <dbl>      <dbl>
##  1 Facebook                                           78158306 1000000000
##  2 WhatsApp Messenger                                 69119316 1000000000
##  3 Instagram                                          66577313 1000000000
##  4 Messenger – Text and Video Chat for Free           56642847 1000000000
##  5 Clash of Clans                                     44891723  100000000
##  6 Clean Master- Space Cleaner & Antivirus            42916526  500000000
##  7 Subway Surfers                                     27722264 1000000000
##  8 YouTube                                            25655305 1000000000
##  9 Security Master - Antivirus, VPN, AppLock, Booster 24900999  500000000
## 10 Clash Royale                                       23133508  100000000

Reviews

Most common ratings

# Bar chart counting how many apps share each Rating value.
ggplot(data = vending, mapping = aes(x = Rating)) +
  geom_bar() +
  labs(
    title = "Most common Google Play Ratings",
    x = "Average Rating",
    y = "Amount of apps"
  )

Downloads/Reviews ratio

Also includes the number of reviews written as a percentage of downloads.

# Ratio: installs per review. Percentage: reviews per 100 installs.
# Rows where Reviews is 0 yield Inf (or NaN when Installs is also 0).
vending_with_ratio <- vending %>%
  mutate(
    Ratio = Installs / Reviews,
    Percentage = Reviews / Installs * 100
  ) %>%
  select(App, Reviews, Installs, Ratio, Percentage)
vending_with_ratio
## # A tibble: 9,659 × 5
##    App                                         Reviews Installs Ratio Percentage
##    <chr>                                         <dbl>    <dbl> <dbl>      <dbl>
##  1 Photo Editor & Candy Camera & Grid & Scrap…     159    10000  62.9      1.59 
##  2 Coloring book moana                             967   500000 517.       0.193
##  3 U Launcher Lite – FREE Live Cool Themes, H…   87510  5000000  57.1      1.75 
##  4 Sketch - Draw & Paint                        215644 50000000 232.       0.431
##  5 Pixel Draw - Number Art Coloring Book           967   100000 103.       0.967
##  6 Paper flowers instructions                      167    50000 299.       0.334
##  7 Smoke Effect Photo Maker - Smoke Editor         178    50000 281.       0.356
##  8 Infinite Painter                              36815  1000000  27.2      3.68 
##  9 Garden Coloring Book                          13791  1000000  72.5      1.38 
## 10 Kids Paint Free - Drawing Fun                   121    10000  82.6      1.21 
## # … with 9,649 more rows

Average (median) ratio and percentage

# Summarise both columns by their median (not the arithmetic mean), skipping
# missing values.
data.frame(
  AverageRatio = median(vending_with_ratio[["Ratio"]], na.rm = TRUE),
  AveragePercentage = median(vending_with_ratio[["Percentage"]], na.rm = TRUE)
)
##   AverageRatio AveragePercentage
## 1     58.82353               1.7