# Print the `vending` tibble to inspect its columns and first rows.
vending
## # A tibble: 9,659 × 23
## App Category Rating Reviews Size Installs Type Price `Content Rating`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Photo Ed… 0 4.1 159 19 10000 0 0 Everyone
## 2 Coloring… 0 3.9 967 14 500000 0 0 Everyone
## 3 U Launch… 0 4.7 87510 8.7 5000000 0 0 Everyone
## 4 Sketch -… 0 4.5 215644 25 50000000 0 0 Teen
## 5 Pixel Dr… 0 4.3 967 2.8 100000 0 0 Everyone
## 6 Paper fl… 0 4.4 167 5.6 50000 0 0 Everyone
## 7 Smoke Ef… 0 3.8 178 19 50000 0 0 Everyone
## 8 Infinite… 0 4.1 36815 29 1000000 0 0 Everyone
## 9 Garden C… 0 4.4 13791 33 1000000 0 0 Everyone
## 10 Kids Pai… 0 4.7 121 3.1 10000 0 0 Everyone
## # … with 9,649 more rows, and 14 more variables: Genres <dbl>,
## # `Last Updated` <date>, `Current Ver` <dbl>, `Android Ver` <dbl>,
## # `Last Updated (Year)` <dbl>, `Last Updated (Year/Month)` <dbl>,
## # `Category (categorical)` <chr>, `Type (categorical)` <chr>,
## # `Genres (categorical)` <chr>, `Current Ver (categorical)` <chr>,
## # `Android Ver (categorical)` <chr>,
## # `Last Updated (Year/Month) (categorical)` <chr>, No_reviews_count <dbl>, …
Apps that are likely spam.
# Spam candidates: every app whose Rating is exactly 5, reduced to the
# app name, its review count and its install count.
likelyspam$fivestar <- select(
  filter(vending, Rating == 5),
  App, Reviews, Installs
)
# Preview the first rows of the result.
head(likelyspam$fivestar)
## # A tibble: 6 × 3
## App Reviews Installs
## <chr> <dbl> <dbl>
## 1 Hojiboy Tojiboyev Life Hacks 15 1000
## 2 American Girls Mobile Numbers 5 1000
## 3 Awake Dating 2 100
## 4 Spine- The dating app 5 500
## 5 Girls Live Talk - Free Text and Video Chat 6 100
## 6 Online Girls Chat Group 5 100
# All apps ordered by review count, largest first, reduced to the app
# name, its review count and its install count.
likelyspam$reviewcount <- select(
  arrange(vending, desc(Reviews)),
  App, Reviews, Installs
)
# Show the ten most-reviewed apps.
head(likelyspam$reviewcount, n = 10)
## # A tibble: 10 × 3
## App Reviews Installs
## <chr> <dbl> <dbl>
## 1 Facebook 78158306 1000000000
## 2 WhatsApp Messenger 69119316 1000000000
## 3 Instagram 66577313 1000000000
## 4 Messenger – Text and Video Chat for Free 56642847 1000000000
## 5 Clash of Clans 44891723 100000000
## 6 Clean Master- Space Cleaner & Antivirus 42916526 500000000
## 7 Subway Surfers 27722264 1000000000
## 8 YouTube 25655305 1000000000
## 9 Security Master - Antivirus, VPN, AppLock, Booster 24900999 500000000
## 10 Clash Royale 23133508 100000000
# Bar chart counting how many apps share each Rating value.
ggplot(vending, aes(x = Rating)) +
  geom_bar() +
  labs(
    title = "Most common Google Play Ratings",
    x = "Average Rating",
    y = "Amount of apps"
  )
The table below adds two derived columns: the number of installs per review (Ratio) and the percentage of installs that have a review (Percentage).
# Keep only the columns needed to relate reviews to installs.
vending_with_ratio <- select(vending, App, Reviews, Installs)
# Ratio      = installs per review.
# Percentage = reviews as a percentage of installs.
# NOTE: a row with zero Reviews gives Ratio = Inf (NaN when Installs is
# also zero); a row with zero Installs gives Percentage = Inf or NaN.
vending_with_ratio <- mutate(
  vending_with_ratio,
  Ratio = Installs / Reviews,
  Percentage = Reviews / Installs * 100
)
vending_with_ratio
## # A tibble: 9,659 × 5
## App Reviews Installs Ratio Percentage
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Photo Editor & Candy Camera & Grid & Scrap… 159 10000 62.9 1.59
## 2 Coloring book moana 967 500000 517. 0.193
## 3 U Launcher Lite – FREE Live Cool Themes, H… 87510 5000000 57.1 1.75
## 4 Sketch - Draw & Paint 215644 50000000 232. 0.431
## 5 Pixel Draw - Number Art Coloring Book 967 100000 103. 0.967
## 6 Paper flowers instructions 167 50000 299. 0.334
## 7 Smoke Effect Photo Maker - Smoke Editor 178 50000 281. 0.356
## 8 Infinite Painter 36815 1000000 27.2 3.68
## 9 Garden Coloring Book 13791 1000000 72.5 1.38
## 10 Kids Paint Free - Drawing Fun 121 10000 82.6 1.21
## # … with 9,649 more rows
# One-row summary of the two derived columns. The values are medians
# (NA/NaN removed), even though the output columns are labelled "Average".
data.frame(
  AverageRatio = median(vending_with_ratio[["Ratio"]], na.rm = TRUE),
  AveragePercentage = median(vending_with_ratio[["Percentage"]], na.rm = TRUE)
)
## AverageRatio AveragePercentage
## 1 58.82353 1.7