Agregasi dan Visualisasi
1. Top 10 Kategori Berdasarkan Jumlah Aplikasi
# Aggregation pipeline: top 10 categories by number of apps.
#   1. $group - one group per `category`, counting its documents into
#               `jumlah_aplikasi`.
#   2. $sort  - largest count first; `_id` (the group key) breaks ties.
#               Without a tie-breaker MongoDB may return equal counts in any
#               order, so which groups survive $limit could change between
#               runs when several categories share the 10th-place count.
#   3. $limit - keep the first 10 groups.
top_kategori_pipeline <- '[
{
"$group": {
"_id": "$category",
"jumlah_aplikasi": { "$sum": 1 }
}
},
{
"$sort": { "jumlah_aplikasi": -1, "_id": 1 }
},
{
"$limit": 10
}
]'
# Run the pipeline and expose the group key (`_id`) as `category`.
top_kategori <- mongo_conn$aggregate(top_kategori_pipeline) %>%
  rename(category = `_id`)
# Horizontal bar chart (coord_flip); reorder() orders the bars by app count.
ggplot(
  top_kategori,
  aes(x = reorder(category, jumlah_aplikasi), y = jumlah_aplikasi)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Kategori Berdasarkan Jumlah Aplikasi",
    x = "Kategori",
    y = "Jumlah Aplikasi"
  ) +
  theme_minimal()

2. Rata-rata Download vs Rating per Kategori
# Aggregation pipeline: average downloads and average rating per category.
#   1. $addFields - `downloads_numeric`: `number_of_downloads` rendered as a
#                   string, with every "," and "+" removed, then converted to
#                   double; `rating_numeric`: `rating` converted to double.
#   2. $group     - one group per `category` with the mean of both fields.
#   3. $sort      - highest average download count first.
pipeline_avg_download_rating <- '[
{
"$addFields": {
"downloads_numeric": {
"$toDouble": {
"$replaceAll": {
"input": {
"$replaceAll": {
"input": { "$toString": "$number_of_downloads" },
"find": ",",
"replacement": ""
}
},
"find": "+",
"replacement": ""
}
}
},
"rating_numeric": { "$toDouble": "$rating" }
}
},
{
"$group": {
"_id": "$category",
"avg_downloads": { "$avg": "$downloads_numeric" },
"avg_rating": { "$avg": "$rating_numeric" }
}
},
{
"$sort": { "avg_downloads": -1 }
}
]'
# Fetch the aggregated rows, then relabel the group key as `category`.
avg_download_rating <- mongo_conn$aggregate(pipeline_avg_download_rating)
avg_download_rating <- rename(avg_download_rating, category = `_id`)
# Scatter plot: one labelled point per category; the y axis uses short-scale
# suffixes via scales::cut_short_scale().
ggplot(
  data = avg_download_rating,
  mapping = aes(x = avg_rating, y = avg_downloads, label = category)
) +
  geom_point(size = 3, alpha = 0.7, color = "darkgreen") +
  geom_text(vjust = -0.5, hjust = 0, size = 3) +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  theme_minimal() +
  labs(
    x = "Rata-rata Rating",
    y = "Rata-rata Jumlah Download",
    title = "Rata-rata Download vs Rating per Kategori"
  )

3. Developer dengan Total Download Terbesar (Top 10)
# Aggregation pipeline: top 10 developers by total downloads.
#   1. $addFields - `downloads_numeric`: `number_of_downloads` as a string
#                   with "," and "+" stripped, converted to double.
#   2. $group     - one group per `developer`, summing `downloads_numeric`.
#   3. $sort      - largest total first; `_id` (the developer name) breaks
#                   ties so the rows that survive $limit are reproducible
#                   when several developers share the same total.
#   4. $limit     - keep the first 10 groups.
pipeline_top_dev_download <- '[
{
"$addFields": {
"downloads_numeric": {
"$toDouble": {
"$replaceAll": {
"input": {
"$replaceAll": {
"input": { "$toString": "$number_of_downloads" },
"find": ",",
"replacement": ""
}
},
"find": "+",
"replacement": ""
}
}
}
}
},
{
"$group": {
"_id": "$developer",
"total_downloads": { "$sum": "$downloads_numeric" }
}
},
{
"$sort": { "total_downloads": -1, "_id": 1 }
},
{
"$limit": 10
}
]'
# Run the pipeline and expose the group key (`_id`) as `developer`.
top_dev_download <- mongo_conn$aggregate(pipeline_top_dev_download) %>%
  rename(developer = `_id`)
# Horizontal bar chart, bars ordered by total downloads; the value axis uses
# short-scale suffixes via scales::cut_short_scale().
ggplot(
  top_dev_download,
  aes(x = reorder(developer, total_downloads), y = total_downloads)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  labs(
    title = "Top 10 Developer dengan Total Download Terbesar",
    x = "Developer",
    y = "Total Download"
  ) +
  theme_minimal()

4. Aplikasi dengan Download Tinggi tapi Rating Rendah (Risiko UX)
# Aggregation pipeline: popular apps with a low rating ("UX risk").
#   1. $addFields - `downloads_numeric`: `number_of_downloads` as a string
#                   with "," and "+" stripped, converted to double;
#                   `rating_numeric`: `rating` converted to double.
#   2. $match     - at least 10,000,000 downloads and a rating of at most 3.5.
#   3. $project   - keep the identifying fields and the two numeric fields.
#   4. $sort      - most downloads first. Many apps share the same download
#                   value, so ties are broken by the lower rating first and
#                   then by `_id`; without tie-breakers MongoDB may order
#                   equal values arbitrarily and the $limit cut-off would not
#                   be reproducible.
#   5. $limit     - keep the first 10 documents.
pipeline_risk_ux <- '[
{
"$addFields": {
"downloads_numeric": {
"$toDouble": {
"$replaceAll": {
"input": {
"$replaceAll": {
"input": { "$toString": "$number_of_downloads" },
"find": ",",
"replacement": ""
}
},
"find": "+",
"replacement": ""
}
}
},
"rating_numeric": { "$toDouble": "$rating" }
}
},
{
"$match": {
"downloads_numeric": { "$gte": 10000000 },
"rating_numeric": { "$lte": 3.5 }
}
},
{
"$project": {
"app_name": 1,
"developer": 1,
"category": 1,
"downloads_numeric": 1,
"rating_numeric": 1
}
},
{
"$sort": { "downloads_numeric": -1, "rating_numeric": 1, "_id": 1 }
},
{
"$limit": 10
}
]'
# Run the pipeline.
apps_risk_ux <- mongo_conn$aggregate(pipeline_risk_ux)
# Show the resulting table.
print(apps_risk_ux)
## _id app_name
## 1 683b33a628b5b791a60d8745 Google Maps
## 2 683b33a628b5b791a60d8664 LINE: Calls & Messages
## 3 683b33a628b5b791a60d8786 Flipboard:Your Social Magazine
## 4 683b33a628b5b791a60d86ee Health Connect
## 5 683b33a628b5b791a60d8566 happn: dating app
## 6 683b33a628b5b791a60d8837 NFL
## 7 683b33a628b5b791a60d87c3 VSCO: Photo Editor
## 8 683b33a628b5b791a60d8887 AccuWeather: Weather Radar
## 9 683b33a628b5b791a60d8683 Google Classroom
## 10 683b33a628b5b791a60d8832 DAZN - Watch Live Sports
## developer category downloads_numeric rating_numeric
## 1 Google LLC Travel & Local 1e+10 3.215799
## 2 LINE (LY Corporation) Communication 1e+09 3.496082
## 3 Flipboard News & Magazines 5e+08 3.483313
## 4 Google LLC Health & Fitness 5e+08 3.147321
## 5 happn Dating 1e+08 3.337754
## 6 NFL Enterprises LLC Sports 1e+08 3.007707
## 7 VSCO Photography 1e+08 3.471409
## 8 AccuWeather Weather 1e+08 3.482612
## 9 Google LLC Education 1e+08 2.551703
## 10 DAZN Sports 5e+07 2.304272
# Optional chart: horizontal bars of download counts per app, ordered by
# downloads, with the fill colour encoding the rating (red = lowest rating in
# the data, orange = highest).
ggplot(
  data = apps_risk_ux,
  mapping = aes(
    x = reorder(app_name, downloads_numeric),
    y = downloads_numeric,
    fill = rating_numeric
  )
) +
  geom_col() +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  scale_fill_gradient(high = "orange", low = "red") +
  coord_flip() +
  theme_minimal() +
  labs(
    fill = "Rating",
    x = "Aplikasi",
    y = "Jumlah Download",
    title = "Top 10 Aplikasi Populer dengan Rating Rendah (Risiko UX)"
  )

5. Korelasi Rating dan Download
# Aggregation pipeline: numeric rating and download count for every app.
#   1. $addFields - `downloads_numeric`: `number_of_downloads` as a string
#                   with "," and "+" stripped, converted to double;
#                   `rating_numeric`: `rating` converted to double.
#   2. $match     - keep documents where both numeric fields are non-null.
#   3. $project   - keep `app_name` and the two numeric fields.
pipeline_rating_download <- '[
{
"$addFields": {
"downloads_numeric": {
"$toDouble": {
"$replaceAll": {
"input": {
"$replaceAll": {
"input": { "$toString": "$number_of_downloads" },
"find": ",",
"replacement": ""
}
},
"find": "+",
"replacement": ""
}
}
},
"rating_numeric": { "$toDouble": "$rating" }
}
},
{
"$match": {
"downloads_numeric": { "$ne": null },
"rating_numeric": { "$ne": null }
}
},
{
"$project": {
"app_name": 1,
"downloads_numeric": 1,
"rating_numeric": 1
}
}
]'
# Fetch one row per app.
rating_download_data <- mongo_conn$aggregate(pipeline_rating_download)
# Pearson correlation between rating and downloads; rows with a missing value
# in either column are dropped by use = "complete.obs".
correlation <- with(
  rating_download_data,
  cor(
    rating_numeric,
    downloads_numeric,
    use = "complete.obs",
    method = "pearson"
  )
)
cat("Korelasi Pearson antara Rating dan Jumlah Download:", correlation, "\n")
## Korelasi Pearson antara Rating dan Jumlah Download: -0.01507175
# Scatter plot of downloads against rating with a linear-model trend line
# (no confidence band); the title embeds the correlation rounded to 3 digits.
ggplot(
  data = rating_download_data,
  mapping = aes(x = rating_numeric, y = downloads_numeric)
) +
  geom_point(color = "steelblue", alpha = 0.4) +
  geom_smooth(se = FALSE, method = "lm", color = "darkred") +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  theme_minimal() +
  labs(
    x = "Rating",
    y = "Jumlah Download",
    title = paste0(
      "Korelasi Rating dan Jumlah Download (r = ",
      round(correlation, 3),
      ")"
    )
  )
## `geom_smooth()` using formula = 'y ~ x'

6. Aplikasi dengan Rating Tertinggi per Kategori
# Aggregation pipeline: the highest-rated app in each category.
#   1. $addFields - `rating_numeric`: `rating` converted to double;
#                   `reviews_numeric`: `number_of_reviews` converted to
#                   double, so the tie-break below compares numbers even if
#                   the field is stored as text (the hidden-gems pipeline in
#                   this report applies the same conversion).
#   2. $match     - drop documents without a rating.
#   3. $sort      - by category, then best rating first, then most reviews
#                   first, then `_id`. Ratings can tie exactly, so the final
#                   `_id` key makes the document picked by $first
#                   reproducible.
#   4. $group     - one group per category, taking the first document's
#                   app name, rating, and developer.
#   5. $sort      - best rating first; `_id` (the category) orders equal
#                   ratings so the printed table is stable.
pipeline_top_rating_per_category <- '[
{
"$addFields": {
"rating_numeric": { "$toDouble": "$rating" },
"reviews_numeric": { "$toDouble": "$number_of_reviews" }
}
},
{
"$match": {
"rating_numeric": { "$ne": null }
}
},
{
"$sort": {
"category": 1,
"rating_numeric": -1,
"reviews_numeric": -1,
"_id": 1
}
},
{
"$group": {
"_id": "$category",
"top_app": { "$first": "$app_name" },
"top_rating": { "$first": "$rating_numeric" },
"developer": { "$first": "$developer" }
}
},
{
"$sort": { "top_rating": -1, "_id": 1 }
}
]'
# Run the pipeline and give the columns report-friendly names.
top_apps_per_category <- mongo_conn$aggregate(pipeline_top_rating_per_category) %>%
  rename(
    category = `_id`,
    app_name = top_app,
    rating = top_rating
  )
# Show the table.
print(top_apps_per_category)
## category app_name rating
## 1 Communication TC Connect 5.000000
## 2 Adventure A Story of A Company 5.000000
## 3 Word Word Puzzle Time - Crossword 4.936412
## 4 Puzzle Fable Town: Merging Games 4.925234
## 5 Productivity GrowthDay: Daily Mindset Coach 4.925234
## 6 Board Tile Story: Match Puzzle Game 4.916149
## 7 Lifestyle In Love while Parenting 4.902840
## 8 Comics Bonelli Digital Classic 4.900000
## 9 Education The Happy Child Parenting App 4.894947
## 10 Casual Dream Family - Match 3 Games 4.890380
## 11 Finance Nav Business Financial Health 4.890323
## 12 Weather Local Weather Forecast - Radar 4.888557
## 13 Card Gin Rummy Stars - Card Game 4.880276
## 14 Health & Fitness Me+ Lifestyle Routine 4.876358
## 15 Parenting Pregnancy App & Baby Tracker 4.876284
## 16 Casino Jackpot Master™ Slots - Casino 4.868519
## 17 Shopping Klarna | Shop now. Pay later. 4.863370
## 18 Travel & Local Eurostar Trains 4.862745
## 19 Trivia Brain Test Premium 4.857143
## 20 Role Playing Dungeon Clawler 4.848342
## 21 Events Sympla: Ingressos para eventos 4.840000
## 22 Food & Drink Starbucks 4.837782
## 23 Strategy Kingdom Rush Origins TD 4.836303
## 24 Arcade Streets of Rage 4 4.836258
## 25 Books & Reference Libby, the Library App 4.833476
## 26 Entertainment Tubi: Free Movies & Live TV 4.827010
## 27 Photography AI Photo Editor - Polish 4.825959
## 28 Maps & Navigation Trainline: Train travel Europe 4.824301
## 29 Simulation Dungeon Village 2 4.822581
## 30 News & Magazines Substack 4.820242
## 31 Action Dead Cells 4.819661
## 32 Sports FotMob - Soccer Live Scores 4.817960
## 33 Music & Audio DICE: Live Shows 4.808293
## 34 Social Widgetable: Besties & Couples 4.808152
## 35 Video Players & Editors Video Player All Format 4.807214
## 36 Art & Design Canva: AI Photo & Video Editor 4.807056
## 37 Auto & Vehicles Carvana: Buy/Sell Used Cars 4.792783
## 38 Business Popl: Digital Business Card 4.746198
## 39 House & Home Trulia: Homes For Sale & Rent 4.741194
## 40 Educational Balloon Pop Kids Learning Game 4.736287
## 41 Tools TickTock-TikTok Live Wallpaper 4.729252
## 42 Music My Singing Monsters 4.729149
## 43 Racing CarX Highway Racing 4.703173
## 44 Personalization Microsoft Launcher 4.701242
## 45 Medical TATA 1mg Online Healthcare App 4.700787
## 46 Dating Lovely – Meet and Date Locals 4.597504
## 47 Beauty Ulta Beauty: Makeup & Skincare 4.583845
## 48 Libraries & Demo Libraries for developers 4.340000
## developer
## 1 ThetaCore
## 2 Buff Studio Co.,Ltd.
## 3 Word Puzzle Lab
## 4 REEF GAMES LTD
## 5 GrowthDay
## 6 LinkDesks Classic Puzzle Games
## 7 HJB Ventures
## 8 Sergio Bonelli Editore
## 9 HJB Ventures
## 10 Playflux
## 11 Nav Technologies
## 12 Vitality App Studios
## 13 Beach Bum Ltd.
## 14 ENERJOY PTE. LTD.
## 15 BabyCenter
## 16 Zeroo Gravity Games
## 17 Klarna Bank AB (publ)
## 18 Eurostar International Limited
## 19 Unico Studio
## 20 Stray Fawn Studio
## 21 Sympla
## 22 Starbucks Coffee Company
## 23 Ironhide Games
## 24 Playdigious
## 25 OverDrive, Inc.
## 26 Tubi TV
## 27 InShot Inc.
## 28 trainline
## 29 Kairosoft
## 30 Substack Inc.
## 31 Playdigious
## 32 FotMob AS
## 33 DICE FM
## 34 Happeny Technology Pte. Ltd.
## 35 InShot Inc.
## 36 Canva
## 37 Carvana
## 38 Popl Co
## 39 Trulia
## 40 Bebi Family: preschool learning games for kids
## 41 TikTok Pte. Ltd.
## 42 Big Blue Bubble Inc
## 43 CarX Technologies
## 44 Microsoft Corporation
## 45 Tata 1MG Healthcare Solutions Private Limited
## 46 Joyride GmbH
## 47 Ulta Inc.
## 48 Desarrollo Droide
# Optional chart: one horizontal bar per category showing its best rating,
# ordered by rating; bars are coloured per category and the legend is hidden.
ggplot(
  data = top_apps_per_category,
  mapping = aes(x = reorder(category, rating), y = rating, fill = category)
) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  theme_minimal() +
  labs(
    x = "Kategori",
    y = "Rating Tertinggi",
    title = "Aplikasi dengan Rating Tertinggi per Kategori"
  )

7. Aplikasi dengan Download Tertinggi per Kategori
# Aggregation pipeline: the most-downloaded app in each category.
#   1. $addFields - `downloads_numeric`: `number_of_downloads` as a string
#                   with "," and "+" stripped, converted to double;
#                   `reviews_numeric`: `number_of_reviews` converted to
#                   double, used only as a tie-breaker.
#   2. $match     - drop documents without a download count.
#   3. $sort      - by category, then most downloads first. Download values
#                   repeat across many apps, so ties are broken by review
#                   count (highest first) and finally by `_id`; otherwise the
#                   document picked by $first is arbitrary among tied apps.
#   4. $group     - one group per category, taking the first document's
#                   app name, download count, and developer.
#   5. $sort      - most downloads first; `_id` (the category) orders equal
#                   values so the printed table is stable.
pipeline_top_download_per_category <- '[
{
"$addFields": {
"downloads_numeric": {
"$toDouble": {
"$replaceAll": {
"input": {
"$replaceAll": {
"input": { "$toString": "$number_of_downloads" },
"find": ",",
"replacement": ""
}
},
"find": "+",
"replacement": ""
}
}
},
"reviews_numeric": { "$toDouble": "$number_of_reviews" }
}
},
{
"$match": {
"downloads_numeric": { "$ne": null }
}
},
{
"$sort": {
"category": 1,
"downloads_numeric": -1,
"reviews_numeric": -1,
"_id": 1
}
},
{
"$group": {
"_id": "$category",
"top_app": { "$first": "$app_name" },
"top_download": { "$first": "$downloads_numeric" },
"developer": { "$first": "$developer" }
}
},
{
"$sort": { "top_download": -1, "_id": 1 }
}
]'
# Run the pipeline and give the columns report-friendly names.
top_download_apps <- mongo_conn$aggregate(pipeline_top_download_per_category) %>%
  rename(
    category = `_id`,
    app_name = top_app,
    downloads = top_download
  )
# Show the resulting table.
print(top_download_apps)
## category app_name downloads
## 1 Video Players & Editors Google TV 1e+10
## 2 Travel & Local Google Maps 1e+10
## 3 Social Facebook 1e+10
## 4 Productivity Google Drive 1e+10
## 5 Tools Gboard - the Google Keyboard 1e+10
## 6 Communication Gmail 1e+10
## 7 Photography Google Photos 1e+10
## 8 Entertainment Google Play Games 5e+09
## 9 Music & Audio YouTube Music 5e+09
## 10 Sports 8 Ball Pool 1e+09
## 11 News & Magazines X 1e+09
## 12 Racing Hill Climb Racing 1e+09
## 13 Business Zoom Workplace 1e+09
## 14 Arcade Subway Surfers 1e+09
## 15 Books & Reference Google Play Books & Audiobooks 1e+09
## 16 Board Ludo King® 1e+09
## 17 Personalization Wallpapers 1e+09
## 18 Adventure Roblox 1e+09
## 19 Finance Google Wallet 1e+09
## 20 Casual Candy Crush Saga 1e+09
## 21 Action Temple Run 2: Endless Escape 1e+09
## 22 Lifestyle Google Home 1e+09
## 23 Weather Weather - By Xiaomi 1e+09
## 24 Health & Fitness Samsung Health 1e+09
## 25 Maps & Navigation Waze Navigation & Live Traffic 5e+08
## 26 Music Magic Tiles 3™ - Piano Game 5e+08
## 27 Puzzle Block Blast! 5e+08
## 28 Strategy Clash of Clans 5e+08
## 29 Art & Design Canva: AI Photo & Video Editor 5e+08
## 30 Shopping Temu: Shop Like a Billionaire 5e+08
## 31 Education Samsung Global Goals 5e+08
## 32 Simulation Barbie Dreamhouse Adventures 1e+08
## 33 Educational Toca Boca World 1e+08
## 34 Comics WEBTOON: Manga, Comics, Manhwa 1e+08
## 35 Word Wordscapes 1e+08
## 36 Parenting Family Space 1e+08
## 37 Card UNO!™ 1e+08
## 38 Trivia Trivia Crack: Fun Quiz Games 1e+08
## 39 Dating Tinder Dating App: Chat & Date 1e+08
## 40 Role Playing Avatar World ® 1e+08
## 41 Food & Drink DoorDash: Food, Grocery, More 5e+07
## 42 Casino Slotomania™ Slots Casino Games 5e+07
## 43 House & Home Realtor.com Real Estate & Rent 1e+07
## 44 Auto & Vehicles Cars.com: Buy and sell cars 1e+07
## 45 Medical WebMD: Symptom Checker 1e+07
## 46 Events StubHub - Live Event Tickets 1e+07
## 47 Beauty Sephora: Buy Makeup & Skincare 5e+06
## 48 Libraries & Demo Libraries for developers 1e+05
## developer
## 1 Google LLC
## 2 Google LLC
## 3 Meta Platforms, Inc.
## 4 Google LLC
## 5 Google LLC
## 6 Google LLC
## 7 Google LLC
## 8 Google LLC
## 9 Google LLC
## 10 Miniclip.com
## 11 X Corp.
## 12 Fingersoft
## 13 zoom.com
## 14 SYBO Games
## 15 Google LLC
## 16 Gametion
## 17 Google LLC
## 18 Roblox Corporation
## 19 Google LLC
## 20 King
## 21 Imangi Studios
## 22 Google LLC
## 23 Xiaomi Inc.
## 24 Samsung Electronics Co., Ltd.
## 25 Waze
## 26 AMANOTES PTE. LTD.
## 27 HungryStudio
## 28 Supercell
## 29 Canva
## 30 Temu
## 31 Samsung Electronics Co., Ltd.
## 32 Budge Studios
## 33 Toca Boca
## 34 NAVER WEBTOON
## 35 PeopleFun
## 36 Motorola Mobility LLC.
## 37 Mattel163 Limited
## 38 etermax
## 39 Tinder LLC
## 40 Pazu Games
## 41 DoorDash
## 42 Playtika
## 43 realtor.com®
## 44 Cars.com
## 45 WebMD, LLC
## 46 StubHub
## 47 Sephora USA, Inc.
## 48 Desarrollo Droide
# Optional chart: one horizontal bar per category showing the download count
# of its top app, ordered by downloads; the legend is hidden and the value
# axis is formatted with thousands separators.
ggplot(
  data = top_download_apps,
  mapping = aes(
    x = reorder(category, downloads),
    y = downloads,
    fill = category
  )
) +
  geom_col(show.legend = FALSE) +
  scale_y_continuous(labels = scales::comma_format()) +
  coord_flip() +
  theme_minimal() +
  labs(
    x = "Kategori",
    y = "Jumlah Download",
    title = "Aplikasi dengan Jumlah Download Tertinggi per Kategori"
  )

8. Volatilitas Rating Antar Aplikasi dalam Satu Kategori
# Aggregation pipeline: spread of ratings inside each category.
#   1. $addFields - `rating_numeric`: `rating` converted to double.
#   2. $match     - drop documents without a rating.
#   3. $group     - per category: mean, population standard deviation
#                   ($stdDevPop), minimum, maximum, and number of apps.
#   4. $match     - keep categories with at least 10 rated apps.
#   5. $sort      - largest standard deviation first.
# NOTE(review): the rendered table reports min_rating = 0 for many
# categories. If 0 stands for "not rated yet" rather than a real score, those
# rows inflate sd_rating and lower avg_rating - confirm against the data
# source.
pipeline_rating_volatility <- '[
{
"$addFields": {
"rating_numeric": { "$toDouble": "$rating" }
}
},
{
"$match": {
"rating_numeric": { "$ne": null }
}
},
{
"$group": {
"_id": "$category",
"avg_rating": { "$avg": "$rating_numeric" },
"sd_rating": { "$stdDevPop": "$rating_numeric" },
"min_rating": { "$min": "$rating_numeric" },
"max_rating": { "$max": "$rating_numeric" },
"count": { "$sum": 1 }
}
},
{
"$match": {
"count": { "$gte": 10 }
}
},
{
"$sort": { "sd_rating": -1 }
}
]'
# Fetch the per-category statistics, then relabel the group key as `category`.
rating_volatility <- mongo_conn$aggregate(pipeline_rating_volatility)
rating_volatility <- rename(rating_volatility, category = `_id`)
# Show the result.
print(rating_volatility)
## category avg_rating sd_rating min_rating max_rating count
## 1 Parenting 3.767982 1.4218594 0.000000 4.876284 27
## 2 Medical 3.942047 1.3304026 0.000000 4.700787 12
## 3 Trivia 4.155185 1.1304184 0.000000 4.857143 16
## 4 News & Magazines 4.018238 1.1044887 0.000000 4.820242 17
## 5 Entertainment 3.912152 1.0808625 0.000000 4.827010 35
## 6 Business 4.041068 1.0067718 0.000000 4.746198 21
## 7 Art & Design 4.224495 0.9738677 0.000000 4.807056 21
## 8 Health & Fitness 4.054604 0.9223663 0.000000 4.876358 31
## 9 Puzzle 4.413690 0.8955169 0.000000 4.925234 53
## 10 Social 4.047273 0.8731352 0.000000 4.808152 27
## 11 Communication 4.195740 0.8464970 0.000000 5.000000 30
## 12 Lifestyle 4.213791 0.8440310 0.000000 4.902840 35
## 13 Tools 4.209271 0.8390360 0.000000 4.729252 30
## 14 Role Playing 4.309503 0.8238666 0.000000 4.848342 34
## 15 Word 4.496225 0.8218532 0.000000 4.936412 33
## 16 Events 4.108783 0.8169071 2.300652 4.840000 12
## 17 Finance 4.357201 0.7883307 0.000000 4.890323 40
## 18 Card 4.365843 0.7682717 0.000000 4.880276 39
## 19 Productivity 4.438780 0.7601084 0.000000 4.925234 40
## 20 Comics 4.154610 0.6407544 2.425743 4.900000 27
## 21 Adventure 4.364424 0.5533991 1.653021 5.000000 37
## 22 Travel & Local 4.303415 0.5127988 2.605769 4.862745 31
## 23 Dating 3.863097 0.4875795 2.938393 4.597504 17
## 24 Education 4.380218 0.4724776 2.551703 4.894947 37
## 25 Sports 4.337591 0.4522180 2.304272 4.817960 58
## 26 Auto & Vehicles 4.379808 0.4374807 3.280899 4.792783 11
## 27 Maps & Navigation 4.163358 0.4308544 3.093313 4.824301 21
## 28 Weather 4.543374 0.4290824 2.899666 4.888557 29
## 29 Photography 4.285735 0.4155738 2.900730 4.825959 31
## 30 Educational 4.269345 0.3884863 3.158416 4.736287 22
## 31 Board 4.452036 0.3771551 2.722944 4.916149 37
## 32 Strategy 4.364947 0.3535338 3.247901 4.836303 46
## 33 Shopping 4.458543 0.3477599 3.461997 4.863370 46
## 34 Simulation 4.411816 0.3272977 2.765270 4.822581 48
## 35 Personalization 4.391174 0.3096564 3.695881 4.701242 15
## 36 Books & Reference 4.533771 0.3080158 3.892820 4.833476 12
## 37 Music & Audio 4.448196 0.3043929 3.544713 4.808293 25
## 38 Video Players & Editors 4.290024 0.3008915 3.802075 4.807214 23
## 39 Action 4.329678 0.2989127 3.437535 4.819661 41
## 40 Casino 4.605110 0.2403233 3.830189 4.868519 26
## 41 Casual 4.510622 0.2227701 4.090000 4.890380 44
## 42 Arcade 4.515642 0.2218313 3.913229 4.836258 25
## 43 Music 4.418079 0.1639742 4.081081 4.729149 11
## 44 Racing 4.495370 0.1211388 4.245698 4.703173 23
# Chart: horizontal bars of the rating standard deviation per category,
# ordered by that standard deviation.
ggplot(
  data = rating_volatility,
  mapping = aes(x = reorder(category, sd_rating), y = sd_rating)
) +
  geom_col(fill = "tomato") +
  coord_flip() +
  theme_minimal() +
  labs(
    x = "Kategori",
    y = "Standar Deviasi Rating",
    title = "Volatilitas Rating Antar Aplikasi dalam Satu Kategori"
  )

9. Konsentrasi Developer Dominan per Kategori
# Aggregation pipeline: share of each category held by its largest developer.
#   1. $match   - keep documents that have both a category and a developer.
#   2. $group   - number of apps per (category, developer) pair.
#   3. $group   - per category: total number of apps and the largest
#                 per-developer app count.
#   4. $project - copy `_id` into `category`, rename `top_dev` to
#                 `top_dev_apps`, and compute
#                 dominance_ratio = top_dev / total_apps.
#                 `_id` is not excluded, so the result carries the category
#                 in both `_id` and `category`.
#   5. $sort    - highest dominance ratio first.
pipeline_dev_concentration <- '[
{
"$match": {
"category": { "$ne": null },
"developer": { "$ne": null }
}
},
{
"$group": {
"_id": { "category": "$category", "developer": "$developer" },
"app_count": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.category",
"total_apps": { "$sum": "$app_count" },
"top_dev": { "$max": "$app_count" }
}
},
{
"$project": {
"category": "$_id",
"total_apps": 1,
"top_dev_apps": "$top_dev",
"dominance_ratio": { "$divide": ["$top_dev", "$total_apps"] }
}
},
{
"$sort": { "dominance_ratio": -1 }
}
]'
# Run the pipeline. The $project stage already emits the final column names,
# so no rename step is needed (the previous rename mapped every column to
# itself and changed nothing).
dev_concentration <- mongo_conn$aggregate(pipeline_dev_concentration)
# Show the table.
print(dev_concentration)
## _id total_apps category top_dev_apps
## 1 Libraries & Demo 1 Libraries & Demo 1
## 2 Tools 32 Tools 13
## 3 House & Home 4 House & Home 1
## 4 Productivity 40 Productivity 10
## 5 Communication 31 Communication 6
## 6 Trivia 16 Trivia 3
## 7 Auto & Vehicles 11 Auto & Vehicles 2
## 8 Educational 22 Educational 4
## 9 Music 11 Music 2
## 10 Role Playing 34 Role Playing 6
## 11 Racing 23 Racing 4
## 12 Casino 26 Casino 4
## 13 Personalization 15 Personalization 2
## 14 Photography 31 Photography 4
## 15 Food & Drink 8 Food & Drink 1
## 16 Beauty 8 Beauty 1
## 17 Sports 59 Sports 7
## 18 Dating 17 Dating 2
## 19 Art & Design 21 Art & Design 2
## 20 Travel & Local 32 Travel & Local 3
## 21 Word 34 Word 3
## 22 Health & Fitness 34 Health & Fitness 3
## 23 Video Players & Editors 23 Video Players & Editors 2
## 24 Casual 46 Casual 4
## 25 Business 23 Business 2
## 26 Maps & Navigation 23 Maps & Navigation 2
## 27 Books & Reference 12 Books & Reference 1
## 28 Entertainment 36 Entertainment 3
## 29 Medical 12 Medical 1
## 30 Adventure 37 Adventure 3
## 31 Arcade 25 Arcade 2
## 32 Board 38 Board 3
## 33 Action 41 Action 3
## 34 Card 41 Card 3
## 35 Comics 28 Comics 2
## 36 Events 14 Events 1
## 37 Social 28 Social 2
## 38 Weather 29 Weather 2
## 39 Strategy 47 Strategy 3
## 40 Simulation 48 Simulation 3
## 41 News & Magazines 17 News & Magazines 1
## 42 Puzzle 53 Puzzle 3
## 43 Education 40 Education 2
## 44 Finance 40 Finance 2
## 45 Music & Audio 25 Music & Audio 1
## 46 Shopping 52 Shopping 2
## 47 Parenting 29 Parenting 1
## 48 Lifestyle 35 Lifestyle 1
## dominance_ratio
## 1 1.00000000
## 2 0.40625000
## 3 0.25000000
## 4 0.25000000
## 5 0.19354839
## 6 0.18750000
## 7 0.18181818
## 8 0.18181818
## 9 0.18181818
## 10 0.17647059
## 11 0.17391304
## 12 0.15384615
## 13 0.13333333
## 14 0.12903226
## 15 0.12500000
## 16 0.12500000
## 17 0.11864407
## 18 0.11764706
## 19 0.09523810
## 20 0.09375000
## 21 0.08823529
## 22 0.08823529
## 23 0.08695652
## 24 0.08695652
## 25 0.08695652
## 26 0.08695652
## 27 0.08333333
## 28 0.08333333
## 29 0.08333333
## 30 0.08108108
## 31 0.08000000
## 32 0.07894737
## 33 0.07317073
## 34 0.07317073
## 35 0.07142857
## 36 0.07142857
## 37 0.07142857
## 38 0.06896552
## 39 0.06382979
## 40 0.06250000
## 41 0.05882353
## 42 0.05660377
## 43 0.05000000
## 44 0.05000000
## 45 0.04000000
## 46 0.03846154
## 47 0.03448276
## 48 0.02857143
# Chart: horizontal bars of the dominance ratio per category, ordered by the
# ratio, with the value axis shown as whole percentages.
ggplot(
  data = dev_concentration,
  mapping = aes(x = reorder(category, dominance_ratio), y = dominance_ratio)
) +
  geom_col(fill = "darkblue") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_flip() +
  theme_minimal() +
  labs(
    x = "Kategori",
    y = "Proporsi Aplikasi oleh Developer Terbesar",
    title = "Konsentrasi Developer Dominan per Kategori"
  )

10. Rasio Aplikasi Berbayar per Kategori
# Aggregation pipeline: share of paid apps per category.
#
# The earlier version flagged an app as paid whenever `price` was not exactly
# the string "Free". The rendered result of that version shows
# paid_ratio = 1 for every category, i.e. the comparison never matched and
# every app was counted as paid. Free detection is therefore normalised:
#   1. $addFields - `price_key`: `price` rendered as a string, trimmed and
#                   lower-cased. A null or missing price becomes "".
#   2. $addFields - `is_paid`: 0 when `price_key` is one of the known "free"
#                   spellings listed below, otherwise 1. $literal is required
#                   because strings starting with "$" would otherwise be read
#                   as field paths.
#   3. $match     - keep documents that have a category.
#   4. $group     - per category: number of apps and number of paid apps.
#   5. $project   - copy `_id` into `category` and compute
#                   paid_ratio = paid_apps / total_apps.
#   6. $sort      - highest paid ratio first.
# NOTE(review): the actual encoding of `price` is not visible here. Inspect
# the distinct values of the field and extend the list of free spellings if
# the ratio is still degenerate - TODO confirm.
pipeline_paid_ratio <- '[
{
"$addFields": {
"price_key": {
"$toLower": {
"$trim": { "input": { "$toString": "$price" } }
}
}
}
},
{
"$addFields": {
"is_paid": {
"$cond": {
"if": {
"$in": [
"$price_key",
{ "$literal": ["", "free", "0", "0.0", "0.00", "$0", "$0.00"] }
]
},
"then": 0,
"else": 1
}
}
}
},
{
"$match": {
"category": { "$ne": null }
}
},
{
"$group": {
"_id": "$category",
"total_apps": { "$sum": 1 },
"paid_apps": { "$sum": "$is_paid" }
}
},
{
"$project": {
"category": "$_id",
"total_apps": 1,
"paid_apps": 1,
"paid_ratio": { "$divide": ["$paid_apps", "$total_apps"] }
}
},
{
"$sort": { "paid_ratio": -1 }
}
]'
# Run the pipeline. The $project stage already emits the final column names,
# so no rename step is needed (the previous rename mapped every column to
# itself and changed nothing).
paid_ratio <- mongo_conn$aggregate(pipeline_paid_ratio)
# Surface a degenerate result instead of plotting it silently: every category
# being 100% paid or 100% free almost certainly means `price` is encoded in a
# way the pipeline does not recognise.
if (nrow(paid_ratio) > 1 &&
    (all(paid_ratio$paid_ratio == 1) || all(paid_ratio$paid_ratio == 0))) {
  warning(
    "paid_ratio is identical (0 or 1) for every category; ",
    "check how `price` is encoded in the collection.",
    call. = FALSE
  )
}
# Show the resulting table.
print(paid_ratio)
## _id total_apps paid_apps category
## 1 Social 28 28 Social
## 2 Racing 23 23 Racing
## 3 Card 41 41 Card
## 4 News & Magazines 17 17 News & Magazines
## 5 Video Players & Editors 23 23 Video Players & Editors
## 6 Maps & Navigation 23 23 Maps & Navigation
## 7 Business 23 23 Business
## 8 Music & Audio 25 25 Music & Audio
## 9 Parenting 29 29 Parenting
## 10 Personalization 15 15 Personalization
## 11 Auto & Vehicles 11 11 Auto & Vehicles
## 12 House & Home 4 4 House & Home
## 13 Puzzle 53 53 Puzzle
## 14 Word 34 34 Word
## 15 Productivity 40 40 Productivity
## 16 Strategy 47 47 Strategy
## 17 Comics 28 28 Comics
## 18 Food & Drink 8 8 Food & Drink
## 19 Communication 31 31 Communication
## 20 Libraries & Demo 1 1 Libraries & Demo
## 21 Education 40 40 Education
## 22 Photography 31 31 Photography
## 23 Trivia 16 16 Trivia
## 24 Finance 40 40 Finance
## 25 Casino 26 26 Casino
## 26 Travel & Local 32 32 Travel & Local
## 27 Tools 32 32 Tools
## 28 Action 41 41 Action
## 29 Beauty 8 8 Beauty
## 30 Lifestyle 35 35 Lifestyle
## 31 Casual 46 46 Casual
## 32 Educational 22 22 Educational
## 33 Adventure 37 37 Adventure
## 34 Medical 12 12 Medical
## 35 Simulation 48 48 Simulation
## 36 Music 11 11 Music
## 37 Arcade 25 25 Arcade
## 38 Board 38 38 Board
## 39 Sports 59 59 Sports
## 40 Dating 17 17 Dating
## 41 Books & Reference 12 12 Books & Reference
## 42 Weather 29 29 Weather
## 43 Health & Fitness 34 34 Health & Fitness
## 44 Art & Design 21 21 Art & Design
## 45 Shopping 52 52 Shopping
## 46 Events 14 14 Events
## 47 Entertainment 36 36 Entertainment
## 48 Role Playing 34 34 Role Playing
## paid_ratio
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## 7 1
## 8 1
## 9 1
## 10 1
## 11 1
## 12 1
## 13 1
## 14 1
## 15 1
## 16 1
## 17 1
## 18 1
## 19 1
## 20 1
## 21 1
## 22 1
## 23 1
## 24 1
## 25 1
## 26 1
## 27 1
## 28 1
## 29 1
## 30 1
## 31 1
## 32 1
## 33 1
## 34 1
## 35 1
## 36 1
## 37 1
## 38 1
## 39 1
## 40 1
## 41 1
## 42 1
## 43 1
## 44 1
## 45 1
## 46 1
## 47 1
## 48 1
# Chart: horizontal bars of the paid-app share per category, ordered by the
# share, with the value axis shown as whole percentages.
ggplot(
  data = paid_ratio,
  mapping = aes(x = reorder(category, paid_ratio), y = paid_ratio)
) +
  geom_col(fill = "darkorange") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_flip() +
  theme_minimal() +
  labs(
    x = "Kategori",
    y = "Proporsi Aplikasi Berbayar",
    title = "Rasio Aplikasi Berbayar per Kategori"
  )

11. Aplikasi Rating Tinggi tapi Download Rendah (Hidden Gems)
# Aggregation pipeline: highly rated apps with few downloads ("hidden gems").
#   1. $addFields - `downloads_numeric`: `number_of_downloads` as a string
#                   with "," and "+" stripped, converted to double;
#                   `rating_numeric` and `reviews_numeric`: `rating` and
#                   `number_of_reviews` converted to double.
#   2. $match     - rating of at least 4.5, at most 100,000 downloads, and
#                   at least 50 reviews.
#   3. $project   - keep the identifying fields and the three numeric fields.
#   4. $sort      - best rating first, then fewest downloads first.
#   5. $limit     - keep the first 10 documents.
pipeline_hidden_gems <- '[
{
"$addFields": {
"downloads_numeric": {
"$toDouble": {
"$replaceAll": {
"input": {
"$replaceAll": {
"input": { "$toString": "$number_of_downloads" },
"find": ",",
"replacement": ""
}
},
"find": "+",
"replacement": ""
}
}
},
"rating_numeric": { "$toDouble": "$rating" },
"reviews_numeric": { "$toDouble": "$number_of_reviews" }
}
},
{
"$match": {
"rating_numeric": { "$gte": 4.5 },
"downloads_numeric": { "$lte": 100000 },
"reviews_numeric": { "$gte": 50 }
}
},
{
"$project": {
"app_name": 1,
"category": 1,
"developer": 1,
"rating_numeric": 1,
"downloads_numeric": 1,
"reviews_numeric": 1
}
},
{
"$sort": { "rating_numeric": -1, "downloads_numeric": 1 }
},
{
"$limit": 10
}
]'
# Run the pipeline.
hidden_gems <- mongo_conn$aggregate(pipeline_hidden_gems)
# Show the resulting table.
print(hidden_gems)
## _id app_name
## 1 683b33a628b5b791a60d8736 GrowthDay: Daily Mindset Coach
## 2 683b33a628b5b791a60d863b Nav Business Financial Health
## 3 683b33a628b5b791a60d889c MyRadar Weather Radar Pro
## 4 683b33a628b5b791a60d8652 Gurukula Stories Comics Audios
## 5 683b33a628b5b791a60d86bf Partiful: Fun Party Invites
## 6 683b33a628b5b791a60d852a Dungeon Clawler
## 7 683b33a628b5b791a60d8584 Streets of Rage 4
## 8 683b33a628b5b791a60d864c CLZ Comics comic book database
## 9 683b33a628b5b791a60d8585 Dungeon Village 2
## 10 683b33a628b5b791a60d85b4 ReadEra Premium – ebook reader
## developer category downloads_numeric
## 1 GrowthDay Productivity 5e+04
## 2 Nav Technologies Finance 1e+05
## 3 ACME AtronOmatic LLC Weather 1e+05
## 4 Gurukula.com Sriram Raghavan Comics 1e+04
## 5 Partiful Lifestyle 1e+05
## 6 Stray Fawn Studio Role Playing 1e+05
## 7 Playdigious Arcade 1e+05
## 8 CLZ Comics 1e+05
## 9 Kairosoft Simulation 1e+05
## 10 READERA LLC Books & Reference 1e+05
## rating_numeric reviews_numeric
## 1 4.925234 359
## 2 4.890323 760
## 3 4.870394 3873
## 4 4.870000 99
## 5 4.866667 356
## 6 4.848342 547
## 7 4.836258 911
## 8 4.825050 4687
## 9 4.822581 4264
## 10 4.798913 2289
# Optional chart: horizontal bars of download counts per app, ordered by
# downloads, with the fill colour encoding the rating (light blue = lowest
# rating in the data, dark green = highest).
ggplot(
  data = hidden_gems,
  mapping = aes(
    x = reorder(app_name, downloads_numeric),
    y = downloads_numeric,
    fill = rating_numeric
  )
) +
  geom_col() +
  scale_fill_gradient(high = "darkgreen", low = "lightblue") +
  scale_y_continuous(labels = scales::comma_format()) +
  coord_flip() +
  theme_minimal() +
  labs(
    fill = "Rating",
    x = "Aplikasi",
    y = "Jumlah Download",
    title = "Top 10 Hidden Gems: Rating Tinggi, Download Rendah"
  )
