Setup Awal dan Koneksi MongoDB

library(mongolite)
library(jsonlite)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Open a mongolite handle on the `google_play_apps` collection of the
# `playstore_db` database, served by the MongoDB instance on localhost:27017.
mongo_conn <- mongo(
  url = "mongodb://localhost:27017",
  db = "playstore_db",
  collection = "google_play_apps"
)

Agregasi dan Visualisasi

1. Top 10 Kategori Berdasarkan Jumlah Aplikasi

# Aggregation pipeline: count the documents in each `category`, order the
# groups by that count (largest first) and keep the first 10.
top_kategori_pipeline <- '[
  {
    "$group": {
      "_id": "$category",
      "jumlah_aplikasi": { "$sum": 1 }
    }
  },
  {
    "$sort": { "jumlah_aplikasi": -1 }
  },
  {
    "$limit": 10
  }
]'

# Run the pipeline and expose the group key (`_id`) under the name `category`.
top_kategori <- rename(
  mongo_conn$aggregate(pipeline = top_kategori_pipeline),
  category = `_id`
)

# Horizontal bar chart of the app count per category; the categories are
# ordered by their count via reorder().
ggplot(
  data = top_kategori,
  mapping = aes(
    x = reorder(category, jumlah_aplikasi),
    y = jumlah_aplikasi
  )
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 10 Kategori Berdasarkan Jumlah Aplikasi",
    x = "Kategori",
    y = "Jumlah Aplikasi"
  ) +
  theme_minimal() +
  coord_flip()

2. Rata-rata Download vs Rating per Kategori

# Aggregation pipeline:
#   1. $addFields - `downloads_numeric`: stringify `number_of_downloads`,
#      strip every "," and "+", then cast to double;
#      `rating_numeric`: cast `rating` to double.
#   2. $group     - average both numeric fields per `category`.
#   3. $sort      - largest average download count first.
pipeline_avg_download_rating <- '[
  {
    "$addFields": {
      "downloads_numeric": {
        "$toDouble": {
          "$replaceAll": {
            "input": {
              "$replaceAll": {
                "input": { "$toString": "$number_of_downloads" },
                "find": ",",
                "replacement": ""
              }
            },
            "find": "+",
            "replacement": ""
          }
        }
      },
      "rating_numeric": { "$toDouble": "$rating" }
    }
  },
  {
    "$group": {
      "_id": "$category",
      "avg_downloads": { "$avg": "$downloads_numeric" },
      "avg_rating": { "$avg": "$rating_numeric" }
    }
  },
  {
    "$sort": { "avg_downloads": -1 }
  }
]'

# Run the pipeline and expose the group key (`_id`) under the name `category`.
avg_download_rating <- rename(
  mongo_conn$aggregate(pipeline = pipeline_avg_download_rating),
  category = `_id`
)

# Scatter plot of average rating (x) against average downloads (y), one point
# per category, each annotated with the category name. The y axis uses
# short-scale labels produced by scales::cut_short_scale().
ggplot(
  data = avg_download_rating,
  mapping = aes(x = avg_rating, y = avg_downloads, label = category)
) +
  geom_point(size = 3, alpha = 0.7, color = "darkgreen") +
  geom_text(hjust = 0, vjust = -0.5, size = 3) +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  theme_minimal() +
  labs(
    title = "Rata-rata Download vs Rating per Kategori",
    x = "Rata-rata Rating",
    y = "Rata-rata Jumlah Download"
  )

3. Developer dengan Total Download Terbesar (Top 10)

# Aggregation pipeline for total downloads per developer (top 10):
#   1. $addFields - `downloads_numeric`: stringify `number_of_downloads`,
#      strip every "," and "+", then cast to double.
#   2. $group     - sum `downloads_numeric` per `developer`.
#   3. $sort      - largest total first.
#   4. $limit     - keep the first 10 developers.
pipeline_top_dev_download <- '[
  {
    "$addFields": {
      "downloads_numeric": {
        "$toDouble": {
          "$replaceAll": {
            "input": {
              "$replaceAll": {
                "input": { "$toString": "$number_of_downloads" },
                "find": ",",
                "replacement": ""
              }
            },
            "find": "+",
            "replacement": ""
          }
        }
      }
    }
  },
  {
    "$group": {
      "_id": "$developer",
      "total_downloads": { "$sum": "$downloads_numeric" }
    }
  },
  {
    "$sort": { "total_downloads": -1 }
  },
  {
    "$limit": 10
  }
]'

# Run the pipeline and expose the group key (`_id`) under the name `developer`.
top_dev_download <- rename(
  mongo_conn$aggregate(pipeline = pipeline_top_dev_download),
  developer = `_id`
)

# Horizontal bar chart of total downloads per developer, ordered by total.
# The value axis uses short-scale labels from scales::cut_short_scale().
ggplot(
  data = top_dev_download,
  mapping = aes(
    x = reorder(developer, total_downloads),
    y = total_downloads
  )
) +
  geom_col(fill = "steelblue") +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  labs(
    title = "Top 10 Developer dengan Total Download Terbesar",
    x = "Developer",
    y = "Total Download"
  ) +
  theme_minimal() +
  coord_flip()

4. Aplikasi dengan Download Tinggi tapi Rating Rendah (Risiko UX)

# Aggregation pipeline for widely downloaded but poorly rated apps:
#   1. $addFields - `downloads_numeric`: stringify `number_of_downloads`,
#      strip every "," and "+", then cast to double;
#      `rating_numeric`: cast `rating` to double.
#   2. $match     - keep downloads >= 10,000,000 and rating <= 3.5.
#   3. $project   - keep the app name, developer, category and both numeric
#      fields (`_id` is not excluded, so it is returned as well).
#   4. $sort      - most downloaded first.
#   5. $limit     - keep the first 10 apps.
pipeline_risk_ux <- '[
  {
    "$addFields": {
      "downloads_numeric": {
        "$toDouble": {
          "$replaceAll": {
            "input": {
              "$replaceAll": {
                "input": { "$toString": "$number_of_downloads" },
                "find": ",",
                "replacement": ""
              }
            },
            "find": "+",
            "replacement": ""
          }
        }
      },
      "rating_numeric": { "$toDouble": "$rating" }
    }
  },
  {
    "$match": {
      "downloads_numeric": { "$gte": 10000000 },
      "rating_numeric": { "$lte": 3.5 }
    }
  },
  {
    "$project": {
      "app_name": 1,
      "developer": 1,
      "category": 1,
      "downloads_numeric": 1,
      "rating_numeric": 1
    }
  },
  {
    "$sort": { "downloads_numeric": -1 }
  },
  {
    "$limit": 10
  }
]'

# Run the pipeline.
apps_risk_ux <- mongo_conn$aggregate(pipeline = pipeline_risk_ux)

# Show the resulting table.
print(x = apps_risk_ux)
##                         _id                       app_name
## 1  683b33a628b5b791a60d8745                    Google Maps
## 2  683b33a628b5b791a60d8664         LINE: Calls & Messages
## 3  683b33a628b5b791a60d8786 Flipboard:Your Social Magazine
## 4  683b33a628b5b791a60d86ee                 Health Connect
## 5  683b33a628b5b791a60d8566              happn: dating app
## 6  683b33a628b5b791a60d8837                            NFL
## 7  683b33a628b5b791a60d87c3             VSCO: Photo Editor
## 8  683b33a628b5b791a60d8887     AccuWeather: Weather Radar
## 9  683b33a628b5b791a60d8683               Google Classroom
## 10 683b33a628b5b791a60d8832       DAZN - Watch Live Sports
##                developer         category downloads_numeric rating_numeric
## 1             Google LLC   Travel & Local             1e+10       3.215799
## 2  LINE (LY Corporation)    Communication             1e+09       3.496082
## 3              Flipboard News & Magazines             5e+08       3.483313
## 4             Google LLC Health & Fitness             5e+08       3.147321
## 5                  happn           Dating             1e+08       3.337754
## 6    NFL Enterprises LLC           Sports             1e+08       3.007707
## 7                   VSCO      Photography             1e+08       3.471409
## 8            AccuWeather          Weather             1e+08       3.482612
## 9             Google LLC        Education             1e+08       2.551703
## 10                  DAZN           Sports             5e+07       2.304272
# Optional chart: horizontal bars of download count per app, ordered by
# downloads and filled on a red (lowest rating) to orange (highest) gradient.
ggplot(
  data = apps_risk_ux,
  mapping = aes(
    x = reorder(app_name, downloads_numeric),
    y = downloads_numeric,
    fill = rating_numeric
  )
) +
  geom_col() +
  scale_fill_gradient(high = "orange", low = "red") +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  labs(
    title = "Top 10 Aplikasi Populer dengan Rating Rendah (Risiko UX)",
    x = "Aplikasi",
    y = "Jumlah Download",
    fill = "Rating"
  ) +
  theme_minimal() +
  coord_flip()

5. Korelasi Rating dan Download

# Aggregation pipeline that returns one row per app with numeric rating and
# download fields:
#   1. $addFields - `downloads_numeric`: stringify `number_of_downloads`,
#      strip every "," and "+", then cast to double;
#      `rating_numeric`: cast `rating` to double.
#   2. $match     - drop documents where either numeric field is null.
#   3. $project   - keep the app name and both numeric fields.
pipeline_rating_download <- '[
  {
    "$addFields": {
      "downloads_numeric": {
        "$toDouble": {
          "$replaceAll": {
            "input": {
              "$replaceAll": {
                "input": { "$toString": "$number_of_downloads" },
                "find": ",",
                "replacement": ""
              }
            },
            "find": "+",
            "replacement": ""
          }
        }
      },
      "rating_numeric": { "$toDouble": "$rating" }
    }
  },
  {
    "$match": {
      "downloads_numeric": { "$ne": null },
      "rating_numeric": { "$ne": null }
    }
  },
  {
    "$project": {
      "app_name": 1,
      "downloads_numeric": 1,
      "rating_numeric": 1
    }
  }
]'

# Run the pipeline.
rating_download_data <- mongo_conn$aggregate(pipeline = pipeline_rating_download)

# Pearson correlation between rating and downloads, computed on the rows
# where both values are present (use = "complete.obs").
correlation <- cor(
  x = rating_download_data$rating_numeric,
  y = rating_download_data$downloads_numeric,
  use = "complete.obs",
  method = "pearson"
)
cat("Korelasi Pearson antara Rating dan Jumlah Download:", correlation, "\n")
## Korelasi Pearson antara Rating dan Jumlah Download: -0.01507175
# Scatter plot of rating (x) against downloads (y) with a linear-model fit
# line drawn without its confidence band; the title embeds the correlation
# coefficient rounded to 3 decimals.
ggplot(
  data = rating_download_data,
  mapping = aes(x = rating_numeric, y = downloads_numeric)
) +
  geom_point(color = "steelblue", alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE, color = "darkred") +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  theme_minimal() +
  labs(
    title = paste0("Korelasi Rating dan Jumlah Download (r = ", round(correlation, 3), ")"),
    x = "Rating",
    y = "Jumlah Download"
  )
## `geom_smooth()` using formula = 'y ~ x'

6. Aplikasi dengan Rating Tertinggi per Kategori

# Aggregation pipeline: best-rated app in each category.
#   1. $addFields - `rating_numeric`: cast `rating` to double.
#   2. $match     - drop documents whose numeric rating is null.
#   3. $sort      - by category, then rating (highest first), then
#      `number_of_reviews` (highest first) as a tie-break.
#   4. $group     - per category, take the name, rating and developer of the
#      first document in that order.
#   5. $sort      - categories with the highest top rating first.
# NOTE(review): `number_of_reviews` is sorted as stored; if it is kept as a
# string the tie-break is lexicographic rather than numeric - confirm its type.
pipeline_top_rating_per_category <- '[
  {
    "$addFields": {
      "rating_numeric": { "$toDouble": "$rating" }
    }
  },
  {
    "$match": {
      "rating_numeric": { "$ne": null }
    }
  },
  {
    "$sort": {
      "category": 1,
      "rating_numeric": -1,
      "number_of_reviews": -1
    }
  },
  {
    "$group": {
      "_id": "$category",
      "top_app": { "$first": "$app_name" },
      "top_rating": { "$first": "$rating_numeric" },
      "developer": { "$first": "$developer" }
    }
  },
  {
    "$sort": { "top_rating": -1 }
  }
]'

# Run the pipeline and give the result columns their reporting names.
top_apps_per_category <- rename(
  mongo_conn$aggregate(pipeline = pipeline_top_rating_per_category),
  category = `_id`,
  app_name = top_app,
  rating = top_rating
)

# Show the resulting table.
print(x = top_apps_per_category)
##                   category                       app_name   rating
## 1            Communication                     TC Connect 5.000000
## 2                Adventure           A Story of A Company 5.000000
## 3                     Word   Word Puzzle Time - Crossword 4.936412
## 4                   Puzzle      Fable Town: Merging Games 4.925234
## 5             Productivity GrowthDay: Daily Mindset Coach 4.925234
## 6                    Board  Tile Story: Match Puzzle Game 4.916149
## 7                Lifestyle        In Love while Parenting 4.902840
## 8                   Comics        Bonelli Digital Classic 4.900000
## 9                Education  The Happy Child Parenting App 4.894947
## 10                  Casual   Dream Family - Match 3 Games 4.890380
## 11                 Finance  Nav Business Financial Health 4.890323
## 12                 Weather Local Weather Forecast - Radar 4.888557
## 13                    Card    Gin Rummy Stars - Card Game 4.880276
## 14        Health & Fitness          Me+ Lifestyle Routine 4.876358
## 15               Parenting   Pregnancy App & Baby Tracker 4.876284
## 16                  Casino Jackpot Master™ Slots - Casino 4.868519
## 17                Shopping  Klarna | Shop now. Pay later. 4.863370
## 18          Travel & Local                Eurostar Trains 4.862745
## 19                  Trivia             Brain Test Premium 4.857143
## 20            Role Playing                Dungeon Clawler 4.848342
## 21                  Events Sympla: Ingressos para eventos 4.840000
## 22            Food & Drink                      Starbucks 4.837782
## 23                Strategy        Kingdom Rush Origins TD 4.836303
## 24                  Arcade              Streets of Rage 4 4.836258
## 25       Books & Reference         Libby, the Library App 4.833476
## 26           Entertainment    Tubi: Free Movies & Live TV 4.827010
## 27             Photography       AI Photo Editor - Polish 4.825959
## 28       Maps & Navigation Trainline: Train travel Europe 4.824301
## 29              Simulation              Dungeon Village 2 4.822581
## 30        News & Magazines                       Substack 4.820242
## 31                  Action                     Dead Cells 4.819661
## 32                  Sports    FotMob - Soccer Live Scores 4.817960
## 33           Music & Audio               DICE: Live Shows 4.808293
## 34                  Social  Widgetable: Besties & Couples 4.808152
## 35 Video Players & Editors        Video Player All Format 4.807214
## 36            Art & Design Canva: AI Photo & Video Editor 4.807056
## 37         Auto & Vehicles    Carvana: Buy/Sell Used Cars 4.792783
## 38                Business    Popl: Digital Business Card 4.746198
## 39            House & Home  Trulia: Homes For Sale & Rent 4.741194
## 40             Educational Balloon Pop Kids Learning Game 4.736287
## 41                   Tools TickTock-TikTok Live Wallpaper 4.729252
## 42                   Music            My Singing Monsters 4.729149
## 43                  Racing            CarX Highway Racing 4.703173
## 44         Personalization             Microsoft Launcher 4.701242
## 45                 Medical TATA 1mg Online Healthcare App 4.700787
## 46                  Dating  Lovely – Meet and Date Locals 4.597504
## 47                  Beauty Ulta Beauty: Makeup & Skincare 4.583845
## 48        Libraries & Demo       Libraries for developers 4.340000
##                                         developer
## 1                                       ThetaCore
## 2                            Buff Studio Co.,Ltd.
## 3                                 Word Puzzle Lab
## 4                                  REEF GAMES LTD
## 5                                       GrowthDay
## 6                  LinkDesks Classic Puzzle Games
## 7                                    HJB Ventures
## 8                          Sergio Bonelli Editore
## 9                                    HJB Ventures
## 10                                       Playflux
## 11                               Nav Technologies
## 12                           Vitality App Studios
## 13                                 Beach Bum Ltd.
## 14                              ENERJOY PTE. LTD.
## 15                                     BabyCenter
## 16                            Zeroo Gravity Games
## 17                          Klarna Bank AB (publ)
## 18                 Eurostar International Limited
## 19                                   Unico Studio
## 20                              Stray Fawn Studio
## 21                                         Sympla
## 22                       Starbucks Coffee Company
## 23                                 Ironhide Games
## 24                                    Playdigious
## 25                                OverDrive, Inc.
## 26                                        Tubi TV
## 27                                    InShot Inc.
## 28                                      trainline
## 29                                      Kairosoft
## 30                                  Substack Inc.
## 31                                    Playdigious
## 32                                      FotMob AS
## 33                                        DICE FM
## 34                   Happeny Technology Pte. Ltd.
## 35                                    InShot Inc.
## 36                                          Canva
## 37                                        Carvana
## 38                                        Popl Co
## 39                                         Trulia
## 40 Bebi Family: preschool learning games for kids
## 41                               TikTok Pte. Ltd.
## 42                            Big Blue Bubble Inc
## 43                              CarX Technologies
## 44                          Microsoft Corporation
## 45  Tata 1MG Healthcare Solutions Private Limited
## 46                                   Joyride GmbH
## 47                                      Ulta Inc.
## 48                              Desarrollo Droide
# Optional chart: horizontal bars of the top rating in each category, ordered
# by rating. Bars are filled per category and the legend is switched off.
ggplot(
  data = top_apps_per_category,
  mapping = aes(
    x = reorder(category, rating),
    y = rating,
    fill = category
  )
) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Aplikasi dengan Rating Tertinggi per Kategori",
    x = "Kategori",
    y = "Rating Tertinggi"
  ) +
  theme_minimal() +
  coord_flip()

7. Aplikasi dengan Download Tertinggi per Kategori

# Aggregation pipeline: most downloaded app in each category.
#   1. $addFields - `downloads_numeric`: stringify `number_of_downloads`,
#      strip every "," and "+", then cast to double.
#   2. $match     - drop documents whose numeric download count is null.
#   3. $sort      - by category, then downloads (highest first).
#   4. $group     - per category, take the name, download count and developer
#      of the first document in that order.
#   5. $sort      - categories with the highest top download count first.
# NOTE(review): the sort has no key beyond downloads, so when several apps in
# a category share the top count the one picked by $first is not pinned here.
pipeline_top_download_per_category <- '[
  {
    "$addFields": {
      "downloads_numeric": {
        "$toDouble": {
          "$replaceAll": {
            "input": {
              "$replaceAll": {
                "input": { "$toString": "$number_of_downloads" },
                "find": ",",
                "replacement": ""
              }
            },
            "find": "+",
            "replacement": ""
          }
        }
      }
    }
  },
  {
    "$match": {
      "downloads_numeric": { "$ne": null }
    }
  },
  {
    "$sort": {
      "category": 1,
      "downloads_numeric": -1
    }
  },
  {
    "$group": {
      "_id": "$category",
      "top_app": { "$first": "$app_name" },
      "top_download": { "$first": "$downloads_numeric" },
      "developer": { "$first": "$developer" }
    }
  },
  {
    "$sort": { "top_download": -1 }
  }
]'

# Run the pipeline and give the result columns their reporting names.
top_download_apps <- rename(
  mongo_conn$aggregate(pipeline = pipeline_top_download_per_category),
  category = `_id`,
  app_name = top_app,
  downloads = top_download
)

# Show the resulting table.
print(x = top_download_apps)
##                   category                       app_name downloads
## 1  Video Players & Editors                      Google TV     1e+10
## 2           Travel & Local                    Google Maps     1e+10
## 3                   Social                       Facebook     1e+10
## 4             Productivity                   Google Drive     1e+10
## 5                    Tools   Gboard - the Google Keyboard     1e+10
## 6            Communication                          Gmail     1e+10
## 7              Photography                  Google Photos     1e+10
## 8            Entertainment              Google Play Games     5e+09
## 9            Music & Audio                  YouTube Music     5e+09
## 10                  Sports                    8 Ball Pool     1e+09
## 11        News & Magazines                              X     1e+09
## 12                  Racing              Hill Climb Racing     1e+09
## 13                Business                 Zoom Workplace     1e+09
## 14                  Arcade                 Subway Surfers     1e+09
## 15       Books & Reference Google Play Books & Audiobooks     1e+09
## 16                   Board                     Ludo King®     1e+09
## 17         Personalization                     Wallpapers     1e+09
## 18               Adventure                         Roblox     1e+09
## 19                 Finance                  Google Wallet     1e+09
## 20                  Casual               Candy Crush Saga     1e+09
## 21                  Action   Temple Run 2: Endless Escape     1e+09
## 22               Lifestyle                    Google Home     1e+09
## 23                 Weather            Weather - By Xiaomi     1e+09
## 24        Health & Fitness                 Samsung Health     1e+09
## 25       Maps & Navigation Waze Navigation & Live Traffic     5e+08
## 26                   Music    Magic Tiles 3™ - Piano Game     5e+08
## 27                  Puzzle                   Block Blast!     5e+08
## 28                Strategy                 Clash of Clans     5e+08
## 29            Art & Design Canva: AI Photo & Video Editor     5e+08
## 30                Shopping  Temu: Shop Like a Billionaire     5e+08
## 31               Education           Samsung Global Goals     5e+08
## 32              Simulation   Barbie Dreamhouse Adventures     1e+08
## 33             Educational                Toca Boca World     1e+08
## 34                  Comics WEBTOON: Manga, Comics, Manhwa     1e+08
## 35                    Word                     Wordscapes     1e+08
## 36               Parenting                   Family Space     1e+08
## 37                    Card                          UNO!™     1e+08
## 38                  Trivia   Trivia Crack: Fun Quiz Games     1e+08
## 39                  Dating Tinder Dating App: Chat & Date     1e+08
## 40            Role Playing                 Avatar World ®     1e+08
## 41            Food & Drink  DoorDash: Food, Grocery, More     5e+07
## 42                  Casino Slotomania™ Slots Casino Games     5e+07
## 43            House & Home Realtor.com Real Estate & Rent     1e+07
## 44         Auto & Vehicles    Cars.com: Buy and sell cars     1e+07
## 45                 Medical         WebMD: Symptom Checker     1e+07
## 46                  Events   StubHub - Live Event Tickets     1e+07
## 47                  Beauty Sephora: Buy Makeup & Skincare     5e+06
## 48        Libraries & Demo       Libraries for developers     1e+05
##                         developer
## 1                      Google LLC
## 2                      Google LLC
## 3            Meta Platforms, Inc.
## 4                      Google LLC
## 5                      Google LLC
## 6                      Google LLC
## 7                      Google LLC
## 8                      Google LLC
## 9                      Google LLC
## 10                   Miniclip.com
## 11                        X Corp.
## 12                     Fingersoft
## 13                       zoom.com
## 14                     SYBO Games
## 15                     Google LLC
## 16                       Gametion
## 17                     Google LLC
## 18             Roblox Corporation
## 19                     Google LLC
## 20                           King
## 21                 Imangi Studios
## 22                     Google LLC
## 23                    Xiaomi Inc.
## 24 Samsung Electronics Co.,  Ltd.
## 25                           Waze
## 26             AMANOTES PTE. LTD.
## 27                   HungryStudio
## 28                      Supercell
## 29                          Canva
## 30                           Temu
## 31 Samsung Electronics Co.,  Ltd.
## 32                  Budge Studios
## 33                      Toca Boca
## 34                  NAVER WEBTOON
## 35                      PeopleFun
## 36         Motorola Mobility LLC.
## 37              Mattel163 Limited
## 38                        etermax
## 39                     Tinder LLC
## 40                     Pazu Games
## 41                       DoorDash
## 42                       Playtika
## 43                   realtor.com®
## 44                       Cars.com
## 45                     WebMD, LLC
## 46                        StubHub
## 47              Sephora USA, Inc.
## 48              Desarrollo Droide
# Optional chart: horizontal bars of the top download count in each category,
# ordered by downloads. Bars are filled per category and the legend is
# switched off.
ggplot(
  top_download_apps,
  aes(x = reorder(category, downloads), y = downloads, fill = category)
) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  # label_comma() is the current replacement for the superseded
  # comma_format(); it produces the same comma-grouped labels.
  scale_y_continuous(labels = scales::label_comma()) +
  labs(
    title = "Aplikasi dengan Jumlah Download Tertinggi per Kategori",
    x = "Kategori",
    y = "Jumlah Download"
  ) +
  theme_minimal()

8. Volatilitas Rating Antar Aplikasi dalam Satu Kategori

# Aggregation pipeline: spread of ratings inside each category.
#   1. $addFields - `rating_numeric`: cast `rating` to double.
#   2. $match     - drop documents whose numeric rating is null.
#   3. $group     - per category: mean, population standard deviation
#      ($stdDevPop), minimum, maximum and number of rated documents.
#   4. $match     - keep categories with at least 10 such documents.
#   5. $sort      - largest standard deviation first.
pipeline_rating_volatility <- '[
  {
    "$addFields": {
      "rating_numeric": { "$toDouble": "$rating" }
    }
  },
  {
    "$match": {
      "rating_numeric": { "$ne": null }
    }
  },
  {
    "$group": {
      "_id": "$category",
      "avg_rating": { "$avg": "$rating_numeric" },
      "sd_rating": { "$stdDevPop": "$rating_numeric" },
      "min_rating": { "$min": "$rating_numeric" },
      "max_rating": { "$max": "$rating_numeric" },
      "count": { "$sum": 1 }
    }
  },
  {
    "$match": {
      "count": { "$gte": 10 }
    }
  },
  {
    "$sort": { "sd_rating": -1 }
  }
]'

# Run the pipeline and expose the group key (`_id`) under the name `category`.
rating_volatility <- rename(
  mongo_conn$aggregate(pipeline = pipeline_rating_volatility),
  category = `_id`
)

# Show the result.
print(x = rating_volatility)
##                   category avg_rating sd_rating min_rating max_rating count
## 1                Parenting   3.767982 1.4218594   0.000000   4.876284    27
## 2                  Medical   3.942047 1.3304026   0.000000   4.700787    12
## 3                   Trivia   4.155185 1.1304184   0.000000   4.857143    16
## 4         News & Magazines   4.018238 1.1044887   0.000000   4.820242    17
## 5            Entertainment   3.912152 1.0808625   0.000000   4.827010    35
## 6                 Business   4.041068 1.0067718   0.000000   4.746198    21
## 7             Art & Design   4.224495 0.9738677   0.000000   4.807056    21
## 8         Health & Fitness   4.054604 0.9223663   0.000000   4.876358    31
## 9                   Puzzle   4.413690 0.8955169   0.000000   4.925234    53
## 10                  Social   4.047273 0.8731352   0.000000   4.808152    27
## 11           Communication   4.195740 0.8464970   0.000000   5.000000    30
## 12               Lifestyle   4.213791 0.8440310   0.000000   4.902840    35
## 13                   Tools   4.209271 0.8390360   0.000000   4.729252    30
## 14            Role Playing   4.309503 0.8238666   0.000000   4.848342    34
## 15                    Word   4.496225 0.8218532   0.000000   4.936412    33
## 16                  Events   4.108783 0.8169071   2.300652   4.840000    12
## 17                 Finance   4.357201 0.7883307   0.000000   4.890323    40
## 18                    Card   4.365843 0.7682717   0.000000   4.880276    39
## 19            Productivity   4.438780 0.7601084   0.000000   4.925234    40
## 20                  Comics   4.154610 0.6407544   2.425743   4.900000    27
## 21               Adventure   4.364424 0.5533991   1.653021   5.000000    37
## 22          Travel & Local   4.303415 0.5127988   2.605769   4.862745    31
## 23                  Dating   3.863097 0.4875795   2.938393   4.597504    17
## 24               Education   4.380218 0.4724776   2.551703   4.894947    37
## 25                  Sports   4.337591 0.4522180   2.304272   4.817960    58
## 26         Auto & Vehicles   4.379808 0.4374807   3.280899   4.792783    11
## 27       Maps & Navigation   4.163358 0.4308544   3.093313   4.824301    21
## 28                 Weather   4.543374 0.4290824   2.899666   4.888557    29
## 29             Photography   4.285735 0.4155738   2.900730   4.825959    31
## 30             Educational   4.269345 0.3884863   3.158416   4.736287    22
## 31                   Board   4.452036 0.3771551   2.722944   4.916149    37
## 32                Strategy   4.364947 0.3535338   3.247901   4.836303    46
## 33                Shopping   4.458543 0.3477599   3.461997   4.863370    46
## 34              Simulation   4.411816 0.3272977   2.765270   4.822581    48
## 35         Personalization   4.391174 0.3096564   3.695881   4.701242    15
## 36       Books & Reference   4.533771 0.3080158   3.892820   4.833476    12
## 37           Music & Audio   4.448196 0.3043929   3.544713   4.808293    25
## 38 Video Players & Editors   4.290024 0.3008915   3.802075   4.807214    23
## 39                  Action   4.329678 0.2989127   3.437535   4.819661    41
## 40                  Casino   4.605110 0.2403233   3.830189   4.868519    26
## 41                  Casual   4.510622 0.2227701   4.090000   4.890380    44
## 42                  Arcade   4.515642 0.2218313   3.913229   4.836258    25
## 43                   Music   4.418079 0.1639742   4.081081   4.729149    11
## 44                  Racing   4.495370 0.1211388   4.245698   4.703173    23
# Horizontal bar chart of the rating standard deviation per category,
# ordered by that standard deviation.
ggplot(
  data = rating_volatility,
  mapping = aes(x = reorder(category, sd_rating), y = sd_rating)
) +
  geom_col(fill = "tomato") +
  labs(
    title = "Volatilitas Rating Antar Aplikasi dalam Satu Kategori",
    x = "Kategori",
    y = "Standar Deviasi Rating"
  ) +
  theme_minimal() +
  coord_flip()

9. Konsentrasi Developer Dominan per Kategori

# Aggregation pipeline: share of each category held by its largest developer.
#   1. $match   - drop documents whose category or developer is null.
#   2. $group   - count apps per (category, developer) pair.
#   3. $group   - per category: total app count and the largest single
#      developer's app count.
#   4. $project - emit `category`, `total_apps`, `top_dev_apps` and
#      `dominance_ratio` = top_dev / total_apps. `_id` is not excluded, so it
#      is returned too and carries the same value as `category`.
#   5. $sort    - highest dominance ratio first.
pipeline_dev_concentration <- '[
  {
    "$match": {
      "category": { "$ne": null },
      "developer": { "$ne": null }
    }
  },
  {
    "$group": {
      "_id": { "category": "$category", "developer": "$developer" },
      "app_count": { "$sum": 1 }
    }
  },
  {
    "$group": {
      "_id": "$_id.category",
      "total_apps": { "$sum": "$app_count" },
      "top_dev": { "$max": "$app_count" }
    }
  },
  {
    "$project": {
      "category": "$_id",
      "total_apps": 1,
      "top_dev_apps": "$top_dev",
      "dominance_ratio": { "$divide": ["$top_dev", "$total_apps"] }
    }
  },
  {
    "$sort": { "dominance_ratio": -1 }
  }
]'

# Run the pipeline. The $project stage already emits the final column names,
# so no rename step is needed (the former rename() mapped every column onto
# itself and had no effect).
dev_concentration <- mongo_conn$aggregate(pipeline_dev_concentration)

# Show the resulting table.
print(dev_concentration)
##                        _id total_apps                category top_dev_apps
## 1         Libraries & Demo          1        Libraries & Demo            1
## 2                    Tools         32                   Tools           13
## 3             House & Home          4            House & Home            1
## 4             Productivity         40            Productivity           10
## 5            Communication         31           Communication            6
## 6                   Trivia         16                  Trivia            3
## 7          Auto & Vehicles         11         Auto & Vehicles            2
## 8              Educational         22             Educational            4
## 9                    Music         11                   Music            2
## 10            Role Playing         34            Role Playing            6
## 11                  Racing         23                  Racing            4
## 12                  Casino         26                  Casino            4
## 13         Personalization         15         Personalization            2
## 14             Photography         31             Photography            4
## 15            Food & Drink          8            Food & Drink            1
## 16                  Beauty          8                  Beauty            1
## 17                  Sports         59                  Sports            7
## 18                  Dating         17                  Dating            2
## 19            Art & Design         21            Art & Design            2
## 20          Travel & Local         32          Travel & Local            3
## 21                    Word         34                    Word            3
## 22        Health & Fitness         34        Health & Fitness            3
## 23 Video Players & Editors         23 Video Players & Editors            2
## 24                  Casual         46                  Casual            4
## 25                Business         23                Business            2
## 26       Maps & Navigation         23       Maps & Navigation            2
## 27       Books & Reference         12       Books & Reference            1
## 28           Entertainment         36           Entertainment            3
## 29                 Medical         12                 Medical            1
## 30               Adventure         37               Adventure            3
## 31                  Arcade         25                  Arcade            2
## 32                   Board         38                   Board            3
## 33                  Action         41                  Action            3
## 34                    Card         41                    Card            3
## 35                  Comics         28                  Comics            2
## 36                  Events         14                  Events            1
## 37                  Social         28                  Social            2
## 38                 Weather         29                 Weather            2
## 39                Strategy         47                Strategy            3
## 40              Simulation         48              Simulation            3
## 41        News & Magazines         17        News & Magazines            1
## 42                  Puzzle         53                  Puzzle            3
## 43               Education         40               Education            2
## 44                 Finance         40                 Finance            2
## 45           Music & Audio         25           Music & Audio            1
## 46                Shopping         52                Shopping            2
## 47               Parenting         29               Parenting            1
## 48               Lifestyle         35               Lifestyle            1
##    dominance_ratio
## 1       1.00000000
## 2       0.40625000
## 3       0.25000000
## 4       0.25000000
## 5       0.19354839
## 6       0.18750000
## 7       0.18181818
## 8       0.18181818
## 9       0.18181818
## 10      0.17647059
## 11      0.17391304
## 12      0.15384615
## 13      0.13333333
## 14      0.12903226
## 15      0.12500000
## 16      0.12500000
## 17      0.11864407
## 18      0.11764706
## 19      0.09523810
## 20      0.09375000
## 21      0.08823529
## 22      0.08823529
## 23      0.08695652
## 24      0.08695652
## 25      0.08695652
## 26      0.08695652
## 27      0.08333333
## 28      0.08333333
## 29      0.08333333
## 30      0.08108108
## 31      0.08000000
## 32      0.07894737
## 33      0.07317073
## 34      0.07317073
## 35      0.07142857
## 36      0.07142857
## 37      0.07142857
## 38      0.06896552
## 39      0.06382979
## 40      0.06250000
## 41      0.05882353
## 42      0.05660377
## 43      0.05000000
## 44      0.05000000
## 45      0.04000000
## 46      0.03846154
## 47      0.03448276
## 48      0.02857143
# Concentration chart: horizontal bars of dominance_ratio per category,
# with categories ordered by dominance_ratio and the axis shown as percent.
dev_concentration %>%
  ggplot(aes(x = reorder(category, dominance_ratio), y = dominance_ratio)) +
  geom_col(fill = "darkblue") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_flip() +
  theme_minimal() +
  ggtitle("Konsentrasi Developer Dominan per Kategori") +
  xlab("Kategori") +
  ylab("Proporsi Aplikasi oleh Developer Terbesar")


10. Rasio Aplikasi Berbayar per Kategori

# Pipeline: per category, count all apps and paid apps, then compute the
# paid share (paid_apps / total_apps), sorted from highest to lowest share.
#
# - Documents without a category are dropped first so the later stages
#   process fewer documents.
# - An app counts as free when its price, converted to a trimmed, lower-cased
#   string, is "free", "0", "0.0" or "0.00". Comparing against the exact
#   string "Free" only flags every app as paid when prices are stored as
#   numbers or with different casing/whitespace.
# - NOTE(review): a missing/null price still falls into the "paid" branch;
#   confirm the field name and value format against the collection.
# - "_id": 0 keeps the grouping key from appearing twice in the result
#   (once as `_id` and once as `category`).
pipeline_paid_ratio <- '[
  {
    "$match": {
      "category": { "$ne": null }
    }
  },
  {
    "$addFields": {
      "is_paid": {
        "$cond": {
          "if": {
            "$in": [
              {
                "$toLower": {
                  "$trim": { "input": { "$toString": "$price" } }
                }
              },
              ["free", "0", "0.0", "0.00"]
            ]
          },
          "then": 0,
          "else": 1
        }
      }
    }
  },
  {
    "$group": {
      "_id": "$category",
      "total_apps": { "$sum": 1 },
      "paid_apps": { "$sum": "$is_paid" }
    }
  },
  {
    "$project": {
      "_id": 0,
      "category": "$_id",
      "total_apps": 1,
      "paid_apps": 1,
      "paid_ratio": { "$divide": ["$paid_apps", "$total_apps"] }
    }
  },
  {
    "$sort": { "paid_ratio": -1 }
  }
]'

# Run the pipeline. The $project stage already emits the final column names
# (category, total_apps, paid_apps, paid_ratio), so no rename step is needed.
paid_ratio <- mongo_conn$aggregate(pipeline_paid_ratio)

# Show the result table
print(paid_ratio)
##                        _id total_apps paid_apps                category
## 1                   Social         28        28                  Social
## 2                   Racing         23        23                  Racing
## 3                     Card         41        41                    Card
## 4         News & Magazines         17        17        News & Magazines
## 5  Video Players & Editors         23        23 Video Players & Editors
## 6        Maps & Navigation         23        23       Maps & Navigation
## 7                 Business         23        23                Business
## 8            Music & Audio         25        25           Music & Audio
## 9                Parenting         29        29               Parenting
## 10         Personalization         15        15         Personalization
## 11         Auto & Vehicles         11        11         Auto & Vehicles
## 12            House & Home          4         4            House & Home
## 13                  Puzzle         53        53                  Puzzle
## 14                    Word         34        34                    Word
## 15            Productivity         40        40            Productivity
## 16                Strategy         47        47                Strategy
## 17                  Comics         28        28                  Comics
## 18            Food & Drink          8         8            Food & Drink
## 19           Communication         31        31           Communication
## 20        Libraries & Demo          1         1        Libraries & Demo
## 21               Education         40        40               Education
## 22             Photography         31        31             Photography
## 23                  Trivia         16        16                  Trivia
## 24                 Finance         40        40                 Finance
## 25                  Casino         26        26                  Casino
## 26          Travel & Local         32        32          Travel & Local
## 27                   Tools         32        32                   Tools
## 28                  Action         41        41                  Action
## 29                  Beauty          8         8                  Beauty
## 30               Lifestyle         35        35               Lifestyle
## 31                  Casual         46        46                  Casual
## 32             Educational         22        22             Educational
## 33               Adventure         37        37               Adventure
## 34                 Medical         12        12                 Medical
## 35              Simulation         48        48              Simulation
## 36                   Music         11        11                   Music
## 37                  Arcade         25        25                  Arcade
## 38                   Board         38        38                   Board
## 39                  Sports         59        59                  Sports
## 40                  Dating         17        17                  Dating
## 41       Books & Reference         12        12       Books & Reference
## 42                 Weather         29        29                 Weather
## 43        Health & Fitness         34        34        Health & Fitness
## 44            Art & Design         21        21            Art & Design
## 45                Shopping         52        52                Shopping
## 46                  Events         14        14                  Events
## 47           Entertainment         36        36           Entertainment
## 48            Role Playing         34        34            Role Playing
##    paid_ratio
## 1           1
## 2           1
## 3           1
## 4           1
## 5           1
## 6           1
## 7           1
## 8           1
## 9           1
## 10          1
## 11          1
## 12          1
## 13          1
## 14          1
## 15          1
## 16          1
## 17          1
## 18          1
## 19          1
## 20          1
## 21          1
## 22          1
## 23          1
## 24          1
## 25          1
## 26          1
## 27          1
## 28          1
## 29          1
## 30          1
## 31          1
## 32          1
## 33          1
## 34          1
## 35          1
## 36          1
## 37          1
## 38          1
## 39          1
## 40          1
## 41          1
## 42          1
## 43          1
## 44          1
## 45          1
## 46          1
## 47          1
## 48          1
# Bar chart: horizontal bars of paid_ratio per category, ordered by
# paid_ratio, with the value axis formatted as whole percentages.
paid_ratio %>%
  ggplot(aes(x = reorder(category, paid_ratio), y = paid_ratio)) +
  geom_col(fill = "darkorange") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_flip() +
  theme_minimal() +
  ggtitle("Rasio Aplikasi Berbayar per Kategori") +
  xlab("Kategori") +
  ylab("Proporsi Aplikasi Berbayar")


11. Aplikasi Rating Tinggi tapi Download Rendah (Hidden Gems)

# Pipeline: find highly rated apps with few downloads ("hidden gems").
#   1. $addFields: strip "," and "+" from the stringified number_of_downloads
#      and convert the result to a double; also convert rating and
#      number_of_reviews to doubles.
#   2. $match: keep apps with rating >= 4.5, downloads <= 100000 and at
#      least 50 reviews.
#   3. $project: keep app_name, category, developer and the three numeric
#      fields (_id is not excluded, so it is returned as well).
#   4. $sort / $limit: highest rating first, ties broken by fewer downloads;
#      only the first 10 documents are returned.
pipeline_hidden_gems <- '[
  {
    "$addFields": {
      "downloads_numeric": {
        "$toDouble": {
          "$replaceAll": {
            "input": {
              "$replaceAll": {
                "input": { "$toString": "$number_of_downloads" },
                "find": ",",
                "replacement": ""
              }
            },
            "find": "+",
            "replacement": ""
          }
        }
      },
      "rating_numeric": { "$toDouble": "$rating" },
      "reviews_numeric": { "$toDouble": "$number_of_reviews" }
    }
  },
  {
    "$match": {
      "rating_numeric": { "$gte": 4.5 },
      "downloads_numeric": { "$lte": 100000 },
      "reviews_numeric": { "$gte": 50 }
    }
  },
  {
    "$project": {
      "app_name": 1,
      "category": 1,
      "developer": 1,
      "rating_numeric": 1,
      "downloads_numeric": 1,
      "reviews_numeric": 1
    }
  },
  {
    "$sort": { "rating_numeric": -1, "downloads_numeric": 1 }
  },
  {
    "$limit": 10
  }
]'

# Run the pipeline through the mongo_conn connection object
hidden_gems <- mongo_conn$aggregate(pipeline_hidden_gems)

# Print the result table
print(hidden_gems)
##                         _id                       app_name
## 1  683b33a628b5b791a60d8736 GrowthDay: Daily Mindset Coach
## 2  683b33a628b5b791a60d863b  Nav Business Financial Health
## 3  683b33a628b5b791a60d889c      MyRadar Weather Radar Pro
## 4  683b33a628b5b791a60d8652 Gurukula Stories Comics Audios
## 5  683b33a628b5b791a60d86bf    Partiful: Fun Party Invites
## 6  683b33a628b5b791a60d852a                Dungeon Clawler
## 7  683b33a628b5b791a60d8584              Streets of Rage 4
## 8  683b33a628b5b791a60d864c CLZ Comics comic book database
## 9  683b33a628b5b791a60d8585              Dungeon Village 2
## 10 683b33a628b5b791a60d85b4 ReadEra Premium – ebook reader
##                       developer          category downloads_numeric
## 1                     GrowthDay      Productivity             5e+04
## 2              Nav Technologies           Finance             1e+05
## 3          ACME AtronOmatic LLC           Weather             1e+05
## 4  Gurukula.com Sriram Raghavan            Comics             1e+04
## 5                      Partiful         Lifestyle             1e+05
## 6             Stray Fawn Studio      Role Playing             1e+05
## 7                   Playdigious            Arcade             1e+05
## 8                           CLZ            Comics             1e+05
## 9                     Kairosoft        Simulation             1e+05
## 10                  READERA LLC Books & Reference             1e+05
##    rating_numeric reviews_numeric
## 1        4.925234             359
## 2        4.890323             760
## 3        4.870394            3873
## 4        4.870000              99
## 5        4.866667             356
## 6        4.848342             547
## 7        4.836258             911
## 8        4.825050            4687
## 9        4.822581            4264
## 10       4.798913            2289
# Optional chart: horizontal bars of download counts per app (ordered by
# downloads), filled on a light-blue-to-dark-green gradient by rating.
hidden_gems %>%
  ggplot(
    aes(
      x = reorder(app_name, downloads_numeric),
      y = downloads_numeric,
      fill = rating_numeric
    )
  ) +
  geom_col() +
  scale_fill_gradient(name = "Rating", low = "lightblue", high = "darkgreen") +
  scale_y_continuous(labels = scales::comma_format()) +
  coord_flip() +
  theme_minimal() +
  ggtitle("Top 10 Hidden Gems: Rating Tinggi, Download Rendah") +
  xlab("Aplikasi") +
  ylab("Jumlah Download")