needed <- c("dplyr", "ggplot2", "plotly", "pwr", "effsize", "knitr", "kableExtra")
new_pkg <- needed[!needed %in% installed.packages()[, "Package"]]
if (length(new_pkg) > 0) {
install.packages(new_pkg, repos = "https://cloud.r-project.org", type = "binary")
}
library(dplyr); library(ggplot2); library(plotly)
library(pwr); library(effsize); library(knitr); library(kableExtra)

# Load the raw Google Play Store export.
# stringsAsFactors = FALSE keeps text columns as character vectors.
# NOTE(review): the path is absolute and specific to one Windows account;
# adjust it before running on another machine.
df_raw <- read.csv(
  "C:/Users/IU Student/Downloads/googleplaystore.csv - Copy/googleplaystore.csv",
  stringsAsFactors = FALSE
)
# Build the analysis table from the raw export:
#   * coerce Rating to numeric (coercion warnings are muted),
#   * derive AudienceGroup from Content.Rating ("Everyone", "Mature", or NA),
#   * keep only Free/Paid rows whose rating lies in [1, 5].
# AudienceGroup is computed row by row, so deriving it before the filter
# yields the same rows and columns as deriving it afterwards.
df <- df_raw %>%
  mutate(
    Rating = suppressWarnings(as.numeric(Rating)),
    AudienceGroup = case_when(
      Content.Rating == "Everyone" ~ "Everyone",
      Content.Rating %in% c("Mature 17+", "Adults only 18+") ~ "Mature",
      TRUE ~ NA_character_
    )
  ) %>%
  # between() is NA for missing ratings, and filter() drops NA rows.
  filter(Type %in% c("Free", "Paid"), between(Rating, 1, 5))

cat("Usable rows:", nrow(df))
## Usable rows: 9366
The Google Play Store dataset covers ~10,000 Android apps with ratings, install counts, pricing, and content classifications. Two questions occurred to me when going over this dataset:
\[H_0: \mu_{\text{Free}} = \mu_{\text{Paid}}\]
The mean star rating is the same for free and paid apps. The alternative is two-sided (\(H_1: \mu_{\text{Free}} \neq \mu_{\text{Paid}}\)) because there is no prior theoretical reason to expect one direction over the other.
# Rating vectors for the two pricing groups.
# %in% never yields NA, so rows are selected exactly as filter() would.
free_r <- df$Rating[df$Type %in% "Free"]
paid_r <- df$Rating[df$Type %in% "Paid"]

# A-priori sample size for a two-sided, two-sample t-test:
# effect size d = 0.20, alpha = 0.05, power = 0.80.
ss <- pwr.t.test(
  d = 0.20,
  sig.level = 0.05,
  power = 0.80,
  type = "two.sample",
  alternative = "two.sided"
)
cat("Required n per group:", ceiling(ss$n), "\n")
## Required n per group: 394
## Available -- Free: 8719 | Paid: 647
## Sufficient data? YES
We have adequate data to do the test.
# Two-sided Welch t-test (var.equal = FALSE) comparing free and paid ratings.
t_res <- t.test(
  x = free_r,
  y = paid_r,
  var.equal = FALSE,
  alternative = "two.sided"
)

# Cohen's d for the same two groups, using effsize's defaults.
cd <- cohen.d(free_r, paid_r)

cat(
  "t =", round(t_res$statistic, 3),
  "| p =", format(t_res$p.value, scientific = TRUE),
  "\n"
)
## t = -3.62 | p = 3.149307e-04
## Mean (Free): 4.186 | Mean (Paid): 4.267
## Cohen's D: 0.156 ( 1 effect)
My interpretation: I think I can reject \(H_0\) — paid apps are rated slightly higher. However, with Cohen’s D ≈ 0.156 the effect is negligible in practice. A fraction of a star difference will not meaningfully shift a consumer’s decision. Pricing alone is a poor proxy for quality.
# Group means, drawn below as dashed vertical reference lines.
mean_free <- mean(free_r)
mean_paid <- mean(paid_r)

# Kernel density estimates restricted to the 1-5 rating scale,
# using the "SJ" bandwidth selector.
dens_free <- density(free_r, bw = "SJ", from = 1, to = 5)
dens_paid <- density(paid_r, bw = "SJ", from = 1, to = 5)

# Curve peaks: used to size the mean lines and to place the annotation box.
peak_free <- max(dens_free$y)
peak_paid <- max(dens_paid$y)

# Legend labels for the two mean lines.
label_mean_free <- paste0("Mean Free = ", round(mean_free, 2))
label_mean_paid <- paste0("Mean Paid = ", round(mean_paid, 2))

# Text of the box summarising the Welch test and the effect size.
fig1_note <- paste0(
  "<b>Welch t-test</b><br>p = ",
  format(t_res$p.value, digits = 3, scientific = TRUE),
  "<br>Cohen's D = ", round(abs(cd$estimate), 3),
  " (", cd$magnitude, ")"
)

# Overlaid density curves for free and paid apps, with mean lines and the
# test summary.
fig1 <- plot_ly() %>%
  add_trace(
    x = dens_free$x, y = dens_free$y,
    type = "scatter", mode = "lines",
    fill = "tozeroy", fillcolor = "rgba(0,180,216,0.35)",
    line = list(color = "#0096C7", width = 2.5),
    name = "Free Apps",
    hovertemplate = "Rating: %{x:.2f}<br>Density: %{y:.4f}<extra>Free</extra>"
  ) %>%
  add_trace(
    x = dens_paid$x, y = dens_paid$y,
    type = "scatter", mode = "lines",
    fill = "tozeroy", fillcolor = "rgba(230,57,70,0.35)",
    line = list(color = "#E63946", width = 2.5),
    name = "Paid Apps",
    hovertemplate = "Rating: %{x:.2f}<br>Density: %{y:.4f}<extra>Paid</extra>"
  ) %>%
  add_segments(
    x = mean_free, xend = mean_free,
    y = 0, yend = peak_free * 1.05,
    line = list(color = "#023E8A", width = 2, dash = "dash"),
    name = label_mean_free
  ) %>%
  add_segments(
    x = mean_paid, xend = mean_paid,
    y = 0, yend = peak_paid * 1.05,
    line = list(color = "#9D0208", width = 2, dash = "dash"),
    name = label_mean_paid
  ) %>%
  add_annotations(
    x = 2.1, y = peak_free * 0.85,
    text = fig1_note,
    showarrow = FALSE,
    font = list(size = 11),
    bgcolor = "rgba(255,255,255,0.85)",
    bordercolor = "#aaa",
    borderwidth = 1
  ) %>%
  layout(
    title = list(
      text = "<b>Rating Distribution: Free vs. Paid Apps</b>",
      font = list(size = 17, color = "#1a1a2e")
    ),
    xaxis = list(
      title = "<b>User Rating (1-5 Stars)</b>",
      range = c(1, 5),
      gridcolor = "#e0e0e0"
    ),
    yaxis = list(title = "<b>Density</b>", gridcolor = "#e0e0e0"),
    legend = list(orientation = "h", x = 0.01, y = 1.08),
    plot_bgcolor = "#FAFAFA",
    paper_bgcolor = "#FFFFFF"
  )
fig1
The teal (Free) and coral (Paid) density curves overlap almost entirely. The tiny mean difference is only visible by comparing the two dashed vertical lines. This confirms that while the difference is statistically real, it is practically invisible: statistical significance and practical significance are not the same thing.
\[H_0: P(\text{Paid} \mid \text{Mature}) = P(\text{Paid} \mid \text{Everyone})\]
Content rating and payment model are independent. Fisher’s Exact Test is purpose-built for exactly this: computing the exact hypergeometric probability of a contingency table at least as extreme as the one we observed, under the assumption that the two categorical variables share no relationship.
# Keep only apps with an assigned audience group and add a two-level
# Free/Paid label for the contingency table.
h2_df <- df %>%
  filter(AudienceGroup %in% c("Everyone", "Mature")) %>%
  mutate(Type_bin = if_else(Type == "Paid", "Paid", "Free"))

# Audience group (rows) by pricing (columns). The columns are passed as
# extracted vectors rather than bare names, so the printed table keeps an
# empty dimnames header.
ct <- table(h2_df[["AudienceGroup"]], h2_df[["Type_bin"]])
# Fix the row and column order explicitly.
ct <- ct[c("Everyone", "Mature"), c("Free", "Paid")]

cat("-- Contingency Table --\n")
print(ct)
## -- Contingency Table --
##
## Free Paid
## Everyone 6868 552
## Mature 447 17
##
## -- Row Proportions --
##
## Free Paid
## Everyone 0.9256 0.0744
## Mature 0.9634 0.0366
# Two-sided Fisher's exact test on the audience-by-pricing table.
f_res <- fisher.test(x = ct, alternative = "two.sided")

cat(
  "\nFisher p-value:",
  format(f_res$p.value, scientific = TRUE)
)
##
## Fisher p-value: 1.550224e-03
##
## Odds Ratio: 0.473
##
## 95% CI (OR): [ 0.271 , 0.774 ]
My interpretation: The p-value (≈ 0.0016) is well below 0.05, so I reject \(H_0\). The odds ratio of 0.473 (95% CI 0.271–0.774) tells us Mature-rated apps have roughly half the odds of being paid compared with Everyone-rated apps. A plausible explanation is that adult-targeted apps tend to monetize through advertising and in-app purchases rather than upfront pricing, because a paywall discourages the casual browsers who make up most of that audience.
# Per-group share of free and paid apps: one row per AudienceGroup x Type_bin
# with its count and its percentage within the audience group.
h2_plot <- h2_df %>%
  group_by(AudienceGroup, Type_bin) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(AudienceGroup) %>%
  mutate(pct = round(count / sum(count) * 100, 2)) %>%
  ungroup()

# Annotation text built from the actual test result. The p-value is formatted
# from f_res rather than hard-coded, so the figure cannot disagree with the
# printed Fisher output.
fig2_note <- paste0(
  "<b>Fisher's Exact</b><br>p = ",
  format(f_res$p.value, digits = 3, scientific = TRUE),
  "<br>OR = ", round(f_res$estimate, 3)
)

# Grouped bar chart of the within-group percentages, with the test summary
# placed to the right of the second bar group.
fig2 <- plot_ly(
  data = h2_plot,
  x = ~AudienceGroup,
  y = ~pct,
  color = ~Type_bin,
  colors = c("Free" = "#00B4D8", "Paid" = "#F4A261"),
  type = "bar",
  text = ~paste0(
    "<b>", AudienceGroup, " -- ", Type_bin, "</b><br>",
    "Count: ", count, "<br>Proportion: ", pct, "%"
  ),
  hoverinfo = "text",
  marker = list(line = list(color = "white", width = 1.5))
) %>%
  layout(
    barmode = "group",
    title = list(
      text = "<b>Proportion of Free vs. Paid Apps by Audience Group</b>",
      font = list(size = 17, color = "#1a1a2e")
    ),
    xaxis = list(title = "<b>Content Rating Group</b>"),
    yaxis = list(
      title = "<b>Percentage of Apps (%)</b>",
      gridcolor = "#e0e0e0",
      range = c(0, 105)
    ),
    legend = list(
      title = list(text = "<b>App Type</b>"),
      orientation = "h", x = 0.35, y = 1.08
    ),
    annotations = list(list(
      x = 1.35, y = 90, xref = "x", yref = "y",
      text = fig2_note,
      showarrow = FALSE,
      font = list(size = 11),
      bgcolor = "rgba(255,255,255,0.9)",
      bordercolor = "#aaa",
      borderwidth = 1
    )),
    plot_bgcolor = "#FAFAFA",
    paper_bgcolor = "#FFFFFF"
  )
fig2
The orange “Paid” bar in the Mature group is only about half the height of the one in the Everyone group (3.7% vs. 7.4%). This visual asymmetry directly reflects the odds ratio below 1. Mature-rated apps are overwhelmingly free, a pattern that aligns with the freemium economy of adult-targeted mobile content.
| Hypothesis | Framework | Result | Decision |
|---|---|---|---|
| H1: Free vs. Paid ratings | Neyman-Pearson (Welch t-test) | t = -3.62, Cohen’s D = 0.156 | Reject H0 — but negligible practical effect |
| H2: Audience x Paid/Free | Fisher’s Significance (Exact Test) | p ≈ 0.0016, OR = 0.473 | Reject H0 — Mature apps are less likely to be paid |
Further questions: Does the Free/Paid rating gap hold within individual app categories, or does it wash out in the aggregate? And is the Mature-app freemium pattern driven purely by mobile gaming’s dominance of that segment, or does it persist across genres even after controlling for category?