library(tidyverse)

## Warning: package 'stringr' was built under R version 4.5.2

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# 1. Load data
# Experiment records; column types are guessed by readr.
test_table <- readr::read_csv(file = "test_table.csv")

## Rows: 100000 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): date, device
## dbl (3): user_id, test, purchase_amount
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# User profile records; column types are guessed by readr.
user_table <- readr::read_csv(file = "user_table.csv")

## Rows: 20000 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): gender, service, country
## dbl (2): user_id, age
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Inspect the column names, types and leading values of the test records.
test_table %>%
  glimpse()

## Rows: 100,000
## Columns: 5
## $ user_id         <dbl> 1983, 4704, 14995, 15089, 17522, 186, 6654, 935, 16459…
## $ date            <chr> "2017/1/1", "2017/1/1", "2017/1/1", "2017/1/1", "2017/…
## $ device          <chr> "pc_web", "android_app", "ios_app", "pc_web", "pc_web"…
## $ test            <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, …
## $ purchase_amount <dbl> 33.7, 30.2, 37.2, 49.8, 19.7, 33.6, 45.1, 12.2, 34.4, …

# Inspect the column names, types and leading values of the user records.
user_table %>%
  glimpse()

## Rows: 20,000
## Columns: 5
## $ user_id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
## $ gender  <chr> "male", "male", "female", "male", "male", "female", "female", …
## $ service <chr> "golden", "premium", "premium", "golden", "normal", "golden", …
## $ age     <dbl> 20, 53, 44, 44, 20, 26, 17, 29, 42, 26, 19, 21, 48, 26, 33, 27…
## $ country <chr> "US", "US", "TW", "UK", "US", "UK", "UK", "US", "US", "US", "U…

# 2. Merge data
# Attach each user's profile attributes to every test record, keyed on user_id.
# `relationship = "many-to-one"` makes the join fail loudly if user_table ever
# contains more than one row for a user_id; without it, such duplicates would
# silently multiply purchase rows and distort the statistics computed from
# full_table.
full_table <- test_table %>%
  left_join(user_table, by = "user_id", relationship = "many-to-one")
glimpse(full_table)

## Rows: 100,000
## Columns: 9
## $ user_id         <dbl> 1983, 4704, 14995, 15089, 17522, 186, 6654, 935, 16459…
## $ date            <chr> "2017/1/1", "2017/1/1", "2017/1/1", "2017/1/1", "2017/…
## $ device          <chr> "pc_web", "android_app", "ios_app", "pc_web", "pc_web"…
## $ test            <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, …
## $ purchase_amount <dbl> 33.7, 30.2, 37.2, 49.8, 19.7, 33.6, 45.1, 12.2, 34.4, …
## $ gender          <chr> "male", "male", "male", "male", "male", "female", "fem…
## $ service         <chr> "golden", "golden", "golden", "golden", "normal", "gol…
## $ age             <dbl> 37, 33, 31, 38, 35, 50, 37, 39, 50, 22, 34, 14, 29, 40…
## $ country         <chr> "US", "UK", "US", "US", "UK", "BR", "US", "BR", "US", …

# 3. t-test
# Two-sided Welch t-test (unequal variances) comparing mean purchase_amount
# between the groups of `test`; the arguments below spell out the defaults.
t.test(
  purchase_amount ~ test,
  data = full_table,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)

## 
##  Welch Two Sample t-test
## 
## data:  purchase_amount by test
## t = -100.88, df = 99961, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -7.363126 -7.082470
## sample estimates:
## mean in group 0 mean in group 1 
##        36.15099        43.37379

P-value < 0.05，成功拒絕虛無假設、支持對立假設：實驗組 (test = 1) 的平均消費金額為 43.37，高於對照組 (test = 0) 的 36.15，差異約 7.22（95% 信賴區間 7.08–7.36），顯示新介面確實改變了消費金額。

# 4. ANOVA
# One-way ANOVA of purchase_amount across device, followed by its summary table.
model_anova <- aov(
  formula = purchase_amount ~ device,
  data = full_table
)
summary(object = model_anova)

##                Df   Sum Sq Mean Sq F value Pr(>F)    
## device          2   467742  233871    1713 <2e-16 ***
## Residuals   99997 13651494     137                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Post-hoc pairwise comparison of device means (95% family-wise confidence).
TukeyHSD(x = model_anova, conf.level = 0.95)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = purchase_amount ~ device, data = full_table)
## 
## $device
##                          diff       lwr       upr p adj
## ios_app-android_app  5.905261  5.668809  6.141713     0
## pc_web-android_app   3.946331  3.709934  4.182729     0
## pc_web-ios_app      -1.958930 -2.152823 -1.765037     0

P-value < 0.05：「不同裝置 (Device) 的平均消費金額確實有顯著差異。」須注意裝置並非隨機分派，此結果代表的是關聯，不一定是因果。

「iOS 使用者是含金量最高的客群！」Tukey HSD 顯示 iOS App 的平均消費金額比 Android App 高約 5.91、比 PC Web 高約 1.96，且差異皆顯著。

# 5. Visualize the result: differences in purchase amount across devices
library(ggplot2)
ggplot(full_table, aes(x = device, y = purchase_amount, fill = device)) +
  # Bar chart of the mean purchase amount per device
  stat_summary(fun = mean, geom = "bar", alpha = 0.8) +
  # Error bars showing an approximate 95% confidence interval of the mean.
  # mean_se() on its own draws mean +/- 1 standard error (mult = 1), which is
  # not a confidence interval; scaling by the normal quantile qnorm(0.975)
  # widens it to a 95% interval.
  stat_summary(
    fun.data = mean_se,
    fun.args = list(mult = qnorm(0.975)),
    geom = "errorbar",
    width = 0.2
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2") + # ColorBrewer "Set2" fill palette
  labs(
    title = "各裝置平均消費金額比較 (Average Purchase by Device)",
    subtitle = "經 ANOVA 檢定證實：iOS 用戶消費力顯著最高",
    x = "使用裝置 (Device)",
    y = "平均消費金額 (USD)"
  )

# 6. Try other variables
# Re-inspect the merged table to pick another grouping variable.
full_table %>%
  glimpse()

## Rows: 100,000
## Columns: 9
## $ user_id         <dbl> 1983, 4704, 14995, 15089, 17522, 186, 6654, 935, 16459…
## $ date            <chr> "2017/1/1", "2017/1/1", "2017/1/1", "2017/1/1", "2017/…
## $ device          <chr> "pc_web", "android_app", "ios_app", "pc_web", "pc_web"…
## $ test            <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, …
## $ purchase_amount <dbl> 33.7, 30.2, 37.2, 49.8, 19.7, 33.6, 45.1, 12.2, 34.4, …
## $ gender          <chr> "male", "male", "male", "male", "male", "female", "fem…
## $ service         <chr> "golden", "golden", "golden", "golden", "normal", "gol…
## $ age             <dbl> 37, 33, 31, 38, 35, 50, 37, 39, 50, 22, 34, 14, 29, 40…
## $ country         <chr> "US", "UK", "US", "US", "UK", "BR", "US", "BR", "US", …

# One-way ANOVA of purchase_amount across country, followed by its summary table.
model_country <- aov(
  formula = purchase_amount ~ country,
  data = full_table
)
summary(object = model_country)

##                Df  Sum Sq Mean Sq F value Pr(>F)    
## country         4 6728442 1682110   22758 <2e-16 ***
## Residuals   99995 7390794      74                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Post-hoc pairwise comparison of country means (95% family-wise confidence).
TukeyHSD(x = model_country, conf.level = 0.95)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = purchase_amount ~ country, data = full_table)
## 
## $country
##             diff        lwr        upr     p adj
## JP-BR 19.9135755 19.5058423 20.3213086 0.0000000
## TW-BR 10.9160206 10.4465844 11.3854568 0.0000000
## UK-BR 10.6705596 10.2982537 11.0428654 0.0000000
## US-BR 26.6288791 26.2822400 26.9755182 0.0000000
## TW-JP -8.9975548 -9.4032526 -8.5918570 0.0000000
## UK-JP -9.2430159 -9.5308214 -8.9552105 0.0000000
## US-JP  6.7153036  6.4615734  6.9690338 0.0000000
## UK-TW -0.2454611 -0.6155368  0.1246146 0.3680842
## US-TW 15.7128584 15.3686157 16.0571012 0.0000000
## US-UK 15.9583195 15.7666314 16.1500077 0.0000000

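「國家」這個變數也是顯著的 (P-value < 0.05)：Tukey HSD 顯示除了 UK 與 TW 之間沒有顯著差異 (p adj ≈ 0.37) 之外，其餘各國兩兩之間的平均消費金額皆有顯著差異，其中 US 最高、BR 最低。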

7. 商業洞察與建議 (Business Insights & Conclusion)

綜合 T-檢定與 ANOVA 變異數分析的結果，我們得出以下關鍵結論：

實驗成效顯著：T-test 顯示實驗組 (test=1) 與對照組 (test=0) 的消費金額有顯著差異 (P-value < 0.05)，實驗組平均高出約 7.2，證明新介面或策略確實有效。
鎖定高價值裝置：ANOVA 與視覺化結果顯示，iOS App 使用者的平均消費金額顯著高於 Android 與 PC Web。
行動建議：
- 全面推廣：建議將實驗組的新策略全面推行至所有用戶。
- 差異化行銷：針對高價值的iOS用戶投入更多行銷預算（如專屬推播、針對性廣告），以最大化營收回報。

t-test & ANOVA

Phil Kao

2025-12-31

P-value < 0.05，成功拒絕虛無假設、支持對立假設：實驗組 (test = 1) 的平均消費金額為 43.37，高於對照組 (test = 0) 的 36.15，差異約 7.22（95% 信賴區間 7.08–7.36），顯示新介面確實改變了消費金額。

P-value < 0.05：「不同裝置 (Device) 的平均消費金額確實有顯著差異。」須注意裝置並非隨機分派，此結果代表的是關聯，不一定是因果。

「iOS 使用者是含金量最高的客群！」

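「國家」這個變數也是顯著的 (P-value < 0.05)：Tukey HSD 顯示除了 UK 與 TW 之間沒有顯著差異 (p adj ≈ 0.37) 之外，其餘各國兩兩之間的平均消費金額皆有顯著差異，其中 US 最高、BR 最低。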

7. 商業洞察與建議 (Business Insights & Conclusion)