1. 資料整理

整合來自問卷調查 (survey_data) 與內部系統 (internal_data) 的資料,建立一個 360 度的客戶視圖。

survey_data <- read_csv("survey_data.csv")
internal_data <- read_csv("internal_data.csv")
full_data <- survey_data %>% 
  left_join(internal_data, by = "user_id") %>% 
  mutate(# 目標變數:把 0/1 轉成 No/Yes,方便繪圖與解讀
    is_loyal = factor(is_loyal, levels = c(0, 1), labels = c("No", "Yes")),
    credit_card_vendor = factor(credit_card_vendor),
    register_method = factor(register_method),
    class = factor(class))
glimpse(full_data)
## Rows: 1,000
## Columns: 21
## $ user_id            <chr> "c8c41c4a18675a74e01c8a20e8a0f662", "9f396fe44e7c05…
## $ is_loyal           <fct> No, No, Yes, Yes, No, Yes, Yes, No, Yes, Yes, Yes, …
## $ depart_on_time     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, …
## $ arrive_on_time     <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, …
## $ register_method    <fct> others, others, website, others, others, website, o…
## $ register_rate      <dbl> 2, 3, 3, 3, 4, 3, 2, 3, 2, 1, 3, 4, 4, 5, 2, 1, 3, …
## $ class              <fct> 3, 3, 2, 3, 3, 1, 3, 3, 3, 1, 2, 3, 2, 3, 2, 2, 3, …
## $ seat_rate          <dbl> 1, 3, 3, 2, 2, 4, 2, 2, 3, 3, 3, 2, 2, 2, 4, 4, 2, …
## $ meal_rate          <dbl> 1, 1, 2, 2, 2, 3, 3, 1, 2, 4, 4, 2, 2, 2, 4, 3, 2, …
## $ flight_rate        <dbl> 1, 3, 2, 4, 4, 2, 1, 2, 3, 1, 2, 2, 4, 1, 2, 4, 4, …
## $ package_rate       <dbl> 2, 2, 3, 3, 3, 3, 2, 1, 2, 3, 3, 2, 3, 3, 4, 4, 2, …
## $ tv_ad              <dbl> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, …
## $ youtube_ad_1       <dbl> 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, …
## $ youtube_ad_2       <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ youtube_ad_3       <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, …
## $ dm_message         <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, …
## $ dm_post            <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, …
## $ dm_email           <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, …
## $ credit_card_vendor <fct> Vendor C, Vendor B, Vendor B, Vendor C, Vendor B, V…
## $ credit_card_bonus  <dbl> 1, 1, 1, 3, 1, 3, 3, 1, 3, 3, 1, 2, 3, 3, 3, 1, 3, …
## $ coupon             <dbl> 65.7, 23.5, 43.6, 119.3, 196.1, 10.1, 144.7, 104.9,…

2. 探索性資料分析

# 用 na.rm = TRUE 確保若有缺漏值不會報錯
ggplot(full_data, aes(x = is_loyal, y = coupon, fill = is_loyal)) +
  geom_boxplot(alpha = 0.7) +
  theme_minimal() +
  scale_fill_manual(values = c("gray70", "steelblue")) +
  labs(
    title = "折扣金額對客戶忠誠度的影響",
    x = "是否忠誠 (Loyal)",
    y = "折扣金額 (Coupon Value)"
  )

# 3. 建立模型 ## 關鍵變數: ## 服務滿意度:seat_rate, meal_rate ## 準點狀況:depart_on_time ## 行銷誘因:coupon, credit_card_vendor

model_logit <- glm(is_loyal ~ seat_rate + meal_rate + depart_on_time + coupon + credit_card_vendor, data = full_data, family = "binomial")
model_summary <- tidy(model_logit)
knitr::kable(model_summary, digits = 4, caption = "邏輯迴歸模型係數表")
邏輯迴歸模型係數表
term estimate std.error statistic p.value
(Intercept) -5.0082 0.4543 -11.0252 0.0000
seat_rate 0.8403 0.1057 7.9514 0.0000
meal_rate 0.5877 0.1159 5.0712 0.0000
depart_on_time 1.5605 0.1778 8.7754 0.0000
coupon 0.0085 0.0015 5.7847 0.0000
credit_card_vendorVendor B -0.2679 0.1992 -1.3452 0.1785
credit_card_vendorVendor C 0.1628 0.2119 0.7683 0.4423

4. 商業意義解讀

# 計算 Odds Ratio 與 信賴區間
model_interpretation <- tidy(model_logit, conf.int = TRUE) %>%
  mutate(
    Odds_Ratio = exp(estimate),           # 還原係數為勝率比
    Conf_Low = exp(conf.low),             # 信賴區間下限
    Conf_High = exp(conf.high),           # 信賴區間上限
    Impact_Pct = (Odds_Ratio - 1) * 100   # 轉換成百分比影響力
  ) %>%
  select(term, Odds_Ratio, Impact_Pct, p.value, Conf_Low, Conf_High) %>%
  filter(term != "(Intercept)") # 移除截距項,只看變數
model_interpretation %>%
  filter(p.value < 0.05) %>%
  arrange(desc(Odds_Ratio)) %>% # 依影響力排序
  knitr::kable(digits = 2, caption = "顯著變數的商業影響力分析")
顯著變數的商業影響力分析
term Odds_Ratio Impact_Pct p.value Conf_Low Conf_High
depart_on_time 4.76 376.14 0 3.37 6.78
seat_rate 2.32 131.70 0 1.89 2.86
meal_rate 1.80 79.98 0 1.44 2.26
coupon 1.01 0.86 0 1.01 1.01

從上表可以看出 “準時出發” 對客戶忠誠度的影響最大,折扣反而沒有想像中大

5. 預測與評估

# 1. 預測:算出每個人的忠誠機率
full_data_pred <- full_data %>%
  mutate(
    pred_prob = predict(model_logit, type = "response"), # 預測機率
    pred_class = ifelse(pred_prob > 0.5, "Yes", "No")    # 設定門檻 0.5
  )

# 2. 建立混淆矩陣
conf_matrix <- table(Predicted = full_data_pred$pred_class, Actual = full_data$is_loyal)

# 顯示矩陣
conf_matrix
##          Actual
## Predicted  No Yes
##       No  206  95
##       Yes 163 536
# 3. 計算準確度
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("模型整體準確度 (Accuracy):", round(accuracy * 100, 2), "%")
## 模型整體準確度 (Accuracy): 74.2 %

6. 視覺化S曲線

ggplot(full_data_pred, aes(x = coupon, y = pred_prob)) +
  # 畫出每個客戶的預測點 (藍色)
  geom_point(alpha = 0.3, color = "steelblue") +
  # 畫出趨勢線 (S-Curve)
  geom_smooth(method = "glm", method.args = list(family = "binomial"), 
              color = "darkred", se = FALSE) +
  theme_minimal() +
  labs(
    title = "折扣金額與客戶忠誠度機率之關係",
    subtitle = "呈現邏輯迴歸經典的 S 型曲線 (Logistic Sigmoid)",
    x = "折扣金額 (USD)",
    y = "預測忠誠機率 (Predicted Probability)"
  ) +
  scale_y_continuous(labels = scales::percent) # Y軸顯示百分比

# 預測機率
full_data_pred <- full_data %>%
  mutate(pred_prob = predict(model_logit, type = "response"))

ggplot(full_data_pred, aes(x = coupon, y = pred_prob, color = factor(depart_on_time))) +
  geom_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE, size = 1.5) +
  theme_minimal() +
  scale_color_manual(values = c("gray", "red"), labels = c("誤點 (0)", "準點 (1)")) +
  labs(
    title = "關鍵洞察:準點的影響力遠大於折扣",
    subtitle = "紅色線 (準點) 的起始機率遠高於灰色線 (誤點),即使折扣增加,誤點的挽回效果也有限。",
    x = "折扣金額 (Coupon Value)",
    y = "預測忠誠機率",
    color = "出發準點與否"
  )

# 7.總結 ## 根據邏輯迴歸模型的分析結果,我們得出以下關鍵結論:準點率是核心關鍵:depart_on_time 的勝率比 (Odds Ratio) 高達 4.76,是所有變數中影響力最強的。 ## 折扣效果有限:雖然折扣金額 (coupon) 統計上顯著,但其邊際效益極低 (OR \(\approx\) 1.01)。 ## 策略建議:建議公司將行銷預算轉移至營運優化,優先確保航班準點率。對於發生誤點的航班,單純發放小額折價券可能無法有效挽回客戶信心。